{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999621656388332, "eval_steps": 500, "global_step": 13215, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.56687223336234e-05, "grad_norm": 272.8875732421875, "learning_rate": 2.7231467473524962e-08, "loss": 2.6021, "step": 1 }, { "epoch": 0.0001513374446672468, "grad_norm": 513.3167114257812, "learning_rate": 5.4462934947049924e-08, "loss": 2.8401, "step": 2 }, { "epoch": 0.0002270061670008702, "grad_norm": 390.2225036621094, "learning_rate": 8.169440242057489e-08, "loss": 2.7988, "step": 3 }, { "epoch": 0.0003026748893344936, "grad_norm": 272.265625, "learning_rate": 1.0892586989409985e-07, "loss": 2.912, "step": 4 }, { "epoch": 0.000378343611668117, "grad_norm": 306.4111633300781, "learning_rate": 1.3615733736762482e-07, "loss": 2.8548, "step": 5 }, { "epoch": 0.0004540123340017404, "grad_norm": 251.80809020996094, "learning_rate": 1.6338880484114979e-07, "loss": 2.885, "step": 6 }, { "epoch": 0.0005296810563353637, "grad_norm": 884.6312866210938, "learning_rate": 1.9062027231467473e-07, "loss": 2.9442, "step": 7 }, { "epoch": 0.0006053497786689872, "grad_norm": 391.5308837890625, "learning_rate": 2.178517397881997e-07, "loss": 3.0565, "step": 8 }, { "epoch": 0.0006810185010026105, "grad_norm": 295.6712646484375, "learning_rate": 2.4508320726172467e-07, "loss": 2.9165, "step": 9 }, { "epoch": 0.000756687223336234, "grad_norm": 733.3089599609375, "learning_rate": 2.7231467473524963e-07, "loss": 2.7356, "step": 10 }, { "epoch": 0.0008323559456698573, "grad_norm": 363.34002685546875, "learning_rate": 2.995461422087746e-07, "loss": 2.6017, "step": 11 }, { "epoch": 0.0009080246680034808, "grad_norm": 256.9432373046875, "learning_rate": 3.2677760968229957e-07, "loss": 2.4895, "step": 12 }, { "epoch": 0.0009836933903371041, "grad_norm": 314.6168212890625, "learning_rate": 3.5400907715582454e-07, "loss": 2.8149, "step": 13 }, { "epoch": 0.0010593621126707275, "grad_norm": 266.22314453125, "learning_rate": 3.8124054462934946e-07, "loss": 2.6049, "step": 14 }, { "epoch": 0.001135030835004351, "grad_norm": 263.47576904296875, "learning_rate": 4.084720121028744e-07, "loss": 2.5644, "step": 15 }, { "epoch": 0.0012106995573379744, "grad_norm": 223.0679473876953, "learning_rate": 4.357034795763994e-07, "loss": 2.6094, "step": 16 }, { "epoch": 0.0012863682796715977, "grad_norm": 248.41636657714844, "learning_rate": 4.629349470499244e-07, "loss": 2.4842, "step": 17 }, { "epoch": 0.001362037002005221, "grad_norm": 826.5869750976562, "learning_rate": 4.901664145234493e-07, "loss": 2.6023, "step": 18 }, { "epoch": 0.0014377057243388446, "grad_norm": 265.9122009277344, "learning_rate": 5.173978819969742e-07, "loss": 2.4341, "step": 19 }, { "epoch": 0.001513374446672468, "grad_norm": 190.68650817871094, "learning_rate": 5.446293494704993e-07, "loss": 2.3036, "step": 20 }, { "epoch": 0.0015890431690060913, "grad_norm": 184.1608123779297, "learning_rate": 5.718608169440242e-07, "loss": 2.3596, "step": 21 }, { "epoch": 0.0016647118913397146, "grad_norm": 425.2568054199219, "learning_rate": 5.990922844175492e-07, "loss": 2.4074, "step": 22 }, { "epoch": 0.0017403806136733382, "grad_norm": 203.991455078125, "learning_rate": 6.263237518910741e-07, "loss": 2.2616, "step": 23 }, { "epoch": 0.0018160493360069615, "grad_norm": 135.3365020751953, "learning_rate": 6.535552193645991e-07, "loss": 2.1286, "step": 24 }, { "epoch": 0.0018917180583405849, "grad_norm": 136.6835174560547, "learning_rate": 6.807866868381241e-07, "loss": 2.404, "step": 25 }, { "epoch": 0.0019673867806742082, "grad_norm": 105.73369598388672, "learning_rate": 7.080181543116491e-07, "loss": 2.2063, "step": 26 }, { "epoch": 0.002043055503007832, "grad_norm": 86.31427001953125, "learning_rate": 7.352496217851739e-07, "loss": 1.9829, "step": 27 }, { "epoch": 0.002118724225341455, "grad_norm": 124.91061401367188, "learning_rate": 7.624810892586989e-07, "loss": 1.7351, "step": 28 }, { "epoch": 0.0021943929476750785, "grad_norm": 48.06635284423828, "learning_rate": 7.897125567322239e-07, "loss": 2.0282, "step": 29 }, { "epoch": 0.002270061670008702, "grad_norm": 85.16302490234375, "learning_rate": 8.169440242057488e-07, "loss": 1.6746, "step": 30 }, { "epoch": 0.002345730392342325, "grad_norm": 54.819854736328125, "learning_rate": 8.441754916792739e-07, "loss": 1.9421, "step": 31 }, { "epoch": 0.0024213991146759487, "grad_norm": 44.45909881591797, "learning_rate": 8.714069591527988e-07, "loss": 1.6696, "step": 32 }, { "epoch": 0.0024970678370095723, "grad_norm": 46.482994079589844, "learning_rate": 8.986384266263238e-07, "loss": 1.7952, "step": 33 }, { "epoch": 0.0025727365593431954, "grad_norm": 117.33851623535156, "learning_rate": 9.258698940998488e-07, "loss": 1.6118, "step": 34 }, { "epoch": 0.002648405281676819, "grad_norm": 40.68963623046875, "learning_rate": 9.531013615733736e-07, "loss": 2.1438, "step": 35 }, { "epoch": 0.002724074004010442, "grad_norm": 50.864559173583984, "learning_rate": 9.803328290468987e-07, "loss": 1.9518, "step": 36 }, { "epoch": 0.0027997427263440657, "grad_norm": 55.14411544799805, "learning_rate": 1.0075642965204236e-06, "loss": 1.8224, "step": 37 }, { "epoch": 0.002875411448677689, "grad_norm": 112.16168975830078, "learning_rate": 1.0347957639939485e-06, "loss": 1.5419, "step": 38 }, { "epoch": 0.0029510801710113123, "grad_norm": 37.99906539916992, "learning_rate": 1.0620272314674736e-06, "loss": 1.5378, "step": 39 }, { "epoch": 0.003026748893344936, "grad_norm": 85.35050964355469, "learning_rate": 1.0892586989409985e-06, "loss": 1.6139, "step": 40 }, { "epoch": 0.0031024176156785595, "grad_norm": 50.46384048461914, "learning_rate": 1.1164901664145235e-06, "loss": 1.6364, "step": 41 }, { "epoch": 0.0031780863380121826, "grad_norm": 74.24533081054688, "learning_rate": 1.1437216338880484e-06, "loss": 1.8635, "step": 42 }, { "epoch": 0.003253755060345806, "grad_norm": 75.34041595458984, "learning_rate": 1.1709531013615733e-06, "loss": 1.6906, "step": 43 }, { "epoch": 0.0033294237826794293, "grad_norm": 63.062156677246094, "learning_rate": 1.1981845688350984e-06, "loss": 1.5042, "step": 44 }, { "epoch": 0.003405092505013053, "grad_norm": 80.37957763671875, "learning_rate": 1.2254160363086233e-06, "loss": 1.5012, "step": 45 }, { "epoch": 0.0034807612273466764, "grad_norm": 35.123077392578125, "learning_rate": 1.2526475037821482e-06, "loss": 1.3791, "step": 46 }, { "epoch": 0.0035564299496802995, "grad_norm": 64.57160949707031, "learning_rate": 1.2798789712556734e-06, "loss": 1.5318, "step": 47 }, { "epoch": 0.003632098672013923, "grad_norm": 44.77900314331055, "learning_rate": 1.3071104387291983e-06, "loss": 1.4773, "step": 48 }, { "epoch": 0.0037077673943475466, "grad_norm": 29.615036010742188, "learning_rate": 1.3343419062027232e-06, "loss": 1.3712, "step": 49 }, { "epoch": 0.0037834361166811698, "grad_norm": 45.1904411315918, "learning_rate": 1.3615733736762481e-06, "loss": 1.539, "step": 50 }, { "epoch": 0.0038591048390147933, "grad_norm": 26.724008560180664, "learning_rate": 1.3888048411497732e-06, "loss": 1.2884, "step": 51 }, { "epoch": 0.0039347735613484165, "grad_norm": 26.483842849731445, "learning_rate": 1.4160363086232982e-06, "loss": 1.2467, "step": 52 }, { "epoch": 0.00401044228368204, "grad_norm": 25.459138870239258, "learning_rate": 1.443267776096823e-06, "loss": 1.376, "step": 53 }, { "epoch": 0.004086111006015664, "grad_norm": 612.6622314453125, "learning_rate": 1.4704992435703478e-06, "loss": 1.3352, "step": 54 }, { "epoch": 0.004161779728349287, "grad_norm": 12.120221138000488, "learning_rate": 1.497730711043873e-06, "loss": 1.1906, "step": 55 }, { "epoch": 0.00423744845068291, "grad_norm": 49.24264144897461, "learning_rate": 1.5249621785173978e-06, "loss": 1.23, "step": 56 }, { "epoch": 0.004313117173016533, "grad_norm": 22.598411560058594, "learning_rate": 1.5521936459909227e-06, "loss": 1.2773, "step": 57 }, { "epoch": 0.004388785895350157, "grad_norm": 22.006301879882812, "learning_rate": 1.5794251134644479e-06, "loss": 1.381, "step": 58 }, { "epoch": 0.0044644546176837805, "grad_norm": 20.625085830688477, "learning_rate": 1.6066565809379728e-06, "loss": 1.3473, "step": 59 }, { "epoch": 0.004540123340017404, "grad_norm": 29.52547264099121, "learning_rate": 1.6338880484114977e-06, "loss": 1.3312, "step": 60 }, { "epoch": 0.004615792062351028, "grad_norm": 13.293269157409668, "learning_rate": 1.6611195158850228e-06, "loss": 1.0988, "step": 61 }, { "epoch": 0.00469146078468465, "grad_norm": 9.618423461914062, "learning_rate": 1.6883509833585477e-06, "loss": 1.1596, "step": 62 }, { "epoch": 0.004767129507018274, "grad_norm": 9.537832260131836, "learning_rate": 1.7155824508320727e-06, "loss": 1.1767, "step": 63 }, { "epoch": 0.0048427982293518974, "grad_norm": 17.054410934448242, "learning_rate": 1.7428139183055976e-06, "loss": 1.2305, "step": 64 }, { "epoch": 0.004918466951685521, "grad_norm": 23.415481567382812, "learning_rate": 1.7700453857791227e-06, "loss": 1.1254, "step": 65 }, { "epoch": 0.0049941356740191446, "grad_norm": 6.73915958404541, "learning_rate": 1.7972768532526476e-06, "loss": 1.049, "step": 66 }, { "epoch": 0.005069804396352767, "grad_norm": 21.562658309936523, "learning_rate": 1.8245083207261725e-06, "loss": 1.1231, "step": 67 }, { "epoch": 0.005145473118686391, "grad_norm": 16.345674514770508, "learning_rate": 1.8517397881996977e-06, "loss": 1.0867, "step": 68 }, { "epoch": 0.005221141841020014, "grad_norm": 10.502206802368164, "learning_rate": 1.8789712556732226e-06, "loss": 1.3118, "step": 69 }, { "epoch": 0.005296810563353638, "grad_norm": 8.155364036560059, "learning_rate": 1.9062027231467473e-06, "loss": 1.2176, "step": 70 }, { "epoch": 0.0053724792856872615, "grad_norm": 18.021318435668945, "learning_rate": 1.933434190620272e-06, "loss": 1.1159, "step": 71 }, { "epoch": 0.005448148008020884, "grad_norm": 10.645997047424316, "learning_rate": 1.9606656580937973e-06, "loss": 1.3196, "step": 72 }, { "epoch": 0.005523816730354508, "grad_norm": 12.845873832702637, "learning_rate": 1.987897125567322e-06, "loss": 1.1641, "step": 73 }, { "epoch": 0.005599485452688131, "grad_norm": 7.4295148849487305, "learning_rate": 2.015128593040847e-06, "loss": 1.2317, "step": 74 }, { "epoch": 0.005675154175021755, "grad_norm": 12.633703231811523, "learning_rate": 2.0423600605143723e-06, "loss": 1.207, "step": 75 }, { "epoch": 0.005750822897355378, "grad_norm": 12.621756553649902, "learning_rate": 2.069591527987897e-06, "loss": 1.2415, "step": 76 }, { "epoch": 0.005826491619689002, "grad_norm": 7.364112377166748, "learning_rate": 2.096822995461422e-06, "loss": 1.2141, "step": 77 }, { "epoch": 0.005902160342022625, "grad_norm": 7.511165142059326, "learning_rate": 2.1240544629349472e-06, "loss": 1.0121, "step": 78 }, { "epoch": 0.005977829064356248, "grad_norm": 14.060636520385742, "learning_rate": 2.151285930408472e-06, "loss": 1.1539, "step": 79 }, { "epoch": 0.006053497786689872, "grad_norm": 49.88886642456055, "learning_rate": 2.178517397881997e-06, "loss": 1.0104, "step": 80 }, { "epoch": 0.006129166509023495, "grad_norm": 6.6888017654418945, "learning_rate": 2.205748865355522e-06, "loss": 0.9701, "step": 81 }, { "epoch": 0.006204835231357119, "grad_norm": 7.409695148468018, "learning_rate": 2.232980332829047e-06, "loss": 1.0943, "step": 82 }, { "epoch": 0.006280503953690742, "grad_norm": 9.27059268951416, "learning_rate": 2.260211800302572e-06, "loss": 1.1012, "step": 83 }, { "epoch": 0.006356172676024365, "grad_norm": 8.880078315734863, "learning_rate": 2.2874432677760967e-06, "loss": 1.0485, "step": 84 }, { "epoch": 0.006431841398357989, "grad_norm": 8.912999153137207, "learning_rate": 2.314674735249622e-06, "loss": 1.0787, "step": 85 }, { "epoch": 0.006507510120691612, "grad_norm": 6.5734028816223145, "learning_rate": 2.3419062027231466e-06, "loss": 1.1154, "step": 86 }, { "epoch": 0.006583178843025236, "grad_norm": 6.327483654022217, "learning_rate": 2.369137670196672e-06, "loss": 1.1694, "step": 87 }, { "epoch": 0.0066588475653588585, "grad_norm": 12.942300796508789, "learning_rate": 2.396369137670197e-06, "loss": 0.9813, "step": 88 }, { "epoch": 0.006734516287692482, "grad_norm": 10.303013801574707, "learning_rate": 2.423600605143722e-06, "loss": 1.0833, "step": 89 }, { "epoch": 0.006810185010026106, "grad_norm": 7.25071907043457, "learning_rate": 2.4508320726172467e-06, "loss": 1.1512, "step": 90 }, { "epoch": 0.006885853732359729, "grad_norm": 4.276927471160889, "learning_rate": 2.4780635400907718e-06, "loss": 1.1131, "step": 91 }, { "epoch": 0.006961522454693353, "grad_norm": 11.979231834411621, "learning_rate": 2.5052950075642965e-06, "loss": 1.1562, "step": 92 }, { "epoch": 0.0070371911770269755, "grad_norm": 6.277972221374512, "learning_rate": 2.532526475037821e-06, "loss": 1.1771, "step": 93 }, { "epoch": 0.007112859899360599, "grad_norm": 7.063577175140381, "learning_rate": 2.5597579425113467e-06, "loss": 0.9926, "step": 94 }, { "epoch": 0.007188528621694223, "grad_norm": 8.788790702819824, "learning_rate": 2.5869894099848714e-06, "loss": 0.9004, "step": 95 }, { "epoch": 0.007264197344027846, "grad_norm": 7.295443058013916, "learning_rate": 2.6142208774583966e-06, "loss": 1.1524, "step": 96 }, { "epoch": 0.00733986606636147, "grad_norm": 9.583985328674316, "learning_rate": 2.6414523449319213e-06, "loss": 1.1887, "step": 97 }, { "epoch": 0.007415534788695093, "grad_norm": 7.807141304016113, "learning_rate": 2.6686838124054464e-06, "loss": 1.0794, "step": 98 }, { "epoch": 0.007491203511028716, "grad_norm": 9.609314918518066, "learning_rate": 2.695915279878971e-06, "loss": 1.0667, "step": 99 }, { "epoch": 0.0075668722333623395, "grad_norm": 7.190148830413818, "learning_rate": 2.7231467473524962e-06, "loss": 1.1177, "step": 100 }, { "epoch": 0.007642540955695963, "grad_norm": 5.9306206703186035, "learning_rate": 2.750378214826021e-06, "loss": 1.0946, "step": 101 }, { "epoch": 0.007718209678029587, "grad_norm": 6.257357597351074, "learning_rate": 2.7776096822995465e-06, "loss": 1.1852, "step": 102 }, { "epoch": 0.00779387840036321, "grad_norm": 4.176384449005127, "learning_rate": 2.804841149773071e-06, "loss": 1.1085, "step": 103 }, { "epoch": 0.007869547122696833, "grad_norm": 5.795867919921875, "learning_rate": 2.8320726172465963e-06, "loss": 1.064, "step": 104 }, { "epoch": 0.007945215845030457, "grad_norm": 87.45132446289062, "learning_rate": 2.859304084720121e-06, "loss": 1.0562, "step": 105 }, { "epoch": 0.00802088456736408, "grad_norm": 5.800858020782471, "learning_rate": 2.886535552193646e-06, "loss": 1.213, "step": 106 }, { "epoch": 0.008096553289697703, "grad_norm": 6.449529647827148, "learning_rate": 2.913767019667171e-06, "loss": 1.2756, "step": 107 }, { "epoch": 0.008172222012031327, "grad_norm": 15.208488464355469, "learning_rate": 2.9409984871406956e-06, "loss": 1.1593, "step": 108 }, { "epoch": 0.00824789073436495, "grad_norm": 6.07266902923584, "learning_rate": 2.968229954614221e-06, "loss": 0.9689, "step": 109 }, { "epoch": 0.008323559456698574, "grad_norm": 5.045470714569092, "learning_rate": 2.995461422087746e-06, "loss": 1.088, "step": 110 }, { "epoch": 0.008399228179032197, "grad_norm": 8.590188980102539, "learning_rate": 3.022692889561271e-06, "loss": 1.0692, "step": 111 }, { "epoch": 0.00847489690136582, "grad_norm": 9.020132064819336, "learning_rate": 3.0499243570347956e-06, "loss": 1.0444, "step": 112 }, { "epoch": 0.008550565623699444, "grad_norm": 5.601812362670898, "learning_rate": 3.0771558245083208e-06, "loss": 1.0266, "step": 113 }, { "epoch": 0.008626234346033067, "grad_norm": 3.23660945892334, "learning_rate": 3.1043872919818455e-06, "loss": 1.0381, "step": 114 }, { "epoch": 0.008701903068366691, "grad_norm": 4.900957107543945, "learning_rate": 3.131618759455371e-06, "loss": 1.1345, "step": 115 }, { "epoch": 0.008777571790700314, "grad_norm": 7.415119171142578, "learning_rate": 3.1588502269288957e-06, "loss": 0.9433, "step": 116 }, { "epoch": 0.008853240513033937, "grad_norm": 4.038257122039795, "learning_rate": 3.186081694402421e-06, "loss": 1.1984, "step": 117 }, { "epoch": 0.008928909235367561, "grad_norm": 4.441867828369141, "learning_rate": 3.2133131618759456e-06, "loss": 1.0773, "step": 118 }, { "epoch": 0.009004577957701184, "grad_norm": 5.363611698150635, "learning_rate": 3.2405446293494707e-06, "loss": 1.1884, "step": 119 }, { "epoch": 0.009080246680034808, "grad_norm": 6.429077625274658, "learning_rate": 3.2677760968229954e-06, "loss": 1.0864, "step": 120 }, { "epoch": 0.00915591540236843, "grad_norm": 4.815485000610352, "learning_rate": 3.2950075642965205e-06, "loss": 1.0554, "step": 121 }, { "epoch": 0.009231584124702055, "grad_norm": 11.878281593322754, "learning_rate": 3.3222390317700457e-06, "loss": 1.2129, "step": 122 }, { "epoch": 0.009307252847035678, "grad_norm": 6.188513278961182, "learning_rate": 3.3494704992435704e-06, "loss": 1.0672, "step": 123 }, { "epoch": 0.0093829215693693, "grad_norm": 4.279908657073975, "learning_rate": 3.3767019667170955e-06, "loss": 1.1063, "step": 124 }, { "epoch": 0.009458590291702925, "grad_norm": 8.566632270812988, "learning_rate": 3.40393343419062e-06, "loss": 0.9051, "step": 125 }, { "epoch": 0.009534259014036548, "grad_norm": 10.101946830749512, "learning_rate": 3.4311649016641453e-06, "loss": 1.1801, "step": 126 }, { "epoch": 0.009609927736370172, "grad_norm": 3.696890115737915, "learning_rate": 3.45839636913767e-06, "loss": 1.1499, "step": 127 }, { "epoch": 0.009685596458703795, "grad_norm": 4.319789409637451, "learning_rate": 3.485627836611195e-06, "loss": 1.1783, "step": 128 }, { "epoch": 0.009761265181037418, "grad_norm": 3.6111180782318115, "learning_rate": 3.51285930408472e-06, "loss": 1.2035, "step": 129 }, { "epoch": 0.009836933903371042, "grad_norm": 6.946579933166504, "learning_rate": 3.5400907715582454e-06, "loss": 1.429, "step": 130 }, { "epoch": 0.009912602625704665, "grad_norm": 4.380500316619873, "learning_rate": 3.56732223903177e-06, "loss": 0.9982, "step": 131 }, { "epoch": 0.009988271348038289, "grad_norm": 110.80493927001953, "learning_rate": 3.5945537065052952e-06, "loss": 1.053, "step": 132 }, { "epoch": 0.010063940070371912, "grad_norm": 4.770391941070557, "learning_rate": 3.62178517397882e-06, "loss": 0.9609, "step": 133 }, { "epoch": 0.010139608792705535, "grad_norm": 4.621954441070557, "learning_rate": 3.649016641452345e-06, "loss": 1.0428, "step": 134 }, { "epoch": 0.010215277515039159, "grad_norm": 4.873030185699463, "learning_rate": 3.6762481089258698e-06, "loss": 1.0355, "step": 135 }, { "epoch": 0.010290946237372782, "grad_norm": 5.315187931060791, "learning_rate": 3.7034795763993953e-06, "loss": 1.2877, "step": 136 }, { "epoch": 0.010366614959706406, "grad_norm": 3.4662725925445557, "learning_rate": 3.73071104387292e-06, "loss": 0.9696, "step": 137 }, { "epoch": 0.010442283682040029, "grad_norm": 6.4335784912109375, "learning_rate": 3.757942511346445e-06, "loss": 1.0649, "step": 138 }, { "epoch": 0.010517952404373651, "grad_norm": 5.575058937072754, "learning_rate": 3.78517397881997e-06, "loss": 1.2124, "step": 139 }, { "epoch": 0.010593621126707276, "grad_norm": 4.741557598114014, "learning_rate": 3.8124054462934946e-06, "loss": 1.0165, "step": 140 }, { "epoch": 0.010669289849040899, "grad_norm": 6.094121932983398, "learning_rate": 3.83963691376702e-06, "loss": 1.1381, "step": 141 }, { "epoch": 0.010744958571374523, "grad_norm": 3.2143642902374268, "learning_rate": 3.866868381240544e-06, "loss": 1.1538, "step": 142 }, { "epoch": 0.010820627293708146, "grad_norm": 8.476461410522461, "learning_rate": 3.89409984871407e-06, "loss": 0.9853, "step": 143 }, { "epoch": 0.010896296016041768, "grad_norm": 5.929498672485352, "learning_rate": 3.921331316187595e-06, "loss": 1.2708, "step": 144 }, { "epoch": 0.010971964738375393, "grad_norm": 3.594865560531616, "learning_rate": 3.948562783661119e-06, "loss": 1.077, "step": 145 }, { "epoch": 0.011047633460709015, "grad_norm": 5.143126964569092, "learning_rate": 3.975794251134644e-06, "loss": 0.8743, "step": 146 }, { "epoch": 0.01112330218304264, "grad_norm": 3.9312331676483154, "learning_rate": 4.00302571860817e-06, "loss": 1.0474, "step": 147 }, { "epoch": 0.011198970905376263, "grad_norm": 8.739069938659668, "learning_rate": 4.030257186081694e-06, "loss": 1.1, "step": 148 }, { "epoch": 0.011274639627709885, "grad_norm": 4.785227298736572, "learning_rate": 4.05748865355522e-06, "loss": 1.0795, "step": 149 }, { "epoch": 0.01135030835004351, "grad_norm": 4.471089839935303, "learning_rate": 4.0847201210287446e-06, "loss": 1.0992, "step": 150 }, { "epoch": 0.011425977072377132, "grad_norm": 9.019781112670898, "learning_rate": 4.111951588502269e-06, "loss": 1.0866, "step": 151 }, { "epoch": 0.011501645794710757, "grad_norm": 5.071846961975098, "learning_rate": 4.139183055975794e-06, "loss": 1.1729, "step": 152 }, { "epoch": 0.01157731451704438, "grad_norm": 4.376003742218018, "learning_rate": 4.1664145234493195e-06, "loss": 1.203, "step": 153 }, { "epoch": 0.011652983239378004, "grad_norm": 6.377297878265381, "learning_rate": 4.193645990922844e-06, "loss": 1.1541, "step": 154 }, { "epoch": 0.011728651961711627, "grad_norm": 7.824789047241211, "learning_rate": 4.220877458396369e-06, "loss": 1.0479, "step": 155 }, { "epoch": 0.01180432068404525, "grad_norm": 4.779434680938721, "learning_rate": 4.2481089258698945e-06, "loss": 1.0529, "step": 156 }, { "epoch": 0.011879989406378874, "grad_norm": 8.425029754638672, "learning_rate": 4.275340393343419e-06, "loss": 1.0992, "step": 157 }, { "epoch": 0.011955658128712496, "grad_norm": 5.336421966552734, "learning_rate": 4.302571860816944e-06, "loss": 1.0226, "step": 158 }, { "epoch": 0.012031326851046121, "grad_norm": 7.749419212341309, "learning_rate": 4.329803328290469e-06, "loss": 1.0299, "step": 159 }, { "epoch": 0.012106995573379744, "grad_norm": 8.775020599365234, "learning_rate": 4.357034795763994e-06, "loss": 1.2269, "step": 160 }, { "epoch": 0.012182664295713366, "grad_norm": 6.738064289093018, "learning_rate": 4.384266263237519e-06, "loss": 1.0477, "step": 161 }, { "epoch": 0.01225833301804699, "grad_norm": 4.490780353546143, "learning_rate": 4.411497730711044e-06, "loss": 1.0986, "step": 162 }, { "epoch": 0.012334001740380613, "grad_norm": 4.881013870239258, "learning_rate": 4.438729198184569e-06, "loss": 1.0979, "step": 163 }, { "epoch": 0.012409670462714238, "grad_norm": 3.8033907413482666, "learning_rate": 4.465960665658094e-06, "loss": 1.2036, "step": 164 }, { "epoch": 0.01248533918504786, "grad_norm": 4.627386093139648, "learning_rate": 4.4931921331316185e-06, "loss": 1.0643, "step": 165 }, { "epoch": 0.012561007907381483, "grad_norm": 4.1467108726501465, "learning_rate": 4.520423600605144e-06, "loss": 1.0594, "step": 166 }, { "epoch": 0.012636676629715108, "grad_norm": 3.9435312747955322, "learning_rate": 4.54765506807867e-06, "loss": 1.0522, "step": 167 }, { "epoch": 0.01271234535204873, "grad_norm": 7.734802722930908, "learning_rate": 4.5748865355521935e-06, "loss": 1.1185, "step": 168 }, { "epoch": 0.012788014074382355, "grad_norm": 11.209372520446777, "learning_rate": 4.602118003025719e-06, "loss": 1.0703, "step": 169 }, { "epoch": 0.012863682796715977, "grad_norm": 4.650092124938965, "learning_rate": 4.629349470499244e-06, "loss": 0.9494, "step": 170 }, { "epoch": 0.0129393515190496, "grad_norm": 3.6348683834075928, "learning_rate": 4.6565809379727684e-06, "loss": 0.9492, "step": 171 }, { "epoch": 0.013015020241383225, "grad_norm": 4.223328590393066, "learning_rate": 4.683812405446293e-06, "loss": 1.0867, "step": 172 }, { "epoch": 0.013090688963716847, "grad_norm": 3.262395143508911, "learning_rate": 4.711043872919819e-06, "loss": 1.0776, "step": 173 }, { "epoch": 0.013166357686050472, "grad_norm": 8.45308780670166, "learning_rate": 4.738275340393344e-06, "loss": 0.9743, "step": 174 }, { "epoch": 0.013242026408384094, "grad_norm": 6.289074420928955, "learning_rate": 4.765506807866868e-06, "loss": 1.0345, "step": 175 }, { "epoch": 0.013317695130717717, "grad_norm": 3.5306406021118164, "learning_rate": 4.792738275340394e-06, "loss": 0.9317, "step": 176 }, { "epoch": 0.013393363853051342, "grad_norm": 2.8451457023620605, "learning_rate": 4.819969742813918e-06, "loss": 1.1339, "step": 177 }, { "epoch": 0.013469032575384964, "grad_norm": 4.440673351287842, "learning_rate": 4.847201210287444e-06, "loss": 1.1597, "step": 178 }, { "epoch": 0.013544701297718589, "grad_norm": 4.818862438201904, "learning_rate": 4.874432677760968e-06, "loss": 1.2254, "step": 179 }, { "epoch": 0.013620370020052211, "grad_norm": 28.1450138092041, "learning_rate": 4.901664145234493e-06, "loss": 1.0227, "step": 180 }, { "epoch": 0.013696038742385834, "grad_norm": 5.457899570465088, "learning_rate": 4.928895612708019e-06, "loss": 0.9296, "step": 181 }, { "epoch": 0.013771707464719458, "grad_norm": 7.308279514312744, "learning_rate": 4.9561270801815436e-06, "loss": 1.0794, "step": 182 }, { "epoch": 0.013847376187053081, "grad_norm": 4.073877811431885, "learning_rate": 4.983358547655068e-06, "loss": 1.0149, "step": 183 }, { "epoch": 0.013923044909386706, "grad_norm": 4.441009044647217, "learning_rate": 5.010590015128593e-06, "loss": 1.1169, "step": 184 }, { "epoch": 0.013998713631720328, "grad_norm": 3.17683744430542, "learning_rate": 5.0378214826021185e-06, "loss": 0.9667, "step": 185 }, { "epoch": 0.014074382354053951, "grad_norm": 5.486913204193115, "learning_rate": 5.065052950075642e-06, "loss": 0.9504, "step": 186 }, { "epoch": 0.014150051076387575, "grad_norm": 3.611471176147461, "learning_rate": 5.092284417549168e-06, "loss": 1.0478, "step": 187 }, { "epoch": 0.014225719798721198, "grad_norm": 4.8300981521606445, "learning_rate": 5.1195158850226935e-06, "loss": 1.0817, "step": 188 }, { "epoch": 0.014301388521054823, "grad_norm": 13.371825218200684, "learning_rate": 5.146747352496218e-06, "loss": 1.046, "step": 189 }, { "epoch": 0.014377057243388445, "grad_norm": 4.5194621086120605, "learning_rate": 5.173978819969743e-06, "loss": 0.9503, "step": 190 }, { "epoch": 0.01445272596572207, "grad_norm": 4.531895637512207, "learning_rate": 5.201210287443268e-06, "loss": 0.8728, "step": 191 }, { "epoch": 0.014528394688055692, "grad_norm": 4.822807788848877, "learning_rate": 5.228441754916793e-06, "loss": 0.9806, "step": 192 }, { "epoch": 0.014604063410389315, "grad_norm": 2.7862422466278076, "learning_rate": 5.255673222390318e-06, "loss": 1.0006, "step": 193 }, { "epoch": 0.01467973213272294, "grad_norm": 4.66079044342041, "learning_rate": 5.2829046898638426e-06, "loss": 1.0805, "step": 194 }, { "epoch": 0.014755400855056562, "grad_norm": 3.729022264480591, "learning_rate": 5.310136157337367e-06, "loss": 1.1511, "step": 195 }, { "epoch": 0.014831069577390187, "grad_norm": 2.492928981781006, "learning_rate": 5.337367624810893e-06, "loss": 1.0107, "step": 196 }, { "epoch": 0.01490673829972381, "grad_norm": 5.297464370727539, "learning_rate": 5.364599092284418e-06, "loss": 1.0515, "step": 197 }, { "epoch": 0.014982407022057432, "grad_norm": 5.455732345581055, "learning_rate": 5.391830559757942e-06, "loss": 0.9973, "step": 198 }, { "epoch": 0.015058075744391056, "grad_norm": 10.293705940246582, "learning_rate": 5.419062027231468e-06, "loss": 1.0379, "step": 199 }, { "epoch": 0.015133744466724679, "grad_norm": 6.046285152435303, "learning_rate": 5.4462934947049925e-06, "loss": 1.0908, "step": 200 }, { "epoch": 0.015209413189058303, "grad_norm": 19.921154022216797, "learning_rate": 5.473524962178517e-06, "loss": 0.9654, "step": 201 }, { "epoch": 0.015285081911391926, "grad_norm": 3.6806235313415527, "learning_rate": 5.500756429652042e-06, "loss": 1.1749, "step": 202 }, { "epoch": 0.015360750633725549, "grad_norm": 17.536718368530273, "learning_rate": 5.5279878971255674e-06, "loss": 0.9876, "step": 203 }, { "epoch": 0.015436419356059173, "grad_norm": 6.348948955535889, "learning_rate": 5.555219364599093e-06, "loss": 1.1173, "step": 204 }, { "epoch": 0.015512088078392796, "grad_norm": 3.97938871383667, "learning_rate": 5.582450832072617e-06, "loss": 0.9842, "step": 205 }, { "epoch": 0.01558775680072642, "grad_norm": 22.651533126831055, "learning_rate": 5.609682299546142e-06, "loss": 1.2029, "step": 206 }, { "epoch": 0.015663425523060045, "grad_norm": 2.902076005935669, "learning_rate": 5.636913767019667e-06, "loss": 0.9287, "step": 207 }, { "epoch": 0.015739094245393666, "grad_norm": 4.252765655517578, "learning_rate": 5.664145234493193e-06, "loss": 1.1368, "step": 208 }, { "epoch": 0.01581476296772729, "grad_norm": 7.302497386932373, "learning_rate": 5.6913767019667165e-06, "loss": 0.909, "step": 209 }, { "epoch": 0.015890431690060915, "grad_norm": 3.428435802459717, "learning_rate": 5.718608169440242e-06, "loss": 0.8431, "step": 210 }, { "epoch": 0.015966100412394536, "grad_norm": 26.798280715942383, "learning_rate": 5.745839636913768e-06, "loss": 1.055, "step": 211 }, { "epoch": 0.01604176913472816, "grad_norm": 2.7646429538726807, "learning_rate": 5.773071104387292e-06, "loss": 0.9447, "step": 212 }, { "epoch": 0.016117437857061784, "grad_norm": 41.349822998046875, "learning_rate": 5.800302571860817e-06, "loss": 0.9701, "step": 213 }, { "epoch": 0.016193106579395405, "grad_norm": 3.406937599182129, "learning_rate": 5.827534039334342e-06, "loss": 0.8292, "step": 214 }, { "epoch": 0.01626877530172903, "grad_norm": 4.102505207061768, "learning_rate": 5.854765506807867e-06, "loss": 1.0225, "step": 215 }, { "epoch": 0.016344444024062654, "grad_norm": 3.140641689300537, "learning_rate": 5.881996974281391e-06, "loss": 1.0587, "step": 216 }, { "epoch": 0.01642011274639628, "grad_norm": 3.22949481010437, "learning_rate": 5.909228441754917e-06, "loss": 0.9173, "step": 217 }, { "epoch": 0.0164957814687299, "grad_norm": 3.503279209136963, "learning_rate": 5.936459909228442e-06, "loss": 1.0667, "step": 218 }, { "epoch": 0.016571450191063524, "grad_norm": 3.5009310245513916, "learning_rate": 5.963691376701967e-06, "loss": 0.9895, "step": 219 }, { "epoch": 0.01664711891339715, "grad_norm": 2.58160400390625, "learning_rate": 5.990922844175492e-06, "loss": 0.995, "step": 220 }, { "epoch": 0.01672278763573077, "grad_norm": 3.4536960124969482, "learning_rate": 6.018154311649016e-06, "loss": 0.965, "step": 221 }, { "epoch": 0.016798456358064394, "grad_norm": 4.148844242095947, "learning_rate": 6.045385779122542e-06, "loss": 1.1884, "step": 222 }, { "epoch": 0.01687412508039802, "grad_norm": 3.2756500244140625, "learning_rate": 6.0726172465960674e-06, "loss": 1.0833, "step": 223 }, { "epoch": 0.01694979380273164, "grad_norm": 11.904067993164062, "learning_rate": 6.099848714069591e-06, "loss": 0.9768, "step": 224 }, { "epoch": 0.017025462525065264, "grad_norm": 3.4661612510681152, "learning_rate": 6.127080181543117e-06, "loss": 1.0659, "step": 225 }, { "epoch": 0.017101131247398888, "grad_norm": 4.6939167976379395, "learning_rate": 6.1543116490166416e-06, "loss": 1.0258, "step": 226 }, { "epoch": 0.017176799969732513, "grad_norm": 2.9636013507843018, "learning_rate": 6.181543116490167e-06, "loss": 1.0982, "step": 227 }, { "epoch": 0.017252468692066134, "grad_norm": 4.7203826904296875, "learning_rate": 6.208774583963691e-06, "loss": 1.1271, "step": 228 }, { "epoch": 0.017328137414399758, "grad_norm": 16.543560028076172, "learning_rate": 6.2360060514372165e-06, "loss": 1.0417, "step": 229 }, { "epoch": 0.017403806136733382, "grad_norm": 3.9379353523254395, "learning_rate": 6.263237518910742e-06, "loss": 1.0619, "step": 230 }, { "epoch": 0.017479474859067003, "grad_norm": 3.562490701675415, "learning_rate": 6.290468986384266e-06, "loss": 0.8995, "step": 231 }, { "epoch": 0.017555143581400628, "grad_norm": 3.5367138385772705, "learning_rate": 6.3177004538577915e-06, "loss": 1.0685, "step": 232 }, { "epoch": 0.017630812303734252, "grad_norm": 4.708889961242676, "learning_rate": 6.344931921331316e-06, "loss": 0.9722, "step": 233 }, { "epoch": 0.017706481026067873, "grad_norm": 3.517542600631714, "learning_rate": 6.372163388804842e-06, "loss": 1.0182, "step": 234 }, { "epoch": 0.017782149748401498, "grad_norm": 5.070290565490723, "learning_rate": 6.399394856278366e-06, "loss": 0.9933, "step": 235 }, { "epoch": 0.017857818470735122, "grad_norm": 3.898589849472046, "learning_rate": 6.426626323751891e-06, "loss": 0.8593, "step": 236 }, { "epoch": 0.017933487193068746, "grad_norm": 3.1231884956359863, "learning_rate": 6.453857791225417e-06, "loss": 1.2586, "step": 237 }, { "epoch": 0.018009155915402367, "grad_norm": 4.742455005645752, "learning_rate": 6.481089258698941e-06, "loss": 1.0591, "step": 238 }, { "epoch": 0.018084824637735992, "grad_norm": 5.456655502319336, "learning_rate": 6.508320726172466e-06, "loss": 0.7356, "step": 239 }, { "epoch": 0.018160493360069616, "grad_norm": 3.905632495880127, "learning_rate": 6.535552193645991e-06, "loss": 0.881, "step": 240 }, { "epoch": 0.018236162082403237, "grad_norm": 2.6957733631134033, "learning_rate": 6.562783661119516e-06, "loss": 1.0049, "step": 241 }, { "epoch": 0.01831183080473686, "grad_norm": 3.1716785430908203, "learning_rate": 6.590015128593041e-06, "loss": 0.981, "step": 242 }, { "epoch": 0.018387499527070486, "grad_norm": 3.713045597076416, "learning_rate": 6.617246596066566e-06, "loss": 0.9511, "step": 243 }, { "epoch": 0.01846316824940411, "grad_norm": 6.078882694244385, "learning_rate": 6.644478063540091e-06, "loss": 0.8243, "step": 244 }, { "epoch": 0.01853883697173773, "grad_norm": 3.7608482837677, "learning_rate": 6.671709531013616e-06, "loss": 0.8829, "step": 245 }, { "epoch": 0.018614505694071356, "grad_norm": 6.1873321533203125, "learning_rate": 6.698940998487141e-06, "loss": 1.0227, "step": 246 }, { "epoch": 0.01869017441640498, "grad_norm": 3.6704089641571045, "learning_rate": 6.726172465960665e-06, "loss": 0.9248, "step": 247 }, { "epoch": 0.0187658431387386, "grad_norm": 3.1460700035095215, "learning_rate": 6.753403933434191e-06, "loss": 1.0313, "step": 248 }, { "epoch": 0.018841511861072226, "grad_norm": 3.855139970779419, "learning_rate": 6.780635400907716e-06, "loss": 0.9423, "step": 249 }, { "epoch": 0.01891718058340585, "grad_norm": 3.8225889205932617, "learning_rate": 6.80786686838124e-06, "loss": 0.9772, "step": 250 }, { "epoch": 0.01899284930573947, "grad_norm": 2.7261159420013428, "learning_rate": 6.835098335854766e-06, "loss": 0.9998, "step": 251 }, { "epoch": 0.019068518028073096, "grad_norm": 2.887666702270508, "learning_rate": 6.862329803328291e-06, "loss": 1.052, "step": 252 }, { "epoch": 0.01914418675040672, "grad_norm": 3.3819353580474854, "learning_rate": 6.889561270801816e-06, "loss": 0.9431, "step": 253 }, { "epoch": 0.019219855472740344, "grad_norm": 3.0159807205200195, "learning_rate": 6.91679273827534e-06, "loss": 0.9702, "step": 254 }, { "epoch": 0.019295524195073965, "grad_norm": 3.413630962371826, "learning_rate": 6.944024205748866e-06, "loss": 0.8845, "step": 255 }, { "epoch": 0.01937119291740759, "grad_norm": 5.078720569610596, "learning_rate": 6.97125567322239e-06, "loss": 0.9984, "step": 256 }, { "epoch": 0.019446861639741214, "grad_norm": 3.1097350120544434, "learning_rate": 6.998487140695916e-06, "loss": 1.0355, "step": 257 }, { "epoch": 0.019522530362074835, "grad_norm": 4.542701721191406, "learning_rate": 7.02571860816944e-06, "loss": 1.1622, "step": 258 }, { "epoch": 0.01959819908440846, "grad_norm": 10.559003829956055, "learning_rate": 7.052950075642965e-06, "loss": 1.2439, "step": 259 }, { "epoch": 0.019673867806742084, "grad_norm": 5.9329609870910645, "learning_rate": 7.080181543116491e-06, "loss": 1.0442, "step": 260 }, { "epoch": 0.019749536529075705, "grad_norm": 3.9243202209472656, "learning_rate": 7.1074130105900155e-06, "loss": 0.9679, "step": 261 }, { "epoch": 0.01982520525140933, "grad_norm": 4.9199910163879395, "learning_rate": 7.13464447806354e-06, "loss": 1.1109, "step": 262 }, { "epoch": 0.019900873973742954, "grad_norm": 2.8583781719207764, "learning_rate": 7.161875945537065e-06, "loss": 1.0152, "step": 263 }, { "epoch": 0.019976542696076578, "grad_norm": 3.566678524017334, "learning_rate": 7.1891074130105905e-06, "loss": 1.0698, "step": 264 }, { "epoch": 0.0200522114184102, "grad_norm": 4.623547554016113, "learning_rate": 7.216338880484114e-06, "loss": 1.0657, "step": 265 }, { "epoch": 0.020127880140743824, "grad_norm": 3.9932198524475098, "learning_rate": 7.24357034795764e-06, "loss": 0.9034, "step": 266 }, { "epoch": 0.020203548863077448, "grad_norm": 2.8435165882110596, "learning_rate": 7.2708018154311654e-06, "loss": 0.8764, "step": 267 }, { "epoch": 0.02027921758541107, "grad_norm": 3.262338161468506, "learning_rate": 7.29803328290469e-06, "loss": 1.1094, "step": 268 }, { "epoch": 0.020354886307744693, "grad_norm": 8.468903541564941, "learning_rate": 7.325264750378215e-06, "loss": 0.9228, "step": 269 }, { "epoch": 0.020430555030078318, "grad_norm": 3.0359139442443848, "learning_rate": 7.3524962178517395e-06, "loss": 0.8587, "step": 270 }, { "epoch": 0.02050622375241194, "grad_norm": 8.242533683776855, "learning_rate": 7.379727685325265e-06, "loss": 1.0187, "step": 271 }, { "epoch": 0.020581892474745563, "grad_norm": 2.938859224319458, "learning_rate": 7.406959152798791e-06, "loss": 1.0385, "step": 272 }, { "epoch": 0.020657561197079188, "grad_norm": 3.197413921356201, "learning_rate": 7.4341906202723145e-06, "loss": 0.9181, "step": 273 }, { "epoch": 0.020733229919412812, "grad_norm": 5.897491931915283, "learning_rate": 7.46142208774584e-06, "loss": 1.0107, "step": 274 }, { "epoch": 0.020808898641746433, "grad_norm": 3.680340528488159, "learning_rate": 7.488653555219365e-06, "loss": 1.0152, "step": 275 }, { "epoch": 0.020884567364080057, "grad_norm": 6.313503265380859, "learning_rate": 7.51588502269289e-06, "loss": 1.0487, "step": 276 }, { "epoch": 0.020960236086413682, "grad_norm": 3.8063981533050537, "learning_rate": 7.543116490166414e-06, "loss": 1.0217, "step": 277 }, { "epoch": 0.021035904808747303, "grad_norm": 9.276975631713867, "learning_rate": 7.57034795763994e-06, "loss": 0.9587, "step": 278 }, { "epoch": 0.021111573531080927, "grad_norm": 4.729788303375244, "learning_rate": 7.597579425113465e-06, "loss": 0.8946, "step": 279 }, { "epoch": 0.02118724225341455, "grad_norm": 3.3282487392425537, "learning_rate": 7.624810892586989e-06, "loss": 1.1086, "step": 280 }, { "epoch": 0.021262910975748176, "grad_norm": 2.672250509262085, "learning_rate": 7.652042360060515e-06, "loss": 0.8411, "step": 281 }, { "epoch": 0.021338579698081797, "grad_norm": 4.054312705993652, "learning_rate": 7.67927382753404e-06, "loss": 1.0374, "step": 282 }, { "epoch": 0.02141424842041542, "grad_norm": 4.272651195526123, "learning_rate": 7.706505295007564e-06, "loss": 1.039, "step": 283 }, { "epoch": 0.021489917142749046, "grad_norm": 3.3986735343933105, "learning_rate": 7.733736762481089e-06, "loss": 0.9995, "step": 284 }, { "epoch": 0.021565585865082667, "grad_norm": 4.5488481521606445, "learning_rate": 7.760968229954613e-06, "loss": 1.1762, "step": 285 }, { "epoch": 0.02164125458741629, "grad_norm": 4.396289348602295, "learning_rate": 7.78819969742814e-06, "loss": 0.9458, "step": 286 }, { "epoch": 0.021716923309749916, "grad_norm": 4.582161903381348, "learning_rate": 7.815431164901665e-06, "loss": 1.0327, "step": 287 }, { "epoch": 0.021792592032083537, "grad_norm": 4.647852420806885, "learning_rate": 7.84266263237519e-06, "loss": 1.0392, "step": 288 }, { "epoch": 0.02186826075441716, "grad_norm": 5.218382358551025, "learning_rate": 7.869894099848714e-06, "loss": 0.9462, "step": 289 }, { "epoch": 0.021943929476750786, "grad_norm": 5.8986029624938965, "learning_rate": 7.897125567322239e-06, "loss": 0.975, "step": 290 }, { "epoch": 0.02201959819908441, "grad_norm": 3.1991119384765625, "learning_rate": 7.924357034795765e-06, "loss": 0.8724, "step": 291 }, { "epoch": 0.02209526692141803, "grad_norm": 3.476820230484009, "learning_rate": 7.951588502269288e-06, "loss": 0.9873, "step": 292 }, { "epoch": 0.022170935643751655, "grad_norm": 3.6004443168640137, "learning_rate": 7.978819969742815e-06, "loss": 0.8335, "step": 293 }, { "epoch": 0.02224660436608528, "grad_norm": 2.7738335132598877, "learning_rate": 8.00605143721634e-06, "loss": 1.0594, "step": 294 }, { "epoch": 0.0223222730884189, "grad_norm": 4.626110076904297, "learning_rate": 8.033282904689864e-06, "loss": 1.0716, "step": 295 }, { "epoch": 0.022397941810752525, "grad_norm": 4.319602966308594, "learning_rate": 8.060514372163389e-06, "loss": 0.9551, "step": 296 }, { "epoch": 0.02247361053308615, "grad_norm": 4.208806991577148, "learning_rate": 8.087745839636913e-06, "loss": 0.8445, "step": 297 }, { "epoch": 0.02254927925541977, "grad_norm": 4.937840938568115, "learning_rate": 8.11497730711044e-06, "loss": 1.0403, "step": 298 }, { "epoch": 0.022624947977753395, "grad_norm": 6.741675853729248, "learning_rate": 8.142208774583963e-06, "loss": 0.9795, "step": 299 }, { "epoch": 0.02270061670008702, "grad_norm": 4.731930255889893, "learning_rate": 8.169440242057489e-06, "loss": 0.9626, "step": 300 }, { "epoch": 0.022776285422420644, "grad_norm": 3.032463550567627, "learning_rate": 8.196671709531014e-06, "loss": 0.8131, "step": 301 }, { "epoch": 0.022851954144754265, "grad_norm": 3.7094199657440186, "learning_rate": 8.223903177004539e-06, "loss": 0.9966, "step": 302 }, { "epoch": 0.02292762286708789, "grad_norm": 4.705551624298096, "learning_rate": 8.251134644478063e-06, "loss": 1.0382, "step": 303 }, { "epoch": 0.023003291589421514, "grad_norm": 4.8940510749816895, "learning_rate": 8.278366111951588e-06, "loss": 0.9043, "step": 304 }, { "epoch": 0.023078960311755135, "grad_norm": 5.167042255401611, "learning_rate": 8.305597579425114e-06, "loss": 0.7667, "step": 305 }, { "epoch": 0.02315462903408876, "grad_norm": 5.086777687072754, "learning_rate": 8.332829046898639e-06, "loss": 0.8992, "step": 306 }, { "epoch": 0.023230297756422384, "grad_norm": 12.379859924316406, "learning_rate": 8.360060514372164e-06, "loss": 1.0104, "step": 307 }, { "epoch": 0.023305966478756008, "grad_norm": 5.195709228515625, "learning_rate": 8.387291981845688e-06, "loss": 0.9138, "step": 308 }, { "epoch": 0.02338163520108963, "grad_norm": 4.756507396697998, "learning_rate": 8.414523449319213e-06, "loss": 0.9112, "step": 309 }, { "epoch": 0.023457303923423253, "grad_norm": 4.002053737640381, "learning_rate": 8.441754916792738e-06, "loss": 0.8504, "step": 310 }, { "epoch": 0.023532972645756878, "grad_norm": 4.4022064208984375, "learning_rate": 8.468986384266263e-06, "loss": 0.9997, "step": 311 }, { "epoch": 0.0236086413680905, "grad_norm": 6.589466571807861, "learning_rate": 8.496217851739789e-06, "loss": 0.9104, "step": 312 }, { "epoch": 0.023684310090424123, "grad_norm": 4.6508989334106445, "learning_rate": 8.523449319213314e-06, "loss": 0.9674, "step": 313 }, { "epoch": 0.023759978812757748, "grad_norm": 4.066075325012207, "learning_rate": 8.550680786686838e-06, "loss": 0.8831, "step": 314 }, { "epoch": 0.02383564753509137, "grad_norm": 8.613035202026367, "learning_rate": 8.577912254160363e-06, "loss": 0.8986, "step": 315 }, { "epoch": 0.023911316257424993, "grad_norm": 3.7301228046417236, "learning_rate": 8.605143721633888e-06, "loss": 0.7915, "step": 316 }, { "epoch": 0.023986984979758617, "grad_norm": 8.060126304626465, "learning_rate": 8.632375189107414e-06, "loss": 0.9269, "step": 317 }, { "epoch": 0.024062653702092242, "grad_norm": 4.43103551864624, "learning_rate": 8.659606656580937e-06, "loss": 0.9495, "step": 318 }, { "epoch": 0.024138322424425863, "grad_norm": 3.900829553604126, "learning_rate": 8.686838124054464e-06, "loss": 0.8808, "step": 319 }, { "epoch": 0.024213991146759487, "grad_norm": 4.360715866088867, "learning_rate": 8.714069591527988e-06, "loss": 1.0735, "step": 320 }, { "epoch": 0.02428965986909311, "grad_norm": 5.8079633712768555, "learning_rate": 8.741301059001513e-06, "loss": 1.0089, "step": 321 }, { "epoch": 0.024365328591426733, "grad_norm": 31.655710220336914, "learning_rate": 8.768532526475038e-06, "loss": 1.0929, "step": 322 }, { "epoch": 0.024440997313760357, "grad_norm": 5.170853137969971, "learning_rate": 8.795763993948562e-06, "loss": 1.066, "step": 323 }, { "epoch": 0.02451666603609398, "grad_norm": 3.7530641555786133, "learning_rate": 8.822995461422089e-06, "loss": 0.8663, "step": 324 }, { "epoch": 0.024592334758427602, "grad_norm": 4.36927604675293, "learning_rate": 8.850226928895612e-06, "loss": 0.8544, "step": 325 }, { "epoch": 0.024668003480761227, "grad_norm": 3.9863076210021973, "learning_rate": 8.877458396369138e-06, "loss": 0.935, "step": 326 }, { "epoch": 0.02474367220309485, "grad_norm": 3.438127040863037, "learning_rate": 8.904689863842663e-06, "loss": 0.9287, "step": 327 }, { "epoch": 0.024819340925428476, "grad_norm": 5.561075210571289, "learning_rate": 8.931921331316188e-06, "loss": 1.0388, "step": 328 }, { "epoch": 0.024895009647762097, "grad_norm": 4.477010726928711, "learning_rate": 8.959152798789712e-06, "loss": 0.9875, "step": 329 }, { "epoch": 0.02497067837009572, "grad_norm": 4.917139053344727, "learning_rate": 8.986384266263237e-06, "loss": 0.8978, "step": 330 }, { "epoch": 0.025046347092429345, "grad_norm": 4.206781387329102, "learning_rate": 9.013615733736763e-06, "loss": 0.96, "step": 331 }, { "epoch": 0.025122015814762966, "grad_norm": 2.9883878231048584, "learning_rate": 9.040847201210288e-06, "loss": 0.857, "step": 332 }, { "epoch": 0.02519768453709659, "grad_norm": 9.420825958251953, "learning_rate": 9.068078668683813e-06, "loss": 1.0236, "step": 333 }, { "epoch": 0.025273353259430215, "grad_norm": 4.354033470153809, "learning_rate": 9.09531013615734e-06, "loss": 0.9295, "step": 334 }, { "epoch": 0.025349021981763836, "grad_norm": 4.949868679046631, "learning_rate": 9.122541603630862e-06, "loss": 0.7853, "step": 335 }, { "epoch": 0.02542469070409746, "grad_norm": 4.656820297241211, "learning_rate": 9.149773071104387e-06, "loss": 1.096, "step": 336 }, { "epoch": 0.025500359426431085, "grad_norm": 4.696194171905518, "learning_rate": 9.177004538577912e-06, "loss": 0.8964, "step": 337 }, { "epoch": 0.02557602814876471, "grad_norm": 4.845102310180664, "learning_rate": 9.204236006051438e-06, "loss": 1.1736, "step": 338 }, { "epoch": 0.02565169687109833, "grad_norm": 6.742333889007568, "learning_rate": 9.231467473524963e-06, "loss": 0.9915, "step": 339 }, { "epoch": 0.025727365593431955, "grad_norm": 4.6622538566589355, "learning_rate": 9.258698940998487e-06, "loss": 1.0591, "step": 340 }, { "epoch": 0.02580303431576558, "grad_norm": 4.871918201446533, "learning_rate": 9.285930408472014e-06, "loss": 1.0679, "step": 341 }, { "epoch": 0.0258787030380992, "grad_norm": 3.380262851715088, "learning_rate": 9.313161875945537e-06, "loss": 0.7519, "step": 342 }, { "epoch": 0.025954371760432825, "grad_norm": 4.895992755889893, "learning_rate": 9.340393343419062e-06, "loss": 0.9666, "step": 343 }, { "epoch": 0.02603004048276645, "grad_norm": 5.649062633514404, "learning_rate": 9.367624810892586e-06, "loss": 0.9477, "step": 344 }, { "epoch": 0.026105709205100074, "grad_norm": 5.298853397369385, "learning_rate": 9.394856278366113e-06, "loss": 0.8321, "step": 345 }, { "epoch": 0.026181377927433695, "grad_norm": 15.001054763793945, "learning_rate": 9.422087745839637e-06, "loss": 0.8249, "step": 346 }, { "epoch": 0.02625704664976732, "grad_norm": 7.537627220153809, "learning_rate": 9.449319213313162e-06, "loss": 0.8955, "step": 347 }, { "epoch": 0.026332715372100943, "grad_norm": 6.6606245040893555, "learning_rate": 9.476550680786688e-06, "loss": 0.9237, "step": 348 }, { "epoch": 0.026408384094434564, "grad_norm": 7.1370673179626465, "learning_rate": 9.503782148260213e-06, "loss": 1.0608, "step": 349 }, { "epoch": 0.02648405281676819, "grad_norm": 4.019873142242432, "learning_rate": 9.531013615733736e-06, "loss": 0.9431, "step": 350 }, { "epoch": 0.026559721539101813, "grad_norm": 3.8298895359039307, "learning_rate": 9.558245083207261e-06, "loss": 0.9514, "step": 351 }, { "epoch": 0.026635390261435434, "grad_norm": 3.851069688796997, "learning_rate": 9.585476550680787e-06, "loss": 1.044, "step": 352 }, { "epoch": 0.02671105898376906, "grad_norm": 5.827500343322754, "learning_rate": 9.612708018154312e-06, "loss": 0.8259, "step": 353 }, { "epoch": 0.026786727706102683, "grad_norm": 4.617655277252197, "learning_rate": 9.639939485627837e-06, "loss": 0.8898, "step": 354 }, { "epoch": 0.026862396428436307, "grad_norm": 7.916740417480469, "learning_rate": 9.667170953101363e-06, "loss": 0.8235, "step": 355 }, { "epoch": 0.02693806515076993, "grad_norm": 4.243466377258301, "learning_rate": 9.694402420574888e-06, "loss": 0.9921, "step": 356 }, { "epoch": 0.027013733873103553, "grad_norm": 6.203061580657959, "learning_rate": 9.72163388804841e-06, "loss": 1.0662, "step": 357 }, { "epoch": 0.027089402595437177, "grad_norm": 4.784158229827881, "learning_rate": 9.748865355521936e-06, "loss": 0.8327, "step": 358 }, { "epoch": 0.027165071317770798, "grad_norm": 4.381805896759033, "learning_rate": 9.776096822995462e-06, "loss": 1.0241, "step": 359 }, { "epoch": 0.027240740040104423, "grad_norm": 4.59453821182251, "learning_rate": 9.803328290468987e-06, "loss": 1.1991, "step": 360 }, { "epoch": 0.027316408762438047, "grad_norm": 4.59682035446167, "learning_rate": 9.830559757942511e-06, "loss": 0.9785, "step": 361 }, { "epoch": 0.027392077484771668, "grad_norm": 3.2296361923217773, "learning_rate": 9.857791225416038e-06, "loss": 0.8848, "step": 362 }, { "epoch": 0.027467746207105292, "grad_norm": 4.408949375152588, "learning_rate": 9.885022692889562e-06, "loss": 1.1622, "step": 363 }, { "epoch": 0.027543414929438917, "grad_norm": 4.724997520446777, "learning_rate": 9.912254160363087e-06, "loss": 0.7884, "step": 364 }, { "epoch": 0.02761908365177254, "grad_norm": 3.5149667263031006, "learning_rate": 9.93948562783661e-06, "loss": 0.9649, "step": 365 }, { "epoch": 0.027694752374106162, "grad_norm": 3.3947033882141113, "learning_rate": 9.966717095310137e-06, "loss": 0.8381, "step": 366 }, { "epoch": 0.027770421096439787, "grad_norm": 7.352261066436768, "learning_rate": 9.993948562783661e-06, "loss": 0.8653, "step": 367 }, { "epoch": 0.02784608981877341, "grad_norm": 5.134012699127197, "learning_rate": 1.0021180030257186e-05, "loss": 0.8702, "step": 368 }, { "epoch": 0.027921758541107032, "grad_norm": 4.905878067016602, "learning_rate": 1.0048411497730712e-05, "loss": 0.9004, "step": 369 }, { "epoch": 0.027997427263440657, "grad_norm": 6.044192790985107, "learning_rate": 1.0075642965204237e-05, "loss": 1.0201, "step": 370 }, { "epoch": 0.02807309598577428, "grad_norm": 4.332431316375732, "learning_rate": 1.0102874432677762e-05, "loss": 0.9866, "step": 371 }, { "epoch": 0.028148764708107902, "grad_norm": 5.870851516723633, "learning_rate": 1.0130105900151285e-05, "loss": 0.8737, "step": 372 }, { "epoch": 0.028224433430441526, "grad_norm": 4.059363842010498, "learning_rate": 1.0157337367624811e-05, "loss": 0.851, "step": 373 }, { "epoch": 0.02830010215277515, "grad_norm": 5.465144634246826, "learning_rate": 1.0184568835098336e-05, "loss": 1.0343, "step": 374 }, { "epoch": 0.028375770875108775, "grad_norm": 4.673175811767578, "learning_rate": 1.021180030257186e-05, "loss": 0.9237, "step": 375 }, { "epoch": 0.028451439597442396, "grad_norm": 3.5958478450775146, "learning_rate": 1.0239031770045387e-05, "loss": 0.9103, "step": 376 }, { "epoch": 0.02852710831977602, "grad_norm": 9.658095359802246, "learning_rate": 1.0266263237518912e-05, "loss": 1.0272, "step": 377 }, { "epoch": 0.028602777042109645, "grad_norm": 4.175169944763184, "learning_rate": 1.0293494704992436e-05, "loss": 0.8041, "step": 378 }, { "epoch": 0.028678445764443266, "grad_norm": 3.949751853942871, "learning_rate": 1.0320726172465961e-05, "loss": 0.8977, "step": 379 }, { "epoch": 0.02875411448677689, "grad_norm": 4.572116374969482, "learning_rate": 1.0347957639939486e-05, "loss": 0.8524, "step": 380 }, { "epoch": 0.028829783209110515, "grad_norm": 4.106285095214844, "learning_rate": 1.037518910741301e-05, "loss": 1.0366, "step": 381 }, { "epoch": 0.02890545193144414, "grad_norm": 3.8881635665893555, "learning_rate": 1.0402420574886535e-05, "loss": 0.8127, "step": 382 }, { "epoch": 0.02898112065377776, "grad_norm": 6.574056625366211, "learning_rate": 1.0429652042360062e-05, "loss": 0.805, "step": 383 }, { "epoch": 0.029056789376111385, "grad_norm": 4.1317057609558105, "learning_rate": 1.0456883509833586e-05, "loss": 1.1139, "step": 384 }, { "epoch": 0.02913245809844501, "grad_norm": 6.96987771987915, "learning_rate": 1.0484114977307111e-05, "loss": 1.0626, "step": 385 }, { "epoch": 0.02920812682077863, "grad_norm": 5.030163764953613, "learning_rate": 1.0511346444780636e-05, "loss": 0.7772, "step": 386 }, { "epoch": 0.029283795543112254, "grad_norm": 8.177231788635254, "learning_rate": 1.053857791225416e-05, "loss": 0.9079, "step": 387 }, { "epoch": 0.02935946426544588, "grad_norm": 4.530209541320801, "learning_rate": 1.0565809379727685e-05, "loss": 0.8447, "step": 388 }, { "epoch": 0.0294351329877795, "grad_norm": 12.534080505371094, "learning_rate": 1.059304084720121e-05, "loss": 0.9899, "step": 389 }, { "epoch": 0.029510801710113124, "grad_norm": 4.852429389953613, "learning_rate": 1.0620272314674735e-05, "loss": 1.0398, "step": 390 }, { "epoch": 0.02958647043244675, "grad_norm": 4.2400054931640625, "learning_rate": 1.0647503782148261e-05, "loss": 0.8936, "step": 391 }, { "epoch": 0.029662139154780373, "grad_norm": 5.989685535430908, "learning_rate": 1.0674735249621786e-05, "loss": 0.8694, "step": 392 }, { "epoch": 0.029737807877113994, "grad_norm": 4.045843124389648, "learning_rate": 1.070196671709531e-05, "loss": 0.9632, "step": 393 }, { "epoch": 0.02981347659944762, "grad_norm": 4.707093238830566, "learning_rate": 1.0729198184568837e-05, "loss": 0.8046, "step": 394 }, { "epoch": 0.029889145321781243, "grad_norm": 5.141930103302002, "learning_rate": 1.075642965204236e-05, "loss": 0.9606, "step": 395 }, { "epoch": 0.029964814044114864, "grad_norm": 9.92322826385498, "learning_rate": 1.0783661119515884e-05, "loss": 0.9988, "step": 396 }, { "epoch": 0.03004048276644849, "grad_norm": 5.097169399261475, "learning_rate": 1.0810892586989409e-05, "loss": 0.8988, "step": 397 }, { "epoch": 0.030116151488782113, "grad_norm": 4.876684665679932, "learning_rate": 1.0838124054462936e-05, "loss": 0.874, "step": 398 }, { "epoch": 0.030191820211115734, "grad_norm": 4.846562385559082, "learning_rate": 1.086535552193646e-05, "loss": 0.8658, "step": 399 }, { "epoch": 0.030267488933449358, "grad_norm": 5.538702011108398, "learning_rate": 1.0892586989409985e-05, "loss": 1.0144, "step": 400 }, { "epoch": 0.030343157655782983, "grad_norm": 4.698038578033447, "learning_rate": 1.0919818456883511e-05, "loss": 0.9173, "step": 401 }, { "epoch": 0.030418826378116607, "grad_norm": 6.059201717376709, "learning_rate": 1.0947049924357034e-05, "loss": 0.7875, "step": 402 }, { "epoch": 0.030494495100450228, "grad_norm": 6.118393421173096, "learning_rate": 1.0974281391830559e-05, "loss": 0.9391, "step": 403 }, { "epoch": 0.030570163822783852, "grad_norm": 4.088007926940918, "learning_rate": 1.1001512859304084e-05, "loss": 0.7948, "step": 404 }, { "epoch": 0.030645832545117477, "grad_norm": 4.4451799392700195, "learning_rate": 1.102874432677761e-05, "loss": 0.9371, "step": 405 }, { "epoch": 0.030721501267451098, "grad_norm": 4.529284477233887, "learning_rate": 1.1055975794251135e-05, "loss": 0.9817, "step": 406 }, { "epoch": 0.030797169989784722, "grad_norm": 7.541872978210449, "learning_rate": 1.108320726172466e-05, "loss": 0.8889, "step": 407 }, { "epoch": 0.030872838712118347, "grad_norm": 3.850817918777466, "learning_rate": 1.1110438729198186e-05, "loss": 0.8753, "step": 408 }, { "epoch": 0.030948507434451968, "grad_norm": 3.5445756912231445, "learning_rate": 1.113767019667171e-05, "loss": 0.8393, "step": 409 }, { "epoch": 0.031024176156785592, "grad_norm": 5.169709205627441, "learning_rate": 1.1164901664145234e-05, "loss": 0.9982, "step": 410 }, { "epoch": 0.031099844879119216, "grad_norm": 3.5694003105163574, "learning_rate": 1.1192133131618758e-05, "loss": 0.7693, "step": 411 }, { "epoch": 0.03117551360145284, "grad_norm": 5.7016921043396, "learning_rate": 1.1219364599092285e-05, "loss": 0.8983, "step": 412 }, { "epoch": 0.031251182323786465, "grad_norm": 5.174305438995361, "learning_rate": 1.124659606656581e-05, "loss": 0.9126, "step": 413 }, { "epoch": 0.03132685104612009, "grad_norm": 4.78248929977417, "learning_rate": 1.1273827534039334e-05, "loss": 1.0237, "step": 414 }, { "epoch": 0.03140251976845371, "grad_norm": 4.276739120483398, "learning_rate": 1.130105900151286e-05, "loss": 0.953, "step": 415 }, { "epoch": 0.03147818849078733, "grad_norm": 5.136653900146484, "learning_rate": 1.1328290468986385e-05, "loss": 0.8138, "step": 416 }, { "epoch": 0.031553857213120956, "grad_norm": 3.566028356552124, "learning_rate": 1.1355521936459908e-05, "loss": 1.0327, "step": 417 }, { "epoch": 0.03162952593545458, "grad_norm": 3.272423267364502, "learning_rate": 1.1382753403933433e-05, "loss": 1.0262, "step": 418 }, { "epoch": 0.031705194657788205, "grad_norm": 4.595939636230469, "learning_rate": 1.140998487140696e-05, "loss": 0.8515, "step": 419 }, { "epoch": 0.03178086338012183, "grad_norm": 3.546163320541382, "learning_rate": 1.1437216338880484e-05, "loss": 0.7262, "step": 420 }, { "epoch": 0.03185653210245545, "grad_norm": 4.943700313568115, "learning_rate": 1.1464447806354009e-05, "loss": 0.9381, "step": 421 }, { "epoch": 0.03193220082478907, "grad_norm": 5.774724960327148, "learning_rate": 1.1491679273827535e-05, "loss": 0.9081, "step": 422 }, { "epoch": 0.032007869547122696, "grad_norm": 4.097910404205322, "learning_rate": 1.151891074130106e-05, "loss": 0.8876, "step": 423 }, { "epoch": 0.03208353826945632, "grad_norm": 4.992226600646973, "learning_rate": 1.1546142208774585e-05, "loss": 0.8628, "step": 424 }, { "epoch": 0.032159206991789945, "grad_norm": 4.852366924285889, "learning_rate": 1.1573373676248108e-05, "loss": 0.9476, "step": 425 }, { "epoch": 0.03223487571412357, "grad_norm": 5.32084321975708, "learning_rate": 1.1600605143721634e-05, "loss": 0.8845, "step": 426 }, { "epoch": 0.03231054443645719, "grad_norm": 5.613223552703857, "learning_rate": 1.1627836611195159e-05, "loss": 0.756, "step": 427 }, { "epoch": 0.03238621315879081, "grad_norm": 4.22434139251709, "learning_rate": 1.1655068078668683e-05, "loss": 0.9494, "step": 428 }, { "epoch": 0.032461881881124435, "grad_norm": 4.021113395690918, "learning_rate": 1.168229954614221e-05, "loss": 0.7561, "step": 429 }, { "epoch": 0.03253755060345806, "grad_norm": 4.726623058319092, "learning_rate": 1.1709531013615735e-05, "loss": 0.9944, "step": 430 }, { "epoch": 0.032613219325791684, "grad_norm": 5.192655563354492, "learning_rate": 1.173676248108926e-05, "loss": 0.9211, "step": 431 }, { "epoch": 0.03268888804812531, "grad_norm": 3.9181580543518066, "learning_rate": 1.1763993948562782e-05, "loss": 0.9154, "step": 432 }, { "epoch": 0.03276455677045893, "grad_norm": 6.171100616455078, "learning_rate": 1.1791225416036309e-05, "loss": 0.9509, "step": 433 }, { "epoch": 0.03284022549279256, "grad_norm": 3.8820559978485107, "learning_rate": 1.1818456883509833e-05, "loss": 1.0268, "step": 434 }, { "epoch": 0.032915894215126175, "grad_norm": 4.744935512542725, "learning_rate": 1.1845688350983358e-05, "loss": 0.9251, "step": 435 }, { "epoch": 0.0329915629374598, "grad_norm": 3.204756736755371, "learning_rate": 1.1872919818456884e-05, "loss": 0.8481, "step": 436 }, { "epoch": 0.033067231659793424, "grad_norm": 4.440789699554443, "learning_rate": 1.190015128593041e-05, "loss": 1.072, "step": 437 }, { "epoch": 0.03314290038212705, "grad_norm": 4.594890594482422, "learning_rate": 1.1927382753403934e-05, "loss": 0.8416, "step": 438 }, { "epoch": 0.03321856910446067, "grad_norm": 8.748790740966797, "learning_rate": 1.195461422087746e-05, "loss": 0.7003, "step": 439 }, { "epoch": 0.0332942378267943, "grad_norm": 6.574450969696045, "learning_rate": 1.1981845688350983e-05, "loss": 0.9899, "step": 440 }, { "epoch": 0.033369906549127915, "grad_norm": 3.3959763050079346, "learning_rate": 1.2009077155824508e-05, "loss": 0.8483, "step": 441 }, { "epoch": 0.03344557527146154, "grad_norm": 9.578702926635742, "learning_rate": 1.2036308623298033e-05, "loss": 0.6854, "step": 442 }, { "epoch": 0.03352124399379516, "grad_norm": 10.351158142089844, "learning_rate": 1.2063540090771559e-05, "loss": 0.9046, "step": 443 }, { "epoch": 0.03359691271612879, "grad_norm": 3.339411497116089, "learning_rate": 1.2090771558245084e-05, "loss": 0.8759, "step": 444 }, { "epoch": 0.03367258143846241, "grad_norm": 3.9380717277526855, "learning_rate": 1.2118003025718608e-05, "loss": 0.869, "step": 445 }, { "epoch": 0.03374825016079604, "grad_norm": 3.6196255683898926, "learning_rate": 1.2145234493192135e-05, "loss": 0.853, "step": 446 }, { "epoch": 0.03382391888312966, "grad_norm": 3.6324236392974854, "learning_rate": 1.2172465960665658e-05, "loss": 0.9892, "step": 447 }, { "epoch": 0.03389958760546328, "grad_norm": 4.3121185302734375, "learning_rate": 1.2199697428139183e-05, "loss": 0.8917, "step": 448 }, { "epoch": 0.0339752563277969, "grad_norm": 6.199253559112549, "learning_rate": 1.2226928895612707e-05, "loss": 0.8886, "step": 449 }, { "epoch": 0.03405092505013053, "grad_norm": 5.536099433898926, "learning_rate": 1.2254160363086234e-05, "loss": 0.8908, "step": 450 }, { "epoch": 0.03412659377246415, "grad_norm": 4.678923606872559, "learning_rate": 1.2281391830559758e-05, "loss": 1.0336, "step": 451 }, { "epoch": 0.034202262494797776, "grad_norm": 5.405990123748779, "learning_rate": 1.2308623298033283e-05, "loss": 0.8555, "step": 452 }, { "epoch": 0.0342779312171314, "grad_norm": 5.1637749671936035, "learning_rate": 1.233585476550681e-05, "loss": 0.8824, "step": 453 }, { "epoch": 0.034353599939465025, "grad_norm": 8.398664474487305, "learning_rate": 1.2363086232980334e-05, "loss": 0.993, "step": 454 }, { "epoch": 0.03442926866179864, "grad_norm": 4.705966472625732, "learning_rate": 1.2390317700453857e-05, "loss": 0.9114, "step": 455 }, { "epoch": 0.03450493738413227, "grad_norm": 4.132481575012207, "learning_rate": 1.2417549167927382e-05, "loss": 1.108, "step": 456 }, { "epoch": 0.03458060610646589, "grad_norm": 6.405203342437744, "learning_rate": 1.2444780635400908e-05, "loss": 1.0141, "step": 457 }, { "epoch": 0.034656274828799516, "grad_norm": 4.2582597732543945, "learning_rate": 1.2472012102874433e-05, "loss": 0.8596, "step": 458 }, { "epoch": 0.03473194355113314, "grad_norm": 4.002652168273926, "learning_rate": 1.2499243570347958e-05, "loss": 0.9137, "step": 459 }, { "epoch": 0.034807612273466765, "grad_norm": 4.454577445983887, "learning_rate": 1.2526475037821484e-05, "loss": 0.8705, "step": 460 }, { "epoch": 0.03488328099580039, "grad_norm": 4.909870147705078, "learning_rate": 1.2553706505295009e-05, "loss": 1.1714, "step": 461 }, { "epoch": 0.03495894971813401, "grad_norm": 7.202528953552246, "learning_rate": 1.2580937972768532e-05, "loss": 0.9013, "step": 462 }, { "epoch": 0.03503461844046763, "grad_norm": 4.110122203826904, "learning_rate": 1.2608169440242057e-05, "loss": 0.8812, "step": 463 }, { "epoch": 0.035110287162801256, "grad_norm": 3.5475730895996094, "learning_rate": 1.2635400907715583e-05, "loss": 0.9704, "step": 464 }, { "epoch": 0.03518595588513488, "grad_norm": 3.4889214038848877, "learning_rate": 1.2662632375189108e-05, "loss": 0.9656, "step": 465 }, { "epoch": 0.035261624607468504, "grad_norm": 3.9124395847320557, "learning_rate": 1.2689863842662632e-05, "loss": 0.7515, "step": 466 }, { "epoch": 0.03533729332980213, "grad_norm": 6.498013496398926, "learning_rate": 1.2717095310136159e-05, "loss": 0.8926, "step": 467 }, { "epoch": 0.035412962052135746, "grad_norm": 3.9321653842926025, "learning_rate": 1.2744326777609683e-05, "loss": 0.7346, "step": 468 }, { "epoch": 0.03548863077446937, "grad_norm": 5.16299295425415, "learning_rate": 1.2771558245083208e-05, "loss": 0.8197, "step": 469 }, { "epoch": 0.035564299496802995, "grad_norm": 4.675112247467041, "learning_rate": 1.2798789712556731e-05, "loss": 0.8971, "step": 470 }, { "epoch": 0.03563996821913662, "grad_norm": 2.9948925971984863, "learning_rate": 1.2826021180030258e-05, "loss": 0.8531, "step": 471 }, { "epoch": 0.035715636941470244, "grad_norm": 4.1595354080200195, "learning_rate": 1.2853252647503782e-05, "loss": 0.8913, "step": 472 }, { "epoch": 0.03579130566380387, "grad_norm": 2.9230337142944336, "learning_rate": 1.2880484114977307e-05, "loss": 1.1192, "step": 473 }, { "epoch": 0.03586697438613749, "grad_norm": 6.0981831550598145, "learning_rate": 1.2907715582450833e-05, "loss": 0.8437, "step": 474 }, { "epoch": 0.03594264310847111, "grad_norm": 6.705804824829102, "learning_rate": 1.2934947049924358e-05, "loss": 0.9358, "step": 475 }, { "epoch": 0.036018311830804735, "grad_norm": 3.546778440475464, "learning_rate": 1.2962178517397883e-05, "loss": 1.0628, "step": 476 }, { "epoch": 0.03609398055313836, "grad_norm": 3.6508278846740723, "learning_rate": 1.2989409984871406e-05, "loss": 0.8347, "step": 477 }, { "epoch": 0.036169649275471984, "grad_norm": 5.718278884887695, "learning_rate": 1.3016641452344932e-05, "loss": 0.9513, "step": 478 }, { "epoch": 0.03624531799780561, "grad_norm": 9.246580123901367, "learning_rate": 1.3043872919818457e-05, "loss": 0.9638, "step": 479 }, { "epoch": 0.03632098672013923, "grad_norm": 5.03000545501709, "learning_rate": 1.3071104387291982e-05, "loss": 0.9532, "step": 480 }, { "epoch": 0.03639665544247286, "grad_norm": 4.656915187835693, "learning_rate": 1.3098335854765508e-05, "loss": 0.9497, "step": 481 }, { "epoch": 0.036472324164806474, "grad_norm": 4.1055731773376465, "learning_rate": 1.3125567322239033e-05, "loss": 0.782, "step": 482 }, { "epoch": 0.0365479928871401, "grad_norm": 3.7302215099334717, "learning_rate": 1.3152798789712557e-05, "loss": 0.8432, "step": 483 }, { "epoch": 0.03662366160947372, "grad_norm": 4.635787487030029, "learning_rate": 1.3180030257186082e-05, "loss": 0.857, "step": 484 }, { "epoch": 0.03669933033180735, "grad_norm": 4.681149482727051, "learning_rate": 1.3207261724659607e-05, "loss": 0.7984, "step": 485 }, { "epoch": 0.03677499905414097, "grad_norm": 6.412924289703369, "learning_rate": 1.3234493192133132e-05, "loss": 0.8818, "step": 486 }, { "epoch": 0.0368506677764746, "grad_norm": 3.8682901859283447, "learning_rate": 1.3261724659606656e-05, "loss": 0.7964, "step": 487 }, { "epoch": 0.03692633649880822, "grad_norm": 5.080738544464111, "learning_rate": 1.3288956127080183e-05, "loss": 0.8436, "step": 488 }, { "epoch": 0.03700200522114184, "grad_norm": 3.049335241317749, "learning_rate": 1.3316187594553707e-05, "loss": 0.9249, "step": 489 }, { "epoch": 0.03707767394347546, "grad_norm": 4.670293807983398, "learning_rate": 1.3343419062027232e-05, "loss": 0.729, "step": 490 }, { "epoch": 0.03715334266580909, "grad_norm": 4.936186790466309, "learning_rate": 1.3370650529500757e-05, "loss": 0.8145, "step": 491 }, { "epoch": 0.03722901138814271, "grad_norm": 2.468773365020752, "learning_rate": 1.3397881996974281e-05, "loss": 1.0231, "step": 492 }, { "epoch": 0.037304680110476336, "grad_norm": 3.9304311275482178, "learning_rate": 1.3425113464447806e-05, "loss": 0.9577, "step": 493 }, { "epoch": 0.03738034883280996, "grad_norm": 3.941254138946533, "learning_rate": 1.345234493192133e-05, "loss": 0.8663, "step": 494 }, { "epoch": 0.03745601755514358, "grad_norm": 3.897300958633423, "learning_rate": 1.3479576399394857e-05, "loss": 0.9065, "step": 495 }, { "epoch": 0.0375316862774772, "grad_norm": 5.854770183563232, "learning_rate": 1.3506807866868382e-05, "loss": 0.8533, "step": 496 }, { "epoch": 0.03760735499981083, "grad_norm": 5.508477210998535, "learning_rate": 1.3534039334341907e-05, "loss": 0.7829, "step": 497 }, { "epoch": 0.03768302372214445, "grad_norm": 3.432650566101074, "learning_rate": 1.3561270801815431e-05, "loss": 0.9835, "step": 498 }, { "epoch": 0.037758692444478076, "grad_norm": 4.13020133972168, "learning_rate": 1.3588502269288958e-05, "loss": 0.9426, "step": 499 }, { "epoch": 0.0378343611668117, "grad_norm": 3.107402801513672, "learning_rate": 1.361573373676248e-05, "loss": 0.7618, "step": 500 }, { "epoch": 0.037910029889145325, "grad_norm": 6.790006637573242, "learning_rate": 1.3642965204236005e-05, "loss": 1.0121, "step": 501 }, { "epoch": 0.03798569861147894, "grad_norm": 4.519580841064453, "learning_rate": 1.3670196671709532e-05, "loss": 0.887, "step": 502 }, { "epoch": 0.03806136733381257, "grad_norm": 4.3927903175354, "learning_rate": 1.3697428139183057e-05, "loss": 0.848, "step": 503 }, { "epoch": 0.03813703605614619, "grad_norm": 4.329632759094238, "learning_rate": 1.3724659606656581e-05, "loss": 0.8182, "step": 504 }, { "epoch": 0.038212704778479815, "grad_norm": 5.273471355438232, "learning_rate": 1.3751891074130106e-05, "loss": 0.9141, "step": 505 }, { "epoch": 0.03828837350081344, "grad_norm": 3.8324403762817383, "learning_rate": 1.3779122541603632e-05, "loss": 0.9766, "step": 506 }, { "epoch": 0.038364042223147064, "grad_norm": 3.876749038696289, "learning_rate": 1.3806354009077157e-05, "loss": 0.8484, "step": 507 }, { "epoch": 0.03843971094548069, "grad_norm": 4.648043155670166, "learning_rate": 1.383358547655068e-05, "loss": 0.9291, "step": 508 }, { "epoch": 0.038515379667814306, "grad_norm": 4.072823524475098, "learning_rate": 1.3860816944024205e-05, "loss": 1.0211, "step": 509 }, { "epoch": 0.03859104839014793, "grad_norm": 7.409148216247559, "learning_rate": 1.3888048411497731e-05, "loss": 0.9665, "step": 510 }, { "epoch": 0.038666717112481555, "grad_norm": 5.668654441833496, "learning_rate": 1.3915279878971256e-05, "loss": 0.8603, "step": 511 }, { "epoch": 0.03874238583481518, "grad_norm": 4.457876205444336, "learning_rate": 1.394251134644478e-05, "loss": 0.8055, "step": 512 }, { "epoch": 0.038818054557148804, "grad_norm": 4.217092514038086, "learning_rate": 1.3969742813918307e-05, "loss": 0.893, "step": 513 }, { "epoch": 0.03889372327948243, "grad_norm": 4.033523082733154, "learning_rate": 1.3996974281391832e-05, "loss": 0.8748, "step": 514 }, { "epoch": 0.03896939200181605, "grad_norm": 3.2417023181915283, "learning_rate": 1.4024205748865355e-05, "loss": 0.7512, "step": 515 }, { "epoch": 0.03904506072414967, "grad_norm": 4.064194679260254, "learning_rate": 1.405143721633888e-05, "loss": 0.991, "step": 516 }, { "epoch": 0.039120729446483295, "grad_norm": 5.263235569000244, "learning_rate": 1.4078668683812406e-05, "loss": 0.8356, "step": 517 }, { "epoch": 0.03919639816881692, "grad_norm": 3.2027482986450195, "learning_rate": 1.410590015128593e-05, "loss": 1.0551, "step": 518 }, { "epoch": 0.039272066891150544, "grad_norm": 6.763327121734619, "learning_rate": 1.4133131618759455e-05, "loss": 0.8429, "step": 519 }, { "epoch": 0.03934773561348417, "grad_norm": 4.308533668518066, "learning_rate": 1.4160363086232982e-05, "loss": 0.8884, "step": 520 }, { "epoch": 0.03942340433581779, "grad_norm": 4.909972667694092, "learning_rate": 1.4187594553706506e-05, "loss": 0.7806, "step": 521 }, { "epoch": 0.03949907305815141, "grad_norm": 3.7141098976135254, "learning_rate": 1.4214826021180031e-05, "loss": 0.7759, "step": 522 }, { "epoch": 0.039574741780485034, "grad_norm": 4.333841800689697, "learning_rate": 1.4242057488653554e-05, "loss": 0.9594, "step": 523 }, { "epoch": 0.03965041050281866, "grad_norm": 7.9005866050720215, "learning_rate": 1.426928895612708e-05, "loss": 0.9144, "step": 524 }, { "epoch": 0.03972607922515228, "grad_norm": 4.86323881149292, "learning_rate": 1.4296520423600605e-05, "loss": 0.85, "step": 525 }, { "epoch": 0.03980174794748591, "grad_norm": 2.843881130218506, "learning_rate": 1.432375189107413e-05, "loss": 0.9132, "step": 526 }, { "epoch": 0.03987741666981953, "grad_norm": 3.5814990997314453, "learning_rate": 1.4350983358547656e-05, "loss": 0.8243, "step": 527 }, { "epoch": 0.039953085392153156, "grad_norm": 3.7590556144714355, "learning_rate": 1.4378214826021181e-05, "loss": 0.8779, "step": 528 }, { "epoch": 0.040028754114486774, "grad_norm": 4.117438316345215, "learning_rate": 1.4405446293494706e-05, "loss": 0.9465, "step": 529 }, { "epoch": 0.0401044228368204, "grad_norm": 6.806588649749756, "learning_rate": 1.4432677760968229e-05, "loss": 0.8587, "step": 530 }, { "epoch": 0.04018009155915402, "grad_norm": 3.3301045894622803, "learning_rate": 1.4459909228441755e-05, "loss": 0.8867, "step": 531 }, { "epoch": 0.04025576028148765, "grad_norm": 3.395404577255249, "learning_rate": 1.448714069591528e-05, "loss": 0.9337, "step": 532 }, { "epoch": 0.04033142900382127, "grad_norm": 6.818991184234619, "learning_rate": 1.4514372163388804e-05, "loss": 1.0161, "step": 533 }, { "epoch": 0.040407097726154896, "grad_norm": 3.7646358013153076, "learning_rate": 1.4541603630862331e-05, "loss": 0.9049, "step": 534 }, { "epoch": 0.04048276644848852, "grad_norm": 3.209998369216919, "learning_rate": 1.4568835098335856e-05, "loss": 0.9199, "step": 535 }, { "epoch": 0.04055843517082214, "grad_norm": 4.078510761260986, "learning_rate": 1.459606656580938e-05, "loss": 0.9277, "step": 536 }, { "epoch": 0.04063410389315576, "grad_norm": 4.5334153175354, "learning_rate": 1.4623298033282907e-05, "loss": 0.9826, "step": 537 }, { "epoch": 0.04070977261548939, "grad_norm": 4.209270000457764, "learning_rate": 1.465052950075643e-05, "loss": 0.8722, "step": 538 }, { "epoch": 0.04078544133782301, "grad_norm": 4.012211799621582, "learning_rate": 1.4677760968229954e-05, "loss": 0.7578, "step": 539 }, { "epoch": 0.040861110060156636, "grad_norm": 3.805192232131958, "learning_rate": 1.4704992435703479e-05, "loss": 0.7765, "step": 540 }, { "epoch": 0.04093677878249026, "grad_norm": 6.301825046539307, "learning_rate": 1.4732223903177005e-05, "loss": 0.8551, "step": 541 }, { "epoch": 0.04101244750482388, "grad_norm": 3.2638895511627197, "learning_rate": 1.475945537065053e-05, "loss": 0.81, "step": 542 }, { "epoch": 0.0410881162271575, "grad_norm": 4.314562797546387, "learning_rate": 1.4786686838124055e-05, "loss": 1.0944, "step": 543 }, { "epoch": 0.041163784949491126, "grad_norm": 3.090569496154785, "learning_rate": 1.4813918305597581e-05, "loss": 1.0042, "step": 544 }, { "epoch": 0.04123945367182475, "grad_norm": 3.7688913345336914, "learning_rate": 1.4841149773071104e-05, "loss": 0.9544, "step": 545 }, { "epoch": 0.041315122394158375, "grad_norm": 4.588676929473877, "learning_rate": 1.4868381240544629e-05, "loss": 0.7449, "step": 546 }, { "epoch": 0.041390791116492, "grad_norm": 6.916925430297852, "learning_rate": 1.4895612708018154e-05, "loss": 0.9825, "step": 547 }, { "epoch": 0.041466459838825624, "grad_norm": 3.3256642818450928, "learning_rate": 1.492284417549168e-05, "loss": 0.9232, "step": 548 }, { "epoch": 0.04154212856115924, "grad_norm": 5.033417224884033, "learning_rate": 1.4950075642965205e-05, "loss": 0.9566, "step": 549 }, { "epoch": 0.041617797283492866, "grad_norm": 3.85809063911438, "learning_rate": 1.497730711043873e-05, "loss": 0.993, "step": 550 }, { "epoch": 0.04169346600582649, "grad_norm": 5.949283599853516, "learning_rate": 1.5004538577912256e-05, "loss": 0.9769, "step": 551 }, { "epoch": 0.041769134728160115, "grad_norm": 4.104134559631348, "learning_rate": 1.503177004538578e-05, "loss": 0.8464, "step": 552 }, { "epoch": 0.04184480345049374, "grad_norm": 5.211521148681641, "learning_rate": 1.5059001512859304e-05, "loss": 0.8422, "step": 553 }, { "epoch": 0.041920472172827364, "grad_norm": 3.4157001972198486, "learning_rate": 1.5086232980332828e-05, "loss": 0.8472, "step": 554 }, { "epoch": 0.04199614089516099, "grad_norm": 3.895693778991699, "learning_rate": 1.5113464447806355e-05, "loss": 0.7614, "step": 555 }, { "epoch": 0.042071809617494606, "grad_norm": 4.627487659454346, "learning_rate": 1.514069591527988e-05, "loss": 0.9064, "step": 556 }, { "epoch": 0.04214747833982823, "grad_norm": 3.6824750900268555, "learning_rate": 1.5167927382753404e-05, "loss": 0.8688, "step": 557 }, { "epoch": 0.042223147062161855, "grad_norm": 3.035003185272217, "learning_rate": 1.519515885022693e-05, "loss": 0.9319, "step": 558 }, { "epoch": 0.04229881578449548, "grad_norm": 3.1040902137756348, "learning_rate": 1.5222390317700455e-05, "loss": 0.8281, "step": 559 }, { "epoch": 0.0423744845068291, "grad_norm": 3.9689624309539795, "learning_rate": 1.5249621785173978e-05, "loss": 0.8638, "step": 560 }, { "epoch": 0.04245015322916273, "grad_norm": 4.4839701652526855, "learning_rate": 1.5276853252647503e-05, "loss": 0.7902, "step": 561 }, { "epoch": 0.04252582195149635, "grad_norm": 5.034473419189453, "learning_rate": 1.530408472012103e-05, "loss": 0.8222, "step": 562 }, { "epoch": 0.04260149067382997, "grad_norm": 3.7882745265960693, "learning_rate": 1.5331316187594552e-05, "loss": 0.8985, "step": 563 }, { "epoch": 0.042677159396163594, "grad_norm": 4.125184059143066, "learning_rate": 1.535854765506808e-05, "loss": 0.9292, "step": 564 }, { "epoch": 0.04275282811849722, "grad_norm": 4.87890625, "learning_rate": 1.5385779122541605e-05, "loss": 0.7701, "step": 565 }, { "epoch": 0.04282849684083084, "grad_norm": 4.0733513832092285, "learning_rate": 1.5413010590015128e-05, "loss": 0.9535, "step": 566 }, { "epoch": 0.04290416556316447, "grad_norm": 5.096096515655518, "learning_rate": 1.5440242057488655e-05, "loss": 0.7682, "step": 567 }, { "epoch": 0.04297983428549809, "grad_norm": 3.4975779056549072, "learning_rate": 1.5467473524962178e-05, "loss": 0.9259, "step": 568 }, { "epoch": 0.04305550300783171, "grad_norm": 4.021080493927002, "learning_rate": 1.5494704992435704e-05, "loss": 0.7498, "step": 569 }, { "epoch": 0.043131171730165334, "grad_norm": 4.798002243041992, "learning_rate": 1.5521936459909227e-05, "loss": 0.9342, "step": 570 }, { "epoch": 0.04320684045249896, "grad_norm": 3.3058738708496094, "learning_rate": 1.5549167927382753e-05, "loss": 0.8086, "step": 571 }, { "epoch": 0.04328250917483258, "grad_norm": 4.445735931396484, "learning_rate": 1.557639939485628e-05, "loss": 0.8993, "step": 572 }, { "epoch": 0.04335817789716621, "grad_norm": 3.855530023574829, "learning_rate": 1.5603630862329803e-05, "loss": 0.8861, "step": 573 }, { "epoch": 0.04343384661949983, "grad_norm": 3.9461214542388916, "learning_rate": 1.563086232980333e-05, "loss": 0.8456, "step": 574 }, { "epoch": 0.043509515341833456, "grad_norm": 2.9989559650421143, "learning_rate": 1.5658093797276852e-05, "loss": 0.9882, "step": 575 }, { "epoch": 0.043585184064167073, "grad_norm": 6.7230916023254395, "learning_rate": 1.568532526475038e-05, "loss": 0.8234, "step": 576 }, { "epoch": 0.0436608527865007, "grad_norm": 4.629927635192871, "learning_rate": 1.57125567322239e-05, "loss": 0.8634, "step": 577 }, { "epoch": 0.04373652150883432, "grad_norm": 6.619353294372559, "learning_rate": 1.5739788199697428e-05, "loss": 0.8626, "step": 578 }, { "epoch": 0.04381219023116795, "grad_norm": 2.4462685585021973, "learning_rate": 1.5767019667170954e-05, "loss": 0.7996, "step": 579 }, { "epoch": 0.04388785895350157, "grad_norm": 3.107055902481079, "learning_rate": 1.5794251134644477e-05, "loss": 1.1027, "step": 580 }, { "epoch": 0.043963527675835196, "grad_norm": 3.176931858062744, "learning_rate": 1.5821482602118004e-05, "loss": 0.8589, "step": 581 }, { "epoch": 0.04403919639816882, "grad_norm": 6.571891784667969, "learning_rate": 1.584871406959153e-05, "loss": 0.8045, "step": 582 }, { "epoch": 0.04411486512050244, "grad_norm": 4.314690589904785, "learning_rate": 1.5875945537065053e-05, "loss": 0.8888, "step": 583 }, { "epoch": 0.04419053384283606, "grad_norm": 3.6380622386932373, "learning_rate": 1.5903177004538576e-05, "loss": 0.7593, "step": 584 }, { "epoch": 0.044266202565169686, "grad_norm": 5.430633544921875, "learning_rate": 1.5930408472012103e-05, "loss": 0.884, "step": 585 }, { "epoch": 0.04434187128750331, "grad_norm": 3.1226465702056885, "learning_rate": 1.595763993948563e-05, "loss": 0.9011, "step": 586 }, { "epoch": 0.044417540009836935, "grad_norm": 3.8268587589263916, "learning_rate": 1.5984871406959152e-05, "loss": 0.8305, "step": 587 }, { "epoch": 0.04449320873217056, "grad_norm": 5.864771842956543, "learning_rate": 1.601210287443268e-05, "loss": 0.7973, "step": 588 }, { "epoch": 0.044568877454504184, "grad_norm": 4.280256748199463, "learning_rate": 1.6039334341906205e-05, "loss": 0.7368, "step": 589 }, { "epoch": 0.0446445461768378, "grad_norm": 4.382325649261475, "learning_rate": 1.6066565809379728e-05, "loss": 0.8988, "step": 590 }, { "epoch": 0.044720214899171426, "grad_norm": 4.108618259429932, "learning_rate": 1.609379727685325e-05, "loss": 0.8688, "step": 591 }, { "epoch": 0.04479588362150505, "grad_norm": 3.6567695140838623, "learning_rate": 1.6121028744326777e-05, "loss": 0.8829, "step": 592 }, { "epoch": 0.044871552343838675, "grad_norm": 3.6836469173431396, "learning_rate": 1.6148260211800304e-05, "loss": 1.0172, "step": 593 }, { "epoch": 0.0449472210661723, "grad_norm": 3.5387299060821533, "learning_rate": 1.6175491679273827e-05, "loss": 0.7189, "step": 594 }, { "epoch": 0.045022889788505924, "grad_norm": 13.61294174194336, "learning_rate": 1.6202723146747353e-05, "loss": 0.7605, "step": 595 }, { "epoch": 0.04509855851083954, "grad_norm": 4.561513900756836, "learning_rate": 1.622995461422088e-05, "loss": 0.7954, "step": 596 }, { "epoch": 0.045174227233173166, "grad_norm": 4.984888553619385, "learning_rate": 1.6257186081694402e-05, "loss": 1.0385, "step": 597 }, { "epoch": 0.04524989595550679, "grad_norm": 3.820335865020752, "learning_rate": 1.6284417549167925e-05, "loss": 0.8676, "step": 598 }, { "epoch": 0.045325564677840414, "grad_norm": 3.2544524669647217, "learning_rate": 1.6311649016641452e-05, "loss": 0.9514, "step": 599 }, { "epoch": 0.04540123340017404, "grad_norm": 5.345118999481201, "learning_rate": 1.6338880484114978e-05, "loss": 0.7714, "step": 600 }, { "epoch": 0.04547690212250766, "grad_norm": 3.907956123352051, "learning_rate": 1.63661119515885e-05, "loss": 0.8939, "step": 601 }, { "epoch": 0.04555257084484129, "grad_norm": 6.510712146759033, "learning_rate": 1.6393343419062028e-05, "loss": 0.8035, "step": 602 }, { "epoch": 0.045628239567174905, "grad_norm": 3.6979787349700928, "learning_rate": 1.6420574886535554e-05, "loss": 0.8553, "step": 603 }, { "epoch": 0.04570390828950853, "grad_norm": 4.597548007965088, "learning_rate": 1.6447806354009077e-05, "loss": 0.803, "step": 604 }, { "epoch": 0.045779577011842154, "grad_norm": 4.338045120239258, "learning_rate": 1.64750378214826e-05, "loss": 0.878, "step": 605 }, { "epoch": 0.04585524573417578, "grad_norm": 4.208822727203369, "learning_rate": 1.6502269288956126e-05, "loss": 0.807, "step": 606 }, { "epoch": 0.0459309144565094, "grad_norm": 3.9648923873901367, "learning_rate": 1.6529500756429653e-05, "loss": 0.8748, "step": 607 }, { "epoch": 0.04600658317884303, "grad_norm": 2.76554536819458, "learning_rate": 1.6556732223903176e-05, "loss": 0.8559, "step": 608 }, { "epoch": 0.04608225190117665, "grad_norm": 5.518862724304199, "learning_rate": 1.6583963691376702e-05, "loss": 0.8451, "step": 609 }, { "epoch": 0.04615792062351027, "grad_norm": 4.203677177429199, "learning_rate": 1.661119515885023e-05, "loss": 0.9215, "step": 610 }, { "epoch": 0.046233589345843894, "grad_norm": 3.4287822246551514, "learning_rate": 1.6638426626323752e-05, "loss": 0.8132, "step": 611 }, { "epoch": 0.04630925806817752, "grad_norm": 4.197726726531982, "learning_rate": 1.6665658093797278e-05, "loss": 0.8095, "step": 612 }, { "epoch": 0.04638492679051114, "grad_norm": 4.408070087432861, "learning_rate": 1.66928895612708e-05, "loss": 1.0774, "step": 613 }, { "epoch": 0.04646059551284477, "grad_norm": 3.8713626861572266, "learning_rate": 1.6720121028744328e-05, "loss": 0.7434, "step": 614 }, { "epoch": 0.04653626423517839, "grad_norm": 3.408956527709961, "learning_rate": 1.674735249621785e-05, "loss": 0.8511, "step": 615 }, { "epoch": 0.046611932957512016, "grad_norm": 2.916395902633667, "learning_rate": 1.6774583963691377e-05, "loss": 1.0129, "step": 616 }, { "epoch": 0.04668760167984563, "grad_norm": 3.563767671585083, "learning_rate": 1.6801815431164903e-05, "loss": 0.927, "step": 617 }, { "epoch": 0.04676327040217926, "grad_norm": 12.874147415161133, "learning_rate": 1.6829046898638426e-05, "loss": 0.7429, "step": 618 }, { "epoch": 0.04683893912451288, "grad_norm": 4.559039115905762, "learning_rate": 1.6856278366111953e-05, "loss": 0.9711, "step": 619 }, { "epoch": 0.04691460784684651, "grad_norm": 5.084630489349365, "learning_rate": 1.6883509833585476e-05, "loss": 0.87, "step": 620 }, { "epoch": 0.04699027656918013, "grad_norm": 4.294825553894043, "learning_rate": 1.6910741301059002e-05, "loss": 1.1267, "step": 621 }, { "epoch": 0.047065945291513755, "grad_norm": 14.884833335876465, "learning_rate": 1.6937972768532525e-05, "loss": 0.9987, "step": 622 }, { "epoch": 0.04714161401384737, "grad_norm": 3.429875373840332, "learning_rate": 1.696520423600605e-05, "loss": 0.8943, "step": 623 }, { "epoch": 0.047217282736181, "grad_norm": 4.022202014923096, "learning_rate": 1.6992435703479578e-05, "loss": 0.9646, "step": 624 }, { "epoch": 0.04729295145851462, "grad_norm": 3.040421962738037, "learning_rate": 1.70196671709531e-05, "loss": 0.9784, "step": 625 }, { "epoch": 0.047368620180848246, "grad_norm": 4.135276794433594, "learning_rate": 1.7046898638426627e-05, "loss": 0.793, "step": 626 }, { "epoch": 0.04744428890318187, "grad_norm": 3.7351131439208984, "learning_rate": 1.7074130105900154e-05, "loss": 0.7567, "step": 627 }, { "epoch": 0.047519957625515495, "grad_norm": 3.320626974105835, "learning_rate": 1.7101361573373677e-05, "loss": 0.6704, "step": 628 }, { "epoch": 0.04759562634784912, "grad_norm": 4.5212178230285645, "learning_rate": 1.71285930408472e-05, "loss": 0.8622, "step": 629 }, { "epoch": 0.04767129507018274, "grad_norm": 4.007808208465576, "learning_rate": 1.7155824508320726e-05, "loss": 0.8503, "step": 630 }, { "epoch": 0.04774696379251636, "grad_norm": 4.011386394500732, "learning_rate": 1.7183055975794253e-05, "loss": 0.9573, "step": 631 }, { "epoch": 0.047822632514849986, "grad_norm": 4.2028937339782715, "learning_rate": 1.7210287443267776e-05, "loss": 0.8327, "step": 632 }, { "epoch": 0.04789830123718361, "grad_norm": 3.389353036880493, "learning_rate": 1.7237518910741302e-05, "loss": 0.8626, "step": 633 }, { "epoch": 0.047973969959517235, "grad_norm": 3.483424663543701, "learning_rate": 1.726475037821483e-05, "loss": 1.0075, "step": 634 }, { "epoch": 0.04804963868185086, "grad_norm": 2.878598213195801, "learning_rate": 1.729198184568835e-05, "loss": 0.8132, "step": 635 }, { "epoch": 0.048125307404184484, "grad_norm": 4.429380893707275, "learning_rate": 1.7319213313161874e-05, "loss": 0.8764, "step": 636 }, { "epoch": 0.0482009761265181, "grad_norm": 3.748349189758301, "learning_rate": 1.73464447806354e-05, "loss": 0.7818, "step": 637 }, { "epoch": 0.048276644848851726, "grad_norm": 2.982710838317871, "learning_rate": 1.7373676248108927e-05, "loss": 0.8131, "step": 638 }, { "epoch": 0.04835231357118535, "grad_norm": 3.4813013076782227, "learning_rate": 1.740090771558245e-05, "loss": 1.0064, "step": 639 }, { "epoch": 0.048427982293518974, "grad_norm": 4.015783309936523, "learning_rate": 1.7428139183055977e-05, "loss": 0.775, "step": 640 }, { "epoch": 0.0485036510158526, "grad_norm": 5.063205242156982, "learning_rate": 1.7455370650529503e-05, "loss": 0.8112, "step": 641 }, { "epoch": 0.04857931973818622, "grad_norm": 3.2861835956573486, "learning_rate": 1.7482602118003026e-05, "loss": 0.8949, "step": 642 }, { "epoch": 0.04865498846051984, "grad_norm": 4.188798904418945, "learning_rate": 1.750983358547655e-05, "loss": 0.7644, "step": 643 }, { "epoch": 0.048730657182853465, "grad_norm": 2.6496074199676514, "learning_rate": 1.7537065052950075e-05, "loss": 0.7834, "step": 644 }, { "epoch": 0.04880632590518709, "grad_norm": 3.977748155593872, "learning_rate": 1.7564296520423602e-05, "loss": 0.7691, "step": 645 }, { "epoch": 0.048881994627520714, "grad_norm": 4.396695613861084, "learning_rate": 1.7591527987897125e-05, "loss": 0.7951, "step": 646 }, { "epoch": 0.04895766334985434, "grad_norm": 3.3221042156219482, "learning_rate": 1.761875945537065e-05, "loss": 0.69, "step": 647 }, { "epoch": 0.04903333207218796, "grad_norm": 4.295675754547119, "learning_rate": 1.7645990922844178e-05, "loss": 0.6565, "step": 648 }, { "epoch": 0.04910900079452159, "grad_norm": 3.0245003700256348, "learning_rate": 1.76732223903177e-05, "loss": 0.9869, "step": 649 }, { "epoch": 0.049184669516855205, "grad_norm": 3.450180768966675, "learning_rate": 1.7700453857791224e-05, "loss": 0.894, "step": 650 }, { "epoch": 0.04926033823918883, "grad_norm": 3.598787546157837, "learning_rate": 1.772768532526475e-05, "loss": 0.8114, "step": 651 }, { "epoch": 0.049336006961522454, "grad_norm": 3.394605875015259, "learning_rate": 1.7754916792738276e-05, "loss": 0.913, "step": 652 }, { "epoch": 0.04941167568385608, "grad_norm": 3.7939605712890625, "learning_rate": 1.77821482602118e-05, "loss": 0.9026, "step": 653 }, { "epoch": 0.0494873444061897, "grad_norm": 3.1907098293304443, "learning_rate": 1.7809379727685326e-05, "loss": 0.8377, "step": 654 }, { "epoch": 0.04956301312852333, "grad_norm": 3.942924976348877, "learning_rate": 1.7836611195158852e-05, "loss": 0.8688, "step": 655 }, { "epoch": 0.04963868185085695, "grad_norm": 3.0986690521240234, "learning_rate": 1.7863842662632375e-05, "loss": 0.8261, "step": 656 }, { "epoch": 0.04971435057319057, "grad_norm": 4.000396728515625, "learning_rate": 1.78910741301059e-05, "loss": 0.8151, "step": 657 }, { "epoch": 0.04979001929552419, "grad_norm": 4.246333122253418, "learning_rate": 1.7918305597579425e-05, "loss": 0.7004, "step": 658 }, { "epoch": 0.04986568801785782, "grad_norm": 3.094942092895508, "learning_rate": 1.794553706505295e-05, "loss": 0.8833, "step": 659 }, { "epoch": 0.04994135674019144, "grad_norm": 2.5228271484375, "learning_rate": 1.7972768532526474e-05, "loss": 0.786, "step": 660 }, { "epoch": 0.05001702546252507, "grad_norm": 3.9577856063842773, "learning_rate": 1.8e-05, "loss": 0.9358, "step": 661 }, { "epoch": 0.05009269418485869, "grad_norm": 2.605454444885254, "learning_rate": 1.7999999718195446e-05, "loss": 0.7314, "step": 662 }, { "epoch": 0.050168362907192315, "grad_norm": 4.23893928527832, "learning_rate": 1.79999988727818e-05, "loss": 0.7816, "step": 663 }, { "epoch": 0.05024403162952593, "grad_norm": 3.738476037979126, "learning_rate": 1.7999997463759113e-05, "loss": 0.7877, "step": 664 }, { "epoch": 0.05031970035185956, "grad_norm": 3.7416157722473145, "learning_rate": 1.7999995491127477e-05, "loss": 0.9857, "step": 665 }, { "epoch": 0.05039536907419318, "grad_norm": 2.7435741424560547, "learning_rate": 1.7999992954887013e-05, "loss": 0.683, "step": 666 }, { "epoch": 0.050471037796526806, "grad_norm": 2.920893430709839, "learning_rate": 1.7999989855037883e-05, "loss": 0.9957, "step": 667 }, { "epoch": 0.05054670651886043, "grad_norm": 3.039703607559204, "learning_rate": 1.7999986191580278e-05, "loss": 0.7383, "step": 668 }, { "epoch": 0.050622375241194055, "grad_norm": 3.4982380867004395, "learning_rate": 1.7999981964514427e-05, "loss": 0.9463, "step": 669 }, { "epoch": 0.05069804396352767, "grad_norm": 6.600189208984375, "learning_rate": 1.7999977173840594e-05, "loss": 0.9587, "step": 670 }, { "epoch": 0.0507737126858613, "grad_norm": 4.686428070068359, "learning_rate": 1.7999971819559082e-05, "loss": 0.8895, "step": 671 }, { "epoch": 0.05084938140819492, "grad_norm": 3.403703451156616, "learning_rate": 1.799996590167023e-05, "loss": 0.9366, "step": 672 }, { "epoch": 0.050925050130528546, "grad_norm": 3.352269411087036, "learning_rate": 1.7999959420174395e-05, "loss": 0.9894, "step": 673 }, { "epoch": 0.05100071885286217, "grad_norm": 3.1470065116882324, "learning_rate": 1.7999952375072e-05, "loss": 0.8608, "step": 674 }, { "epoch": 0.051076387575195795, "grad_norm": 5.2624897956848145, "learning_rate": 1.7999944766363475e-05, "loss": 0.8582, "step": 675 }, { "epoch": 0.05115205629752942, "grad_norm": 4.0696187019348145, "learning_rate": 1.7999936594049297e-05, "loss": 0.8385, "step": 676 }, { "epoch": 0.05122772501986304, "grad_norm": 4.101423740386963, "learning_rate": 1.7999927858129984e-05, "loss": 0.8864, "step": 677 }, { "epoch": 0.05130339374219666, "grad_norm": 4.04774284362793, "learning_rate": 1.7999918558606075e-05, "loss": 0.8205, "step": 678 }, { "epoch": 0.051379062464530285, "grad_norm": 4.320160388946533, "learning_rate": 1.7999908695478162e-05, "loss": 1.0081, "step": 679 }, { "epoch": 0.05145473118686391, "grad_norm": 4.122174263000488, "learning_rate": 1.7999898268746852e-05, "loss": 0.6311, "step": 680 }, { "epoch": 0.051530399909197534, "grad_norm": 4.779628276824951, "learning_rate": 1.7999887278412806e-05, "loss": 0.9552, "step": 681 }, { "epoch": 0.05160606863153116, "grad_norm": 3.8603785037994385, "learning_rate": 1.7999875724476707e-05, "loss": 0.889, "step": 682 }, { "epoch": 0.05168173735386478, "grad_norm": 2.222905158996582, "learning_rate": 1.7999863606939286e-05, "loss": 1.0504, "step": 683 }, { "epoch": 0.0517574060761984, "grad_norm": 2.802685260772705, "learning_rate": 1.7999850925801292e-05, "loss": 0.9105, "step": 684 }, { "epoch": 0.051833074798532025, "grad_norm": 3.3969781398773193, "learning_rate": 1.7999837681063527e-05, "loss": 0.7931, "step": 685 }, { "epoch": 0.05190874352086565, "grad_norm": 3.549208641052246, "learning_rate": 1.7999823872726814e-05, "loss": 0.9147, "step": 686 }, { "epoch": 0.051984412243199274, "grad_norm": 2.738788366317749, "learning_rate": 1.7999809500792023e-05, "loss": 0.6898, "step": 687 }, { "epoch": 0.0520600809655329, "grad_norm": 5.0861711502075195, "learning_rate": 1.799979456526005e-05, "loss": 0.9764, "step": 688 }, { "epoch": 0.05213574968786652, "grad_norm": 3.7050702571868896, "learning_rate": 1.799977906613184e-05, "loss": 0.8309, "step": 689 }, { "epoch": 0.05221141841020015, "grad_norm": 2.8487472534179688, "learning_rate": 1.7999763003408348e-05, "loss": 0.798, "step": 690 }, { "epoch": 0.052287087132533765, "grad_norm": 3.320040464401245, "learning_rate": 1.7999746377090593e-05, "loss": 0.9132, "step": 691 }, { "epoch": 0.05236275585486739, "grad_norm": 2.664503574371338, "learning_rate": 1.7999729187179606e-05, "loss": 0.8706, "step": 692 }, { "epoch": 0.052438424577201014, "grad_norm": 5.23117208480835, "learning_rate": 1.7999711433676474e-05, "loss": 0.7535, "step": 693 }, { "epoch": 0.05251409329953464, "grad_norm": 4.686688423156738, "learning_rate": 1.7999693116582302e-05, "loss": 0.9761, "step": 694 }, { "epoch": 0.05258976202186826, "grad_norm": 2.983670234680176, "learning_rate": 1.7999674235898237e-05, "loss": 0.8898, "step": 695 }, { "epoch": 0.05266543074420189, "grad_norm": 3.057015895843506, "learning_rate": 1.7999654791625463e-05, "loss": 0.7925, "step": 696 }, { "epoch": 0.052741099466535504, "grad_norm": 3.9914684295654297, "learning_rate": 1.79996347837652e-05, "loss": 0.7957, "step": 697 }, { "epoch": 0.05281676818886913, "grad_norm": 3.987391233444214, "learning_rate": 1.7999614212318696e-05, "loss": 0.9454, "step": 698 }, { "epoch": 0.05289243691120275, "grad_norm": 4.3634138107299805, "learning_rate": 1.7999593077287244e-05, "loss": 1.1532, "step": 699 }, { "epoch": 0.05296810563353638, "grad_norm": 3.055154800415039, "learning_rate": 1.799957137867216e-05, "loss": 0.8241, "step": 700 }, { "epoch": 0.05304377435587, "grad_norm": 3.825345277786255, "learning_rate": 1.7999549116474813e-05, "loss": 0.8979, "step": 701 }, { "epoch": 0.053119443078203626, "grad_norm": 4.292139530181885, "learning_rate": 1.7999526290696592e-05, "loss": 0.9344, "step": 702 }, { "epoch": 0.05319511180053725, "grad_norm": 4.645684719085693, "learning_rate": 1.7999502901338925e-05, "loss": 0.9731, "step": 703 }, { "epoch": 0.05327078052287087, "grad_norm": 2.999361753463745, "learning_rate": 1.7999478948403278e-05, "loss": 0.9273, "step": 704 }, { "epoch": 0.05334644924520449, "grad_norm": 2.876819610595703, "learning_rate": 1.7999454431891153e-05, "loss": 0.8832, "step": 705 }, { "epoch": 0.05342211796753812, "grad_norm": 3.5443317890167236, "learning_rate": 1.7999429351804084e-05, "loss": 0.792, "step": 706 }, { "epoch": 0.05349778668987174, "grad_norm": 2.4923086166381836, "learning_rate": 1.799940370814364e-05, "loss": 0.8596, "step": 707 }, { "epoch": 0.053573455412205366, "grad_norm": 3.5171520709991455, "learning_rate": 1.799937750091143e-05, "loss": 0.8311, "step": 708 }, { "epoch": 0.05364912413453899, "grad_norm": 3.3235208988189697, "learning_rate": 1.799935073010909e-05, "loss": 0.7817, "step": 709 }, { "epoch": 0.053724792856872615, "grad_norm": 3.130582809448242, "learning_rate": 1.79993233957383e-05, "loss": 0.8418, "step": 710 }, { "epoch": 0.05380046157920623, "grad_norm": 2.8573694229125977, "learning_rate": 1.7999295497800774e-05, "loss": 0.7832, "step": 711 }, { "epoch": 0.05387613030153986, "grad_norm": 3.514740467071533, "learning_rate": 1.7999267036298257e-05, "loss": 0.8998, "step": 712 }, { "epoch": 0.05395179902387348, "grad_norm": 4.873480796813965, "learning_rate": 1.799923801123253e-05, "loss": 0.9173, "step": 713 }, { "epoch": 0.054027467746207106, "grad_norm": 3.092484951019287, "learning_rate": 1.7999208422605412e-05, "loss": 0.922, "step": 714 }, { "epoch": 0.05410313646854073, "grad_norm": 3.5271174907684326, "learning_rate": 1.7999178270418757e-05, "loss": 0.8321, "step": 715 }, { "epoch": 0.054178805190874355, "grad_norm": 3.2208545207977295, "learning_rate": 1.799914755467445e-05, "loss": 0.9818, "step": 716 }, { "epoch": 0.05425447391320797, "grad_norm": 3.0340662002563477, "learning_rate": 1.7999116275374415e-05, "loss": 0.7099, "step": 717 }, { "epoch": 0.054330142635541596, "grad_norm": 3.023000717163086, "learning_rate": 1.799908443252061e-05, "loss": 0.835, "step": 718 }, { "epoch": 0.05440581135787522, "grad_norm": 4.08595609664917, "learning_rate": 1.799905202611504e-05, "loss": 0.8734, "step": 719 }, { "epoch": 0.054481480080208845, "grad_norm": 3.454214572906494, "learning_rate": 1.799901905615972e-05, "loss": 1.0036, "step": 720 }, { "epoch": 0.05455714880254247, "grad_norm": 3.4939661026000977, "learning_rate": 1.799898552265672e-05, "loss": 0.7938, "step": 721 }, { "epoch": 0.054632817524876094, "grad_norm": 4.215449333190918, "learning_rate": 1.799895142560814e-05, "loss": 0.9008, "step": 722 }, { "epoch": 0.05470848624720972, "grad_norm": 3.8113982677459717, "learning_rate": 1.799891676501612e-05, "loss": 0.8452, "step": 723 }, { "epoch": 0.054784154969543336, "grad_norm": 3.6723668575286865, "learning_rate": 1.7998881540882822e-05, "loss": 0.8416, "step": 724 }, { "epoch": 0.05485982369187696, "grad_norm": 3.342585325241089, "learning_rate": 1.7998845753210456e-05, "loss": 0.9927, "step": 725 }, { "epoch": 0.054935492414210585, "grad_norm": 3.9180972576141357, "learning_rate": 1.7998809402001267e-05, "loss": 0.9583, "step": 726 }, { "epoch": 0.05501116113654421, "grad_norm": 3.55850887298584, "learning_rate": 1.7998772487257524e-05, "loss": 0.8274, "step": 727 }, { "epoch": 0.055086829858877834, "grad_norm": 3.635193347930908, "learning_rate": 1.799873500898154e-05, "loss": 0.851, "step": 728 }, { "epoch": 0.05516249858121146, "grad_norm": 2.7853517532348633, "learning_rate": 1.799869696717567e-05, "loss": 0.778, "step": 729 }, { "epoch": 0.05523816730354508, "grad_norm": 3.3416101932525635, "learning_rate": 1.799865836184229e-05, "loss": 0.8563, "step": 730 }, { "epoch": 0.0553138360258787, "grad_norm": 3.1914992332458496, "learning_rate": 1.7998619192983812e-05, "loss": 0.7991, "step": 731 }, { "epoch": 0.055389504748212325, "grad_norm": 3.1963469982147217, "learning_rate": 1.79985794606027e-05, "loss": 0.9286, "step": 732 }, { "epoch": 0.05546517347054595, "grad_norm": 3.363598346710205, "learning_rate": 1.7998539164701437e-05, "loss": 0.8608, "step": 733 }, { "epoch": 0.05554084219287957, "grad_norm": 3.9688327312469482, "learning_rate": 1.7998498305282548e-05, "loss": 0.947, "step": 734 }, { "epoch": 0.0556165109152132, "grad_norm": 3.629190683364868, "learning_rate": 1.7998456882348587e-05, "loss": 0.7682, "step": 735 }, { "epoch": 0.05569217963754682, "grad_norm": 2.6202425956726074, "learning_rate": 1.7998414895902153e-05, "loss": 0.8611, "step": 736 }, { "epoch": 0.05576784835988045, "grad_norm": 3.04758882522583, "learning_rate": 1.7998372345945874e-05, "loss": 1.0072, "step": 737 }, { "epoch": 0.055843517082214064, "grad_norm": 3.110172748565674, "learning_rate": 1.7998329232482415e-05, "loss": 0.7794, "step": 738 }, { "epoch": 0.05591918580454769, "grad_norm": 3.5827243328094482, "learning_rate": 1.7998285555514472e-05, "loss": 0.8902, "step": 739 }, { "epoch": 0.05599485452688131, "grad_norm": 3.689215898513794, "learning_rate": 1.799824131504479e-05, "loss": 0.9457, "step": 740 }, { "epoch": 0.05607052324921494, "grad_norm": 3.847498893737793, "learning_rate": 1.799819651107613e-05, "loss": 0.9951, "step": 741 }, { "epoch": 0.05614619197154856, "grad_norm": 3.818758249282837, "learning_rate": 1.7998151143611298e-05, "loss": 0.8568, "step": 742 }, { "epoch": 0.056221860693882186, "grad_norm": 4.948990821838379, "learning_rate": 1.799810521265314e-05, "loss": 0.7821, "step": 743 }, { "epoch": 0.056297529416215804, "grad_norm": 2.994140625, "learning_rate": 1.799805871820453e-05, "loss": 0.8261, "step": 744 }, { "epoch": 0.05637319813854943, "grad_norm": 3.428760528564453, "learning_rate": 1.799801166026838e-05, "loss": 0.9666, "step": 745 }, { "epoch": 0.05644886686088305, "grad_norm": 3.410270929336548, "learning_rate": 1.7997964038847636e-05, "loss": 0.7529, "step": 746 }, { "epoch": 0.05652453558321668, "grad_norm": 2.595470428466797, "learning_rate": 1.7997915853945282e-05, "loss": 0.9564, "step": 747 }, { "epoch": 0.0566002043055503, "grad_norm": 2.552440881729126, "learning_rate": 1.7997867105564336e-05, "loss": 0.925, "step": 748 }, { "epoch": 0.056675873027883926, "grad_norm": 3.9681804180145264, "learning_rate": 1.7997817793707845e-05, "loss": 1.0332, "step": 749 }, { "epoch": 0.05675154175021755, "grad_norm": 2.687912940979004, "learning_rate": 1.7997767918378904e-05, "loss": 0.8711, "step": 750 }, { "epoch": 0.05682721047255117, "grad_norm": 3.232062578201294, "learning_rate": 1.799771747958063e-05, "loss": 0.6525, "step": 751 }, { "epoch": 0.05690287919488479, "grad_norm": 3.3690457344055176, "learning_rate": 1.7997666477316194e-05, "loss": 0.9147, "step": 752 }, { "epoch": 0.05697854791721842, "grad_norm": 3.5086419582366943, "learning_rate": 1.7997614911588774e-05, "loss": 0.9292, "step": 753 }, { "epoch": 0.05705421663955204, "grad_norm": 2.7476987838745117, "learning_rate": 1.7997562782401604e-05, "loss": 0.7515, "step": 754 }, { "epoch": 0.057129885361885666, "grad_norm": 2.3388469219207764, "learning_rate": 1.7997510089757956e-05, "loss": 1.0614, "step": 755 }, { "epoch": 0.05720555408421929, "grad_norm": 3.508303165435791, "learning_rate": 1.7997456833661124e-05, "loss": 0.7057, "step": 756 }, { "epoch": 0.057281222806552914, "grad_norm": 4.021640300750732, "learning_rate": 1.7997403014114445e-05, "loss": 1.0216, "step": 757 }, { "epoch": 0.05735689152888653, "grad_norm": 5.258941173553467, "learning_rate": 1.7997348631121287e-05, "loss": 0.8469, "step": 758 }, { "epoch": 0.057432560251220156, "grad_norm": 3.1040396690368652, "learning_rate": 1.7997293684685055e-05, "loss": 0.8839, "step": 759 }, { "epoch": 0.05750822897355378, "grad_norm": 3.224198341369629, "learning_rate": 1.7997238174809194e-05, "loss": 0.8264, "step": 760 }, { "epoch": 0.057583897695887405, "grad_norm": 3.097722291946411, "learning_rate": 1.7997182101497175e-05, "loss": 0.7879, "step": 761 }, { "epoch": 0.05765956641822103, "grad_norm": 3.591596841812134, "learning_rate": 1.7997125464752517e-05, "loss": 0.8322, "step": 762 }, { "epoch": 0.057735235140554654, "grad_norm": 3.280409336090088, "learning_rate": 1.7997068264578757e-05, "loss": 0.8275, "step": 763 }, { "epoch": 0.05781090386288828, "grad_norm": 3.701860189437866, "learning_rate": 1.7997010500979488e-05, "loss": 0.8116, "step": 764 }, { "epoch": 0.057886572585221896, "grad_norm": 3.2338805198669434, "learning_rate": 1.7996952173958317e-05, "loss": 0.8088, "step": 765 }, { "epoch": 0.05796224130755552, "grad_norm": 3.3278093338012695, "learning_rate": 1.79968932835189e-05, "loss": 0.7139, "step": 766 }, { "epoch": 0.058037910029889145, "grad_norm": 2.84871768951416, "learning_rate": 1.799683382966493e-05, "loss": 0.8951, "step": 767 }, { "epoch": 0.05811357875222277, "grad_norm": 3.250761032104492, "learning_rate": 1.7996773812400124e-05, "loss": 0.834, "step": 768 }, { "epoch": 0.058189247474556394, "grad_norm": 3.869211435317993, "learning_rate": 1.7996713231728244e-05, "loss": 0.9022, "step": 769 }, { "epoch": 0.05826491619689002, "grad_norm": 3.068364143371582, "learning_rate": 1.7996652087653082e-05, "loss": 0.882, "step": 770 }, { "epoch": 0.058340584919223636, "grad_norm": 3.9008500576019287, "learning_rate": 1.7996590380178466e-05, "loss": 0.956, "step": 771 }, { "epoch": 0.05841625364155726, "grad_norm": 5.665666580200195, "learning_rate": 1.7996528109308266e-05, "loss": 0.8128, "step": 772 }, { "epoch": 0.058491922363890884, "grad_norm": 3.024960517883301, "learning_rate": 1.7996465275046374e-05, "loss": 0.9174, "step": 773 }, { "epoch": 0.05856759108622451, "grad_norm": 3.01311993598938, "learning_rate": 1.7996401877396733e-05, "loss": 0.8168, "step": 774 }, { "epoch": 0.05864325980855813, "grad_norm": 3.073803186416626, "learning_rate": 1.7996337916363302e-05, "loss": 0.7588, "step": 775 }, { "epoch": 0.05871892853089176, "grad_norm": 3.6292426586151123, "learning_rate": 1.7996273391950095e-05, "loss": 1.1097, "step": 776 }, { "epoch": 0.05879459725322538, "grad_norm": 3.8415868282318115, "learning_rate": 1.7996208304161153e-05, "loss": 0.9531, "step": 777 }, { "epoch": 0.058870265975559, "grad_norm": 2.900418996810913, "learning_rate": 1.799614265300055e-05, "loss": 0.8801, "step": 778 }, { "epoch": 0.058945934697892624, "grad_norm": 3.2337486743927, "learning_rate": 1.7996076438472395e-05, "loss": 0.92, "step": 779 }, { "epoch": 0.05902160342022625, "grad_norm": 2.9472317695617676, "learning_rate": 1.7996009660580836e-05, "loss": 0.8633, "step": 780 }, { "epoch": 0.05909727214255987, "grad_norm": 2.4730706214904785, "learning_rate": 1.7995942319330056e-05, "loss": 0.8554, "step": 781 }, { "epoch": 0.0591729408648935, "grad_norm": 5.070908546447754, "learning_rate": 1.7995874414724272e-05, "loss": 0.7889, "step": 782 }, { "epoch": 0.05924860958722712, "grad_norm": 3.5135512351989746, "learning_rate": 1.7995805946767734e-05, "loss": 0.802, "step": 783 }, { "epoch": 0.059324278309560746, "grad_norm": 3.356902599334717, "learning_rate": 1.7995736915464735e-05, "loss": 0.8238, "step": 784 }, { "epoch": 0.059399947031894364, "grad_norm": 3.2595343589782715, "learning_rate": 1.7995667320819595e-05, "loss": 0.8915, "step": 785 }, { "epoch": 0.05947561575422799, "grad_norm": 2.725177526473999, "learning_rate": 1.799559716283667e-05, "loss": 0.7803, "step": 786 }, { "epoch": 0.05955128447656161, "grad_norm": 3.298215389251709, "learning_rate": 1.7995526441520354e-05, "loss": 0.9538, "step": 787 }, { "epoch": 0.05962695319889524, "grad_norm": 4.367799758911133, "learning_rate": 1.7995455156875077e-05, "loss": 0.9063, "step": 788 }, { "epoch": 0.05970262192122886, "grad_norm": 2.9157984256744385, "learning_rate": 1.7995383308905307e-05, "loss": 0.8681, "step": 789 }, { "epoch": 0.059778290643562486, "grad_norm": 3.340041399002075, "learning_rate": 1.7995310897615537e-05, "loss": 0.9215, "step": 790 }, { "epoch": 0.05985395936589611, "grad_norm": 3.1033027172088623, "learning_rate": 1.7995237923010306e-05, "loss": 0.8081, "step": 791 }, { "epoch": 0.05992962808822973, "grad_norm": 3.03116774559021, "learning_rate": 1.799516438509418e-05, "loss": 0.9414, "step": 792 }, { "epoch": 0.06000529681056335, "grad_norm": 3.3425679206848145, "learning_rate": 1.7995090283871765e-05, "loss": 0.8291, "step": 793 }, { "epoch": 0.06008096553289698, "grad_norm": 2.8552069664001465, "learning_rate": 1.7995015619347707e-05, "loss": 0.9352, "step": 794 }, { "epoch": 0.0601566342552306, "grad_norm": 3.53585147857666, "learning_rate": 1.7994940391526674e-05, "loss": 0.8699, "step": 795 }, { "epoch": 0.060232302977564225, "grad_norm": 2.6548848152160645, "learning_rate": 1.7994864600413383e-05, "loss": 0.806, "step": 796 }, { "epoch": 0.06030797169989785, "grad_norm": 2.734811782836914, "learning_rate": 1.7994788246012578e-05, "loss": 0.73, "step": 797 }, { "epoch": 0.06038364042223147, "grad_norm": 3.6536951065063477, "learning_rate": 1.7994711328329038e-05, "loss": 0.7225, "step": 798 }, { "epoch": 0.06045930914456509, "grad_norm": 2.3973493576049805, "learning_rate": 1.7994633847367582e-05, "loss": 0.661, "step": 799 }, { "epoch": 0.060534977866898716, "grad_norm": 4.086350917816162, "learning_rate": 1.7994555803133065e-05, "loss": 0.8949, "step": 800 }, { "epoch": 0.06061064658923234, "grad_norm": 2.1674516201019287, "learning_rate": 1.799447719563037e-05, "loss": 1.0709, "step": 801 }, { "epoch": 0.060686315311565965, "grad_norm": 3.184936761856079, "learning_rate": 1.799439802486442e-05, "loss": 0.7324, "step": 802 }, { "epoch": 0.06076198403389959, "grad_norm": 2.968808889389038, "learning_rate": 1.7994318290840178e-05, "loss": 0.84, "step": 803 }, { "epoch": 0.060837652756233214, "grad_norm": 3.6430764198303223, "learning_rate": 1.799423799356263e-05, "loss": 0.8638, "step": 804 }, { "epoch": 0.06091332147856683, "grad_norm": 2.8016927242279053, "learning_rate": 1.799415713303681e-05, "loss": 0.7604, "step": 805 }, { "epoch": 0.060988990200900456, "grad_norm": 7.259315013885498, "learning_rate": 1.799407570926778e-05, "loss": 0.9089, "step": 806 }, { "epoch": 0.06106465892323408, "grad_norm": 4.342022895812988, "learning_rate": 1.7993993722260635e-05, "loss": 0.7734, "step": 807 }, { "epoch": 0.061140327645567705, "grad_norm": 4.369460582733154, "learning_rate": 1.7993911172020517e-05, "loss": 0.8225, "step": 808 }, { "epoch": 0.06121599636790133, "grad_norm": 3.1216466426849365, "learning_rate": 1.7993828058552593e-05, "loss": 1.0397, "step": 809 }, { "epoch": 0.061291665090234954, "grad_norm": 3.13508677482605, "learning_rate": 1.799374438186206e-05, "loss": 0.9094, "step": 810 }, { "epoch": 0.06136733381256858, "grad_norm": 11.32715892791748, "learning_rate": 1.799366014195417e-05, "loss": 0.9524, "step": 811 }, { "epoch": 0.061443002534902195, "grad_norm": 2.600041389465332, "learning_rate": 1.799357533883419e-05, "loss": 0.7271, "step": 812 }, { "epoch": 0.06151867125723582, "grad_norm": 3.431683301925659, "learning_rate": 1.7993489972507434e-05, "loss": 0.8767, "step": 813 }, { "epoch": 0.061594339979569444, "grad_norm": 3.009431838989258, "learning_rate": 1.799340404297925e-05, "loss": 0.8367, "step": 814 }, { "epoch": 0.06167000870190307, "grad_norm": 3.2158117294311523, "learning_rate": 1.7993317550255014e-05, "loss": 0.8516, "step": 815 }, { "epoch": 0.06174567742423669, "grad_norm": 3.753148317337036, "learning_rate": 1.7993230494340145e-05, "loss": 0.8619, "step": 816 }, { "epoch": 0.06182134614657032, "grad_norm": 3.217808485031128, "learning_rate": 1.7993142875240097e-05, "loss": 0.7954, "step": 817 }, { "epoch": 0.061897014868903935, "grad_norm": 6.548532009124756, "learning_rate": 1.7993054692960354e-05, "loss": 0.8667, "step": 818 }, { "epoch": 0.06197268359123756, "grad_norm": 3.1087334156036377, "learning_rate": 1.7992965947506437e-05, "loss": 0.9301, "step": 819 }, { "epoch": 0.062048352313571184, "grad_norm": 4.297720432281494, "learning_rate": 1.7992876638883907e-05, "loss": 0.753, "step": 820 }, { "epoch": 0.06212402103590481, "grad_norm": 3.4362447261810303, "learning_rate": 1.7992786767098353e-05, "loss": 0.7636, "step": 821 }, { "epoch": 0.06219968975823843, "grad_norm": 2.7632527351379395, "learning_rate": 1.799269633215541e-05, "loss": 0.8575, "step": 822 }, { "epoch": 0.06227535848057206, "grad_norm": 3.267557144165039, "learning_rate": 1.7992605334060736e-05, "loss": 0.7376, "step": 823 }, { "epoch": 0.06235102720290568, "grad_norm": 3.381315231323242, "learning_rate": 1.7992513772820027e-05, "loss": 0.9032, "step": 824 }, { "epoch": 0.0624266959252393, "grad_norm": 3.6174585819244385, "learning_rate": 1.7992421648439024e-05, "loss": 0.7052, "step": 825 }, { "epoch": 0.06250236464757293, "grad_norm": 3.082953929901123, "learning_rate": 1.799232896092349e-05, "loss": 0.9377, "step": 826 }, { "epoch": 0.06257803336990655, "grad_norm": 5.397732734680176, "learning_rate": 1.7992235710279233e-05, "loss": 0.8913, "step": 827 }, { "epoch": 0.06265370209224018, "grad_norm": 3.0445351600646973, "learning_rate": 1.799214189651209e-05, "loss": 0.9004, "step": 828 }, { "epoch": 0.06272937081457379, "grad_norm": 3.1507112979888916, "learning_rate": 1.799204751962794e-05, "loss": 1.0471, "step": 829 }, { "epoch": 0.06280503953690741, "grad_norm": 4.134524822235107, "learning_rate": 1.7991952579632688e-05, "loss": 0.8125, "step": 830 }, { "epoch": 0.06288070825924104, "grad_norm": 2.9399423599243164, "learning_rate": 1.799185707653228e-05, "loss": 0.7965, "step": 831 }, { "epoch": 0.06295637698157466, "grad_norm": 4.048933506011963, "learning_rate": 1.7991761010332704e-05, "loss": 0.8824, "step": 832 }, { "epoch": 0.06303204570390829, "grad_norm": 2.8442611694335938, "learning_rate": 1.7991664381039968e-05, "loss": 0.6825, "step": 833 }, { "epoch": 0.06310771442624191, "grad_norm": 3.6340487003326416, "learning_rate": 1.7991567188660125e-05, "loss": 0.8944, "step": 834 }, { "epoch": 0.06318338314857554, "grad_norm": 3.6592376232147217, "learning_rate": 1.7991469433199264e-05, "loss": 0.8148, "step": 835 }, { "epoch": 0.06325905187090916, "grad_norm": 3.6150155067443848, "learning_rate": 1.7991371114663503e-05, "loss": 0.6471, "step": 836 }, { "epoch": 0.06333472059324279, "grad_norm": 3.5820491313934326, "learning_rate": 1.7991272233059003e-05, "loss": 0.9492, "step": 837 }, { "epoch": 0.06341038931557641, "grad_norm": 3.483809471130371, "learning_rate": 1.7991172788391953e-05, "loss": 0.7662, "step": 838 }, { "epoch": 0.06348605803791003, "grad_norm": 3.0306272506713867, "learning_rate": 1.7991072780668585e-05, "loss": 0.9009, "step": 839 }, { "epoch": 0.06356172676024366, "grad_norm": 3.402259588241577, "learning_rate": 1.7990972209895155e-05, "loss": 0.7558, "step": 840 }, { "epoch": 0.06363739548257728, "grad_norm": 2.673753023147583, "learning_rate": 1.7990871076077967e-05, "loss": 0.7811, "step": 841 }, { "epoch": 0.0637130642049109, "grad_norm": 4.777785778045654, "learning_rate": 1.799076937922335e-05, "loss": 0.8718, "step": 842 }, { "epoch": 0.06378873292724452, "grad_norm": 3.1430823802948, "learning_rate": 1.799066711933768e-05, "loss": 0.7224, "step": 843 }, { "epoch": 0.06386440164957814, "grad_norm": 3.542694568634033, "learning_rate": 1.799056429642735e-05, "loss": 0.7901, "step": 844 }, { "epoch": 0.06394007037191177, "grad_norm": 3.038499116897583, "learning_rate": 1.7990460910498806e-05, "loss": 0.79, "step": 845 }, { "epoch": 0.06401573909424539, "grad_norm": 3.5024659633636475, "learning_rate": 1.7990356961558523e-05, "loss": 0.9269, "step": 846 }, { "epoch": 0.06409140781657902, "grad_norm": 4.1338067054748535, "learning_rate": 1.7990252449613008e-05, "loss": 0.9418, "step": 847 }, { "epoch": 0.06416707653891264, "grad_norm": 3.374940872192383, "learning_rate": 1.7990147374668806e-05, "loss": 0.9184, "step": 848 }, { "epoch": 0.06424274526124626, "grad_norm": 3.2009170055389404, "learning_rate": 1.7990041736732497e-05, "loss": 0.7091, "step": 849 }, { "epoch": 0.06431841398357989, "grad_norm": 4.257187366485596, "learning_rate": 1.79899355358107e-05, "loss": 0.958, "step": 850 }, { "epoch": 0.06439408270591351, "grad_norm": 3.449984550476074, "learning_rate": 1.798982877191006e-05, "loss": 0.7824, "step": 851 }, { "epoch": 0.06446975142824714, "grad_norm": 4.163568019866943, "learning_rate": 1.798972144503727e-05, "loss": 0.8174, "step": 852 }, { "epoch": 0.06454542015058076, "grad_norm": 4.116754531860352, "learning_rate": 1.7989613555199045e-05, "loss": 0.69, "step": 853 }, { "epoch": 0.06462108887291439, "grad_norm": 4.340511322021484, "learning_rate": 1.798950510240214e-05, "loss": 0.8673, "step": 854 }, { "epoch": 0.06469675759524801, "grad_norm": 4.204843521118164, "learning_rate": 1.798939608665335e-05, "loss": 0.7685, "step": 855 }, { "epoch": 0.06477242631758162, "grad_norm": 5.48193359375, "learning_rate": 1.7989286507959505e-05, "loss": 0.8221, "step": 856 }, { "epoch": 0.06484809503991525, "grad_norm": 3.1936569213867188, "learning_rate": 1.7989176366327463e-05, "loss": 0.8692, "step": 857 }, { "epoch": 0.06492376376224887, "grad_norm": 3.160611867904663, "learning_rate": 1.7989065661764122e-05, "loss": 0.7909, "step": 858 }, { "epoch": 0.0649994324845825, "grad_norm": 2.972747564315796, "learning_rate": 1.7988954394276416e-05, "loss": 0.9906, "step": 859 }, { "epoch": 0.06507510120691612, "grad_norm": 3.2030298709869385, "learning_rate": 1.798884256387131e-05, "loss": 0.8621, "step": 860 }, { "epoch": 0.06515076992924974, "grad_norm": 3.12058162689209, "learning_rate": 1.7988730170555808e-05, "loss": 0.9119, "step": 861 }, { "epoch": 0.06522643865158337, "grad_norm": 3.048793077468872, "learning_rate": 1.7988617214336953e-05, "loss": 0.8322, "step": 862 }, { "epoch": 0.06530210737391699, "grad_norm": 3.0921437740325928, "learning_rate": 1.7988503695221814e-05, "loss": 0.8441, "step": 863 }, { "epoch": 0.06537777609625062, "grad_norm": 2.826828718185425, "learning_rate": 1.7988389613217504e-05, "loss": 0.9022, "step": 864 }, { "epoch": 0.06545344481858424, "grad_norm": 2.6267917156219482, "learning_rate": 1.798827496833116e-05, "loss": 0.8584, "step": 865 }, { "epoch": 0.06552911354091787, "grad_norm": 2.9729437828063965, "learning_rate": 1.7988159760569968e-05, "loss": 0.9119, "step": 866 }, { "epoch": 0.06560478226325149, "grad_norm": 2.4244964122772217, "learning_rate": 1.798804398994114e-05, "loss": 0.7932, "step": 867 }, { "epoch": 0.06568045098558511, "grad_norm": 4.761054515838623, "learning_rate": 1.7987927656451928e-05, "loss": 0.9412, "step": 868 }, { "epoch": 0.06575611970791873, "grad_norm": 2.717557191848755, "learning_rate": 1.7987810760109615e-05, "loss": 0.7506, "step": 869 }, { "epoch": 0.06583178843025235, "grad_norm": 3.001830577850342, "learning_rate": 1.798769330092152e-05, "loss": 0.8709, "step": 870 }, { "epoch": 0.06590745715258597, "grad_norm": 3.638742208480835, "learning_rate": 1.7987575278895005e-05, "loss": 0.7777, "step": 871 }, { "epoch": 0.0659831258749196, "grad_norm": 2.8013672828674316, "learning_rate": 1.798745669403745e-05, "loss": 0.7571, "step": 872 }, { "epoch": 0.06605879459725322, "grad_norm": 2.839331865310669, "learning_rate": 1.7987337546356293e-05, "loss": 0.7515, "step": 873 }, { "epoch": 0.06613446331958685, "grad_norm": 3.6502885818481445, "learning_rate": 1.798721783585899e-05, "loss": 0.7892, "step": 874 }, { "epoch": 0.06621013204192047, "grad_norm": 2.118971347808838, "learning_rate": 1.7987097562553037e-05, "loss": 0.9736, "step": 875 }, { "epoch": 0.0662858007642541, "grad_norm": 2.8734261989593506, "learning_rate": 1.7986976726445966e-05, "loss": 0.783, "step": 876 }, { "epoch": 0.06636146948658772, "grad_norm": 2.609933376312256, "learning_rate": 1.7986855327545346e-05, "loss": 0.7125, "step": 877 }, { "epoch": 0.06643713820892135, "grad_norm": 3.158010721206665, "learning_rate": 1.798673336585878e-05, "loss": 0.7234, "step": 878 }, { "epoch": 0.06651280693125497, "grad_norm": 3.257824182510376, "learning_rate": 1.7986610841393902e-05, "loss": 0.9167, "step": 879 }, { "epoch": 0.0665884756535886, "grad_norm": 2.97019100189209, "learning_rate": 1.7986487754158386e-05, "loss": 0.7206, "step": 880 }, { "epoch": 0.06666414437592222, "grad_norm": 2.5462703704833984, "learning_rate": 1.7986364104159942e-05, "loss": 0.8476, "step": 881 }, { "epoch": 0.06673981309825583, "grad_norm": 3.024618625640869, "learning_rate": 1.7986239891406314e-05, "loss": 0.929, "step": 882 }, { "epoch": 0.06681548182058945, "grad_norm": 4.000933647155762, "learning_rate": 1.7986115115905276e-05, "loss": 0.8126, "step": 883 }, { "epoch": 0.06689115054292308, "grad_norm": 2.9372494220733643, "learning_rate": 1.798598977766465e-05, "loss": 0.8261, "step": 884 }, { "epoch": 0.0669668192652567, "grad_norm": 2.813204526901245, "learning_rate": 1.7985863876692276e-05, "loss": 0.8327, "step": 885 }, { "epoch": 0.06704248798759033, "grad_norm": 3.385720729827881, "learning_rate": 1.798573741299604e-05, "loss": 0.7472, "step": 886 }, { "epoch": 0.06711815670992395, "grad_norm": 2.617894172668457, "learning_rate": 1.798561038658387e-05, "loss": 0.781, "step": 887 }, { "epoch": 0.06719382543225758, "grad_norm": 3.153611660003662, "learning_rate": 1.798548279746371e-05, "loss": 0.7154, "step": 888 }, { "epoch": 0.0672694941545912, "grad_norm": 2.9759254455566406, "learning_rate": 1.7985354645643556e-05, "loss": 0.7758, "step": 889 }, { "epoch": 0.06734516287692482, "grad_norm": 3.233285665512085, "learning_rate": 1.798522593113143e-05, "loss": 0.8326, "step": 890 }, { "epoch": 0.06742083159925845, "grad_norm": 3.2557930946350098, "learning_rate": 1.7985096653935396e-05, "loss": 0.7994, "step": 891 }, { "epoch": 0.06749650032159207, "grad_norm": 3.4396860599517822, "learning_rate": 1.7984966814063547e-05, "loss": 0.8146, "step": 892 }, { "epoch": 0.0675721690439257, "grad_norm": 2.9307057857513428, "learning_rate": 1.7984836411524018e-05, "loss": 0.8404, "step": 893 }, { "epoch": 0.06764783776625932, "grad_norm": 3.1052684783935547, "learning_rate": 1.798470544632497e-05, "loss": 0.9605, "step": 894 }, { "epoch": 0.06772350648859295, "grad_norm": 3.313931465148926, "learning_rate": 1.798457391847461e-05, "loss": 0.7114, "step": 895 }, { "epoch": 0.06779917521092656, "grad_norm": 3.335641860961914, "learning_rate": 1.7984441827981166e-05, "loss": 0.9155, "step": 896 }, { "epoch": 0.06787484393326018, "grad_norm": 2.163098096847534, "learning_rate": 1.7984309174852918e-05, "loss": 0.807, "step": 897 }, { "epoch": 0.0679505126555938, "grad_norm": 3.4636337757110596, "learning_rate": 1.7984175959098172e-05, "loss": 0.7748, "step": 898 }, { "epoch": 0.06802618137792743, "grad_norm": 2.3630826473236084, "learning_rate": 1.798404218072527e-05, "loss": 0.7619, "step": 899 }, { "epoch": 0.06810185010026105, "grad_norm": 2.6266446113586426, "learning_rate": 1.7983907839742587e-05, "loss": 0.9418, "step": 900 }, { "epoch": 0.06817751882259468, "grad_norm": 2.760838747024536, "learning_rate": 1.798377293615854e-05, "loss": 0.836, "step": 901 }, { "epoch": 0.0682531875449283, "grad_norm": 3.108145236968994, "learning_rate": 1.798363746998157e-05, "loss": 0.7683, "step": 902 }, { "epoch": 0.06832885626726193, "grad_norm": 2.807042360305786, "learning_rate": 1.7983501441220168e-05, "loss": 0.9376, "step": 903 }, { "epoch": 0.06840452498959555, "grad_norm": 3.531285047531128, "learning_rate": 1.798336484988285e-05, "loss": 0.7328, "step": 904 }, { "epoch": 0.06848019371192918, "grad_norm": 3.469963788986206, "learning_rate": 1.7983227695978168e-05, "loss": 0.7034, "step": 905 }, { "epoch": 0.0685558624342628, "grad_norm": 3.210841417312622, "learning_rate": 1.798308997951471e-05, "loss": 0.6951, "step": 906 }, { "epoch": 0.06863153115659643, "grad_norm": 2.795273542404175, "learning_rate": 1.798295170050111e-05, "loss": 0.7853, "step": 907 }, { "epoch": 0.06870719987893005, "grad_norm": 4.241882801055908, "learning_rate": 1.7982812858946015e-05, "loss": 0.8056, "step": 908 }, { "epoch": 0.06878286860126366, "grad_norm": 2.5910651683807373, "learning_rate": 1.7982673454858125e-05, "loss": 0.6758, "step": 909 }, { "epoch": 0.06885853732359729, "grad_norm": 3.2898170948028564, "learning_rate": 1.798253348824617e-05, "loss": 0.8563, "step": 910 }, { "epoch": 0.06893420604593091, "grad_norm": 3.170915126800537, "learning_rate": 1.7982392959118914e-05, "loss": 0.9903, "step": 911 }, { "epoch": 0.06900987476826453, "grad_norm": 2.6784350872039795, "learning_rate": 1.7982251867485162e-05, "loss": 0.82, "step": 912 }, { "epoch": 0.06908554349059816, "grad_norm": 2.870120048522949, "learning_rate": 1.798211021335374e-05, "loss": 0.7205, "step": 913 }, { "epoch": 0.06916121221293178, "grad_norm": 3.627228260040283, "learning_rate": 1.798196799673353e-05, "loss": 0.7787, "step": 914 }, { "epoch": 0.06923688093526541, "grad_norm": 3.563584089279175, "learning_rate": 1.7981825217633433e-05, "loss": 0.9949, "step": 915 }, { "epoch": 0.06931254965759903, "grad_norm": 3.695765495300293, "learning_rate": 1.7981681876062388e-05, "loss": 0.673, "step": 916 }, { "epoch": 0.06938821837993266, "grad_norm": 3.3603649139404297, "learning_rate": 1.798153797202937e-05, "loss": 0.9694, "step": 917 }, { "epoch": 0.06946388710226628, "grad_norm": 3.820831537246704, "learning_rate": 1.7981393505543403e-05, "loss": 0.9224, "step": 918 }, { "epoch": 0.0695395558245999, "grad_norm": 3.589085102081299, "learning_rate": 1.798124847661352e-05, "loss": 0.8123, "step": 919 }, { "epoch": 0.06961522454693353, "grad_norm": 3.0185937881469727, "learning_rate": 1.798110288524881e-05, "loss": 0.8082, "step": 920 }, { "epoch": 0.06969089326926715, "grad_norm": 3.6897995471954346, "learning_rate": 1.7980956731458387e-05, "loss": 0.9175, "step": 921 }, { "epoch": 0.06976656199160078, "grad_norm": 3.113912582397461, "learning_rate": 1.7980810015251407e-05, "loss": 0.888, "step": 922 }, { "epoch": 0.06984223071393439, "grad_norm": 2.264333486557007, "learning_rate": 1.7980662736637054e-05, "loss": 0.6739, "step": 923 }, { "epoch": 0.06991789943626801, "grad_norm": 1.843481421470642, "learning_rate": 1.7980514895624558e-05, "loss": 1.0251, "step": 924 }, { "epoch": 0.06999356815860164, "grad_norm": 3.6482927799224854, "learning_rate": 1.798036649222317e-05, "loss": 0.7277, "step": 925 }, { "epoch": 0.07006923688093526, "grad_norm": 11.99400520324707, "learning_rate": 1.7980217526442186e-05, "loss": 0.9066, "step": 926 }, { "epoch": 0.07014490560326889, "grad_norm": 2.5644752979278564, "learning_rate": 1.7980067998290935e-05, "loss": 0.887, "step": 927 }, { "epoch": 0.07022057432560251, "grad_norm": 3.71718692779541, "learning_rate": 1.797991790777878e-05, "loss": 0.8685, "step": 928 }, { "epoch": 0.07029624304793614, "grad_norm": 2.8822622299194336, "learning_rate": 1.797976725491512e-05, "loss": 0.8336, "step": 929 }, { "epoch": 0.07037191177026976, "grad_norm": 2.9357829093933105, "learning_rate": 1.7979616039709396e-05, "loss": 0.8856, "step": 930 }, { "epoch": 0.07044758049260338, "grad_norm": 2.640735387802124, "learning_rate": 1.7979464262171067e-05, "loss": 0.7398, "step": 931 }, { "epoch": 0.07052324921493701, "grad_norm": 3.1476693153381348, "learning_rate": 1.7979311922309645e-05, "loss": 0.9748, "step": 932 }, { "epoch": 0.07059891793727063, "grad_norm": 2.6864423751831055, "learning_rate": 1.7979159020134668e-05, "loss": 0.7716, "step": 933 }, { "epoch": 0.07067458665960426, "grad_norm": 2.750220537185669, "learning_rate": 1.797900555565571e-05, "loss": 0.8742, "step": 934 }, { "epoch": 0.07075025538193788, "grad_norm": 2.5933568477630615, "learning_rate": 1.7978851528882382e-05, "loss": 0.833, "step": 935 }, { "epoch": 0.07082592410427149, "grad_norm": 2.8534131050109863, "learning_rate": 1.7978696939824333e-05, "loss": 0.8054, "step": 936 }, { "epoch": 0.07090159282660512, "grad_norm": 3.7665860652923584, "learning_rate": 1.7978541788491237e-05, "loss": 0.9409, "step": 937 }, { "epoch": 0.07097726154893874, "grad_norm": 2.939113140106201, "learning_rate": 1.7978386074892816e-05, "loss": 0.8041, "step": 938 }, { "epoch": 0.07105293027127237, "grad_norm": 3.101107597351074, "learning_rate": 1.7978229799038816e-05, "loss": 0.8247, "step": 939 }, { "epoch": 0.07112859899360599, "grad_norm": 2.7688238620758057, "learning_rate": 1.7978072960939034e-05, "loss": 0.8326, "step": 940 }, { "epoch": 0.07120426771593961, "grad_norm": 3.1420252323150635, "learning_rate": 1.797791556060328e-05, "loss": 0.8231, "step": 941 }, { "epoch": 0.07127993643827324, "grad_norm": 2.776109218597412, "learning_rate": 1.7977757598041417e-05, "loss": 0.7977, "step": 942 }, { "epoch": 0.07135560516060686, "grad_norm": 4.262285232543945, "learning_rate": 1.7977599073263335e-05, "loss": 0.7962, "step": 943 }, { "epoch": 0.07143127388294049, "grad_norm": 3.1178438663482666, "learning_rate": 1.7977439986278962e-05, "loss": 0.8491, "step": 944 }, { "epoch": 0.07150694260527411, "grad_norm": 3.3614895343780518, "learning_rate": 1.797728033709826e-05, "loss": 0.9089, "step": 945 }, { "epoch": 0.07158261132760774, "grad_norm": 2.6752171516418457, "learning_rate": 1.797712012573123e-05, "loss": 0.9587, "step": 946 }, { "epoch": 0.07165828004994136, "grad_norm": 3.405928373336792, "learning_rate": 1.79769593521879e-05, "loss": 0.8991, "step": 947 }, { "epoch": 0.07173394877227499, "grad_norm": 2.2228682041168213, "learning_rate": 1.7976798016478336e-05, "loss": 1.106, "step": 948 }, { "epoch": 0.07180961749460861, "grad_norm": 2.7371156215667725, "learning_rate": 1.797663611861265e-05, "loss": 0.8074, "step": 949 }, { "epoch": 0.07188528621694222, "grad_norm": 3.274010181427002, "learning_rate": 1.7976473658600977e-05, "loss": 0.8784, "step": 950 }, { "epoch": 0.07196095493927585, "grad_norm": 3.2630934715270996, "learning_rate": 1.797631063645349e-05, "loss": 0.929, "step": 951 }, { "epoch": 0.07203662366160947, "grad_norm": 3.075411796569824, "learning_rate": 1.7976147052180395e-05, "loss": 0.7251, "step": 952 }, { "epoch": 0.0721122923839431, "grad_norm": 2.965583324432373, "learning_rate": 1.797598290579194e-05, "loss": 0.7216, "step": 953 }, { "epoch": 0.07218796110627672, "grad_norm": 2.7841546535491943, "learning_rate": 1.797581819729841e-05, "loss": 0.8072, "step": 954 }, { "epoch": 0.07226362982861034, "grad_norm": 3.408371686935425, "learning_rate": 1.7975652926710108e-05, "loss": 0.7652, "step": 955 }, { "epoch": 0.07233929855094397, "grad_norm": 3.180001974105835, "learning_rate": 1.7975487094037386e-05, "loss": 0.9272, "step": 956 }, { "epoch": 0.07241496727327759, "grad_norm": 3.346219301223755, "learning_rate": 1.7975320699290637e-05, "loss": 0.9778, "step": 957 }, { "epoch": 0.07249063599561122, "grad_norm": 2.9968905448913574, "learning_rate": 1.7975153742480274e-05, "loss": 0.8965, "step": 958 }, { "epoch": 0.07256630471794484, "grad_norm": 3.1787264347076416, "learning_rate": 1.7974986223616754e-05, "loss": 0.7344, "step": 959 }, { "epoch": 0.07264197344027847, "grad_norm": 3.266357898712158, "learning_rate": 1.797481814271057e-05, "loss": 0.8381, "step": 960 }, { "epoch": 0.07271764216261209, "grad_norm": 3.33705472946167, "learning_rate": 1.7974649499772244e-05, "loss": 0.745, "step": 961 }, { "epoch": 0.07279331088494571, "grad_norm": 3.2236170768737793, "learning_rate": 1.797448029481234e-05, "loss": 0.7465, "step": 962 }, { "epoch": 0.07286897960727932, "grad_norm": 3.4352869987487793, "learning_rate": 1.797431052784145e-05, "loss": 1.004, "step": 963 }, { "epoch": 0.07294464832961295, "grad_norm": 3.1209468841552734, "learning_rate": 1.797414019887021e-05, "loss": 0.9475, "step": 964 }, { "epoch": 0.07302031705194657, "grad_norm": 7.6214823722839355, "learning_rate": 1.7973969307909286e-05, "loss": 0.8257, "step": 965 }, { "epoch": 0.0730959857742802, "grad_norm": 3.500762939453125, "learning_rate": 1.797379785496938e-05, "loss": 0.8723, "step": 966 }, { "epoch": 0.07317165449661382, "grad_norm": 3.0872161388397217, "learning_rate": 1.7973625840061224e-05, "loss": 0.8551, "step": 967 }, { "epoch": 0.07324732321894745, "grad_norm": 3.6307787895202637, "learning_rate": 1.7973453263195595e-05, "loss": 0.8331, "step": 968 }, { "epoch": 0.07332299194128107, "grad_norm": 3.153038501739502, "learning_rate": 1.79732801243833e-05, "loss": 0.8229, "step": 969 }, { "epoch": 0.0733986606636147, "grad_norm": 1.7069755792617798, "learning_rate": 1.797310642363518e-05, "loss": 0.9515, "step": 970 }, { "epoch": 0.07347432938594832, "grad_norm": 2.6972126960754395, "learning_rate": 1.797293216096211e-05, "loss": 0.7397, "step": 971 }, { "epoch": 0.07354999810828194, "grad_norm": 2.9356179237365723, "learning_rate": 1.7972757336375012e-05, "loss": 0.8123, "step": 972 }, { "epoch": 0.07362566683061557, "grad_norm": 2.5552573204040527, "learning_rate": 1.7972581949884823e-05, "loss": 0.8397, "step": 973 }, { "epoch": 0.0737013355529492, "grad_norm": 2.462688684463501, "learning_rate": 1.7972406001502535e-05, "loss": 0.8085, "step": 974 }, { "epoch": 0.07377700427528282, "grad_norm": 2.716464042663574, "learning_rate": 1.797222949123916e-05, "loss": 1.0184, "step": 975 }, { "epoch": 0.07385267299761644, "grad_norm": 2.534637451171875, "learning_rate": 1.797205241910576e-05, "loss": 0.769, "step": 976 }, { "epoch": 0.07392834171995005, "grad_norm": 2.6971538066864014, "learning_rate": 1.797187478511341e-05, "loss": 0.8612, "step": 977 }, { "epoch": 0.07400401044228368, "grad_norm": 2.319307565689087, "learning_rate": 1.797169658927325e-05, "loss": 0.6711, "step": 978 }, { "epoch": 0.0740796791646173, "grad_norm": 3.083146333694458, "learning_rate": 1.7971517831596428e-05, "loss": 0.9988, "step": 979 }, { "epoch": 0.07415534788695093, "grad_norm": 3.323866367340088, "learning_rate": 1.7971338512094144e-05, "loss": 0.83, "step": 980 }, { "epoch": 0.07423101660928455, "grad_norm": 2.6332504749298096, "learning_rate": 1.7971158630777623e-05, "loss": 0.8075, "step": 981 }, { "epoch": 0.07430668533161817, "grad_norm": 3.7535693645477295, "learning_rate": 1.797097818765813e-05, "loss": 0.7579, "step": 982 }, { "epoch": 0.0743823540539518, "grad_norm": 3.424109697341919, "learning_rate": 1.797079718274697e-05, "loss": 0.9592, "step": 983 }, { "epoch": 0.07445802277628542, "grad_norm": 2.7965245246887207, "learning_rate": 1.797061561605548e-05, "loss": 0.8751, "step": 984 }, { "epoch": 0.07453369149861905, "grad_norm": 2.6444272994995117, "learning_rate": 1.7970433487595018e-05, "loss": 0.8987, "step": 985 }, { "epoch": 0.07460936022095267, "grad_norm": 2.68102765083313, "learning_rate": 1.7970250797377002e-05, "loss": 0.8993, "step": 986 }, { "epoch": 0.0746850289432863, "grad_norm": 2.6379127502441406, "learning_rate": 1.7970067545412865e-05, "loss": 0.7778, "step": 987 }, { "epoch": 0.07476069766561992, "grad_norm": 4.525475025177002, "learning_rate": 1.796988373171409e-05, "loss": 0.8909, "step": 988 }, { "epoch": 0.07483636638795355, "grad_norm": 2.7560689449310303, "learning_rate": 1.7969699356292177e-05, "loss": 0.8144, "step": 989 }, { "epoch": 0.07491203511028716, "grad_norm": 2.7288384437561035, "learning_rate": 1.7969514419158682e-05, "loss": 0.802, "step": 990 }, { "epoch": 0.07498770383262078, "grad_norm": 3.1117804050445557, "learning_rate": 1.7969328920325184e-05, "loss": 0.7979, "step": 991 }, { "epoch": 0.0750633725549544, "grad_norm": 3.190317392349243, "learning_rate": 1.79691428598033e-05, "loss": 0.773, "step": 992 }, { "epoch": 0.07513904127728803, "grad_norm": 4.079197883605957, "learning_rate": 1.7968956237604678e-05, "loss": 0.6914, "step": 993 }, { "epoch": 0.07521470999962165, "grad_norm": 2.5737321376800537, "learning_rate": 1.796876905374101e-05, "loss": 0.752, "step": 994 }, { "epoch": 0.07529037872195528, "grad_norm": 3.0443410873413086, "learning_rate": 1.796858130822401e-05, "loss": 0.8317, "step": 995 }, { "epoch": 0.0753660474442889, "grad_norm": 3.2446975708007812, "learning_rate": 1.7968393001065445e-05, "loss": 0.7763, "step": 996 }, { "epoch": 0.07544171616662253, "grad_norm": 3.4776625633239746, "learning_rate": 1.79682041322771e-05, "loss": 0.6775, "step": 997 }, { "epoch": 0.07551738488895615, "grad_norm": 2.343702554702759, "learning_rate": 1.796801470187081e-05, "loss": 0.9111, "step": 998 }, { "epoch": 0.07559305361128978, "grad_norm": 2.4391534328460693, "learning_rate": 1.7967824709858428e-05, "loss": 1.1345, "step": 999 }, { "epoch": 0.0756687223336234, "grad_norm": 2.1746954917907715, "learning_rate": 1.796763415625186e-05, "loss": 0.6839, "step": 1000 }, { "epoch": 0.07574439105595702, "grad_norm": 2.8759877681732178, "learning_rate": 1.7967443041063037e-05, "loss": 0.793, "step": 1001 }, { "epoch": 0.07582005977829065, "grad_norm": 2.7766287326812744, "learning_rate": 1.7967251364303927e-05, "loss": 0.9124, "step": 1002 }, { "epoch": 0.07589572850062427, "grad_norm": 3.971047878265381, "learning_rate": 1.796705912598653e-05, "loss": 0.9387, "step": 1003 }, { "epoch": 0.07597139722295788, "grad_norm": 3.4732584953308105, "learning_rate": 1.796686632612289e-05, "loss": 0.7853, "step": 1004 }, { "epoch": 0.07604706594529151, "grad_norm": 2.530043125152588, "learning_rate": 1.7966672964725074e-05, "loss": 0.7116, "step": 1005 }, { "epoch": 0.07612273466762513, "grad_norm": 3.753622531890869, "learning_rate": 1.79664790418052e-05, "loss": 0.7789, "step": 1006 }, { "epoch": 0.07619840338995876, "grad_norm": 2.8898422718048096, "learning_rate": 1.7966284557375405e-05, "loss": 0.8084, "step": 1007 }, { "epoch": 0.07627407211229238, "grad_norm": 3.0014569759368896, "learning_rate": 1.7966089511447872e-05, "loss": 0.9103, "step": 1008 }, { "epoch": 0.076349740834626, "grad_norm": 3.0454745292663574, "learning_rate": 1.7965893904034813e-05, "loss": 0.841, "step": 1009 }, { "epoch": 0.07642540955695963, "grad_norm": 6.323338031768799, "learning_rate": 1.7965697735148482e-05, "loss": 0.7438, "step": 1010 }, { "epoch": 0.07650107827929326, "grad_norm": 2.459744930267334, "learning_rate": 1.7965501004801158e-05, "loss": 0.7822, "step": 1011 }, { "epoch": 0.07657674700162688, "grad_norm": 2.981001138687134, "learning_rate": 1.796530371300516e-05, "loss": 0.9066, "step": 1012 }, { "epoch": 0.0766524157239605, "grad_norm": 2.747135639190674, "learning_rate": 1.7965105859772847e-05, "loss": 0.6591, "step": 1013 }, { "epoch": 0.07672808444629413, "grad_norm": 2.3893380165100098, "learning_rate": 1.796490744511661e-05, "loss": 0.7462, "step": 1014 }, { "epoch": 0.07680375316862775, "grad_norm": 3.2017297744750977, "learning_rate": 1.796470846904887e-05, "loss": 0.9729, "step": 1015 }, { "epoch": 0.07687942189096138, "grad_norm": 3.246903896331787, "learning_rate": 1.7964508931582095e-05, "loss": 0.6984, "step": 1016 }, { "epoch": 0.07695509061329499, "grad_norm": 2.975456953048706, "learning_rate": 1.7964308832728775e-05, "loss": 0.8159, "step": 1017 }, { "epoch": 0.07703075933562861, "grad_norm": 2.729341506958008, "learning_rate": 1.796410817250144e-05, "loss": 0.8509, "step": 1018 }, { "epoch": 0.07710642805796224, "grad_norm": 3.228543758392334, "learning_rate": 1.7963906950912657e-05, "loss": 0.9637, "step": 1019 }, { "epoch": 0.07718209678029586, "grad_norm": 2.6817281246185303, "learning_rate": 1.7963705167975032e-05, "loss": 0.9073, "step": 1020 }, { "epoch": 0.07725776550262949, "grad_norm": 2.673149585723877, "learning_rate": 1.7963502823701195e-05, "loss": 0.7813, "step": 1021 }, { "epoch": 0.07733343422496311, "grad_norm": 2.8436264991760254, "learning_rate": 1.7963299918103818e-05, "loss": 0.7875, "step": 1022 }, { "epoch": 0.07740910294729673, "grad_norm": 3.4467597007751465, "learning_rate": 1.796309645119561e-05, "loss": 0.7861, "step": 1023 }, { "epoch": 0.07748477166963036, "grad_norm": 4.416311740875244, "learning_rate": 1.7962892422989313e-05, "loss": 0.8662, "step": 1024 }, { "epoch": 0.07756044039196398, "grad_norm": 2.7716546058654785, "learning_rate": 1.79626878334977e-05, "loss": 0.6516, "step": 1025 }, { "epoch": 0.07763610911429761, "grad_norm": 2.379066228866577, "learning_rate": 1.796248268273359e-05, "loss": 0.7032, "step": 1026 }, { "epoch": 0.07771177783663123, "grad_norm": 3.2015442848205566, "learning_rate": 1.7962276970709827e-05, "loss": 0.8266, "step": 1027 }, { "epoch": 0.07778744655896486, "grad_norm": 2.301879405975342, "learning_rate": 1.796207069743929e-05, "loss": 0.8037, "step": 1028 }, { "epoch": 0.07786311528129848, "grad_norm": 2.7185168266296387, "learning_rate": 1.7961863862934897e-05, "loss": 0.8516, "step": 1029 }, { "epoch": 0.0779387840036321, "grad_norm": 3.952467679977417, "learning_rate": 1.796165646720961e-05, "loss": 0.8924, "step": 1030 }, { "epoch": 0.07801445272596572, "grad_norm": 2.7374305725097656, "learning_rate": 1.79614485102764e-05, "loss": 0.8953, "step": 1031 }, { "epoch": 0.07809012144829934, "grad_norm": 3.123100996017456, "learning_rate": 1.7961239992148306e-05, "loss": 0.9221, "step": 1032 }, { "epoch": 0.07816579017063296, "grad_norm": 2.811434507369995, "learning_rate": 1.7961030912838376e-05, "loss": 0.7309, "step": 1033 }, { "epoch": 0.07824145889296659, "grad_norm": 3.855139970779419, "learning_rate": 1.796082127235971e-05, "loss": 0.7561, "step": 1034 }, { "epoch": 0.07831712761530021, "grad_norm": 3.214775562286377, "learning_rate": 1.796061107072543e-05, "loss": 0.965, "step": 1035 }, { "epoch": 0.07839279633763384, "grad_norm": 2.650777578353882, "learning_rate": 1.7960400307948706e-05, "loss": 0.9342, "step": 1036 }, { "epoch": 0.07846846505996746, "grad_norm": 2.863734722137451, "learning_rate": 1.796018898404273e-05, "loss": 0.8635, "step": 1037 }, { "epoch": 0.07854413378230109, "grad_norm": 5.087371349334717, "learning_rate": 1.795997709902074e-05, "loss": 0.6543, "step": 1038 }, { "epoch": 0.07861980250463471, "grad_norm": 2.6036596298217773, "learning_rate": 1.7959764652896006e-05, "loss": 0.8956, "step": 1039 }, { "epoch": 0.07869547122696834, "grad_norm": 2.6661086082458496, "learning_rate": 1.7959551645681827e-05, "loss": 0.9456, "step": 1040 }, { "epoch": 0.07877113994930196, "grad_norm": 2.790140390396118, "learning_rate": 1.7959338077391547e-05, "loss": 0.8146, "step": 1041 }, { "epoch": 0.07884680867163558, "grad_norm": 3.7499725818634033, "learning_rate": 1.795912394803854e-05, "loss": 0.7346, "step": 1042 }, { "epoch": 0.07892247739396921, "grad_norm": 2.917370080947876, "learning_rate": 1.7958909257636214e-05, "loss": 0.8348, "step": 1043 }, { "epoch": 0.07899814611630282, "grad_norm": 2.5935680866241455, "learning_rate": 1.795869400619801e-05, "loss": 0.6081, "step": 1044 }, { "epoch": 0.07907381483863644, "grad_norm": 2.097604990005493, "learning_rate": 1.7958478193737412e-05, "loss": 0.7379, "step": 1045 }, { "epoch": 0.07914948356097007, "grad_norm": 2.9579460620880127, "learning_rate": 1.7958261820267936e-05, "loss": 0.9227, "step": 1046 }, { "epoch": 0.0792251522833037, "grad_norm": 4.755364418029785, "learning_rate": 1.7958044885803133e-05, "loss": 0.8909, "step": 1047 }, { "epoch": 0.07930082100563732, "grad_norm": 2.9622743129730225, "learning_rate": 1.7957827390356577e-05, "loss": 0.6475, "step": 1048 }, { "epoch": 0.07937648972797094, "grad_norm": 2.9405174255371094, "learning_rate": 1.7957609333941906e-05, "loss": 0.8427, "step": 1049 }, { "epoch": 0.07945215845030457, "grad_norm": 2.8209495544433594, "learning_rate": 1.795739071657276e-05, "loss": 0.7966, "step": 1050 }, { "epoch": 0.07952782717263819, "grad_norm": 2.4763989448547363, "learning_rate": 1.795717153826284e-05, "loss": 0.6992, "step": 1051 }, { "epoch": 0.07960349589497182, "grad_norm": 2.8910422325134277, "learning_rate": 1.7956951799025865e-05, "loss": 0.8601, "step": 1052 }, { "epoch": 0.07967916461730544, "grad_norm": 2.9164462089538574, "learning_rate": 1.7956731498875598e-05, "loss": 0.8017, "step": 1053 }, { "epoch": 0.07975483333963906, "grad_norm": 2.7864677906036377, "learning_rate": 1.7956510637825835e-05, "loss": 0.8465, "step": 1054 }, { "epoch": 0.07983050206197269, "grad_norm": 3.374191999435425, "learning_rate": 1.7956289215890405e-05, "loss": 0.8502, "step": 1055 }, { "epoch": 0.07990617078430631, "grad_norm": 4.065507411956787, "learning_rate": 1.795606723308318e-05, "loss": 0.8219, "step": 1056 }, { "epoch": 0.07998183950663992, "grad_norm": 3.0881083011627197, "learning_rate": 1.7955844689418055e-05, "loss": 0.8383, "step": 1057 }, { "epoch": 0.08005750822897355, "grad_norm": 2.8912618160247803, "learning_rate": 1.7955621584908968e-05, "loss": 1.0209, "step": 1058 }, { "epoch": 0.08013317695130717, "grad_norm": 2.972893714904785, "learning_rate": 1.7955397919569894e-05, "loss": 0.7862, "step": 1059 }, { "epoch": 0.0802088456736408, "grad_norm": 3.151890277862549, "learning_rate": 1.7955173693414835e-05, "loss": 0.9524, "step": 1060 }, { "epoch": 0.08028451439597442, "grad_norm": 3.1022751331329346, "learning_rate": 1.7954948906457836e-05, "loss": 0.9726, "step": 1061 }, { "epoch": 0.08036018311830805, "grad_norm": 3.058262825012207, "learning_rate": 1.7954723558712973e-05, "loss": 0.8667, "step": 1062 }, { "epoch": 0.08043585184064167, "grad_norm": 3.0045084953308105, "learning_rate": 1.7954497650194356e-05, "loss": 0.7895, "step": 1063 }, { "epoch": 0.0805115205629753, "grad_norm": 2.1319167613983154, "learning_rate": 1.7954271180916137e-05, "loss": 0.6551, "step": 1064 }, { "epoch": 0.08058718928530892, "grad_norm": 4.05554723739624, "learning_rate": 1.795404415089249e-05, "loss": 1.0026, "step": 1065 }, { "epoch": 0.08066285800764254, "grad_norm": 2.6859283447265625, "learning_rate": 1.795381656013764e-05, "loss": 0.7522, "step": 1066 }, { "epoch": 0.08073852672997617, "grad_norm": 2.984954833984375, "learning_rate": 1.795358840866584e-05, "loss": 0.9283, "step": 1067 }, { "epoch": 0.08081419545230979, "grad_norm": 5.049993991851807, "learning_rate": 1.7953359696491368e-05, "loss": 0.883, "step": 1068 }, { "epoch": 0.08088986417464342, "grad_norm": 3.801880359649658, "learning_rate": 1.7953130423628558e-05, "loss": 0.8939, "step": 1069 }, { "epoch": 0.08096553289697704, "grad_norm": 4.104866981506348, "learning_rate": 1.795290059009176e-05, "loss": 0.725, "step": 1070 }, { "epoch": 0.08104120161931065, "grad_norm": 3.0002243518829346, "learning_rate": 1.7952670195895373e-05, "loss": 0.9259, "step": 1071 }, { "epoch": 0.08111687034164428, "grad_norm": 4.523970127105713, "learning_rate": 1.7952439241053818e-05, "loss": 0.8686, "step": 1072 }, { "epoch": 0.0811925390639779, "grad_norm": 4.293980598449707, "learning_rate": 1.7952207725581565e-05, "loss": 0.9891, "step": 1073 }, { "epoch": 0.08126820778631152, "grad_norm": 6.949241638183594, "learning_rate": 1.7951975649493112e-05, "loss": 0.7205, "step": 1074 }, { "epoch": 0.08134387650864515, "grad_norm": 2.5196433067321777, "learning_rate": 1.795174301280298e-05, "loss": 0.7382, "step": 1075 }, { "epoch": 0.08141954523097877, "grad_norm": 3.6562304496765137, "learning_rate": 1.7951509815525758e-05, "loss": 0.7558, "step": 1076 }, { "epoch": 0.0814952139533124, "grad_norm": 2.6832971572875977, "learning_rate": 1.7951276057676035e-05, "loss": 0.7999, "step": 1077 }, { "epoch": 0.08157088267564602, "grad_norm": 2.6821744441986084, "learning_rate": 1.795104173926845e-05, "loss": 0.7422, "step": 1078 }, { "epoch": 0.08164655139797965, "grad_norm": 3.0677764415740967, "learning_rate": 1.795080686031768e-05, "loss": 0.9085, "step": 1079 }, { "epoch": 0.08172222012031327, "grad_norm": 2.698085069656372, "learning_rate": 1.7950571420838438e-05, "loss": 0.8755, "step": 1080 }, { "epoch": 0.0817978888426469, "grad_norm": 2.651939630508423, "learning_rate": 1.7950335420845463e-05, "loss": 0.7144, "step": 1081 }, { "epoch": 0.08187355756498052, "grad_norm": 3.2069571018218994, "learning_rate": 1.7950098860353534e-05, "loss": 0.834, "step": 1082 }, { "epoch": 0.08194922628731414, "grad_norm": 2.9685559272766113, "learning_rate": 1.7949861739377464e-05, "loss": 0.9, "step": 1083 }, { "epoch": 0.08202489500964776, "grad_norm": 3.860081434249878, "learning_rate": 1.7949624057932108e-05, "loss": 0.8808, "step": 1084 }, { "epoch": 0.08210056373198138, "grad_norm": 3.6514697074890137, "learning_rate": 1.7949385816032348e-05, "loss": 0.8848, "step": 1085 }, { "epoch": 0.082176232454315, "grad_norm": 2.7582156658172607, "learning_rate": 1.79491470136931e-05, "loss": 0.8554, "step": 1086 }, { "epoch": 0.08225190117664863, "grad_norm": 3.2017011642456055, "learning_rate": 1.7948907650929322e-05, "loss": 0.9358, "step": 1087 }, { "epoch": 0.08232756989898225, "grad_norm": 2.749764919281006, "learning_rate": 1.7948667727756e-05, "loss": 0.7399, "step": 1088 }, { "epoch": 0.08240323862131588, "grad_norm": 3.237679958343506, "learning_rate": 1.7948427244188163e-05, "loss": 1.0332, "step": 1089 }, { "epoch": 0.0824789073436495, "grad_norm": 3.3725643157958984, "learning_rate": 1.794818620024087e-05, "loss": 0.8071, "step": 1090 }, { "epoch": 0.08255457606598313, "grad_norm": 3.030790090560913, "learning_rate": 1.7947944595929215e-05, "loss": 0.973, "step": 1091 }, { "epoch": 0.08263024478831675, "grad_norm": 3.141195058822632, "learning_rate": 1.794770243126833e-05, "loss": 0.8654, "step": 1092 }, { "epoch": 0.08270591351065038, "grad_norm": 2.7971293926239014, "learning_rate": 1.7947459706273376e-05, "loss": 0.9006, "step": 1093 }, { "epoch": 0.082781582232984, "grad_norm": 2.6309781074523926, "learning_rate": 1.7947216420959556e-05, "loss": 0.8958, "step": 1094 }, { "epoch": 0.08285725095531762, "grad_norm": 2.7773690223693848, "learning_rate": 1.7946972575342104e-05, "loss": 0.7116, "step": 1095 }, { "epoch": 0.08293291967765125, "grad_norm": 3.7063984870910645, "learning_rate": 1.7946728169436292e-05, "loss": 0.8178, "step": 1096 }, { "epoch": 0.08300858839998487, "grad_norm": 2.698293685913086, "learning_rate": 1.7946483203257426e-05, "loss": 0.7834, "step": 1097 }, { "epoch": 0.08308425712231848, "grad_norm": 3.520792245864868, "learning_rate": 1.7946237676820842e-05, "loss": 0.9203, "step": 1098 }, { "epoch": 0.08315992584465211, "grad_norm": 5.286828517913818, "learning_rate": 1.794599159014192e-05, "loss": 0.9913, "step": 1099 }, { "epoch": 0.08323559456698573, "grad_norm": 2.7940480709075928, "learning_rate": 1.7945744943236073e-05, "loss": 0.8903, "step": 1100 }, { "epoch": 0.08331126328931936, "grad_norm": 3.531196355819702, "learning_rate": 1.794549773611874e-05, "loss": 0.9732, "step": 1101 }, { "epoch": 0.08338693201165298, "grad_norm": 2.8770856857299805, "learning_rate": 1.7945249968805412e-05, "loss": 0.7176, "step": 1102 }, { "epoch": 0.0834626007339866, "grad_norm": 2.6823272705078125, "learning_rate": 1.794500164131159e-05, "loss": 0.7169, "step": 1103 }, { "epoch": 0.08353826945632023, "grad_norm": 14.177001953125, "learning_rate": 1.794475275365284e-05, "loss": 0.7207, "step": 1104 }, { "epoch": 0.08361393817865385, "grad_norm": 3.2320713996887207, "learning_rate": 1.7944503305844738e-05, "loss": 0.799, "step": 1105 }, { "epoch": 0.08368960690098748, "grad_norm": 3.453160524368286, "learning_rate": 1.794425329790291e-05, "loss": 0.6669, "step": 1106 }, { "epoch": 0.0837652756233211, "grad_norm": 2.6871984004974365, "learning_rate": 1.794400272984301e-05, "loss": 0.8554, "step": 1107 }, { "epoch": 0.08384094434565473, "grad_norm": 3.1583449840545654, "learning_rate": 1.7943751601680732e-05, "loss": 0.8331, "step": 1108 }, { "epoch": 0.08391661306798835, "grad_norm": 2.431509256362915, "learning_rate": 1.79434999134318e-05, "loss": 0.9261, "step": 1109 }, { "epoch": 0.08399228179032198, "grad_norm": 5.224951267242432, "learning_rate": 1.7943247665111978e-05, "loss": 0.8823, "step": 1110 }, { "epoch": 0.08406795051265559, "grad_norm": 3.4264092445373535, "learning_rate": 1.7942994856737063e-05, "loss": 0.7422, "step": 1111 }, { "epoch": 0.08414361923498921, "grad_norm": 3.036907196044922, "learning_rate": 1.7942741488322882e-05, "loss": 0.8846, "step": 1112 }, { "epoch": 0.08421928795732284, "grad_norm": 2.8463785648345947, "learning_rate": 1.7942487559885306e-05, "loss": 0.8043, "step": 1113 }, { "epoch": 0.08429495667965646, "grad_norm": 3.0611727237701416, "learning_rate": 1.7942233071440235e-05, "loss": 0.7897, "step": 1114 }, { "epoch": 0.08437062540199008, "grad_norm": 3.0145342350006104, "learning_rate": 1.7941978023003604e-05, "loss": 0.8423, "step": 1115 }, { "epoch": 0.08444629412432371, "grad_norm": 2.9739110469818115, "learning_rate": 1.794172241459139e-05, "loss": 0.9366, "step": 1116 }, { "epoch": 0.08452196284665733, "grad_norm": 3.091020345687866, "learning_rate": 1.7941466246219597e-05, "loss": 0.7762, "step": 1117 }, { "epoch": 0.08459763156899096, "grad_norm": 2.4491589069366455, "learning_rate": 1.7941209517904267e-05, "loss": 0.791, "step": 1118 }, { "epoch": 0.08467330029132458, "grad_norm": 3.116558313369751, "learning_rate": 1.794095222966148e-05, "loss": 0.8275, "step": 1119 }, { "epoch": 0.0847489690136582, "grad_norm": 3.4584758281707764, "learning_rate": 1.7940694381507345e-05, "loss": 0.9236, "step": 1120 }, { "epoch": 0.08482463773599183, "grad_norm": 3.4700706005096436, "learning_rate": 1.794043597345801e-05, "loss": 0.9343, "step": 1121 }, { "epoch": 0.08490030645832546, "grad_norm": 2.679157257080078, "learning_rate": 1.7940177005529653e-05, "loss": 0.908, "step": 1122 }, { "epoch": 0.08497597518065908, "grad_norm": 4.534050941467285, "learning_rate": 1.7939917477738502e-05, "loss": 0.761, "step": 1123 }, { "epoch": 0.0850516439029927, "grad_norm": 3.0127124786376953, "learning_rate": 1.79396573901008e-05, "loss": 0.7555, "step": 1124 }, { "epoch": 0.08512731262532632, "grad_norm": 2.5386579036712646, "learning_rate": 1.793939674263284e-05, "loss": 0.8402, "step": 1125 }, { "epoch": 0.08520298134765994, "grad_norm": 3.1187853813171387, "learning_rate": 1.793913553535094e-05, "loss": 0.7755, "step": 1126 }, { "epoch": 0.08527865006999356, "grad_norm": 3.3859164714813232, "learning_rate": 1.793887376827146e-05, "loss": 0.7611, "step": 1127 }, { "epoch": 0.08535431879232719, "grad_norm": 2.509925127029419, "learning_rate": 1.7938611441410795e-05, "loss": 0.7936, "step": 1128 }, { "epoch": 0.08542998751466081, "grad_norm": 2.7846434116363525, "learning_rate": 1.793834855478537e-05, "loss": 0.6917, "step": 1129 }, { "epoch": 0.08550565623699444, "grad_norm": 3.471247673034668, "learning_rate": 1.7938085108411648e-05, "loss": 0.8178, "step": 1130 }, { "epoch": 0.08558132495932806, "grad_norm": 2.7567524909973145, "learning_rate": 1.7937821102306127e-05, "loss": 0.8146, "step": 1131 }, { "epoch": 0.08565699368166169, "grad_norm": 3.3898468017578125, "learning_rate": 1.793755653648534e-05, "loss": 0.7553, "step": 1132 }, { "epoch": 0.08573266240399531, "grad_norm": 7.726632595062256, "learning_rate": 1.7937291410965855e-05, "loss": 0.803, "step": 1133 }, { "epoch": 0.08580833112632894, "grad_norm": 3.307058811187744, "learning_rate": 1.7937025725764273e-05, "loss": 0.9423, "step": 1134 }, { "epoch": 0.08588399984866256, "grad_norm": 3.0027151107788086, "learning_rate": 1.793675948089724e-05, "loss": 0.8663, "step": 1135 }, { "epoch": 0.08595966857099618, "grad_norm": 2.042656898498535, "learning_rate": 1.793649267638142e-05, "loss": 1.0062, "step": 1136 }, { "epoch": 0.08603533729332981, "grad_norm": 4.205798625946045, "learning_rate": 1.7936225312233523e-05, "loss": 0.7688, "step": 1137 }, { "epoch": 0.08611100601566342, "grad_norm": 2.7063050270080566, "learning_rate": 1.7935957388470297e-05, "loss": 0.7372, "step": 1138 }, { "epoch": 0.08618667473799704, "grad_norm": 2.97853422164917, "learning_rate": 1.7935688905108513e-05, "loss": 0.8734, "step": 1139 }, { "epoch": 0.08626234346033067, "grad_norm": 2.819689989089966, "learning_rate": 1.793541986216499e-05, "loss": 0.8523, "step": 1140 }, { "epoch": 0.08633801218266429, "grad_norm": 3.4364449977874756, "learning_rate": 1.7935150259656575e-05, "loss": 0.9452, "step": 1141 }, { "epoch": 0.08641368090499792, "grad_norm": 3.2552027702331543, "learning_rate": 1.7934880097600153e-05, "loss": 0.955, "step": 1142 }, { "epoch": 0.08648934962733154, "grad_norm": 2.6701014041900635, "learning_rate": 1.7934609376012637e-05, "loss": 0.909, "step": 1143 }, { "epoch": 0.08656501834966517, "grad_norm": 3.5823423862457275, "learning_rate": 1.7934338094910986e-05, "loss": 0.7366, "step": 1144 }, { "epoch": 0.08664068707199879, "grad_norm": 4.914628505706787, "learning_rate": 1.7934066254312185e-05, "loss": 0.7957, "step": 1145 }, { "epoch": 0.08671635579433241, "grad_norm": 3.194420576095581, "learning_rate": 1.7933793854233258e-05, "loss": 0.8123, "step": 1146 }, { "epoch": 0.08679202451666604, "grad_norm": 3.094132900238037, "learning_rate": 1.7933520894691268e-05, "loss": 0.9446, "step": 1147 }, { "epoch": 0.08686769323899966, "grad_norm": 3.0485928058624268, "learning_rate": 1.7933247375703302e-05, "loss": 0.8139, "step": 1148 }, { "epoch": 0.08694336196133329, "grad_norm": 2.725642681121826, "learning_rate": 1.7932973297286493e-05, "loss": 0.8204, "step": 1149 }, { "epoch": 0.08701903068366691, "grad_norm": 2.9291388988494873, "learning_rate": 1.7932698659458002e-05, "loss": 0.7845, "step": 1150 }, { "epoch": 0.08709469940600054, "grad_norm": 3.9883525371551514, "learning_rate": 1.793242346223503e-05, "loss": 0.9038, "step": 1151 }, { "epoch": 0.08717036812833415, "grad_norm": 2.8218777179718018, "learning_rate": 1.7932147705634813e-05, "loss": 0.9964, "step": 1152 }, { "epoch": 0.08724603685066777, "grad_norm": 3.1680803298950195, "learning_rate": 1.7931871389674615e-05, "loss": 0.8961, "step": 1153 }, { "epoch": 0.0873217055730014, "grad_norm": 3.147364616394043, "learning_rate": 1.7931594514371738e-05, "loss": 0.8748, "step": 1154 }, { "epoch": 0.08739737429533502, "grad_norm": 2.9208531379699707, "learning_rate": 1.7931317079743526e-05, "loss": 0.8665, "step": 1155 }, { "epoch": 0.08747304301766864, "grad_norm": 3.3694779872894287, "learning_rate": 1.793103908580735e-05, "loss": 0.955, "step": 1156 }, { "epoch": 0.08754871174000227, "grad_norm": 2.678873300552368, "learning_rate": 1.793076053258062e-05, "loss": 0.7752, "step": 1157 }, { "epoch": 0.0876243804623359, "grad_norm": 2.5789995193481445, "learning_rate": 1.793048142008078e-05, "loss": 0.8021, "step": 1158 }, { "epoch": 0.08770004918466952, "grad_norm": 2.937720537185669, "learning_rate": 1.793020174832531e-05, "loss": 0.7064, "step": 1159 }, { "epoch": 0.08777571790700314, "grad_norm": 3.5758986473083496, "learning_rate": 1.7929921517331725e-05, "loss": 0.7418, "step": 1160 }, { "epoch": 0.08785138662933677, "grad_norm": 3.9661405086517334, "learning_rate": 1.792964072711757e-05, "loss": 0.9024, "step": 1161 }, { "epoch": 0.08792705535167039, "grad_norm": 2.9074490070343018, "learning_rate": 1.792935937770043e-05, "loss": 1.0127, "step": 1162 }, { "epoch": 0.08800272407400402, "grad_norm": 3.0278260707855225, "learning_rate": 1.7929077469097923e-05, "loss": 0.8291, "step": 1163 }, { "epoch": 0.08807839279633764, "grad_norm": 2.941364049911499, "learning_rate": 1.792879500132771e-05, "loss": 0.7886, "step": 1164 }, { "epoch": 0.08815406151867125, "grad_norm": 3.2918078899383545, "learning_rate": 1.7928511974407468e-05, "loss": 0.8103, "step": 1165 }, { "epoch": 0.08822973024100488, "grad_norm": 3.120277166366577, "learning_rate": 1.7928228388354932e-05, "loss": 0.8215, "step": 1166 }, { "epoch": 0.0883053989633385, "grad_norm": 3.4401562213897705, "learning_rate": 1.7927944243187857e-05, "loss": 0.8355, "step": 1167 }, { "epoch": 0.08838106768567212, "grad_norm": 3.9332945346832275, "learning_rate": 1.7927659538924037e-05, "loss": 0.966, "step": 1168 }, { "epoch": 0.08845673640800575, "grad_norm": 3.2182464599609375, "learning_rate": 1.7927374275581298e-05, "loss": 0.8184, "step": 1169 }, { "epoch": 0.08853240513033937, "grad_norm": 4.130304336547852, "learning_rate": 1.792708845317751e-05, "loss": 0.8465, "step": 1170 }, { "epoch": 0.088608073852673, "grad_norm": 3.9958245754241943, "learning_rate": 1.792680207173057e-05, "loss": 0.7846, "step": 1171 }, { "epoch": 0.08868374257500662, "grad_norm": 3.126950740814209, "learning_rate": 1.792651513125841e-05, "loss": 0.9448, "step": 1172 }, { "epoch": 0.08875941129734025, "grad_norm": 2.5483791828155518, "learning_rate": 1.7926227631779e-05, "loss": 0.818, "step": 1173 }, { "epoch": 0.08883508001967387, "grad_norm": 3.175549268722534, "learning_rate": 1.7925939573310348e-05, "loss": 0.8815, "step": 1174 }, { "epoch": 0.0889107487420075, "grad_norm": 3.0271804332733154, "learning_rate": 1.7925650955870484e-05, "loss": 0.8213, "step": 1175 }, { "epoch": 0.08898641746434112, "grad_norm": 4.855120658874512, "learning_rate": 1.7925361779477492e-05, "loss": 0.7978, "step": 1176 }, { "epoch": 0.08906208618667474, "grad_norm": 2.6035373210906982, "learning_rate": 1.792507204414948e-05, "loss": 0.7594, "step": 1177 }, { "epoch": 0.08913775490900837, "grad_norm": 2.7867379188537598, "learning_rate": 1.7924781749904583e-05, "loss": 0.9074, "step": 1178 }, { "epoch": 0.08921342363134198, "grad_norm": 2.4809141159057617, "learning_rate": 1.792449089676099e-05, "loss": 0.7016, "step": 1179 }, { "epoch": 0.0892890923536756, "grad_norm": 3.8243072032928467, "learning_rate": 1.7924199484736912e-05, "loss": 0.9437, "step": 1180 }, { "epoch": 0.08936476107600923, "grad_norm": 2.4355239868164062, "learning_rate": 1.7923907513850598e-05, "loss": 0.7985, "step": 1181 }, { "epoch": 0.08944042979834285, "grad_norm": 3.5210628509521484, "learning_rate": 1.792361498412033e-05, "loss": 0.9034, "step": 1182 }, { "epoch": 0.08951609852067648, "grad_norm": 3.205606460571289, "learning_rate": 1.7923321895564434e-05, "loss": 0.836, "step": 1183 }, { "epoch": 0.0895917672430101, "grad_norm": 2.4469003677368164, "learning_rate": 1.7923028248201254e-05, "loss": 0.8001, "step": 1184 }, { "epoch": 0.08966743596534373, "grad_norm": 3.101621150970459, "learning_rate": 1.792273404204919e-05, "loss": 0.9058, "step": 1185 }, { "epoch": 0.08974310468767735, "grad_norm": 2.8717565536499023, "learning_rate": 1.7922439277126656e-05, "loss": 0.8039, "step": 1186 }, { "epoch": 0.08981877341001097, "grad_norm": 2.7401671409606934, "learning_rate": 1.7922143953452117e-05, "loss": 0.8037, "step": 1187 }, { "epoch": 0.0898944421323446, "grad_norm": 3.1042513847351074, "learning_rate": 1.7921848071044065e-05, "loss": 0.8021, "step": 1188 }, { "epoch": 0.08997011085467822, "grad_norm": 3.410513162612915, "learning_rate": 1.7921551629921033e-05, "loss": 0.7713, "step": 1189 }, { "epoch": 0.09004577957701185, "grad_norm": 2.913480758666992, "learning_rate": 1.792125463010158e-05, "loss": 0.6799, "step": 1190 }, { "epoch": 0.09012144829934547, "grad_norm": 3.294175863265991, "learning_rate": 1.792095707160431e-05, "loss": 0.9056, "step": 1191 }, { "epoch": 0.09019711702167908, "grad_norm": 3.1891300678253174, "learning_rate": 1.792065895444785e-05, "loss": 0.8954, "step": 1192 }, { "epoch": 0.0902727857440127, "grad_norm": 2.7054443359375, "learning_rate": 1.7920360278650874e-05, "loss": 0.8146, "step": 1193 }, { "epoch": 0.09034845446634633, "grad_norm": 2.5543649196624756, "learning_rate": 1.7920061044232086e-05, "loss": 0.7524, "step": 1194 }, { "epoch": 0.09042412318867996, "grad_norm": 2.7355525493621826, "learning_rate": 1.7919761251210227e-05, "loss": 0.8009, "step": 1195 }, { "epoch": 0.09049979191101358, "grad_norm": 2.5173544883728027, "learning_rate": 1.7919460899604065e-05, "loss": 0.8508, "step": 1196 }, { "epoch": 0.0905754606333472, "grad_norm": 4.627716064453125, "learning_rate": 1.7919159989432417e-05, "loss": 0.7769, "step": 1197 }, { "epoch": 0.09065112935568083, "grad_norm": 3.5971720218658447, "learning_rate": 1.7918858520714118e-05, "loss": 0.7218, "step": 1198 }, { "epoch": 0.09072679807801445, "grad_norm": 3.731093168258667, "learning_rate": 1.791855649346805e-05, "loss": 0.9134, "step": 1199 }, { "epoch": 0.09080246680034808, "grad_norm": 3.367734670639038, "learning_rate": 1.7918253907713133e-05, "loss": 0.7829, "step": 1200 }, { "epoch": 0.0908781355226817, "grad_norm": 2.7320973873138428, "learning_rate": 1.791795076346831e-05, "loss": 0.8174, "step": 1201 }, { "epoch": 0.09095380424501533, "grad_norm": 2.977246046066284, "learning_rate": 1.7917647060752562e-05, "loss": 0.9902, "step": 1202 }, { "epoch": 0.09102947296734895, "grad_norm": 2.8450841903686523, "learning_rate": 1.7917342799584916e-05, "loss": 0.7602, "step": 1203 }, { "epoch": 0.09110514168968258, "grad_norm": 2.729694128036499, "learning_rate": 1.791703797998442e-05, "loss": 0.7567, "step": 1204 }, { "epoch": 0.0911808104120162, "grad_norm": 2.8043293952941895, "learning_rate": 1.7916732601970166e-05, "loss": 0.8673, "step": 1205 }, { "epoch": 0.09125647913434981, "grad_norm": 2.3596503734588623, "learning_rate": 1.7916426665561276e-05, "loss": 0.6582, "step": 1206 }, { "epoch": 0.09133214785668343, "grad_norm": 3.7093918323516846, "learning_rate": 1.791612017077691e-05, "loss": 0.8625, "step": 1207 }, { "epoch": 0.09140781657901706, "grad_norm": 3.144490957260132, "learning_rate": 1.791581311763626e-05, "loss": 0.7741, "step": 1208 }, { "epoch": 0.09148348530135068, "grad_norm": 2.924307346343994, "learning_rate": 1.7915505506158553e-05, "loss": 0.7959, "step": 1209 }, { "epoch": 0.09155915402368431, "grad_norm": 3.3152287006378174, "learning_rate": 1.7915197336363054e-05, "loss": 0.928, "step": 1210 }, { "epoch": 0.09163482274601793, "grad_norm": 3.4837348461151123, "learning_rate": 1.7914888608269068e-05, "loss": 0.8039, "step": 1211 }, { "epoch": 0.09171049146835156, "grad_norm": 3.198897361755371, "learning_rate": 1.791457932189592e-05, "loss": 0.8471, "step": 1212 }, { "epoch": 0.09178616019068518, "grad_norm": 2.604722023010254, "learning_rate": 1.791426947726298e-05, "loss": 0.7698, "step": 1213 }, { "epoch": 0.0918618289130188, "grad_norm": 5.606075286865234, "learning_rate": 1.7913959074389655e-05, "loss": 0.7748, "step": 1214 }, { "epoch": 0.09193749763535243, "grad_norm": 3.001539707183838, "learning_rate": 1.7913648113295382e-05, "loss": 0.8456, "step": 1215 }, { "epoch": 0.09201316635768605, "grad_norm": 2.6370272636413574, "learning_rate": 1.7913336593999634e-05, "loss": 0.8696, "step": 1216 }, { "epoch": 0.09208883508001968, "grad_norm": 3.1904518604278564, "learning_rate": 1.791302451652192e-05, "loss": 0.7573, "step": 1217 }, { "epoch": 0.0921645038023533, "grad_norm": 4.082500457763672, "learning_rate": 1.791271188088178e-05, "loss": 0.6788, "step": 1218 }, { "epoch": 0.09224017252468691, "grad_norm": 3.6102075576782227, "learning_rate": 1.7912398687098794e-05, "loss": 0.867, "step": 1219 }, { "epoch": 0.09231584124702054, "grad_norm": 2.7773969173431396, "learning_rate": 1.7912084935192577e-05, "loss": 0.7679, "step": 1220 }, { "epoch": 0.09239150996935416, "grad_norm": 2.8499839305877686, "learning_rate": 1.791177062518278e-05, "loss": 0.819, "step": 1221 }, { "epoch": 0.09246717869168779, "grad_norm": 2.839503765106201, "learning_rate": 1.7911455757089076e-05, "loss": 0.8847, "step": 1222 }, { "epoch": 0.09254284741402141, "grad_norm": 3.260828733444214, "learning_rate": 1.7911140330931193e-05, "loss": 0.8336, "step": 1223 }, { "epoch": 0.09261851613635504, "grad_norm": 2.5245110988616943, "learning_rate": 1.7910824346728882e-05, "loss": 1.0465, "step": 1224 }, { "epoch": 0.09269418485868866, "grad_norm": 3.2489395141601562, "learning_rate": 1.7910507804501925e-05, "loss": 0.7982, "step": 1225 }, { "epoch": 0.09276985358102229, "grad_norm": 2.756540060043335, "learning_rate": 1.7910190704270155e-05, "loss": 0.818, "step": 1226 }, { "epoch": 0.09284552230335591, "grad_norm": 2.708287477493286, "learning_rate": 1.7909873046053417e-05, "loss": 0.7818, "step": 1227 }, { "epoch": 0.09292119102568953, "grad_norm": 3.1949870586395264, "learning_rate": 1.7909554829871615e-05, "loss": 0.7934, "step": 1228 }, { "epoch": 0.09299685974802316, "grad_norm": 3.080357074737549, "learning_rate": 1.7909236055744675e-05, "loss": 0.7607, "step": 1229 }, { "epoch": 0.09307252847035678, "grad_norm": 3.3811452388763428, "learning_rate": 1.790891672369255e-05, "loss": 0.9556, "step": 1230 }, { "epoch": 0.09314819719269041, "grad_norm": 2.7672924995422363, "learning_rate": 1.790859683373525e-05, "loss": 0.7895, "step": 1231 }, { "epoch": 0.09322386591502403, "grad_norm": 2.8778274059295654, "learning_rate": 1.7908276385892802e-05, "loss": 0.7614, "step": 1232 }, { "epoch": 0.09329953463735764, "grad_norm": 3.135817289352417, "learning_rate": 1.7907955380185276e-05, "loss": 0.7486, "step": 1233 }, { "epoch": 0.09337520335969127, "grad_norm": 2.9601290225982666, "learning_rate": 1.790763381663277e-05, "loss": 0.7852, "step": 1234 }, { "epoch": 0.09345087208202489, "grad_norm": 2.9267449378967285, "learning_rate": 1.790731169525542e-05, "loss": 0.9109, "step": 1235 }, { "epoch": 0.09352654080435852, "grad_norm": 2.473426342010498, "learning_rate": 1.7906989016073405e-05, "loss": 0.8989, "step": 1236 }, { "epoch": 0.09360220952669214, "grad_norm": 3.5370841026306152, "learning_rate": 1.790666577910693e-05, "loss": 0.7684, "step": 1237 }, { "epoch": 0.09367787824902576, "grad_norm": 2.9519424438476562, "learning_rate": 1.7906341984376237e-05, "loss": 0.8645, "step": 1238 }, { "epoch": 0.09375354697135939, "grad_norm": 3.7128260135650635, "learning_rate": 1.79060176319016e-05, "loss": 0.903, "step": 1239 }, { "epoch": 0.09382921569369301, "grad_norm": 2.769270420074463, "learning_rate": 1.7905692721703332e-05, "loss": 0.7397, "step": 1240 }, { "epoch": 0.09390488441602664, "grad_norm": 2.524073600769043, "learning_rate": 1.7905367253801784e-05, "loss": 0.6321, "step": 1241 }, { "epoch": 0.09398055313836026, "grad_norm": 2.9674630165100098, "learning_rate": 1.7905041228217335e-05, "loss": 0.8209, "step": 1242 }, { "epoch": 0.09405622186069389, "grad_norm": 2.350693941116333, "learning_rate": 1.79047146449704e-05, "loss": 0.8143, "step": 1243 }, { "epoch": 0.09413189058302751, "grad_norm": 2.93047833442688, "learning_rate": 1.7904387504081435e-05, "loss": 0.8576, "step": 1244 }, { "epoch": 0.09420755930536114, "grad_norm": 2.9106175899505615, "learning_rate": 1.7904059805570923e-05, "loss": 0.7653, "step": 1245 }, { "epoch": 0.09428322802769475, "grad_norm": 2.6790316104888916, "learning_rate": 1.7903731549459388e-05, "loss": 0.8273, "step": 1246 }, { "epoch": 0.09435889675002837, "grad_norm": 2.455004930496216, "learning_rate": 1.7903402735767385e-05, "loss": 0.7973, "step": 1247 }, { "epoch": 0.094434565472362, "grad_norm": 3.4603185653686523, "learning_rate": 1.7903073364515504e-05, "loss": 0.7254, "step": 1248 }, { "epoch": 0.09451023419469562, "grad_norm": 3.0400404930114746, "learning_rate": 1.790274343572437e-05, "loss": 0.8121, "step": 1249 }, { "epoch": 0.09458590291702924, "grad_norm": 2.6459450721740723, "learning_rate": 1.7902412949414652e-05, "loss": 0.871, "step": 1250 }, { "epoch": 0.09466157163936287, "grad_norm": 3.258230686187744, "learning_rate": 1.790208190560704e-05, "loss": 0.8936, "step": 1251 }, { "epoch": 0.09473724036169649, "grad_norm": 3.0523667335510254, "learning_rate": 1.7901750304322267e-05, "loss": 0.8872, "step": 1252 }, { "epoch": 0.09481290908403012, "grad_norm": 3.323202610015869, "learning_rate": 1.79014181455811e-05, "loss": 0.9111, "step": 1253 }, { "epoch": 0.09488857780636374, "grad_norm": 2.76963210105896, "learning_rate": 1.7901085429404335e-05, "loss": 0.8893, "step": 1254 }, { "epoch": 0.09496424652869737, "grad_norm": 2.809248685836792, "learning_rate": 1.790075215581281e-05, "loss": 0.8977, "step": 1255 }, { "epoch": 0.09503991525103099, "grad_norm": 3.7128524780273438, "learning_rate": 1.79004183248274e-05, "loss": 0.846, "step": 1256 }, { "epoch": 0.09511558397336461, "grad_norm": 2.7078073024749756, "learning_rate": 1.7900083936469003e-05, "loss": 0.8137, "step": 1257 }, { "epoch": 0.09519125269569824, "grad_norm": 3.0570812225341797, "learning_rate": 1.7899748990758564e-05, "loss": 0.9453, "step": 1258 }, { "epoch": 0.09526692141803185, "grad_norm": 3.7988598346710205, "learning_rate": 1.789941348771706e-05, "loss": 0.6545, "step": 1259 }, { "epoch": 0.09534259014036547, "grad_norm": 2.1266133785247803, "learning_rate": 1.7899077427365496e-05, "loss": 0.7383, "step": 1260 }, { "epoch": 0.0954182588626991, "grad_norm": 2.4982497692108154, "learning_rate": 1.7898740809724925e-05, "loss": 0.6417, "step": 1261 }, { "epoch": 0.09549392758503272, "grad_norm": 3.788729190826416, "learning_rate": 1.789840363481642e-05, "loss": 0.7754, "step": 1262 }, { "epoch": 0.09556959630736635, "grad_norm": 2.876657009124756, "learning_rate": 1.78980659026611e-05, "loss": 0.8848, "step": 1263 }, { "epoch": 0.09564526502969997, "grad_norm": 2.669171094894409, "learning_rate": 1.789772761328011e-05, "loss": 0.7351, "step": 1264 }, { "epoch": 0.0957209337520336, "grad_norm": 2.671163558959961, "learning_rate": 1.7897388766694643e-05, "loss": 0.9625, "step": 1265 }, { "epoch": 0.09579660247436722, "grad_norm": 3.019002676010132, "learning_rate": 1.789704936292591e-05, "loss": 0.6518, "step": 1266 }, { "epoch": 0.09587227119670085, "grad_norm": 2.8676607608795166, "learning_rate": 1.789670940199517e-05, "loss": 0.8793, "step": 1267 }, { "epoch": 0.09594793991903447, "grad_norm": 2.384054660797119, "learning_rate": 1.789636888392371e-05, "loss": 0.8888, "step": 1268 }, { "epoch": 0.0960236086413681, "grad_norm": 4.045263290405273, "learning_rate": 1.789602780873286e-05, "loss": 0.8233, "step": 1269 }, { "epoch": 0.09609927736370172, "grad_norm": 2.7333438396453857, "learning_rate": 1.7895686176443973e-05, "loss": 0.8029, "step": 1270 }, { "epoch": 0.09617494608603534, "grad_norm": 2.8120603561401367, "learning_rate": 1.7895343987078446e-05, "loss": 0.9133, "step": 1271 }, { "epoch": 0.09625061480836897, "grad_norm": 2.5713160037994385, "learning_rate": 1.789500124065771e-05, "loss": 0.8908, "step": 1272 }, { "epoch": 0.09632628353070258, "grad_norm": 3.9488155841827393, "learning_rate": 1.7894657937203222e-05, "loss": 0.8737, "step": 1273 }, { "epoch": 0.0964019522530362, "grad_norm": 4.038736820220947, "learning_rate": 1.7894314076736486e-05, "loss": 0.9211, "step": 1274 }, { "epoch": 0.09647762097536983, "grad_norm": 2.971062660217285, "learning_rate": 1.789396965927904e-05, "loss": 0.8733, "step": 1275 }, { "epoch": 0.09655328969770345, "grad_norm": 2.516186237335205, "learning_rate": 1.789362468485244e-05, "loss": 0.7466, "step": 1276 }, { "epoch": 0.09662895842003708, "grad_norm": 3.4188225269317627, "learning_rate": 1.78932791534783e-05, "loss": 0.9705, "step": 1277 }, { "epoch": 0.0967046271423707, "grad_norm": 2.534454822540283, "learning_rate": 1.7892933065178257e-05, "loss": 0.8904, "step": 1278 }, { "epoch": 0.09678029586470432, "grad_norm": 2.6995832920074463, "learning_rate": 1.789258641997398e-05, "loss": 0.6556, "step": 1279 }, { "epoch": 0.09685596458703795, "grad_norm": 3.069735050201416, "learning_rate": 1.789223921788718e-05, "loss": 0.9564, "step": 1280 }, { "epoch": 0.09693163330937157, "grad_norm": 4.048869609832764, "learning_rate": 1.7891891458939597e-05, "loss": 0.846, "step": 1281 }, { "epoch": 0.0970073020317052, "grad_norm": 2.709456443786621, "learning_rate": 1.7891543143153014e-05, "loss": 0.7941, "step": 1282 }, { "epoch": 0.09708297075403882, "grad_norm": 3.2407002449035645, "learning_rate": 1.7891194270549238e-05, "loss": 0.7715, "step": 1283 }, { "epoch": 0.09715863947637245, "grad_norm": 4.350454807281494, "learning_rate": 1.7890844841150122e-05, "loss": 0.8406, "step": 1284 }, { "epoch": 0.09723430819870607, "grad_norm": 3.1144919395446777, "learning_rate": 1.789049485497754e-05, "loss": 0.9677, "step": 1285 }, { "epoch": 0.09730997692103968, "grad_norm": 5.615466594696045, "learning_rate": 1.7890144312053423e-05, "loss": 0.8997, "step": 1286 }, { "epoch": 0.0973856456433733, "grad_norm": 3.7363674640655518, "learning_rate": 1.788979321239971e-05, "loss": 0.8649, "step": 1287 }, { "epoch": 0.09746131436570693, "grad_norm": 2.72586989402771, "learning_rate": 1.7889441556038394e-05, "loss": 0.8034, "step": 1288 }, { "epoch": 0.09753698308804055, "grad_norm": 3.393902540206909, "learning_rate": 1.7889089342991495e-05, "loss": 1.0079, "step": 1289 }, { "epoch": 0.09761265181037418, "grad_norm": 2.7981932163238525, "learning_rate": 1.7888736573281073e-05, "loss": 0.7422, "step": 1290 }, { "epoch": 0.0976883205327078, "grad_norm": 3.0770058631896973, "learning_rate": 1.7888383246929213e-05, "loss": 0.8174, "step": 1291 }, { "epoch": 0.09776398925504143, "grad_norm": 3.3533740043640137, "learning_rate": 1.7888029363958048e-05, "loss": 0.753, "step": 1292 }, { "epoch": 0.09783965797737505, "grad_norm": 3.7199461460113525, "learning_rate": 1.788767492438974e-05, "loss": 0.7974, "step": 1293 }, { "epoch": 0.09791532669970868, "grad_norm": 3.1728827953338623, "learning_rate": 1.788731992824648e-05, "loss": 0.6211, "step": 1294 }, { "epoch": 0.0979909954220423, "grad_norm": 3.2378153800964355, "learning_rate": 1.7886964375550497e-05, "loss": 0.8974, "step": 1295 }, { "epoch": 0.09806666414437593, "grad_norm": 2.729747772216797, "learning_rate": 1.7886608266324063e-05, "loss": 0.8367, "step": 1296 }, { "epoch": 0.09814233286670955, "grad_norm": 3.429668664932251, "learning_rate": 1.7886251600589478e-05, "loss": 0.8581, "step": 1297 }, { "epoch": 0.09821800158904317, "grad_norm": 2.603367567062378, "learning_rate": 1.7885894378369077e-05, "loss": 0.789, "step": 1298 }, { "epoch": 0.0982936703113768, "grad_norm": 3.1779685020446777, "learning_rate": 1.7885536599685227e-05, "loss": 0.8841, "step": 1299 }, { "epoch": 0.09836933903371041, "grad_norm": 2.518272638320923, "learning_rate": 1.7885178264560335e-05, "loss": 0.7745, "step": 1300 }, { "epoch": 0.09844500775604403, "grad_norm": 2.8336172103881836, "learning_rate": 1.7884819373016844e-05, "loss": 0.7445, "step": 1301 }, { "epoch": 0.09852067647837766, "grad_norm": 3.497591972351074, "learning_rate": 1.7884459925077227e-05, "loss": 0.7151, "step": 1302 }, { "epoch": 0.09859634520071128, "grad_norm": 1.6441137790679932, "learning_rate": 1.7884099920763995e-05, "loss": 0.9654, "step": 1303 }, { "epoch": 0.09867201392304491, "grad_norm": 2.5127291679382324, "learning_rate": 1.788373936009969e-05, "loss": 0.625, "step": 1304 }, { "epoch": 0.09874768264537853, "grad_norm": 2.6683413982391357, "learning_rate": 1.788337824310689e-05, "loss": 0.9659, "step": 1305 }, { "epoch": 0.09882335136771216, "grad_norm": 2.3852410316467285, "learning_rate": 1.7883016569808213e-05, "loss": 0.8527, "step": 1306 }, { "epoch": 0.09889902009004578, "grad_norm": 3.4505207538604736, "learning_rate": 1.788265434022631e-05, "loss": 0.797, "step": 1307 }, { "epoch": 0.0989746888123794, "grad_norm": 3.126436471939087, "learning_rate": 1.7882291554383862e-05, "loss": 0.8493, "step": 1308 }, { "epoch": 0.09905035753471303, "grad_norm": 2.6196725368499756, "learning_rate": 1.7881928212303586e-05, "loss": 0.8203, "step": 1309 }, { "epoch": 0.09912602625704665, "grad_norm": 3.3748865127563477, "learning_rate": 1.788156431400824e-05, "loss": 0.9412, "step": 1310 }, { "epoch": 0.09920169497938028, "grad_norm": 3.1734228134155273, "learning_rate": 1.788119985952061e-05, "loss": 0.8705, "step": 1311 }, { "epoch": 0.0992773637017139, "grad_norm": 2.6753692626953125, "learning_rate": 1.7880834848863517e-05, "loss": 0.7506, "step": 1312 }, { "epoch": 0.09935303242404751, "grad_norm": 2.6498630046844482, "learning_rate": 1.788046928205982e-05, "loss": 0.7981, "step": 1313 }, { "epoch": 0.09942870114638114, "grad_norm": 3.271476984024048, "learning_rate": 1.788010315913242e-05, "loss": 0.8675, "step": 1314 }, { "epoch": 0.09950436986871476, "grad_norm": 2.3523943424224854, "learning_rate": 1.7879736480104234e-05, "loss": 0.8247, "step": 1315 }, { "epoch": 0.09958003859104839, "grad_norm": 2.718277931213379, "learning_rate": 1.787936924499823e-05, "loss": 0.8838, "step": 1316 }, { "epoch": 0.09965570731338201, "grad_norm": 2.883328676223755, "learning_rate": 1.7879001453837406e-05, "loss": 0.8652, "step": 1317 }, { "epoch": 0.09973137603571564, "grad_norm": 4.095880508422852, "learning_rate": 1.787863310664479e-05, "loss": 0.8405, "step": 1318 }, { "epoch": 0.09980704475804926, "grad_norm": 3.1553285121917725, "learning_rate": 1.7878264203443453e-05, "loss": 0.8698, "step": 1319 }, { "epoch": 0.09988271348038288, "grad_norm": 3.0702428817749023, "learning_rate": 1.7877894744256494e-05, "loss": 0.8055, "step": 1320 }, { "epoch": 0.09995838220271651, "grad_norm": 3.2465262413024902, "learning_rate": 1.7877524729107054e-05, "loss": 0.8476, "step": 1321 }, { "epoch": 0.10003405092505013, "grad_norm": 2.630358934402466, "learning_rate": 1.7877154158018306e-05, "loss": 0.7447, "step": 1322 }, { "epoch": 0.10010971964738376, "grad_norm": 3.00877046585083, "learning_rate": 1.7876783031013445e-05, "loss": 0.7267, "step": 1323 }, { "epoch": 0.10018538836971738, "grad_norm": 3.247757911682129, "learning_rate": 1.7876411348115726e-05, "loss": 0.9357, "step": 1324 }, { "epoch": 0.100261057092051, "grad_norm": 2.484332323074341, "learning_rate": 1.7876039109348413e-05, "loss": 0.8055, "step": 1325 }, { "epoch": 0.10033672581438463, "grad_norm": 2.731320858001709, "learning_rate": 1.7875666314734823e-05, "loss": 0.7875, "step": 1326 }, { "epoch": 0.10041239453671824, "grad_norm": 2.6516387462615967, "learning_rate": 1.7875292964298306e-05, "loss": 0.8813, "step": 1327 }, { "epoch": 0.10048806325905187, "grad_norm": 3.460632085800171, "learning_rate": 1.7874919058062234e-05, "loss": 0.8705, "step": 1328 }, { "epoch": 0.10056373198138549, "grad_norm": 3.08870792388916, "learning_rate": 1.7874544596050024e-05, "loss": 0.9075, "step": 1329 }, { "epoch": 0.10063940070371911, "grad_norm": 2.5822556018829346, "learning_rate": 1.787416957828513e-05, "loss": 0.7953, "step": 1330 }, { "epoch": 0.10071506942605274, "grad_norm": 2.9101879596710205, "learning_rate": 1.7873794004791034e-05, "loss": 0.7639, "step": 1331 }, { "epoch": 0.10079073814838636, "grad_norm": 3.263343095779419, "learning_rate": 1.7873417875591257e-05, "loss": 0.789, "step": 1332 }, { "epoch": 0.10086640687071999, "grad_norm": 3.0501604080200195, "learning_rate": 1.7873041190709348e-05, "loss": 0.8689, "step": 1333 }, { "epoch": 0.10094207559305361, "grad_norm": 10.238188743591309, "learning_rate": 1.7872663950168907e-05, "loss": 0.8529, "step": 1334 }, { "epoch": 0.10101774431538724, "grad_norm": 2.7818658351898193, "learning_rate": 1.7872286153993548e-05, "loss": 0.883, "step": 1335 }, { "epoch": 0.10109341303772086, "grad_norm": 3.551182985305786, "learning_rate": 1.7871907802206934e-05, "loss": 0.7372, "step": 1336 }, { "epoch": 0.10116908176005449, "grad_norm": 3.187056541442871, "learning_rate": 1.7871528894832758e-05, "loss": 0.8921, "step": 1337 }, { "epoch": 0.10124475048238811, "grad_norm": 3.0303001403808594, "learning_rate": 1.7871149431894747e-05, "loss": 0.9956, "step": 1338 }, { "epoch": 0.10132041920472173, "grad_norm": 2.6252822875976562, "learning_rate": 1.787076941341667e-05, "loss": 0.6815, "step": 1339 }, { "epoch": 0.10139608792705535, "grad_norm": 2.8354198932647705, "learning_rate": 1.787038883942232e-05, "loss": 0.8485, "step": 1340 }, { "epoch": 0.10147175664938897, "grad_norm": 2.798238754272461, "learning_rate": 1.787000770993553e-05, "loss": 0.8172, "step": 1341 }, { "epoch": 0.1015474253717226, "grad_norm": 3.534174680709839, "learning_rate": 1.7869626024980167e-05, "loss": 0.7315, "step": 1342 }, { "epoch": 0.10162309409405622, "grad_norm": 2.5657639503479004, "learning_rate": 1.7869243784580133e-05, "loss": 0.6961, "step": 1343 }, { "epoch": 0.10169876281638984, "grad_norm": 3.29061222076416, "learning_rate": 1.7868860988759372e-05, "loss": 0.7886, "step": 1344 }, { "epoch": 0.10177443153872347, "grad_norm": 2.4909753799438477, "learning_rate": 1.7868477637541845e-05, "loss": 0.8066, "step": 1345 }, { "epoch": 0.10185010026105709, "grad_norm": 3.2499606609344482, "learning_rate": 1.7868093730951568e-05, "loss": 0.8099, "step": 1346 }, { "epoch": 0.10192576898339072, "grad_norm": 2.9574480056762695, "learning_rate": 1.7867709269012575e-05, "loss": 0.7153, "step": 1347 }, { "epoch": 0.10200143770572434, "grad_norm": 2.3955531120300293, "learning_rate": 1.786732425174895e-05, "loss": 1.0746, "step": 1348 }, { "epoch": 0.10207710642805796, "grad_norm": 2.7731571197509766, "learning_rate": 1.7866938679184797e-05, "loss": 0.8378, "step": 1349 }, { "epoch": 0.10215277515039159, "grad_norm": 2.637190818786621, "learning_rate": 1.7866552551344267e-05, "loss": 0.9882, "step": 1350 }, { "epoch": 0.10222844387272521, "grad_norm": 3.307260036468506, "learning_rate": 1.7866165868251535e-05, "loss": 0.8507, "step": 1351 }, { "epoch": 0.10230411259505884, "grad_norm": 3.339919090270996, "learning_rate": 1.786577862993082e-05, "loss": 0.8656, "step": 1352 }, { "epoch": 0.10237978131739246, "grad_norm": 3.161778211593628, "learning_rate": 1.7865390836406373e-05, "loss": 0.8829, "step": 1353 }, { "epoch": 0.10245545003972607, "grad_norm": 7.813992500305176, "learning_rate": 1.786500248770248e-05, "loss": 0.7528, "step": 1354 }, { "epoch": 0.1025311187620597, "grad_norm": 3.0543980598449707, "learning_rate": 1.7864613583843453e-05, "loss": 0.7212, "step": 1355 }, { "epoch": 0.10260678748439332, "grad_norm": 5.032805442810059, "learning_rate": 1.7864224124853656e-05, "loss": 0.772, "step": 1356 }, { "epoch": 0.10268245620672695, "grad_norm": 2.9874513149261475, "learning_rate": 1.7863834110757476e-05, "loss": 0.7245, "step": 1357 }, { "epoch": 0.10275812492906057, "grad_norm": 3.358482837677002, "learning_rate": 1.786344354157933e-05, "loss": 0.9143, "step": 1358 }, { "epoch": 0.1028337936513942, "grad_norm": 2.869976758956909, "learning_rate": 1.7863052417343684e-05, "loss": 0.7532, "step": 1359 }, { "epoch": 0.10290946237372782, "grad_norm": 3.7541520595550537, "learning_rate": 1.7862660738075028e-05, "loss": 0.8099, "step": 1360 }, { "epoch": 0.10298513109606144, "grad_norm": 2.752075433731079, "learning_rate": 1.7862268503797893e-05, "loss": 0.7319, "step": 1361 }, { "epoch": 0.10306079981839507, "grad_norm": 3.171396017074585, "learning_rate": 1.786187571453684e-05, "loss": 0.7254, "step": 1362 }, { "epoch": 0.10313646854072869, "grad_norm": 2.9250547885894775, "learning_rate": 1.7861482370316464e-05, "loss": 0.7121, "step": 1363 }, { "epoch": 0.10321213726306232, "grad_norm": 3.4453530311584473, "learning_rate": 1.78610884711614e-05, "loss": 0.8577, "step": 1364 }, { "epoch": 0.10328780598539594, "grad_norm": 3.4727487564086914, "learning_rate": 1.7860694017096323e-05, "loss": 0.7081, "step": 1365 }, { "epoch": 0.10336347470772957, "grad_norm": 2.7783381938934326, "learning_rate": 1.7860299008145922e-05, "loss": 0.8319, "step": 1366 }, { "epoch": 0.10343914343006318, "grad_norm": 2.9331109523773193, "learning_rate": 1.785990344433494e-05, "loss": 0.772, "step": 1367 }, { "epoch": 0.1035148121523968, "grad_norm": 2.4165093898773193, "learning_rate": 1.7859507325688146e-05, "loss": 0.8665, "step": 1368 }, { "epoch": 0.10359048087473043, "grad_norm": 3.596440553665161, "learning_rate": 1.7859110652230352e-05, "loss": 0.8207, "step": 1369 }, { "epoch": 0.10366614959706405, "grad_norm": 3.142336368560791, "learning_rate": 1.7858713423986392e-05, "loss": 0.8451, "step": 1370 }, { "epoch": 0.10374181831939767, "grad_norm": 3.1580209732055664, "learning_rate": 1.7858315640981147e-05, "loss": 0.8636, "step": 1371 }, { "epoch": 0.1038174870417313, "grad_norm": 2.814429998397827, "learning_rate": 1.7857917303239527e-05, "loss": 0.8564, "step": 1372 }, { "epoch": 0.10389315576406492, "grad_norm": 2.6497771739959717, "learning_rate": 1.7857518410786472e-05, "loss": 0.7462, "step": 1373 }, { "epoch": 0.10396882448639855, "grad_norm": 2.9846158027648926, "learning_rate": 1.7857118963646963e-05, "loss": 0.706, "step": 1374 }, { "epoch": 0.10404449320873217, "grad_norm": 3.386066436767578, "learning_rate": 1.785671896184602e-05, "loss": 0.7739, "step": 1375 }, { "epoch": 0.1041201619310658, "grad_norm": 4.174465179443359, "learning_rate": 1.7856318405408694e-05, "loss": 0.8237, "step": 1376 }, { "epoch": 0.10419583065339942, "grad_norm": 3.151991844177246, "learning_rate": 1.785591729436006e-05, "loss": 0.8827, "step": 1377 }, { "epoch": 0.10427149937573305, "grad_norm": 3.0265588760375977, "learning_rate": 1.785551562872524e-05, "loss": 0.7439, "step": 1378 }, { "epoch": 0.10434716809806667, "grad_norm": 2.958704710006714, "learning_rate": 1.7855113408529395e-05, "loss": 0.7563, "step": 1379 }, { "epoch": 0.1044228368204003, "grad_norm": 2.7380552291870117, "learning_rate": 1.7854710633797703e-05, "loss": 0.6439, "step": 1380 }, { "epoch": 0.1044985055427339, "grad_norm": 2.067690134048462, "learning_rate": 1.785430730455539e-05, "loss": 0.8905, "step": 1381 }, { "epoch": 0.10457417426506753, "grad_norm": 2.9177439212799072, "learning_rate": 1.785390342082772e-05, "loss": 0.9292, "step": 1382 }, { "epoch": 0.10464984298740115, "grad_norm": 3.5004894733428955, "learning_rate": 1.7853498982639977e-05, "loss": 0.7962, "step": 1383 }, { "epoch": 0.10472551170973478, "grad_norm": 2.974620819091797, "learning_rate": 1.7853093990017494e-05, "loss": 0.8304, "step": 1384 }, { "epoch": 0.1048011804320684, "grad_norm": 3.4205729961395264, "learning_rate": 1.785268844298563e-05, "loss": 0.8792, "step": 1385 }, { "epoch": 0.10487684915440203, "grad_norm": 3.7999086380004883, "learning_rate": 1.7852282341569782e-05, "loss": 0.9034, "step": 1386 }, { "epoch": 0.10495251787673565, "grad_norm": 3.3099162578582764, "learning_rate": 1.7851875685795383e-05, "loss": 0.9594, "step": 1387 }, { "epoch": 0.10502818659906928, "grad_norm": 3.3331620693206787, "learning_rate": 1.78514684756879e-05, "loss": 0.9426, "step": 1388 }, { "epoch": 0.1051038553214029, "grad_norm": 2.9932851791381836, "learning_rate": 1.7851060711272827e-05, "loss": 0.7063, "step": 1389 }, { "epoch": 0.10517952404373652, "grad_norm": 3.7370762825012207, "learning_rate": 1.7850652392575712e-05, "loss": 0.9669, "step": 1390 }, { "epoch": 0.10525519276607015, "grad_norm": 3.106455087661743, "learning_rate": 1.785024351962211e-05, "loss": 0.7777, "step": 1391 }, { "epoch": 0.10533086148840377, "grad_norm": 3.7146058082580566, "learning_rate": 1.784983409243764e-05, "loss": 0.9436, "step": 1392 }, { "epoch": 0.1054065302107374, "grad_norm": 2.6527533531188965, "learning_rate": 1.784942411104793e-05, "loss": 0.7476, "step": 1393 }, { "epoch": 0.10548219893307101, "grad_norm": 2.8467373847961426, "learning_rate": 1.7849013575478664e-05, "loss": 0.6969, "step": 1394 }, { "epoch": 0.10555786765540463, "grad_norm": 3.833505153656006, "learning_rate": 1.7848602485755542e-05, "loss": 0.8751, "step": 1395 }, { "epoch": 0.10563353637773826, "grad_norm": 3.4765021800994873, "learning_rate": 1.7848190841904314e-05, "loss": 0.7033, "step": 1396 }, { "epoch": 0.10570920510007188, "grad_norm": 2.6900827884674072, "learning_rate": 1.784777864395076e-05, "loss": 0.7621, "step": 1397 }, { "epoch": 0.1057848738224055, "grad_norm": 4.700630187988281, "learning_rate": 1.7847365891920688e-05, "loss": 0.9304, "step": 1398 }, { "epoch": 0.10586054254473913, "grad_norm": 3.0544216632843018, "learning_rate": 1.7846952585839946e-05, "loss": 0.8242, "step": 1399 }, { "epoch": 0.10593621126707276, "grad_norm": 2.659796714782715, "learning_rate": 1.784653872573442e-05, "loss": 0.8753, "step": 1400 }, { "epoch": 0.10601187998940638, "grad_norm": 2.934483289718628, "learning_rate": 1.784612431163003e-05, "loss": 0.7843, "step": 1401 }, { "epoch": 0.10608754871174, "grad_norm": 2.5334622859954834, "learning_rate": 1.784570934355272e-05, "loss": 0.8974, "step": 1402 }, { "epoch": 0.10616321743407363, "grad_norm": 1.8454688787460327, "learning_rate": 1.784529382152848e-05, "loss": 0.999, "step": 1403 }, { "epoch": 0.10623888615640725, "grad_norm": 3.1255669593811035, "learning_rate": 1.7844877745583333e-05, "loss": 0.8522, "step": 1404 }, { "epoch": 0.10631455487874088, "grad_norm": 2.9718644618988037, "learning_rate": 1.7844461115743334e-05, "loss": 0.8519, "step": 1405 }, { "epoch": 0.1063902236010745, "grad_norm": 2.7671189308166504, "learning_rate": 1.7844043932034572e-05, "loss": 0.9112, "step": 1406 }, { "epoch": 0.10646589232340813, "grad_norm": 3.579397678375244, "learning_rate": 1.7843626194483174e-05, "loss": 0.8709, "step": 1407 }, { "epoch": 0.10654156104574174, "grad_norm": 2.603875160217285, "learning_rate": 1.78432079031153e-05, "loss": 0.7613, "step": 1408 }, { "epoch": 0.10661722976807536, "grad_norm": 2.835737466812134, "learning_rate": 1.7842789057957146e-05, "loss": 0.821, "step": 1409 }, { "epoch": 0.10669289849040899, "grad_norm": 2.738487958908081, "learning_rate": 1.784236965903494e-05, "loss": 0.7394, "step": 1410 }, { "epoch": 0.10676856721274261, "grad_norm": 2.9330735206604004, "learning_rate": 1.7841949706374944e-05, "loss": 0.9316, "step": 1411 }, { "epoch": 0.10684423593507623, "grad_norm": 3.281982898712158, "learning_rate": 1.784152920000346e-05, "loss": 0.7128, "step": 1412 }, { "epoch": 0.10691990465740986, "grad_norm": 2.7696800231933594, "learning_rate": 1.7841108139946824e-05, "loss": 0.8896, "step": 1413 }, { "epoch": 0.10699557337974348, "grad_norm": 6.206698894500732, "learning_rate": 1.7840686526231394e-05, "loss": 0.8619, "step": 1414 }, { "epoch": 0.10707124210207711, "grad_norm": 2.468066453933716, "learning_rate": 1.7840264358883585e-05, "loss": 0.5857, "step": 1415 }, { "epoch": 0.10714691082441073, "grad_norm": 3.581237316131592, "learning_rate": 1.7839841637929827e-05, "loss": 0.8797, "step": 1416 }, { "epoch": 0.10722257954674436, "grad_norm": 2.6938111782073975, "learning_rate": 1.7839418363396596e-05, "loss": 0.6231, "step": 1417 }, { "epoch": 0.10729824826907798, "grad_norm": 3.101189613342285, "learning_rate": 1.7838994535310393e-05, "loss": 0.9891, "step": 1418 }, { "epoch": 0.1073739169914116, "grad_norm": 3.6879820823669434, "learning_rate": 1.7838570153697767e-05, "loss": 0.7532, "step": 1419 }, { "epoch": 0.10744958571374523, "grad_norm": 3.3243961334228516, "learning_rate": 1.783814521858529e-05, "loss": 0.7972, "step": 1420 }, { "epoch": 0.10752525443607884, "grad_norm": 3.053708076477051, "learning_rate": 1.783771972999957e-05, "loss": 0.8763, "step": 1421 }, { "epoch": 0.10760092315841246, "grad_norm": 3.4024465084075928, "learning_rate": 1.783729368796726e-05, "loss": 0.8386, "step": 1422 }, { "epoch": 0.10767659188074609, "grad_norm": 3.4709556102752686, "learning_rate": 1.7836867092515034e-05, "loss": 0.8942, "step": 1423 }, { "epoch": 0.10775226060307971, "grad_norm": 2.8416600227355957, "learning_rate": 1.783643994366961e-05, "loss": 0.9081, "step": 1424 }, { "epoch": 0.10782792932541334, "grad_norm": 2.391524076461792, "learning_rate": 1.7836012241457736e-05, "loss": 0.6704, "step": 1425 }, { "epoch": 0.10790359804774696, "grad_norm": 2.441361665725708, "learning_rate": 1.7835583985906197e-05, "loss": 0.6307, "step": 1426 }, { "epoch": 0.10797926677008059, "grad_norm": 2.4388327598571777, "learning_rate": 1.7835155177041807e-05, "loss": 0.7966, "step": 1427 }, { "epoch": 0.10805493549241421, "grad_norm": 2.3517658710479736, "learning_rate": 1.7834725814891427e-05, "loss": 0.7591, "step": 1428 }, { "epoch": 0.10813060421474784, "grad_norm": 2.327765941619873, "learning_rate": 1.783429589948194e-05, "loss": 0.8393, "step": 1429 }, { "epoch": 0.10820627293708146, "grad_norm": 2.1608386039733887, "learning_rate": 1.7833865430840273e-05, "loss": 0.6654, "step": 1430 }, { "epoch": 0.10828194165941508, "grad_norm": 2.7661025524139404, "learning_rate": 1.783343440899338e-05, "loss": 0.6717, "step": 1431 }, { "epoch": 0.10835761038174871, "grad_norm": 3.8667588233947754, "learning_rate": 1.783300283396825e-05, "loss": 0.8499, "step": 1432 }, { "epoch": 0.10843327910408233, "grad_norm": 2.459967851638794, "learning_rate": 1.7832570705791915e-05, "loss": 0.9147, "step": 1433 }, { "epoch": 0.10850894782641594, "grad_norm": 2.2012739181518555, "learning_rate": 1.7832138024491435e-05, "loss": 0.787, "step": 1434 }, { "epoch": 0.10858461654874957, "grad_norm": 2.780473232269287, "learning_rate": 1.7831704790093903e-05, "loss": 0.9463, "step": 1435 }, { "epoch": 0.10866028527108319, "grad_norm": 2.5738842487335205, "learning_rate": 1.7831271002626457e-05, "loss": 0.7366, "step": 1436 }, { "epoch": 0.10873595399341682, "grad_norm": 2.993759870529175, "learning_rate": 1.7830836662116253e-05, "loss": 0.7384, "step": 1437 }, { "epoch": 0.10881162271575044, "grad_norm": 2.661965847015381, "learning_rate": 1.7830401768590494e-05, "loss": 0.7393, "step": 1438 }, { "epoch": 0.10888729143808407, "grad_norm": 2.5119550228118896, "learning_rate": 1.782996632207642e-05, "loss": 0.8387, "step": 1439 }, { "epoch": 0.10896296016041769, "grad_norm": 2.4007089138031006, "learning_rate": 1.7829530322601288e-05, "loss": 0.8684, "step": 1440 }, { "epoch": 0.10903862888275132, "grad_norm": 2.434774398803711, "learning_rate": 1.7829093770192415e-05, "loss": 0.746, "step": 1441 }, { "epoch": 0.10911429760508494, "grad_norm": 3.004561185836792, "learning_rate": 1.782865666487713e-05, "loss": 0.7922, "step": 1442 }, { "epoch": 0.10918996632741856, "grad_norm": 2.905332565307617, "learning_rate": 1.7828219006682814e-05, "loss": 0.7966, "step": 1443 }, { "epoch": 0.10926563504975219, "grad_norm": 2.940967559814453, "learning_rate": 1.7827780795636866e-05, "loss": 0.8431, "step": 1444 }, { "epoch": 0.10934130377208581, "grad_norm": 2.504350185394287, "learning_rate": 1.782734203176673e-05, "loss": 0.938, "step": 1445 }, { "epoch": 0.10941697249441944, "grad_norm": 2.725872278213501, "learning_rate": 1.782690271509989e-05, "loss": 0.9283, "step": 1446 }, { "epoch": 0.10949264121675306, "grad_norm": 2.9516894817352295, "learning_rate": 1.7826462845663853e-05, "loss": 0.8293, "step": 1447 }, { "epoch": 0.10956830993908667, "grad_norm": 3.0764172077178955, "learning_rate": 1.782602242348616e-05, "loss": 0.9247, "step": 1448 }, { "epoch": 0.1096439786614203, "grad_norm": 3.033979892730713, "learning_rate": 1.7825581448594394e-05, "loss": 0.7406, "step": 1449 }, { "epoch": 0.10971964738375392, "grad_norm": 2.9168546199798584, "learning_rate": 1.782513992101618e-05, "loss": 0.8797, "step": 1450 }, { "epoch": 0.10979531610608755, "grad_norm": 2.965071201324463, "learning_rate": 1.782469784077915e-05, "loss": 0.7152, "step": 1451 }, { "epoch": 0.10987098482842117, "grad_norm": 2.7454051971435547, "learning_rate": 1.7824255207911008e-05, "loss": 0.8399, "step": 1452 }, { "epoch": 0.1099466535507548, "grad_norm": 3.45354962348938, "learning_rate": 1.782381202243946e-05, "loss": 0.8285, "step": 1453 }, { "epoch": 0.11002232227308842, "grad_norm": 2.291821002960205, "learning_rate": 1.7823368284392266e-05, "loss": 0.8612, "step": 1454 }, { "epoch": 0.11009799099542204, "grad_norm": 2.7993972301483154, "learning_rate": 1.782292399379721e-05, "loss": 0.7609, "step": 1455 }, { "epoch": 0.11017365971775567, "grad_norm": 2.7965731620788574, "learning_rate": 1.7822479150682113e-05, "loss": 0.8857, "step": 1456 }, { "epoch": 0.11024932844008929, "grad_norm": 2.9071121215820312, "learning_rate": 1.782203375507484e-05, "loss": 0.6945, "step": 1457 }, { "epoch": 0.11032499716242292, "grad_norm": 6.042922496795654, "learning_rate": 1.7821587807003278e-05, "loss": 0.7653, "step": 1458 }, { "epoch": 0.11040066588475654, "grad_norm": 2.6927385330200195, "learning_rate": 1.782114130649536e-05, "loss": 0.9095, "step": 1459 }, { "epoch": 0.11047633460709017, "grad_norm": 2.8487069606781006, "learning_rate": 1.7820694253579036e-05, "loss": 0.8508, "step": 1460 }, { "epoch": 0.11055200332942378, "grad_norm": 3.1788697242736816, "learning_rate": 1.782024664828231e-05, "loss": 0.7621, "step": 1461 }, { "epoch": 0.1106276720517574, "grad_norm": 3.724763870239258, "learning_rate": 1.7819798490633212e-05, "loss": 0.6952, "step": 1462 }, { "epoch": 0.11070334077409102, "grad_norm": 2.963629961013794, "learning_rate": 1.7819349780659806e-05, "loss": 0.7546, "step": 1463 }, { "epoch": 0.11077900949642465, "grad_norm": 2.979599952697754, "learning_rate": 1.781890051839019e-05, "loss": 0.8406, "step": 1464 }, { "epoch": 0.11085467821875827, "grad_norm": 2.474740505218506, "learning_rate": 1.78184507038525e-05, "loss": 0.7787, "step": 1465 }, { "epoch": 0.1109303469410919, "grad_norm": 2.5070388317108154, "learning_rate": 1.7818000337074906e-05, "loss": 0.7781, "step": 1466 }, { "epoch": 0.11100601566342552, "grad_norm": 2.9093334674835205, "learning_rate": 1.7817549418085607e-05, "loss": 0.7751, "step": 1467 }, { "epoch": 0.11108168438575915, "grad_norm": 2.6724863052368164, "learning_rate": 1.7817097946912847e-05, "loss": 0.8846, "step": 1468 }, { "epoch": 0.11115735310809277, "grad_norm": 2.9973912239074707, "learning_rate": 1.7816645923584896e-05, "loss": 0.701, "step": 1469 }, { "epoch": 0.1112330218304264, "grad_norm": 2.5031442642211914, "learning_rate": 1.781619334813006e-05, "loss": 0.7866, "step": 1470 }, { "epoch": 0.11130869055276002, "grad_norm": 2.907050609588623, "learning_rate": 1.781574022057668e-05, "loss": 0.7756, "step": 1471 }, { "epoch": 0.11138435927509364, "grad_norm": 2.397503137588501, "learning_rate": 1.7815286540953133e-05, "loss": 0.7306, "step": 1472 }, { "epoch": 0.11146002799742727, "grad_norm": 2.7645323276519775, "learning_rate": 1.7814832309287835e-05, "loss": 0.81, "step": 1473 }, { "epoch": 0.1115356967197609, "grad_norm": 2.5474066734313965, "learning_rate": 1.7814377525609223e-05, "loss": 1.0083, "step": 1474 }, { "epoch": 0.1116113654420945, "grad_norm": 3.7379724979400635, "learning_rate": 1.7813922189945782e-05, "loss": 0.8414, "step": 1475 }, { "epoch": 0.11168703416442813, "grad_norm": 2.205005645751953, "learning_rate": 1.7813466302326027e-05, "loss": 0.8559, "step": 1476 }, { "epoch": 0.11176270288676175, "grad_norm": 2.9247653484344482, "learning_rate": 1.7813009862778505e-05, "loss": 0.7688, "step": 1477 }, { "epoch": 0.11183837160909538, "grad_norm": 2.9259767532348633, "learning_rate": 1.7812552871331803e-05, "loss": 0.8447, "step": 1478 }, { "epoch": 0.111914040331429, "grad_norm": 2.8542733192443848, "learning_rate": 1.7812095328014533e-05, "loss": 0.7469, "step": 1479 }, { "epoch": 0.11198970905376263, "grad_norm": 2.260713577270508, "learning_rate": 1.7811637232855356e-05, "loss": 0.6106, "step": 1480 }, { "epoch": 0.11206537777609625, "grad_norm": 2.205512046813965, "learning_rate": 1.7811178585882952e-05, "loss": 0.8235, "step": 1481 }, { "epoch": 0.11214104649842987, "grad_norm": 2.5569989681243896, "learning_rate": 1.781071938712605e-05, "loss": 0.8225, "step": 1482 }, { "epoch": 0.1122167152207635, "grad_norm": 2.4361040592193604, "learning_rate": 1.7810259636613398e-05, "loss": 0.8132, "step": 1483 }, { "epoch": 0.11229238394309712, "grad_norm": 3.278949737548828, "learning_rate": 1.7809799334373792e-05, "loss": 0.909, "step": 1484 }, { "epoch": 0.11236805266543075, "grad_norm": 3.451547145843506, "learning_rate": 1.780933848043606e-05, "loss": 0.8415, "step": 1485 }, { "epoch": 0.11244372138776437, "grad_norm": 2.9717342853546143, "learning_rate": 1.7808877074829058e-05, "loss": 0.8156, "step": 1486 }, { "epoch": 0.112519390110098, "grad_norm": 2.911635398864746, "learning_rate": 1.7808415117581683e-05, "loss": 0.892, "step": 1487 }, { "epoch": 0.11259505883243161, "grad_norm": 2.9125287532806396, "learning_rate": 1.7807952608722862e-05, "loss": 0.8326, "step": 1488 }, { "epoch": 0.11267072755476523, "grad_norm": 2.8065741062164307, "learning_rate": 1.7807489548281562e-05, "loss": 0.888, "step": 1489 }, { "epoch": 0.11274639627709886, "grad_norm": 2.326284408569336, "learning_rate": 1.780702593628678e-05, "loss": 0.6024, "step": 1490 }, { "epoch": 0.11282206499943248, "grad_norm": 2.677926778793335, "learning_rate": 1.7806561772767548e-05, "loss": 0.7457, "step": 1491 }, { "epoch": 0.1128977337217661, "grad_norm": 2.430309534072876, "learning_rate": 1.7806097057752933e-05, "loss": 0.7384, "step": 1492 }, { "epoch": 0.11297340244409973, "grad_norm": 2.58219838142395, "learning_rate": 1.780563179127204e-05, "loss": 0.8198, "step": 1493 }, { "epoch": 0.11304907116643335, "grad_norm": 2.838965892791748, "learning_rate": 1.7805165973354e-05, "loss": 0.9538, "step": 1494 }, { "epoch": 0.11312473988876698, "grad_norm": 2.168802499771118, "learning_rate": 1.780469960402799e-05, "loss": 0.9857, "step": 1495 }, { "epoch": 0.1132004086111006, "grad_norm": 3.0226144790649414, "learning_rate": 1.7804232683323212e-05, "loss": 0.8795, "step": 1496 }, { "epoch": 0.11327607733343423, "grad_norm": 2.2193140983581543, "learning_rate": 1.7803765211268907e-05, "loss": 0.8259, "step": 1497 }, { "epoch": 0.11335174605576785, "grad_norm": 2.614348888397217, "learning_rate": 1.7803297187894352e-05, "loss": 0.7653, "step": 1498 }, { "epoch": 0.11342741477810148, "grad_norm": 2.413205862045288, "learning_rate": 1.780282861322885e-05, "loss": 0.8608, "step": 1499 }, { "epoch": 0.1135030835004351, "grad_norm": 2.941840648651123, "learning_rate": 1.780235948730175e-05, "loss": 0.7904, "step": 1500 }, { "epoch": 0.11357875222276873, "grad_norm": 2.9199554920196533, "learning_rate": 1.780188981014243e-05, "loss": 0.8542, "step": 1501 }, { "epoch": 0.11365442094510234, "grad_norm": 2.8440537452697754, "learning_rate": 1.7801419581780295e-05, "loss": 0.766, "step": 1502 }, { "epoch": 0.11373008966743596, "grad_norm": 2.193862199783325, "learning_rate": 1.7800948802244805e-05, "loss": 0.8432, "step": 1503 }, { "epoch": 0.11380575838976958, "grad_norm": 2.660568952560425, "learning_rate": 1.7800477471565435e-05, "loss": 0.8334, "step": 1504 }, { "epoch": 0.11388142711210321, "grad_norm": 2.565652847290039, "learning_rate": 1.78000055897717e-05, "loss": 0.7056, "step": 1505 }, { "epoch": 0.11395709583443683, "grad_norm": 2.698594808578491, "learning_rate": 1.7799533156893153e-05, "loss": 0.8236, "step": 1506 }, { "epoch": 0.11403276455677046, "grad_norm": 2.662174940109253, "learning_rate": 1.779906017295938e-05, "loss": 0.7694, "step": 1507 }, { "epoch": 0.11410843327910408, "grad_norm": 2.9940743446350098, "learning_rate": 1.7798586637999993e-05, "loss": 1.1411, "step": 1508 }, { "epoch": 0.1141841020014377, "grad_norm": 2.8996222019195557, "learning_rate": 1.7798112552044658e-05, "loss": 0.715, "step": 1509 }, { "epoch": 0.11425977072377133, "grad_norm": 2.917023181915283, "learning_rate": 1.7797637915123058e-05, "loss": 0.5476, "step": 1510 }, { "epoch": 0.11433543944610496, "grad_norm": 2.769496440887451, "learning_rate": 1.7797162727264917e-05, "loss": 0.8295, "step": 1511 }, { "epoch": 0.11441110816843858, "grad_norm": 2.2324085235595703, "learning_rate": 1.779668698849999e-05, "loss": 0.7346, "step": 1512 }, { "epoch": 0.1144867768907722, "grad_norm": 3.295725107192993, "learning_rate": 1.7796210698858077e-05, "loss": 0.7722, "step": 1513 }, { "epoch": 0.11456244561310583, "grad_norm": 2.2366225719451904, "learning_rate": 1.7795733858368992e-05, "loss": 0.7013, "step": 1514 }, { "epoch": 0.11463811433543944, "grad_norm": 3.1166555881500244, "learning_rate": 1.7795256467062612e-05, "loss": 0.8173, "step": 1515 }, { "epoch": 0.11471378305777306, "grad_norm": 2.2865703105926514, "learning_rate": 1.779477852496882e-05, "loss": 0.745, "step": 1516 }, { "epoch": 0.11478945178010669, "grad_norm": 2.9082911014556885, "learning_rate": 1.779430003211755e-05, "loss": 0.9206, "step": 1517 }, { "epoch": 0.11486512050244031, "grad_norm": 2.4655425548553467, "learning_rate": 1.779382098853877e-05, "loss": 0.7449, "step": 1518 }, { "epoch": 0.11494078922477394, "grad_norm": 3.6643152236938477, "learning_rate": 1.7793341394262476e-05, "loss": 0.8769, "step": 1519 }, { "epoch": 0.11501645794710756, "grad_norm": 2.4818716049194336, "learning_rate": 1.7792861249318704e-05, "loss": 0.7565, "step": 1520 }, { "epoch": 0.11509212666944119, "grad_norm": 2.0801405906677246, "learning_rate": 1.779238055373752e-05, "loss": 0.5753, "step": 1521 }, { "epoch": 0.11516779539177481, "grad_norm": 3.2640860080718994, "learning_rate": 1.779189930754903e-05, "loss": 0.6951, "step": 1522 }, { "epoch": 0.11524346411410843, "grad_norm": 3.1288063526153564, "learning_rate": 1.7791417510783368e-05, "loss": 0.8465, "step": 1523 }, { "epoch": 0.11531913283644206, "grad_norm": 3.1117849349975586, "learning_rate": 1.7790935163470706e-05, "loss": 0.685, "step": 1524 }, { "epoch": 0.11539480155877568, "grad_norm": 2.816326379776001, "learning_rate": 1.779045226564125e-05, "loss": 0.7518, "step": 1525 }, { "epoch": 0.11547047028110931, "grad_norm": 3.014407157897949, "learning_rate": 1.7789968817325242e-05, "loss": 0.8803, "step": 1526 }, { "epoch": 0.11554613900344293, "grad_norm": 3.064116954803467, "learning_rate": 1.7789484818552954e-05, "loss": 0.7059, "step": 1527 }, { "epoch": 0.11562180772577656, "grad_norm": 2.1914854049682617, "learning_rate": 1.77890002693547e-05, "loss": 0.7042, "step": 1528 }, { "epoch": 0.11569747644811017, "grad_norm": 3.057530403137207, "learning_rate": 1.7788515169760824e-05, "loss": 0.876, "step": 1529 }, { "epoch": 0.11577314517044379, "grad_norm": 2.713554859161377, "learning_rate": 1.7788029519801703e-05, "loss": 0.8374, "step": 1530 }, { "epoch": 0.11584881389277742, "grad_norm": 2.849468231201172, "learning_rate": 1.7787543319507743e-05, "loss": 0.924, "step": 1531 }, { "epoch": 0.11592448261511104, "grad_norm": 3.1437432765960693, "learning_rate": 1.7787056568909405e-05, "loss": 0.8471, "step": 1532 }, { "epoch": 0.11600015133744467, "grad_norm": 2.3561949729919434, "learning_rate": 1.778656926803716e-05, "loss": 0.8902, "step": 1533 }, { "epoch": 0.11607582005977829, "grad_norm": 1.9011698961257935, "learning_rate": 1.778608141692153e-05, "loss": 0.8698, "step": 1534 }, { "epoch": 0.11615148878211191, "grad_norm": 3.898846387863159, "learning_rate": 1.7785593015593066e-05, "loss": 0.7568, "step": 1535 }, { "epoch": 0.11622715750444554, "grad_norm": 3.060079574584961, "learning_rate": 1.7785104064082347e-05, "loss": 0.863, "step": 1536 }, { "epoch": 0.11630282622677916, "grad_norm": 3.5187714099884033, "learning_rate": 1.7784614562419998e-05, "loss": 0.8006, "step": 1537 }, { "epoch": 0.11637849494911279, "grad_norm": 2.9314115047454834, "learning_rate": 1.7784124510636672e-05, "loss": 0.9548, "step": 1538 }, { "epoch": 0.11645416367144641, "grad_norm": 2.3972244262695312, "learning_rate": 1.7783633908763062e-05, "loss": 0.6688, "step": 1539 }, { "epoch": 0.11652983239378004, "grad_norm": 2.985501766204834, "learning_rate": 1.7783142756829882e-05, "loss": 0.7211, "step": 1540 }, { "epoch": 0.11660550111611366, "grad_norm": 2.532233476638794, "learning_rate": 1.7782651054867895e-05, "loss": 0.8695, "step": 1541 }, { "epoch": 0.11668116983844727, "grad_norm": 3.1398353576660156, "learning_rate": 1.7782158802907893e-05, "loss": 0.796, "step": 1542 }, { "epoch": 0.1167568385607809, "grad_norm": 3.156766414642334, "learning_rate": 1.7781666000980705e-05, "loss": 0.8581, "step": 1543 }, { "epoch": 0.11683250728311452, "grad_norm": 3.025268793106079, "learning_rate": 1.7781172649117186e-05, "loss": 0.7749, "step": 1544 }, { "epoch": 0.11690817600544814, "grad_norm": 3.071802854537964, "learning_rate": 1.7780678747348236e-05, "loss": 0.7598, "step": 1545 }, { "epoch": 0.11698384472778177, "grad_norm": 4.598686218261719, "learning_rate": 1.7780184295704778e-05, "loss": 0.8049, "step": 1546 }, { "epoch": 0.1170595134501154, "grad_norm": 3.0059025287628174, "learning_rate": 1.7779689294217784e-05, "loss": 0.7546, "step": 1547 }, { "epoch": 0.11713518217244902, "grad_norm": 2.655482292175293, "learning_rate": 1.777919374291825e-05, "loss": 0.941, "step": 1548 }, { "epoch": 0.11721085089478264, "grad_norm": 2.7230942249298096, "learning_rate": 1.7778697641837208e-05, "loss": 0.8749, "step": 1549 }, { "epoch": 0.11728651961711627, "grad_norm": 3.8638458251953125, "learning_rate": 1.7778200991005724e-05, "loss": 0.7645, "step": 1550 }, { "epoch": 0.11736218833944989, "grad_norm": 2.8020644187927246, "learning_rate": 1.7777703790454906e-05, "loss": 0.6915, "step": 1551 }, { "epoch": 0.11743785706178352, "grad_norm": 4.486051559448242, "learning_rate": 1.777720604021588e-05, "loss": 0.7654, "step": 1552 }, { "epoch": 0.11751352578411714, "grad_norm": 3.036688804626465, "learning_rate": 1.7776707740319828e-05, "loss": 0.9693, "step": 1553 }, { "epoch": 0.11758919450645076, "grad_norm": 2.724858045578003, "learning_rate": 1.7776208890797947e-05, "loss": 0.6755, "step": 1554 }, { "epoch": 0.11766486322878439, "grad_norm": 3.0751144886016846, "learning_rate": 1.7775709491681482e-05, "loss": 0.9963, "step": 1555 }, { "epoch": 0.117740531951118, "grad_norm": 2.686180591583252, "learning_rate": 1.7775209543001703e-05, "loss": 0.8259, "step": 1556 }, { "epoch": 0.11781620067345162, "grad_norm": 2.430630683898926, "learning_rate": 1.777470904478992e-05, "loss": 0.8329, "step": 1557 }, { "epoch": 0.11789186939578525, "grad_norm": 2.6584362983703613, "learning_rate": 1.7774207997077477e-05, "loss": 0.8525, "step": 1558 }, { "epoch": 0.11796753811811887, "grad_norm": 2.8905189037323, "learning_rate": 1.777370639989575e-05, "loss": 0.8348, "step": 1559 }, { "epoch": 0.1180432068404525, "grad_norm": 2.841679334640503, "learning_rate": 1.777320425327615e-05, "loss": 0.8827, "step": 1560 }, { "epoch": 0.11811887556278612, "grad_norm": 2.7715628147125244, "learning_rate": 1.777270155725012e-05, "loss": 0.8298, "step": 1561 }, { "epoch": 0.11819454428511975, "grad_norm": 3.1917660236358643, "learning_rate": 1.777219831184915e-05, "loss": 0.8082, "step": 1562 }, { "epoch": 0.11827021300745337, "grad_norm": 3.6017658710479736, "learning_rate": 1.7771694517104746e-05, "loss": 0.7245, "step": 1563 }, { "epoch": 0.118345881729787, "grad_norm": 3.7225780487060547, "learning_rate": 1.777119017304846e-05, "loss": 0.7443, "step": 1564 }, { "epoch": 0.11842155045212062, "grad_norm": 3.468682289123535, "learning_rate": 1.7770685279711877e-05, "loss": 0.7181, "step": 1565 }, { "epoch": 0.11849721917445424, "grad_norm": 3.647542715072632, "learning_rate": 1.7770179837126613e-05, "loss": 0.7155, "step": 1566 }, { "epoch": 0.11857288789678787, "grad_norm": 3.232402801513672, "learning_rate": 1.7769673845324322e-05, "loss": 0.7418, "step": 1567 }, { "epoch": 0.11864855661912149, "grad_norm": 2.8265175819396973, "learning_rate": 1.776916730433669e-05, "loss": 0.7867, "step": 1568 }, { "epoch": 0.1187242253414551, "grad_norm": 2.74609637260437, "learning_rate": 1.7768660214195437e-05, "loss": 0.7622, "step": 1569 }, { "epoch": 0.11879989406378873, "grad_norm": 2.700554609298706, "learning_rate": 1.7768152574932323e-05, "loss": 0.9818, "step": 1570 }, { "epoch": 0.11887556278612235, "grad_norm": 2.617316722869873, "learning_rate": 1.776764438657913e-05, "loss": 0.8502, "step": 1571 }, { "epoch": 0.11895123150845598, "grad_norm": 2.603131055831909, "learning_rate": 1.7767135649167694e-05, "loss": 0.7249, "step": 1572 }, { "epoch": 0.1190269002307896, "grad_norm": 2.8606648445129395, "learning_rate": 1.7766626362729864e-05, "loss": 0.7766, "step": 1573 }, { "epoch": 0.11910256895312323, "grad_norm": 3.8220481872558594, "learning_rate": 1.776611652729754e-05, "loss": 0.7473, "step": 1574 }, { "epoch": 0.11917823767545685, "grad_norm": 2.6276204586029053, "learning_rate": 1.7765606142902642e-05, "loss": 0.8983, "step": 1575 }, { "epoch": 0.11925390639779047, "grad_norm": 1.8426728248596191, "learning_rate": 1.7765095209577137e-05, "loss": 1.0027, "step": 1576 }, { "epoch": 0.1193295751201241, "grad_norm": 3.0207700729370117, "learning_rate": 1.776458372735302e-05, "loss": 0.7059, "step": 1577 }, { "epoch": 0.11940524384245772, "grad_norm": 2.4392471313476562, "learning_rate": 1.776407169626232e-05, "loss": 0.6909, "step": 1578 }, { "epoch": 0.11948091256479135, "grad_norm": 3.044403076171875, "learning_rate": 1.7763559116337107e-05, "loss": 0.7622, "step": 1579 }, { "epoch": 0.11955658128712497, "grad_norm": 2.991702079772949, "learning_rate": 1.776304598760948e-05, "loss": 0.7587, "step": 1580 }, { "epoch": 0.1196322500094586, "grad_norm": 2.4249448776245117, "learning_rate": 1.7762532310111565e-05, "loss": 0.6753, "step": 1581 }, { "epoch": 0.11970791873179222, "grad_norm": 2.8478915691375732, "learning_rate": 1.7762018083875536e-05, "loss": 0.6938, "step": 1582 }, { "epoch": 0.11978358745412583, "grad_norm": 2.7412543296813965, "learning_rate": 1.7761503308933594e-05, "loss": 0.6663, "step": 1583 }, { "epoch": 0.11985925617645946, "grad_norm": 2.393305540084839, "learning_rate": 1.776098798531798e-05, "loss": 0.9015, "step": 1584 }, { "epoch": 0.11993492489879308, "grad_norm": 3.5225398540496826, "learning_rate": 1.776047211306096e-05, "loss": 0.7928, "step": 1585 }, { "epoch": 0.1200105936211267, "grad_norm": 2.1863973140716553, "learning_rate": 1.7759955692194843e-05, "loss": 0.8247, "step": 1586 }, { "epoch": 0.12008626234346033, "grad_norm": 2.749263048171997, "learning_rate": 1.7759438722751962e-05, "loss": 0.7481, "step": 1587 }, { "epoch": 0.12016193106579395, "grad_norm": 2.8146281242370605, "learning_rate": 1.7758921204764704e-05, "loss": 0.8196, "step": 1588 }, { "epoch": 0.12023759978812758, "grad_norm": 2.485360622406006, "learning_rate": 1.7758403138265465e-05, "loss": 0.7325, "step": 1589 }, { "epoch": 0.1203132685104612, "grad_norm": 2.7749016284942627, "learning_rate": 1.7757884523286697e-05, "loss": 0.9098, "step": 1590 }, { "epoch": 0.12038893723279483, "grad_norm": 2.742647647857666, "learning_rate": 1.775736535986087e-05, "loss": 0.8401, "step": 1591 }, { "epoch": 0.12046460595512845, "grad_norm": 2.826692581176758, "learning_rate": 1.7756845648020502e-05, "loss": 0.8449, "step": 1592 }, { "epoch": 0.12054027467746208, "grad_norm": 3.2634994983673096, "learning_rate": 1.7756325387798138e-05, "loss": 0.7922, "step": 1593 }, { "epoch": 0.1206159433997957, "grad_norm": 2.3694705963134766, "learning_rate": 1.7755804579226352e-05, "loss": 0.8471, "step": 1594 }, { "epoch": 0.12069161212212932, "grad_norm": 2.5997414588928223, "learning_rate": 1.775528322233777e-05, "loss": 0.7086, "step": 1595 }, { "epoch": 0.12076728084446293, "grad_norm": 2.655085802078247, "learning_rate": 1.775476131716503e-05, "loss": 0.746, "step": 1596 }, { "epoch": 0.12084294956679656, "grad_norm": 2.6947407722473145, "learning_rate": 1.7754238863740822e-05, "loss": 0.8031, "step": 1597 }, { "epoch": 0.12091861828913018, "grad_norm": 3.005265951156616, "learning_rate": 1.775371586209786e-05, "loss": 0.7431, "step": 1598 }, { "epoch": 0.12099428701146381, "grad_norm": 2.7612268924713135, "learning_rate": 1.7753192312268897e-05, "loss": 0.8103, "step": 1599 }, { "epoch": 0.12106995573379743, "grad_norm": 2.702098846435547, "learning_rate": 1.775266821428672e-05, "loss": 0.7749, "step": 1600 }, { "epoch": 0.12114562445613106, "grad_norm": 3.0759353637695312, "learning_rate": 1.7752143568184155e-05, "loss": 0.7911, "step": 1601 }, { "epoch": 0.12122129317846468, "grad_norm": 2.7693514823913574, "learning_rate": 1.7751618373994046e-05, "loss": 0.7506, "step": 1602 }, { "epoch": 0.1212969619007983, "grad_norm": 2.7553870677948, "learning_rate": 1.775109263174929e-05, "loss": 0.6945, "step": 1603 }, { "epoch": 0.12137263062313193, "grad_norm": 3.5849199295043945, "learning_rate": 1.7750566341482813e-05, "loss": 0.9296, "step": 1604 }, { "epoch": 0.12144829934546555, "grad_norm": 2.122450828552246, "learning_rate": 1.7750039503227564e-05, "loss": 0.8155, "step": 1605 }, { "epoch": 0.12152396806779918, "grad_norm": 2.7267143726348877, "learning_rate": 1.774951211701654e-05, "loss": 0.8056, "step": 1606 }, { "epoch": 0.1215996367901328, "grad_norm": 2.3027350902557373, "learning_rate": 1.774898418288277e-05, "loss": 0.923, "step": 1607 }, { "epoch": 0.12167530551246643, "grad_norm": 2.8576319217681885, "learning_rate": 1.774845570085931e-05, "loss": 0.7195, "step": 1608 }, { "epoch": 0.12175097423480005, "grad_norm": 2.155660390853882, "learning_rate": 1.7747926670979264e-05, "loss": 0.6233, "step": 1609 }, { "epoch": 0.12182664295713366, "grad_norm": 6.00687837600708, "learning_rate": 1.774739709327575e-05, "loss": 0.8258, "step": 1610 }, { "epoch": 0.12190231167946729, "grad_norm": 2.7313666343688965, "learning_rate": 1.774686696778194e-05, "loss": 0.7564, "step": 1611 }, { "epoch": 0.12197798040180091, "grad_norm": 2.975884199142456, "learning_rate": 1.774633629453103e-05, "loss": 0.8001, "step": 1612 }, { "epoch": 0.12205364912413454, "grad_norm": 2.4056174755096436, "learning_rate": 1.7745805073556252e-05, "loss": 0.6628, "step": 1613 }, { "epoch": 0.12212931784646816, "grad_norm": 2.666964292526245, "learning_rate": 1.7745273304890872e-05, "loss": 0.7826, "step": 1614 }, { "epoch": 0.12220498656880179, "grad_norm": 2.7258975505828857, "learning_rate": 1.7744740988568195e-05, "loss": 0.6598, "step": 1615 }, { "epoch": 0.12228065529113541, "grad_norm": 3.982149600982666, "learning_rate": 1.774420812462155e-05, "loss": 0.8036, "step": 1616 }, { "epoch": 0.12235632401346903, "grad_norm": 2.672240734100342, "learning_rate": 1.7743674713084312e-05, "loss": 0.7409, "step": 1617 }, { "epoch": 0.12243199273580266, "grad_norm": 2.278903007507324, "learning_rate": 1.774314075398988e-05, "loss": 0.7339, "step": 1618 }, { "epoch": 0.12250766145813628, "grad_norm": 3.2077767848968506, "learning_rate": 1.7742606247371698e-05, "loss": 0.859, "step": 1619 }, { "epoch": 0.12258333018046991, "grad_norm": 2.292569398880005, "learning_rate": 1.7742071193263233e-05, "loss": 0.73, "step": 1620 }, { "epoch": 0.12265899890280353, "grad_norm": 2.9495203495025635, "learning_rate": 1.7741535591697998e-05, "loss": 0.8434, "step": 1621 }, { "epoch": 0.12273466762513716, "grad_norm": 2.9479892253875732, "learning_rate": 1.7740999442709528e-05, "loss": 0.7948, "step": 1622 }, { "epoch": 0.12281033634747077, "grad_norm": 2.773385763168335, "learning_rate": 1.7740462746331402e-05, "loss": 0.8904, "step": 1623 }, { "epoch": 0.12288600506980439, "grad_norm": 3.8508312702178955, "learning_rate": 1.773992550259723e-05, "loss": 0.7778, "step": 1624 }, { "epoch": 0.12296167379213802, "grad_norm": 3.088562488555908, "learning_rate": 1.7739387711540655e-05, "loss": 0.7213, "step": 1625 }, { "epoch": 0.12303734251447164, "grad_norm": 2.9575798511505127, "learning_rate": 1.7738849373195352e-05, "loss": 0.8504, "step": 1626 }, { "epoch": 0.12311301123680526, "grad_norm": 4.4509596824646, "learning_rate": 1.7738310487595038e-05, "loss": 0.8436, "step": 1627 }, { "epoch": 0.12318867995913889, "grad_norm": 3.1381912231445312, "learning_rate": 1.773777105477346e-05, "loss": 0.7903, "step": 1628 }, { "epoch": 0.12326434868147251, "grad_norm": 2.663259267807007, "learning_rate": 1.773723107476439e-05, "loss": 0.8887, "step": 1629 }, { "epoch": 0.12334001740380614, "grad_norm": 2.6209001541137695, "learning_rate": 1.773669054760166e-05, "loss": 0.8118, "step": 1630 }, { "epoch": 0.12341568612613976, "grad_norm": 2.6906800270080566, "learning_rate": 1.7736149473319102e-05, "loss": 0.8902, "step": 1631 }, { "epoch": 0.12349135484847339, "grad_norm": 2.746408700942993, "learning_rate": 1.7735607851950613e-05, "loss": 0.8419, "step": 1632 }, { "epoch": 0.12356702357080701, "grad_norm": 4.190911293029785, "learning_rate": 1.7735065683530103e-05, "loss": 0.8135, "step": 1633 }, { "epoch": 0.12364269229314064, "grad_norm": 4.614360332489014, "learning_rate": 1.7734522968091528e-05, "loss": 0.8491, "step": 1634 }, { "epoch": 0.12371836101547426, "grad_norm": 3.484330892562866, "learning_rate": 1.7733979705668877e-05, "loss": 0.8769, "step": 1635 }, { "epoch": 0.12379402973780787, "grad_norm": 2.535391092300415, "learning_rate": 1.7733435896296164e-05, "loss": 0.8932, "step": 1636 }, { "epoch": 0.1238696984601415, "grad_norm": 2.774635076522827, "learning_rate": 1.773289154000745e-05, "loss": 1.014, "step": 1637 }, { "epoch": 0.12394536718247512, "grad_norm": 2.7426695823669434, "learning_rate": 1.773234663683682e-05, "loss": 0.6978, "step": 1638 }, { "epoch": 0.12402103590480874, "grad_norm": 2.296440362930298, "learning_rate": 1.77318011868184e-05, "loss": 0.8189, "step": 1639 }, { "epoch": 0.12409670462714237, "grad_norm": 2.881760835647583, "learning_rate": 1.773125518998635e-05, "loss": 0.8082, "step": 1640 }, { "epoch": 0.12417237334947599, "grad_norm": 2.310656785964966, "learning_rate": 1.773070864637486e-05, "loss": 0.8178, "step": 1641 }, { "epoch": 0.12424804207180962, "grad_norm": 3.410867691040039, "learning_rate": 1.7730161556018154e-05, "loss": 0.8674, "step": 1642 }, { "epoch": 0.12432371079414324, "grad_norm": 2.032334327697754, "learning_rate": 1.7729613918950496e-05, "loss": 0.8109, "step": 1643 }, { "epoch": 0.12439937951647687, "grad_norm": 2.293539047241211, "learning_rate": 1.7729065735206177e-05, "loss": 0.8798, "step": 1644 }, { "epoch": 0.12447504823881049, "grad_norm": 2.2878317832946777, "learning_rate": 1.7728517004819527e-05, "loss": 0.7187, "step": 1645 }, { "epoch": 0.12455071696114411, "grad_norm": 2.5122296810150146, "learning_rate": 1.772796772782492e-05, "loss": 0.8013, "step": 1646 }, { "epoch": 0.12462638568347774, "grad_norm": 2.389878511428833, "learning_rate": 1.7727417904256734e-05, "loss": 0.8499, "step": 1647 }, { "epoch": 0.12470205440581136, "grad_norm": 2.666120767593384, "learning_rate": 1.7726867534149413e-05, "loss": 0.723, "step": 1648 }, { "epoch": 0.12477772312814499, "grad_norm": 3.555640697479248, "learning_rate": 1.7726316617537424e-05, "loss": 0.8265, "step": 1649 }, { "epoch": 0.1248533918504786, "grad_norm": 2.2089498043060303, "learning_rate": 1.7725765154455262e-05, "loss": 0.8063, "step": 1650 }, { "epoch": 0.12492906057281222, "grad_norm": 4.1747918128967285, "learning_rate": 1.7725213144937464e-05, "loss": 0.9545, "step": 1651 }, { "epoch": 0.12500472929514586, "grad_norm": 2.3813788890838623, "learning_rate": 1.7724660589018597e-05, "loss": 0.8837, "step": 1652 }, { "epoch": 0.12508039801747947, "grad_norm": 3.1693472862243652, "learning_rate": 1.7724107486733268e-05, "loss": 0.8958, "step": 1653 }, { "epoch": 0.1251560667398131, "grad_norm": 2.4706733226776123, "learning_rate": 1.772355383811611e-05, "loss": 0.8077, "step": 1654 }, { "epoch": 0.12523173546214672, "grad_norm": 2.8894009590148926, "learning_rate": 1.7722999643201794e-05, "loss": 0.7148, "step": 1655 }, { "epoch": 0.12530740418448036, "grad_norm": 2.7157387733459473, "learning_rate": 1.7722444902025025e-05, "loss": 0.72, "step": 1656 }, { "epoch": 0.12538307290681397, "grad_norm": 4.078158855438232, "learning_rate": 1.7721889614620548e-05, "loss": 0.7737, "step": 1657 }, { "epoch": 0.12545874162914758, "grad_norm": 2.9151313304901123, "learning_rate": 1.772133378102313e-05, "loss": 0.8545, "step": 1658 }, { "epoch": 0.12553441035148122, "grad_norm": 5.656601905822754, "learning_rate": 1.7720777401267586e-05, "loss": 0.8819, "step": 1659 }, { "epoch": 0.12561007907381483, "grad_norm": 2.7481420040130615, "learning_rate": 1.7720220475388756e-05, "loss": 0.7213, "step": 1660 }, { "epoch": 0.12568574779614847, "grad_norm": 3.2132253646850586, "learning_rate": 1.771966300342151e-05, "loss": 0.8183, "step": 1661 }, { "epoch": 0.12576141651848208, "grad_norm": 2.4579145908355713, "learning_rate": 1.771910498540077e-05, "loss": 0.8897, "step": 1662 }, { "epoch": 0.12583708524081572, "grad_norm": 2.608400344848633, "learning_rate": 1.7718546421361465e-05, "loss": 0.8401, "step": 1663 }, { "epoch": 0.12591275396314933, "grad_norm": 2.6007118225097656, "learning_rate": 1.771798731133859e-05, "loss": 0.906, "step": 1664 }, { "epoch": 0.12598842268548296, "grad_norm": 2.439805746078491, "learning_rate": 1.7717427655367153e-05, "loss": 0.9405, "step": 1665 }, { "epoch": 0.12606409140781658, "grad_norm": 2.689601421356201, "learning_rate": 1.7716867453482198e-05, "loss": 0.8398, "step": 1666 }, { "epoch": 0.1261397601301502, "grad_norm": 2.9567224979400635, "learning_rate": 1.7716306705718814e-05, "loss": 0.8278, "step": 1667 }, { "epoch": 0.12621542885248382, "grad_norm": 3.0706992149353027, "learning_rate": 1.7715745412112107e-05, "loss": 0.7353, "step": 1668 }, { "epoch": 0.12629109757481746, "grad_norm": 2.521843910217285, "learning_rate": 1.7715183572697234e-05, "loss": 0.91, "step": 1669 }, { "epoch": 0.12636676629715107, "grad_norm": 2.7420334815979004, "learning_rate": 1.771462118750938e-05, "loss": 0.7666, "step": 1670 }, { "epoch": 0.12644243501948468, "grad_norm": 2.520315647125244, "learning_rate": 1.7714058256583758e-05, "loss": 0.8706, "step": 1671 }, { "epoch": 0.12651810374181832, "grad_norm": 2.752073287963867, "learning_rate": 1.7713494779955625e-05, "loss": 0.9659, "step": 1672 }, { "epoch": 0.12659377246415193, "grad_norm": 2.854764223098755, "learning_rate": 1.771293075766026e-05, "loss": 0.8267, "step": 1673 }, { "epoch": 0.12666944118648557, "grad_norm": 2.201077938079834, "learning_rate": 1.7712366189732995e-05, "loss": 0.7964, "step": 1674 }, { "epoch": 0.12674510990881918, "grad_norm": 3.6953394412994385, "learning_rate": 1.7711801076209182e-05, "loss": 0.7227, "step": 1675 }, { "epoch": 0.12682077863115282, "grad_norm": 2.4317433834075928, "learning_rate": 1.7711235417124207e-05, "loss": 0.8278, "step": 1676 }, { "epoch": 0.12689644735348643, "grad_norm": 3.292365550994873, "learning_rate": 1.771066921251349e-05, "loss": 0.8908, "step": 1677 }, { "epoch": 0.12697211607582007, "grad_norm": 2.3354146480560303, "learning_rate": 1.7710102462412498e-05, "loss": 0.7178, "step": 1678 }, { "epoch": 0.12704778479815368, "grad_norm": 2.138141393661499, "learning_rate": 1.7709535166856718e-05, "loss": 0.7817, "step": 1679 }, { "epoch": 0.12712345352048732, "grad_norm": 2.597642183303833, "learning_rate": 1.7708967325881675e-05, "loss": 0.7315, "step": 1680 }, { "epoch": 0.12719912224282093, "grad_norm": 2.2730774879455566, "learning_rate": 1.7708398939522927e-05, "loss": 0.7304, "step": 1681 }, { "epoch": 0.12727479096515457, "grad_norm": 2.7624247074127197, "learning_rate": 1.7707830007816073e-05, "loss": 0.8055, "step": 1682 }, { "epoch": 0.12735045968748818, "grad_norm": 2.6193227767944336, "learning_rate": 1.770726053079674e-05, "loss": 0.8424, "step": 1683 }, { "epoch": 0.1274261284098218, "grad_norm": 2.64697265625, "learning_rate": 1.770669050850059e-05, "loss": 0.8105, "step": 1684 }, { "epoch": 0.12750179713215543, "grad_norm": 2.303603172302246, "learning_rate": 1.770611994096332e-05, "loss": 0.8021, "step": 1685 }, { "epoch": 0.12757746585448904, "grad_norm": 2.714512348175049, "learning_rate": 1.7705548828220657e-05, "loss": 0.7102, "step": 1686 }, { "epoch": 0.12765313457682267, "grad_norm": 3.6520884037017822, "learning_rate": 1.7704977170308372e-05, "loss": 0.7449, "step": 1687 }, { "epoch": 0.12772880329915628, "grad_norm": 3.0547521114349365, "learning_rate": 1.770440496726226e-05, "loss": 0.8081, "step": 1688 }, { "epoch": 0.12780447202148992, "grad_norm": 2.003868579864502, "learning_rate": 1.770383221911816e-05, "loss": 0.9698, "step": 1689 }, { "epoch": 0.12788014074382353, "grad_norm": 2.442770481109619, "learning_rate": 1.7703258925911927e-05, "loss": 0.6974, "step": 1690 }, { "epoch": 0.12795580946615717, "grad_norm": 4.01525354385376, "learning_rate": 1.7702685087679475e-05, "loss": 0.8011, "step": 1691 }, { "epoch": 0.12803147818849078, "grad_norm": 2.3456971645355225, "learning_rate": 1.7702110704456735e-05, "loss": 0.7804, "step": 1692 }, { "epoch": 0.12810714691082442, "grad_norm": 3.353431224822998, "learning_rate": 1.7701535776279678e-05, "loss": 0.648, "step": 1693 }, { "epoch": 0.12818281563315803, "grad_norm": 3.051726818084717, "learning_rate": 1.7700960303184303e-05, "loss": 0.6802, "step": 1694 }, { "epoch": 0.12825848435549167, "grad_norm": 2.835416316986084, "learning_rate": 1.7700384285206653e-05, "loss": 0.734, "step": 1695 }, { "epoch": 0.12833415307782528, "grad_norm": 2.5643491744995117, "learning_rate": 1.7699807722382798e-05, "loss": 0.6028, "step": 1696 }, { "epoch": 0.1284098218001589, "grad_norm": 2.56653094291687, "learning_rate": 1.7699230614748846e-05, "loss": 0.7887, "step": 1697 }, { "epoch": 0.12848549052249253, "grad_norm": 3.0402891635894775, "learning_rate": 1.7698652962340934e-05, "loss": 0.7655, "step": 1698 }, { "epoch": 0.12856115924482614, "grad_norm": 2.826277017593384, "learning_rate": 1.769807476519524e-05, "loss": 0.9516, "step": 1699 }, { "epoch": 0.12863682796715978, "grad_norm": 2.662139415740967, "learning_rate": 1.7697496023347972e-05, "loss": 0.6862, "step": 1700 }, { "epoch": 0.1287124966894934, "grad_norm": 2.36557936668396, "learning_rate": 1.769691673683537e-05, "loss": 0.6471, "step": 1701 }, { "epoch": 0.12878816541182703, "grad_norm": 2.5000391006469727, "learning_rate": 1.7696336905693713e-05, "loss": 0.7876, "step": 1702 }, { "epoch": 0.12886383413416064, "grad_norm": 2.8562798500061035, "learning_rate": 1.7695756529959313e-05, "loss": 0.7194, "step": 1703 }, { "epoch": 0.12893950285649428, "grad_norm": 2.4133670330047607, "learning_rate": 1.7695175609668516e-05, "loss": 0.816, "step": 1704 }, { "epoch": 0.1290151715788279, "grad_norm": 2.8379642963409424, "learning_rate": 1.7694594144857696e-05, "loss": 0.8176, "step": 1705 }, { "epoch": 0.12909084030116152, "grad_norm": 2.2420616149902344, "learning_rate": 1.769401213556327e-05, "loss": 0.7886, "step": 1706 }, { "epoch": 0.12916650902349514, "grad_norm": 2.5862045288085938, "learning_rate": 1.769342958182168e-05, "loss": 0.723, "step": 1707 }, { "epoch": 0.12924217774582877, "grad_norm": 2.9615068435668945, "learning_rate": 1.7692846483669416e-05, "loss": 0.8738, "step": 1708 }, { "epoch": 0.12931784646816238, "grad_norm": 2.824129581451416, "learning_rate": 1.7692262841142988e-05, "loss": 0.7763, "step": 1709 }, { "epoch": 0.12939351519049602, "grad_norm": 2.8640074729919434, "learning_rate": 1.7691678654278947e-05, "loss": 0.7885, "step": 1710 }, { "epoch": 0.12946918391282963, "grad_norm": 3.950695753097534, "learning_rate": 1.7691093923113875e-05, "loss": 0.8261, "step": 1711 }, { "epoch": 0.12954485263516324, "grad_norm": 2.261723041534424, "learning_rate": 1.769050864768439e-05, "loss": 0.806, "step": 1712 }, { "epoch": 0.12962052135749688, "grad_norm": 2.690190076828003, "learning_rate": 1.768992282802715e-05, "loss": 0.7319, "step": 1713 }, { "epoch": 0.1296961900798305, "grad_norm": 3.1310863494873047, "learning_rate": 1.768933646417883e-05, "loss": 0.6613, "step": 1714 }, { "epoch": 0.12977185880216413, "grad_norm": 2.7063982486724854, "learning_rate": 1.768874955617616e-05, "loss": 0.8432, "step": 1715 }, { "epoch": 0.12984752752449774, "grad_norm": 2.001281499862671, "learning_rate": 1.768816210405589e-05, "loss": 0.6304, "step": 1716 }, { "epoch": 0.12992319624683138, "grad_norm": 2.623138189315796, "learning_rate": 1.7687574107854808e-05, "loss": 0.8827, "step": 1717 }, { "epoch": 0.129998864969165, "grad_norm": 3.3270182609558105, "learning_rate": 1.7686985567609735e-05, "loss": 0.6884, "step": 1718 }, { "epoch": 0.13007453369149863, "grad_norm": 3.246429204940796, "learning_rate": 1.7686396483357528e-05, "loss": 0.8164, "step": 1719 }, { "epoch": 0.13015020241383224, "grad_norm": 2.8202688694000244, "learning_rate": 1.7685806855135077e-05, "loss": 0.7462, "step": 1720 }, { "epoch": 0.13022587113616588, "grad_norm": 1.7720720767974854, "learning_rate": 1.768521668297931e-05, "loss": 0.9133, "step": 1721 }, { "epoch": 0.1303015398584995, "grad_norm": 2.7071969509124756, "learning_rate": 1.768462596692718e-05, "loss": 0.857, "step": 1722 }, { "epoch": 0.13037720858083313, "grad_norm": 2.4455504417419434, "learning_rate": 1.7684034707015686e-05, "loss": 0.7251, "step": 1723 }, { "epoch": 0.13045287730316674, "grad_norm": 2.843379020690918, "learning_rate": 1.768344290328185e-05, "loss": 0.9646, "step": 1724 }, { "epoch": 0.13052854602550035, "grad_norm": 3.5226151943206787, "learning_rate": 1.7682850555762735e-05, "loss": 0.7941, "step": 1725 }, { "epoch": 0.13060421474783399, "grad_norm": 2.8279130458831787, "learning_rate": 1.768225766449543e-05, "loss": 0.7988, "step": 1726 }, { "epoch": 0.1306798834701676, "grad_norm": 3.3851380348205566, "learning_rate": 1.7681664229517074e-05, "loss": 0.7569, "step": 1727 }, { "epoch": 0.13075555219250123, "grad_norm": 2.9672915935516357, "learning_rate": 1.7681070250864817e-05, "loss": 0.8753, "step": 1728 }, { "epoch": 0.13083122091483484, "grad_norm": 2.609323740005493, "learning_rate": 1.768047572857587e-05, "loss": 0.7996, "step": 1729 }, { "epoch": 0.13090688963716848, "grad_norm": 2.4190008640289307, "learning_rate": 1.7679880662687453e-05, "loss": 0.7647, "step": 1730 }, { "epoch": 0.1309825583595021, "grad_norm": 3.3898887634277344, "learning_rate": 1.7679285053236838e-05, "loss": 0.7903, "step": 1731 }, { "epoch": 0.13105822708183573, "grad_norm": 2.466832160949707, "learning_rate": 1.767868890026132e-05, "loss": 0.6835, "step": 1732 }, { "epoch": 0.13113389580416934, "grad_norm": 3.120836019515991, "learning_rate": 1.767809220379823e-05, "loss": 0.7485, "step": 1733 }, { "epoch": 0.13120956452650298, "grad_norm": 2.3976681232452393, "learning_rate": 1.7677494963884935e-05, "loss": 0.7093, "step": 1734 }, { "epoch": 0.1312852332488366, "grad_norm": 3.236889600753784, "learning_rate": 1.7676897180558844e-05, "loss": 0.7506, "step": 1735 }, { "epoch": 0.13136090197117023, "grad_norm": 3.0290706157684326, "learning_rate": 1.7676298853857387e-05, "loss": 0.7999, "step": 1736 }, { "epoch": 0.13143657069350384, "grad_norm": 2.8244903087615967, "learning_rate": 1.767569998381803e-05, "loss": 0.6637, "step": 1737 }, { "epoch": 0.13151223941583745, "grad_norm": 2.6339778900146484, "learning_rate": 1.7675100570478282e-05, "loss": 0.81, "step": 1738 }, { "epoch": 0.1315879081381711, "grad_norm": 3.118966817855835, "learning_rate": 1.7674500613875678e-05, "loss": 0.8709, "step": 1739 }, { "epoch": 0.1316635768605047, "grad_norm": 3.025437593460083, "learning_rate": 1.767390011404779e-05, "loss": 0.8147, "step": 1740 }, { "epoch": 0.13173924558283834, "grad_norm": 3.028900623321533, "learning_rate": 1.767329907103222e-05, "loss": 0.8358, "step": 1741 }, { "epoch": 0.13181491430517195, "grad_norm": 2.833244800567627, "learning_rate": 1.767269748486661e-05, "loss": 0.7973, "step": 1742 }, { "epoch": 0.1318905830275056, "grad_norm": 2.8717784881591797, "learning_rate": 1.7672095355588632e-05, "loss": 0.8679, "step": 1743 }, { "epoch": 0.1319662517498392, "grad_norm": 2.449739456176758, "learning_rate": 1.7671492683235993e-05, "loss": 0.9128, "step": 1744 }, { "epoch": 0.13204192047217284, "grad_norm": 4.743024826049805, "learning_rate": 1.7670889467846435e-05, "loss": 0.884, "step": 1745 }, { "epoch": 0.13211758919450645, "grad_norm": 2.698247194290161, "learning_rate": 1.7670285709457732e-05, "loss": 0.7641, "step": 1746 }, { "epoch": 0.13219325791684008, "grad_norm": 2.852203607559204, "learning_rate": 1.76696814081077e-05, "loss": 0.7957, "step": 1747 }, { "epoch": 0.1322689266391737, "grad_norm": 2.8371152877807617, "learning_rate": 1.7669076563834174e-05, "loss": 0.8086, "step": 1748 }, { "epoch": 0.13234459536150733, "grad_norm": 2.7854745388031006, "learning_rate": 1.7668471176675033e-05, "loss": 0.9564, "step": 1749 }, { "epoch": 0.13242026408384094, "grad_norm": 4.4708051681518555, "learning_rate": 1.7667865246668193e-05, "loss": 0.7811, "step": 1750 }, { "epoch": 0.13249593280617455, "grad_norm": 3.00451922416687, "learning_rate": 1.7667258773851596e-05, "loss": 0.9362, "step": 1751 }, { "epoch": 0.1325716015285082, "grad_norm": 2.8037302494049072, "learning_rate": 1.7666651758263218e-05, "loss": 0.8856, "step": 1752 }, { "epoch": 0.1326472702508418, "grad_norm": 2.7416181564331055, "learning_rate": 1.7666044199941077e-05, "loss": 0.7625, "step": 1753 }, { "epoch": 0.13272293897317544, "grad_norm": 3.8341903686523438, "learning_rate": 1.766543609892322e-05, "loss": 0.7551, "step": 1754 }, { "epoch": 0.13279860769550905, "grad_norm": 2.7719779014587402, "learning_rate": 1.7664827455247725e-05, "loss": 0.6949, "step": 1755 }, { "epoch": 0.1328742764178427, "grad_norm": 11.289454460144043, "learning_rate": 1.766421826895271e-05, "loss": 0.7725, "step": 1756 }, { "epoch": 0.1329499451401763, "grad_norm": 4.495529651641846, "learning_rate": 1.7663608540076325e-05, "loss": 0.935, "step": 1757 }, { "epoch": 0.13302561386250994, "grad_norm": 3.4977807998657227, "learning_rate": 1.766299826865675e-05, "loss": 0.7704, "step": 1758 }, { "epoch": 0.13310128258484355, "grad_norm": 3.455537796020508, "learning_rate": 1.7662387454732206e-05, "loss": 0.7805, "step": 1759 }, { "epoch": 0.1331769513071772, "grad_norm": 2.7036778926849365, "learning_rate": 1.766177609834094e-05, "loss": 0.7823, "step": 1760 }, { "epoch": 0.1332526200295108, "grad_norm": 2.983051061630249, "learning_rate": 1.7661164199521238e-05, "loss": 0.8118, "step": 1761 }, { "epoch": 0.13332828875184444, "grad_norm": 2.9861648082733154, "learning_rate": 1.7660551758311424e-05, "loss": 0.755, "step": 1762 }, { "epoch": 0.13340395747417805, "grad_norm": 3.221959352493286, "learning_rate": 1.7659938774749843e-05, "loss": 0.8239, "step": 1763 }, { "epoch": 0.13347962619651166, "grad_norm": 2.7048370838165283, "learning_rate": 1.765932524887489e-05, "loss": 0.7978, "step": 1764 }, { "epoch": 0.1335552949188453, "grad_norm": 3.0908560752868652, "learning_rate": 1.765871118072498e-05, "loss": 0.951, "step": 1765 }, { "epoch": 0.1336309636411789, "grad_norm": 2.1981539726257324, "learning_rate": 1.765809657033857e-05, "loss": 0.6185, "step": 1766 }, { "epoch": 0.13370663236351255, "grad_norm": 2.284193277359009, "learning_rate": 1.765748141775415e-05, "loss": 1.0244, "step": 1767 }, { "epoch": 0.13378230108584616, "grad_norm": 3.8819425106048584, "learning_rate": 1.7656865723010242e-05, "loss": 0.7626, "step": 1768 }, { "epoch": 0.1338579698081798, "grad_norm": 2.6897873878479004, "learning_rate": 1.7656249486145405e-05, "loss": 0.666, "step": 1769 }, { "epoch": 0.1339336385305134, "grad_norm": 3.00016713142395, "learning_rate": 1.7655632707198225e-05, "loss": 0.8359, "step": 1770 }, { "epoch": 0.13400930725284704, "grad_norm": 2.38024640083313, "learning_rate": 1.7655015386207326e-05, "loss": 0.7616, "step": 1771 }, { "epoch": 0.13408497597518065, "grad_norm": 2.9403862953186035, "learning_rate": 1.7654397523211374e-05, "loss": 0.804, "step": 1772 }, { "epoch": 0.1341606446975143, "grad_norm": 3.072727680206299, "learning_rate": 1.7653779118249055e-05, "loss": 0.7256, "step": 1773 }, { "epoch": 0.1342363134198479, "grad_norm": 3.713294267654419, "learning_rate": 1.76531601713591e-05, "loss": 0.842, "step": 1774 }, { "epoch": 0.13431198214218154, "grad_norm": 2.764293909072876, "learning_rate": 1.7652540682580267e-05, "loss": 0.7639, "step": 1775 }, { "epoch": 0.13438765086451515, "grad_norm": 2.774491310119629, "learning_rate": 1.765192065195135e-05, "loss": 0.8338, "step": 1776 }, { "epoch": 0.1344633195868488, "grad_norm": 2.9168760776519775, "learning_rate": 1.765130007951118e-05, "loss": 0.7613, "step": 1777 }, { "epoch": 0.1345389883091824, "grad_norm": 2.8239054679870605, "learning_rate": 1.7650678965298615e-05, "loss": 0.908, "step": 1778 }, { "epoch": 0.134614657031516, "grad_norm": 2.766092538833618, "learning_rate": 1.7650057309352554e-05, "loss": 0.8853, "step": 1779 }, { "epoch": 0.13469032575384965, "grad_norm": 2.6427292823791504, "learning_rate": 1.7649435111711926e-05, "loss": 0.7613, "step": 1780 }, { "epoch": 0.13476599447618326, "grad_norm": 3.0190093517303467, "learning_rate": 1.7648812372415697e-05, "loss": 0.7216, "step": 1781 }, { "epoch": 0.1348416631985169, "grad_norm": 2.665872573852539, "learning_rate": 1.7648189091502863e-05, "loss": 0.9648, "step": 1782 }, { "epoch": 0.1349173319208505, "grad_norm": 2.2282044887542725, "learning_rate": 1.7647565269012458e-05, "loss": 0.8866, "step": 1783 }, { "epoch": 0.13499300064318415, "grad_norm": 2.46911883354187, "learning_rate": 1.7646940904983545e-05, "loss": 0.7876, "step": 1784 }, { "epoch": 0.13506866936551776, "grad_norm": 2.569694757461548, "learning_rate": 1.7646315999455224e-05, "loss": 0.5898, "step": 1785 }, { "epoch": 0.1351443380878514, "grad_norm": 2.6884071826934814, "learning_rate": 1.764569055246663e-05, "loss": 0.8368, "step": 1786 }, { "epoch": 0.135220006810185, "grad_norm": 2.3804450035095215, "learning_rate": 1.764506456405693e-05, "loss": 0.8086, "step": 1787 }, { "epoch": 0.13529567553251864, "grad_norm": 2.71864914894104, "learning_rate": 1.7644438034265326e-05, "loss": 0.6779, "step": 1788 }, { "epoch": 0.13537134425485225, "grad_norm": 2.8667616844177246, "learning_rate": 1.7643810963131053e-05, "loss": 0.9573, "step": 1789 }, { "epoch": 0.1354470129771859, "grad_norm": 2.3643083572387695, "learning_rate": 1.764318335069338e-05, "loss": 0.9273, "step": 1790 }, { "epoch": 0.1355226816995195, "grad_norm": 2.5259487628936768, "learning_rate": 1.764255519699161e-05, "loss": 0.6811, "step": 1791 }, { "epoch": 0.13559835042185311, "grad_norm": 3.6173150539398193, "learning_rate": 1.764192650206508e-05, "loss": 0.9071, "step": 1792 }, { "epoch": 0.13567401914418675, "grad_norm": 3.5628244876861572, "learning_rate": 1.7641297265953158e-05, "loss": 0.7724, "step": 1793 }, { "epoch": 0.13574968786652036, "grad_norm": 2.5445380210876465, "learning_rate": 1.7640667488695258e-05, "loss": 0.8766, "step": 1794 }, { "epoch": 0.135825356588854, "grad_norm": 2.36080002784729, "learning_rate": 1.764003717033081e-05, "loss": 0.7998, "step": 1795 }, { "epoch": 0.1359010253111876, "grad_norm": 2.7486581802368164, "learning_rate": 1.763940631089929e-05, "loss": 0.7066, "step": 1796 }, { "epoch": 0.13597669403352125, "grad_norm": 3.2052993774414062, "learning_rate": 1.7638774910440197e-05, "loss": 0.7667, "step": 1797 }, { "epoch": 0.13605236275585486, "grad_norm": 2.3594470024108887, "learning_rate": 1.7638142968993086e-05, "loss": 0.8154, "step": 1798 }, { "epoch": 0.1361280314781885, "grad_norm": 3.531343936920166, "learning_rate": 1.7637510486597517e-05, "loss": 0.7966, "step": 1799 }, { "epoch": 0.1362037002005221, "grad_norm": 2.1436774730682373, "learning_rate": 1.7636877463293108e-05, "loss": 0.9163, "step": 1800 }, { "epoch": 0.13627936892285575, "grad_norm": 2.2388010025024414, "learning_rate": 1.76362438991195e-05, "loss": 0.8786, "step": 1801 }, { "epoch": 0.13635503764518936, "grad_norm": 2.846320390701294, "learning_rate": 1.7635609794116362e-05, "loss": 0.8172, "step": 1802 }, { "epoch": 0.136430706367523, "grad_norm": 2.405848264694214, "learning_rate": 1.7634975148323405e-05, "loss": 0.9217, "step": 1803 }, { "epoch": 0.1365063750898566, "grad_norm": 2.645883321762085, "learning_rate": 1.763433996178038e-05, "loss": 0.8546, "step": 1804 }, { "epoch": 0.13658204381219022, "grad_norm": 2.8361809253692627, "learning_rate": 1.763370423452706e-05, "loss": 0.8501, "step": 1805 }, { "epoch": 0.13665771253452386, "grad_norm": 2.7826128005981445, "learning_rate": 1.7633067966603254e-05, "loss": 0.9248, "step": 1806 }, { "epoch": 0.13673338125685747, "grad_norm": 2.6363344192504883, "learning_rate": 1.7632431158048808e-05, "loss": 0.8338, "step": 1807 }, { "epoch": 0.1368090499791911, "grad_norm": 3.479905366897583, "learning_rate": 1.7631793808903604e-05, "loss": 0.8466, "step": 1808 }, { "epoch": 0.13688471870152472, "grad_norm": 3.4462170600891113, "learning_rate": 1.7631155919207556e-05, "loss": 0.9066, "step": 1809 }, { "epoch": 0.13696038742385835, "grad_norm": 3.2740397453308105, "learning_rate": 1.76305174890006e-05, "loss": 0.7991, "step": 1810 }, { "epoch": 0.13703605614619196, "grad_norm": 2.6029398441314697, "learning_rate": 1.7629878518322732e-05, "loss": 0.7337, "step": 1811 }, { "epoch": 0.1371117248685256, "grad_norm": 2.4479711055755615, "learning_rate": 1.7629239007213957e-05, "loss": 0.7885, "step": 1812 }, { "epoch": 0.1371873935908592, "grad_norm": 2.370789051055908, "learning_rate": 1.7628598955714322e-05, "loss": 0.8362, "step": 1813 }, { "epoch": 0.13726306231319285, "grad_norm": 2.8538105487823486, "learning_rate": 1.7627958363863914e-05, "loss": 0.6775, "step": 1814 }, { "epoch": 0.13733873103552646, "grad_norm": 2.297853469848633, "learning_rate": 1.7627317231702847e-05, "loss": 0.597, "step": 1815 }, { "epoch": 0.1374143997578601, "grad_norm": 3.4059054851531982, "learning_rate": 1.762667555927127e-05, "loss": 0.6599, "step": 1816 }, { "epoch": 0.1374900684801937, "grad_norm": 4.349469184875488, "learning_rate": 1.762603334660937e-05, "loss": 0.715, "step": 1817 }, { "epoch": 0.13756573720252732, "grad_norm": 2.5945630073547363, "learning_rate": 1.762539059375736e-05, "loss": 0.7752, "step": 1818 }, { "epoch": 0.13764140592486096, "grad_norm": 2.1834769248962402, "learning_rate": 1.7624747300755493e-05, "loss": 0.7783, "step": 1819 }, { "epoch": 0.13771707464719457, "grad_norm": 2.6315038204193115, "learning_rate": 1.7624103467644055e-05, "loss": 0.7731, "step": 1820 }, { "epoch": 0.1377927433695282, "grad_norm": 2.684382915496826, "learning_rate": 1.7623459094463363e-05, "loss": 0.6977, "step": 1821 }, { "epoch": 0.13786841209186182, "grad_norm": 2.139249086380005, "learning_rate": 1.762281418125377e-05, "loss": 0.6902, "step": 1822 }, { "epoch": 0.13794408081419546, "grad_norm": 5.037784099578857, "learning_rate": 1.7622168728055665e-05, "loss": 0.823, "step": 1823 }, { "epoch": 0.13801974953652907, "grad_norm": 3.9115712642669678, "learning_rate": 1.762152273490947e-05, "loss": 0.7472, "step": 1824 }, { "epoch": 0.1380954182588627, "grad_norm": 2.827516555786133, "learning_rate": 1.7620876201855633e-05, "loss": 0.8842, "step": 1825 }, { "epoch": 0.13817108698119632, "grad_norm": 2.3763670921325684, "learning_rate": 1.7620229128934644e-05, "loss": 0.7502, "step": 1826 }, { "epoch": 0.13824675570352996, "grad_norm": 2.8924078941345215, "learning_rate": 1.7619581516187026e-05, "loss": 0.8482, "step": 1827 }, { "epoch": 0.13832242442586357, "grad_norm": 2.8543429374694824, "learning_rate": 1.7618933363653333e-05, "loss": 0.7102, "step": 1828 }, { "epoch": 0.1383980931481972, "grad_norm": 2.7342612743377686, "learning_rate": 1.7618284671374157e-05, "loss": 0.7424, "step": 1829 }, { "epoch": 0.13847376187053081, "grad_norm": 3.0927932262420654, "learning_rate": 1.7617635439390123e-05, "loss": 0.8179, "step": 1830 }, { "epoch": 0.13854943059286445, "grad_norm": 2.353637933731079, "learning_rate": 1.761698566774188e-05, "loss": 0.7647, "step": 1831 }, { "epoch": 0.13862509931519806, "grad_norm": 2.7687482833862305, "learning_rate": 1.7616335356470128e-05, "loss": 0.9106, "step": 1832 }, { "epoch": 0.13870076803753167, "grad_norm": 4.123755931854248, "learning_rate": 1.7615684505615587e-05, "loss": 0.9028, "step": 1833 }, { "epoch": 0.1387764367598653, "grad_norm": 2.1787283420562744, "learning_rate": 1.7615033115219012e-05, "loss": 0.6567, "step": 1834 }, { "epoch": 0.13885210548219892, "grad_norm": 2.4294352531433105, "learning_rate": 1.76143811853212e-05, "loss": 0.8595, "step": 1835 }, { "epoch": 0.13892777420453256, "grad_norm": 2.3959708213806152, "learning_rate": 1.7613728715962978e-05, "loss": 0.7286, "step": 1836 }, { "epoch": 0.13900344292686617, "grad_norm": 1.9494025707244873, "learning_rate": 1.7613075707185203e-05, "loss": 0.721, "step": 1837 }, { "epoch": 0.1390791116491998, "grad_norm": 1.5588613748550415, "learning_rate": 1.7612422159028767e-05, "loss": 0.9273, "step": 1838 }, { "epoch": 0.13915478037153342, "grad_norm": 3.5548200607299805, "learning_rate": 1.7611768071534604e-05, "loss": 0.7202, "step": 1839 }, { "epoch": 0.13923044909386706, "grad_norm": 2.5746283531188965, "learning_rate": 1.7611113444743665e-05, "loss": 0.708, "step": 1840 }, { "epoch": 0.13930611781620067, "grad_norm": 2.695033073425293, "learning_rate": 1.7610458278696955e-05, "loss": 0.8412, "step": 1841 }, { "epoch": 0.1393817865385343, "grad_norm": 2.7120509147644043, "learning_rate": 1.7609802573435495e-05, "loss": 0.8491, "step": 1842 }, { "epoch": 0.13945745526086792, "grad_norm": 2.5624072551727295, "learning_rate": 1.7609146329000353e-05, "loss": 0.8119, "step": 1843 }, { "epoch": 0.13953312398320156, "grad_norm": 2.7809038162231445, "learning_rate": 1.760848954543262e-05, "loss": 0.7659, "step": 1844 }, { "epoch": 0.13960879270553517, "grad_norm": 2.43100905418396, "learning_rate": 1.760783222277343e-05, "loss": 0.6677, "step": 1845 }, { "epoch": 0.13968446142786878, "grad_norm": 2.619065999984741, "learning_rate": 1.7607174361063944e-05, "loss": 0.9192, "step": 1846 }, { "epoch": 0.13976013015020242, "grad_norm": 2.997462511062622, "learning_rate": 1.7606515960345362e-05, "loss": 0.7037, "step": 1847 }, { "epoch": 0.13983579887253603, "grad_norm": 2.8004891872406006, "learning_rate": 1.7605857020658913e-05, "loss": 0.6762, "step": 1848 }, { "epoch": 0.13991146759486967, "grad_norm": 2.8649933338165283, "learning_rate": 1.760519754204586e-05, "loss": 0.6628, "step": 1849 }, { "epoch": 0.13998713631720328, "grad_norm": 2.591527223587036, "learning_rate": 1.760453752454751e-05, "loss": 0.8484, "step": 1850 }, { "epoch": 0.14006280503953691, "grad_norm": 2.9254584312438965, "learning_rate": 1.7603876968205185e-05, "loss": 0.9029, "step": 1851 }, { "epoch": 0.14013847376187052, "grad_norm": 2.7631466388702393, "learning_rate": 1.7603215873060256e-05, "loss": 0.8673, "step": 1852 }, { "epoch": 0.14021414248420416, "grad_norm": 2.2092506885528564, "learning_rate": 1.7602554239154126e-05, "loss": 0.7803, "step": 1853 }, { "epoch": 0.14028981120653777, "grad_norm": 5.1182026863098145, "learning_rate": 1.7601892066528224e-05, "loss": 0.7412, "step": 1854 }, { "epoch": 0.1403654799288714, "grad_norm": 2.7302863597869873, "learning_rate": 1.7601229355224018e-05, "loss": 0.7575, "step": 1855 }, { "epoch": 0.14044114865120502, "grad_norm": 2.1949663162231445, "learning_rate": 1.7600566105283013e-05, "loss": 0.5413, "step": 1856 }, { "epoch": 0.14051681737353866, "grad_norm": 2.7179486751556396, "learning_rate": 1.7599902316746737e-05, "loss": 0.7399, "step": 1857 }, { "epoch": 0.14059248609587227, "grad_norm": 2.4774746894836426, "learning_rate": 1.7599237989656765e-05, "loss": 0.7259, "step": 1858 }, { "epoch": 0.14066815481820588, "grad_norm": 2.5634522438049316, "learning_rate": 1.7598573124054694e-05, "loss": 0.7805, "step": 1859 }, { "epoch": 0.14074382354053952, "grad_norm": 2.349278450012207, "learning_rate": 1.7597907719982165e-05, "loss": 0.8274, "step": 1860 }, { "epoch": 0.14081949226287313, "grad_norm": 2.56119441986084, "learning_rate": 1.7597241777480846e-05, "loss": 1.0099, "step": 1861 }, { "epoch": 0.14089516098520677, "grad_norm": 2.2470288276672363, "learning_rate": 1.759657529659244e-05, "loss": 0.7354, "step": 1862 }, { "epoch": 0.14097082970754038, "grad_norm": 2.7131481170654297, "learning_rate": 1.7595908277358683e-05, "loss": 0.8058, "step": 1863 }, { "epoch": 0.14104649842987402, "grad_norm": 2.9659440517425537, "learning_rate": 1.7595240719821348e-05, "loss": 0.6039, "step": 1864 }, { "epoch": 0.14112216715220763, "grad_norm": 2.83231782913208, "learning_rate": 1.7594572624022236e-05, "loss": 1.0244, "step": 1865 }, { "epoch": 0.14119783587454127, "grad_norm": 2.453878402709961, "learning_rate": 1.7593903990003194e-05, "loss": 0.9862, "step": 1866 }, { "epoch": 0.14127350459687488, "grad_norm": 2.553098678588867, "learning_rate": 1.7593234817806085e-05, "loss": 0.8229, "step": 1867 }, { "epoch": 0.14134917331920852, "grad_norm": 2.8726489543914795, "learning_rate": 1.7592565107472817e-05, "loss": 0.8612, "step": 1868 }, { "epoch": 0.14142484204154213, "grad_norm": 2.7454302310943604, "learning_rate": 1.759189485904533e-05, "loss": 0.912, "step": 1869 }, { "epoch": 0.14150051076387576, "grad_norm": 2.6439144611358643, "learning_rate": 1.7591224072565598e-05, "loss": 0.6824, "step": 1870 }, { "epoch": 0.14157617948620937, "grad_norm": 3.527799367904663, "learning_rate": 1.7590552748075626e-05, "loss": 0.9554, "step": 1871 }, { "epoch": 0.14165184820854299, "grad_norm": 3.4755446910858154, "learning_rate": 1.7589880885617457e-05, "loss": 0.7975, "step": 1872 }, { "epoch": 0.14172751693087662, "grad_norm": 2.0089948177337646, "learning_rate": 1.7589208485233164e-05, "loss": 0.7643, "step": 1873 }, { "epoch": 0.14180318565321023, "grad_norm": 2.9593629837036133, "learning_rate": 1.7588535546964853e-05, "loss": 0.8146, "step": 1874 }, { "epoch": 0.14187885437554387, "grad_norm": 2.3689467906951904, "learning_rate": 1.758786207085467e-05, "loss": 0.9881, "step": 1875 }, { "epoch": 0.14195452309787748, "grad_norm": 2.658514976501465, "learning_rate": 1.758718805694479e-05, "loss": 0.5528, "step": 1876 }, { "epoch": 0.14203019182021112, "grad_norm": 2.965433359146118, "learning_rate": 1.7586513505277414e-05, "loss": 0.6984, "step": 1877 }, { "epoch": 0.14210586054254473, "grad_norm": 3.3098106384277344, "learning_rate": 1.758583841589479e-05, "loss": 0.9566, "step": 1878 }, { "epoch": 0.14218152926487837, "grad_norm": 2.4268958568573, "learning_rate": 1.7585162788839197e-05, "loss": 0.8622, "step": 1879 }, { "epoch": 0.14225719798721198, "grad_norm": 3.331698417663574, "learning_rate": 1.7584486624152943e-05, "loss": 0.8862, "step": 1880 }, { "epoch": 0.14233286670954562, "grad_norm": 2.746612787246704, "learning_rate": 1.758380992187837e-05, "loss": 0.8097, "step": 1881 }, { "epoch": 0.14240853543187923, "grad_norm": 2.4068593978881836, "learning_rate": 1.7583132682057857e-05, "loss": 0.8202, "step": 1882 }, { "epoch": 0.14248420415421287, "grad_norm": 2.4909160137176514, "learning_rate": 1.7582454904733815e-05, "loss": 0.7272, "step": 1883 }, { "epoch": 0.14255987287654648, "grad_norm": 2.3360512256622314, "learning_rate": 1.7581776589948686e-05, "loss": 0.7754, "step": 1884 }, { "epoch": 0.14263554159888012, "grad_norm": 2.4334325790405273, "learning_rate": 1.758109773774495e-05, "loss": 0.7806, "step": 1885 }, { "epoch": 0.14271121032121373, "grad_norm": 3.528743028640747, "learning_rate": 1.758041834816512e-05, "loss": 0.7452, "step": 1886 }, { "epoch": 0.14278687904354734, "grad_norm": 2.863650321960449, "learning_rate": 1.757973842125174e-05, "loss": 0.908, "step": 1887 }, { "epoch": 0.14286254776588098, "grad_norm": 2.7504138946533203, "learning_rate": 1.757905795704739e-05, "loss": 0.8345, "step": 1888 }, { "epoch": 0.1429382164882146, "grad_norm": 2.4951789379119873, "learning_rate": 1.7578376955594682e-05, "loss": 0.7721, "step": 1889 }, { "epoch": 0.14301388521054823, "grad_norm": 2.6636242866516113, "learning_rate": 1.7577695416936263e-05, "loss": 0.9099, "step": 1890 }, { "epoch": 0.14308955393288184, "grad_norm": 2.8234310150146484, "learning_rate": 1.7577013341114815e-05, "loss": 1.0285, "step": 1891 }, { "epoch": 0.14316522265521547, "grad_norm": 3.015465259552002, "learning_rate": 1.7576330728173047e-05, "loss": 0.6027, "step": 1892 }, { "epoch": 0.14324089137754908, "grad_norm": 2.617048740386963, "learning_rate": 1.7575647578153716e-05, "loss": 0.9196, "step": 1893 }, { "epoch": 0.14331656009988272, "grad_norm": 3.0418808460235596, "learning_rate": 1.757496389109959e-05, "loss": 0.68, "step": 1894 }, { "epoch": 0.14339222882221633, "grad_norm": 2.8369641304016113, "learning_rate": 1.7574279667053494e-05, "loss": 0.8018, "step": 1895 }, { "epoch": 0.14346789754454997, "grad_norm": 2.903010129928589, "learning_rate": 1.7573594906058273e-05, "loss": 0.7899, "step": 1896 }, { "epoch": 0.14354356626688358, "grad_norm": 3.224677324295044, "learning_rate": 1.7572909608156805e-05, "loss": 0.8495, "step": 1897 }, { "epoch": 0.14361923498921722, "grad_norm": 3.112607955932617, "learning_rate": 1.7572223773392012e-05, "loss": 0.7727, "step": 1898 }, { "epoch": 0.14369490371155083, "grad_norm": 2.8533096313476562, "learning_rate": 1.757153740180684e-05, "loss": 0.7837, "step": 1899 }, { "epoch": 0.14377057243388444, "grad_norm": 7.023125648498535, "learning_rate": 1.7570850493444273e-05, "loss": 0.7611, "step": 1900 }, { "epoch": 0.14384624115621808, "grad_norm": 3.051293134689331, "learning_rate": 1.7570163048347325e-05, "loss": 0.8792, "step": 1901 }, { "epoch": 0.1439219098785517, "grad_norm": 2.489708185195923, "learning_rate": 1.7569475066559046e-05, "loss": 0.7576, "step": 1902 }, { "epoch": 0.14399757860088533, "grad_norm": 3.0184719562530518, "learning_rate": 1.7568786548122527e-05, "loss": 0.704, "step": 1903 }, { "epoch": 0.14407324732321894, "grad_norm": 2.3036086559295654, "learning_rate": 1.7568097493080874e-05, "loss": 0.622, "step": 1904 }, { "epoch": 0.14414891604555258, "grad_norm": 2.6877403259277344, "learning_rate": 1.7567407901477243e-05, "loss": 0.6003, "step": 1905 }, { "epoch": 0.1442245847678862, "grad_norm": 2.290517568588257, "learning_rate": 1.7566717773354822e-05, "loss": 0.7039, "step": 1906 }, { "epoch": 0.14430025349021983, "grad_norm": 3.5468292236328125, "learning_rate": 1.7566027108756826e-05, "loss": 0.9343, "step": 1907 }, { "epoch": 0.14437592221255344, "grad_norm": 2.674797296524048, "learning_rate": 1.7565335907726505e-05, "loss": 0.7771, "step": 1908 }, { "epoch": 0.14445159093488708, "grad_norm": 2.3369529247283936, "learning_rate": 1.7564644170307146e-05, "loss": 0.7907, "step": 1909 }, { "epoch": 0.14452725965722069, "grad_norm": 2.8003242015838623, "learning_rate": 1.756395189654207e-05, "loss": 0.9282, "step": 1910 }, { "epoch": 0.14460292837955432, "grad_norm": 3.076770305633545, "learning_rate": 1.7563259086474627e-05, "loss": 0.8343, "step": 1911 }, { "epoch": 0.14467859710188793, "grad_norm": 2.4851486682891846, "learning_rate": 1.7562565740148202e-05, "loss": 0.813, "step": 1912 }, { "epoch": 0.14475426582422155, "grad_norm": 2.4978880882263184, "learning_rate": 1.756187185760621e-05, "loss": 0.7405, "step": 1913 }, { "epoch": 0.14482993454655518, "grad_norm": 2.255244255065918, "learning_rate": 1.7561177438892118e-05, "loss": 0.6685, "step": 1914 }, { "epoch": 0.1449056032688888, "grad_norm": 2.5343151092529297, "learning_rate": 1.7560482484049402e-05, "loss": 0.8281, "step": 1915 }, { "epoch": 0.14498127199122243, "grad_norm": 2.053952693939209, "learning_rate": 1.7559786993121583e-05, "loss": 0.6369, "step": 1916 }, { "epoch": 0.14505694071355604, "grad_norm": 2.417632818222046, "learning_rate": 1.755909096615222e-05, "loss": 0.8845, "step": 1917 }, { "epoch": 0.14513260943588968, "grad_norm": 2.182724714279175, "learning_rate": 1.7558394403184892e-05, "loss": 0.7699, "step": 1918 }, { "epoch": 0.1452082781582233, "grad_norm": 2.5295472145080566, "learning_rate": 1.755769730426323e-05, "loss": 0.8116, "step": 1919 }, { "epoch": 0.14528394688055693, "grad_norm": 2.3989028930664062, "learning_rate": 1.7556999669430882e-05, "loss": 0.7237, "step": 1920 }, { "epoch": 0.14535961560289054, "grad_norm": 2.519035816192627, "learning_rate": 1.755630149873154e-05, "loss": 0.9275, "step": 1921 }, { "epoch": 0.14543528432522418, "grad_norm": 2.8320114612579346, "learning_rate": 1.755560279220892e-05, "loss": 0.8483, "step": 1922 }, { "epoch": 0.1455109530475578, "grad_norm": 2.433471202850342, "learning_rate": 1.755490354990678e-05, "loss": 0.7238, "step": 1923 }, { "epoch": 0.14558662176989143, "grad_norm": 2.112417697906494, "learning_rate": 1.7554203771868918e-05, "loss": 0.8088, "step": 1924 }, { "epoch": 0.14566229049222504, "grad_norm": 3.250727891921997, "learning_rate": 1.755350345813914e-05, "loss": 0.7521, "step": 1925 }, { "epoch": 0.14573795921455865, "grad_norm": 2.6410152912139893, "learning_rate": 1.7552802608761317e-05, "loss": 0.8002, "step": 1926 }, { "epoch": 0.1458136279368923, "grad_norm": 2.4262301921844482, "learning_rate": 1.7552101223779325e-05, "loss": 0.7778, "step": 1927 }, { "epoch": 0.1458892966592259, "grad_norm": 2.871870517730713, "learning_rate": 1.7551399303237097e-05, "loss": 0.8634, "step": 1928 }, { "epoch": 0.14596496538155954, "grad_norm": 2.4270362854003906, "learning_rate": 1.7550696847178586e-05, "loss": 0.8465, "step": 1929 }, { "epoch": 0.14604063410389315, "grad_norm": 2.422614097595215, "learning_rate": 1.7549993855647778e-05, "loss": 0.7685, "step": 1930 }, { "epoch": 0.14611630282622678, "grad_norm": 2.4596493244171143, "learning_rate": 1.7549290328688707e-05, "loss": 0.8147, "step": 1931 }, { "epoch": 0.1461919715485604, "grad_norm": 2.994337558746338, "learning_rate": 1.754858626634542e-05, "loss": 0.7431, "step": 1932 }, { "epoch": 0.14626764027089403, "grad_norm": 3.324469804763794, "learning_rate": 1.754788166866201e-05, "loss": 0.9868, "step": 1933 }, { "epoch": 0.14634330899322764, "grad_norm": 2.912229061126709, "learning_rate": 1.7547176535682607e-05, "loss": 0.8108, "step": 1934 }, { "epoch": 0.14641897771556128, "grad_norm": 3.092162847518921, "learning_rate": 1.754647086745136e-05, "loss": 0.7094, "step": 1935 }, { "epoch": 0.1464946464378949, "grad_norm": 2.593982219696045, "learning_rate": 1.754576466401247e-05, "loss": 0.7324, "step": 1936 }, { "epoch": 0.14657031516022853, "grad_norm": 2.84027361869812, "learning_rate": 1.7545057925410154e-05, "loss": 0.707, "step": 1937 }, { "epoch": 0.14664598388256214, "grad_norm": 2.8210887908935547, "learning_rate": 1.754435065168867e-05, "loss": 0.8568, "step": 1938 }, { "epoch": 0.14672165260489575, "grad_norm": 2.999987840652466, "learning_rate": 1.754364284289232e-05, "loss": 0.768, "step": 1939 }, { "epoch": 0.1467973213272294, "grad_norm": 3.40769362449646, "learning_rate": 1.7542934499065413e-05, "loss": 0.896, "step": 1940 }, { "epoch": 0.146872990049563, "grad_norm": 2.4607155323028564, "learning_rate": 1.7542225620252318e-05, "loss": 0.7203, "step": 1941 }, { "epoch": 0.14694865877189664, "grad_norm": 2.867579460144043, "learning_rate": 1.754151620649743e-05, "loss": 0.7944, "step": 1942 }, { "epoch": 0.14702432749423025, "grad_norm": 2.73762583732605, "learning_rate": 1.7540806257845167e-05, "loss": 0.8689, "step": 1943 }, { "epoch": 0.1470999962165639, "grad_norm": 2.8091366291046143, "learning_rate": 1.7540095774339995e-05, "loss": 0.85, "step": 1944 }, { "epoch": 0.1471756649388975, "grad_norm": 2.514357089996338, "learning_rate": 1.75393847560264e-05, "loss": 0.9253, "step": 1945 }, { "epoch": 0.14725133366123114, "grad_norm": 2.452388286590576, "learning_rate": 1.7538673202948913e-05, "loss": 0.5905, "step": 1946 }, { "epoch": 0.14732700238356475, "grad_norm": 2.8314881324768066, "learning_rate": 1.7537961115152093e-05, "loss": 0.7482, "step": 1947 }, { "epoch": 0.1474026711058984, "grad_norm": 3.156912088394165, "learning_rate": 1.7537248492680532e-05, "loss": 0.7742, "step": 1948 }, { "epoch": 0.147478339828232, "grad_norm": 2.562253713607788, "learning_rate": 1.7536535335578858e-05, "loss": 0.8122, "step": 1949 }, { "epoch": 0.14755400855056564, "grad_norm": 2.4131200313568115, "learning_rate": 1.7535821643891732e-05, "loss": 0.8995, "step": 1950 }, { "epoch": 0.14762967727289925, "grad_norm": 2.465721368789673, "learning_rate": 1.7535107417663845e-05, "loss": 0.7528, "step": 1951 }, { "epoch": 0.14770534599523288, "grad_norm": 2.287510633468628, "learning_rate": 1.7534392656939927e-05, "loss": 0.7947, "step": 1952 }, { "epoch": 0.1477810147175665, "grad_norm": 2.6098928451538086, "learning_rate": 1.7533677361764738e-05, "loss": 0.8785, "step": 1953 }, { "epoch": 0.1478566834399001, "grad_norm": 2.5681772232055664, "learning_rate": 1.7532961532183065e-05, "loss": 0.7377, "step": 1954 }, { "epoch": 0.14793235216223374, "grad_norm": 2.2554850578308105, "learning_rate": 1.753224516823975e-05, "loss": 0.7856, "step": 1955 }, { "epoch": 0.14800802088456735, "grad_norm": 2.409834146499634, "learning_rate": 1.7531528269979642e-05, "loss": 0.7473, "step": 1956 }, { "epoch": 0.148083689606901, "grad_norm": 4.173964023590088, "learning_rate": 1.753081083744764e-05, "loss": 0.9151, "step": 1957 }, { "epoch": 0.1481593583292346, "grad_norm": 3.092050790786743, "learning_rate": 1.753009287068867e-05, "loss": 0.8214, "step": 1958 }, { "epoch": 0.14823502705156824, "grad_norm": 2.639125108718872, "learning_rate": 1.7529374369747697e-05, "loss": 0.6853, "step": 1959 }, { "epoch": 0.14831069577390185, "grad_norm": 2.0184338092803955, "learning_rate": 1.7528655334669715e-05, "loss": 0.7324, "step": 1960 }, { "epoch": 0.1483863644962355, "grad_norm": 2.531182289123535, "learning_rate": 1.7527935765499746e-05, "loss": 0.8407, "step": 1961 }, { "epoch": 0.1484620332185691, "grad_norm": 2.4710707664489746, "learning_rate": 1.7527215662282862e-05, "loss": 0.7486, "step": 1962 }, { "epoch": 0.14853770194090274, "grad_norm": 2.496640920639038, "learning_rate": 1.7526495025064147e-05, "loss": 0.7771, "step": 1963 }, { "epoch": 0.14861337066323635, "grad_norm": 2.6928977966308594, "learning_rate": 1.752577385388874e-05, "loss": 0.7551, "step": 1964 }, { "epoch": 0.14868903938557, "grad_norm": 3.124969720840454, "learning_rate": 1.75250521488018e-05, "loss": 0.8272, "step": 1965 }, { "epoch": 0.1487647081079036, "grad_norm": 2.917346477508545, "learning_rate": 1.7524329909848514e-05, "loss": 0.8293, "step": 1966 }, { "epoch": 0.1488403768302372, "grad_norm": 4.432890892028809, "learning_rate": 1.7523607137074124e-05, "loss": 0.6315, "step": 1967 }, { "epoch": 0.14891604555257085, "grad_norm": 2.767793893814087, "learning_rate": 1.7522883830523887e-05, "loss": 0.8295, "step": 1968 }, { "epoch": 0.14899171427490446, "grad_norm": 2.901761531829834, "learning_rate": 1.7522159990243096e-05, "loss": 0.8094, "step": 1969 }, { "epoch": 0.1490673829972381, "grad_norm": 3.11002254486084, "learning_rate": 1.7521435616277083e-05, "loss": 0.801, "step": 1970 }, { "epoch": 0.1491430517195717, "grad_norm": 2.691927433013916, "learning_rate": 1.7520710708671207e-05, "loss": 0.8218, "step": 1971 }, { "epoch": 0.14921872044190534, "grad_norm": 2.540382146835327, "learning_rate": 1.751998526747087e-05, "loss": 0.8464, "step": 1972 }, { "epoch": 0.14929438916423896, "grad_norm": 2.418942451477051, "learning_rate": 1.75192592927215e-05, "loss": 0.7234, "step": 1973 }, { "epoch": 0.1493700578865726, "grad_norm": 3.039180278778076, "learning_rate": 1.7518532784468555e-05, "loss": 0.7202, "step": 1974 }, { "epoch": 0.1494457266089062, "grad_norm": 3.523127555847168, "learning_rate": 1.7517805742757537e-05, "loss": 0.83, "step": 1975 }, { "epoch": 0.14952139533123984, "grad_norm": 2.5864298343658447, "learning_rate": 1.751707816763397e-05, "loss": 0.9138, "step": 1976 }, { "epoch": 0.14959706405357345, "grad_norm": 2.7911880016326904, "learning_rate": 1.7516350059143425e-05, "loss": 0.7544, "step": 1977 }, { "epoch": 0.1496727327759071, "grad_norm": 3.0859878063201904, "learning_rate": 1.7515621417331493e-05, "loss": 0.77, "step": 1978 }, { "epoch": 0.1497484014982407, "grad_norm": 2.647671699523926, "learning_rate": 1.7514892242243805e-05, "loss": 0.842, "step": 1979 }, { "epoch": 0.1498240702205743, "grad_norm": 2.811793804168701, "learning_rate": 1.7514162533926024e-05, "loss": 0.8385, "step": 1980 }, { "epoch": 0.14989973894290795, "grad_norm": 2.701957941055298, "learning_rate": 1.7513432292423846e-05, "loss": 0.726, "step": 1981 }, { "epoch": 0.14997540766524156, "grad_norm": 2.3943700790405273, "learning_rate": 1.7512701517783006e-05, "loss": 0.8246, "step": 1982 }, { "epoch": 0.1500510763875752, "grad_norm": 2.8560993671417236, "learning_rate": 1.751197021004926e-05, "loss": 0.8316, "step": 1983 }, { "epoch": 0.1501267451099088, "grad_norm": 2.4664223194122314, "learning_rate": 1.7511238369268408e-05, "loss": 0.8168, "step": 1984 }, { "epoch": 0.15020241383224245, "grad_norm": 2.2594258785247803, "learning_rate": 1.7510505995486278e-05, "loss": 0.6974, "step": 1985 }, { "epoch": 0.15027808255457606, "grad_norm": 2.1898341178894043, "learning_rate": 1.7509773088748744e-05, "loss": 0.7319, "step": 1986 }, { "epoch": 0.1503537512769097, "grad_norm": 2.4452483654022217, "learning_rate": 1.7509039649101688e-05, "loss": 0.9508, "step": 1987 }, { "epoch": 0.1504294199992433, "grad_norm": 2.685222864151001, "learning_rate": 1.750830567659105e-05, "loss": 0.6802, "step": 1988 }, { "epoch": 0.15050508872157695, "grad_norm": 2.3486924171447754, "learning_rate": 1.7507571171262793e-05, "loss": 0.745, "step": 1989 }, { "epoch": 0.15058075744391056, "grad_norm": 2.269319534301758, "learning_rate": 1.7506836133162912e-05, "loss": 0.621, "step": 1990 }, { "epoch": 0.1506564261662442, "grad_norm": 2.322559118270874, "learning_rate": 1.7506100562337433e-05, "loss": 0.706, "step": 1991 }, { "epoch": 0.1507320948885778, "grad_norm": 2.2499871253967285, "learning_rate": 1.7505364458832433e-05, "loss": 0.8762, "step": 1992 }, { "epoch": 0.15080776361091142, "grad_norm": 2.313094139099121, "learning_rate": 1.7504627822693997e-05, "loss": 0.8429, "step": 1993 }, { "epoch": 0.15088343233324505, "grad_norm": 2.01436710357666, "learning_rate": 1.750389065396826e-05, "loss": 0.7233, "step": 1994 }, { "epoch": 0.15095910105557866, "grad_norm": 3.0272583961486816, "learning_rate": 1.7503152952701382e-05, "loss": 0.7201, "step": 1995 }, { "epoch": 0.1510347697779123, "grad_norm": 2.1522340774536133, "learning_rate": 1.7502414718939565e-05, "loss": 0.6485, "step": 1996 }, { "epoch": 0.1511104385002459, "grad_norm": 2.177858829498291, "learning_rate": 1.750167595272904e-05, "loss": 0.7589, "step": 1997 }, { "epoch": 0.15118610722257955, "grad_norm": 2.149120569229126, "learning_rate": 1.750093665411607e-05, "loss": 0.6136, "step": 1998 }, { "epoch": 0.15126177594491316, "grad_norm": 2.5754570960998535, "learning_rate": 1.7500196823146948e-05, "loss": 0.6516, "step": 1999 }, { "epoch": 0.1513374446672468, "grad_norm": 2.3540103435516357, "learning_rate": 1.749945645986801e-05, "loss": 0.7556, "step": 2000 }, { "epoch": 0.1514131133895804, "grad_norm": 2.42387318611145, "learning_rate": 1.7498715564325618e-05, "loss": 0.7252, "step": 2001 }, { "epoch": 0.15148878211191405, "grad_norm": 2.679372549057007, "learning_rate": 1.749797413656617e-05, "loss": 0.7455, "step": 2002 }, { "epoch": 0.15156445083424766, "grad_norm": 2.259680986404419, "learning_rate": 1.7497232176636094e-05, "loss": 0.7875, "step": 2003 }, { "epoch": 0.1516401195565813, "grad_norm": 2.2387616634368896, "learning_rate": 1.7496489684581854e-05, "loss": 0.7815, "step": 2004 }, { "epoch": 0.1517157882789149, "grad_norm": 3.6523263454437256, "learning_rate": 1.7495746660449954e-05, "loss": 0.626, "step": 2005 }, { "epoch": 0.15179145700124855, "grad_norm": 2.6934144496917725, "learning_rate": 1.7495003104286916e-05, "loss": 0.7533, "step": 2006 }, { "epoch": 0.15186712572358216, "grad_norm": 2.329314947128296, "learning_rate": 1.749425901613931e-05, "loss": 0.7858, "step": 2007 }, { "epoch": 0.15194279444591577, "grad_norm": 3.2107553482055664, "learning_rate": 1.7493514396053727e-05, "loss": 0.7217, "step": 2008 }, { "epoch": 0.1520184631682494, "grad_norm": 2.6318206787109375, "learning_rate": 1.7492769244076804e-05, "loss": 0.7701, "step": 2009 }, { "epoch": 0.15209413189058302, "grad_norm": 2.870945453643799, "learning_rate": 1.7492023560255202e-05, "loss": 0.8018, "step": 2010 }, { "epoch": 0.15216980061291666, "grad_norm": 2.799070119857788, "learning_rate": 1.7491277344635616e-05, "loss": 0.7906, "step": 2011 }, { "epoch": 0.15224546933525027, "grad_norm": 3.5722224712371826, "learning_rate": 1.7490530597264778e-05, "loss": 0.7233, "step": 2012 }, { "epoch": 0.1523211380575839, "grad_norm": 2.482611894607544, "learning_rate": 1.7489783318189455e-05, "loss": 0.7542, "step": 2013 }, { "epoch": 0.15239680677991752, "grad_norm": 2.59255051612854, "learning_rate": 1.748903550745644e-05, "loss": 0.7671, "step": 2014 }, { "epoch": 0.15247247550225115, "grad_norm": 3.2076199054718018, "learning_rate": 1.7488287165112564e-05, "loss": 0.8936, "step": 2015 }, { "epoch": 0.15254814422458476, "grad_norm": 3.4623420238494873, "learning_rate": 1.748753829120469e-05, "loss": 0.7874, "step": 2016 }, { "epoch": 0.1526238129469184, "grad_norm": 2.2293150424957275, "learning_rate": 1.748678888577972e-05, "loss": 0.8013, "step": 2017 }, { "epoch": 0.152699481669252, "grad_norm": 2.1837499141693115, "learning_rate": 1.748603894888458e-05, "loss": 0.8155, "step": 2018 }, { "epoch": 0.15277515039158565, "grad_norm": 2.574540138244629, "learning_rate": 1.748528848056623e-05, "loss": 0.6882, "step": 2019 }, { "epoch": 0.15285081911391926, "grad_norm": 2.3444480895996094, "learning_rate": 1.7484537480871676e-05, "loss": 0.9241, "step": 2020 }, { "epoch": 0.15292648783625287, "grad_norm": 2.1523497104644775, "learning_rate": 1.7483785949847937e-05, "loss": 0.7816, "step": 2021 }, { "epoch": 0.1530021565585865, "grad_norm": 2.120872735977173, "learning_rate": 1.7483033887542087e-05, "loss": 0.7581, "step": 2022 }, { "epoch": 0.15307782528092012, "grad_norm": 2.379638910293579, "learning_rate": 1.7482281294001218e-05, "loss": 0.7142, "step": 2023 }, { "epoch": 0.15315349400325376, "grad_norm": 1.9186440706253052, "learning_rate": 1.7481528169272455e-05, "loss": 0.6981, "step": 2024 }, { "epoch": 0.15322916272558737, "grad_norm": 2.0575826168060303, "learning_rate": 1.7480774513402966e-05, "loss": 0.6741, "step": 2025 }, { "epoch": 0.153304831447921, "grad_norm": 2.492513656616211, "learning_rate": 1.7480020326439945e-05, "loss": 0.845, "step": 2026 }, { "epoch": 0.15338050017025462, "grad_norm": 2.5875332355499268, "learning_rate": 1.7479265608430632e-05, "loss": 0.7312, "step": 2027 }, { "epoch": 0.15345616889258826, "grad_norm": 3.0655195713043213, "learning_rate": 1.7478510359422273e-05, "loss": 0.8206, "step": 2028 }, { "epoch": 0.15353183761492187, "grad_norm": 2.516611337661743, "learning_rate": 1.7477754579462173e-05, "loss": 0.6667, "step": 2029 }, { "epoch": 0.1536075063372555, "grad_norm": 2.1421144008636475, "learning_rate": 1.7476998268597665e-05, "loss": 0.7155, "step": 2030 }, { "epoch": 0.15368317505958912, "grad_norm": 2.9190337657928467, "learning_rate": 1.7476241426876104e-05, "loss": 0.756, "step": 2031 }, { "epoch": 0.15375884378192275, "grad_norm": 3.1374433040618896, "learning_rate": 1.747548405434489e-05, "loss": 0.7129, "step": 2032 }, { "epoch": 0.15383451250425637, "grad_norm": 3.1990249156951904, "learning_rate": 1.747472615105145e-05, "loss": 0.7409, "step": 2033 }, { "epoch": 0.15391018122658998, "grad_norm": 2.4455344676971436, "learning_rate": 1.7473967717043255e-05, "loss": 0.6613, "step": 2034 }, { "epoch": 0.15398584994892361, "grad_norm": 2.024739980697632, "learning_rate": 1.747320875236779e-05, "loss": 0.8867, "step": 2035 }, { "epoch": 0.15406151867125722, "grad_norm": 2.6114304065704346, "learning_rate": 1.747244925707258e-05, "loss": 0.8703, "step": 2036 }, { "epoch": 0.15413718739359086, "grad_norm": 3.779912233352661, "learning_rate": 1.7471689231205206e-05, "loss": 0.8262, "step": 2037 }, { "epoch": 0.15421285611592447, "grad_norm": 3.2516801357269287, "learning_rate": 1.7470928674813242e-05, "loss": 0.7587, "step": 2038 }, { "epoch": 0.1542885248382581, "grad_norm": 2.9008138179779053, "learning_rate": 1.7470167587944333e-05, "loss": 0.7588, "step": 2039 }, { "epoch": 0.15436419356059172, "grad_norm": 2.617128610610962, "learning_rate": 1.7469405970646126e-05, "loss": 0.6199, "step": 2040 }, { "epoch": 0.15443986228292536, "grad_norm": 3.0137505531311035, "learning_rate": 1.746864382296633e-05, "loss": 0.7195, "step": 2041 }, { "epoch": 0.15451553100525897, "grad_norm": 2.683501720428467, "learning_rate": 1.7467881144952664e-05, "loss": 0.8571, "step": 2042 }, { "epoch": 0.1545911997275926, "grad_norm": 2.612112283706665, "learning_rate": 1.7467117936652896e-05, "loss": 0.8931, "step": 2043 }, { "epoch": 0.15466686844992622, "grad_norm": 3.511695384979248, "learning_rate": 1.7466354198114813e-05, "loss": 0.7837, "step": 2044 }, { "epoch": 0.15474253717225986, "grad_norm": 2.829535961151123, "learning_rate": 1.7465589929386248e-05, "loss": 0.8148, "step": 2045 }, { "epoch": 0.15481820589459347, "grad_norm": 2.3426201343536377, "learning_rate": 1.746482513051506e-05, "loss": 0.6724, "step": 2046 }, { "epoch": 0.15489387461692708, "grad_norm": 2.5401344299316406, "learning_rate": 1.7464059801549144e-05, "loss": 0.9651, "step": 2047 }, { "epoch": 0.15496954333926072, "grad_norm": 2.8630175590515137, "learning_rate": 1.7463293942536427e-05, "loss": 0.8498, "step": 2048 }, { "epoch": 0.15504521206159433, "grad_norm": 2.4896228313446045, "learning_rate": 1.746252755352487e-05, "loss": 0.862, "step": 2049 }, { "epoch": 0.15512088078392797, "grad_norm": 2.259605646133423, "learning_rate": 1.7461760634562468e-05, "loss": 0.633, "step": 2050 }, { "epoch": 0.15519654950626158, "grad_norm": 2.4651870727539062, "learning_rate": 1.7460993185697244e-05, "loss": 0.7007, "step": 2051 }, { "epoch": 0.15527221822859522, "grad_norm": 2.3934268951416016, "learning_rate": 1.7460225206977262e-05, "loss": 0.9508, "step": 2052 }, { "epoch": 0.15534788695092883, "grad_norm": 2.429025650024414, "learning_rate": 1.7459456698450613e-05, "loss": 0.6615, "step": 2053 }, { "epoch": 0.15542355567326246, "grad_norm": 2.19804310798645, "learning_rate": 1.7458687660165425e-05, "loss": 0.8376, "step": 2054 }, { "epoch": 0.15549922439559608, "grad_norm": 2.211962938308716, "learning_rate": 1.7457918092169857e-05, "loss": 0.8152, "step": 2055 }, { "epoch": 0.1555748931179297, "grad_norm": 2.254776954650879, "learning_rate": 1.74571479945121e-05, "loss": 0.8516, "step": 2056 }, { "epoch": 0.15565056184026332, "grad_norm": 2.47976016998291, "learning_rate": 1.7456377367240385e-05, "loss": 0.8315, "step": 2057 }, { "epoch": 0.15572623056259696, "grad_norm": 2.8059258460998535, "learning_rate": 1.7455606210402966e-05, "loss": 0.7777, "step": 2058 }, { "epoch": 0.15580189928493057, "grad_norm": 2.620880603790283, "learning_rate": 1.7454834524048138e-05, "loss": 0.6418, "step": 2059 }, { "epoch": 0.1558775680072642, "grad_norm": 2.414295196533203, "learning_rate": 1.7454062308224226e-05, "loss": 0.7401, "step": 2060 }, { "epoch": 0.15595323672959782, "grad_norm": 2.7423794269561768, "learning_rate": 1.7453289562979585e-05, "loss": 0.8576, "step": 2061 }, { "epoch": 0.15602890545193143, "grad_norm": 3.214839458465576, "learning_rate": 1.7452516288362612e-05, "loss": 0.8235, "step": 2062 }, { "epoch": 0.15610457417426507, "grad_norm": 2.462529182434082, "learning_rate": 1.7451742484421733e-05, "loss": 0.8605, "step": 2063 }, { "epoch": 0.15618024289659868, "grad_norm": 2.2474400997161865, "learning_rate": 1.7450968151205402e-05, "loss": 0.7083, "step": 2064 }, { "epoch": 0.15625591161893232, "grad_norm": 2.528843879699707, "learning_rate": 1.7450193288762116e-05, "loss": 0.8239, "step": 2065 }, { "epoch": 0.15633158034126593, "grad_norm": 2.497174024581909, "learning_rate": 1.7449417897140387e-05, "loss": 0.7607, "step": 2066 }, { "epoch": 0.15640724906359957, "grad_norm": 3.976351261138916, "learning_rate": 1.7448641976388783e-05, "loss": 0.7265, "step": 2067 }, { "epoch": 0.15648291778593318, "grad_norm": 2.027620792388916, "learning_rate": 1.7447865526555894e-05, "loss": 0.8558, "step": 2068 }, { "epoch": 0.15655858650826682, "grad_norm": 2.824955701828003, "learning_rate": 1.7447088547690343e-05, "loss": 0.9394, "step": 2069 }, { "epoch": 0.15663425523060043, "grad_norm": 2.474083185195923, "learning_rate": 1.7446311039840784e-05, "loss": 0.8471, "step": 2070 }, { "epoch": 0.15670992395293407, "grad_norm": 2.226369619369507, "learning_rate": 1.744553300305591e-05, "loss": 0.7453, "step": 2071 }, { "epoch": 0.15678559267526768, "grad_norm": 2.525721788406372, "learning_rate": 1.7444754437384443e-05, "loss": 1.0301, "step": 2072 }, { "epoch": 0.15686126139760131, "grad_norm": 2.348961591720581, "learning_rate": 1.7443975342875138e-05, "loss": 0.7909, "step": 2073 }, { "epoch": 0.15693693011993493, "grad_norm": 2.763505697250366, "learning_rate": 1.7443195719576785e-05, "loss": 0.8576, "step": 2074 }, { "epoch": 0.15701259884226854, "grad_norm": 2.2634928226470947, "learning_rate": 1.7442415567538213e-05, "loss": 0.9044, "step": 2075 }, { "epoch": 0.15708826756460217, "grad_norm": 2.370476722717285, "learning_rate": 1.7441634886808265e-05, "loss": 0.8432, "step": 2076 }, { "epoch": 0.15716393628693578, "grad_norm": 2.5229246616363525, "learning_rate": 1.7440853677435842e-05, "loss": 0.7714, "step": 2077 }, { "epoch": 0.15723960500926942, "grad_norm": 2.90761137008667, "learning_rate": 1.744007193946986e-05, "loss": 0.6313, "step": 2078 }, { "epoch": 0.15731527373160303, "grad_norm": 2.4619617462158203, "learning_rate": 1.7439289672959275e-05, "loss": 0.7495, "step": 2079 }, { "epoch": 0.15739094245393667, "grad_norm": 3.04870867729187, "learning_rate": 1.743850687795307e-05, "loss": 0.8112, "step": 2080 }, { "epoch": 0.15746661117627028, "grad_norm": 2.4026286602020264, "learning_rate": 1.7437723554500277e-05, "loss": 0.772, "step": 2081 }, { "epoch": 0.15754227989860392, "grad_norm": 2.5476691722869873, "learning_rate": 1.743693970264994e-05, "loss": 0.8682, "step": 2082 }, { "epoch": 0.15761794862093753, "grad_norm": 2.528425931930542, "learning_rate": 1.7436155322451153e-05, "loss": 0.9005, "step": 2083 }, { "epoch": 0.15769361734327117, "grad_norm": 2.272146224975586, "learning_rate": 1.743537041395303e-05, "loss": 0.6577, "step": 2084 }, { "epoch": 0.15776928606560478, "grad_norm": 2.186119794845581, "learning_rate": 1.743458497720473e-05, "loss": 0.7003, "step": 2085 }, { "epoch": 0.15784495478793842, "grad_norm": 2.385634660720825, "learning_rate": 1.743379901225544e-05, "loss": 0.8375, "step": 2086 }, { "epoch": 0.15792062351027203, "grad_norm": 3.0107641220092773, "learning_rate": 1.7433012519154378e-05, "loss": 0.8261, "step": 2087 }, { "epoch": 0.15799629223260564, "grad_norm": 2.3825418949127197, "learning_rate": 1.7432225497950792e-05, "loss": 0.729, "step": 2088 }, { "epoch": 0.15807196095493928, "grad_norm": 2.1834664344787598, "learning_rate": 1.7431437948693975e-05, "loss": 0.6568, "step": 2089 }, { "epoch": 0.1581476296772729, "grad_norm": 2.4563395977020264, "learning_rate": 1.7430649871433245e-05, "loss": 0.7753, "step": 2090 }, { "epoch": 0.15822329839960653, "grad_norm": 2.7324671745300293, "learning_rate": 1.742986126621795e-05, "loss": 0.7523, "step": 2091 }, { "epoch": 0.15829896712194014, "grad_norm": 2.3517651557922363, "learning_rate": 1.7429072133097478e-05, "loss": 0.7389, "step": 2092 }, { "epoch": 0.15837463584427378, "grad_norm": 2.4391419887542725, "learning_rate": 1.7428282472121245e-05, "loss": 0.6748, "step": 2093 }, { "epoch": 0.1584503045666074, "grad_norm": 3.3053195476531982, "learning_rate": 1.7427492283338704e-05, "loss": 0.7699, "step": 2094 }, { "epoch": 0.15852597328894102, "grad_norm": 2.2691550254821777, "learning_rate": 1.7426701566799337e-05, "loss": 0.8406, "step": 2095 }, { "epoch": 0.15860164201127464, "grad_norm": 2.280519723892212, "learning_rate": 1.7425910322552666e-05, "loss": 0.8129, "step": 2096 }, { "epoch": 0.15867731073360827, "grad_norm": 2.6433169841766357, "learning_rate": 1.7425118550648234e-05, "loss": 0.7612, "step": 2097 }, { "epoch": 0.15875297945594188, "grad_norm": 2.356234550476074, "learning_rate": 1.742432625113563e-05, "loss": 0.7845, "step": 2098 }, { "epoch": 0.15882864817827552, "grad_norm": 2.7373485565185547, "learning_rate": 1.742353342406447e-05, "loss": 0.6441, "step": 2099 }, { "epoch": 0.15890431690060913, "grad_norm": 2.2492804527282715, "learning_rate": 1.7422740069484397e-05, "loss": 0.6834, "step": 2100 }, { "epoch": 0.15897998562294274, "grad_norm": 2.219045639038086, "learning_rate": 1.7421946187445104e-05, "loss": 0.7691, "step": 2101 }, { "epoch": 0.15905565434527638, "grad_norm": 2.9211206436157227, "learning_rate": 1.7421151777996297e-05, "loss": 0.629, "step": 2102 }, { "epoch": 0.15913132306761, "grad_norm": 3.131239414215088, "learning_rate": 1.7420356841187732e-05, "loss": 0.7825, "step": 2103 }, { "epoch": 0.15920699178994363, "grad_norm": 2.3928134441375732, "learning_rate": 1.7419561377069183e-05, "loss": 0.8064, "step": 2104 }, { "epoch": 0.15928266051227724, "grad_norm": 2.74513840675354, "learning_rate": 1.741876538569047e-05, "loss": 0.8204, "step": 2105 }, { "epoch": 0.15935832923461088, "grad_norm": 2.3963100910186768, "learning_rate": 1.741796886710144e-05, "loss": 1.0466, "step": 2106 }, { "epoch": 0.1594339979569445, "grad_norm": 2.196810007095337, "learning_rate": 1.7417171821351973e-05, "loss": 0.6747, "step": 2107 }, { "epoch": 0.15950966667927813, "grad_norm": 2.6434783935546875, "learning_rate": 1.741637424849198e-05, "loss": 0.8187, "step": 2108 }, { "epoch": 0.15958533540161174, "grad_norm": 2.148426055908203, "learning_rate": 1.741557614857141e-05, "loss": 0.7376, "step": 2109 }, { "epoch": 0.15966100412394538, "grad_norm": 3.2016146183013916, "learning_rate": 1.741477752164024e-05, "loss": 0.7088, "step": 2110 }, { "epoch": 0.159736672846279, "grad_norm": 2.8447532653808594, "learning_rate": 1.7413978367748488e-05, "loss": 0.8271, "step": 2111 }, { "epoch": 0.15981234156861263, "grad_norm": 2.411562919616699, "learning_rate": 1.7413178686946198e-05, "loss": 0.801, "step": 2112 }, { "epoch": 0.15988801029094624, "grad_norm": 2.4940292835235596, "learning_rate": 1.7412378479283445e-05, "loss": 0.838, "step": 2113 }, { "epoch": 0.15996367901327985, "grad_norm": 2.510559320449829, "learning_rate": 1.7411577744810343e-05, "loss": 0.7729, "step": 2114 }, { "epoch": 0.16003934773561349, "grad_norm": 2.928279399871826, "learning_rate": 1.7410776483577036e-05, "loss": 0.9162, "step": 2115 }, { "epoch": 0.1601150164579471, "grad_norm": 2.311375617980957, "learning_rate": 1.7409974695633702e-05, "loss": 0.7684, "step": 2116 }, { "epoch": 0.16019068518028073, "grad_norm": 2.5286309719085693, "learning_rate": 1.740917238103055e-05, "loss": 0.7551, "step": 2117 }, { "epoch": 0.16026635390261434, "grad_norm": 2.7157998085021973, "learning_rate": 1.740836953981783e-05, "loss": 0.9096, "step": 2118 }, { "epoch": 0.16034202262494798, "grad_norm": 2.3941192626953125, "learning_rate": 1.7407566172045808e-05, "loss": 0.7154, "step": 2119 }, { "epoch": 0.1604176913472816, "grad_norm": 3.259812355041504, "learning_rate": 1.74067622777648e-05, "loss": 0.7483, "step": 2120 }, { "epoch": 0.16049336006961523, "grad_norm": 2.8680622577667236, "learning_rate": 1.740595785702515e-05, "loss": 0.7112, "step": 2121 }, { "epoch": 0.16056902879194884, "grad_norm": 2.4445548057556152, "learning_rate": 1.7405152909877228e-05, "loss": 0.7903, "step": 2122 }, { "epoch": 0.16064469751428248, "grad_norm": 2.763958215713501, "learning_rate": 1.7404347436371446e-05, "loss": 0.7796, "step": 2123 }, { "epoch": 0.1607203662366161, "grad_norm": 2.0752618312835693, "learning_rate": 1.7403541436558246e-05, "loss": 0.7199, "step": 2124 }, { "epoch": 0.16079603495894973, "grad_norm": 2.1711764335632324, "learning_rate": 1.74027349104881e-05, "loss": 0.6946, "step": 2125 }, { "epoch": 0.16087170368128334, "grad_norm": 2.151015043258667, "learning_rate": 1.7401927858211516e-05, "loss": 0.7603, "step": 2126 }, { "epoch": 0.16094737240361698, "grad_norm": 4.292178153991699, "learning_rate": 1.7401120279779035e-05, "loss": 0.7286, "step": 2127 }, { "epoch": 0.1610230411259506, "grad_norm": 4.497610569000244, "learning_rate": 1.7400312175241226e-05, "loss": 0.8232, "step": 2128 }, { "epoch": 0.1610987098482842, "grad_norm": 2.505603551864624, "learning_rate": 1.73995035446487e-05, "loss": 0.7859, "step": 2129 }, { "epoch": 0.16117437857061784, "grad_norm": 2.1637277603149414, "learning_rate": 1.73986943880521e-05, "loss": 0.7861, "step": 2130 }, { "epoch": 0.16125004729295145, "grad_norm": 3.3411808013916016, "learning_rate": 1.7397884705502088e-05, "loss": 0.8564, "step": 2131 }, { "epoch": 0.1613257160152851, "grad_norm": 2.2842612266540527, "learning_rate": 1.7397074497049378e-05, "loss": 0.799, "step": 2132 }, { "epoch": 0.1614013847376187, "grad_norm": 3.669283628463745, "learning_rate": 1.73962637627447e-05, "loss": 0.7311, "step": 2133 }, { "epoch": 0.16147705345995234, "grad_norm": 2.489490032196045, "learning_rate": 1.7395452502638826e-05, "loss": 0.8819, "step": 2134 }, { "epoch": 0.16155272218228595, "grad_norm": 2.164292097091675, "learning_rate": 1.7394640716782564e-05, "loss": 0.8138, "step": 2135 }, { "epoch": 0.16162839090461958, "grad_norm": 2.452188014984131, "learning_rate": 1.739382840522675e-05, "loss": 0.7811, "step": 2136 }, { "epoch": 0.1617040596269532, "grad_norm": 2.700749397277832, "learning_rate": 1.739301556802225e-05, "loss": 0.7128, "step": 2137 }, { "epoch": 0.16177972834928683, "grad_norm": 3.382140874862671, "learning_rate": 1.7392202205219974e-05, "loss": 0.9743, "step": 2138 }, { "epoch": 0.16185539707162044, "grad_norm": 2.4139597415924072, "learning_rate": 1.739138831687085e-05, "loss": 0.7896, "step": 2139 }, { "epoch": 0.16193106579395408, "grad_norm": 2.2004284858703613, "learning_rate": 1.7390573903025845e-05, "loss": 0.7469, "step": 2140 }, { "epoch": 0.1620067345162877, "grad_norm": 2.8019471168518066, "learning_rate": 1.7389758963735967e-05, "loss": 0.7453, "step": 2141 }, { "epoch": 0.1620824032386213, "grad_norm": 2.7168564796447754, "learning_rate": 1.7388943499052246e-05, "loss": 0.8727, "step": 2142 }, { "epoch": 0.16215807196095494, "grad_norm": 2.5624351501464844, "learning_rate": 1.7388127509025748e-05, "loss": 0.7883, "step": 2143 }, { "epoch": 0.16223374068328855, "grad_norm": 2.199237585067749, "learning_rate": 1.738731099370758e-05, "loss": 0.8322, "step": 2144 }, { "epoch": 0.1623094094056222, "grad_norm": 2.3310837745666504, "learning_rate": 1.7386493953148867e-05, "loss": 0.7478, "step": 2145 }, { "epoch": 0.1623850781279558, "grad_norm": 2.901883363723755, "learning_rate": 1.7385676387400777e-05, "loss": 0.7515, "step": 2146 }, { "epoch": 0.16246074685028944, "grad_norm": 2.0343589782714844, "learning_rate": 1.7384858296514507e-05, "loss": 0.6157, "step": 2147 }, { "epoch": 0.16253641557262305, "grad_norm": 2.7100648880004883, "learning_rate": 1.7384039680541295e-05, "loss": 0.8054, "step": 2148 }, { "epoch": 0.1626120842949567, "grad_norm": 2.2112245559692383, "learning_rate": 1.7383220539532396e-05, "loss": 0.7847, "step": 2149 }, { "epoch": 0.1626877530172903, "grad_norm": 2.090649127960205, "learning_rate": 1.7382400873539117e-05, "loss": 0.7328, "step": 2150 }, { "epoch": 0.16276342173962394, "grad_norm": 3.082857847213745, "learning_rate": 1.738158068261278e-05, "loss": 0.6708, "step": 2151 }, { "epoch": 0.16283909046195755, "grad_norm": 2.9798154830932617, "learning_rate": 1.7380759966804754e-05, "loss": 0.9559, "step": 2152 }, { "epoch": 0.16291475918429119, "grad_norm": 2.2334978580474854, "learning_rate": 1.7379938726166428e-05, "loss": 0.6963, "step": 2153 }, { "epoch": 0.1629904279066248, "grad_norm": 3.03721022605896, "learning_rate": 1.737911696074924e-05, "loss": 0.7875, "step": 2154 }, { "epoch": 0.1630660966289584, "grad_norm": 2.496274471282959, "learning_rate": 1.7378294670604644e-05, "loss": 0.8951, "step": 2155 }, { "epoch": 0.16314176535129205, "grad_norm": 2.7649779319763184, "learning_rate": 1.7377471855784138e-05, "loss": 0.8018, "step": 2156 }, { "epoch": 0.16321743407362566, "grad_norm": 5.206967353820801, "learning_rate": 1.7376648516339247e-05, "loss": 0.7761, "step": 2157 }, { "epoch": 0.1632931027959593, "grad_norm": 2.439495086669922, "learning_rate": 1.7375824652321533e-05, "loss": 0.6983, "step": 2158 }, { "epoch": 0.1633687715182929, "grad_norm": 2.5603365898132324, "learning_rate": 1.737500026378259e-05, "loss": 0.7642, "step": 2159 }, { "epoch": 0.16344444024062654, "grad_norm": 2.957632303237915, "learning_rate": 1.7374175350774042e-05, "loss": 0.829, "step": 2160 }, { "epoch": 0.16352010896296015, "grad_norm": 2.3568167686462402, "learning_rate": 1.7373349913347546e-05, "loss": 0.8891, "step": 2161 }, { "epoch": 0.1635957776852938, "grad_norm": 2.483896017074585, "learning_rate": 1.7372523951554797e-05, "loss": 0.6859, "step": 2162 }, { "epoch": 0.1636714464076274, "grad_norm": 2.430391788482666, "learning_rate": 1.737169746544752e-05, "loss": 0.8047, "step": 2163 }, { "epoch": 0.16374711512996104, "grad_norm": 2.568268060684204, "learning_rate": 1.7370870455077468e-05, "loss": 0.6092, "step": 2164 }, { "epoch": 0.16382278385229465, "grad_norm": 2.6781516075134277, "learning_rate": 1.7370042920496433e-05, "loss": 0.7879, "step": 2165 }, { "epoch": 0.1638984525746283, "grad_norm": 2.247915506362915, "learning_rate": 1.7369214861756238e-05, "loss": 0.7788, "step": 2166 }, { "epoch": 0.1639741212969619, "grad_norm": 2.176671028137207, "learning_rate": 1.7368386278908742e-05, "loss": 0.8544, "step": 2167 }, { "epoch": 0.1640497900192955, "grad_norm": 2.515249013900757, "learning_rate": 1.7367557172005827e-05, "loss": 0.7041, "step": 2168 }, { "epoch": 0.16412545874162915, "grad_norm": 2.1374387741088867, "learning_rate": 1.736672754109942e-05, "loss": 0.8569, "step": 2169 }, { "epoch": 0.16420112746396276, "grad_norm": 2.5364511013031006, "learning_rate": 1.7365897386241472e-05, "loss": 0.7735, "step": 2170 }, { "epoch": 0.1642767961862964, "grad_norm": 2.0026803016662598, "learning_rate": 1.7365066707483972e-05, "loss": 0.8604, "step": 2171 }, { "epoch": 0.16435246490863, "grad_norm": 20.266813278198242, "learning_rate": 1.736423550487894e-05, "loss": 0.871, "step": 2172 }, { "epoch": 0.16442813363096365, "grad_norm": 1.7573026418685913, "learning_rate": 1.736340377847843e-05, "loss": 0.6222, "step": 2173 }, { "epoch": 0.16450380235329726, "grad_norm": 2.631108522415161, "learning_rate": 1.736257152833452e-05, "loss": 0.7496, "step": 2174 }, { "epoch": 0.1645794710756309, "grad_norm": 2.149601459503174, "learning_rate": 1.7361738754499332e-05, "loss": 0.7281, "step": 2175 }, { "epoch": 0.1646551397979645, "grad_norm": 2.6889591217041016, "learning_rate": 1.736090545702502e-05, "loss": 0.7323, "step": 2176 }, { "epoch": 0.16473080852029814, "grad_norm": 2.2632665634155273, "learning_rate": 1.736007163596377e-05, "loss": 0.8867, "step": 2177 }, { "epoch": 0.16480647724263175, "grad_norm": 2.988801956176758, "learning_rate": 1.735923729136779e-05, "loss": 0.8742, "step": 2178 }, { "epoch": 0.1648821459649654, "grad_norm": 2.2661144733428955, "learning_rate": 1.7358402423289332e-05, "loss": 0.6946, "step": 2179 }, { "epoch": 0.164957814687299, "grad_norm": 2.401752233505249, "learning_rate": 1.735756703178068e-05, "loss": 0.8024, "step": 2180 }, { "epoch": 0.16503348340963264, "grad_norm": 2.7266244888305664, "learning_rate": 1.7356731116894153e-05, "loss": 0.7484, "step": 2181 }, { "epoch": 0.16510915213196625, "grad_norm": 3.7801167964935303, "learning_rate": 1.7355894678682094e-05, "loss": 0.7794, "step": 2182 }, { "epoch": 0.16518482085429986, "grad_norm": 2.931405544281006, "learning_rate": 1.7355057717196883e-05, "loss": 0.6981, "step": 2183 }, { "epoch": 0.1652604895766335, "grad_norm": 3.314436435699463, "learning_rate": 1.7354220232490932e-05, "loss": 0.8774, "step": 2184 }, { "epoch": 0.1653361582989671, "grad_norm": 2.7740743160247803, "learning_rate": 1.735338222461669e-05, "loss": 0.6553, "step": 2185 }, { "epoch": 0.16541182702130075, "grad_norm": 2.309906482696533, "learning_rate": 1.735254369362664e-05, "loss": 0.7038, "step": 2186 }, { "epoch": 0.16548749574363436, "grad_norm": 2.7351341247558594, "learning_rate": 1.7351704639573284e-05, "loss": 0.7777, "step": 2187 }, { "epoch": 0.165563164465968, "grad_norm": 3.7266592979431152, "learning_rate": 1.735086506250917e-05, "loss": 0.7223, "step": 2188 }, { "epoch": 0.1656388331883016, "grad_norm": 7.991847038269043, "learning_rate": 1.7350024962486876e-05, "loss": 0.6462, "step": 2189 }, { "epoch": 0.16571450191063525, "grad_norm": 2.2966339588165283, "learning_rate": 1.7349184339559015e-05, "loss": 0.8108, "step": 2190 }, { "epoch": 0.16579017063296886, "grad_norm": 2.601431131362915, "learning_rate": 1.7348343193778223e-05, "loss": 0.7492, "step": 2191 }, { "epoch": 0.1658658393553025, "grad_norm": 3.003119945526123, "learning_rate": 1.7347501525197177e-05, "loss": 0.7404, "step": 2192 }, { "epoch": 0.1659415080776361, "grad_norm": 2.2687859535217285, "learning_rate": 1.734665933386859e-05, "loss": 0.7979, "step": 2193 }, { "epoch": 0.16601717679996975, "grad_norm": 2.463181257247925, "learning_rate": 1.73458166198452e-05, "loss": 0.8188, "step": 2194 }, { "epoch": 0.16609284552230336, "grad_norm": 2.8143796920776367, "learning_rate": 1.7344973383179776e-05, "loss": 0.8257, "step": 2195 }, { "epoch": 0.16616851424463697, "grad_norm": 2.4394776821136475, "learning_rate": 1.7344129623925128e-05, "loss": 0.7174, "step": 2196 }, { "epoch": 0.1662441829669706, "grad_norm": 2.9498252868652344, "learning_rate": 1.7343285342134096e-05, "loss": 0.7468, "step": 2197 }, { "epoch": 0.16631985168930422, "grad_norm": 2.7809300422668457, "learning_rate": 1.734244053785955e-05, "loss": 0.917, "step": 2198 }, { "epoch": 0.16639552041163785, "grad_norm": 2.0343682765960693, "learning_rate": 1.7341595211154397e-05, "loss": 0.8867, "step": 2199 }, { "epoch": 0.16647118913397146, "grad_norm": 2.406003713607788, "learning_rate": 1.7340749362071567e-05, "loss": 0.7902, "step": 2200 }, { "epoch": 0.1665468578563051, "grad_norm": 3.3123080730438232, "learning_rate": 1.733990299066404e-05, "loss": 0.8509, "step": 2201 }, { "epoch": 0.1666225265786387, "grad_norm": 2.0788116455078125, "learning_rate": 1.733905609698481e-05, "loss": 0.6832, "step": 2202 }, { "epoch": 0.16669819530097235, "grad_norm": 2.732825517654419, "learning_rate": 1.7338208681086916e-05, "loss": 0.789, "step": 2203 }, { "epoch": 0.16677386402330596, "grad_norm": 2.7512941360473633, "learning_rate": 1.7337360743023425e-05, "loss": 0.9523, "step": 2204 }, { "epoch": 0.1668495327456396, "grad_norm": 2.181548833847046, "learning_rate": 1.733651228284744e-05, "loss": 0.7516, "step": 2205 }, { "epoch": 0.1669252014679732, "grad_norm": 2.5979111194610596, "learning_rate": 1.733566330061209e-05, "loss": 0.861, "step": 2206 }, { "epoch": 0.16700087019030685, "grad_norm": 2.890141248703003, "learning_rate": 1.7334813796370546e-05, "loss": 0.8529, "step": 2207 }, { "epoch": 0.16707653891264046, "grad_norm": 2.580782175064087, "learning_rate": 1.7333963770176002e-05, "loss": 0.8297, "step": 2208 }, { "epoch": 0.16715220763497407, "grad_norm": 3.2536733150482178, "learning_rate": 1.7333113222081692e-05, "loss": 0.723, "step": 2209 }, { "epoch": 0.1672278763573077, "grad_norm": 2.6812384128570557, "learning_rate": 1.733226215214088e-05, "loss": 0.84, "step": 2210 }, { "epoch": 0.16730354507964132, "grad_norm": 4.129171371459961, "learning_rate": 1.733141056040686e-05, "loss": 0.8535, "step": 2211 }, { "epoch": 0.16737921380197496, "grad_norm": 2.4980597496032715, "learning_rate": 1.7330558446932965e-05, "loss": 0.8225, "step": 2212 }, { "epoch": 0.16745488252430857, "grad_norm": 2.615471839904785, "learning_rate": 1.7329705811772556e-05, "loss": 0.7097, "step": 2213 }, { "epoch": 0.1675305512466422, "grad_norm": 2.604362964630127, "learning_rate": 1.7328852654979026e-05, "loss": 0.8121, "step": 2214 }, { "epoch": 0.16760621996897582, "grad_norm": 2.1432902812957764, "learning_rate": 1.732799897660581e-05, "loss": 0.5405, "step": 2215 }, { "epoch": 0.16768188869130946, "grad_norm": 2.9844110012054443, "learning_rate": 1.7327144776706355e-05, "loss": 0.8734, "step": 2216 }, { "epoch": 0.16775755741364307, "grad_norm": 2.605469226837158, "learning_rate": 1.7326290055334162e-05, "loss": 0.8101, "step": 2217 }, { "epoch": 0.1678332261359767, "grad_norm": 3.859015464782715, "learning_rate": 1.7325434812542757e-05, "loss": 0.7934, "step": 2218 }, { "epoch": 0.16790889485831031, "grad_norm": 2.154299020767212, "learning_rate": 1.7324579048385696e-05, "loss": 0.7312, "step": 2219 }, { "epoch": 0.16798456358064395, "grad_norm": 3.647308111190796, "learning_rate": 1.732372276291657e-05, "loss": 0.7668, "step": 2220 }, { "epoch": 0.16806023230297756, "grad_norm": 2.0847365856170654, "learning_rate": 1.7322865956189003e-05, "loss": 0.7016, "step": 2221 }, { "epoch": 0.16813590102531117, "grad_norm": 2.722703695297241, "learning_rate": 1.732200862825665e-05, "loss": 0.7803, "step": 2222 }, { "epoch": 0.1682115697476448, "grad_norm": 2.674581527709961, "learning_rate": 1.7321150779173197e-05, "loss": 0.854, "step": 2223 }, { "epoch": 0.16828723846997842, "grad_norm": 3.1795260906219482, "learning_rate": 1.732029240899237e-05, "loss": 0.7935, "step": 2224 }, { "epoch": 0.16836290719231206, "grad_norm": 2.396897792816162, "learning_rate": 1.7319433517767923e-05, "loss": 0.7769, "step": 2225 }, { "epoch": 0.16843857591464567, "grad_norm": 2.776615619659424, "learning_rate": 1.731857410555364e-05, "loss": 0.6984, "step": 2226 }, { "epoch": 0.1685142446369793, "grad_norm": 2.690028429031372, "learning_rate": 1.731771417240334e-05, "loss": 0.7615, "step": 2227 }, { "epoch": 0.16858991335931292, "grad_norm": 2.915459156036377, "learning_rate": 1.731685371837088e-05, "loss": 0.9331, "step": 2228 }, { "epoch": 0.16866558208164656, "grad_norm": 2.515017509460449, "learning_rate": 1.7315992743510135e-05, "loss": 0.7996, "step": 2229 }, { "epoch": 0.16874125080398017, "grad_norm": 2.5439369678497314, "learning_rate": 1.7315131247875028e-05, "loss": 0.873, "step": 2230 }, { "epoch": 0.1688169195263138, "grad_norm": 3.400592803955078, "learning_rate": 1.7314269231519512e-05, "loss": 0.8382, "step": 2231 }, { "epoch": 0.16889258824864742, "grad_norm": 3.8131964206695557, "learning_rate": 1.7313406694497562e-05, "loss": 0.786, "step": 2232 }, { "epoch": 0.16896825697098106, "grad_norm": 2.194751501083374, "learning_rate": 1.7312543636863197e-05, "loss": 0.7376, "step": 2233 }, { "epoch": 0.16904392569331467, "grad_norm": 2.373616933822632, "learning_rate": 1.731168005867046e-05, "loss": 0.6248, "step": 2234 }, { "epoch": 0.1691195944156483, "grad_norm": 2.5149641036987305, "learning_rate": 1.731081595997344e-05, "loss": 0.8259, "step": 2235 }, { "epoch": 0.16919526313798192, "grad_norm": 2.6134889125823975, "learning_rate": 1.730995134082624e-05, "loss": 0.804, "step": 2236 }, { "epoch": 0.16927093186031553, "grad_norm": 2.909189462661743, "learning_rate": 1.730908620128301e-05, "loss": 0.8914, "step": 2237 }, { "epoch": 0.16934660058264916, "grad_norm": 2.5435116291046143, "learning_rate": 1.7308220541397926e-05, "loss": 0.8368, "step": 2238 }, { "epoch": 0.16942226930498278, "grad_norm": 2.7107224464416504, "learning_rate": 1.7307354361225204e-05, "loss": 0.9474, "step": 2239 }, { "epoch": 0.1694979380273164, "grad_norm": 2.7715609073638916, "learning_rate": 1.730648766081908e-05, "loss": 0.7627, "step": 2240 }, { "epoch": 0.16957360674965002, "grad_norm": 2.654773473739624, "learning_rate": 1.730562044023383e-05, "loss": 0.7729, "step": 2241 }, { "epoch": 0.16964927547198366, "grad_norm": 2.386650800704956, "learning_rate": 1.730475269952377e-05, "loss": 0.6608, "step": 2242 }, { "epoch": 0.16972494419431727, "grad_norm": 2.307753086090088, "learning_rate": 1.730388443874323e-05, "loss": 0.7689, "step": 2243 }, { "epoch": 0.1698006129166509, "grad_norm": 2.196772336959839, "learning_rate": 1.7303015657946592e-05, "loss": 0.7859, "step": 2244 }, { "epoch": 0.16987628163898452, "grad_norm": 2.593203544616699, "learning_rate": 1.730214635718826e-05, "loss": 0.7733, "step": 2245 }, { "epoch": 0.16995195036131816, "grad_norm": 2.494314193725586, "learning_rate": 1.7301276536522664e-05, "loss": 0.7156, "step": 2246 }, { "epoch": 0.17002761908365177, "grad_norm": 2.721299648284912, "learning_rate": 1.7300406196004286e-05, "loss": 0.738, "step": 2247 }, { "epoch": 0.1701032878059854, "grad_norm": 2.1940648555755615, "learning_rate": 1.7299535335687622e-05, "loss": 0.7942, "step": 2248 }, { "epoch": 0.17017895652831902, "grad_norm": 2.7148277759552, "learning_rate": 1.7298663955627216e-05, "loss": 0.8078, "step": 2249 }, { "epoch": 0.17025462525065263, "grad_norm": 3.0322153568267822, "learning_rate": 1.729779205587763e-05, "loss": 0.9257, "step": 2250 }, { "epoch": 0.17033029397298627, "grad_norm": 2.168626546859741, "learning_rate": 1.7296919636493464e-05, "loss": 0.8326, "step": 2251 }, { "epoch": 0.17040596269531988, "grad_norm": 2.8294248580932617, "learning_rate": 1.729604669752936e-05, "loss": 0.8557, "step": 2252 }, { "epoch": 0.17048163141765352, "grad_norm": 2.4835293292999268, "learning_rate": 1.7295173239039975e-05, "loss": 0.7724, "step": 2253 }, { "epoch": 0.17055730013998713, "grad_norm": 3.3324198722839355, "learning_rate": 1.7294299261080015e-05, "loss": 0.7424, "step": 2254 }, { "epoch": 0.17063296886232077, "grad_norm": 2.104118824005127, "learning_rate": 1.7293424763704206e-05, "loss": 0.7898, "step": 2255 }, { "epoch": 0.17070863758465438, "grad_norm": 2.7089343070983887, "learning_rate": 1.7292549746967316e-05, "loss": 0.9443, "step": 2256 }, { "epoch": 0.17078430630698802, "grad_norm": 3.223379611968994, "learning_rate": 1.7291674210924138e-05, "loss": 0.7764, "step": 2257 }, { "epoch": 0.17085997502932163, "grad_norm": 2.709465980529785, "learning_rate": 1.7290798155629502e-05, "loss": 0.7964, "step": 2258 }, { "epoch": 0.17093564375165526, "grad_norm": 2.5600428581237793, "learning_rate": 1.7289921581138273e-05, "loss": 0.7413, "step": 2259 }, { "epoch": 0.17101131247398887, "grad_norm": 1.963610053062439, "learning_rate": 1.7289044487505337e-05, "loss": 0.6844, "step": 2260 }, { "epoch": 0.1710869811963225, "grad_norm": 2.8103370666503906, "learning_rate": 1.728816687478563e-05, "loss": 0.6223, "step": 2261 }, { "epoch": 0.17116264991865612, "grad_norm": 2.9981131553649902, "learning_rate": 1.7287288743034103e-05, "loss": 0.7519, "step": 2262 }, { "epoch": 0.17123831864098973, "grad_norm": 2.655627965927124, "learning_rate": 1.728641009230575e-05, "loss": 0.7625, "step": 2263 }, { "epoch": 0.17131398736332337, "grad_norm": 2.775040626525879, "learning_rate": 1.72855309226556e-05, "loss": 0.7466, "step": 2264 }, { "epoch": 0.17138965608565698, "grad_norm": 3.4195356369018555, "learning_rate": 1.72846512341387e-05, "loss": 0.6637, "step": 2265 }, { "epoch": 0.17146532480799062, "grad_norm": 3.398912191390991, "learning_rate": 1.7283771026810144e-05, "loss": 0.7456, "step": 2266 }, { "epoch": 0.17154099353032423, "grad_norm": 2.160043478012085, "learning_rate": 1.7282890300725054e-05, "loss": 0.6909, "step": 2267 }, { "epoch": 0.17161666225265787, "grad_norm": 1.9356389045715332, "learning_rate": 1.7282009055938587e-05, "loss": 0.7153, "step": 2268 }, { "epoch": 0.17169233097499148, "grad_norm": 3.335268259048462, "learning_rate": 1.728112729250592e-05, "loss": 0.6971, "step": 2269 }, { "epoch": 0.17176799969732512, "grad_norm": 2.4439728260040283, "learning_rate": 1.728024501048228e-05, "loss": 0.9026, "step": 2270 }, { "epoch": 0.17184366841965873, "grad_norm": 3.08240008354187, "learning_rate": 1.7279362209922922e-05, "loss": 0.7948, "step": 2271 }, { "epoch": 0.17191933714199237, "grad_norm": 2.764417886734009, "learning_rate": 1.7278478890883115e-05, "loss": 0.8714, "step": 2272 }, { "epoch": 0.17199500586432598, "grad_norm": 2.609437942504883, "learning_rate": 1.727759505341819e-05, "loss": 0.7332, "step": 2273 }, { "epoch": 0.17207067458665962, "grad_norm": 2.7951455116271973, "learning_rate": 1.7276710697583485e-05, "loss": 0.7675, "step": 2274 }, { "epoch": 0.17214634330899323, "grad_norm": 2.2547028064727783, "learning_rate": 1.7275825823434386e-05, "loss": 0.7684, "step": 2275 }, { "epoch": 0.17222201203132684, "grad_norm": 2.3958706855773926, "learning_rate": 1.727494043102631e-05, "loss": 0.7689, "step": 2276 }, { "epoch": 0.17229768075366048, "grad_norm": 2.6258463859558105, "learning_rate": 1.7274054520414697e-05, "loss": 0.6702, "step": 2277 }, { "epoch": 0.1723733494759941, "grad_norm": 2.128289222717285, "learning_rate": 1.7273168091655028e-05, "loss": 0.8102, "step": 2278 }, { "epoch": 0.17244901819832772, "grad_norm": 2.5921168327331543, "learning_rate": 1.727228114480282e-05, "loss": 0.7875, "step": 2279 }, { "epoch": 0.17252468692066134, "grad_norm": 2.994091033935547, "learning_rate": 1.7271393679913604e-05, "loss": 0.8095, "step": 2280 }, { "epoch": 0.17260035564299497, "grad_norm": 2.6522233486175537, "learning_rate": 1.7270505697042966e-05, "loss": 0.8525, "step": 2281 }, { "epoch": 0.17267602436532858, "grad_norm": 1.99582040309906, "learning_rate": 1.7269617196246514e-05, "loss": 0.7875, "step": 2282 }, { "epoch": 0.17275169308766222, "grad_norm": 2.60740065574646, "learning_rate": 1.726872817757988e-05, "loss": 0.8859, "step": 2283 }, { "epoch": 0.17282736180999583, "grad_norm": 1.9426453113555908, "learning_rate": 1.7267838641098748e-05, "loss": 0.9347, "step": 2284 }, { "epoch": 0.17290303053232947, "grad_norm": 2.676067352294922, "learning_rate": 1.7266948586858816e-05, "loss": 0.674, "step": 2285 }, { "epoch": 0.17297869925466308, "grad_norm": 2.255591869354248, "learning_rate": 1.7266058014915826e-05, "loss": 0.7917, "step": 2286 }, { "epoch": 0.17305436797699672, "grad_norm": 2.7783968448638916, "learning_rate": 1.7265166925325547e-05, "loss": 0.8044, "step": 2287 }, { "epoch": 0.17313003669933033, "grad_norm": 2.7550559043884277, "learning_rate": 1.7264275318143784e-05, "loss": 0.6446, "step": 2288 }, { "epoch": 0.17320570542166394, "grad_norm": 3.500746488571167, "learning_rate": 1.726338319342637e-05, "loss": 0.8033, "step": 2289 }, { "epoch": 0.17328137414399758, "grad_norm": 2.1858432292938232, "learning_rate": 1.7262490551229173e-05, "loss": 0.647, "step": 2290 }, { "epoch": 0.1733570428663312, "grad_norm": 2.3065178394317627, "learning_rate": 1.726159739160809e-05, "loss": 0.9231, "step": 2291 }, { "epoch": 0.17343271158866483, "grad_norm": 2.7201311588287354, "learning_rate": 1.7260703714619062e-05, "loss": 0.8506, "step": 2292 }, { "epoch": 0.17350838031099844, "grad_norm": 2.0879547595977783, "learning_rate": 1.725980952031805e-05, "loss": 0.8429, "step": 2293 }, { "epoch": 0.17358404903333208, "grad_norm": 6.11818265914917, "learning_rate": 1.7258914808761048e-05, "loss": 0.6266, "step": 2294 }, { "epoch": 0.1736597177556657, "grad_norm": 2.2385120391845703, "learning_rate": 1.7258019580004084e-05, "loss": 0.8526, "step": 2295 }, { "epoch": 0.17373538647799933, "grad_norm": 3.8654024600982666, "learning_rate": 1.725712383410323e-05, "loss": 0.7531, "step": 2296 }, { "epoch": 0.17381105520033294, "grad_norm": 2.5260634422302246, "learning_rate": 1.7256227571114577e-05, "loss": 0.7502, "step": 2297 }, { "epoch": 0.17388672392266658, "grad_norm": 2.305957794189453, "learning_rate": 1.7255330791094244e-05, "loss": 0.7199, "step": 2298 }, { "epoch": 0.17396239264500019, "grad_norm": 2.708401918411255, "learning_rate": 1.7254433494098393e-05, "loss": 0.762, "step": 2299 }, { "epoch": 0.17403806136733382, "grad_norm": 3.0765719413757324, "learning_rate": 1.7253535680183228e-05, "loss": 0.6405, "step": 2300 }, { "epoch": 0.17411373008966743, "grad_norm": 2.6883769035339355, "learning_rate": 1.7252637349404956e-05, "loss": 0.7477, "step": 2301 }, { "epoch": 0.17418939881200107, "grad_norm": 5.647037982940674, "learning_rate": 1.725173850181984e-05, "loss": 0.833, "step": 2302 }, { "epoch": 0.17426506753433468, "grad_norm": 2.7593023777008057, "learning_rate": 1.725083913748418e-05, "loss": 0.8364, "step": 2303 }, { "epoch": 0.1743407362566683, "grad_norm": 2.6544225215911865, "learning_rate": 1.7249939256454277e-05, "loss": 0.7959, "step": 2304 }, { "epoch": 0.17441640497900193, "grad_norm": 2.8478567600250244, "learning_rate": 1.7249038858786496e-05, "loss": 0.8185, "step": 2305 }, { "epoch": 0.17449207370133554, "grad_norm": 2.937596082687378, "learning_rate": 1.7248137944537224e-05, "loss": 0.7666, "step": 2306 }, { "epoch": 0.17456774242366918, "grad_norm": 2.3636603355407715, "learning_rate": 1.7247236513762876e-05, "loss": 0.8019, "step": 2307 }, { "epoch": 0.1746434111460028, "grad_norm": 2.4322621822357178, "learning_rate": 1.72463345665199e-05, "loss": 0.7484, "step": 2308 }, { "epoch": 0.17471907986833643, "grad_norm": 2.590067148208618, "learning_rate": 1.7245432102864782e-05, "loss": 0.6762, "step": 2309 }, { "epoch": 0.17479474859067004, "grad_norm": 2.2933037281036377, "learning_rate": 1.7244529122854035e-05, "loss": 0.7488, "step": 2310 }, { "epoch": 0.17487041731300368, "grad_norm": 2.3996517658233643, "learning_rate": 1.724362562654421e-05, "loss": 0.7258, "step": 2311 }, { "epoch": 0.1749460860353373, "grad_norm": 2.4362945556640625, "learning_rate": 1.7242721613991887e-05, "loss": 0.743, "step": 2312 }, { "epoch": 0.17502175475767093, "grad_norm": 2.572498321533203, "learning_rate": 1.7241817085253678e-05, "loss": 0.7258, "step": 2313 }, { "epoch": 0.17509742348000454, "grad_norm": 2.252002477645874, "learning_rate": 1.724091204038622e-05, "loss": 0.752, "step": 2314 }, { "epoch": 0.17517309220233818, "grad_norm": 2.6183085441589355, "learning_rate": 1.7240006479446202e-05, "loss": 0.5796, "step": 2315 }, { "epoch": 0.1752487609246718, "grad_norm": 3.0919864177703857, "learning_rate": 1.723910040249032e-05, "loss": 0.841, "step": 2316 }, { "epoch": 0.1753244296470054, "grad_norm": 2.5486013889312744, "learning_rate": 1.7238193809575325e-05, "loss": 0.8376, "step": 2317 }, { "epoch": 0.17540009836933904, "grad_norm": 2.988577127456665, "learning_rate": 1.723728670075799e-05, "loss": 0.8065, "step": 2318 }, { "epoch": 0.17547576709167265, "grad_norm": 2.734192371368408, "learning_rate": 1.7236379076095118e-05, "loss": 0.7786, "step": 2319 }, { "epoch": 0.17555143581400628, "grad_norm": 2.8744254112243652, "learning_rate": 1.723547093564355e-05, "loss": 0.8307, "step": 2320 }, { "epoch": 0.1756271045363399, "grad_norm": 2.407675266265869, "learning_rate": 1.7234562279460156e-05, "loss": 0.7314, "step": 2321 }, { "epoch": 0.17570277325867353, "grad_norm": 2.588304281234741, "learning_rate": 1.7233653107601833e-05, "loss": 0.7172, "step": 2322 }, { "epoch": 0.17577844198100714, "grad_norm": 2.3886618614196777, "learning_rate": 1.7232743420125526e-05, "loss": 0.9478, "step": 2323 }, { "epoch": 0.17585411070334078, "grad_norm": 3.020280599594116, "learning_rate": 1.7231833217088195e-05, "loss": 0.8497, "step": 2324 }, { "epoch": 0.1759297794256744, "grad_norm": 2.5860559940338135, "learning_rate": 1.7230922498546847e-05, "loss": 0.8636, "step": 2325 }, { "epoch": 0.17600544814800803, "grad_norm": 2.4977405071258545, "learning_rate": 1.7230011264558506e-05, "loss": 0.8239, "step": 2326 }, { "epoch": 0.17608111687034164, "grad_norm": 2.03897762298584, "learning_rate": 1.7229099515180243e-05, "loss": 0.6944, "step": 2327 }, { "epoch": 0.17615678559267528, "grad_norm": 3.2319719791412354, "learning_rate": 1.7228187250469154e-05, "loss": 0.9229, "step": 2328 }, { "epoch": 0.1762324543150089, "grad_norm": 2.1355583667755127, "learning_rate": 1.7227274470482363e-05, "loss": 0.9233, "step": 2329 }, { "epoch": 0.1763081230373425, "grad_norm": 2.936140298843384, "learning_rate": 1.7226361175277034e-05, "loss": 0.6154, "step": 2330 }, { "epoch": 0.17638379175967614, "grad_norm": 2.1924805641174316, "learning_rate": 1.7225447364910364e-05, "loss": 0.9198, "step": 2331 }, { "epoch": 0.17645946048200975, "grad_norm": 2.7870981693267822, "learning_rate": 1.7224533039439573e-05, "loss": 0.7164, "step": 2332 }, { "epoch": 0.1765351292043434, "grad_norm": 2.4783294200897217, "learning_rate": 1.722361819892192e-05, "loss": 0.6998, "step": 2333 }, { "epoch": 0.176610797926677, "grad_norm": 2.5335793495178223, "learning_rate": 1.7222702843414703e-05, "loss": 0.7745, "step": 2334 }, { "epoch": 0.17668646664901064, "grad_norm": 2.6729066371917725, "learning_rate": 1.7221786972975234e-05, "loss": 0.8394, "step": 2335 }, { "epoch": 0.17676213537134425, "grad_norm": 2.4603934288024902, "learning_rate": 1.7220870587660872e-05, "loss": 0.8235, "step": 2336 }, { "epoch": 0.17683780409367789, "grad_norm": 2.5660200119018555, "learning_rate": 1.7219953687529006e-05, "loss": 0.8368, "step": 2337 }, { "epoch": 0.1769134728160115, "grad_norm": 2.5912487506866455, "learning_rate": 1.7219036272637054e-05, "loss": 0.7614, "step": 2338 }, { "epoch": 0.17698914153834513, "grad_norm": 2.4109630584716797, "learning_rate": 1.7218118343042468e-05, "loss": 0.693, "step": 2339 }, { "epoch": 0.17706481026067875, "grad_norm": 2.0487425327301025, "learning_rate": 1.7217199898802726e-05, "loss": 0.9291, "step": 2340 }, { "epoch": 0.17714047898301238, "grad_norm": 2.833705425262451, "learning_rate": 1.721628093997535e-05, "loss": 0.8157, "step": 2341 }, { "epoch": 0.177216147705346, "grad_norm": 2.298569679260254, "learning_rate": 1.7215361466617892e-05, "loss": 0.8041, "step": 2342 }, { "epoch": 0.1772918164276796, "grad_norm": 2.4448776245117188, "learning_rate": 1.7214441478787923e-05, "loss": 0.8001, "step": 2343 }, { "epoch": 0.17736748515001324, "grad_norm": 2.5493338108062744, "learning_rate": 1.7213520976543057e-05, "loss": 0.9375, "step": 2344 }, { "epoch": 0.17744315387234685, "grad_norm": 2.8180043697357178, "learning_rate": 1.7212599959940947e-05, "loss": 0.8333, "step": 2345 }, { "epoch": 0.1775188225946805, "grad_norm": 2.575085163116455, "learning_rate": 1.7211678429039264e-05, "loss": 0.7597, "step": 2346 }, { "epoch": 0.1775944913170141, "grad_norm": 2.7505435943603516, "learning_rate": 1.721075638389572e-05, "loss": 0.7751, "step": 2347 }, { "epoch": 0.17767016003934774, "grad_norm": 2.285794973373413, "learning_rate": 1.7209833824568047e-05, "loss": 0.7253, "step": 2348 }, { "epoch": 0.17774582876168135, "grad_norm": 3.101331949234009, "learning_rate": 1.7208910751114033e-05, "loss": 0.7653, "step": 2349 }, { "epoch": 0.177821497484015, "grad_norm": 2.0107734203338623, "learning_rate": 1.7207987163591474e-05, "loss": 0.7867, "step": 2350 }, { "epoch": 0.1778971662063486, "grad_norm": 2.603510618209839, "learning_rate": 1.720706306205821e-05, "loss": 0.8109, "step": 2351 }, { "epoch": 0.17797283492868224, "grad_norm": 4.241885185241699, "learning_rate": 1.7206138446572113e-05, "loss": 0.7568, "step": 2352 }, { "epoch": 0.17804850365101585, "grad_norm": 2.324835777282715, "learning_rate": 1.720521331719109e-05, "loss": 0.6711, "step": 2353 }, { "epoch": 0.1781241723733495, "grad_norm": 3.291964292526245, "learning_rate": 1.7204287673973062e-05, "loss": 0.7461, "step": 2354 }, { "epoch": 0.1781998410956831, "grad_norm": 4.23928689956665, "learning_rate": 1.7203361516976007e-05, "loss": 0.6751, "step": 2355 }, { "epoch": 0.17827550981801674, "grad_norm": 2.199976682662964, "learning_rate": 1.7202434846257922e-05, "loss": 0.8, "step": 2356 }, { "epoch": 0.17835117854035035, "grad_norm": 1.6747773885726929, "learning_rate": 1.7201507661876838e-05, "loss": 1.0083, "step": 2357 }, { "epoch": 0.17842684726268396, "grad_norm": 2.7017219066619873, "learning_rate": 1.7200579963890814e-05, "loss": 0.8006, "step": 2358 }, { "epoch": 0.1785025159850176, "grad_norm": 2.3280436992645264, "learning_rate": 1.719965175235795e-05, "loss": 0.8232, "step": 2359 }, { "epoch": 0.1785781847073512, "grad_norm": 2.4222941398620605, "learning_rate": 1.7198723027336374e-05, "loss": 0.6266, "step": 2360 }, { "epoch": 0.17865385342968484, "grad_norm": 2.6295063495635986, "learning_rate": 1.7197793788884245e-05, "loss": 0.7572, "step": 2361 }, { "epoch": 0.17872952215201846, "grad_norm": 2.855011224746704, "learning_rate": 1.7196864037059748e-05, "loss": 0.8607, "step": 2362 }, { "epoch": 0.1788051908743521, "grad_norm": 2.3858821392059326, "learning_rate": 1.7195933771921118e-05, "loss": 0.6728, "step": 2363 }, { "epoch": 0.1788808595966857, "grad_norm": 3.4347083568573, "learning_rate": 1.7195002993526604e-05, "loss": 0.7073, "step": 2364 }, { "epoch": 0.17895652831901934, "grad_norm": 2.68788480758667, "learning_rate": 1.71940717019345e-05, "loss": 0.853, "step": 2365 }, { "epoch": 0.17903219704135295, "grad_norm": 2.82952880859375, "learning_rate": 1.7193139897203122e-05, "loss": 0.8541, "step": 2366 }, { "epoch": 0.1791078657636866, "grad_norm": 2.4485409259796143, "learning_rate": 1.7192207579390824e-05, "loss": 0.6887, "step": 2367 }, { "epoch": 0.1791835344860202, "grad_norm": 2.3793416023254395, "learning_rate": 1.7191274748555987e-05, "loss": 0.9804, "step": 2368 }, { "epoch": 0.17925920320835384, "grad_norm": 2.226893663406372, "learning_rate": 1.7190341404757034e-05, "loss": 0.877, "step": 2369 }, { "epoch": 0.17933487193068745, "grad_norm": 2.134824752807617, "learning_rate": 1.7189407548052412e-05, "loss": 0.8302, "step": 2370 }, { "epoch": 0.17941054065302106, "grad_norm": 2.6807243824005127, "learning_rate": 1.7188473178500602e-05, "loss": 0.6898, "step": 2371 }, { "epoch": 0.1794862093753547, "grad_norm": 2.6654117107391357, "learning_rate": 1.7187538296160115e-05, "loss": 0.6984, "step": 2372 }, { "epoch": 0.1795618780976883, "grad_norm": 2.5605947971343994, "learning_rate": 1.71866029010895e-05, "loss": 0.8535, "step": 2373 }, { "epoch": 0.17963754682002195, "grad_norm": 3.3925888538360596, "learning_rate": 1.718566699334733e-05, "loss": 0.8313, "step": 2374 }, { "epoch": 0.17971321554235556, "grad_norm": 2.7094411849975586, "learning_rate": 1.7184730572992222e-05, "loss": 0.6807, "step": 2375 }, { "epoch": 0.1797888842646892, "grad_norm": 3.0134902000427246, "learning_rate": 1.718379364008281e-05, "loss": 0.7635, "step": 2376 }, { "epoch": 0.1798645529870228, "grad_norm": 2.536494255065918, "learning_rate": 1.718285619467777e-05, "loss": 0.9329, "step": 2377 }, { "epoch": 0.17994022170935645, "grad_norm": 2.6002416610717773, "learning_rate": 1.718191823683581e-05, "loss": 0.9118, "step": 2378 }, { "epoch": 0.18001589043169006, "grad_norm": 2.81915283203125, "learning_rate": 1.7180979766615663e-05, "loss": 0.7235, "step": 2379 }, { "epoch": 0.1800915591540237, "grad_norm": 2.888113260269165, "learning_rate": 1.7180040784076106e-05, "loss": 0.6503, "step": 2380 }, { "epoch": 0.1801672278763573, "grad_norm": 2.9773685932159424, "learning_rate": 1.7179101289275937e-05, "loss": 0.704, "step": 2381 }, { "epoch": 0.18024289659869094, "grad_norm": 2.8631527423858643, "learning_rate": 1.717816128227399e-05, "loss": 0.8687, "step": 2382 }, { "epoch": 0.18031856532102455, "grad_norm": 2.364201068878174, "learning_rate": 1.7177220763129133e-05, "loss": 0.6779, "step": 2383 }, { "epoch": 0.18039423404335816, "grad_norm": 2.255162239074707, "learning_rate": 1.7176279731900264e-05, "loss": 0.6428, "step": 2384 }, { "epoch": 0.1804699027656918, "grad_norm": 3.1965222358703613, "learning_rate": 1.717533818864631e-05, "loss": 0.7524, "step": 2385 }, { "epoch": 0.1805455714880254, "grad_norm": 2.435013771057129, "learning_rate": 1.717439613342624e-05, "loss": 0.8193, "step": 2386 }, { "epoch": 0.18062124021035905, "grad_norm": 3.8150510787963867, "learning_rate": 1.7173453566299044e-05, "loss": 0.7033, "step": 2387 }, { "epoch": 0.18069690893269266, "grad_norm": 2.313161849975586, "learning_rate": 1.717251048732375e-05, "loss": 0.8132, "step": 2388 }, { "epoch": 0.1807725776550263, "grad_norm": 3.1583354473114014, "learning_rate": 1.717156689655941e-05, "loss": 0.7569, "step": 2389 }, { "epoch": 0.1808482463773599, "grad_norm": 2.503884792327881, "learning_rate": 1.717062279406513e-05, "loss": 0.6835, "step": 2390 }, { "epoch": 0.18092391509969355, "grad_norm": 2.8149566650390625, "learning_rate": 1.716967817990002e-05, "loss": 0.8832, "step": 2391 }, { "epoch": 0.18099958382202716, "grad_norm": 2.804032802581787, "learning_rate": 1.7168733054123238e-05, "loss": 0.9157, "step": 2392 }, { "epoch": 0.1810752525443608, "grad_norm": 2.569125175476074, "learning_rate": 1.7167787416793973e-05, "loss": 0.9452, "step": 2393 }, { "epoch": 0.1811509212666944, "grad_norm": 2.8881638050079346, "learning_rate": 1.7166841267971438e-05, "loss": 0.8094, "step": 2394 }, { "epoch": 0.18122658998902805, "grad_norm": 2.7067344188690186, "learning_rate": 1.716589460771489e-05, "loss": 0.8893, "step": 2395 }, { "epoch": 0.18130225871136166, "grad_norm": 3.1878957748413086, "learning_rate": 1.716494743608361e-05, "loss": 0.8778, "step": 2396 }, { "epoch": 0.18137792743369527, "grad_norm": 2.745701313018799, "learning_rate": 1.7163999753136913e-05, "loss": 0.9082, "step": 2397 }, { "epoch": 0.1814535961560289, "grad_norm": 2.593395471572876, "learning_rate": 1.7163051558934146e-05, "loss": 0.8964, "step": 2398 }, { "epoch": 0.18152926487836252, "grad_norm": 2.715348243713379, "learning_rate": 1.716210285353469e-05, "loss": 0.6536, "step": 2399 }, { "epoch": 0.18160493360069616, "grad_norm": 2.5695583820343018, "learning_rate": 1.716115363699795e-05, "loss": 0.7906, "step": 2400 }, { "epoch": 0.18168060232302977, "grad_norm": 5.8394455909729, "learning_rate": 1.7160203909383375e-05, "loss": 0.7837, "step": 2401 }, { "epoch": 0.1817562710453634, "grad_norm": 2.660222291946411, "learning_rate": 1.715925367075044e-05, "loss": 0.6843, "step": 2402 }, { "epoch": 0.18183193976769702, "grad_norm": 3.027378559112549, "learning_rate": 1.7158302921158647e-05, "loss": 0.8906, "step": 2403 }, { "epoch": 0.18190760849003065, "grad_norm": 2.216245174407959, "learning_rate": 1.715735166066754e-05, "loss": 0.9702, "step": 2404 }, { "epoch": 0.18198327721236426, "grad_norm": 2.4401350021362305, "learning_rate": 1.7156399889336684e-05, "loss": 0.7855, "step": 2405 }, { "epoch": 0.1820589459346979, "grad_norm": 2.4370734691619873, "learning_rate": 1.715544760722569e-05, "loss": 0.784, "step": 2406 }, { "epoch": 0.1821346146570315, "grad_norm": 5.49038553237915, "learning_rate": 1.7154494814394186e-05, "loss": 0.7539, "step": 2407 }, { "epoch": 0.18221028337936515, "grad_norm": 2.7418787479400635, "learning_rate": 1.7153541510901844e-05, "loss": 0.8306, "step": 2408 }, { "epoch": 0.18228595210169876, "grad_norm": 2.403918981552124, "learning_rate": 1.7152587696808358e-05, "loss": 0.6963, "step": 2409 }, { "epoch": 0.1823616208240324, "grad_norm": 3.1243717670440674, "learning_rate": 1.7151633372173467e-05, "loss": 0.8133, "step": 2410 }, { "epoch": 0.182437289546366, "grad_norm": 3.145928382873535, "learning_rate": 1.7150678537056925e-05, "loss": 0.7925, "step": 2411 }, { "epoch": 0.18251295826869962, "grad_norm": 3.3592019081115723, "learning_rate": 1.7149723191518533e-05, "loss": 0.8252, "step": 2412 }, { "epoch": 0.18258862699103326, "grad_norm": 2.1932833194732666, "learning_rate": 1.714876733561811e-05, "loss": 0.8144, "step": 2413 }, { "epoch": 0.18266429571336687, "grad_norm": 2.8968517780303955, "learning_rate": 1.7147810969415526e-05, "loss": 0.7965, "step": 2414 }, { "epoch": 0.1827399644357005, "grad_norm": 2.438443660736084, "learning_rate": 1.7146854092970663e-05, "loss": 0.7568, "step": 2415 }, { "epoch": 0.18281563315803412, "grad_norm": 2.609872341156006, "learning_rate": 1.7145896706343445e-05, "loss": 0.6807, "step": 2416 }, { "epoch": 0.18289130188036776, "grad_norm": 2.623194932937622, "learning_rate": 1.714493880959383e-05, "loss": 0.7449, "step": 2417 }, { "epoch": 0.18296697060270137, "grad_norm": 2.9819412231445312, "learning_rate": 1.7143980402781804e-05, "loss": 0.9333, "step": 2418 }, { "epoch": 0.183042639325035, "grad_norm": 2.4386146068573, "learning_rate": 1.7143021485967382e-05, "loss": 0.7959, "step": 2419 }, { "epoch": 0.18311830804736862, "grad_norm": 2.293463706970215, "learning_rate": 1.7142062059210618e-05, "loss": 0.8353, "step": 2420 }, { "epoch": 0.18319397676970225, "grad_norm": 3.0216493606567383, "learning_rate": 1.7141102122571593e-05, "loss": 0.8713, "step": 2421 }, { "epoch": 0.18326964549203587, "grad_norm": 2.45841908454895, "learning_rate": 1.7140141676110424e-05, "loss": 0.8078, "step": 2422 }, { "epoch": 0.1833453142143695, "grad_norm": 2.933749198913574, "learning_rate": 1.713918071988725e-05, "loss": 0.8919, "step": 2423 }, { "epoch": 0.18342098293670311, "grad_norm": 2.992274761199951, "learning_rate": 1.713821925396226e-05, "loss": 0.7021, "step": 2424 }, { "epoch": 0.18349665165903672, "grad_norm": 2.653069257736206, "learning_rate": 1.7137257278395655e-05, "loss": 0.8012, "step": 2425 }, { "epoch": 0.18357232038137036, "grad_norm": 2.428311586380005, "learning_rate": 1.7136294793247677e-05, "loss": 0.8426, "step": 2426 }, { "epoch": 0.18364798910370397, "grad_norm": 3.16813063621521, "learning_rate": 1.7135331798578607e-05, "loss": 0.8006, "step": 2427 }, { "epoch": 0.1837236578260376, "grad_norm": 3.0173721313476562, "learning_rate": 1.7134368294448746e-05, "loss": 0.7249, "step": 2428 }, { "epoch": 0.18379932654837122, "grad_norm": 3.074843406677246, "learning_rate": 1.7133404280918435e-05, "loss": 0.8502, "step": 2429 }, { "epoch": 0.18387499527070486, "grad_norm": 2.8122646808624268, "learning_rate": 1.713243975804804e-05, "loss": 0.8029, "step": 2430 }, { "epoch": 0.18395066399303847, "grad_norm": 2.632542610168457, "learning_rate": 1.7131474725897958e-05, "loss": 0.9138, "step": 2431 }, { "epoch": 0.1840263327153721, "grad_norm": 2.9249496459960938, "learning_rate": 1.7130509184528634e-05, "loss": 0.8061, "step": 2432 }, { "epoch": 0.18410200143770572, "grad_norm": 2.6574416160583496, "learning_rate": 1.7129543134000528e-05, "loss": 0.846, "step": 2433 }, { "epoch": 0.18417767016003936, "grad_norm": 2.1470108032226562, "learning_rate": 1.7128576574374138e-05, "loss": 0.7422, "step": 2434 }, { "epoch": 0.18425333888237297, "grad_norm": 3.626044750213623, "learning_rate": 1.712760950570999e-05, "loss": 0.8358, "step": 2435 }, { "epoch": 0.1843290076047066, "grad_norm": 2.8826382160186768, "learning_rate": 1.7126641928068642e-05, "loss": 0.6992, "step": 2436 }, { "epoch": 0.18440467632704022, "grad_norm": 2.540454149246216, "learning_rate": 1.7125673841510696e-05, "loss": 0.7279, "step": 2437 }, { "epoch": 0.18448034504937383, "grad_norm": 4.273547172546387, "learning_rate": 1.7124705246096776e-05, "loss": 0.9469, "step": 2438 }, { "epoch": 0.18455601377170747, "grad_norm": 2.69283390045166, "learning_rate": 1.712373614188753e-05, "loss": 0.7379, "step": 2439 }, { "epoch": 0.18463168249404108, "grad_norm": 2.7413628101348877, "learning_rate": 1.712276652894365e-05, "loss": 0.8033, "step": 2440 }, { "epoch": 0.18470735121637472, "grad_norm": 2.5811452865600586, "learning_rate": 1.7121796407325864e-05, "loss": 0.7025, "step": 2441 }, { "epoch": 0.18478301993870833, "grad_norm": 2.8517563343048096, "learning_rate": 1.7120825777094916e-05, "loss": 0.8967, "step": 2442 }, { "epoch": 0.18485868866104196, "grad_norm": 2.390089988708496, "learning_rate": 1.7119854638311587e-05, "loss": 0.7239, "step": 2443 }, { "epoch": 0.18493435738337557, "grad_norm": 3.3105874061584473, "learning_rate": 1.71188829910367e-05, "loss": 0.8914, "step": 2444 }, { "epoch": 0.1850100261057092, "grad_norm": 3.032083511352539, "learning_rate": 1.7117910835331104e-05, "loss": 0.85, "step": 2445 }, { "epoch": 0.18508569482804282, "grad_norm": 2.4324803352355957, "learning_rate": 1.7116938171255672e-05, "loss": 0.5128, "step": 2446 }, { "epoch": 0.18516136355037646, "grad_norm": 2.444301128387451, "learning_rate": 1.711596499887132e-05, "loss": 0.8662, "step": 2447 }, { "epoch": 0.18523703227271007, "grad_norm": 2.0396249294281006, "learning_rate": 1.711499131823899e-05, "loss": 0.6182, "step": 2448 }, { "epoch": 0.1853127009950437, "grad_norm": 2.8817059993743896, "learning_rate": 1.7114017129419654e-05, "loss": 0.8314, "step": 2449 }, { "epoch": 0.18538836971737732, "grad_norm": 2.181652545928955, "learning_rate": 1.7113042432474323e-05, "loss": 0.7013, "step": 2450 }, { "epoch": 0.18546403843971093, "grad_norm": 2.367856740951538, "learning_rate": 1.7112067227464035e-05, "loss": 0.818, "step": 2451 }, { "epoch": 0.18553970716204457, "grad_norm": 2.3162214756011963, "learning_rate": 1.7111091514449857e-05, "loss": 0.8122, "step": 2452 }, { "epoch": 0.18561537588437818, "grad_norm": 2.549567937850952, "learning_rate": 1.7110115293492893e-05, "loss": 0.7482, "step": 2453 }, { "epoch": 0.18569104460671182, "grad_norm": 1.9302699565887451, "learning_rate": 1.7109138564654283e-05, "loss": 0.8955, "step": 2454 }, { "epoch": 0.18576671332904543, "grad_norm": 2.4720423221588135, "learning_rate": 1.7108161327995182e-05, "loss": 0.7715, "step": 2455 }, { "epoch": 0.18584238205137907, "grad_norm": 2.2266173362731934, "learning_rate": 1.7107183583576798e-05, "loss": 0.7588, "step": 2456 }, { "epoch": 0.18591805077371268, "grad_norm": 2.241393804550171, "learning_rate": 1.7106205331460356e-05, "loss": 0.6046, "step": 2457 }, { "epoch": 0.18599371949604632, "grad_norm": 2.3776702880859375, "learning_rate": 1.7105226571707115e-05, "loss": 0.7866, "step": 2458 }, { "epoch": 0.18606938821837993, "grad_norm": 3.0117900371551514, "learning_rate": 1.7104247304378372e-05, "loss": 0.68, "step": 2459 }, { "epoch": 0.18614505694071357, "grad_norm": 2.2148845195770264, "learning_rate": 1.7103267529535453e-05, "loss": 0.8208, "step": 2460 }, { "epoch": 0.18622072566304718, "grad_norm": 2.5542850494384766, "learning_rate": 1.710228724723971e-05, "loss": 0.7452, "step": 2461 }, { "epoch": 0.18629639438538081, "grad_norm": 2.4348342418670654, "learning_rate": 1.7101306457552532e-05, "loss": 0.8503, "step": 2462 }, { "epoch": 0.18637206310771443, "grad_norm": 2.9579315185546875, "learning_rate": 1.7100325160535344e-05, "loss": 0.9084, "step": 2463 }, { "epoch": 0.18644773183004806, "grad_norm": 2.543611764907837, "learning_rate": 1.7099343356249594e-05, "loss": 0.7714, "step": 2464 }, { "epoch": 0.18652340055238167, "grad_norm": 2.5883982181549072, "learning_rate": 1.7098361044756762e-05, "loss": 0.6883, "step": 2465 }, { "epoch": 0.18659906927471528, "grad_norm": 2.596781015396118, "learning_rate": 1.7097378226118372e-05, "loss": 0.7947, "step": 2466 }, { "epoch": 0.18667473799704892, "grad_norm": 1.9685479402542114, "learning_rate": 1.709639490039597e-05, "loss": 0.8463, "step": 2467 }, { "epoch": 0.18675040671938253, "grad_norm": 2.6280531883239746, "learning_rate": 1.7095411067651128e-05, "loss": 0.7112, "step": 2468 }, { "epoch": 0.18682607544171617, "grad_norm": 4.373874187469482, "learning_rate": 1.7094426727945463e-05, "loss": 0.7965, "step": 2469 }, { "epoch": 0.18690174416404978, "grad_norm": 2.3263533115386963, "learning_rate": 1.7093441881340614e-05, "loss": 0.7271, "step": 2470 }, { "epoch": 0.18697741288638342, "grad_norm": 2.311795711517334, "learning_rate": 1.709245652789826e-05, "loss": 0.7724, "step": 2471 }, { "epoch": 0.18705308160871703, "grad_norm": 2.1716461181640625, "learning_rate": 1.7091470667680102e-05, "loss": 0.7175, "step": 2472 }, { "epoch": 0.18712875033105067, "grad_norm": 2.450676202774048, "learning_rate": 1.7090484300747882e-05, "loss": 0.7955, "step": 2473 }, { "epoch": 0.18720441905338428, "grad_norm": 2.3514206409454346, "learning_rate": 1.7089497427163362e-05, "loss": 0.6674, "step": 2474 }, { "epoch": 0.18728008777571792, "grad_norm": 2.2294673919677734, "learning_rate": 1.7088510046988355e-05, "loss": 0.6138, "step": 2475 }, { "epoch": 0.18735575649805153, "grad_norm": 2.8601155281066895, "learning_rate": 1.7087522160284684e-05, "loss": 0.7145, "step": 2476 }, { "epoch": 0.18743142522038517, "grad_norm": 2.8566646575927734, "learning_rate": 1.7086533767114216e-05, "loss": 0.8687, "step": 2477 }, { "epoch": 0.18750709394271878, "grad_norm": 2.680727005004883, "learning_rate": 1.7085544867538854e-05, "loss": 0.6916, "step": 2478 }, { "epoch": 0.1875827626650524, "grad_norm": 2.9633843898773193, "learning_rate": 1.7084555461620514e-05, "loss": 0.8821, "step": 2479 }, { "epoch": 0.18765843138738603, "grad_norm": 2.1105926036834717, "learning_rate": 1.7083565549421166e-05, "loss": 0.713, "step": 2480 }, { "epoch": 0.18773410010971964, "grad_norm": 2.6153006553649902, "learning_rate": 1.7082575131002796e-05, "loss": 0.6687, "step": 2481 }, { "epoch": 0.18780976883205328, "grad_norm": 2.08457088470459, "learning_rate": 1.708158420642743e-05, "loss": 0.7836, "step": 2482 }, { "epoch": 0.18788543755438689, "grad_norm": 2.58174467086792, "learning_rate": 1.7080592775757122e-05, "loss": 0.8457, "step": 2483 }, { "epoch": 0.18796110627672052, "grad_norm": 2.193037271499634, "learning_rate": 1.707960083905396e-05, "loss": 0.7799, "step": 2484 }, { "epoch": 0.18803677499905413, "grad_norm": 2.2527616024017334, "learning_rate": 1.707860839638006e-05, "loss": 0.6341, "step": 2485 }, { "epoch": 0.18811244372138777, "grad_norm": 2.2028379440307617, "learning_rate": 1.707761544779757e-05, "loss": 0.6469, "step": 2486 }, { "epoch": 0.18818811244372138, "grad_norm": 2.2932393550872803, "learning_rate": 1.7076621993368676e-05, "loss": 0.6554, "step": 2487 }, { "epoch": 0.18826378116605502, "grad_norm": 2.409485101699829, "learning_rate": 1.7075628033155593e-05, "loss": 0.905, "step": 2488 }, { "epoch": 0.18833944988838863, "grad_norm": 3.5237672328948975, "learning_rate": 1.707463356722056e-05, "loss": 0.7915, "step": 2489 }, { "epoch": 0.18841511861072227, "grad_norm": 2.7192323207855225, "learning_rate": 1.7073638595625856e-05, "loss": 0.8663, "step": 2490 }, { "epoch": 0.18849078733305588, "grad_norm": 2.6464853286743164, "learning_rate": 1.707264311843379e-05, "loss": 0.9029, "step": 2491 }, { "epoch": 0.1885664560553895, "grad_norm": 2.4088399410247803, "learning_rate": 1.7071647135706702e-05, "loss": 0.7941, "step": 2492 }, { "epoch": 0.18864212477772313, "grad_norm": 2.637516975402832, "learning_rate": 1.7070650647506966e-05, "loss": 0.679, "step": 2493 }, { "epoch": 0.18871779350005674, "grad_norm": 2.1144580841064453, "learning_rate": 1.7069653653896982e-05, "loss": 0.7291, "step": 2494 }, { "epoch": 0.18879346222239038, "grad_norm": 2.473254680633545, "learning_rate": 1.7068656154939183e-05, "loss": 0.7055, "step": 2495 }, { "epoch": 0.188869130944724, "grad_norm": 2.5873825550079346, "learning_rate": 1.7067658150696043e-05, "loss": 0.8274, "step": 2496 }, { "epoch": 0.18894479966705763, "grad_norm": 2.4804913997650146, "learning_rate": 1.706665964123005e-05, "loss": 0.7887, "step": 2497 }, { "epoch": 0.18902046838939124, "grad_norm": 3.1803600788116455, "learning_rate": 1.7065660626603745e-05, "loss": 0.9983, "step": 2498 }, { "epoch": 0.18909613711172488, "grad_norm": 3.4321467876434326, "learning_rate": 1.706466110687968e-05, "loss": 0.8888, "step": 2499 }, { "epoch": 0.1891718058340585, "grad_norm": 2.6586387157440186, "learning_rate": 1.706366108212045e-05, "loss": 0.9563, "step": 2500 }, { "epoch": 0.18924747455639213, "grad_norm": 2.2730400562286377, "learning_rate": 1.7062660552388687e-05, "loss": 0.722, "step": 2501 }, { "epoch": 0.18932314327872574, "grad_norm": 2.669356107711792, "learning_rate": 1.706165951774704e-05, "loss": 0.7355, "step": 2502 }, { "epoch": 0.18939881200105937, "grad_norm": 3.200263738632202, "learning_rate": 1.70606579782582e-05, "loss": 0.6391, "step": 2503 }, { "epoch": 0.18947448072339299, "grad_norm": 2.0862410068511963, "learning_rate": 1.7059655933984886e-05, "loss": 0.7393, "step": 2504 }, { "epoch": 0.1895501494457266, "grad_norm": 2.3829562664031982, "learning_rate": 1.7058653384989852e-05, "loss": 0.7792, "step": 2505 }, { "epoch": 0.18962581816806023, "grad_norm": 2.7192342281341553, "learning_rate": 1.7057650331335875e-05, "loss": 0.8724, "step": 2506 }, { "epoch": 0.18970148689039384, "grad_norm": 2.462100028991699, "learning_rate": 1.7056646773085773e-05, "loss": 0.8377, "step": 2507 }, { "epoch": 0.18977715561272748, "grad_norm": 2.152848243713379, "learning_rate": 1.705564271030239e-05, "loss": 0.8334, "step": 2508 }, { "epoch": 0.1898528243350611, "grad_norm": 2.1347358226776123, "learning_rate": 1.705463814304861e-05, "loss": 0.7019, "step": 2509 }, { "epoch": 0.18992849305739473, "grad_norm": 2.732978343963623, "learning_rate": 1.7053633071387336e-05, "loss": 0.8409, "step": 2510 }, { "epoch": 0.19000416177972834, "grad_norm": 2.211718797683716, "learning_rate": 1.7052627495381507e-05, "loss": 0.7797, "step": 2511 }, { "epoch": 0.19007983050206198, "grad_norm": 2.507467031478882, "learning_rate": 1.7051621415094105e-05, "loss": 0.7145, "step": 2512 }, { "epoch": 0.1901554992243956, "grad_norm": 2.3044538497924805, "learning_rate": 1.7050614830588122e-05, "loss": 0.8306, "step": 2513 }, { "epoch": 0.19023116794672923, "grad_norm": 2.1056079864501953, "learning_rate": 1.7049607741926603e-05, "loss": 0.9237, "step": 2514 }, { "epoch": 0.19030683666906284, "grad_norm": 2.6704158782958984, "learning_rate": 1.704860014917261e-05, "loss": 0.8201, "step": 2515 }, { "epoch": 0.19038250539139648, "grad_norm": 3.0640957355499268, "learning_rate": 1.7047592052389243e-05, "loss": 0.8734, "step": 2516 }, { "epoch": 0.1904581741137301, "grad_norm": 2.7852325439453125, "learning_rate": 1.7046583451639635e-05, "loss": 0.8072, "step": 2517 }, { "epoch": 0.1905338428360637, "grad_norm": 3.1452224254608154, "learning_rate": 1.7045574346986942e-05, "loss": 0.6817, "step": 2518 }, { "epoch": 0.19060951155839734, "grad_norm": 2.4162135124206543, "learning_rate": 1.7044564738494367e-05, "loss": 0.8041, "step": 2519 }, { "epoch": 0.19068518028073095, "grad_norm": 2.7600903511047363, "learning_rate": 1.704355462622512e-05, "loss": 0.8216, "step": 2520 }, { "epoch": 0.1907608490030646, "grad_norm": 2.507215976715088, "learning_rate": 1.7042544010242473e-05, "loss": 0.8253, "step": 2521 }, { "epoch": 0.1908365177253982, "grad_norm": 2.914543628692627, "learning_rate": 1.7041532890609703e-05, "loss": 0.9177, "step": 2522 }, { "epoch": 0.19091218644773184, "grad_norm": 3.545713186264038, "learning_rate": 1.704052126739014e-05, "loss": 0.6333, "step": 2523 }, { "epoch": 0.19098785517006545, "grad_norm": 2.720325469970703, "learning_rate": 1.7039509140647124e-05, "loss": 0.7723, "step": 2524 }, { "epoch": 0.19106352389239908, "grad_norm": 2.6830990314483643, "learning_rate": 1.703849651044404e-05, "loss": 0.8263, "step": 2525 }, { "epoch": 0.1911391926147327, "grad_norm": 2.6255667209625244, "learning_rate": 1.703748337684431e-05, "loss": 0.7983, "step": 2526 }, { "epoch": 0.19121486133706633, "grad_norm": 3.0577828884124756, "learning_rate": 1.7036469739911374e-05, "loss": 0.5705, "step": 2527 }, { "epoch": 0.19129053005939994, "grad_norm": 2.4407639503479004, "learning_rate": 1.703545559970871e-05, "loss": 0.7921, "step": 2528 }, { "epoch": 0.19136619878173358, "grad_norm": 3.169311046600342, "learning_rate": 1.7034440956299825e-05, "loss": 0.6525, "step": 2529 }, { "epoch": 0.1914418675040672, "grad_norm": 2.345228433609009, "learning_rate": 1.703342580974826e-05, "loss": 0.6853, "step": 2530 }, { "epoch": 0.19151753622640083, "grad_norm": 2.094965696334839, "learning_rate": 1.703241016011759e-05, "loss": 0.6976, "step": 2531 }, { "epoch": 0.19159320494873444, "grad_norm": 2.3443291187286377, "learning_rate": 1.7031394007471415e-05, "loss": 0.6738, "step": 2532 }, { "epoch": 0.19166887367106805, "grad_norm": 2.716376304626465, "learning_rate": 1.703037735187337e-05, "loss": 0.8052, "step": 2533 }, { "epoch": 0.1917445423934017, "grad_norm": 2.7396631240844727, "learning_rate": 1.7029360193387123e-05, "loss": 0.6116, "step": 2534 }, { "epoch": 0.1918202111157353, "grad_norm": 2.562323570251465, "learning_rate": 1.702834253207637e-05, "loss": 0.7081, "step": 2535 }, { "epoch": 0.19189587983806894, "grad_norm": 2.4629499912261963, "learning_rate": 1.702732436800484e-05, "loss": 0.8838, "step": 2536 }, { "epoch": 0.19197154856040255, "grad_norm": 2.0351247787475586, "learning_rate": 1.7026305701236294e-05, "loss": 0.9576, "step": 2537 }, { "epoch": 0.1920472172827362, "grad_norm": 3.575366973876953, "learning_rate": 1.7025286531834525e-05, "loss": 0.6759, "step": 2538 }, { "epoch": 0.1921228860050698, "grad_norm": 2.341073513031006, "learning_rate": 1.7024266859863358e-05, "loss": 0.7948, "step": 2539 }, { "epoch": 0.19219855472740344, "grad_norm": 2.288145065307617, "learning_rate": 1.7023246685386646e-05, "loss": 0.6851, "step": 2540 }, { "epoch": 0.19227422344973705, "grad_norm": 2.1108577251434326, "learning_rate": 1.7022226008468275e-05, "loss": 0.783, "step": 2541 }, { "epoch": 0.19234989217207069, "grad_norm": 2.4641237258911133, "learning_rate": 1.7021204829172166e-05, "loss": 0.833, "step": 2542 }, { "epoch": 0.1924255608944043, "grad_norm": 3.356717109680176, "learning_rate": 1.7020183147562267e-05, "loss": 0.7958, "step": 2543 }, { "epoch": 0.19250122961673793, "grad_norm": 2.6340181827545166, "learning_rate": 1.7019160963702556e-05, "loss": 0.7378, "step": 2544 }, { "epoch": 0.19257689833907154, "grad_norm": 2.6588118076324463, "learning_rate": 1.701813827765705e-05, "loss": 0.9538, "step": 2545 }, { "epoch": 0.19265256706140516, "grad_norm": 2.6437458992004395, "learning_rate": 1.7017115089489794e-05, "loss": 0.739, "step": 2546 }, { "epoch": 0.1927282357837388, "grad_norm": 2.546844005584717, "learning_rate": 1.7016091399264856e-05, "loss": 0.8209, "step": 2547 }, { "epoch": 0.1928039045060724, "grad_norm": 2.78609299659729, "learning_rate": 1.701506720704635e-05, "loss": 0.9092, "step": 2548 }, { "epoch": 0.19287957322840604, "grad_norm": 2.1498682498931885, "learning_rate": 1.7014042512898414e-05, "loss": 0.8103, "step": 2549 }, { "epoch": 0.19295524195073965, "grad_norm": 2.6014087200164795, "learning_rate": 1.701301731688521e-05, "loss": 1.07, "step": 2550 }, { "epoch": 0.1930309106730733, "grad_norm": 2.4372475147247314, "learning_rate": 1.7011991619070948e-05, "loss": 0.785, "step": 2551 }, { "epoch": 0.1931065793954069, "grad_norm": 2.3356244564056396, "learning_rate": 1.7010965419519858e-05, "loss": 0.689, "step": 2552 }, { "epoch": 0.19318224811774054, "grad_norm": 2.393585443496704, "learning_rate": 1.70099387182962e-05, "loss": 0.7593, "step": 2553 }, { "epoch": 0.19325791684007415, "grad_norm": 2.5867748260498047, "learning_rate": 1.700891151546427e-05, "loss": 0.7933, "step": 2554 }, { "epoch": 0.1933335855624078, "grad_norm": 2.6387293338775635, "learning_rate": 1.7007883811088403e-05, "loss": 0.7924, "step": 2555 }, { "epoch": 0.1934092542847414, "grad_norm": 1.7899304628372192, "learning_rate": 1.7006855605232947e-05, "loss": 0.8936, "step": 2556 }, { "epoch": 0.19348492300707504, "grad_norm": 3.2812225818634033, "learning_rate": 1.7005826897962294e-05, "loss": 0.7595, "step": 2557 }, { "epoch": 0.19356059172940865, "grad_norm": 3.634249448776245, "learning_rate": 1.7004797689340873e-05, "loss": 0.8057, "step": 2558 }, { "epoch": 0.19363626045174226, "grad_norm": 2.197214365005493, "learning_rate": 1.7003767979433126e-05, "loss": 0.7255, "step": 2559 }, { "epoch": 0.1937119291740759, "grad_norm": 2.3928468227386475, "learning_rate": 1.7002737768303542e-05, "loss": 1.0792, "step": 2560 }, { "epoch": 0.1937875978964095, "grad_norm": 2.169796943664551, "learning_rate": 1.7001707056016633e-05, "loss": 0.8442, "step": 2561 }, { "epoch": 0.19386326661874315, "grad_norm": 2.0690131187438965, "learning_rate": 1.7000675842636948e-05, "loss": 0.7823, "step": 2562 }, { "epoch": 0.19393893534107676, "grad_norm": 2.256768226623535, "learning_rate": 1.6999644128229065e-05, "loss": 0.7462, "step": 2563 }, { "epoch": 0.1940146040634104, "grad_norm": 2.095914602279663, "learning_rate": 1.6998611912857592e-05, "loss": 0.8174, "step": 2564 }, { "epoch": 0.194090272785744, "grad_norm": 3.0056588649749756, "learning_rate": 1.6997579196587173e-05, "loss": 0.8705, "step": 2565 }, { "epoch": 0.19416594150807764, "grad_norm": 2.5607128143310547, "learning_rate": 1.6996545979482475e-05, "loss": 0.7777, "step": 2566 }, { "epoch": 0.19424161023041125, "grad_norm": 2.9612107276916504, "learning_rate": 1.6995512261608202e-05, "loss": 0.6993, "step": 2567 }, { "epoch": 0.1943172789527449, "grad_norm": 2.610933780670166, "learning_rate": 1.6994478043029095e-05, "loss": 0.7044, "step": 2568 }, { "epoch": 0.1943929476750785, "grad_norm": 2.2328102588653564, "learning_rate": 1.699344332380991e-05, "loss": 0.6661, "step": 2569 }, { "epoch": 0.19446861639741214, "grad_norm": 2.128195285797119, "learning_rate": 1.6992408104015458e-05, "loss": 0.735, "step": 2570 }, { "epoch": 0.19454428511974575, "grad_norm": 3.4304070472717285, "learning_rate": 1.6991372383710555e-05, "loss": 0.9446, "step": 2571 }, { "epoch": 0.19461995384207936, "grad_norm": 2.2780416011810303, "learning_rate": 1.6990336162960066e-05, "loss": 0.8719, "step": 2572 }, { "epoch": 0.194695622564413, "grad_norm": 2.545768976211548, "learning_rate": 1.6989299441828883e-05, "loss": 0.7445, "step": 2573 }, { "epoch": 0.1947712912867466, "grad_norm": 2.4428822994232178, "learning_rate": 1.698826222038193e-05, "loss": 0.6788, "step": 2574 }, { "epoch": 0.19484696000908025, "grad_norm": 2.0839898586273193, "learning_rate": 1.6987224498684157e-05, "loss": 0.7712, "step": 2575 }, { "epoch": 0.19492262873141386, "grad_norm": 2.200305938720703, "learning_rate": 1.6986186276800554e-05, "loss": 0.6872, "step": 2576 }, { "epoch": 0.1949982974537475, "grad_norm": 2.497018575668335, "learning_rate": 1.6985147554796134e-05, "loss": 0.7094, "step": 2577 }, { "epoch": 0.1950739661760811, "grad_norm": 2.3949403762817383, "learning_rate": 1.698410833273595e-05, "loss": 0.778, "step": 2578 }, { "epoch": 0.19514963489841475, "grad_norm": 3.1228713989257812, "learning_rate": 1.698306861068508e-05, "loss": 0.8602, "step": 2579 }, { "epoch": 0.19522530362074836, "grad_norm": 2.2190120220184326, "learning_rate": 1.6982028388708625e-05, "loss": 0.8146, "step": 2580 }, { "epoch": 0.195300972343082, "grad_norm": 1.9382598400115967, "learning_rate": 1.698098766687174e-05, "loss": 0.636, "step": 2581 }, { "epoch": 0.1953766410654156, "grad_norm": 2.97119402885437, "learning_rate": 1.6979946445239595e-05, "loss": 0.732, "step": 2582 }, { "epoch": 0.19545230978774925, "grad_norm": 2.2818760871887207, "learning_rate": 1.6978904723877394e-05, "loss": 0.7439, "step": 2583 }, { "epoch": 0.19552797851008286, "grad_norm": 3.0975162982940674, "learning_rate": 1.697786250285037e-05, "loss": 0.8555, "step": 2584 }, { "epoch": 0.1956036472324165, "grad_norm": 2.345454454421997, "learning_rate": 1.6976819782223792e-05, "loss": 0.7178, "step": 2585 }, { "epoch": 0.1956793159547501, "grad_norm": 2.2025437355041504, "learning_rate": 1.697577656206296e-05, "loss": 0.8992, "step": 2586 }, { "epoch": 0.19575498467708372, "grad_norm": 2.2825613021850586, "learning_rate": 1.6974732842433202e-05, "loss": 0.667, "step": 2587 }, { "epoch": 0.19583065339941735, "grad_norm": 2.3459088802337646, "learning_rate": 1.697368862339988e-05, "loss": 0.5786, "step": 2588 }, { "epoch": 0.19590632212175096, "grad_norm": 2.5378456115722656, "learning_rate": 1.697264390502839e-05, "loss": 0.7354, "step": 2589 }, { "epoch": 0.1959819908440846, "grad_norm": 2.5877671241760254, "learning_rate": 1.697159868738415e-05, "loss": 0.7067, "step": 2590 }, { "epoch": 0.1960576595664182, "grad_norm": 2.3794543743133545, "learning_rate": 1.6970552970532616e-05, "loss": 0.8205, "step": 2591 }, { "epoch": 0.19613332828875185, "grad_norm": 2.144336462020874, "learning_rate": 1.6969506754539278e-05, "loss": 0.882, "step": 2592 }, { "epoch": 0.19620899701108546, "grad_norm": 2.0169589519500732, "learning_rate": 1.6968460039469644e-05, "loss": 0.8049, "step": 2593 }, { "epoch": 0.1962846657334191, "grad_norm": 1.9170702695846558, "learning_rate": 1.6967412825389272e-05, "loss": 0.6913, "step": 2594 }, { "epoch": 0.1963603344557527, "grad_norm": 2.5606637001037598, "learning_rate": 1.6966365112363743e-05, "loss": 0.7495, "step": 2595 }, { "epoch": 0.19643600317808635, "grad_norm": 2.407437801361084, "learning_rate": 1.696531690045866e-05, "loss": 0.8785, "step": 2596 }, { "epoch": 0.19651167190041996, "grad_norm": 2.435490369796753, "learning_rate": 1.696426818973967e-05, "loss": 0.7802, "step": 2597 }, { "epoch": 0.1965873406227536, "grad_norm": 2.0081589221954346, "learning_rate": 1.696321898027245e-05, "loss": 0.6976, "step": 2598 }, { "epoch": 0.1966630093450872, "grad_norm": 2.957495927810669, "learning_rate": 1.6962169272122697e-05, "loss": 0.7115, "step": 2599 }, { "epoch": 0.19673867806742082, "grad_norm": 2.271768808364868, "learning_rate": 1.6961119065356155e-05, "loss": 0.8056, "step": 2600 }, { "epoch": 0.19681434678975446, "grad_norm": 1.9800879955291748, "learning_rate": 1.6960068360038584e-05, "loss": 0.6092, "step": 2601 }, { "epoch": 0.19689001551208807, "grad_norm": 2.3020243644714355, "learning_rate": 1.695901715623579e-05, "loss": 0.8472, "step": 2602 }, { "epoch": 0.1969656842344217, "grad_norm": 2.3017523288726807, "learning_rate": 1.6957965454013597e-05, "loss": 0.8187, "step": 2603 }, { "epoch": 0.19704135295675532, "grad_norm": 2.2522785663604736, "learning_rate": 1.6956913253437868e-05, "loss": 0.7279, "step": 2604 }, { "epoch": 0.19711702167908896, "grad_norm": 2.6222681999206543, "learning_rate": 1.6955860554574495e-05, "loss": 0.8215, "step": 2605 }, { "epoch": 0.19719269040142257, "grad_norm": 2.9026076793670654, "learning_rate": 1.6954807357489407e-05, "loss": 0.6979, "step": 2606 }, { "epoch": 0.1972683591237562, "grad_norm": 3.418788194656372, "learning_rate": 1.6953753662248547e-05, "loss": 0.7305, "step": 2607 }, { "epoch": 0.19734402784608981, "grad_norm": 2.4104363918304443, "learning_rate": 1.695269946891791e-05, "loss": 0.7632, "step": 2608 }, { "epoch": 0.19741969656842345, "grad_norm": 2.85041880607605, "learning_rate": 1.695164477756351e-05, "loss": 0.7216, "step": 2609 }, { "epoch": 0.19749536529075706, "grad_norm": 2.079584836959839, "learning_rate": 1.695058958825139e-05, "loss": 0.7418, "step": 2610 }, { "epoch": 0.1975710340130907, "grad_norm": 2.3523175716400146, "learning_rate": 1.6949533901047643e-05, "loss": 0.7151, "step": 2611 }, { "epoch": 0.1976467027354243, "grad_norm": 2.4954283237457275, "learning_rate": 1.6948477716018366e-05, "loss": 0.8416, "step": 2612 }, { "epoch": 0.19772237145775792, "grad_norm": 2.920403480529785, "learning_rate": 1.6947421033229706e-05, "loss": 0.7814, "step": 2613 }, { "epoch": 0.19779804018009156, "grad_norm": 2.6211936473846436, "learning_rate": 1.6946363852747838e-05, "loss": 0.7247, "step": 2614 }, { "epoch": 0.19787370890242517, "grad_norm": 3.040184259414673, "learning_rate": 1.694530617463896e-05, "loss": 0.8646, "step": 2615 }, { "epoch": 0.1979493776247588, "grad_norm": 2.736969232559204, "learning_rate": 1.6944247998969318e-05, "loss": 0.6909, "step": 2616 }, { "epoch": 0.19802504634709242, "grad_norm": 2.3474671840667725, "learning_rate": 1.694318932580517e-05, "loss": 0.7216, "step": 2617 }, { "epoch": 0.19810071506942606, "grad_norm": 2.3557677268981934, "learning_rate": 1.6942130155212808e-05, "loss": 0.7854, "step": 2618 }, { "epoch": 0.19817638379175967, "grad_norm": 3.080761432647705, "learning_rate": 1.6941070487258573e-05, "loss": 0.7452, "step": 2619 }, { "epoch": 0.1982520525140933, "grad_norm": 2.3704681396484375, "learning_rate": 1.694001032200882e-05, "loss": 0.6745, "step": 2620 }, { "epoch": 0.19832772123642692, "grad_norm": 2.427135467529297, "learning_rate": 1.6938949659529935e-05, "loss": 0.8975, "step": 2621 }, { "epoch": 0.19840338995876056, "grad_norm": 2.0975005626678467, "learning_rate": 1.693788849988835e-05, "loss": 0.619, "step": 2622 }, { "epoch": 0.19847905868109417, "grad_norm": 2.5328569412231445, "learning_rate": 1.6936826843150512e-05, "loss": 0.7884, "step": 2623 }, { "epoch": 0.1985547274034278, "grad_norm": 5.670637130737305, "learning_rate": 1.6935764689382904e-05, "loss": 0.8431, "step": 2624 }, { "epoch": 0.19863039612576142, "grad_norm": 2.570315361022949, "learning_rate": 1.6934702038652046e-05, "loss": 0.8699, "step": 2625 }, { "epoch": 0.19870606484809503, "grad_norm": 2.185997247695923, "learning_rate": 1.693363889102448e-05, "loss": 0.8449, "step": 2626 }, { "epoch": 0.19878173357042866, "grad_norm": 2.4839537143707275, "learning_rate": 1.6932575246566788e-05, "loss": 0.8201, "step": 2627 }, { "epoch": 0.19885740229276228, "grad_norm": 2.1971192359924316, "learning_rate": 1.6931511105345575e-05, "loss": 0.7496, "step": 2628 }, { "epoch": 0.1989330710150959, "grad_norm": 2.9695796966552734, "learning_rate": 1.6930446467427484e-05, "loss": 0.7093, "step": 2629 }, { "epoch": 0.19900873973742952, "grad_norm": 2.3504600524902344, "learning_rate": 1.6929381332879187e-05, "loss": 0.7825, "step": 2630 }, { "epoch": 0.19908440845976316, "grad_norm": 2.4642410278320312, "learning_rate": 1.6928315701767382e-05, "loss": 0.8154, "step": 2631 }, { "epoch": 0.19916007718209677, "grad_norm": 2.417527675628662, "learning_rate": 1.6927249574158803e-05, "loss": 0.7947, "step": 2632 }, { "epoch": 0.1992357459044304, "grad_norm": 2.4972589015960693, "learning_rate": 1.692618295012022e-05, "loss": 0.714, "step": 2633 }, { "epoch": 0.19931141462676402, "grad_norm": 2.6074717044830322, "learning_rate": 1.6925115829718424e-05, "loss": 0.7362, "step": 2634 }, { "epoch": 0.19938708334909766, "grad_norm": 2.2612643241882324, "learning_rate": 1.692404821302024e-05, "loss": 0.8082, "step": 2635 }, { "epoch": 0.19946275207143127, "grad_norm": 3.058591842651367, "learning_rate": 1.6922980100092524e-05, "loss": 0.729, "step": 2636 }, { "epoch": 0.1995384207937649, "grad_norm": 2.8211417198181152, "learning_rate": 1.6921911491002175e-05, "loss": 0.6836, "step": 2637 }, { "epoch": 0.19961408951609852, "grad_norm": 2.3648903369903564, "learning_rate": 1.69208423858161e-05, "loss": 0.7884, "step": 2638 }, { "epoch": 0.19968975823843216, "grad_norm": 2.64884877204895, "learning_rate": 1.691977278460126e-05, "loss": 0.8841, "step": 2639 }, { "epoch": 0.19976542696076577, "grad_norm": 2.9867165088653564, "learning_rate": 1.6918702687424628e-05, "loss": 0.7115, "step": 2640 }, { "epoch": 0.19984109568309938, "grad_norm": 2.427945137023926, "learning_rate": 1.6917632094353226e-05, "loss": 1.0163, "step": 2641 }, { "epoch": 0.19991676440543302, "grad_norm": 2.252856969833374, "learning_rate": 1.6916561005454093e-05, "loss": 0.8115, "step": 2642 }, { "epoch": 0.19999243312776663, "grad_norm": 2.524324893951416, "learning_rate": 1.6915489420794304e-05, "loss": 0.6516, "step": 2643 }, { "epoch": 0.20006810185010027, "grad_norm": 2.3426573276519775, "learning_rate": 1.691441734044096e-05, "loss": 0.7816, "step": 2644 }, { "epoch": 0.20014377057243388, "grad_norm": 2.6278955936431885, "learning_rate": 1.691334476446121e-05, "loss": 0.6821, "step": 2645 }, { "epoch": 0.20021943929476752, "grad_norm": 2.5441291332244873, "learning_rate": 1.6912271692922216e-05, "loss": 0.8005, "step": 2646 }, { "epoch": 0.20029510801710113, "grad_norm": 3.0294198989868164, "learning_rate": 1.691119812589118e-05, "loss": 0.7918, "step": 2647 }, { "epoch": 0.20037077673943476, "grad_norm": 2.6301164627075195, "learning_rate": 1.6910124063435322e-05, "loss": 0.7959, "step": 2648 }, { "epoch": 0.20044644546176837, "grad_norm": 6.36815071105957, "learning_rate": 1.6909049505621912e-05, "loss": 0.8501, "step": 2649 }, { "epoch": 0.200522114184102, "grad_norm": 11.723066329956055, "learning_rate": 1.6907974452518245e-05, "loss": 0.6644, "step": 2650 }, { "epoch": 0.20059778290643562, "grad_norm": 30.52318572998047, "learning_rate": 1.690689890419164e-05, "loss": 0.8288, "step": 2651 }, { "epoch": 0.20067345162876926, "grad_norm": 2.54634165763855, "learning_rate": 1.6905822860709446e-05, "loss": 0.807, "step": 2652 }, { "epoch": 0.20074912035110287, "grad_norm": 2.408019542694092, "learning_rate": 1.690474632213906e-05, "loss": 0.9354, "step": 2653 }, { "epoch": 0.20082478907343648, "grad_norm": 3.3913652896881104, "learning_rate": 1.690366928854789e-05, "loss": 0.8307, "step": 2654 }, { "epoch": 0.20090045779577012, "grad_norm": 3.0079503059387207, "learning_rate": 1.6902591760003387e-05, "loss": 0.6633, "step": 2655 }, { "epoch": 0.20097612651810373, "grad_norm": 2.45389461517334, "learning_rate": 1.6901513736573027e-05, "loss": 0.7959, "step": 2656 }, { "epoch": 0.20105179524043737, "grad_norm": 2.300036907196045, "learning_rate": 1.690043521832432e-05, "loss": 0.7693, "step": 2657 }, { "epoch": 0.20112746396277098, "grad_norm": 2.469834566116333, "learning_rate": 1.6899356205324807e-05, "loss": 0.8985, "step": 2658 }, { "epoch": 0.20120313268510462, "grad_norm": 2.4078941345214844, "learning_rate": 1.6898276697642056e-05, "loss": 0.8893, "step": 2659 }, { "epoch": 0.20127880140743823, "grad_norm": 2.462860584259033, "learning_rate": 1.6897196695343672e-05, "loss": 0.8923, "step": 2660 }, { "epoch": 0.20135447012977187, "grad_norm": 2.2678468227386475, "learning_rate": 1.6896116198497295e-05, "loss": 0.8364, "step": 2661 }, { "epoch": 0.20143013885210548, "grad_norm": 2.6497673988342285, "learning_rate": 1.6895035207170577e-05, "loss": 0.7843, "step": 2662 }, { "epoch": 0.20150580757443912, "grad_norm": 2.344269037246704, "learning_rate": 1.6893953721431218e-05, "loss": 0.7275, "step": 2663 }, { "epoch": 0.20158147629677273, "grad_norm": 2.5566043853759766, "learning_rate": 1.689287174134695e-05, "loss": 0.856, "step": 2664 }, { "epoch": 0.20165714501910637, "grad_norm": 2.8160223960876465, "learning_rate": 1.689178926698552e-05, "loss": 0.7982, "step": 2665 }, { "epoch": 0.20173281374143998, "grad_norm": 5.079640865325928, "learning_rate": 1.6890706298414722e-05, "loss": 0.8701, "step": 2666 }, { "epoch": 0.2018084824637736, "grad_norm": 2.1939332485198975, "learning_rate": 1.6889622835702372e-05, "loss": 0.6718, "step": 2667 }, { "epoch": 0.20188415118610722, "grad_norm": 2.220892906188965, "learning_rate": 1.6888538878916328e-05, "loss": 0.6956, "step": 2668 }, { "epoch": 0.20195981990844084, "grad_norm": 2.5196173191070557, "learning_rate": 1.688745442812446e-05, "loss": 0.864, "step": 2669 }, { "epoch": 0.20203548863077447, "grad_norm": 2.502357244491577, "learning_rate": 1.6886369483394683e-05, "loss": 0.7481, "step": 2670 }, { "epoch": 0.20211115735310808, "grad_norm": 3.1497068405151367, "learning_rate": 1.6885284044794946e-05, "loss": 0.7413, "step": 2671 }, { "epoch": 0.20218682607544172, "grad_norm": 2.358307361602783, "learning_rate": 1.6884198112393216e-05, "loss": 0.8536, "step": 2672 }, { "epoch": 0.20226249479777533, "grad_norm": 3.1044929027557373, "learning_rate": 1.68831116862575e-05, "loss": 0.8081, "step": 2673 }, { "epoch": 0.20233816352010897, "grad_norm": 2.201646327972412, "learning_rate": 1.6882024766455832e-05, "loss": 0.9349, "step": 2674 }, { "epoch": 0.20241383224244258, "grad_norm": 2.6423752307891846, "learning_rate": 1.6880937353056283e-05, "loss": 0.7464, "step": 2675 }, { "epoch": 0.20248950096477622, "grad_norm": 2.547576427459717, "learning_rate": 1.6879849446126942e-05, "loss": 0.6216, "step": 2676 }, { "epoch": 0.20256516968710983, "grad_norm": 2.7774250507354736, "learning_rate": 1.6878761045735946e-05, "loss": 0.828, "step": 2677 }, { "epoch": 0.20264083840944347, "grad_norm": 2.6742935180664062, "learning_rate": 1.6877672151951446e-05, "loss": 0.7657, "step": 2678 }, { "epoch": 0.20271650713177708, "grad_norm": 2.081855535507202, "learning_rate": 1.687658276484164e-05, "loss": 0.7594, "step": 2679 }, { "epoch": 0.2027921758541107, "grad_norm": 2.9770843982696533, "learning_rate": 1.6875492884474744e-05, "loss": 0.777, "step": 2680 }, { "epoch": 0.20286784457644433, "grad_norm": 2.1680080890655518, "learning_rate": 1.6874402510919013e-05, "loss": 0.8131, "step": 2681 }, { "epoch": 0.20294351329877794, "grad_norm": 2.5214853286743164, "learning_rate": 1.6873311644242726e-05, "loss": 0.8729, "step": 2682 }, { "epoch": 0.20301918202111158, "grad_norm": 2.355656862258911, "learning_rate": 1.68722202845142e-05, "loss": 0.7149, "step": 2683 }, { "epoch": 0.2030948507434452, "grad_norm": 2.722672939300537, "learning_rate": 1.6871128431801776e-05, "loss": 0.7906, "step": 2684 }, { "epoch": 0.20317051946577883, "grad_norm": 2.526291608810425, "learning_rate": 1.6870036086173833e-05, "loss": 0.9885, "step": 2685 }, { "epoch": 0.20324618818811244, "grad_norm": 2.7537612915039062, "learning_rate": 1.686894324769877e-05, "loss": 0.7336, "step": 2686 }, { "epoch": 0.20332185691044607, "grad_norm": 2.314716100692749, "learning_rate": 1.686784991644504e-05, "loss": 0.6739, "step": 2687 }, { "epoch": 0.20339752563277969, "grad_norm": 2.298309087753296, "learning_rate": 1.6866756092481092e-05, "loss": 0.655, "step": 2688 }, { "epoch": 0.20347319435511332, "grad_norm": 2.149913787841797, "learning_rate": 1.6865661775875437e-05, "loss": 0.688, "step": 2689 }, { "epoch": 0.20354886307744693, "grad_norm": 2.236656904220581, "learning_rate": 1.68645669666966e-05, "loss": 0.8814, "step": 2690 }, { "epoch": 0.20362453179978057, "grad_norm": 2.557054042816162, "learning_rate": 1.686347166501314e-05, "loss": 0.8819, "step": 2691 }, { "epoch": 0.20370020052211418, "grad_norm": 7.46500301361084, "learning_rate": 1.6862375870893653e-05, "loss": 0.807, "step": 2692 }, { "epoch": 0.2037758692444478, "grad_norm": 2.449782609939575, "learning_rate": 1.686127958440676e-05, "loss": 0.6193, "step": 2693 }, { "epoch": 0.20385153796678143, "grad_norm": 2.736797571182251, "learning_rate": 1.6860182805621112e-05, "loss": 0.9145, "step": 2694 }, { "epoch": 0.20392720668911504, "grad_norm": 2.61444354057312, "learning_rate": 1.6859085534605395e-05, "loss": 0.8425, "step": 2695 }, { "epoch": 0.20400287541144868, "grad_norm": 2.4622318744659424, "learning_rate": 1.6857987771428323e-05, "loss": 0.9507, "step": 2696 }, { "epoch": 0.2040785441337823, "grad_norm": 2.3272974491119385, "learning_rate": 1.6856889516158637e-05, "loss": 0.774, "step": 2697 }, { "epoch": 0.20415421285611593, "grad_norm": 2.636510133743286, "learning_rate": 1.685579076886512e-05, "loss": 0.7583, "step": 2698 }, { "epoch": 0.20422988157844954, "grad_norm": 5.449431896209717, "learning_rate": 1.6854691529616578e-05, "loss": 0.7954, "step": 2699 }, { "epoch": 0.20430555030078318, "grad_norm": 2.3265273571014404, "learning_rate": 1.6853591798481845e-05, "loss": 0.6401, "step": 2700 }, { "epoch": 0.2043812190231168, "grad_norm": 2.6790006160736084, "learning_rate": 1.685249157552979e-05, "loss": 0.9875, "step": 2701 }, { "epoch": 0.20445688774545043, "grad_norm": 2.484577178955078, "learning_rate": 1.6851390860829317e-05, "loss": 0.7331, "step": 2702 }, { "epoch": 0.20453255646778404, "grad_norm": 2.061288833618164, "learning_rate": 1.6850289654449355e-05, "loss": 0.7615, "step": 2703 }, { "epoch": 0.20460822519011768, "grad_norm": 1.9732778072357178, "learning_rate": 1.684918795645886e-05, "loss": 0.7419, "step": 2704 }, { "epoch": 0.2046838939124513, "grad_norm": 2.913220167160034, "learning_rate": 1.684808576692683e-05, "loss": 0.7499, "step": 2705 }, { "epoch": 0.20475956263478493, "grad_norm": 1.9376118183135986, "learning_rate": 1.6846983085922287e-05, "loss": 0.8705, "step": 2706 }, { "epoch": 0.20483523135711854, "grad_norm": 2.383751153945923, "learning_rate": 1.684587991351428e-05, "loss": 0.8206, "step": 2707 }, { "epoch": 0.20491090007945215, "grad_norm": 2.5557644367218018, "learning_rate": 1.68447762497719e-05, "loss": 0.802, "step": 2708 }, { "epoch": 0.20498656880178578, "grad_norm": 2.486907958984375, "learning_rate": 1.6843672094764253e-05, "loss": 0.8066, "step": 2709 }, { "epoch": 0.2050622375241194, "grad_norm": 2.465059757232666, "learning_rate": 1.6842567448560494e-05, "loss": 0.8275, "step": 2710 }, { "epoch": 0.20513790624645303, "grad_norm": 2.361616611480713, "learning_rate": 1.6841462311229796e-05, "loss": 0.7221, "step": 2711 }, { "epoch": 0.20521357496878664, "grad_norm": 3.4184422492980957, "learning_rate": 1.6840356682841362e-05, "loss": 0.6928, "step": 2712 }, { "epoch": 0.20528924369112028, "grad_norm": 2.329210042953491, "learning_rate": 1.6839250563464436e-05, "loss": 0.7858, "step": 2713 }, { "epoch": 0.2053649124134539, "grad_norm": 2.920400619506836, "learning_rate": 1.6838143953168285e-05, "loss": 0.7444, "step": 2714 }, { "epoch": 0.20544058113578753, "grad_norm": 2.222848415374756, "learning_rate": 1.6837036852022205e-05, "loss": 0.7223, "step": 2715 }, { "epoch": 0.20551624985812114, "grad_norm": 2.549514055252075, "learning_rate": 1.683592926009553e-05, "loss": 0.8735, "step": 2716 }, { "epoch": 0.20559191858045478, "grad_norm": 3.1271274089813232, "learning_rate": 1.6834821177457625e-05, "loss": 0.7578, "step": 2717 }, { "epoch": 0.2056675873027884, "grad_norm": 2.493976354598999, "learning_rate": 1.683371260417787e-05, "loss": 0.714, "step": 2718 }, { "epoch": 0.20574325602512203, "grad_norm": 2.656724214553833, "learning_rate": 1.6832603540325702e-05, "loss": 0.9036, "step": 2719 }, { "epoch": 0.20581892474745564, "grad_norm": 1.9792472124099731, "learning_rate": 1.683149398597056e-05, "loss": 0.8492, "step": 2720 }, { "epoch": 0.20589459346978925, "grad_norm": 3.5724356174468994, "learning_rate": 1.6830383941181938e-05, "loss": 0.7013, "step": 2721 }, { "epoch": 0.2059702621921229, "grad_norm": 2.62611985206604, "learning_rate": 1.6829273406029347e-05, "loss": 0.7121, "step": 2722 }, { "epoch": 0.2060459309144565, "grad_norm": 2.143665075302124, "learning_rate": 1.6828162380582334e-05, "loss": 0.686, "step": 2723 }, { "epoch": 0.20612159963679014, "grad_norm": 2.061655044555664, "learning_rate": 1.682705086491047e-05, "loss": 0.9111, "step": 2724 }, { "epoch": 0.20619726835912375, "grad_norm": 2.1191210746765137, "learning_rate": 1.6825938859083365e-05, "loss": 0.7933, "step": 2725 }, { "epoch": 0.20627293708145739, "grad_norm": 2.6450743675231934, "learning_rate": 1.6824826363170658e-05, "loss": 0.6982, "step": 2726 }, { "epoch": 0.206348605803791, "grad_norm": 1.9432772397994995, "learning_rate": 1.6823713377242015e-05, "loss": 0.7923, "step": 2727 }, { "epoch": 0.20642427452612463, "grad_norm": 2.247926712036133, "learning_rate": 1.6822599901367134e-05, "loss": 0.7163, "step": 2728 }, { "epoch": 0.20649994324845825, "grad_norm": 2.1911559104919434, "learning_rate": 1.6821485935615748e-05, "loss": 0.7813, "step": 2729 }, { "epoch": 0.20657561197079188, "grad_norm": 2.491403341293335, "learning_rate": 1.6820371480057613e-05, "loss": 0.8105, "step": 2730 }, { "epoch": 0.2066512806931255, "grad_norm": 2.1198718547821045, "learning_rate": 1.6819256534762525e-05, "loss": 0.8581, "step": 2731 }, { "epoch": 0.20672694941545913, "grad_norm": 2.3443424701690674, "learning_rate": 1.68181410998003e-05, "loss": 0.791, "step": 2732 }, { "epoch": 0.20680261813779274, "grad_norm": 1.8470584154129028, "learning_rate": 1.6817025175240793e-05, "loss": 0.8894, "step": 2733 }, { "epoch": 0.20687828686012635, "grad_norm": 2.2806928157806396, "learning_rate": 1.6815908761153887e-05, "loss": 0.6281, "step": 2734 }, { "epoch": 0.20695395558246, "grad_norm": 2.3737926483154297, "learning_rate": 1.681479185760949e-05, "loss": 0.6816, "step": 2735 }, { "epoch": 0.2070296243047936, "grad_norm": 2.267542600631714, "learning_rate": 1.681367446467756e-05, "loss": 0.7355, "step": 2736 }, { "epoch": 0.20710529302712724, "grad_norm": 2.259472608566284, "learning_rate": 1.6812556582428052e-05, "loss": 0.7937, "step": 2737 }, { "epoch": 0.20718096174946085, "grad_norm": 2.7698655128479004, "learning_rate": 1.6811438210930987e-05, "loss": 0.8421, "step": 2738 }, { "epoch": 0.2072566304717945, "grad_norm": 2.1473047733306885, "learning_rate": 1.6810319350256397e-05, "loss": 0.6328, "step": 2739 }, { "epoch": 0.2073322991941281, "grad_norm": 2.5372629165649414, "learning_rate": 1.6809200000474347e-05, "loss": 0.7475, "step": 2740 }, { "epoch": 0.20740796791646174, "grad_norm": 2.0664265155792236, "learning_rate": 1.6808080161654935e-05, "loss": 0.6881, "step": 2741 }, { "epoch": 0.20748363663879535, "grad_norm": 4.4088311195373535, "learning_rate": 1.6806959833868288e-05, "loss": 0.705, "step": 2742 }, { "epoch": 0.207559305361129, "grad_norm": 3.005873918533325, "learning_rate": 1.6805839017184565e-05, "loss": 0.8028, "step": 2743 }, { "epoch": 0.2076349740834626, "grad_norm": 2.4871227741241455, "learning_rate": 1.6804717711673957e-05, "loss": 0.8642, "step": 2744 }, { "epoch": 0.20771064280579624, "grad_norm": 2.4536328315734863, "learning_rate": 1.680359591740668e-05, "loss": 0.6728, "step": 2745 }, { "epoch": 0.20778631152812985, "grad_norm": 2.521181583404541, "learning_rate": 1.680247363445299e-05, "loss": 0.6283, "step": 2746 }, { "epoch": 0.20786198025046346, "grad_norm": 2.403087615966797, "learning_rate": 1.680135086288316e-05, "loss": 0.7569, "step": 2747 }, { "epoch": 0.2079376489727971, "grad_norm": 2.606722116470337, "learning_rate": 1.6800227602767513e-05, "loss": 0.8054, "step": 2748 }, { "epoch": 0.2080133176951307, "grad_norm": 2.361828088760376, "learning_rate": 1.679910385417638e-05, "loss": 0.8643, "step": 2749 }, { "epoch": 0.20808898641746434, "grad_norm": 3.0654191970825195, "learning_rate": 1.679797961718014e-05, "loss": 0.8303, "step": 2750 }, { "epoch": 0.20816465513979795, "grad_norm": 2.4694888591766357, "learning_rate": 1.6796854891849195e-05, "loss": 0.9629, "step": 2751 }, { "epoch": 0.2082403238621316, "grad_norm": 2.855731964111328, "learning_rate": 1.6795729678253977e-05, "loss": 0.9194, "step": 2752 }, { "epoch": 0.2083159925844652, "grad_norm": 2.1465506553649902, "learning_rate": 1.6794603976464953e-05, "loss": 0.7555, "step": 2753 }, { "epoch": 0.20839166130679884, "grad_norm": 2.5873541831970215, "learning_rate": 1.6793477786552618e-05, "loss": 0.7371, "step": 2754 }, { "epoch": 0.20846733002913245, "grad_norm": 2.087071180343628, "learning_rate": 1.679235110858749e-05, "loss": 0.6777, "step": 2755 }, { "epoch": 0.2085429987514661, "grad_norm": 3.9847989082336426, "learning_rate": 1.679122394264014e-05, "loss": 0.8945, "step": 2756 }, { "epoch": 0.2086186674737997, "grad_norm": 2.593203067779541, "learning_rate": 1.6790096288781148e-05, "loss": 0.6988, "step": 2757 }, { "epoch": 0.20869433619613334, "grad_norm": 2.5116524696350098, "learning_rate": 1.6788968147081126e-05, "loss": 0.812, "step": 2758 }, { "epoch": 0.20877000491846695, "grad_norm": 2.77005672454834, "learning_rate": 1.6787839517610727e-05, "loss": 0.877, "step": 2759 }, { "epoch": 0.2088456736408006, "grad_norm": 2.331638813018799, "learning_rate": 1.6786710400440627e-05, "loss": 0.8041, "step": 2760 }, { "epoch": 0.2089213423631342, "grad_norm": 2.0847373008728027, "learning_rate": 1.678558079564154e-05, "loss": 0.8226, "step": 2761 }, { "epoch": 0.2089970110854678, "grad_norm": 2.118413209915161, "learning_rate": 1.6784450703284197e-05, "loss": 0.8281, "step": 2762 }, { "epoch": 0.20907267980780145, "grad_norm": 2.4616172313690186, "learning_rate": 1.6783320123439376e-05, "loss": 0.7126, "step": 2763 }, { "epoch": 0.20914834853013506, "grad_norm": 2.1653876304626465, "learning_rate": 1.6782189056177875e-05, "loss": 0.7439, "step": 2764 }, { "epoch": 0.2092240172524687, "grad_norm": 2.1744189262390137, "learning_rate": 1.6781057501570522e-05, "loss": 0.8306, "step": 2765 }, { "epoch": 0.2092996859748023, "grad_norm": 2.326197624206543, "learning_rate": 1.6779925459688186e-05, "loss": 0.7588, "step": 2766 }, { "epoch": 0.20937535469713595, "grad_norm": 2.3091301918029785, "learning_rate": 1.677879293060175e-05, "loss": 0.7054, "step": 2767 }, { "epoch": 0.20945102341946956, "grad_norm": 2.6318981647491455, "learning_rate": 1.6777659914382144e-05, "loss": 0.8123, "step": 2768 }, { "epoch": 0.2095266921418032, "grad_norm": 2.382268190383911, "learning_rate": 1.6776526411100315e-05, "loss": 0.815, "step": 2769 }, { "epoch": 0.2096023608641368, "grad_norm": 2.143889904022217, "learning_rate": 1.6775392420827253e-05, "loss": 0.7313, "step": 2770 }, { "epoch": 0.20967802958647044, "grad_norm": 2.355656385421753, "learning_rate": 1.6774257943633967e-05, "loss": 0.7956, "step": 2771 }, { "epoch": 0.20975369830880405, "grad_norm": 2.269749641418457, "learning_rate": 1.6773122979591503e-05, "loss": 0.7962, "step": 2772 }, { "epoch": 0.2098293670311377, "grad_norm": 2.584016799926758, "learning_rate": 1.6771987528770938e-05, "loss": 0.8603, "step": 2773 }, { "epoch": 0.2099050357534713, "grad_norm": 2.4904229640960693, "learning_rate": 1.6770851591243378e-05, "loss": 0.7932, "step": 2774 }, { "epoch": 0.2099807044758049, "grad_norm": 3.181654453277588, "learning_rate": 1.6769715167079953e-05, "loss": 0.9061, "step": 2775 }, { "epoch": 0.21005637319813855, "grad_norm": 2.1907131671905518, "learning_rate": 1.6768578256351835e-05, "loss": 0.7714, "step": 2776 }, { "epoch": 0.21013204192047216, "grad_norm": 2.349133253097534, "learning_rate": 1.6767440859130222e-05, "loss": 0.8765, "step": 2777 }, { "epoch": 0.2102077106428058, "grad_norm": 2.340827226638794, "learning_rate": 1.6766302975486342e-05, "loss": 0.8702, "step": 2778 }, { "epoch": 0.2102833793651394, "grad_norm": 2.352503776550293, "learning_rate": 1.6765164605491445e-05, "loss": 0.6379, "step": 2779 }, { "epoch": 0.21035904808747305, "grad_norm": 2.20408034324646, "learning_rate": 1.6764025749216826e-05, "loss": 0.6901, "step": 2780 }, { "epoch": 0.21043471680980666, "grad_norm": 2.612621784210205, "learning_rate": 1.6762886406733803e-05, "loss": 0.6788, "step": 2781 }, { "epoch": 0.2105103855321403, "grad_norm": 2.921649932861328, "learning_rate": 1.6761746578113727e-05, "loss": 0.7544, "step": 2782 }, { "epoch": 0.2105860542544739, "grad_norm": 3.306110382080078, "learning_rate": 1.6760606263427975e-05, "loss": 0.7943, "step": 2783 }, { "epoch": 0.21066172297680755, "grad_norm": 2.2433836460113525, "learning_rate": 1.675946546274796e-05, "loss": 0.9816, "step": 2784 }, { "epoch": 0.21073739169914116, "grad_norm": 2.480579137802124, "learning_rate": 1.6758324176145117e-05, "loss": 0.7943, "step": 2785 }, { "epoch": 0.2108130604214748, "grad_norm": 3.2746899127960205, "learning_rate": 1.675718240369092e-05, "loss": 0.8868, "step": 2786 }, { "epoch": 0.2108887291438084, "grad_norm": 2.4543137550354004, "learning_rate": 1.675604014545687e-05, "loss": 0.7192, "step": 2787 }, { "epoch": 0.21096439786614202, "grad_norm": 2.593430519104004, "learning_rate": 1.6754897401514504e-05, "loss": 0.7476, "step": 2788 }, { "epoch": 0.21104006658847566, "grad_norm": 2.6094298362731934, "learning_rate": 1.675375417193538e-05, "loss": 0.8261, "step": 2789 }, { "epoch": 0.21111573531080927, "grad_norm": 2.957797050476074, "learning_rate": 1.6752610456791093e-05, "loss": 0.777, "step": 2790 }, { "epoch": 0.2111914040331429, "grad_norm": 3.8428449630737305, "learning_rate": 1.6751466256153257e-05, "loss": 0.8653, "step": 2791 }, { "epoch": 0.21126707275547651, "grad_norm": 2.952915668487549, "learning_rate": 1.675032157009354e-05, "loss": 0.7709, "step": 2792 }, { "epoch": 0.21134274147781015, "grad_norm": 2.7285871505737305, "learning_rate": 1.6749176398683616e-05, "loss": 0.7093, "step": 2793 }, { "epoch": 0.21141841020014376, "grad_norm": 16.232563018798828, "learning_rate": 1.67480307419952e-05, "loss": 0.8038, "step": 2794 }, { "epoch": 0.2114940789224774, "grad_norm": 2.1137609481811523, "learning_rate": 1.6746884600100038e-05, "loss": 0.8155, "step": 2795 }, { "epoch": 0.211569747644811, "grad_norm": 2.6953883171081543, "learning_rate": 1.674573797306991e-05, "loss": 0.736, "step": 2796 }, { "epoch": 0.21164541636714465, "grad_norm": 2.4279799461364746, "learning_rate": 1.6744590860976615e-05, "loss": 0.7183, "step": 2797 }, { "epoch": 0.21172108508947826, "grad_norm": 2.5715172290802, "learning_rate": 1.6743443263891994e-05, "loss": 0.8558, "step": 2798 }, { "epoch": 0.2117967538118119, "grad_norm": 2.9246373176574707, "learning_rate": 1.6742295181887908e-05, "loss": 0.8916, "step": 2799 }, { "epoch": 0.2118724225341455, "grad_norm": 2.751868486404419, "learning_rate": 1.6741146615036255e-05, "loss": 0.9153, "step": 2800 }, { "epoch": 0.21194809125647912, "grad_norm": 2.7025206089019775, "learning_rate": 1.6739997563408967e-05, "loss": 0.6688, "step": 2801 }, { "epoch": 0.21202375997881276, "grad_norm": 2.7657957077026367, "learning_rate": 1.6738848027077994e-05, "loss": 0.9089, "step": 2802 }, { "epoch": 0.21209942870114637, "grad_norm": 2.771183967590332, "learning_rate": 1.6737698006115326e-05, "loss": 0.8091, "step": 2803 }, { "epoch": 0.21217509742348, "grad_norm": 2.281928300857544, "learning_rate": 1.6736547500592985e-05, "loss": 0.7638, "step": 2804 }, { "epoch": 0.21225076614581362, "grad_norm": 2.5218539237976074, "learning_rate": 1.673539651058302e-05, "loss": 0.9364, "step": 2805 }, { "epoch": 0.21232643486814726, "grad_norm": 2.1761555671691895, "learning_rate": 1.6734245036157498e-05, "loss": 0.7687, "step": 2806 }, { "epoch": 0.21240210359048087, "grad_norm": 2.418473720550537, "learning_rate": 1.6733093077388543e-05, "loss": 0.676, "step": 2807 }, { "epoch": 0.2124777723128145, "grad_norm": 1.9938814640045166, "learning_rate": 1.673194063434828e-05, "loss": 0.745, "step": 2808 }, { "epoch": 0.21255344103514812, "grad_norm": 2.486959934234619, "learning_rate": 1.6730787707108895e-05, "loss": 0.8677, "step": 2809 }, { "epoch": 0.21262910975748175, "grad_norm": 2.121563673019409, "learning_rate": 1.6729634295742573e-05, "loss": 0.7888, "step": 2810 }, { "epoch": 0.21270477847981537, "grad_norm": 2.618818759918213, "learning_rate": 1.6728480400321553e-05, "loss": 0.6763, "step": 2811 }, { "epoch": 0.212780447202149, "grad_norm": 2.66679310798645, "learning_rate": 1.6727326020918095e-05, "loss": 0.761, "step": 2812 }, { "epoch": 0.21285611592448261, "grad_norm": 3.1060845851898193, "learning_rate": 1.6726171157604486e-05, "loss": 0.7265, "step": 2813 }, { "epoch": 0.21293178464681625, "grad_norm": 2.5589113235473633, "learning_rate": 1.672501581045305e-05, "loss": 0.8843, "step": 2814 }, { "epoch": 0.21300745336914986, "grad_norm": 2.34407901763916, "learning_rate": 1.672385997953614e-05, "loss": 0.8115, "step": 2815 }, { "epoch": 0.21308312209148347, "grad_norm": 2.688868522644043, "learning_rate": 1.6722703664926135e-05, "loss": 0.6882, "step": 2816 }, { "epoch": 0.2131587908138171, "grad_norm": 2.3896734714508057, "learning_rate": 1.672154686669545e-05, "loss": 0.7876, "step": 2817 }, { "epoch": 0.21323445953615072, "grad_norm": 2.3544201850891113, "learning_rate": 1.6720389584916525e-05, "loss": 0.8239, "step": 2818 }, { "epoch": 0.21331012825848436, "grad_norm": 2.2516539096832275, "learning_rate": 1.671923181966183e-05, "loss": 0.7131, "step": 2819 }, { "epoch": 0.21338579698081797, "grad_norm": 3.938749074935913, "learning_rate": 1.671807357100387e-05, "loss": 0.7547, "step": 2820 }, { "epoch": 0.2134614657031516, "grad_norm": 2.0515267848968506, "learning_rate": 1.6716914839015185e-05, "loss": 0.6756, "step": 2821 }, { "epoch": 0.21353713442548522, "grad_norm": 2.133115768432617, "learning_rate": 1.6715755623768334e-05, "loss": 0.7228, "step": 2822 }, { "epoch": 0.21361280314781886, "grad_norm": 2.3022468090057373, "learning_rate": 1.6714595925335906e-05, "loss": 0.8373, "step": 2823 }, { "epoch": 0.21368847187015247, "grad_norm": 2.543943166732788, "learning_rate": 1.671343574379053e-05, "loss": 0.9197, "step": 2824 }, { "epoch": 0.2137641405924861, "grad_norm": 2.1154918670654297, "learning_rate": 1.6712275079204863e-05, "loss": 0.9015, "step": 2825 }, { "epoch": 0.21383980931481972, "grad_norm": 2.0648181438446045, "learning_rate": 1.671111393165158e-05, "loss": 0.8252, "step": 2826 }, { "epoch": 0.21391547803715336, "grad_norm": 2.3254873752593994, "learning_rate": 1.6709952301203405e-05, "loss": 0.7808, "step": 2827 }, { "epoch": 0.21399114675948697, "grad_norm": 2.4721736907958984, "learning_rate": 1.670879018793308e-05, "loss": 0.7166, "step": 2828 }, { "epoch": 0.21406681548182058, "grad_norm": 2.7000808715820312, "learning_rate": 1.6707627591913382e-05, "loss": 0.8353, "step": 2829 }, { "epoch": 0.21414248420415422, "grad_norm": 2.1937551498413086, "learning_rate": 1.6706464513217115e-05, "loss": 0.874, "step": 2830 }, { "epoch": 0.21421815292648783, "grad_norm": 4.846593379974365, "learning_rate": 1.670530095191711e-05, "loss": 0.9048, "step": 2831 }, { "epoch": 0.21429382164882146, "grad_norm": 2.571143627166748, "learning_rate": 1.6704136908086242e-05, "loss": 0.871, "step": 2832 }, { "epoch": 0.21436949037115507, "grad_norm": 2.5679643154144287, "learning_rate": 1.67029723817974e-05, "loss": 0.7586, "step": 2833 }, { "epoch": 0.2144451590934887, "grad_norm": 2.327501058578491, "learning_rate": 1.670180737312351e-05, "loss": 0.7948, "step": 2834 }, { "epoch": 0.21452082781582232, "grad_norm": 2.3081796169281006, "learning_rate": 1.670064188213754e-05, "loss": 0.8191, "step": 2835 }, { "epoch": 0.21459649653815596, "grad_norm": 2.2802083492279053, "learning_rate": 1.669947590891246e-05, "loss": 0.7863, "step": 2836 }, { "epoch": 0.21467216526048957, "grad_norm": 1.9905712604522705, "learning_rate": 1.6698309453521298e-05, "loss": 0.6816, "step": 2837 }, { "epoch": 0.2147478339828232, "grad_norm": 2.6414380073547363, "learning_rate": 1.66971425160371e-05, "loss": 0.7644, "step": 2838 }, { "epoch": 0.21482350270515682, "grad_norm": 2.325744390487671, "learning_rate": 1.6695975096532946e-05, "loss": 0.9109, "step": 2839 }, { "epoch": 0.21489917142749046, "grad_norm": 2.3487441539764404, "learning_rate": 1.6694807195081934e-05, "loss": 0.7954, "step": 2840 }, { "epoch": 0.21497484014982407, "grad_norm": 2.7436139583587646, "learning_rate": 1.6693638811757206e-05, "loss": 0.9525, "step": 2841 }, { "epoch": 0.21505050887215768, "grad_norm": 2.626749038696289, "learning_rate": 1.6692469946631935e-05, "loss": 0.7477, "step": 2842 }, { "epoch": 0.21512617759449132, "grad_norm": 2.0960118770599365, "learning_rate": 1.6691300599779314e-05, "loss": 0.828, "step": 2843 }, { "epoch": 0.21520184631682493, "grad_norm": 2.273026704788208, "learning_rate": 1.6690130771272576e-05, "loss": 0.6678, "step": 2844 }, { "epoch": 0.21527751503915857, "grad_norm": 1.8988648653030396, "learning_rate": 1.6688960461184974e-05, "loss": 0.6153, "step": 2845 }, { "epoch": 0.21535318376149218, "grad_norm": 2.7627248764038086, "learning_rate": 1.6687789669589797e-05, "loss": 0.8108, "step": 2846 }, { "epoch": 0.21542885248382582, "grad_norm": 7.99500846862793, "learning_rate": 1.6686618396560365e-05, "loss": 0.7978, "step": 2847 }, { "epoch": 0.21550452120615943, "grad_norm": 2.0139832496643066, "learning_rate": 1.668544664217003e-05, "loss": 0.8848, "step": 2848 }, { "epoch": 0.21558018992849307, "grad_norm": 2.0754570960998535, "learning_rate": 1.668427440649217e-05, "loss": 0.8815, "step": 2849 }, { "epoch": 0.21565585865082668, "grad_norm": 2.176095485687256, "learning_rate": 1.668310168960019e-05, "loss": 0.7272, "step": 2850 }, { "epoch": 0.21573152737316031, "grad_norm": 2.36541748046875, "learning_rate": 1.668192849156753e-05, "loss": 0.6364, "step": 2851 }, { "epoch": 0.21580719609549393, "grad_norm": 2.3747830390930176, "learning_rate": 1.6680754812467666e-05, "loss": 0.8652, "step": 2852 }, { "epoch": 0.21588286481782756, "grad_norm": 2.5572152137756348, "learning_rate": 1.667958065237409e-05, "loss": 0.7099, "step": 2853 }, { "epoch": 0.21595853354016117, "grad_norm": 2.4014647006988525, "learning_rate": 1.6678406011360337e-05, "loss": 0.9067, "step": 2854 }, { "epoch": 0.21603420226249478, "grad_norm": 2.304295301437378, "learning_rate": 1.6677230889499966e-05, "loss": 0.6992, "step": 2855 }, { "epoch": 0.21610987098482842, "grad_norm": 1.8173857927322388, "learning_rate": 1.667605528686656e-05, "loss": 1.0161, "step": 2856 }, { "epoch": 0.21618553970716203, "grad_norm": 2.0855274200439453, "learning_rate": 1.6674879203533748e-05, "loss": 0.7699, "step": 2857 }, { "epoch": 0.21626120842949567, "grad_norm": 1.9764469861984253, "learning_rate": 1.6673702639575176e-05, "loss": 0.8007, "step": 2858 }, { "epoch": 0.21633687715182928, "grad_norm": 2.9846057891845703, "learning_rate": 1.6672525595064527e-05, "loss": 0.7783, "step": 2859 }, { "epoch": 0.21641254587416292, "grad_norm": 2.2411768436431885, "learning_rate": 1.667134807007551e-05, "loss": 0.6131, "step": 2860 }, { "epoch": 0.21648821459649653, "grad_norm": 2.099818229675293, "learning_rate": 1.6670170064681858e-05, "loss": 0.7569, "step": 2861 }, { "epoch": 0.21656388331883017, "grad_norm": 2.3399593830108643, "learning_rate": 1.6668991578957354e-05, "loss": 0.7872, "step": 2862 }, { "epoch": 0.21663955204116378, "grad_norm": 2.527578115463257, "learning_rate": 1.666781261297579e-05, "loss": 0.6493, "step": 2863 }, { "epoch": 0.21671522076349742, "grad_norm": 2.33992862701416, "learning_rate": 1.6666633166811004e-05, "loss": 0.7972, "step": 2864 }, { "epoch": 0.21679088948583103, "grad_norm": 2.1418380737304688, "learning_rate": 1.666545324053685e-05, "loss": 0.7548, "step": 2865 }, { "epoch": 0.21686655820816467, "grad_norm": 2.6442832946777344, "learning_rate": 1.6664272834227218e-05, "loss": 0.8697, "step": 2866 }, { "epoch": 0.21694222693049828, "grad_norm": 2.5152578353881836, "learning_rate": 1.666309194795603e-05, "loss": 0.9075, "step": 2867 }, { "epoch": 0.2170178956528319, "grad_norm": 2.834080696105957, "learning_rate": 1.6661910581797246e-05, "loss": 0.7793, "step": 2868 }, { "epoch": 0.21709356437516553, "grad_norm": 3.0419914722442627, "learning_rate": 1.6660728735824834e-05, "loss": 0.7984, "step": 2869 }, { "epoch": 0.21716923309749914, "grad_norm": 2.7962558269500732, "learning_rate": 1.6659546410112815e-05, "loss": 0.9006, "step": 2870 }, { "epoch": 0.21724490181983278, "grad_norm": 2.511221408843994, "learning_rate": 1.6658363604735224e-05, "loss": 0.8622, "step": 2871 }, { "epoch": 0.21732057054216639, "grad_norm": 2.652181386947632, "learning_rate": 1.6657180319766134e-05, "loss": 0.866, "step": 2872 }, { "epoch": 0.21739623926450002, "grad_norm": 2.3145172595977783, "learning_rate": 1.6655996555279645e-05, "loss": 0.8523, "step": 2873 }, { "epoch": 0.21747190798683363, "grad_norm": 2.5612776279449463, "learning_rate": 1.665481231134989e-05, "loss": 0.8403, "step": 2874 }, { "epoch": 0.21754757670916727, "grad_norm": 1.8979072570800781, "learning_rate": 1.665362758805103e-05, "loss": 0.679, "step": 2875 }, { "epoch": 0.21762324543150088, "grad_norm": 2.4776549339294434, "learning_rate": 1.6652442385457255e-05, "loss": 0.9507, "step": 2876 }, { "epoch": 0.21769891415383452, "grad_norm": 3.0830917358398438, "learning_rate": 1.6651256703642786e-05, "loss": 0.7769, "step": 2877 }, { "epoch": 0.21777458287616813, "grad_norm": 2.253445625305176, "learning_rate": 1.6650070542681876e-05, "loss": 0.7806, "step": 2878 }, { "epoch": 0.21785025159850177, "grad_norm": 2.626587152481079, "learning_rate": 1.6648883902648805e-05, "loss": 0.784, "step": 2879 }, { "epoch": 0.21792592032083538, "grad_norm": 2.3169028759002686, "learning_rate": 1.6647696783617887e-05, "loss": 0.594, "step": 2880 }, { "epoch": 0.21800158904316902, "grad_norm": 2.367800712585449, "learning_rate": 1.6646509185663458e-05, "loss": 0.8395, "step": 2881 }, { "epoch": 0.21807725776550263, "grad_norm": 2.1728861331939697, "learning_rate": 1.6645321108859894e-05, "loss": 0.8088, "step": 2882 }, { "epoch": 0.21815292648783624, "grad_norm": 2.3152570724487305, "learning_rate": 1.6644132553281592e-05, "loss": 0.6268, "step": 2883 }, { "epoch": 0.21822859521016988, "grad_norm": 4.68181037902832, "learning_rate": 1.6642943519002983e-05, "loss": 0.8115, "step": 2884 }, { "epoch": 0.2183042639325035, "grad_norm": 2.9083902835845947, "learning_rate": 1.6641754006098537e-05, "loss": 0.9005, "step": 2885 }, { "epoch": 0.21837993265483713, "grad_norm": 2.559480667114258, "learning_rate": 1.6640564014642732e-05, "loss": 0.7178, "step": 2886 }, { "epoch": 0.21845560137717074, "grad_norm": 2.6691970825195312, "learning_rate": 1.66393735447101e-05, "loss": 0.6153, "step": 2887 }, { "epoch": 0.21853127009950438, "grad_norm": 2.705824851989746, "learning_rate": 1.663818259637519e-05, "loss": 0.9264, "step": 2888 }, { "epoch": 0.218606938821838, "grad_norm": 2.3913021087646484, "learning_rate": 1.6636991169712577e-05, "loss": 0.6872, "step": 2889 }, { "epoch": 0.21868260754417163, "grad_norm": 2.4797730445861816, "learning_rate": 1.6635799264796877e-05, "loss": 0.8335, "step": 2890 }, { "epoch": 0.21875827626650524, "grad_norm": 2.379905939102173, "learning_rate": 1.663460688170273e-05, "loss": 0.8438, "step": 2891 }, { "epoch": 0.21883394498883887, "grad_norm": 2.6139721870422363, "learning_rate": 1.6633414020504805e-05, "loss": 0.7048, "step": 2892 }, { "epoch": 0.21890961371117248, "grad_norm": 2.9872958660125732, "learning_rate": 1.6632220681277806e-05, "loss": 0.7684, "step": 2893 }, { "epoch": 0.21898528243350612, "grad_norm": 1.7575455904006958, "learning_rate": 1.6631026864096465e-05, "loss": 0.9336, "step": 2894 }, { "epoch": 0.21906095115583973, "grad_norm": 2.184025287628174, "learning_rate": 1.6629832569035537e-05, "loss": 0.7969, "step": 2895 }, { "epoch": 0.21913661987817334, "grad_norm": 2.2860093116760254, "learning_rate": 1.6628637796169815e-05, "loss": 0.7119, "step": 2896 }, { "epoch": 0.21921228860050698, "grad_norm": 2.7359209060668945, "learning_rate": 1.6627442545574122e-05, "loss": 0.696, "step": 2897 }, { "epoch": 0.2192879573228406, "grad_norm": 2.2823524475097656, "learning_rate": 1.6626246817323307e-05, "loss": 0.7375, "step": 2898 }, { "epoch": 0.21936362604517423, "grad_norm": 2.211632490158081, "learning_rate": 1.6625050611492246e-05, "loss": 0.8292, "step": 2899 }, { "epoch": 0.21943929476750784, "grad_norm": 2.5959115028381348, "learning_rate": 1.6623853928155857e-05, "loss": 0.9269, "step": 2900 }, { "epoch": 0.21951496348984148, "grad_norm": 2.787301778793335, "learning_rate": 1.6622656767389077e-05, "loss": 0.8806, "step": 2901 }, { "epoch": 0.2195906322121751, "grad_norm": 2.558061361312866, "learning_rate": 1.6621459129266875e-05, "loss": 0.8496, "step": 2902 }, { "epoch": 0.21966630093450873, "grad_norm": 2.832548141479492, "learning_rate": 1.662026101386425e-05, "loss": 0.9092, "step": 2903 }, { "epoch": 0.21974196965684234, "grad_norm": 2.5366275310516357, "learning_rate": 1.6619062421256235e-05, "loss": 0.7832, "step": 2904 }, { "epoch": 0.21981763837917598, "grad_norm": 2.213609218597412, "learning_rate": 1.6617863351517885e-05, "loss": 0.9086, "step": 2905 }, { "epoch": 0.2198933071015096, "grad_norm": 2.412593126296997, "learning_rate": 1.6616663804724297e-05, "loss": 0.8328, "step": 2906 }, { "epoch": 0.21996897582384323, "grad_norm": 2.010378122329712, "learning_rate": 1.6615463780950583e-05, "loss": 0.8728, "step": 2907 }, { "epoch": 0.22004464454617684, "grad_norm": 2.071139335632324, "learning_rate": 1.66142632802719e-05, "loss": 0.6447, "step": 2908 }, { "epoch": 0.22012031326851045, "grad_norm": 2.2854831218719482, "learning_rate": 1.6613062302763417e-05, "loss": 0.7125, "step": 2909 }, { "epoch": 0.2201959819908441, "grad_norm": 2.3209803104400635, "learning_rate": 1.6611860848500354e-05, "loss": 0.7449, "step": 2910 }, { "epoch": 0.2202716507131777, "grad_norm": 2.6688926219940186, "learning_rate": 1.6610658917557942e-05, "loss": 0.7961, "step": 2911 }, { "epoch": 0.22034731943551134, "grad_norm": 2.9156992435455322, "learning_rate": 1.6609456510011454e-05, "loss": 0.7054, "step": 2912 }, { "epoch": 0.22042298815784495, "grad_norm": 2.726867437362671, "learning_rate": 1.6608253625936185e-05, "loss": 0.9102, "step": 2913 }, { "epoch": 0.22049865688017858, "grad_norm": 2.408320903778076, "learning_rate": 1.6607050265407473e-05, "loss": 0.6769, "step": 2914 }, { "epoch": 0.2205743256025122, "grad_norm": 2.9164679050445557, "learning_rate": 1.660584642850066e-05, "loss": 0.8634, "step": 2915 }, { "epoch": 0.22064999432484583, "grad_norm": 2.25538969039917, "learning_rate": 1.660464211529115e-05, "loss": 0.6191, "step": 2916 }, { "epoch": 0.22072566304717944, "grad_norm": 2.494601249694824, "learning_rate": 1.660343732585435e-05, "loss": 0.8541, "step": 2917 }, { "epoch": 0.22080133176951308, "grad_norm": 2.4575066566467285, "learning_rate": 1.6602232060265712e-05, "loss": 0.7013, "step": 2918 }, { "epoch": 0.2208770004918467, "grad_norm": 2.584141731262207, "learning_rate": 1.660102631860072e-05, "loss": 0.8288, "step": 2919 }, { "epoch": 0.22095266921418033, "grad_norm": 2.5871658325195312, "learning_rate": 1.659982010093487e-05, "loss": 0.8565, "step": 2920 }, { "epoch": 0.22102833793651394, "grad_norm": 3.7710883617401123, "learning_rate": 1.6598613407343707e-05, "loss": 0.9081, "step": 2921 }, { "epoch": 0.22110400665884755, "grad_norm": 2.9032387733459473, "learning_rate": 1.659740623790279e-05, "loss": 0.8016, "step": 2922 }, { "epoch": 0.2211796753811812, "grad_norm": 2.4792866706848145, "learning_rate": 1.6596198592687727e-05, "loss": 0.6606, "step": 2923 }, { "epoch": 0.2212553441035148, "grad_norm": 2.214895486831665, "learning_rate": 1.6594990471774135e-05, "loss": 0.7439, "step": 2924 }, { "epoch": 0.22133101282584844, "grad_norm": 2.056378126144409, "learning_rate": 1.659378187523768e-05, "loss": 0.794, "step": 2925 }, { "epoch": 0.22140668154818205, "grad_norm": 2.1232733726501465, "learning_rate": 1.659257280315404e-05, "loss": 0.7306, "step": 2926 }, { "epoch": 0.2214823502705157, "grad_norm": 2.4625091552734375, "learning_rate": 1.659136325559893e-05, "loss": 0.8396, "step": 2927 }, { "epoch": 0.2215580189928493, "grad_norm": 2.7897610664367676, "learning_rate": 1.6590153232648106e-05, "loss": 0.8533, "step": 2928 }, { "epoch": 0.22163368771518294, "grad_norm": 2.2553727626800537, "learning_rate": 1.6588942734377333e-05, "loss": 0.6447, "step": 2929 }, { "epoch": 0.22170935643751655, "grad_norm": 2.372699737548828, "learning_rate": 1.658773176086242e-05, "loss": 0.7301, "step": 2930 }, { "epoch": 0.22178502515985019, "grad_norm": 2.075169086456299, "learning_rate": 1.6586520312179203e-05, "loss": 0.7797, "step": 2931 }, { "epoch": 0.2218606938821838, "grad_norm": 1.9998425245285034, "learning_rate": 1.658530838840355e-05, "loss": 0.8402, "step": 2932 }, { "epoch": 0.22193636260451743, "grad_norm": 2.4670629501342773, "learning_rate": 1.658409598961135e-05, "loss": 0.7589, "step": 2933 }, { "epoch": 0.22201203132685104, "grad_norm": 1.9075826406478882, "learning_rate": 1.6582883115878526e-05, "loss": 0.7386, "step": 2934 }, { "epoch": 0.22208770004918468, "grad_norm": 2.3455803394317627, "learning_rate": 1.6581669767281037e-05, "loss": 0.8606, "step": 2935 }, { "epoch": 0.2221633687715183, "grad_norm": 3.289113998413086, "learning_rate": 1.6580455943894866e-05, "loss": 0.5393, "step": 2936 }, { "epoch": 0.2222390374938519, "grad_norm": 1.9734731912612915, "learning_rate": 1.6579241645796026e-05, "loss": 0.8515, "step": 2937 }, { "epoch": 0.22231470621618554, "grad_norm": 2.288149118423462, "learning_rate": 1.6578026873060556e-05, "loss": 0.7706, "step": 2938 }, { "epoch": 0.22239037493851915, "grad_norm": 2.2239696979522705, "learning_rate": 1.6576811625764537e-05, "loss": 0.882, "step": 2939 }, { "epoch": 0.2224660436608528, "grad_norm": 2.2956271171569824, "learning_rate": 1.6575595903984065e-05, "loss": 0.704, "step": 2940 }, { "epoch": 0.2225417123831864, "grad_norm": 2.0809905529022217, "learning_rate": 1.6574379707795277e-05, "loss": 1.0292, "step": 2941 }, { "epoch": 0.22261738110552004, "grad_norm": 2.3830835819244385, "learning_rate": 1.6573163037274333e-05, "loss": 0.6888, "step": 2942 }, { "epoch": 0.22269304982785365, "grad_norm": 2.550462007522583, "learning_rate": 1.6571945892497423e-05, "loss": 0.7566, "step": 2943 }, { "epoch": 0.2227687185501873, "grad_norm": 2.1050031185150146, "learning_rate": 1.6570728273540773e-05, "loss": 0.7667, "step": 2944 }, { "epoch": 0.2228443872725209, "grad_norm": 3.8642396926879883, "learning_rate": 1.6569510180480632e-05, "loss": 0.7821, "step": 2945 }, { "epoch": 0.22292005599485454, "grad_norm": 2.2704646587371826, "learning_rate": 1.656829161339328e-05, "loss": 0.8253, "step": 2946 }, { "epoch": 0.22299572471718815, "grad_norm": 4.2045111656188965, "learning_rate": 1.6567072572355026e-05, "loss": 0.7635, "step": 2947 }, { "epoch": 0.2230713934395218, "grad_norm": 2.0793330669403076, "learning_rate": 1.656585305744222e-05, "loss": 0.8678, "step": 2948 }, { "epoch": 0.2231470621618554, "grad_norm": 2.1388299465179443, "learning_rate": 1.6564633068731215e-05, "loss": 0.7312, "step": 2949 }, { "epoch": 0.223222730884189, "grad_norm": 2.120715856552124, "learning_rate": 1.6563412606298426e-05, "loss": 0.7689, "step": 2950 }, { "epoch": 0.22329839960652265, "grad_norm": 2.12400484085083, "learning_rate": 1.6562191670220272e-05, "loss": 0.6913, "step": 2951 }, { "epoch": 0.22337406832885626, "grad_norm": 2.987435817718506, "learning_rate": 1.656097026057322e-05, "loss": 0.7776, "step": 2952 }, { "epoch": 0.2234497370511899, "grad_norm": 2.098076105117798, "learning_rate": 1.6559748377433756e-05, "loss": 0.66, "step": 2953 }, { "epoch": 0.2235254057735235, "grad_norm": 2.531951904296875, "learning_rate": 1.6558526020878395e-05, "loss": 0.8598, "step": 2954 }, { "epoch": 0.22360107449585714, "grad_norm": 2.2897439002990723, "learning_rate": 1.655730319098369e-05, "loss": 0.8132, "step": 2955 }, { "epoch": 0.22367674321819075, "grad_norm": 2.279578924179077, "learning_rate": 1.6556079887826215e-05, "loss": 0.7632, "step": 2956 }, { "epoch": 0.2237524119405244, "grad_norm": 2.306779623031616, "learning_rate": 1.6554856111482576e-05, "loss": 0.8243, "step": 2957 }, { "epoch": 0.223828080662858, "grad_norm": 2.461686849594116, "learning_rate": 1.6553631862029413e-05, "loss": 0.7213, "step": 2958 }, { "epoch": 0.22390374938519164, "grad_norm": 2.0661559104919434, "learning_rate": 1.6552407139543393e-05, "loss": 0.8158, "step": 2959 }, { "epoch": 0.22397941810752525, "grad_norm": 2.2545764446258545, "learning_rate": 1.655118194410121e-05, "loss": 0.7361, "step": 2960 }, { "epoch": 0.2240550868298589, "grad_norm": 2.0152387619018555, "learning_rate": 1.6549956275779588e-05, "loss": 0.7121, "step": 2961 }, { "epoch": 0.2241307555521925, "grad_norm": 2.965620756149292, "learning_rate": 1.6548730134655286e-05, "loss": 0.6965, "step": 2962 }, { "epoch": 0.2242064242745261, "grad_norm": 2.6358821392059326, "learning_rate": 1.6547503520805087e-05, "loss": 0.7382, "step": 2963 }, { "epoch": 0.22428209299685975, "grad_norm": 3.588127613067627, "learning_rate": 1.6546276434305805e-05, "loss": 0.868, "step": 2964 }, { "epoch": 0.22435776171919336, "grad_norm": 2.1781110763549805, "learning_rate": 1.654504887523429e-05, "loss": 0.829, "step": 2965 }, { "epoch": 0.224433430441527, "grad_norm": 2.047546863555908, "learning_rate": 1.6543820843667405e-05, "loss": 0.6987, "step": 2966 }, { "epoch": 0.2245090991638606, "grad_norm": 2.5507969856262207, "learning_rate": 1.654259233968206e-05, "loss": 0.6387, "step": 2967 }, { "epoch": 0.22458476788619425, "grad_norm": 2.1714725494384766, "learning_rate": 1.654136336335519e-05, "loss": 0.8598, "step": 2968 }, { "epoch": 0.22466043660852786, "grad_norm": 2.6356706619262695, "learning_rate": 1.654013391476375e-05, "loss": 0.8211, "step": 2969 }, { "epoch": 0.2247361053308615, "grad_norm": 2.035926342010498, "learning_rate": 1.653890399398474e-05, "loss": 0.6791, "step": 2970 }, { "epoch": 0.2248117740531951, "grad_norm": 2.380887508392334, "learning_rate": 1.6537673601095178e-05, "loss": 0.8578, "step": 2971 }, { "epoch": 0.22488744277552875, "grad_norm": 2.41847562789917, "learning_rate": 1.6536442736172114e-05, "loss": 0.8885, "step": 2972 }, { "epoch": 0.22496311149786236, "grad_norm": 2.5144875049591064, "learning_rate": 1.653521139929263e-05, "loss": 0.8791, "step": 2973 }, { "epoch": 0.225038780220196, "grad_norm": 2.021480083465576, "learning_rate": 1.6533979590533838e-05, "loss": 0.8099, "step": 2974 }, { "epoch": 0.2251144489425296, "grad_norm": 2.1968541145324707, "learning_rate": 1.6532747309972876e-05, "loss": 0.762, "step": 2975 }, { "epoch": 0.22519011766486322, "grad_norm": 1.8280404806137085, "learning_rate": 1.6531514557686913e-05, "loss": 0.7402, "step": 2976 }, { "epoch": 0.22526578638719685, "grad_norm": 2.3831984996795654, "learning_rate": 1.6530281333753148e-05, "loss": 0.7373, "step": 2977 }, { "epoch": 0.22534145510953046, "grad_norm": 2.2614855766296387, "learning_rate": 1.6529047638248808e-05, "loss": 0.7542, "step": 2978 }, { "epoch": 0.2254171238318641, "grad_norm": 2.100390911102295, "learning_rate": 1.6527813471251158e-05, "loss": 0.7678, "step": 2979 }, { "epoch": 0.2254927925541977, "grad_norm": 2.33455491065979, "learning_rate": 1.6526578832837476e-05, "loss": 0.8819, "step": 2980 }, { "epoch": 0.22556846127653135, "grad_norm": 1.782531976699829, "learning_rate": 1.6525343723085085e-05, "loss": 0.6764, "step": 2981 }, { "epoch": 0.22564412999886496, "grad_norm": 2.2353334426879883, "learning_rate": 1.652410814207133e-05, "loss": 0.7287, "step": 2982 }, { "epoch": 0.2257197987211986, "grad_norm": 3.260190486907959, "learning_rate": 1.652287208987359e-05, "loss": 0.8681, "step": 2983 }, { "epoch": 0.2257954674435322, "grad_norm": 2.1882760524749756, "learning_rate": 1.6521635566569266e-05, "loss": 0.8207, "step": 2984 }, { "epoch": 0.22587113616586585, "grad_norm": 2.4284403324127197, "learning_rate": 1.6520398572235794e-05, "loss": 0.8992, "step": 2985 }, { "epoch": 0.22594680488819946, "grad_norm": 2.3258399963378906, "learning_rate": 1.6519161106950638e-05, "loss": 0.9157, "step": 2986 }, { "epoch": 0.2260224736105331, "grad_norm": 2.984490156173706, "learning_rate": 1.6517923170791298e-05, "loss": 0.8868, "step": 2987 }, { "epoch": 0.2260981423328667, "grad_norm": 2.800849437713623, "learning_rate": 1.651668476383529e-05, "loss": 0.7888, "step": 2988 }, { "epoch": 0.22617381105520035, "grad_norm": 2.9878838062286377, "learning_rate": 1.651544588616017e-05, "loss": 0.7345, "step": 2989 }, { "epoch": 0.22624947977753396, "grad_norm": 3.69124174118042, "learning_rate": 1.651420653784352e-05, "loss": 0.744, "step": 2990 }, { "epoch": 0.22632514849986757, "grad_norm": 2.3240010738372803, "learning_rate": 1.6512966718962958e-05, "loss": 0.7195, "step": 2991 }, { "epoch": 0.2264008172222012, "grad_norm": 2.4493560791015625, "learning_rate": 1.6511726429596115e-05, "loss": 0.8408, "step": 2992 }, { "epoch": 0.22647648594453482, "grad_norm": 2.2705140113830566, "learning_rate": 1.6510485669820668e-05, "loss": 0.601, "step": 2993 }, { "epoch": 0.22655215466686845, "grad_norm": 2.269789695739746, "learning_rate": 1.6509244439714317e-05, "loss": 0.8425, "step": 2994 }, { "epoch": 0.22662782338920207, "grad_norm": 2.8810999393463135, "learning_rate": 1.6508002739354793e-05, "loss": 0.7285, "step": 2995 }, { "epoch": 0.2267034921115357, "grad_norm": 2.548349142074585, "learning_rate": 1.650676056881985e-05, "loss": 0.7676, "step": 2996 }, { "epoch": 0.22677916083386931, "grad_norm": 3.094545841217041, "learning_rate": 1.6505517928187282e-05, "loss": 0.9013, "step": 2997 }, { "epoch": 0.22685482955620295, "grad_norm": 2.7249162197113037, "learning_rate": 1.6504274817534906e-05, "loss": 0.6787, "step": 2998 }, { "epoch": 0.22693049827853656, "grad_norm": 2.3722381591796875, "learning_rate": 1.650303123694057e-05, "loss": 0.6625, "step": 2999 }, { "epoch": 0.2270061670008702, "grad_norm": 2.4005649089813232, "learning_rate": 1.650178718648215e-05, "loss": 0.7726, "step": 3000 }, { "epoch": 0.2270818357232038, "grad_norm": 2.658586263656616, "learning_rate": 1.6500542666237553e-05, "loss": 0.7898, "step": 3001 }, { "epoch": 0.22715750444553745, "grad_norm": 2.125000238418579, "learning_rate": 1.649929767628471e-05, "loss": 0.7455, "step": 3002 }, { "epoch": 0.22723317316787106, "grad_norm": 2.5988640785217285, "learning_rate": 1.6498052216701595e-05, "loss": 0.7898, "step": 3003 }, { "epoch": 0.22730884189020467, "grad_norm": 2.0731637477874756, "learning_rate": 1.64968062875662e-05, "loss": 0.6588, "step": 3004 }, { "epoch": 0.2273845106125383, "grad_norm": 2.0830650329589844, "learning_rate": 1.6495559888956544e-05, "loss": 0.7186, "step": 3005 }, { "epoch": 0.22746017933487192, "grad_norm": 4.79794979095459, "learning_rate": 1.6494313020950687e-05, "loss": 0.7021, "step": 3006 }, { "epoch": 0.22753584805720556, "grad_norm": 3.0674867630004883, "learning_rate": 1.6493065683626706e-05, "loss": 0.639, "step": 3007 }, { "epoch": 0.22761151677953917, "grad_norm": 2.5474140644073486, "learning_rate": 1.6491817877062718e-05, "loss": 0.7456, "step": 3008 }, { "epoch": 0.2276871855018728, "grad_norm": 3.6565370559692383, "learning_rate": 1.6490569601336864e-05, "loss": 0.7771, "step": 3009 }, { "epoch": 0.22776285422420642, "grad_norm": 2.16518497467041, "learning_rate": 1.6489320856527312e-05, "loss": 0.7738, "step": 3010 }, { "epoch": 0.22783852294654006, "grad_norm": 2.3201420307159424, "learning_rate": 1.648807164271227e-05, "loss": 0.7261, "step": 3011 }, { "epoch": 0.22791419166887367, "grad_norm": 2.659714698791504, "learning_rate": 1.6486821959969954e-05, "loss": 0.7438, "step": 3012 }, { "epoch": 0.2279898603912073, "grad_norm": 2.2226853370666504, "learning_rate": 1.6485571808378637e-05, "loss": 0.6663, "step": 3013 }, { "epoch": 0.22806552911354092, "grad_norm": 2.8635661602020264, "learning_rate": 1.64843211880166e-05, "loss": 0.6857, "step": 3014 }, { "epoch": 0.22814119783587455, "grad_norm": 2.4834866523742676, "learning_rate": 1.6483070098962165e-05, "loss": 0.7628, "step": 3015 }, { "epoch": 0.22821686655820816, "grad_norm": 2.6139743328094482, "learning_rate": 1.6481818541293675e-05, "loss": 0.8111, "step": 3016 }, { "epoch": 0.22829253528054178, "grad_norm": 2.68338942527771, "learning_rate": 1.648056651508951e-05, "loss": 0.6424, "step": 3017 }, { "epoch": 0.2283682040028754, "grad_norm": 2.5253429412841797, "learning_rate": 1.6479314020428078e-05, "loss": 0.8043, "step": 3018 }, { "epoch": 0.22844387272520902, "grad_norm": 3.081286907196045, "learning_rate": 1.6478061057387804e-05, "loss": 0.785, "step": 3019 }, { "epoch": 0.22851954144754266, "grad_norm": 2.6442153453826904, "learning_rate": 1.6476807626047164e-05, "loss": 0.6959, "step": 3020 }, { "epoch": 0.22859521016987627, "grad_norm": 2.3536460399627686, "learning_rate": 1.6475553726484645e-05, "loss": 0.8099, "step": 3021 }, { "epoch": 0.2286708788922099, "grad_norm": 2.2801482677459717, "learning_rate": 1.647429935877878e-05, "loss": 0.8345, "step": 3022 }, { "epoch": 0.22874654761454352, "grad_norm": 2.2949955463409424, "learning_rate": 1.6473044523008106e-05, "loss": 0.9149, "step": 3023 }, { "epoch": 0.22882221633687716, "grad_norm": 2.6356618404388428, "learning_rate": 1.6471789219251216e-05, "loss": 0.7342, "step": 3024 }, { "epoch": 0.22889788505921077, "grad_norm": 2.3132026195526123, "learning_rate": 1.647053344758672e-05, "loss": 0.6673, "step": 3025 }, { "epoch": 0.2289735537815444, "grad_norm": 2.86631178855896, "learning_rate": 1.6469277208093256e-05, "loss": 0.8654, "step": 3026 }, { "epoch": 0.22904922250387802, "grad_norm": 2.4042892456054688, "learning_rate": 1.646802050084949e-05, "loss": 0.7001, "step": 3027 }, { "epoch": 0.22912489122621166, "grad_norm": 2.6637120246887207, "learning_rate": 1.6466763325934133e-05, "loss": 0.7731, "step": 3028 }, { "epoch": 0.22920055994854527, "grad_norm": 3.4634320735931396, "learning_rate": 1.64655056834259e-05, "loss": 0.9874, "step": 3029 }, { "epoch": 0.22927622867087888, "grad_norm": 2.559786558151245, "learning_rate": 1.646424757340356e-05, "loss": 0.788, "step": 3030 }, { "epoch": 0.22935189739321252, "grad_norm": 2.1549887657165527, "learning_rate": 1.646298899594589e-05, "loss": 0.7187, "step": 3031 }, { "epoch": 0.22942756611554613, "grad_norm": 2.1260504722595215, "learning_rate": 1.6461729951131712e-05, "loss": 0.7892, "step": 3032 }, { "epoch": 0.22950323483787977, "grad_norm": 2.535245656967163, "learning_rate": 1.6460470439039874e-05, "loss": 0.7686, "step": 3033 }, { "epoch": 0.22957890356021338, "grad_norm": 2.4702205657958984, "learning_rate": 1.6459210459749244e-05, "loss": 0.9208, "step": 3034 }, { "epoch": 0.22965457228254701, "grad_norm": 2.4642333984375, "learning_rate": 1.645795001333873e-05, "loss": 0.745, "step": 3035 }, { "epoch": 0.22973024100488063, "grad_norm": 2.5127851963043213, "learning_rate": 1.6456689099887263e-05, "loss": 0.9197, "step": 3036 }, { "epoch": 0.22980590972721426, "grad_norm": 2.6181676387786865, "learning_rate": 1.6455427719473806e-05, "loss": 0.6855, "step": 3037 }, { "epoch": 0.22988157844954787, "grad_norm": 2.3293540477752686, "learning_rate": 1.6454165872177354e-05, "loss": 0.6547, "step": 3038 }, { "epoch": 0.2299572471718815, "grad_norm": 2.4066195487976074, "learning_rate": 1.6452903558076925e-05, "loss": 0.8718, "step": 3039 }, { "epoch": 0.23003291589421512, "grad_norm": 2.3516995906829834, "learning_rate": 1.6451640777251567e-05, "loss": 0.7497, "step": 3040 }, { "epoch": 0.23010858461654876, "grad_norm": 2.0895943641662598, "learning_rate": 1.6450377529780363e-05, "loss": 0.768, "step": 3041 }, { "epoch": 0.23018425333888237, "grad_norm": 2.606595039367676, "learning_rate": 1.6449113815742422e-05, "loss": 0.7996, "step": 3042 }, { "epoch": 0.23025992206121598, "grad_norm": 2.8909833431243896, "learning_rate": 1.644784963521688e-05, "loss": 0.8167, "step": 3043 }, { "epoch": 0.23033559078354962, "grad_norm": 2.0377085208892822, "learning_rate": 1.6446584988282907e-05, "loss": 0.7504, "step": 3044 }, { "epoch": 0.23041125950588323, "grad_norm": 2.66420841217041, "learning_rate": 1.6445319875019694e-05, "loss": 0.6552, "step": 3045 }, { "epoch": 0.23048692822821687, "grad_norm": 2.240417718887329, "learning_rate": 1.644405429550647e-05, "loss": 0.7903, "step": 3046 }, { "epoch": 0.23056259695055048, "grad_norm": 2.7434213161468506, "learning_rate": 1.6442788249822486e-05, "loss": 0.8382, "step": 3047 }, { "epoch": 0.23063826567288412, "grad_norm": 2.1214497089385986, "learning_rate": 1.6441521738047033e-05, "loss": 0.8043, "step": 3048 }, { "epoch": 0.23071393439521773, "grad_norm": 2.15208101272583, "learning_rate": 1.6440254760259416e-05, "loss": 0.9723, "step": 3049 }, { "epoch": 0.23078960311755137, "grad_norm": 3.5610761642456055, "learning_rate": 1.6438987316538985e-05, "loss": 0.8386, "step": 3050 }, { "epoch": 0.23086527183988498, "grad_norm": 2.2690629959106445, "learning_rate": 1.643771940696511e-05, "loss": 0.7666, "step": 3051 }, { "epoch": 0.23094094056221862, "grad_norm": 2.7488620281219482, "learning_rate": 1.6436451031617182e-05, "loss": 0.738, "step": 3052 }, { "epoch": 0.23101660928455223, "grad_norm": 2.229719877243042, "learning_rate": 1.6435182190574643e-05, "loss": 0.8551, "step": 3053 }, { "epoch": 0.23109227800688587, "grad_norm": 2.4451205730438232, "learning_rate": 1.6433912883916944e-05, "loss": 0.688, "step": 3054 }, { "epoch": 0.23116794672921948, "grad_norm": 2.5764944553375244, "learning_rate": 1.6432643111723578e-05, "loss": 0.7756, "step": 3055 }, { "epoch": 0.23124361545155311, "grad_norm": 2.299663782119751, "learning_rate": 1.6431372874074057e-05, "loss": 0.7981, "step": 3056 }, { "epoch": 0.23131928417388672, "grad_norm": 2.5289340019226074, "learning_rate": 1.6430102171047935e-05, "loss": 0.6726, "step": 3057 }, { "epoch": 0.23139495289622034, "grad_norm": 3.0668952465057373, "learning_rate": 1.6428831002724782e-05, "loss": 0.8785, "step": 3058 }, { "epoch": 0.23147062161855397, "grad_norm": 2.457798480987549, "learning_rate": 1.6427559369184202e-05, "loss": 0.7681, "step": 3059 }, { "epoch": 0.23154629034088758, "grad_norm": 2.192192316055298, "learning_rate": 1.6426287270505837e-05, "loss": 0.7763, "step": 3060 }, { "epoch": 0.23162195906322122, "grad_norm": 2.4625298976898193, "learning_rate": 1.6425014706769337e-05, "loss": 0.6961, "step": 3061 }, { "epoch": 0.23169762778555483, "grad_norm": 2.2496135234832764, "learning_rate": 1.64237416780544e-05, "loss": 0.8604, "step": 3062 }, { "epoch": 0.23177329650788847, "grad_norm": 2.194805860519409, "learning_rate": 1.642246818444075e-05, "loss": 0.7537, "step": 3063 }, { "epoch": 0.23184896523022208, "grad_norm": 2.501255512237549, "learning_rate": 1.6421194226008138e-05, "loss": 0.8666, "step": 3064 }, { "epoch": 0.23192463395255572, "grad_norm": 2.5918145179748535, "learning_rate": 1.6419919802836337e-05, "loss": 0.6322, "step": 3065 }, { "epoch": 0.23200030267488933, "grad_norm": 2.162069082260132, "learning_rate": 1.641864491500516e-05, "loss": 0.7903, "step": 3066 }, { "epoch": 0.23207597139722297, "grad_norm": 2.95308518409729, "learning_rate": 1.6417369562594444e-05, "loss": 0.8792, "step": 3067 }, { "epoch": 0.23215164011955658, "grad_norm": 2.692232131958008, "learning_rate": 1.6416093745684054e-05, "loss": 0.7282, "step": 3068 }, { "epoch": 0.23222730884189022, "grad_norm": 2.6581435203552246, "learning_rate": 1.6414817464353888e-05, "loss": 0.8014, "step": 3069 }, { "epoch": 0.23230297756422383, "grad_norm": 2.171360969543457, "learning_rate": 1.6413540718683872e-05, "loss": 0.7544, "step": 3070 }, { "epoch": 0.23237864628655744, "grad_norm": 2.7274041175842285, "learning_rate": 1.6412263508753952e-05, "loss": 0.6874, "step": 3071 }, { "epoch": 0.23245431500889108, "grad_norm": 2.1489665508270264, "learning_rate": 1.6410985834644123e-05, "loss": 0.7909, "step": 3072 }, { "epoch": 0.2325299837312247, "grad_norm": 1.950114369392395, "learning_rate": 1.6409707696434388e-05, "loss": 0.7571, "step": 3073 }, { "epoch": 0.23260565245355833, "grad_norm": 4.101839542388916, "learning_rate": 1.640842909420479e-05, "loss": 0.7565, "step": 3074 }, { "epoch": 0.23268132117589194, "grad_norm": 2.2927932739257812, "learning_rate": 1.6407150028035402e-05, "loss": 0.6963, "step": 3075 }, { "epoch": 0.23275698989822557, "grad_norm": 2.3485589027404785, "learning_rate": 1.6405870498006326e-05, "loss": 0.6547, "step": 3076 }, { "epoch": 0.23283265862055919, "grad_norm": 2.404630422592163, "learning_rate": 1.640459050419768e-05, "loss": 0.8066, "step": 3077 }, { "epoch": 0.23290832734289282, "grad_norm": 2.318694591522217, "learning_rate": 1.640331004668963e-05, "loss": 0.8715, "step": 3078 }, { "epoch": 0.23298399606522643, "grad_norm": 2.4930219650268555, "learning_rate": 1.6402029125562357e-05, "loss": 0.7058, "step": 3079 }, { "epoch": 0.23305966478756007, "grad_norm": 3.169928789138794, "learning_rate": 1.640074774089608e-05, "loss": 0.758, "step": 3080 }, { "epoch": 0.23313533350989368, "grad_norm": 2.3577566146850586, "learning_rate": 1.6399465892771045e-05, "loss": 0.8166, "step": 3081 }, { "epoch": 0.23321100223222732, "grad_norm": 2.1574015617370605, "learning_rate": 1.6398183581267522e-05, "loss": 0.8251, "step": 3082 }, { "epoch": 0.23328667095456093, "grad_norm": 2.141531229019165, "learning_rate": 1.639690080646581e-05, "loss": 0.785, "step": 3083 }, { "epoch": 0.23336233967689454, "grad_norm": 2.964078426361084, "learning_rate": 1.639561756844625e-05, "loss": 0.6618, "step": 3084 }, { "epoch": 0.23343800839922818, "grad_norm": 2.588050603866577, "learning_rate": 1.6394333867289198e-05, "loss": 0.7254, "step": 3085 }, { "epoch": 0.2335136771215618, "grad_norm": 3.1595702171325684, "learning_rate": 1.639304970307504e-05, "loss": 0.759, "step": 3086 }, { "epoch": 0.23358934584389543, "grad_norm": 2.7546892166137695, "learning_rate": 1.63917650758842e-05, "loss": 0.8624, "step": 3087 }, { "epoch": 0.23366501456622904, "grad_norm": 2.3731441497802734, "learning_rate": 1.639047998579712e-05, "loss": 0.7255, "step": 3088 }, { "epoch": 0.23374068328856268, "grad_norm": 2.9596669673919678, "learning_rate": 1.6389194432894283e-05, "loss": 0.8804, "step": 3089 }, { "epoch": 0.2338163520108963, "grad_norm": 2.525820732116699, "learning_rate": 1.638790841725619e-05, "loss": 0.8342, "step": 3090 }, { "epoch": 0.23389202073322993, "grad_norm": 2.2738192081451416, "learning_rate": 1.6386621938963375e-05, "loss": 0.7513, "step": 3091 }, { "epoch": 0.23396768945556354, "grad_norm": 1.9771041870117188, "learning_rate": 1.6385334998096405e-05, "loss": 0.8262, "step": 3092 }, { "epoch": 0.23404335817789718, "grad_norm": 2.0869479179382324, "learning_rate": 1.638404759473587e-05, "loss": 0.7338, "step": 3093 }, { "epoch": 0.2341190269002308, "grad_norm": 2.7630059719085693, "learning_rate": 1.6382759728962392e-05, "loss": 0.9286, "step": 3094 }, { "epoch": 0.23419469562256442, "grad_norm": 2.188873291015625, "learning_rate": 1.638147140085662e-05, "loss": 0.7531, "step": 3095 }, { "epoch": 0.23427036434489804, "grad_norm": 1.8856064081192017, "learning_rate": 1.6380182610499234e-05, "loss": 0.7139, "step": 3096 }, { "epoch": 0.23434603306723165, "grad_norm": 2.62967848777771, "learning_rate": 1.637889335797094e-05, "loss": 0.8767, "step": 3097 }, { "epoch": 0.23442170178956528, "grad_norm": 10.917149543762207, "learning_rate": 1.6377603643352483e-05, "loss": 0.8107, "step": 3098 }, { "epoch": 0.2344973705118989, "grad_norm": 2.7370142936706543, "learning_rate": 1.6376313466724624e-05, "loss": 0.7963, "step": 3099 }, { "epoch": 0.23457303923423253, "grad_norm": 1.870970606803894, "learning_rate": 1.6375022828168153e-05, "loss": 0.6093, "step": 3100 }, { "epoch": 0.23464870795656614, "grad_norm": 2.654796600341797, "learning_rate": 1.6373731727763902e-05, "loss": 0.766, "step": 3101 }, { "epoch": 0.23472437667889978, "grad_norm": 2.636352062225342, "learning_rate": 1.6372440165592717e-05, "loss": 0.7713, "step": 3102 }, { "epoch": 0.2348000454012334, "grad_norm": 2.9713962078094482, "learning_rate": 1.6371148141735488e-05, "loss": 0.7387, "step": 3103 }, { "epoch": 0.23487571412356703, "grad_norm": 2.823192596435547, "learning_rate": 1.636985565627312e-05, "loss": 0.7023, "step": 3104 }, { "epoch": 0.23495138284590064, "grad_norm": 2.4954514503479004, "learning_rate": 1.6368562709286553e-05, "loss": 0.7699, "step": 3105 }, { "epoch": 0.23502705156823428, "grad_norm": 2.6096956729888916, "learning_rate": 1.6367269300856755e-05, "loss": 0.6207, "step": 3106 }, { "epoch": 0.2351027202905679, "grad_norm": 2.9008142948150635, "learning_rate": 1.636597543106473e-05, "loss": 0.8394, "step": 3107 }, { "epoch": 0.23517838901290153, "grad_norm": 1.9445158243179321, "learning_rate": 1.636468109999149e-05, "loss": 0.6827, "step": 3108 }, { "epoch": 0.23525405773523514, "grad_norm": 2.0866832733154297, "learning_rate": 1.6363386307718106e-05, "loss": 0.8256, "step": 3109 }, { "epoch": 0.23532972645756878, "grad_norm": 2.2782726287841797, "learning_rate": 1.6362091054325657e-05, "loss": 0.6447, "step": 3110 }, { "epoch": 0.2354053951799024, "grad_norm": 2.6688876152038574, "learning_rate": 1.636079533989525e-05, "loss": 0.7119, "step": 3111 }, { "epoch": 0.235481063902236, "grad_norm": 2.297727346420288, "learning_rate": 1.6359499164508034e-05, "loss": 0.9497, "step": 3112 }, { "epoch": 0.23555673262456964, "grad_norm": 2.075216054916382, "learning_rate": 1.6358202528245173e-05, "loss": 0.8736, "step": 3113 }, { "epoch": 0.23563240134690325, "grad_norm": 2.638568878173828, "learning_rate": 1.6356905431187874e-05, "loss": 0.7503, "step": 3114 }, { "epoch": 0.23570807006923689, "grad_norm": 3.3016517162323, "learning_rate": 1.635560787341736e-05, "loss": 0.6998, "step": 3115 }, { "epoch": 0.2357837387915705, "grad_norm": 2.7137296199798584, "learning_rate": 1.635430985501489e-05, "loss": 0.8485, "step": 3116 }, { "epoch": 0.23585940751390413, "grad_norm": 2.4911835193634033, "learning_rate": 1.6353011376061752e-05, "loss": 0.7518, "step": 3117 }, { "epoch": 0.23593507623623775, "grad_norm": 2.5743257999420166, "learning_rate": 1.6351712436639254e-05, "loss": 0.8575, "step": 3118 }, { "epoch": 0.23601074495857138, "grad_norm": 1.9882807731628418, "learning_rate": 1.635041303682875e-05, "loss": 0.7866, "step": 3119 }, { "epoch": 0.236086413680905, "grad_norm": 2.3012471199035645, "learning_rate": 1.6349113176711606e-05, "loss": 0.7392, "step": 3120 }, { "epoch": 0.23616208240323863, "grad_norm": 2.3356645107269287, "learning_rate": 1.6347812856369225e-05, "loss": 0.7093, "step": 3121 }, { "epoch": 0.23623775112557224, "grad_norm": 1.8433892726898193, "learning_rate": 1.6346512075883035e-05, "loss": 0.7198, "step": 3122 }, { "epoch": 0.23631341984790588, "grad_norm": 2.4490842819213867, "learning_rate": 1.6345210835334502e-05, "loss": 0.8044, "step": 3123 }, { "epoch": 0.2363890885702395, "grad_norm": 2.7389450073242188, "learning_rate": 1.6343909134805106e-05, "loss": 0.7449, "step": 3124 }, { "epoch": 0.2364647572925731, "grad_norm": 2.162238836288452, "learning_rate": 1.6342606974376367e-05, "loss": 0.6568, "step": 3125 }, { "epoch": 0.23654042601490674, "grad_norm": 2.0842134952545166, "learning_rate": 1.634130435412983e-05, "loss": 0.5613, "step": 3126 }, { "epoch": 0.23661609473724035, "grad_norm": 2.217766284942627, "learning_rate": 1.6340001274147074e-05, "loss": 0.7913, "step": 3127 }, { "epoch": 0.236691763459574, "grad_norm": 2.228721857070923, "learning_rate": 1.6338697734509694e-05, "loss": 0.8567, "step": 3128 }, { "epoch": 0.2367674321819076, "grad_norm": 2.4052469730377197, "learning_rate": 1.6337393735299325e-05, "loss": 0.7, "step": 3129 }, { "epoch": 0.23684310090424124, "grad_norm": 2.7400004863739014, "learning_rate": 1.633608927659763e-05, "loss": 0.7754, "step": 3130 }, { "epoch": 0.23691876962657485, "grad_norm": 2.2973527908325195, "learning_rate": 1.6334784358486296e-05, "loss": 0.8086, "step": 3131 }, { "epoch": 0.2369944383489085, "grad_norm": 2.4160079956054688, "learning_rate": 1.6333478981047043e-05, "loss": 0.7429, "step": 3132 }, { "epoch": 0.2370701070712421, "grad_norm": 2.779785394668579, "learning_rate": 1.6332173144361613e-05, "loss": 0.6959, "step": 3133 }, { "epoch": 0.23714577579357574, "grad_norm": 2.1350619792938232, "learning_rate": 1.633086684851179e-05, "loss": 0.8856, "step": 3134 }, { "epoch": 0.23722144451590935, "grad_norm": 3.6366331577301025, "learning_rate": 1.632956009357937e-05, "loss": 0.6962, "step": 3135 }, { "epoch": 0.23729711323824298, "grad_norm": 2.5807621479034424, "learning_rate": 1.6328252879646195e-05, "loss": 0.7641, "step": 3136 }, { "epoch": 0.2373727819605766, "grad_norm": 2.8703842163085938, "learning_rate": 1.632694520679412e-05, "loss": 0.793, "step": 3137 }, { "epoch": 0.2374484506829102, "grad_norm": 2.0771567821502686, "learning_rate": 1.632563707510504e-05, "loss": 0.7813, "step": 3138 }, { "epoch": 0.23752411940524384, "grad_norm": 2.532623529434204, "learning_rate": 1.6324328484660867e-05, "loss": 0.722, "step": 3139 }, { "epoch": 0.23759978812757745, "grad_norm": 1.9321879148483276, "learning_rate": 1.632301943554356e-05, "loss": 0.7015, "step": 3140 }, { "epoch": 0.2376754568499111, "grad_norm": 2.117048978805542, "learning_rate": 1.6321709927835087e-05, "loss": 0.7204, "step": 3141 }, { "epoch": 0.2377511255722447, "grad_norm": 2.478074550628662, "learning_rate": 1.6320399961617458e-05, "loss": 0.7196, "step": 3142 }, { "epoch": 0.23782679429457834, "grad_norm": 2.4688057899475098, "learning_rate": 1.6319089536972706e-05, "loss": 0.7519, "step": 3143 }, { "epoch": 0.23790246301691195, "grad_norm": 2.998124599456787, "learning_rate": 1.6317778653982898e-05, "loss": 0.8621, "step": 3144 }, { "epoch": 0.2379781317392456, "grad_norm": 3.5607550144195557, "learning_rate": 1.631646731273012e-05, "loss": 0.748, "step": 3145 }, { "epoch": 0.2380538004615792, "grad_norm": 2.3379786014556885, "learning_rate": 1.631515551329649e-05, "loss": 0.5764, "step": 3146 }, { "epoch": 0.23812946918391284, "grad_norm": 2.382059097290039, "learning_rate": 1.6313843255764167e-05, "loss": 0.7674, "step": 3147 }, { "epoch": 0.23820513790624645, "grad_norm": 2.690565586090088, "learning_rate": 1.6312530540215322e-05, "loss": 0.823, "step": 3148 }, { "epoch": 0.2382808066285801, "grad_norm": 2.1526098251342773, "learning_rate": 1.631121736673216e-05, "loss": 0.8074, "step": 3149 }, { "epoch": 0.2383564753509137, "grad_norm": 2.1651923656463623, "learning_rate": 1.6309903735396925e-05, "loss": 0.7172, "step": 3150 }, { "epoch": 0.2384321440732473, "grad_norm": 2.0754897594451904, "learning_rate": 1.6308589646291873e-05, "loss": 0.7341, "step": 3151 }, { "epoch": 0.23850781279558095, "grad_norm": 2.7566630840301514, "learning_rate": 1.6307275099499297e-05, "loss": 0.813, "step": 3152 }, { "epoch": 0.23858348151791456, "grad_norm": 2.0758936405181885, "learning_rate": 1.630596009510152e-05, "loss": 0.7604, "step": 3153 }, { "epoch": 0.2386591502402482, "grad_norm": 2.4266982078552246, "learning_rate": 1.6304644633180893e-05, "loss": 0.806, "step": 3154 }, { "epoch": 0.2387348189625818, "grad_norm": 2.900785446166992, "learning_rate": 1.630332871381979e-05, "loss": 0.7293, "step": 3155 }, { "epoch": 0.23881048768491545, "grad_norm": 2.0471670627593994, "learning_rate": 1.6302012337100624e-05, "loss": 0.7067, "step": 3156 }, { "epoch": 0.23888615640724906, "grad_norm": 2.2892119884490967, "learning_rate": 1.6300695503105825e-05, "loss": 0.8938, "step": 3157 }, { "epoch": 0.2389618251295827, "grad_norm": 1.9991192817687988, "learning_rate": 1.629937821191786e-05, "loss": 0.7893, "step": 3158 }, { "epoch": 0.2390374938519163, "grad_norm": 2.1983959674835205, "learning_rate": 1.6298060463619224e-05, "loss": 0.6493, "step": 3159 }, { "epoch": 0.23911316257424994, "grad_norm": 2.451901912689209, "learning_rate": 1.629674225829244e-05, "loss": 0.6745, "step": 3160 }, { "epoch": 0.23918883129658355, "grad_norm": 2.5038340091705322, "learning_rate": 1.6295423596020052e-05, "loss": 0.8159, "step": 3161 }, { "epoch": 0.2392645000189172, "grad_norm": 2.6183676719665527, "learning_rate": 1.6294104476884643e-05, "loss": 0.747, "step": 3162 }, { "epoch": 0.2393401687412508, "grad_norm": 3.3365650177001953, "learning_rate": 1.6292784900968818e-05, "loss": 0.8315, "step": 3163 }, { "epoch": 0.23941583746358444, "grad_norm": 2.1766538619995117, "learning_rate": 1.6291464868355216e-05, "loss": 0.7855, "step": 3164 }, { "epoch": 0.23949150618591805, "grad_norm": 2.9773683547973633, "learning_rate": 1.6290144379126498e-05, "loss": 0.8054, "step": 3165 }, { "epoch": 0.23956717490825166, "grad_norm": 2.252821445465088, "learning_rate": 1.6288823433365365e-05, "loss": 0.754, "step": 3166 }, { "epoch": 0.2396428436305853, "grad_norm": 2.627490520477295, "learning_rate": 1.628750203115453e-05, "loss": 0.8792, "step": 3167 }, { "epoch": 0.2397185123529189, "grad_norm": 2.516615867614746, "learning_rate": 1.6286180172576748e-05, "loss": 0.8076, "step": 3168 }, { "epoch": 0.23979418107525255, "grad_norm": 3.4849398136138916, "learning_rate": 1.6284857857714798e-05, "loss": 0.7961, "step": 3169 }, { "epoch": 0.23986984979758616, "grad_norm": 2.3548803329467773, "learning_rate": 1.6283535086651487e-05, "loss": 0.7389, "step": 3170 }, { "epoch": 0.2399455185199198, "grad_norm": 2.4780502319335938, "learning_rate": 1.6282211859469652e-05, "loss": 0.7191, "step": 3171 }, { "epoch": 0.2400211872422534, "grad_norm": 2.856328248977661, "learning_rate": 1.6280888176252153e-05, "loss": 0.8256, "step": 3172 }, { "epoch": 0.24009685596458705, "grad_norm": 3.004737615585327, "learning_rate": 1.627956403708189e-05, "loss": 0.843, "step": 3173 }, { "epoch": 0.24017252468692066, "grad_norm": 2.3179590702056885, "learning_rate": 1.627823944204178e-05, "loss": 0.7477, "step": 3174 }, { "epoch": 0.2402481934092543, "grad_norm": 2.164140462875366, "learning_rate": 1.627691439121478e-05, "loss": 0.7412, "step": 3175 }, { "epoch": 0.2403238621315879, "grad_norm": 2.5224568843841553, "learning_rate": 1.6275588884683858e-05, "loss": 0.6791, "step": 3176 }, { "epoch": 0.24039953085392154, "grad_norm": 2.7676162719726562, "learning_rate": 1.6274262922532033e-05, "loss": 0.652, "step": 3177 }, { "epoch": 0.24047519957625516, "grad_norm": 1.807481288909912, "learning_rate": 1.6272936504842333e-05, "loss": 0.93, "step": 3178 }, { "epoch": 0.24055086829858877, "grad_norm": 2.958256483078003, "learning_rate": 1.627160963169783e-05, "loss": 0.7591, "step": 3179 }, { "epoch": 0.2406265370209224, "grad_norm": 3.4339983463287354, "learning_rate": 1.6270282303181606e-05, "loss": 0.7686, "step": 3180 }, { "epoch": 0.24070220574325601, "grad_norm": 8.310561180114746, "learning_rate": 1.6268954519376792e-05, "loss": 0.7554, "step": 3181 }, { "epoch": 0.24077787446558965, "grad_norm": 1.9216821193695068, "learning_rate": 1.6267626280366538e-05, "loss": 0.6726, "step": 3182 }, { "epoch": 0.24085354318792326, "grad_norm": 2.743393659591675, "learning_rate": 1.626629758623402e-05, "loss": 0.7734, "step": 3183 }, { "epoch": 0.2409292119102569, "grad_norm": 2.5748066902160645, "learning_rate": 1.6264968437062438e-05, "loss": 0.8017, "step": 3184 }, { "epoch": 0.2410048806325905, "grad_norm": 2.8789925575256348, "learning_rate": 1.626363883293504e-05, "loss": 0.7882, "step": 3185 }, { "epoch": 0.24108054935492415, "grad_norm": 2.611142158508301, "learning_rate": 1.6262308773935085e-05, "loss": 0.8026, "step": 3186 }, { "epoch": 0.24115621807725776, "grad_norm": 2.30765700340271, "learning_rate": 1.6260978260145867e-05, "loss": 0.8759, "step": 3187 }, { "epoch": 0.2412318867995914, "grad_norm": 2.0693159103393555, "learning_rate": 1.62596472916507e-05, "loss": 0.7008, "step": 3188 }, { "epoch": 0.241307555521925, "grad_norm": 2.8635051250457764, "learning_rate": 1.6258315868532945e-05, "loss": 0.7966, "step": 3189 }, { "epoch": 0.24138322424425865, "grad_norm": 2.766191244125366, "learning_rate": 1.625698399087597e-05, "loss": 0.6112, "step": 3190 }, { "epoch": 0.24145889296659226, "grad_norm": 2.440380811691284, "learning_rate": 1.6255651658763185e-05, "loss": 0.8754, "step": 3191 }, { "epoch": 0.24153456168892587, "grad_norm": 2.209153175354004, "learning_rate": 1.625431887227803e-05, "loss": 0.7281, "step": 3192 }, { "epoch": 0.2416102304112595, "grad_norm": 1.9143271446228027, "learning_rate": 1.625298563150396e-05, "loss": 0.8007, "step": 3193 }, { "epoch": 0.24168589913359312, "grad_norm": 2.3368210792541504, "learning_rate": 1.6251651936524473e-05, "loss": 0.8231, "step": 3194 }, { "epoch": 0.24176156785592676, "grad_norm": 2.0771644115448, "learning_rate": 1.6250317787423087e-05, "loss": 0.7381, "step": 3195 }, { "epoch": 0.24183723657826037, "grad_norm": 4.1516289710998535, "learning_rate": 1.624898318428335e-05, "loss": 0.6193, "step": 3196 }, { "epoch": 0.241912905300594, "grad_norm": 2.5363802909851074, "learning_rate": 1.6247648127188842e-05, "loss": 0.8325, "step": 3197 }, { "epoch": 0.24198857402292762, "grad_norm": 2.0077271461486816, "learning_rate": 1.6246312616223164e-05, "loss": 0.83, "step": 3198 }, { "epoch": 0.24206424274526125, "grad_norm": 2.6140248775482178, "learning_rate": 1.6244976651469952e-05, "loss": 0.8015, "step": 3199 }, { "epoch": 0.24213991146759486, "grad_norm": 2.4978835582733154, "learning_rate": 1.624364023301287e-05, "loss": 0.7106, "step": 3200 }, { "epoch": 0.2422155801899285, "grad_norm": 2.355581521987915, "learning_rate": 1.624230336093561e-05, "loss": 0.8455, "step": 3201 }, { "epoch": 0.2422912489122621, "grad_norm": 3.009420871734619, "learning_rate": 1.6240966035321887e-05, "loss": 0.6477, "step": 3202 }, { "epoch": 0.24236691763459575, "grad_norm": 3.0516929626464844, "learning_rate": 1.623962825625545e-05, "loss": 0.7656, "step": 3203 }, { "epoch": 0.24244258635692936, "grad_norm": 4.451990604400635, "learning_rate": 1.6238290023820077e-05, "loss": 0.8312, "step": 3204 }, { "epoch": 0.24251825507926297, "grad_norm": 2.4959819316864014, "learning_rate": 1.6236951338099567e-05, "loss": 0.815, "step": 3205 }, { "epoch": 0.2425939238015966, "grad_norm": 2.653895854949951, "learning_rate": 1.6235612199177765e-05, "loss": 0.6203, "step": 3206 }, { "epoch": 0.24266959252393022, "grad_norm": 4.231168746948242, "learning_rate": 1.6234272607138517e-05, "loss": 0.716, "step": 3207 }, { "epoch": 0.24274526124626386, "grad_norm": 2.9942550659179688, "learning_rate": 1.6232932562065727e-05, "loss": 0.783, "step": 3208 }, { "epoch": 0.24282092996859747, "grad_norm": 2.903759241104126, "learning_rate": 1.6231592064043298e-05, "loss": 0.7758, "step": 3209 }, { "epoch": 0.2428965986909311, "grad_norm": 2.2011756896972656, "learning_rate": 1.6230251113155188e-05, "loss": 0.9041, "step": 3210 }, { "epoch": 0.24297226741326472, "grad_norm": 2.3656907081604004, "learning_rate": 1.622890970948537e-05, "loss": 0.8788, "step": 3211 }, { "epoch": 0.24304793613559836, "grad_norm": 2.8599255084991455, "learning_rate": 1.6227567853117842e-05, "loss": 0.8441, "step": 3212 }, { "epoch": 0.24312360485793197, "grad_norm": 2.9189417362213135, "learning_rate": 1.6226225544136638e-05, "loss": 0.7811, "step": 3213 }, { "epoch": 0.2431992735802656, "grad_norm": 2.1617062091827393, "learning_rate": 1.622488278262582e-05, "loss": 0.6992, "step": 3214 }, { "epoch": 0.24327494230259922, "grad_norm": 2.509108066558838, "learning_rate": 1.6223539568669476e-05, "loss": 0.7945, "step": 3215 }, { "epoch": 0.24335061102493286, "grad_norm": 2.4163920879364014, "learning_rate": 1.6222195902351715e-05, "loss": 0.7454, "step": 3216 }, { "epoch": 0.24342627974726647, "grad_norm": 1.949989914894104, "learning_rate": 1.622085178375669e-05, "loss": 0.7069, "step": 3217 }, { "epoch": 0.2435019484696001, "grad_norm": 2.657010078430176, "learning_rate": 1.6219507212968568e-05, "loss": 0.6595, "step": 3218 }, { "epoch": 0.24357761719193372, "grad_norm": 2.2271595001220703, "learning_rate": 1.6218162190071557e-05, "loss": 0.8176, "step": 3219 }, { "epoch": 0.24365328591426733, "grad_norm": 2.0614137649536133, "learning_rate": 1.6216816715149884e-05, "loss": 0.8147, "step": 3220 }, { "epoch": 0.24372895463660096, "grad_norm": 3.104268789291382, "learning_rate": 1.6215470788287803e-05, "loss": 0.7427, "step": 3221 }, { "epoch": 0.24380462335893457, "grad_norm": 2.268542528152466, "learning_rate": 1.6214124409569605e-05, "loss": 0.78, "step": 3222 }, { "epoch": 0.2438802920812682, "grad_norm": 2.576122283935547, "learning_rate": 1.6212777579079606e-05, "loss": 0.7915, "step": 3223 }, { "epoch": 0.24395596080360182, "grad_norm": 1.9363725185394287, "learning_rate": 1.6211430296902145e-05, "loss": 0.7399, "step": 3224 }, { "epoch": 0.24403162952593546, "grad_norm": 1.9388312101364136, "learning_rate": 1.621008256312159e-05, "loss": 0.7831, "step": 3225 }, { "epoch": 0.24410729824826907, "grad_norm": 2.938326835632324, "learning_rate": 1.620873437782235e-05, "loss": 0.744, "step": 3226 }, { "epoch": 0.2441829669706027, "grad_norm": 2.0602774620056152, "learning_rate": 1.6207385741088843e-05, "loss": 0.7965, "step": 3227 }, { "epoch": 0.24425863569293632, "grad_norm": 2.867607593536377, "learning_rate": 1.620603665300553e-05, "loss": 0.7248, "step": 3228 }, { "epoch": 0.24433430441526996, "grad_norm": 2.3602488040924072, "learning_rate": 1.6204687113656895e-05, "loss": 0.6803, "step": 3229 }, { "epoch": 0.24440997313760357, "grad_norm": 2.2412686347961426, "learning_rate": 1.6203337123127456e-05, "loss": 0.7207, "step": 3230 }, { "epoch": 0.2444856418599372, "grad_norm": 2.8568761348724365, "learning_rate": 1.620198668150174e-05, "loss": 0.7997, "step": 3231 }, { "epoch": 0.24456131058227082, "grad_norm": 2.4794857501983643, "learning_rate": 1.620063578886433e-05, "loss": 0.6912, "step": 3232 }, { "epoch": 0.24463697930460443, "grad_norm": 2.476989507675171, "learning_rate": 1.6199284445299815e-05, "loss": 0.9426, "step": 3233 }, { "epoch": 0.24471264802693807, "grad_norm": 2.4754297733306885, "learning_rate": 1.619793265089282e-05, "loss": 0.8036, "step": 3234 }, { "epoch": 0.24478831674927168, "grad_norm": 2.367055654525757, "learning_rate": 1.6196580405728005e-05, "loss": 0.7739, "step": 3235 }, { "epoch": 0.24486398547160532, "grad_norm": 2.6627862453460693, "learning_rate": 1.6195227709890047e-05, "loss": 0.7749, "step": 3236 }, { "epoch": 0.24493965419393893, "grad_norm": 2.5135037899017334, "learning_rate": 1.6193874563463657e-05, "loss": 0.7816, "step": 3237 }, { "epoch": 0.24501532291627257, "grad_norm": 3.7979977130889893, "learning_rate": 1.6192520966533574e-05, "loss": 0.7276, "step": 3238 }, { "epoch": 0.24509099163860618, "grad_norm": 2.5826263427734375, "learning_rate": 1.6191166919184564e-05, "loss": 0.7003, "step": 3239 }, { "epoch": 0.24516666036093981, "grad_norm": 2.326385259628296, "learning_rate": 1.6189812421501424e-05, "loss": 0.549, "step": 3240 }, { "epoch": 0.24524232908327342, "grad_norm": 2.296499490737915, "learning_rate": 1.6188457473568974e-05, "loss": 0.8263, "step": 3241 }, { "epoch": 0.24531799780560706, "grad_norm": 2.9421603679656982, "learning_rate": 1.6187102075472067e-05, "loss": 0.8154, "step": 3242 }, { "epoch": 0.24539366652794067, "grad_norm": 2.3418660163879395, "learning_rate": 1.6185746227295585e-05, "loss": 0.8657, "step": 3243 }, { "epoch": 0.2454693352502743, "grad_norm": 2.390544891357422, "learning_rate": 1.618438992912443e-05, "loss": 0.7327, "step": 3244 }, { "epoch": 0.24554500397260792, "grad_norm": 2.8358356952667236, "learning_rate": 1.6183033181043542e-05, "loss": 0.9002, "step": 3245 }, { "epoch": 0.24562067269494153, "grad_norm": 2.2084474563598633, "learning_rate": 1.6181675983137884e-05, "loss": 0.7483, "step": 3246 }, { "epoch": 0.24569634141727517, "grad_norm": 2.7843167781829834, "learning_rate": 1.6180318335492445e-05, "loss": 0.7849, "step": 3247 }, { "epoch": 0.24577201013960878, "grad_norm": 2.2138888835906982, "learning_rate": 1.617896023819225e-05, "loss": 0.7737, "step": 3248 }, { "epoch": 0.24584767886194242, "grad_norm": 2.3047590255737305, "learning_rate": 1.6177601691322344e-05, "loss": 0.6689, "step": 3249 }, { "epoch": 0.24592334758427603, "grad_norm": 2.421382188796997, "learning_rate": 1.6176242694967803e-05, "loss": 0.697, "step": 3250 }, { "epoch": 0.24599901630660967, "grad_norm": 2.865973472595215, "learning_rate": 1.6174883249213736e-05, "loss": 0.6845, "step": 3251 }, { "epoch": 0.24607468502894328, "grad_norm": 2.016404628753662, "learning_rate": 1.6173523354145275e-05, "loss": 0.7247, "step": 3252 }, { "epoch": 0.24615035375127692, "grad_norm": 2.101076602935791, "learning_rate": 1.617216300984758e-05, "loss": 0.9097, "step": 3253 }, { "epoch": 0.24622602247361053, "grad_norm": 2.44075083732605, "learning_rate": 1.6170802216405835e-05, "loss": 0.7383, "step": 3254 }, { "epoch": 0.24630169119594417, "grad_norm": 2.7942304611206055, "learning_rate": 1.6169440973905266e-05, "loss": 0.862, "step": 3255 }, { "epoch": 0.24637735991827778, "grad_norm": 2.607255458831787, "learning_rate": 1.6168079282431113e-05, "loss": 0.8421, "step": 3256 }, { "epoch": 0.24645302864061142, "grad_norm": 3.629255771636963, "learning_rate": 1.6166717142068654e-05, "loss": 0.7941, "step": 3257 }, { "epoch": 0.24652869736294503, "grad_norm": 3.293483257293701, "learning_rate": 1.6165354552903182e-05, "loss": 0.9336, "step": 3258 }, { "epoch": 0.24660436608527864, "grad_norm": 2.2481791973114014, "learning_rate": 1.6163991515020035e-05, "loss": 0.7163, "step": 3259 }, { "epoch": 0.24668003480761228, "grad_norm": 5.270366668701172, "learning_rate": 1.616262802850457e-05, "loss": 0.8224, "step": 3260 }, { "epoch": 0.24675570352994589, "grad_norm": 2.6033430099487305, "learning_rate": 1.616126409344217e-05, "loss": 0.8148, "step": 3261 }, { "epoch": 0.24683137225227952, "grad_norm": 3.4595351219177246, "learning_rate": 1.6159899709918247e-05, "loss": 0.653, "step": 3262 }, { "epoch": 0.24690704097461313, "grad_norm": 2.154088020324707, "learning_rate": 1.615853487801825e-05, "loss": 0.8671, "step": 3263 }, { "epoch": 0.24698270969694677, "grad_norm": 2.9141697883605957, "learning_rate": 1.615716959782764e-05, "loss": 0.846, "step": 3264 }, { "epoch": 0.24705837841928038, "grad_norm": 2.8304619789123535, "learning_rate": 1.6155803869431927e-05, "loss": 0.584, "step": 3265 }, { "epoch": 0.24713404714161402, "grad_norm": 6.038189888000488, "learning_rate": 1.615443769291663e-05, "loss": 0.5921, "step": 3266 }, { "epoch": 0.24720971586394763, "grad_norm": 2.2429237365722656, "learning_rate": 1.6153071068367302e-05, "loss": 0.7524, "step": 3267 }, { "epoch": 0.24728538458628127, "grad_norm": 2.542436361312866, "learning_rate": 1.6151703995869533e-05, "loss": 0.6946, "step": 3268 }, { "epoch": 0.24736105330861488, "grad_norm": 2.85803484916687, "learning_rate": 1.6150336475508923e-05, "loss": 0.9324, "step": 3269 }, { "epoch": 0.24743672203094852, "grad_norm": 2.8291127681732178, "learning_rate": 1.614896850737112e-05, "loss": 0.933, "step": 3270 }, { "epoch": 0.24751239075328213, "grad_norm": 3.0022454261779785, "learning_rate": 1.6147600091541782e-05, "loss": 0.5986, "step": 3271 }, { "epoch": 0.24758805947561574, "grad_norm": 2.692591428756714, "learning_rate": 1.614623122810661e-05, "loss": 0.7886, "step": 3272 }, { "epoch": 0.24766372819794938, "grad_norm": 3.29293155670166, "learning_rate": 1.6144861917151322e-05, "loss": 0.8193, "step": 3273 }, { "epoch": 0.247739396920283, "grad_norm": 2.4391543865203857, "learning_rate": 1.614349215876168e-05, "loss": 0.8077, "step": 3274 }, { "epoch": 0.24781506564261663, "grad_norm": 2.349703788757324, "learning_rate": 1.6142121953023447e-05, "loss": 0.7945, "step": 3275 }, { "epoch": 0.24789073436495024, "grad_norm": 2.3136823177337646, "learning_rate": 1.6140751300022437e-05, "loss": 0.7307, "step": 3276 }, { "epoch": 0.24796640308728388, "grad_norm": 2.6560375690460205, "learning_rate": 1.6139380199844487e-05, "loss": 0.7428, "step": 3277 }, { "epoch": 0.2480420718096175, "grad_norm": 2.5470519065856934, "learning_rate": 1.6138008652575455e-05, "loss": 0.7495, "step": 3278 }, { "epoch": 0.24811774053195113, "grad_norm": 3.2811646461486816, "learning_rate": 1.6136636658301236e-05, "loss": 0.9781, "step": 3279 }, { "epoch": 0.24819340925428474, "grad_norm": 2.395357608795166, "learning_rate": 1.6135264217107744e-05, "loss": 0.6281, "step": 3280 }, { "epoch": 0.24826907797661837, "grad_norm": 2.6726183891296387, "learning_rate": 1.6133891329080933e-05, "loss": 0.7946, "step": 3281 }, { "epoch": 0.24834474669895198, "grad_norm": 2.373309373855591, "learning_rate": 1.6132517994306767e-05, "loss": 0.7204, "step": 3282 }, { "epoch": 0.24842041542128562, "grad_norm": 2.439506769180298, "learning_rate": 1.6131144212871264e-05, "loss": 0.7067, "step": 3283 }, { "epoch": 0.24849608414361923, "grad_norm": 2.1670548915863037, "learning_rate": 1.6129769984860435e-05, "loss": 0.9527, "step": 3284 }, { "epoch": 0.24857175286595287, "grad_norm": 2.238982677459717, "learning_rate": 1.6128395310360356e-05, "loss": 0.6461, "step": 3285 }, { "epoch": 0.24864742158828648, "grad_norm": 3.4354398250579834, "learning_rate": 1.6127020189457107e-05, "loss": 0.7255, "step": 3286 }, { "epoch": 0.2487230903106201, "grad_norm": 3.187068223953247, "learning_rate": 1.6125644622236797e-05, "loss": 0.9041, "step": 3287 }, { "epoch": 0.24879875903295373, "grad_norm": 2.2371320724487305, "learning_rate": 1.6124268608785578e-05, "loss": 0.7082, "step": 3288 }, { "epoch": 0.24887442775528734, "grad_norm": 1.9986299276351929, "learning_rate": 1.6122892149189616e-05, "loss": 0.6645, "step": 3289 }, { "epoch": 0.24895009647762098, "grad_norm": 2.3427042961120605, "learning_rate": 1.6121515243535107e-05, "loss": 0.7438, "step": 3290 }, { "epoch": 0.2490257651999546, "grad_norm": 11.90526294708252, "learning_rate": 1.612013789190828e-05, "loss": 0.7747, "step": 3291 }, { "epoch": 0.24910143392228823, "grad_norm": 2.641674280166626, "learning_rate": 1.611876009439539e-05, "loss": 0.8812, "step": 3292 }, { "epoch": 0.24917710264462184, "grad_norm": 2.691256046295166, "learning_rate": 1.6117381851082717e-05, "loss": 0.6981, "step": 3293 }, { "epoch": 0.24925277136695548, "grad_norm": 2.4926040172576904, "learning_rate": 1.6116003162056574e-05, "loss": 0.7059, "step": 3294 }, { "epoch": 0.2493284400892891, "grad_norm": 1.984239101409912, "learning_rate": 1.6114624027403297e-05, "loss": 0.7439, "step": 3295 }, { "epoch": 0.24940410881162273, "grad_norm": 2.4202473163604736, "learning_rate": 1.611324444720925e-05, "loss": 0.8114, "step": 3296 }, { "epoch": 0.24947977753395634, "grad_norm": 2.3358452320098877, "learning_rate": 1.611186442156083e-05, "loss": 0.7779, "step": 3297 }, { "epoch": 0.24955544625628998, "grad_norm": 2.353821277618408, "learning_rate": 1.6110483950544454e-05, "loss": 0.7116, "step": 3298 }, { "epoch": 0.24963111497862359, "grad_norm": 2.44838547706604, "learning_rate": 1.610910303424658e-05, "loss": 0.7111, "step": 3299 }, { "epoch": 0.2497067837009572, "grad_norm": 2.416057586669922, "learning_rate": 1.6107721672753678e-05, "loss": 0.7076, "step": 3300 }, { "epoch": 0.24978245242329083, "grad_norm": 2.881209373474121, "learning_rate": 1.6106339866152255e-05, "loss": 0.9059, "step": 3301 }, { "epoch": 0.24985812114562445, "grad_norm": 2.15285325050354, "learning_rate": 1.6104957614528846e-05, "loss": 0.6258, "step": 3302 }, { "epoch": 0.24993378986795808, "grad_norm": 2.7021114826202393, "learning_rate": 1.610357491797001e-05, "loss": 0.8291, "step": 3303 }, { "epoch": 0.2500094585902917, "grad_norm": 2.52158260345459, "learning_rate": 1.6102191776562335e-05, "loss": 0.7096, "step": 3304 }, { "epoch": 0.25008512731262533, "grad_norm": 2.6078898906707764, "learning_rate": 1.6100808190392446e-05, "loss": 0.7502, "step": 3305 }, { "epoch": 0.25016079603495894, "grad_norm": 2.7016923427581787, "learning_rate": 1.6099424159546976e-05, "loss": 0.6632, "step": 3306 }, { "epoch": 0.25023646475729255, "grad_norm": 2.718710422515869, "learning_rate": 1.6098039684112605e-05, "loss": 0.6973, "step": 3307 }, { "epoch": 0.2503121334796262, "grad_norm": 2.2537267208099365, "learning_rate": 1.6096654764176027e-05, "loss": 0.6324, "step": 3308 }, { "epoch": 0.25038780220195983, "grad_norm": 2.0894060134887695, "learning_rate": 1.609526939982398e-05, "loss": 0.711, "step": 3309 }, { "epoch": 0.25046347092429344, "grad_norm": 2.1615793704986572, "learning_rate": 1.6093883591143212e-05, "loss": 0.8218, "step": 3310 }, { "epoch": 0.25053913964662705, "grad_norm": 2.357072114944458, "learning_rate": 1.609249733822051e-05, "loss": 0.7687, "step": 3311 }, { "epoch": 0.2506148083689607, "grad_norm": 2.5962414741516113, "learning_rate": 1.6091110641142683e-05, "loss": 1.0468, "step": 3312 }, { "epoch": 0.25069047709129433, "grad_norm": 2.7395946979522705, "learning_rate": 1.608972349999657e-05, "loss": 0.7615, "step": 3313 }, { "epoch": 0.25076614581362794, "grad_norm": 3.2760064601898193, "learning_rate": 1.6088335914869047e-05, "loss": 0.7543, "step": 3314 }, { "epoch": 0.25084181453596155, "grad_norm": 2.215672254562378, "learning_rate": 1.6086947885846997e-05, "loss": 0.8131, "step": 3315 }, { "epoch": 0.25091748325829516, "grad_norm": 2.628455638885498, "learning_rate": 1.6085559413017353e-05, "loss": 0.8267, "step": 3316 }, { "epoch": 0.2509931519806288, "grad_norm": 2.1428425312042236, "learning_rate": 1.608417049646706e-05, "loss": 0.6553, "step": 3317 }, { "epoch": 0.25106882070296244, "grad_norm": 2.397225856781006, "learning_rate": 1.6082781136283094e-05, "loss": 0.7837, "step": 3318 }, { "epoch": 0.25114448942529605, "grad_norm": 1.8074523210525513, "learning_rate": 1.6081391332552464e-05, "loss": 0.6386, "step": 3319 }, { "epoch": 0.25122015814762966, "grad_norm": 2.304368257522583, "learning_rate": 1.608000108536221e-05, "loss": 0.7698, "step": 3320 }, { "epoch": 0.2512958268699633, "grad_norm": 2.147972583770752, "learning_rate": 1.6078610394799386e-05, "loss": 0.77, "step": 3321 }, { "epoch": 0.25137149559229693, "grad_norm": 2.355785846710205, "learning_rate": 1.6077219260951082e-05, "loss": 0.6399, "step": 3322 }, { "epoch": 0.25144716431463054, "grad_norm": 2.293780565261841, "learning_rate": 1.607582768390442e-05, "loss": 0.7807, "step": 3323 }, { "epoch": 0.25152283303696416, "grad_norm": 1.981724500656128, "learning_rate": 1.6074435663746543e-05, "loss": 0.6969, "step": 3324 }, { "epoch": 0.2515985017592978, "grad_norm": 2.2810912132263184, "learning_rate": 1.6073043200564623e-05, "loss": 0.711, "step": 3325 }, { "epoch": 0.25167417048163143, "grad_norm": 1.999266266822815, "learning_rate": 1.607165029444586e-05, "loss": 0.7184, "step": 3326 }, { "epoch": 0.25174983920396504, "grad_norm": 2.708433151245117, "learning_rate": 1.6070256945477485e-05, "loss": 0.7204, "step": 3327 }, { "epoch": 0.25182550792629865, "grad_norm": 2.3254494667053223, "learning_rate": 1.606886315374675e-05, "loss": 0.6824, "step": 3328 }, { "epoch": 0.25190117664863226, "grad_norm": 2.2623493671417236, "learning_rate": 1.606746891934094e-05, "loss": 0.8442, "step": 3329 }, { "epoch": 0.25197684537096593, "grad_norm": 2.412431240081787, "learning_rate": 1.606607424234737e-05, "loss": 0.8786, "step": 3330 }, { "epoch": 0.25205251409329954, "grad_norm": 2.111781597137451, "learning_rate": 1.6064679122853372e-05, "loss": 0.836, "step": 3331 }, { "epoch": 0.25212818281563315, "grad_norm": 2.9553704261779785, "learning_rate": 1.6063283560946322e-05, "loss": 0.8473, "step": 3332 }, { "epoch": 0.25220385153796676, "grad_norm": 2.696552276611328, "learning_rate": 1.6061887556713608e-05, "loss": 0.7043, "step": 3333 }, { "epoch": 0.2522795202603004, "grad_norm": 1.8179265260696411, "learning_rate": 1.6060491110242655e-05, "loss": 0.8593, "step": 3334 }, { "epoch": 0.25235518898263404, "grad_norm": 2.586488962173462, "learning_rate": 1.6059094221620913e-05, "loss": 0.8374, "step": 3335 }, { "epoch": 0.25243085770496765, "grad_norm": 2.9181783199310303, "learning_rate": 1.6057696890935857e-05, "loss": 0.8011, "step": 3336 }, { "epoch": 0.25250652642730126, "grad_norm": 2.254702568054199, "learning_rate": 1.6056299118274993e-05, "loss": 0.6613, "step": 3337 }, { "epoch": 0.2525821951496349, "grad_norm": 1.7335007190704346, "learning_rate": 1.6054900903725856e-05, "loss": 0.6857, "step": 3338 }, { "epoch": 0.25265786387196854, "grad_norm": 2.5397019386291504, "learning_rate": 1.605350224737601e-05, "loss": 0.911, "step": 3339 }, { "epoch": 0.25273353259430215, "grad_norm": 1.3949565887451172, "learning_rate": 1.6052103149313037e-05, "loss": 0.9984, "step": 3340 }, { "epoch": 0.25280920131663576, "grad_norm": 1.854061245918274, "learning_rate": 1.6050703609624554e-05, "loss": 0.7489, "step": 3341 }, { "epoch": 0.25288487003896937, "grad_norm": 2.37091064453125, "learning_rate": 1.604930362839821e-05, "loss": 0.7097, "step": 3342 }, { "epoch": 0.25296053876130303, "grad_norm": 2.4179093837738037, "learning_rate": 1.604790320572167e-05, "loss": 0.7679, "step": 3343 }, { "epoch": 0.25303620748363664, "grad_norm": 2.1405601501464844, "learning_rate": 1.6046502341682637e-05, "loss": 0.6784, "step": 3344 }, { "epoch": 0.25311187620597025, "grad_norm": 2.4001498222351074, "learning_rate": 1.6045101036368833e-05, "loss": 0.7994, "step": 3345 }, { "epoch": 0.25318754492830386, "grad_norm": 2.001662492752075, "learning_rate": 1.6043699289868018e-05, "loss": 0.8095, "step": 3346 }, { "epoch": 0.25326321365063753, "grad_norm": 2.297889471054077, "learning_rate": 1.6042297102267972e-05, "loss": 0.8036, "step": 3347 }, { "epoch": 0.25333888237297114, "grad_norm": 2.313671588897705, "learning_rate": 1.6040894473656502e-05, "loss": 0.6839, "step": 3348 }, { "epoch": 0.25341455109530475, "grad_norm": 2.0433685779571533, "learning_rate": 1.603949140412145e-05, "loss": 0.7668, "step": 3349 }, { "epoch": 0.25349021981763836, "grad_norm": 2.314209461212158, "learning_rate": 1.6038087893750673e-05, "loss": 0.806, "step": 3350 }, { "epoch": 0.25356588853997203, "grad_norm": 2.245436906814575, "learning_rate": 1.6036683942632073e-05, "loss": 0.7672, "step": 3351 }, { "epoch": 0.25364155726230564, "grad_norm": 1.8543047904968262, "learning_rate": 1.6035279550853564e-05, "loss": 0.6501, "step": 3352 }, { "epoch": 0.25371722598463925, "grad_norm": 2.085162878036499, "learning_rate": 1.6033874718503092e-05, "loss": 0.7534, "step": 3353 }, { "epoch": 0.25379289470697286, "grad_norm": 2.3843894004821777, "learning_rate": 1.6032469445668636e-05, "loss": 0.8074, "step": 3354 }, { "epoch": 0.25386856342930647, "grad_norm": 2.223787307739258, "learning_rate": 1.6031063732438197e-05, "loss": 0.7994, "step": 3355 }, { "epoch": 0.25394423215164014, "grad_norm": 2.203183650970459, "learning_rate": 1.6029657578899808e-05, "loss": 0.7981, "step": 3356 }, { "epoch": 0.25401990087397375, "grad_norm": 2.084005355834961, "learning_rate": 1.6028250985141524e-05, "loss": 0.8071, "step": 3357 }, { "epoch": 0.25409556959630736, "grad_norm": 2.4879648685455322, "learning_rate": 1.602684395125143e-05, "loss": 0.7354, "step": 3358 }, { "epoch": 0.25417123831864097, "grad_norm": 2.1480824947357178, "learning_rate": 1.602543647731764e-05, "loss": 0.775, "step": 3359 }, { "epoch": 0.25424690704097463, "grad_norm": 2.283517360687256, "learning_rate": 1.6024028563428296e-05, "loss": 0.7326, "step": 3360 }, { "epoch": 0.25432257576330825, "grad_norm": 2.376298666000366, "learning_rate": 1.6022620209671567e-05, "loss": 0.6533, "step": 3361 }, { "epoch": 0.25439824448564186, "grad_norm": 2.1882760524749756, "learning_rate": 1.6021211416135644e-05, "loss": 0.8149, "step": 3362 }, { "epoch": 0.25447391320797547, "grad_norm": 2.442096471786499, "learning_rate": 1.601980218290875e-05, "loss": 0.8602, "step": 3363 }, { "epoch": 0.25454958193030913, "grad_norm": 2.5917186737060547, "learning_rate": 1.6018392510079145e-05, "loss": 0.7316, "step": 3364 }, { "epoch": 0.25462525065264274, "grad_norm": 2.8332271575927734, "learning_rate": 1.6016982397735098e-05, "loss": 0.6501, "step": 3365 }, { "epoch": 0.25470091937497635, "grad_norm": 2.322115182876587, "learning_rate": 1.6015571845964914e-05, "loss": 0.6404, "step": 3366 }, { "epoch": 0.25477658809730996, "grad_norm": 1.6798187494277954, "learning_rate": 1.6014160854856933e-05, "loss": 0.6577, "step": 3367 }, { "epoch": 0.2548522568196436, "grad_norm": 2.1623387336730957, "learning_rate": 1.601274942449951e-05, "loss": 0.7403, "step": 3368 }, { "epoch": 0.25492792554197724, "grad_norm": 2.2108073234558105, "learning_rate": 1.6011337554981044e-05, "loss": 0.626, "step": 3369 }, { "epoch": 0.25500359426431085, "grad_norm": 2.5659923553466797, "learning_rate": 1.6009925246389933e-05, "loss": 0.7742, "step": 3370 }, { "epoch": 0.25507926298664446, "grad_norm": 2.251542091369629, "learning_rate": 1.6008512498814637e-05, "loss": 0.8051, "step": 3371 }, { "epoch": 0.25515493170897807, "grad_norm": 2.227972984313965, "learning_rate": 1.6007099312343618e-05, "loss": 0.7986, "step": 3372 }, { "epoch": 0.25523060043131174, "grad_norm": 2.539790153503418, "learning_rate": 1.6005685687065375e-05, "loss": 0.7455, "step": 3373 }, { "epoch": 0.25530626915364535, "grad_norm": 2.585012435913086, "learning_rate": 1.6004271623068436e-05, "loss": 0.8405, "step": 3374 }, { "epoch": 0.25538193787597896, "grad_norm": 2.184983015060425, "learning_rate": 1.6002857120441354e-05, "loss": 0.6086, "step": 3375 }, { "epoch": 0.25545760659831257, "grad_norm": 3.0355441570281982, "learning_rate": 1.6001442179272708e-05, "loss": 0.8099, "step": 3376 }, { "epoch": 0.25553327532064624, "grad_norm": 2.2966806888580322, "learning_rate": 1.600002679965111e-05, "loss": 0.8367, "step": 3377 }, { "epoch": 0.25560894404297985, "grad_norm": 2.3266594409942627, "learning_rate": 1.599861098166519e-05, "loss": 0.7798, "step": 3378 }, { "epoch": 0.25568461276531346, "grad_norm": 2.5856709480285645, "learning_rate": 1.5997194725403614e-05, "loss": 0.7653, "step": 3379 }, { "epoch": 0.25576028148764707, "grad_norm": 1.9472054243087769, "learning_rate": 1.5995778030955073e-05, "loss": 0.8388, "step": 3380 }, { "epoch": 0.2558359502099807, "grad_norm": 2.6131577491760254, "learning_rate": 1.599436089840829e-05, "loss": 0.8128, "step": 3381 }, { "epoch": 0.25591161893231434, "grad_norm": 2.5530786514282227, "learning_rate": 1.5992943327851998e-05, "loss": 0.7969, "step": 3382 }, { "epoch": 0.25598728765464795, "grad_norm": 2.0992929935455322, "learning_rate": 1.599152531937498e-05, "loss": 0.7408, "step": 3383 }, { "epoch": 0.25606295637698157, "grad_norm": 2.5544259548187256, "learning_rate": 1.599010687306603e-05, "loss": 0.8137, "step": 3384 }, { "epoch": 0.2561386250993152, "grad_norm": 2.344470500946045, "learning_rate": 1.5988687989013985e-05, "loss": 0.6803, "step": 3385 }, { "epoch": 0.25621429382164884, "grad_norm": 2.069218873977661, "learning_rate": 1.5987268667307688e-05, "loss": 0.8429, "step": 3386 }, { "epoch": 0.25628996254398245, "grad_norm": 1.9888510704040527, "learning_rate": 1.598584890803603e-05, "loss": 0.8587, "step": 3387 }, { "epoch": 0.25636563126631606, "grad_norm": 2.630401849746704, "learning_rate": 1.5984428711287917e-05, "loss": 0.8905, "step": 3388 }, { "epoch": 0.2564412999886497, "grad_norm": 2.1368906497955322, "learning_rate": 1.5983008077152292e-05, "loss": 0.6999, "step": 3389 }, { "epoch": 0.25651696871098334, "grad_norm": 3.064831495285034, "learning_rate": 1.598158700571811e-05, "loss": 0.6628, "step": 3390 }, { "epoch": 0.25659263743331695, "grad_norm": 2.474104881286621, "learning_rate": 1.598016549707437e-05, "loss": 0.8611, "step": 3391 }, { "epoch": 0.25666830615565056, "grad_norm": 2.035888671875, "learning_rate": 1.5978743551310094e-05, "loss": 0.6389, "step": 3392 }, { "epoch": 0.25674397487798417, "grad_norm": 2.2236475944519043, "learning_rate": 1.597732116851432e-05, "loss": 0.7512, "step": 3393 }, { "epoch": 0.2568196436003178, "grad_norm": 2.101870536804199, "learning_rate": 1.5975898348776128e-05, "loss": 0.8177, "step": 3394 }, { "epoch": 0.25689531232265145, "grad_norm": 2.129502058029175, "learning_rate": 1.5974475092184618e-05, "loss": 0.6882, "step": 3395 }, { "epoch": 0.25697098104498506, "grad_norm": 3.0002481937408447, "learning_rate": 1.5973051398828923e-05, "loss": 0.7554, "step": 3396 }, { "epoch": 0.25704664976731867, "grad_norm": 2.7170114517211914, "learning_rate": 1.5971627268798193e-05, "loss": 0.6824, "step": 3397 }, { "epoch": 0.2571223184896523, "grad_norm": 2.4976844787597656, "learning_rate": 1.5970202702181613e-05, "loss": 0.8525, "step": 3398 }, { "epoch": 0.25719798721198595, "grad_norm": 2.3807260990142822, "learning_rate": 1.59687776990684e-05, "loss": 0.6943, "step": 3399 }, { "epoch": 0.25727365593431956, "grad_norm": 2.146085023880005, "learning_rate": 1.5967352259547786e-05, "loss": 0.7723, "step": 3400 }, { "epoch": 0.25734932465665317, "grad_norm": 2.7178564071655273, "learning_rate": 1.596592638370904e-05, "loss": 0.6769, "step": 3401 }, { "epoch": 0.2574249933789868, "grad_norm": 2.112178325653076, "learning_rate": 1.5964500071641446e-05, "loss": 0.8901, "step": 3402 }, { "epoch": 0.25750066210132044, "grad_norm": 2.2620973587036133, "learning_rate": 1.5963073323434336e-05, "loss": 0.9132, "step": 3403 }, { "epoch": 0.25757633082365405, "grad_norm": 2.1333353519439697, "learning_rate": 1.5961646139177053e-05, "loss": 0.8134, "step": 3404 }, { "epoch": 0.25765199954598766, "grad_norm": 2.5050787925720215, "learning_rate": 1.5960218518958977e-05, "loss": 0.863, "step": 3405 }, { "epoch": 0.2577276682683213, "grad_norm": 1.9675660133361816, "learning_rate": 1.59587904628695e-05, "loss": 0.8025, "step": 3406 }, { "epoch": 0.2578033369906549, "grad_norm": 2.3362104892730713, "learning_rate": 1.5957361970998056e-05, "loss": 0.8922, "step": 3407 }, { "epoch": 0.25787900571298855, "grad_norm": 2.6554508209228516, "learning_rate": 1.5955933043434102e-05, "loss": 0.6258, "step": 3408 }, { "epoch": 0.25795467443532216, "grad_norm": 2.41428542137146, "learning_rate": 1.5954503680267128e-05, "loss": 0.8198, "step": 3409 }, { "epoch": 0.2580303431576558, "grad_norm": 2.5038862228393555, "learning_rate": 1.5953073881586637e-05, "loss": 0.7589, "step": 3410 }, { "epoch": 0.2581060118799894, "grad_norm": 2.199652671813965, "learning_rate": 1.5951643647482172e-05, "loss": 0.6257, "step": 3411 }, { "epoch": 0.25818168060232305, "grad_norm": 2.1079485416412354, "learning_rate": 1.5950212978043294e-05, "loss": 0.6186, "step": 3412 }, { "epoch": 0.25825734932465666, "grad_norm": 2.202430248260498, "learning_rate": 1.5948781873359602e-05, "loss": 0.7587, "step": 3413 }, { "epoch": 0.25833301804699027, "grad_norm": 2.260615110397339, "learning_rate": 1.5947350333520713e-05, "loss": 0.7012, "step": 3414 }, { "epoch": 0.2584086867693239, "grad_norm": 2.422053337097168, "learning_rate": 1.5945918358616276e-05, "loss": 0.9323, "step": 3415 }, { "epoch": 0.25848435549165755, "grad_norm": 2.8548550605773926, "learning_rate": 1.5944485948735965e-05, "loss": 0.722, "step": 3416 }, { "epoch": 0.25856002421399116, "grad_norm": 2.1494009494781494, "learning_rate": 1.5943053103969484e-05, "loss": 0.8007, "step": 3417 }, { "epoch": 0.25863569293632477, "grad_norm": 2.599536180496216, "learning_rate": 1.594161982440656e-05, "loss": 0.9085, "step": 3418 }, { "epoch": 0.2587113616586584, "grad_norm": 2.498399257659912, "learning_rate": 1.5940186110136952e-05, "loss": 0.7815, "step": 3419 }, { "epoch": 0.25878703038099204, "grad_norm": 2.293846845626831, "learning_rate": 1.593875196125044e-05, "loss": 0.6341, "step": 3420 }, { "epoch": 0.25886269910332566, "grad_norm": 1.788888931274414, "learning_rate": 1.593731737783684e-05, "loss": 0.6127, "step": 3421 }, { "epoch": 0.25893836782565927, "grad_norm": 2.166012763977051, "learning_rate": 1.5935882359985986e-05, "loss": 0.6978, "step": 3422 }, { "epoch": 0.2590140365479929, "grad_norm": 2.089470148086548, "learning_rate": 1.5934446907787748e-05, "loss": 0.7217, "step": 3423 }, { "epoch": 0.2590897052703265, "grad_norm": 1.9885095357894897, "learning_rate": 1.5933011021332015e-05, "loss": 0.6653, "step": 3424 }, { "epoch": 0.25916537399266015, "grad_norm": 2.147557497024536, "learning_rate": 1.5931574700708704e-05, "loss": 0.6181, "step": 3425 }, { "epoch": 0.25924104271499376, "grad_norm": 1.9154552221298218, "learning_rate": 1.5930137946007768e-05, "loss": 0.7011, "step": 3426 }, { "epoch": 0.2593167114373274, "grad_norm": 1.8677332401275635, "learning_rate": 1.592870075731918e-05, "loss": 0.8459, "step": 3427 }, { "epoch": 0.259392380159661, "grad_norm": 2.35475492477417, "learning_rate": 1.592726313473294e-05, "loss": 0.8357, "step": 3428 }, { "epoch": 0.25946804888199465, "grad_norm": 2.0991413593292236, "learning_rate": 1.592582507833908e-05, "loss": 0.715, "step": 3429 }, { "epoch": 0.25954371760432826, "grad_norm": 2.366481304168701, "learning_rate": 1.592438658822765e-05, "loss": 0.751, "step": 3430 }, { "epoch": 0.25961938632666187, "grad_norm": 2.203183650970459, "learning_rate": 1.5922947664488733e-05, "loss": 0.863, "step": 3431 }, { "epoch": 0.2596950550489955, "grad_norm": 2.090794324874878, "learning_rate": 1.5921508307212445e-05, "loss": 0.7527, "step": 3432 }, { "epoch": 0.25977072377132915, "grad_norm": 2.188838005065918, "learning_rate": 1.592006851648892e-05, "loss": 0.8538, "step": 3433 }, { "epoch": 0.25984639249366276, "grad_norm": 2.623730182647705, "learning_rate": 1.5918628292408323e-05, "loss": 1.0331, "step": 3434 }, { "epoch": 0.25992206121599637, "grad_norm": 1.948943853378296, "learning_rate": 1.591718763506084e-05, "loss": 0.705, "step": 3435 }, { "epoch": 0.25999772993833, "grad_norm": 2.0423173904418945, "learning_rate": 1.59157465445367e-05, "loss": 0.7737, "step": 3436 }, { "epoch": 0.2600733986606636, "grad_norm": 3.1950523853302, "learning_rate": 1.591430502092614e-05, "loss": 0.7861, "step": 3437 }, { "epoch": 0.26014906738299726, "grad_norm": 1.802661657333374, "learning_rate": 1.5912863064319437e-05, "loss": 0.6932, "step": 3438 }, { "epoch": 0.26022473610533087, "grad_norm": 2.07924485206604, "learning_rate": 1.591142067480689e-05, "loss": 0.7227, "step": 3439 }, { "epoch": 0.2603004048276645, "grad_norm": 2.3398637771606445, "learning_rate": 1.5909977852478826e-05, "loss": 0.8542, "step": 3440 }, { "epoch": 0.2603760735499981, "grad_norm": 2.2240166664123535, "learning_rate": 1.5908534597425597e-05, "loss": 0.6994, "step": 3441 }, { "epoch": 0.26045174227233175, "grad_norm": 2.661499261856079, "learning_rate": 1.5907090909737592e-05, "loss": 0.7015, "step": 3442 }, { "epoch": 0.26052741099466536, "grad_norm": 2.6953768730163574, "learning_rate": 1.590564678950521e-05, "loss": 0.8241, "step": 3443 }, { "epoch": 0.260603079716999, "grad_norm": 2.4903414249420166, "learning_rate": 1.590420223681889e-05, "loss": 0.648, "step": 3444 }, { "epoch": 0.2606787484393326, "grad_norm": 1.9942926168441772, "learning_rate": 1.5902757251769097e-05, "loss": 0.7933, "step": 3445 }, { "epoch": 0.26075441716166625, "grad_norm": 2.222245931625366, "learning_rate": 1.590131183444632e-05, "loss": 0.8263, "step": 3446 }, { "epoch": 0.26083008588399986, "grad_norm": 2.2102739810943604, "learning_rate": 1.589986598494107e-05, "loss": 0.7452, "step": 3447 }, { "epoch": 0.2609057546063335, "grad_norm": 2.0333497524261475, "learning_rate": 1.5898419703343896e-05, "loss": 0.7399, "step": 3448 }, { "epoch": 0.2609814233286671, "grad_norm": 3.434465169906616, "learning_rate": 1.5896972989745372e-05, "loss": 0.5499, "step": 3449 }, { "epoch": 0.2610570920510007, "grad_norm": 1.980613112449646, "learning_rate": 1.589552584423609e-05, "loss": 0.7486, "step": 3450 }, { "epoch": 0.26113276077333436, "grad_norm": 2.2190256118774414, "learning_rate": 1.5894078266906676e-05, "loss": 0.7358, "step": 3451 }, { "epoch": 0.26120842949566797, "grad_norm": 2.138643264770508, "learning_rate": 1.5892630257847783e-05, "loss": 0.7376, "step": 3452 }, { "epoch": 0.2612840982180016, "grad_norm": 2.1247470378875732, "learning_rate": 1.589118181715009e-05, "loss": 0.5772, "step": 3453 }, { "epoch": 0.2613597669403352, "grad_norm": 2.137392520904541, "learning_rate": 1.58897329449043e-05, "loss": 0.8956, "step": 3454 }, { "epoch": 0.26143543566266886, "grad_norm": 2.029174327850342, "learning_rate": 1.588828364120115e-05, "loss": 0.7217, "step": 3455 }, { "epoch": 0.26151110438500247, "grad_norm": 1.8149274587631226, "learning_rate": 1.5886833906131404e-05, "loss": 0.9841, "step": 3456 }, { "epoch": 0.2615867731073361, "grad_norm": 2.2353672981262207, "learning_rate": 1.588538373978584e-05, "loss": 0.7479, "step": 3457 }, { "epoch": 0.2616624418296697, "grad_norm": 2.4940500259399414, "learning_rate": 1.5883933142255276e-05, "loss": 0.6687, "step": 3458 }, { "epoch": 0.26173811055200336, "grad_norm": 2.020583391189575, "learning_rate": 1.5882482113630554e-05, "loss": 0.7496, "step": 3459 }, { "epoch": 0.26181377927433697, "grad_norm": 2.2853190898895264, "learning_rate": 1.5881030654002542e-05, "loss": 0.7181, "step": 3460 }, { "epoch": 0.2618894479966706, "grad_norm": 2.1658430099487305, "learning_rate": 1.5879578763462135e-05, "loss": 0.7435, "step": 3461 }, { "epoch": 0.2619651167190042, "grad_norm": 2.1128828525543213, "learning_rate": 1.5878126442100252e-05, "loss": 0.7692, "step": 3462 }, { "epoch": 0.2620407854413378, "grad_norm": 2.092738628387451, "learning_rate": 1.5876673690007848e-05, "loss": 0.6514, "step": 3463 }, { "epoch": 0.26211645416367146, "grad_norm": 2.4423155784606934, "learning_rate": 1.587522050727589e-05, "loss": 0.8233, "step": 3464 }, { "epoch": 0.2621921228860051, "grad_norm": 2.969520092010498, "learning_rate": 1.5873766893995392e-05, "loss": 0.7755, "step": 3465 }, { "epoch": 0.2622677916083387, "grad_norm": 2.1669178009033203, "learning_rate": 1.5872312850257378e-05, "loss": 0.8578, "step": 3466 }, { "epoch": 0.2623434603306723, "grad_norm": 2.1200549602508545, "learning_rate": 1.5870858376152904e-05, "loss": 0.8403, "step": 3467 }, { "epoch": 0.26241912905300596, "grad_norm": 2.184720039367676, "learning_rate": 1.5869403471773058e-05, "loss": 0.6616, "step": 3468 }, { "epoch": 0.2624947977753396, "grad_norm": 2.18776798248291, "learning_rate": 1.5867948137208945e-05, "loss": 0.8047, "step": 3469 }, { "epoch": 0.2625704664976732, "grad_norm": 2.2961819171905518, "learning_rate": 1.5866492372551707e-05, "loss": 0.8281, "step": 3470 }, { "epoch": 0.2626461352200068, "grad_norm": 2.440213918685913, "learning_rate": 1.5865036177892508e-05, "loss": 0.7852, "step": 3471 }, { "epoch": 0.26272180394234046, "grad_norm": 2.684682846069336, "learning_rate": 1.586357955332254e-05, "loss": 0.7171, "step": 3472 }, { "epoch": 0.26279747266467407, "grad_norm": 2.211758613586426, "learning_rate": 1.5862122498933016e-05, "loss": 0.8172, "step": 3473 }, { "epoch": 0.2628731413870077, "grad_norm": 2.2629692554473877, "learning_rate": 1.5860665014815192e-05, "loss": 0.7832, "step": 3474 }, { "epoch": 0.2629488101093413, "grad_norm": 2.0024826526641846, "learning_rate": 1.5859207101060336e-05, "loss": 0.8227, "step": 3475 }, { "epoch": 0.2630244788316749, "grad_norm": 2.5234375, "learning_rate": 1.585774875775974e-05, "loss": 0.8759, "step": 3476 }, { "epoch": 0.26310014755400857, "grad_norm": 2.879760503768921, "learning_rate": 1.585628998500474e-05, "loss": 0.7474, "step": 3477 }, { "epoch": 0.2631758162763422, "grad_norm": 2.5393941402435303, "learning_rate": 1.5854830782886686e-05, "loss": 0.8035, "step": 3478 }, { "epoch": 0.2632514849986758, "grad_norm": 2.9086737632751465, "learning_rate": 1.5853371151496956e-05, "loss": 0.7489, "step": 3479 }, { "epoch": 0.2633271537210094, "grad_norm": 2.603519916534424, "learning_rate": 1.5851911090926957e-05, "loss": 0.8353, "step": 3480 }, { "epoch": 0.26340282244334307, "grad_norm": 3.2521355152130127, "learning_rate": 1.5850450601268123e-05, "loss": 0.8392, "step": 3481 }, { "epoch": 0.2634784911656767, "grad_norm": 1.8981279134750366, "learning_rate": 1.5848989682611916e-05, "loss": 0.7701, "step": 3482 }, { "epoch": 0.2635541598880103, "grad_norm": 2.0759172439575195, "learning_rate": 1.5847528335049825e-05, "loss": 0.8041, "step": 3483 }, { "epoch": 0.2636298286103439, "grad_norm": 2.702728748321533, "learning_rate": 1.584606655867336e-05, "loss": 0.8192, "step": 3484 }, { "epoch": 0.26370549733267756, "grad_norm": 2.305008888244629, "learning_rate": 1.5844604353574065e-05, "loss": 0.788, "step": 3485 }, { "epoch": 0.2637811660550112, "grad_norm": 2.113942861557007, "learning_rate": 1.5843141719843506e-05, "loss": 0.8344, "step": 3486 }, { "epoch": 0.2638568347773448, "grad_norm": 1.9294511079788208, "learning_rate": 1.584167865757328e-05, "loss": 0.6665, "step": 3487 }, { "epoch": 0.2639325034996784, "grad_norm": 2.4283103942871094, "learning_rate": 1.584021516685501e-05, "loss": 0.7448, "step": 3488 }, { "epoch": 0.264008172222012, "grad_norm": 2.7982795238494873, "learning_rate": 1.583875124778034e-05, "loss": 0.797, "step": 3489 }, { "epoch": 0.26408384094434567, "grad_norm": 2.033411741256714, "learning_rate": 1.5837286900440946e-05, "loss": 0.6918, "step": 3490 }, { "epoch": 0.2641595096666793, "grad_norm": 1.9575029611587524, "learning_rate": 1.5835822124928536e-05, "loss": 0.7613, "step": 3491 }, { "epoch": 0.2642351783890129, "grad_norm": 2.232651472091675, "learning_rate": 1.583435692133483e-05, "loss": 0.7767, "step": 3492 }, { "epoch": 0.2643108471113465, "grad_norm": 2.3022804260253906, "learning_rate": 1.5832891289751595e-05, "loss": 0.6333, "step": 3493 }, { "epoch": 0.26438651583368017, "grad_norm": 2.3175253868103027, "learning_rate": 1.58314252302706e-05, "loss": 0.6724, "step": 3494 }, { "epoch": 0.2644621845560138, "grad_norm": 2.0862104892730713, "learning_rate": 1.5829958742983665e-05, "loss": 0.7843, "step": 3495 }, { "epoch": 0.2645378532783474, "grad_norm": 2.384624481201172, "learning_rate": 1.5828491827982625e-05, "loss": 0.6976, "step": 3496 }, { "epoch": 0.264613522000681, "grad_norm": 2.224984645843506, "learning_rate": 1.5827024485359337e-05, "loss": 0.7435, "step": 3497 }, { "epoch": 0.26468919072301467, "grad_norm": 2.280029535293579, "learning_rate": 1.5825556715205696e-05, "loss": 0.7737, "step": 3498 }, { "epoch": 0.2647648594453483, "grad_norm": 2.348893642425537, "learning_rate": 1.5824088517613618e-05, "loss": 0.7458, "step": 3499 }, { "epoch": 0.2648405281676819, "grad_norm": 2.458357334136963, "learning_rate": 1.5822619892675042e-05, "loss": 0.7923, "step": 3500 }, { "epoch": 0.2649161968900155, "grad_norm": 2.259758710861206, "learning_rate": 1.5821150840481944e-05, "loss": 0.9079, "step": 3501 }, { "epoch": 0.2649918656123491, "grad_norm": 2.3717095851898193, "learning_rate": 1.5819681361126315e-05, "loss": 0.9236, "step": 3502 }, { "epoch": 0.2650675343346828, "grad_norm": 1.9060953855514526, "learning_rate": 1.5818211454700185e-05, "loss": 0.6778, "step": 3503 }, { "epoch": 0.2651432030570164, "grad_norm": 2.2435457706451416, "learning_rate": 1.5816741121295602e-05, "loss": 0.7405, "step": 3504 }, { "epoch": 0.26521887177935, "grad_norm": 1.7485630512237549, "learning_rate": 1.5815270361004638e-05, "loss": 0.6545, "step": 3505 }, { "epoch": 0.2652945405016836, "grad_norm": 2.0698351860046387, "learning_rate": 1.5813799173919403e-05, "loss": 0.7109, "step": 3506 }, { "epoch": 0.2653702092240173, "grad_norm": 2.1378233432769775, "learning_rate": 1.5812327560132024e-05, "loss": 0.639, "step": 3507 }, { "epoch": 0.2654458779463509, "grad_norm": 2.570868492126465, "learning_rate": 1.581085551973466e-05, "loss": 0.8756, "step": 3508 }, { "epoch": 0.2655215466686845, "grad_norm": 2.893656015396118, "learning_rate": 1.5809383052819496e-05, "loss": 0.812, "step": 3509 }, { "epoch": 0.2655972153910181, "grad_norm": 2.2518537044525146, "learning_rate": 1.580791015947874e-05, "loss": 0.8163, "step": 3510 }, { "epoch": 0.26567288411335177, "grad_norm": 2.2821381092071533, "learning_rate": 1.580643683980463e-05, "loss": 0.6801, "step": 3511 }, { "epoch": 0.2657485528356854, "grad_norm": 2.068220376968384, "learning_rate": 1.580496309388943e-05, "loss": 0.8859, "step": 3512 }, { "epoch": 0.265824221558019, "grad_norm": 2.3530995845794678, "learning_rate": 1.580348892182543e-05, "loss": 0.8019, "step": 3513 }, { "epoch": 0.2658998902803526, "grad_norm": 2.0066163539886475, "learning_rate": 1.580201432370495e-05, "loss": 0.7801, "step": 3514 }, { "epoch": 0.2659755590026862, "grad_norm": 2.1849310398101807, "learning_rate": 1.5800539299620333e-05, "loss": 0.8071, "step": 3515 }, { "epoch": 0.2660512277250199, "grad_norm": 2.290064573287964, "learning_rate": 1.5799063849663948e-05, "loss": 0.6413, "step": 3516 }, { "epoch": 0.2661268964473535, "grad_norm": 2.0501484870910645, "learning_rate": 1.5797587973928197e-05, "loss": 0.6741, "step": 3517 }, { "epoch": 0.2662025651696871, "grad_norm": 2.559082508087158, "learning_rate": 1.57961116725055e-05, "loss": 0.7717, "step": 3518 }, { "epoch": 0.2662782338920207, "grad_norm": 2.377612829208374, "learning_rate": 1.579463494548831e-05, "loss": 0.7944, "step": 3519 }, { "epoch": 0.2663539026143544, "grad_norm": 2.3611176013946533, "learning_rate": 1.57931577929691e-05, "loss": 0.7483, "step": 3520 }, { "epoch": 0.266429571336688, "grad_norm": 1.9215701818466187, "learning_rate": 1.5791680215040376e-05, "loss": 0.6026, "step": 3521 }, { "epoch": 0.2665052400590216, "grad_norm": 2.0787158012390137, "learning_rate": 1.5790202211794675e-05, "loss": 0.7157, "step": 3522 }, { "epoch": 0.2665809087813552, "grad_norm": 2.295515298843384, "learning_rate": 1.5788723783324546e-05, "loss": 0.7047, "step": 3523 }, { "epoch": 0.2666565775036889, "grad_norm": 1.9624741077423096, "learning_rate": 1.5787244929722578e-05, "loss": 0.7119, "step": 3524 }, { "epoch": 0.2667322462260225, "grad_norm": 2.571765661239624, "learning_rate": 1.5785765651081377e-05, "loss": 0.7344, "step": 3525 }, { "epoch": 0.2668079149483561, "grad_norm": 3.0177741050720215, "learning_rate": 1.5784285947493585e-05, "loss": 0.9012, "step": 3526 }, { "epoch": 0.2668835836706897, "grad_norm": 2.370260238647461, "learning_rate": 1.5782805819051865e-05, "loss": 0.838, "step": 3527 }, { "epoch": 0.2669592523930233, "grad_norm": 2.122828722000122, "learning_rate": 1.5781325265848906e-05, "loss": 0.8281, "step": 3528 }, { "epoch": 0.267034921115357, "grad_norm": 1.9243050813674927, "learning_rate": 1.5779844287977424e-05, "loss": 0.7285, "step": 3529 }, { "epoch": 0.2671105898376906, "grad_norm": 2.585332155227661, "learning_rate": 1.577836288553016e-05, "loss": 0.8387, "step": 3530 }, { "epoch": 0.2671862585600242, "grad_norm": 2.6601555347442627, "learning_rate": 1.5776881058599897e-05, "loss": 0.7493, "step": 3531 }, { "epoch": 0.2672619272823578, "grad_norm": 2.395071029663086, "learning_rate": 1.577539880727942e-05, "loss": 0.8335, "step": 3532 }, { "epoch": 0.2673375960046915, "grad_norm": 1.9745339155197144, "learning_rate": 1.5773916131661553e-05, "loss": 0.6619, "step": 3533 }, { "epoch": 0.2674132647270251, "grad_norm": 2.2702884674072266, "learning_rate": 1.577243303183915e-05, "loss": 0.7704, "step": 3534 }, { "epoch": 0.2674889334493587, "grad_norm": 2.0734710693359375, "learning_rate": 1.5770949507905085e-05, "loss": 0.7202, "step": 3535 }, { "epoch": 0.2675646021716923, "grad_norm": 1.9147382974624634, "learning_rate": 1.576946555995226e-05, "loss": 0.6244, "step": 3536 }, { "epoch": 0.267640270894026, "grad_norm": 2.5039188861846924, "learning_rate": 1.576798118807361e-05, "loss": 0.6814, "step": 3537 }, { "epoch": 0.2677159396163596, "grad_norm": 2.3731849193573, "learning_rate": 1.5766496392362088e-05, "loss": 0.7602, "step": 3538 }, { "epoch": 0.2677916083386932, "grad_norm": 2.7606589794158936, "learning_rate": 1.5765011172910676e-05, "loss": 0.7816, "step": 3539 }, { "epoch": 0.2678672770610268, "grad_norm": 2.252899408340454, "learning_rate": 1.576352552981238e-05, "loss": 0.8885, "step": 3540 }, { "epoch": 0.2679429457833605, "grad_norm": 2.393841505050659, "learning_rate": 1.5762039463160244e-05, "loss": 0.7985, "step": 3541 }, { "epoch": 0.2680186145056941, "grad_norm": 2.1034390926361084, "learning_rate": 1.5760552973047324e-05, "loss": 0.7088, "step": 3542 }, { "epoch": 0.2680942832280277, "grad_norm": 2.355592727661133, "learning_rate": 1.5759066059566708e-05, "loss": 0.7645, "step": 3543 }, { "epoch": 0.2681699519503613, "grad_norm": 2.2355756759643555, "learning_rate": 1.575757872281152e-05, "loss": 0.745, "step": 3544 }, { "epoch": 0.2682456206726949, "grad_norm": 2.194815158843994, "learning_rate": 1.5756090962874887e-05, "loss": 0.5606, "step": 3545 }, { "epoch": 0.2683212893950286, "grad_norm": 2.1128334999084473, "learning_rate": 1.5754602779849992e-05, "loss": 0.7998, "step": 3546 }, { "epoch": 0.2683969581173622, "grad_norm": 2.3708901405334473, "learning_rate": 1.5753114173830024e-05, "loss": 0.8299, "step": 3547 }, { "epoch": 0.2684726268396958, "grad_norm": 2.555021286010742, "learning_rate": 1.5751625144908203e-05, "loss": 0.7638, "step": 3548 }, { "epoch": 0.2685482955620294, "grad_norm": 2.081984758377075, "learning_rate": 1.5750135693177777e-05, "loss": 0.7852, "step": 3549 }, { "epoch": 0.2686239642843631, "grad_norm": 2.4795191287994385, "learning_rate": 1.5748645818732025e-05, "loss": 0.7854, "step": 3550 }, { "epoch": 0.2686996330066967, "grad_norm": 2.0074732303619385, "learning_rate": 1.574715552166424e-05, "loss": 0.8557, "step": 3551 }, { "epoch": 0.2687753017290303, "grad_norm": 2.3227956295013428, "learning_rate": 1.5745664802067755e-05, "loss": 0.6981, "step": 3552 }, { "epoch": 0.2688509704513639, "grad_norm": 3.9149978160858154, "learning_rate": 1.5744173660035923e-05, "loss": 0.7036, "step": 3553 }, { "epoch": 0.2689266391736976, "grad_norm": 2.3597822189331055, "learning_rate": 1.574268209566212e-05, "loss": 0.7453, "step": 3554 }, { "epoch": 0.2690023078960312, "grad_norm": 3.120544672012329, "learning_rate": 1.574119010903976e-05, "loss": 0.7463, "step": 3555 }, { "epoch": 0.2690779766183648, "grad_norm": 2.2731995582580566, "learning_rate": 1.573969770026227e-05, "loss": 0.7326, "step": 3556 }, { "epoch": 0.2691536453406984, "grad_norm": 3.586319923400879, "learning_rate": 1.5738204869423107e-05, "loss": 0.7289, "step": 3557 }, { "epoch": 0.269229314063032, "grad_norm": 2.4169609546661377, "learning_rate": 1.5736711616615765e-05, "loss": 0.8624, "step": 3558 }, { "epoch": 0.2693049827853657, "grad_norm": 2.7807676792144775, "learning_rate": 1.5735217941933754e-05, "loss": 0.7344, "step": 3559 }, { "epoch": 0.2693806515076993, "grad_norm": 2.2362794876098633, "learning_rate": 1.5733723845470606e-05, "loss": 0.8274, "step": 3560 }, { "epoch": 0.2694563202300329, "grad_norm": 2.450251817703247, "learning_rate": 1.5732229327319895e-05, "loss": 0.7416, "step": 3561 }, { "epoch": 0.2695319889523665, "grad_norm": 2.79219126701355, "learning_rate": 1.573073438757521e-05, "loss": 0.6722, "step": 3562 }, { "epoch": 0.2696076576747002, "grad_norm": 2.2558610439300537, "learning_rate": 1.5729239026330167e-05, "loss": 0.5821, "step": 3563 }, { "epoch": 0.2696833263970338, "grad_norm": 2.438255786895752, "learning_rate": 1.572774324367841e-05, "loss": 0.8141, "step": 3564 }, { "epoch": 0.2697589951193674, "grad_norm": 2.1649351119995117, "learning_rate": 1.572624703971361e-05, "loss": 0.6348, "step": 3565 }, { "epoch": 0.269834663841701, "grad_norm": 2.2655069828033447, "learning_rate": 1.5724750414529466e-05, "loss": 0.6662, "step": 3566 }, { "epoch": 0.2699103325640347, "grad_norm": 2.0114426612854004, "learning_rate": 1.57232533682197e-05, "loss": 0.7854, "step": 3567 }, { "epoch": 0.2699860012863683, "grad_norm": 2.8764772415161133, "learning_rate": 1.5721755900878062e-05, "loss": 0.6343, "step": 3568 }, { "epoch": 0.2700616700087019, "grad_norm": 2.2476704120635986, "learning_rate": 1.5720258012598332e-05, "loss": 0.9252, "step": 3569 }, { "epoch": 0.2701373387310355, "grad_norm": 2.010709047317505, "learning_rate": 1.5718759703474307e-05, "loss": 0.639, "step": 3570 }, { "epoch": 0.2702130074533691, "grad_norm": 2.497084617614746, "learning_rate": 1.571726097359982e-05, "loss": 0.7162, "step": 3571 }, { "epoch": 0.2702886761757028, "grad_norm": 2.4472103118896484, "learning_rate": 1.571576182306872e-05, "loss": 0.796, "step": 3572 }, { "epoch": 0.2703643448980364, "grad_norm": 2.1636784076690674, "learning_rate": 1.5714262251974896e-05, "loss": 0.7544, "step": 3573 }, { "epoch": 0.27044001362037, "grad_norm": 2.9408183097839355, "learning_rate": 1.5712762260412256e-05, "loss": 0.9256, "step": 3574 }, { "epoch": 0.2705156823427036, "grad_norm": 1.9260292053222656, "learning_rate": 1.571126184847473e-05, "loss": 0.893, "step": 3575 }, { "epoch": 0.2705913510650373, "grad_norm": 3.143998861312866, "learning_rate": 1.5709761016256277e-05, "loss": 0.7053, "step": 3576 }, { "epoch": 0.2706670197873709, "grad_norm": 2.45214581489563, "learning_rate": 1.570825976385089e-05, "loss": 0.7357, "step": 3577 }, { "epoch": 0.2707426885097045, "grad_norm": 2.4344959259033203, "learning_rate": 1.570675809135258e-05, "loss": 0.7794, "step": 3578 }, { "epoch": 0.2708183572320381, "grad_norm": 2.231781005859375, "learning_rate": 1.5705255998855384e-05, "loss": 0.8033, "step": 3579 }, { "epoch": 0.2708940259543718, "grad_norm": 2.1317062377929688, "learning_rate": 1.570375348645337e-05, "loss": 0.7642, "step": 3580 }, { "epoch": 0.2709696946767054, "grad_norm": 2.1097607612609863, "learning_rate": 1.570225055424063e-05, "loss": 0.6544, "step": 3581 }, { "epoch": 0.271045363399039, "grad_norm": 3.234607219696045, "learning_rate": 1.5700747202311284e-05, "loss": 0.8878, "step": 3582 }, { "epoch": 0.2711210321213726, "grad_norm": 2.072981119155884, "learning_rate": 1.5699243430759477e-05, "loss": 0.692, "step": 3583 }, { "epoch": 0.27119670084370623, "grad_norm": 2.0358352661132812, "learning_rate": 1.5697739239679374e-05, "loss": 0.7518, "step": 3584 }, { "epoch": 0.2712723695660399, "grad_norm": 2.821546792984009, "learning_rate": 1.569623462916518e-05, "loss": 0.8253, "step": 3585 }, { "epoch": 0.2713480382883735, "grad_norm": 2.268690824508667, "learning_rate": 1.569472959931111e-05, "loss": 0.7328, "step": 3586 }, { "epoch": 0.2714237070107071, "grad_norm": 2.1014082431793213, "learning_rate": 1.5693224150211427e-05, "loss": 0.7633, "step": 3587 }, { "epoch": 0.2714993757330407, "grad_norm": 2.4852254390716553, "learning_rate": 1.5691718281960395e-05, "loss": 0.6053, "step": 3588 }, { "epoch": 0.2715750444553744, "grad_norm": 2.0812735557556152, "learning_rate": 1.569021199465232e-05, "loss": 0.6779, "step": 3589 }, { "epoch": 0.271650713177708, "grad_norm": 2.4176812171936035, "learning_rate": 1.5688705288381533e-05, "loss": 0.6303, "step": 3590 }, { "epoch": 0.2717263819000416, "grad_norm": 2.428131103515625, "learning_rate": 1.5687198163242388e-05, "loss": 0.6474, "step": 3591 }, { "epoch": 0.2718020506223752, "grad_norm": 2.6284077167510986, "learning_rate": 1.568569061932926e-05, "loss": 0.6688, "step": 3592 }, { "epoch": 0.2718777193447089, "grad_norm": 2.674342155456543, "learning_rate": 1.5684182656736566e-05, "loss": 0.7523, "step": 3593 }, { "epoch": 0.2719533880670425, "grad_norm": 2.8079347610473633, "learning_rate": 1.5682674275558734e-05, "loss": 0.7762, "step": 3594 }, { "epoch": 0.2720290567893761, "grad_norm": 2.347954273223877, "learning_rate": 1.568116547589022e-05, "loss": 0.7758, "step": 3595 }, { "epoch": 0.2721047255117097, "grad_norm": 2.2305753231048584, "learning_rate": 1.567965625782552e-05, "loss": 0.8519, "step": 3596 }, { "epoch": 0.27218039423404333, "grad_norm": 2.6532304286956787, "learning_rate": 1.567814662145914e-05, "loss": 0.6987, "step": 3597 }, { "epoch": 0.272256062956377, "grad_norm": 3.169668197631836, "learning_rate": 1.5676636566885616e-05, "loss": 0.7087, "step": 3598 }, { "epoch": 0.2723317316787106, "grad_norm": 2.8570854663848877, "learning_rate": 1.5675126094199516e-05, "loss": 0.7527, "step": 3599 }, { "epoch": 0.2724074004010442, "grad_norm": 2.194880247116089, "learning_rate": 1.567361520349543e-05, "loss": 0.7146, "step": 3600 }, { "epoch": 0.27248306912337783, "grad_norm": 2.3188719749450684, "learning_rate": 1.5672103894867978e-05, "loss": 0.8719, "step": 3601 }, { "epoch": 0.2725587378457115, "grad_norm": 2.677793502807617, "learning_rate": 1.5670592168411797e-05, "loss": 0.7991, "step": 3602 }, { "epoch": 0.2726344065680451, "grad_norm": 2.2928788661956787, "learning_rate": 1.566908002422156e-05, "loss": 0.767, "step": 3603 }, { "epoch": 0.2727100752903787, "grad_norm": 2.1832549571990967, "learning_rate": 1.566756746239196e-05, "loss": 0.7049, "step": 3604 }, { "epoch": 0.27278574401271233, "grad_norm": 1.460695743560791, "learning_rate": 1.5666054483017722e-05, "loss": 0.8869, "step": 3605 }, { "epoch": 0.272861412735046, "grad_norm": 2.577636241912842, "learning_rate": 1.566454108619359e-05, "loss": 0.7205, "step": 3606 }, { "epoch": 0.2729370814573796, "grad_norm": 1.833762764930725, "learning_rate": 1.5663027272014337e-05, "loss": 0.9315, "step": 3607 }, { "epoch": 0.2730127501797132, "grad_norm": 1.9866582155227661, "learning_rate": 1.566151304057477e-05, "loss": 0.6925, "step": 3608 }, { "epoch": 0.2730884189020468, "grad_norm": 2.173614501953125, "learning_rate": 1.565999839196971e-05, "loss": 0.8448, "step": 3609 }, { "epoch": 0.27316408762438044, "grad_norm": 1.7537118196487427, "learning_rate": 1.5658483326294008e-05, "loss": 0.6487, "step": 3610 }, { "epoch": 0.2732397563467141, "grad_norm": 1.7981582880020142, "learning_rate": 1.5656967843642544e-05, "loss": 0.8801, "step": 3611 }, { "epoch": 0.2733154250690477, "grad_norm": 2.380213499069214, "learning_rate": 1.5655451944110223e-05, "loss": 0.7393, "step": 3612 }, { "epoch": 0.2733910937913813, "grad_norm": 2.3052456378936768, "learning_rate": 1.5653935627791976e-05, "loss": 0.8234, "step": 3613 }, { "epoch": 0.27346676251371493, "grad_norm": 2.1715638637542725, "learning_rate": 1.5652418894782755e-05, "loss": 0.6953, "step": 3614 }, { "epoch": 0.2735424312360486, "grad_norm": 2.075761079788208, "learning_rate": 1.565090174517755e-05, "loss": 0.662, "step": 3615 }, { "epoch": 0.2736180999583822, "grad_norm": 1.88273024559021, "learning_rate": 1.5649384179071363e-05, "loss": 0.6704, "step": 3616 }, { "epoch": 0.2736937686807158, "grad_norm": 2.0616722106933594, "learning_rate": 1.5647866196559234e-05, "loss": 0.6941, "step": 3617 }, { "epoch": 0.27376943740304943, "grad_norm": 2.087221145629883, "learning_rate": 1.564634779773622e-05, "loss": 0.866, "step": 3618 }, { "epoch": 0.2738451061253831, "grad_norm": 3.6184213161468506, "learning_rate": 1.5644828982697413e-05, "loss": 0.6817, "step": 3619 }, { "epoch": 0.2739207748477167, "grad_norm": 1.8233073949813843, "learning_rate": 1.5643309751537922e-05, "loss": 0.7882, "step": 3620 }, { "epoch": 0.2739964435700503, "grad_norm": 2.140226364135742, "learning_rate": 1.564179010435289e-05, "loss": 0.7102, "step": 3621 }, { "epoch": 0.27407211229238393, "grad_norm": 1.8913706541061401, "learning_rate": 1.5640270041237475e-05, "loss": 0.7444, "step": 3622 }, { "epoch": 0.27414778101471754, "grad_norm": 2.4049935340881348, "learning_rate": 1.5638749562286875e-05, "loss": 0.7232, "step": 3623 }, { "epoch": 0.2742234497370512, "grad_norm": 2.22857666015625, "learning_rate": 1.5637228667596302e-05, "loss": 0.7192, "step": 3624 }, { "epoch": 0.2742991184593848, "grad_norm": 2.1087875366210938, "learning_rate": 1.5635707357261007e-05, "loss": 0.6453, "step": 3625 }, { "epoch": 0.2743747871817184, "grad_norm": 3.1201884746551514, "learning_rate": 1.563418563137625e-05, "loss": 0.7145, "step": 3626 }, { "epoch": 0.27445045590405204, "grad_norm": 2.400087356567383, "learning_rate": 1.5632663490037334e-05, "loss": 0.747, "step": 3627 }, { "epoch": 0.2745261246263857, "grad_norm": 2.568697214126587, "learning_rate": 1.563114093333958e-05, "loss": 0.7742, "step": 3628 }, { "epoch": 0.2746017933487193, "grad_norm": 2.265756607055664, "learning_rate": 1.562961796137833e-05, "loss": 0.8239, "step": 3629 }, { "epoch": 0.2746774620710529, "grad_norm": 2.1188042163848877, "learning_rate": 1.5628094574248962e-05, "loss": 0.818, "step": 3630 }, { "epoch": 0.27475313079338654, "grad_norm": 2.4410367012023926, "learning_rate": 1.562657077204687e-05, "loss": 0.8349, "step": 3631 }, { "epoch": 0.2748287995157202, "grad_norm": 2.043889284133911, "learning_rate": 1.562504655486749e-05, "loss": 0.7269, "step": 3632 }, { "epoch": 0.2749044682380538, "grad_norm": 2.041012763977051, "learning_rate": 1.5623521922806263e-05, "loss": 0.7347, "step": 3633 }, { "epoch": 0.2749801369603874, "grad_norm": 2.0784006118774414, "learning_rate": 1.5621996875958668e-05, "loss": 0.5876, "step": 3634 }, { "epoch": 0.27505580568272103, "grad_norm": 2.4200499057769775, "learning_rate": 1.5620471414420212e-05, "loss": 0.6634, "step": 3635 }, { "epoch": 0.27513147440505464, "grad_norm": 2.1126153469085693, "learning_rate": 1.5618945538286423e-05, "loss": 0.801, "step": 3636 }, { "epoch": 0.2752071431273883, "grad_norm": 2.4012205600738525, "learning_rate": 1.561741924765286e-05, "loss": 0.7216, "step": 3637 }, { "epoch": 0.2752828118497219, "grad_norm": 2.6323201656341553, "learning_rate": 1.5615892542615095e-05, "loss": 0.7527, "step": 3638 }, { "epoch": 0.27535848057205553, "grad_norm": 2.035029649734497, "learning_rate": 1.5614365423268742e-05, "loss": 0.9722, "step": 3639 }, { "epoch": 0.27543414929438914, "grad_norm": 1.8150397539138794, "learning_rate": 1.561283788970943e-05, "loss": 0.6651, "step": 3640 }, { "epoch": 0.2755098180167228, "grad_norm": 2.298706293106079, "learning_rate": 1.5611309942032827e-05, "loss": 0.6894, "step": 3641 }, { "epoch": 0.2755854867390564, "grad_norm": 2.7103264331817627, "learning_rate": 1.5609781580334607e-05, "loss": 0.8144, "step": 3642 }, { "epoch": 0.27566115546139003, "grad_norm": 1.96636962890625, "learning_rate": 1.560825280471049e-05, "loss": 0.7921, "step": 3643 }, { "epoch": 0.27573682418372364, "grad_norm": 2.5422213077545166, "learning_rate": 1.5606723615256205e-05, "loss": 0.8534, "step": 3644 }, { "epoch": 0.2758124929060573, "grad_norm": 2.5183253288269043, "learning_rate": 1.560519401206752e-05, "loss": 0.6715, "step": 3645 }, { "epoch": 0.2758881616283909, "grad_norm": 2.7416634559631348, "learning_rate": 1.5603663995240223e-05, "loss": 0.7949, "step": 3646 }, { "epoch": 0.2759638303507245, "grad_norm": 2.2581331729888916, "learning_rate": 1.5602133564870126e-05, "loss": 0.7579, "step": 3647 }, { "epoch": 0.27603949907305814, "grad_norm": 1.9897353649139404, "learning_rate": 1.5600602721053073e-05, "loss": 0.6848, "step": 3648 }, { "epoch": 0.27611516779539175, "grad_norm": 1.6935006380081177, "learning_rate": 1.5599071463884927e-05, "loss": 0.7117, "step": 3649 }, { "epoch": 0.2761908365177254, "grad_norm": 2.8586652278900146, "learning_rate": 1.5597539793461584e-05, "loss": 0.9308, "step": 3650 }, { "epoch": 0.276266505240059, "grad_norm": 1.9398143291473389, "learning_rate": 1.5596007709878957e-05, "loss": 0.7423, "step": 3651 }, { "epoch": 0.27634217396239263, "grad_norm": 3.198117733001709, "learning_rate": 1.5594475213232995e-05, "loss": 0.8414, "step": 3652 }, { "epoch": 0.27641784268472624, "grad_norm": 2.2561442852020264, "learning_rate": 1.5592942303619667e-05, "loss": 0.6747, "step": 3653 }, { "epoch": 0.2764935114070599, "grad_norm": 1.7593903541564941, "learning_rate": 1.5591408981134966e-05, "loss": 0.7299, "step": 3654 }, { "epoch": 0.2765691801293935, "grad_norm": 2.0782854557037354, "learning_rate": 1.5589875245874918e-05, "loss": 0.6619, "step": 3655 }, { "epoch": 0.27664484885172713, "grad_norm": 3.2505691051483154, "learning_rate": 1.5588341097935565e-05, "loss": 0.7653, "step": 3656 }, { "epoch": 0.27672051757406074, "grad_norm": 2.4041876792907715, "learning_rate": 1.5586806537412987e-05, "loss": 0.7123, "step": 3657 }, { "epoch": 0.2767961862963944, "grad_norm": 2.018611431121826, "learning_rate": 1.5585271564403276e-05, "loss": 0.6393, "step": 3658 }, { "epoch": 0.276871855018728, "grad_norm": 2.0278730392456055, "learning_rate": 1.558373617900256e-05, "loss": 0.7515, "step": 3659 }, { "epoch": 0.27694752374106163, "grad_norm": 2.383406400680542, "learning_rate": 1.558220038130699e-05, "loss": 0.7467, "step": 3660 }, { "epoch": 0.27702319246339524, "grad_norm": 2.429568290710449, "learning_rate": 1.5580664171412743e-05, "loss": 0.7286, "step": 3661 }, { "epoch": 0.2770988611857289, "grad_norm": 2.291583776473999, "learning_rate": 1.5579127549416024e-05, "loss": 0.7208, "step": 3662 }, { "epoch": 0.2771745299080625, "grad_norm": 2.344414710998535, "learning_rate": 1.5577590515413054e-05, "loss": 0.7562, "step": 3663 }, { "epoch": 0.2772501986303961, "grad_norm": 2.6295807361602783, "learning_rate": 1.5576053069500093e-05, "loss": 0.7099, "step": 3664 }, { "epoch": 0.27732586735272974, "grad_norm": 2.575855016708374, "learning_rate": 1.557451521177342e-05, "loss": 0.8234, "step": 3665 }, { "epoch": 0.27740153607506335, "grad_norm": 2.7682507038116455, "learning_rate": 1.557297694232934e-05, "loss": 0.8003, "step": 3666 }, { "epoch": 0.277477204797397, "grad_norm": 2.386277675628662, "learning_rate": 1.5571438261264184e-05, "loss": 0.8584, "step": 3667 }, { "epoch": 0.2775528735197306, "grad_norm": 2.271766424179077, "learning_rate": 1.5569899168674308e-05, "loss": 0.7653, "step": 3668 }, { "epoch": 0.27762854224206424, "grad_norm": 2.1660287380218506, "learning_rate": 1.55683596646561e-05, "loss": 0.845, "step": 3669 }, { "epoch": 0.27770421096439785, "grad_norm": 2.2329893112182617, "learning_rate": 1.5566819749305962e-05, "loss": 0.7212, "step": 3670 }, { "epoch": 0.2777798796867315, "grad_norm": 2.5628223419189453, "learning_rate": 1.5565279422720335e-05, "loss": 0.919, "step": 3671 }, { "epoch": 0.2778555484090651, "grad_norm": 2.2546982765197754, "learning_rate": 1.556373868499567e-05, "loss": 0.6941, "step": 3672 }, { "epoch": 0.27793121713139873, "grad_norm": 2.420389413833618, "learning_rate": 1.556219753622846e-05, "loss": 0.7521, "step": 3673 }, { "epoch": 0.27800688585373234, "grad_norm": 1.9865230321884155, "learning_rate": 1.556065597651522e-05, "loss": 0.775, "step": 3674 }, { "epoch": 0.278082554576066, "grad_norm": 2.0972037315368652, "learning_rate": 1.5559114005952483e-05, "loss": 0.72, "step": 3675 }, { "epoch": 0.2781582232983996, "grad_norm": 3.4112987518310547, "learning_rate": 1.555757162463681e-05, "loss": 0.7276, "step": 3676 }, { "epoch": 0.27823389202073323, "grad_norm": 2.490372896194458, "learning_rate": 1.5556028832664793e-05, "loss": 0.7068, "step": 3677 }, { "epoch": 0.27830956074306684, "grad_norm": 2.6258339881896973, "learning_rate": 1.5554485630133045e-05, "loss": 0.801, "step": 3678 }, { "epoch": 0.27838522946540045, "grad_norm": 1.8640928268432617, "learning_rate": 1.5552942017138204e-05, "loss": 0.8175, "step": 3679 }, { "epoch": 0.2784608981877341, "grad_norm": 3.355138063430786, "learning_rate": 1.5551397993776943e-05, "loss": 0.7382, "step": 3680 }, { "epoch": 0.27853656691006773, "grad_norm": 2.3146259784698486, "learning_rate": 1.554985356014595e-05, "loss": 0.6254, "step": 3681 }, { "epoch": 0.27861223563240134, "grad_norm": 1.9555891752243042, "learning_rate": 1.5548308716341944e-05, "loss": 0.6862, "step": 3682 }, { "epoch": 0.27868790435473495, "grad_norm": 2.361454725265503, "learning_rate": 1.554676346246166e-05, "loss": 0.6829, "step": 3683 }, { "epoch": 0.2787635730770686, "grad_norm": 2.4104437828063965, "learning_rate": 1.5545217798601878e-05, "loss": 0.6886, "step": 3684 }, { "epoch": 0.2788392417994022, "grad_norm": 2.2208261489868164, "learning_rate": 1.5543671724859387e-05, "loss": 0.6815, "step": 3685 }, { "epoch": 0.27891491052173584, "grad_norm": 2.3924684524536133, "learning_rate": 1.5542125241331006e-05, "loss": 0.8088, "step": 3686 }, { "epoch": 0.27899057924406945, "grad_norm": 2.2955551147460938, "learning_rate": 1.5540578348113585e-05, "loss": 0.8344, "step": 3687 }, { "epoch": 0.2790662479664031, "grad_norm": 2.611711025238037, "learning_rate": 1.553903104530399e-05, "loss": 0.7999, "step": 3688 }, { "epoch": 0.2791419166887367, "grad_norm": 2.359159231185913, "learning_rate": 1.5537483332999123e-05, "loss": 0.7241, "step": 3689 }, { "epoch": 0.27921758541107033, "grad_norm": 2.494488000869751, "learning_rate": 1.5535935211295906e-05, "loss": 0.7985, "step": 3690 }, { "epoch": 0.27929325413340395, "grad_norm": 2.4356842041015625, "learning_rate": 1.5534386680291286e-05, "loss": 0.8035, "step": 3691 }, { "epoch": 0.27936892285573756, "grad_norm": 2.5868098735809326, "learning_rate": 1.5532837740082237e-05, "loss": 0.9104, "step": 3692 }, { "epoch": 0.2794445915780712, "grad_norm": 2.140434741973877, "learning_rate": 1.5531288390765757e-05, "loss": 0.7656, "step": 3693 }, { "epoch": 0.27952026030040483, "grad_norm": 2.476284980773926, "learning_rate": 1.5529738632438873e-05, "loss": 0.7472, "step": 3694 }, { "epoch": 0.27959592902273844, "grad_norm": 2.0590710639953613, "learning_rate": 1.552818846519864e-05, "loss": 0.7425, "step": 3695 }, { "epoch": 0.27967159774507205, "grad_norm": 1.7913882732391357, "learning_rate": 1.552663788914213e-05, "loss": 0.7771, "step": 3696 }, { "epoch": 0.2797472664674057, "grad_norm": 2.3980860710144043, "learning_rate": 1.552508690436644e-05, "loss": 0.8346, "step": 3697 }, { "epoch": 0.27982293518973933, "grad_norm": 2.179081678390503, "learning_rate": 1.552353551096871e-05, "loss": 0.7082, "step": 3698 }, { "epoch": 0.27989860391207294, "grad_norm": 2.324483871459961, "learning_rate": 1.5521983709046084e-05, "loss": 0.6597, "step": 3699 }, { "epoch": 0.27997427263440655, "grad_norm": 3.7304763793945312, "learning_rate": 1.5520431498695743e-05, "loss": 0.6375, "step": 3700 }, { "epoch": 0.2800499413567402, "grad_norm": 2.4392082691192627, "learning_rate": 1.5518878880014894e-05, "loss": 0.6816, "step": 3701 }, { "epoch": 0.28012561007907383, "grad_norm": 2.4349966049194336, "learning_rate": 1.5517325853100762e-05, "loss": 0.79, "step": 3702 }, { "epoch": 0.28020127880140744, "grad_norm": 2.9991888999938965, "learning_rate": 1.5515772418050605e-05, "loss": 0.6722, "step": 3703 }, { "epoch": 0.28027694752374105, "grad_norm": 2.6778488159179688, "learning_rate": 1.5514218574961706e-05, "loss": 0.6987, "step": 3704 }, { "epoch": 0.28035261624607466, "grad_norm": 2.7935636043548584, "learning_rate": 1.5512664323931372e-05, "loss": 0.7712, "step": 3705 }, { "epoch": 0.2804282849684083, "grad_norm": 2.4512863159179688, "learning_rate": 1.5511109665056934e-05, "loss": 0.7726, "step": 3706 }, { "epoch": 0.28050395369074194, "grad_norm": 2.603178024291992, "learning_rate": 1.5509554598435745e-05, "loss": 0.6195, "step": 3707 }, { "epoch": 0.28057962241307555, "grad_norm": 2.1387760639190674, "learning_rate": 1.5507999124165196e-05, "loss": 0.6634, "step": 3708 }, { "epoch": 0.28065529113540916, "grad_norm": 2.0796477794647217, "learning_rate": 1.550644324234269e-05, "loss": 0.6855, "step": 3709 }, { "epoch": 0.2807309598577428, "grad_norm": 2.175611734390259, "learning_rate": 1.5504886953065666e-05, "loss": 0.7012, "step": 3710 }, { "epoch": 0.28080662858007643, "grad_norm": 2.2386960983276367, "learning_rate": 1.550333025643158e-05, "loss": 0.7295, "step": 3711 }, { "epoch": 0.28088229730241004, "grad_norm": 4.274651527404785, "learning_rate": 1.5501773152537922e-05, "loss": 0.7778, "step": 3712 }, { "epoch": 0.28095796602474365, "grad_norm": 2.236834764480591, "learning_rate": 1.5500215641482197e-05, "loss": 0.8035, "step": 3713 }, { "epoch": 0.2810336347470773, "grad_norm": 1.7886241674423218, "learning_rate": 1.5498657723361946e-05, "loss": 0.9152, "step": 3714 }, { "epoch": 0.28110930346941093, "grad_norm": 2.0251529216766357, "learning_rate": 1.5497099398274727e-05, "loss": 0.7473, "step": 3715 }, { "epoch": 0.28118497219174454, "grad_norm": 2.066375970840454, "learning_rate": 1.5495540666318133e-05, "loss": 0.806, "step": 3716 }, { "epoch": 0.28126064091407815, "grad_norm": 2.595116138458252, "learning_rate": 1.5493981527589768e-05, "loss": 0.9727, "step": 3717 }, { "epoch": 0.28133630963641176, "grad_norm": 2.0963737964630127, "learning_rate": 1.549242198218728e-05, "loss": 0.7616, "step": 3718 }, { "epoch": 0.28141197835874543, "grad_norm": 2.7268869876861572, "learning_rate": 1.5490862030208326e-05, "loss": 0.75, "step": 3719 }, { "epoch": 0.28148764708107904, "grad_norm": 2.4957454204559326, "learning_rate": 1.5489301671750602e-05, "loss": 0.7749, "step": 3720 }, { "epoch": 0.28156331580341265, "grad_norm": 2.260963201522827, "learning_rate": 1.5487740906911814e-05, "loss": 0.7527, "step": 3721 }, { "epoch": 0.28163898452574626, "grad_norm": 2.0017857551574707, "learning_rate": 1.5486179735789708e-05, "loss": 0.5984, "step": 3722 }, { "epoch": 0.2817146532480799, "grad_norm": 1.8426270484924316, "learning_rate": 1.548461815848205e-05, "loss": 0.7738, "step": 3723 }, { "epoch": 0.28179032197041354, "grad_norm": 2.3114354610443115, "learning_rate": 1.5483056175086624e-05, "loss": 0.6493, "step": 3724 }, { "epoch": 0.28186599069274715, "grad_norm": 2.0738372802734375, "learning_rate": 1.5481493785701255e-05, "loss": 0.7372, "step": 3725 }, { "epoch": 0.28194165941508076, "grad_norm": 2.051266670227051, "learning_rate": 1.547993099042378e-05, "loss": 0.6902, "step": 3726 }, { "epoch": 0.2820173281374144, "grad_norm": 2.181405544281006, "learning_rate": 1.547836778935207e-05, "loss": 0.7094, "step": 3727 }, { "epoch": 0.28209299685974804, "grad_norm": 2.6144001483917236, "learning_rate": 1.5476804182584015e-05, "loss": 0.7553, "step": 3728 }, { "epoch": 0.28216866558208165, "grad_norm": 2.2755348682403564, "learning_rate": 1.5475240170217532e-05, "loss": 0.7335, "step": 3729 }, { "epoch": 0.28224433430441526, "grad_norm": 2.2966911792755127, "learning_rate": 1.547367575235057e-05, "loss": 0.7893, "step": 3730 }, { "epoch": 0.28232000302674887, "grad_norm": 2.246685266494751, "learning_rate": 1.547211092908109e-05, "loss": 0.7984, "step": 3731 }, { "epoch": 0.28239567174908253, "grad_norm": 2.2465322017669678, "learning_rate": 1.547054570050709e-05, "loss": 0.8449, "step": 3732 }, { "epoch": 0.28247134047141614, "grad_norm": 2.385525703430176, "learning_rate": 1.546898006672659e-05, "loss": 0.7544, "step": 3733 }, { "epoch": 0.28254700919374975, "grad_norm": 2.5891146659851074, "learning_rate": 1.5467414027837638e-05, "loss": 0.7069, "step": 3734 }, { "epoch": 0.28262267791608336, "grad_norm": 3.729670763015747, "learning_rate": 1.54658475839383e-05, "loss": 0.8181, "step": 3735 }, { "epoch": 0.28269834663841703, "grad_norm": 2.792766571044922, "learning_rate": 1.546428073512667e-05, "loss": 0.7421, "step": 3736 }, { "epoch": 0.28277401536075064, "grad_norm": 1.9378312826156616, "learning_rate": 1.5462713481500875e-05, "loss": 0.4965, "step": 3737 }, { "epoch": 0.28284968408308425, "grad_norm": 2.495208978652954, "learning_rate": 1.5461145823159063e-05, "loss": 0.6608, "step": 3738 }, { "epoch": 0.28292535280541786, "grad_norm": 2.5819809436798096, "learning_rate": 1.5459577760199396e-05, "loss": 0.6334, "step": 3739 }, { "epoch": 0.28300102152775153, "grad_norm": 2.375948667526245, "learning_rate": 1.5458009292720077e-05, "loss": 0.7322, "step": 3740 }, { "epoch": 0.28307669025008514, "grad_norm": 2.2175607681274414, "learning_rate": 1.545644042081933e-05, "loss": 0.9031, "step": 3741 }, { "epoch": 0.28315235897241875, "grad_norm": 2.4103119373321533, "learning_rate": 1.54548711445954e-05, "loss": 0.9138, "step": 3742 }, { "epoch": 0.28322802769475236, "grad_norm": 3.062034845352173, "learning_rate": 1.5453301464146563e-05, "loss": 0.8161, "step": 3743 }, { "epoch": 0.28330369641708597, "grad_norm": 2.3038175106048584, "learning_rate": 1.5451731379571115e-05, "loss": 0.7118, "step": 3744 }, { "epoch": 0.28337936513941964, "grad_norm": 3.113598108291626, "learning_rate": 1.545016089096738e-05, "loss": 0.772, "step": 3745 }, { "epoch": 0.28345503386175325, "grad_norm": 2.0970757007598877, "learning_rate": 1.544858999843371e-05, "loss": 0.8035, "step": 3746 }, { "epoch": 0.28353070258408686, "grad_norm": 2.837749481201172, "learning_rate": 1.5447018702068475e-05, "loss": 0.8556, "step": 3747 }, { "epoch": 0.28360637130642047, "grad_norm": 2.4374568462371826, "learning_rate": 1.5445447001970078e-05, "loss": 0.6574, "step": 3748 }, { "epoch": 0.28368204002875413, "grad_norm": 2.6389224529266357, "learning_rate": 1.544387489823694e-05, "loss": 0.6553, "step": 3749 }, { "epoch": 0.28375770875108774, "grad_norm": 2.4554734230041504, "learning_rate": 1.5442302390967517e-05, "loss": 0.7377, "step": 3750 }, { "epoch": 0.28383337747342136, "grad_norm": 2.6802544593811035, "learning_rate": 1.544072948026028e-05, "loss": 0.6677, "step": 3751 }, { "epoch": 0.28390904619575497, "grad_norm": 2.261831283569336, "learning_rate": 1.543915616621373e-05, "loss": 0.6648, "step": 3752 }, { "epoch": 0.28398471491808863, "grad_norm": 2.759446144104004, "learning_rate": 1.5437582448926395e-05, "loss": 0.5712, "step": 3753 }, { "epoch": 0.28406038364042224, "grad_norm": 1.8836723566055298, "learning_rate": 1.5436008328496827e-05, "loss": 0.7159, "step": 3754 }, { "epoch": 0.28413605236275585, "grad_norm": 2.804567813873291, "learning_rate": 1.54344338050236e-05, "loss": 0.7679, "step": 3755 }, { "epoch": 0.28421172108508946, "grad_norm": 2.44331431388855, "learning_rate": 1.5432858878605315e-05, "loss": 0.8358, "step": 3756 }, { "epoch": 0.2842873898074231, "grad_norm": 1.8980436325073242, "learning_rate": 1.54312835493406e-05, "loss": 0.6766, "step": 3757 }, { "epoch": 0.28436305852975674, "grad_norm": 3.5173916816711426, "learning_rate": 1.542970781732811e-05, "loss": 0.8005, "step": 3758 }, { "epoch": 0.28443872725209035, "grad_norm": 2.506789207458496, "learning_rate": 1.542813168266652e-05, "loss": 0.7122, "step": 3759 }, { "epoch": 0.28451439597442396, "grad_norm": 2.8228511810302734, "learning_rate": 1.5426555145454533e-05, "loss": 0.7872, "step": 3760 }, { "epoch": 0.28459006469675757, "grad_norm": 2.5148704051971436, "learning_rate": 1.5424978205790875e-05, "loss": 0.8348, "step": 3761 }, { "epoch": 0.28466573341909124, "grad_norm": 2.349149227142334, "learning_rate": 1.54234008637743e-05, "loss": 0.9365, "step": 3762 }, { "epoch": 0.28474140214142485, "grad_norm": 2.6498892307281494, "learning_rate": 1.542182311950359e-05, "loss": 0.8949, "step": 3763 }, { "epoch": 0.28481707086375846, "grad_norm": 2.6310479640960693, "learning_rate": 1.5420244973077547e-05, "loss": 0.793, "step": 3764 }, { "epoch": 0.28489273958609207, "grad_norm": 2.2707366943359375, "learning_rate": 1.5418666424595e-05, "loss": 0.7804, "step": 3765 }, { "epoch": 0.28496840830842574, "grad_norm": 2.279681444168091, "learning_rate": 1.5417087474154796e-05, "loss": 0.7311, "step": 3766 }, { "epoch": 0.28504407703075935, "grad_norm": 2.6598429679870605, "learning_rate": 1.541550812185582e-05, "loss": 0.727, "step": 3767 }, { "epoch": 0.28511974575309296, "grad_norm": 2.0504753589630127, "learning_rate": 1.5413928367796974e-05, "loss": 0.7835, "step": 3768 }, { "epoch": 0.28519541447542657, "grad_norm": 2.460989236831665, "learning_rate": 1.541234821207719e-05, "loss": 0.7642, "step": 3769 }, { "epoch": 0.28527108319776023, "grad_norm": 2.5266435146331787, "learning_rate": 1.5410767654795423e-05, "loss": 0.6354, "step": 3770 }, { "epoch": 0.28534675192009384, "grad_norm": 2.2240378856658936, "learning_rate": 1.540918669605065e-05, "loss": 0.8202, "step": 3771 }, { "epoch": 0.28542242064242745, "grad_norm": 2.3329272270202637, "learning_rate": 1.540760533594187e-05, "loss": 0.6528, "step": 3772 }, { "epoch": 0.28549808936476107, "grad_norm": 2.2796730995178223, "learning_rate": 1.5406023574568124e-05, "loss": 0.6997, "step": 3773 }, { "epoch": 0.2855737580870947, "grad_norm": 2.3111414909362793, "learning_rate": 1.5404441412028464e-05, "loss": 0.7873, "step": 3774 }, { "epoch": 0.28564942680942834, "grad_norm": 2.165189266204834, "learning_rate": 1.5402858848421962e-05, "loss": 0.7418, "step": 3775 }, { "epoch": 0.28572509553176195, "grad_norm": 3.3214333057403564, "learning_rate": 1.5401275883847736e-05, "loss": 0.7305, "step": 3776 }, { "epoch": 0.28580076425409556, "grad_norm": 2.159792900085449, "learning_rate": 1.539969251840491e-05, "loss": 0.6443, "step": 3777 }, { "epoch": 0.2858764329764292, "grad_norm": 2.1362006664276123, "learning_rate": 1.5398108752192636e-05, "loss": 0.9493, "step": 3778 }, { "epoch": 0.28595210169876284, "grad_norm": 2.815425157546997, "learning_rate": 1.53965245853101e-05, "loss": 0.7395, "step": 3779 }, { "epoch": 0.28602777042109645, "grad_norm": 2.310188055038452, "learning_rate": 1.53949400178565e-05, "loss": 0.8445, "step": 3780 }, { "epoch": 0.28610343914343006, "grad_norm": 2.610882043838501, "learning_rate": 1.539335504993108e-05, "loss": 0.6956, "step": 3781 }, { "epoch": 0.28617910786576367, "grad_norm": 1.9927890300750732, "learning_rate": 1.5391769681633084e-05, "loss": 0.761, "step": 3782 }, { "epoch": 0.28625477658809734, "grad_norm": 2.202202558517456, "learning_rate": 1.53901839130618e-05, "loss": 0.8485, "step": 3783 }, { "epoch": 0.28633044531043095, "grad_norm": 3.0478105545043945, "learning_rate": 1.5388597744316527e-05, "loss": 0.7307, "step": 3784 }, { "epoch": 0.28640611403276456, "grad_norm": 2.1386568546295166, "learning_rate": 1.5387011175496604e-05, "loss": 0.7544, "step": 3785 }, { "epoch": 0.28648178275509817, "grad_norm": 2.2375717163085938, "learning_rate": 1.538542420670138e-05, "loss": 0.9585, "step": 3786 }, { "epoch": 0.2865574514774318, "grad_norm": 2.476581573486328, "learning_rate": 1.5383836838030242e-05, "loss": 0.6515, "step": 3787 }, { "epoch": 0.28663312019976545, "grad_norm": 2.101915121078491, "learning_rate": 1.538224906958259e-05, "loss": 0.7691, "step": 3788 }, { "epoch": 0.28670878892209906, "grad_norm": 2.667830467224121, "learning_rate": 1.538066090145786e-05, "loss": 0.7572, "step": 3789 }, { "epoch": 0.28678445764443267, "grad_norm": 2.5487112998962402, "learning_rate": 1.5379072333755508e-05, "loss": 0.5705, "step": 3790 }, { "epoch": 0.2868601263667663, "grad_norm": 2.0503430366516113, "learning_rate": 1.5377483366575012e-05, "loss": 0.8871, "step": 3791 }, { "epoch": 0.28693579508909994, "grad_norm": 1.8013478517532349, "learning_rate": 1.5375894000015883e-05, "loss": 0.7667, "step": 3792 }, { "epoch": 0.28701146381143355, "grad_norm": 11.85326862335205, "learning_rate": 1.5374304234177648e-05, "loss": 0.9177, "step": 3793 }, { "epoch": 0.28708713253376716, "grad_norm": 2.1834912300109863, "learning_rate": 1.5372714069159865e-05, "loss": 0.7887, "step": 3794 }, { "epoch": 0.2871628012561008, "grad_norm": 2.3547914028167725, "learning_rate": 1.5371123505062116e-05, "loss": 0.5846, "step": 3795 }, { "epoch": 0.28723846997843444, "grad_norm": 2.845733404159546, "learning_rate": 1.5369532541984003e-05, "loss": 0.6585, "step": 3796 }, { "epoch": 0.28731413870076805, "grad_norm": 2.8735568523406982, "learning_rate": 1.5367941180025162e-05, "loss": 0.7482, "step": 3797 }, { "epoch": 0.28738980742310166, "grad_norm": 2.0185205936431885, "learning_rate": 1.536634941928525e-05, "loss": 0.7186, "step": 3798 }, { "epoch": 0.2874654761454353, "grad_norm": 2.4860315322875977, "learning_rate": 1.5364757259863943e-05, "loss": 0.6788, "step": 3799 }, { "epoch": 0.2875411448677689, "grad_norm": 1.9986555576324463, "learning_rate": 1.5363164701860953e-05, "loss": 0.7885, "step": 3800 }, { "epoch": 0.28761681359010255, "grad_norm": 2.455195665359497, "learning_rate": 1.536157174537601e-05, "loss": 0.7411, "step": 3801 }, { "epoch": 0.28769248231243616, "grad_norm": 2.079657793045044, "learning_rate": 1.5359978390508865e-05, "loss": 0.8515, "step": 3802 }, { "epoch": 0.28776815103476977, "grad_norm": 2.4668712615966797, "learning_rate": 1.5358384637359304e-05, "loss": 0.6981, "step": 3803 }, { "epoch": 0.2878438197571034, "grad_norm": 2.006376028060913, "learning_rate": 1.535679048602713e-05, "loss": 0.6889, "step": 3804 }, { "epoch": 0.28791948847943705, "grad_norm": 2.464771032333374, "learning_rate": 1.5355195936612178e-05, "loss": 0.7923, "step": 3805 }, { "epoch": 0.28799515720177066, "grad_norm": 2.3989076614379883, "learning_rate": 1.53536009892143e-05, "loss": 0.7724, "step": 3806 }, { "epoch": 0.28807082592410427, "grad_norm": 2.19671893119812, "learning_rate": 1.5352005643933378e-05, "loss": 0.7459, "step": 3807 }, { "epoch": 0.2881464946464379, "grad_norm": 2.1972532272338867, "learning_rate": 1.5350409900869317e-05, "loss": 0.6167, "step": 3808 }, { "epoch": 0.28822216336877154, "grad_norm": 4.029229164123535, "learning_rate": 1.534881376012205e-05, "loss": 0.7394, "step": 3809 }, { "epoch": 0.28829783209110516, "grad_norm": 2.5666446685791016, "learning_rate": 1.534721722179153e-05, "loss": 0.7416, "step": 3810 }, { "epoch": 0.28837350081343877, "grad_norm": 2.153628349304199, "learning_rate": 1.534562028597774e-05, "loss": 0.6953, "step": 3811 }, { "epoch": 0.2884491695357724, "grad_norm": 2.0070858001708984, "learning_rate": 1.5344022952780678e-05, "loss": 0.7555, "step": 3812 }, { "epoch": 0.288524838258106, "grad_norm": 2.8213300704956055, "learning_rate": 1.5342425222300384e-05, "loss": 0.7651, "step": 3813 }, { "epoch": 0.28860050698043965, "grad_norm": 2.5318684577941895, "learning_rate": 1.534082709463691e-05, "loss": 0.8429, "step": 3814 }, { "epoch": 0.28867617570277326, "grad_norm": 2.2411584854125977, "learning_rate": 1.533922856989033e-05, "loss": 1.041, "step": 3815 }, { "epoch": 0.2887518444251069, "grad_norm": 2.1817727088928223, "learning_rate": 1.5337629648160754e-05, "loss": 0.6029, "step": 3816 }, { "epoch": 0.2888275131474405, "grad_norm": 1.9107509851455688, "learning_rate": 1.5336030329548315e-05, "loss": 0.7631, "step": 3817 }, { "epoch": 0.28890318186977415, "grad_norm": 2.7551729679107666, "learning_rate": 1.533443061415316e-05, "loss": 0.6992, "step": 3818 }, { "epoch": 0.28897885059210776, "grad_norm": 3.3587355613708496, "learning_rate": 1.533283050207547e-05, "loss": 0.5839, "step": 3819 }, { "epoch": 0.28905451931444137, "grad_norm": 2.6251888275146484, "learning_rate": 1.533122999341546e-05, "loss": 0.8546, "step": 3820 }, { "epoch": 0.289130188036775, "grad_norm": 3.0454955101013184, "learning_rate": 1.532962908827334e-05, "loss": 0.8615, "step": 3821 }, { "epoch": 0.28920585675910865, "grad_norm": 2.355125904083252, "learning_rate": 1.532802778674938e-05, "loss": 0.7968, "step": 3822 }, { "epoch": 0.28928152548144226, "grad_norm": 2.498617649078369, "learning_rate": 1.5326426088943854e-05, "loss": 0.757, "step": 3823 }, { "epoch": 0.28935719420377587, "grad_norm": 2.536444664001465, "learning_rate": 1.532482399495706e-05, "loss": 0.9326, "step": 3824 }, { "epoch": 0.2894328629261095, "grad_norm": 2.7442028522491455, "learning_rate": 1.532322150488933e-05, "loss": 0.8168, "step": 3825 }, { "epoch": 0.2895085316484431, "grad_norm": 2.647066116333008, "learning_rate": 1.532161861884102e-05, "loss": 0.7789, "step": 3826 }, { "epoch": 0.28958420037077676, "grad_norm": 2.5589473247528076, "learning_rate": 1.5320015336912505e-05, "loss": 0.6668, "step": 3827 }, { "epoch": 0.28965986909311037, "grad_norm": 1.8725637197494507, "learning_rate": 1.531841165920419e-05, "loss": 0.8211, "step": 3828 }, { "epoch": 0.289735537815444, "grad_norm": 2.117074966430664, "learning_rate": 1.53168075858165e-05, "loss": 0.8106, "step": 3829 }, { "epoch": 0.2898112065377776, "grad_norm": 2.3661158084869385, "learning_rate": 1.5315203116849883e-05, "loss": 0.6988, "step": 3830 }, { "epoch": 0.28988687526011125, "grad_norm": 2.031886100769043, "learning_rate": 1.5313598252404824e-05, "loss": 0.7676, "step": 3831 }, { "epoch": 0.28996254398244486, "grad_norm": 3.7571802139282227, "learning_rate": 1.5311992992581824e-05, "loss": 0.8097, "step": 3832 }, { "epoch": 0.2900382127047785, "grad_norm": 2.4319801330566406, "learning_rate": 1.5310387337481405e-05, "loss": 0.7101, "step": 3833 }, { "epoch": 0.2901138814271121, "grad_norm": 2.1088521480560303, "learning_rate": 1.530878128720412e-05, "loss": 0.9235, "step": 3834 }, { "epoch": 0.29018955014944575, "grad_norm": 2.3383238315582275, "learning_rate": 1.5307174841850546e-05, "loss": 0.8772, "step": 3835 }, { "epoch": 0.29026521887177936, "grad_norm": 2.2183845043182373, "learning_rate": 1.530556800152129e-05, "loss": 0.8155, "step": 3836 }, { "epoch": 0.290340887594113, "grad_norm": 2.790646553039551, "learning_rate": 1.530396076631696e-05, "loss": 0.7333, "step": 3837 }, { "epoch": 0.2904165563164466, "grad_norm": 1.850390076637268, "learning_rate": 1.5302353136338226e-05, "loss": 0.8824, "step": 3838 }, { "epoch": 0.2904922250387802, "grad_norm": 2.336447238922119, "learning_rate": 1.530074511168575e-05, "loss": 0.6944, "step": 3839 }, { "epoch": 0.29056789376111386, "grad_norm": 2.158458709716797, "learning_rate": 1.5299136692460238e-05, "loss": 0.8595, "step": 3840 }, { "epoch": 0.29064356248344747, "grad_norm": 2.0733799934387207, "learning_rate": 1.5297527878762413e-05, "loss": 0.6355, "step": 3841 }, { "epoch": 0.2907192312057811, "grad_norm": 2.5459089279174805, "learning_rate": 1.529591867069302e-05, "loss": 0.7965, "step": 3842 }, { "epoch": 0.2907948999281147, "grad_norm": 2.3149020671844482, "learning_rate": 1.529430906835284e-05, "loss": 0.7332, "step": 3843 }, { "epoch": 0.29087056865044836, "grad_norm": 2.2165815830230713, "learning_rate": 1.5292699071842665e-05, "loss": 0.6736, "step": 3844 }, { "epoch": 0.29094623737278197, "grad_norm": 2.079904794692993, "learning_rate": 1.5291088681263325e-05, "loss": 0.6289, "step": 3845 }, { "epoch": 0.2910219060951156, "grad_norm": 2.122159957885742, "learning_rate": 1.5289477896715662e-05, "loss": 0.8419, "step": 3846 }, { "epoch": 0.2910975748174492, "grad_norm": 1.8512182235717773, "learning_rate": 1.5287866718300548e-05, "loss": 0.6351, "step": 3847 }, { "epoch": 0.29117324353978286, "grad_norm": 1.8471953868865967, "learning_rate": 1.5286255146118886e-05, "loss": 0.8519, "step": 3848 }, { "epoch": 0.29124891226211647, "grad_norm": 1.9632765054702759, "learning_rate": 1.5284643180271593e-05, "loss": 0.8273, "step": 3849 }, { "epoch": 0.2913245809844501, "grad_norm": 2.095710515975952, "learning_rate": 1.5283030820859614e-05, "loss": 0.7674, "step": 3850 }, { "epoch": 0.2914002497067837, "grad_norm": 2.047574520111084, "learning_rate": 1.528141806798393e-05, "loss": 0.7336, "step": 3851 }, { "epoch": 0.2914759184291173, "grad_norm": 3.1790666580200195, "learning_rate": 1.5279804921745526e-05, "loss": 0.6697, "step": 3852 }, { "epoch": 0.29155158715145096, "grad_norm": 1.7570991516113281, "learning_rate": 1.5278191382245424e-05, "loss": 0.7305, "step": 3853 }, { "epoch": 0.2916272558737846, "grad_norm": 6.570018291473389, "learning_rate": 1.5276577449584677e-05, "loss": 0.8719, "step": 3854 }, { "epoch": 0.2917029245961182, "grad_norm": 2.8734898567199707, "learning_rate": 1.5274963123864346e-05, "loss": 0.7329, "step": 3855 }, { "epoch": 0.2917785933184518, "grad_norm": 2.37735915184021, "learning_rate": 1.527334840518553e-05, "loss": 0.7074, "step": 3856 }, { "epoch": 0.29185426204078546, "grad_norm": 2.6533119678497314, "learning_rate": 1.5271733293649347e-05, "loss": 0.7864, "step": 3857 }, { "epoch": 0.29192993076311907, "grad_norm": 2.2681920528411865, "learning_rate": 1.5270117789356937e-05, "loss": 0.9383, "step": 3858 }, { "epoch": 0.2920055994854527, "grad_norm": 2.4914307594299316, "learning_rate": 1.5268501892409472e-05, "loss": 0.7368, "step": 3859 }, { "epoch": 0.2920812682077863, "grad_norm": 2.2388360500335693, "learning_rate": 1.5266885602908145e-05, "loss": 0.8739, "step": 3860 }, { "epoch": 0.29215693693011996, "grad_norm": 1.967874526977539, "learning_rate": 1.526526892095417e-05, "loss": 0.6916, "step": 3861 }, { "epoch": 0.29223260565245357, "grad_norm": 2.43058443069458, "learning_rate": 1.5263651846648794e-05, "loss": 0.8727, "step": 3862 }, { "epoch": 0.2923082743747872, "grad_norm": 2.0961925983428955, "learning_rate": 1.5262034380093276e-05, "loss": 0.8726, "step": 3863 }, { "epoch": 0.2923839430971208, "grad_norm": 2.120222330093384, "learning_rate": 1.5260416521388916e-05, "loss": 0.6642, "step": 3864 }, { "epoch": 0.2924596118194544, "grad_norm": 2.0524697303771973, "learning_rate": 1.525879827063702e-05, "loss": 0.6988, "step": 3865 }, { "epoch": 0.29253528054178807, "grad_norm": 2.791609525680542, "learning_rate": 1.5257179627938935e-05, "loss": 0.8412, "step": 3866 }, { "epoch": 0.2926109492641217, "grad_norm": 2.2559304237365723, "learning_rate": 1.5255560593396025e-05, "loss": 1.0062, "step": 3867 }, { "epoch": 0.2926866179864553, "grad_norm": 2.290409803390503, "learning_rate": 1.5253941167109677e-05, "loss": 0.6786, "step": 3868 }, { "epoch": 0.2927622867087889, "grad_norm": 3.779801368713379, "learning_rate": 1.5252321349181305e-05, "loss": 0.8315, "step": 3869 }, { "epoch": 0.29283795543112257, "grad_norm": 2.773968458175659, "learning_rate": 1.5250701139712347e-05, "loss": 0.8372, "step": 3870 }, { "epoch": 0.2929136241534562, "grad_norm": 2.577420473098755, "learning_rate": 1.5249080538804266e-05, "loss": 0.7793, "step": 3871 }, { "epoch": 0.2929892928757898, "grad_norm": 2.174755573272705, "learning_rate": 1.5247459546558554e-05, "loss": 0.6802, "step": 3872 }, { "epoch": 0.2930649615981234, "grad_norm": 2.247739315032959, "learning_rate": 1.5245838163076712e-05, "loss": 0.8264, "step": 3873 }, { "epoch": 0.29314063032045706, "grad_norm": 2.067796468734741, "learning_rate": 1.5244216388460285e-05, "loss": 0.8444, "step": 3874 }, { "epoch": 0.2932162990427907, "grad_norm": 2.176210880279541, "learning_rate": 1.5242594222810835e-05, "loss": 0.6093, "step": 3875 }, { "epoch": 0.2932919677651243, "grad_norm": 2.150022029876709, "learning_rate": 1.5240971666229939e-05, "loss": 0.724, "step": 3876 }, { "epoch": 0.2933676364874579, "grad_norm": 2.2023675441741943, "learning_rate": 1.5239348718819215e-05, "loss": 0.8256, "step": 3877 }, { "epoch": 0.2934433052097915, "grad_norm": 2.1717021465301514, "learning_rate": 1.5237725380680291e-05, "loss": 0.8227, "step": 3878 }, { "epoch": 0.29351897393212517, "grad_norm": 2.7426464557647705, "learning_rate": 1.523610165191483e-05, "loss": 0.8648, "step": 3879 }, { "epoch": 0.2935946426544588, "grad_norm": 1.9793200492858887, "learning_rate": 1.5234477532624512e-05, "loss": 0.5645, "step": 3880 }, { "epoch": 0.2936703113767924, "grad_norm": 2.058037281036377, "learning_rate": 1.5232853022911048e-05, "loss": 0.7363, "step": 3881 }, { "epoch": 0.293745980099126, "grad_norm": 2.488274097442627, "learning_rate": 1.5231228122876167e-05, "loss": 0.8012, "step": 3882 }, { "epoch": 0.29382164882145967, "grad_norm": 2.5905423164367676, "learning_rate": 1.5229602832621628e-05, "loss": 0.7683, "step": 3883 }, { "epoch": 0.2938973175437933, "grad_norm": 2.189631938934326, "learning_rate": 1.5227977152249211e-05, "loss": 0.6833, "step": 3884 }, { "epoch": 0.2939729862661269, "grad_norm": 3.1091902256011963, "learning_rate": 1.522635108186072e-05, "loss": 0.6846, "step": 3885 }, { "epoch": 0.2940486549884605, "grad_norm": 2.442972421646118, "learning_rate": 1.5224724621557985e-05, "loss": 0.7811, "step": 3886 }, { "epoch": 0.29412432371079417, "grad_norm": 1.9295117855072021, "learning_rate": 1.5223097771442863e-05, "loss": 0.7857, "step": 3887 }, { "epoch": 0.2941999924331278, "grad_norm": 2.101855516433716, "learning_rate": 1.522147053161723e-05, "loss": 0.7761, "step": 3888 }, { "epoch": 0.2942756611554614, "grad_norm": 2.1722593307495117, "learning_rate": 1.5219842902182986e-05, "loss": 0.6753, "step": 3889 }, { "epoch": 0.294351329877795, "grad_norm": 1.9771231412887573, "learning_rate": 1.5218214883242067e-05, "loss": 0.6856, "step": 3890 }, { "epoch": 0.29442699860012866, "grad_norm": 2.3351590633392334, "learning_rate": 1.5216586474896416e-05, "loss": 0.6703, "step": 3891 }, { "epoch": 0.2945026673224623, "grad_norm": 2.4224588871002197, "learning_rate": 1.5214957677248015e-05, "loss": 0.7411, "step": 3892 }, { "epoch": 0.2945783360447959, "grad_norm": 1.6721079349517822, "learning_rate": 1.5213328490398863e-05, "loss": 0.8959, "step": 3893 }, { "epoch": 0.2946540047671295, "grad_norm": 1.949367642402649, "learning_rate": 1.5211698914450985e-05, "loss": 0.564, "step": 3894 }, { "epoch": 0.2947296734894631, "grad_norm": 2.243993043899536, "learning_rate": 1.5210068949506428e-05, "loss": 0.7671, "step": 3895 }, { "epoch": 0.2948053422117968, "grad_norm": 1.8077340126037598, "learning_rate": 1.5208438595667269e-05, "loss": 0.8371, "step": 3896 }, { "epoch": 0.2948810109341304, "grad_norm": 2.3621714115142822, "learning_rate": 1.5206807853035604e-05, "loss": 0.6225, "step": 3897 }, { "epoch": 0.294956679656464, "grad_norm": 2.770918846130371, "learning_rate": 1.5205176721713558e-05, "loss": 0.6834, "step": 3898 }, { "epoch": 0.2950323483787976, "grad_norm": 2.04931902885437, "learning_rate": 1.5203545201803273e-05, "loss": 0.7648, "step": 3899 }, { "epoch": 0.29510801710113127, "grad_norm": 2.5135276317596436, "learning_rate": 1.5201913293406924e-05, "loss": 0.6688, "step": 3900 }, { "epoch": 0.2951836858234649, "grad_norm": 1.7943437099456787, "learning_rate": 1.5200280996626705e-05, "loss": 0.5577, "step": 3901 }, { "epoch": 0.2952593545457985, "grad_norm": 2.1226847171783447, "learning_rate": 1.5198648311564836e-05, "loss": 0.6473, "step": 3902 }, { "epoch": 0.2953350232681321, "grad_norm": 2.3555779457092285, "learning_rate": 1.5197015238323561e-05, "loss": 0.9919, "step": 3903 }, { "epoch": 0.29541069199046577, "grad_norm": 3.256469964981079, "learning_rate": 1.5195381777005147e-05, "loss": 1.0487, "step": 3904 }, { "epoch": 0.2954863607127994, "grad_norm": 2.362062931060791, "learning_rate": 1.5193747927711889e-05, "loss": 0.7727, "step": 3905 }, { "epoch": 0.295562029435133, "grad_norm": 1.6341859102249146, "learning_rate": 1.5192113690546101e-05, "loss": 0.6764, "step": 3906 }, { "epoch": 0.2956376981574666, "grad_norm": 2.2178308963775635, "learning_rate": 1.5190479065610129e-05, "loss": 0.8589, "step": 3907 }, { "epoch": 0.2957133668798002, "grad_norm": 2.287083148956299, "learning_rate": 1.5188844053006334e-05, "loss": 0.8661, "step": 3908 }, { "epoch": 0.2957890356021339, "grad_norm": 2.168025016784668, "learning_rate": 1.5187208652837105e-05, "loss": 0.7258, "step": 3909 }, { "epoch": 0.2958647043244675, "grad_norm": 2.3210017681121826, "learning_rate": 1.5185572865204861e-05, "loss": 0.9169, "step": 3910 }, { "epoch": 0.2959403730468011, "grad_norm": 2.4655609130859375, "learning_rate": 1.5183936690212038e-05, "loss": 0.9665, "step": 3911 }, { "epoch": 0.2960160417691347, "grad_norm": 2.277387857437134, "learning_rate": 1.5182300127961097e-05, "loss": 0.8959, "step": 3912 }, { "epoch": 0.2960917104914684, "grad_norm": 2.419633388519287, "learning_rate": 1.5180663178554527e-05, "loss": 0.7201, "step": 3913 }, { "epoch": 0.296167379213802, "grad_norm": 2.0627200603485107, "learning_rate": 1.5179025842094837e-05, "loss": 0.8001, "step": 3914 }, { "epoch": 0.2962430479361356, "grad_norm": 2.119424343109131, "learning_rate": 1.5177388118684563e-05, "loss": 0.7499, "step": 3915 }, { "epoch": 0.2963187166584692, "grad_norm": 2.0635826587677, "learning_rate": 1.5175750008426266e-05, "loss": 0.7671, "step": 3916 }, { "epoch": 0.29639438538080287, "grad_norm": 1.9896836280822754, "learning_rate": 1.5174111511422528e-05, "loss": 0.694, "step": 3917 }, { "epoch": 0.2964700541031365, "grad_norm": 2.2198286056518555, "learning_rate": 1.517247262777596e-05, "loss": 0.8069, "step": 3918 }, { "epoch": 0.2965457228254701, "grad_norm": 2.9682750701904297, "learning_rate": 1.5170833357589188e-05, "loss": 0.8306, "step": 3919 }, { "epoch": 0.2966213915478037, "grad_norm": 5.279101848602295, "learning_rate": 1.5169193700964875e-05, "loss": 0.891, "step": 3920 }, { "epoch": 0.2966970602701373, "grad_norm": 2.3643431663513184, "learning_rate": 1.5167553658005695e-05, "loss": 0.7815, "step": 3921 }, { "epoch": 0.296772728992471, "grad_norm": 2.442836046218872, "learning_rate": 1.516591322881436e-05, "loss": 0.7634, "step": 3922 }, { "epoch": 0.2968483977148046, "grad_norm": 2.029599905014038, "learning_rate": 1.5164272413493597e-05, "loss": 0.7216, "step": 3923 }, { "epoch": 0.2969240664371382, "grad_norm": 2.4150075912475586, "learning_rate": 1.5162631212146155e-05, "loss": 0.8002, "step": 3924 }, { "epoch": 0.2969997351594718, "grad_norm": 2.4529528617858887, "learning_rate": 1.5160989624874815e-05, "loss": 0.6906, "step": 3925 }, { "epoch": 0.2970754038818055, "grad_norm": 2.5680036544799805, "learning_rate": 1.5159347651782379e-05, "loss": 0.7421, "step": 3926 }, { "epoch": 0.2971510726041391, "grad_norm": 2.2897586822509766, "learning_rate": 1.515770529297167e-05, "loss": 0.7853, "step": 3927 }, { "epoch": 0.2972267413264727, "grad_norm": 2.2733471393585205, "learning_rate": 1.5156062548545538e-05, "loss": 0.6197, "step": 3928 }, { "epoch": 0.2973024100488063, "grad_norm": 2.1105287075042725, "learning_rate": 1.515441941860686e-05, "loss": 0.8367, "step": 3929 }, { "epoch": 0.29737807877114, "grad_norm": 2.4715983867645264, "learning_rate": 1.515277590325853e-05, "loss": 0.6271, "step": 3930 }, { "epoch": 0.2974537474934736, "grad_norm": 1.8005375862121582, "learning_rate": 1.5151132002603475e-05, "loss": 0.7891, "step": 3931 }, { "epoch": 0.2975294162158072, "grad_norm": 2.270176410675049, "learning_rate": 1.5149487716744637e-05, "loss": 0.6982, "step": 3932 }, { "epoch": 0.2976050849381408, "grad_norm": 2.324087142944336, "learning_rate": 1.5147843045784989e-05, "loss": 0.7359, "step": 3933 }, { "epoch": 0.2976807536604744, "grad_norm": 2.229957103729248, "learning_rate": 1.5146197989827526e-05, "loss": 0.6015, "step": 3934 }, { "epoch": 0.2977564223828081, "grad_norm": 1.9169422388076782, "learning_rate": 1.5144552548975264e-05, "loss": 0.6176, "step": 3935 }, { "epoch": 0.2978320911051417, "grad_norm": 2.401707172393799, "learning_rate": 1.5142906723331248e-05, "loss": 0.7862, "step": 3936 }, { "epoch": 0.2979077598274753, "grad_norm": 2.221273899078369, "learning_rate": 1.5141260512998544e-05, "loss": 0.7928, "step": 3937 }, { "epoch": 0.2979834285498089, "grad_norm": 2.259052276611328, "learning_rate": 1.5139613918080243e-05, "loss": 0.8092, "step": 3938 }, { "epoch": 0.2980590972721426, "grad_norm": 2.435391426086426, "learning_rate": 1.5137966938679463e-05, "loss": 0.7887, "step": 3939 }, { "epoch": 0.2981347659944762, "grad_norm": 1.9297043085098267, "learning_rate": 1.5136319574899338e-05, "loss": 0.763, "step": 3940 }, { "epoch": 0.2982104347168098, "grad_norm": 1.8579686880111694, "learning_rate": 1.5134671826843034e-05, "loss": 0.6998, "step": 3941 }, { "epoch": 0.2982861034391434, "grad_norm": 2.0775258541107178, "learning_rate": 1.5133023694613743e-05, "loss": 0.7513, "step": 3942 }, { "epoch": 0.2983617721614771, "grad_norm": 2.1866838932037354, "learning_rate": 1.5131375178314666e-05, "loss": 0.8225, "step": 3943 }, { "epoch": 0.2984374408838107, "grad_norm": 1.9399791955947876, "learning_rate": 1.5129726278049046e-05, "loss": 0.7335, "step": 3944 }, { "epoch": 0.2985131096061443, "grad_norm": 2.0289242267608643, "learning_rate": 1.5128076993920142e-05, "loss": 0.7298, "step": 3945 }, { "epoch": 0.2985887783284779, "grad_norm": 1.8820334672927856, "learning_rate": 1.5126427326031233e-05, "loss": 0.7299, "step": 3946 }, { "epoch": 0.2986644470508115, "grad_norm": 1.9488524198532104, "learning_rate": 1.5124777274485631e-05, "loss": 0.7782, "step": 3947 }, { "epoch": 0.2987401157731452, "grad_norm": 2.3379132747650146, "learning_rate": 1.5123126839386668e-05, "loss": 0.7691, "step": 3948 }, { "epoch": 0.2988157844954788, "grad_norm": 2.2485504150390625, "learning_rate": 1.5121476020837695e-05, "loss": 0.8717, "step": 3949 }, { "epoch": 0.2988914532178124, "grad_norm": 2.426990509033203, "learning_rate": 1.5119824818942093e-05, "loss": 0.8312, "step": 3950 }, { "epoch": 0.298967121940146, "grad_norm": 2.0571541786193848, "learning_rate": 1.511817323380327e-05, "loss": 0.7172, "step": 3951 }, { "epoch": 0.2990427906624797, "grad_norm": 2.4347405433654785, "learning_rate": 1.5116521265524652e-05, "loss": 0.9351, "step": 3952 }, { "epoch": 0.2991184593848133, "grad_norm": 2.8219993114471436, "learning_rate": 1.5114868914209686e-05, "loss": 0.6793, "step": 3953 }, { "epoch": 0.2991941281071469, "grad_norm": 2.1219239234924316, "learning_rate": 1.5113216179961852e-05, "loss": 0.826, "step": 3954 }, { "epoch": 0.2992697968294805, "grad_norm": 2.106398344039917, "learning_rate": 1.511156306288465e-05, "loss": 0.7407, "step": 3955 }, { "epoch": 0.2993454655518142, "grad_norm": 2.503176689147949, "learning_rate": 1.5109909563081598e-05, "loss": 0.7621, "step": 3956 }, { "epoch": 0.2994211342741478, "grad_norm": 1.9739935398101807, "learning_rate": 1.510825568065625e-05, "loss": 0.6997, "step": 3957 }, { "epoch": 0.2994968029964814, "grad_norm": 2.651623010635376, "learning_rate": 1.5106601415712173e-05, "loss": 0.7928, "step": 3958 }, { "epoch": 0.299572471718815, "grad_norm": 2.1895413398742676, "learning_rate": 1.5104946768352966e-05, "loss": 0.6159, "step": 3959 }, { "epoch": 0.2996481404411486, "grad_norm": 2.34374737739563, "learning_rate": 1.5103291738682245e-05, "loss": 0.7105, "step": 3960 }, { "epoch": 0.2997238091634823, "grad_norm": 2.078537702560425, "learning_rate": 1.5101636326803654e-05, "loss": 0.7007, "step": 3961 }, { "epoch": 0.2997994778858159, "grad_norm": 2.177887439727783, "learning_rate": 1.5099980532820864e-05, "loss": 0.6706, "step": 3962 }, { "epoch": 0.2998751466081495, "grad_norm": 1.6755671501159668, "learning_rate": 1.5098324356837562e-05, "loss": 0.6632, "step": 3963 }, { "epoch": 0.2999508153304831, "grad_norm": 3.093810796737671, "learning_rate": 1.5096667798957465e-05, "loss": 0.7289, "step": 3964 }, { "epoch": 0.3000264840528168, "grad_norm": 2.284776449203491, "learning_rate": 1.509501085928431e-05, "loss": 0.8436, "step": 3965 }, { "epoch": 0.3001021527751504, "grad_norm": 2.3349032402038574, "learning_rate": 1.5093353537921863e-05, "loss": 0.7769, "step": 3966 }, { "epoch": 0.300177821497484, "grad_norm": 2.382188081741333, "learning_rate": 1.5091695834973908e-05, "loss": 0.7436, "step": 3967 }, { "epoch": 0.3002534902198176, "grad_norm": 2.356771469116211, "learning_rate": 1.5090037750544255e-05, "loss": 0.7242, "step": 3968 }, { "epoch": 0.3003291589421513, "grad_norm": 2.8641955852508545, "learning_rate": 1.5088379284736744e-05, "loss": 0.8392, "step": 3969 }, { "epoch": 0.3004048276644849, "grad_norm": 2.1027886867523193, "learning_rate": 1.5086720437655228e-05, "loss": 0.6559, "step": 3970 }, { "epoch": 0.3004804963868185, "grad_norm": 2.1024560928344727, "learning_rate": 1.5085061209403593e-05, "loss": 0.8123, "step": 3971 }, { "epoch": 0.3005561651091521, "grad_norm": 3.435528516769409, "learning_rate": 1.5083401600085741e-05, "loss": 0.7778, "step": 3972 }, { "epoch": 0.30063183383148573, "grad_norm": 2.4583373069763184, "learning_rate": 1.5081741609805608e-05, "loss": 0.6514, "step": 3973 }, { "epoch": 0.3007075025538194, "grad_norm": 2.3399574756622314, "learning_rate": 1.5080081238667143e-05, "loss": 0.7155, "step": 3974 }, { "epoch": 0.300783171276153, "grad_norm": 2.8221993446350098, "learning_rate": 1.5078420486774327e-05, "loss": 0.9117, "step": 3975 }, { "epoch": 0.3008588399984866, "grad_norm": 2.924146890640259, "learning_rate": 1.5076759354231156e-05, "loss": 0.6425, "step": 3976 }, { "epoch": 0.3009345087208202, "grad_norm": 2.2757415771484375, "learning_rate": 1.5075097841141663e-05, "loss": 0.9589, "step": 3977 }, { "epoch": 0.3010101774431539, "grad_norm": 2.634446620941162, "learning_rate": 1.5073435947609891e-05, "loss": 0.7762, "step": 3978 }, { "epoch": 0.3010858461654875, "grad_norm": 2.1489665508270264, "learning_rate": 1.5071773673739918e-05, "loss": 0.9189, "step": 3979 }, { "epoch": 0.3011615148878211, "grad_norm": 3.0982818603515625, "learning_rate": 1.507011101963584e-05, "loss": 0.7364, "step": 3980 }, { "epoch": 0.3012371836101547, "grad_norm": 3.0719218254089355, "learning_rate": 1.5068447985401776e-05, "loss": 0.6404, "step": 3981 }, { "epoch": 0.3013128523324884, "grad_norm": 2.7266042232513428, "learning_rate": 1.5066784571141874e-05, "loss": 0.7629, "step": 3982 }, { "epoch": 0.301388521054822, "grad_norm": 3.0816164016723633, "learning_rate": 1.5065120776960294e-05, "loss": 0.682, "step": 3983 }, { "epoch": 0.3014641897771556, "grad_norm": 2.005387306213379, "learning_rate": 1.5063456602961237e-05, "loss": 0.6885, "step": 3984 }, { "epoch": 0.3015398584994892, "grad_norm": 2.6402080059051514, "learning_rate": 1.5061792049248918e-05, "loss": 0.7662, "step": 3985 }, { "epoch": 0.30161552722182283, "grad_norm": 2.094618797302246, "learning_rate": 1.5060127115927572e-05, "loss": 0.7406, "step": 3986 }, { "epoch": 0.3016911959441565, "grad_norm": 2.5571320056915283, "learning_rate": 1.5058461803101466e-05, "loss": 0.6746, "step": 3987 }, { "epoch": 0.3017668646664901, "grad_norm": 2.0813798904418945, "learning_rate": 1.5056796110874885e-05, "loss": 0.6651, "step": 3988 }, { "epoch": 0.3018425333888237, "grad_norm": 1.9654511213302612, "learning_rate": 1.5055130039352146e-05, "loss": 0.7259, "step": 3989 }, { "epoch": 0.30191820211115733, "grad_norm": 2.2641611099243164, "learning_rate": 1.5053463588637577e-05, "loss": 0.6931, "step": 3990 }, { "epoch": 0.301993870833491, "grad_norm": 2.238877058029175, "learning_rate": 1.5051796758835534e-05, "loss": 0.7844, "step": 3991 }, { "epoch": 0.3020695395558246, "grad_norm": 3.144057512283325, "learning_rate": 1.505012955005041e-05, "loss": 0.9696, "step": 3992 }, { "epoch": 0.3021452082781582, "grad_norm": 1.9449888467788696, "learning_rate": 1.5048461962386602e-05, "loss": 0.7733, "step": 3993 }, { "epoch": 0.3022208770004918, "grad_norm": 2.664398193359375, "learning_rate": 1.5046793995948543e-05, "loss": 0.8057, "step": 3994 }, { "epoch": 0.3022965457228255, "grad_norm": 3.067143678665161, "learning_rate": 1.504512565084069e-05, "loss": 0.6894, "step": 3995 }, { "epoch": 0.3023722144451591, "grad_norm": 2.999467372894287, "learning_rate": 1.5043456927167511e-05, "loss": 0.7235, "step": 3996 }, { "epoch": 0.3024478831674927, "grad_norm": 2.0027337074279785, "learning_rate": 1.5041787825033516e-05, "loss": 0.5782, "step": 3997 }, { "epoch": 0.3025235518898263, "grad_norm": 3.10082745552063, "learning_rate": 1.5040118344543226e-05, "loss": 0.6152, "step": 3998 }, { "epoch": 0.30259922061215994, "grad_norm": 2.1244442462921143, "learning_rate": 1.5038448485801188e-05, "loss": 0.6947, "step": 3999 }, { "epoch": 0.3026748893344936, "grad_norm": 3.423577070236206, "learning_rate": 1.5036778248911973e-05, "loss": 0.7686, "step": 4000 }, { "epoch": 0.3027505580568272, "grad_norm": 2.019320249557495, "learning_rate": 1.5035107633980182e-05, "loss": 0.7844, "step": 4001 }, { "epoch": 0.3028262267791608, "grad_norm": 1.8496774435043335, "learning_rate": 1.503343664111043e-05, "loss": 0.6277, "step": 4002 }, { "epoch": 0.30290189550149443, "grad_norm": 2.151859998703003, "learning_rate": 1.5031765270407362e-05, "loss": 0.6403, "step": 4003 }, { "epoch": 0.3029775642238281, "grad_norm": 2.207608222961426, "learning_rate": 1.5030093521975642e-05, "loss": 0.9016, "step": 4004 }, { "epoch": 0.3030532329461617, "grad_norm": 2.543726921081543, "learning_rate": 1.5028421395919961e-05, "loss": 0.8025, "step": 4005 }, { "epoch": 0.3031289016684953, "grad_norm": 1.9269533157348633, "learning_rate": 1.5026748892345037e-05, "loss": 0.6987, "step": 4006 }, { "epoch": 0.30320457039082893, "grad_norm": 2.5526182651519775, "learning_rate": 1.5025076011355602e-05, "loss": 0.7898, "step": 4007 }, { "epoch": 0.3032802391131626, "grad_norm": 2.2586183547973633, "learning_rate": 1.5023402753056422e-05, "loss": 0.782, "step": 4008 }, { "epoch": 0.3033559078354962, "grad_norm": 2.1747992038726807, "learning_rate": 1.5021729117552276e-05, "loss": 0.77, "step": 4009 }, { "epoch": 0.3034315765578298, "grad_norm": 2.0999064445495605, "learning_rate": 1.5020055104947979e-05, "loss": 0.6958, "step": 4010 }, { "epoch": 0.30350724528016343, "grad_norm": 2.135993003845215, "learning_rate": 1.501838071534836e-05, "loss": 0.633, "step": 4011 }, { "epoch": 0.3035829140024971, "grad_norm": 3.0176174640655518, "learning_rate": 1.5016705948858274e-05, "loss": 0.6997, "step": 4012 }, { "epoch": 0.3036585827248307, "grad_norm": 2.2656147480010986, "learning_rate": 1.5015030805582602e-05, "loss": 0.7354, "step": 4013 }, { "epoch": 0.3037342514471643, "grad_norm": 2.75590443611145, "learning_rate": 1.5013355285626243e-05, "loss": 0.5873, "step": 4014 }, { "epoch": 0.3038099201694979, "grad_norm": 2.2523720264434814, "learning_rate": 1.501167938909413e-05, "loss": 0.795, "step": 4015 }, { "epoch": 0.30388558889183154, "grad_norm": 2.134129047393799, "learning_rate": 1.501000311609121e-05, "loss": 0.7115, "step": 4016 }, { "epoch": 0.3039612576141652, "grad_norm": 2.428209066390991, "learning_rate": 1.5008326466722451e-05, "loss": 0.7494, "step": 4017 }, { "epoch": 0.3040369263364988, "grad_norm": 2.9792733192443848, "learning_rate": 1.500664944109286e-05, "loss": 0.9121, "step": 4018 }, { "epoch": 0.3041125950588324, "grad_norm": 3.070441961288452, "learning_rate": 1.5004972039307451e-05, "loss": 0.7321, "step": 4019 }, { "epoch": 0.30418826378116604, "grad_norm": 2.1026179790496826, "learning_rate": 1.5003294261471272e-05, "loss": 0.5796, "step": 4020 }, { "epoch": 0.3042639325034997, "grad_norm": 2.6371750831604004, "learning_rate": 1.5001616107689388e-05, "loss": 0.7323, "step": 4021 }, { "epoch": 0.3043396012258333, "grad_norm": 2.2472572326660156, "learning_rate": 1.4999937578066893e-05, "loss": 0.8368, "step": 4022 }, { "epoch": 0.3044152699481669, "grad_norm": 4.721674919128418, "learning_rate": 1.4998258672708901e-05, "loss": 0.552, "step": 4023 }, { "epoch": 0.30449093867050053, "grad_norm": 4.181674480438232, "learning_rate": 1.499657939172055e-05, "loss": 0.7753, "step": 4024 }, { "epoch": 0.3045666073928342, "grad_norm": 2.1796109676361084, "learning_rate": 1.4994899735207e-05, "loss": 0.6785, "step": 4025 }, { "epoch": 0.3046422761151678, "grad_norm": 2.1673810482025146, "learning_rate": 1.499321970327344e-05, "loss": 0.743, "step": 4026 }, { "epoch": 0.3047179448375014, "grad_norm": 2.26334285736084, "learning_rate": 1.4991539296025078e-05, "loss": 0.7859, "step": 4027 }, { "epoch": 0.30479361355983503, "grad_norm": 2.063838005065918, "learning_rate": 1.4989858513567147e-05, "loss": 1.0079, "step": 4028 }, { "epoch": 0.30486928228216864, "grad_norm": 2.168710708618164, "learning_rate": 1.4988177356004902e-05, "loss": 0.6894, "step": 4029 }, { "epoch": 0.3049449510045023, "grad_norm": 1.7202786207199097, "learning_rate": 1.4986495823443621e-05, "loss": 0.8765, "step": 4030 }, { "epoch": 0.3050206197268359, "grad_norm": 2.792025327682495, "learning_rate": 1.4984813915988614e-05, "loss": 0.6793, "step": 4031 }, { "epoch": 0.30509628844916953, "grad_norm": 2.359570264816284, "learning_rate": 1.4983131633745196e-05, "loss": 0.7668, "step": 4032 }, { "epoch": 0.30517195717150314, "grad_norm": 4.47695779800415, "learning_rate": 1.4981448976818725e-05, "loss": 0.8328, "step": 4033 }, { "epoch": 0.3052476258938368, "grad_norm": 2.457249879837036, "learning_rate": 1.4979765945314574e-05, "loss": 0.6713, "step": 4034 }, { "epoch": 0.3053232946161704, "grad_norm": 1.8373754024505615, "learning_rate": 1.497808253933814e-05, "loss": 0.6746, "step": 4035 }, { "epoch": 0.305398963338504, "grad_norm": 2.597541332244873, "learning_rate": 1.497639875899484e-05, "loss": 0.6358, "step": 4036 }, { "epoch": 0.30547463206083764, "grad_norm": 2.0362725257873535, "learning_rate": 1.4974714604390118e-05, "loss": 0.8613, "step": 4037 }, { "epoch": 0.3055503007831713, "grad_norm": 2.046318292617798, "learning_rate": 1.4973030075629447e-05, "loss": 0.7307, "step": 4038 }, { "epoch": 0.3056259695055049, "grad_norm": 2.4216933250427246, "learning_rate": 1.4971345172818313e-05, "loss": 0.7361, "step": 4039 }, { "epoch": 0.3057016382278385, "grad_norm": 2.259838104248047, "learning_rate": 1.4969659896062226e-05, "loss": 0.8991, "step": 4040 }, { "epoch": 0.30577730695017213, "grad_norm": 1.8823871612548828, "learning_rate": 1.4967974245466731e-05, "loss": 0.7365, "step": 4041 }, { "epoch": 0.30585297567250574, "grad_norm": 2.1523728370666504, "learning_rate": 1.4966288221137388e-05, "loss": 0.7233, "step": 4042 }, { "epoch": 0.3059286443948394, "grad_norm": 5.254127502441406, "learning_rate": 1.4964601823179776e-05, "loss": 0.6328, "step": 4043 }, { "epoch": 0.306004313117173, "grad_norm": 1.8347951173782349, "learning_rate": 1.4962915051699506e-05, "loss": 0.7929, "step": 4044 }, { "epoch": 0.30607998183950663, "grad_norm": 2.36879563331604, "learning_rate": 1.4961227906802212e-05, "loss": 0.6739, "step": 4045 }, { "epoch": 0.30615565056184024, "grad_norm": 1.9774808883666992, "learning_rate": 1.4959540388593543e-05, "loss": 0.8385, "step": 4046 }, { "epoch": 0.3062313192841739, "grad_norm": 1.8404202461242676, "learning_rate": 1.4957852497179182e-05, "loss": 0.7847, "step": 4047 }, { "epoch": 0.3063069880065075, "grad_norm": 2.2674901485443115, "learning_rate": 1.4956164232664825e-05, "loss": 0.7625, "step": 4048 }, { "epoch": 0.30638265672884113, "grad_norm": 1.9228596687316895, "learning_rate": 1.4954475595156198e-05, "loss": 0.7071, "step": 4049 }, { "epoch": 0.30645832545117474, "grad_norm": 2.0531787872314453, "learning_rate": 1.4952786584759053e-05, "loss": 0.7727, "step": 4050 }, { "epoch": 0.3065339941735084, "grad_norm": 2.334456205368042, "learning_rate": 1.4951097201579159e-05, "loss": 0.7362, "step": 4051 }, { "epoch": 0.306609662895842, "grad_norm": 2.5954339504241943, "learning_rate": 1.4949407445722308e-05, "loss": 0.6712, "step": 4052 }, { "epoch": 0.3066853316181756, "grad_norm": 2.0414674282073975, "learning_rate": 1.4947717317294321e-05, "loss": 0.7052, "step": 4053 }, { "epoch": 0.30676100034050924, "grad_norm": 2.296151638031006, "learning_rate": 1.4946026816401037e-05, "loss": 0.6226, "step": 4054 }, { "epoch": 0.30683666906284285, "grad_norm": 1.8123749494552612, "learning_rate": 1.4944335943148323e-05, "loss": 0.7195, "step": 4055 }, { "epoch": 0.3069123377851765, "grad_norm": 2.317089319229126, "learning_rate": 1.4942644697642067e-05, "loss": 0.728, "step": 4056 }, { "epoch": 0.3069880065075101, "grad_norm": 2.25272536277771, "learning_rate": 1.4940953079988179e-05, "loss": 0.6677, "step": 4057 }, { "epoch": 0.30706367522984374, "grad_norm": 2.574553966522217, "learning_rate": 1.4939261090292592e-05, "loss": 0.961, "step": 4058 }, { "epoch": 0.30713934395217735, "grad_norm": 2.785632610321045, "learning_rate": 1.4937568728661265e-05, "loss": 0.7058, "step": 4059 }, { "epoch": 0.307215012674511, "grad_norm": 2.4276599884033203, "learning_rate": 1.4935875995200183e-05, "loss": 0.7033, "step": 4060 }, { "epoch": 0.3072906813968446, "grad_norm": 2.2110235691070557, "learning_rate": 1.4934182890015345e-05, "loss": 0.6735, "step": 4061 }, { "epoch": 0.30736635011917823, "grad_norm": 2.3032000064849854, "learning_rate": 1.4932489413212782e-05, "loss": 0.8252, "step": 4062 }, { "epoch": 0.30744201884151184, "grad_norm": 2.245957136154175, "learning_rate": 1.4930795564898543e-05, "loss": 0.9702, "step": 4063 }, { "epoch": 0.3075176875638455, "grad_norm": 2.7905473709106445, "learning_rate": 1.4929101345178703e-05, "loss": 0.6039, "step": 4064 }, { "epoch": 0.3075933562861791, "grad_norm": 2.3959288597106934, "learning_rate": 1.4927406754159361e-05, "loss": 0.6691, "step": 4065 }, { "epoch": 0.30766902500851273, "grad_norm": 1.9613829851150513, "learning_rate": 1.4925711791946636e-05, "loss": 0.8457, "step": 4066 }, { "epoch": 0.30774469373084634, "grad_norm": 2.6765880584716797, "learning_rate": 1.492401645864667e-05, "loss": 0.8385, "step": 4067 }, { "epoch": 0.30782036245317995, "grad_norm": 2.0931665897369385, "learning_rate": 1.4922320754365636e-05, "loss": 0.6882, "step": 4068 }, { "epoch": 0.3078960311755136, "grad_norm": 2.6953020095825195, "learning_rate": 1.4920624679209723e-05, "loss": 0.7359, "step": 4069 }, { "epoch": 0.30797169989784723, "grad_norm": 2.3795089721679688, "learning_rate": 1.4918928233285139e-05, "loss": 0.7182, "step": 4070 }, { "epoch": 0.30804736862018084, "grad_norm": 1.9124236106872559, "learning_rate": 1.4917231416698126e-05, "loss": 0.6546, "step": 4071 }, { "epoch": 0.30812303734251445, "grad_norm": 2.0559885501861572, "learning_rate": 1.4915534229554944e-05, "loss": 0.6896, "step": 4072 }, { "epoch": 0.3081987060648481, "grad_norm": 1.9751750230789185, "learning_rate": 1.4913836671961874e-05, "loss": 0.7115, "step": 4073 }, { "epoch": 0.3082743747871817, "grad_norm": 2.7253572940826416, "learning_rate": 1.4912138744025223e-05, "loss": 0.6748, "step": 4074 }, { "epoch": 0.30835004350951534, "grad_norm": 2.238069534301758, "learning_rate": 1.4910440445851325e-05, "loss": 0.7391, "step": 4075 }, { "epoch": 0.30842571223184895, "grad_norm": 2.452892780303955, "learning_rate": 1.4908741777546527e-05, "loss": 0.8675, "step": 4076 }, { "epoch": 0.3085013809541826, "grad_norm": 2.315485715866089, "learning_rate": 1.4907042739217208e-05, "loss": 0.7192, "step": 4077 }, { "epoch": 0.3085770496765162, "grad_norm": 2.2211544513702393, "learning_rate": 1.4905343330969766e-05, "loss": 0.7764, "step": 4078 }, { "epoch": 0.30865271839884983, "grad_norm": 2.0151994228363037, "learning_rate": 1.4903643552910628e-05, "loss": 0.7262, "step": 4079 }, { "epoch": 0.30872838712118345, "grad_norm": 2.6214284896850586, "learning_rate": 1.4901943405146233e-05, "loss": 0.7586, "step": 4080 }, { "epoch": 0.30880405584351706, "grad_norm": 2.426795244216919, "learning_rate": 1.4900242887783053e-05, "loss": 0.7345, "step": 4081 }, { "epoch": 0.3088797245658507, "grad_norm": 1.778331995010376, "learning_rate": 1.4898542000927582e-05, "loss": 0.606, "step": 4082 }, { "epoch": 0.30895539328818433, "grad_norm": 2.1181724071502686, "learning_rate": 1.4896840744686331e-05, "loss": 0.7484, "step": 4083 }, { "epoch": 0.30903106201051794, "grad_norm": 2.628530740737915, "learning_rate": 1.4895139119165837e-05, "loss": 0.7351, "step": 4084 }, { "epoch": 0.30910673073285155, "grad_norm": 1.912762999534607, "learning_rate": 1.489343712447267e-05, "loss": 0.7039, "step": 4085 }, { "epoch": 0.3091823994551852, "grad_norm": 2.245521306991577, "learning_rate": 1.4891734760713405e-05, "loss": 0.985, "step": 4086 }, { "epoch": 0.30925806817751883, "grad_norm": 6.195135593414307, "learning_rate": 1.4890032027994655e-05, "loss": 0.7202, "step": 4087 }, { "epoch": 0.30933373689985244, "grad_norm": 2.0465266704559326, "learning_rate": 1.4888328926423048e-05, "loss": 0.7164, "step": 4088 }, { "epoch": 0.30940940562218605, "grad_norm": 2.153587818145752, "learning_rate": 1.4886625456105235e-05, "loss": 0.8307, "step": 4089 }, { "epoch": 0.3094850743445197, "grad_norm": 2.2596750259399414, "learning_rate": 1.48849216171479e-05, "loss": 0.7311, "step": 4090 }, { "epoch": 0.3095607430668533, "grad_norm": 1.8946340084075928, "learning_rate": 1.4883217409657739e-05, "loss": 0.7878, "step": 4091 }, { "epoch": 0.30963641178918694, "grad_norm": 1.9765899181365967, "learning_rate": 1.4881512833741475e-05, "loss": 0.746, "step": 4092 }, { "epoch": 0.30971208051152055, "grad_norm": 2.942574977874756, "learning_rate": 1.4879807889505856e-05, "loss": 0.6554, "step": 4093 }, { "epoch": 0.30978774923385416, "grad_norm": 2.359882116317749, "learning_rate": 1.4878102577057643e-05, "loss": 0.6133, "step": 4094 }, { "epoch": 0.3098634179561878, "grad_norm": 2.197938919067383, "learning_rate": 1.487639689650364e-05, "loss": 0.7502, "step": 4095 }, { "epoch": 0.30993908667852144, "grad_norm": 2.5012736320495605, "learning_rate": 1.4874690847950654e-05, "loss": 0.7593, "step": 4096 }, { "epoch": 0.31001475540085505, "grad_norm": 2.0770883560180664, "learning_rate": 1.4872984431505528e-05, "loss": 0.8841, "step": 4097 }, { "epoch": 0.31009042412318866, "grad_norm": 2.287703275680542, "learning_rate": 1.4871277647275122e-05, "loss": 0.7934, "step": 4098 }, { "epoch": 0.3101660928455223, "grad_norm": 1.9843212366104126, "learning_rate": 1.486957049536632e-05, "loss": 0.7782, "step": 4099 }, { "epoch": 0.31024176156785593, "grad_norm": 2.0489273071289062, "learning_rate": 1.4867862975886027e-05, "loss": 0.7255, "step": 4100 }, { "epoch": 0.31031743029018954, "grad_norm": 2.2509491443634033, "learning_rate": 1.4866155088941175e-05, "loss": 0.7689, "step": 4101 }, { "epoch": 0.31039309901252315, "grad_norm": 2.3707261085510254, "learning_rate": 1.4864446834638721e-05, "loss": 0.8923, "step": 4102 }, { "epoch": 0.3104687677348568, "grad_norm": 2.4059784412384033, "learning_rate": 1.4862738213085634e-05, "loss": 0.7605, "step": 4103 }, { "epoch": 0.31054443645719043, "grad_norm": 2.0712780952453613, "learning_rate": 1.4861029224388921e-05, "loss": 0.7258, "step": 4104 }, { "epoch": 0.31062010517952404, "grad_norm": 2.2427139282226562, "learning_rate": 1.4859319868655602e-05, "loss": 0.6634, "step": 4105 }, { "epoch": 0.31069577390185765, "grad_norm": 2.8183469772338867, "learning_rate": 1.4857610145992719e-05, "loss": 0.7484, "step": 4106 }, { "epoch": 0.31077144262419126, "grad_norm": 2.3078558444976807, "learning_rate": 1.4855900056507343e-05, "loss": 0.8037, "step": 4107 }, { "epoch": 0.31084711134652493, "grad_norm": 1.7662575244903564, "learning_rate": 1.4854189600306565e-05, "loss": 0.5929, "step": 4108 }, { "epoch": 0.31092278006885854, "grad_norm": 2.5993802547454834, "learning_rate": 1.48524787774975e-05, "loss": 0.7729, "step": 4109 }, { "epoch": 0.31099844879119215, "grad_norm": 1.8144737482070923, "learning_rate": 1.4850767588187285e-05, "loss": 0.6611, "step": 4110 }, { "epoch": 0.31107411751352576, "grad_norm": 1.6812607049942017, "learning_rate": 1.4849056032483081e-05, "loss": 0.7923, "step": 4111 }, { "epoch": 0.3111497862358594, "grad_norm": 2.2537429332733154, "learning_rate": 1.484734411049207e-05, "loss": 0.7171, "step": 4112 }, { "epoch": 0.31122545495819304, "grad_norm": 2.3459930419921875, "learning_rate": 1.4845631822321456e-05, "loss": 0.7479, "step": 4113 }, { "epoch": 0.31130112368052665, "grad_norm": 1.9687094688415527, "learning_rate": 1.484391916807847e-05, "loss": 0.7927, "step": 4114 }, { "epoch": 0.31137679240286026, "grad_norm": 2.511396646499634, "learning_rate": 1.4842206147870365e-05, "loss": 0.8989, "step": 4115 }, { "epoch": 0.3114524611251939, "grad_norm": 2.6915669441223145, "learning_rate": 1.4840492761804417e-05, "loss": 0.8171, "step": 4116 }, { "epoch": 0.31152812984752754, "grad_norm": 1.9705544710159302, "learning_rate": 1.483877900998792e-05, "loss": 0.7195, "step": 4117 }, { "epoch": 0.31160379856986115, "grad_norm": 1.8884872198104858, "learning_rate": 1.4837064892528197e-05, "loss": 0.7556, "step": 4118 }, { "epoch": 0.31167946729219476, "grad_norm": 2.239375591278076, "learning_rate": 1.4835350409532592e-05, "loss": 0.696, "step": 4119 }, { "epoch": 0.3117551360145284, "grad_norm": 1.9161548614501953, "learning_rate": 1.4833635561108469e-05, "loss": 0.6043, "step": 4120 }, { "epoch": 0.31183080473686203, "grad_norm": 2.44989275932312, "learning_rate": 1.483192034736322e-05, "loss": 0.8091, "step": 4121 }, { "epoch": 0.31190647345919564, "grad_norm": 2.2470176219940186, "learning_rate": 1.4830204768404253e-05, "loss": 0.7588, "step": 4122 }, { "epoch": 0.31198214218152925, "grad_norm": 2.389315128326416, "learning_rate": 1.482848882433901e-05, "loss": 0.8738, "step": 4123 }, { "epoch": 0.31205781090386286, "grad_norm": 2.3633134365081787, "learning_rate": 1.4826772515274943e-05, "loss": 0.7753, "step": 4124 }, { "epoch": 0.31213347962619653, "grad_norm": 2.183661699295044, "learning_rate": 1.4825055841319536e-05, "loss": 0.8454, "step": 4125 }, { "epoch": 0.31220914834853014, "grad_norm": 2.239809989929199, "learning_rate": 1.4823338802580294e-05, "loss": 0.7674, "step": 4126 }, { "epoch": 0.31228481707086375, "grad_norm": 2.594881296157837, "learning_rate": 1.4821621399164737e-05, "loss": 0.8039, "step": 4127 }, { "epoch": 0.31236048579319736, "grad_norm": 2.0808498859405518, "learning_rate": 1.4819903631180423e-05, "loss": 0.8132, "step": 4128 }, { "epoch": 0.31243615451553103, "grad_norm": 2.28064227104187, "learning_rate": 1.4818185498734914e-05, "loss": 0.7763, "step": 4129 }, { "epoch": 0.31251182323786464, "grad_norm": 2.041588068008423, "learning_rate": 1.4816467001935815e-05, "loss": 0.7704, "step": 4130 }, { "epoch": 0.31258749196019825, "grad_norm": 2.0679996013641357, "learning_rate": 1.4814748140890738e-05, "loss": 0.6893, "step": 4131 }, { "epoch": 0.31266316068253186, "grad_norm": 1.9504202604293823, "learning_rate": 1.4813028915707326e-05, "loss": 0.8287, "step": 4132 }, { "epoch": 0.3127388294048655, "grad_norm": 2.44740891456604, "learning_rate": 1.4811309326493244e-05, "loss": 0.7966, "step": 4133 }, { "epoch": 0.31281449812719914, "grad_norm": 2.279268264770508, "learning_rate": 1.4809589373356173e-05, "loss": 0.9172, "step": 4134 }, { "epoch": 0.31289016684953275, "grad_norm": 2.8255929946899414, "learning_rate": 1.4807869056403823e-05, "loss": 0.7796, "step": 4135 }, { "epoch": 0.31296583557186636, "grad_norm": 1.751339077949524, "learning_rate": 1.480614837574393e-05, "loss": 0.7826, "step": 4136 }, { "epoch": 0.31304150429419997, "grad_norm": 2.050893545150757, "learning_rate": 1.4804427331484249e-05, "loss": 0.7079, "step": 4137 }, { "epoch": 0.31311717301653363, "grad_norm": 2.0679502487182617, "learning_rate": 1.480270592373255e-05, "loss": 0.7366, "step": 4138 }, { "epoch": 0.31319284173886724, "grad_norm": 2.277479887008667, "learning_rate": 1.480098415259664e-05, "loss": 0.701, "step": 4139 }, { "epoch": 0.31326851046120086, "grad_norm": 3.157134771347046, "learning_rate": 1.479926201818434e-05, "loss": 0.748, "step": 4140 }, { "epoch": 0.31334417918353447, "grad_norm": 2.37265682220459, "learning_rate": 1.4797539520603497e-05, "loss": 0.7892, "step": 4141 }, { "epoch": 0.31341984790586813, "grad_norm": 1.8097896575927734, "learning_rate": 1.4795816659961974e-05, "loss": 0.8259, "step": 4142 }, { "epoch": 0.31349551662820174, "grad_norm": 1.9213300943374634, "learning_rate": 1.4794093436367668e-05, "loss": 0.8412, "step": 4143 }, { "epoch": 0.31357118535053535, "grad_norm": 1.8709452152252197, "learning_rate": 1.479236984992849e-05, "loss": 0.7159, "step": 4144 }, { "epoch": 0.31364685407286896, "grad_norm": 2.649545192718506, "learning_rate": 1.4790645900752377e-05, "loss": 0.7186, "step": 4145 }, { "epoch": 0.31372252279520263, "grad_norm": 2.275909662246704, "learning_rate": 1.478892158894729e-05, "loss": 0.6128, "step": 4146 }, { "epoch": 0.31379819151753624, "grad_norm": 2.2135143280029297, "learning_rate": 1.4787196914621208e-05, "loss": 0.9426, "step": 4147 }, { "epoch": 0.31387386023986985, "grad_norm": 2.2389373779296875, "learning_rate": 1.4785471877882138e-05, "loss": 0.7118, "step": 4148 }, { "epoch": 0.31394952896220346, "grad_norm": 1.9949945211410522, "learning_rate": 1.4783746478838108e-05, "loss": 0.7185, "step": 4149 }, { "epoch": 0.31402519768453707, "grad_norm": 1.941359519958496, "learning_rate": 1.4782020717597163e-05, "loss": 0.6794, "step": 4150 }, { "epoch": 0.31410086640687074, "grad_norm": 2.1652402877807617, "learning_rate": 1.478029459426738e-05, "loss": 0.7128, "step": 4151 }, { "epoch": 0.31417653512920435, "grad_norm": 2.5514955520629883, "learning_rate": 1.4778568108956857e-05, "loss": 0.8677, "step": 4152 }, { "epoch": 0.31425220385153796, "grad_norm": 2.4251315593719482, "learning_rate": 1.4776841261773706e-05, "loss": 0.8487, "step": 4153 }, { "epoch": 0.31432787257387157, "grad_norm": 2.3087351322174072, "learning_rate": 1.477511405282607e-05, "loss": 0.7846, "step": 4154 }, { "epoch": 0.31440354129620524, "grad_norm": 1.8735215663909912, "learning_rate": 1.4773386482222115e-05, "loss": 0.7894, "step": 4155 }, { "epoch": 0.31447921001853885, "grad_norm": 2.0851855278015137, "learning_rate": 1.4771658550070024e-05, "loss": 0.7316, "step": 4156 }, { "epoch": 0.31455487874087246, "grad_norm": 2.0207672119140625, "learning_rate": 1.4769930256478008e-05, "loss": 0.7909, "step": 4157 }, { "epoch": 0.31463054746320607, "grad_norm": 2.3125038146972656, "learning_rate": 1.4768201601554295e-05, "loss": 0.7246, "step": 4158 }, { "epoch": 0.31470621618553973, "grad_norm": 2.17578387260437, "learning_rate": 1.4766472585407142e-05, "loss": 0.7306, "step": 4159 }, { "epoch": 0.31478188490787334, "grad_norm": 1.933734655380249, "learning_rate": 1.4764743208144827e-05, "loss": 0.7355, "step": 4160 }, { "epoch": 0.31485755363020695, "grad_norm": 2.213831901550293, "learning_rate": 1.4763013469875644e-05, "loss": 0.8326, "step": 4161 }, { "epoch": 0.31493322235254056, "grad_norm": 2.065779209136963, "learning_rate": 1.4761283370707919e-05, "loss": 0.8269, "step": 4162 }, { "epoch": 0.3150088910748742, "grad_norm": 2.665332794189453, "learning_rate": 1.4759552910749993e-05, "loss": 0.7556, "step": 4163 }, { "epoch": 0.31508455979720784, "grad_norm": 1.8183536529541016, "learning_rate": 1.4757822090110236e-05, "loss": 0.7538, "step": 4164 }, { "epoch": 0.31516022851954145, "grad_norm": 2.538285493850708, "learning_rate": 1.4756090908897039e-05, "loss": 0.8137, "step": 4165 }, { "epoch": 0.31523589724187506, "grad_norm": 2.3421831130981445, "learning_rate": 1.4754359367218808e-05, "loss": 0.7443, "step": 4166 }, { "epoch": 0.3153115659642087, "grad_norm": 3.463080406188965, "learning_rate": 1.4752627465183985e-05, "loss": 0.783, "step": 4167 }, { "epoch": 0.31538723468654234, "grad_norm": 2.2434184551239014, "learning_rate": 1.4750895202901021e-05, "loss": 0.8343, "step": 4168 }, { "epoch": 0.31546290340887595, "grad_norm": 2.3080389499664307, "learning_rate": 1.4749162580478401e-05, "loss": 0.6915, "step": 4169 }, { "epoch": 0.31553857213120956, "grad_norm": 1.77947998046875, "learning_rate": 1.4747429598024625e-05, "loss": 0.6928, "step": 4170 }, { "epoch": 0.31561424085354317, "grad_norm": 1.8366894721984863, "learning_rate": 1.4745696255648219e-05, "loss": 0.7257, "step": 4171 }, { "epoch": 0.31568990957587684, "grad_norm": 2.3540196418762207, "learning_rate": 1.4743962553457729e-05, "loss": 0.7414, "step": 4172 }, { "epoch": 0.31576557829821045, "grad_norm": 2.082271099090576, "learning_rate": 1.4742228491561723e-05, "loss": 0.8058, "step": 4173 }, { "epoch": 0.31584124702054406, "grad_norm": 1.8880605697631836, "learning_rate": 1.4740494070068799e-05, "loss": 0.8184, "step": 4174 }, { "epoch": 0.31591691574287767, "grad_norm": 2.252822160720825, "learning_rate": 1.4738759289087569e-05, "loss": 0.8572, "step": 4175 }, { "epoch": 0.3159925844652113, "grad_norm": 1.9916536808013916, "learning_rate": 1.4737024148726668e-05, "loss": 0.7948, "step": 4176 }, { "epoch": 0.31606825318754495, "grad_norm": 2.2267072200775146, "learning_rate": 1.4735288649094764e-05, "loss": 0.7164, "step": 4177 }, { "epoch": 0.31614392190987856, "grad_norm": 2.865279197692871, "learning_rate": 1.4733552790300531e-05, "loss": 0.8885, "step": 4178 }, { "epoch": 0.31621959063221217, "grad_norm": 2.009626626968384, "learning_rate": 1.473181657245268e-05, "loss": 0.6854, "step": 4179 }, { "epoch": 0.3162952593545458, "grad_norm": 2.28889536857605, "learning_rate": 1.4730079995659935e-05, "loss": 0.6812, "step": 4180 }, { "epoch": 0.31637092807687944, "grad_norm": 1.8733904361724854, "learning_rate": 1.4728343060031046e-05, "loss": 0.7784, "step": 4181 }, { "epoch": 0.31644659679921305, "grad_norm": 2.0452613830566406, "learning_rate": 1.4726605765674788e-05, "loss": 0.71, "step": 4182 }, { "epoch": 0.31652226552154666, "grad_norm": 3.1773879528045654, "learning_rate": 1.4724868112699957e-05, "loss": 0.7515, "step": 4183 }, { "epoch": 0.3165979342438803, "grad_norm": 1.9944331645965576, "learning_rate": 1.4723130101215364e-05, "loss": 0.686, "step": 4184 }, { "epoch": 0.31667360296621394, "grad_norm": 2.6416072845458984, "learning_rate": 1.4721391731329856e-05, "loss": 0.8275, "step": 4185 }, { "epoch": 0.31674927168854755, "grad_norm": 2.302402973175049, "learning_rate": 1.4719653003152291e-05, "loss": 0.6254, "step": 4186 }, { "epoch": 0.31682494041088116, "grad_norm": 2.278822422027588, "learning_rate": 1.4717913916791561e-05, "loss": 0.9094, "step": 4187 }, { "epoch": 0.3169006091332148, "grad_norm": 2.715550422668457, "learning_rate": 1.4716174472356563e-05, "loss": 0.8543, "step": 4188 }, { "epoch": 0.3169762778555484, "grad_norm": 2.0558013916015625, "learning_rate": 1.4714434669956228e-05, "loss": 0.8176, "step": 4189 }, { "epoch": 0.31705194657788205, "grad_norm": 2.050335645675659, "learning_rate": 1.4712694509699517e-05, "loss": 0.7816, "step": 4190 }, { "epoch": 0.31712761530021566, "grad_norm": 2.250823497772217, "learning_rate": 1.4710953991695394e-05, "loss": 0.7854, "step": 4191 }, { "epoch": 0.31720328402254927, "grad_norm": 2.516578435897827, "learning_rate": 1.4709213116052864e-05, "loss": 0.7717, "step": 4192 }, { "epoch": 0.3172789527448829, "grad_norm": 2.0898921489715576, "learning_rate": 1.4707471882880942e-05, "loss": 0.7474, "step": 4193 }, { "epoch": 0.31735462146721655, "grad_norm": 2.837730646133423, "learning_rate": 1.470573029228867e-05, "loss": 0.6944, "step": 4194 }, { "epoch": 0.31743029018955016, "grad_norm": 2.0054285526275635, "learning_rate": 1.4703988344385113e-05, "loss": 0.902, "step": 4195 }, { "epoch": 0.31750595891188377, "grad_norm": 2.452274799346924, "learning_rate": 1.4702246039279356e-05, "loss": 0.7572, "step": 4196 }, { "epoch": 0.3175816276342174, "grad_norm": 2.3126518726348877, "learning_rate": 1.470050337708051e-05, "loss": 0.8434, "step": 4197 }, { "epoch": 0.31765729635655104, "grad_norm": 1.9818753004074097, "learning_rate": 1.4698760357897703e-05, "loss": 0.7367, "step": 4198 }, { "epoch": 0.31773296507888465, "grad_norm": 2.2270803451538086, "learning_rate": 1.4697016981840091e-05, "loss": 0.7117, "step": 4199 }, { "epoch": 0.31780863380121827, "grad_norm": 2.7719006538391113, "learning_rate": 1.469527324901685e-05, "loss": 0.902, "step": 4200 }, { "epoch": 0.3178843025235519, "grad_norm": 2.4166553020477295, "learning_rate": 1.4693529159537179e-05, "loss": 0.7287, "step": 4201 }, { "epoch": 0.3179599712458855, "grad_norm": 2.3354287147521973, "learning_rate": 1.4691784713510294e-05, "loss": 0.7441, "step": 4202 }, { "epoch": 0.31803563996821915, "grad_norm": 2.553823709487915, "learning_rate": 1.4690039911045443e-05, "loss": 0.7037, "step": 4203 }, { "epoch": 0.31811130869055276, "grad_norm": 3.147313356399536, "learning_rate": 1.4688294752251888e-05, "loss": 0.8055, "step": 4204 }, { "epoch": 0.3181869774128864, "grad_norm": 2.2056877613067627, "learning_rate": 1.4686549237238917e-05, "loss": 0.6627, "step": 4205 }, { "epoch": 0.31826264613522, "grad_norm": 2.1581954956054688, "learning_rate": 1.4684803366115841e-05, "loss": 0.7106, "step": 4206 }, { "epoch": 0.31833831485755365, "grad_norm": 2.5941429138183594, "learning_rate": 1.468305713899199e-05, "loss": 0.8563, "step": 4207 }, { "epoch": 0.31841398357988726, "grad_norm": 2.1362545490264893, "learning_rate": 1.468131055597672e-05, "loss": 0.7852, "step": 4208 }, { "epoch": 0.31848965230222087, "grad_norm": 3.146714925765991, "learning_rate": 1.4679563617179408e-05, "loss": 0.8228, "step": 4209 }, { "epoch": 0.3185653210245545, "grad_norm": 2.1198391914367676, "learning_rate": 1.4677816322709452e-05, "loss": 0.8172, "step": 4210 }, { "epoch": 0.31864098974688815, "grad_norm": 2.367765426635742, "learning_rate": 1.4676068672676274e-05, "loss": 0.6736, "step": 4211 }, { "epoch": 0.31871665846922176, "grad_norm": 1.9327032566070557, "learning_rate": 1.4674320667189317e-05, "loss": 0.5936, "step": 4212 }, { "epoch": 0.31879232719155537, "grad_norm": 2.0183558464050293, "learning_rate": 1.4672572306358048e-05, "loss": 0.707, "step": 4213 }, { "epoch": 0.318867995913889, "grad_norm": 2.5018794536590576, "learning_rate": 1.4670823590291953e-05, "loss": 0.8315, "step": 4214 }, { "epoch": 0.3189436646362226, "grad_norm": 1.932250738143921, "learning_rate": 1.466907451910054e-05, "loss": 0.6677, "step": 4215 }, { "epoch": 0.31901933335855626, "grad_norm": 2.460291862487793, "learning_rate": 1.4667325092893349e-05, "loss": 0.7883, "step": 4216 }, { "epoch": 0.31909500208088987, "grad_norm": 2.749185800552368, "learning_rate": 1.466557531177993e-05, "loss": 0.6801, "step": 4217 }, { "epoch": 0.3191706708032235, "grad_norm": 2.338014602661133, "learning_rate": 1.4663825175869858e-05, "loss": 0.7228, "step": 4218 }, { "epoch": 0.3192463395255571, "grad_norm": 2.1747679710388184, "learning_rate": 1.4662074685272735e-05, "loss": 0.6736, "step": 4219 }, { "epoch": 0.31932200824789075, "grad_norm": 2.656726360321045, "learning_rate": 1.4660323840098184e-05, "loss": 0.6297, "step": 4220 }, { "epoch": 0.31939767697022436, "grad_norm": 2.0806820392608643, "learning_rate": 1.4658572640455842e-05, "loss": 0.6698, "step": 4221 }, { "epoch": 0.319473345692558, "grad_norm": 2.605621099472046, "learning_rate": 1.4656821086455383e-05, "loss": 0.8796, "step": 4222 }, { "epoch": 0.3195490144148916, "grad_norm": 2.9140431880950928, "learning_rate": 1.465506917820649e-05, "loss": 0.6627, "step": 4223 }, { "epoch": 0.31962468313722525, "grad_norm": 2.8789541721343994, "learning_rate": 1.4653316915818876e-05, "loss": 0.683, "step": 4224 }, { "epoch": 0.31970035185955886, "grad_norm": 2.5387415885925293, "learning_rate": 1.465156429940227e-05, "loss": 0.8299, "step": 4225 }, { "epoch": 0.3197760205818925, "grad_norm": 2.163238048553467, "learning_rate": 1.4649811329066428e-05, "loss": 0.7167, "step": 4226 }, { "epoch": 0.3198516893042261, "grad_norm": 2.0733134746551514, "learning_rate": 1.4648058004921126e-05, "loss": 0.7044, "step": 4227 }, { "epoch": 0.3199273580265597, "grad_norm": 2.783489227294922, "learning_rate": 1.4646304327076165e-05, "loss": 0.9027, "step": 4228 }, { "epoch": 0.32000302674889336, "grad_norm": 2.3627331256866455, "learning_rate": 1.4644550295641367e-05, "loss": 0.8586, "step": 4229 }, { "epoch": 0.32007869547122697, "grad_norm": 4.856930255889893, "learning_rate": 1.464279591072657e-05, "loss": 0.6494, "step": 4230 }, { "epoch": 0.3201543641935606, "grad_norm": 2.0555548667907715, "learning_rate": 1.4641041172441642e-05, "loss": 0.7398, "step": 4231 }, { "epoch": 0.3202300329158942, "grad_norm": 2.1479907035827637, "learning_rate": 1.4639286080896468e-05, "loss": 0.769, "step": 4232 }, { "epoch": 0.32030570163822786, "grad_norm": 2.175365924835205, "learning_rate": 1.4637530636200965e-05, "loss": 0.6443, "step": 4233 }, { "epoch": 0.32038137036056147, "grad_norm": 3.0681824684143066, "learning_rate": 1.4635774838465055e-05, "loss": 0.8433, "step": 4234 }, { "epoch": 0.3204570390828951, "grad_norm": 2.8694117069244385, "learning_rate": 1.46340186877987e-05, "loss": 0.6974, "step": 4235 }, { "epoch": 0.3205327078052287, "grad_norm": 3.192309856414795, "learning_rate": 1.4632262184311872e-05, "loss": 0.7687, "step": 4236 }, { "epoch": 0.32060837652756236, "grad_norm": 2.460700750350952, "learning_rate": 1.4630505328114569e-05, "loss": 0.8654, "step": 4237 }, { "epoch": 0.32068404524989597, "grad_norm": 2.13419771194458, "learning_rate": 1.4628748119316807e-05, "loss": 0.6749, "step": 4238 }, { "epoch": 0.3207597139722296, "grad_norm": 2.089223623275757, "learning_rate": 1.4626990558028636e-05, "loss": 0.8542, "step": 4239 }, { "epoch": 0.3208353826945632, "grad_norm": 2.2618892192840576, "learning_rate": 1.4625232644360117e-05, "loss": 0.7101, "step": 4240 }, { "epoch": 0.32091105141689685, "grad_norm": 2.1564035415649414, "learning_rate": 1.4623474378421333e-05, "loss": 0.7211, "step": 4241 }, { "epoch": 0.32098672013923046, "grad_norm": 2.1531484127044678, "learning_rate": 1.4621715760322398e-05, "loss": 0.7455, "step": 4242 }, { "epoch": 0.3210623888615641, "grad_norm": 2.0043506622314453, "learning_rate": 1.4619956790173435e-05, "loss": 0.8069, "step": 4243 }, { "epoch": 0.3211380575838977, "grad_norm": 2.2013986110687256, "learning_rate": 1.4618197468084605e-05, "loss": 0.8981, "step": 4244 }, { "epoch": 0.3212137263062313, "grad_norm": 2.18361496925354, "learning_rate": 1.4616437794166073e-05, "loss": 0.9738, "step": 4245 }, { "epoch": 0.32128939502856496, "grad_norm": 2.511524200439453, "learning_rate": 1.4614677768528046e-05, "loss": 0.6794, "step": 4246 }, { "epoch": 0.32136506375089857, "grad_norm": 2.317763090133667, "learning_rate": 1.4612917391280734e-05, "loss": 0.7569, "step": 4247 }, { "epoch": 0.3214407324732322, "grad_norm": 2.617969274520874, "learning_rate": 1.4611156662534382e-05, "loss": 0.7191, "step": 4248 }, { "epoch": 0.3215164011955658, "grad_norm": 2.400247097015381, "learning_rate": 1.4609395582399249e-05, "loss": 0.72, "step": 4249 }, { "epoch": 0.32159206991789946, "grad_norm": 2.0886826515197754, "learning_rate": 1.4607634150985624e-05, "loss": 0.7317, "step": 4250 }, { "epoch": 0.32166773864023307, "grad_norm": 3.8635036945343018, "learning_rate": 1.460587236840381e-05, "loss": 0.7681, "step": 4251 }, { "epoch": 0.3217434073625667, "grad_norm": 2.07661771774292, "learning_rate": 1.4604110234764138e-05, "loss": 0.6945, "step": 4252 }, { "epoch": 0.3218190760849003, "grad_norm": 2.098093032836914, "learning_rate": 1.4602347750176957e-05, "loss": 0.6292, "step": 4253 }, { "epoch": 0.32189474480723396, "grad_norm": 2.4790546894073486, "learning_rate": 1.4600584914752637e-05, "loss": 0.7044, "step": 4254 }, { "epoch": 0.32197041352956757, "grad_norm": 2.268695831298828, "learning_rate": 1.4598821728601579e-05, "loss": 0.7005, "step": 4255 }, { "epoch": 0.3220460822519012, "grad_norm": 2.2798666954040527, "learning_rate": 1.4597058191834192e-05, "loss": 0.7742, "step": 4256 }, { "epoch": 0.3221217509742348, "grad_norm": 3.1297192573547363, "learning_rate": 1.4595294304560919e-05, "loss": 0.7833, "step": 4257 }, { "epoch": 0.3221974196965684, "grad_norm": 2.595353841781616, "learning_rate": 1.4593530066892218e-05, "loss": 0.7436, "step": 4258 }, { "epoch": 0.32227308841890207, "grad_norm": 2.4126555919647217, "learning_rate": 1.4591765478938577e-05, "loss": 0.798, "step": 4259 }, { "epoch": 0.3223487571412357, "grad_norm": 2.161207675933838, "learning_rate": 1.4590000540810492e-05, "loss": 0.6859, "step": 4260 }, { "epoch": 0.3224244258635693, "grad_norm": 2.0117027759552, "learning_rate": 1.4588235252618494e-05, "loss": 0.7389, "step": 4261 }, { "epoch": 0.3225000945859029, "grad_norm": 2.079958915710449, "learning_rate": 1.458646961447313e-05, "loss": 0.788, "step": 4262 }, { "epoch": 0.32257576330823656, "grad_norm": 1.884169340133667, "learning_rate": 1.458470362648497e-05, "loss": 0.8014, "step": 4263 }, { "epoch": 0.3226514320305702, "grad_norm": 2.48260760307312, "learning_rate": 1.4582937288764604e-05, "loss": 0.8323, "step": 4264 }, { "epoch": 0.3227271007529038, "grad_norm": 2.3636391162872314, "learning_rate": 1.458117060142265e-05, "loss": 0.8989, "step": 4265 }, { "epoch": 0.3228027694752374, "grad_norm": 1.976210594177246, "learning_rate": 1.4579403564569741e-05, "loss": 0.8588, "step": 4266 }, { "epoch": 0.32287843819757106, "grad_norm": 8.20315170288086, "learning_rate": 1.4577636178316533e-05, "loss": 0.836, "step": 4267 }, { "epoch": 0.32295410691990467, "grad_norm": 3.0736913681030273, "learning_rate": 1.4575868442773708e-05, "loss": 0.8805, "step": 4268 }, { "epoch": 0.3230297756422383, "grad_norm": 2.023252010345459, "learning_rate": 1.4574100358051967e-05, "loss": 0.6687, "step": 4269 }, { "epoch": 0.3231054443645719, "grad_norm": 2.3063673973083496, "learning_rate": 1.4572331924262033e-05, "loss": 0.7783, "step": 4270 }, { "epoch": 0.3231811130869055, "grad_norm": 3.034928321838379, "learning_rate": 1.4570563141514651e-05, "loss": 0.9137, "step": 4271 }, { "epoch": 0.32325678180923917, "grad_norm": 3.5648000240325928, "learning_rate": 1.4568794009920588e-05, "loss": 0.723, "step": 4272 }, { "epoch": 0.3233324505315728, "grad_norm": 2.6870532035827637, "learning_rate": 1.456702452959063e-05, "loss": 0.8169, "step": 4273 }, { "epoch": 0.3234081192539064, "grad_norm": 2.3009469509124756, "learning_rate": 1.4565254700635593e-05, "loss": 0.751, "step": 4274 }, { "epoch": 0.32348378797624, "grad_norm": 1.9499804973602295, "learning_rate": 1.4563484523166307e-05, "loss": 0.6896, "step": 4275 }, { "epoch": 0.32355945669857367, "grad_norm": 2.177910804748535, "learning_rate": 1.4561713997293621e-05, "loss": 0.6619, "step": 4276 }, { "epoch": 0.3236351254209073, "grad_norm": 2.637498378753662, "learning_rate": 1.4559943123128418e-05, "loss": 0.8201, "step": 4277 }, { "epoch": 0.3237107941432409, "grad_norm": 2.287648916244507, "learning_rate": 1.4558171900781594e-05, "loss": 0.8366, "step": 4278 }, { "epoch": 0.3237864628655745, "grad_norm": 2.5055220127105713, "learning_rate": 1.455640033036407e-05, "loss": 0.7382, "step": 4279 }, { "epoch": 0.32386213158790816, "grad_norm": 2.373974323272705, "learning_rate": 1.4554628411986783e-05, "loss": 0.8601, "step": 4280 }, { "epoch": 0.3239378003102418, "grad_norm": 2.571704387664795, "learning_rate": 1.45528561457607e-05, "loss": 0.7664, "step": 4281 }, { "epoch": 0.3240134690325754, "grad_norm": 2.084261655807495, "learning_rate": 1.4551083531796807e-05, "loss": 0.8355, "step": 4282 }, { "epoch": 0.324089137754909, "grad_norm": 13.960339546203613, "learning_rate": 1.4549310570206106e-05, "loss": 0.6196, "step": 4283 }, { "epoch": 0.3241648064772426, "grad_norm": 2.3920133113861084, "learning_rate": 1.454753726109963e-05, "loss": 0.718, "step": 4284 }, { "epoch": 0.3242404751995763, "grad_norm": 4.138522148132324, "learning_rate": 1.4545763604588427e-05, "loss": 0.6937, "step": 4285 }, { "epoch": 0.3243161439219099, "grad_norm": 2.481703281402588, "learning_rate": 1.454398960078357e-05, "loss": 0.7318, "step": 4286 }, { "epoch": 0.3243918126442435, "grad_norm": 2.211909532546997, "learning_rate": 1.4542215249796151e-05, "loss": 0.6617, "step": 4287 }, { "epoch": 0.3244674813665771, "grad_norm": 2.126004457473755, "learning_rate": 1.454044055173729e-05, "loss": 0.6787, "step": 4288 }, { "epoch": 0.32454315008891077, "grad_norm": 2.684769630432129, "learning_rate": 1.4538665506718119e-05, "loss": 0.6737, "step": 4289 }, { "epoch": 0.3246188188112444, "grad_norm": 1.9614814519882202, "learning_rate": 1.4536890114849804e-05, "loss": 0.6616, "step": 4290 }, { "epoch": 0.324694487533578, "grad_norm": 2.360924482345581, "learning_rate": 1.4535114376243518e-05, "loss": 0.7476, "step": 4291 }, { "epoch": 0.3247701562559116, "grad_norm": 2.6671664714813232, "learning_rate": 1.4533338291010469e-05, "loss": 0.745, "step": 4292 }, { "epoch": 0.32484582497824527, "grad_norm": 3.544961929321289, "learning_rate": 1.453156185926188e-05, "loss": 0.7626, "step": 4293 }, { "epoch": 0.3249214937005789, "grad_norm": 1.3772363662719727, "learning_rate": 1.4529785081108993e-05, "loss": 0.8338, "step": 4294 }, { "epoch": 0.3249971624229125, "grad_norm": 2.098928213119507, "learning_rate": 1.4528007956663081e-05, "loss": 0.8097, "step": 4295 }, { "epoch": 0.3250728311452461, "grad_norm": 2.549992084503174, "learning_rate": 1.452623048603543e-05, "loss": 0.8661, "step": 4296 }, { "epoch": 0.3251484998675797, "grad_norm": 2.1404833793640137, "learning_rate": 1.4524452669337353e-05, "loss": 0.6822, "step": 4297 }, { "epoch": 0.3252241685899134, "grad_norm": 2.1892287731170654, "learning_rate": 1.452267450668018e-05, "loss": 0.7878, "step": 4298 }, { "epoch": 0.325299837312247, "grad_norm": 2.762024164199829, "learning_rate": 1.4520895998175267e-05, "loss": 0.8672, "step": 4299 }, { "epoch": 0.3253755060345806, "grad_norm": 2.249630928039551, "learning_rate": 1.451911714393399e-05, "loss": 0.7267, "step": 4300 }, { "epoch": 0.3254511747569142, "grad_norm": 2.6956732273101807, "learning_rate": 1.451733794406775e-05, "loss": 0.7433, "step": 4301 }, { "epoch": 0.3255268434792479, "grad_norm": 1.908390760421753, "learning_rate": 1.4515558398687958e-05, "loss": 0.838, "step": 4302 }, { "epoch": 0.3256025122015815, "grad_norm": 1.9399058818817139, "learning_rate": 1.4513778507906063e-05, "loss": 0.7207, "step": 4303 }, { "epoch": 0.3256781809239151, "grad_norm": 2.5229694843292236, "learning_rate": 1.4511998271833522e-05, "loss": 0.8095, "step": 4304 }, { "epoch": 0.3257538496462487, "grad_norm": 1.9527181386947632, "learning_rate": 1.4510217690581824e-05, "loss": 0.6514, "step": 4305 }, { "epoch": 0.32582951836858237, "grad_norm": 2.4306392669677734, "learning_rate": 1.4508436764262467e-05, "loss": 0.7142, "step": 4306 }, { "epoch": 0.325905187090916, "grad_norm": 2.332416534423828, "learning_rate": 1.4506655492986985e-05, "loss": 0.6613, "step": 4307 }, { "epoch": 0.3259808558132496, "grad_norm": 1.7992033958435059, "learning_rate": 1.4504873876866928e-05, "loss": 0.6745, "step": 4308 }, { "epoch": 0.3260565245355832, "grad_norm": 2.053755283355713, "learning_rate": 1.4503091916013861e-05, "loss": 0.8298, "step": 4309 }, { "epoch": 0.3261321932579168, "grad_norm": 2.6498591899871826, "learning_rate": 1.4501309610539382e-05, "loss": 0.7602, "step": 4310 }, { "epoch": 0.3262078619802505, "grad_norm": 2.5992958545684814, "learning_rate": 1.44995269605551e-05, "loss": 0.7103, "step": 4311 }, { "epoch": 0.3262835307025841, "grad_norm": 2.5793347358703613, "learning_rate": 1.4497743966172652e-05, "loss": 0.6369, "step": 4312 }, { "epoch": 0.3263591994249177, "grad_norm": 2.428252696990967, "learning_rate": 1.4495960627503695e-05, "loss": 0.7442, "step": 4313 }, { "epoch": 0.3264348681472513, "grad_norm": 2.169275999069214, "learning_rate": 1.449417694465991e-05, "loss": 0.6564, "step": 4314 }, { "epoch": 0.326510536869585, "grad_norm": 2.4342880249023438, "learning_rate": 1.449239291775299e-05, "loss": 0.7119, "step": 4315 }, { "epoch": 0.3265862055919186, "grad_norm": 2.3536922931671143, "learning_rate": 1.4490608546894663e-05, "loss": 0.7913, "step": 4316 }, { "epoch": 0.3266618743142522, "grad_norm": 2.1272964477539062, "learning_rate": 1.4488823832196671e-05, "loss": 0.8102, "step": 4317 }, { "epoch": 0.3267375430365858, "grad_norm": 2.338432550430298, "learning_rate": 1.4487038773770778e-05, "loss": 0.7231, "step": 4318 }, { "epoch": 0.3268132117589195, "grad_norm": 2.033783197402954, "learning_rate": 1.4485253371728769e-05, "loss": 0.7513, "step": 4319 }, { "epoch": 0.3268888804812531, "grad_norm": 2.384861946105957, "learning_rate": 1.448346762618245e-05, "loss": 0.6621, "step": 4320 }, { "epoch": 0.3269645492035867, "grad_norm": 2.846778631210327, "learning_rate": 1.4481681537243652e-05, "loss": 0.739, "step": 4321 }, { "epoch": 0.3270402179259203, "grad_norm": 2.581815242767334, "learning_rate": 1.447989510502423e-05, "loss": 0.6875, "step": 4322 }, { "epoch": 0.3271158866482539, "grad_norm": 2.7722952365875244, "learning_rate": 1.4478108329636053e-05, "loss": 0.7614, "step": 4323 }, { "epoch": 0.3271915553705876, "grad_norm": 1.9879074096679688, "learning_rate": 1.4476321211191012e-05, "loss": 0.8599, "step": 4324 }, { "epoch": 0.3272672240929212, "grad_norm": 2.1524455547332764, "learning_rate": 1.4474533749801024e-05, "loss": 0.7467, "step": 4325 }, { "epoch": 0.3273428928152548, "grad_norm": 1.877977728843689, "learning_rate": 1.4472745945578023e-05, "loss": 0.7021, "step": 4326 }, { "epoch": 0.3274185615375884, "grad_norm": 2.359576940536499, "learning_rate": 1.4470957798633974e-05, "loss": 0.8945, "step": 4327 }, { "epoch": 0.3274942302599221, "grad_norm": 2.371588706970215, "learning_rate": 1.4469169309080853e-05, "loss": 0.8712, "step": 4328 }, { "epoch": 0.3275698989822557, "grad_norm": 2.2514874935150146, "learning_rate": 1.4467380477030658e-05, "loss": 0.645, "step": 4329 }, { "epoch": 0.3276455677045893, "grad_norm": 1.858174204826355, "learning_rate": 1.4465591302595415e-05, "loss": 0.8199, "step": 4330 }, { "epoch": 0.3277212364269229, "grad_norm": 2.374924421310425, "learning_rate": 1.4463801785887165e-05, "loss": 0.8076, "step": 4331 }, { "epoch": 0.3277969051492566, "grad_norm": 2.052088737487793, "learning_rate": 1.4462011927017977e-05, "loss": 0.7276, "step": 4332 }, { "epoch": 0.3278725738715902, "grad_norm": 2.3067727088928223, "learning_rate": 1.4460221726099936e-05, "loss": 0.7695, "step": 4333 }, { "epoch": 0.3279482425939238, "grad_norm": 2.285053014755249, "learning_rate": 1.445843118324515e-05, "loss": 0.8209, "step": 4334 }, { "epoch": 0.3280239113162574, "grad_norm": 2.512913703918457, "learning_rate": 1.4456640298565749e-05, "loss": 0.7486, "step": 4335 }, { "epoch": 0.328099580038591, "grad_norm": 2.5502045154571533, "learning_rate": 1.4454849072173882e-05, "loss": 0.8489, "step": 4336 }, { "epoch": 0.3281752487609247, "grad_norm": 1.8892613649368286, "learning_rate": 1.4453057504181723e-05, "loss": 0.838, "step": 4337 }, { "epoch": 0.3282509174832583, "grad_norm": 2.4267325401306152, "learning_rate": 1.4451265594701467e-05, "loss": 0.8521, "step": 4338 }, { "epoch": 0.3283265862055919, "grad_norm": 2.127098321914673, "learning_rate": 1.4449473343845326e-05, "loss": 0.8956, "step": 4339 }, { "epoch": 0.3284022549279255, "grad_norm": 1.7218542098999023, "learning_rate": 1.444768075172554e-05, "loss": 0.8289, "step": 4340 }, { "epoch": 0.3284779236502592, "grad_norm": 2.7613525390625, "learning_rate": 1.4445887818454365e-05, "loss": 0.8301, "step": 4341 }, { "epoch": 0.3285535923725928, "grad_norm": 2.5535645484924316, "learning_rate": 1.4444094544144084e-05, "loss": 0.7195, "step": 4342 }, { "epoch": 0.3286292610949264, "grad_norm": 2.61027193069458, "learning_rate": 1.4442300928906988e-05, "loss": 0.8215, "step": 4343 }, { "epoch": 0.32870492981726, "grad_norm": 2.108738899230957, "learning_rate": 1.4440506972855407e-05, "loss": 0.6972, "step": 4344 }, { "epoch": 0.3287805985395937, "grad_norm": 2.198636054992676, "learning_rate": 1.4438712676101686e-05, "loss": 0.8218, "step": 4345 }, { "epoch": 0.3288562672619273, "grad_norm": 2.278756618499756, "learning_rate": 1.4436918038758184e-05, "loss": 0.752, "step": 4346 }, { "epoch": 0.3289319359842609, "grad_norm": 2.669025421142578, "learning_rate": 1.4435123060937291e-05, "loss": 0.8767, "step": 4347 }, { "epoch": 0.3290076047065945, "grad_norm": 2.104459047317505, "learning_rate": 1.443332774275141e-05, "loss": 0.735, "step": 4348 }, { "epoch": 0.3290832734289282, "grad_norm": 2.096700429916382, "learning_rate": 1.4431532084312973e-05, "loss": 0.7188, "step": 4349 }, { "epoch": 0.3291589421512618, "grad_norm": 2.3591086864471436, "learning_rate": 1.4429736085734429e-05, "loss": 0.7212, "step": 4350 }, { "epoch": 0.3292346108735954, "grad_norm": 2.4446537494659424, "learning_rate": 1.4427939747128252e-05, "loss": 0.7998, "step": 4351 }, { "epoch": 0.329310279595929, "grad_norm": 1.968955397605896, "learning_rate": 1.442614306860693e-05, "loss": 0.7152, "step": 4352 }, { "epoch": 0.3293859483182626, "grad_norm": 2.0163991451263428, "learning_rate": 1.4424346050282977e-05, "loss": 0.7869, "step": 4353 }, { "epoch": 0.3294616170405963, "grad_norm": 2.4209890365600586, "learning_rate": 1.4422548692268934e-05, "loss": 0.8442, "step": 4354 }, { "epoch": 0.3295372857629299, "grad_norm": 2.772582530975342, "learning_rate": 1.442075099467735e-05, "loss": 0.8652, "step": 4355 }, { "epoch": 0.3296129544852635, "grad_norm": 2.462894916534424, "learning_rate": 1.4418952957620806e-05, "loss": 0.659, "step": 4356 }, { "epoch": 0.3296886232075971, "grad_norm": 2.011859893798828, "learning_rate": 1.4417154581211901e-05, "loss": 0.7464, "step": 4357 }, { "epoch": 0.3297642919299308, "grad_norm": 1.890229344367981, "learning_rate": 1.4415355865563254e-05, "loss": 0.8068, "step": 4358 }, { "epoch": 0.3298399606522644, "grad_norm": 2.171454906463623, "learning_rate": 1.441355681078751e-05, "loss": 0.7567, "step": 4359 }, { "epoch": 0.329915629374598, "grad_norm": 2.6762516498565674, "learning_rate": 1.4411757416997329e-05, "loss": 0.6791, "step": 4360 }, { "epoch": 0.3299912980969316, "grad_norm": 2.497652530670166, "learning_rate": 1.4409957684305392e-05, "loss": 0.8792, "step": 4361 }, { "epoch": 0.3300669668192653, "grad_norm": 3.6293368339538574, "learning_rate": 1.440815761282441e-05, "loss": 0.7134, "step": 4362 }, { "epoch": 0.3301426355415989, "grad_norm": 2.222104787826538, "learning_rate": 1.4406357202667102e-05, "loss": 0.7128, "step": 4363 }, { "epoch": 0.3302183042639325, "grad_norm": 2.209268093109131, "learning_rate": 1.4404556453946224e-05, "loss": 0.8376, "step": 4364 }, { "epoch": 0.3302939729862661, "grad_norm": 1.9097639322280884, "learning_rate": 1.440275536677454e-05, "loss": 0.8323, "step": 4365 }, { "epoch": 0.3303696417085997, "grad_norm": 2.237016201019287, "learning_rate": 1.4400953941264837e-05, "loss": 0.7609, "step": 4366 }, { "epoch": 0.3304453104309334, "grad_norm": 1.9410020112991333, "learning_rate": 1.4399152177529932e-05, "loss": 0.6425, "step": 4367 }, { "epoch": 0.330520979153267, "grad_norm": 2.629134178161621, "learning_rate": 1.4397350075682652e-05, "loss": 0.5823, "step": 4368 }, { "epoch": 0.3305966478756006, "grad_norm": 2.269423246383667, "learning_rate": 1.4395547635835856e-05, "loss": 0.6952, "step": 4369 }, { "epoch": 0.3306723165979342, "grad_norm": 3.469186544418335, "learning_rate": 1.4393744858102417e-05, "loss": 0.7122, "step": 4370 }, { "epoch": 0.3307479853202679, "grad_norm": 2.325718879699707, "learning_rate": 1.4391941742595224e-05, "loss": 0.7564, "step": 4371 }, { "epoch": 0.3308236540426015, "grad_norm": 2.014582872390747, "learning_rate": 1.4390138289427204e-05, "loss": 0.7626, "step": 4372 }, { "epoch": 0.3308993227649351, "grad_norm": 2.6732168197631836, "learning_rate": 1.438833449871129e-05, "loss": 0.7594, "step": 4373 }, { "epoch": 0.3309749914872687, "grad_norm": 1.793832778930664, "learning_rate": 1.4386530370560439e-05, "loss": 0.7993, "step": 4374 }, { "epoch": 0.3310506602096024, "grad_norm": 2.3724443912506104, "learning_rate": 1.4384725905087638e-05, "loss": 0.8349, "step": 4375 }, { "epoch": 0.331126328931936, "grad_norm": 2.2450218200683594, "learning_rate": 1.4382921102405882e-05, "loss": 0.7646, "step": 4376 }, { "epoch": 0.3312019976542696, "grad_norm": 2.3006644248962402, "learning_rate": 1.4381115962628197e-05, "loss": 0.7745, "step": 4377 }, { "epoch": 0.3312776663766032, "grad_norm": 2.5185351371765137, "learning_rate": 1.4379310485867626e-05, "loss": 0.7865, "step": 4378 }, { "epoch": 0.33135333509893683, "grad_norm": 3.231492280960083, "learning_rate": 1.4377504672237231e-05, "loss": 0.7233, "step": 4379 }, { "epoch": 0.3314290038212705, "grad_norm": 3.705059766769409, "learning_rate": 1.4375698521850104e-05, "loss": 0.8611, "step": 4380 }, { "epoch": 0.3315046725436041, "grad_norm": 2.2593576908111572, "learning_rate": 1.4373892034819347e-05, "loss": 0.7857, "step": 4381 }, { "epoch": 0.3315803412659377, "grad_norm": 2.1838455200195312, "learning_rate": 1.4372085211258087e-05, "loss": 0.7414, "step": 4382 }, { "epoch": 0.3316560099882713, "grad_norm": 2.32033371925354, "learning_rate": 1.4370278051279481e-05, "loss": 0.9, "step": 4383 }, { "epoch": 0.331731678710605, "grad_norm": 2.2040934562683105, "learning_rate": 1.4368470554996691e-05, "loss": 0.7841, "step": 4384 }, { "epoch": 0.3318073474329386, "grad_norm": 2.480590343475342, "learning_rate": 1.4366662722522909e-05, "loss": 0.7788, "step": 4385 }, { "epoch": 0.3318830161552722, "grad_norm": 2.308894157409668, "learning_rate": 1.4364854553971351e-05, "loss": 0.8387, "step": 4386 }, { "epoch": 0.3319586848776058, "grad_norm": 2.260097026824951, "learning_rate": 1.4363046049455249e-05, "loss": 0.8178, "step": 4387 }, { "epoch": 0.3320343535999395, "grad_norm": 2.4592983722686768, "learning_rate": 1.4361237209087857e-05, "loss": 0.7605, "step": 4388 }, { "epoch": 0.3321100223222731, "grad_norm": 2.417006254196167, "learning_rate": 1.435942803298245e-05, "loss": 0.7592, "step": 4389 }, { "epoch": 0.3321856910446067, "grad_norm": 2.0404160022735596, "learning_rate": 1.4357618521252326e-05, "loss": 0.7705, "step": 4390 }, { "epoch": 0.3322613597669403, "grad_norm": 1.8098578453063965, "learning_rate": 1.4355808674010805e-05, "loss": 0.7147, "step": 4391 }, { "epoch": 0.33233702848927393, "grad_norm": 2.279453754425049, "learning_rate": 1.4353998491371217e-05, "loss": 0.8241, "step": 4392 }, { "epoch": 0.3324126972116076, "grad_norm": 1.8218796253204346, "learning_rate": 1.435218797344693e-05, "loss": 0.8129, "step": 4393 }, { "epoch": 0.3324883659339412, "grad_norm": 2.245039939880371, "learning_rate": 1.4350377120351316e-05, "loss": 0.8392, "step": 4394 }, { "epoch": 0.3325640346562748, "grad_norm": 4.042459011077881, "learning_rate": 1.4348565932197786e-05, "loss": 0.8678, "step": 4395 }, { "epoch": 0.33263970337860843, "grad_norm": 2.0353004932403564, "learning_rate": 1.4346754409099758e-05, "loss": 0.7939, "step": 4396 }, { "epoch": 0.3327153721009421, "grad_norm": 2.1609416007995605, "learning_rate": 1.4344942551170673e-05, "loss": 0.7398, "step": 4397 }, { "epoch": 0.3327910408232757, "grad_norm": 2.067974805831909, "learning_rate": 1.4343130358523998e-05, "loss": 0.79, "step": 4398 }, { "epoch": 0.3328667095456093, "grad_norm": 1.9276955127716064, "learning_rate": 1.4341317831273221e-05, "loss": 0.7935, "step": 4399 }, { "epoch": 0.33294237826794293, "grad_norm": 2.694018840789795, "learning_rate": 1.4339504969531843e-05, "loss": 0.7347, "step": 4400 }, { "epoch": 0.3330180469902766, "grad_norm": 2.018950939178467, "learning_rate": 1.4337691773413394e-05, "loss": 0.5026, "step": 4401 }, { "epoch": 0.3330937157126102, "grad_norm": 1.8888964653015137, "learning_rate": 1.4335878243031423e-05, "loss": 0.6937, "step": 4402 }, { "epoch": 0.3331693844349438, "grad_norm": 2.4670050144195557, "learning_rate": 1.4334064378499495e-05, "loss": 0.7629, "step": 4403 }, { "epoch": 0.3332450531572774, "grad_norm": 2.6195714473724365, "learning_rate": 1.4332250179931207e-05, "loss": 0.7759, "step": 4404 }, { "epoch": 0.33332072187961104, "grad_norm": 2.684854507446289, "learning_rate": 1.4330435647440165e-05, "loss": 0.6832, "step": 4405 }, { "epoch": 0.3333963906019447, "grad_norm": 2.092449903488159, "learning_rate": 1.4328620781140001e-05, "loss": 0.5791, "step": 4406 }, { "epoch": 0.3334720593242783, "grad_norm": 1.9735100269317627, "learning_rate": 1.432680558114437e-05, "loss": 0.7944, "step": 4407 }, { "epoch": 0.3335477280466119, "grad_norm": 2.075486421585083, "learning_rate": 1.4324990047566943e-05, "loss": 0.8772, "step": 4408 }, { "epoch": 0.33362339676894553, "grad_norm": 1.8212954998016357, "learning_rate": 1.4323174180521418e-05, "loss": 0.7393, "step": 4409 }, { "epoch": 0.3336990654912792, "grad_norm": 2.295945167541504, "learning_rate": 1.4321357980121509e-05, "loss": 0.7714, "step": 4410 }, { "epoch": 0.3337747342136128, "grad_norm": 2.0244014263153076, "learning_rate": 1.4319541446480951e-05, "loss": 0.7639, "step": 4411 }, { "epoch": 0.3338504029359464, "grad_norm": 2.317169189453125, "learning_rate": 1.43177245797135e-05, "loss": 0.8307, "step": 4412 }, { "epoch": 0.33392607165828003, "grad_norm": 2.066453218460083, "learning_rate": 1.431590737993294e-05, "loss": 0.7536, "step": 4413 }, { "epoch": 0.3340017403806137, "grad_norm": 2.337251663208008, "learning_rate": 1.4314089847253063e-05, "loss": 0.8076, "step": 4414 }, { "epoch": 0.3340774091029473, "grad_norm": 2.4293739795684814, "learning_rate": 1.4312271981787692e-05, "loss": 0.7072, "step": 4415 }, { "epoch": 0.3341530778252809, "grad_norm": 2.3487699031829834, "learning_rate": 1.431045378365067e-05, "loss": 0.8849, "step": 4416 }, { "epoch": 0.33422874654761453, "grad_norm": 1.5888330936431885, "learning_rate": 1.4308635252955854e-05, "loss": 0.7692, "step": 4417 }, { "epoch": 0.33430441526994814, "grad_norm": 2.819643974304199, "learning_rate": 1.430681638981713e-05, "loss": 0.7928, "step": 4418 }, { "epoch": 0.3343800839922818, "grad_norm": 2.25228214263916, "learning_rate": 1.4304997194348399e-05, "loss": 0.6886, "step": 4419 }, { "epoch": 0.3344557527146154, "grad_norm": 2.0968821048736572, "learning_rate": 1.4303177666663582e-05, "loss": 0.7954, "step": 4420 }, { "epoch": 0.33453142143694903, "grad_norm": 2.138329267501831, "learning_rate": 1.4301357806876632e-05, "loss": 0.5807, "step": 4421 }, { "epoch": 0.33460709015928264, "grad_norm": 2.1104984283447266, "learning_rate": 1.4299537615101503e-05, "loss": 0.7762, "step": 4422 }, { "epoch": 0.3346827588816163, "grad_norm": 2.5498058795928955, "learning_rate": 1.4297717091452193e-05, "loss": 0.8644, "step": 4423 }, { "epoch": 0.3347584276039499, "grad_norm": 2.219202995300293, "learning_rate": 1.4295896236042702e-05, "loss": 0.7394, "step": 4424 }, { "epoch": 0.3348340963262835, "grad_norm": 2.217406988143921, "learning_rate": 1.429407504898706e-05, "loss": 0.8712, "step": 4425 }, { "epoch": 0.33490976504861714, "grad_norm": 2.460085153579712, "learning_rate": 1.4292253530399316e-05, "loss": 0.8625, "step": 4426 }, { "epoch": 0.3349854337709508, "grad_norm": 2.620077610015869, "learning_rate": 1.429043168039354e-05, "loss": 0.4811, "step": 4427 }, { "epoch": 0.3350611024932844, "grad_norm": 2.3812079429626465, "learning_rate": 1.4288609499083819e-05, "loss": 0.736, "step": 4428 }, { "epoch": 0.335136771215618, "grad_norm": 2.081484794616699, "learning_rate": 1.4286786986584267e-05, "loss": 0.7107, "step": 4429 }, { "epoch": 0.33521243993795163, "grad_norm": 1.9492160081863403, "learning_rate": 1.428496414300901e-05, "loss": 0.8236, "step": 4430 }, { "epoch": 0.33528810866028524, "grad_norm": 2.160243034362793, "learning_rate": 1.428314096847221e-05, "loss": 0.8853, "step": 4431 }, { "epoch": 0.3353637773826189, "grad_norm": 2.322145462036133, "learning_rate": 1.428131746308803e-05, "loss": 0.8845, "step": 4432 }, { "epoch": 0.3354394461049525, "grad_norm": 2.935598134994507, "learning_rate": 1.427949362697067e-05, "loss": 0.7397, "step": 4433 }, { "epoch": 0.33551511482728613, "grad_norm": 2.1716995239257812, "learning_rate": 1.4277669460234346e-05, "loss": 0.6468, "step": 4434 }, { "epoch": 0.33559078354961974, "grad_norm": 1.9296468496322632, "learning_rate": 1.4275844962993288e-05, "loss": 0.783, "step": 4435 }, { "epoch": 0.3356664522719534, "grad_norm": 2.0291011333465576, "learning_rate": 1.4274020135361758e-05, "loss": 0.7367, "step": 4436 }, { "epoch": 0.335742120994287, "grad_norm": 2.1033778190612793, "learning_rate": 1.4272194977454024e-05, "loss": 0.7526, "step": 4437 }, { "epoch": 0.33581778971662063, "grad_norm": 2.0603630542755127, "learning_rate": 1.427036948938439e-05, "loss": 0.722, "step": 4438 }, { "epoch": 0.33589345843895424, "grad_norm": 2.2816059589385986, "learning_rate": 1.4268543671267173e-05, "loss": 0.6255, "step": 4439 }, { "epoch": 0.3359691271612879, "grad_norm": 2.037482976913452, "learning_rate": 1.4266717523216709e-05, "loss": 0.5998, "step": 4440 }, { "epoch": 0.3360447958836215, "grad_norm": 1.928592562675476, "learning_rate": 1.426489104534736e-05, "loss": 0.8288, "step": 4441 }, { "epoch": 0.3361204646059551, "grad_norm": 1.9821760654449463, "learning_rate": 1.4263064237773506e-05, "loss": 0.6743, "step": 4442 }, { "epoch": 0.33619613332828874, "grad_norm": 2.9042348861694336, "learning_rate": 1.4261237100609543e-05, "loss": 0.9823, "step": 4443 }, { "epoch": 0.33627180205062235, "grad_norm": 2.4665040969848633, "learning_rate": 1.4259409633969901e-05, "loss": 0.7231, "step": 4444 }, { "epoch": 0.336347470772956, "grad_norm": 2.4456706047058105, "learning_rate": 1.4257581837969012e-05, "loss": 0.8436, "step": 4445 }, { "epoch": 0.3364231394952896, "grad_norm": 2.440807342529297, "learning_rate": 1.4255753712721347e-05, "loss": 0.7894, "step": 4446 }, { "epoch": 0.33649880821762324, "grad_norm": 1.8564308881759644, "learning_rate": 1.4253925258341384e-05, "loss": 0.7516, "step": 4447 }, { "epoch": 0.33657447693995685, "grad_norm": 2.5682311058044434, "learning_rate": 1.4252096474943626e-05, "loss": 0.701, "step": 4448 }, { "epoch": 0.3366501456622905, "grad_norm": 2.0853443145751953, "learning_rate": 1.4250267362642604e-05, "loss": 0.761, "step": 4449 }, { "epoch": 0.3367258143846241, "grad_norm": 2.6252856254577637, "learning_rate": 1.4248437921552855e-05, "loss": 0.718, "step": 4450 }, { "epoch": 0.33680148310695773, "grad_norm": 2.0729100704193115, "learning_rate": 1.4246608151788947e-05, "loss": 0.6689, "step": 4451 }, { "epoch": 0.33687715182929134, "grad_norm": 3.019207715988159, "learning_rate": 1.424477805346547e-05, "loss": 0.7578, "step": 4452 }, { "epoch": 0.336952820551625, "grad_norm": 1.869498610496521, "learning_rate": 1.4242947626697024e-05, "loss": 0.6262, "step": 4453 }, { "epoch": 0.3370284892739586, "grad_norm": 9.356559753417969, "learning_rate": 1.4241116871598241e-05, "loss": 0.6689, "step": 4454 }, { "epoch": 0.33710415799629223, "grad_norm": 2.5027689933776855, "learning_rate": 1.423928578828377e-05, "loss": 0.7085, "step": 4455 }, { "epoch": 0.33717982671862584, "grad_norm": 1.68281090259552, "learning_rate": 1.4237454376868275e-05, "loss": 0.7766, "step": 4456 }, { "epoch": 0.33725549544095945, "grad_norm": 3.305651903152466, "learning_rate": 1.4235622637466449e-05, "loss": 0.7257, "step": 4457 }, { "epoch": 0.3373311641632931, "grad_norm": 2.6844162940979004, "learning_rate": 1.4233790570192997e-05, "loss": 0.7563, "step": 4458 }, { "epoch": 0.33740683288562673, "grad_norm": 1.9760267734527588, "learning_rate": 1.423195817516265e-05, "loss": 0.6608, "step": 4459 }, { "epoch": 0.33748250160796034, "grad_norm": 2.771921157836914, "learning_rate": 1.4230125452490165e-05, "loss": 0.8818, "step": 4460 }, { "epoch": 0.33755817033029395, "grad_norm": 1.8411167860031128, "learning_rate": 1.4228292402290303e-05, "loss": 0.7705, "step": 4461 }, { "epoch": 0.3376338390526276, "grad_norm": 8.400818824768066, "learning_rate": 1.4226459024677864e-05, "loss": 0.7038, "step": 4462 }, { "epoch": 0.3377095077749612, "grad_norm": 2.2699685096740723, "learning_rate": 1.4224625319767654e-05, "loss": 0.7097, "step": 4463 }, { "epoch": 0.33778517649729484, "grad_norm": 2.2146406173706055, "learning_rate": 1.422279128767451e-05, "loss": 0.7979, "step": 4464 }, { "epoch": 0.33786084521962845, "grad_norm": 1.8910346031188965, "learning_rate": 1.4220956928513283e-05, "loss": 0.7868, "step": 4465 }, { "epoch": 0.3379365139419621, "grad_norm": 2.668886423110962, "learning_rate": 1.4219122242398842e-05, "loss": 0.7427, "step": 4466 }, { "epoch": 0.3380121826642957, "grad_norm": 2.642848014831543, "learning_rate": 1.4217287229446089e-05, "loss": 0.6824, "step": 4467 }, { "epoch": 0.33808785138662933, "grad_norm": 2.7786381244659424, "learning_rate": 1.4215451889769936e-05, "loss": 0.8064, "step": 4468 }, { "epoch": 0.33816352010896294, "grad_norm": 2.077474355697632, "learning_rate": 1.4213616223485314e-05, "loss": 0.7639, "step": 4469 }, { "epoch": 0.3382391888312966, "grad_norm": 2.304389715194702, "learning_rate": 1.4211780230707184e-05, "loss": 0.7471, "step": 4470 }, { "epoch": 0.3383148575536302, "grad_norm": 8.346323013305664, "learning_rate": 1.4209943911550519e-05, "loss": 0.6181, "step": 4471 }, { "epoch": 0.33839052627596383, "grad_norm": 1.981086254119873, "learning_rate": 1.4208107266130313e-05, "loss": 0.656, "step": 4472 }, { "epoch": 0.33846619499829744, "grad_norm": 2.607759714126587, "learning_rate": 1.4206270294561587e-05, "loss": 0.8316, "step": 4473 }, { "epoch": 0.33854186372063105, "grad_norm": 2.3243844509124756, "learning_rate": 1.4204432996959373e-05, "loss": 0.7953, "step": 4474 }, { "epoch": 0.3386175324429647, "grad_norm": 2.651670217514038, "learning_rate": 1.4202595373438735e-05, "loss": 0.7781, "step": 4475 }, { "epoch": 0.33869320116529833, "grad_norm": 2.400404930114746, "learning_rate": 1.4200757424114745e-05, "loss": 0.6033, "step": 4476 }, { "epoch": 0.33876886988763194, "grad_norm": 2.4451756477355957, "learning_rate": 1.4198919149102506e-05, "loss": 0.7409, "step": 4477 }, { "epoch": 0.33884453860996555, "grad_norm": 1.9781309366226196, "learning_rate": 1.4197080548517134e-05, "loss": 0.8582, "step": 4478 }, { "epoch": 0.3389202073322992, "grad_norm": 2.500493288040161, "learning_rate": 1.4195241622473765e-05, "loss": 0.6146, "step": 4479 }, { "epoch": 0.3389958760546328, "grad_norm": 2.1779065132141113, "learning_rate": 1.419340237108757e-05, "loss": 0.7664, "step": 4480 }, { "epoch": 0.33907154477696644, "grad_norm": 2.633241891860962, "learning_rate": 1.4191562794473713e-05, "loss": 0.7935, "step": 4481 }, { "epoch": 0.33914721349930005, "grad_norm": 1.5423035621643066, "learning_rate": 1.4189722892747406e-05, "loss": 0.957, "step": 4482 }, { "epoch": 0.3392228822216337, "grad_norm": 2.039738178253174, "learning_rate": 1.4187882666023866e-05, "loss": 0.6976, "step": 4483 }, { "epoch": 0.3392985509439673, "grad_norm": 2.4406769275665283, "learning_rate": 1.4186042114418331e-05, "loss": 0.7359, "step": 4484 }, { "epoch": 0.33937421966630094, "grad_norm": 2.1993062496185303, "learning_rate": 1.4184201238046069e-05, "loss": 0.6014, "step": 4485 }, { "epoch": 0.33944988838863455, "grad_norm": 2.3027052879333496, "learning_rate": 1.4182360037022355e-05, "loss": 0.8225, "step": 4486 }, { "epoch": 0.33952555711096816, "grad_norm": 2.9717302322387695, "learning_rate": 1.4180518511462497e-05, "loss": 0.7826, "step": 4487 }, { "epoch": 0.3396012258333018, "grad_norm": 2.1658003330230713, "learning_rate": 1.4178676661481813e-05, "loss": 0.8349, "step": 4488 }, { "epoch": 0.33967689455563543, "grad_norm": 2.308877944946289, "learning_rate": 1.417683448719564e-05, "loss": 0.6189, "step": 4489 }, { "epoch": 0.33975256327796904, "grad_norm": 1.7710340023040771, "learning_rate": 1.4174991988719355e-05, "loss": 0.7385, "step": 4490 }, { "epoch": 0.33982823200030265, "grad_norm": 2.1375606060028076, "learning_rate": 1.4173149166168332e-05, "loss": 0.739, "step": 4491 }, { "epoch": 0.3399039007226363, "grad_norm": 2.5131471157073975, "learning_rate": 1.4171306019657974e-05, "loss": 0.7041, "step": 4492 }, { "epoch": 0.33997956944496993, "grad_norm": 2.478649139404297, "learning_rate": 1.416946254930371e-05, "loss": 0.7395, "step": 4493 }, { "epoch": 0.34005523816730354, "grad_norm": 2.2513961791992188, "learning_rate": 1.416761875522098e-05, "loss": 0.7747, "step": 4494 }, { "epoch": 0.34013090688963715, "grad_norm": 2.2607710361480713, "learning_rate": 1.416577463752525e-05, "loss": 0.6677, "step": 4495 }, { "epoch": 0.3402065756119708, "grad_norm": 1.9935152530670166, "learning_rate": 1.4163930196332004e-05, "loss": 0.7986, "step": 4496 }, { "epoch": 0.34028224433430443, "grad_norm": 2.159668207168579, "learning_rate": 1.4162085431756746e-05, "loss": 1.073, "step": 4497 }, { "epoch": 0.34035791305663804, "grad_norm": 1.8614026308059692, "learning_rate": 1.4160240343915002e-05, "loss": 0.5685, "step": 4498 }, { "epoch": 0.34043358177897165, "grad_norm": 2.1379287242889404, "learning_rate": 1.4158394932922315e-05, "loss": 0.7633, "step": 4499 }, { "epoch": 0.34050925050130526, "grad_norm": 2.2675228118896484, "learning_rate": 1.4156549198894257e-05, "loss": 0.8834, "step": 4500 }, { "epoch": 0.3405849192236389, "grad_norm": 2.828331708908081, "learning_rate": 1.415470314194641e-05, "loss": 0.8714, "step": 4501 }, { "epoch": 0.34066058794597254, "grad_norm": 2.267286777496338, "learning_rate": 1.4152856762194377e-05, "loss": 0.7331, "step": 4502 }, { "epoch": 0.34073625666830615, "grad_norm": 2.4142282009124756, "learning_rate": 1.415101005975379e-05, "loss": 0.7392, "step": 4503 }, { "epoch": 0.34081192539063976, "grad_norm": 2.212761878967285, "learning_rate": 1.4149163034740291e-05, "loss": 0.7835, "step": 4504 }, { "epoch": 0.3408875941129734, "grad_norm": 2.463355541229248, "learning_rate": 1.4147315687269547e-05, "loss": 0.75, "step": 4505 }, { "epoch": 0.34096326283530703, "grad_norm": 2.5899860858917236, "learning_rate": 1.414546801745725e-05, "loss": 0.6397, "step": 4506 }, { "epoch": 0.34103893155764065, "grad_norm": 2.4131081104278564, "learning_rate": 1.4143620025419099e-05, "loss": 0.7682, "step": 4507 }, { "epoch": 0.34111460027997426, "grad_norm": 2.420891284942627, "learning_rate": 1.414177171127083e-05, "loss": 0.7546, "step": 4508 }, { "epoch": 0.3411902690023079, "grad_norm": 2.2223784923553467, "learning_rate": 1.4139923075128185e-05, "loss": 0.6996, "step": 4509 }, { "epoch": 0.34126593772464153, "grad_norm": 2.2744736671447754, "learning_rate": 1.413807411710693e-05, "loss": 0.846, "step": 4510 }, { "epoch": 0.34134160644697514, "grad_norm": 1.9180521965026855, "learning_rate": 1.4136224837322857e-05, "loss": 0.7467, "step": 4511 }, { "epoch": 0.34141727516930875, "grad_norm": 2.1215381622314453, "learning_rate": 1.413437523589177e-05, "loss": 0.7741, "step": 4512 }, { "epoch": 0.34149294389164236, "grad_norm": 2.879868984222412, "learning_rate": 1.4132525312929501e-05, "loss": 0.7951, "step": 4513 }, { "epoch": 0.34156861261397603, "grad_norm": 2.1960246562957764, "learning_rate": 1.4130675068551898e-05, "loss": 0.7194, "step": 4514 }, { "epoch": 0.34164428133630964, "grad_norm": 2.2980356216430664, "learning_rate": 1.4128824502874824e-05, "loss": 0.7676, "step": 4515 }, { "epoch": 0.34171995005864325, "grad_norm": 2.5067970752716064, "learning_rate": 1.412697361601417e-05, "loss": 0.6625, "step": 4516 }, { "epoch": 0.34179561878097686, "grad_norm": 2.2618825435638428, "learning_rate": 1.4125122408085849e-05, "loss": 0.7369, "step": 4517 }, { "epoch": 0.34187128750331053, "grad_norm": 3.191148281097412, "learning_rate": 1.4123270879205787e-05, "loss": 0.694, "step": 4518 }, { "epoch": 0.34194695622564414, "grad_norm": 1.9559108018875122, "learning_rate": 1.412141902948993e-05, "loss": 0.6441, "step": 4519 }, { "epoch": 0.34202262494797775, "grad_norm": 2.6855931282043457, "learning_rate": 1.4119566859054249e-05, "loss": 0.8047, "step": 4520 }, { "epoch": 0.34209829367031136, "grad_norm": 1.9309477806091309, "learning_rate": 1.4117714368014732e-05, "loss": 0.7086, "step": 4521 }, { "epoch": 0.342173962392645, "grad_norm": 2.5932657718658447, "learning_rate": 1.4115861556487388e-05, "loss": 0.8026, "step": 4522 }, { "epoch": 0.34224963111497864, "grad_norm": 2.6737778186798096, "learning_rate": 1.4114008424588249e-05, "loss": 0.7373, "step": 4523 }, { "epoch": 0.34232529983731225, "grad_norm": 1.9361664056777954, "learning_rate": 1.411215497243336e-05, "loss": 0.6394, "step": 4524 }, { "epoch": 0.34240096855964586, "grad_norm": 2.822296619415283, "learning_rate": 1.4110301200138793e-05, "loss": 0.6913, "step": 4525 }, { "epoch": 0.34247663728197947, "grad_norm": 1.9152315855026245, "learning_rate": 1.4108447107820634e-05, "loss": 0.7587, "step": 4526 }, { "epoch": 0.34255230600431313, "grad_norm": 2.4261810779571533, "learning_rate": 1.4106592695594997e-05, "loss": 0.7208, "step": 4527 }, { "epoch": 0.34262797472664674, "grad_norm": 2.7643346786499023, "learning_rate": 1.4104737963578006e-05, "loss": 0.7681, "step": 4528 }, { "epoch": 0.34270364344898036, "grad_norm": 1.8809469938278198, "learning_rate": 1.4102882911885817e-05, "loss": 0.7987, "step": 4529 }, { "epoch": 0.34277931217131397, "grad_norm": 2.6248059272766113, "learning_rate": 1.4101027540634591e-05, "loss": 0.7945, "step": 4530 }, { "epoch": 0.34285498089364763, "grad_norm": 2.5834579467773438, "learning_rate": 1.4099171849940526e-05, "loss": 0.7192, "step": 4531 }, { "epoch": 0.34293064961598124, "grad_norm": 2.4351603984832764, "learning_rate": 1.4097315839919824e-05, "loss": 0.79, "step": 4532 }, { "epoch": 0.34300631833831485, "grad_norm": 1.948221206665039, "learning_rate": 1.4095459510688717e-05, "loss": 0.5718, "step": 4533 }, { "epoch": 0.34308198706064846, "grad_norm": 2.2318930625915527, "learning_rate": 1.4093602862363455e-05, "loss": 0.673, "step": 4534 }, { "epoch": 0.34315765578298213, "grad_norm": 2.6008236408233643, "learning_rate": 1.4091745895060307e-05, "loss": 0.7378, "step": 4535 }, { "epoch": 0.34323332450531574, "grad_norm": 2.340876579284668, "learning_rate": 1.4089888608895564e-05, "loss": 0.6036, "step": 4536 }, { "epoch": 0.34330899322764935, "grad_norm": 2.4017269611358643, "learning_rate": 1.4088031003985535e-05, "loss": 0.6923, "step": 4537 }, { "epoch": 0.34338466194998296, "grad_norm": 1.9680914878845215, "learning_rate": 1.4086173080446543e-05, "loss": 0.91, "step": 4538 }, { "epoch": 0.34346033067231657, "grad_norm": 1.9939541816711426, "learning_rate": 1.4084314838394944e-05, "loss": 0.7028, "step": 4539 }, { "epoch": 0.34353599939465024, "grad_norm": 2.0983216762542725, "learning_rate": 1.4082456277947105e-05, "loss": 0.8404, "step": 4540 }, { "epoch": 0.34361166811698385, "grad_norm": 2.5122082233428955, "learning_rate": 1.4080597399219415e-05, "loss": 0.7113, "step": 4541 }, { "epoch": 0.34368733683931746, "grad_norm": 2.6036436557769775, "learning_rate": 1.4078738202328287e-05, "loss": 0.7351, "step": 4542 }, { "epoch": 0.34376300556165107, "grad_norm": 2.5619027614593506, "learning_rate": 1.4076878687390143e-05, "loss": 0.8871, "step": 4543 }, { "epoch": 0.34383867428398474, "grad_norm": 2.4395925998687744, "learning_rate": 1.4075018854521434e-05, "loss": 0.7969, "step": 4544 }, { "epoch": 0.34391434300631835, "grad_norm": 2.3042728900909424, "learning_rate": 1.4073158703838632e-05, "loss": 0.6841, "step": 4545 }, { "epoch": 0.34399001172865196, "grad_norm": 1.9354417324066162, "learning_rate": 1.4071298235458222e-05, "loss": 0.6648, "step": 4546 }, { "epoch": 0.34406568045098557, "grad_norm": 2.020469903945923, "learning_rate": 1.4069437449496715e-05, "loss": 0.8337, "step": 4547 }, { "epoch": 0.34414134917331923, "grad_norm": 2.3265795707702637, "learning_rate": 1.4067576346070637e-05, "loss": 0.7247, "step": 4548 }, { "epoch": 0.34421701789565284, "grad_norm": 1.9919472932815552, "learning_rate": 1.4065714925296538e-05, "loss": 0.7174, "step": 4549 }, { "epoch": 0.34429268661798645, "grad_norm": 4.500394821166992, "learning_rate": 1.4063853187290988e-05, "loss": 0.8554, "step": 4550 }, { "epoch": 0.34436835534032006, "grad_norm": 3.009972095489502, "learning_rate": 1.4061991132170571e-05, "loss": 0.9941, "step": 4551 }, { "epoch": 0.3444440240626537, "grad_norm": 2.4552001953125, "learning_rate": 1.4060128760051897e-05, "loss": 0.7654, "step": 4552 }, { "epoch": 0.34451969278498734, "grad_norm": 1.9885838031768799, "learning_rate": 1.4058266071051593e-05, "loss": 0.858, "step": 4553 }, { "epoch": 0.34459536150732095, "grad_norm": 2.1382341384887695, "learning_rate": 1.4056403065286308e-05, "loss": 0.8359, "step": 4554 }, { "epoch": 0.34467103022965456, "grad_norm": 1.8705214262008667, "learning_rate": 1.4054539742872708e-05, "loss": 0.6685, "step": 4555 }, { "epoch": 0.3447466989519882, "grad_norm": 2.376476287841797, "learning_rate": 1.405267610392748e-05, "loss": 0.9648, "step": 4556 }, { "epoch": 0.34482236767432184, "grad_norm": 2.8174350261688232, "learning_rate": 1.4050812148567337e-05, "loss": 0.7507, "step": 4557 }, { "epoch": 0.34489803639665545, "grad_norm": 2.0304601192474365, "learning_rate": 1.4048947876908994e-05, "loss": 1.0117, "step": 4558 }, { "epoch": 0.34497370511898906, "grad_norm": 2.247481107711792, "learning_rate": 1.4047083289069209e-05, "loss": 0.7993, "step": 4559 }, { "epoch": 0.34504937384132267, "grad_norm": 2.2741150856018066, "learning_rate": 1.4045218385164743e-05, "loss": 0.8526, "step": 4560 }, { "epoch": 0.34512504256365634, "grad_norm": 1.9066141843795776, "learning_rate": 1.4043353165312383e-05, "loss": 0.676, "step": 4561 }, { "epoch": 0.34520071128598995, "grad_norm": 2.4865188598632812, "learning_rate": 1.4041487629628936e-05, "loss": 0.7842, "step": 4562 }, { "epoch": 0.34527638000832356, "grad_norm": 2.2557551860809326, "learning_rate": 1.4039621778231228e-05, "loss": 0.8724, "step": 4563 }, { "epoch": 0.34535204873065717, "grad_norm": 3.9412360191345215, "learning_rate": 1.4037755611236103e-05, "loss": 0.6768, "step": 4564 }, { "epoch": 0.3454277174529908, "grad_norm": 2.0502843856811523, "learning_rate": 1.403588912876043e-05, "loss": 0.614, "step": 4565 }, { "epoch": 0.34550338617532445, "grad_norm": 2.2128398418426514, "learning_rate": 1.403402233092109e-05, "loss": 0.8441, "step": 4566 }, { "epoch": 0.34557905489765806, "grad_norm": 1.9671826362609863, "learning_rate": 1.403215521783499e-05, "loss": 0.7818, "step": 4567 }, { "epoch": 0.34565472361999167, "grad_norm": 2.188419818878174, "learning_rate": 1.4030287789619055e-05, "loss": 0.7472, "step": 4568 }, { "epoch": 0.3457303923423253, "grad_norm": 2.099836587905884, "learning_rate": 1.4028420046390227e-05, "loss": 0.7362, "step": 4569 }, { "epoch": 0.34580606106465894, "grad_norm": 2.1307973861694336, "learning_rate": 1.4026551988265472e-05, "loss": 0.824, "step": 4570 }, { "epoch": 0.34588172978699255, "grad_norm": 2.5330209732055664, "learning_rate": 1.4024683615361774e-05, "loss": 0.8464, "step": 4571 }, { "epoch": 0.34595739850932616, "grad_norm": 2.512056589126587, "learning_rate": 1.4022814927796137e-05, "loss": 0.8106, "step": 4572 }, { "epoch": 0.3460330672316598, "grad_norm": 2.4882543087005615, "learning_rate": 1.4020945925685584e-05, "loss": 0.7155, "step": 4573 }, { "epoch": 0.34610873595399344, "grad_norm": 2.098118782043457, "learning_rate": 1.4019076609147158e-05, "loss": 0.6978, "step": 4574 }, { "epoch": 0.34618440467632705, "grad_norm": 2.1440021991729736, "learning_rate": 1.401720697829792e-05, "loss": 0.6937, "step": 4575 }, { "epoch": 0.34626007339866066, "grad_norm": 2.386068820953369, "learning_rate": 1.401533703325495e-05, "loss": 0.7601, "step": 4576 }, { "epoch": 0.34633574212099427, "grad_norm": 2.6994619369506836, "learning_rate": 1.4013466774135355e-05, "loss": 0.5682, "step": 4577 }, { "epoch": 0.3464114108433279, "grad_norm": 2.2204699516296387, "learning_rate": 1.4011596201056259e-05, "loss": 0.7343, "step": 4578 }, { "epoch": 0.34648707956566155, "grad_norm": 2.2778122425079346, "learning_rate": 1.4009725314134795e-05, "loss": 0.673, "step": 4579 }, { "epoch": 0.34656274828799516, "grad_norm": 2.2870404720306396, "learning_rate": 1.4007854113488132e-05, "loss": 0.7098, "step": 4580 }, { "epoch": 0.34663841701032877, "grad_norm": 1.8866722583770752, "learning_rate": 1.4005982599233442e-05, "loss": 0.6607, "step": 4581 }, { "epoch": 0.3467140857326624, "grad_norm": 1.7772208452224731, "learning_rate": 1.4004110771487935e-05, "loss": 0.8816, "step": 4582 }, { "epoch": 0.34678975445499605, "grad_norm": 2.5590288639068604, "learning_rate": 1.4002238630368825e-05, "loss": 0.8935, "step": 4583 }, { "epoch": 0.34686542317732966, "grad_norm": 2.385871171951294, "learning_rate": 1.4000366175993354e-05, "loss": 0.8033, "step": 4584 }, { "epoch": 0.34694109189966327, "grad_norm": 2.1621901988983154, "learning_rate": 1.3998493408478778e-05, "loss": 0.8084, "step": 4585 }, { "epoch": 0.3470167606219969, "grad_norm": 2.374621868133545, "learning_rate": 1.3996620327942377e-05, "loss": 0.7405, "step": 4586 }, { "epoch": 0.34709242934433054, "grad_norm": 1.7292189598083496, "learning_rate": 1.3994746934501451e-05, "loss": 0.759, "step": 4587 }, { "epoch": 0.34716809806666415, "grad_norm": 1.871042013168335, "learning_rate": 1.3992873228273317e-05, "loss": 0.6202, "step": 4588 }, { "epoch": 0.34724376678899777, "grad_norm": 2.0830862522125244, "learning_rate": 1.3990999209375314e-05, "loss": 0.915, "step": 4589 }, { "epoch": 0.3473194355113314, "grad_norm": 2.267789363861084, "learning_rate": 1.3989124877924795e-05, "loss": 0.8031, "step": 4590 }, { "epoch": 0.34739510423366504, "grad_norm": 2.1552555561065674, "learning_rate": 1.3987250234039143e-05, "loss": 0.818, "step": 4591 }, { "epoch": 0.34747077295599865, "grad_norm": 1.9342949390411377, "learning_rate": 1.3985375277835748e-05, "loss": 0.7326, "step": 4592 }, { "epoch": 0.34754644167833226, "grad_norm": 2.7303926944732666, "learning_rate": 1.3983500009432028e-05, "loss": 0.7703, "step": 4593 }, { "epoch": 0.3476221104006659, "grad_norm": 2.3340682983398438, "learning_rate": 1.3981624428945419e-05, "loss": 0.6769, "step": 4594 }, { "epoch": 0.3476977791229995, "grad_norm": 2.0905210971832275, "learning_rate": 1.3979748536493376e-05, "loss": 0.8423, "step": 4595 }, { "epoch": 0.34777344784533315, "grad_norm": 2.2317705154418945, "learning_rate": 1.3977872332193375e-05, "loss": 0.6386, "step": 4596 }, { "epoch": 0.34784911656766676, "grad_norm": 2.2130250930786133, "learning_rate": 1.3975995816162904e-05, "loss": 0.8846, "step": 4597 }, { "epoch": 0.34792478529000037, "grad_norm": 2.068493127822876, "learning_rate": 1.3974118988519486e-05, "loss": 0.838, "step": 4598 }, { "epoch": 0.348000454012334, "grad_norm": 2.0455193519592285, "learning_rate": 1.3972241849380645e-05, "loss": 0.6782, "step": 4599 }, { "epoch": 0.34807612273466765, "grad_norm": 1.4974215030670166, "learning_rate": 1.3970364398863938e-05, "loss": 0.7652, "step": 4600 }, { "epoch": 0.34815179145700126, "grad_norm": 1.7549113035202026, "learning_rate": 1.3968486637086936e-05, "loss": 0.8342, "step": 4601 }, { "epoch": 0.34822746017933487, "grad_norm": 2.3468220233917236, "learning_rate": 1.3966608564167231e-05, "loss": 0.6665, "step": 4602 }, { "epoch": 0.3483031289016685, "grad_norm": 2.2296173572540283, "learning_rate": 1.3964730180222437e-05, "loss": 0.8537, "step": 4603 }, { "epoch": 0.34837879762400215, "grad_norm": 2.597890853881836, "learning_rate": 1.3962851485370178e-05, "loss": 0.7739, "step": 4604 }, { "epoch": 0.34845446634633576, "grad_norm": 2.384274482727051, "learning_rate": 1.3960972479728105e-05, "loss": 0.7863, "step": 4605 }, { "epoch": 0.34853013506866937, "grad_norm": 2.0478515625, "learning_rate": 1.3959093163413893e-05, "loss": 0.8538, "step": 4606 }, { "epoch": 0.348605803791003, "grad_norm": 2.03800630569458, "learning_rate": 1.3957213536545227e-05, "loss": 0.716, "step": 4607 }, { "epoch": 0.3486814725133366, "grad_norm": 2.1581063270568848, "learning_rate": 1.3955333599239813e-05, "loss": 0.6596, "step": 4608 }, { "epoch": 0.34875714123567025, "grad_norm": 2.4075028896331787, "learning_rate": 1.3953453351615387e-05, "loss": 0.7188, "step": 4609 }, { "epoch": 0.34883280995800386, "grad_norm": 2.663573980331421, "learning_rate": 1.3951572793789685e-05, "loss": 0.6679, "step": 4610 }, { "epoch": 0.3489084786803375, "grad_norm": 1.9524000883102417, "learning_rate": 1.3949691925880481e-05, "loss": 0.8243, "step": 4611 }, { "epoch": 0.3489841474026711, "grad_norm": 2.518850803375244, "learning_rate": 1.3947810748005563e-05, "loss": 0.7894, "step": 4612 }, { "epoch": 0.34905981612500475, "grad_norm": 3.2514965534210205, "learning_rate": 1.3945929260282729e-05, "loss": 0.9017, "step": 4613 }, { "epoch": 0.34913548484733836, "grad_norm": 2.429962635040283, "learning_rate": 1.3944047462829808e-05, "loss": 0.7169, "step": 4614 }, { "epoch": 0.349211153569672, "grad_norm": 2.0547587871551514, "learning_rate": 1.3942165355764644e-05, "loss": 0.7357, "step": 4615 }, { "epoch": 0.3492868222920056, "grad_norm": 1.767414927482605, "learning_rate": 1.3940282939205102e-05, "loss": 0.8615, "step": 4616 }, { "epoch": 0.34936249101433925, "grad_norm": 3.1079351902008057, "learning_rate": 1.3938400213269061e-05, "loss": 0.7191, "step": 4617 }, { "epoch": 0.34943815973667286, "grad_norm": 2.7524807453155518, "learning_rate": 1.3936517178074428e-05, "loss": 0.8091, "step": 4618 }, { "epoch": 0.34951382845900647, "grad_norm": 2.906195878982544, "learning_rate": 1.3934633833739122e-05, "loss": 0.7484, "step": 4619 }, { "epoch": 0.3495894971813401, "grad_norm": 2.340843677520752, "learning_rate": 1.3932750180381083e-05, "loss": 0.6663, "step": 4620 }, { "epoch": 0.3496651659036737, "grad_norm": 2.3411765098571777, "learning_rate": 1.3930866218118278e-05, "loss": 0.7908, "step": 4621 }, { "epoch": 0.34974083462600736, "grad_norm": 2.4406328201293945, "learning_rate": 1.3928981947068676e-05, "loss": 0.8513, "step": 4622 }, { "epoch": 0.34981650334834097, "grad_norm": 2.0467100143432617, "learning_rate": 1.3927097367350286e-05, "loss": 0.7571, "step": 4623 }, { "epoch": 0.3498921720706746, "grad_norm": 11.604252815246582, "learning_rate": 1.3925212479081125e-05, "loss": 0.7548, "step": 4624 }, { "epoch": 0.3499678407930082, "grad_norm": 2.3513424396514893, "learning_rate": 1.3923327282379224e-05, "loss": 0.7268, "step": 4625 }, { "epoch": 0.35004350951534186, "grad_norm": 2.104701042175293, "learning_rate": 1.3921441777362647e-05, "loss": 0.7942, "step": 4626 }, { "epoch": 0.35011917823767547, "grad_norm": 2.1579172611236572, "learning_rate": 1.3919555964149467e-05, "loss": 0.7785, "step": 4627 }, { "epoch": 0.3501948469600091, "grad_norm": 1.857549786567688, "learning_rate": 1.391766984285778e-05, "loss": 0.7444, "step": 4628 }, { "epoch": 0.3502705156823427, "grad_norm": 2.2894248962402344, "learning_rate": 1.3915783413605705e-05, "loss": 0.6749, "step": 4629 }, { "epoch": 0.35034618440467635, "grad_norm": 2.237196207046509, "learning_rate": 1.3913896676511369e-05, "loss": 0.845, "step": 4630 }, { "epoch": 0.35042185312700996, "grad_norm": 1.8301403522491455, "learning_rate": 1.3912009631692933e-05, "loss": 0.8037, "step": 4631 }, { "epoch": 0.3504975218493436, "grad_norm": 2.5242412090301514, "learning_rate": 1.3910122279268563e-05, "loss": 0.8057, "step": 4632 }, { "epoch": 0.3505731905716772, "grad_norm": 2.2044260501861572, "learning_rate": 1.3908234619356456e-05, "loss": 0.8019, "step": 4633 }, { "epoch": 0.3506488592940108, "grad_norm": 2.227987289428711, "learning_rate": 1.3906346652074823e-05, "loss": 0.7116, "step": 4634 }, { "epoch": 0.35072452801634446, "grad_norm": 2.293186902999878, "learning_rate": 1.3904458377541892e-05, "loss": 0.6701, "step": 4635 }, { "epoch": 0.35080019673867807, "grad_norm": 2.3717048168182373, "learning_rate": 1.3902569795875918e-05, "loss": 0.8981, "step": 4636 }, { "epoch": 0.3508758654610117, "grad_norm": 2.319140672683716, "learning_rate": 1.3900680907195162e-05, "loss": 0.718, "step": 4637 }, { "epoch": 0.3509515341833453, "grad_norm": 2.073474168777466, "learning_rate": 1.389879171161792e-05, "loss": 0.5373, "step": 4638 }, { "epoch": 0.35102720290567896, "grad_norm": 1.9484330415725708, "learning_rate": 1.3896902209262496e-05, "loss": 0.7226, "step": 4639 }, { "epoch": 0.35110287162801257, "grad_norm": 2.9964051246643066, "learning_rate": 1.3895012400247216e-05, "loss": 0.6546, "step": 4640 }, { "epoch": 0.3511785403503462, "grad_norm": 2.1370325088500977, "learning_rate": 1.3893122284690426e-05, "loss": 0.9182, "step": 4641 }, { "epoch": 0.3512542090726798, "grad_norm": 2.6059162616729736, "learning_rate": 1.3891231862710495e-05, "loss": 0.8695, "step": 4642 }, { "epoch": 0.35132987779501346, "grad_norm": 2.8685076236724854, "learning_rate": 1.3889341134425802e-05, "loss": 0.6888, "step": 4643 }, { "epoch": 0.35140554651734707, "grad_norm": 2.9174864292144775, "learning_rate": 1.3887450099954757e-05, "loss": 0.7698, "step": 4644 }, { "epoch": 0.3514812152396807, "grad_norm": 2.5367772579193115, "learning_rate": 1.3885558759415778e-05, "loss": 0.7116, "step": 4645 }, { "epoch": 0.3515568839620143, "grad_norm": 2.0750954151153564, "learning_rate": 1.3883667112927305e-05, "loss": 0.6243, "step": 4646 }, { "epoch": 0.3516325526843479, "grad_norm": 1.9517443180084229, "learning_rate": 1.3881775160607804e-05, "loss": 0.596, "step": 4647 }, { "epoch": 0.35170822140668156, "grad_norm": 2.155714988708496, "learning_rate": 1.387988290257575e-05, "loss": 0.636, "step": 4648 }, { "epoch": 0.3517838901290152, "grad_norm": 2.3737261295318604, "learning_rate": 1.3877990338949647e-05, "loss": 0.5942, "step": 4649 }, { "epoch": 0.3518595588513488, "grad_norm": 2.2594027519226074, "learning_rate": 1.3876097469848013e-05, "loss": 0.7346, "step": 4650 }, { "epoch": 0.3519352275736824, "grad_norm": 2.5711894035339355, "learning_rate": 1.3874204295389382e-05, "loss": 0.7492, "step": 4651 }, { "epoch": 0.35201089629601606, "grad_norm": 1.7782737016677856, "learning_rate": 1.3872310815692313e-05, "loss": 0.7328, "step": 4652 }, { "epoch": 0.3520865650183497, "grad_norm": 2.088243246078491, "learning_rate": 1.3870417030875383e-05, "loss": 0.6446, "step": 4653 }, { "epoch": 0.3521622337406833, "grad_norm": 1.7141531705856323, "learning_rate": 1.3868522941057184e-05, "loss": 0.6885, "step": 4654 }, { "epoch": 0.3522379024630169, "grad_norm": 1.8343784809112549, "learning_rate": 1.3866628546356334e-05, "loss": 0.7294, "step": 4655 }, { "epoch": 0.35231357118535056, "grad_norm": 2.9176583290100098, "learning_rate": 1.386473384689146e-05, "loss": 0.6369, "step": 4656 }, { "epoch": 0.35238923990768417, "grad_norm": 2.6107664108276367, "learning_rate": 1.3862838842781222e-05, "loss": 0.6926, "step": 4657 }, { "epoch": 0.3524649086300178, "grad_norm": 2.2640960216522217, "learning_rate": 1.3860943534144288e-05, "loss": 0.6889, "step": 4658 }, { "epoch": 0.3525405773523514, "grad_norm": 2.1459038257598877, "learning_rate": 1.3859047921099342e-05, "loss": 0.8295, "step": 4659 }, { "epoch": 0.352616246074685, "grad_norm": 2.1899123191833496, "learning_rate": 1.3857152003765108e-05, "loss": 0.7613, "step": 4660 }, { "epoch": 0.35269191479701867, "grad_norm": 2.238300085067749, "learning_rate": 1.3855255782260297e-05, "loss": 0.7159, "step": 4661 }, { "epoch": 0.3527675835193523, "grad_norm": 2.169271945953369, "learning_rate": 1.3853359256703668e-05, "loss": 0.7097, "step": 4662 }, { "epoch": 0.3528432522416859, "grad_norm": 2.406660795211792, "learning_rate": 1.3851462427213986e-05, "loss": 0.9081, "step": 4663 }, { "epoch": 0.3529189209640195, "grad_norm": 2.2769057750701904, "learning_rate": 1.3849565293910034e-05, "loss": 0.7397, "step": 4664 }, { "epoch": 0.35299458968635317, "grad_norm": 2.031386137008667, "learning_rate": 1.3847667856910621e-05, "loss": 0.8047, "step": 4665 }, { "epoch": 0.3530702584086868, "grad_norm": 2.512640953063965, "learning_rate": 1.3845770116334561e-05, "loss": 0.8456, "step": 4666 }, { "epoch": 0.3531459271310204, "grad_norm": 2.3293848037719727, "learning_rate": 1.384387207230071e-05, "loss": 0.6356, "step": 4667 }, { "epoch": 0.353221595853354, "grad_norm": 2.598417043685913, "learning_rate": 1.384197372492792e-05, "loss": 0.7213, "step": 4668 }, { "epoch": 0.35329726457568766, "grad_norm": 2.296818733215332, "learning_rate": 1.3840075074335074e-05, "loss": 0.7332, "step": 4669 }, { "epoch": 0.3533729332980213, "grad_norm": 1.8637245893478394, "learning_rate": 1.3838176120641071e-05, "loss": 0.4631, "step": 4670 }, { "epoch": 0.3534486020203549, "grad_norm": 2.56315541267395, "learning_rate": 1.3836276863964834e-05, "loss": 0.7563, "step": 4671 }, { "epoch": 0.3535242707426885, "grad_norm": 2.7444026470184326, "learning_rate": 1.3834377304425298e-05, "loss": 0.7095, "step": 4672 }, { "epoch": 0.3535999394650221, "grad_norm": 1.994140386581421, "learning_rate": 1.3832477442141416e-05, "loss": 0.6721, "step": 4673 }, { "epoch": 0.35367560818735577, "grad_norm": 2.4888455867767334, "learning_rate": 1.3830577277232164e-05, "loss": 0.7671, "step": 4674 }, { "epoch": 0.3537512769096894, "grad_norm": 1.9443211555480957, "learning_rate": 1.3828676809816543e-05, "loss": 0.795, "step": 4675 }, { "epoch": 0.353826945632023, "grad_norm": 2.254079818725586, "learning_rate": 1.3826776040013563e-05, "loss": 0.6806, "step": 4676 }, { "epoch": 0.3539026143543566, "grad_norm": 2.345444440841675, "learning_rate": 1.3824874967942251e-05, "loss": 0.6459, "step": 4677 }, { "epoch": 0.35397828307669027, "grad_norm": 2.6580867767333984, "learning_rate": 1.382297359372167e-05, "loss": 0.9527, "step": 4678 }, { "epoch": 0.3540539517990239, "grad_norm": 2.4587290287017822, "learning_rate": 1.3821071917470877e-05, "loss": 0.8039, "step": 4679 }, { "epoch": 0.3541296205213575, "grad_norm": 1.9164375066757202, "learning_rate": 1.3819169939308969e-05, "loss": 0.7868, "step": 4680 }, { "epoch": 0.3542052892436911, "grad_norm": 2.0153324604034424, "learning_rate": 1.3817267659355055e-05, "loss": 0.739, "step": 4681 }, { "epoch": 0.35428095796602477, "grad_norm": 2.2942817211151123, "learning_rate": 1.3815365077728255e-05, "loss": 0.8144, "step": 4682 }, { "epoch": 0.3543566266883584, "grad_norm": 2.601985216140747, "learning_rate": 1.3813462194547724e-05, "loss": 0.7122, "step": 4683 }, { "epoch": 0.354432295410692, "grad_norm": 1.8438481092453003, "learning_rate": 1.3811559009932615e-05, "loss": 0.7519, "step": 4684 }, { "epoch": 0.3545079641330256, "grad_norm": 2.120218276977539, "learning_rate": 1.3809655524002124e-05, "loss": 0.7314, "step": 4685 }, { "epoch": 0.3545836328553592, "grad_norm": 2.4902069568634033, "learning_rate": 1.3807751736875446e-05, "loss": 0.6525, "step": 4686 }, { "epoch": 0.3546593015776929, "grad_norm": 2.555546998977661, "learning_rate": 1.3805847648671803e-05, "loss": 0.7323, "step": 4687 }, { "epoch": 0.3547349703000265, "grad_norm": 2.1880481243133545, "learning_rate": 1.3803943259510439e-05, "loss": 0.792, "step": 4688 }, { "epoch": 0.3548106390223601, "grad_norm": 2.3539915084838867, "learning_rate": 1.3802038569510606e-05, "loss": 0.7203, "step": 4689 }, { "epoch": 0.3548863077446937, "grad_norm": 2.6515328884124756, "learning_rate": 1.3800133578791591e-05, "loss": 0.8069, "step": 4690 }, { "epoch": 0.3549619764670274, "grad_norm": 1.9614640474319458, "learning_rate": 1.3798228287472683e-05, "loss": 0.7215, "step": 4691 }, { "epoch": 0.355037645189361, "grad_norm": 2.137275218963623, "learning_rate": 1.37963226956732e-05, "loss": 0.8132, "step": 4692 }, { "epoch": 0.3551133139116946, "grad_norm": 1.8678892850875854, "learning_rate": 1.3794416803512477e-05, "loss": 0.5912, "step": 4693 }, { "epoch": 0.3551889826340282, "grad_norm": 2.0661749839782715, "learning_rate": 1.379251061110987e-05, "loss": 0.7803, "step": 4694 }, { "epoch": 0.35526465135636187, "grad_norm": 2.396214485168457, "learning_rate": 1.3790604118584744e-05, "loss": 0.7889, "step": 4695 }, { "epoch": 0.3553403200786955, "grad_norm": 2.3043839931488037, "learning_rate": 1.3788697326056494e-05, "loss": 0.8395, "step": 4696 }, { "epoch": 0.3554159888010291, "grad_norm": 2.3201632499694824, "learning_rate": 1.3786790233644529e-05, "loss": 0.875, "step": 4697 }, { "epoch": 0.3554916575233627, "grad_norm": 2.381060838699341, "learning_rate": 1.3784882841468276e-05, "loss": 0.8286, "step": 4698 }, { "epoch": 0.35556732624569637, "grad_norm": 2.4320642948150635, "learning_rate": 1.3782975149647184e-05, "loss": 0.6862, "step": 4699 }, { "epoch": 0.35564299496803, "grad_norm": 2.320983409881592, "learning_rate": 1.378106715830072e-05, "loss": 0.6916, "step": 4700 }, { "epoch": 0.3557186636903636, "grad_norm": 2.415428876876831, "learning_rate": 1.3779158867548367e-05, "loss": 0.6812, "step": 4701 }, { "epoch": 0.3557943324126972, "grad_norm": 2.3122177124023438, "learning_rate": 1.3777250277509621e-05, "loss": 0.6497, "step": 4702 }, { "epoch": 0.3558700011350308, "grad_norm": 2.4607081413269043, "learning_rate": 1.3775341388304019e-05, "loss": 0.6586, "step": 4703 }, { "epoch": 0.3559456698573645, "grad_norm": 2.4338831901550293, "learning_rate": 1.3773432200051093e-05, "loss": 0.7655, "step": 4704 }, { "epoch": 0.3560213385796981, "grad_norm": 3.2349586486816406, "learning_rate": 1.3771522712870401e-05, "loss": 0.7787, "step": 4705 }, { "epoch": 0.3560970073020317, "grad_norm": 2.1032230854034424, "learning_rate": 1.3769612926881526e-05, "loss": 0.7262, "step": 4706 }, { "epoch": 0.3561726760243653, "grad_norm": 2.523385524749756, "learning_rate": 1.3767702842204059e-05, "loss": 0.7117, "step": 4707 }, { "epoch": 0.356248344746699, "grad_norm": 2.341728687286377, "learning_rate": 1.3765792458957624e-05, "loss": 0.7133, "step": 4708 }, { "epoch": 0.3563240134690326, "grad_norm": 2.2252790927886963, "learning_rate": 1.3763881777261847e-05, "loss": 0.771, "step": 4709 }, { "epoch": 0.3563996821913662, "grad_norm": 2.088966131210327, "learning_rate": 1.3761970797236386e-05, "loss": 0.7424, "step": 4710 }, { "epoch": 0.3564753509136998, "grad_norm": 2.176795244216919, "learning_rate": 1.3760059519000912e-05, "loss": 0.7256, "step": 4711 }, { "epoch": 0.3565510196360335, "grad_norm": 2.541574716567993, "learning_rate": 1.3758147942675115e-05, "loss": 0.7764, "step": 4712 }, { "epoch": 0.3566266883583671, "grad_norm": 2.2796478271484375, "learning_rate": 1.3756236068378706e-05, "loss": 0.7737, "step": 4713 }, { "epoch": 0.3567023570807007, "grad_norm": 2.664128541946411, "learning_rate": 1.3754323896231409e-05, "loss": 0.8546, "step": 4714 }, { "epoch": 0.3567780258030343, "grad_norm": 2.4556145668029785, "learning_rate": 1.3752411426352971e-05, "loss": 0.7036, "step": 4715 }, { "epoch": 0.3568536945253679, "grad_norm": 2.207688093185425, "learning_rate": 1.375049865886316e-05, "loss": 0.7345, "step": 4716 }, { "epoch": 0.3569293632477016, "grad_norm": 2.008202075958252, "learning_rate": 1.3748585593881757e-05, "loss": 0.7968, "step": 4717 }, { "epoch": 0.3570050319700352, "grad_norm": 1.9544659852981567, "learning_rate": 1.3746672231528565e-05, "loss": 0.7988, "step": 4718 }, { "epoch": 0.3570807006923688, "grad_norm": 2.3357014656066895, "learning_rate": 1.3744758571923408e-05, "loss": 0.7303, "step": 4719 }, { "epoch": 0.3571563694147024, "grad_norm": 2.123999834060669, "learning_rate": 1.3742844615186122e-05, "loss": 0.7725, "step": 4720 }, { "epoch": 0.3572320381370361, "grad_norm": 2.152657985687256, "learning_rate": 1.3740930361436565e-05, "loss": 0.8359, "step": 4721 }, { "epoch": 0.3573077068593697, "grad_norm": 2.6231911182403564, "learning_rate": 1.3739015810794616e-05, "loss": 0.8116, "step": 4722 }, { "epoch": 0.3573833755817033, "grad_norm": 2.069714307785034, "learning_rate": 1.3737100963380164e-05, "loss": 0.7383, "step": 4723 }, { "epoch": 0.3574590443040369, "grad_norm": 2.1583962440490723, "learning_rate": 1.3735185819313134e-05, "loss": 0.7683, "step": 4724 }, { "epoch": 0.3575347130263706, "grad_norm": 2.315765619277954, "learning_rate": 1.3733270378713448e-05, "loss": 0.708, "step": 4725 }, { "epoch": 0.3576103817487042, "grad_norm": 1.5999698638916016, "learning_rate": 1.3731354641701064e-05, "loss": 0.7547, "step": 4726 }, { "epoch": 0.3576860504710378, "grad_norm": 2.330720901489258, "learning_rate": 1.3729438608395951e-05, "loss": 0.7628, "step": 4727 }, { "epoch": 0.3577617191933714, "grad_norm": 2.1309075355529785, "learning_rate": 1.3727522278918094e-05, "loss": 0.632, "step": 4728 }, { "epoch": 0.357837387915705, "grad_norm": 2.6685056686401367, "learning_rate": 1.3725605653387502e-05, "loss": 0.8374, "step": 4729 }, { "epoch": 0.3579130566380387, "grad_norm": 1.8115298748016357, "learning_rate": 1.3723688731924195e-05, "loss": 0.6296, "step": 4730 }, { "epoch": 0.3579887253603723, "grad_norm": 2.554457664489746, "learning_rate": 1.3721771514648227e-05, "loss": 0.6815, "step": 4731 }, { "epoch": 0.3580643940827059, "grad_norm": 2.170767068862915, "learning_rate": 1.3719854001679654e-05, "loss": 0.7365, "step": 4732 }, { "epoch": 0.3581400628050395, "grad_norm": 1.8011586666107178, "learning_rate": 1.3717936193138555e-05, "loss": 0.707, "step": 4733 }, { "epoch": 0.3582157315273732, "grad_norm": 2.423759937286377, "learning_rate": 1.371601808914503e-05, "loss": 0.7133, "step": 4734 }, { "epoch": 0.3582914002497068, "grad_norm": 2.005772113800049, "learning_rate": 1.3714099689819203e-05, "loss": 0.711, "step": 4735 }, { "epoch": 0.3583670689720404, "grad_norm": 2.3678839206695557, "learning_rate": 1.3712180995281207e-05, "loss": 0.7133, "step": 4736 }, { "epoch": 0.358442737694374, "grad_norm": 1.848137617111206, "learning_rate": 1.3710262005651195e-05, "loss": 0.5987, "step": 4737 }, { "epoch": 0.3585184064167077, "grad_norm": 1.9859153032302856, "learning_rate": 1.370834272104934e-05, "loss": 0.6771, "step": 4738 }, { "epoch": 0.3585940751390413, "grad_norm": 1.895273208618164, "learning_rate": 1.3706423141595834e-05, "loss": 0.7763, "step": 4739 }, { "epoch": 0.3586697438613749, "grad_norm": 2.161606788635254, "learning_rate": 1.370450326741089e-05, "loss": 0.7725, "step": 4740 }, { "epoch": 0.3587454125837085, "grad_norm": 2.1437206268310547, "learning_rate": 1.3702583098614734e-05, "loss": 0.7627, "step": 4741 }, { "epoch": 0.3588210813060421, "grad_norm": 2.3979063034057617, "learning_rate": 1.3700662635327618e-05, "loss": 0.8281, "step": 4742 }, { "epoch": 0.3588967500283758, "grad_norm": 1.9266215562820435, "learning_rate": 1.36987418776698e-05, "loss": 0.7498, "step": 4743 }, { "epoch": 0.3589724187507094, "grad_norm": 1.760246992111206, "learning_rate": 1.369682082576157e-05, "loss": 0.7122, "step": 4744 }, { "epoch": 0.359048087473043, "grad_norm": 2.123413562774658, "learning_rate": 1.369489947972323e-05, "loss": 0.6687, "step": 4745 }, { "epoch": 0.3591237561953766, "grad_norm": 1.9650204181671143, "learning_rate": 1.3692977839675095e-05, "loss": 0.6699, "step": 4746 }, { "epoch": 0.3591994249177103, "grad_norm": 2.239622116088867, "learning_rate": 1.3691055905737511e-05, "loss": 0.6695, "step": 4747 }, { "epoch": 0.3592750936400439, "grad_norm": 2.251483201980591, "learning_rate": 1.3689133678030834e-05, "loss": 0.7291, "step": 4748 }, { "epoch": 0.3593507623623775, "grad_norm": 2.2021195888519287, "learning_rate": 1.368721115667544e-05, "loss": 0.682, "step": 4749 }, { "epoch": 0.3594264310847111, "grad_norm": 2.117192506790161, "learning_rate": 1.3685288341791724e-05, "loss": 0.8493, "step": 4750 }, { "epoch": 0.3595020998070448, "grad_norm": 1.8998351097106934, "learning_rate": 1.3683365233500096e-05, "loss": 0.6437, "step": 4751 }, { "epoch": 0.3595777685293784, "grad_norm": 2.776381731033325, "learning_rate": 1.3681441831920991e-05, "loss": 0.8318, "step": 4752 }, { "epoch": 0.359653437251712, "grad_norm": 2.061583995819092, "learning_rate": 1.3679518137174854e-05, "loss": 0.6635, "step": 4753 }, { "epoch": 0.3597291059740456, "grad_norm": 2.730015516281128, "learning_rate": 1.367759414938216e-05, "loss": 0.8673, "step": 4754 }, { "epoch": 0.3598047746963792, "grad_norm": 1.971379041671753, "learning_rate": 1.3675669868663386e-05, "loss": 0.6937, "step": 4755 }, { "epoch": 0.3598804434187129, "grad_norm": 2.2239530086517334, "learning_rate": 1.3673745295139044e-05, "loss": 0.7792, "step": 4756 }, { "epoch": 0.3599561121410465, "grad_norm": 2.4830453395843506, "learning_rate": 1.3671820428929654e-05, "loss": 0.8029, "step": 4757 }, { "epoch": 0.3600317808633801, "grad_norm": 3.1367461681365967, "learning_rate": 1.3669895270155762e-05, "loss": 0.7619, "step": 4758 }, { "epoch": 0.3601074495857137, "grad_norm": 2.4550986289978027, "learning_rate": 1.3667969818937922e-05, "loss": 0.6998, "step": 4759 }, { "epoch": 0.3601831183080474, "grad_norm": 2.109701633453369, "learning_rate": 1.3666044075396713e-05, "loss": 0.7235, "step": 4760 }, { "epoch": 0.360258787030381, "grad_norm": 2.4037647247314453, "learning_rate": 1.3664118039652732e-05, "loss": 0.8367, "step": 4761 }, { "epoch": 0.3603344557527146, "grad_norm": 2.3242437839508057, "learning_rate": 1.3662191711826594e-05, "loss": 0.6881, "step": 4762 }, { "epoch": 0.3604101244750482, "grad_norm": 2.0473146438598633, "learning_rate": 1.3660265092038933e-05, "loss": 0.6282, "step": 4763 }, { "epoch": 0.3604857931973819, "grad_norm": 2.0871689319610596, "learning_rate": 1.3658338180410396e-05, "loss": 0.7049, "step": 4764 }, { "epoch": 0.3605614619197155, "grad_norm": 2.224372625350952, "learning_rate": 1.3656410977061659e-05, "loss": 0.8585, "step": 4765 }, { "epoch": 0.3606371306420491, "grad_norm": 2.71907114982605, "learning_rate": 1.3654483482113403e-05, "loss": 0.6829, "step": 4766 }, { "epoch": 0.3607127993643827, "grad_norm": 2.1918084621429443, "learning_rate": 1.365255569568634e-05, "loss": 0.744, "step": 4767 }, { "epoch": 0.36078846808671633, "grad_norm": 1.7974071502685547, "learning_rate": 1.3650627617901187e-05, "loss": 0.8826, "step": 4768 }, { "epoch": 0.36086413680905, "grad_norm": 2.0619354248046875, "learning_rate": 1.3648699248878694e-05, "loss": 0.771, "step": 4769 }, { "epoch": 0.3609398055313836, "grad_norm": 2.1140151023864746, "learning_rate": 1.3646770588739617e-05, "loss": 0.8686, "step": 4770 }, { "epoch": 0.3610154742537172, "grad_norm": 1.8046315908432007, "learning_rate": 1.3644841637604734e-05, "loss": 0.6257, "step": 4771 }, { "epoch": 0.3610911429760508, "grad_norm": 2.37791109085083, "learning_rate": 1.3642912395594848e-05, "loss": 0.7334, "step": 4772 }, { "epoch": 0.3611668116983845, "grad_norm": 1.7635067701339722, "learning_rate": 1.3640982862830768e-05, "loss": 0.6244, "step": 4773 }, { "epoch": 0.3612424804207181, "grad_norm": 2.401155471801758, "learning_rate": 1.3639053039433334e-05, "loss": 0.6292, "step": 4774 }, { "epoch": 0.3613181491430517, "grad_norm": 1.8144967555999756, "learning_rate": 1.3637122925523391e-05, "loss": 0.8272, "step": 4775 }, { "epoch": 0.3613938178653853, "grad_norm": 2.4782304763793945, "learning_rate": 1.3635192521221815e-05, "loss": 0.7252, "step": 4776 }, { "epoch": 0.361469486587719, "grad_norm": 1.9335335493087769, "learning_rate": 1.363326182664949e-05, "loss": 0.8413, "step": 4777 }, { "epoch": 0.3615451553100526, "grad_norm": 1.8808588981628418, "learning_rate": 1.363133084192732e-05, "loss": 0.6569, "step": 4778 }, { "epoch": 0.3616208240323862, "grad_norm": 1.8410663604736328, "learning_rate": 1.3629399567176237e-05, "loss": 0.7255, "step": 4779 }, { "epoch": 0.3616964927547198, "grad_norm": 1.8325263261795044, "learning_rate": 1.3627468002517179e-05, "loss": 0.7194, "step": 4780 }, { "epoch": 0.36177216147705343, "grad_norm": 2.408630847930908, "learning_rate": 1.3625536148071109e-05, "loss": 0.7562, "step": 4781 }, { "epoch": 0.3618478301993871, "grad_norm": 2.4441497325897217, "learning_rate": 1.3623604003959004e-05, "loss": 0.7213, "step": 4782 }, { "epoch": 0.3619234989217207, "grad_norm": 2.6353988647460938, "learning_rate": 1.3621671570301858e-05, "loss": 0.8314, "step": 4783 }, { "epoch": 0.3619991676440543, "grad_norm": 2.562260866165161, "learning_rate": 1.3619738847220694e-05, "loss": 0.6955, "step": 4784 }, { "epoch": 0.36207483636638793, "grad_norm": 2.1167261600494385, "learning_rate": 1.361780583483654e-05, "loss": 0.6659, "step": 4785 }, { "epoch": 0.3621505050887216, "grad_norm": 2.0852859020233154, "learning_rate": 1.3615872533270452e-05, "loss": 0.8003, "step": 4786 }, { "epoch": 0.3622261738110552, "grad_norm": 2.634650945663452, "learning_rate": 1.3613938942643491e-05, "loss": 0.851, "step": 4787 }, { "epoch": 0.3623018425333888, "grad_norm": 2.0236427783966064, "learning_rate": 1.3612005063076753e-05, "loss": 0.6714, "step": 4788 }, { "epoch": 0.36237751125572243, "grad_norm": 2.218775510787964, "learning_rate": 1.361007089469134e-05, "loss": 0.8034, "step": 4789 }, { "epoch": 0.3624531799780561, "grad_norm": 1.8466893434524536, "learning_rate": 1.3608136437608379e-05, "loss": 0.8057, "step": 4790 }, { "epoch": 0.3625288487003897, "grad_norm": 1.9202516078948975, "learning_rate": 1.3606201691949005e-05, "loss": 0.8247, "step": 4791 }, { "epoch": 0.3626045174227233, "grad_norm": 2.387627601623535, "learning_rate": 1.3604266657834388e-05, "loss": 0.6645, "step": 4792 }, { "epoch": 0.3626801861450569, "grad_norm": 2.0650217533111572, "learning_rate": 1.36023313353857e-05, "loss": 0.6833, "step": 4793 }, { "epoch": 0.36275585486739054, "grad_norm": 2.685912847518921, "learning_rate": 1.3600395724724133e-05, "loss": 0.7034, "step": 4794 }, { "epoch": 0.3628315235897242, "grad_norm": 2.143637180328369, "learning_rate": 1.3598459825970912e-05, "loss": 0.6371, "step": 4795 }, { "epoch": 0.3629071923120578, "grad_norm": 2.5087201595306396, "learning_rate": 1.3596523639247263e-05, "loss": 0.6024, "step": 4796 }, { "epoch": 0.3629828610343914, "grad_norm": 2.4101240634918213, "learning_rate": 1.3594587164674435e-05, "loss": 0.7716, "step": 4797 }, { "epoch": 0.36305852975672503, "grad_norm": 2.613996982574463, "learning_rate": 1.3592650402373699e-05, "loss": 0.7144, "step": 4798 }, { "epoch": 0.3631341984790587, "grad_norm": 3.9812352657318115, "learning_rate": 1.359071335246634e-05, "loss": 0.6945, "step": 4799 }, { "epoch": 0.3632098672013923, "grad_norm": 2.4522552490234375, "learning_rate": 1.3588776015073662e-05, "loss": 0.7366, "step": 4800 }, { "epoch": 0.3632855359237259, "grad_norm": 2.1591553688049316, "learning_rate": 1.3586838390316987e-05, "loss": 0.6077, "step": 4801 }, { "epoch": 0.36336120464605953, "grad_norm": 2.159881114959717, "learning_rate": 1.3584900478317658e-05, "loss": 0.6745, "step": 4802 }, { "epoch": 0.3634368733683932, "grad_norm": 2.3063745498657227, "learning_rate": 1.3582962279197031e-05, "loss": 0.7862, "step": 4803 }, { "epoch": 0.3635125420907268, "grad_norm": 2.219144582748413, "learning_rate": 1.3581023793076485e-05, "loss": 0.8561, "step": 4804 }, { "epoch": 0.3635882108130604, "grad_norm": 6.310362815856934, "learning_rate": 1.3579085020077409e-05, "loss": 0.6899, "step": 4805 }, { "epoch": 0.36366387953539403, "grad_norm": 2.508697748184204, "learning_rate": 1.3577145960321223e-05, "loss": 0.8235, "step": 4806 }, { "epoch": 0.36373954825772764, "grad_norm": 2.050865888595581, "learning_rate": 1.357520661392935e-05, "loss": 0.6781, "step": 4807 }, { "epoch": 0.3638152169800613, "grad_norm": 2.487555742263794, "learning_rate": 1.357326698102324e-05, "loss": 0.7999, "step": 4808 }, { "epoch": 0.3638908857023949, "grad_norm": 2.141352415084839, "learning_rate": 1.3571327061724362e-05, "loss": 0.7551, "step": 4809 }, { "epoch": 0.36396655442472853, "grad_norm": 2.2181026935577393, "learning_rate": 1.3569386856154194e-05, "loss": 0.8361, "step": 4810 }, { "epoch": 0.36404222314706214, "grad_norm": 1.389456033706665, "learning_rate": 1.3567446364434246e-05, "loss": 0.8809, "step": 4811 }, { "epoch": 0.3641178918693958, "grad_norm": 2.0791516304016113, "learning_rate": 1.356550558668603e-05, "loss": 0.6581, "step": 4812 }, { "epoch": 0.3641935605917294, "grad_norm": 2.7910525798797607, "learning_rate": 1.3563564523031091e-05, "loss": 0.7416, "step": 4813 }, { "epoch": 0.364269229314063, "grad_norm": 2.5519115924835205, "learning_rate": 1.3561623173590978e-05, "loss": 0.7204, "step": 4814 }, { "epoch": 0.36434489803639664, "grad_norm": 2.1502325534820557, "learning_rate": 1.3559681538487269e-05, "loss": 0.8517, "step": 4815 }, { "epoch": 0.3644205667587303, "grad_norm": 2.1360151767730713, "learning_rate": 1.3557739617841558e-05, "loss": 0.7458, "step": 4816 }, { "epoch": 0.3644962354810639, "grad_norm": 2.29506254196167, "learning_rate": 1.3555797411775447e-05, "loss": 0.705, "step": 4817 }, { "epoch": 0.3645719042033975, "grad_norm": 2.2674145698547363, "learning_rate": 1.3553854920410568e-05, "loss": 0.7909, "step": 4818 }, { "epoch": 0.36464757292573113, "grad_norm": 2.701314926147461, "learning_rate": 1.3551912143868564e-05, "loss": 0.7936, "step": 4819 }, { "epoch": 0.3647232416480648, "grad_norm": 2.063055992126465, "learning_rate": 1.35499690822711e-05, "loss": 0.6838, "step": 4820 }, { "epoch": 0.3647989103703984, "grad_norm": 2.2792537212371826, "learning_rate": 1.3548025735739852e-05, "loss": 0.7913, "step": 4821 }, { "epoch": 0.364874579092732, "grad_norm": 2.7173550128936768, "learning_rate": 1.3546082104396528e-05, "loss": 0.7174, "step": 4822 }, { "epoch": 0.36495024781506563, "grad_norm": 2.250936508178711, "learning_rate": 1.3544138188362835e-05, "loss": 0.8527, "step": 4823 }, { "epoch": 0.36502591653739924, "grad_norm": 2.110093355178833, "learning_rate": 1.354219398776051e-05, "loss": 0.9162, "step": 4824 }, { "epoch": 0.3651015852597329, "grad_norm": 2.133039951324463, "learning_rate": 1.354024950271131e-05, "loss": 0.7488, "step": 4825 }, { "epoch": 0.3651772539820665, "grad_norm": 1.9301999807357788, "learning_rate": 1.3538304733337e-05, "loss": 0.7952, "step": 4826 }, { "epoch": 0.36525292270440013, "grad_norm": 2.617494821548462, "learning_rate": 1.3536359679759369e-05, "loss": 0.6829, "step": 4827 }, { "epoch": 0.36532859142673374, "grad_norm": 2.2446556091308594, "learning_rate": 1.3534414342100221e-05, "loss": 0.684, "step": 4828 }, { "epoch": 0.3654042601490674, "grad_norm": 2.0811996459960938, "learning_rate": 1.3532468720481382e-05, "loss": 0.6984, "step": 4829 }, { "epoch": 0.365479928871401, "grad_norm": 2.136030673980713, "learning_rate": 1.3530522815024692e-05, "loss": 0.6672, "step": 4830 }, { "epoch": 0.3655555975937346, "grad_norm": 2.017378330230713, "learning_rate": 1.3528576625852012e-05, "loss": 0.8001, "step": 4831 }, { "epoch": 0.36563126631606824, "grad_norm": 2.336763620376587, "learning_rate": 1.3526630153085214e-05, "loss": 0.8168, "step": 4832 }, { "epoch": 0.3657069350384019, "grad_norm": 2.5141499042510986, "learning_rate": 1.352468339684619e-05, "loss": 0.7466, "step": 4833 }, { "epoch": 0.3657826037607355, "grad_norm": 2.2805662155151367, "learning_rate": 1.3522736357256866e-05, "loss": 0.6801, "step": 4834 }, { "epoch": 0.3658582724830691, "grad_norm": 2.272472381591797, "learning_rate": 1.3520789034439158e-05, "loss": 0.74, "step": 4835 }, { "epoch": 0.36593394120540274, "grad_norm": 2.805711507797241, "learning_rate": 1.351884142851502e-05, "loss": 0.6199, "step": 4836 }, { "epoch": 0.36600960992773635, "grad_norm": 2.3359363079071045, "learning_rate": 1.3516893539606415e-05, "loss": 0.6921, "step": 4837 }, { "epoch": 0.36608527865007, "grad_norm": 2.1814374923706055, "learning_rate": 1.3514945367835328e-05, "loss": 0.6558, "step": 4838 }, { "epoch": 0.3661609473724036, "grad_norm": 1.8459466695785522, "learning_rate": 1.3512996913323758e-05, "loss": 0.6672, "step": 4839 }, { "epoch": 0.36623661609473723, "grad_norm": 2.3520541191101074, "learning_rate": 1.3511048176193727e-05, "loss": 0.842, "step": 4840 }, { "epoch": 0.36631228481707084, "grad_norm": 1.4185298681259155, "learning_rate": 1.3509099156567269e-05, "loss": 0.9015, "step": 4841 }, { "epoch": 0.3663879535394045, "grad_norm": 1.6182681322097778, "learning_rate": 1.3507149854566433e-05, "loss": 0.7128, "step": 4842 }, { "epoch": 0.3664636222617381, "grad_norm": 2.5852813720703125, "learning_rate": 1.3505200270313298e-05, "loss": 0.7549, "step": 4843 }, { "epoch": 0.36653929098407173, "grad_norm": 2.689218044281006, "learning_rate": 1.3503250403929951e-05, "loss": 0.6497, "step": 4844 }, { "epoch": 0.36661495970640534, "grad_norm": 2.295428514480591, "learning_rate": 1.3501300255538499e-05, "loss": 0.8129, "step": 4845 }, { "epoch": 0.366690628428739, "grad_norm": 2.2084271907806396, "learning_rate": 1.3499349825261065e-05, "loss": 0.6761, "step": 4846 }, { "epoch": 0.3667662971510726, "grad_norm": 2.2909162044525146, "learning_rate": 1.3497399113219792e-05, "loss": 0.8429, "step": 4847 }, { "epoch": 0.36684196587340623, "grad_norm": 2.24273419380188, "learning_rate": 1.349544811953684e-05, "loss": 0.7073, "step": 4848 }, { "epoch": 0.36691763459573984, "grad_norm": 1.7732197046279907, "learning_rate": 1.3493496844334386e-05, "loss": 0.6471, "step": 4849 }, { "epoch": 0.36699330331807345, "grad_norm": 2.0910346508026123, "learning_rate": 1.3491545287734628e-05, "loss": 0.7475, "step": 4850 }, { "epoch": 0.3670689720404071, "grad_norm": 2.107093095779419, "learning_rate": 1.3489593449859774e-05, "loss": 0.7203, "step": 4851 }, { "epoch": 0.3671446407627407, "grad_norm": 2.1504039764404297, "learning_rate": 1.348764133083206e-05, "loss": 0.6333, "step": 4852 }, { "epoch": 0.36722030948507434, "grad_norm": 1.9339256286621094, "learning_rate": 1.3485688930773729e-05, "loss": 0.6488, "step": 4853 }, { "epoch": 0.36729597820740795, "grad_norm": 2.6421656608581543, "learning_rate": 1.348373624980705e-05, "loss": 0.6136, "step": 4854 }, { "epoch": 0.3673716469297416, "grad_norm": 2.230567693710327, "learning_rate": 1.3481783288054306e-05, "loss": 0.7886, "step": 4855 }, { "epoch": 0.3674473156520752, "grad_norm": 2.4279305934906006, "learning_rate": 1.3479830045637794e-05, "loss": 0.8438, "step": 4856 }, { "epoch": 0.36752298437440883, "grad_norm": 2.177305221557617, "learning_rate": 1.3477876522679835e-05, "loss": 0.6058, "step": 4857 }, { "epoch": 0.36759865309674244, "grad_norm": 2.3177402019500732, "learning_rate": 1.3475922719302765e-05, "loss": 0.8804, "step": 4858 }, { "epoch": 0.3676743218190761, "grad_norm": 2.5845775604248047, "learning_rate": 1.3473968635628939e-05, "loss": 0.7402, "step": 4859 }, { "epoch": 0.3677499905414097, "grad_norm": 2.1138968467712402, "learning_rate": 1.3472014271780725e-05, "loss": 0.5874, "step": 4860 }, { "epoch": 0.36782565926374333, "grad_norm": 2.863762378692627, "learning_rate": 1.3470059627880516e-05, "loss": 0.5876, "step": 4861 }, { "epoch": 0.36790132798607694, "grad_norm": 2.387801170349121, "learning_rate": 1.3468104704050713e-05, "loss": 0.7677, "step": 4862 }, { "epoch": 0.36797699670841055, "grad_norm": 2.5543148517608643, "learning_rate": 1.3466149500413742e-05, "loss": 0.8206, "step": 4863 }, { "epoch": 0.3680526654307442, "grad_norm": 2.3808276653289795, "learning_rate": 1.3464194017092043e-05, "loss": 0.8768, "step": 4864 }, { "epoch": 0.36812833415307783, "grad_norm": 3.0910837650299072, "learning_rate": 1.3462238254208076e-05, "loss": 0.7585, "step": 4865 }, { "epoch": 0.36820400287541144, "grad_norm": 2.5641915798187256, "learning_rate": 1.3460282211884317e-05, "loss": 0.911, "step": 4866 }, { "epoch": 0.36827967159774505, "grad_norm": 2.5306897163391113, "learning_rate": 1.345832589024326e-05, "loss": 0.7569, "step": 4867 }, { "epoch": 0.3683553403200787, "grad_norm": 2.912998914718628, "learning_rate": 1.3456369289407418e-05, "loss": 0.6358, "step": 4868 }, { "epoch": 0.3684310090424123, "grad_norm": 2.1079254150390625, "learning_rate": 1.3454412409499314e-05, "loss": 0.612, "step": 4869 }, { "epoch": 0.36850667776474594, "grad_norm": 2.1984755992889404, "learning_rate": 1.3452455250641498e-05, "loss": 0.6124, "step": 4870 }, { "epoch": 0.36858234648707955, "grad_norm": 2.0500175952911377, "learning_rate": 1.3450497812956535e-05, "loss": 0.6647, "step": 4871 }, { "epoch": 0.3686580152094132, "grad_norm": 2.169865846633911, "learning_rate": 1.3448540096567004e-05, "loss": 0.6461, "step": 4872 }, { "epoch": 0.3687336839317468, "grad_norm": 4.670175075531006, "learning_rate": 1.3446582101595503e-05, "loss": 0.6869, "step": 4873 }, { "epoch": 0.36880935265408044, "grad_norm": 1.9263705015182495, "learning_rate": 1.3444623828164646e-05, "loss": 0.729, "step": 4874 }, { "epoch": 0.36888502137641405, "grad_norm": 2.334681510925293, "learning_rate": 1.3442665276397076e-05, "loss": 0.8118, "step": 4875 }, { "epoch": 0.36896069009874766, "grad_norm": 2.442364454269409, "learning_rate": 1.3440706446415433e-05, "loss": 0.7216, "step": 4876 }, { "epoch": 0.3690363588210813, "grad_norm": 3.1642048358917236, "learning_rate": 1.3438747338342389e-05, "loss": 0.7009, "step": 4877 }, { "epoch": 0.36911202754341493, "grad_norm": 2.8377344608306885, "learning_rate": 1.3436787952300629e-05, "loss": 0.758, "step": 4878 }, { "epoch": 0.36918769626574854, "grad_norm": 2.899456262588501, "learning_rate": 1.3434828288412859e-05, "loss": 0.6575, "step": 4879 }, { "epoch": 0.36926336498808215, "grad_norm": 2.240098476409912, "learning_rate": 1.34328683468018e-05, "loss": 0.7384, "step": 4880 }, { "epoch": 0.3693390337104158, "grad_norm": 2.007436513900757, "learning_rate": 1.3430908127590185e-05, "loss": 0.757, "step": 4881 }, { "epoch": 0.36941470243274943, "grad_norm": 1.9840151071548462, "learning_rate": 1.342894763090077e-05, "loss": 0.6856, "step": 4882 }, { "epoch": 0.36949037115508304, "grad_norm": 2.434241771697998, "learning_rate": 1.3426986856856331e-05, "loss": 0.8133, "step": 4883 }, { "epoch": 0.36956603987741665, "grad_norm": 2.2398934364318848, "learning_rate": 1.3425025805579656e-05, "loss": 0.7309, "step": 4884 }, { "epoch": 0.3696417085997503, "grad_norm": 1.9073582887649536, "learning_rate": 1.3423064477193551e-05, "loss": 0.8249, "step": 4885 }, { "epoch": 0.36971737732208393, "grad_norm": 2.0606343746185303, "learning_rate": 1.3421102871820848e-05, "loss": 0.7246, "step": 4886 }, { "epoch": 0.36979304604441754, "grad_norm": 2.2094714641571045, "learning_rate": 1.341914098958438e-05, "loss": 0.7137, "step": 4887 }, { "epoch": 0.36986871476675115, "grad_norm": 2.191936731338501, "learning_rate": 1.341717883060701e-05, "loss": 0.6196, "step": 4888 }, { "epoch": 0.36994438348908476, "grad_norm": 1.9013859033584595, "learning_rate": 1.3415216395011615e-05, "loss": 0.7521, "step": 4889 }, { "epoch": 0.3700200522114184, "grad_norm": 2.026242733001709, "learning_rate": 1.3413253682921088e-05, "loss": 0.5991, "step": 4890 }, { "epoch": 0.37009572093375204, "grad_norm": 1.986952304840088, "learning_rate": 1.3411290694458343e-05, "loss": 0.6441, "step": 4891 }, { "epoch": 0.37017138965608565, "grad_norm": 1.9378926753997803, "learning_rate": 1.3409327429746304e-05, "loss": 0.7499, "step": 4892 }, { "epoch": 0.37024705837841926, "grad_norm": 2.1102092266082764, "learning_rate": 1.3407363888907925e-05, "loss": 0.7828, "step": 4893 }, { "epoch": 0.3703227271007529, "grad_norm": 1.809720516204834, "learning_rate": 1.340540007206616e-05, "loss": 0.7066, "step": 4894 }, { "epoch": 0.37039839582308653, "grad_norm": 2.9328057765960693, "learning_rate": 1.3403435979343995e-05, "loss": 0.8314, "step": 4895 }, { "epoch": 0.37047406454542015, "grad_norm": 1.9853700399398804, "learning_rate": 1.3401471610864426e-05, "loss": 0.8033, "step": 4896 }, { "epoch": 0.37054973326775376, "grad_norm": 2.24923038482666, "learning_rate": 1.3399506966750466e-05, "loss": 0.779, "step": 4897 }, { "epoch": 0.3706254019900874, "grad_norm": 2.0755980014801025, "learning_rate": 1.3397542047125156e-05, "loss": 0.6533, "step": 4898 }, { "epoch": 0.37070107071242103, "grad_norm": 1.9873055219650269, "learning_rate": 1.3395576852111535e-05, "loss": 0.9006, "step": 4899 }, { "epoch": 0.37077673943475464, "grad_norm": 1.7531121969223022, "learning_rate": 1.3393611381832675e-05, "loss": 0.7746, "step": 4900 }, { "epoch": 0.37085240815708825, "grad_norm": 2.0152571201324463, "learning_rate": 1.3391645636411661e-05, "loss": 0.6594, "step": 4901 }, { "epoch": 0.37092807687942186, "grad_norm": 1.8413496017456055, "learning_rate": 1.3389679615971593e-05, "loss": 0.793, "step": 4902 }, { "epoch": 0.37100374560175553, "grad_norm": 2.1065332889556885, "learning_rate": 1.338771332063559e-05, "loss": 0.7577, "step": 4903 }, { "epoch": 0.37107941432408914, "grad_norm": 2.4198694229125977, "learning_rate": 1.3385746750526784e-05, "loss": 0.7809, "step": 4904 }, { "epoch": 0.37115508304642275, "grad_norm": 2.0909576416015625, "learning_rate": 1.3383779905768336e-05, "loss": 0.6867, "step": 4905 }, { "epoch": 0.37123075176875636, "grad_norm": 2.1321213245391846, "learning_rate": 1.3381812786483408e-05, "loss": 0.7633, "step": 4906 }, { "epoch": 0.37130642049109003, "grad_norm": 3.150540351867676, "learning_rate": 1.3379845392795192e-05, "loss": 0.7857, "step": 4907 }, { "epoch": 0.37138208921342364, "grad_norm": 2.434208631515503, "learning_rate": 1.337787772482689e-05, "loss": 0.8088, "step": 4908 }, { "epoch": 0.37145775793575725, "grad_norm": 3.1003522872924805, "learning_rate": 1.3375909782701728e-05, "loss": 0.775, "step": 4909 }, { "epoch": 0.37153342665809086, "grad_norm": 2.1573758125305176, "learning_rate": 1.337394156654294e-05, "loss": 0.6811, "step": 4910 }, { "epoch": 0.3716090953804245, "grad_norm": 2.903069019317627, "learning_rate": 1.3371973076473783e-05, "loss": 0.6592, "step": 4911 }, { "epoch": 0.37168476410275814, "grad_norm": 2.0902886390686035, "learning_rate": 1.3370004312617533e-05, "loss": 0.772, "step": 4912 }, { "epoch": 0.37176043282509175, "grad_norm": 2.2064809799194336, "learning_rate": 1.3368035275097477e-05, "loss": 0.6503, "step": 4913 }, { "epoch": 0.37183610154742536, "grad_norm": 2.047928810119629, "learning_rate": 1.3366065964036927e-05, "loss": 0.7396, "step": 4914 }, { "epoch": 0.37191177026975897, "grad_norm": 2.1945924758911133, "learning_rate": 1.3364096379559203e-05, "loss": 0.757, "step": 4915 }, { "epoch": 0.37198743899209263, "grad_norm": 2.299427032470703, "learning_rate": 1.3362126521787649e-05, "loss": 0.8464, "step": 4916 }, { "epoch": 0.37206310771442624, "grad_norm": 2.420886278152466, "learning_rate": 1.3360156390845623e-05, "loss": 0.7771, "step": 4917 }, { "epoch": 0.37213877643675985, "grad_norm": 2.385572910308838, "learning_rate": 1.33581859868565e-05, "loss": 0.7385, "step": 4918 }, { "epoch": 0.37221444515909347, "grad_norm": 2.7393147945404053, "learning_rate": 1.3356215309943676e-05, "loss": 0.8137, "step": 4919 }, { "epoch": 0.37229011388142713, "grad_norm": 1.9456791877746582, "learning_rate": 1.3354244360230558e-05, "loss": 0.783, "step": 4920 }, { "epoch": 0.37236578260376074, "grad_norm": 1.7846276760101318, "learning_rate": 1.3352273137840579e-05, "loss": 0.5752, "step": 4921 }, { "epoch": 0.37244145132609435, "grad_norm": 2.710305690765381, "learning_rate": 1.3350301642897174e-05, "loss": 0.6808, "step": 4922 }, { "epoch": 0.37251712004842796, "grad_norm": 2.421003818511963, "learning_rate": 1.3348329875523812e-05, "loss": 0.7287, "step": 4923 }, { "epoch": 0.37259278877076163, "grad_norm": 2.223174810409546, "learning_rate": 1.3346357835843968e-05, "loss": 0.6746, "step": 4924 }, { "epoch": 0.37266845749309524, "grad_norm": 2.102065086364746, "learning_rate": 1.334438552398114e-05, "loss": 0.6813, "step": 4925 }, { "epoch": 0.37274412621542885, "grad_norm": 2.145731210708618, "learning_rate": 1.334241294005884e-05, "loss": 0.7174, "step": 4926 }, { "epoch": 0.37281979493776246, "grad_norm": 2.4170005321502686, "learning_rate": 1.3340440084200594e-05, "loss": 0.7821, "step": 4927 }, { "epoch": 0.3728954636600961, "grad_norm": 2.3091304302215576, "learning_rate": 1.3338466956529953e-05, "loss": 0.6898, "step": 4928 }, { "epoch": 0.37297113238242974, "grad_norm": 2.37115216255188, "learning_rate": 1.3336493557170476e-05, "loss": 0.6841, "step": 4929 }, { "epoch": 0.37304680110476335, "grad_norm": 2.1162335872650146, "learning_rate": 1.3334519886245749e-05, "loss": 0.5999, "step": 4930 }, { "epoch": 0.37312246982709696, "grad_norm": 1.9648668766021729, "learning_rate": 1.3332545943879367e-05, "loss": 0.7191, "step": 4931 }, { "epoch": 0.37319813854943057, "grad_norm": 2.149312973022461, "learning_rate": 1.3330571730194945e-05, "loss": 0.6175, "step": 4932 }, { "epoch": 0.37327380727176424, "grad_norm": 2.0248279571533203, "learning_rate": 1.3328597245316115e-05, "loss": 0.6749, "step": 4933 }, { "epoch": 0.37334947599409785, "grad_norm": 2.0984058380126953, "learning_rate": 1.3326622489366525e-05, "loss": 0.6864, "step": 4934 }, { "epoch": 0.37342514471643146, "grad_norm": 1.864095687866211, "learning_rate": 1.3324647462469841e-05, "loss": 0.8771, "step": 4935 }, { "epoch": 0.37350081343876507, "grad_norm": 2.171860933303833, "learning_rate": 1.3322672164749742e-05, "loss": 0.6689, "step": 4936 }, { "epoch": 0.37357648216109873, "grad_norm": 2.0032029151916504, "learning_rate": 1.3320696596329935e-05, "loss": 0.7789, "step": 4937 }, { "epoch": 0.37365215088343234, "grad_norm": 1.7304359674453735, "learning_rate": 1.3318720757334126e-05, "loss": 0.5789, "step": 4938 }, { "epoch": 0.37372781960576595, "grad_norm": 2.128831624984741, "learning_rate": 1.3316744647886063e-05, "loss": 0.7089, "step": 4939 }, { "epoch": 0.37380348832809956, "grad_norm": 2.6077706813812256, "learning_rate": 1.3314768268109483e-05, "loss": 0.6515, "step": 4940 }, { "epoch": 0.37387915705043323, "grad_norm": 2.1214993000030518, "learning_rate": 1.3312791618128161e-05, "loss": 0.689, "step": 4941 }, { "epoch": 0.37395482577276684, "grad_norm": 2.303440809249878, "learning_rate": 1.3310814698065876e-05, "loss": 0.8364, "step": 4942 }, { "epoch": 0.37403049449510045, "grad_norm": 2.154649496078491, "learning_rate": 1.3308837508046431e-05, "loss": 0.7713, "step": 4943 }, { "epoch": 0.37410616321743406, "grad_norm": 1.6795384883880615, "learning_rate": 1.3306860048193649e-05, "loss": 0.5444, "step": 4944 }, { "epoch": 0.3741818319397677, "grad_norm": 1.9569112062454224, "learning_rate": 1.3304882318631358e-05, "loss": 0.7279, "step": 4945 }, { "epoch": 0.37425750066210134, "grad_norm": 2.33054518699646, "learning_rate": 1.3302904319483413e-05, "loss": 0.8949, "step": 4946 }, { "epoch": 0.37433316938443495, "grad_norm": 2.1638824939727783, "learning_rate": 1.3300926050873681e-05, "loss": 0.6947, "step": 4947 }, { "epoch": 0.37440883810676856, "grad_norm": 2.491903066635132, "learning_rate": 1.3298947512926052e-05, "loss": 0.7139, "step": 4948 }, { "epoch": 0.37448450682910217, "grad_norm": 3.184447765350342, "learning_rate": 1.3296968705764422e-05, "loss": 0.9073, "step": 4949 }, { "epoch": 0.37456017555143584, "grad_norm": 2.1764798164367676, "learning_rate": 1.3294989629512715e-05, "loss": 0.8647, "step": 4950 }, { "epoch": 0.37463584427376945, "grad_norm": 2.0613832473754883, "learning_rate": 1.3293010284294867e-05, "loss": 0.7197, "step": 4951 }, { "epoch": 0.37471151299610306, "grad_norm": 2.1119112968444824, "learning_rate": 1.3291030670234827e-05, "loss": 0.7002, "step": 4952 }, { "epoch": 0.37478718171843667, "grad_norm": 2.2408530712127686, "learning_rate": 1.328905078745657e-05, "loss": 0.6455, "step": 4953 }, { "epoch": 0.37486285044077033, "grad_norm": 2.5578296184539795, "learning_rate": 1.3287070636084077e-05, "loss": 0.7374, "step": 4954 }, { "epoch": 0.37493851916310394, "grad_norm": 2.552988052368164, "learning_rate": 1.3285090216241359e-05, "loss": 0.7484, "step": 4955 }, { "epoch": 0.37501418788543756, "grad_norm": 2.490983724594116, "learning_rate": 1.328310952805243e-05, "loss": 0.8299, "step": 4956 }, { "epoch": 0.37508985660777117, "grad_norm": 2.3703956604003906, "learning_rate": 1.3281128571641329e-05, "loss": 0.6885, "step": 4957 }, { "epoch": 0.3751655253301048, "grad_norm": 2.2803077697753906, "learning_rate": 1.3279147347132111e-05, "loss": 0.6471, "step": 4958 }, { "epoch": 0.37524119405243844, "grad_norm": 2.4633090496063232, "learning_rate": 1.327716585464884e-05, "loss": 0.7724, "step": 4959 }, { "epoch": 0.37531686277477205, "grad_norm": 1.877164363861084, "learning_rate": 1.3275184094315617e-05, "loss": 0.8416, "step": 4960 }, { "epoch": 0.37539253149710566, "grad_norm": 2.59460711479187, "learning_rate": 1.3273202066256534e-05, "loss": 0.8155, "step": 4961 }, { "epoch": 0.3754682002194393, "grad_norm": 1.790168285369873, "learning_rate": 1.3271219770595716e-05, "loss": 0.7016, "step": 4962 }, { "epoch": 0.37554386894177294, "grad_norm": 2.1875531673431396, "learning_rate": 1.3269237207457305e-05, "loss": 0.7702, "step": 4963 }, { "epoch": 0.37561953766410655, "grad_norm": 1.8856444358825684, "learning_rate": 1.3267254376965449e-05, "loss": 0.7305, "step": 4964 }, { "epoch": 0.37569520638644016, "grad_norm": 2.1934876441955566, "learning_rate": 1.3265271279244324e-05, "loss": 0.7793, "step": 4965 }, { "epoch": 0.37577087510877377, "grad_norm": 1.9856321811676025, "learning_rate": 1.3263287914418111e-05, "loss": 0.7695, "step": 4966 }, { "epoch": 0.37584654383110744, "grad_norm": 2.125422239303589, "learning_rate": 1.3261304282611025e-05, "loss": 0.9119, "step": 4967 }, { "epoch": 0.37592221255344105, "grad_norm": 2.8048200607299805, "learning_rate": 1.3259320383947279e-05, "loss": 0.7251, "step": 4968 }, { "epoch": 0.37599788127577466, "grad_norm": 2.0084269046783447, "learning_rate": 1.3257336218551115e-05, "loss": 0.6035, "step": 4969 }, { "epoch": 0.37607354999810827, "grad_norm": 2.4063563346862793, "learning_rate": 1.3255351786546786e-05, "loss": 0.7344, "step": 4970 }, { "epoch": 0.3761492187204419, "grad_norm": 2.7517759799957275, "learning_rate": 1.3253367088058567e-05, "loss": 0.7425, "step": 4971 }, { "epoch": 0.37622488744277555, "grad_norm": 2.8988542556762695, "learning_rate": 1.3251382123210743e-05, "loss": 0.7319, "step": 4972 }, { "epoch": 0.37630055616510916, "grad_norm": 2.421642541885376, "learning_rate": 1.324939689212762e-05, "loss": 0.888, "step": 4973 }, { "epoch": 0.37637622488744277, "grad_norm": 2.56626558303833, "learning_rate": 1.324741139493352e-05, "loss": 0.654, "step": 4974 }, { "epoch": 0.3764518936097764, "grad_norm": 1.87079918384552, "learning_rate": 1.3245425631752777e-05, "loss": 0.7278, "step": 4975 }, { "epoch": 0.37652756233211004, "grad_norm": 2.264610767364502, "learning_rate": 1.3243439602709754e-05, "loss": 0.5887, "step": 4976 }, { "epoch": 0.37660323105444365, "grad_norm": 2.095689058303833, "learning_rate": 1.3241453307928816e-05, "loss": 0.7291, "step": 4977 }, { "epoch": 0.37667889977677727, "grad_norm": 1.5480690002441406, "learning_rate": 1.3239466747534355e-05, "loss": 0.6431, "step": 4978 }, { "epoch": 0.3767545684991109, "grad_norm": 2.456465721130371, "learning_rate": 1.3237479921650772e-05, "loss": 0.6346, "step": 4979 }, { "epoch": 0.37683023722144454, "grad_norm": 1.8623820543289185, "learning_rate": 1.323549283040249e-05, "loss": 0.7686, "step": 4980 }, { "epoch": 0.37690590594377815, "grad_norm": 3.514700174331665, "learning_rate": 1.3233505473913951e-05, "loss": 0.6733, "step": 4981 }, { "epoch": 0.37698157466611176, "grad_norm": 2.421954393386841, "learning_rate": 1.3231517852309602e-05, "loss": 0.6811, "step": 4982 }, { "epoch": 0.3770572433884454, "grad_norm": 3.265939950942993, "learning_rate": 1.3229529965713925e-05, "loss": 0.8858, "step": 4983 }, { "epoch": 0.377132912110779, "grad_norm": 2.31626558303833, "learning_rate": 1.3227541814251395e-05, "loss": 0.8614, "step": 4984 }, { "epoch": 0.37720858083311265, "grad_norm": 1.9577152729034424, "learning_rate": 1.3225553398046527e-05, "loss": 0.7351, "step": 4985 }, { "epoch": 0.37728424955544626, "grad_norm": 1.793585181236267, "learning_rate": 1.3223564717223837e-05, "loss": 0.7094, "step": 4986 }, { "epoch": 0.37735991827777987, "grad_norm": 2.228999137878418, "learning_rate": 1.3221575771907864e-05, "loss": 0.6827, "step": 4987 }, { "epoch": 0.3774355870001135, "grad_norm": 2.4481256008148193, "learning_rate": 1.321958656222316e-05, "loss": 0.7283, "step": 4988 }, { "epoch": 0.37751125572244715, "grad_norm": 2.0559909343719482, "learning_rate": 1.32175970882943e-05, "loss": 0.6605, "step": 4989 }, { "epoch": 0.37758692444478076, "grad_norm": 2.7114646434783936, "learning_rate": 1.3215607350245869e-05, "loss": 0.7703, "step": 4990 }, { "epoch": 0.37766259316711437, "grad_norm": 2.345587968826294, "learning_rate": 1.3213617348202471e-05, "loss": 0.8246, "step": 4991 }, { "epoch": 0.377738261889448, "grad_norm": 2.660860538482666, "learning_rate": 1.3211627082288725e-05, "loss": 0.7403, "step": 4992 }, { "epoch": 0.37781393061178165, "grad_norm": 2.2472615242004395, "learning_rate": 1.320963655262927e-05, "loss": 0.7548, "step": 4993 }, { "epoch": 0.37788959933411526, "grad_norm": 2.2617874145507812, "learning_rate": 1.3207645759348759e-05, "loss": 0.7703, "step": 4994 }, { "epoch": 0.37796526805644887, "grad_norm": 2.100846290588379, "learning_rate": 1.3205654702571858e-05, "loss": 0.7233, "step": 4995 }, { "epoch": 0.3780409367787825, "grad_norm": 2.057562828063965, "learning_rate": 1.320366338242326e-05, "loss": 0.6308, "step": 4996 }, { "epoch": 0.3781166055011161, "grad_norm": 2.615999937057495, "learning_rate": 1.3201671799027663e-05, "loss": 0.7651, "step": 4997 }, { "epoch": 0.37819227422344975, "grad_norm": 2.463115692138672, "learning_rate": 1.319967995250979e-05, "loss": 0.7944, "step": 4998 }, { "epoch": 0.37826794294578336, "grad_norm": 2.52860689163208, "learning_rate": 1.3197687842994374e-05, "loss": 0.7515, "step": 4999 }, { "epoch": 0.378343611668117, "grad_norm": 1.9558433294296265, "learning_rate": 1.3195695470606167e-05, "loss": 0.8407, "step": 5000 }, { "epoch": 0.3784192803904506, "grad_norm": 2.168161153793335, "learning_rate": 1.319370283546994e-05, "loss": 0.7869, "step": 5001 }, { "epoch": 0.37849494911278425, "grad_norm": 2.3098533153533936, "learning_rate": 1.3191709937710478e-05, "loss": 0.7613, "step": 5002 }, { "epoch": 0.37857061783511786, "grad_norm": 2.507798910140991, "learning_rate": 1.3189716777452581e-05, "loss": 0.6891, "step": 5003 }, { "epoch": 0.3786462865574515, "grad_norm": 2.061244249343872, "learning_rate": 1.318772335482107e-05, "loss": 0.72, "step": 5004 }, { "epoch": 0.3787219552797851, "grad_norm": 2.2283413410186768, "learning_rate": 1.3185729669940776e-05, "loss": 0.7914, "step": 5005 }, { "epoch": 0.37879762400211875, "grad_norm": 2.303812026977539, "learning_rate": 1.3183735722936554e-05, "loss": 0.7093, "step": 5006 }, { "epoch": 0.37887329272445236, "grad_norm": 2.085308313369751, "learning_rate": 1.3181741513933265e-05, "loss": 0.8941, "step": 5007 }, { "epoch": 0.37894896144678597, "grad_norm": 1.7646946907043457, "learning_rate": 1.3179747043055802e-05, "loss": 0.6162, "step": 5008 }, { "epoch": 0.3790246301691196, "grad_norm": 2.3402299880981445, "learning_rate": 1.3177752310429057e-05, "loss": 0.7871, "step": 5009 }, { "epoch": 0.3791002988914532, "grad_norm": 2.1623239517211914, "learning_rate": 1.317575731617795e-05, "loss": 0.832, "step": 5010 }, { "epoch": 0.37917596761378686, "grad_norm": 2.153862953186035, "learning_rate": 1.3173762060427414e-05, "loss": 0.6814, "step": 5011 }, { "epoch": 0.37925163633612047, "grad_norm": 2.1609599590301514, "learning_rate": 1.31717665433024e-05, "loss": 0.7195, "step": 5012 }, { "epoch": 0.3793273050584541, "grad_norm": 1.9634231328964233, "learning_rate": 1.316977076492787e-05, "loss": 0.6328, "step": 5013 }, { "epoch": 0.3794029737807877, "grad_norm": 2.0286974906921387, "learning_rate": 1.316777472542881e-05, "loss": 0.8721, "step": 5014 }, { "epoch": 0.37947864250312136, "grad_norm": 4.155838966369629, "learning_rate": 1.3165778424930214e-05, "loss": 0.8739, "step": 5015 }, { "epoch": 0.37955431122545497, "grad_norm": 2.0985116958618164, "learning_rate": 1.31637818635571e-05, "loss": 0.692, "step": 5016 }, { "epoch": 0.3796299799477886, "grad_norm": 2.7467665672302246, "learning_rate": 1.3161785041434501e-05, "loss": 0.7331, "step": 5017 }, { "epoch": 0.3797056486701222, "grad_norm": 6.863169193267822, "learning_rate": 1.3159787958687457e-05, "loss": 0.8108, "step": 5018 }, { "epoch": 0.37978131739245585, "grad_norm": 2.4260308742523193, "learning_rate": 1.3157790615441042e-05, "loss": 0.6481, "step": 5019 }, { "epoch": 0.37985698611478946, "grad_norm": 2.156952142715454, "learning_rate": 1.3155793011820327e-05, "loss": 0.6652, "step": 5020 }, { "epoch": 0.3799326548371231, "grad_norm": 2.060511350631714, "learning_rate": 1.3153795147950412e-05, "loss": 0.7653, "step": 5021 }, { "epoch": 0.3800083235594567, "grad_norm": 1.6299368143081665, "learning_rate": 1.3151797023956411e-05, "loss": 0.9353, "step": 5022 }, { "epoch": 0.3800839922817903, "grad_norm": 2.0761969089508057, "learning_rate": 1.3149798639963451e-05, "loss": 0.6743, "step": 5023 }, { "epoch": 0.38015966100412396, "grad_norm": 2.3090572357177734, "learning_rate": 1.3147799996096682e-05, "loss": 0.8114, "step": 5024 }, { "epoch": 0.38023532972645757, "grad_norm": 2.1611216068267822, "learning_rate": 1.3145801092481256e-05, "loss": 0.8595, "step": 5025 }, { "epoch": 0.3803109984487912, "grad_norm": 2.3790178298950195, "learning_rate": 1.3143801929242359e-05, "loss": 0.6275, "step": 5026 }, { "epoch": 0.3803866671711248, "grad_norm": 2.3763949871063232, "learning_rate": 1.3141802506505183e-05, "loss": 0.8345, "step": 5027 }, { "epoch": 0.38046233589345846, "grad_norm": 2.2187368869781494, "learning_rate": 1.3139802824394936e-05, "loss": 0.7276, "step": 5028 }, { "epoch": 0.38053800461579207, "grad_norm": 1.9624474048614502, "learning_rate": 1.313780288303685e-05, "loss": 0.6358, "step": 5029 }, { "epoch": 0.3806136733381257, "grad_norm": 1.6875008344650269, "learning_rate": 1.3135802682556162e-05, "loss": 0.745, "step": 5030 }, { "epoch": 0.3806893420604593, "grad_norm": 3.063140392303467, "learning_rate": 1.3133802223078132e-05, "loss": 0.8704, "step": 5031 }, { "epoch": 0.38076501078279296, "grad_norm": 2.5540196895599365, "learning_rate": 1.3131801504728037e-05, "loss": 0.6593, "step": 5032 }, { "epoch": 0.38084067950512657, "grad_norm": 2.9757909774780273, "learning_rate": 1.3129800527631167e-05, "loss": 0.7676, "step": 5033 }, { "epoch": 0.3809163482274602, "grad_norm": 2.7554965019226074, "learning_rate": 1.3127799291912833e-05, "loss": 0.7257, "step": 5034 }, { "epoch": 0.3809920169497938, "grad_norm": 2.5679843425750732, "learning_rate": 1.3125797797698358e-05, "loss": 0.7173, "step": 5035 }, { "epoch": 0.3810676856721274, "grad_norm": 2.0927176475524902, "learning_rate": 1.3123796045113075e-05, "loss": 0.707, "step": 5036 }, { "epoch": 0.38114335439446106, "grad_norm": 3.1577799320220947, "learning_rate": 1.312179403428235e-05, "loss": 0.8109, "step": 5037 }, { "epoch": 0.3812190231167947, "grad_norm": 1.8900063037872314, "learning_rate": 1.3119791765331549e-05, "loss": 0.7693, "step": 5038 }, { "epoch": 0.3812946918391283, "grad_norm": 2.2258424758911133, "learning_rate": 1.3117789238386063e-05, "loss": 0.6866, "step": 5039 }, { "epoch": 0.3813703605614619, "grad_norm": 1.6877254247665405, "learning_rate": 1.3115786453571299e-05, "loss": 0.6474, "step": 5040 }, { "epoch": 0.38144602928379556, "grad_norm": 2.2569453716278076, "learning_rate": 1.311378341101267e-05, "loss": 0.6449, "step": 5041 }, { "epoch": 0.3815216980061292, "grad_norm": 2.3201940059661865, "learning_rate": 1.3111780110835622e-05, "loss": 0.7697, "step": 5042 }, { "epoch": 0.3815973667284628, "grad_norm": 2.2311851978302, "learning_rate": 1.3109776553165604e-05, "loss": 0.5872, "step": 5043 }, { "epoch": 0.3816730354507964, "grad_norm": 1.6546425819396973, "learning_rate": 1.3107772738128085e-05, "loss": 0.6899, "step": 5044 }, { "epoch": 0.38174870417313006, "grad_norm": 2.160982131958008, "learning_rate": 1.3105768665848551e-05, "loss": 0.7574, "step": 5045 }, { "epoch": 0.38182437289546367, "grad_norm": 2.2722971439361572, "learning_rate": 1.3103764336452501e-05, "loss": 0.75, "step": 5046 }, { "epoch": 0.3819000416177973, "grad_norm": 2.3404366970062256, "learning_rate": 1.310175975006546e-05, "loss": 0.6267, "step": 5047 }, { "epoch": 0.3819757103401309, "grad_norm": 1.7116867303848267, "learning_rate": 1.3099754906812952e-05, "loss": 0.6965, "step": 5048 }, { "epoch": 0.38205137906246456, "grad_norm": 1.8109760284423828, "learning_rate": 1.3097749806820535e-05, "loss": 0.6197, "step": 5049 }, { "epoch": 0.38212704778479817, "grad_norm": 2.044471025466919, "learning_rate": 1.309574445021377e-05, "loss": 0.8108, "step": 5050 }, { "epoch": 0.3822027165071318, "grad_norm": 2.0608906745910645, "learning_rate": 1.309373883711824e-05, "loss": 0.7047, "step": 5051 }, { "epoch": 0.3822783852294654, "grad_norm": 5.626868724822998, "learning_rate": 1.3091732967659546e-05, "loss": 0.9076, "step": 5052 }, { "epoch": 0.382354053951799, "grad_norm": 2.2120423316955566, "learning_rate": 1.3089726841963296e-05, "loss": 0.7146, "step": 5053 }, { "epoch": 0.38242972267413267, "grad_norm": 2.1795167922973633, "learning_rate": 1.3087720460155122e-05, "loss": 0.7101, "step": 5054 }, { "epoch": 0.3825053913964663, "grad_norm": 2.11128568649292, "learning_rate": 1.3085713822360676e-05, "loss": 0.8643, "step": 5055 }, { "epoch": 0.3825810601187999, "grad_norm": 2.028358221054077, "learning_rate": 1.3083706928705612e-05, "loss": 0.7917, "step": 5056 }, { "epoch": 0.3826567288411335, "grad_norm": 2.1836349964141846, "learning_rate": 1.3081699779315615e-05, "loss": 0.9105, "step": 5057 }, { "epoch": 0.38273239756346716, "grad_norm": 1.974503517150879, "learning_rate": 1.3079692374316374e-05, "loss": 0.7627, "step": 5058 }, { "epoch": 0.3828080662858008, "grad_norm": 2.411986827850342, "learning_rate": 1.3077684713833602e-05, "loss": 0.8903, "step": 5059 }, { "epoch": 0.3828837350081344, "grad_norm": 2.188807725906372, "learning_rate": 1.3075676797993023e-05, "loss": 0.6245, "step": 5060 }, { "epoch": 0.382959403730468, "grad_norm": 3.615983247756958, "learning_rate": 1.3073668626920381e-05, "loss": 0.7468, "step": 5061 }, { "epoch": 0.38303507245280166, "grad_norm": 2.4570207595825195, "learning_rate": 1.3071660200741436e-05, "loss": 0.8592, "step": 5062 }, { "epoch": 0.38311074117513527, "grad_norm": 2.342355966567993, "learning_rate": 1.3069651519581959e-05, "loss": 0.6396, "step": 5063 }, { "epoch": 0.3831864098974689, "grad_norm": 2.1598551273345947, "learning_rate": 1.3067642583567737e-05, "loss": 0.5799, "step": 5064 }, { "epoch": 0.3832620786198025, "grad_norm": 1.9163577556610107, "learning_rate": 1.3065633392824586e-05, "loss": 0.658, "step": 5065 }, { "epoch": 0.3833377473421361, "grad_norm": 2.467026472091675, "learning_rate": 1.3063623947478318e-05, "loss": 0.7139, "step": 5066 }, { "epoch": 0.38341341606446977, "grad_norm": 1.7132724523544312, "learning_rate": 1.3061614247654775e-05, "loss": 0.7353, "step": 5067 }, { "epoch": 0.3834890847868034, "grad_norm": 1.9084765911102295, "learning_rate": 1.3059604293479815e-05, "loss": 0.6118, "step": 5068 }, { "epoch": 0.383564753509137, "grad_norm": 1.8953239917755127, "learning_rate": 1.3057594085079298e-05, "loss": 0.8124, "step": 5069 }, { "epoch": 0.3836404222314706, "grad_norm": 2.3783981800079346, "learning_rate": 1.305558362257912e-05, "loss": 0.763, "step": 5070 }, { "epoch": 0.38371609095380427, "grad_norm": 2.3013756275177, "learning_rate": 1.3053572906105177e-05, "loss": 0.6881, "step": 5071 }, { "epoch": 0.3837917596761379, "grad_norm": 2.113539934158325, "learning_rate": 1.3051561935783388e-05, "loss": 0.8303, "step": 5072 }, { "epoch": 0.3838674283984715, "grad_norm": 2.5680975914001465, "learning_rate": 1.3049550711739684e-05, "loss": 0.7595, "step": 5073 }, { "epoch": 0.3839430971208051, "grad_norm": 1.8435100317001343, "learning_rate": 1.3047539234100018e-05, "loss": 0.6678, "step": 5074 }, { "epoch": 0.38401876584313877, "grad_norm": 2.025412082672119, "learning_rate": 1.3045527502990358e-05, "loss": 0.8392, "step": 5075 }, { "epoch": 0.3840944345654724, "grad_norm": 2.096165895462036, "learning_rate": 1.3043515518536674e-05, "loss": 0.8409, "step": 5076 }, { "epoch": 0.384170103287806, "grad_norm": 1.9506720304489136, "learning_rate": 1.3041503280864974e-05, "loss": 0.7444, "step": 5077 }, { "epoch": 0.3842457720101396, "grad_norm": 1.969355583190918, "learning_rate": 1.3039490790101266e-05, "loss": 0.6558, "step": 5078 }, { "epoch": 0.3843214407324732, "grad_norm": 1.9142673015594482, "learning_rate": 1.303747804637158e-05, "loss": 0.7879, "step": 5079 }, { "epoch": 0.3843971094548069, "grad_norm": 1.9106582403182983, "learning_rate": 1.3035465049801958e-05, "loss": 0.7209, "step": 5080 }, { "epoch": 0.3844727781771405, "grad_norm": 2.3156635761260986, "learning_rate": 1.3033451800518464e-05, "loss": 0.8002, "step": 5081 }, { "epoch": 0.3845484468994741, "grad_norm": 2.1822335720062256, "learning_rate": 1.3031438298647174e-05, "loss": 0.7506, "step": 5082 }, { "epoch": 0.3846241156218077, "grad_norm": 2.149963617324829, "learning_rate": 1.3029424544314173e-05, "loss": 0.6489, "step": 5083 }, { "epoch": 0.38469978434414137, "grad_norm": 2.2395076751708984, "learning_rate": 1.3027410537645578e-05, "loss": 0.6394, "step": 5084 }, { "epoch": 0.384775453066475, "grad_norm": 2.702310562133789, "learning_rate": 1.3025396278767511e-05, "loss": 0.7583, "step": 5085 }, { "epoch": 0.3848511217888086, "grad_norm": 6.533085823059082, "learning_rate": 1.3023381767806106e-05, "loss": 0.759, "step": 5086 }, { "epoch": 0.3849267905111422, "grad_norm": 2.2441484928131104, "learning_rate": 1.302136700488752e-05, "loss": 0.6975, "step": 5087 }, { "epoch": 0.38500245923347587, "grad_norm": 2.1438467502593994, "learning_rate": 1.301935199013793e-05, "loss": 0.6995, "step": 5088 }, { "epoch": 0.3850781279558095, "grad_norm": 2.285844564437866, "learning_rate": 1.3017336723683519e-05, "loss": 0.7527, "step": 5089 }, { "epoch": 0.3851537966781431, "grad_norm": 2.3817970752716064, "learning_rate": 1.3015321205650483e-05, "loss": 0.6128, "step": 5090 }, { "epoch": 0.3852294654004767, "grad_norm": 2.551360845565796, "learning_rate": 1.3013305436165049e-05, "loss": 0.7447, "step": 5091 }, { "epoch": 0.3853051341228103, "grad_norm": 2.2289671897888184, "learning_rate": 1.3011289415353446e-05, "loss": 0.8124, "step": 5092 }, { "epoch": 0.385380802845144, "grad_norm": 2.501476526260376, "learning_rate": 1.300927314334193e-05, "loss": 0.6713, "step": 5093 }, { "epoch": 0.3854564715674776, "grad_norm": 1.9687072038650513, "learning_rate": 1.300725662025676e-05, "loss": 0.6829, "step": 5094 }, { "epoch": 0.3855321402898112, "grad_norm": 2.438424825668335, "learning_rate": 1.3005239846224218e-05, "loss": 0.791, "step": 5095 }, { "epoch": 0.3856078090121448, "grad_norm": 2.1504287719726562, "learning_rate": 1.3003222821370605e-05, "loss": 0.7567, "step": 5096 }, { "epoch": 0.3856834777344785, "grad_norm": 2.486421823501587, "learning_rate": 1.3001205545822228e-05, "loss": 0.5951, "step": 5097 }, { "epoch": 0.3857591464568121, "grad_norm": 1.9564738273620605, "learning_rate": 1.299918801970542e-05, "loss": 0.8513, "step": 5098 }, { "epoch": 0.3858348151791457, "grad_norm": 2.0457041263580322, "learning_rate": 1.2997170243146524e-05, "loss": 0.7346, "step": 5099 }, { "epoch": 0.3859104839014793, "grad_norm": 1.925238847732544, "learning_rate": 1.2995152216271898e-05, "loss": 0.7619, "step": 5100 }, { "epoch": 0.385986152623813, "grad_norm": 2.6253859996795654, "learning_rate": 1.2993133939207918e-05, "loss": 0.8293, "step": 5101 }, { "epoch": 0.3860618213461466, "grad_norm": 2.5588762760162354, "learning_rate": 1.2991115412080976e-05, "loss": 0.7825, "step": 5102 }, { "epoch": 0.3861374900684802, "grad_norm": 1.8942152261734009, "learning_rate": 1.2989096635017476e-05, "loss": 0.7372, "step": 5103 }, { "epoch": 0.3862131587908138, "grad_norm": 2.209826946258545, "learning_rate": 1.2987077608143845e-05, "loss": 0.575, "step": 5104 }, { "epoch": 0.3862888275131474, "grad_norm": 2.1787028312683105, "learning_rate": 1.2985058331586516e-05, "loss": 0.8091, "step": 5105 }, { "epoch": 0.3863644962354811, "grad_norm": 2.4378044605255127, "learning_rate": 1.2983038805471949e-05, "loss": 0.5765, "step": 5106 }, { "epoch": 0.3864401649578147, "grad_norm": 2.141134023666382, "learning_rate": 1.2981019029926606e-05, "loss": 0.7571, "step": 5107 }, { "epoch": 0.3865158336801483, "grad_norm": 4.0216064453125, "learning_rate": 1.2978999005076976e-05, "loss": 0.8407, "step": 5108 }, { "epoch": 0.3865915024024819, "grad_norm": 2.1336159706115723, "learning_rate": 1.2976978731049559e-05, "loss": 0.794, "step": 5109 }, { "epoch": 0.3866671711248156, "grad_norm": 2.151615858078003, "learning_rate": 1.2974958207970868e-05, "loss": 0.6768, "step": 5110 }, { "epoch": 0.3867428398471492, "grad_norm": 1.8506669998168945, "learning_rate": 1.2972937435967443e-05, "loss": 0.6728, "step": 5111 }, { "epoch": 0.3868185085694828, "grad_norm": 2.7511610984802246, "learning_rate": 1.2970916415165822e-05, "loss": 0.7382, "step": 5112 }, { "epoch": 0.3868941772918164, "grad_norm": 2.5931057929992676, "learning_rate": 1.296889514569257e-05, "loss": 0.8731, "step": 5113 }, { "epoch": 0.3869698460141501, "grad_norm": 3.5109941959381104, "learning_rate": 1.296687362767427e-05, "loss": 0.8754, "step": 5114 }, { "epoch": 0.3870455147364837, "grad_norm": 2.4638400077819824, "learning_rate": 1.2964851861237511e-05, "loss": 0.7972, "step": 5115 }, { "epoch": 0.3871211834588173, "grad_norm": 2.775519371032715, "learning_rate": 1.2962829846508908e-05, "loss": 0.6752, "step": 5116 }, { "epoch": 0.3871968521811509, "grad_norm": 2.1849584579467773, "learning_rate": 1.2960807583615081e-05, "loss": 0.8533, "step": 5117 }, { "epoch": 0.3872725209034845, "grad_norm": 2.431049346923828, "learning_rate": 1.295878507268267e-05, "loss": 0.7406, "step": 5118 }, { "epoch": 0.3873481896258182, "grad_norm": 2.2878475189208984, "learning_rate": 1.2956762313838335e-05, "loss": 0.7887, "step": 5119 }, { "epoch": 0.3874238583481518, "grad_norm": 2.3642971515655518, "learning_rate": 1.2954739307208746e-05, "loss": 0.723, "step": 5120 }, { "epoch": 0.3874995270704854, "grad_norm": 1.9779037237167358, "learning_rate": 1.295271605292059e-05, "loss": 0.644, "step": 5121 }, { "epoch": 0.387575195792819, "grad_norm": 2.5578744411468506, "learning_rate": 1.2950692551100573e-05, "loss": 0.7849, "step": 5122 }, { "epoch": 0.3876508645151527, "grad_norm": 2.2762012481689453, "learning_rate": 1.2948668801875408e-05, "loss": 0.7393, "step": 5123 }, { "epoch": 0.3877265332374863, "grad_norm": 2.143754720687866, "learning_rate": 1.2946644805371833e-05, "loss": 0.7024, "step": 5124 }, { "epoch": 0.3878022019598199, "grad_norm": 2.0929954051971436, "learning_rate": 1.2944620561716592e-05, "loss": 0.717, "step": 5125 }, { "epoch": 0.3878778706821535, "grad_norm": 2.0429515838623047, "learning_rate": 1.2942596071036455e-05, "loss": 0.7081, "step": 5126 }, { "epoch": 0.3879535394044872, "grad_norm": 2.1083133220672607, "learning_rate": 1.2940571333458201e-05, "loss": 0.6678, "step": 5127 }, { "epoch": 0.3880292081268208, "grad_norm": 2.166097640991211, "learning_rate": 1.2938546349108623e-05, "loss": 0.6017, "step": 5128 }, { "epoch": 0.3881048768491544, "grad_norm": 1.7243160009384155, "learning_rate": 1.2936521118114534e-05, "loss": 0.6601, "step": 5129 }, { "epoch": 0.388180545571488, "grad_norm": 2.28934383392334, "learning_rate": 1.2934495640602759e-05, "loss": 0.7419, "step": 5130 }, { "epoch": 0.3882562142938216, "grad_norm": 2.0433170795440674, "learning_rate": 1.2932469916700144e-05, "loss": 0.8201, "step": 5131 }, { "epoch": 0.3883318830161553, "grad_norm": 2.5278637409210205, "learning_rate": 1.2930443946533543e-05, "loss": 0.7638, "step": 5132 }, { "epoch": 0.3884075517384889, "grad_norm": 1.9174318313598633, "learning_rate": 1.2928417730229827e-05, "loss": 0.7162, "step": 5133 }, { "epoch": 0.3884832204608225, "grad_norm": 2.190006732940674, "learning_rate": 1.2926391267915892e-05, "loss": 0.7597, "step": 5134 }, { "epoch": 0.3885588891831561, "grad_norm": 2.2163407802581787, "learning_rate": 1.292436455971863e-05, "loss": 0.5617, "step": 5135 }, { "epoch": 0.3886345579054898, "grad_norm": 1.9766048192977905, "learning_rate": 1.2922337605764971e-05, "loss": 0.7222, "step": 5136 }, { "epoch": 0.3887102266278234, "grad_norm": 2.149446964263916, "learning_rate": 1.2920310406181842e-05, "loss": 0.7806, "step": 5137 }, { "epoch": 0.388785895350157, "grad_norm": 1.846808910369873, "learning_rate": 1.2918282961096197e-05, "loss": 0.7699, "step": 5138 }, { "epoch": 0.3888615640724906, "grad_norm": 2.0156519412994385, "learning_rate": 1.2916255270635001e-05, "loss": 0.6868, "step": 5139 }, { "epoch": 0.3889372327948243, "grad_norm": 1.861183524131775, "learning_rate": 1.2914227334925231e-05, "loss": 0.6657, "step": 5140 }, { "epoch": 0.3890129015171579, "grad_norm": 2.640993118286133, "learning_rate": 1.2912199154093886e-05, "loss": 0.627, "step": 5141 }, { "epoch": 0.3890885702394915, "grad_norm": 2.4647865295410156, "learning_rate": 1.2910170728267974e-05, "loss": 0.7462, "step": 5142 }, { "epoch": 0.3891642389618251, "grad_norm": 2.260634422302246, "learning_rate": 1.2908142057574526e-05, "loss": 0.8352, "step": 5143 }, { "epoch": 0.3892399076841587, "grad_norm": 2.117558002471924, "learning_rate": 1.2906113142140582e-05, "loss": 0.8288, "step": 5144 }, { "epoch": 0.3893155764064924, "grad_norm": 2.3098366260528564, "learning_rate": 1.29040839820932e-05, "loss": 0.7885, "step": 5145 }, { "epoch": 0.389391245128826, "grad_norm": 1.80618155002594, "learning_rate": 1.2902054577559451e-05, "loss": 0.7721, "step": 5146 }, { "epoch": 0.3894669138511596, "grad_norm": 1.6692981719970703, "learning_rate": 1.2900024928666424e-05, "loss": 0.6533, "step": 5147 }, { "epoch": 0.3895425825734932, "grad_norm": 2.307391405105591, "learning_rate": 1.2897995035541223e-05, "loss": 0.6559, "step": 5148 }, { "epoch": 0.3896182512958269, "grad_norm": 2.1332476139068604, "learning_rate": 1.2895964898310961e-05, "loss": 0.7055, "step": 5149 }, { "epoch": 0.3896939200181605, "grad_norm": 2.272970199584961, "learning_rate": 1.289393451710278e-05, "loss": 0.7819, "step": 5150 }, { "epoch": 0.3897695887404941, "grad_norm": 2.7969746589660645, "learning_rate": 1.289190389204382e-05, "loss": 0.7392, "step": 5151 }, { "epoch": 0.3898452574628277, "grad_norm": 2.1883418560028076, "learning_rate": 1.2889873023261257e-05, "loss": 0.7967, "step": 5152 }, { "epoch": 0.3899209261851614, "grad_norm": 1.9223883152008057, "learning_rate": 1.288784191088226e-05, "loss": 0.7576, "step": 5153 }, { "epoch": 0.389996594907495, "grad_norm": 2.0606937408447266, "learning_rate": 1.2885810555034028e-05, "loss": 0.7704, "step": 5154 }, { "epoch": 0.3900722636298286, "grad_norm": 1.9041752815246582, "learning_rate": 1.2883778955843772e-05, "loss": 0.8243, "step": 5155 }, { "epoch": 0.3901479323521622, "grad_norm": 2.1987617015838623, "learning_rate": 1.2881747113438716e-05, "loss": 0.8491, "step": 5156 }, { "epoch": 0.39022360107449583, "grad_norm": 2.152064323425293, "learning_rate": 1.2879715027946101e-05, "loss": 0.7676, "step": 5157 }, { "epoch": 0.3902992697968295, "grad_norm": 2.4647457599639893, "learning_rate": 1.2877682699493179e-05, "loss": 0.7452, "step": 5158 }, { "epoch": 0.3903749385191631, "grad_norm": 1.774983286857605, "learning_rate": 1.2875650128207228e-05, "loss": 0.8262, "step": 5159 }, { "epoch": 0.3904506072414967, "grad_norm": 1.9229451417922974, "learning_rate": 1.2873617314215528e-05, "loss": 0.7605, "step": 5160 }, { "epoch": 0.3905262759638303, "grad_norm": 2.0117905139923096, "learning_rate": 1.2871584257645385e-05, "loss": 0.6922, "step": 5161 }, { "epoch": 0.390601944686164, "grad_norm": 2.2805237770080566, "learning_rate": 1.2869550958624115e-05, "loss": 0.9432, "step": 5162 }, { "epoch": 0.3906776134084976, "grad_norm": 2.6723804473876953, "learning_rate": 1.2867517417279045e-05, "loss": 0.741, "step": 5163 }, { "epoch": 0.3907532821308312, "grad_norm": 1.9901678562164307, "learning_rate": 1.2865483633737528e-05, "loss": 0.7327, "step": 5164 }, { "epoch": 0.3908289508531648, "grad_norm": 2.1943933963775635, "learning_rate": 1.286344960812692e-05, "loss": 0.8089, "step": 5165 }, { "epoch": 0.3909046195754985, "grad_norm": 2.47472882270813, "learning_rate": 1.2861415340574604e-05, "loss": 0.7517, "step": 5166 }, { "epoch": 0.3909802882978321, "grad_norm": 2.1459341049194336, "learning_rate": 1.2859380831207969e-05, "loss": 0.7393, "step": 5167 }, { "epoch": 0.3910559570201657, "grad_norm": 2.2298531532287598, "learning_rate": 1.2857346080154425e-05, "loss": 0.7545, "step": 5168 }, { "epoch": 0.3911316257424993, "grad_norm": 2.5509769916534424, "learning_rate": 1.2855311087541393e-05, "loss": 0.9029, "step": 5169 }, { "epoch": 0.391207294464833, "grad_norm": 2.2877771854400635, "learning_rate": 1.285327585349631e-05, "loss": 0.6986, "step": 5170 }, { "epoch": 0.3912829631871666, "grad_norm": 1.683161735534668, "learning_rate": 1.2851240378146632e-05, "loss": 0.8001, "step": 5171 }, { "epoch": 0.3913586319095002, "grad_norm": 1.9525566101074219, "learning_rate": 1.2849204661619822e-05, "loss": 0.7955, "step": 5172 }, { "epoch": 0.3914343006318338, "grad_norm": 2.2705700397491455, "learning_rate": 1.284716870404337e-05, "loss": 0.734, "step": 5173 }, { "epoch": 0.39150996935416743, "grad_norm": 1.9373785257339478, "learning_rate": 1.2845132505544766e-05, "loss": 0.7796, "step": 5174 }, { "epoch": 0.3915856380765011, "grad_norm": 2.5509033203125, "learning_rate": 1.284309606625153e-05, "loss": 0.7603, "step": 5175 }, { "epoch": 0.3916613067988347, "grad_norm": 3.8573920726776123, "learning_rate": 1.2841059386291191e-05, "loss": 0.681, "step": 5176 }, { "epoch": 0.3917369755211683, "grad_norm": 2.2207155227661133, "learning_rate": 1.2839022465791285e-05, "loss": 0.7496, "step": 5177 }, { "epoch": 0.39181264424350193, "grad_norm": 3.7512471675872803, "learning_rate": 1.283698530487938e-05, "loss": 0.6177, "step": 5178 }, { "epoch": 0.3918883129658356, "grad_norm": 2.095038890838623, "learning_rate": 1.283494790368304e-05, "loss": 0.7261, "step": 5179 }, { "epoch": 0.3919639816881692, "grad_norm": 2.247019052505493, "learning_rate": 1.2832910262329862e-05, "loss": 0.7003, "step": 5180 }, { "epoch": 0.3920396504105028, "grad_norm": 2.304305076599121, "learning_rate": 1.2830872380947447e-05, "loss": 0.7956, "step": 5181 }, { "epoch": 0.3921153191328364, "grad_norm": 1.8406106233596802, "learning_rate": 1.282883425966341e-05, "loss": 0.882, "step": 5182 }, { "epoch": 0.3921909878551701, "grad_norm": 3.2401795387268066, "learning_rate": 1.2826795898605389e-05, "loss": 0.7532, "step": 5183 }, { "epoch": 0.3922666565775037, "grad_norm": 2.3485889434814453, "learning_rate": 1.282475729790103e-05, "loss": 0.595, "step": 5184 }, { "epoch": 0.3923423252998373, "grad_norm": 1.8012150526046753, "learning_rate": 1.2822718457678001e-05, "loss": 0.6598, "step": 5185 }, { "epoch": 0.3924179940221709, "grad_norm": 2.22017502784729, "learning_rate": 1.2820679378063978e-05, "loss": 0.6302, "step": 5186 }, { "epoch": 0.39249366274450453, "grad_norm": 2.1888411045074463, "learning_rate": 1.2818640059186653e-05, "loss": 0.7243, "step": 5187 }, { "epoch": 0.3925693314668382, "grad_norm": 2.191774845123291, "learning_rate": 1.2816600501173737e-05, "loss": 0.7592, "step": 5188 }, { "epoch": 0.3926450001891718, "grad_norm": 2.351590871810913, "learning_rate": 1.2814560704152955e-05, "loss": 0.6887, "step": 5189 }, { "epoch": 0.3927206689115054, "grad_norm": 2.8945960998535156, "learning_rate": 1.2812520668252039e-05, "loss": 0.6931, "step": 5190 }, { "epoch": 0.39279633763383903, "grad_norm": 2.134856700897217, "learning_rate": 1.281048039359875e-05, "loss": 0.8368, "step": 5191 }, { "epoch": 0.3928720063561727, "grad_norm": 2.4972636699676514, "learning_rate": 1.2808439880320855e-05, "loss": 0.837, "step": 5192 }, { "epoch": 0.3929476750785063, "grad_norm": 2.14408016204834, "learning_rate": 1.2806399128546137e-05, "loss": 0.7233, "step": 5193 }, { "epoch": 0.3930233438008399, "grad_norm": 1.9677777290344238, "learning_rate": 1.2804358138402394e-05, "loss": 0.6967, "step": 5194 }, { "epoch": 0.39309901252317353, "grad_norm": 1.9467759132385254, "learning_rate": 1.280231691001744e-05, "loss": 0.7552, "step": 5195 }, { "epoch": 0.3931746812455072, "grad_norm": 2.6038689613342285, "learning_rate": 1.2800275443519102e-05, "loss": 0.7232, "step": 5196 }, { "epoch": 0.3932503499678408, "grad_norm": 2.1820638179779053, "learning_rate": 1.2798233739035222e-05, "loss": 0.8716, "step": 5197 }, { "epoch": 0.3933260186901744, "grad_norm": 2.153744697570801, "learning_rate": 1.2796191796693666e-05, "loss": 0.7718, "step": 5198 }, { "epoch": 0.393401687412508, "grad_norm": 2.0024526119232178, "learning_rate": 1.2794149616622297e-05, "loss": 0.7673, "step": 5199 }, { "epoch": 0.39347735613484164, "grad_norm": 1.9214941263198853, "learning_rate": 1.2792107198949008e-05, "loss": 0.6543, "step": 5200 }, { "epoch": 0.3935530248571753, "grad_norm": 2.2117514610290527, "learning_rate": 1.2790064543801701e-05, "loss": 0.7172, "step": 5201 }, { "epoch": 0.3936286935795089, "grad_norm": 2.943007707595825, "learning_rate": 1.2788021651308295e-05, "loss": 0.7764, "step": 5202 }, { "epoch": 0.3937043623018425, "grad_norm": 2.1258654594421387, "learning_rate": 1.278597852159672e-05, "loss": 0.7708, "step": 5203 }, { "epoch": 0.39378003102417614, "grad_norm": 2.93727445602417, "learning_rate": 1.2783935154794924e-05, "loss": 0.779, "step": 5204 }, { "epoch": 0.3938556997465098, "grad_norm": 1.9181923866271973, "learning_rate": 1.2781891551030873e-05, "loss": 0.7721, "step": 5205 }, { "epoch": 0.3939313684688434, "grad_norm": 2.5555663108825684, "learning_rate": 1.2779847710432538e-05, "loss": 0.7231, "step": 5206 }, { "epoch": 0.394007037191177, "grad_norm": 2.6176486015319824, "learning_rate": 1.2777803633127914e-05, "loss": 0.7323, "step": 5207 }, { "epoch": 0.39408270591351063, "grad_norm": 2.785477876663208, "learning_rate": 1.2775759319245007e-05, "loss": 0.6702, "step": 5208 }, { "epoch": 0.3941583746358443, "grad_norm": 2.178852081298828, "learning_rate": 1.2773714768911842e-05, "loss": 0.6602, "step": 5209 }, { "epoch": 0.3942340433581779, "grad_norm": 2.0223734378814697, "learning_rate": 1.277166998225645e-05, "loss": 0.7339, "step": 5210 }, { "epoch": 0.3943097120805115, "grad_norm": 3.054377555847168, "learning_rate": 1.2769624959406885e-05, "loss": 0.8046, "step": 5211 }, { "epoch": 0.39438538080284513, "grad_norm": 2.5661230087280273, "learning_rate": 1.2767579700491215e-05, "loss": 0.9283, "step": 5212 }, { "epoch": 0.39446104952517874, "grad_norm": 2.2824318408966064, "learning_rate": 1.2765534205637514e-05, "loss": 0.7967, "step": 5213 }, { "epoch": 0.3945367182475124, "grad_norm": 2.100961923599243, "learning_rate": 1.2763488474973886e-05, "loss": 0.589, "step": 5214 }, { "epoch": 0.394612386969846, "grad_norm": 2.079869508743286, "learning_rate": 1.2761442508628432e-05, "loss": 0.8719, "step": 5215 }, { "epoch": 0.39468805569217963, "grad_norm": 2.3895928859710693, "learning_rate": 1.2759396306729288e-05, "loss": 0.8063, "step": 5216 }, { "epoch": 0.39476372441451324, "grad_norm": 2.4894683361053467, "learning_rate": 1.2757349869404585e-05, "loss": 0.7269, "step": 5217 }, { "epoch": 0.3948393931368469, "grad_norm": 2.383577346801758, "learning_rate": 1.275530319678248e-05, "loss": 0.7615, "step": 5218 }, { "epoch": 0.3949150618591805, "grad_norm": 2.04361629486084, "learning_rate": 1.2753256288991145e-05, "loss": 0.5673, "step": 5219 }, { "epoch": 0.3949907305815141, "grad_norm": 2.225693941116333, "learning_rate": 1.2751209146158758e-05, "loss": 0.8495, "step": 5220 }, { "epoch": 0.39506639930384774, "grad_norm": 2.191380739212036, "learning_rate": 1.2749161768413526e-05, "loss": 0.6206, "step": 5221 }, { "epoch": 0.3951420680261814, "grad_norm": 1.8933615684509277, "learning_rate": 1.2747114155883653e-05, "loss": 0.6419, "step": 5222 }, { "epoch": 0.395217736748515, "grad_norm": 2.0595176219940186, "learning_rate": 1.2745066308697374e-05, "loss": 0.8184, "step": 5223 }, { "epoch": 0.3952934054708486, "grad_norm": 2.14420485496521, "learning_rate": 1.274301822698293e-05, "loss": 0.7094, "step": 5224 }, { "epoch": 0.39536907419318223, "grad_norm": 2.4212982654571533, "learning_rate": 1.274096991086858e-05, "loss": 0.6066, "step": 5225 }, { "epoch": 0.39544474291551585, "grad_norm": 2.151181221008301, "learning_rate": 1.2738921360482592e-05, "loss": 0.7381, "step": 5226 }, { "epoch": 0.3955204116378495, "grad_norm": 2.084139823913574, "learning_rate": 1.2736872575953256e-05, "loss": 0.7257, "step": 5227 }, { "epoch": 0.3955960803601831, "grad_norm": 2.635713577270508, "learning_rate": 1.2734823557408872e-05, "loss": 0.6161, "step": 5228 }, { "epoch": 0.39567174908251673, "grad_norm": 2.126365900039673, "learning_rate": 1.2732774304977758e-05, "loss": 0.7688, "step": 5229 }, { "epoch": 0.39574741780485034, "grad_norm": 1.905297040939331, "learning_rate": 1.2730724818788245e-05, "loss": 0.6682, "step": 5230 }, { "epoch": 0.395823086527184, "grad_norm": 2.004648447036743, "learning_rate": 1.2728675098968672e-05, "loss": 0.5425, "step": 5231 }, { "epoch": 0.3958987552495176, "grad_norm": 2.102177381515503, "learning_rate": 1.272662514564741e-05, "loss": 0.7984, "step": 5232 }, { "epoch": 0.39597442397185123, "grad_norm": 2.1269235610961914, "learning_rate": 1.2724574958952827e-05, "loss": 0.8195, "step": 5233 }, { "epoch": 0.39605009269418484, "grad_norm": 2.0913987159729004, "learning_rate": 1.2722524539013312e-05, "loss": 0.8296, "step": 5234 }, { "epoch": 0.3961257614165185, "grad_norm": 2.1840765476226807, "learning_rate": 1.2720473885957271e-05, "loss": 0.6681, "step": 5235 }, { "epoch": 0.3962014301388521, "grad_norm": 2.1150200366973877, "learning_rate": 1.271842299991312e-05, "loss": 0.6953, "step": 5236 }, { "epoch": 0.39627709886118573, "grad_norm": 2.1616060733795166, "learning_rate": 1.2716371881009295e-05, "loss": 0.7757, "step": 5237 }, { "epoch": 0.39635276758351934, "grad_norm": 2.005535840988159, "learning_rate": 1.2714320529374241e-05, "loss": 0.7313, "step": 5238 }, { "epoch": 0.39642843630585295, "grad_norm": 1.6546169519424438, "learning_rate": 1.2712268945136425e-05, "loss": 0.8232, "step": 5239 }, { "epoch": 0.3965041050281866, "grad_norm": 1.8763610124588013, "learning_rate": 1.271021712842432e-05, "loss": 0.7103, "step": 5240 }, { "epoch": 0.3965797737505202, "grad_norm": 2.4322524070739746, "learning_rate": 1.2708165079366417e-05, "loss": 0.7621, "step": 5241 }, { "epoch": 0.39665544247285384, "grad_norm": 2.386225700378418, "learning_rate": 1.2706112798091226e-05, "loss": 0.8655, "step": 5242 }, { "epoch": 0.39673111119518745, "grad_norm": 1.8513740301132202, "learning_rate": 1.2704060284727262e-05, "loss": 0.7369, "step": 5243 }, { "epoch": 0.3968067799175211, "grad_norm": 2.741036891937256, "learning_rate": 1.2702007539403062e-05, "loss": 0.7186, "step": 5244 }, { "epoch": 0.3968824486398547, "grad_norm": 2.038377046585083, "learning_rate": 1.2699954562247177e-05, "loss": 0.8323, "step": 5245 }, { "epoch": 0.39695811736218833, "grad_norm": 1.7875847816467285, "learning_rate": 1.2697901353388168e-05, "loss": 0.8214, "step": 5246 }, { "epoch": 0.39703378608452194, "grad_norm": 2.403543472290039, "learning_rate": 1.269584791295462e-05, "loss": 0.8744, "step": 5247 }, { "epoch": 0.3971094548068556, "grad_norm": 1.7908776998519897, "learning_rate": 1.269379424107512e-05, "loss": 0.6912, "step": 5248 }, { "epoch": 0.3971851235291892, "grad_norm": 2.3532586097717285, "learning_rate": 1.2691740337878277e-05, "loss": 0.6537, "step": 5249 }, { "epoch": 0.39726079225152283, "grad_norm": 2.0470757484436035, "learning_rate": 1.2689686203492713e-05, "loss": 0.7524, "step": 5250 }, { "epoch": 0.39733646097385644, "grad_norm": 1.9975119829177856, "learning_rate": 1.2687631838047064e-05, "loss": 0.7166, "step": 5251 }, { "epoch": 0.39741212969619005, "grad_norm": 1.9511202573776245, "learning_rate": 1.2685577241669984e-05, "loss": 0.7518, "step": 5252 }, { "epoch": 0.3974877984185237, "grad_norm": 2.32716703414917, "learning_rate": 1.2683522414490138e-05, "loss": 0.7032, "step": 5253 }, { "epoch": 0.39756346714085733, "grad_norm": 2.9506001472473145, "learning_rate": 1.2681467356636202e-05, "loss": 0.8021, "step": 5254 }, { "epoch": 0.39763913586319094, "grad_norm": 1.7488332986831665, "learning_rate": 1.2679412068236875e-05, "loss": 0.5693, "step": 5255 }, { "epoch": 0.39771480458552455, "grad_norm": 2.3912007808685303, "learning_rate": 1.2677356549420862e-05, "loss": 0.8525, "step": 5256 }, { "epoch": 0.3977904733078582, "grad_norm": 2.2952723503112793, "learning_rate": 1.2675300800316889e-05, "loss": 0.7448, "step": 5257 }, { "epoch": 0.3978661420301918, "grad_norm": 3.195134401321411, "learning_rate": 1.2673244821053692e-05, "loss": 0.7458, "step": 5258 }, { "epoch": 0.39794181075252544, "grad_norm": 2.2581069469451904, "learning_rate": 1.267118861176002e-05, "loss": 0.6457, "step": 5259 }, { "epoch": 0.39801747947485905, "grad_norm": 2.550224542617798, "learning_rate": 1.266913217256465e-05, "loss": 0.8612, "step": 5260 }, { "epoch": 0.3980931481971927, "grad_norm": 2.841343879699707, "learning_rate": 1.2667075503596348e-05, "loss": 0.6993, "step": 5261 }, { "epoch": 0.3981688169195263, "grad_norm": 2.263087034225464, "learning_rate": 1.2665018604983924e-05, "loss": 0.7111, "step": 5262 }, { "epoch": 0.39824448564185994, "grad_norm": 2.2085769176483154, "learning_rate": 1.2662961476856177e-05, "loss": 0.7621, "step": 5263 }, { "epoch": 0.39832015436419355, "grad_norm": 1.9521921873092651, "learning_rate": 1.2660904119341937e-05, "loss": 0.7074, "step": 5264 }, { "epoch": 0.39839582308652716, "grad_norm": 1.6472288370132446, "learning_rate": 1.265884653257004e-05, "loss": 0.7033, "step": 5265 }, { "epoch": 0.3984714918088608, "grad_norm": 2.273076057434082, "learning_rate": 1.2656788716669338e-05, "loss": 0.6346, "step": 5266 }, { "epoch": 0.39854716053119443, "grad_norm": 2.0756750106811523, "learning_rate": 1.26547306717687e-05, "loss": 0.7563, "step": 5267 }, { "epoch": 0.39862282925352804, "grad_norm": 1.9330674409866333, "learning_rate": 1.2652672397997006e-05, "loss": 0.7495, "step": 5268 }, { "epoch": 0.39869849797586165, "grad_norm": 1.7233151197433472, "learning_rate": 1.2650613895483152e-05, "loss": 0.6678, "step": 5269 }, { "epoch": 0.3987741666981953, "grad_norm": 1.934144377708435, "learning_rate": 1.2648555164356047e-05, "loss": 0.7826, "step": 5270 }, { "epoch": 0.39884983542052893, "grad_norm": 2.4113855361938477, "learning_rate": 1.2646496204744618e-05, "loss": 0.6297, "step": 5271 }, { "epoch": 0.39892550414286254, "grad_norm": 1.8144162893295288, "learning_rate": 1.2644437016777803e-05, "loss": 0.5998, "step": 5272 }, { "epoch": 0.39900117286519615, "grad_norm": 1.9845155477523804, "learning_rate": 1.2642377600584556e-05, "loss": 0.7491, "step": 5273 }, { "epoch": 0.3990768415875298, "grad_norm": 2.8481497764587402, "learning_rate": 1.264031795629384e-05, "loss": 0.5937, "step": 5274 }, { "epoch": 0.39915251030986343, "grad_norm": 2.570568323135376, "learning_rate": 1.263825808403464e-05, "loss": 0.8319, "step": 5275 }, { "epoch": 0.39922817903219704, "grad_norm": 2.246908187866211, "learning_rate": 1.2636197983935953e-05, "loss": 0.6984, "step": 5276 }, { "epoch": 0.39930384775453065, "grad_norm": 2.2146944999694824, "learning_rate": 1.2634137656126784e-05, "loss": 0.7518, "step": 5277 }, { "epoch": 0.3993795164768643, "grad_norm": 2.186021327972412, "learning_rate": 1.2632077100736164e-05, "loss": 0.7488, "step": 5278 }, { "epoch": 0.3994551851991979, "grad_norm": 1.7041964530944824, "learning_rate": 1.2630016317893127e-05, "loss": 0.6456, "step": 5279 }, { "epoch": 0.39953085392153154, "grad_norm": 1.9374867677688599, "learning_rate": 1.2627955307726726e-05, "loss": 0.6803, "step": 5280 }, { "epoch": 0.39960652264386515, "grad_norm": 2.4757678508758545, "learning_rate": 1.2625894070366033e-05, "loss": 0.6904, "step": 5281 }, { "epoch": 0.39968219136619876, "grad_norm": 2.042297840118408, "learning_rate": 1.2623832605940122e-05, "loss": 0.7731, "step": 5282 }, { "epoch": 0.3997578600885324, "grad_norm": 1.9081017971038818, "learning_rate": 1.2621770914578095e-05, "loss": 0.7498, "step": 5283 }, { "epoch": 0.39983352881086603, "grad_norm": 1.9273154735565186, "learning_rate": 1.2619708996409056e-05, "loss": 0.7335, "step": 5284 }, { "epoch": 0.39990919753319965, "grad_norm": 2.073868989944458, "learning_rate": 1.2617646851562134e-05, "loss": 0.7556, "step": 5285 }, { "epoch": 0.39998486625553326, "grad_norm": 1.6271218061447144, "learning_rate": 1.2615584480166465e-05, "loss": 0.7457, "step": 5286 }, { "epoch": 0.4000605349778669, "grad_norm": 3.472792387008667, "learning_rate": 1.2613521882351204e-05, "loss": 0.7719, "step": 5287 }, { "epoch": 0.40013620370020053, "grad_norm": 2.107931613922119, "learning_rate": 1.2611459058245511e-05, "loss": 0.6264, "step": 5288 }, { "epoch": 0.40021187242253414, "grad_norm": 2.132664203643799, "learning_rate": 1.2609396007978573e-05, "loss": 0.6974, "step": 5289 }, { "epoch": 0.40028754114486775, "grad_norm": 1.6444696187973022, "learning_rate": 1.2607332731679584e-05, "loss": 0.6695, "step": 5290 }, { "epoch": 0.4003632098672014, "grad_norm": 2.9400112628936768, "learning_rate": 1.260526922947775e-05, "loss": 0.7455, "step": 5291 }, { "epoch": 0.40043887858953503, "grad_norm": 3.270721197128296, "learning_rate": 1.2603205501502296e-05, "loss": 0.7693, "step": 5292 }, { "epoch": 0.40051454731186864, "grad_norm": 1.8366196155548096, "learning_rate": 1.260114154788246e-05, "loss": 0.6392, "step": 5293 }, { "epoch": 0.40059021603420225, "grad_norm": 2.108292818069458, "learning_rate": 1.2599077368747494e-05, "loss": 0.7311, "step": 5294 }, { "epoch": 0.40066588475653586, "grad_norm": 2.0967061519622803, "learning_rate": 1.259701296422666e-05, "loss": 0.7679, "step": 5295 }, { "epoch": 0.4007415534788695, "grad_norm": 1.9956434965133667, "learning_rate": 1.2594948334449241e-05, "loss": 0.7826, "step": 5296 }, { "epoch": 0.40081722220120314, "grad_norm": 2.254016160964966, "learning_rate": 1.259288347954453e-05, "loss": 0.5853, "step": 5297 }, { "epoch": 0.40089289092353675, "grad_norm": 1.7637948989868164, "learning_rate": 1.2590818399641833e-05, "loss": 0.8752, "step": 5298 }, { "epoch": 0.40096855964587036, "grad_norm": 2.54941987991333, "learning_rate": 1.2588753094870477e-05, "loss": 0.7963, "step": 5299 }, { "epoch": 0.401044228368204, "grad_norm": 1.4599640369415283, "learning_rate": 1.2586687565359791e-05, "loss": 0.7636, "step": 5300 }, { "epoch": 0.40111989709053764, "grad_norm": 2.2230331897735596, "learning_rate": 1.2584621811239133e-05, "loss": 0.7579, "step": 5301 }, { "epoch": 0.40119556581287125, "grad_norm": 1.8923826217651367, "learning_rate": 1.2582555832637862e-05, "loss": 0.6866, "step": 5302 }, { "epoch": 0.40127123453520486, "grad_norm": 2.1083552837371826, "learning_rate": 1.2580489629685354e-05, "loss": 0.6996, "step": 5303 }, { "epoch": 0.4013469032575385, "grad_norm": 2.3579092025756836, "learning_rate": 1.2578423202511008e-05, "loss": 0.7097, "step": 5304 }, { "epoch": 0.40142257197987213, "grad_norm": 2.3279149532318115, "learning_rate": 1.2576356551244226e-05, "loss": 0.7892, "step": 5305 }, { "epoch": 0.40149824070220574, "grad_norm": 1.9012402296066284, "learning_rate": 1.2574289676014431e-05, "loss": 0.6017, "step": 5306 }, { "epoch": 0.40157390942453935, "grad_norm": 1.9098631143569946, "learning_rate": 1.2572222576951054e-05, "loss": 0.7714, "step": 5307 }, { "epoch": 0.40164957814687297, "grad_norm": 2.4168155193328857, "learning_rate": 1.2570155254183544e-05, "loss": 0.8486, "step": 5308 }, { "epoch": 0.40172524686920663, "grad_norm": 2.088871479034424, "learning_rate": 1.2568087707841367e-05, "loss": 0.7525, "step": 5309 }, { "epoch": 0.40180091559154024, "grad_norm": 2.0699868202209473, "learning_rate": 1.2566019938053996e-05, "loss": 0.6048, "step": 5310 }, { "epoch": 0.40187658431387385, "grad_norm": 1.6784697771072388, "learning_rate": 1.2563951944950923e-05, "loss": 0.8357, "step": 5311 }, { "epoch": 0.40195225303620746, "grad_norm": 2.897984504699707, "learning_rate": 1.2561883728661652e-05, "loss": 0.6136, "step": 5312 }, { "epoch": 0.40202792175854113, "grad_norm": 2.1926939487457275, "learning_rate": 1.2559815289315701e-05, "loss": 0.6236, "step": 5313 }, { "epoch": 0.40210359048087474, "grad_norm": 2.2458455562591553, "learning_rate": 1.2557746627042605e-05, "loss": 0.8109, "step": 5314 }, { "epoch": 0.40217925920320835, "grad_norm": 2.5638132095336914, "learning_rate": 1.2555677741971905e-05, "loss": 0.7555, "step": 5315 }, { "epoch": 0.40225492792554196, "grad_norm": 1.7509515285491943, "learning_rate": 1.2553608634233166e-05, "loss": 0.8178, "step": 5316 }, { "epoch": 0.4023305966478756, "grad_norm": 2.2792065143585205, "learning_rate": 1.2551539303955962e-05, "loss": 0.8282, "step": 5317 }, { "epoch": 0.40240626537020924, "grad_norm": 1.9566348791122437, "learning_rate": 1.2549469751269876e-05, "loss": 0.7285, "step": 5318 }, { "epoch": 0.40248193409254285, "grad_norm": 2.4450414180755615, "learning_rate": 1.2547399976304517e-05, "loss": 0.7842, "step": 5319 }, { "epoch": 0.40255760281487646, "grad_norm": 2.2559216022491455, "learning_rate": 1.2545329979189495e-05, "loss": 0.7553, "step": 5320 }, { "epoch": 0.40263327153721007, "grad_norm": 2.124101400375366, "learning_rate": 1.2543259760054444e-05, "loss": 0.6608, "step": 5321 }, { "epoch": 0.40270894025954374, "grad_norm": 16.932872772216797, "learning_rate": 1.2541189319029008e-05, "loss": 0.7524, "step": 5322 }, { "epoch": 0.40278460898187735, "grad_norm": 1.7447752952575684, "learning_rate": 1.2539118656242839e-05, "loss": 0.7034, "step": 5323 }, { "epoch": 0.40286027770421096, "grad_norm": 2.0638234615325928, "learning_rate": 1.2537047771825618e-05, "loss": 0.6932, "step": 5324 }, { "epoch": 0.40293594642654457, "grad_norm": 2.157304048538208, "learning_rate": 1.2534976665907024e-05, "loss": 0.7061, "step": 5325 }, { "epoch": 0.40301161514887823, "grad_norm": 1.8160690069198608, "learning_rate": 1.2532905338616756e-05, "loss": 0.8272, "step": 5326 }, { "epoch": 0.40308728387121184, "grad_norm": 2.0002903938293457, "learning_rate": 1.2530833790084527e-05, "loss": 0.6858, "step": 5327 }, { "epoch": 0.40316295259354545, "grad_norm": 2.2748143672943115, "learning_rate": 1.252876202044007e-05, "loss": 0.8348, "step": 5328 }, { "epoch": 0.40323862131587906, "grad_norm": 2.3512051105499268, "learning_rate": 1.2526690029813123e-05, "loss": 0.9381, "step": 5329 }, { "epoch": 0.40331429003821273, "grad_norm": 2.0239651203155518, "learning_rate": 1.2524617818333437e-05, "loss": 0.6707, "step": 5330 }, { "epoch": 0.40338995876054634, "grad_norm": 2.503915786743164, "learning_rate": 1.2522545386130781e-05, "loss": 0.6709, "step": 5331 }, { "epoch": 0.40346562748287995, "grad_norm": 2.1918065547943115, "learning_rate": 1.2520472733334942e-05, "loss": 0.901, "step": 5332 }, { "epoch": 0.40354129620521356, "grad_norm": 2.0927445888519287, "learning_rate": 1.2518399860075714e-05, "loss": 0.8102, "step": 5333 }, { "epoch": 0.4036169649275472, "grad_norm": 1.9232884645462036, "learning_rate": 1.2516326766482908e-05, "loss": 0.7762, "step": 5334 }, { "epoch": 0.40369263364988084, "grad_norm": 2.297513961791992, "learning_rate": 1.2514253452686346e-05, "loss": 0.8568, "step": 5335 }, { "epoch": 0.40376830237221445, "grad_norm": 2.033656597137451, "learning_rate": 1.2512179918815865e-05, "loss": 0.8079, "step": 5336 }, { "epoch": 0.40384397109454806, "grad_norm": 1.996671438217163, "learning_rate": 1.2510106165001317e-05, "loss": 0.6862, "step": 5337 }, { "epoch": 0.40391963981688167, "grad_norm": 2.0469861030578613, "learning_rate": 1.250803219137257e-05, "loss": 0.7625, "step": 5338 }, { "epoch": 0.40399530853921534, "grad_norm": 1.9562182426452637, "learning_rate": 1.25059579980595e-05, "loss": 0.8611, "step": 5339 }, { "epoch": 0.40407097726154895, "grad_norm": 2.2968802452087402, "learning_rate": 1.2503883585192003e-05, "loss": 0.6937, "step": 5340 }, { "epoch": 0.40414664598388256, "grad_norm": 2.1029908657073975, "learning_rate": 1.2501808952899976e-05, "loss": 0.6784, "step": 5341 }, { "epoch": 0.40422231470621617, "grad_norm": 1.7134768962860107, "learning_rate": 1.2499734101313355e-05, "loss": 0.9551, "step": 5342 }, { "epoch": 0.40429798342854983, "grad_norm": 2.0898208618164062, "learning_rate": 1.2497659030562058e-05, "loss": 0.6126, "step": 5343 }, { "epoch": 0.40437365215088344, "grad_norm": 1.7897844314575195, "learning_rate": 1.2495583740776043e-05, "loss": 0.9229, "step": 5344 }, { "epoch": 0.40444932087321706, "grad_norm": 1.9922789335250854, "learning_rate": 1.2493508232085271e-05, "loss": 0.7787, "step": 5345 }, { "epoch": 0.40452498959555067, "grad_norm": 2.0527071952819824, "learning_rate": 1.2491432504619707e-05, "loss": 0.764, "step": 5346 }, { "epoch": 0.4046006583178843, "grad_norm": 1.69284188747406, "learning_rate": 1.2489356558509353e-05, "loss": 0.5887, "step": 5347 }, { "epoch": 0.40467632704021794, "grad_norm": 2.769381284713745, "learning_rate": 1.2487280393884202e-05, "loss": 0.855, "step": 5348 }, { "epoch": 0.40475199576255155, "grad_norm": 1.9943363666534424, "learning_rate": 1.2485204010874276e-05, "loss": 0.6139, "step": 5349 }, { "epoch": 0.40482766448488516, "grad_norm": 2.07372784614563, "learning_rate": 1.2483127409609598e-05, "loss": 0.8462, "step": 5350 }, { "epoch": 0.4049033332072188, "grad_norm": 2.265497922897339, "learning_rate": 1.248105059022022e-05, "loss": 0.6575, "step": 5351 }, { "epoch": 0.40497900192955244, "grad_norm": 2.506788969039917, "learning_rate": 1.2478973552836195e-05, "loss": 0.7756, "step": 5352 }, { "epoch": 0.40505467065188605, "grad_norm": 1.914207935333252, "learning_rate": 1.2476896297587592e-05, "loss": 0.7362, "step": 5353 }, { "epoch": 0.40513033937421966, "grad_norm": 2.050699234008789, "learning_rate": 1.2474818824604498e-05, "loss": 0.6945, "step": 5354 }, { "epoch": 0.40520600809655327, "grad_norm": 2.0397143363952637, "learning_rate": 1.2472741134017008e-05, "loss": 0.7752, "step": 5355 }, { "epoch": 0.40528167681888694, "grad_norm": 2.459721803665161, "learning_rate": 1.2470663225955239e-05, "loss": 0.8221, "step": 5356 }, { "epoch": 0.40535734554122055, "grad_norm": 1.8895254135131836, "learning_rate": 1.2468585100549311e-05, "loss": 0.6238, "step": 5357 }, { "epoch": 0.40543301426355416, "grad_norm": 2.120483875274658, "learning_rate": 1.2466506757929369e-05, "loss": 0.6015, "step": 5358 }, { "epoch": 0.40550868298588777, "grad_norm": 1.9569705724716187, "learning_rate": 1.2464428198225558e-05, "loss": 0.704, "step": 5359 }, { "epoch": 0.4055843517082214, "grad_norm": 1.8289756774902344, "learning_rate": 1.2462349421568047e-05, "loss": 0.701, "step": 5360 }, { "epoch": 0.40566002043055505, "grad_norm": 1.8984501361846924, "learning_rate": 1.246027042808702e-05, "loss": 0.6924, "step": 5361 }, { "epoch": 0.40573568915288866, "grad_norm": 2.5578982830047607, "learning_rate": 1.2458191217912664e-05, "loss": 0.8879, "step": 5362 }, { "epoch": 0.40581135787522227, "grad_norm": 2.1358728408813477, "learning_rate": 1.2456111791175193e-05, "loss": 0.7446, "step": 5363 }, { "epoch": 0.4058870265975559, "grad_norm": 2.0823116302490234, "learning_rate": 1.2454032148004819e-05, "loss": 0.8014, "step": 5364 }, { "epoch": 0.40596269531988954, "grad_norm": 1.6264188289642334, "learning_rate": 1.2451952288531781e-05, "loss": 0.7815, "step": 5365 }, { "epoch": 0.40603836404222315, "grad_norm": 2.0880937576293945, "learning_rate": 1.2449872212886328e-05, "loss": 0.6668, "step": 5366 }, { "epoch": 0.40611403276455676, "grad_norm": 2.2300379276275635, "learning_rate": 1.2447791921198715e-05, "loss": 0.7545, "step": 5367 }, { "epoch": 0.4061897014868904, "grad_norm": 1.5105425119400024, "learning_rate": 1.2445711413599226e-05, "loss": 0.8274, "step": 5368 }, { "epoch": 0.40626537020922404, "grad_norm": 2.3562936782836914, "learning_rate": 1.2443630690218137e-05, "loss": 0.8011, "step": 5369 }, { "epoch": 0.40634103893155765, "grad_norm": 2.463721990585327, "learning_rate": 1.2441549751185762e-05, "loss": 0.706, "step": 5370 }, { "epoch": 0.40641670765389126, "grad_norm": 2.0283122062683105, "learning_rate": 1.2439468596632408e-05, "loss": 0.7052, "step": 5371 }, { "epoch": 0.4064923763762249, "grad_norm": 1.6645065546035767, "learning_rate": 1.2437387226688404e-05, "loss": 0.5734, "step": 5372 }, { "epoch": 0.4065680450985585, "grad_norm": 2.11430025100708, "learning_rate": 1.2435305641484095e-05, "loss": 0.7679, "step": 5373 }, { "epoch": 0.40664371382089215, "grad_norm": 2.3016443252563477, "learning_rate": 1.2433223841149837e-05, "loss": 0.7524, "step": 5374 }, { "epoch": 0.40671938254322576, "grad_norm": 2.4958677291870117, "learning_rate": 1.2431141825815998e-05, "loss": 0.7631, "step": 5375 }, { "epoch": 0.40679505126555937, "grad_norm": 1.683720350265503, "learning_rate": 1.2429059595612957e-05, "loss": 0.7059, "step": 5376 }, { "epoch": 0.406870719987893, "grad_norm": 1.871661901473999, "learning_rate": 1.2426977150671117e-05, "loss": 0.6518, "step": 5377 }, { "epoch": 0.40694638871022665, "grad_norm": 1.8744332790374756, "learning_rate": 1.2424894491120879e-05, "loss": 0.7192, "step": 5378 }, { "epoch": 0.40702205743256026, "grad_norm": 2.238365888595581, "learning_rate": 1.2422811617092675e-05, "loss": 0.7441, "step": 5379 }, { "epoch": 0.40709772615489387, "grad_norm": 2.139251232147217, "learning_rate": 1.2420728528716933e-05, "loss": 0.7847, "step": 5380 }, { "epoch": 0.4071733948772275, "grad_norm": 2.121941328048706, "learning_rate": 1.241864522612411e-05, "loss": 0.781, "step": 5381 }, { "epoch": 0.40724906359956115, "grad_norm": 2.219752788543701, "learning_rate": 1.2416561709444665e-05, "loss": 0.815, "step": 5382 }, { "epoch": 0.40732473232189476, "grad_norm": 2.355746030807495, "learning_rate": 1.2414477978809075e-05, "loss": 0.8222, "step": 5383 }, { "epoch": 0.40740040104422837, "grad_norm": 2.5740647315979004, "learning_rate": 1.241239403434783e-05, "loss": 0.8416, "step": 5384 }, { "epoch": 0.407476069766562, "grad_norm": 2.1180238723754883, "learning_rate": 1.2410309876191433e-05, "loss": 0.8138, "step": 5385 }, { "epoch": 0.4075517384888956, "grad_norm": 2.1506288051605225, "learning_rate": 1.2408225504470402e-05, "loss": 0.7021, "step": 5386 }, { "epoch": 0.40762740721122925, "grad_norm": 3.2336843013763428, "learning_rate": 1.2406140919315265e-05, "loss": 0.8422, "step": 5387 }, { "epoch": 0.40770307593356286, "grad_norm": 2.023808240890503, "learning_rate": 1.2404056120856568e-05, "loss": 0.7343, "step": 5388 }, { "epoch": 0.4077787446558965, "grad_norm": 1.8890466690063477, "learning_rate": 1.2401971109224865e-05, "loss": 0.6938, "step": 5389 }, { "epoch": 0.4078544133782301, "grad_norm": 2.454148530960083, "learning_rate": 1.239988588455073e-05, "loss": 0.8247, "step": 5390 }, { "epoch": 0.40793008210056375, "grad_norm": 2.348931312561035, "learning_rate": 1.2397800446964743e-05, "loss": 0.6928, "step": 5391 }, { "epoch": 0.40800575082289736, "grad_norm": 1.7471752166748047, "learning_rate": 1.2395714796597503e-05, "loss": 0.7767, "step": 5392 }, { "epoch": 0.408081419545231, "grad_norm": 2.375242233276367, "learning_rate": 1.239362893357962e-05, "loss": 0.6983, "step": 5393 }, { "epoch": 0.4081570882675646, "grad_norm": 2.8043153285980225, "learning_rate": 1.2391542858041716e-05, "loss": 0.6071, "step": 5394 }, { "epoch": 0.40823275698989825, "grad_norm": 2.196038246154785, "learning_rate": 1.238945657011443e-05, "loss": 0.7422, "step": 5395 }, { "epoch": 0.40830842571223186, "grad_norm": 2.552044153213501, "learning_rate": 1.2387370069928408e-05, "loss": 0.8483, "step": 5396 }, { "epoch": 0.40838409443456547, "grad_norm": 2.0079550743103027, "learning_rate": 1.2385283357614319e-05, "loss": 0.7005, "step": 5397 }, { "epoch": 0.4084597631568991, "grad_norm": 3.333538055419922, "learning_rate": 1.2383196433302832e-05, "loss": 0.6432, "step": 5398 }, { "epoch": 0.40853543187923275, "grad_norm": 2.925452947616577, "learning_rate": 1.2381109297124649e-05, "loss": 0.7974, "step": 5399 }, { "epoch": 0.40861110060156636, "grad_norm": 2.683720350265503, "learning_rate": 1.2379021949210461e-05, "loss": 0.7273, "step": 5400 }, { "epoch": 0.40868676932389997, "grad_norm": 2.577501058578491, "learning_rate": 1.2376934389690992e-05, "loss": 0.8398, "step": 5401 }, { "epoch": 0.4087624380462336, "grad_norm": 1.819061040878296, "learning_rate": 1.2374846618696968e-05, "loss": 0.6676, "step": 5402 }, { "epoch": 0.4088381067685672, "grad_norm": 1.9940983057022095, "learning_rate": 1.2372758636359129e-05, "loss": 0.7512, "step": 5403 }, { "epoch": 0.40891377549090085, "grad_norm": 2.4478976726531982, "learning_rate": 1.2370670442808242e-05, "loss": 0.7858, "step": 5404 }, { "epoch": 0.40898944421323447, "grad_norm": 1.8682414293289185, "learning_rate": 1.2368582038175066e-05, "loss": 0.6828, "step": 5405 }, { "epoch": 0.4090651129355681, "grad_norm": 1.9492807388305664, "learning_rate": 1.2366493422590389e-05, "loss": 0.5985, "step": 5406 }, { "epoch": 0.4091407816579017, "grad_norm": 2.0559017658233643, "learning_rate": 1.2364404596185005e-05, "loss": 0.7456, "step": 5407 }, { "epoch": 0.40921645038023535, "grad_norm": 1.9211013317108154, "learning_rate": 1.2362315559089724e-05, "loss": 0.8048, "step": 5408 }, { "epoch": 0.40929211910256896, "grad_norm": 2.1894242763519287, "learning_rate": 1.2360226311435368e-05, "loss": 0.6719, "step": 5409 }, { "epoch": 0.4093677878249026, "grad_norm": 2.1584041118621826, "learning_rate": 1.235813685335277e-05, "loss": 0.7722, "step": 5410 }, { "epoch": 0.4094434565472362, "grad_norm": 2.0298099517822266, "learning_rate": 1.235604718497278e-05, "loss": 0.7631, "step": 5411 }, { "epoch": 0.40951912526956985, "grad_norm": 2.1612000465393066, "learning_rate": 1.2353957306426264e-05, "loss": 0.7931, "step": 5412 }, { "epoch": 0.40959479399190346, "grad_norm": 2.1110432147979736, "learning_rate": 1.2351867217844091e-05, "loss": 0.7961, "step": 5413 }, { "epoch": 0.40967046271423707, "grad_norm": 2.3404643535614014, "learning_rate": 1.2349776919357153e-05, "loss": 0.7194, "step": 5414 }, { "epoch": 0.4097461314365707, "grad_norm": 2.3472416400909424, "learning_rate": 1.2347686411096347e-05, "loss": 0.7346, "step": 5415 }, { "epoch": 0.4098218001589043, "grad_norm": 2.5707545280456543, "learning_rate": 1.2345595693192594e-05, "loss": 0.7142, "step": 5416 }, { "epoch": 0.40989746888123796, "grad_norm": 1.96560800075531, "learning_rate": 1.2343504765776816e-05, "loss": 0.8404, "step": 5417 }, { "epoch": 0.40997313760357157, "grad_norm": 1.9135947227478027, "learning_rate": 1.2341413628979957e-05, "loss": 0.697, "step": 5418 }, { "epoch": 0.4100488063259052, "grad_norm": 2.3961076736450195, "learning_rate": 1.2339322282932964e-05, "loss": 0.7307, "step": 5419 }, { "epoch": 0.4101244750482388, "grad_norm": 2.5133774280548096, "learning_rate": 1.2337230727766815e-05, "loss": 0.6817, "step": 5420 }, { "epoch": 0.41020014377057246, "grad_norm": 2.319206714630127, "learning_rate": 1.233513896361248e-05, "loss": 0.7036, "step": 5421 }, { "epoch": 0.41027581249290607, "grad_norm": 2.3848443031311035, "learning_rate": 1.2333046990600959e-05, "loss": 0.7175, "step": 5422 }, { "epoch": 0.4103514812152397, "grad_norm": 3.0399367809295654, "learning_rate": 1.2330954808863253e-05, "loss": 0.649, "step": 5423 }, { "epoch": 0.4104271499375733, "grad_norm": 2.1274526119232178, "learning_rate": 1.2328862418530381e-05, "loss": 0.7032, "step": 5424 }, { "epoch": 0.41050281865990695, "grad_norm": 2.8769493103027344, "learning_rate": 1.2326769819733382e-05, "loss": 0.7368, "step": 5425 }, { "epoch": 0.41057848738224056, "grad_norm": 2.2980008125305176, "learning_rate": 1.2324677012603294e-05, "loss": 0.7648, "step": 5426 }, { "epoch": 0.4106541561045742, "grad_norm": 2.084308624267578, "learning_rate": 1.232258399727118e-05, "loss": 0.7854, "step": 5427 }, { "epoch": 0.4107298248269078, "grad_norm": 1.9208357334136963, "learning_rate": 1.232049077386811e-05, "loss": 0.6656, "step": 5428 }, { "epoch": 0.4108054935492414, "grad_norm": 1.9888135194778442, "learning_rate": 1.2318397342525164e-05, "loss": 0.693, "step": 5429 }, { "epoch": 0.41088116227157506, "grad_norm": 2.4940552711486816, "learning_rate": 1.2316303703373448e-05, "loss": 0.8753, "step": 5430 }, { "epoch": 0.4109568309939087, "grad_norm": 1.7741725444793701, "learning_rate": 1.2314209856544064e-05, "loss": 0.5688, "step": 5431 }, { "epoch": 0.4110324997162423, "grad_norm": 2.1441521644592285, "learning_rate": 1.2312115802168144e-05, "loss": 0.7552, "step": 5432 }, { "epoch": 0.4111081684385759, "grad_norm": 2.1466495990753174, "learning_rate": 1.2310021540376815e-05, "loss": 0.7369, "step": 5433 }, { "epoch": 0.41118383716090956, "grad_norm": 2.301936149597168, "learning_rate": 1.2307927071301235e-05, "loss": 0.7292, "step": 5434 }, { "epoch": 0.41125950588324317, "grad_norm": 1.9242736101150513, "learning_rate": 1.230583239507256e-05, "loss": 0.7407, "step": 5435 }, { "epoch": 0.4113351746055768, "grad_norm": 2.5096495151519775, "learning_rate": 1.2303737511821969e-05, "loss": 0.6824, "step": 5436 }, { "epoch": 0.4114108433279104, "grad_norm": 2.5667378902435303, "learning_rate": 1.2301642421680649e-05, "loss": 0.7517, "step": 5437 }, { "epoch": 0.41148651205024406, "grad_norm": 2.038986921310425, "learning_rate": 1.2299547124779803e-05, "loss": 0.6629, "step": 5438 }, { "epoch": 0.41156218077257767, "grad_norm": 2.1099348068237305, "learning_rate": 1.2297451621250643e-05, "loss": 0.7357, "step": 5439 }, { "epoch": 0.4116378494949113, "grad_norm": 1.7685575485229492, "learning_rate": 1.2295355911224398e-05, "loss": 0.8865, "step": 5440 }, { "epoch": 0.4117135182172449, "grad_norm": 2.0220160484313965, "learning_rate": 1.2293259994832306e-05, "loss": 0.7703, "step": 5441 }, { "epoch": 0.4117891869395785, "grad_norm": 1.9990547895431519, "learning_rate": 1.2291163872205624e-05, "loss": 0.8773, "step": 5442 }, { "epoch": 0.41186485566191217, "grad_norm": 2.4579837322235107, "learning_rate": 1.2289067543475613e-05, "loss": 0.7528, "step": 5443 }, { "epoch": 0.4119405243842458, "grad_norm": 2.3881900310516357, "learning_rate": 1.2286971008773552e-05, "loss": 0.8378, "step": 5444 }, { "epoch": 0.4120161931065794, "grad_norm": 2.1781957149505615, "learning_rate": 1.228487426823074e-05, "loss": 0.824, "step": 5445 }, { "epoch": 0.412091861828913, "grad_norm": 2.6942150592803955, "learning_rate": 1.2282777321978474e-05, "loss": 0.8154, "step": 5446 }, { "epoch": 0.41216753055124666, "grad_norm": 1.4597055912017822, "learning_rate": 1.2280680170148075e-05, "loss": 0.7794, "step": 5447 }, { "epoch": 0.4122431992735803, "grad_norm": 2.1186511516571045, "learning_rate": 1.2278582812870874e-05, "loss": 0.6442, "step": 5448 }, { "epoch": 0.4123188679959139, "grad_norm": 1.8283412456512451, "learning_rate": 1.2276485250278211e-05, "loss": 0.8088, "step": 5449 }, { "epoch": 0.4123945367182475, "grad_norm": 2.1991498470306396, "learning_rate": 1.2274387482501444e-05, "loss": 0.7892, "step": 5450 }, { "epoch": 0.41247020544058116, "grad_norm": 1.786555290222168, "learning_rate": 1.2272289509671943e-05, "loss": 0.6558, "step": 5451 }, { "epoch": 0.41254587416291477, "grad_norm": 1.8185572624206543, "learning_rate": 1.227019133192109e-05, "loss": 0.8318, "step": 5452 }, { "epoch": 0.4126215428852484, "grad_norm": 2.238388776779175, "learning_rate": 1.2268092949380277e-05, "loss": 0.6967, "step": 5453 }, { "epoch": 0.412697211607582, "grad_norm": 1.840320348739624, "learning_rate": 1.2265994362180915e-05, "loss": 0.7751, "step": 5454 }, { "epoch": 0.4127728803299156, "grad_norm": 2.3982057571411133, "learning_rate": 1.2263895570454424e-05, "loss": 0.7067, "step": 5455 }, { "epoch": 0.41284854905224927, "grad_norm": 3.4811136722564697, "learning_rate": 1.2261796574332232e-05, "loss": 0.6473, "step": 5456 }, { "epoch": 0.4129242177745829, "grad_norm": 1.9703245162963867, "learning_rate": 1.225969737394579e-05, "loss": 0.7064, "step": 5457 }, { "epoch": 0.4129998864969165, "grad_norm": 2.2226948738098145, "learning_rate": 1.2257597969426555e-05, "loss": 0.6056, "step": 5458 }, { "epoch": 0.4130755552192501, "grad_norm": 2.0323541164398193, "learning_rate": 1.2255498360905998e-05, "loss": 0.7867, "step": 5459 }, { "epoch": 0.41315122394158377, "grad_norm": 1.938133716583252, "learning_rate": 1.2253398548515604e-05, "loss": 0.6533, "step": 5460 }, { "epoch": 0.4132268926639174, "grad_norm": 2.4267141819000244, "learning_rate": 1.2251298532386874e-05, "loss": 0.6883, "step": 5461 }, { "epoch": 0.413302561386251, "grad_norm": 2.085056781768799, "learning_rate": 1.224919831265131e-05, "loss": 0.6782, "step": 5462 }, { "epoch": 0.4133782301085846, "grad_norm": 1.7166036367416382, "learning_rate": 1.2247097889440441e-05, "loss": 0.8407, "step": 5463 }, { "epoch": 0.41345389883091826, "grad_norm": 1.9741954803466797, "learning_rate": 1.2244997262885797e-05, "loss": 0.6178, "step": 5464 }, { "epoch": 0.4135295675532519, "grad_norm": 3.0332493782043457, "learning_rate": 1.224289643311893e-05, "loss": 0.5895, "step": 5465 }, { "epoch": 0.4136052362755855, "grad_norm": 2.2371206283569336, "learning_rate": 1.2240795400271402e-05, "loss": 0.6845, "step": 5466 }, { "epoch": 0.4136809049979191, "grad_norm": 2.590519666671753, "learning_rate": 1.223869416447478e-05, "loss": 0.7363, "step": 5467 }, { "epoch": 0.4137565737202527, "grad_norm": 2.10429310798645, "learning_rate": 1.2236592725860656e-05, "loss": 0.7608, "step": 5468 }, { "epoch": 0.4138322424425864, "grad_norm": 2.429518461227417, "learning_rate": 1.2234491084560629e-05, "loss": 0.6788, "step": 5469 }, { "epoch": 0.41390791116492, "grad_norm": 2.2328531742095947, "learning_rate": 1.2232389240706306e-05, "loss": 0.7914, "step": 5470 }, { "epoch": 0.4139835798872536, "grad_norm": 1.950385570526123, "learning_rate": 1.2230287194429316e-05, "loss": 0.777, "step": 5471 }, { "epoch": 0.4140592486095872, "grad_norm": 2.1990959644317627, "learning_rate": 1.2228184945861291e-05, "loss": 0.8321, "step": 5472 }, { "epoch": 0.41413491733192087, "grad_norm": 2.1966259479522705, "learning_rate": 1.2226082495133886e-05, "loss": 0.644, "step": 5473 }, { "epoch": 0.4142105860542545, "grad_norm": 2.390727996826172, "learning_rate": 1.2223979842378756e-05, "loss": 0.6661, "step": 5474 }, { "epoch": 0.4142862547765881, "grad_norm": 2.415733575820923, "learning_rate": 1.2221876987727586e-05, "loss": 0.7288, "step": 5475 }, { "epoch": 0.4143619234989217, "grad_norm": 2.3557534217834473, "learning_rate": 1.2219773931312057e-05, "loss": 0.6913, "step": 5476 }, { "epoch": 0.41443759222125537, "grad_norm": 2.2471041679382324, "learning_rate": 1.221767067326387e-05, "loss": 0.7311, "step": 5477 }, { "epoch": 0.414513260943589, "grad_norm": 1.8936131000518799, "learning_rate": 1.221556721371474e-05, "loss": 0.6176, "step": 5478 }, { "epoch": 0.4145889296659226, "grad_norm": 1.792964220046997, "learning_rate": 1.2213463552796388e-05, "loss": 0.6454, "step": 5479 }, { "epoch": 0.4146645983882562, "grad_norm": 2.177844762802124, "learning_rate": 1.2211359690640556e-05, "loss": 0.8097, "step": 5480 }, { "epoch": 0.4147402671105898, "grad_norm": 1.9635275602340698, "learning_rate": 1.2209255627378992e-05, "loss": 0.7149, "step": 5481 }, { "epoch": 0.4148159358329235, "grad_norm": 1.988793969154358, "learning_rate": 1.2207151363143462e-05, "loss": 0.7471, "step": 5482 }, { "epoch": 0.4148916045552571, "grad_norm": 2.298090696334839, "learning_rate": 1.220504689806574e-05, "loss": 0.8539, "step": 5483 }, { "epoch": 0.4149672732775907, "grad_norm": 1.7271684408187866, "learning_rate": 1.2202942232277616e-05, "loss": 0.8253, "step": 5484 }, { "epoch": 0.4150429419999243, "grad_norm": 1.700923204421997, "learning_rate": 1.2200837365910887e-05, "loss": 0.7333, "step": 5485 }, { "epoch": 0.415118610722258, "grad_norm": 2.043684482574463, "learning_rate": 1.2198732299097373e-05, "loss": 0.7908, "step": 5486 }, { "epoch": 0.4151942794445916, "grad_norm": 1.729766845703125, "learning_rate": 1.2196627031968894e-05, "loss": 0.691, "step": 5487 }, { "epoch": 0.4152699481669252, "grad_norm": 2.202939033508301, "learning_rate": 1.2194521564657293e-05, "loss": 0.8334, "step": 5488 }, { "epoch": 0.4153456168892588, "grad_norm": 1.9363715648651123, "learning_rate": 1.2192415897294418e-05, "loss": 0.7155, "step": 5489 }, { "epoch": 0.4154212856115925, "grad_norm": 2.62788724899292, "learning_rate": 1.2190310030012132e-05, "loss": 0.6351, "step": 5490 }, { "epoch": 0.4154969543339261, "grad_norm": 1.856323480606079, "learning_rate": 1.2188203962942318e-05, "loss": 0.7471, "step": 5491 }, { "epoch": 0.4155726230562597, "grad_norm": 2.280324935913086, "learning_rate": 1.2186097696216856e-05, "loss": 0.6655, "step": 5492 }, { "epoch": 0.4156482917785933, "grad_norm": 21.438453674316406, "learning_rate": 1.2183991229967652e-05, "loss": 0.615, "step": 5493 }, { "epoch": 0.4157239605009269, "grad_norm": 2.4099819660186768, "learning_rate": 1.2181884564326618e-05, "loss": 0.7488, "step": 5494 }, { "epoch": 0.4157996292232606, "grad_norm": 2.0276858806610107, "learning_rate": 1.2179777699425683e-05, "loss": 0.7315, "step": 5495 }, { "epoch": 0.4158752979455942, "grad_norm": 2.4924838542938232, "learning_rate": 1.2177670635396786e-05, "loss": 0.6686, "step": 5496 }, { "epoch": 0.4159509666679278, "grad_norm": 2.914191961288452, "learning_rate": 1.2175563372371872e-05, "loss": 0.9143, "step": 5497 }, { "epoch": 0.4160266353902614, "grad_norm": 3.372140884399414, "learning_rate": 1.217345591048291e-05, "loss": 0.6866, "step": 5498 }, { "epoch": 0.4161023041125951, "grad_norm": 3.612203598022461, "learning_rate": 1.2171348249861874e-05, "loss": 0.7108, "step": 5499 }, { "epoch": 0.4161779728349287, "grad_norm": 1.8624509572982788, "learning_rate": 1.2169240390640753e-05, "loss": 0.8142, "step": 5500 }, { "epoch": 0.4162536415572623, "grad_norm": 2.179865837097168, "learning_rate": 1.216713233295155e-05, "loss": 0.7727, "step": 5501 }, { "epoch": 0.4163293102795959, "grad_norm": 2.4808688163757324, "learning_rate": 1.2165024076926276e-05, "loss": 0.7229, "step": 5502 }, { "epoch": 0.4164049790019296, "grad_norm": 1.9209163188934326, "learning_rate": 1.2162915622696955e-05, "loss": 0.6675, "step": 5503 }, { "epoch": 0.4164806477242632, "grad_norm": 2.1031787395477295, "learning_rate": 1.216080697039563e-05, "loss": 0.7635, "step": 5504 }, { "epoch": 0.4165563164465968, "grad_norm": 2.6245055198669434, "learning_rate": 1.215869812015435e-05, "loss": 0.7576, "step": 5505 }, { "epoch": 0.4166319851689304, "grad_norm": 1.9226709604263306, "learning_rate": 1.2156589072105175e-05, "loss": 0.7822, "step": 5506 }, { "epoch": 0.4167076538912641, "grad_norm": 2.297623872756958, "learning_rate": 1.2154479826380185e-05, "loss": 0.8283, "step": 5507 }, { "epoch": 0.4167833226135977, "grad_norm": 2.166672706604004, "learning_rate": 1.215237038311146e-05, "loss": 0.8331, "step": 5508 }, { "epoch": 0.4168589913359313, "grad_norm": 1.8679463863372803, "learning_rate": 1.215026074243111e-05, "loss": 0.749, "step": 5509 }, { "epoch": 0.4169346600582649, "grad_norm": 2.0886306762695312, "learning_rate": 1.2148150904471246e-05, "loss": 0.6835, "step": 5510 }, { "epoch": 0.4170103287805985, "grad_norm": 1.7681407928466797, "learning_rate": 1.2146040869363986e-05, "loss": 0.768, "step": 5511 }, { "epoch": 0.4170859975029322, "grad_norm": 2.234034538269043, "learning_rate": 1.2143930637241473e-05, "loss": 0.7622, "step": 5512 }, { "epoch": 0.4171616662252658, "grad_norm": 2.314732789993286, "learning_rate": 1.2141820208235851e-05, "loss": 0.778, "step": 5513 }, { "epoch": 0.4172373349475994, "grad_norm": 2.147493362426758, "learning_rate": 1.213970958247929e-05, "loss": 0.644, "step": 5514 }, { "epoch": 0.417313003669933, "grad_norm": 2.0052413940429688, "learning_rate": 1.2137598760103958e-05, "loss": 0.8898, "step": 5515 }, { "epoch": 0.4173886723922667, "grad_norm": 2.1926968097686768, "learning_rate": 1.2135487741242043e-05, "loss": 0.8162, "step": 5516 }, { "epoch": 0.4174643411146003, "grad_norm": 2.2886886596679688, "learning_rate": 1.2133376526025745e-05, "loss": 0.6808, "step": 5517 }, { "epoch": 0.4175400098369339, "grad_norm": 2.391803503036499, "learning_rate": 1.2131265114587274e-05, "loss": 0.7002, "step": 5518 }, { "epoch": 0.4176156785592675, "grad_norm": 2.0181946754455566, "learning_rate": 1.2129153507058856e-05, "loss": 0.7994, "step": 5519 }, { "epoch": 0.4176913472816012, "grad_norm": 2.0882043838500977, "learning_rate": 1.2127041703572722e-05, "loss": 0.6383, "step": 5520 }, { "epoch": 0.4177670160039348, "grad_norm": 1.928208351135254, "learning_rate": 1.2124929704261123e-05, "loss": 0.745, "step": 5521 }, { "epoch": 0.4178426847262684, "grad_norm": 2.641408681869507, "learning_rate": 1.212281750925632e-05, "loss": 1.0537, "step": 5522 }, { "epoch": 0.417918353448602, "grad_norm": 2.605942726135254, "learning_rate": 1.2120705118690581e-05, "loss": 0.6757, "step": 5523 }, { "epoch": 0.4179940221709356, "grad_norm": 2.2262070178985596, "learning_rate": 1.2118592532696196e-05, "loss": 0.8022, "step": 5524 }, { "epoch": 0.4180696908932693, "grad_norm": 2.1038734912872314, "learning_rate": 1.2116479751405461e-05, "loss": 0.6194, "step": 5525 }, { "epoch": 0.4181453596156029, "grad_norm": 1.9260424375534058, "learning_rate": 1.2114366774950681e-05, "loss": 0.6886, "step": 5526 }, { "epoch": 0.4182210283379365, "grad_norm": 1.8009731769561768, "learning_rate": 1.2112253603464182e-05, "loss": 0.5855, "step": 5527 }, { "epoch": 0.4182966970602701, "grad_norm": 4.206608772277832, "learning_rate": 1.2110140237078297e-05, "loss": 0.8015, "step": 5528 }, { "epoch": 0.4183723657826038, "grad_norm": 2.875774621963501, "learning_rate": 1.2108026675925371e-05, "loss": 0.7709, "step": 5529 }, { "epoch": 0.4184480345049374, "grad_norm": 2.3251543045043945, "learning_rate": 1.2105912920137762e-05, "loss": 0.7194, "step": 5530 }, { "epoch": 0.418523703227271, "grad_norm": 1.7996389865875244, "learning_rate": 1.2103798969847836e-05, "loss": 0.805, "step": 5531 }, { "epoch": 0.4185993719496046, "grad_norm": 2.323073148727417, "learning_rate": 1.2101684825187985e-05, "loss": 0.7145, "step": 5532 }, { "epoch": 0.4186750406719383, "grad_norm": 3.068136692047119, "learning_rate": 1.2099570486290597e-05, "loss": 0.8114, "step": 5533 }, { "epoch": 0.4187507093942719, "grad_norm": 2.661367416381836, "learning_rate": 1.209745595328808e-05, "loss": 0.7873, "step": 5534 }, { "epoch": 0.4188263781166055, "grad_norm": 1.9999775886535645, "learning_rate": 1.2095341226312853e-05, "loss": 0.7032, "step": 5535 }, { "epoch": 0.4189020468389391, "grad_norm": 2.4388561248779297, "learning_rate": 1.2093226305497341e-05, "loss": 0.9638, "step": 5536 }, { "epoch": 0.4189777155612727, "grad_norm": 2.280811309814453, "learning_rate": 1.2091111190974e-05, "loss": 0.7426, "step": 5537 }, { "epoch": 0.4190533842836064, "grad_norm": 2.1886045932769775, "learning_rate": 1.2088995882875275e-05, "loss": 0.7784, "step": 5538 }, { "epoch": 0.41912905300594, "grad_norm": 2.459237813949585, "learning_rate": 1.208688038133364e-05, "loss": 0.8087, "step": 5539 }, { "epoch": 0.4192047217282736, "grad_norm": 2.0118658542633057, "learning_rate": 1.2084764686481569e-05, "loss": 0.7171, "step": 5540 }, { "epoch": 0.4192803904506072, "grad_norm": 2.3412704467773438, "learning_rate": 1.2082648798451555e-05, "loss": 0.7725, "step": 5541 }, { "epoch": 0.4193560591729409, "grad_norm": 1.796249270439148, "learning_rate": 1.2080532717376106e-05, "loss": 0.7044, "step": 5542 }, { "epoch": 0.4194317278952745, "grad_norm": 2.0164694786071777, "learning_rate": 1.2078416443387731e-05, "loss": 0.7137, "step": 5543 }, { "epoch": 0.4195073966176081, "grad_norm": 1.7637386322021484, "learning_rate": 1.2076299976618965e-05, "loss": 0.7083, "step": 5544 }, { "epoch": 0.4195830653399417, "grad_norm": 1.95462167263031, "learning_rate": 1.207418331720234e-05, "loss": 0.7147, "step": 5545 }, { "epoch": 0.4196587340622754, "grad_norm": 1.7692989110946655, "learning_rate": 1.2072066465270415e-05, "loss": 0.7749, "step": 5546 }, { "epoch": 0.419734402784609, "grad_norm": 1.8411818742752075, "learning_rate": 1.2069949420955753e-05, "loss": 0.6869, "step": 5547 }, { "epoch": 0.4198100715069426, "grad_norm": 1.8122678995132446, "learning_rate": 1.2067832184390928e-05, "loss": 0.7162, "step": 5548 }, { "epoch": 0.4198857402292762, "grad_norm": 1.7828391790390015, "learning_rate": 1.206571475570853e-05, "loss": 0.6865, "step": 5549 }, { "epoch": 0.4199614089516098, "grad_norm": 2.397252082824707, "learning_rate": 1.2063597135041156e-05, "loss": 0.658, "step": 5550 }, { "epoch": 0.4200370776739435, "grad_norm": 2.136765956878662, "learning_rate": 1.2061479322521422e-05, "loss": 0.935, "step": 5551 }, { "epoch": 0.4201127463962771, "grad_norm": 1.9939488172531128, "learning_rate": 1.2059361318281949e-05, "loss": 0.7466, "step": 5552 }, { "epoch": 0.4201884151186107, "grad_norm": 2.698948860168457, "learning_rate": 1.2057243122455378e-05, "loss": 0.6457, "step": 5553 }, { "epoch": 0.4202640838409443, "grad_norm": 18.516450881958008, "learning_rate": 1.2055124735174352e-05, "loss": 0.5688, "step": 5554 }, { "epoch": 0.420339752563278, "grad_norm": 2.335066556930542, "learning_rate": 1.2053006156571534e-05, "loss": 0.7693, "step": 5555 }, { "epoch": 0.4204154212856116, "grad_norm": 2.284088373184204, "learning_rate": 1.2050887386779595e-05, "loss": 0.6985, "step": 5556 }, { "epoch": 0.4204910900079452, "grad_norm": 1.7979247570037842, "learning_rate": 1.2048768425931222e-05, "loss": 0.8248, "step": 5557 }, { "epoch": 0.4205667587302788, "grad_norm": 1.9598959684371948, "learning_rate": 1.204664927415911e-05, "loss": 0.8005, "step": 5558 }, { "epoch": 0.4206424274526125, "grad_norm": 1.7757333517074585, "learning_rate": 1.2044529931595964e-05, "loss": 0.6955, "step": 5559 }, { "epoch": 0.4207180961749461, "grad_norm": 2.177375078201294, "learning_rate": 1.2042410398374509e-05, "loss": 0.6263, "step": 5560 }, { "epoch": 0.4207937648972797, "grad_norm": 2.13222336769104, "learning_rate": 1.2040290674627471e-05, "loss": 0.7584, "step": 5561 }, { "epoch": 0.4208694336196133, "grad_norm": 1.8539749383926392, "learning_rate": 1.20381707604876e-05, "loss": 0.7735, "step": 5562 }, { "epoch": 0.42094510234194693, "grad_norm": 2.649493455886841, "learning_rate": 1.2036050656087648e-05, "loss": 0.9243, "step": 5563 }, { "epoch": 0.4210207710642806, "grad_norm": 2.361145257949829, "learning_rate": 1.2033930361560386e-05, "loss": 0.6677, "step": 5564 }, { "epoch": 0.4210964397866142, "grad_norm": 1.8541384935379028, "learning_rate": 1.2031809877038592e-05, "loss": 0.9055, "step": 5565 }, { "epoch": 0.4211721085089478, "grad_norm": 2.4618043899536133, "learning_rate": 1.2029689202655054e-05, "loss": 0.8678, "step": 5566 }, { "epoch": 0.42124777723128143, "grad_norm": 2.1291258335113525, "learning_rate": 1.2027568338542583e-05, "loss": 0.7327, "step": 5567 }, { "epoch": 0.4213234459536151, "grad_norm": 2.079526424407959, "learning_rate": 1.2025447284833987e-05, "loss": 0.7069, "step": 5568 }, { "epoch": 0.4213991146759487, "grad_norm": 1.8269448280334473, "learning_rate": 1.2023326041662096e-05, "loss": 0.7895, "step": 5569 }, { "epoch": 0.4214747833982823, "grad_norm": 1.9055373668670654, "learning_rate": 1.2021204609159753e-05, "loss": 0.5952, "step": 5570 }, { "epoch": 0.4215504521206159, "grad_norm": 2.0945003032684326, "learning_rate": 1.2019082987459806e-05, "loss": 0.7579, "step": 5571 }, { "epoch": 0.4216261208429496, "grad_norm": 1.7482681274414062, "learning_rate": 1.2016961176695113e-05, "loss": 0.6244, "step": 5572 }, { "epoch": 0.4217017895652832, "grad_norm": 2.3275108337402344, "learning_rate": 1.2014839176998557e-05, "loss": 0.6763, "step": 5573 }, { "epoch": 0.4217774582876168, "grad_norm": 1.9964745044708252, "learning_rate": 1.2012716988503021e-05, "loss": 0.8104, "step": 5574 }, { "epoch": 0.4218531270099504, "grad_norm": 1.7609212398529053, "learning_rate": 1.20105946113414e-05, "loss": 0.7107, "step": 5575 }, { "epoch": 0.42192879573228403, "grad_norm": 2.2987399101257324, "learning_rate": 1.200847204564661e-05, "loss": 0.7246, "step": 5576 }, { "epoch": 0.4220044644546177, "grad_norm": 1.8618190288543701, "learning_rate": 1.2006349291551564e-05, "loss": 0.6966, "step": 5577 }, { "epoch": 0.4220801331769513, "grad_norm": 1.7116061449050903, "learning_rate": 1.2004226349189208e-05, "loss": 0.8733, "step": 5578 }, { "epoch": 0.4221558018992849, "grad_norm": 2.474656105041504, "learning_rate": 1.2002103218692479e-05, "loss": 0.8025, "step": 5579 }, { "epoch": 0.42223147062161853, "grad_norm": 1.4539145231246948, "learning_rate": 1.1999979900194335e-05, "loss": 0.8741, "step": 5580 }, { "epoch": 0.4223071393439522, "grad_norm": 2.465669631958008, "learning_rate": 1.1997856393827749e-05, "loss": 0.7923, "step": 5581 }, { "epoch": 0.4223828080662858, "grad_norm": 2.1911604404449463, "learning_rate": 1.1995732699725697e-05, "loss": 0.7288, "step": 5582 }, { "epoch": 0.4224584767886194, "grad_norm": 1.682003378868103, "learning_rate": 1.1993608818021176e-05, "loss": 0.7595, "step": 5583 }, { "epoch": 0.42253414551095303, "grad_norm": 2.285404920578003, "learning_rate": 1.1991484748847187e-05, "loss": 0.6823, "step": 5584 }, { "epoch": 0.4226098142332867, "grad_norm": 3.933152675628662, "learning_rate": 1.1989360492336747e-05, "loss": 0.8576, "step": 5585 }, { "epoch": 0.4226854829556203, "grad_norm": 2.1262471675872803, "learning_rate": 1.1987236048622886e-05, "loss": 0.7692, "step": 5586 }, { "epoch": 0.4227611516779539, "grad_norm": 2.068648099899292, "learning_rate": 1.198511141783864e-05, "loss": 0.885, "step": 5587 }, { "epoch": 0.4228368204002875, "grad_norm": 2.879906177520752, "learning_rate": 1.1982986600117065e-05, "loss": 0.744, "step": 5588 }, { "epoch": 0.42291248912262114, "grad_norm": 2.1112852096557617, "learning_rate": 1.198086159559122e-05, "loss": 0.6636, "step": 5589 }, { "epoch": 0.4229881578449548, "grad_norm": 2.5208778381347656, "learning_rate": 1.1978736404394177e-05, "loss": 0.7342, "step": 5590 }, { "epoch": 0.4230638265672884, "grad_norm": 2.7024381160736084, "learning_rate": 1.1976611026659029e-05, "loss": 0.6204, "step": 5591 }, { "epoch": 0.423139495289622, "grad_norm": 1.8119574785232544, "learning_rate": 1.1974485462518872e-05, "loss": 0.7252, "step": 5592 }, { "epoch": 0.42321516401195564, "grad_norm": 2.16031813621521, "learning_rate": 1.1972359712106811e-05, "loss": 0.7198, "step": 5593 }, { "epoch": 0.4232908327342893, "grad_norm": 1.8880118131637573, "learning_rate": 1.1970233775555975e-05, "loss": 0.7329, "step": 5594 }, { "epoch": 0.4233665014566229, "grad_norm": 1.758371114730835, "learning_rate": 1.196810765299949e-05, "loss": 0.7565, "step": 5595 }, { "epoch": 0.4234421701789565, "grad_norm": 2.201699733734131, "learning_rate": 1.1965981344570504e-05, "loss": 0.6688, "step": 5596 }, { "epoch": 0.42351783890129013, "grad_norm": 1.9135266542434692, "learning_rate": 1.1963854850402173e-05, "loss": 0.7328, "step": 5597 }, { "epoch": 0.4235935076236238, "grad_norm": 2.1251890659332275, "learning_rate": 1.1961728170627666e-05, "loss": 0.7701, "step": 5598 }, { "epoch": 0.4236691763459574, "grad_norm": 2.2974348068237305, "learning_rate": 1.1959601305380163e-05, "loss": 0.6692, "step": 5599 }, { "epoch": 0.423744845068291, "grad_norm": 2.4548261165618896, "learning_rate": 1.1957474254792851e-05, "loss": 0.7951, "step": 5600 }, { "epoch": 0.42382051379062463, "grad_norm": 2.009052276611328, "learning_rate": 1.195534701899894e-05, "loss": 0.7192, "step": 5601 }, { "epoch": 0.42389618251295824, "grad_norm": 2.0583083629608154, "learning_rate": 1.1953219598131634e-05, "loss": 0.6207, "step": 5602 }, { "epoch": 0.4239718512352919, "grad_norm": 1.8193392753601074, "learning_rate": 1.1951091992324167e-05, "loss": 0.7451, "step": 5603 }, { "epoch": 0.4240475199576255, "grad_norm": 2.209012269973755, "learning_rate": 1.1948964201709775e-05, "loss": 0.6402, "step": 5604 }, { "epoch": 0.42412318867995913, "grad_norm": 2.069322347640991, "learning_rate": 1.1946836226421708e-05, "loss": 0.7, "step": 5605 }, { "epoch": 0.42419885740229274, "grad_norm": 1.7103984355926514, "learning_rate": 1.1944708066593225e-05, "loss": 0.674, "step": 5606 }, { "epoch": 0.4242745261246264, "grad_norm": 2.117616891860962, "learning_rate": 1.1942579722357596e-05, "loss": 0.6814, "step": 5607 }, { "epoch": 0.42435019484696, "grad_norm": 4.542725086212158, "learning_rate": 1.1940451193848108e-05, "loss": 0.7538, "step": 5608 }, { "epoch": 0.4244258635692936, "grad_norm": 2.359140157699585, "learning_rate": 1.1938322481198056e-05, "loss": 0.6849, "step": 5609 }, { "epoch": 0.42450153229162724, "grad_norm": 3.171555995941162, "learning_rate": 1.1936193584540747e-05, "loss": 0.7442, "step": 5610 }, { "epoch": 0.4245772010139609, "grad_norm": 2.4823436737060547, "learning_rate": 1.19340645040095e-05, "loss": 0.9193, "step": 5611 }, { "epoch": 0.4246528697362945, "grad_norm": 1.8430533409118652, "learning_rate": 1.1931935239737643e-05, "loss": 0.6416, "step": 5612 }, { "epoch": 0.4247285384586281, "grad_norm": 2.7708842754364014, "learning_rate": 1.1929805791858518e-05, "loss": 0.7639, "step": 5613 }, { "epoch": 0.42480420718096173, "grad_norm": 2.5303053855895996, "learning_rate": 1.1927676160505476e-05, "loss": 0.7355, "step": 5614 }, { "epoch": 0.42487987590329535, "grad_norm": 2.337359666824341, "learning_rate": 1.1925546345811889e-05, "loss": 0.8643, "step": 5615 }, { "epoch": 0.424955544625629, "grad_norm": 2.4965403079986572, "learning_rate": 1.1923416347911123e-05, "loss": 0.7514, "step": 5616 }, { "epoch": 0.4250312133479626, "grad_norm": 1.8858367204666138, "learning_rate": 1.192128616693657e-05, "loss": 0.6455, "step": 5617 }, { "epoch": 0.42510688207029623, "grad_norm": 1.8443236351013184, "learning_rate": 1.1919155803021628e-05, "loss": 0.8063, "step": 5618 }, { "epoch": 0.42518255079262984, "grad_norm": 1.9932689666748047, "learning_rate": 1.1917025256299713e-05, "loss": 0.6858, "step": 5619 }, { "epoch": 0.4252582195149635, "grad_norm": 2.4727776050567627, "learning_rate": 1.1914894526904236e-05, "loss": 0.8452, "step": 5620 }, { "epoch": 0.4253338882372971, "grad_norm": 1.8388804197311401, "learning_rate": 1.1912763614968638e-05, "loss": 0.8343, "step": 5621 }, { "epoch": 0.42540955695963073, "grad_norm": 1.9806253910064697, "learning_rate": 1.1910632520626363e-05, "loss": 0.7089, "step": 5622 }, { "epoch": 0.42548522568196434, "grad_norm": 2.018436908721924, "learning_rate": 1.1908501244010862e-05, "loss": 0.7397, "step": 5623 }, { "epoch": 0.425560894404298, "grad_norm": 2.8145549297332764, "learning_rate": 1.190636978525561e-05, "loss": 0.6912, "step": 5624 }, { "epoch": 0.4256365631266316, "grad_norm": 1.973929762840271, "learning_rate": 1.190423814449408e-05, "loss": 0.6269, "step": 5625 }, { "epoch": 0.42571223184896523, "grad_norm": 1.5904262065887451, "learning_rate": 1.1902106321859764e-05, "loss": 0.8579, "step": 5626 }, { "epoch": 0.42578790057129884, "grad_norm": 1.901921033859253, "learning_rate": 1.189997431748616e-05, "loss": 0.595, "step": 5627 }, { "epoch": 0.4258635692936325, "grad_norm": 1.7810097932815552, "learning_rate": 1.189784213150679e-05, "loss": 0.6377, "step": 5628 }, { "epoch": 0.4259392380159661, "grad_norm": 1.7638015747070312, "learning_rate": 1.189570976405517e-05, "loss": 0.6832, "step": 5629 }, { "epoch": 0.4260149067382997, "grad_norm": 1.9742308855056763, "learning_rate": 1.189357721526484e-05, "loss": 0.7322, "step": 5630 }, { "epoch": 0.42609057546063334, "grad_norm": 2.480738639831543, "learning_rate": 1.1891444485269344e-05, "loss": 0.735, "step": 5631 }, { "epoch": 0.42616624418296695, "grad_norm": 1.8731553554534912, "learning_rate": 1.1889311574202242e-05, "loss": 0.7125, "step": 5632 }, { "epoch": 0.4262419129053006, "grad_norm": 2.045454263687134, "learning_rate": 1.1887178482197109e-05, "loss": 0.7475, "step": 5633 }, { "epoch": 0.4263175816276342, "grad_norm": 2.2472641468048096, "learning_rate": 1.1885045209387514e-05, "loss": 0.6585, "step": 5634 }, { "epoch": 0.42639325034996783, "grad_norm": 1.9361463785171509, "learning_rate": 1.1882911755907062e-05, "loss": 0.6429, "step": 5635 }, { "epoch": 0.42646891907230144, "grad_norm": 1.8335850238800049, "learning_rate": 1.1880778121889349e-05, "loss": 0.554, "step": 5636 }, { "epoch": 0.4265445877946351, "grad_norm": 2.1291892528533936, "learning_rate": 1.1878644307467992e-05, "loss": 0.8045, "step": 5637 }, { "epoch": 0.4266202565169687, "grad_norm": 2.041837692260742, "learning_rate": 1.187651031277662e-05, "loss": 0.7902, "step": 5638 }, { "epoch": 0.42669592523930233, "grad_norm": 1.9485516548156738, "learning_rate": 1.1874376137948867e-05, "loss": 0.6962, "step": 5639 }, { "epoch": 0.42677159396163594, "grad_norm": 2.171895742416382, "learning_rate": 1.1872241783118386e-05, "loss": 0.6273, "step": 5640 }, { "epoch": 0.4268472626839696, "grad_norm": 1.8618667125701904, "learning_rate": 1.187010724841883e-05, "loss": 0.6796, "step": 5641 }, { "epoch": 0.4269229314063032, "grad_norm": 1.9820287227630615, "learning_rate": 1.1867972533983879e-05, "loss": 0.7338, "step": 5642 }, { "epoch": 0.42699860012863683, "grad_norm": 1.5241734981536865, "learning_rate": 1.1865837639947209e-05, "loss": 0.6571, "step": 5643 }, { "epoch": 0.42707426885097044, "grad_norm": 1.9339543581008911, "learning_rate": 1.1863702566442516e-05, "loss": 0.7887, "step": 5644 }, { "epoch": 0.42714993757330405, "grad_norm": 4.0906572341918945, "learning_rate": 1.1861567313603511e-05, "loss": 0.6873, "step": 5645 }, { "epoch": 0.4272256062956377, "grad_norm": 2.432317018508911, "learning_rate": 1.1859431881563899e-05, "loss": 0.6544, "step": 5646 }, { "epoch": 0.4273012750179713, "grad_norm": 2.0724246501922607, "learning_rate": 1.185729627045742e-05, "loss": 0.6835, "step": 5647 }, { "epoch": 0.42737694374030494, "grad_norm": 2.3478708267211914, "learning_rate": 1.1855160480417801e-05, "loss": 0.8091, "step": 5648 }, { "epoch": 0.42745261246263855, "grad_norm": 1.9543095827102661, "learning_rate": 1.1853024511578802e-05, "loss": 0.7343, "step": 5649 }, { "epoch": 0.4275282811849722, "grad_norm": 1.7966238260269165, "learning_rate": 1.1850888364074179e-05, "loss": 0.6051, "step": 5650 }, { "epoch": 0.4276039499073058, "grad_norm": 1.587853193283081, "learning_rate": 1.1848752038037708e-05, "loss": 0.6063, "step": 5651 }, { "epoch": 0.42767961862963944, "grad_norm": 1.883578896522522, "learning_rate": 1.1846615533603168e-05, "loss": 0.8186, "step": 5652 }, { "epoch": 0.42775528735197305, "grad_norm": 2.1655025482177734, "learning_rate": 1.1844478850904357e-05, "loss": 0.7779, "step": 5653 }, { "epoch": 0.4278309560743067, "grad_norm": 2.625882387161255, "learning_rate": 1.1842341990075081e-05, "loss": 0.7361, "step": 5654 }, { "epoch": 0.4279066247966403, "grad_norm": 1.935105323791504, "learning_rate": 1.1840204951249152e-05, "loss": 0.7115, "step": 5655 }, { "epoch": 0.42798229351897393, "grad_norm": 2.1714651584625244, "learning_rate": 1.1838067734560408e-05, "loss": 0.8187, "step": 5656 }, { "epoch": 0.42805796224130754, "grad_norm": 2.1692118644714355, "learning_rate": 1.183593034014268e-05, "loss": 0.6285, "step": 5657 }, { "epoch": 0.42813363096364115, "grad_norm": 1.921869158744812, "learning_rate": 1.1833792768129824e-05, "loss": 0.6029, "step": 5658 }, { "epoch": 0.4282092996859748, "grad_norm": 2.104144811630249, "learning_rate": 1.1831655018655696e-05, "loss": 0.7716, "step": 5659 }, { "epoch": 0.42828496840830843, "grad_norm": 2.422243595123291, "learning_rate": 1.1829517091854176e-05, "loss": 0.7995, "step": 5660 }, { "epoch": 0.42836063713064204, "grad_norm": 1.9468094110488892, "learning_rate": 1.1827378987859144e-05, "loss": 0.7132, "step": 5661 }, { "epoch": 0.42843630585297565, "grad_norm": 1.8024096488952637, "learning_rate": 1.1825240706804489e-05, "loss": 0.5364, "step": 5662 }, { "epoch": 0.4285119745753093, "grad_norm": 2.2247347831726074, "learning_rate": 1.1823102248824128e-05, "loss": 0.7529, "step": 5663 }, { "epoch": 0.42858764329764293, "grad_norm": 1.928809404373169, "learning_rate": 1.182096361405197e-05, "loss": 0.7429, "step": 5664 }, { "epoch": 0.42866331201997654, "grad_norm": 1.948986530303955, "learning_rate": 1.181882480262195e-05, "loss": 0.7047, "step": 5665 }, { "epoch": 0.42873898074231015, "grad_norm": 2.77934193611145, "learning_rate": 1.1816685814668e-05, "loss": 0.8807, "step": 5666 }, { "epoch": 0.4288146494646438, "grad_norm": 2.046111583709717, "learning_rate": 1.1814546650324078e-05, "loss": 0.8026, "step": 5667 }, { "epoch": 0.4288903181869774, "grad_norm": 1.4681226015090942, "learning_rate": 1.181240730972414e-05, "loss": 0.8824, "step": 5668 }, { "epoch": 0.42896598690931104, "grad_norm": 1.7636147737503052, "learning_rate": 1.1810267793002158e-05, "loss": 0.7005, "step": 5669 }, { "epoch": 0.42904165563164465, "grad_norm": 2.328195571899414, "learning_rate": 1.180812810029212e-05, "loss": 0.7529, "step": 5670 }, { "epoch": 0.42911732435397826, "grad_norm": 2.7056803703308105, "learning_rate": 1.1805988231728015e-05, "loss": 0.7472, "step": 5671 }, { "epoch": 0.4291929930763119, "grad_norm": 2.115111827850342, "learning_rate": 1.1803848187443853e-05, "loss": 0.8469, "step": 5672 }, { "epoch": 0.42926866179864553, "grad_norm": 1.9240214824676514, "learning_rate": 1.1801707967573647e-05, "loss": 0.7624, "step": 5673 }, { "epoch": 0.42934433052097914, "grad_norm": 1.9669137001037598, "learning_rate": 1.179956757225143e-05, "loss": 0.69, "step": 5674 }, { "epoch": 0.42941999924331276, "grad_norm": 1.713476300239563, "learning_rate": 1.1797427001611232e-05, "loss": 0.7508, "step": 5675 }, { "epoch": 0.4294956679656464, "grad_norm": 2.3911690711975098, "learning_rate": 1.179528625578711e-05, "loss": 0.6873, "step": 5676 }, { "epoch": 0.42957133668798003, "grad_norm": 2.203371047973633, "learning_rate": 1.1793145334913121e-05, "loss": 0.7431, "step": 5677 }, { "epoch": 0.42964700541031364, "grad_norm": 1.975543737411499, "learning_rate": 1.1791004239123336e-05, "loss": 0.7112, "step": 5678 }, { "epoch": 0.42972267413264725, "grad_norm": 1.8592123985290527, "learning_rate": 1.1788862968551842e-05, "loss": 0.6954, "step": 5679 }, { "epoch": 0.4297983428549809, "grad_norm": 2.5964951515197754, "learning_rate": 1.1786721523332723e-05, "loss": 0.6297, "step": 5680 }, { "epoch": 0.42987401157731453, "grad_norm": 2.0938498973846436, "learning_rate": 1.1784579903600093e-05, "loss": 0.6312, "step": 5681 }, { "epoch": 0.42994968029964814, "grad_norm": 1.6955291032791138, "learning_rate": 1.1782438109488063e-05, "loss": 0.7806, "step": 5682 }, { "epoch": 0.43002534902198175, "grad_norm": 2.2497923374176025, "learning_rate": 1.1780296141130756e-05, "loss": 0.7267, "step": 5683 }, { "epoch": 0.43010101774431536, "grad_norm": 1.6915230751037598, "learning_rate": 1.1778153998662316e-05, "loss": 0.708, "step": 5684 }, { "epoch": 0.430176686466649, "grad_norm": 1.8355711698532104, "learning_rate": 1.1776011682216882e-05, "loss": 0.6188, "step": 5685 }, { "epoch": 0.43025235518898264, "grad_norm": 1.9086350202560425, "learning_rate": 1.1773869191928624e-05, "loss": 0.6782, "step": 5686 }, { "epoch": 0.43032802391131625, "grad_norm": 2.346781015396118, "learning_rate": 1.17717265279317e-05, "loss": 0.694, "step": 5687 }, { "epoch": 0.43040369263364986, "grad_norm": 2.6471588611602783, "learning_rate": 1.17695836903603e-05, "loss": 0.6686, "step": 5688 }, { "epoch": 0.4304793613559835, "grad_norm": 2.062077045440674, "learning_rate": 1.1767440679348607e-05, "loss": 0.7097, "step": 5689 }, { "epoch": 0.43055503007831714, "grad_norm": 2.0753626823425293, "learning_rate": 1.1765297495030831e-05, "loss": 0.6988, "step": 5690 }, { "epoch": 0.43063069880065075, "grad_norm": 2.3270702362060547, "learning_rate": 1.1763154137541183e-05, "loss": 0.7583, "step": 5691 }, { "epoch": 0.43070636752298436, "grad_norm": 1.7685538530349731, "learning_rate": 1.1761010607013883e-05, "loss": 0.6572, "step": 5692 }, { "epoch": 0.430782036245318, "grad_norm": 1.7218018770217896, "learning_rate": 1.175886690358317e-05, "loss": 0.5876, "step": 5693 }, { "epoch": 0.43085770496765163, "grad_norm": 1.9361426830291748, "learning_rate": 1.1756723027383286e-05, "loss": 0.6186, "step": 5694 }, { "epoch": 0.43093337368998524, "grad_norm": 2.054652214050293, "learning_rate": 1.1754578978548493e-05, "loss": 0.7866, "step": 5695 }, { "epoch": 0.43100904241231885, "grad_norm": 3.165121555328369, "learning_rate": 1.1752434757213053e-05, "loss": 0.6932, "step": 5696 }, { "epoch": 0.43108471113465247, "grad_norm": 1.8090955018997192, "learning_rate": 1.1750290363511248e-05, "loss": 0.7486, "step": 5697 }, { "epoch": 0.43116037985698613, "grad_norm": 2.016489028930664, "learning_rate": 1.1748145797577363e-05, "loss": 0.6689, "step": 5698 }, { "epoch": 0.43123604857931974, "grad_norm": 2.5373289585113525, "learning_rate": 1.17460010595457e-05, "loss": 0.8227, "step": 5699 }, { "epoch": 0.43131171730165335, "grad_norm": 1.8351738452911377, "learning_rate": 1.1743856149550568e-05, "loss": 0.6326, "step": 5700 }, { "epoch": 0.43138738602398696, "grad_norm": 2.076626777648926, "learning_rate": 1.174171106772629e-05, "loss": 0.8595, "step": 5701 }, { "epoch": 0.43146305474632063, "grad_norm": 2.6194770336151123, "learning_rate": 1.1739565814207198e-05, "loss": 0.8026, "step": 5702 }, { "epoch": 0.43153872346865424, "grad_norm": 2.0482687950134277, "learning_rate": 1.173742038912763e-05, "loss": 0.6438, "step": 5703 }, { "epoch": 0.43161439219098785, "grad_norm": 3.11622953414917, "learning_rate": 1.173527479262195e-05, "loss": 0.7928, "step": 5704 }, { "epoch": 0.43169006091332146, "grad_norm": 1.6607571840286255, "learning_rate": 1.1733129024824512e-05, "loss": 0.6947, "step": 5705 }, { "epoch": 0.4317657296356551, "grad_norm": 2.05531907081604, "learning_rate": 1.1730983085869693e-05, "loss": 0.7901, "step": 5706 }, { "epoch": 0.43184139835798874, "grad_norm": 2.3457841873168945, "learning_rate": 1.172883697589188e-05, "loss": 0.7528, "step": 5707 }, { "epoch": 0.43191706708032235, "grad_norm": 2.2732810974121094, "learning_rate": 1.1726690695025472e-05, "loss": 0.6971, "step": 5708 }, { "epoch": 0.43199273580265596, "grad_norm": 1.925724744796753, "learning_rate": 1.1724544243404873e-05, "loss": 0.853, "step": 5709 }, { "epoch": 0.43206840452498957, "grad_norm": 1.8667192459106445, "learning_rate": 1.1722397621164502e-05, "loss": 0.5859, "step": 5710 }, { "epoch": 0.43214407324732323, "grad_norm": 2.398282289505005, "learning_rate": 1.1720250828438785e-05, "loss": 0.7186, "step": 5711 }, { "epoch": 0.43221974196965685, "grad_norm": 2.3136038780212402, "learning_rate": 1.1718103865362161e-05, "loss": 0.849, "step": 5712 }, { "epoch": 0.43229541069199046, "grad_norm": 1.7996965646743774, "learning_rate": 1.1715956732069083e-05, "loss": 0.7615, "step": 5713 }, { "epoch": 0.43237107941432407, "grad_norm": 1.8926506042480469, "learning_rate": 1.171380942869401e-05, "loss": 0.6777, "step": 5714 }, { "epoch": 0.43244674813665773, "grad_norm": 1.8787177801132202, "learning_rate": 1.1711661955371416e-05, "loss": 0.7415, "step": 5715 }, { "epoch": 0.43252241685899134, "grad_norm": 1.7595826387405396, "learning_rate": 1.1709514312235777e-05, "loss": 0.756, "step": 5716 }, { "epoch": 0.43259808558132495, "grad_norm": 1.8866539001464844, "learning_rate": 1.1707366499421589e-05, "loss": 0.7147, "step": 5717 }, { "epoch": 0.43267375430365856, "grad_norm": 1.6653908491134644, "learning_rate": 1.1705218517063353e-05, "loss": 0.676, "step": 5718 }, { "epoch": 0.43274942302599223, "grad_norm": 2.470182180404663, "learning_rate": 1.1703070365295584e-05, "loss": 0.6446, "step": 5719 }, { "epoch": 0.43282509174832584, "grad_norm": 2.4232730865478516, "learning_rate": 1.1700922044252808e-05, "loss": 0.7754, "step": 5720 }, { "epoch": 0.43290076047065945, "grad_norm": 1.9250999689102173, "learning_rate": 1.1698773554069555e-05, "loss": 0.5243, "step": 5721 }, { "epoch": 0.43297642919299306, "grad_norm": 2.12267804145813, "learning_rate": 1.1696624894880376e-05, "loss": 0.6571, "step": 5722 }, { "epoch": 0.4330520979153267, "grad_norm": 1.7465804815292358, "learning_rate": 1.1694476066819821e-05, "loss": 0.6991, "step": 5723 }, { "epoch": 0.43312776663766034, "grad_norm": 3.37947154045105, "learning_rate": 1.1692327070022462e-05, "loss": 0.7811, "step": 5724 }, { "epoch": 0.43320343535999395, "grad_norm": 2.1732442378997803, "learning_rate": 1.1690177904622874e-05, "loss": 0.7455, "step": 5725 }, { "epoch": 0.43327910408232756, "grad_norm": 1.863910436630249, "learning_rate": 1.1688028570755642e-05, "loss": 0.8409, "step": 5726 }, { "epoch": 0.43335477280466117, "grad_norm": 1.823136806488037, "learning_rate": 1.1685879068555369e-05, "loss": 0.6706, "step": 5727 }, { "epoch": 0.43343044152699484, "grad_norm": 1.8565260171890259, "learning_rate": 1.168372939815666e-05, "loss": 0.6175, "step": 5728 }, { "epoch": 0.43350611024932845, "grad_norm": 2.070004463195801, "learning_rate": 1.1681579559694136e-05, "loss": 0.7205, "step": 5729 }, { "epoch": 0.43358177897166206, "grad_norm": 2.2450389862060547, "learning_rate": 1.167942955330243e-05, "loss": 0.7572, "step": 5730 }, { "epoch": 0.43365744769399567, "grad_norm": 1.7799854278564453, "learning_rate": 1.1677279379116174e-05, "loss": 0.7553, "step": 5731 }, { "epoch": 0.43373311641632933, "grad_norm": 2.37202787399292, "learning_rate": 1.1675129037270028e-05, "loss": 0.637, "step": 5732 }, { "epoch": 0.43380878513866294, "grad_norm": 2.0018792152404785, "learning_rate": 1.1672978527898647e-05, "loss": 0.7219, "step": 5733 }, { "epoch": 0.43388445386099656, "grad_norm": 2.104686975479126, "learning_rate": 1.1670827851136704e-05, "loss": 0.7433, "step": 5734 }, { "epoch": 0.43396012258333017, "grad_norm": 2.32853102684021, "learning_rate": 1.1668677007118884e-05, "loss": 0.7045, "step": 5735 }, { "epoch": 0.4340357913056638, "grad_norm": 1.7697525024414062, "learning_rate": 1.166652599597988e-05, "loss": 0.7692, "step": 5736 }, { "epoch": 0.43411146002799744, "grad_norm": 2.1800074577331543, "learning_rate": 1.166437481785439e-05, "loss": 0.7022, "step": 5737 }, { "epoch": 0.43418712875033105, "grad_norm": 2.2426414489746094, "learning_rate": 1.1662223472877135e-05, "loss": 0.7946, "step": 5738 }, { "epoch": 0.43426279747266466, "grad_norm": 2.2983815670013428, "learning_rate": 1.1660071961182834e-05, "loss": 0.7674, "step": 5739 }, { "epoch": 0.4343384661949983, "grad_norm": 2.3444814682006836, "learning_rate": 1.1657920282906221e-05, "loss": 0.6454, "step": 5740 }, { "epoch": 0.43441413491733194, "grad_norm": 2.3554506301879883, "learning_rate": 1.1655768438182046e-05, "loss": 0.795, "step": 5741 }, { "epoch": 0.43448980363966555, "grad_norm": 2.302736520767212, "learning_rate": 1.1653616427145061e-05, "loss": 0.7287, "step": 5742 }, { "epoch": 0.43456547236199916, "grad_norm": 1.9690250158309937, "learning_rate": 1.1651464249930032e-05, "loss": 0.662, "step": 5743 }, { "epoch": 0.43464114108433277, "grad_norm": 1.9559441804885864, "learning_rate": 1.1649311906671735e-05, "loss": 0.8327, "step": 5744 }, { "epoch": 0.43471680980666644, "grad_norm": 1.8690423965454102, "learning_rate": 1.1647159397504958e-05, "loss": 0.5716, "step": 5745 }, { "epoch": 0.43479247852900005, "grad_norm": 1.6188991069793701, "learning_rate": 1.1645006722564499e-05, "loss": 0.6871, "step": 5746 }, { "epoch": 0.43486814725133366, "grad_norm": 2.0028879642486572, "learning_rate": 1.1642853881985162e-05, "loss": 0.7658, "step": 5747 }, { "epoch": 0.43494381597366727, "grad_norm": 1.7991002798080444, "learning_rate": 1.1640700875901768e-05, "loss": 0.6294, "step": 5748 }, { "epoch": 0.43501948469600094, "grad_norm": 1.7511948347091675, "learning_rate": 1.1638547704449142e-05, "loss": 0.6803, "step": 5749 }, { "epoch": 0.43509515341833455, "grad_norm": 2.2755792140960693, "learning_rate": 1.163639436776213e-05, "loss": 0.634, "step": 5750 }, { "epoch": 0.43517082214066816, "grad_norm": 1.9088243246078491, "learning_rate": 1.1634240865975571e-05, "loss": 0.6084, "step": 5751 }, { "epoch": 0.43524649086300177, "grad_norm": 1.6567586660385132, "learning_rate": 1.163208719922433e-05, "loss": 0.6281, "step": 5752 }, { "epoch": 0.4353221595853354, "grad_norm": 1.8782658576965332, "learning_rate": 1.1629933367643274e-05, "loss": 0.7178, "step": 5753 }, { "epoch": 0.43539782830766904, "grad_norm": 2.370513677597046, "learning_rate": 1.1627779371367286e-05, "loss": 0.7157, "step": 5754 }, { "epoch": 0.43547349703000265, "grad_norm": 2.0037007331848145, "learning_rate": 1.1625625210531255e-05, "loss": 0.7701, "step": 5755 }, { "epoch": 0.43554916575233626, "grad_norm": 1.9564191102981567, "learning_rate": 1.162347088527008e-05, "loss": 0.8741, "step": 5756 }, { "epoch": 0.4356248344746699, "grad_norm": 2.218358039855957, "learning_rate": 1.1621316395718674e-05, "loss": 0.7502, "step": 5757 }, { "epoch": 0.43570050319700354, "grad_norm": 2.1146786212921143, "learning_rate": 1.1619161742011953e-05, "loss": 0.7886, "step": 5758 }, { "epoch": 0.43577617191933715, "grad_norm": 1.9309403896331787, "learning_rate": 1.1617006924284856e-05, "loss": 0.7796, "step": 5759 }, { "epoch": 0.43585184064167076, "grad_norm": 2.1848180294036865, "learning_rate": 1.1614851942672319e-05, "loss": 0.8031, "step": 5760 }, { "epoch": 0.4359275093640044, "grad_norm": 2.1754567623138428, "learning_rate": 1.1612696797309298e-05, "loss": 0.7759, "step": 5761 }, { "epoch": 0.43600317808633804, "grad_norm": 1.7848371267318726, "learning_rate": 1.1610541488330753e-05, "loss": 0.7262, "step": 5762 }, { "epoch": 0.43607884680867165, "grad_norm": 2.1866798400878906, "learning_rate": 1.1608386015871655e-05, "loss": 0.7978, "step": 5763 }, { "epoch": 0.43615451553100526, "grad_norm": 1.765702486038208, "learning_rate": 1.1606230380066988e-05, "loss": 0.7182, "step": 5764 }, { "epoch": 0.43623018425333887, "grad_norm": 2.1728196144104004, "learning_rate": 1.1604074581051746e-05, "loss": 0.748, "step": 5765 }, { "epoch": 0.4363058529756725, "grad_norm": 2.886596441268921, "learning_rate": 1.1601918618960933e-05, "loss": 0.8474, "step": 5766 }, { "epoch": 0.43638152169800615, "grad_norm": 2.492180824279785, "learning_rate": 1.1599762493929555e-05, "loss": 0.7185, "step": 5767 }, { "epoch": 0.43645719042033976, "grad_norm": 2.229836940765381, "learning_rate": 1.1597606206092645e-05, "loss": 0.7957, "step": 5768 }, { "epoch": 0.43653285914267337, "grad_norm": 1.8042664527893066, "learning_rate": 1.1595449755585232e-05, "loss": 0.7395, "step": 5769 }, { "epoch": 0.436608527865007, "grad_norm": 1.6901711225509644, "learning_rate": 1.159329314254236e-05, "loss": 0.6354, "step": 5770 }, { "epoch": 0.43668419658734065, "grad_norm": 1.7246809005737305, "learning_rate": 1.1591136367099087e-05, "loss": 0.7282, "step": 5771 }, { "epoch": 0.43675986530967426, "grad_norm": 1.6390856504440308, "learning_rate": 1.1588979429390467e-05, "loss": 0.7898, "step": 5772 }, { "epoch": 0.43683553403200787, "grad_norm": 2.199267625808716, "learning_rate": 1.1586822329551588e-05, "loss": 0.8082, "step": 5773 }, { "epoch": 0.4369112027543415, "grad_norm": 2.898261070251465, "learning_rate": 1.1584665067717527e-05, "loss": 0.6785, "step": 5774 }, { "epoch": 0.43698687147667514, "grad_norm": 2.123633623123169, "learning_rate": 1.1582507644023377e-05, "loss": 0.7712, "step": 5775 }, { "epoch": 0.43706254019900875, "grad_norm": 2.2638285160064697, "learning_rate": 1.1580350058604246e-05, "loss": 0.7443, "step": 5776 }, { "epoch": 0.43713820892134236, "grad_norm": 1.94474458694458, "learning_rate": 1.1578192311595247e-05, "loss": 0.7249, "step": 5777 }, { "epoch": 0.437213877643676, "grad_norm": 2.0058271884918213, "learning_rate": 1.1576034403131511e-05, "loss": 0.5709, "step": 5778 }, { "epoch": 0.4372895463660096, "grad_norm": 2.0627119541168213, "learning_rate": 1.1573876333348165e-05, "loss": 0.6974, "step": 5779 }, { "epoch": 0.43736521508834325, "grad_norm": 2.220038652420044, "learning_rate": 1.157171810238036e-05, "loss": 0.8202, "step": 5780 }, { "epoch": 0.43744088381067686, "grad_norm": 1.8286783695220947, "learning_rate": 1.1569559710363249e-05, "loss": 0.6777, "step": 5781 }, { "epoch": 0.43751655253301047, "grad_norm": 2.0850884914398193, "learning_rate": 1.1567401157431998e-05, "loss": 0.6338, "step": 5782 }, { "epoch": 0.4375922212553441, "grad_norm": 2.0958290100097656, "learning_rate": 1.1565242443721783e-05, "loss": 0.7985, "step": 5783 }, { "epoch": 0.43766788997767775, "grad_norm": 2.5265495777130127, "learning_rate": 1.156308356936779e-05, "loss": 0.7152, "step": 5784 }, { "epoch": 0.43774355870001136, "grad_norm": 1.9527240991592407, "learning_rate": 1.1560924534505212e-05, "loss": 0.7323, "step": 5785 }, { "epoch": 0.43781922742234497, "grad_norm": 2.079576253890991, "learning_rate": 1.1558765339269255e-05, "loss": 0.7322, "step": 5786 }, { "epoch": 0.4378948961446786, "grad_norm": 2.6876163482666016, "learning_rate": 1.1556605983795142e-05, "loss": 0.7538, "step": 5787 }, { "epoch": 0.43797056486701225, "grad_norm": 2.0275208950042725, "learning_rate": 1.1554446468218087e-05, "loss": 0.7103, "step": 5788 }, { "epoch": 0.43804623358934586, "grad_norm": 2.2174196243286133, "learning_rate": 1.1552286792673335e-05, "loss": 0.7053, "step": 5789 }, { "epoch": 0.43812190231167947, "grad_norm": 2.1054906845092773, "learning_rate": 1.1550126957296128e-05, "loss": 0.7026, "step": 5790 }, { "epoch": 0.4381975710340131, "grad_norm": 2.7534801959991455, "learning_rate": 1.1547966962221726e-05, "loss": 0.7907, "step": 5791 }, { "epoch": 0.4382732397563467, "grad_norm": 2.1715431213378906, "learning_rate": 1.154580680758539e-05, "loss": 0.7555, "step": 5792 }, { "epoch": 0.43834890847868035, "grad_norm": 2.3663253784179688, "learning_rate": 1.1543646493522395e-05, "loss": 0.7672, "step": 5793 }, { "epoch": 0.43842457720101397, "grad_norm": 2.317469358444214, "learning_rate": 1.1541486020168034e-05, "loss": 0.8387, "step": 5794 }, { "epoch": 0.4385002459233476, "grad_norm": 2.1923418045043945, "learning_rate": 1.1539325387657593e-05, "loss": 0.774, "step": 5795 }, { "epoch": 0.4385759146456812, "grad_norm": 2.3220553398132324, "learning_rate": 1.1537164596126386e-05, "loss": 0.7811, "step": 5796 }, { "epoch": 0.43865158336801485, "grad_norm": 2.2181203365325928, "learning_rate": 1.1535003645709725e-05, "loss": 0.616, "step": 5797 }, { "epoch": 0.43872725209034846, "grad_norm": 2.3010993003845215, "learning_rate": 1.1532842536542936e-05, "loss": 0.615, "step": 5798 }, { "epoch": 0.4388029208126821, "grad_norm": 2.1476845741271973, "learning_rate": 1.1530681268761356e-05, "loss": 0.8901, "step": 5799 }, { "epoch": 0.4388785895350157, "grad_norm": 1.7837895154953003, "learning_rate": 1.1528519842500328e-05, "loss": 0.8305, "step": 5800 }, { "epoch": 0.43895425825734935, "grad_norm": 2.1396026611328125, "learning_rate": 1.1526358257895216e-05, "loss": 0.8109, "step": 5801 }, { "epoch": 0.43902992697968296, "grad_norm": 2.0845377445220947, "learning_rate": 1.1524196515081372e-05, "loss": 0.8193, "step": 5802 }, { "epoch": 0.43910559570201657, "grad_norm": 1.9465970993041992, "learning_rate": 1.1522034614194178e-05, "loss": 0.7111, "step": 5803 }, { "epoch": 0.4391812644243502, "grad_norm": 1.8505274057388306, "learning_rate": 1.1519872555369022e-05, "loss": 0.7731, "step": 5804 }, { "epoch": 0.4392569331466838, "grad_norm": 2.0143327713012695, "learning_rate": 1.1517710338741297e-05, "loss": 0.5693, "step": 5805 }, { "epoch": 0.43933260186901746, "grad_norm": 2.1907799243927, "learning_rate": 1.1515547964446403e-05, "loss": 0.7013, "step": 5806 }, { "epoch": 0.43940827059135107, "grad_norm": 2.1983025074005127, "learning_rate": 1.1513385432619763e-05, "loss": 0.8154, "step": 5807 }, { "epoch": 0.4394839393136847, "grad_norm": 1.9378697872161865, "learning_rate": 1.1511222743396797e-05, "loss": 0.6557, "step": 5808 }, { "epoch": 0.4395596080360183, "grad_norm": 1.7255088090896606, "learning_rate": 1.150905989691294e-05, "loss": 0.6812, "step": 5809 }, { "epoch": 0.43963527675835196, "grad_norm": 1.7912418842315674, "learning_rate": 1.1506896893303637e-05, "loss": 0.642, "step": 5810 }, { "epoch": 0.43971094548068557, "grad_norm": 2.0049731731414795, "learning_rate": 1.1504733732704342e-05, "loss": 0.7161, "step": 5811 }, { "epoch": 0.4397866142030192, "grad_norm": 1.776609182357788, "learning_rate": 1.1502570415250522e-05, "loss": 0.6908, "step": 5812 }, { "epoch": 0.4398622829253528, "grad_norm": 1.9277169704437256, "learning_rate": 1.1500406941077642e-05, "loss": 0.7799, "step": 5813 }, { "epoch": 0.43993795164768645, "grad_norm": 1.8909525871276855, "learning_rate": 1.1498243310321198e-05, "loss": 0.7749, "step": 5814 }, { "epoch": 0.44001362037002006, "grad_norm": 2.2037034034729004, "learning_rate": 1.1496079523116677e-05, "loss": 0.9047, "step": 5815 }, { "epoch": 0.4400892890923537, "grad_norm": 1.9161611795425415, "learning_rate": 1.1493915579599582e-05, "loss": 0.6534, "step": 5816 }, { "epoch": 0.4401649578146873, "grad_norm": 2.0024547576904297, "learning_rate": 1.1491751479905425e-05, "loss": 0.7846, "step": 5817 }, { "epoch": 0.4402406265370209, "grad_norm": 1.8573756217956543, "learning_rate": 1.1489587224169733e-05, "loss": 0.7107, "step": 5818 }, { "epoch": 0.44031629525935456, "grad_norm": 2.4334030151367188, "learning_rate": 1.1487422812528037e-05, "loss": 0.5948, "step": 5819 }, { "epoch": 0.4403919639816882, "grad_norm": 1.6953381299972534, "learning_rate": 1.1485258245115878e-05, "loss": 0.761, "step": 5820 }, { "epoch": 0.4404676327040218, "grad_norm": 1.992057204246521, "learning_rate": 1.148309352206881e-05, "loss": 0.6255, "step": 5821 }, { "epoch": 0.4405433014263554, "grad_norm": 1.9691619873046875, "learning_rate": 1.1480928643522396e-05, "loss": 0.6193, "step": 5822 }, { "epoch": 0.44061897014868906, "grad_norm": 2.430366277694702, "learning_rate": 1.1478763609612204e-05, "loss": 0.7825, "step": 5823 }, { "epoch": 0.44069463887102267, "grad_norm": 1.9845337867736816, "learning_rate": 1.1476598420473817e-05, "loss": 0.5983, "step": 5824 }, { "epoch": 0.4407703075933563, "grad_norm": 1.902275800704956, "learning_rate": 1.147443307624283e-05, "loss": 0.7193, "step": 5825 }, { "epoch": 0.4408459763156899, "grad_norm": 2.0569605827331543, "learning_rate": 1.1472267577054838e-05, "loss": 0.8241, "step": 5826 }, { "epoch": 0.44092164503802356, "grad_norm": 1.7399667501449585, "learning_rate": 1.1470101923045453e-05, "loss": 0.62, "step": 5827 }, { "epoch": 0.44099731376035717, "grad_norm": 2.0543341636657715, "learning_rate": 1.14679361143503e-05, "loss": 0.6139, "step": 5828 }, { "epoch": 0.4410729824826908, "grad_norm": 2.1195523738861084, "learning_rate": 1.1465770151105e-05, "loss": 0.7834, "step": 5829 }, { "epoch": 0.4411486512050244, "grad_norm": 2.0110557079315186, "learning_rate": 1.1463604033445203e-05, "loss": 0.5876, "step": 5830 }, { "epoch": 0.441224319927358, "grad_norm": 1.9073213338851929, "learning_rate": 1.1461437761506548e-05, "loss": 0.7183, "step": 5831 }, { "epoch": 0.44129998864969167, "grad_norm": 1.7596300840377808, "learning_rate": 1.1459271335424703e-05, "loss": 0.7434, "step": 5832 }, { "epoch": 0.4413756573720253, "grad_norm": 2.102022647857666, "learning_rate": 1.1457104755335332e-05, "loss": 0.7105, "step": 5833 }, { "epoch": 0.4414513260943589, "grad_norm": 1.682158350944519, "learning_rate": 1.1454938021374112e-05, "loss": 0.6027, "step": 5834 }, { "epoch": 0.4415269948166925, "grad_norm": 2.463780641555786, "learning_rate": 1.1452771133676736e-05, "loss": 0.7158, "step": 5835 }, { "epoch": 0.44160266353902616, "grad_norm": 1.8532681465148926, "learning_rate": 1.1450604092378895e-05, "loss": 0.7358, "step": 5836 }, { "epoch": 0.4416783322613598, "grad_norm": 2.06534743309021, "learning_rate": 1.1448436897616304e-05, "loss": 0.6122, "step": 5837 }, { "epoch": 0.4417540009836934, "grad_norm": 1.474158763885498, "learning_rate": 1.144626954952467e-05, "loss": 0.869, "step": 5838 }, { "epoch": 0.441829669706027, "grad_norm": 2.5886144638061523, "learning_rate": 1.1444102048239729e-05, "loss": 0.8578, "step": 5839 }, { "epoch": 0.44190533842836066, "grad_norm": 2.0663976669311523, "learning_rate": 1.1441934393897208e-05, "loss": 0.6776, "step": 5840 }, { "epoch": 0.44198100715069427, "grad_norm": 3.3373160362243652, "learning_rate": 1.1439766586632861e-05, "loss": 0.6511, "step": 5841 }, { "epoch": 0.4420566758730279, "grad_norm": 1.776739239692688, "learning_rate": 1.1437598626582438e-05, "loss": 0.732, "step": 5842 }, { "epoch": 0.4421323445953615, "grad_norm": 1.9327268600463867, "learning_rate": 1.1435430513881705e-05, "loss": 0.7917, "step": 5843 }, { "epoch": 0.4422080133176951, "grad_norm": 2.4494521617889404, "learning_rate": 1.1433262248666438e-05, "loss": 0.7542, "step": 5844 }, { "epoch": 0.44228368204002877, "grad_norm": 2.140435218811035, "learning_rate": 1.1431093831072414e-05, "loss": 0.5638, "step": 5845 }, { "epoch": 0.4423593507623624, "grad_norm": 2.0090110301971436, "learning_rate": 1.1428925261235437e-05, "loss": 0.6552, "step": 5846 }, { "epoch": 0.442435019484696, "grad_norm": 2.2307565212249756, "learning_rate": 1.14267565392913e-05, "loss": 0.9066, "step": 5847 }, { "epoch": 0.4425106882070296, "grad_norm": 1.7167367935180664, "learning_rate": 1.142458766537582e-05, "loss": 0.7067, "step": 5848 }, { "epoch": 0.44258635692936327, "grad_norm": 1.934412956237793, "learning_rate": 1.1422418639624818e-05, "loss": 0.641, "step": 5849 }, { "epoch": 0.4426620256516969, "grad_norm": 1.84349524974823, "learning_rate": 1.142024946217413e-05, "loss": 0.8343, "step": 5850 }, { "epoch": 0.4427376943740305, "grad_norm": 1.82757568359375, "learning_rate": 1.1418080133159588e-05, "loss": 0.7165, "step": 5851 }, { "epoch": 0.4428133630963641, "grad_norm": 1.902925968170166, "learning_rate": 1.1415910652717046e-05, "loss": 0.7806, "step": 5852 }, { "epoch": 0.44288903181869776, "grad_norm": 2.100587844848633, "learning_rate": 1.1413741020982369e-05, "loss": 0.7068, "step": 5853 }, { "epoch": 0.4429647005410314, "grad_norm": 2.090022325515747, "learning_rate": 1.1411571238091419e-05, "loss": 0.6824, "step": 5854 }, { "epoch": 0.443040369263365, "grad_norm": 1.6007441282272339, "learning_rate": 1.1409401304180081e-05, "loss": 0.7933, "step": 5855 }, { "epoch": 0.4431160379856986, "grad_norm": 2.1241252422332764, "learning_rate": 1.1407231219384238e-05, "loss": 0.7396, "step": 5856 }, { "epoch": 0.44319170670803226, "grad_norm": 2.4907209873199463, "learning_rate": 1.140506098383979e-05, "loss": 0.8325, "step": 5857 }, { "epoch": 0.4432673754303659, "grad_norm": 1.7543824911117554, "learning_rate": 1.1402890597682648e-05, "loss": 0.6119, "step": 5858 }, { "epoch": 0.4433430441526995, "grad_norm": 2.4518723487854004, "learning_rate": 1.1400720061048718e-05, "loss": 0.8612, "step": 5859 }, { "epoch": 0.4434187128750331, "grad_norm": 2.1455647945404053, "learning_rate": 1.139854937407394e-05, "loss": 0.7573, "step": 5860 }, { "epoch": 0.4434943815973667, "grad_norm": 2.118077039718628, "learning_rate": 1.1396378536894239e-05, "loss": 0.6258, "step": 5861 }, { "epoch": 0.44357005031970037, "grad_norm": 1.8771562576293945, "learning_rate": 1.1394207549645564e-05, "loss": 0.5765, "step": 5862 }, { "epoch": 0.443645719042034, "grad_norm": 2.192807912826538, "learning_rate": 1.1392036412463868e-05, "loss": 0.6963, "step": 5863 }, { "epoch": 0.4437213877643676, "grad_norm": 3.553529739379883, "learning_rate": 1.1389865125485116e-05, "loss": 0.6827, "step": 5864 }, { "epoch": 0.4437970564867012, "grad_norm": 3.7018728256225586, "learning_rate": 1.138769368884528e-05, "loss": 0.6063, "step": 5865 }, { "epoch": 0.44387272520903487, "grad_norm": 2.078188896179199, "learning_rate": 1.138552210268034e-05, "loss": 0.7483, "step": 5866 }, { "epoch": 0.4439483939313685, "grad_norm": 2.0784387588500977, "learning_rate": 1.1383350367126292e-05, "loss": 0.7824, "step": 5867 }, { "epoch": 0.4440240626537021, "grad_norm": 3.4652624130249023, "learning_rate": 1.1381178482319136e-05, "loss": 0.7509, "step": 5868 }, { "epoch": 0.4440997313760357, "grad_norm": 1.8125836849212646, "learning_rate": 1.1379006448394882e-05, "loss": 0.6492, "step": 5869 }, { "epoch": 0.44417540009836937, "grad_norm": 2.023577928543091, "learning_rate": 1.1376834265489545e-05, "loss": 0.6456, "step": 5870 }, { "epoch": 0.444251068820703, "grad_norm": 2.251408100128174, "learning_rate": 1.1374661933739165e-05, "loss": 0.7316, "step": 5871 }, { "epoch": 0.4443267375430366, "grad_norm": 1.5530261993408203, "learning_rate": 1.137248945327977e-05, "loss": 0.9224, "step": 5872 }, { "epoch": 0.4444024062653702, "grad_norm": 1.9940237998962402, "learning_rate": 1.1370316824247414e-05, "loss": 0.6529, "step": 5873 }, { "epoch": 0.4444780749877038, "grad_norm": 2.0414655208587646, "learning_rate": 1.1368144046778151e-05, "loss": 0.6643, "step": 5874 }, { "epoch": 0.4445537437100375, "grad_norm": 2.2049062252044678, "learning_rate": 1.1365971121008047e-05, "loss": 0.586, "step": 5875 }, { "epoch": 0.4446294124323711, "grad_norm": 3.8549551963806152, "learning_rate": 1.1363798047073183e-05, "loss": 0.9112, "step": 5876 }, { "epoch": 0.4447050811547047, "grad_norm": 2.402311325073242, "learning_rate": 1.1361624825109634e-05, "loss": 0.7245, "step": 5877 }, { "epoch": 0.4447807498770383, "grad_norm": 2.2628328800201416, "learning_rate": 1.1359451455253505e-05, "loss": 0.786, "step": 5878 }, { "epoch": 0.44485641859937197, "grad_norm": 2.2788891792297363, "learning_rate": 1.1357277937640893e-05, "loss": 0.751, "step": 5879 }, { "epoch": 0.4449320873217056, "grad_norm": 2.0168333053588867, "learning_rate": 1.135510427240791e-05, "loss": 0.7533, "step": 5880 }, { "epoch": 0.4450077560440392, "grad_norm": 1.9024062156677246, "learning_rate": 1.1352930459690684e-05, "loss": 0.677, "step": 5881 }, { "epoch": 0.4450834247663728, "grad_norm": 1.9839564561843872, "learning_rate": 1.135075649962534e-05, "loss": 0.7093, "step": 5882 }, { "epoch": 0.44515909348870647, "grad_norm": 2.0501761436462402, "learning_rate": 1.1348582392348022e-05, "loss": 0.733, "step": 5883 }, { "epoch": 0.4452347622110401, "grad_norm": 2.069188356399536, "learning_rate": 1.1346408137994876e-05, "loss": 0.7962, "step": 5884 }, { "epoch": 0.4453104309333737, "grad_norm": 2.593379497528076, "learning_rate": 1.1344233736702065e-05, "loss": 0.6942, "step": 5885 }, { "epoch": 0.4453860996557073, "grad_norm": 2.6586804389953613, "learning_rate": 1.1342059188605756e-05, "loss": 0.7377, "step": 5886 }, { "epoch": 0.4454617683780409, "grad_norm": 2.206529140472412, "learning_rate": 1.1339884493842124e-05, "loss": 0.6509, "step": 5887 }, { "epoch": 0.4455374371003746, "grad_norm": 2.4398305416107178, "learning_rate": 1.1337709652547357e-05, "loss": 0.825, "step": 5888 }, { "epoch": 0.4456131058227082, "grad_norm": 2.344985008239746, "learning_rate": 1.1335534664857651e-05, "loss": 0.675, "step": 5889 }, { "epoch": 0.4456887745450418, "grad_norm": 2.7695236206054688, "learning_rate": 1.1333359530909208e-05, "loss": 0.6979, "step": 5890 }, { "epoch": 0.4457644432673754, "grad_norm": 3.5418498516082764, "learning_rate": 1.1331184250838249e-05, "loss": 0.6195, "step": 5891 }, { "epoch": 0.4458401119897091, "grad_norm": 2.1728100776672363, "learning_rate": 1.132900882478099e-05, "loss": 0.7099, "step": 5892 }, { "epoch": 0.4459157807120427, "grad_norm": 2.0681023597717285, "learning_rate": 1.1326833252873663e-05, "loss": 0.7016, "step": 5893 }, { "epoch": 0.4459914494343763, "grad_norm": 2.0414974689483643, "learning_rate": 1.1324657535252514e-05, "loss": 0.6981, "step": 5894 }, { "epoch": 0.4460671181567099, "grad_norm": 2.562387228012085, "learning_rate": 1.1322481672053791e-05, "loss": 0.6492, "step": 5895 }, { "epoch": 0.4461427868790436, "grad_norm": 1.7492594718933105, "learning_rate": 1.1320305663413752e-05, "loss": 0.5471, "step": 5896 }, { "epoch": 0.4462184556013772, "grad_norm": 2.4081857204437256, "learning_rate": 1.1318129509468671e-05, "loss": 0.7666, "step": 5897 }, { "epoch": 0.4462941243237108, "grad_norm": 2.3385374546051025, "learning_rate": 1.1315953210354821e-05, "loss": 0.6716, "step": 5898 }, { "epoch": 0.4463697930460444, "grad_norm": 2.440551280975342, "learning_rate": 1.1313776766208492e-05, "loss": 0.9059, "step": 5899 }, { "epoch": 0.446445461768378, "grad_norm": 1.830227017402649, "learning_rate": 1.1311600177165972e-05, "loss": 0.6836, "step": 5900 }, { "epoch": 0.4465211304907117, "grad_norm": 1.9618531465530396, "learning_rate": 1.130942344336358e-05, "loss": 0.7531, "step": 5901 }, { "epoch": 0.4465967992130453, "grad_norm": 1.9825726747512817, "learning_rate": 1.1307246564937618e-05, "loss": 0.8805, "step": 5902 }, { "epoch": 0.4466724679353789, "grad_norm": 2.091987133026123, "learning_rate": 1.1305069542024414e-05, "loss": 0.7716, "step": 5903 }, { "epoch": 0.4467481366577125, "grad_norm": 1.93959641456604, "learning_rate": 1.1302892374760301e-05, "loss": 0.6985, "step": 5904 }, { "epoch": 0.4468238053800462, "grad_norm": 2.1887693405151367, "learning_rate": 1.130071506328162e-05, "loss": 0.7668, "step": 5905 }, { "epoch": 0.4468994741023798, "grad_norm": 1.8061445951461792, "learning_rate": 1.1298537607724716e-05, "loss": 0.6938, "step": 5906 }, { "epoch": 0.4469751428247134, "grad_norm": 1.748567819595337, "learning_rate": 1.1296360008225957e-05, "loss": 0.6903, "step": 5907 }, { "epoch": 0.447050811547047, "grad_norm": 1.7834432125091553, "learning_rate": 1.1294182264921704e-05, "loss": 0.6602, "step": 5908 }, { "epoch": 0.4471264802693807, "grad_norm": 1.9683499336242676, "learning_rate": 1.1292004377948338e-05, "loss": 0.7615, "step": 5909 }, { "epoch": 0.4472021489917143, "grad_norm": 2.5268006324768066, "learning_rate": 1.1289826347442247e-05, "loss": 0.6007, "step": 5910 }, { "epoch": 0.4472778177140479, "grad_norm": 2.608851671218872, "learning_rate": 1.1287648173539822e-05, "loss": 0.7841, "step": 5911 }, { "epoch": 0.4473534864363815, "grad_norm": 2.4634876251220703, "learning_rate": 1.128546985637747e-05, "loss": 0.6308, "step": 5912 }, { "epoch": 0.4474291551587151, "grad_norm": 1.7287302017211914, "learning_rate": 1.1283291396091601e-05, "loss": 0.6105, "step": 5913 }, { "epoch": 0.4475048238810488, "grad_norm": 1.982318639755249, "learning_rate": 1.1281112792818641e-05, "loss": 0.7053, "step": 5914 }, { "epoch": 0.4475804926033824, "grad_norm": 1.8996347188949585, "learning_rate": 1.1278934046695023e-05, "loss": 0.7603, "step": 5915 }, { "epoch": 0.447656161325716, "grad_norm": 2.161860942840576, "learning_rate": 1.1276755157857179e-05, "loss": 0.7217, "step": 5916 }, { "epoch": 0.4477318300480496, "grad_norm": 2.7637171745300293, "learning_rate": 1.1274576126441568e-05, "loss": 0.7831, "step": 5917 }, { "epoch": 0.4478074987703833, "grad_norm": 1.9695764780044556, "learning_rate": 1.127239695258464e-05, "loss": 0.6614, "step": 5918 }, { "epoch": 0.4478831674927169, "grad_norm": 2.0457887649536133, "learning_rate": 1.1270217636422864e-05, "loss": 0.7391, "step": 5919 }, { "epoch": 0.4479588362150505, "grad_norm": 1.87351393699646, "learning_rate": 1.1268038178092718e-05, "loss": 0.8303, "step": 5920 }, { "epoch": 0.4480345049373841, "grad_norm": 2.1492748260498047, "learning_rate": 1.1265858577730685e-05, "loss": 0.6984, "step": 5921 }, { "epoch": 0.4481101736597178, "grad_norm": 2.0137827396392822, "learning_rate": 1.1263678835473263e-05, "loss": 0.7522, "step": 5922 }, { "epoch": 0.4481858423820514, "grad_norm": 2.2012124061584473, "learning_rate": 1.1261498951456948e-05, "loss": 0.6075, "step": 5923 }, { "epoch": 0.448261511104385, "grad_norm": 2.0582940578460693, "learning_rate": 1.1259318925818253e-05, "loss": 0.6671, "step": 5924 }, { "epoch": 0.4483371798267186, "grad_norm": 2.405733823776245, "learning_rate": 1.1257138758693701e-05, "loss": 0.8391, "step": 5925 }, { "epoch": 0.4484128485490522, "grad_norm": 3.624671697616577, "learning_rate": 1.1254958450219817e-05, "loss": 0.6537, "step": 5926 }, { "epoch": 0.4484885172713859, "grad_norm": 2.217015504837036, "learning_rate": 1.1252778000533143e-05, "loss": 0.6828, "step": 5927 }, { "epoch": 0.4485641859937195, "grad_norm": 2.174923896789551, "learning_rate": 1.1250597409770225e-05, "loss": 0.7816, "step": 5928 }, { "epoch": 0.4486398547160531, "grad_norm": 1.7993848323822021, "learning_rate": 1.1248416678067619e-05, "loss": 0.6842, "step": 5929 }, { "epoch": 0.4487155234383867, "grad_norm": 1.861826777458191, "learning_rate": 1.1246235805561887e-05, "loss": 0.8071, "step": 5930 }, { "epoch": 0.4487911921607204, "grad_norm": 2.257115125656128, "learning_rate": 1.1244054792389602e-05, "loss": 0.7332, "step": 5931 }, { "epoch": 0.448866860883054, "grad_norm": 2.5872914791107178, "learning_rate": 1.1241873638687348e-05, "loss": 0.6017, "step": 5932 }, { "epoch": 0.4489425296053876, "grad_norm": 1.890411138534546, "learning_rate": 1.1239692344591719e-05, "loss": 0.6682, "step": 5933 }, { "epoch": 0.4490181983277212, "grad_norm": 3.7354846000671387, "learning_rate": 1.1237510910239306e-05, "loss": 0.7684, "step": 5934 }, { "epoch": 0.4490938670500549, "grad_norm": 1.9048963785171509, "learning_rate": 1.1235329335766728e-05, "loss": 0.524, "step": 5935 }, { "epoch": 0.4491695357723885, "grad_norm": 1.8189629316329956, "learning_rate": 1.1233147621310594e-05, "loss": 0.6492, "step": 5936 }, { "epoch": 0.4492452044947221, "grad_norm": 1.712294101715088, "learning_rate": 1.1230965767007535e-05, "loss": 0.7487, "step": 5937 }, { "epoch": 0.4493208732170557, "grad_norm": 2.2259769439697266, "learning_rate": 1.1228783772994184e-05, "loss": 0.7662, "step": 5938 }, { "epoch": 0.4493965419393893, "grad_norm": 3.1922950744628906, "learning_rate": 1.122660163940718e-05, "loss": 0.8065, "step": 5939 }, { "epoch": 0.449472210661723, "grad_norm": 2.1241049766540527, "learning_rate": 1.1224419366383186e-05, "loss": 0.6927, "step": 5940 }, { "epoch": 0.4495478793840566, "grad_norm": 2.3622326850891113, "learning_rate": 1.1222236954058853e-05, "loss": 0.8493, "step": 5941 }, { "epoch": 0.4496235481063902, "grad_norm": 3.5606555938720703, "learning_rate": 1.1220054402570854e-05, "loss": 0.773, "step": 5942 }, { "epoch": 0.4496992168287238, "grad_norm": 2.492074966430664, "learning_rate": 1.1217871712055869e-05, "loss": 0.6058, "step": 5943 }, { "epoch": 0.4497748855510575, "grad_norm": 2.104963779449463, "learning_rate": 1.1215688882650582e-05, "loss": 0.7597, "step": 5944 }, { "epoch": 0.4498505542733911, "grad_norm": 1.9802522659301758, "learning_rate": 1.1213505914491695e-05, "loss": 0.7904, "step": 5945 }, { "epoch": 0.4499262229957247, "grad_norm": 1.8964923620224, "learning_rate": 1.1211322807715906e-05, "loss": 0.7552, "step": 5946 }, { "epoch": 0.4500018917180583, "grad_norm": 3.3813583850860596, "learning_rate": 1.1209139562459929e-05, "loss": 0.6773, "step": 5947 }, { "epoch": 0.450077560440392, "grad_norm": 2.5931599140167236, "learning_rate": 1.120695617886049e-05, "loss": 0.6421, "step": 5948 }, { "epoch": 0.4501532291627256, "grad_norm": 2.148244857788086, "learning_rate": 1.1204772657054314e-05, "loss": 0.8242, "step": 5949 }, { "epoch": 0.4502288978850592, "grad_norm": 1.9248651266098022, "learning_rate": 1.1202588997178144e-05, "loss": 0.737, "step": 5950 }, { "epoch": 0.4503045666073928, "grad_norm": 2.1882691383361816, "learning_rate": 1.1200405199368729e-05, "loss": 0.641, "step": 5951 }, { "epoch": 0.45038023532972643, "grad_norm": 2.8311820030212402, "learning_rate": 1.119822126376282e-05, "loss": 0.6549, "step": 5952 }, { "epoch": 0.4504559040520601, "grad_norm": 2.2649013996124268, "learning_rate": 1.1196037190497188e-05, "loss": 0.7611, "step": 5953 }, { "epoch": 0.4505315727743937, "grad_norm": 1.7124543190002441, "learning_rate": 1.1193852979708604e-05, "loss": 0.7877, "step": 5954 }, { "epoch": 0.4506072414967273, "grad_norm": 2.419224739074707, "learning_rate": 1.119166863153385e-05, "loss": 0.7871, "step": 5955 }, { "epoch": 0.45068291021906093, "grad_norm": 2.265690565109253, "learning_rate": 1.1189484146109719e-05, "loss": 0.5847, "step": 5956 }, { "epoch": 0.4507585789413946, "grad_norm": 2.1658334732055664, "learning_rate": 1.1187299523573007e-05, "loss": 0.6962, "step": 5957 }, { "epoch": 0.4508342476637282, "grad_norm": 2.0252439975738525, "learning_rate": 1.1185114764060528e-05, "loss": 0.7378, "step": 5958 }, { "epoch": 0.4509099163860618, "grad_norm": 1.994943380355835, "learning_rate": 1.118292986770909e-05, "loss": 0.6885, "step": 5959 }, { "epoch": 0.4509855851083954, "grad_norm": 2.032151699066162, "learning_rate": 1.1180744834655526e-05, "loss": 0.7695, "step": 5960 }, { "epoch": 0.4510612538307291, "grad_norm": 1.8477638959884644, "learning_rate": 1.1178559665036666e-05, "loss": 0.8245, "step": 5961 }, { "epoch": 0.4511369225530627, "grad_norm": 1.867470145225525, "learning_rate": 1.1176374358989354e-05, "loss": 0.6492, "step": 5962 }, { "epoch": 0.4512125912753963, "grad_norm": 2.083955764770508, "learning_rate": 1.117418891665044e-05, "loss": 0.6438, "step": 5963 }, { "epoch": 0.4512882599977299, "grad_norm": 2.1489977836608887, "learning_rate": 1.1172003338156787e-05, "loss": 0.6843, "step": 5964 }, { "epoch": 0.45136392872006353, "grad_norm": 1.905900478363037, "learning_rate": 1.1169817623645256e-05, "loss": 0.6142, "step": 5965 }, { "epoch": 0.4514395974423972, "grad_norm": 2.060368537902832, "learning_rate": 1.116763177325273e-05, "loss": 0.76, "step": 5966 }, { "epoch": 0.4515152661647308, "grad_norm": 2.1221015453338623, "learning_rate": 1.1165445787116088e-05, "loss": 0.7409, "step": 5967 }, { "epoch": 0.4515909348870644, "grad_norm": 1.9896661043167114, "learning_rate": 1.116325966537223e-05, "loss": 0.865, "step": 5968 }, { "epoch": 0.45166660360939803, "grad_norm": 1.9330137968063354, "learning_rate": 1.1161073408158054e-05, "loss": 0.8041, "step": 5969 }, { "epoch": 0.4517422723317317, "grad_norm": 1.8360910415649414, "learning_rate": 1.115888701561047e-05, "loss": 0.7317, "step": 5970 }, { "epoch": 0.4518179410540653, "grad_norm": 2.239154815673828, "learning_rate": 1.11567004878664e-05, "loss": 0.765, "step": 5971 }, { "epoch": 0.4518936097763989, "grad_norm": 2.8562796115875244, "learning_rate": 1.115451382506277e-05, "loss": 0.9, "step": 5972 }, { "epoch": 0.45196927849873253, "grad_norm": 1.8659065961837769, "learning_rate": 1.1152327027336513e-05, "loss": 0.6336, "step": 5973 }, { "epoch": 0.4520449472210662, "grad_norm": 2.5955421924591064, "learning_rate": 1.1150140094824579e-05, "loss": 0.6623, "step": 5974 }, { "epoch": 0.4521206159433998, "grad_norm": 1.7861441373825073, "learning_rate": 1.1147953027663919e-05, "loss": 0.6716, "step": 5975 }, { "epoch": 0.4521962846657334, "grad_norm": 1.989698052406311, "learning_rate": 1.114576582599149e-05, "loss": 0.5853, "step": 5976 }, { "epoch": 0.452271953388067, "grad_norm": 1.9923795461654663, "learning_rate": 1.1143578489944266e-05, "loss": 0.7264, "step": 5977 }, { "epoch": 0.4523476221104007, "grad_norm": 2.052943229675293, "learning_rate": 1.1141391019659223e-05, "loss": 0.6532, "step": 5978 }, { "epoch": 0.4524232908327343, "grad_norm": 1.8937102556228638, "learning_rate": 1.113920341527335e-05, "loss": 0.6145, "step": 5979 }, { "epoch": 0.4524989595550679, "grad_norm": 1.7291990518569946, "learning_rate": 1.1137015676923637e-05, "loss": 0.7514, "step": 5980 }, { "epoch": 0.4525746282774015, "grad_norm": 1.8814363479614258, "learning_rate": 1.1134827804747093e-05, "loss": 0.7184, "step": 5981 }, { "epoch": 0.45265029699973514, "grad_norm": 2.006896495819092, "learning_rate": 1.1132639798880728e-05, "loss": 0.6344, "step": 5982 }, { "epoch": 0.4527259657220688, "grad_norm": 2.146019458770752, "learning_rate": 1.1130451659461559e-05, "loss": 0.6921, "step": 5983 }, { "epoch": 0.4528016344444024, "grad_norm": 10.404562950134277, "learning_rate": 1.1128263386626617e-05, "loss": 0.5599, "step": 5984 }, { "epoch": 0.452877303166736, "grad_norm": 1.8004459142684937, "learning_rate": 1.1126074980512936e-05, "loss": 0.6701, "step": 5985 }, { "epoch": 0.45295297188906963, "grad_norm": 2.264495611190796, "learning_rate": 1.1123886441257567e-05, "loss": 0.7605, "step": 5986 }, { "epoch": 0.4530286406114033, "grad_norm": 1.9421061277389526, "learning_rate": 1.1121697768997556e-05, "loss": 0.7667, "step": 5987 }, { "epoch": 0.4531043093337369, "grad_norm": 3.031816244125366, "learning_rate": 1.1119508963869971e-05, "loss": 0.5885, "step": 5988 }, { "epoch": 0.4531799780560705, "grad_norm": 3.6335830688476562, "learning_rate": 1.1117320026011878e-05, "loss": 0.6176, "step": 5989 }, { "epoch": 0.45325564677840413, "grad_norm": 2.454843282699585, "learning_rate": 1.1115130955560357e-05, "loss": 0.7809, "step": 5990 }, { "epoch": 0.4533313155007378, "grad_norm": 1.9949727058410645, "learning_rate": 1.1112941752652495e-05, "loss": 0.7147, "step": 5991 }, { "epoch": 0.4534069842230714, "grad_norm": 1.9766342639923096, "learning_rate": 1.1110752417425386e-05, "loss": 0.8628, "step": 5992 }, { "epoch": 0.453482652945405, "grad_norm": 2.25211763381958, "learning_rate": 1.1108562950016133e-05, "loss": 0.7544, "step": 5993 }, { "epoch": 0.45355832166773863, "grad_norm": 2.635415554046631, "learning_rate": 1.1106373350561848e-05, "loss": 0.8409, "step": 5994 }, { "epoch": 0.45363399039007224, "grad_norm": 2.761585235595703, "learning_rate": 1.110418361919965e-05, "loss": 0.8435, "step": 5995 }, { "epoch": 0.4537096591124059, "grad_norm": 1.7957862615585327, "learning_rate": 1.110199375606667e-05, "loss": 0.6987, "step": 5996 }, { "epoch": 0.4537853278347395, "grad_norm": 1.743152141571045, "learning_rate": 1.1099803761300043e-05, "loss": 0.5961, "step": 5997 }, { "epoch": 0.4538609965570731, "grad_norm": 1.9768725633621216, "learning_rate": 1.1097613635036912e-05, "loss": 0.731, "step": 5998 }, { "epoch": 0.45393666527940674, "grad_norm": 2.327970504760742, "learning_rate": 1.109542337741443e-05, "loss": 0.8248, "step": 5999 }, { "epoch": 0.4540123340017404, "grad_norm": 2.5516083240509033, "learning_rate": 1.1093232988569757e-05, "loss": 0.7137, "step": 6000 }, { "epoch": 0.454088002724074, "grad_norm": 2.588467836380005, "learning_rate": 1.1091042468640066e-05, "loss": 0.7983, "step": 6001 }, { "epoch": 0.4541636714464076, "grad_norm": 2.372370481491089, "learning_rate": 1.1088851817762537e-05, "loss": 0.7425, "step": 6002 }, { "epoch": 0.45423934016874123, "grad_norm": 2.482089042663574, "learning_rate": 1.1086661036074342e-05, "loss": 0.6915, "step": 6003 }, { "epoch": 0.4543150088910749, "grad_norm": 2.0456271171569824, "learning_rate": 1.108447012371269e-05, "loss": 0.6623, "step": 6004 }, { "epoch": 0.4543906776134085, "grad_norm": 2.8427894115448, "learning_rate": 1.1082279080814775e-05, "loss": 0.7134, "step": 6005 }, { "epoch": 0.4544663463357421, "grad_norm": 2.3383491039276123, "learning_rate": 1.1080087907517808e-05, "loss": 0.8108, "step": 6006 }, { "epoch": 0.45454201505807573, "grad_norm": 2.1955339908599854, "learning_rate": 1.107789660395901e-05, "loss": 0.6805, "step": 6007 }, { "epoch": 0.45461768378040934, "grad_norm": 2.1509621143341064, "learning_rate": 1.1075705170275605e-05, "loss": 0.7771, "step": 6008 }, { "epoch": 0.454693352502743, "grad_norm": 2.429506778717041, "learning_rate": 1.107351360660483e-05, "loss": 0.6084, "step": 6009 }, { "epoch": 0.4547690212250766, "grad_norm": 2.3158512115478516, "learning_rate": 1.1071321913083925e-05, "loss": 0.7337, "step": 6010 }, { "epoch": 0.45484468994741023, "grad_norm": 1.9755150079727173, "learning_rate": 1.1069130089850142e-05, "loss": 0.9059, "step": 6011 }, { "epoch": 0.45492035866974384, "grad_norm": 1.9800193309783936, "learning_rate": 1.1066938137040742e-05, "loss": 0.9518, "step": 6012 }, { "epoch": 0.4549960273920775, "grad_norm": 2.4362120628356934, "learning_rate": 1.106474605479299e-05, "loss": 0.8131, "step": 6013 }, { "epoch": 0.4550716961144111, "grad_norm": 3.1265878677368164, "learning_rate": 1.106255384324416e-05, "loss": 0.8113, "step": 6014 }, { "epoch": 0.45514736483674473, "grad_norm": 2.1288368701934814, "learning_rate": 1.106036150253154e-05, "loss": 0.6329, "step": 6015 }, { "epoch": 0.45522303355907834, "grad_norm": 2.2900583744049072, "learning_rate": 1.1058169032792419e-05, "loss": 0.6617, "step": 6016 }, { "epoch": 0.455298702281412, "grad_norm": 2.1186749935150146, "learning_rate": 1.1055976434164094e-05, "loss": 0.76, "step": 6017 }, { "epoch": 0.4553743710037456, "grad_norm": 1.967383623123169, "learning_rate": 1.1053783706783876e-05, "loss": 0.7049, "step": 6018 }, { "epoch": 0.4554500397260792, "grad_norm": 2.262080430984497, "learning_rate": 1.1051590850789076e-05, "loss": 0.739, "step": 6019 }, { "epoch": 0.45552570844841284, "grad_norm": 2.2179148197174072, "learning_rate": 1.1049397866317026e-05, "loss": 0.8633, "step": 6020 }, { "epoch": 0.45560137717074645, "grad_norm": 2.158219575881958, "learning_rate": 1.1047204753505052e-05, "loss": 0.7621, "step": 6021 }, { "epoch": 0.4556770458930801, "grad_norm": 2.1269586086273193, "learning_rate": 1.1045011512490493e-05, "loss": 0.7006, "step": 6022 }, { "epoch": 0.4557527146154137, "grad_norm": 2.0919365882873535, "learning_rate": 1.1042818143410702e-05, "loss": 0.6958, "step": 6023 }, { "epoch": 0.45582838333774733, "grad_norm": 2.181525230407715, "learning_rate": 1.1040624646403027e-05, "loss": 0.6319, "step": 6024 }, { "epoch": 0.45590405206008094, "grad_norm": 2.3236260414123535, "learning_rate": 1.1038431021604841e-05, "loss": 0.8105, "step": 6025 }, { "epoch": 0.4559797207824146, "grad_norm": 2.2050108909606934, "learning_rate": 1.1036237269153509e-05, "loss": 0.7843, "step": 6026 }, { "epoch": 0.4560553895047482, "grad_norm": 2.168041467666626, "learning_rate": 1.1034043389186414e-05, "loss": 0.8211, "step": 6027 }, { "epoch": 0.45613105822708183, "grad_norm": 2.095221996307373, "learning_rate": 1.1031849381840942e-05, "loss": 0.7797, "step": 6028 }, { "epoch": 0.45620672694941544, "grad_norm": 2.5357155799865723, "learning_rate": 1.102965524725449e-05, "loss": 0.748, "step": 6029 }, { "epoch": 0.4562823956717491, "grad_norm": 2.3060734272003174, "learning_rate": 1.1027460985564464e-05, "loss": 0.6879, "step": 6030 }, { "epoch": 0.4563580643940827, "grad_norm": 1.986255407333374, "learning_rate": 1.102526659690827e-05, "loss": 0.5767, "step": 6031 }, { "epoch": 0.45643373311641633, "grad_norm": 1.7908231019973755, "learning_rate": 1.1023072081423334e-05, "loss": 0.5617, "step": 6032 }, { "epoch": 0.45650940183874994, "grad_norm": 2.0068113803863525, "learning_rate": 1.102087743924708e-05, "loss": 0.8403, "step": 6033 }, { "epoch": 0.45658507056108355, "grad_norm": 2.3249096870422363, "learning_rate": 1.1018682670516945e-05, "loss": 0.6172, "step": 6034 }, { "epoch": 0.4566607392834172, "grad_norm": 2.054591178894043, "learning_rate": 1.101648777537037e-05, "loss": 0.7662, "step": 6035 }, { "epoch": 0.4567364080057508, "grad_norm": 2.0367980003356934, "learning_rate": 1.101429275394481e-05, "loss": 0.6568, "step": 6036 }, { "epoch": 0.45681207672808444, "grad_norm": 1.8275066614151, "learning_rate": 1.1012097606377722e-05, "loss": 0.7773, "step": 6037 }, { "epoch": 0.45688774545041805, "grad_norm": 1.9169228076934814, "learning_rate": 1.1009902332806577e-05, "loss": 0.6533, "step": 6038 }, { "epoch": 0.4569634141727517, "grad_norm": 2.531177520751953, "learning_rate": 1.1007706933368843e-05, "loss": 0.8055, "step": 6039 }, { "epoch": 0.4570390828950853, "grad_norm": 2.8043148517608643, "learning_rate": 1.1005511408202008e-05, "loss": 0.8399, "step": 6040 }, { "epoch": 0.45711475161741894, "grad_norm": 1.9905619621276855, "learning_rate": 1.1003315757443565e-05, "loss": 0.5476, "step": 6041 }, { "epoch": 0.45719042033975255, "grad_norm": 2.0711894035339355, "learning_rate": 1.1001119981231004e-05, "loss": 0.6972, "step": 6042 }, { "epoch": 0.4572660890620862, "grad_norm": 2.3095309734344482, "learning_rate": 1.0998924079701843e-05, "loss": 0.6728, "step": 6043 }, { "epoch": 0.4573417577844198, "grad_norm": 3.2638471126556396, "learning_rate": 1.0996728052993586e-05, "loss": 0.838, "step": 6044 }, { "epoch": 0.45741742650675343, "grad_norm": 2.1741018295288086, "learning_rate": 1.0994531901243763e-05, "loss": 0.7435, "step": 6045 }, { "epoch": 0.45749309522908704, "grad_norm": 2.157972812652588, "learning_rate": 1.0992335624589902e-05, "loss": 0.7142, "step": 6046 }, { "epoch": 0.45756876395142065, "grad_norm": 3.9512946605682373, "learning_rate": 1.099013922316954e-05, "loss": 0.5924, "step": 6047 }, { "epoch": 0.4576444326737543, "grad_norm": 1.8326383829116821, "learning_rate": 1.0987942697120223e-05, "loss": 0.7153, "step": 6048 }, { "epoch": 0.45772010139608793, "grad_norm": 2.491291046142578, "learning_rate": 1.09857460465795e-05, "loss": 0.7102, "step": 6049 }, { "epoch": 0.45779577011842154, "grad_norm": 2.1547534465789795, "learning_rate": 1.0983549271684944e-05, "loss": 0.6816, "step": 6050 }, { "epoch": 0.45787143884075515, "grad_norm": 2.5720443725585938, "learning_rate": 1.0981352372574111e-05, "loss": 0.7271, "step": 6051 }, { "epoch": 0.4579471075630888, "grad_norm": 2.2329049110412598, "learning_rate": 1.0979155349384587e-05, "loss": 0.6481, "step": 6052 }, { "epoch": 0.45802277628542243, "grad_norm": 2.2942488193511963, "learning_rate": 1.0976958202253951e-05, "loss": 0.7957, "step": 6053 }, { "epoch": 0.45809844500775604, "grad_norm": 1.748425006866455, "learning_rate": 1.0974760931319801e-05, "loss": 0.9268, "step": 6054 }, { "epoch": 0.45817411373008965, "grad_norm": 5.090272903442383, "learning_rate": 1.0972563536719736e-05, "loss": 0.7559, "step": 6055 }, { "epoch": 0.4582497824524233, "grad_norm": 2.0367588996887207, "learning_rate": 1.097036601859136e-05, "loss": 0.7083, "step": 6056 }, { "epoch": 0.4583254511747569, "grad_norm": 2.288196325302124, "learning_rate": 1.096816837707229e-05, "loss": 0.7582, "step": 6057 }, { "epoch": 0.45840111989709054, "grad_norm": 1.7706087827682495, "learning_rate": 1.096597061230015e-05, "loss": 0.6756, "step": 6058 }, { "epoch": 0.45847678861942415, "grad_norm": 2.1202590465545654, "learning_rate": 1.0963772724412575e-05, "loss": 0.7529, "step": 6059 }, { "epoch": 0.45855245734175776, "grad_norm": 4.25075626373291, "learning_rate": 1.0961574713547196e-05, "loss": 0.8093, "step": 6060 }, { "epoch": 0.4586281260640914, "grad_norm": 2.7584314346313477, "learning_rate": 1.0959376579841669e-05, "loss": 0.6798, "step": 6061 }, { "epoch": 0.45870379478642503, "grad_norm": 2.2744717597961426, "learning_rate": 1.095717832343364e-05, "loss": 0.573, "step": 6062 }, { "epoch": 0.45877946350875864, "grad_norm": 1.9462858438491821, "learning_rate": 1.0954979944460773e-05, "loss": 0.7179, "step": 6063 }, { "epoch": 0.45885513223109226, "grad_norm": 2.249580144882202, "learning_rate": 1.0952781443060742e-05, "loss": 0.6776, "step": 6064 }, { "epoch": 0.4589308009534259, "grad_norm": 2.2577133178710938, "learning_rate": 1.0950582819371215e-05, "loss": 0.7104, "step": 6065 }, { "epoch": 0.45900646967575953, "grad_norm": 1.8561004400253296, "learning_rate": 1.094838407352989e-05, "loss": 0.6328, "step": 6066 }, { "epoch": 0.45908213839809314, "grad_norm": 2.133049249649048, "learning_rate": 1.0946185205674447e-05, "loss": 0.803, "step": 6067 }, { "epoch": 0.45915780712042675, "grad_norm": 2.2266693115234375, "learning_rate": 1.0943986215942597e-05, "loss": 0.7626, "step": 6068 }, { "epoch": 0.4592334758427604, "grad_norm": 2.2593750953674316, "learning_rate": 1.0941787104472038e-05, "loss": 0.5993, "step": 6069 }, { "epoch": 0.45930914456509403, "grad_norm": 1.5973234176635742, "learning_rate": 1.0939587871400493e-05, "loss": 0.9396, "step": 6070 }, { "epoch": 0.45938481328742764, "grad_norm": 1.9865094423294067, "learning_rate": 1.0937388516865681e-05, "loss": 0.843, "step": 6071 }, { "epoch": 0.45946048200976125, "grad_norm": 2.004659414291382, "learning_rate": 1.093518904100533e-05, "loss": 0.5817, "step": 6072 }, { "epoch": 0.45953615073209486, "grad_norm": 1.994409441947937, "learning_rate": 1.0932989443957188e-05, "loss": 0.6163, "step": 6073 }, { "epoch": 0.4596118194544285, "grad_norm": 2.4279356002807617, "learning_rate": 1.0930789725858994e-05, "loss": 0.6187, "step": 6074 }, { "epoch": 0.45968748817676214, "grad_norm": 1.8175309896469116, "learning_rate": 1.0928589886848499e-05, "loss": 0.7091, "step": 6075 }, { "epoch": 0.45976315689909575, "grad_norm": 2.018789768218994, "learning_rate": 1.092638992706347e-05, "loss": 0.5512, "step": 6076 }, { "epoch": 0.45983882562142936, "grad_norm": 1.8385061025619507, "learning_rate": 1.0924189846641673e-05, "loss": 0.665, "step": 6077 }, { "epoch": 0.459914494343763, "grad_norm": 2.198543071746826, "learning_rate": 1.0921989645720883e-05, "loss": 0.7314, "step": 6078 }, { "epoch": 0.45999016306609664, "grad_norm": 3.3511157035827637, "learning_rate": 1.0919789324438886e-05, "loss": 0.7007, "step": 6079 }, { "epoch": 0.46006583178843025, "grad_norm": 2.0471951961517334, "learning_rate": 1.0917588882933472e-05, "loss": 0.8203, "step": 6080 }, { "epoch": 0.46014150051076386, "grad_norm": 2.699324131011963, "learning_rate": 1.091538832134244e-05, "loss": 0.5743, "step": 6081 }, { "epoch": 0.4602171692330975, "grad_norm": 2.1900722980499268, "learning_rate": 1.0913187639803598e-05, "loss": 0.7094, "step": 6082 }, { "epoch": 0.46029283795543113, "grad_norm": 2.500459671020508, "learning_rate": 1.0910986838454754e-05, "loss": 0.6425, "step": 6083 }, { "epoch": 0.46036850667776474, "grad_norm": 2.3873353004455566, "learning_rate": 1.0908785917433737e-05, "loss": 0.6988, "step": 6084 }, { "epoch": 0.46044417540009835, "grad_norm": 2.539494037628174, "learning_rate": 1.090658487687837e-05, "loss": 0.6685, "step": 6085 }, { "epoch": 0.46051984412243196, "grad_norm": 2.0410473346710205, "learning_rate": 1.0904383716926491e-05, "loss": 0.8181, "step": 6086 }, { "epoch": 0.46059551284476563, "grad_norm": 2.1296074390411377, "learning_rate": 1.0902182437715947e-05, "loss": 0.6946, "step": 6087 }, { "epoch": 0.46067118156709924, "grad_norm": 2.1949639320373535, "learning_rate": 1.0899981039384581e-05, "loss": 0.7152, "step": 6088 }, { "epoch": 0.46074685028943285, "grad_norm": 2.5032870769500732, "learning_rate": 1.0897779522070262e-05, "loss": 0.7305, "step": 6089 }, { "epoch": 0.46082251901176646, "grad_norm": 1.8629069328308105, "learning_rate": 1.0895577885910846e-05, "loss": 0.7541, "step": 6090 }, { "epoch": 0.46089818773410013, "grad_norm": 1.527066707611084, "learning_rate": 1.0893376131044219e-05, "loss": 0.6971, "step": 6091 }, { "epoch": 0.46097385645643374, "grad_norm": 1.9106502532958984, "learning_rate": 1.089117425760825e-05, "loss": 0.7487, "step": 6092 }, { "epoch": 0.46104952517876735, "grad_norm": 2.104304075241089, "learning_rate": 1.0888972265740833e-05, "loss": 0.7364, "step": 6093 }, { "epoch": 0.46112519390110096, "grad_norm": 2.0999743938446045, "learning_rate": 1.0886770155579864e-05, "loss": 0.7293, "step": 6094 }, { "epoch": 0.4612008626234346, "grad_norm": 2.2601325511932373, "learning_rate": 1.0884567927263243e-05, "loss": 0.7555, "step": 6095 }, { "epoch": 0.46127653134576824, "grad_norm": 1.8899502754211426, "learning_rate": 1.0882365580928885e-05, "loss": 0.6945, "step": 6096 }, { "epoch": 0.46135220006810185, "grad_norm": 2.614907741546631, "learning_rate": 1.0880163116714706e-05, "loss": 0.8823, "step": 6097 }, { "epoch": 0.46142786879043546, "grad_norm": 1.670644760131836, "learning_rate": 1.087796053475863e-05, "loss": 0.8269, "step": 6098 }, { "epoch": 0.4615035375127691, "grad_norm": 2.111875295639038, "learning_rate": 1.0875757835198592e-05, "loss": 0.6693, "step": 6099 }, { "epoch": 0.46157920623510273, "grad_norm": 1.9727673530578613, "learning_rate": 1.0873555018172533e-05, "loss": 0.661, "step": 6100 }, { "epoch": 0.46165487495743635, "grad_norm": 2.154547691345215, "learning_rate": 1.0871352083818397e-05, "loss": 0.6215, "step": 6101 }, { "epoch": 0.46173054367976996, "grad_norm": 3.549818992614746, "learning_rate": 1.0869149032274142e-05, "loss": 0.8293, "step": 6102 }, { "epoch": 0.46180621240210357, "grad_norm": 2.007596731185913, "learning_rate": 1.0866945863677728e-05, "loss": 0.8542, "step": 6103 }, { "epoch": 0.46188188112443723, "grad_norm": 1.8901540040969849, "learning_rate": 1.0864742578167123e-05, "loss": 0.7118, "step": 6104 }, { "epoch": 0.46195754984677084, "grad_norm": 2.25508975982666, "learning_rate": 1.0862539175880313e-05, "loss": 0.7328, "step": 6105 }, { "epoch": 0.46203321856910445, "grad_norm": 1.9641987085342407, "learning_rate": 1.086033565695527e-05, "loss": 0.7337, "step": 6106 }, { "epoch": 0.46210888729143806, "grad_norm": 1.9229612350463867, "learning_rate": 1.0858132021529995e-05, "loss": 0.7118, "step": 6107 }, { "epoch": 0.46218455601377173, "grad_norm": 2.1345152854919434, "learning_rate": 1.0855928269742479e-05, "loss": 0.7995, "step": 6108 }, { "epoch": 0.46226022473610534, "grad_norm": 1.999732255935669, "learning_rate": 1.0853724401730733e-05, "loss": 0.5753, "step": 6109 }, { "epoch": 0.46233589345843895, "grad_norm": 1.7890396118164062, "learning_rate": 1.0851520417632772e-05, "loss": 0.7044, "step": 6110 }, { "epoch": 0.46241156218077256, "grad_norm": 1.8957141637802124, "learning_rate": 1.0849316317586611e-05, "loss": 0.8104, "step": 6111 }, { "epoch": 0.46248723090310623, "grad_norm": 1.9614812135696411, "learning_rate": 1.0847112101730284e-05, "loss": 0.7579, "step": 6112 }, { "epoch": 0.46256289962543984, "grad_norm": 4.7304205894470215, "learning_rate": 1.0844907770201818e-05, "loss": 0.666, "step": 6113 }, { "epoch": 0.46263856834777345, "grad_norm": 2.0775561332702637, "learning_rate": 1.0842703323139265e-05, "loss": 0.6668, "step": 6114 }, { "epoch": 0.46271423707010706, "grad_norm": 1.9007248878479004, "learning_rate": 1.0840498760680668e-05, "loss": 0.6426, "step": 6115 }, { "epoch": 0.46278990579244067, "grad_norm": 1.8677881956100464, "learning_rate": 1.0838294082964087e-05, "loss": 0.7154, "step": 6116 }, { "epoch": 0.46286557451477434, "grad_norm": 2.1474485397338867, "learning_rate": 1.0836089290127581e-05, "loss": 0.8626, "step": 6117 }, { "epoch": 0.46294124323710795, "grad_norm": 1.910867691040039, "learning_rate": 1.083388438230923e-05, "loss": 0.8349, "step": 6118 }, { "epoch": 0.46301691195944156, "grad_norm": 1.9479092359542847, "learning_rate": 1.0831679359647104e-05, "loss": 0.7569, "step": 6119 }, { "epoch": 0.46309258068177517, "grad_norm": 1.7901252508163452, "learning_rate": 1.0829474222279293e-05, "loss": 0.8097, "step": 6120 }, { "epoch": 0.46316824940410883, "grad_norm": 2.567704677581787, "learning_rate": 1.0827268970343888e-05, "loss": 0.601, "step": 6121 }, { "epoch": 0.46324391812644244, "grad_norm": 2.589970350265503, "learning_rate": 1.082506360397899e-05, "loss": 0.698, "step": 6122 }, { "epoch": 0.46331958684877605, "grad_norm": 2.233529567718506, "learning_rate": 1.082285812332271e-05, "loss": 0.8512, "step": 6123 }, { "epoch": 0.46339525557110967, "grad_norm": 2.331575870513916, "learning_rate": 1.0820652528513151e-05, "loss": 0.7811, "step": 6124 }, { "epoch": 0.46347092429344333, "grad_norm": 2.2309775352478027, "learning_rate": 1.081844681968845e-05, "loss": 0.6453, "step": 6125 }, { "epoch": 0.46354659301577694, "grad_norm": 1.7300264835357666, "learning_rate": 1.0816240996986723e-05, "loss": 0.6944, "step": 6126 }, { "epoch": 0.46362226173811055, "grad_norm": 2.187654972076416, "learning_rate": 1.0814035060546112e-05, "loss": 0.7591, "step": 6127 }, { "epoch": 0.46369793046044416, "grad_norm": 2.0955562591552734, "learning_rate": 1.081182901050476e-05, "loss": 0.7257, "step": 6128 }, { "epoch": 0.4637735991827778, "grad_norm": 2.2414302825927734, "learning_rate": 1.080962284700081e-05, "loss": 0.6971, "step": 6129 }, { "epoch": 0.46384926790511144, "grad_norm": 4.362318992614746, "learning_rate": 1.0807416570172429e-05, "loss": 0.7791, "step": 6130 }, { "epoch": 0.46392493662744505, "grad_norm": 1.8338907957077026, "learning_rate": 1.0805210180157772e-05, "loss": 0.6372, "step": 6131 }, { "epoch": 0.46400060534977866, "grad_norm": 2.5617542266845703, "learning_rate": 1.080300367709502e-05, "loss": 0.7845, "step": 6132 }, { "epoch": 0.46407627407211227, "grad_norm": 1.9908982515335083, "learning_rate": 1.0800797061122341e-05, "loss": 0.5978, "step": 6133 }, { "epoch": 0.46415194279444594, "grad_norm": 2.4405834674835205, "learning_rate": 1.079859033237793e-05, "loss": 0.7342, "step": 6134 }, { "epoch": 0.46422761151677955, "grad_norm": 1.8507134914398193, "learning_rate": 1.0796383490999975e-05, "loss": 0.6845, "step": 6135 }, { "epoch": 0.46430328023911316, "grad_norm": 2.517188787460327, "learning_rate": 1.0794176537126674e-05, "loss": 0.7715, "step": 6136 }, { "epoch": 0.46437894896144677, "grad_norm": 1.7793668508529663, "learning_rate": 1.0791969470896235e-05, "loss": 0.8915, "step": 6137 }, { "epoch": 0.46445461768378044, "grad_norm": 1.7618346214294434, "learning_rate": 1.0789762292446869e-05, "loss": 0.6284, "step": 6138 }, { "epoch": 0.46453028640611405, "grad_norm": 2.0393192768096924, "learning_rate": 1.0787555001916803e-05, "loss": 0.6054, "step": 6139 }, { "epoch": 0.46460595512844766, "grad_norm": 2.2760608196258545, "learning_rate": 1.078534759944426e-05, "loss": 0.8148, "step": 6140 }, { "epoch": 0.46468162385078127, "grad_norm": 1.8736259937286377, "learning_rate": 1.0783140085167477e-05, "loss": 0.6872, "step": 6141 }, { "epoch": 0.4647572925731149, "grad_norm": 2.1906280517578125, "learning_rate": 1.0780932459224692e-05, "loss": 0.7743, "step": 6142 }, { "epoch": 0.46483296129544854, "grad_norm": 2.7648184299468994, "learning_rate": 1.077872472175416e-05, "loss": 0.6033, "step": 6143 }, { "epoch": 0.46490863001778215, "grad_norm": 1.824427843093872, "learning_rate": 1.077651687289413e-05, "loss": 0.5442, "step": 6144 }, { "epoch": 0.46498429874011576, "grad_norm": 3.1149511337280273, "learning_rate": 1.0774308912782866e-05, "loss": 0.9675, "step": 6145 }, { "epoch": 0.4650599674624494, "grad_norm": 1.6089766025543213, "learning_rate": 1.0772100841558644e-05, "loss": 0.5172, "step": 6146 }, { "epoch": 0.46513563618478304, "grad_norm": 9.786401748657227, "learning_rate": 1.0769892659359731e-05, "loss": 0.7237, "step": 6147 }, { "epoch": 0.46521130490711665, "grad_norm": 1.9488837718963623, "learning_rate": 1.0767684366324418e-05, "loss": 0.8311, "step": 6148 }, { "epoch": 0.46528697362945026, "grad_norm": 2.2119038105010986, "learning_rate": 1.076547596259099e-05, "loss": 0.7903, "step": 6149 }, { "epoch": 0.4653626423517839, "grad_norm": 2.353832483291626, "learning_rate": 1.076326744829775e-05, "loss": 0.7647, "step": 6150 }, { "epoch": 0.46543831107411754, "grad_norm": 2.166916847229004, "learning_rate": 1.0761058823582999e-05, "loss": 0.8551, "step": 6151 }, { "epoch": 0.46551397979645115, "grad_norm": 2.20060658454895, "learning_rate": 1.0758850088585045e-05, "loss": 0.7732, "step": 6152 }, { "epoch": 0.46558964851878476, "grad_norm": 1.9233969449996948, "learning_rate": 1.0756641243442212e-05, "loss": 0.7602, "step": 6153 }, { "epoch": 0.46566531724111837, "grad_norm": 2.2503979206085205, "learning_rate": 1.0754432288292825e-05, "loss": 0.7605, "step": 6154 }, { "epoch": 0.465740985963452, "grad_norm": 2.2500691413879395, "learning_rate": 1.075222322327521e-05, "loss": 0.7364, "step": 6155 }, { "epoch": 0.46581665468578565, "grad_norm": 2.3264994621276855, "learning_rate": 1.0750014048527709e-05, "loss": 0.807, "step": 6156 }, { "epoch": 0.46589232340811926, "grad_norm": 2.227979898452759, "learning_rate": 1.074780476418867e-05, "loss": 0.6017, "step": 6157 }, { "epoch": 0.46596799213045287, "grad_norm": 2.266706705093384, "learning_rate": 1.0745595370396444e-05, "loss": 0.7044, "step": 6158 }, { "epoch": 0.4660436608527865, "grad_norm": 2.1310250759124756, "learning_rate": 1.074338586728939e-05, "loss": 0.7331, "step": 6159 }, { "epoch": 0.46611932957512014, "grad_norm": 2.122551202774048, "learning_rate": 1.0741176255005873e-05, "loss": 0.7152, "step": 6160 }, { "epoch": 0.46619499829745376, "grad_norm": 2.330875873565674, "learning_rate": 1.0738966533684268e-05, "loss": 0.7346, "step": 6161 }, { "epoch": 0.46627066701978737, "grad_norm": 1.9120339155197144, "learning_rate": 1.0736756703462951e-05, "loss": 0.7467, "step": 6162 }, { "epoch": 0.466346335742121, "grad_norm": 2.05835223197937, "learning_rate": 1.0734546764480316e-05, "loss": 0.7593, "step": 6163 }, { "epoch": 0.46642200446445464, "grad_norm": 2.023322820663452, "learning_rate": 1.0732336716874753e-05, "loss": 0.9205, "step": 6164 }, { "epoch": 0.46649767318678825, "grad_norm": 1.8999871015548706, "learning_rate": 1.073012656078466e-05, "loss": 0.6393, "step": 6165 }, { "epoch": 0.46657334190912186, "grad_norm": 2.1797525882720947, "learning_rate": 1.0727916296348444e-05, "loss": 0.8688, "step": 6166 }, { "epoch": 0.4666490106314555, "grad_norm": 2.276228904724121, "learning_rate": 1.0725705923704521e-05, "loss": 0.7856, "step": 6167 }, { "epoch": 0.4667246793537891, "grad_norm": 2.254700183868408, "learning_rate": 1.0723495442991314e-05, "loss": 0.5862, "step": 6168 }, { "epoch": 0.46680034807612275, "grad_norm": 1.760817527770996, "learning_rate": 1.0721284854347248e-05, "loss": 0.6833, "step": 6169 }, { "epoch": 0.46687601679845636, "grad_norm": 3.151423692703247, "learning_rate": 1.0719074157910752e-05, "loss": 0.8858, "step": 6170 }, { "epoch": 0.46695168552078997, "grad_norm": 1.7372065782546997, "learning_rate": 1.0716863353820278e-05, "loss": 0.6091, "step": 6171 }, { "epoch": 0.4670273542431236, "grad_norm": 1.8967386484146118, "learning_rate": 1.0714652442214266e-05, "loss": 0.8024, "step": 6172 }, { "epoch": 0.46710302296545725, "grad_norm": 1.915652871131897, "learning_rate": 1.0712441423231172e-05, "loss": 0.6228, "step": 6173 }, { "epoch": 0.46717869168779086, "grad_norm": 2.134735345840454, "learning_rate": 1.0710230297009458e-05, "loss": 0.8067, "step": 6174 }, { "epoch": 0.46725436041012447, "grad_norm": 2.2443509101867676, "learning_rate": 1.070801906368759e-05, "loss": 0.7097, "step": 6175 }, { "epoch": 0.4673300291324581, "grad_norm": 2.271360158920288, "learning_rate": 1.0705807723404044e-05, "loss": 0.6685, "step": 6176 }, { "epoch": 0.46740569785479175, "grad_norm": 2.679190158843994, "learning_rate": 1.0703596276297303e-05, "loss": 0.7566, "step": 6177 }, { "epoch": 0.46748136657712536, "grad_norm": 2.672213315963745, "learning_rate": 1.0701384722505851e-05, "loss": 0.6706, "step": 6178 }, { "epoch": 0.46755703529945897, "grad_norm": 2.041059732437134, "learning_rate": 1.0699173062168183e-05, "loss": 0.6699, "step": 6179 }, { "epoch": 0.4676327040217926, "grad_norm": 2.066025972366333, "learning_rate": 1.0696961295422806e-05, "loss": 0.6701, "step": 6180 }, { "epoch": 0.4677083727441262, "grad_norm": 2.1812968254089355, "learning_rate": 1.0694749422408223e-05, "loss": 0.6914, "step": 6181 }, { "epoch": 0.46778404146645985, "grad_norm": 2.1719706058502197, "learning_rate": 1.0692537443262949e-05, "loss": 0.8392, "step": 6182 }, { "epoch": 0.46785971018879347, "grad_norm": 2.272714614868164, "learning_rate": 1.0690325358125506e-05, "loss": 0.8093, "step": 6183 }, { "epoch": 0.4679353789111271, "grad_norm": 2.0720415115356445, "learning_rate": 1.0688113167134421e-05, "loss": 0.5, "step": 6184 }, { "epoch": 0.4680110476334607, "grad_norm": 2.3994011878967285, "learning_rate": 1.0685900870428232e-05, "loss": 0.8163, "step": 6185 }, { "epoch": 0.46808671635579435, "grad_norm": 1.8097761869430542, "learning_rate": 1.0683688468145474e-05, "loss": 0.761, "step": 6186 }, { "epoch": 0.46816238507812796, "grad_norm": 1.8033745288848877, "learning_rate": 1.0681475960424703e-05, "loss": 0.6908, "step": 6187 }, { "epoch": 0.4682380538004616, "grad_norm": 1.8102970123291016, "learning_rate": 1.0679263347404466e-05, "loss": 0.6382, "step": 6188 }, { "epoch": 0.4683137225227952, "grad_norm": 2.0066001415252686, "learning_rate": 1.0677050629223325e-05, "loss": 0.6318, "step": 6189 }, { "epoch": 0.46838939124512885, "grad_norm": 1.7005255222320557, "learning_rate": 1.0674837806019852e-05, "loss": 0.7265, "step": 6190 }, { "epoch": 0.46846505996746246, "grad_norm": 2.1413681507110596, "learning_rate": 1.0672624877932618e-05, "loss": 0.6977, "step": 6191 }, { "epoch": 0.46854072868979607, "grad_norm": 2.8440780639648438, "learning_rate": 1.0670411845100205e-05, "loss": 0.8837, "step": 6192 }, { "epoch": 0.4686163974121297, "grad_norm": 1.863978385925293, "learning_rate": 1.0668198707661198e-05, "loss": 0.5702, "step": 6193 }, { "epoch": 0.4686920661344633, "grad_norm": 2.1832001209259033, "learning_rate": 1.0665985465754193e-05, "loss": 0.6077, "step": 6194 }, { "epoch": 0.46876773485679696, "grad_norm": 2.120635747909546, "learning_rate": 1.066377211951779e-05, "loss": 0.732, "step": 6195 }, { "epoch": 0.46884340357913057, "grad_norm": 2.332000255584717, "learning_rate": 1.0661558669090595e-05, "loss": 0.6214, "step": 6196 }, { "epoch": 0.4689190723014642, "grad_norm": 2.0023016929626465, "learning_rate": 1.0659345114611225e-05, "loss": 0.7018, "step": 6197 }, { "epoch": 0.4689947410237978, "grad_norm": 2.0073201656341553, "learning_rate": 1.0657131456218291e-05, "loss": 0.7069, "step": 6198 }, { "epoch": 0.46907040974613146, "grad_norm": 2.4780099391937256, "learning_rate": 1.065491769405043e-05, "loss": 0.7045, "step": 6199 }, { "epoch": 0.46914607846846507, "grad_norm": 3.5818912982940674, "learning_rate": 1.0652703828246268e-05, "loss": 0.6786, "step": 6200 }, { "epoch": 0.4692217471907987, "grad_norm": 2.6084370613098145, "learning_rate": 1.0650489858944447e-05, "loss": 0.7227, "step": 6201 }, { "epoch": 0.4692974159131323, "grad_norm": 1.8275518417358398, "learning_rate": 1.0648275786283613e-05, "loss": 0.7012, "step": 6202 }, { "epoch": 0.46937308463546595, "grad_norm": 2.5070960521698, "learning_rate": 1.0646061610402418e-05, "loss": 0.9001, "step": 6203 }, { "epoch": 0.46944875335779956, "grad_norm": 2.149526596069336, "learning_rate": 1.0643847331439523e-05, "loss": 0.8065, "step": 6204 }, { "epoch": 0.4695244220801332, "grad_norm": 2.6355538368225098, "learning_rate": 1.0641632949533589e-05, "loss": 0.8699, "step": 6205 }, { "epoch": 0.4696000908024668, "grad_norm": 1.9515386819839478, "learning_rate": 1.0639418464823292e-05, "loss": 0.7724, "step": 6206 }, { "epoch": 0.46967575952480045, "grad_norm": 1.7659491300582886, "learning_rate": 1.0637203877447305e-05, "loss": 0.724, "step": 6207 }, { "epoch": 0.46975142824713406, "grad_norm": 2.187346935272217, "learning_rate": 1.0634989187544317e-05, "loss": 0.7288, "step": 6208 }, { "epoch": 0.4698270969694677, "grad_norm": 2.0950958728790283, "learning_rate": 1.0632774395253019e-05, "loss": 0.7164, "step": 6209 }, { "epoch": 0.4699027656918013, "grad_norm": 1.7800686359405518, "learning_rate": 1.063055950071211e-05, "loss": 0.9003, "step": 6210 }, { "epoch": 0.4699784344141349, "grad_norm": 2.117051124572754, "learning_rate": 1.0628344504060288e-05, "loss": 0.5784, "step": 6211 }, { "epoch": 0.47005410313646856, "grad_norm": 1.9472649097442627, "learning_rate": 1.0626129405436266e-05, "loss": 0.7032, "step": 6212 }, { "epoch": 0.47012977185880217, "grad_norm": 2.002767562866211, "learning_rate": 1.0623914204978761e-05, "loss": 0.7656, "step": 6213 }, { "epoch": 0.4702054405811358, "grad_norm": 2.2014105319976807, "learning_rate": 1.0621698902826497e-05, "loss": 0.888, "step": 6214 }, { "epoch": 0.4702811093034694, "grad_norm": 2.010322332382202, "learning_rate": 1.0619483499118204e-05, "loss": 0.7655, "step": 6215 }, { "epoch": 0.47035677802580306, "grad_norm": 2.8622562885284424, "learning_rate": 1.0617267993992612e-05, "loss": 0.8364, "step": 6216 }, { "epoch": 0.47043244674813667, "grad_norm": 1.905005931854248, "learning_rate": 1.061505238758847e-05, "loss": 0.6593, "step": 6217 }, { "epoch": 0.4705081154704703, "grad_norm": 1.9572088718414307, "learning_rate": 1.0612836680044525e-05, "loss": 0.6098, "step": 6218 }, { "epoch": 0.4705837841928039, "grad_norm": 2.0442428588867188, "learning_rate": 1.0610620871499529e-05, "loss": 0.7608, "step": 6219 }, { "epoch": 0.47065945291513755, "grad_norm": 2.0061099529266357, "learning_rate": 1.0608404962092244e-05, "loss": 0.7947, "step": 6220 }, { "epoch": 0.47073512163747117, "grad_norm": 2.0030205249786377, "learning_rate": 1.0606188951961438e-05, "loss": 0.706, "step": 6221 }, { "epoch": 0.4708107903598048, "grad_norm": 2.660520315170288, "learning_rate": 1.0603972841245887e-05, "loss": 0.665, "step": 6222 }, { "epoch": 0.4708864590821384, "grad_norm": 1.7287667989730835, "learning_rate": 1.0601756630084367e-05, "loss": 0.6559, "step": 6223 }, { "epoch": 0.470962127804472, "grad_norm": 1.7627499103546143, "learning_rate": 1.0599540318615667e-05, "loss": 0.7481, "step": 6224 }, { "epoch": 0.47103779652680566, "grad_norm": 2.1454079151153564, "learning_rate": 1.0597323906978577e-05, "loss": 0.6736, "step": 6225 }, { "epoch": 0.4711134652491393, "grad_norm": 1.9202808141708374, "learning_rate": 1.05951073953119e-05, "loss": 0.7624, "step": 6226 }, { "epoch": 0.4711891339714729, "grad_norm": 2.2088475227355957, "learning_rate": 1.0592890783754437e-05, "loss": 0.7322, "step": 6227 }, { "epoch": 0.4712648026938065, "grad_norm": 2.0040664672851562, "learning_rate": 1.0590674072445002e-05, "loss": 0.7205, "step": 6228 }, { "epoch": 0.47134047141614016, "grad_norm": 2.295071601867676, "learning_rate": 1.0588457261522413e-05, "loss": 0.6734, "step": 6229 }, { "epoch": 0.47141614013847377, "grad_norm": 2.2637734413146973, "learning_rate": 1.0586240351125489e-05, "loss": 0.8409, "step": 6230 }, { "epoch": 0.4714918088608074, "grad_norm": 2.1231727600097656, "learning_rate": 1.0584023341393069e-05, "loss": 0.7858, "step": 6231 }, { "epoch": 0.471567477583141, "grad_norm": 2.0771729946136475, "learning_rate": 1.0581806232463978e-05, "loss": 0.7433, "step": 6232 }, { "epoch": 0.47164314630547466, "grad_norm": 2.2591214179992676, "learning_rate": 1.0579589024477068e-05, "loss": 0.7656, "step": 6233 }, { "epoch": 0.47171881502780827, "grad_norm": 2.270939826965332, "learning_rate": 1.0577371717571182e-05, "loss": 0.8632, "step": 6234 }, { "epoch": 0.4717944837501419, "grad_norm": 2.1328177452087402, "learning_rate": 1.057515431188518e-05, "loss": 0.7845, "step": 6235 }, { "epoch": 0.4718701524724755, "grad_norm": 2.4155235290527344, "learning_rate": 1.0572936807557919e-05, "loss": 0.6961, "step": 6236 }, { "epoch": 0.4719458211948091, "grad_norm": 2.18650221824646, "learning_rate": 1.0570719204728265e-05, "loss": 0.69, "step": 6237 }, { "epoch": 0.47202148991714277, "grad_norm": 2.087130308151245, "learning_rate": 1.05685015035351e-05, "loss": 0.7847, "step": 6238 }, { "epoch": 0.4720971586394764, "grad_norm": 1.8233695030212402, "learning_rate": 1.0566283704117292e-05, "loss": 0.7336, "step": 6239 }, { "epoch": 0.47217282736181, "grad_norm": 2.3203165531158447, "learning_rate": 1.0564065806613736e-05, "loss": 0.6075, "step": 6240 }, { "epoch": 0.4722484960841436, "grad_norm": 1.858660101890564, "learning_rate": 1.056184781116332e-05, "loss": 0.6882, "step": 6241 }, { "epoch": 0.47232416480647726, "grad_norm": 2.2066173553466797, "learning_rate": 1.055962971790494e-05, "loss": 0.6147, "step": 6242 }, { "epoch": 0.4723998335288109, "grad_norm": 2.0067367553710938, "learning_rate": 1.0557411526977506e-05, "loss": 0.7309, "step": 6243 }, { "epoch": 0.4724755022511445, "grad_norm": 2.2683217525482178, "learning_rate": 1.055519323851992e-05, "loss": 0.6129, "step": 6244 }, { "epoch": 0.4725511709734781, "grad_norm": 2.247870683670044, "learning_rate": 1.0552974852671111e-05, "loss": 0.7197, "step": 6245 }, { "epoch": 0.47262683969581176, "grad_norm": 2.4525437355041504, "learning_rate": 1.0550756369569987e-05, "loss": 0.5801, "step": 6246 }, { "epoch": 0.4727025084181454, "grad_norm": 2.0607056617736816, "learning_rate": 1.0548537789355486e-05, "loss": 0.6399, "step": 6247 }, { "epoch": 0.472778177140479, "grad_norm": 1.7411423921585083, "learning_rate": 1.054631911216654e-05, "loss": 0.6645, "step": 6248 }, { "epoch": 0.4728538458628126, "grad_norm": 2.4376776218414307, "learning_rate": 1.0544100338142088e-05, "loss": 0.7827, "step": 6249 }, { "epoch": 0.4729295145851462, "grad_norm": 1.9150795936584473, "learning_rate": 1.0541881467421081e-05, "loss": 0.8126, "step": 6250 }, { "epoch": 0.47300518330747987, "grad_norm": 2.8938183784484863, "learning_rate": 1.053966250014247e-05, "loss": 0.8907, "step": 6251 }, { "epoch": 0.4730808520298135, "grad_norm": 2.5442142486572266, "learning_rate": 1.0537443436445213e-05, "loss": 0.7493, "step": 6252 }, { "epoch": 0.4731565207521471, "grad_norm": 5.999145984649658, "learning_rate": 1.0535224276468274e-05, "loss": 0.7522, "step": 6253 }, { "epoch": 0.4732321894744807, "grad_norm": 3.0991311073303223, "learning_rate": 1.0533005020350627e-05, "loss": 0.8252, "step": 6254 }, { "epoch": 0.47330785819681437, "grad_norm": 3.191121816635132, "learning_rate": 1.0530785668231243e-05, "loss": 0.7184, "step": 6255 }, { "epoch": 0.473383526919148, "grad_norm": 2.009726047515869, "learning_rate": 1.0528566220249113e-05, "loss": 0.7366, "step": 6256 }, { "epoch": 0.4734591956414816, "grad_norm": 2.614388942718506, "learning_rate": 1.052634667654322e-05, "loss": 0.8348, "step": 6257 }, { "epoch": 0.4735348643638152, "grad_norm": 2.2507081031799316, "learning_rate": 1.0524127037252564e-05, "loss": 0.7253, "step": 6258 }, { "epoch": 0.47361053308614887, "grad_norm": 2.0302281379699707, "learning_rate": 1.0521907302516143e-05, "loss": 0.7483, "step": 6259 }, { "epoch": 0.4736862018084825, "grad_norm": 2.1905441284179688, "learning_rate": 1.0519687472472962e-05, "loss": 0.7226, "step": 6260 }, { "epoch": 0.4737618705308161, "grad_norm": 1.9951703548431396, "learning_rate": 1.0517467547262038e-05, "loss": 0.8689, "step": 6261 }, { "epoch": 0.4738375392531497, "grad_norm": 2.1481285095214844, "learning_rate": 1.0515247527022386e-05, "loss": 0.8369, "step": 6262 }, { "epoch": 0.4739132079754833, "grad_norm": 1.8148024082183838, "learning_rate": 1.0513027411893035e-05, "loss": 0.7932, "step": 6263 }, { "epoch": 0.473988876697817, "grad_norm": 2.1687381267547607, "learning_rate": 1.0510807202013016e-05, "loss": 0.7735, "step": 6264 }, { "epoch": 0.4740645454201506, "grad_norm": 1.8470525741577148, "learning_rate": 1.0508586897521359e-05, "loss": 0.8987, "step": 6265 }, { "epoch": 0.4741402141424842, "grad_norm": 2.30938458442688, "learning_rate": 1.0506366498557113e-05, "loss": 0.6753, "step": 6266 }, { "epoch": 0.4742158828648178, "grad_norm": 2.1527512073516846, "learning_rate": 1.0504146005259323e-05, "loss": 0.8064, "step": 6267 }, { "epoch": 0.47429155158715147, "grad_norm": 2.357869863510132, "learning_rate": 1.050192541776705e-05, "loss": 0.8538, "step": 6268 }, { "epoch": 0.4743672203094851, "grad_norm": 2.084754705429077, "learning_rate": 1.0499704736219345e-05, "loss": 0.7007, "step": 6269 }, { "epoch": 0.4744428890318187, "grad_norm": 1.8246986865997314, "learning_rate": 1.049748396075528e-05, "loss": 0.6057, "step": 6270 }, { "epoch": 0.4745185577541523, "grad_norm": 3.0122311115264893, "learning_rate": 1.0495263091513926e-05, "loss": 0.6435, "step": 6271 }, { "epoch": 0.47459422647648597, "grad_norm": 2.0100696086883545, "learning_rate": 1.0493042128634361e-05, "loss": 0.6919, "step": 6272 }, { "epoch": 0.4746698951988196, "grad_norm": 2.3327362537384033, "learning_rate": 1.0490821072255667e-05, "loss": 0.6692, "step": 6273 }, { "epoch": 0.4747455639211532, "grad_norm": 2.43101167678833, "learning_rate": 1.0488599922516941e-05, "loss": 0.7824, "step": 6274 }, { "epoch": 0.4748212326434868, "grad_norm": 1.8376708030700684, "learning_rate": 1.048637867955727e-05, "loss": 0.7273, "step": 6275 }, { "epoch": 0.4748969013658204, "grad_norm": 1.9170385599136353, "learning_rate": 1.0484157343515756e-05, "loss": 0.6116, "step": 6276 }, { "epoch": 0.4749725700881541, "grad_norm": 1.9089981317520142, "learning_rate": 1.0481935914531513e-05, "loss": 0.7632, "step": 6277 }, { "epoch": 0.4750482388104877, "grad_norm": 2.5500640869140625, "learning_rate": 1.0479714392743645e-05, "loss": 0.643, "step": 6278 }, { "epoch": 0.4751239075328213, "grad_norm": 2.08392596244812, "learning_rate": 1.0477492778291281e-05, "loss": 0.7338, "step": 6279 }, { "epoch": 0.4751995762551549, "grad_norm": 2.564549446105957, "learning_rate": 1.0475271071313535e-05, "loss": 0.7122, "step": 6280 }, { "epoch": 0.4752752449774886, "grad_norm": 1.9711978435516357, "learning_rate": 1.0473049271949547e-05, "loss": 0.7447, "step": 6281 }, { "epoch": 0.4753509136998222, "grad_norm": 1.9568284749984741, "learning_rate": 1.0470827380338448e-05, "loss": 0.66, "step": 6282 }, { "epoch": 0.4754265824221558, "grad_norm": 2.3648922443389893, "learning_rate": 1.046860539661938e-05, "loss": 0.7885, "step": 6283 }, { "epoch": 0.4755022511444894, "grad_norm": 2.0861058235168457, "learning_rate": 1.0466383320931494e-05, "loss": 0.8754, "step": 6284 }, { "epoch": 0.4755779198668231, "grad_norm": 2.200965642929077, "learning_rate": 1.046416115341394e-05, "loss": 0.8098, "step": 6285 }, { "epoch": 0.4756535885891567, "grad_norm": 2.617201328277588, "learning_rate": 1.0461938894205882e-05, "loss": 0.6633, "step": 6286 }, { "epoch": 0.4757292573114903, "grad_norm": 2.2858943939208984, "learning_rate": 1.0459716543446477e-05, "loss": 0.729, "step": 6287 }, { "epoch": 0.4758049260338239, "grad_norm": 2.4813878536224365, "learning_rate": 1.0457494101274904e-05, "loss": 0.6442, "step": 6288 }, { "epoch": 0.4758805947561575, "grad_norm": 2.3235511779785156, "learning_rate": 1.0455271567830336e-05, "loss": 0.7065, "step": 6289 }, { "epoch": 0.4759562634784912, "grad_norm": 2.1012747287750244, "learning_rate": 1.0453048943251956e-05, "loss": 0.8561, "step": 6290 }, { "epoch": 0.4760319322008248, "grad_norm": 2.3040177822113037, "learning_rate": 1.045082622767895e-05, "loss": 0.6949, "step": 6291 }, { "epoch": 0.4761076009231584, "grad_norm": 2.2570884227752686, "learning_rate": 1.0448603421250513e-05, "loss": 0.77, "step": 6292 }, { "epoch": 0.476183269645492, "grad_norm": 2.1080000400543213, "learning_rate": 1.0446380524105847e-05, "loss": 0.8376, "step": 6293 }, { "epoch": 0.4762589383678257, "grad_norm": 2.6243064403533936, "learning_rate": 1.0444157536384152e-05, "loss": 0.7462, "step": 6294 }, { "epoch": 0.4763346070901593, "grad_norm": 2.161816358566284, "learning_rate": 1.0441934458224642e-05, "loss": 0.8241, "step": 6295 }, { "epoch": 0.4764102758124929, "grad_norm": 2.4741382598876953, "learning_rate": 1.043971128976653e-05, "loss": 0.733, "step": 6296 }, { "epoch": 0.4764859445348265, "grad_norm": 2.5977022647857666, "learning_rate": 1.0437488031149042e-05, "loss": 0.6428, "step": 6297 }, { "epoch": 0.4765616132571602, "grad_norm": 2.8118369579315186, "learning_rate": 1.0435264682511405e-05, "loss": 0.8184, "step": 6298 }, { "epoch": 0.4766372819794938, "grad_norm": 2.1726109981536865, "learning_rate": 1.0433041243992852e-05, "loss": 0.6495, "step": 6299 }, { "epoch": 0.4767129507018274, "grad_norm": 1.90822172164917, "learning_rate": 1.0430817715732622e-05, "loss": 0.7302, "step": 6300 }, { "epoch": 0.476788619424161, "grad_norm": 2.576930522918701, "learning_rate": 1.0428594097869953e-05, "loss": 0.8132, "step": 6301 }, { "epoch": 0.4768642881464946, "grad_norm": 1.8655272722244263, "learning_rate": 1.0426370390544107e-05, "loss": 0.8921, "step": 6302 }, { "epoch": 0.4769399568688283, "grad_norm": 2.354846715927124, "learning_rate": 1.042414659389433e-05, "loss": 0.803, "step": 6303 }, { "epoch": 0.4770156255911619, "grad_norm": 2.417755603790283, "learning_rate": 1.0421922708059892e-05, "loss": 0.8491, "step": 6304 }, { "epoch": 0.4770912943134955, "grad_norm": 1.6831603050231934, "learning_rate": 1.041969873318005e-05, "loss": 0.6529, "step": 6305 }, { "epoch": 0.4771669630358291, "grad_norm": 2.6375699043273926, "learning_rate": 1.0417474669394084e-05, "loss": 0.8822, "step": 6306 }, { "epoch": 0.4772426317581628, "grad_norm": 2.0903217792510986, "learning_rate": 1.041525051684127e-05, "loss": 0.6572, "step": 6307 }, { "epoch": 0.4773183004804964, "grad_norm": 1.7691537141799927, "learning_rate": 1.0413026275660887e-05, "loss": 0.7839, "step": 6308 }, { "epoch": 0.47739396920283, "grad_norm": 2.0674221515655518, "learning_rate": 1.0410801945992233e-05, "loss": 0.6806, "step": 6309 }, { "epoch": 0.4774696379251636, "grad_norm": 2.6139473915100098, "learning_rate": 1.0408577527974595e-05, "loss": 0.7133, "step": 6310 }, { "epoch": 0.4775453066474973, "grad_norm": 2.3850035667419434, "learning_rate": 1.0406353021747277e-05, "loss": 0.6617, "step": 6311 }, { "epoch": 0.4776209753698309, "grad_norm": 2.5777697563171387, "learning_rate": 1.0404128427449584e-05, "loss": 0.8671, "step": 6312 }, { "epoch": 0.4776966440921645, "grad_norm": 2.2594873905181885, "learning_rate": 1.0401903745220831e-05, "loss": 0.7522, "step": 6313 }, { "epoch": 0.4777723128144981, "grad_norm": 2.265115737915039, "learning_rate": 1.0399678975200328e-05, "loss": 0.7659, "step": 6314 }, { "epoch": 0.4778479815368317, "grad_norm": 2.038884162902832, "learning_rate": 1.03974541175274e-05, "loss": 0.6763, "step": 6315 }, { "epoch": 0.4779236502591654, "grad_norm": 2.0255823135375977, "learning_rate": 1.0395229172341377e-05, "loss": 0.7657, "step": 6316 }, { "epoch": 0.477999318981499, "grad_norm": 1.9382271766662598, "learning_rate": 1.0393004139781586e-05, "loss": 0.5579, "step": 6317 }, { "epoch": 0.4780749877038326, "grad_norm": 2.1081786155700684, "learning_rate": 1.0390779019987379e-05, "loss": 0.7453, "step": 6318 }, { "epoch": 0.4781506564261662, "grad_norm": 1.9590938091278076, "learning_rate": 1.0388553813098082e-05, "loss": 0.7372, "step": 6319 }, { "epoch": 0.4782263251484999, "grad_norm": 1.6101315021514893, "learning_rate": 1.0386328519253061e-05, "loss": 0.8169, "step": 6320 }, { "epoch": 0.4783019938708335, "grad_norm": 3.1328699588775635, "learning_rate": 1.0384103138591659e-05, "loss": 0.7454, "step": 6321 }, { "epoch": 0.4783776625931671, "grad_norm": 2.7711310386657715, "learning_rate": 1.0381877671253245e-05, "loss": 0.5887, "step": 6322 }, { "epoch": 0.4784533313155007, "grad_norm": 2.25168514251709, "learning_rate": 1.037965211737718e-05, "loss": 0.6437, "step": 6323 }, { "epoch": 0.4785290000378344, "grad_norm": 3.660417318344116, "learning_rate": 1.0377426477102837e-05, "loss": 0.7606, "step": 6324 }, { "epoch": 0.478604668760168, "grad_norm": 2.1805953979492188, "learning_rate": 1.0375200750569595e-05, "loss": 0.651, "step": 6325 }, { "epoch": 0.4786803374825016, "grad_norm": 2.296247720718384, "learning_rate": 1.037297493791683e-05, "loss": 0.7059, "step": 6326 }, { "epoch": 0.4787560062048352, "grad_norm": 2.1568257808685303, "learning_rate": 1.037074903928394e-05, "loss": 0.7095, "step": 6327 }, { "epoch": 0.4788316749271689, "grad_norm": 2.124258041381836, "learning_rate": 1.0368523054810308e-05, "loss": 0.775, "step": 6328 }, { "epoch": 0.4789073436495025, "grad_norm": 2.2516794204711914, "learning_rate": 1.0366296984635335e-05, "loss": 0.8205, "step": 6329 }, { "epoch": 0.4789830123718361, "grad_norm": 2.5653510093688965, "learning_rate": 1.0364070828898425e-05, "loss": 0.6034, "step": 6330 }, { "epoch": 0.4790586810941697, "grad_norm": 2.1148502826690674, "learning_rate": 1.0361844587738991e-05, "loss": 0.6733, "step": 6331 }, { "epoch": 0.4791343498165033, "grad_norm": 2.0348715782165527, "learning_rate": 1.0359618261296443e-05, "loss": 0.6521, "step": 6332 }, { "epoch": 0.479210018538837, "grad_norm": 1.895645022392273, "learning_rate": 1.0357391849710202e-05, "loss": 0.7377, "step": 6333 }, { "epoch": 0.4792856872611706, "grad_norm": 2.0529367923736572, "learning_rate": 1.0355165353119692e-05, "loss": 0.7686, "step": 6334 }, { "epoch": 0.4793613559835042, "grad_norm": 2.688235282897949, "learning_rate": 1.0352938771664346e-05, "loss": 0.7611, "step": 6335 }, { "epoch": 0.4794370247058378, "grad_norm": 2.2325501441955566, "learning_rate": 1.0350712105483598e-05, "loss": 0.7466, "step": 6336 }, { "epoch": 0.4795126934281715, "grad_norm": 2.4246463775634766, "learning_rate": 1.0348485354716888e-05, "loss": 0.738, "step": 6337 }, { "epoch": 0.4795883621505051, "grad_norm": 2.519249439239502, "learning_rate": 1.0346258519503663e-05, "loss": 0.6102, "step": 6338 }, { "epoch": 0.4796640308728387, "grad_norm": 2.213670253753662, "learning_rate": 1.0344031599983377e-05, "loss": 0.72, "step": 6339 }, { "epoch": 0.4797396995951723, "grad_norm": 2.312347173690796, "learning_rate": 1.0341804596295483e-05, "loss": 0.7855, "step": 6340 }, { "epoch": 0.479815368317506, "grad_norm": 2.551732063293457, "learning_rate": 1.033957750857945e-05, "loss": 0.6129, "step": 6341 }, { "epoch": 0.4798910370398396, "grad_norm": 2.086526870727539, "learning_rate": 1.0337350336974735e-05, "loss": 0.8321, "step": 6342 }, { "epoch": 0.4799667057621732, "grad_norm": 1.661421298980713, "learning_rate": 1.033512308162082e-05, "loss": 0.6088, "step": 6343 }, { "epoch": 0.4800423744845068, "grad_norm": 1.759372591972351, "learning_rate": 1.0332895742657175e-05, "loss": 0.7405, "step": 6344 }, { "epoch": 0.48011804320684043, "grad_norm": 1.9246104955673218, "learning_rate": 1.0330668320223293e-05, "loss": 0.6529, "step": 6345 }, { "epoch": 0.4801937119291741, "grad_norm": 2.265521764755249, "learning_rate": 1.0328440814458652e-05, "loss": 0.6742, "step": 6346 }, { "epoch": 0.4802693806515077, "grad_norm": 2.2895123958587646, "learning_rate": 1.0326213225502754e-05, "loss": 0.7062, "step": 6347 }, { "epoch": 0.4803450493738413, "grad_norm": 2.0983073711395264, "learning_rate": 1.0323985553495094e-05, "loss": 0.8804, "step": 6348 }, { "epoch": 0.4804207180961749, "grad_norm": 2.0943613052368164, "learning_rate": 1.0321757798575176e-05, "loss": 0.7967, "step": 6349 }, { "epoch": 0.4804963868185086, "grad_norm": 1.9753507375717163, "learning_rate": 1.0319529960882508e-05, "loss": 0.7633, "step": 6350 }, { "epoch": 0.4805720555408422, "grad_norm": 1.9423227310180664, "learning_rate": 1.0317302040556607e-05, "loss": 0.9147, "step": 6351 }, { "epoch": 0.4806477242631758, "grad_norm": 2.10290265083313, "learning_rate": 1.0315074037736991e-05, "loss": 0.6627, "step": 6352 }, { "epoch": 0.4807233929855094, "grad_norm": 2.183432102203369, "learning_rate": 1.0312845952563187e-05, "loss": 0.7181, "step": 6353 }, { "epoch": 0.4807990617078431, "grad_norm": 1.9517886638641357, "learning_rate": 1.0310617785174721e-05, "loss": 0.6473, "step": 6354 }, { "epoch": 0.4808747304301767, "grad_norm": 1.996443510055542, "learning_rate": 1.0308389535711133e-05, "loss": 0.5921, "step": 6355 }, { "epoch": 0.4809503991525103, "grad_norm": 1.8111634254455566, "learning_rate": 1.0306161204311958e-05, "loss": 0.5869, "step": 6356 }, { "epoch": 0.4810260678748439, "grad_norm": 2.648256301879883, "learning_rate": 1.0303932791116744e-05, "loss": 0.768, "step": 6357 }, { "epoch": 0.48110173659717753, "grad_norm": 2.27350115776062, "learning_rate": 1.0301704296265043e-05, "loss": 0.6906, "step": 6358 }, { "epoch": 0.4811774053195112, "grad_norm": 1.7875982522964478, "learning_rate": 1.0299475719896409e-05, "loss": 0.6928, "step": 6359 }, { "epoch": 0.4812530740418448, "grad_norm": 2.582258939743042, "learning_rate": 1.0297247062150398e-05, "loss": 0.588, "step": 6360 }, { "epoch": 0.4813287427641784, "grad_norm": 2.343061685562134, "learning_rate": 1.0295018323166583e-05, "loss": 0.8526, "step": 6361 }, { "epoch": 0.48140441148651203, "grad_norm": 2.1170144081115723, "learning_rate": 1.0292789503084532e-05, "loss": 0.7459, "step": 6362 }, { "epoch": 0.4814800802088457, "grad_norm": 2.0151526927948, "learning_rate": 1.029056060204382e-05, "loss": 0.7721, "step": 6363 }, { "epoch": 0.4815557489311793, "grad_norm": 1.9160104990005493, "learning_rate": 1.0288331620184032e-05, "loss": 0.6751, "step": 6364 }, { "epoch": 0.4816314176535129, "grad_norm": 2.3645200729370117, "learning_rate": 1.0286102557644746e-05, "loss": 0.6832, "step": 6365 }, { "epoch": 0.4817070863758465, "grad_norm": 1.6131598949432373, "learning_rate": 1.0283873414565564e-05, "loss": 0.6133, "step": 6366 }, { "epoch": 0.4817827550981802, "grad_norm": 2.991684913635254, "learning_rate": 1.0281644191086073e-05, "loss": 0.6093, "step": 6367 }, { "epoch": 0.4818584238205138, "grad_norm": 2.0066182613372803, "learning_rate": 1.0279414887345876e-05, "loss": 0.7508, "step": 6368 }, { "epoch": 0.4819340925428474, "grad_norm": 2.1001648902893066, "learning_rate": 1.0277185503484583e-05, "loss": 0.8123, "step": 6369 }, { "epoch": 0.482009761265181, "grad_norm": 2.06756591796875, "learning_rate": 1.0274956039641801e-05, "loss": 0.6822, "step": 6370 }, { "epoch": 0.48208542998751464, "grad_norm": 2.315427541732788, "learning_rate": 1.027272649595715e-05, "loss": 0.7408, "step": 6371 }, { "epoch": 0.4821610987098483, "grad_norm": 3.052438735961914, "learning_rate": 1.0270496872570249e-05, "loss": 0.6234, "step": 6372 }, { "epoch": 0.4822367674321819, "grad_norm": 1.8254213333129883, "learning_rate": 1.0268267169620725e-05, "loss": 0.7506, "step": 6373 }, { "epoch": 0.4823124361545155, "grad_norm": 2.1949892044067383, "learning_rate": 1.0266037387248206e-05, "loss": 0.7951, "step": 6374 }, { "epoch": 0.48238810487684913, "grad_norm": 2.8453803062438965, "learning_rate": 1.0263807525592332e-05, "loss": 0.8468, "step": 6375 }, { "epoch": 0.4824637735991828, "grad_norm": 2.467980146408081, "learning_rate": 1.0261577584792743e-05, "loss": 0.7182, "step": 6376 }, { "epoch": 0.4825394423215164, "grad_norm": 2.0588455200195312, "learning_rate": 1.0259347564989087e-05, "loss": 0.7689, "step": 6377 }, { "epoch": 0.48261511104385, "grad_norm": 2.141855001449585, "learning_rate": 1.0257117466321015e-05, "loss": 0.8404, "step": 6378 }, { "epoch": 0.48269077976618363, "grad_norm": 1.7582626342773438, "learning_rate": 1.0254887288928176e-05, "loss": 0.6482, "step": 6379 }, { "epoch": 0.4827664484885173, "grad_norm": 2.70973539352417, "learning_rate": 1.0252657032950239e-05, "loss": 0.7863, "step": 6380 }, { "epoch": 0.4828421172108509, "grad_norm": 2.2643096446990967, "learning_rate": 1.0250426698526867e-05, "loss": 0.6601, "step": 6381 }, { "epoch": 0.4829177859331845, "grad_norm": 2.705983877182007, "learning_rate": 1.0248196285797733e-05, "loss": 0.6439, "step": 6382 }, { "epoch": 0.48299345465551813, "grad_norm": 1.8606898784637451, "learning_rate": 1.0245965794902505e-05, "loss": 0.7542, "step": 6383 }, { "epoch": 0.48306912337785174, "grad_norm": 2.3897595405578613, "learning_rate": 1.0243735225980873e-05, "loss": 0.6444, "step": 6384 }, { "epoch": 0.4831447921001854, "grad_norm": 3.5943784713745117, "learning_rate": 1.0241504579172518e-05, "loss": 0.6732, "step": 6385 }, { "epoch": 0.483220460822519, "grad_norm": 2.2588038444519043, "learning_rate": 1.023927385461713e-05, "loss": 0.635, "step": 6386 }, { "epoch": 0.4832961295448526, "grad_norm": 1.9176634550094604, "learning_rate": 1.0237043052454404e-05, "loss": 0.7008, "step": 6387 }, { "epoch": 0.48337179826718624, "grad_norm": 2.0969033241271973, "learning_rate": 1.023481217282404e-05, "loss": 0.8144, "step": 6388 }, { "epoch": 0.4834474669895199, "grad_norm": 2.217078924179077, "learning_rate": 1.0232581215865748e-05, "loss": 0.6075, "step": 6389 }, { "epoch": 0.4835231357118535, "grad_norm": 2.35626220703125, "learning_rate": 1.0230350181719231e-05, "loss": 0.8421, "step": 6390 }, { "epoch": 0.4835988044341871, "grad_norm": 2.1566507816314697, "learning_rate": 1.0228119070524205e-05, "loss": 0.7397, "step": 6391 }, { "epoch": 0.48367447315652073, "grad_norm": 2.613382577896118, "learning_rate": 1.0225887882420394e-05, "loss": 0.7971, "step": 6392 }, { "epoch": 0.4837501418788544, "grad_norm": 6.792184829711914, "learning_rate": 1.0223656617547517e-05, "loss": 0.7269, "step": 6393 }, { "epoch": 0.483825810601188, "grad_norm": 2.764080047607422, "learning_rate": 1.0221425276045305e-05, "loss": 0.7354, "step": 6394 }, { "epoch": 0.4839014793235216, "grad_norm": 2.654021978378296, "learning_rate": 1.0219193858053493e-05, "loss": 0.6826, "step": 6395 }, { "epoch": 0.48397714804585523, "grad_norm": 2.122959613800049, "learning_rate": 1.0216962363711816e-05, "loss": 0.6705, "step": 6396 }, { "epoch": 0.48405281676818884, "grad_norm": 2.246718645095825, "learning_rate": 1.0214730793160018e-05, "loss": 0.6594, "step": 6397 }, { "epoch": 0.4841284854905225, "grad_norm": 2.107835054397583, "learning_rate": 1.0212499146537853e-05, "loss": 0.6163, "step": 6398 }, { "epoch": 0.4842041542128561, "grad_norm": 2.2131471633911133, "learning_rate": 1.0210267423985067e-05, "loss": 0.6586, "step": 6399 }, { "epoch": 0.48427982293518973, "grad_norm": 2.5556445121765137, "learning_rate": 1.0208035625641424e-05, "loss": 0.6997, "step": 6400 }, { "epoch": 0.48435549165752334, "grad_norm": 1.8932169675827026, "learning_rate": 1.020580375164668e-05, "loss": 0.8308, "step": 6401 }, { "epoch": 0.484431160379857, "grad_norm": 1.881028413772583, "learning_rate": 1.0203571802140605e-05, "loss": 0.6717, "step": 6402 }, { "epoch": 0.4845068291021906, "grad_norm": 2.3895459175109863, "learning_rate": 1.020133977726297e-05, "loss": 0.6854, "step": 6403 }, { "epoch": 0.4845824978245242, "grad_norm": 1.8925831317901611, "learning_rate": 1.0199107677153554e-05, "loss": 0.6973, "step": 6404 }, { "epoch": 0.48465816654685784, "grad_norm": 2.5870819091796875, "learning_rate": 1.0196875501952137e-05, "loss": 0.6679, "step": 6405 }, { "epoch": 0.4847338352691915, "grad_norm": 2.1854963302612305, "learning_rate": 1.01946432517985e-05, "loss": 0.7334, "step": 6406 }, { "epoch": 0.4848095039915251, "grad_norm": 3.5086848735809326, "learning_rate": 1.0192410926832446e-05, "loss": 0.6914, "step": 6407 }, { "epoch": 0.4848851727138587, "grad_norm": 3.082146167755127, "learning_rate": 1.0190178527193761e-05, "loss": 0.7652, "step": 6408 }, { "epoch": 0.48496084143619234, "grad_norm": 2.8593039512634277, "learning_rate": 1.0187946053022247e-05, "loss": 0.6731, "step": 6409 }, { "epoch": 0.48503651015852595, "grad_norm": 2.5862269401550293, "learning_rate": 1.0185713504457709e-05, "loss": 0.6637, "step": 6410 }, { "epoch": 0.4851121788808596, "grad_norm": 1.955764889717102, "learning_rate": 1.0183480881639952e-05, "loss": 0.6526, "step": 6411 }, { "epoch": 0.4851878476031932, "grad_norm": 2.400613307952881, "learning_rate": 1.01812481847088e-05, "loss": 0.7565, "step": 6412 }, { "epoch": 0.48526351632552683, "grad_norm": 1.8675727844238281, "learning_rate": 1.0179015413804063e-05, "loss": 0.6738, "step": 6413 }, { "epoch": 0.48533918504786044, "grad_norm": 2.350315809249878, "learning_rate": 1.0176782569065568e-05, "loss": 0.7441, "step": 6414 }, { "epoch": 0.4854148537701941, "grad_norm": 2.3835151195526123, "learning_rate": 1.0174549650633142e-05, "loss": 0.6982, "step": 6415 }, { "epoch": 0.4854905224925277, "grad_norm": 2.2682459354400635, "learning_rate": 1.0172316658646619e-05, "loss": 0.6537, "step": 6416 }, { "epoch": 0.48556619121486133, "grad_norm": 1.8722403049468994, "learning_rate": 1.0170083593245836e-05, "loss": 0.8612, "step": 6417 }, { "epoch": 0.48564185993719494, "grad_norm": 1.8039960861206055, "learning_rate": 1.0167850454570632e-05, "loss": 0.7195, "step": 6418 }, { "epoch": 0.4857175286595286, "grad_norm": 2.1905548572540283, "learning_rate": 1.0165617242760855e-05, "loss": 0.8805, "step": 6419 }, { "epoch": 0.4857931973818622, "grad_norm": 3.8410537242889404, "learning_rate": 1.0163383957956357e-05, "loss": 0.7325, "step": 6420 }, { "epoch": 0.48586886610419583, "grad_norm": 1.9922008514404297, "learning_rate": 1.0161150600296993e-05, "loss": 0.762, "step": 6421 }, { "epoch": 0.48594453482652944, "grad_norm": 2.191408157348633, "learning_rate": 1.0158917169922622e-05, "loss": 0.7489, "step": 6422 }, { "epoch": 0.48602020354886305, "grad_norm": 1.968904733657837, "learning_rate": 1.0156683666973112e-05, "loss": 0.6926, "step": 6423 }, { "epoch": 0.4860958722711967, "grad_norm": 2.3216192722320557, "learning_rate": 1.0154450091588326e-05, "loss": 0.7792, "step": 6424 }, { "epoch": 0.4861715409935303, "grad_norm": 1.8544453382492065, "learning_rate": 1.0152216443908144e-05, "loss": 0.6568, "step": 6425 }, { "epoch": 0.48624720971586394, "grad_norm": 2.2261478900909424, "learning_rate": 1.0149982724072439e-05, "loss": 0.7715, "step": 6426 }, { "epoch": 0.48632287843819755, "grad_norm": 2.093865394592285, "learning_rate": 1.0147748932221098e-05, "loss": 0.738, "step": 6427 }, { "epoch": 0.4863985471605312, "grad_norm": 2.6762888431549072, "learning_rate": 1.0145515068494007e-05, "loss": 0.7401, "step": 6428 }, { "epoch": 0.4864742158828648, "grad_norm": 1.9878803491592407, "learning_rate": 1.0143281133031056e-05, "loss": 0.6209, "step": 6429 }, { "epoch": 0.48654988460519843, "grad_norm": 2.1690900325775146, "learning_rate": 1.0141047125972145e-05, "loss": 0.7118, "step": 6430 }, { "epoch": 0.48662555332753205, "grad_norm": 2.016566038131714, "learning_rate": 1.013881304745717e-05, "loss": 0.6214, "step": 6431 }, { "epoch": 0.4867012220498657, "grad_norm": 1.7480603456497192, "learning_rate": 1.0136578897626037e-05, "loss": 0.7928, "step": 6432 }, { "epoch": 0.4867768907721993, "grad_norm": 2.419851064682007, "learning_rate": 1.013434467661866e-05, "loss": 0.7508, "step": 6433 }, { "epoch": 0.48685255949453293, "grad_norm": 3.0105323791503906, "learning_rate": 1.0132110384574949e-05, "loss": 0.7791, "step": 6434 }, { "epoch": 0.48692822821686654, "grad_norm": 1.9230643510818481, "learning_rate": 1.0129876021634826e-05, "loss": 0.7613, "step": 6435 }, { "epoch": 0.4870038969392002, "grad_norm": 2.396361827850342, "learning_rate": 1.0127641587938213e-05, "loss": 0.6539, "step": 6436 }, { "epoch": 0.4870795656615338, "grad_norm": 2.384631395339966, "learning_rate": 1.0125407083625034e-05, "loss": 0.661, "step": 6437 }, { "epoch": 0.48715523438386743, "grad_norm": 1.9804085493087769, "learning_rate": 1.0123172508835224e-05, "loss": 0.6685, "step": 6438 }, { "epoch": 0.48723090310620104, "grad_norm": 2.5004003047943115, "learning_rate": 1.0120937863708718e-05, "loss": 0.937, "step": 6439 }, { "epoch": 0.48730657182853465, "grad_norm": 1.8940950632095337, "learning_rate": 1.0118703148385458e-05, "loss": 0.7562, "step": 6440 }, { "epoch": 0.4873822405508683, "grad_norm": 1.8344001770019531, "learning_rate": 1.0116468363005388e-05, "loss": 0.6854, "step": 6441 }, { "epoch": 0.48745790927320193, "grad_norm": 2.383427858352661, "learning_rate": 1.011423350770846e-05, "loss": 0.7046, "step": 6442 }, { "epoch": 0.48753357799553554, "grad_norm": 2.256309747695923, "learning_rate": 1.0111998582634623e-05, "loss": 0.7245, "step": 6443 }, { "epoch": 0.48760924671786915, "grad_norm": 7.691187381744385, "learning_rate": 1.0109763587923842e-05, "loss": 0.7306, "step": 6444 }, { "epoch": 0.4876849154402028, "grad_norm": 2.102891206741333, "learning_rate": 1.0107528523716071e-05, "loss": 0.6399, "step": 6445 }, { "epoch": 0.4877605841625364, "grad_norm": 2.5367937088012695, "learning_rate": 1.0105293390151287e-05, "loss": 0.6951, "step": 6446 }, { "epoch": 0.48783625288487004, "grad_norm": 1.8842484951019287, "learning_rate": 1.0103058187369451e-05, "loss": 0.7347, "step": 6447 }, { "epoch": 0.48791192160720365, "grad_norm": 1.78765869140625, "learning_rate": 1.0100822915510547e-05, "loss": 0.6858, "step": 6448 }, { "epoch": 0.4879875903295373, "grad_norm": 1.82582688331604, "learning_rate": 1.0098587574714548e-05, "loss": 0.7601, "step": 6449 }, { "epoch": 0.4880632590518709, "grad_norm": 2.041457176208496, "learning_rate": 1.0096352165121444e-05, "loss": 0.6765, "step": 6450 }, { "epoch": 0.48813892777420453, "grad_norm": 2.2032086849212646, "learning_rate": 1.0094116686871222e-05, "loss": 0.6296, "step": 6451 }, { "epoch": 0.48821459649653814, "grad_norm": 2.4517290592193604, "learning_rate": 1.0091881140103873e-05, "loss": 0.8565, "step": 6452 }, { "epoch": 0.48829026521887176, "grad_norm": 2.434957504272461, "learning_rate": 1.0089645524959398e-05, "loss": 0.8896, "step": 6453 }, { "epoch": 0.4883659339412054, "grad_norm": 2.1253631114959717, "learning_rate": 1.0087409841577793e-05, "loss": 0.5887, "step": 6454 }, { "epoch": 0.48844160266353903, "grad_norm": 2.2699928283691406, "learning_rate": 1.0085174090099066e-05, "loss": 0.7519, "step": 6455 }, { "epoch": 0.48851727138587264, "grad_norm": 2.0873475074768066, "learning_rate": 1.008293827066323e-05, "loss": 0.6143, "step": 6456 }, { "epoch": 0.48859294010820625, "grad_norm": 2.778334140777588, "learning_rate": 1.0080702383410296e-05, "loss": 0.7002, "step": 6457 }, { "epoch": 0.4886686088305399, "grad_norm": 2.2571263313293457, "learning_rate": 1.0078466428480285e-05, "loss": 0.6735, "step": 6458 }, { "epoch": 0.48874427755287353, "grad_norm": 2.0266454219818115, "learning_rate": 1.0076230406013216e-05, "loss": 0.782, "step": 6459 }, { "epoch": 0.48881994627520714, "grad_norm": 1.8740894794464111, "learning_rate": 1.0073994316149117e-05, "loss": 0.6624, "step": 6460 }, { "epoch": 0.48889561499754075, "grad_norm": 2.3470191955566406, "learning_rate": 1.0071758159028023e-05, "loss": 0.6661, "step": 6461 }, { "epoch": 0.4889712837198744, "grad_norm": 2.5701887607574463, "learning_rate": 1.0069521934789965e-05, "loss": 0.6928, "step": 6462 }, { "epoch": 0.489046952442208, "grad_norm": 1.8447167873382568, "learning_rate": 1.0067285643574983e-05, "loss": 0.5812, "step": 6463 }, { "epoch": 0.48912262116454164, "grad_norm": 2.286895751953125, "learning_rate": 1.0065049285523126e-05, "loss": 0.751, "step": 6464 }, { "epoch": 0.48919828988687525, "grad_norm": 1.9406249523162842, "learning_rate": 1.0062812860774435e-05, "loss": 0.7846, "step": 6465 }, { "epoch": 0.48927395860920886, "grad_norm": 2.343668222427368, "learning_rate": 1.0060576369468964e-05, "loss": 0.6193, "step": 6466 }, { "epoch": 0.4893496273315425, "grad_norm": 2.254185199737549, "learning_rate": 1.0058339811746774e-05, "loss": 0.7514, "step": 6467 }, { "epoch": 0.48942529605387614, "grad_norm": 1.851659893989563, "learning_rate": 1.0056103187747916e-05, "loss": 0.6014, "step": 6468 }, { "epoch": 0.48950096477620975, "grad_norm": 1.9072391986846924, "learning_rate": 1.0053866497612465e-05, "loss": 0.6593, "step": 6469 }, { "epoch": 0.48957663349854336, "grad_norm": 2.4362573623657227, "learning_rate": 1.0051629741480483e-05, "loss": 0.6269, "step": 6470 }, { "epoch": 0.489652302220877, "grad_norm": 2.2436280250549316, "learning_rate": 1.004939291949205e-05, "loss": 0.6636, "step": 6471 }, { "epoch": 0.48972797094321063, "grad_norm": 1.9060099124908447, "learning_rate": 1.0047156031787233e-05, "loss": 0.7561, "step": 6472 }, { "epoch": 0.48980363966554424, "grad_norm": 2.352036714553833, "learning_rate": 1.0044919078506122e-05, "loss": 0.6255, "step": 6473 }, { "epoch": 0.48987930838787785, "grad_norm": 1.977865219116211, "learning_rate": 1.0042682059788798e-05, "loss": 0.6376, "step": 6474 }, { "epoch": 0.4899549771102115, "grad_norm": 3.1520273685455322, "learning_rate": 1.0040444975775348e-05, "loss": 0.8148, "step": 6475 }, { "epoch": 0.49003064583254513, "grad_norm": 1.8340657949447632, "learning_rate": 1.0038207826605871e-05, "loss": 0.5468, "step": 6476 }, { "epoch": 0.49010631455487874, "grad_norm": 2.7415192127227783, "learning_rate": 1.003597061242046e-05, "loss": 0.7775, "step": 6477 }, { "epoch": 0.49018198327721235, "grad_norm": 2.266130208969116, "learning_rate": 1.003373333335922e-05, "loss": 0.7789, "step": 6478 }, { "epoch": 0.49025765199954596, "grad_norm": 2.030848503112793, "learning_rate": 1.0031495989562255e-05, "loss": 0.709, "step": 6479 }, { "epoch": 0.49033332072187963, "grad_norm": 1.9253787994384766, "learning_rate": 1.0029258581169675e-05, "loss": 0.8155, "step": 6480 }, { "epoch": 0.49040898944421324, "grad_norm": 2.235677480697632, "learning_rate": 1.0027021108321597e-05, "loss": 0.6628, "step": 6481 }, { "epoch": 0.49048465816654685, "grad_norm": 2.522493600845337, "learning_rate": 1.002478357115813e-05, "loss": 0.6663, "step": 6482 }, { "epoch": 0.49056032688888046, "grad_norm": 2.5765788555145264, "learning_rate": 1.0022545969819403e-05, "loss": 0.6466, "step": 6483 }, { "epoch": 0.4906359956112141, "grad_norm": 2.641915798187256, "learning_rate": 1.0020308304445539e-05, "loss": 0.777, "step": 6484 }, { "epoch": 0.49071166433354774, "grad_norm": 4.392044544219971, "learning_rate": 1.0018070575176672e-05, "loss": 0.6881, "step": 6485 }, { "epoch": 0.49078733305588135, "grad_norm": 1.90240478515625, "learning_rate": 1.0015832782152928e-05, "loss": 0.7021, "step": 6486 }, { "epoch": 0.49086300177821496, "grad_norm": 1.9071075916290283, "learning_rate": 1.0013594925514453e-05, "loss": 0.6333, "step": 6487 }, { "epoch": 0.4909386705005486, "grad_norm": 1.997644066810608, "learning_rate": 1.0011357005401386e-05, "loss": 0.6004, "step": 6488 }, { "epoch": 0.49101433922288223, "grad_norm": 2.4626305103302, "learning_rate": 1.000911902195387e-05, "loss": 0.7033, "step": 6489 }, { "epoch": 0.49109000794521585, "grad_norm": 1.9787263870239258, "learning_rate": 1.0006880975312061e-05, "loss": 0.6778, "step": 6490 }, { "epoch": 0.49116567666754946, "grad_norm": 2.7848620414733887, "learning_rate": 1.0004642865616104e-05, "loss": 0.5979, "step": 6491 }, { "epoch": 0.49124134538988307, "grad_norm": 2.4936132431030273, "learning_rate": 1.0002404693006164e-05, "loss": 0.7636, "step": 6492 }, { "epoch": 0.49131701411221673, "grad_norm": 1.9078294038772583, "learning_rate": 1.0000166457622396e-05, "loss": 0.6217, "step": 6493 }, { "epoch": 0.49139268283455034, "grad_norm": 2.200657844543457, "learning_rate": 9.997928159604974e-06, "loss": 0.698, "step": 6494 }, { "epoch": 0.49146835155688395, "grad_norm": 2.0636913776397705, "learning_rate": 9.99568979909406e-06, "loss": 0.7468, "step": 6495 }, { "epoch": 0.49154402027921756, "grad_norm": 2.1066038608551025, "learning_rate": 9.993451376229832e-06, "loss": 0.5998, "step": 6496 }, { "epoch": 0.49161968900155123, "grad_norm": 2.909907579421997, "learning_rate": 9.991212891152469e-06, "loss": 0.7663, "step": 6497 }, { "epoch": 0.49169535772388484, "grad_norm": 2.411532402038574, "learning_rate": 9.988974344002143e-06, "loss": 0.727, "step": 6498 }, { "epoch": 0.49177102644621845, "grad_norm": 2.1404001712799072, "learning_rate": 9.986735734919048e-06, "loss": 0.7662, "step": 6499 }, { "epoch": 0.49184669516855206, "grad_norm": 1.9575718641281128, "learning_rate": 9.984497064043367e-06, "loss": 0.7276, "step": 6500 }, { "epoch": 0.4919223638908857, "grad_norm": 2.085799217224121, "learning_rate": 9.982258331515298e-06, "loss": 0.8749, "step": 6501 }, { "epoch": 0.49199803261321934, "grad_norm": 2.514505624771118, "learning_rate": 9.980019537475034e-06, "loss": 0.7701, "step": 6502 }, { "epoch": 0.49207370133555295, "grad_norm": 2.4015519618988037, "learning_rate": 9.977780682062779e-06, "loss": 0.7562, "step": 6503 }, { "epoch": 0.49214937005788656, "grad_norm": 2.25130033493042, "learning_rate": 9.975541765418734e-06, "loss": 0.9941, "step": 6504 }, { "epoch": 0.49222503878022017, "grad_norm": 2.4228641986846924, "learning_rate": 9.973302787683106e-06, "loss": 0.7318, "step": 6505 }, { "epoch": 0.49230070750255384, "grad_norm": 2.58864688873291, "learning_rate": 9.971063748996113e-06, "loss": 0.6254, "step": 6506 }, { "epoch": 0.49237637622488745, "grad_norm": 2.389697790145874, "learning_rate": 9.968824649497963e-06, "loss": 0.6684, "step": 6507 }, { "epoch": 0.49245204494722106, "grad_norm": 2.1879706382751465, "learning_rate": 9.966585489328885e-06, "loss": 0.7188, "step": 6508 }, { "epoch": 0.49252771366955467, "grad_norm": 2.2431092262268066, "learning_rate": 9.964346268629092e-06, "loss": 0.727, "step": 6509 }, { "epoch": 0.49260338239188833, "grad_norm": 2.4414150714874268, "learning_rate": 9.962106987538822e-06, "loss": 0.9228, "step": 6510 }, { "epoch": 0.49267905111422194, "grad_norm": 2.145207166671753, "learning_rate": 9.959867646198299e-06, "loss": 0.611, "step": 6511 }, { "epoch": 0.49275471983655555, "grad_norm": 2.1104111671447754, "learning_rate": 9.957628244747755e-06, "loss": 0.8066, "step": 6512 }, { "epoch": 0.49283038855888917, "grad_norm": 2.5595433712005615, "learning_rate": 9.95538878332744e-06, "loss": 0.6367, "step": 6513 }, { "epoch": 0.49290605728122283, "grad_norm": 2.492157459259033, "learning_rate": 9.953149262077583e-06, "loss": 0.811, "step": 6514 }, { "epoch": 0.49298172600355644, "grad_norm": 2.075108528137207, "learning_rate": 9.95090968113844e-06, "loss": 0.6916, "step": 6515 }, { "epoch": 0.49305739472589005, "grad_norm": 3.124265432357788, "learning_rate": 9.948670040650253e-06, "loss": 0.6464, "step": 6516 }, { "epoch": 0.49313306344822366, "grad_norm": 3.6622323989868164, "learning_rate": 9.946430340753285e-06, "loss": 0.5503, "step": 6517 }, { "epoch": 0.4932087321705573, "grad_norm": 2.151686191558838, "learning_rate": 9.944190581587787e-06, "loss": 0.697, "step": 6518 }, { "epoch": 0.49328440089289094, "grad_norm": 1.9287328720092773, "learning_rate": 9.941950763294019e-06, "loss": 0.9503, "step": 6519 }, { "epoch": 0.49336006961522455, "grad_norm": 2.108152389526367, "learning_rate": 9.93971088601225e-06, "loss": 0.6141, "step": 6520 }, { "epoch": 0.49343573833755816, "grad_norm": 1.9373310804367065, "learning_rate": 9.937470949882741e-06, "loss": 0.8837, "step": 6521 }, { "epoch": 0.49351140705989177, "grad_norm": 2.2187082767486572, "learning_rate": 9.935230955045775e-06, "loss": 0.7105, "step": 6522 }, { "epoch": 0.49358707578222544, "grad_norm": 2.0548200607299805, "learning_rate": 9.932990901641616e-06, "loss": 0.7974, "step": 6523 }, { "epoch": 0.49366274450455905, "grad_norm": 2.771439790725708, "learning_rate": 9.930750789810554e-06, "loss": 0.7277, "step": 6524 }, { "epoch": 0.49373841322689266, "grad_norm": 2.260519504547119, "learning_rate": 9.928510619692862e-06, "loss": 0.7294, "step": 6525 }, { "epoch": 0.49381408194922627, "grad_norm": 2.2592484951019287, "learning_rate": 9.92627039142884e-06, "loss": 0.6168, "step": 6526 }, { "epoch": 0.49388975067155994, "grad_norm": 2.256322145462036, "learning_rate": 9.924030105158762e-06, "loss": 0.7252, "step": 6527 }, { "epoch": 0.49396541939389355, "grad_norm": 2.2179460525512695, "learning_rate": 9.921789761022933e-06, "loss": 0.715, "step": 6528 }, { "epoch": 0.49404108811622716, "grad_norm": 2.221290111541748, "learning_rate": 9.919549359161649e-06, "loss": 0.7855, "step": 6529 }, { "epoch": 0.49411675683856077, "grad_norm": 2.497633218765259, "learning_rate": 9.917308899715208e-06, "loss": 0.6075, "step": 6530 }, { "epoch": 0.4941924255608944, "grad_norm": 2.1124684810638428, "learning_rate": 9.915068382823918e-06, "loss": 0.7536, "step": 6531 }, { "epoch": 0.49426809428322804, "grad_norm": 1.9984757900238037, "learning_rate": 9.912827808628085e-06, "loss": 0.5954, "step": 6532 }, { "epoch": 0.49434376300556165, "grad_norm": 2.670492172241211, "learning_rate": 9.910587177268025e-06, "loss": 0.7755, "step": 6533 }, { "epoch": 0.49441943172789526, "grad_norm": 1.9507744312286377, "learning_rate": 9.908346488884048e-06, "loss": 0.6622, "step": 6534 }, { "epoch": 0.4944951004502289, "grad_norm": 2.2064528465270996, "learning_rate": 9.906105743616476e-06, "loss": 0.7481, "step": 6535 }, { "epoch": 0.49457076917256254, "grad_norm": 2.032932758331299, "learning_rate": 9.903864941605631e-06, "loss": 0.6665, "step": 6536 }, { "epoch": 0.49464643789489615, "grad_norm": 1.6875627040863037, "learning_rate": 9.901624082991842e-06, "loss": 0.6829, "step": 6537 }, { "epoch": 0.49472210661722976, "grad_norm": 2.333106517791748, "learning_rate": 9.899383167915438e-06, "loss": 0.7117, "step": 6538 }, { "epoch": 0.4947977753395634, "grad_norm": 2.2066781520843506, "learning_rate": 9.897142196516745e-06, "loss": 0.628, "step": 6539 }, { "epoch": 0.49487344406189704, "grad_norm": 1.9764480590820312, "learning_rate": 9.894901168936112e-06, "loss": 0.7689, "step": 6540 }, { "epoch": 0.49494911278423065, "grad_norm": 1.911521077156067, "learning_rate": 9.892660085313872e-06, "loss": 0.6511, "step": 6541 }, { "epoch": 0.49502478150656426, "grad_norm": 2.4168167114257812, "learning_rate": 9.890418945790369e-06, "loss": 0.6539, "step": 6542 }, { "epoch": 0.49510045022889787, "grad_norm": 2.118840217590332, "learning_rate": 9.88817775050595e-06, "loss": 0.7646, "step": 6543 }, { "epoch": 0.4951761189512315, "grad_norm": 2.163649082183838, "learning_rate": 9.885936499600972e-06, "loss": 0.8885, "step": 6544 }, { "epoch": 0.49525178767356515, "grad_norm": 1.633584976196289, "learning_rate": 9.883695193215784e-06, "loss": 0.7617, "step": 6545 }, { "epoch": 0.49532745639589876, "grad_norm": 2.28901743888855, "learning_rate": 9.881453831490741e-06, "loss": 0.6508, "step": 6546 }, { "epoch": 0.49540312511823237, "grad_norm": 1.933939814567566, "learning_rate": 9.879212414566212e-06, "loss": 0.6829, "step": 6547 }, { "epoch": 0.495478793840566, "grad_norm": 2.058504104614258, "learning_rate": 9.876970942582555e-06, "loss": 0.7911, "step": 6548 }, { "epoch": 0.49555446256289964, "grad_norm": 4.5961127281188965, "learning_rate": 9.874729415680145e-06, "loss": 0.6346, "step": 6549 }, { "epoch": 0.49563013128523326, "grad_norm": 2.232285499572754, "learning_rate": 9.872487833999343e-06, "loss": 0.8858, "step": 6550 }, { "epoch": 0.49570580000756687, "grad_norm": 2.6463253498077393, "learning_rate": 9.870246197680539e-06, "loss": 0.7364, "step": 6551 }, { "epoch": 0.4957814687299005, "grad_norm": 2.0343997478485107, "learning_rate": 9.868004506864098e-06, "loss": 0.6202, "step": 6552 }, { "epoch": 0.49585713745223414, "grad_norm": 2.176643133163452, "learning_rate": 9.86576276169041e-06, "loss": 0.6113, "step": 6553 }, { "epoch": 0.49593280617456775, "grad_norm": 2.058581829071045, "learning_rate": 9.863520962299858e-06, "loss": 0.6814, "step": 6554 }, { "epoch": 0.49600847489690136, "grad_norm": 1.982015609741211, "learning_rate": 9.861279108832825e-06, "loss": 0.7254, "step": 6555 }, { "epoch": 0.496084143619235, "grad_norm": 2.0388500690460205, "learning_rate": 9.859037201429715e-06, "loss": 0.7784, "step": 6556 }, { "epoch": 0.49615981234156864, "grad_norm": 2.746868133544922, "learning_rate": 9.85679524023091e-06, "loss": 0.7083, "step": 6557 }, { "epoch": 0.49623548106390225, "grad_norm": 1.9284957647323608, "learning_rate": 9.854553225376823e-06, "loss": 0.6415, "step": 6558 }, { "epoch": 0.49631114978623586, "grad_norm": 2.158855438232422, "learning_rate": 9.852311157007845e-06, "loss": 0.4699, "step": 6559 }, { "epoch": 0.49638681850856947, "grad_norm": 1.9091185331344604, "learning_rate": 9.850069035264388e-06, "loss": 0.7226, "step": 6560 }, { "epoch": 0.4964624872309031, "grad_norm": 2.265294313430786, "learning_rate": 9.84782686028686e-06, "loss": 0.7521, "step": 6561 }, { "epoch": 0.49653815595323675, "grad_norm": 2.8000192642211914, "learning_rate": 9.845584632215667e-06, "loss": 0.8844, "step": 6562 }, { "epoch": 0.49661382467557036, "grad_norm": 2.02162504196167, "learning_rate": 9.843342351191232e-06, "loss": 0.7957, "step": 6563 }, { "epoch": 0.49668949339790397, "grad_norm": 2.197127103805542, "learning_rate": 9.841100017353972e-06, "loss": 0.791, "step": 6564 }, { "epoch": 0.4967651621202376, "grad_norm": 1.8492189645767212, "learning_rate": 9.838857630844305e-06, "loss": 0.7472, "step": 6565 }, { "epoch": 0.49684083084257125, "grad_norm": 2.151035785675049, "learning_rate": 9.836615191802663e-06, "loss": 0.5994, "step": 6566 }, { "epoch": 0.49691649956490486, "grad_norm": 2.232987642288208, "learning_rate": 9.834372700369472e-06, "loss": 0.8005, "step": 6567 }, { "epoch": 0.49699216828723847, "grad_norm": 1.6914528608322144, "learning_rate": 9.832130156685163e-06, "loss": 0.795, "step": 6568 }, { "epoch": 0.4970678370095721, "grad_norm": 2.227271318435669, "learning_rate": 9.829887560890171e-06, "loss": 0.7877, "step": 6569 }, { "epoch": 0.49714350573190574, "grad_norm": 2.173454523086548, "learning_rate": 9.827644913124937e-06, "loss": 0.7479, "step": 6570 }, { "epoch": 0.49721917445423935, "grad_norm": 2.1114470958709717, "learning_rate": 9.8254022135299e-06, "loss": 0.5879, "step": 6571 }, { "epoch": 0.49729484317657296, "grad_norm": 1.8386335372924805, "learning_rate": 9.82315946224551e-06, "loss": 0.572, "step": 6572 }, { "epoch": 0.4973705118989066, "grad_norm": 2.5144834518432617, "learning_rate": 9.820916659412208e-06, "loss": 0.6295, "step": 6573 }, { "epoch": 0.4974461806212402, "grad_norm": 2.596182107925415, "learning_rate": 9.818673805170454e-06, "loss": 0.7412, "step": 6574 }, { "epoch": 0.49752184934357385, "grad_norm": 1.809083342552185, "learning_rate": 9.816430899660695e-06, "loss": 0.7819, "step": 6575 }, { "epoch": 0.49759751806590746, "grad_norm": 1.6025587320327759, "learning_rate": 9.814187943023394e-06, "loss": 0.6106, "step": 6576 }, { "epoch": 0.4976731867882411, "grad_norm": 2.2268176078796387, "learning_rate": 9.811944935399011e-06, "loss": 0.7396, "step": 6577 }, { "epoch": 0.4977488555105747, "grad_norm": 2.883540391921997, "learning_rate": 9.809701876928007e-06, "loss": 0.7096, "step": 6578 }, { "epoch": 0.49782452423290835, "grad_norm": 2.127419948577881, "learning_rate": 9.807458767750857e-06, "loss": 0.5795, "step": 6579 }, { "epoch": 0.49790019295524196, "grad_norm": 2.2806951999664307, "learning_rate": 9.805215608008025e-06, "loss": 0.7534, "step": 6580 }, { "epoch": 0.49797586167757557, "grad_norm": 2.44822359085083, "learning_rate": 9.802972397839987e-06, "loss": 0.6425, "step": 6581 }, { "epoch": 0.4980515303999092, "grad_norm": 2.3223483562469482, "learning_rate": 9.800729137387221e-06, "loss": 0.6621, "step": 6582 }, { "epoch": 0.49812719912224285, "grad_norm": 2.271935224533081, "learning_rate": 9.798485826790205e-06, "loss": 0.6618, "step": 6583 }, { "epoch": 0.49820286784457646, "grad_norm": 1.8532313108444214, "learning_rate": 9.796242466189427e-06, "loss": 0.615, "step": 6584 }, { "epoch": 0.49827853656691007, "grad_norm": 1.9369428157806396, "learning_rate": 9.793999055725368e-06, "loss": 0.7089, "step": 6585 }, { "epoch": 0.4983542052892437, "grad_norm": 2.717416286468506, "learning_rate": 9.79175559553852e-06, "loss": 0.6442, "step": 6586 }, { "epoch": 0.4984298740115773, "grad_norm": 2.2628488540649414, "learning_rate": 9.789512085769375e-06, "loss": 0.6872, "step": 6587 }, { "epoch": 0.49850554273391096, "grad_norm": 2.6931004524230957, "learning_rate": 9.787268526558431e-06, "loss": 0.6916, "step": 6588 }, { "epoch": 0.49858121145624457, "grad_norm": 2.3270750045776367, "learning_rate": 9.785024918046185e-06, "loss": 0.7876, "step": 6589 }, { "epoch": 0.4986568801785782, "grad_norm": 2.268930673599243, "learning_rate": 9.782781260373143e-06, "loss": 0.6831, "step": 6590 }, { "epoch": 0.4987325489009118, "grad_norm": 3.6653621196746826, "learning_rate": 9.780537553679803e-06, "loss": 0.6437, "step": 6591 }, { "epoch": 0.49880821762324545, "grad_norm": 3.2328827381134033, "learning_rate": 9.778293798106676e-06, "loss": 0.668, "step": 6592 }, { "epoch": 0.49888388634557906, "grad_norm": 2.2856032848358154, "learning_rate": 9.776049993794277e-06, "loss": 0.8449, "step": 6593 }, { "epoch": 0.4989595550679127, "grad_norm": 1.9682530164718628, "learning_rate": 9.773806140883115e-06, "loss": 0.8408, "step": 6594 }, { "epoch": 0.4990352237902463, "grad_norm": 2.026069402694702, "learning_rate": 9.771562239513712e-06, "loss": 1.0183, "step": 6595 }, { "epoch": 0.49911089251257995, "grad_norm": 2.4245307445526123, "learning_rate": 9.769318289826581e-06, "loss": 0.666, "step": 6596 }, { "epoch": 0.49918656123491356, "grad_norm": 1.815333604812622, "learning_rate": 9.767074291962257e-06, "loss": 0.7764, "step": 6597 }, { "epoch": 0.49926222995724717, "grad_norm": 1.9312478303909302, "learning_rate": 9.764830246061256e-06, "loss": 0.6955, "step": 6598 }, { "epoch": 0.4993378986795808, "grad_norm": 2.2235007286071777, "learning_rate": 9.762586152264112e-06, "loss": 0.799, "step": 6599 }, { "epoch": 0.4994135674019144, "grad_norm": 2.057189464569092, "learning_rate": 9.760342010711359e-06, "loss": 0.8393, "step": 6600 }, { "epoch": 0.49948923612424806, "grad_norm": 2.18095326423645, "learning_rate": 9.758097821543523e-06, "loss": 0.5707, "step": 6601 }, { "epoch": 0.49956490484658167, "grad_norm": 2.4955899715423584, "learning_rate": 9.755853584901159e-06, "loss": 0.6642, "step": 6602 }, { "epoch": 0.4996405735689153, "grad_norm": 2.098681688308716, "learning_rate": 9.753609300924791e-06, "loss": 0.7294, "step": 6603 }, { "epoch": 0.4997162422912489, "grad_norm": 1.8592923879623413, "learning_rate": 9.751364969754975e-06, "loss": 0.79, "step": 6604 }, { "epoch": 0.49979191101358256, "grad_norm": 2.4341955184936523, "learning_rate": 9.749120591532253e-06, "loss": 0.7224, "step": 6605 }, { "epoch": 0.49986757973591617, "grad_norm": 2.533416748046875, "learning_rate": 9.746876166397175e-06, "loss": 0.6489, "step": 6606 }, { "epoch": 0.4999432484582498, "grad_norm": 1.9363288879394531, "learning_rate": 9.7446316944903e-06, "loss": 0.6737, "step": 6607 }, { "epoch": 0.5000189171805834, "grad_norm": 2.0590407848358154, "learning_rate": 9.742387175952178e-06, "loss": 0.6893, "step": 6608 }, { "epoch": 0.500094585902917, "grad_norm": 1.818794846534729, "learning_rate": 9.740142610923369e-06, "loss": 0.7371, "step": 6609 }, { "epoch": 0.5001702546252507, "grad_norm": 2.1174185276031494, "learning_rate": 9.737897999544437e-06, "loss": 0.7349, "step": 6610 }, { "epoch": 0.5002459233475843, "grad_norm": 2.133242607116699, "learning_rate": 9.735653341955944e-06, "loss": 0.7632, "step": 6611 }, { "epoch": 0.5003215920699179, "grad_norm": 2.435183525085449, "learning_rate": 9.73340863829846e-06, "loss": 0.612, "step": 6612 }, { "epoch": 0.5003972607922516, "grad_norm": 2.1215646266937256, "learning_rate": 9.731163888712557e-06, "loss": 0.6771, "step": 6613 }, { "epoch": 0.5004729295145851, "grad_norm": 1.9832267761230469, "learning_rate": 9.728919093338804e-06, "loss": 0.8126, "step": 6614 }, { "epoch": 0.5005485982369188, "grad_norm": 2.078450918197632, "learning_rate": 9.726674252317781e-06, "loss": 0.5838, "step": 6615 }, { "epoch": 0.5006242669592524, "grad_norm": 2.8075807094573975, "learning_rate": 9.724429365790064e-06, "loss": 0.5838, "step": 6616 }, { "epoch": 0.500699935681586, "grad_norm": 2.4533121585845947, "learning_rate": 9.722184433896237e-06, "loss": 0.712, "step": 6617 }, { "epoch": 0.5007756044039197, "grad_norm": 3.052152633666992, "learning_rate": 9.71993945677689e-06, "loss": 0.742, "step": 6618 }, { "epoch": 0.5008512731262532, "grad_norm": 2.4858107566833496, "learning_rate": 9.717694434572599e-06, "loss": 0.7089, "step": 6619 }, { "epoch": 0.5009269418485869, "grad_norm": 1.992408037185669, "learning_rate": 9.715449367423966e-06, "loss": 0.6742, "step": 6620 }, { "epoch": 0.5010026105709205, "grad_norm": 2.0677504539489746, "learning_rate": 9.713204255471577e-06, "loss": 0.5632, "step": 6621 }, { "epoch": 0.5010782792932541, "grad_norm": 2.122046947479248, "learning_rate": 9.71095909885603e-06, "loss": 0.7803, "step": 6622 }, { "epoch": 0.5011539480155878, "grad_norm": 1.7793095111846924, "learning_rate": 9.708713897717928e-06, "loss": 0.7774, "step": 6623 }, { "epoch": 0.5012296167379214, "grad_norm": 2.6335649490356445, "learning_rate": 9.706468652197866e-06, "loss": 0.7956, "step": 6624 }, { "epoch": 0.501305285460255, "grad_norm": 2.394765853881836, "learning_rate": 9.704223362436454e-06, "loss": 0.7354, "step": 6625 }, { "epoch": 0.5013809541825887, "grad_norm": 2.1570701599121094, "learning_rate": 9.701978028574298e-06, "loss": 0.6291, "step": 6626 }, { "epoch": 0.5014566229049222, "grad_norm": 2.289980411529541, "learning_rate": 9.699732650752005e-06, "loss": 0.7032, "step": 6627 }, { "epoch": 0.5015322916272559, "grad_norm": 2.2219419479370117, "learning_rate": 9.697487229110192e-06, "loss": 0.7864, "step": 6628 }, { "epoch": 0.5016079603495895, "grad_norm": 2.5143167972564697, "learning_rate": 9.695241763789474e-06, "loss": 0.6695, "step": 6629 }, { "epoch": 0.5016836290719231, "grad_norm": 2.949692726135254, "learning_rate": 9.692996254930464e-06, "loss": 0.733, "step": 6630 }, { "epoch": 0.5017592977942568, "grad_norm": 1.9106590747833252, "learning_rate": 9.690750702673792e-06, "loss": 0.7533, "step": 6631 }, { "epoch": 0.5018349665165903, "grad_norm": 2.054891586303711, "learning_rate": 9.688505107160074e-06, "loss": 0.7022, "step": 6632 }, { "epoch": 0.501910635238924, "grad_norm": 1.7698699235916138, "learning_rate": 9.686259468529938e-06, "loss": 0.6956, "step": 6633 }, { "epoch": 0.5019863039612577, "grad_norm": 2.303417205810547, "learning_rate": 9.684013786924014e-06, "loss": 0.6195, "step": 6634 }, { "epoch": 0.5020619726835912, "grad_norm": 1.9892022609710693, "learning_rate": 9.681768062482937e-06, "loss": 0.6891, "step": 6635 }, { "epoch": 0.5021376414059249, "grad_norm": 2.3008720874786377, "learning_rate": 9.679522295347342e-06, "loss": 0.6299, "step": 6636 }, { "epoch": 0.5022133101282585, "grad_norm": 2.5181405544281006, "learning_rate": 9.677276485657857e-06, "loss": 0.6741, "step": 6637 }, { "epoch": 0.5022889788505921, "grad_norm": 2.3965871334075928, "learning_rate": 9.67503063355513e-06, "loss": 0.758, "step": 6638 }, { "epoch": 0.5023646475729258, "grad_norm": 2.2800729274749756, "learning_rate": 9.672784739179801e-06, "loss": 0.7284, "step": 6639 }, { "epoch": 0.5024403162952593, "grad_norm": 2.018068790435791, "learning_rate": 9.670538802672516e-06, "loss": 0.5924, "step": 6640 }, { "epoch": 0.502515985017593, "grad_norm": 2.180234670639038, "learning_rate": 9.668292824173925e-06, "loss": 0.8005, "step": 6641 }, { "epoch": 0.5025916537399266, "grad_norm": 2.2746224403381348, "learning_rate": 9.666046803824671e-06, "loss": 0.7719, "step": 6642 }, { "epoch": 0.5026673224622602, "grad_norm": 3.1850080490112305, "learning_rate": 9.663800741765416e-06, "loss": 0.8144, "step": 6643 }, { "epoch": 0.5027429911845939, "grad_norm": 2.118246078491211, "learning_rate": 9.661554638136808e-06, "loss": 0.7937, "step": 6644 }, { "epoch": 0.5028186599069274, "grad_norm": 1.9210091829299927, "learning_rate": 9.659308493079511e-06, "loss": 0.5085, "step": 6645 }, { "epoch": 0.5028943286292611, "grad_norm": 2.1599907875061035, "learning_rate": 9.657062306734182e-06, "loss": 0.6544, "step": 6646 }, { "epoch": 0.5029699973515948, "grad_norm": 2.152679443359375, "learning_rate": 9.654816079241487e-06, "loss": 0.8208, "step": 6647 }, { "epoch": 0.5030456660739283, "grad_norm": 2.152647018432617, "learning_rate": 9.652569810742093e-06, "loss": 0.6365, "step": 6648 }, { "epoch": 0.503121334796262, "grad_norm": 2.0750904083251953, "learning_rate": 9.650323501376666e-06, "loss": 0.6757, "step": 6649 }, { "epoch": 0.5031970035185956, "grad_norm": 2.493428945541382, "learning_rate": 9.648077151285877e-06, "loss": 0.7954, "step": 6650 }, { "epoch": 0.5032726722409292, "grad_norm": 1.762840986251831, "learning_rate": 9.645830760610401e-06, "loss": 0.8119, "step": 6651 }, { "epoch": 0.5033483409632629, "grad_norm": 2.1899425983428955, "learning_rate": 9.643584329490914e-06, "loss": 0.6388, "step": 6652 }, { "epoch": 0.5034240096855964, "grad_norm": 2.0570414066314697, "learning_rate": 9.641337858068094e-06, "loss": 0.7231, "step": 6653 }, { "epoch": 0.5034996784079301, "grad_norm": 2.0795912742614746, "learning_rate": 9.639091346482624e-06, "loss": 0.7492, "step": 6654 }, { "epoch": 0.5035753471302638, "grad_norm": 2.199233293533325, "learning_rate": 9.636844794875187e-06, "loss": 0.6797, "step": 6655 }, { "epoch": 0.5036510158525973, "grad_norm": 2.3205556869506836, "learning_rate": 9.634598203386467e-06, "loss": 0.6839, "step": 6656 }, { "epoch": 0.503726684574931, "grad_norm": 2.707512855529785, "learning_rate": 9.632351572157156e-06, "loss": 0.8496, "step": 6657 }, { "epoch": 0.5038023532972645, "grad_norm": 1.746833086013794, "learning_rate": 9.630104901327944e-06, "loss": 0.8147, "step": 6658 }, { "epoch": 0.5038780220195982, "grad_norm": 1.9517803192138672, "learning_rate": 9.62785819103953e-06, "loss": 0.6498, "step": 6659 }, { "epoch": 0.5039536907419319, "grad_norm": 1.9872726202011108, "learning_rate": 9.625611441432598e-06, "loss": 0.6999, "step": 6660 }, { "epoch": 0.5040293594642654, "grad_norm": 2.2390949726104736, "learning_rate": 9.623364652647858e-06, "loss": 0.6743, "step": 6661 }, { "epoch": 0.5041050281865991, "grad_norm": 2.458838939666748, "learning_rate": 9.621117824826008e-06, "loss": 0.7239, "step": 6662 }, { "epoch": 0.5041806969089327, "grad_norm": 2.7684452533721924, "learning_rate": 9.618870958107747e-06, "loss": 0.67, "step": 6663 }, { "epoch": 0.5042563656312663, "grad_norm": 2.003108501434326, "learning_rate": 9.61662405263379e-06, "loss": 0.6692, "step": 6664 }, { "epoch": 0.5043320343536, "grad_norm": 1.8543363809585571, "learning_rate": 9.614377108544836e-06, "loss": 0.7839, "step": 6665 }, { "epoch": 0.5044077030759335, "grad_norm": 1.8645862340927124, "learning_rate": 9.612130125981603e-06, "loss": 0.6441, "step": 6666 }, { "epoch": 0.5044833717982672, "grad_norm": 2.3227145671844482, "learning_rate": 9.6098831050848e-06, "loss": 0.7593, "step": 6667 }, { "epoch": 0.5045590405206009, "grad_norm": 2.1270523071289062, "learning_rate": 9.607636045995145e-06, "loss": 0.7418, "step": 6668 }, { "epoch": 0.5046347092429344, "grad_norm": 2.077716827392578, "learning_rate": 9.605388948853355e-06, "loss": 0.7234, "step": 6669 }, { "epoch": 0.5047103779652681, "grad_norm": 1.9619203805923462, "learning_rate": 9.60314181380015e-06, "loss": 0.6765, "step": 6670 }, { "epoch": 0.5047860466876016, "grad_norm": 1.965520977973938, "learning_rate": 9.600894640976257e-06, "loss": 0.7541, "step": 6671 }, { "epoch": 0.5048617154099353, "grad_norm": 1.9246147871017456, "learning_rate": 9.598647430522397e-06, "loss": 0.7266, "step": 6672 }, { "epoch": 0.504937384132269, "grad_norm": 2.243699550628662, "learning_rate": 9.596400182579299e-06, "loss": 0.6776, "step": 6673 }, { "epoch": 0.5050130528546025, "grad_norm": 2.064436912536621, "learning_rate": 9.594152897287689e-06, "loss": 0.661, "step": 6674 }, { "epoch": 0.5050887215769362, "grad_norm": 2.3579468727111816, "learning_rate": 9.591905574788305e-06, "loss": 0.7555, "step": 6675 }, { "epoch": 0.5051643902992698, "grad_norm": 2.24985408782959, "learning_rate": 9.58965821522188e-06, "loss": 0.6199, "step": 6676 }, { "epoch": 0.5052400590216034, "grad_norm": 2.8891539573669434, "learning_rate": 9.587410818729151e-06, "loss": 0.7357, "step": 6677 }, { "epoch": 0.5053157277439371, "grad_norm": 1.8925280570983887, "learning_rate": 9.585163385450857e-06, "loss": 0.6271, "step": 6678 }, { "epoch": 0.5053913964662706, "grad_norm": 2.103595495223999, "learning_rate": 9.582915915527738e-06, "loss": 0.7584, "step": 6679 }, { "epoch": 0.5054670651886043, "grad_norm": 1.9762611389160156, "learning_rate": 9.580668409100539e-06, "loss": 0.6955, "step": 6680 }, { "epoch": 0.505542733910938, "grad_norm": 1.8071695566177368, "learning_rate": 9.578420866310004e-06, "loss": 0.7404, "step": 6681 }, { "epoch": 0.5056184026332715, "grad_norm": 1.5950270891189575, "learning_rate": 9.576173287296889e-06, "loss": 0.8587, "step": 6682 }, { "epoch": 0.5056940713556052, "grad_norm": 1.9475131034851074, "learning_rate": 9.573925672201932e-06, "loss": 0.7747, "step": 6683 }, { "epoch": 0.5057697400779387, "grad_norm": 1.9169731140136719, "learning_rate": 9.5716780211659e-06, "loss": 0.5432, "step": 6684 }, { "epoch": 0.5058454088002724, "grad_norm": 2.5060229301452637, "learning_rate": 9.569430334329538e-06, "loss": 0.6395, "step": 6685 }, { "epoch": 0.5059210775226061, "grad_norm": 1.836045503616333, "learning_rate": 9.567182611833605e-06, "loss": 0.7145, "step": 6686 }, { "epoch": 0.5059967462449396, "grad_norm": 1.950330138206482, "learning_rate": 9.564934853818867e-06, "loss": 0.6531, "step": 6687 }, { "epoch": 0.5060724149672733, "grad_norm": 2.326371192932129, "learning_rate": 9.562687060426075e-06, "loss": 0.7988, "step": 6688 }, { "epoch": 0.506148083689607, "grad_norm": 1.9932469129562378, "learning_rate": 9.560439231796005e-06, "loss": 0.6736, "step": 6689 }, { "epoch": 0.5062237524119405, "grad_norm": 1.9955806732177734, "learning_rate": 9.558191368069414e-06, "loss": 0.6104, "step": 6690 }, { "epoch": 0.5062994211342742, "grad_norm": 2.626847505569458, "learning_rate": 9.555943469387074e-06, "loss": 0.7226, "step": 6691 }, { "epoch": 0.5063750898566077, "grad_norm": 2.498826026916504, "learning_rate": 9.553695535889759e-06, "loss": 0.7776, "step": 6692 }, { "epoch": 0.5064507585789414, "grad_norm": 2.252244710922241, "learning_rate": 9.551447567718236e-06, "loss": 0.9246, "step": 6693 }, { "epoch": 0.5065264273012751, "grad_norm": 2.162478446960449, "learning_rate": 9.549199565013286e-06, "loss": 0.6745, "step": 6694 }, { "epoch": 0.5066020960236086, "grad_norm": 2.2146854400634766, "learning_rate": 9.546951527915681e-06, "loss": 0.834, "step": 6695 }, { "epoch": 0.5066777647459423, "grad_norm": 2.3544445037841797, "learning_rate": 9.5447034565662e-06, "loss": 0.8678, "step": 6696 }, { "epoch": 0.5067534334682758, "grad_norm": 1.949468970298767, "learning_rate": 9.54245535110563e-06, "loss": 0.6463, "step": 6697 }, { "epoch": 0.5068291021906095, "grad_norm": 2.0964138507843018, "learning_rate": 9.540207211674751e-06, "loss": 0.8263, "step": 6698 }, { "epoch": 0.5069047709129432, "grad_norm": 1.5557283163070679, "learning_rate": 9.537959038414345e-06, "loss": 0.6854, "step": 6699 }, { "epoch": 0.5069804396352767, "grad_norm": 1.7753989696502686, "learning_rate": 9.53571083146521e-06, "loss": 0.5893, "step": 6700 }, { "epoch": 0.5070561083576104, "grad_norm": 1.9944871664047241, "learning_rate": 9.533462590968125e-06, "loss": 0.7938, "step": 6701 }, { "epoch": 0.5071317770799441, "grad_norm": 2.079556465148926, "learning_rate": 9.531214317063891e-06, "loss": 0.7135, "step": 6702 }, { "epoch": 0.5072074458022776, "grad_norm": 2.094221591949463, "learning_rate": 9.528966009893297e-06, "loss": 0.8092, "step": 6703 }, { "epoch": 0.5072831145246113, "grad_norm": 2.0519769191741943, "learning_rate": 9.526717669597139e-06, "loss": 0.7803, "step": 6704 }, { "epoch": 0.5073587832469448, "grad_norm": 1.9500595331192017, "learning_rate": 9.52446929631622e-06, "loss": 0.6599, "step": 6705 }, { "epoch": 0.5074344519692785, "grad_norm": 2.6499404907226562, "learning_rate": 9.522220890191332e-06, "loss": 0.6721, "step": 6706 }, { "epoch": 0.5075101206916122, "grad_norm": 1.9413082599639893, "learning_rate": 9.51997245136329e-06, "loss": 0.6661, "step": 6707 }, { "epoch": 0.5075857894139457, "grad_norm": 1.8632111549377441, "learning_rate": 9.517723979972886e-06, "loss": 0.6466, "step": 6708 }, { "epoch": 0.5076614581362794, "grad_norm": 2.5629031658172607, "learning_rate": 9.515475476160934e-06, "loss": 0.7257, "step": 6709 }, { "epoch": 0.5077371268586129, "grad_norm": 2.516348361968994, "learning_rate": 9.513226940068241e-06, "loss": 0.8455, "step": 6710 }, { "epoch": 0.5078127955809466, "grad_norm": 2.427999973297119, "learning_rate": 9.510978371835613e-06, "loss": 0.7796, "step": 6711 }, { "epoch": 0.5078884643032803, "grad_norm": 2.137712001800537, "learning_rate": 9.508729771603872e-06, "loss": 0.7222, "step": 6712 }, { "epoch": 0.5079641330256138, "grad_norm": 2.6136555671691895, "learning_rate": 9.506481139513824e-06, "loss": 0.8969, "step": 6713 }, { "epoch": 0.5080398017479475, "grad_norm": 2.343489646911621, "learning_rate": 9.50423247570629e-06, "loss": 0.7039, "step": 6714 }, { "epoch": 0.5081154704702812, "grad_norm": 2.309749126434326, "learning_rate": 9.501983780322088e-06, "loss": 0.7666, "step": 6715 }, { "epoch": 0.5081911391926147, "grad_norm": 2.360842704772949, "learning_rate": 9.499735053502037e-06, "loss": 0.772, "step": 6716 }, { "epoch": 0.5082668079149484, "grad_norm": 1.918006181716919, "learning_rate": 9.497486295386962e-06, "loss": 0.7022, "step": 6717 }, { "epoch": 0.5083424766372819, "grad_norm": 2.2111973762512207, "learning_rate": 9.495237506117685e-06, "loss": 0.5871, "step": 6718 }, { "epoch": 0.5084181453596156, "grad_norm": 2.214198350906372, "learning_rate": 9.492988685835031e-06, "loss": 0.6968, "step": 6719 }, { "epoch": 0.5084938140819493, "grad_norm": 1.9260320663452148, "learning_rate": 9.490739834679834e-06, "loss": 0.8053, "step": 6720 }, { "epoch": 0.5085694828042828, "grad_norm": 2.5409858226776123, "learning_rate": 9.488490952792924e-06, "loss": 0.6891, "step": 6721 }, { "epoch": 0.5086451515266165, "grad_norm": 2.6639885902404785, "learning_rate": 9.486242040315125e-06, "loss": 0.6264, "step": 6722 }, { "epoch": 0.50872082024895, "grad_norm": 2.6699607372283936, "learning_rate": 9.48399309738728e-06, "loss": 0.7295, "step": 6723 }, { "epoch": 0.5087964889712837, "grad_norm": 1.702734351158142, "learning_rate": 9.481744124150222e-06, "loss": 0.705, "step": 6724 }, { "epoch": 0.5088721576936174, "grad_norm": 2.3843774795532227, "learning_rate": 9.479495120744786e-06, "loss": 0.7714, "step": 6725 }, { "epoch": 0.5089478264159509, "grad_norm": 2.0906484127044678, "learning_rate": 9.477246087311816e-06, "loss": 0.7696, "step": 6726 }, { "epoch": 0.5090234951382846, "grad_norm": 2.4674184322357178, "learning_rate": 9.474997023992152e-06, "loss": 0.7912, "step": 6727 }, { "epoch": 0.5090991638606183, "grad_norm": 1.9919297695159912, "learning_rate": 9.472747930926641e-06, "loss": 0.6566, "step": 6728 }, { "epoch": 0.5091748325829518, "grad_norm": 1.8796985149383545, "learning_rate": 9.470498808256121e-06, "loss": 0.6215, "step": 6729 }, { "epoch": 0.5092505013052855, "grad_norm": 2.5070700645446777, "learning_rate": 9.46824965612145e-06, "loss": 0.7519, "step": 6730 }, { "epoch": 0.509326170027619, "grad_norm": 3.2498910427093506, "learning_rate": 9.466000474663466e-06, "loss": 0.6886, "step": 6731 }, { "epoch": 0.5094018387499527, "grad_norm": 2.552865743637085, "learning_rate": 9.463751264023028e-06, "loss": 0.6306, "step": 6732 }, { "epoch": 0.5094775074722864, "grad_norm": 2.403729200363159, "learning_rate": 9.461502024340982e-06, "loss": 0.6939, "step": 6733 }, { "epoch": 0.5095531761946199, "grad_norm": 1.9017412662506104, "learning_rate": 9.45925275575819e-06, "loss": 0.8801, "step": 6734 }, { "epoch": 0.5096288449169536, "grad_norm": 2.5211994647979736, "learning_rate": 9.457003458415504e-06, "loss": 0.6766, "step": 6735 }, { "epoch": 0.5097045136392871, "grad_norm": 1.963262915611267, "learning_rate": 9.454754132453783e-06, "loss": 0.6982, "step": 6736 }, { "epoch": 0.5097801823616208, "grad_norm": 2.2367377281188965, "learning_rate": 9.452504778013888e-06, "loss": 0.807, "step": 6737 }, { "epoch": 0.5098558510839545, "grad_norm": 2.5869593620300293, "learning_rate": 9.450255395236678e-06, "loss": 0.7474, "step": 6738 }, { "epoch": 0.509931519806288, "grad_norm": 3.3216679096221924, "learning_rate": 9.448005984263022e-06, "loss": 0.581, "step": 6739 }, { "epoch": 0.5100071885286217, "grad_norm": 3.6230344772338867, "learning_rate": 9.44575654523378e-06, "loss": 0.7932, "step": 6740 }, { "epoch": 0.5100828572509554, "grad_norm": 2.0393731594085693, "learning_rate": 9.443507078289822e-06, "loss": 0.7265, "step": 6741 }, { "epoch": 0.5101585259732889, "grad_norm": 2.0096278190612793, "learning_rate": 9.441257583572017e-06, "loss": 0.7329, "step": 6742 }, { "epoch": 0.5102341946956226, "grad_norm": 2.389496088027954, "learning_rate": 9.439008061221235e-06, "loss": 0.752, "step": 6743 }, { "epoch": 0.5103098634179561, "grad_norm": 2.3090922832489014, "learning_rate": 9.436758511378348e-06, "loss": 0.7655, "step": 6744 }, { "epoch": 0.5103855321402898, "grad_norm": 1.957068920135498, "learning_rate": 9.434508934184228e-06, "loss": 0.6015, "step": 6745 }, { "epoch": 0.5104612008626235, "grad_norm": 2.256654739379883, "learning_rate": 9.432259329779758e-06, "loss": 0.7901, "step": 6746 }, { "epoch": 0.510536869584957, "grad_norm": 2.0302345752716064, "learning_rate": 9.430009698305804e-06, "loss": 0.8028, "step": 6747 }, { "epoch": 0.5106125383072907, "grad_norm": 2.3182132244110107, "learning_rate": 9.427760039903258e-06, "loss": 0.6554, "step": 6748 }, { "epoch": 0.5106882070296243, "grad_norm": 2.3157958984375, "learning_rate": 9.425510354712992e-06, "loss": 0.6733, "step": 6749 }, { "epoch": 0.5107638757519579, "grad_norm": 1.7436319589614868, "learning_rate": 9.423260642875892e-06, "loss": 0.7008, "step": 6750 }, { "epoch": 0.5108395444742916, "grad_norm": 2.456576108932495, "learning_rate": 9.421010904532843e-06, "loss": 0.5907, "step": 6751 }, { "epoch": 0.5109152131966251, "grad_norm": 2.08577561378479, "learning_rate": 9.418761139824726e-06, "loss": 0.6606, "step": 6752 }, { "epoch": 0.5109908819189588, "grad_norm": 2.5024051666259766, "learning_rate": 9.416511348892434e-06, "loss": 0.7737, "step": 6753 }, { "epoch": 0.5110665506412925, "grad_norm": 2.1575140953063965, "learning_rate": 9.414261531876855e-06, "loss": 0.7219, "step": 6754 }, { "epoch": 0.511142219363626, "grad_norm": 1.9586470127105713, "learning_rate": 9.412011688918878e-06, "loss": 0.8176, "step": 6755 }, { "epoch": 0.5112178880859597, "grad_norm": 2.153289556503296, "learning_rate": 9.409761820159396e-06, "loss": 0.8414, "step": 6756 }, { "epoch": 0.5112935568082932, "grad_norm": 2.560614585876465, "learning_rate": 9.407511925739306e-06, "loss": 0.6309, "step": 6757 }, { "epoch": 0.5113692255306269, "grad_norm": 2.1728456020355225, "learning_rate": 9.405262005799498e-06, "loss": 0.8317, "step": 6758 }, { "epoch": 0.5114448942529606, "grad_norm": 2.0352866649627686, "learning_rate": 9.403012060480872e-06, "loss": 0.6428, "step": 6759 }, { "epoch": 0.5115205629752941, "grad_norm": 2.638589859008789, "learning_rate": 9.400762089924329e-06, "loss": 0.7381, "step": 6760 }, { "epoch": 0.5115962316976278, "grad_norm": 2.571859836578369, "learning_rate": 9.398512094270768e-06, "loss": 0.7103, "step": 6761 }, { "epoch": 0.5116719004199614, "grad_norm": 1.9200884103775024, "learning_rate": 9.396262073661092e-06, "loss": 0.7302, "step": 6762 }, { "epoch": 0.511747569142295, "grad_norm": 2.4631552696228027, "learning_rate": 9.394012028236199e-06, "loss": 0.6403, "step": 6763 }, { "epoch": 0.5118232378646287, "grad_norm": 2.1766726970672607, "learning_rate": 9.391761958137e-06, "loss": 0.8078, "step": 6764 }, { "epoch": 0.5118989065869622, "grad_norm": 1.9873024225234985, "learning_rate": 9.389511863504403e-06, "loss": 0.6108, "step": 6765 }, { "epoch": 0.5119745753092959, "grad_norm": 2.2387278079986572, "learning_rate": 9.38726174447931e-06, "loss": 0.806, "step": 6766 }, { "epoch": 0.5120502440316296, "grad_norm": 2.1650137901306152, "learning_rate": 9.385011601202637e-06, "loss": 0.6439, "step": 6767 }, { "epoch": 0.5121259127539631, "grad_norm": 2.864428758621216, "learning_rate": 9.382761433815289e-06, "loss": 0.7591, "step": 6768 }, { "epoch": 0.5122015814762968, "grad_norm": 2.0299293994903564, "learning_rate": 9.380511242458185e-06, "loss": 0.7405, "step": 6769 }, { "epoch": 0.5122772501986304, "grad_norm": 2.2550573348999023, "learning_rate": 9.378261027272231e-06, "loss": 0.7065, "step": 6770 }, { "epoch": 0.512352918920964, "grad_norm": 1.7595993280410767, "learning_rate": 9.376010788398354e-06, "loss": 0.7129, "step": 6771 }, { "epoch": 0.5124285876432977, "grad_norm": 2.2322864532470703, "learning_rate": 9.373760525977464e-06, "loss": 0.7328, "step": 6772 }, { "epoch": 0.5125042563656312, "grad_norm": 1.9751027822494507, "learning_rate": 9.37151024015048e-06, "loss": 0.6518, "step": 6773 }, { "epoch": 0.5125799250879649, "grad_norm": 2.4681308269500732, "learning_rate": 9.369259931058326e-06, "loss": 0.6701, "step": 6774 }, { "epoch": 0.5126555938102985, "grad_norm": 2.20025372505188, "learning_rate": 9.367009598841916e-06, "loss": 0.7454, "step": 6775 }, { "epoch": 0.5127312625326321, "grad_norm": 2.411095380783081, "learning_rate": 9.36475924364218e-06, "loss": 0.8019, "step": 6776 }, { "epoch": 0.5128069312549658, "grad_norm": 2.401850938796997, "learning_rate": 9.362508865600039e-06, "loss": 0.6141, "step": 6777 }, { "epoch": 0.5128825999772993, "grad_norm": 2.024711847305298, "learning_rate": 9.360258464856422e-06, "loss": 0.6127, "step": 6778 }, { "epoch": 0.512958268699633, "grad_norm": 2.656041145324707, "learning_rate": 9.358008041552254e-06, "loss": 0.7047, "step": 6779 }, { "epoch": 0.5130339374219667, "grad_norm": 2.6526777744293213, "learning_rate": 9.355757595828464e-06, "loss": 0.831, "step": 6780 }, { "epoch": 0.5131096061443002, "grad_norm": 2.158024787902832, "learning_rate": 9.353507127825985e-06, "loss": 0.6814, "step": 6781 }, { "epoch": 0.5131852748666339, "grad_norm": 2.018216848373413, "learning_rate": 9.351256637685745e-06, "loss": 0.6801, "step": 6782 }, { "epoch": 0.5132609435889675, "grad_norm": 2.381420135498047, "learning_rate": 9.349006125548676e-06, "loss": 0.708, "step": 6783 }, { "epoch": 0.5133366123113011, "grad_norm": 2.565141201019287, "learning_rate": 9.346755591555718e-06, "loss": 0.7645, "step": 6784 }, { "epoch": 0.5134122810336348, "grad_norm": 2.3605058193206787, "learning_rate": 9.344505035847804e-06, "loss": 0.6184, "step": 6785 }, { "epoch": 0.5134879497559683, "grad_norm": 2.587228536605835, "learning_rate": 9.342254458565865e-06, "loss": 0.7598, "step": 6786 }, { "epoch": 0.513563618478302, "grad_norm": 1.9587780237197876, "learning_rate": 9.34000385985085e-06, "loss": 0.7085, "step": 6787 }, { "epoch": 0.5136392872006356, "grad_norm": 1.9432623386383057, "learning_rate": 9.337753239843691e-06, "loss": 0.6803, "step": 6788 }, { "epoch": 0.5137149559229692, "grad_norm": 2.544887065887451, "learning_rate": 9.335502598685333e-06, "loss": 0.8014, "step": 6789 }, { "epoch": 0.5137906246453029, "grad_norm": 2.4957051277160645, "learning_rate": 9.333251936516718e-06, "loss": 0.7523, "step": 6790 }, { "epoch": 0.5138662933676365, "grad_norm": 2.6802759170532227, "learning_rate": 9.331001253478786e-06, "loss": 0.6507, "step": 6791 }, { "epoch": 0.5139419620899701, "grad_norm": 2.0066659450531006, "learning_rate": 9.328750549712486e-06, "loss": 0.675, "step": 6792 }, { "epoch": 0.5140176308123038, "grad_norm": 2.4365880489349365, "learning_rate": 9.326499825358763e-06, "loss": 0.6524, "step": 6793 }, { "epoch": 0.5140932995346373, "grad_norm": 1.9508459568023682, "learning_rate": 9.324249080558565e-06, "loss": 0.7767, "step": 6794 }, { "epoch": 0.514168968256971, "grad_norm": 2.4417145252227783, "learning_rate": 9.321998315452841e-06, "loss": 0.7902, "step": 6795 }, { "epoch": 0.5142446369793046, "grad_norm": 2.5067169666290283, "learning_rate": 9.319747530182542e-06, "loss": 0.7099, "step": 6796 }, { "epoch": 0.5143203057016382, "grad_norm": 2.2060787677764893, "learning_rate": 9.31749672488862e-06, "loss": 0.6742, "step": 6797 }, { "epoch": 0.5143959744239719, "grad_norm": 2.209672689437866, "learning_rate": 9.315245899712022e-06, "loss": 0.594, "step": 6798 }, { "epoch": 0.5144716431463054, "grad_norm": 2.720317840576172, "learning_rate": 9.312995054793708e-06, "loss": 0.8389, "step": 6799 }, { "epoch": 0.5145473118686391, "grad_norm": 2.3275961875915527, "learning_rate": 9.310744190274631e-06, "loss": 0.7015, "step": 6800 }, { "epoch": 0.5146229805909727, "grad_norm": 2.169250726699829, "learning_rate": 9.308493306295748e-06, "loss": 0.6436, "step": 6801 }, { "epoch": 0.5146986493133063, "grad_norm": 2.38690185546875, "learning_rate": 9.306242402998016e-06, "loss": 0.8256, "step": 6802 }, { "epoch": 0.51477431803564, "grad_norm": 2.143653631210327, "learning_rate": 9.303991480522397e-06, "loss": 0.727, "step": 6803 }, { "epoch": 0.5148499867579736, "grad_norm": 2.64727783203125, "learning_rate": 9.301740539009845e-06, "loss": 0.7609, "step": 6804 }, { "epoch": 0.5149256554803072, "grad_norm": 2.108668565750122, "learning_rate": 9.299489578601326e-06, "loss": 0.7427, "step": 6805 }, { "epoch": 0.5150013242026409, "grad_norm": 2.263934850692749, "learning_rate": 9.2972385994378e-06, "loss": 0.7691, "step": 6806 }, { "epoch": 0.5150769929249744, "grad_norm": 2.89751935005188, "learning_rate": 9.294987601660231e-06, "loss": 0.7268, "step": 6807 }, { "epoch": 0.5151526616473081, "grad_norm": 2.1842520236968994, "learning_rate": 9.292736585409588e-06, "loss": 0.7398, "step": 6808 }, { "epoch": 0.5152283303696417, "grad_norm": 2.5846338272094727, "learning_rate": 9.290485550826828e-06, "loss": 0.7019, "step": 6809 }, { "epoch": 0.5153039990919753, "grad_norm": 2.2591192722320557, "learning_rate": 9.288234498052927e-06, "loss": 0.7099, "step": 6810 }, { "epoch": 0.515379667814309, "grad_norm": 2.2069311141967773, "learning_rate": 9.285983427228849e-06, "loss": 0.658, "step": 6811 }, { "epoch": 0.5154553365366425, "grad_norm": 2.1312320232391357, "learning_rate": 9.283732338495562e-06, "loss": 0.7175, "step": 6812 }, { "epoch": 0.5155310052589762, "grad_norm": 3.7413275241851807, "learning_rate": 9.28148123199404e-06, "loss": 0.6437, "step": 6813 }, { "epoch": 0.5156066739813098, "grad_norm": 2.0996272563934326, "learning_rate": 9.27923010786525e-06, "loss": 0.6743, "step": 6814 }, { "epoch": 0.5156823427036434, "grad_norm": 2.2065136432647705, "learning_rate": 9.27697896625017e-06, "loss": 0.7956, "step": 6815 }, { "epoch": 0.5157580114259771, "grad_norm": 2.435471534729004, "learning_rate": 9.274727807289772e-06, "loss": 0.5638, "step": 6816 }, { "epoch": 0.5158336801483107, "grad_norm": 2.6238391399383545, "learning_rate": 9.272476631125027e-06, "loss": 0.8299, "step": 6817 }, { "epoch": 0.5159093488706443, "grad_norm": 2.0177040100097656, "learning_rate": 9.270225437896916e-06, "loss": 0.7226, "step": 6818 }, { "epoch": 0.515985017592978, "grad_norm": 2.575056552886963, "learning_rate": 9.267974227746415e-06, "loss": 0.6647, "step": 6819 }, { "epoch": 0.5160606863153115, "grad_norm": 2.2004969120025635, "learning_rate": 9.265723000814501e-06, "loss": 0.739, "step": 6820 }, { "epoch": 0.5161363550376452, "grad_norm": 1.8651002645492554, "learning_rate": 9.263471757242153e-06, "loss": 0.6448, "step": 6821 }, { "epoch": 0.5162120237599788, "grad_norm": 2.0674023628234863, "learning_rate": 9.261220497170349e-06, "loss": 0.6993, "step": 6822 }, { "epoch": 0.5162876924823124, "grad_norm": 2.2824978828430176, "learning_rate": 9.258969220740075e-06, "loss": 0.7217, "step": 6823 }, { "epoch": 0.5163633612046461, "grad_norm": 2.708824634552002, "learning_rate": 9.25671792809231e-06, "loss": 0.6483, "step": 6824 }, { "epoch": 0.5164390299269797, "grad_norm": 4.072855472564697, "learning_rate": 9.254466619368038e-06, "loss": 0.5936, "step": 6825 }, { "epoch": 0.5165146986493133, "grad_norm": 8.13134765625, "learning_rate": 9.252215294708247e-06, "loss": 0.7496, "step": 6826 }, { "epoch": 0.5165903673716469, "grad_norm": 2.2548294067382812, "learning_rate": 9.249963954253913e-06, "loss": 0.5736, "step": 6827 }, { "epoch": 0.5166660360939805, "grad_norm": 2.1357603073120117, "learning_rate": 9.247712598146028e-06, "loss": 0.7561, "step": 6828 }, { "epoch": 0.5167417048163142, "grad_norm": 1.8459572792053223, "learning_rate": 9.245461226525584e-06, "loss": 0.6388, "step": 6829 }, { "epoch": 0.5168173735386478, "grad_norm": 2.7423040866851807, "learning_rate": 9.24320983953356e-06, "loss": 0.8378, "step": 6830 }, { "epoch": 0.5168930422609814, "grad_norm": 2.403897523880005, "learning_rate": 9.24095843731095e-06, "loss": 0.603, "step": 6831 }, { "epoch": 0.5169687109833151, "grad_norm": 2.0380334854125977, "learning_rate": 9.23870701999874e-06, "loss": 0.6979, "step": 6832 }, { "epoch": 0.5170443797056486, "grad_norm": 2.8560407161712646, "learning_rate": 9.23645558773793e-06, "loss": 0.7479, "step": 6833 }, { "epoch": 0.5171200484279823, "grad_norm": 2.2404606342315674, "learning_rate": 9.234204140669502e-06, "loss": 0.5442, "step": 6834 }, { "epoch": 0.5171957171503159, "grad_norm": 1.846641182899475, "learning_rate": 9.231952678934456e-06, "loss": 0.8732, "step": 6835 }, { "epoch": 0.5172713858726495, "grad_norm": 2.014122486114502, "learning_rate": 9.229701202673781e-06, "loss": 0.6284, "step": 6836 }, { "epoch": 0.5173470545949832, "grad_norm": 1.9352043867111206, "learning_rate": 9.227449712028475e-06, "loss": 0.7152, "step": 6837 }, { "epoch": 0.5174227233173168, "grad_norm": 2.4555435180664062, "learning_rate": 9.225198207139533e-06, "loss": 0.6324, "step": 6838 }, { "epoch": 0.5174983920396504, "grad_norm": 2.332766056060791, "learning_rate": 9.222946688147949e-06, "loss": 0.668, "step": 6839 }, { "epoch": 0.5175740607619841, "grad_norm": 2.4688680171966553, "learning_rate": 9.220695155194724e-06, "loss": 0.7283, "step": 6840 }, { "epoch": 0.5176497294843176, "grad_norm": 2.0865702629089355, "learning_rate": 9.218443608420855e-06, "loss": 0.6561, "step": 6841 }, { "epoch": 0.5177253982066513, "grad_norm": 1.7171027660369873, "learning_rate": 9.216192047967337e-06, "loss": 0.537, "step": 6842 }, { "epoch": 0.5178010669289849, "grad_norm": 2.2573938369750977, "learning_rate": 9.213940473975178e-06, "loss": 0.7413, "step": 6843 }, { "epoch": 0.5178767356513185, "grad_norm": 1.9172108173370361, "learning_rate": 9.211688886585373e-06, "loss": 0.5654, "step": 6844 }, { "epoch": 0.5179524043736522, "grad_norm": 2.324502468109131, "learning_rate": 9.209437285938926e-06, "loss": 0.6488, "step": 6845 }, { "epoch": 0.5180280730959858, "grad_norm": 1.878939151763916, "learning_rate": 9.207185672176837e-06, "loss": 0.7264, "step": 6846 }, { "epoch": 0.5181037418183194, "grad_norm": 2.321331262588501, "learning_rate": 9.204934045440111e-06, "loss": 0.8104, "step": 6847 }, { "epoch": 0.518179410540653, "grad_norm": 2.2414441108703613, "learning_rate": 9.202682405869753e-06, "loss": 0.8987, "step": 6848 }, { "epoch": 0.5182550792629866, "grad_norm": 2.0336124897003174, "learning_rate": 9.20043075360677e-06, "loss": 0.7724, "step": 6849 }, { "epoch": 0.5183307479853203, "grad_norm": 2.182743787765503, "learning_rate": 9.198179088792159e-06, "loss": 0.8737, "step": 6850 }, { "epoch": 0.5184064167076539, "grad_norm": 1.6060758829116821, "learning_rate": 9.195927411566938e-06, "loss": 0.6383, "step": 6851 }, { "epoch": 0.5184820854299875, "grad_norm": 1.7125988006591797, "learning_rate": 9.193675722072106e-06, "loss": 0.6993, "step": 6852 }, { "epoch": 0.5185577541523212, "grad_norm": 2.322448968887329, "learning_rate": 9.191424020448673e-06, "loss": 0.7036, "step": 6853 }, { "epoch": 0.5186334228746547, "grad_norm": 2.939868688583374, "learning_rate": 9.189172306837653e-06, "loss": 0.6713, "step": 6854 }, { "epoch": 0.5187090915969884, "grad_norm": 4.6412811279296875, "learning_rate": 9.186920581380045e-06, "loss": 0.6601, "step": 6855 }, { "epoch": 0.518784760319322, "grad_norm": 7.659454822540283, "learning_rate": 9.184668844216872e-06, "loss": 0.8464, "step": 6856 }, { "epoch": 0.5188604290416556, "grad_norm": 6.378112316131592, "learning_rate": 9.182417095489135e-06, "loss": 0.7449, "step": 6857 }, { "epoch": 0.5189360977639893, "grad_norm": 1.7814310789108276, "learning_rate": 9.180165335337848e-06, "loss": 0.7629, "step": 6858 }, { "epoch": 0.5190117664863229, "grad_norm": 2.039429187774658, "learning_rate": 9.177913563904029e-06, "loss": 0.7624, "step": 6859 }, { "epoch": 0.5190874352086565, "grad_norm": 2.5181028842926025, "learning_rate": 9.175661781328684e-06, "loss": 0.709, "step": 6860 }, { "epoch": 0.5191631039309901, "grad_norm": 2.226398229598999, "learning_rate": 9.173409987752834e-06, "loss": 0.6182, "step": 6861 }, { "epoch": 0.5192387726533237, "grad_norm": 1.9586197137832642, "learning_rate": 9.171158183317486e-06, "loss": 0.6997, "step": 6862 }, { "epoch": 0.5193144413756574, "grad_norm": 2.897284984588623, "learning_rate": 9.16890636816366e-06, "loss": 0.8039, "step": 6863 }, { "epoch": 0.519390110097991, "grad_norm": 2.2634551525115967, "learning_rate": 9.166654542432372e-06, "loss": 0.7075, "step": 6864 }, { "epoch": 0.5194657788203246, "grad_norm": 3.2109482288360596, "learning_rate": 9.164402706264635e-06, "loss": 0.6153, "step": 6865 }, { "epoch": 0.5195414475426583, "grad_norm": 2.3147075176239014, "learning_rate": 9.16215085980147e-06, "loss": 0.7481, "step": 6866 }, { "epoch": 0.5196171162649919, "grad_norm": 5.927982330322266, "learning_rate": 9.159899003183894e-06, "loss": 0.6095, "step": 6867 }, { "epoch": 0.5196927849873255, "grad_norm": 1.7811951637268066, "learning_rate": 9.157647136552926e-06, "loss": 0.8572, "step": 6868 }, { "epoch": 0.5197684537096591, "grad_norm": 1.9003556966781616, "learning_rate": 9.155395260049584e-06, "loss": 0.7132, "step": 6869 }, { "epoch": 0.5198441224319927, "grad_norm": 2.136584520339966, "learning_rate": 9.153143373814887e-06, "loss": 0.685, "step": 6870 }, { "epoch": 0.5199197911543264, "grad_norm": 3.125502109527588, "learning_rate": 9.150891477989858e-06, "loss": 0.8047, "step": 6871 }, { "epoch": 0.51999545987666, "grad_norm": 3.0765082836151123, "learning_rate": 9.148639572715517e-06, "loss": 0.5735, "step": 6872 }, { "epoch": 0.5200711285989936, "grad_norm": 2.166370153427124, "learning_rate": 9.146387658132881e-06, "loss": 0.8376, "step": 6873 }, { "epoch": 0.5201467973213272, "grad_norm": 2.2477190494537354, "learning_rate": 9.144135734382983e-06, "loss": 0.7244, "step": 6874 }, { "epoch": 0.5202224660436608, "grad_norm": 2.6588950157165527, "learning_rate": 9.141883801606836e-06, "loss": 0.6419, "step": 6875 }, { "epoch": 0.5202981347659945, "grad_norm": 2.5737180709838867, "learning_rate": 9.139631859945466e-06, "loss": 0.8748, "step": 6876 }, { "epoch": 0.5203738034883281, "grad_norm": 1.9617338180541992, "learning_rate": 9.137379909539897e-06, "loss": 0.7089, "step": 6877 }, { "epoch": 0.5204494722106617, "grad_norm": 2.4707319736480713, "learning_rate": 9.135127950531153e-06, "loss": 0.6812, "step": 6878 }, { "epoch": 0.5205251409329954, "grad_norm": 1.5463811159133911, "learning_rate": 9.132875983060262e-06, "loss": 0.8144, "step": 6879 }, { "epoch": 0.520600809655329, "grad_norm": 2.1439154148101807, "learning_rate": 9.130624007268247e-06, "loss": 0.7185, "step": 6880 }, { "epoch": 0.5206764783776626, "grad_norm": 2.178358554840088, "learning_rate": 9.128372023296132e-06, "loss": 0.7119, "step": 6881 }, { "epoch": 0.5207521470999962, "grad_norm": 2.2699880599975586, "learning_rate": 9.126120031284947e-06, "loss": 0.7, "step": 6882 }, { "epoch": 0.5208278158223298, "grad_norm": 2.0759565830230713, "learning_rate": 9.123868031375716e-06, "loss": 0.6736, "step": 6883 }, { "epoch": 0.5209034845446635, "grad_norm": 2.5509610176086426, "learning_rate": 9.121616023709473e-06, "loss": 0.8851, "step": 6884 }, { "epoch": 0.5209791532669971, "grad_norm": 2.314539909362793, "learning_rate": 9.119364008427239e-06, "loss": 0.8735, "step": 6885 }, { "epoch": 0.5210548219893307, "grad_norm": 1.9383503198623657, "learning_rate": 9.117111985670043e-06, "loss": 0.7673, "step": 6886 }, { "epoch": 0.5211304907116643, "grad_norm": 2.7739522457122803, "learning_rate": 9.114859955578916e-06, "loss": 0.736, "step": 6887 }, { "epoch": 0.521206159433998, "grad_norm": 1.9783974885940552, "learning_rate": 9.112607918294887e-06, "loss": 0.868, "step": 6888 }, { "epoch": 0.5212818281563316, "grad_norm": 2.2085728645324707, "learning_rate": 9.110355873958987e-06, "loss": 0.7107, "step": 6889 }, { "epoch": 0.5213574968786652, "grad_norm": 2.447173833847046, "learning_rate": 9.108103822712246e-06, "loss": 0.7465, "step": 6890 }, { "epoch": 0.5214331656009988, "grad_norm": 2.1307895183563232, "learning_rate": 9.105851764695691e-06, "loss": 0.6948, "step": 6891 }, { "epoch": 0.5215088343233325, "grad_norm": 2.2277281284332275, "learning_rate": 9.103599700050358e-06, "loss": 0.6859, "step": 6892 }, { "epoch": 0.5215845030456661, "grad_norm": 1.6344878673553467, "learning_rate": 9.101347628917278e-06, "loss": 0.6701, "step": 6893 }, { "epoch": 0.5216601717679997, "grad_norm": 2.285489797592163, "learning_rate": 9.09909555143748e-06, "loss": 0.5954, "step": 6894 }, { "epoch": 0.5217358404903333, "grad_norm": 2.104238986968994, "learning_rate": 9.096843467751999e-06, "loss": 0.6608, "step": 6895 }, { "epoch": 0.521811509212667, "grad_norm": 2.4383769035339355, "learning_rate": 9.094591378001864e-06, "loss": 0.7292, "step": 6896 }, { "epoch": 0.5218871779350006, "grad_norm": 2.041077136993408, "learning_rate": 9.092339282328115e-06, "loss": 0.7528, "step": 6897 }, { "epoch": 0.5219628466573342, "grad_norm": 2.0433545112609863, "learning_rate": 9.09008718087178e-06, "loss": 0.5971, "step": 6898 }, { "epoch": 0.5220385153796678, "grad_norm": 2.3326566219329834, "learning_rate": 9.087835073773893e-06, "loss": 0.6639, "step": 6899 }, { "epoch": 0.5221141841020014, "grad_norm": 1.7605972290039062, "learning_rate": 9.085582961175493e-06, "loss": 0.5656, "step": 6900 }, { "epoch": 0.522189852824335, "grad_norm": 2.2532219886779785, "learning_rate": 9.083330843217606e-06, "loss": 0.6726, "step": 6901 }, { "epoch": 0.5222655215466687, "grad_norm": 2.456960678100586, "learning_rate": 9.081078720041277e-06, "loss": 0.8113, "step": 6902 }, { "epoch": 0.5223411902690023, "grad_norm": 2.177849292755127, "learning_rate": 9.078826591787532e-06, "loss": 0.6898, "step": 6903 }, { "epoch": 0.5224168589913359, "grad_norm": 2.2699694633483887, "learning_rate": 9.076574458597413e-06, "loss": 0.7005, "step": 6904 }, { "epoch": 0.5224925277136696, "grad_norm": 2.6185245513916016, "learning_rate": 9.074322320611954e-06, "loss": 0.6497, "step": 6905 }, { "epoch": 0.5225681964360032, "grad_norm": 2.110748767852783, "learning_rate": 9.07207017797219e-06, "loss": 0.8304, "step": 6906 }, { "epoch": 0.5226438651583368, "grad_norm": 2.2064247131347656, "learning_rate": 9.069818030819162e-06, "loss": 0.714, "step": 6907 }, { "epoch": 0.5227195338806704, "grad_norm": 1.88336181640625, "learning_rate": 9.067565879293898e-06, "loss": 0.8837, "step": 6908 }, { "epoch": 0.522795202603004, "grad_norm": 2.301023244857788, "learning_rate": 9.065313723537443e-06, "loss": 0.8356, "step": 6909 }, { "epoch": 0.5228708713253377, "grad_norm": 2.310317039489746, "learning_rate": 9.06306156369083e-06, "loss": 0.8133, "step": 6910 }, { "epoch": 0.5229465400476713, "grad_norm": 2.0655336380004883, "learning_rate": 9.060809399895099e-06, "loss": 0.6615, "step": 6911 }, { "epoch": 0.5230222087700049, "grad_norm": 2.006929874420166, "learning_rate": 9.058557232291283e-06, "loss": 0.6776, "step": 6912 }, { "epoch": 0.5230978774923385, "grad_norm": 2.7849390506744385, "learning_rate": 9.05630506102043e-06, "loss": 0.6462, "step": 6913 }, { "epoch": 0.5231735462146722, "grad_norm": 1.9213398694992065, "learning_rate": 9.054052886223568e-06, "loss": 0.6398, "step": 6914 }, { "epoch": 0.5232492149370058, "grad_norm": 2.0991053581237793, "learning_rate": 9.051800708041741e-06, "loss": 0.6703, "step": 6915 }, { "epoch": 0.5233248836593394, "grad_norm": 1.9303232431411743, "learning_rate": 9.049548526615986e-06, "loss": 0.7027, "step": 6916 }, { "epoch": 0.523400552381673, "grad_norm": 1.928173303604126, "learning_rate": 9.04729634208734e-06, "loss": 0.7511, "step": 6917 }, { "epoch": 0.5234762211040067, "grad_norm": 2.1134893894195557, "learning_rate": 9.045044154596846e-06, "loss": 0.6348, "step": 6918 }, { "epoch": 0.5235518898263403, "grad_norm": 2.236544609069824, "learning_rate": 9.04279196428554e-06, "loss": 0.7813, "step": 6919 }, { "epoch": 0.5236275585486739, "grad_norm": 2.3019282817840576, "learning_rate": 9.040539771294464e-06, "loss": 0.8056, "step": 6920 }, { "epoch": 0.5237032272710075, "grad_norm": 2.164768934249878, "learning_rate": 9.038287575764656e-06, "loss": 0.7114, "step": 6921 }, { "epoch": 0.5237788959933412, "grad_norm": 2.2749412059783936, "learning_rate": 9.036035377837156e-06, "loss": 0.8045, "step": 6922 }, { "epoch": 0.5238545647156748, "grad_norm": 1.8921995162963867, "learning_rate": 9.033783177653006e-06, "loss": 0.9092, "step": 6923 }, { "epoch": 0.5239302334380084, "grad_norm": 1.9506926536560059, "learning_rate": 9.03153097535324e-06, "loss": 0.6368, "step": 6924 }, { "epoch": 0.524005902160342, "grad_norm": 2.1694705486297607, "learning_rate": 9.029278771078905e-06, "loss": 0.809, "step": 6925 }, { "epoch": 0.5240815708826756, "grad_norm": 4.481318950653076, "learning_rate": 9.02702656497104e-06, "loss": 0.7019, "step": 6926 }, { "epoch": 0.5241572396050093, "grad_norm": 1.8642576932907104, "learning_rate": 9.024774357170681e-06, "loss": 0.7031, "step": 6927 }, { "epoch": 0.5242329083273429, "grad_norm": 1.5886856317520142, "learning_rate": 9.022522147818873e-06, "loss": 0.6229, "step": 6928 }, { "epoch": 0.5243085770496765, "grad_norm": 2.0907092094421387, "learning_rate": 9.020269937056657e-06, "loss": 0.6835, "step": 6929 }, { "epoch": 0.5243842457720101, "grad_norm": 2.4200832843780518, "learning_rate": 9.01801772502507e-06, "loss": 0.8053, "step": 6930 }, { "epoch": 0.5244599144943438, "grad_norm": 1.7767586708068848, "learning_rate": 9.015765511865156e-06, "loss": 0.8483, "step": 6931 }, { "epoch": 0.5245355832166774, "grad_norm": 1.9560530185699463, "learning_rate": 9.013513297717954e-06, "loss": 0.7639, "step": 6932 }, { "epoch": 0.524611251939011, "grad_norm": 2.2985103130340576, "learning_rate": 9.011261082724503e-06, "loss": 0.7953, "step": 6933 }, { "epoch": 0.5246869206613446, "grad_norm": 2.1248481273651123, "learning_rate": 9.00900886702585e-06, "loss": 0.7215, "step": 6934 }, { "epoch": 0.5247625893836783, "grad_norm": 1.9643497467041016, "learning_rate": 9.006756650763031e-06, "loss": 0.6989, "step": 6935 }, { "epoch": 0.5248382581060119, "grad_norm": 2.1842288970947266, "learning_rate": 9.004504434077092e-06, "loss": 0.7278, "step": 6936 }, { "epoch": 0.5249139268283455, "grad_norm": 2.345261812210083, "learning_rate": 9.002252217109065e-06, "loss": 0.7213, "step": 6937 }, { "epoch": 0.5249895955506791, "grad_norm": 2.2348873615264893, "learning_rate": 9e-06, "loss": 0.6579, "step": 6938 }, { "epoch": 0.5250652642730127, "grad_norm": 2.263463258743286, "learning_rate": 8.997747782890936e-06, "loss": 0.7455, "step": 6939 }, { "epoch": 0.5251409329953464, "grad_norm": 2.1686911582946777, "learning_rate": 8.995495565922914e-06, "loss": 0.7449, "step": 6940 }, { "epoch": 0.52521660171768, "grad_norm": 2.4088521003723145, "learning_rate": 8.99324334923697e-06, "loss": 0.7396, "step": 6941 }, { "epoch": 0.5252922704400136, "grad_norm": 2.31754207611084, "learning_rate": 8.99099113297415e-06, "loss": 0.6586, "step": 6942 }, { "epoch": 0.5253679391623473, "grad_norm": 1.726416826248169, "learning_rate": 8.988738917275497e-06, "loss": 0.8426, "step": 6943 }, { "epoch": 0.5254436078846809, "grad_norm": 2.5192909240722656, "learning_rate": 8.986486702282048e-06, "loss": 0.7568, "step": 6944 }, { "epoch": 0.5255192766070145, "grad_norm": 2.0664658546447754, "learning_rate": 8.98423448813485e-06, "loss": 0.5892, "step": 6945 }, { "epoch": 0.5255949453293481, "grad_norm": 3.337775230407715, "learning_rate": 8.981982274974932e-06, "loss": 0.7521, "step": 6946 }, { "epoch": 0.5256706140516817, "grad_norm": 2.2166309356689453, "learning_rate": 8.979730062943344e-06, "loss": 0.8307, "step": 6947 }, { "epoch": 0.5257462827740154, "grad_norm": 1.8791322708129883, "learning_rate": 8.977477852181128e-06, "loss": 0.6808, "step": 6948 }, { "epoch": 0.525821951496349, "grad_norm": 2.134373664855957, "learning_rate": 8.97522564282932e-06, "loss": 0.7119, "step": 6949 }, { "epoch": 0.5258976202186826, "grad_norm": 2.081508159637451, "learning_rate": 8.972973435028964e-06, "loss": 0.6941, "step": 6950 }, { "epoch": 0.5259732889410162, "grad_norm": 2.069390296936035, "learning_rate": 8.970721228921096e-06, "loss": 0.7172, "step": 6951 }, { "epoch": 0.5260489576633498, "grad_norm": 2.029240369796753, "learning_rate": 8.968469024646759e-06, "loss": 0.7142, "step": 6952 }, { "epoch": 0.5261246263856835, "grad_norm": 2.3885130882263184, "learning_rate": 8.966216822346996e-06, "loss": 0.6241, "step": 6953 }, { "epoch": 0.5262002951080171, "grad_norm": 1.988101840019226, "learning_rate": 8.963964622162846e-06, "loss": 0.6023, "step": 6954 }, { "epoch": 0.5262759638303507, "grad_norm": 1.8047088384628296, "learning_rate": 8.961712424235346e-06, "loss": 0.6205, "step": 6955 }, { "epoch": 0.5263516325526844, "grad_norm": 2.6779801845550537, "learning_rate": 8.959460228705535e-06, "loss": 0.9106, "step": 6956 }, { "epoch": 0.526427301275018, "grad_norm": 2.134953022003174, "learning_rate": 8.957208035714461e-06, "loss": 0.7214, "step": 6957 }, { "epoch": 0.5265029699973516, "grad_norm": 1.9478284120559692, "learning_rate": 8.954955845403156e-06, "loss": 0.8335, "step": 6958 }, { "epoch": 0.5265786387196852, "grad_norm": 1.8003441095352173, "learning_rate": 8.952703657912662e-06, "loss": 0.7422, "step": 6959 }, { "epoch": 0.5266543074420188, "grad_norm": 1.7551347017288208, "learning_rate": 8.950451473384017e-06, "loss": 0.6058, "step": 6960 }, { "epoch": 0.5267299761643525, "grad_norm": 2.0923233032226562, "learning_rate": 8.948199291958263e-06, "loss": 0.8269, "step": 6961 }, { "epoch": 0.5268056448866861, "grad_norm": 1.9860175848007202, "learning_rate": 8.945947113776432e-06, "loss": 0.6191, "step": 6962 }, { "epoch": 0.5268813136090197, "grad_norm": 2.2504024505615234, "learning_rate": 8.943694938979572e-06, "loss": 0.6805, "step": 6963 }, { "epoch": 0.5269569823313534, "grad_norm": 2.547034978866577, "learning_rate": 8.941442767708717e-06, "loss": 0.6573, "step": 6964 }, { "epoch": 0.5270326510536869, "grad_norm": 1.7897727489471436, "learning_rate": 8.939190600104905e-06, "loss": 0.7814, "step": 6965 }, { "epoch": 0.5271083197760206, "grad_norm": 2.5294055938720703, "learning_rate": 8.936938436309175e-06, "loss": 0.7107, "step": 6966 }, { "epoch": 0.5271839884983542, "grad_norm": 2.606584310531616, "learning_rate": 8.934686276462558e-06, "loss": 0.6996, "step": 6967 }, { "epoch": 0.5272596572206878, "grad_norm": 1.6937854290008545, "learning_rate": 8.932434120706104e-06, "loss": 0.836, "step": 6968 }, { "epoch": 0.5273353259430215, "grad_norm": 2.6257095336914062, "learning_rate": 8.93018196918084e-06, "loss": 0.6491, "step": 6969 }, { "epoch": 0.5274109946653551, "grad_norm": 3.0788724422454834, "learning_rate": 8.927929822027812e-06, "loss": 0.6871, "step": 6970 }, { "epoch": 0.5274866633876887, "grad_norm": 2.0726146697998047, "learning_rate": 8.925677679388048e-06, "loss": 0.7104, "step": 6971 }, { "epoch": 0.5275623321100223, "grad_norm": 2.652439832687378, "learning_rate": 8.923425541402586e-06, "loss": 0.6209, "step": 6972 }, { "epoch": 0.5276380008323559, "grad_norm": 2.571249485015869, "learning_rate": 8.921173408212468e-06, "loss": 0.8608, "step": 6973 }, { "epoch": 0.5277136695546896, "grad_norm": 2.138179302215576, "learning_rate": 8.918921279958725e-06, "loss": 0.7663, "step": 6974 }, { "epoch": 0.5277893382770232, "grad_norm": 2.010223388671875, "learning_rate": 8.916669156782396e-06, "loss": 0.7324, "step": 6975 }, { "epoch": 0.5278650069993568, "grad_norm": 1.760847806930542, "learning_rate": 8.914417038824511e-06, "loss": 0.736, "step": 6976 }, { "epoch": 0.5279406757216905, "grad_norm": 2.2697741985321045, "learning_rate": 8.912164926226107e-06, "loss": 0.868, "step": 6977 }, { "epoch": 0.528016344444024, "grad_norm": 1.8922936916351318, "learning_rate": 8.909912819128223e-06, "loss": 0.5409, "step": 6978 }, { "epoch": 0.5280920131663577, "grad_norm": 1.831129789352417, "learning_rate": 8.907660717671887e-06, "loss": 0.667, "step": 6979 }, { "epoch": 0.5281676818886913, "grad_norm": 2.529381275177002, "learning_rate": 8.905408621998138e-06, "loss": 0.8239, "step": 6980 }, { "epoch": 0.5282433506110249, "grad_norm": 2.6161415576934814, "learning_rate": 8.903156532248005e-06, "loss": 0.7192, "step": 6981 }, { "epoch": 0.5283190193333586, "grad_norm": 2.2429845333099365, "learning_rate": 8.90090444856252e-06, "loss": 0.6279, "step": 6982 }, { "epoch": 0.5283946880556922, "grad_norm": 2.3764171600341797, "learning_rate": 8.898652371082722e-06, "loss": 0.6857, "step": 6983 }, { "epoch": 0.5284703567780258, "grad_norm": 2.131578207015991, "learning_rate": 8.896400299949642e-06, "loss": 0.6394, "step": 6984 }, { "epoch": 0.5285460255003595, "grad_norm": 2.332397699356079, "learning_rate": 8.894148235304309e-06, "loss": 0.6967, "step": 6985 }, { "epoch": 0.528621694222693, "grad_norm": 2.148642063140869, "learning_rate": 8.891896177287758e-06, "loss": 0.6876, "step": 6986 }, { "epoch": 0.5286973629450267, "grad_norm": 2.2178142070770264, "learning_rate": 8.889644126041014e-06, "loss": 0.5579, "step": 6987 }, { "epoch": 0.5287730316673603, "grad_norm": 2.4631600379943848, "learning_rate": 8.887392081705112e-06, "loss": 0.596, "step": 6988 }, { "epoch": 0.5288487003896939, "grad_norm": 2.4894394874572754, "learning_rate": 8.885140044421086e-06, "loss": 0.7106, "step": 6989 }, { "epoch": 0.5289243691120276, "grad_norm": 2.318631410598755, "learning_rate": 8.88288801432996e-06, "loss": 0.678, "step": 6990 }, { "epoch": 0.5290000378343611, "grad_norm": 2.5202953815460205, "learning_rate": 8.880635991572765e-06, "loss": 0.8136, "step": 6991 }, { "epoch": 0.5290757065566948, "grad_norm": 2.017930746078491, "learning_rate": 8.878383976290529e-06, "loss": 0.7493, "step": 6992 }, { "epoch": 0.5291513752790284, "grad_norm": 2.3018083572387695, "learning_rate": 8.876131968624282e-06, "loss": 0.6035, "step": 6993 }, { "epoch": 0.529227044001362, "grad_norm": 2.4072072505950928, "learning_rate": 8.873879968715055e-06, "loss": 0.772, "step": 6994 }, { "epoch": 0.5293027127236957, "grad_norm": 3.478423595428467, "learning_rate": 8.87162797670387e-06, "loss": 0.6318, "step": 6995 }, { "epoch": 0.5293783814460293, "grad_norm": 1.5796644687652588, "learning_rate": 8.869375992731757e-06, "loss": 0.5953, "step": 6996 }, { "epoch": 0.5294540501683629, "grad_norm": 2.066748857498169, "learning_rate": 8.867124016939742e-06, "loss": 0.6553, "step": 6997 }, { "epoch": 0.5295297188906966, "grad_norm": 2.06510591506958, "learning_rate": 8.864872049468846e-06, "loss": 0.6526, "step": 6998 }, { "epoch": 0.5296053876130301, "grad_norm": 2.0738534927368164, "learning_rate": 8.862620090460104e-06, "loss": 0.8406, "step": 6999 }, { "epoch": 0.5296810563353638, "grad_norm": 2.1421914100646973, "learning_rate": 8.860368140054536e-06, "loss": 0.714, "step": 7000 }, { "epoch": 0.5297567250576974, "grad_norm": 1.9644984006881714, "learning_rate": 8.858116198393166e-06, "loss": 0.628, "step": 7001 }, { "epoch": 0.529832393780031, "grad_norm": 2.3363261222839355, "learning_rate": 8.85586426561702e-06, "loss": 0.8578, "step": 7002 }, { "epoch": 0.5299080625023647, "grad_norm": 2.117711305618286, "learning_rate": 8.853612341867116e-06, "loss": 0.6332, "step": 7003 }, { "epoch": 0.5299837312246982, "grad_norm": 2.1709940433502197, "learning_rate": 8.851360427284485e-06, "loss": 0.7398, "step": 7004 }, { "epoch": 0.5300593999470319, "grad_norm": 1.8637499809265137, "learning_rate": 8.849108522010144e-06, "loss": 0.6726, "step": 7005 }, { "epoch": 0.5301350686693656, "grad_norm": 2.101854085922241, "learning_rate": 8.846856626185113e-06, "loss": 0.6772, "step": 7006 }, { "epoch": 0.5302107373916991, "grad_norm": 1.8691213130950928, "learning_rate": 8.84460473995042e-06, "loss": 0.7488, "step": 7007 }, { "epoch": 0.5302864061140328, "grad_norm": 2.332582473754883, "learning_rate": 8.842352863447076e-06, "loss": 0.6498, "step": 7008 }, { "epoch": 0.5303620748363664, "grad_norm": 1.9999667406082153, "learning_rate": 8.840100996816106e-06, "loss": 0.6713, "step": 7009 }, { "epoch": 0.5304377435587, "grad_norm": 2.319936752319336, "learning_rate": 8.837849140198531e-06, "loss": 0.6997, "step": 7010 }, { "epoch": 0.5305134122810337, "grad_norm": 2.1495556831359863, "learning_rate": 8.835597293735367e-06, "loss": 0.7488, "step": 7011 }, { "epoch": 0.5305890810033672, "grad_norm": 1.9620660543441772, "learning_rate": 8.833345457567632e-06, "loss": 0.6522, "step": 7012 }, { "epoch": 0.5306647497257009, "grad_norm": 1.9892656803131104, "learning_rate": 8.83109363183634e-06, "loss": 0.6923, "step": 7013 }, { "epoch": 0.5307404184480345, "grad_norm": 1.7837767601013184, "learning_rate": 8.828841816682515e-06, "loss": 0.6985, "step": 7014 }, { "epoch": 0.5308160871703681, "grad_norm": 2.1873769760131836, "learning_rate": 8.826590012247167e-06, "loss": 0.7771, "step": 7015 }, { "epoch": 0.5308917558927018, "grad_norm": 2.4407780170440674, "learning_rate": 8.824338218671317e-06, "loss": 0.8917, "step": 7016 }, { "epoch": 0.5309674246150353, "grad_norm": 2.155855178833008, "learning_rate": 8.822086436095973e-06, "loss": 0.7085, "step": 7017 }, { "epoch": 0.531043093337369, "grad_norm": 2.3129708766937256, "learning_rate": 8.819834664662149e-06, "loss": 0.8051, "step": 7018 }, { "epoch": 0.5311187620597027, "grad_norm": 1.8992316722869873, "learning_rate": 8.817582904510867e-06, "loss": 0.624, "step": 7019 }, { "epoch": 0.5311944307820362, "grad_norm": 1.9501327276229858, "learning_rate": 8.81533115578313e-06, "loss": 0.7945, "step": 7020 }, { "epoch": 0.5312700995043699, "grad_norm": 2.9170970916748047, "learning_rate": 8.813079418619955e-06, "loss": 0.5384, "step": 7021 }, { "epoch": 0.5313457682267035, "grad_norm": 1.9943617582321167, "learning_rate": 8.81082769316235e-06, "loss": 0.8106, "step": 7022 }, { "epoch": 0.5314214369490371, "grad_norm": 1.9144606590270996, "learning_rate": 8.808575979551325e-06, "loss": 0.8237, "step": 7023 }, { "epoch": 0.5314971056713708, "grad_norm": 2.214576482772827, "learning_rate": 8.806324277927895e-06, "loss": 0.6764, "step": 7024 }, { "epoch": 0.5315727743937043, "grad_norm": 2.135948657989502, "learning_rate": 8.804072588433063e-06, "loss": 0.5962, "step": 7025 }, { "epoch": 0.531648443116038, "grad_norm": 2.7250607013702393, "learning_rate": 8.801820911207842e-06, "loss": 0.6888, "step": 7026 }, { "epoch": 0.5317241118383716, "grad_norm": 3.235295534133911, "learning_rate": 8.799569246393234e-06, "loss": 0.6464, "step": 7027 }, { "epoch": 0.5317997805607052, "grad_norm": 2.087533473968506, "learning_rate": 8.797317594130245e-06, "loss": 0.6293, "step": 7028 }, { "epoch": 0.5318754492830389, "grad_norm": 5.911130905151367, "learning_rate": 8.795065954559888e-06, "loss": 0.7318, "step": 7029 }, { "epoch": 0.5319511180053724, "grad_norm": 1.9690461158752441, "learning_rate": 8.792814327823165e-06, "loss": 0.5787, "step": 7030 }, { "epoch": 0.5320267867277061, "grad_norm": 2.165632486343384, "learning_rate": 8.790562714061076e-06, "loss": 0.7131, "step": 7031 }, { "epoch": 0.5321024554500398, "grad_norm": 2.008331060409546, "learning_rate": 8.78831111341463e-06, "loss": 0.5816, "step": 7032 }, { "epoch": 0.5321781241723733, "grad_norm": 2.393400192260742, "learning_rate": 8.786059526024823e-06, "loss": 0.6825, "step": 7033 }, { "epoch": 0.532253792894707, "grad_norm": 2.293968915939331, "learning_rate": 8.78380795203266e-06, "loss": 0.6761, "step": 7034 }, { "epoch": 0.5323294616170406, "grad_norm": 2.2142651081085205, "learning_rate": 8.781556391579148e-06, "loss": 0.7001, "step": 7035 }, { "epoch": 0.5324051303393742, "grad_norm": 1.9812933206558228, "learning_rate": 8.779304844805278e-06, "loss": 0.6993, "step": 7036 }, { "epoch": 0.5324807990617079, "grad_norm": 1.889985203742981, "learning_rate": 8.777053311852053e-06, "loss": 0.7246, "step": 7037 }, { "epoch": 0.5325564677840414, "grad_norm": 2.5295422077178955, "learning_rate": 8.77480179286047e-06, "loss": 0.5645, "step": 7038 }, { "epoch": 0.5326321365063751, "grad_norm": 1.9612340927124023, "learning_rate": 8.772550287971525e-06, "loss": 0.6118, "step": 7039 }, { "epoch": 0.5327078052287088, "grad_norm": 2.831002712249756, "learning_rate": 8.77029879732622e-06, "loss": 0.7933, "step": 7040 }, { "epoch": 0.5327834739510423, "grad_norm": 2.5195281505584717, "learning_rate": 8.768047321065547e-06, "loss": 0.66, "step": 7041 }, { "epoch": 0.532859142673376, "grad_norm": 2.25597882270813, "learning_rate": 8.765795859330498e-06, "loss": 0.5955, "step": 7042 }, { "epoch": 0.5329348113957095, "grad_norm": 2.0232596397399902, "learning_rate": 8.763544412262074e-06, "loss": 0.7174, "step": 7043 }, { "epoch": 0.5330104801180432, "grad_norm": 2.8326871395111084, "learning_rate": 8.761292980001259e-06, "loss": 0.6191, "step": 7044 }, { "epoch": 0.5330861488403769, "grad_norm": 2.0261335372924805, "learning_rate": 8.759041562689053e-06, "loss": 0.7789, "step": 7045 }, { "epoch": 0.5331618175627104, "grad_norm": 2.5931825637817383, "learning_rate": 8.756790160466445e-06, "loss": 0.7508, "step": 7046 }, { "epoch": 0.5332374862850441, "grad_norm": 2.142396926879883, "learning_rate": 8.75453877347442e-06, "loss": 0.6213, "step": 7047 }, { "epoch": 0.5333131550073777, "grad_norm": 2.075147867202759, "learning_rate": 8.752287401853974e-06, "loss": 0.5699, "step": 7048 }, { "epoch": 0.5333888237297113, "grad_norm": 2.620086908340454, "learning_rate": 8.750036045746087e-06, "loss": 0.6605, "step": 7049 }, { "epoch": 0.533464492452045, "grad_norm": 2.613399028778076, "learning_rate": 8.747784705291756e-06, "loss": 0.7942, "step": 7050 }, { "epoch": 0.5335401611743785, "grad_norm": 2.060634136199951, "learning_rate": 8.745533380631963e-06, "loss": 0.5541, "step": 7051 }, { "epoch": 0.5336158298967122, "grad_norm": 1.947361946105957, "learning_rate": 8.743282071907692e-06, "loss": 0.8374, "step": 7052 }, { "epoch": 0.5336914986190459, "grad_norm": 2.726940393447876, "learning_rate": 8.741030779259927e-06, "loss": 0.815, "step": 7053 }, { "epoch": 0.5337671673413794, "grad_norm": 2.2680883407592773, "learning_rate": 8.738779502829651e-06, "loss": 0.7913, "step": 7054 }, { "epoch": 0.5338428360637131, "grad_norm": 1.992996096611023, "learning_rate": 8.736528242757849e-06, "loss": 0.6763, "step": 7055 }, { "epoch": 0.5339185047860466, "grad_norm": 2.075242519378662, "learning_rate": 8.7342769991855e-06, "loss": 0.6267, "step": 7056 }, { "epoch": 0.5339941735083803, "grad_norm": 2.2018284797668457, "learning_rate": 8.732025772253586e-06, "loss": 0.7294, "step": 7057 }, { "epoch": 0.534069842230714, "grad_norm": 1.962738275527954, "learning_rate": 8.729774562103084e-06, "loss": 0.6908, "step": 7058 }, { "epoch": 0.5341455109530475, "grad_norm": 1.9997669458389282, "learning_rate": 8.727523368874971e-06, "loss": 0.9049, "step": 7059 }, { "epoch": 0.5342211796753812, "grad_norm": 2.593151092529297, "learning_rate": 8.725272192710229e-06, "loss": 0.7374, "step": 7060 }, { "epoch": 0.5342968483977149, "grad_norm": 1.8062269687652588, "learning_rate": 8.72302103374983e-06, "loss": 0.7373, "step": 7061 }, { "epoch": 0.5343725171200484, "grad_norm": 1.9314745664596558, "learning_rate": 8.720769892134751e-06, "loss": 0.7628, "step": 7062 }, { "epoch": 0.5344481858423821, "grad_norm": 2.1951372623443604, "learning_rate": 8.718518768005963e-06, "loss": 0.8498, "step": 7063 }, { "epoch": 0.5345238545647156, "grad_norm": 3.0464980602264404, "learning_rate": 8.716267661504437e-06, "loss": 0.591, "step": 7064 }, { "epoch": 0.5345995232870493, "grad_norm": 2.5231964588165283, "learning_rate": 8.714016572771154e-06, "loss": 0.6656, "step": 7065 }, { "epoch": 0.534675192009383, "grad_norm": 1.8425929546356201, "learning_rate": 8.711765501947074e-06, "loss": 0.608, "step": 7066 }, { "epoch": 0.5347508607317165, "grad_norm": 2.2831332683563232, "learning_rate": 8.709514449173173e-06, "loss": 0.691, "step": 7067 }, { "epoch": 0.5348265294540502, "grad_norm": 1.8244147300720215, "learning_rate": 8.707263414590416e-06, "loss": 0.6913, "step": 7068 }, { "epoch": 0.5349021981763837, "grad_norm": 1.8097063302993774, "learning_rate": 8.705012398339768e-06, "loss": 0.6704, "step": 7069 }, { "epoch": 0.5349778668987174, "grad_norm": 1.9643720388412476, "learning_rate": 8.7027614005622e-06, "loss": 0.5699, "step": 7070 }, { "epoch": 0.5350535356210511, "grad_norm": 2.7243692874908447, "learning_rate": 8.700510421398676e-06, "loss": 0.7782, "step": 7071 }, { "epoch": 0.5351292043433846, "grad_norm": 1.7081341743469238, "learning_rate": 8.698259460990155e-06, "loss": 0.7373, "step": 7072 }, { "epoch": 0.5352048730657183, "grad_norm": 1.9497652053833008, "learning_rate": 8.696008519477607e-06, "loss": 0.5009, "step": 7073 }, { "epoch": 0.535280541788052, "grad_norm": 2.6480894088745117, "learning_rate": 8.693757597001985e-06, "loss": 0.7564, "step": 7074 }, { "epoch": 0.5353562105103855, "grad_norm": 1.7461856603622437, "learning_rate": 8.691506693704252e-06, "loss": 0.6427, "step": 7075 }, { "epoch": 0.5354318792327192, "grad_norm": 2.0621261596679688, "learning_rate": 8.68925580972537e-06, "loss": 0.7876, "step": 7076 }, { "epoch": 0.5355075479550527, "grad_norm": 2.9666640758514404, "learning_rate": 8.687004945206293e-06, "loss": 0.6868, "step": 7077 }, { "epoch": 0.5355832166773864, "grad_norm": 1.9791302680969238, "learning_rate": 8.68475410028798e-06, "loss": 0.7977, "step": 7078 }, { "epoch": 0.5356588853997201, "grad_norm": 2.7180376052856445, "learning_rate": 8.682503275111385e-06, "loss": 0.8495, "step": 7079 }, { "epoch": 0.5357345541220536, "grad_norm": 2.0174508094787598, "learning_rate": 8.680252469817459e-06, "loss": 0.7696, "step": 7080 }, { "epoch": 0.5358102228443873, "grad_norm": 2.2519609928131104, "learning_rate": 8.678001684547159e-06, "loss": 0.6735, "step": 7081 }, { "epoch": 0.535885891566721, "grad_norm": 2.086402654647827, "learning_rate": 8.675750919441436e-06, "loss": 0.7296, "step": 7082 }, { "epoch": 0.5359615602890545, "grad_norm": 2.2186553478240967, "learning_rate": 8.67350017464124e-06, "loss": 0.7418, "step": 7083 }, { "epoch": 0.5360372290113882, "grad_norm": 2.314690589904785, "learning_rate": 8.671249450287517e-06, "loss": 0.8358, "step": 7084 }, { "epoch": 0.5361128977337217, "grad_norm": 2.289297580718994, "learning_rate": 8.668998746521215e-06, "loss": 0.8058, "step": 7085 }, { "epoch": 0.5361885664560554, "grad_norm": 2.941833972930908, "learning_rate": 8.666748063483284e-06, "loss": 0.7801, "step": 7086 }, { "epoch": 0.5362642351783891, "grad_norm": 2.7016420364379883, "learning_rate": 8.66449740131467e-06, "loss": 0.6243, "step": 7087 }, { "epoch": 0.5363399039007226, "grad_norm": 2.9800827503204346, "learning_rate": 8.66224676015631e-06, "loss": 0.8864, "step": 7088 }, { "epoch": 0.5364155726230563, "grad_norm": 2.015385389328003, "learning_rate": 8.659996140149154e-06, "loss": 0.7499, "step": 7089 }, { "epoch": 0.5364912413453898, "grad_norm": 2.1981661319732666, "learning_rate": 8.657745541434134e-06, "loss": 0.6763, "step": 7090 }, { "epoch": 0.5365669100677235, "grad_norm": 1.9867689609527588, "learning_rate": 8.655494964152199e-06, "loss": 0.6845, "step": 7091 }, { "epoch": 0.5366425787900572, "grad_norm": 2.070387125015259, "learning_rate": 8.653244408444284e-06, "loss": 0.7047, "step": 7092 }, { "epoch": 0.5367182475123907, "grad_norm": 1.7507538795471191, "learning_rate": 8.650993874451324e-06, "loss": 0.9036, "step": 7093 }, { "epoch": 0.5367939162347244, "grad_norm": 2.0866281986236572, "learning_rate": 8.648743362314259e-06, "loss": 0.7097, "step": 7094 }, { "epoch": 0.536869584957058, "grad_norm": 3.4941627979278564, "learning_rate": 8.646492872174018e-06, "loss": 0.6882, "step": 7095 }, { "epoch": 0.5369452536793916, "grad_norm": 1.8780581951141357, "learning_rate": 8.644242404171536e-06, "loss": 0.7404, "step": 7096 }, { "epoch": 0.5370209224017253, "grad_norm": 1.995606541633606, "learning_rate": 8.641991958447748e-06, "loss": 0.7844, "step": 7097 }, { "epoch": 0.5370965911240588, "grad_norm": 2.5743753910064697, "learning_rate": 8.63974153514358e-06, "loss": 0.7448, "step": 7098 }, { "epoch": 0.5371722598463925, "grad_norm": 2.6984145641326904, "learning_rate": 8.637491134399965e-06, "loss": 0.7314, "step": 7099 }, { "epoch": 0.5372479285687262, "grad_norm": 2.029510021209717, "learning_rate": 8.63524075635782e-06, "loss": 0.6158, "step": 7100 }, { "epoch": 0.5373235972910597, "grad_norm": 2.0353198051452637, "learning_rate": 8.632990401158086e-06, "loss": 0.6718, "step": 7101 }, { "epoch": 0.5373992660133934, "grad_norm": 1.9957705736160278, "learning_rate": 8.630740068941678e-06, "loss": 0.7376, "step": 7102 }, { "epoch": 0.5374749347357269, "grad_norm": 2.259077787399292, "learning_rate": 8.628489759849522e-06, "loss": 0.5768, "step": 7103 }, { "epoch": 0.5375506034580606, "grad_norm": 2.07145619392395, "learning_rate": 8.626239474022538e-06, "loss": 0.591, "step": 7104 }, { "epoch": 0.5376262721803943, "grad_norm": 1.6986279487609863, "learning_rate": 8.623989211601645e-06, "loss": 0.9477, "step": 7105 }, { "epoch": 0.5377019409027278, "grad_norm": 2.296743392944336, "learning_rate": 8.621738972727768e-06, "loss": 0.6912, "step": 7106 }, { "epoch": 0.5377776096250615, "grad_norm": 9.655526161193848, "learning_rate": 8.619488757541817e-06, "loss": 0.678, "step": 7107 }, { "epoch": 0.5378532783473952, "grad_norm": 1.8935918807983398, "learning_rate": 8.617238566184713e-06, "loss": 0.7756, "step": 7108 }, { "epoch": 0.5379289470697287, "grad_norm": 2.278226137161255, "learning_rate": 8.614988398797366e-06, "loss": 0.7101, "step": 7109 }, { "epoch": 0.5380046157920624, "grad_norm": 2.2196240425109863, "learning_rate": 8.612738255520689e-06, "loss": 0.7687, "step": 7110 }, { "epoch": 0.5380802845143959, "grad_norm": 2.080031156539917, "learning_rate": 8.610488136495599e-06, "loss": 0.7437, "step": 7111 }, { "epoch": 0.5381559532367296, "grad_norm": 1.6355682611465454, "learning_rate": 8.608238041863e-06, "loss": 0.6928, "step": 7112 }, { "epoch": 0.5382316219590633, "grad_norm": 2.384467124938965, "learning_rate": 8.605987971763803e-06, "loss": 0.788, "step": 7113 }, { "epoch": 0.5383072906813968, "grad_norm": 2.125551223754883, "learning_rate": 8.603737926338912e-06, "loss": 0.6659, "step": 7114 }, { "epoch": 0.5383829594037305, "grad_norm": 2.0060691833496094, "learning_rate": 8.601487905729235e-06, "loss": 0.6208, "step": 7115 }, { "epoch": 0.538458628126064, "grad_norm": 2.356447458267212, "learning_rate": 8.59923791007567e-06, "loss": 0.7137, "step": 7116 }, { "epoch": 0.5385342968483977, "grad_norm": 2.69724178314209, "learning_rate": 8.596987939519128e-06, "loss": 0.7895, "step": 7117 }, { "epoch": 0.5386099655707314, "grad_norm": 2.0127129554748535, "learning_rate": 8.594737994200504e-06, "loss": 0.5838, "step": 7118 }, { "epoch": 0.5386856342930649, "grad_norm": 2.2311911582946777, "learning_rate": 8.592488074260698e-06, "loss": 0.7257, "step": 7119 }, { "epoch": 0.5387613030153986, "grad_norm": 2.5712006092071533, "learning_rate": 8.590238179840606e-06, "loss": 0.7806, "step": 7120 }, { "epoch": 0.5388369717377323, "grad_norm": 2.380955219268799, "learning_rate": 8.587988311081122e-06, "loss": 0.7459, "step": 7121 }, { "epoch": 0.5389126404600658, "grad_norm": 2.549931764602661, "learning_rate": 8.585738468123147e-06, "loss": 0.6662, "step": 7122 }, { "epoch": 0.5389883091823995, "grad_norm": 2.1435601711273193, "learning_rate": 8.583488651107566e-06, "loss": 0.7061, "step": 7123 }, { "epoch": 0.539063977904733, "grad_norm": 5.311253070831299, "learning_rate": 8.581238860175276e-06, "loss": 0.8853, "step": 7124 }, { "epoch": 0.5391396466270667, "grad_norm": 2.5951385498046875, "learning_rate": 8.578989095467161e-06, "loss": 0.6598, "step": 7125 }, { "epoch": 0.5392153153494004, "grad_norm": 2.184601068496704, "learning_rate": 8.576739357124107e-06, "loss": 0.6537, "step": 7126 }, { "epoch": 0.5392909840717339, "grad_norm": 2.4066100120544434, "learning_rate": 8.57448964528701e-06, "loss": 0.7578, "step": 7127 }, { "epoch": 0.5393666527940676, "grad_norm": 3.0579781532287598, "learning_rate": 8.572239960096742e-06, "loss": 0.7298, "step": 7128 }, { "epoch": 0.5394423215164011, "grad_norm": 1.7824645042419434, "learning_rate": 8.569990301694196e-06, "loss": 0.5753, "step": 7129 }, { "epoch": 0.5395179902387348, "grad_norm": 2.685590982437134, "learning_rate": 8.567740670220246e-06, "loss": 0.7501, "step": 7130 }, { "epoch": 0.5395936589610685, "grad_norm": 2.148221254348755, "learning_rate": 8.565491065815771e-06, "loss": 0.5976, "step": 7131 }, { "epoch": 0.539669327683402, "grad_norm": 2.3486313819885254, "learning_rate": 8.563241488621652e-06, "loss": 0.8495, "step": 7132 }, { "epoch": 0.5397449964057357, "grad_norm": 2.0380804538726807, "learning_rate": 8.560991938778767e-06, "loss": 0.6935, "step": 7133 }, { "epoch": 0.5398206651280694, "grad_norm": 2.2446913719177246, "learning_rate": 8.558742416427985e-06, "loss": 0.6925, "step": 7134 }, { "epoch": 0.5398963338504029, "grad_norm": 2.0997154712677, "learning_rate": 8.55649292171018e-06, "loss": 0.5679, "step": 7135 }, { "epoch": 0.5399720025727366, "grad_norm": 2.4768741130828857, "learning_rate": 8.55424345476622e-06, "loss": 0.7029, "step": 7136 }, { "epoch": 0.5400476712950701, "grad_norm": 3.763378858566284, "learning_rate": 8.551994015736978e-06, "loss": 0.6053, "step": 7137 }, { "epoch": 0.5401233400174038, "grad_norm": 1.940919280052185, "learning_rate": 8.549744604763322e-06, "loss": 0.6981, "step": 7138 }, { "epoch": 0.5401990087397375, "grad_norm": 2.1327764987945557, "learning_rate": 8.547495221986114e-06, "loss": 0.8293, "step": 7139 }, { "epoch": 0.540274677462071, "grad_norm": 2.6629951000213623, "learning_rate": 8.54524586754622e-06, "loss": 0.8285, "step": 7140 }, { "epoch": 0.5403503461844047, "grad_norm": 2.080131769180298, "learning_rate": 8.542996541584498e-06, "loss": 0.7531, "step": 7141 }, { "epoch": 0.5404260149067383, "grad_norm": 1.9729998111724854, "learning_rate": 8.540747244241811e-06, "loss": 0.9104, "step": 7142 }, { "epoch": 0.5405016836290719, "grad_norm": 1.957764744758606, "learning_rate": 8.53849797565902e-06, "loss": 0.836, "step": 7143 }, { "epoch": 0.5405773523514056, "grad_norm": 2.3214311599731445, "learning_rate": 8.536248735976976e-06, "loss": 0.6771, "step": 7144 }, { "epoch": 0.5406530210737391, "grad_norm": 1.9388372898101807, "learning_rate": 8.533999525336536e-06, "loss": 0.6914, "step": 7145 }, { "epoch": 0.5407286897960728, "grad_norm": 2.0969862937927246, "learning_rate": 8.531750343878551e-06, "loss": 0.7405, "step": 7146 }, { "epoch": 0.5408043585184065, "grad_norm": 2.0790417194366455, "learning_rate": 8.529501191743876e-06, "loss": 0.7561, "step": 7147 }, { "epoch": 0.54088002724074, "grad_norm": 1.9571138620376587, "learning_rate": 8.527252069073359e-06, "loss": 0.8044, "step": 7148 }, { "epoch": 0.5409556959630737, "grad_norm": 2.024289131164551, "learning_rate": 8.525002976007848e-06, "loss": 0.5776, "step": 7149 }, { "epoch": 0.5410313646854072, "grad_norm": 2.4422504901885986, "learning_rate": 8.522753912688184e-06, "loss": 0.7017, "step": 7150 }, { "epoch": 0.5411070334077409, "grad_norm": 2.0276260375976562, "learning_rate": 8.520504879255214e-06, "loss": 0.5853, "step": 7151 }, { "epoch": 0.5411827021300746, "grad_norm": 2.331113576889038, "learning_rate": 8.51825587584978e-06, "loss": 0.6115, "step": 7152 }, { "epoch": 0.5412583708524081, "grad_norm": 1.7973954677581787, "learning_rate": 8.516006902612721e-06, "loss": 0.5986, "step": 7153 }, { "epoch": 0.5413340395747418, "grad_norm": 2.5584282875061035, "learning_rate": 8.513757959684877e-06, "loss": 0.7094, "step": 7154 }, { "epoch": 0.5414097082970754, "grad_norm": 2.177795886993408, "learning_rate": 8.51150904720708e-06, "loss": 0.6867, "step": 7155 }, { "epoch": 0.541485377019409, "grad_norm": 2.400580644607544, "learning_rate": 8.509260165320168e-06, "loss": 0.6919, "step": 7156 }, { "epoch": 0.5415610457417427, "grad_norm": 1.9860256910324097, "learning_rate": 8.50701131416497e-06, "loss": 0.6899, "step": 7157 }, { "epoch": 0.5416367144640762, "grad_norm": 1.862237572669983, "learning_rate": 8.504762493882317e-06, "loss": 0.6249, "step": 7158 }, { "epoch": 0.5417123831864099, "grad_norm": 2.687009811401367, "learning_rate": 8.50251370461304e-06, "loss": 0.6726, "step": 7159 }, { "epoch": 0.5417880519087436, "grad_norm": 1.91978120803833, "learning_rate": 8.500264946497967e-06, "loss": 0.5824, "step": 7160 }, { "epoch": 0.5418637206310771, "grad_norm": 2.6464669704437256, "learning_rate": 8.498016219677915e-06, "loss": 0.6439, "step": 7161 }, { "epoch": 0.5419393893534108, "grad_norm": 2.026582717895508, "learning_rate": 8.49576752429371e-06, "loss": 0.76, "step": 7162 }, { "epoch": 0.5420150580757443, "grad_norm": 2.627919912338257, "learning_rate": 8.493518860486177e-06, "loss": 0.7907, "step": 7163 }, { "epoch": 0.542090726798078, "grad_norm": 2.2149717807769775, "learning_rate": 8.49127022839613e-06, "loss": 0.6007, "step": 7164 }, { "epoch": 0.5421663955204117, "grad_norm": 1.838017225265503, "learning_rate": 8.489021628164388e-06, "loss": 0.6642, "step": 7165 }, { "epoch": 0.5422420642427452, "grad_norm": 2.120692014694214, "learning_rate": 8.486773059931763e-06, "loss": 0.5709, "step": 7166 }, { "epoch": 0.5423177329650789, "grad_norm": 2.953522205352783, "learning_rate": 8.484524523839067e-06, "loss": 0.6147, "step": 7167 }, { "epoch": 0.5423934016874125, "grad_norm": 1.9977935552597046, "learning_rate": 8.482276020027114e-06, "loss": 0.7301, "step": 7168 }, { "epoch": 0.5424690704097461, "grad_norm": 2.271860361099243, "learning_rate": 8.480027548636714e-06, "loss": 0.6553, "step": 7169 }, { "epoch": 0.5425447391320798, "grad_norm": 1.9562822580337524, "learning_rate": 8.477779109808668e-06, "loss": 0.638, "step": 7170 }, { "epoch": 0.5426204078544133, "grad_norm": 2.308135509490967, "learning_rate": 8.475530703683784e-06, "loss": 0.781, "step": 7171 }, { "epoch": 0.542696076576747, "grad_norm": 1.8787046670913696, "learning_rate": 8.47328233040286e-06, "loss": 0.6161, "step": 7172 }, { "epoch": 0.5427717452990807, "grad_norm": 2.3262102603912354, "learning_rate": 8.471033990106703e-06, "loss": 0.6771, "step": 7173 }, { "epoch": 0.5428474140214142, "grad_norm": 2.1242523193359375, "learning_rate": 8.46878568293611e-06, "loss": 0.8264, "step": 7174 }, { "epoch": 0.5429230827437479, "grad_norm": 3.241060256958008, "learning_rate": 8.466537409031875e-06, "loss": 0.5887, "step": 7175 }, { "epoch": 0.5429987514660815, "grad_norm": 1.77642023563385, "learning_rate": 8.464289168534794e-06, "loss": 0.6155, "step": 7176 }, { "epoch": 0.5430744201884151, "grad_norm": 2.4458696842193604, "learning_rate": 8.462040961585655e-06, "loss": 0.6388, "step": 7177 }, { "epoch": 0.5431500889107488, "grad_norm": 1.5858855247497559, "learning_rate": 8.459792788325251e-06, "loss": 0.6843, "step": 7178 }, { "epoch": 0.5432257576330823, "grad_norm": 2.241321325302124, "learning_rate": 8.457544648894372e-06, "loss": 0.6885, "step": 7179 }, { "epoch": 0.543301426355416, "grad_norm": 1.9236705303192139, "learning_rate": 8.4552965434338e-06, "loss": 0.6897, "step": 7180 }, { "epoch": 0.5433770950777496, "grad_norm": 2.625894069671631, "learning_rate": 8.453048472084323e-06, "loss": 0.7572, "step": 7181 }, { "epoch": 0.5434527638000832, "grad_norm": 1.9380083084106445, "learning_rate": 8.450800434986716e-06, "loss": 0.8159, "step": 7182 }, { "epoch": 0.5435284325224169, "grad_norm": 2.195018768310547, "learning_rate": 8.448552432281763e-06, "loss": 0.7674, "step": 7183 }, { "epoch": 0.5436041012447504, "grad_norm": 3.2216532230377197, "learning_rate": 8.446304464110243e-06, "loss": 0.7002, "step": 7184 }, { "epoch": 0.5436797699670841, "grad_norm": 2.094017505645752, "learning_rate": 8.444056530612926e-06, "loss": 0.6522, "step": 7185 }, { "epoch": 0.5437554386894178, "grad_norm": 2.447477340698242, "learning_rate": 8.441808631930588e-06, "loss": 0.8029, "step": 7186 }, { "epoch": 0.5438311074117513, "grad_norm": 1.7024062871932983, "learning_rate": 8.439560768203996e-06, "loss": 0.6894, "step": 7187 }, { "epoch": 0.543906776134085, "grad_norm": 1.634425163269043, "learning_rate": 8.437312939573925e-06, "loss": 0.6593, "step": 7188 }, { "epoch": 0.5439824448564186, "grad_norm": 2.105696678161621, "learning_rate": 8.435065146181135e-06, "loss": 0.7249, "step": 7189 }, { "epoch": 0.5440581135787522, "grad_norm": 2.9935972690582275, "learning_rate": 8.432817388166395e-06, "loss": 0.632, "step": 7190 }, { "epoch": 0.5441337823010859, "grad_norm": 2.017632007598877, "learning_rate": 8.430569665670464e-06, "loss": 0.8673, "step": 7191 }, { "epoch": 0.5442094510234194, "grad_norm": 1.9083147048950195, "learning_rate": 8.428321978834104e-06, "loss": 0.6792, "step": 7192 }, { "epoch": 0.5442851197457531, "grad_norm": 2.1017110347747803, "learning_rate": 8.426074327798067e-06, "loss": 0.6695, "step": 7193 }, { "epoch": 0.5443607884680867, "grad_norm": 1.9872255325317383, "learning_rate": 8.423826712703114e-06, "loss": 0.6914, "step": 7194 }, { "epoch": 0.5444364571904203, "grad_norm": 2.61326003074646, "learning_rate": 8.421579133689997e-06, "loss": 0.6831, "step": 7195 }, { "epoch": 0.544512125912754, "grad_norm": 2.0831680297851562, "learning_rate": 8.419331590899463e-06, "loss": 0.662, "step": 7196 }, { "epoch": 0.5445877946350876, "grad_norm": 1.7519179582595825, "learning_rate": 8.417084084472267e-06, "loss": 0.7071, "step": 7197 }, { "epoch": 0.5446634633574212, "grad_norm": 1.9903972148895264, "learning_rate": 8.414836614549145e-06, "loss": 0.7907, "step": 7198 }, { "epoch": 0.5447391320797549, "grad_norm": 3.3325815200805664, "learning_rate": 8.412589181270849e-06, "loss": 0.6182, "step": 7199 }, { "epoch": 0.5448148008020884, "grad_norm": 2.1972944736480713, "learning_rate": 8.410341784778121e-06, "loss": 0.8044, "step": 7200 }, { "epoch": 0.5448904695244221, "grad_norm": 2.348930835723877, "learning_rate": 8.408094425211695e-06, "loss": 0.7072, "step": 7201 }, { "epoch": 0.5449661382467557, "grad_norm": 2.407869338989258, "learning_rate": 8.405847102712313e-06, "loss": 0.7567, "step": 7202 }, { "epoch": 0.5450418069690893, "grad_norm": 2.114015579223633, "learning_rate": 8.403599817420702e-06, "loss": 0.8147, "step": 7203 }, { "epoch": 0.545117475691423, "grad_norm": 2.1249501705169678, "learning_rate": 8.401352569477605e-06, "loss": 0.6118, "step": 7204 }, { "epoch": 0.5451931444137565, "grad_norm": 2.1897776126861572, "learning_rate": 8.399105359023743e-06, "loss": 0.6311, "step": 7205 }, { "epoch": 0.5452688131360902, "grad_norm": 2.278143882751465, "learning_rate": 8.39685818619985e-06, "loss": 0.8238, "step": 7206 }, { "epoch": 0.5453444818584238, "grad_norm": 1.9254010915756226, "learning_rate": 8.394611051146647e-06, "loss": 0.6331, "step": 7207 }, { "epoch": 0.5454201505807574, "grad_norm": 1.6860229969024658, "learning_rate": 8.392363954004855e-06, "loss": 0.7736, "step": 7208 }, { "epoch": 0.5454958193030911, "grad_norm": 2.540018081665039, "learning_rate": 8.390116894915201e-06, "loss": 0.6795, "step": 7209 }, { "epoch": 0.5455714880254247, "grad_norm": 2.2471187114715576, "learning_rate": 8.387869874018399e-06, "loss": 0.7135, "step": 7210 }, { "epoch": 0.5456471567477583, "grad_norm": 2.69797682762146, "learning_rate": 8.385622891455167e-06, "loss": 0.6507, "step": 7211 }, { "epoch": 0.545722825470092, "grad_norm": 3.0235390663146973, "learning_rate": 8.383375947366214e-06, "loss": 0.6393, "step": 7212 }, { "epoch": 0.5457984941924255, "grad_norm": 2.0341522693634033, "learning_rate": 8.381129041892252e-06, "loss": 0.8328, "step": 7213 }, { "epoch": 0.5458741629147592, "grad_norm": 1.5147240161895752, "learning_rate": 8.378882175173996e-06, "loss": 0.5524, "step": 7214 }, { "epoch": 0.5459498316370928, "grad_norm": 2.072648286819458, "learning_rate": 8.376635347352143e-06, "loss": 0.6394, "step": 7215 }, { "epoch": 0.5460255003594264, "grad_norm": 2.308685064315796, "learning_rate": 8.374388558567405e-06, "loss": 0.7882, "step": 7216 }, { "epoch": 0.5461011690817601, "grad_norm": 2.836092472076416, "learning_rate": 8.372141808960474e-06, "loss": 0.6588, "step": 7217 }, { "epoch": 0.5461768378040937, "grad_norm": 2.7137436866760254, "learning_rate": 8.369895098672053e-06, "loss": 0.6675, "step": 7218 }, { "epoch": 0.5462525065264273, "grad_norm": 1.9144799709320068, "learning_rate": 8.367648427842842e-06, "loss": 0.8562, "step": 7219 }, { "epoch": 0.5463281752487609, "grad_norm": 2.2741446495056152, "learning_rate": 8.365401796613534e-06, "loss": 0.6291, "step": 7220 }, { "epoch": 0.5464038439710945, "grad_norm": 2.282801866531372, "learning_rate": 8.363155205124815e-06, "loss": 0.6835, "step": 7221 }, { "epoch": 0.5464795126934282, "grad_norm": 2.0239760875701904, "learning_rate": 8.36090865351738e-06, "loss": 0.606, "step": 7222 }, { "epoch": 0.5465551814157618, "grad_norm": 2.0465428829193115, "learning_rate": 8.358662141931906e-06, "loss": 0.8214, "step": 7223 }, { "epoch": 0.5466308501380954, "grad_norm": 1.8509844541549683, "learning_rate": 8.356415670509085e-06, "loss": 0.6769, "step": 7224 }, { "epoch": 0.5467065188604291, "grad_norm": 2.0568501949310303, "learning_rate": 8.3541692393896e-06, "loss": 0.6617, "step": 7225 }, { "epoch": 0.5467821875827626, "grad_norm": 2.1013340950012207, "learning_rate": 8.351922848714125e-06, "loss": 0.7638, "step": 7226 }, { "epoch": 0.5468578563050963, "grad_norm": 2.2861831188201904, "learning_rate": 8.349676498623337e-06, "loss": 0.743, "step": 7227 }, { "epoch": 0.5469335250274299, "grad_norm": 2.278883457183838, "learning_rate": 8.347430189257907e-06, "loss": 0.6331, "step": 7228 }, { "epoch": 0.5470091937497635, "grad_norm": 1.7488274574279785, "learning_rate": 8.345183920758512e-06, "loss": 0.7192, "step": 7229 }, { "epoch": 0.5470848624720972, "grad_norm": 1.8391131162643433, "learning_rate": 8.342937693265819e-06, "loss": 0.8068, "step": 7230 }, { "epoch": 0.5471605311944308, "grad_norm": 2.0929038524627686, "learning_rate": 8.340691506920491e-06, "loss": 0.6009, "step": 7231 }, { "epoch": 0.5472361999167644, "grad_norm": 2.610123872756958, "learning_rate": 8.338445361863193e-06, "loss": 0.841, "step": 7232 }, { "epoch": 0.547311868639098, "grad_norm": 2.2190845012664795, "learning_rate": 8.336199258234588e-06, "loss": 0.7216, "step": 7233 }, { "epoch": 0.5473875373614316, "grad_norm": 1.9688743352890015, "learning_rate": 8.33395319617533e-06, "loss": 0.63, "step": 7234 }, { "epoch": 0.5474632060837653, "grad_norm": 2.508888006210327, "learning_rate": 8.331707175826077e-06, "loss": 0.6036, "step": 7235 }, { "epoch": 0.5475388748060989, "grad_norm": 2.6636786460876465, "learning_rate": 8.329461197327484e-06, "loss": 0.7058, "step": 7236 }, { "epoch": 0.5476145435284325, "grad_norm": 2.2972769737243652, "learning_rate": 8.3272152608202e-06, "loss": 0.6575, "step": 7237 }, { "epoch": 0.5476902122507662, "grad_norm": 2.279362440109253, "learning_rate": 8.324969366444874e-06, "loss": 0.6517, "step": 7238 }, { "epoch": 0.5477658809730998, "grad_norm": 2.3690998554229736, "learning_rate": 8.322723514342143e-06, "loss": 0.6411, "step": 7239 }, { "epoch": 0.5478415496954334, "grad_norm": 1.700605034828186, "learning_rate": 8.320477704652662e-06, "loss": 0.7621, "step": 7240 }, { "epoch": 0.547917218417767, "grad_norm": 2.426114320755005, "learning_rate": 8.318231937517063e-06, "loss": 0.6546, "step": 7241 }, { "epoch": 0.5479928871401006, "grad_norm": 2.4113972187042236, "learning_rate": 8.315986213075986e-06, "loss": 0.6745, "step": 7242 }, { "epoch": 0.5480685558624343, "grad_norm": 2.2647318840026855, "learning_rate": 8.313740531470065e-06, "loss": 0.8873, "step": 7243 }, { "epoch": 0.5481442245847679, "grad_norm": 2.242077112197876, "learning_rate": 8.311494892839929e-06, "loss": 0.737, "step": 7244 }, { "epoch": 0.5482198933071015, "grad_norm": 3.1310784816741943, "learning_rate": 8.30924929732621e-06, "loss": 0.8376, "step": 7245 }, { "epoch": 0.5482955620294351, "grad_norm": 2.1693949699401855, "learning_rate": 8.307003745069537e-06, "loss": 0.7175, "step": 7246 }, { "epoch": 0.5483712307517687, "grad_norm": 2.6031949520111084, "learning_rate": 8.30475823621053e-06, "loss": 0.5512, "step": 7247 }, { "epoch": 0.5484468994741024, "grad_norm": 2.4191761016845703, "learning_rate": 8.30251277088981e-06, "loss": 0.6359, "step": 7248 }, { "epoch": 0.548522568196436, "grad_norm": 1.9725650548934937, "learning_rate": 8.300267349247993e-06, "loss": 0.7108, "step": 7249 }, { "epoch": 0.5485982369187696, "grad_norm": 2.7272510528564453, "learning_rate": 8.298021971425704e-06, "loss": 0.6975, "step": 7250 }, { "epoch": 0.5486739056411033, "grad_norm": 2.1538898944854736, "learning_rate": 8.295776637563546e-06, "loss": 0.8058, "step": 7251 }, { "epoch": 0.5487495743634369, "grad_norm": 2.162531852722168, "learning_rate": 8.293531347802136e-06, "loss": 0.7727, "step": 7252 }, { "epoch": 0.5488252430857705, "grad_norm": 2.4503231048583984, "learning_rate": 8.291286102282076e-06, "loss": 0.7097, "step": 7253 }, { "epoch": 0.5489009118081041, "grad_norm": 2.264075994491577, "learning_rate": 8.289040901143969e-06, "loss": 0.5951, "step": 7254 }, { "epoch": 0.5489765805304377, "grad_norm": 2.111943244934082, "learning_rate": 8.286795744528425e-06, "loss": 0.6764, "step": 7255 }, { "epoch": 0.5490522492527714, "grad_norm": 1.6367228031158447, "learning_rate": 8.284550632576037e-06, "loss": 0.599, "step": 7256 }, { "epoch": 0.549127917975105, "grad_norm": 2.353544235229492, "learning_rate": 8.282305565427402e-06, "loss": 0.6703, "step": 7257 }, { "epoch": 0.5492035866974386, "grad_norm": 2.2156119346618652, "learning_rate": 8.280060543223115e-06, "loss": 0.6398, "step": 7258 }, { "epoch": 0.5492792554197722, "grad_norm": 2.4467849731445312, "learning_rate": 8.27781556610376e-06, "loss": 0.705, "step": 7259 }, { "epoch": 0.5493549241421058, "grad_norm": 1.8530616760253906, "learning_rate": 8.275570634209936e-06, "loss": 0.7576, "step": 7260 }, { "epoch": 0.5494305928644395, "grad_norm": 2.1063315868377686, "learning_rate": 8.273325747682223e-06, "loss": 0.6817, "step": 7261 }, { "epoch": 0.5495062615867731, "grad_norm": 3.19496488571167, "learning_rate": 8.271080906661197e-06, "loss": 0.7781, "step": 7262 }, { "epoch": 0.5495819303091067, "grad_norm": 1.902632713317871, "learning_rate": 8.268836111287447e-06, "loss": 0.6575, "step": 7263 }, { "epoch": 0.5496575990314404, "grad_norm": 1.7759108543395996, "learning_rate": 8.26659136170154e-06, "loss": 0.6641, "step": 7264 }, { "epoch": 0.549733267753774, "grad_norm": 1.6828055381774902, "learning_rate": 8.264346658044056e-06, "loss": 0.6752, "step": 7265 }, { "epoch": 0.5498089364761076, "grad_norm": 2.365657329559326, "learning_rate": 8.262102000455565e-06, "loss": 0.7019, "step": 7266 }, { "epoch": 0.5498846051984412, "grad_norm": 1.8660743236541748, "learning_rate": 8.259857389076632e-06, "loss": 0.6905, "step": 7267 }, { "epoch": 0.5499602739207748, "grad_norm": 2.5874767303466797, "learning_rate": 8.257612824047825e-06, "loss": 0.7434, "step": 7268 }, { "epoch": 0.5500359426431085, "grad_norm": 1.9300464391708374, "learning_rate": 8.255368305509703e-06, "loss": 0.7444, "step": 7269 }, { "epoch": 0.5501116113654421, "grad_norm": 2.1249427795410156, "learning_rate": 8.253123833602823e-06, "loss": 0.675, "step": 7270 }, { "epoch": 0.5501872800877757, "grad_norm": 2.6504623889923096, "learning_rate": 8.25087940846775e-06, "loss": 0.6052, "step": 7271 }, { "epoch": 0.5502629488101093, "grad_norm": 1.6666854619979858, "learning_rate": 8.248635030245026e-06, "loss": 0.9488, "step": 7272 }, { "epoch": 0.550338617532443, "grad_norm": 1.920323133468628, "learning_rate": 8.246390699075211e-06, "loss": 0.7232, "step": 7273 }, { "epoch": 0.5504142862547766, "grad_norm": 1.9666804075241089, "learning_rate": 8.244146415098847e-06, "loss": 0.6699, "step": 7274 }, { "epoch": 0.5504899549771102, "grad_norm": 2.455787181854248, "learning_rate": 8.241902178456474e-06, "loss": 0.7122, "step": 7275 }, { "epoch": 0.5505656236994438, "grad_norm": 2.3213722705841064, "learning_rate": 8.239657989288643e-06, "loss": 0.8173, "step": 7276 }, { "epoch": 0.5506412924217775, "grad_norm": 1.930747628211975, "learning_rate": 8.23741384773589e-06, "loss": 0.646, "step": 7277 }, { "epoch": 0.5507169611441111, "grad_norm": 2.0072009563446045, "learning_rate": 8.235169753938745e-06, "loss": 0.7275, "step": 7278 }, { "epoch": 0.5507926298664447, "grad_norm": 2.398528814315796, "learning_rate": 8.232925708037748e-06, "loss": 0.6895, "step": 7279 }, { "epoch": 0.5508682985887783, "grad_norm": 2.015350103378296, "learning_rate": 8.230681710173418e-06, "loss": 0.6356, "step": 7280 }, { "epoch": 0.550943967311112, "grad_norm": 2.454782485961914, "learning_rate": 8.22843776048629e-06, "loss": 0.7882, "step": 7281 }, { "epoch": 0.5510196360334456, "grad_norm": 2.29709529876709, "learning_rate": 8.226193859116887e-06, "loss": 0.6594, "step": 7282 }, { "epoch": 0.5510953047557792, "grad_norm": 2.140209197998047, "learning_rate": 8.223950006205725e-06, "loss": 0.6665, "step": 7283 }, { "epoch": 0.5511709734781128, "grad_norm": 1.968939185142517, "learning_rate": 8.221706201893326e-06, "loss": 0.7324, "step": 7284 }, { "epoch": 0.5512466422004464, "grad_norm": 1.8887324333190918, "learning_rate": 8.219462446320199e-06, "loss": 0.5153, "step": 7285 }, { "epoch": 0.5513223109227801, "grad_norm": 2.2595646381378174, "learning_rate": 8.21721873962686e-06, "loss": 0.8726, "step": 7286 }, { "epoch": 0.5513979796451137, "grad_norm": 5.0413899421691895, "learning_rate": 8.214975081953816e-06, "loss": 0.7368, "step": 7287 }, { "epoch": 0.5514736483674473, "grad_norm": 2.2042415142059326, "learning_rate": 8.21273147344157e-06, "loss": 0.6438, "step": 7288 }, { "epoch": 0.5515493170897809, "grad_norm": 2.034449815750122, "learning_rate": 8.210487914230627e-06, "loss": 0.655, "step": 7289 }, { "epoch": 0.5516249858121146, "grad_norm": 2.396224021911621, "learning_rate": 8.208244404461479e-06, "loss": 0.6204, "step": 7290 }, { "epoch": 0.5517006545344482, "grad_norm": 2.4055793285369873, "learning_rate": 8.206000944274634e-06, "loss": 0.7578, "step": 7291 }, { "epoch": 0.5517763232567818, "grad_norm": 2.2324295043945312, "learning_rate": 8.203757533810575e-06, "loss": 0.6712, "step": 7292 }, { "epoch": 0.5518519919791154, "grad_norm": 2.729771614074707, "learning_rate": 8.201514173209797e-06, "loss": 0.7397, "step": 7293 }, { "epoch": 0.551927660701449, "grad_norm": 1.7570246458053589, "learning_rate": 8.199270862612781e-06, "loss": 0.7682, "step": 7294 }, { "epoch": 0.5520033294237827, "grad_norm": 2.058879852294922, "learning_rate": 8.197027602160013e-06, "loss": 0.5722, "step": 7295 }, { "epoch": 0.5520789981461163, "grad_norm": 2.095630645751953, "learning_rate": 8.194784391991977e-06, "loss": 0.7722, "step": 7296 }, { "epoch": 0.5521546668684499, "grad_norm": 2.408841371536255, "learning_rate": 8.192541232249145e-06, "loss": 0.7326, "step": 7297 }, { "epoch": 0.5522303355907835, "grad_norm": 2.27298641204834, "learning_rate": 8.190298123071993e-06, "loss": 0.7144, "step": 7298 }, { "epoch": 0.5523060043131172, "grad_norm": 2.675877809524536, "learning_rate": 8.188055064600991e-06, "loss": 0.874, "step": 7299 }, { "epoch": 0.5523816730354508, "grad_norm": 2.062973976135254, "learning_rate": 8.185812056976605e-06, "loss": 0.6534, "step": 7300 }, { "epoch": 0.5524573417577844, "grad_norm": 2.2476651668548584, "learning_rate": 8.183569100339305e-06, "loss": 0.8483, "step": 7301 }, { "epoch": 0.552533010480118, "grad_norm": 2.406301259994507, "learning_rate": 8.181326194829548e-06, "loss": 0.8172, "step": 7302 }, { "epoch": 0.5526086792024517, "grad_norm": 2.0138354301452637, "learning_rate": 8.179083340587794e-06, "loss": 0.6932, "step": 7303 }, { "epoch": 0.5526843479247853, "grad_norm": 1.9023960828781128, "learning_rate": 8.176840537754493e-06, "loss": 0.643, "step": 7304 }, { "epoch": 0.5527600166471189, "grad_norm": 2.067840576171875, "learning_rate": 8.1745977864701e-06, "loss": 0.8828, "step": 7305 }, { "epoch": 0.5528356853694525, "grad_norm": 2.148322105407715, "learning_rate": 8.172355086875064e-06, "loss": 0.7668, "step": 7306 }, { "epoch": 0.5529113540917862, "grad_norm": 2.0931055545806885, "learning_rate": 8.17011243910983e-06, "loss": 0.6585, "step": 7307 }, { "epoch": 0.5529870228141198, "grad_norm": 1.9692755937576294, "learning_rate": 8.167869843314839e-06, "loss": 0.7746, "step": 7308 }, { "epoch": 0.5530626915364534, "grad_norm": 2.0854835510253906, "learning_rate": 8.165627299630532e-06, "loss": 0.7212, "step": 7309 }, { "epoch": 0.553138360258787, "grad_norm": 2.4114372730255127, "learning_rate": 8.163384808197339e-06, "loss": 0.7573, "step": 7310 }, { "epoch": 0.5532140289811207, "grad_norm": 2.3823318481445312, "learning_rate": 8.161142369155693e-06, "loss": 0.6895, "step": 7311 }, { "epoch": 0.5532896977034543, "grad_norm": 2.2626543045043945, "learning_rate": 8.158899982646032e-06, "loss": 0.7496, "step": 7312 }, { "epoch": 0.5533653664257879, "grad_norm": 2.219508647918701, "learning_rate": 8.156657648808769e-06, "loss": 0.6999, "step": 7313 }, { "epoch": 0.5534410351481215, "grad_norm": 2.2591121196746826, "learning_rate": 8.154415367784335e-06, "loss": 0.7347, "step": 7314 }, { "epoch": 0.5535167038704552, "grad_norm": 2.205310344696045, "learning_rate": 8.152173139713146e-06, "loss": 0.7123, "step": 7315 }, { "epoch": 0.5535923725927888, "grad_norm": 2.087782382965088, "learning_rate": 8.149930964735612e-06, "loss": 0.6367, "step": 7316 }, { "epoch": 0.5536680413151224, "grad_norm": 2.4498722553253174, "learning_rate": 8.147688842992155e-06, "loss": 0.614, "step": 7317 }, { "epoch": 0.553743710037456, "grad_norm": 3.0710363388061523, "learning_rate": 8.14544677462318e-06, "loss": 0.7032, "step": 7318 }, { "epoch": 0.5538193787597896, "grad_norm": 2.3299739360809326, "learning_rate": 8.14320475976909e-06, "loss": 0.854, "step": 7319 }, { "epoch": 0.5538950474821233, "grad_norm": 1.8151921033859253, "learning_rate": 8.140962798570289e-06, "loss": 0.5846, "step": 7320 }, { "epoch": 0.5539707162044569, "grad_norm": 2.084280014038086, "learning_rate": 8.138720891167174e-06, "loss": 0.713, "step": 7321 }, { "epoch": 0.5540463849267905, "grad_norm": 2.037383794784546, "learning_rate": 8.136479037700146e-06, "loss": 0.6499, "step": 7322 }, { "epoch": 0.5541220536491241, "grad_norm": 2.4484384059906006, "learning_rate": 8.134237238309593e-06, "loss": 0.7387, "step": 7323 }, { "epoch": 0.5541977223714578, "grad_norm": 2.2470922470092773, "learning_rate": 8.131995493135903e-06, "loss": 0.7522, "step": 7324 }, { "epoch": 0.5542733910937914, "grad_norm": 2.310148239135742, "learning_rate": 8.129753802319467e-06, "loss": 0.7125, "step": 7325 }, { "epoch": 0.554349059816125, "grad_norm": 1.8426295518875122, "learning_rate": 8.127512166000656e-06, "loss": 0.8074, "step": 7326 }, { "epoch": 0.5544247285384586, "grad_norm": 1.8981022834777832, "learning_rate": 8.125270584319857e-06, "loss": 0.5748, "step": 7327 }, { "epoch": 0.5545003972607923, "grad_norm": 2.154419183731079, "learning_rate": 8.123029057417446e-06, "loss": 0.7373, "step": 7328 }, { "epoch": 0.5545760659831259, "grad_norm": 1.6180402040481567, "learning_rate": 8.12078758543379e-06, "loss": 0.6536, "step": 7329 }, { "epoch": 0.5546517347054595, "grad_norm": 2.104201555252075, "learning_rate": 8.11854616850926e-06, "loss": 0.7068, "step": 7330 }, { "epoch": 0.5547274034277931, "grad_norm": 1.8646724224090576, "learning_rate": 8.116304806784218e-06, "loss": 0.5606, "step": 7331 }, { "epoch": 0.5548030721501267, "grad_norm": 2.118800163269043, "learning_rate": 8.11406350039903e-06, "loss": 0.7093, "step": 7332 }, { "epoch": 0.5548787408724604, "grad_norm": 2.5664494037628174, "learning_rate": 8.11182224949405e-06, "loss": 0.7353, "step": 7333 }, { "epoch": 0.554954409594794, "grad_norm": 2.0426931381225586, "learning_rate": 8.109581054209633e-06, "loss": 0.6415, "step": 7334 }, { "epoch": 0.5550300783171276, "grad_norm": 1.8999990224838257, "learning_rate": 8.10733991468613e-06, "loss": 0.6621, "step": 7335 }, { "epoch": 0.5551057470394613, "grad_norm": 2.4535303115844727, "learning_rate": 8.105098831063887e-06, "loss": 0.6392, "step": 7336 }, { "epoch": 0.5551814157617949, "grad_norm": 1.9093875885009766, "learning_rate": 8.102857803483254e-06, "loss": 0.6715, "step": 7337 }, { "epoch": 0.5552570844841285, "grad_norm": 1.9944705963134766, "learning_rate": 8.100616832084564e-06, "loss": 0.6541, "step": 7338 }, { "epoch": 0.5553327532064621, "grad_norm": 1.7408812046051025, "learning_rate": 8.09837591700816e-06, "loss": 0.7232, "step": 7339 }, { "epoch": 0.5554084219287957, "grad_norm": 2.030344247817993, "learning_rate": 8.09613505839437e-06, "loss": 0.7087, "step": 7340 }, { "epoch": 0.5554840906511294, "grad_norm": 1.8891116380691528, "learning_rate": 8.093894256383525e-06, "loss": 0.737, "step": 7341 }, { "epoch": 0.555559759373463, "grad_norm": 1.9823884963989258, "learning_rate": 8.091653511115954e-06, "loss": 0.7039, "step": 7342 }, { "epoch": 0.5556354280957966, "grad_norm": 2.3397977352142334, "learning_rate": 8.089412822731979e-06, "loss": 0.7968, "step": 7343 }, { "epoch": 0.5557110968181302, "grad_norm": 1.816161870956421, "learning_rate": 8.087172191371917e-06, "loss": 0.6302, "step": 7344 }, { "epoch": 0.5557867655404638, "grad_norm": 2.0150673389434814, "learning_rate": 8.084931617176084e-06, "loss": 0.5506, "step": 7345 }, { "epoch": 0.5558624342627975, "grad_norm": 2.0042967796325684, "learning_rate": 8.082691100284796e-06, "loss": 0.6103, "step": 7346 }, { "epoch": 0.5559381029851311, "grad_norm": 1.9429584741592407, "learning_rate": 8.080450640838353e-06, "loss": 0.8005, "step": 7347 }, { "epoch": 0.5560137717074647, "grad_norm": 1.7641698122024536, "learning_rate": 8.078210238977067e-06, "loss": 0.6253, "step": 7348 }, { "epoch": 0.5560894404297984, "grad_norm": 3.233842372894287, "learning_rate": 8.075969894841239e-06, "loss": 0.6349, "step": 7349 }, { "epoch": 0.556165109152132, "grad_norm": 1.9746023416519165, "learning_rate": 8.073729608571166e-06, "loss": 0.6736, "step": 7350 }, { "epoch": 0.5562407778744656, "grad_norm": 2.0112850666046143, "learning_rate": 8.071489380307138e-06, "loss": 0.7653, "step": 7351 }, { "epoch": 0.5563164465967992, "grad_norm": 2.242575168609619, "learning_rate": 8.069249210189447e-06, "loss": 0.741, "step": 7352 }, { "epoch": 0.5563921153191328, "grad_norm": 1.929771065711975, "learning_rate": 8.067009098358384e-06, "loss": 0.6086, "step": 7353 }, { "epoch": 0.5564677840414665, "grad_norm": 2.020859718322754, "learning_rate": 8.064769044954229e-06, "loss": 0.7699, "step": 7354 }, { "epoch": 0.5565434527638001, "grad_norm": 2.042544364929199, "learning_rate": 8.06252905011726e-06, "loss": 0.6731, "step": 7355 }, { "epoch": 0.5566191214861337, "grad_norm": 2.6606943607330322, "learning_rate": 8.060289113987754e-06, "loss": 0.6402, "step": 7356 }, { "epoch": 0.5566947902084673, "grad_norm": 1.789626955986023, "learning_rate": 8.058049236705982e-06, "loss": 0.8114, "step": 7357 }, { "epoch": 0.5567704589308009, "grad_norm": 1.9699000120162964, "learning_rate": 8.055809418412215e-06, "loss": 0.7457, "step": 7358 }, { "epoch": 0.5568461276531346, "grad_norm": 2.0746593475341797, "learning_rate": 8.053569659246716e-06, "loss": 0.855, "step": 7359 }, { "epoch": 0.5569217963754682, "grad_norm": 2.2033884525299072, "learning_rate": 8.051329959349748e-06, "loss": 0.6439, "step": 7360 }, { "epoch": 0.5569974650978018, "grad_norm": 1.9378942251205444, "learning_rate": 8.049090318861563e-06, "loss": 0.7654, "step": 7361 }, { "epoch": 0.5570731338201355, "grad_norm": 2.35151743888855, "learning_rate": 8.046850737922418e-06, "loss": 0.7722, "step": 7362 }, { "epoch": 0.5571488025424691, "grad_norm": 2.5247981548309326, "learning_rate": 8.044611216672562e-06, "loss": 0.6724, "step": 7363 }, { "epoch": 0.5572244712648027, "grad_norm": 2.331882953643799, "learning_rate": 8.042371755252245e-06, "loss": 0.6965, "step": 7364 }, { "epoch": 0.5573001399871363, "grad_norm": 2.167255401611328, "learning_rate": 8.040132353801705e-06, "loss": 0.7462, "step": 7365 }, { "epoch": 0.5573758087094699, "grad_norm": 2.5408060550689697, "learning_rate": 8.037893012461182e-06, "loss": 0.6558, "step": 7366 }, { "epoch": 0.5574514774318036, "grad_norm": 1.810550332069397, "learning_rate": 8.035653731370906e-06, "loss": 0.6378, "step": 7367 }, { "epoch": 0.5575271461541372, "grad_norm": 2.1500096321105957, "learning_rate": 8.033414510671117e-06, "loss": 0.6658, "step": 7368 }, { "epoch": 0.5576028148764708, "grad_norm": 2.4346792697906494, "learning_rate": 8.031175350502037e-06, "loss": 0.6172, "step": 7369 }, { "epoch": 0.5576784835988045, "grad_norm": 2.380704641342163, "learning_rate": 8.02893625100389e-06, "loss": 0.6924, "step": 7370 }, { "epoch": 0.557754152321138, "grad_norm": 1.7149864435195923, "learning_rate": 8.026697212316896e-06, "loss": 0.7118, "step": 7371 }, { "epoch": 0.5578298210434717, "grad_norm": 1.924574851989746, "learning_rate": 8.02445823458127e-06, "loss": 0.6955, "step": 7372 }, { "epoch": 0.5579054897658053, "grad_norm": 2.193007707595825, "learning_rate": 8.022219317937223e-06, "loss": 0.6082, "step": 7373 }, { "epoch": 0.5579811584881389, "grad_norm": 1.9797512292861938, "learning_rate": 8.019980462524968e-06, "loss": 0.84, "step": 7374 }, { "epoch": 0.5580568272104726, "grad_norm": 2.113743543624878, "learning_rate": 8.017741668484704e-06, "loss": 0.8963, "step": 7375 }, { "epoch": 0.5581324959328062, "grad_norm": 2.2391629219055176, "learning_rate": 8.015502935956636e-06, "loss": 0.7656, "step": 7376 }, { "epoch": 0.5582081646551398, "grad_norm": 2.6514220237731934, "learning_rate": 8.013264265080955e-06, "loss": 0.673, "step": 7377 }, { "epoch": 0.5582838333774734, "grad_norm": 2.256282091140747, "learning_rate": 8.011025655997858e-06, "loss": 0.7703, "step": 7378 }, { "epoch": 0.558359502099807, "grad_norm": 2.2570066452026367, "learning_rate": 8.008787108847533e-06, "loss": 0.7589, "step": 7379 }, { "epoch": 0.5584351708221407, "grad_norm": 2.5450544357299805, "learning_rate": 8.006548623770168e-06, "loss": 0.6944, "step": 7380 }, { "epoch": 0.5585108395444743, "grad_norm": 2.3593268394470215, "learning_rate": 8.00431020090594e-06, "loss": 0.6387, "step": 7381 }, { "epoch": 0.5585865082668079, "grad_norm": 1.9944931268692017, "learning_rate": 8.002071840395026e-06, "loss": 0.6667, "step": 7382 }, { "epoch": 0.5586621769891416, "grad_norm": 1.908489465713501, "learning_rate": 7.999833542377605e-06, "loss": 0.635, "step": 7383 }, { "epoch": 0.5587378457114751, "grad_norm": 2.8577051162719727, "learning_rate": 7.997595306993838e-06, "loss": 0.6809, "step": 7384 }, { "epoch": 0.5588135144338088, "grad_norm": 2.8183300495147705, "learning_rate": 7.995357134383898e-06, "loss": 0.7129, "step": 7385 }, { "epoch": 0.5588891831561424, "grad_norm": 2.3807883262634277, "learning_rate": 7.993119024687943e-06, "loss": 0.6761, "step": 7386 }, { "epoch": 0.558964851878476, "grad_norm": 1.9848699569702148, "learning_rate": 7.990880978046132e-06, "loss": 0.5648, "step": 7387 }, { "epoch": 0.5590405206008097, "grad_norm": 1.9537297487258911, "learning_rate": 7.988642994598616e-06, "loss": 0.6346, "step": 7388 }, { "epoch": 0.5591161893231433, "grad_norm": 2.2345707416534424, "learning_rate": 7.986405074485547e-06, "loss": 0.6992, "step": 7389 }, { "epoch": 0.5591918580454769, "grad_norm": 1.7011851072311401, "learning_rate": 7.984167217847072e-06, "loss": 0.6245, "step": 7390 }, { "epoch": 0.5592675267678106, "grad_norm": 1.5342859029769897, "learning_rate": 7.98192942482333e-06, "loss": 0.71, "step": 7391 }, { "epoch": 0.5593431954901441, "grad_norm": 1.9042041301727295, "learning_rate": 7.979691695554464e-06, "loss": 0.8046, "step": 7392 }, { "epoch": 0.5594188642124778, "grad_norm": 4.508663654327393, "learning_rate": 7.977454030180597e-06, "loss": 0.661, "step": 7393 }, { "epoch": 0.5594945329348114, "grad_norm": 2.309535503387451, "learning_rate": 7.975216428841871e-06, "loss": 0.7014, "step": 7394 }, { "epoch": 0.559570201657145, "grad_norm": 2.226212739944458, "learning_rate": 7.972978891678407e-06, "loss": 0.6736, "step": 7395 }, { "epoch": 0.5596458703794787, "grad_norm": 2.99662446975708, "learning_rate": 7.970741418830327e-06, "loss": 0.7326, "step": 7396 }, { "epoch": 0.5597215391018122, "grad_norm": 2.0158193111419678, "learning_rate": 7.968504010437746e-06, "loss": 0.7136, "step": 7397 }, { "epoch": 0.5597972078241459, "grad_norm": 2.631486654281616, "learning_rate": 7.966266666640778e-06, "loss": 0.7801, "step": 7398 }, { "epoch": 0.5598728765464795, "grad_norm": 2.236232280731201, "learning_rate": 7.96402938757954e-06, "loss": 0.7771, "step": 7399 }, { "epoch": 0.5599485452688131, "grad_norm": 1.8148475885391235, "learning_rate": 7.96179217339413e-06, "loss": 0.6843, "step": 7400 }, { "epoch": 0.5600242139911468, "grad_norm": 2.4410221576690674, "learning_rate": 7.959555024224654e-06, "loss": 0.606, "step": 7401 }, { "epoch": 0.5600998827134804, "grad_norm": 1.8520560264587402, "learning_rate": 7.957317940211205e-06, "loss": 0.6385, "step": 7402 }, { "epoch": 0.560175551435814, "grad_norm": 3.167067050933838, "learning_rate": 7.955080921493879e-06, "loss": 0.7738, "step": 7403 }, { "epoch": 0.5602512201581477, "grad_norm": 1.9839564561843872, "learning_rate": 7.952843968212768e-06, "loss": 0.8493, "step": 7404 }, { "epoch": 0.5603268888804812, "grad_norm": 3.0600149631500244, "learning_rate": 7.950607080507951e-06, "loss": 0.6369, "step": 7405 }, { "epoch": 0.5604025576028149, "grad_norm": 2.1107337474823, "learning_rate": 7.948370258519519e-06, "loss": 0.6013, "step": 7406 }, { "epoch": 0.5604782263251485, "grad_norm": 2.616274118423462, "learning_rate": 7.946133502387537e-06, "loss": 0.731, "step": 7407 }, { "epoch": 0.5605538950474821, "grad_norm": 2.0208301544189453, "learning_rate": 7.943896812252083e-06, "loss": 0.6894, "step": 7408 }, { "epoch": 0.5606295637698158, "grad_norm": 4.073803424835205, "learning_rate": 7.941660188253228e-06, "loss": 0.5672, "step": 7409 }, { "epoch": 0.5607052324921493, "grad_norm": 2.3524041175842285, "learning_rate": 7.939423630531038e-06, "loss": 0.7285, "step": 7410 }, { "epoch": 0.560780901214483, "grad_norm": 2.553096294403076, "learning_rate": 7.937187139225567e-06, "loss": 0.708, "step": 7411 }, { "epoch": 0.5608565699368167, "grad_norm": 2.174004554748535, "learning_rate": 7.93495071447688e-06, "loss": 0.6833, "step": 7412 }, { "epoch": 0.5609322386591502, "grad_norm": 2.332759380340576, "learning_rate": 7.932714356425018e-06, "loss": 0.6054, "step": 7413 }, { "epoch": 0.5610079073814839, "grad_norm": 2.4970386028289795, "learning_rate": 7.930478065210035e-06, "loss": 0.7808, "step": 7414 }, { "epoch": 0.5610835761038175, "grad_norm": 1.955387830734253, "learning_rate": 7.92824184097198e-06, "loss": 0.7495, "step": 7415 }, { "epoch": 0.5611592448261511, "grad_norm": 1.921034574508667, "learning_rate": 7.926005683850883e-06, "loss": 0.7805, "step": 7416 }, { "epoch": 0.5612349135484848, "grad_norm": 2.5919716358184814, "learning_rate": 7.923769593986788e-06, "loss": 0.6509, "step": 7417 }, { "epoch": 0.5613105822708183, "grad_norm": 2.1961042881011963, "learning_rate": 7.921533571519717e-06, "loss": 0.6006, "step": 7418 }, { "epoch": 0.561386250993152, "grad_norm": 2.0641181468963623, "learning_rate": 7.919297616589703e-06, "loss": 0.6438, "step": 7419 }, { "epoch": 0.5614619197154856, "grad_norm": 2.4737486839294434, "learning_rate": 7.917061729336771e-06, "loss": 0.7045, "step": 7420 }, { "epoch": 0.5615375884378192, "grad_norm": 2.176301956176758, "learning_rate": 7.914825909900935e-06, "loss": 0.7689, "step": 7421 }, { "epoch": 0.5616132571601529, "grad_norm": 2.713766574859619, "learning_rate": 7.91259015842221e-06, "loss": 0.7377, "step": 7422 }, { "epoch": 0.5616889258824864, "grad_norm": 1.8123505115509033, "learning_rate": 7.910354475040606e-06, "loss": 0.7732, "step": 7423 }, { "epoch": 0.5617645946048201, "grad_norm": 2.313922643661499, "learning_rate": 7.908118859896127e-06, "loss": 0.658, "step": 7424 }, { "epoch": 0.5618402633271538, "grad_norm": 2.6944148540496826, "learning_rate": 7.905883313128779e-06, "loss": 0.6323, "step": 7425 }, { "epoch": 0.5619159320494873, "grad_norm": 2.4101080894470215, "learning_rate": 7.903647834878557e-06, "loss": 0.7163, "step": 7426 }, { "epoch": 0.561991600771821, "grad_norm": 2.65920352935791, "learning_rate": 7.901412425285453e-06, "loss": 0.776, "step": 7427 }, { "epoch": 0.5620672694941546, "grad_norm": 2.0206611156463623, "learning_rate": 7.899177084489457e-06, "loss": 0.6004, "step": 7428 }, { "epoch": 0.5621429382164882, "grad_norm": 2.1238224506378174, "learning_rate": 7.89694181263055e-06, "loss": 0.7055, "step": 7429 }, { "epoch": 0.5622186069388219, "grad_norm": 2.1638028621673584, "learning_rate": 7.894706609848717e-06, "loss": 0.7816, "step": 7430 }, { "epoch": 0.5622942756611554, "grad_norm": 2.223090171813965, "learning_rate": 7.89247147628393e-06, "loss": 0.6012, "step": 7431 }, { "epoch": 0.5623699443834891, "grad_norm": 3.290097951889038, "learning_rate": 7.890236412076162e-06, "loss": 0.7829, "step": 7432 }, { "epoch": 0.5624456131058228, "grad_norm": 1.7183538675308228, "learning_rate": 7.888001417365379e-06, "loss": 0.6503, "step": 7433 }, { "epoch": 0.5625212818281563, "grad_norm": 2.0650956630706787, "learning_rate": 7.885766492291543e-06, "loss": 0.7117, "step": 7434 }, { "epoch": 0.56259695055049, "grad_norm": 2.289998769760132, "learning_rate": 7.883531636994612e-06, "loss": 0.8198, "step": 7435 }, { "epoch": 0.5626726192728235, "grad_norm": 2.176126718521118, "learning_rate": 7.881296851614544e-06, "loss": 0.7623, "step": 7436 }, { "epoch": 0.5627482879951572, "grad_norm": 2.0864546298980713, "learning_rate": 7.879062136291284e-06, "loss": 0.7213, "step": 7437 }, { "epoch": 0.5628239567174909, "grad_norm": 2.1789047718048096, "learning_rate": 7.876827491164778e-06, "loss": 0.638, "step": 7438 }, { "epoch": 0.5628996254398244, "grad_norm": 2.4413700103759766, "learning_rate": 7.874592916374966e-06, "loss": 0.5869, "step": 7439 }, { "epoch": 0.5629752941621581, "grad_norm": 2.1864519119262695, "learning_rate": 7.87235841206179e-06, "loss": 0.7605, "step": 7440 }, { "epoch": 0.5630509628844917, "grad_norm": 2.2284204959869385, "learning_rate": 7.870123978365174e-06, "loss": 0.7422, "step": 7441 }, { "epoch": 0.5631266316068253, "grad_norm": 1.9402129650115967, "learning_rate": 7.867889615425052e-06, "loss": 0.6012, "step": 7442 }, { "epoch": 0.563202300329159, "grad_norm": 1.827885627746582, "learning_rate": 7.865655323381342e-06, "loss": 0.6708, "step": 7443 }, { "epoch": 0.5632779690514925, "grad_norm": 2.1613986492156982, "learning_rate": 7.863421102373963e-06, "loss": 0.5724, "step": 7444 }, { "epoch": 0.5633536377738262, "grad_norm": 2.0706324577331543, "learning_rate": 7.861186952542832e-06, "loss": 0.8642, "step": 7445 }, { "epoch": 0.5634293064961599, "grad_norm": 2.373830795288086, "learning_rate": 7.85895287402786e-06, "loss": 0.6601, "step": 7446 }, { "epoch": 0.5635049752184934, "grad_norm": 1.8708213567733765, "learning_rate": 7.856718866968947e-06, "loss": 0.6013, "step": 7447 }, { "epoch": 0.5635806439408271, "grad_norm": 2.5757904052734375, "learning_rate": 7.854484931505997e-06, "loss": 0.7932, "step": 7448 }, { "epoch": 0.5636563126631606, "grad_norm": 1.908565640449524, "learning_rate": 7.852251067778903e-06, "loss": 0.6691, "step": 7449 }, { "epoch": 0.5637319813854943, "grad_norm": 4.402620792388916, "learning_rate": 7.850017275927563e-06, "loss": 0.7278, "step": 7450 }, { "epoch": 0.563807650107828, "grad_norm": 2.60870623588562, "learning_rate": 7.847783556091858e-06, "loss": 0.7348, "step": 7451 }, { "epoch": 0.5638833188301615, "grad_norm": 2.4514048099517822, "learning_rate": 7.845549908411676e-06, "loss": 0.8417, "step": 7452 }, { "epoch": 0.5639589875524952, "grad_norm": 2.5910489559173584, "learning_rate": 7.843316333026892e-06, "loss": 0.6838, "step": 7453 }, { "epoch": 0.5640346562748288, "grad_norm": 2.305745840072632, "learning_rate": 7.841082830077378e-06, "loss": 0.4627, "step": 7454 }, { "epoch": 0.5641103249971624, "grad_norm": 3.0249183177948, "learning_rate": 7.838849399703007e-06, "loss": 0.6928, "step": 7455 }, { "epoch": 0.5641859937194961, "grad_norm": 2.0935657024383545, "learning_rate": 7.836616042043643e-06, "loss": 0.6973, "step": 7456 }, { "epoch": 0.5642616624418296, "grad_norm": 2.505596399307251, "learning_rate": 7.834382757239145e-06, "loss": 0.6782, "step": 7457 }, { "epoch": 0.5643373311641633, "grad_norm": 2.2332210540771484, "learning_rate": 7.832149545429372e-06, "loss": 0.7438, "step": 7458 }, { "epoch": 0.564412999886497, "grad_norm": 2.181175470352173, "learning_rate": 7.829916406754166e-06, "loss": 0.6196, "step": 7459 }, { "epoch": 0.5644886686088305, "grad_norm": 1.7004423141479492, "learning_rate": 7.827683341353381e-06, "loss": 0.6966, "step": 7460 }, { "epoch": 0.5645643373311642, "grad_norm": 2.624783992767334, "learning_rate": 7.825450349366859e-06, "loss": 0.8012, "step": 7461 }, { "epoch": 0.5646400060534977, "grad_norm": 1.9809978008270264, "learning_rate": 7.823217430934434e-06, "loss": 0.7437, "step": 7462 }, { "epoch": 0.5647156747758314, "grad_norm": 3.563035011291504, "learning_rate": 7.82098458619594e-06, "loss": 0.6683, "step": 7463 }, { "epoch": 0.5647913434981651, "grad_norm": 2.226811408996582, "learning_rate": 7.818751815291204e-06, "loss": 0.7564, "step": 7464 }, { "epoch": 0.5648670122204986, "grad_norm": 1.755900502204895, "learning_rate": 7.816519118360046e-06, "loss": 0.62, "step": 7465 }, { "epoch": 0.5649426809428323, "grad_norm": 8.155805587768555, "learning_rate": 7.814286495542293e-06, "loss": 0.7387, "step": 7466 }, { "epoch": 0.565018349665166, "grad_norm": 2.6799027919769287, "learning_rate": 7.812053946977755e-06, "loss": 0.583, "step": 7467 }, { "epoch": 0.5650940183874995, "grad_norm": 2.1574854850769043, "learning_rate": 7.80982147280624e-06, "loss": 0.564, "step": 7468 }, { "epoch": 0.5651696871098332, "grad_norm": 2.8113937377929688, "learning_rate": 7.807589073167556e-06, "loss": 0.7773, "step": 7469 }, { "epoch": 0.5652453558321667, "grad_norm": 2.041391611099243, "learning_rate": 7.805356748201497e-06, "loss": 0.7095, "step": 7470 }, { "epoch": 0.5653210245545004, "grad_norm": 2.0056307315826416, "learning_rate": 7.803124498047865e-06, "loss": 0.6397, "step": 7471 }, { "epoch": 0.5653966932768341, "grad_norm": 2.5884063243865967, "learning_rate": 7.80089232284645e-06, "loss": 0.7664, "step": 7472 }, { "epoch": 0.5654723619991676, "grad_norm": 2.3261334896087646, "learning_rate": 7.798660222737033e-06, "loss": 0.7108, "step": 7473 }, { "epoch": 0.5655480307215013, "grad_norm": 1.9324620962142944, "learning_rate": 7.7964281978594e-06, "loss": 0.7209, "step": 7474 }, { "epoch": 0.5656236994438348, "grad_norm": 2.28661847114563, "learning_rate": 7.794196248353323e-06, "loss": 0.6863, "step": 7475 }, { "epoch": 0.5656993681661685, "grad_norm": 3.6437911987304688, "learning_rate": 7.791964374358579e-06, "loss": 0.7256, "step": 7476 }, { "epoch": 0.5657750368885022, "grad_norm": 2.1123297214508057, "learning_rate": 7.789732576014934e-06, "loss": 0.7905, "step": 7477 }, { "epoch": 0.5658507056108357, "grad_norm": 2.2938523292541504, "learning_rate": 7.787500853462149e-06, "loss": 0.579, "step": 7478 }, { "epoch": 0.5659263743331694, "grad_norm": 2.4166362285614014, "learning_rate": 7.785269206839984e-06, "loss": 0.6481, "step": 7479 }, { "epoch": 0.5660020430555031, "grad_norm": 1.8204790353775024, "learning_rate": 7.783037636288185e-06, "loss": 0.7452, "step": 7480 }, { "epoch": 0.5660777117778366, "grad_norm": 2.0831942558288574, "learning_rate": 7.78080614194651e-06, "loss": 0.5121, "step": 7481 }, { "epoch": 0.5661533805001703, "grad_norm": 2.7705237865448, "learning_rate": 7.778574723954695e-06, "loss": 0.7819, "step": 7482 }, { "epoch": 0.5662290492225038, "grad_norm": 2.23626708984375, "learning_rate": 7.776343382452485e-06, "loss": 0.609, "step": 7483 }, { "epoch": 0.5663047179448375, "grad_norm": 2.0028553009033203, "learning_rate": 7.774112117579608e-06, "loss": 0.701, "step": 7484 }, { "epoch": 0.5663803866671712, "grad_norm": 4.554917812347412, "learning_rate": 7.771880929475792e-06, "loss": 0.6932, "step": 7485 }, { "epoch": 0.5664560553895047, "grad_norm": 1.90889310836792, "learning_rate": 7.76964981828077e-06, "loss": 0.7258, "step": 7486 }, { "epoch": 0.5665317241118384, "grad_norm": 2.3061931133270264, "learning_rate": 7.767418784134253e-06, "loss": 0.7325, "step": 7487 }, { "epoch": 0.5666073928341719, "grad_norm": 2.0186426639556885, "learning_rate": 7.76518782717596e-06, "loss": 0.7392, "step": 7488 }, { "epoch": 0.5666830615565056, "grad_norm": 2.2334370613098145, "learning_rate": 7.762956947545598e-06, "loss": 0.7617, "step": 7489 }, { "epoch": 0.5667587302788393, "grad_norm": 2.3827006816864014, "learning_rate": 7.760726145382871e-06, "loss": 0.648, "step": 7490 }, { "epoch": 0.5668343990011728, "grad_norm": 4.122293472290039, "learning_rate": 7.758495420827485e-06, "loss": 0.7135, "step": 7491 }, { "epoch": 0.5669100677235065, "grad_norm": 2.1730690002441406, "learning_rate": 7.75626477401913e-06, "loss": 0.6489, "step": 7492 }, { "epoch": 0.5669857364458402, "grad_norm": 2.0568599700927734, "learning_rate": 7.754034205097497e-06, "loss": 0.7312, "step": 7493 }, { "epoch": 0.5670614051681737, "grad_norm": 2.0819613933563232, "learning_rate": 7.751803714202273e-06, "loss": 0.7455, "step": 7494 }, { "epoch": 0.5671370738905074, "grad_norm": 2.208782434463501, "learning_rate": 7.749573301473133e-06, "loss": 0.6664, "step": 7495 }, { "epoch": 0.5672127426128409, "grad_norm": 2.119398593902588, "learning_rate": 7.74734296704976e-06, "loss": 0.5872, "step": 7496 }, { "epoch": 0.5672884113351746, "grad_norm": 1.802098274230957, "learning_rate": 7.745112711071824e-06, "loss": 0.8714, "step": 7497 }, { "epoch": 0.5673640800575083, "grad_norm": 2.2059485912323, "learning_rate": 7.742882533678988e-06, "loss": 0.7653, "step": 7498 }, { "epoch": 0.5674397487798418, "grad_norm": 3.0473945140838623, "learning_rate": 7.740652435010915e-06, "loss": 0.7615, "step": 7499 }, { "epoch": 0.5675154175021755, "grad_norm": 2.383582830429077, "learning_rate": 7.738422415207257e-06, "loss": 0.8784, "step": 7500 }, { "epoch": 0.567591086224509, "grad_norm": 2.5827648639678955, "learning_rate": 7.736192474407667e-06, "loss": 0.7992, "step": 7501 }, { "epoch": 0.5676667549468427, "grad_norm": 2.1168980598449707, "learning_rate": 7.733962612751795e-06, "loss": 0.7262, "step": 7502 }, { "epoch": 0.5677424236691764, "grad_norm": 1.6200450658798218, "learning_rate": 7.731732830379278e-06, "loss": 0.6776, "step": 7503 }, { "epoch": 0.5678180923915099, "grad_norm": 2.622553825378418, "learning_rate": 7.729503127429755e-06, "loss": 0.7279, "step": 7504 }, { "epoch": 0.5678937611138436, "grad_norm": 1.9916034936904907, "learning_rate": 7.727273504042853e-06, "loss": 0.815, "step": 7505 }, { "epoch": 0.5679694298361773, "grad_norm": 2.347259521484375, "learning_rate": 7.7250439603582e-06, "loss": 0.4899, "step": 7506 }, { "epoch": 0.5680450985585108, "grad_norm": 2.532034397125244, "learning_rate": 7.722814496515418e-06, "loss": 0.7588, "step": 7507 }, { "epoch": 0.5681207672808445, "grad_norm": 2.015144109725952, "learning_rate": 7.720585112654124e-06, "loss": 0.7181, "step": 7508 }, { "epoch": 0.568196436003178, "grad_norm": 2.0816779136657715, "learning_rate": 7.718355808913931e-06, "loss": 0.7219, "step": 7509 }, { "epoch": 0.5682721047255117, "grad_norm": 2.1527559757232666, "learning_rate": 7.71612658543444e-06, "loss": 0.6645, "step": 7510 }, { "epoch": 0.5683477734478454, "grad_norm": 2.143007278442383, "learning_rate": 7.713897442355251e-06, "loss": 0.6128, "step": 7511 }, { "epoch": 0.5684234421701789, "grad_norm": 2.356175184249878, "learning_rate": 7.711668379815969e-06, "loss": 0.7695, "step": 7512 }, { "epoch": 0.5684991108925126, "grad_norm": 2.8489623069763184, "learning_rate": 7.70943939795618e-06, "loss": 0.8197, "step": 7513 }, { "epoch": 0.5685747796148461, "grad_norm": 1.8885236978530884, "learning_rate": 7.707210496915469e-06, "loss": 0.6958, "step": 7514 }, { "epoch": 0.5686504483371798, "grad_norm": 1.7150541543960571, "learning_rate": 7.70498167683342e-06, "loss": 0.6678, "step": 7515 }, { "epoch": 0.5687261170595135, "grad_norm": 2.1104001998901367, "learning_rate": 7.702752937849603e-06, "loss": 0.6665, "step": 7516 }, { "epoch": 0.568801785781847, "grad_norm": 2.0098612308502197, "learning_rate": 7.700524280103593e-06, "loss": 0.7077, "step": 7517 }, { "epoch": 0.5688774545041807, "grad_norm": 2.4959285259246826, "learning_rate": 7.69829570373496e-06, "loss": 0.7196, "step": 7518 }, { "epoch": 0.5689531232265144, "grad_norm": 1.7228821516036987, "learning_rate": 7.696067208883257e-06, "loss": 0.7246, "step": 7519 }, { "epoch": 0.5690287919488479, "grad_norm": 1.951913833618164, "learning_rate": 7.693838795688046e-06, "loss": 0.5224, "step": 7520 }, { "epoch": 0.5691044606711816, "grad_norm": 1.957993745803833, "learning_rate": 7.691610464288869e-06, "loss": 0.6015, "step": 7521 }, { "epoch": 0.5691801293935151, "grad_norm": 2.1833786964416504, "learning_rate": 7.689382214825279e-06, "loss": 0.7048, "step": 7522 }, { "epoch": 0.5692557981158488, "grad_norm": 2.1259801387786865, "learning_rate": 7.687154047436815e-06, "loss": 0.6339, "step": 7523 }, { "epoch": 0.5693314668381825, "grad_norm": 3.2358062267303467, "learning_rate": 7.68492596226301e-06, "loss": 0.9118, "step": 7524 }, { "epoch": 0.569407135560516, "grad_norm": 3.1258087158203125, "learning_rate": 7.682697959443396e-06, "loss": 0.733, "step": 7525 }, { "epoch": 0.5694828042828497, "grad_norm": 1.9816136360168457, "learning_rate": 7.680470039117491e-06, "loss": 0.5748, "step": 7526 }, { "epoch": 0.5695584730051833, "grad_norm": 1.8756901025772095, "learning_rate": 7.678242201424825e-06, "loss": 0.7879, "step": 7527 }, { "epoch": 0.5696341417275169, "grad_norm": 2.385270595550537, "learning_rate": 7.676014446504906e-06, "loss": 0.6642, "step": 7528 }, { "epoch": 0.5697098104498506, "grad_norm": 1.8264906406402588, "learning_rate": 7.673786774497248e-06, "loss": 0.5951, "step": 7529 }, { "epoch": 0.5697854791721841, "grad_norm": 3.006786823272705, "learning_rate": 7.671559185541348e-06, "loss": 0.6021, "step": 7530 }, { "epoch": 0.5698611478945178, "grad_norm": 2.019592523574829, "learning_rate": 7.669331679776708e-06, "loss": 0.6055, "step": 7531 }, { "epoch": 0.5699368166168515, "grad_norm": 2.036536693572998, "learning_rate": 7.667104257342825e-06, "loss": 0.7984, "step": 7532 }, { "epoch": 0.570012485339185, "grad_norm": 2.4801836013793945, "learning_rate": 7.664876918379182e-06, "loss": 0.747, "step": 7533 }, { "epoch": 0.5700881540615187, "grad_norm": 2.761587381362915, "learning_rate": 7.662649663025267e-06, "loss": 0.7959, "step": 7534 }, { "epoch": 0.5701638227838522, "grad_norm": 1.7771096229553223, "learning_rate": 7.660422491420554e-06, "loss": 0.6943, "step": 7535 }, { "epoch": 0.5702394915061859, "grad_norm": 2.084876537322998, "learning_rate": 7.658195403704516e-06, "loss": 0.6666, "step": 7536 }, { "epoch": 0.5703151602285196, "grad_norm": 1.841198444366455, "learning_rate": 7.655968400016624e-06, "loss": 0.791, "step": 7537 }, { "epoch": 0.5703908289508531, "grad_norm": 1.9432196617126465, "learning_rate": 7.653741480496337e-06, "loss": 0.8101, "step": 7538 }, { "epoch": 0.5704664976731868, "grad_norm": 1.6360357999801636, "learning_rate": 7.651514645283116e-06, "loss": 0.6755, "step": 7539 }, { "epoch": 0.5705421663955205, "grad_norm": 2.8515806198120117, "learning_rate": 7.649287894516406e-06, "loss": 0.7672, "step": 7540 }, { "epoch": 0.570617835117854, "grad_norm": 1.9835529327392578, "learning_rate": 7.647061228335656e-06, "loss": 0.763, "step": 7541 }, { "epoch": 0.5706935038401877, "grad_norm": 1.8836452960968018, "learning_rate": 7.644834646880308e-06, "loss": 0.6668, "step": 7542 }, { "epoch": 0.5707691725625212, "grad_norm": 2.047100067138672, "learning_rate": 7.6426081502898e-06, "loss": 0.6978, "step": 7543 }, { "epoch": 0.5708448412848549, "grad_norm": 1.7520688772201538, "learning_rate": 7.640381738703558e-06, "loss": 0.5824, "step": 7544 }, { "epoch": 0.5709205100071886, "grad_norm": 2.1768970489501953, "learning_rate": 7.638155412261011e-06, "loss": 0.8075, "step": 7545 }, { "epoch": 0.5709961787295221, "grad_norm": 2.6738734245300293, "learning_rate": 7.635929171101575e-06, "loss": 0.7113, "step": 7546 }, { "epoch": 0.5710718474518558, "grad_norm": 1.7277650833129883, "learning_rate": 7.633703015364664e-06, "loss": 0.6632, "step": 7547 }, { "epoch": 0.5711475161741894, "grad_norm": 1.8607732057571411, "learning_rate": 7.631476945189694e-06, "loss": 0.5316, "step": 7548 }, { "epoch": 0.571223184896523, "grad_norm": 2.290065050125122, "learning_rate": 7.629250960716061e-06, "loss": 0.6583, "step": 7549 }, { "epoch": 0.5712988536188567, "grad_norm": 2.1077466011047363, "learning_rate": 7.62702506208317e-06, "loss": 0.7311, "step": 7550 }, { "epoch": 0.5713745223411902, "grad_norm": 2.3623740673065186, "learning_rate": 7.6247992494304075e-06, "loss": 0.6345, "step": 7551 }, { "epoch": 0.5714501910635239, "grad_norm": 2.172419548034668, "learning_rate": 7.622573522897162e-06, "loss": 0.6144, "step": 7552 }, { "epoch": 0.5715258597858576, "grad_norm": 1.9453599452972412, "learning_rate": 7.620347882622821e-06, "loss": 0.8414, "step": 7553 }, { "epoch": 0.5716015285081911, "grad_norm": 2.7051191329956055, "learning_rate": 7.6181223287467574e-06, "loss": 0.6487, "step": 7554 }, { "epoch": 0.5716771972305248, "grad_norm": 2.7036776542663574, "learning_rate": 7.615896861408342e-06, "loss": 0.6815, "step": 7555 }, { "epoch": 0.5717528659528583, "grad_norm": 2.5882720947265625, "learning_rate": 7.613671480746944e-06, "loss": 0.735, "step": 7556 }, { "epoch": 0.571828534675192, "grad_norm": 2.0548622608184814, "learning_rate": 7.611446186901918e-06, "loss": 0.7788, "step": 7557 }, { "epoch": 0.5719042033975257, "grad_norm": 2.1789886951446533, "learning_rate": 7.609220980012624e-06, "loss": 0.672, "step": 7558 }, { "epoch": 0.5719798721198592, "grad_norm": 1.767871618270874, "learning_rate": 7.606995860218413e-06, "loss": 0.6939, "step": 7559 }, { "epoch": 0.5720555408421929, "grad_norm": 2.4886820316314697, "learning_rate": 7.604770827658626e-06, "loss": 0.7978, "step": 7560 }, { "epoch": 0.5721312095645265, "grad_norm": 2.7979044914245605, "learning_rate": 7.602545882472603e-06, "loss": 0.7555, "step": 7561 }, { "epoch": 0.5722068782868601, "grad_norm": 2.692528009414673, "learning_rate": 7.6003210247996736e-06, "loss": 0.7346, "step": 7562 }, { "epoch": 0.5722825470091938, "grad_norm": 1.8994667530059814, "learning_rate": 7.59809625477917e-06, "loss": 0.7628, "step": 7563 }, { "epoch": 0.5723582157315273, "grad_norm": 2.2061333656311035, "learning_rate": 7.595871572550416e-06, "loss": 0.7109, "step": 7564 }, { "epoch": 0.572433884453861, "grad_norm": 2.585219621658325, "learning_rate": 7.593646978252723e-06, "loss": 0.663, "step": 7565 }, { "epoch": 0.5725095531761947, "grad_norm": 2.168567180633545, "learning_rate": 7.591422472025408e-06, "loss": 0.6996, "step": 7566 }, { "epoch": 0.5725852218985282, "grad_norm": 2.5026702880859375, "learning_rate": 7.589198054007769e-06, "loss": 0.5515, "step": 7567 }, { "epoch": 0.5726608906208619, "grad_norm": 2.055335760116577, "learning_rate": 7.5869737243391125e-06, "loss": 0.6029, "step": 7568 }, { "epoch": 0.5727365593431955, "grad_norm": 1.7085371017456055, "learning_rate": 7.584749483158733e-06, "loss": 0.6204, "step": 7569 }, { "epoch": 0.5728122280655291, "grad_norm": 2.084444522857666, "learning_rate": 7.582525330605918e-06, "loss": 0.7365, "step": 7570 }, { "epoch": 0.5728878967878628, "grad_norm": 2.4518747329711914, "learning_rate": 7.580301266819951e-06, "loss": 0.7121, "step": 7571 }, { "epoch": 0.5729635655101963, "grad_norm": 2.6568686962127686, "learning_rate": 7.578077291940109e-06, "loss": 0.79, "step": 7572 }, { "epoch": 0.57303923423253, "grad_norm": 1.885642409324646, "learning_rate": 7.575853406105669e-06, "loss": 0.67, "step": 7573 }, { "epoch": 0.5731149029548636, "grad_norm": 2.200899362564087, "learning_rate": 7.573629609455893e-06, "loss": 0.7487, "step": 7574 }, { "epoch": 0.5731905716771972, "grad_norm": 1.83932626247406, "learning_rate": 7.571405902130047e-06, "loss": 0.8097, "step": 7575 }, { "epoch": 0.5732662403995309, "grad_norm": 1.8516623973846436, "learning_rate": 7.569182284267382e-06, "loss": 0.7444, "step": 7576 }, { "epoch": 0.5733419091218644, "grad_norm": 2.9464001655578613, "learning_rate": 7.566958756007148e-06, "loss": 0.7044, "step": 7577 }, { "epoch": 0.5734175778441981, "grad_norm": 2.2606701850891113, "learning_rate": 7.5647353174885956e-06, "loss": 0.7526, "step": 7578 }, { "epoch": 0.5734932465665318, "grad_norm": 1.8875621557235718, "learning_rate": 7.5625119688509575e-06, "loss": 0.5802, "step": 7579 }, { "epoch": 0.5735689152888653, "grad_norm": 2.271878957748413, "learning_rate": 7.560288710233472e-06, "loss": 0.7184, "step": 7580 }, { "epoch": 0.573644584011199, "grad_norm": 2.303473711013794, "learning_rate": 7.558065541775362e-06, "loss": 0.8383, "step": 7581 }, { "epoch": 0.5737202527335326, "grad_norm": 1.9427469968795776, "learning_rate": 7.555842463615853e-06, "loss": 0.7299, "step": 7582 }, { "epoch": 0.5737959214558662, "grad_norm": 1.7646313905715942, "learning_rate": 7.553619475894155e-06, "loss": 0.6953, "step": 7583 }, { "epoch": 0.5738715901781999, "grad_norm": 2.1616227626800537, "learning_rate": 7.551396578749487e-06, "loss": 0.8009, "step": 7584 }, { "epoch": 0.5739472589005334, "grad_norm": 1.9275579452514648, "learning_rate": 7.5491737723210515e-06, "loss": 0.7245, "step": 7585 }, { "epoch": 0.5740229276228671, "grad_norm": 1.933892011642456, "learning_rate": 7.546951056748047e-06, "loss": 0.7036, "step": 7586 }, { "epoch": 0.5740985963452007, "grad_norm": 1.7795796394348145, "learning_rate": 7.544728432169666e-06, "loss": 0.6322, "step": 7587 }, { "epoch": 0.5741742650675343, "grad_norm": 1.7775262594223022, "learning_rate": 7.542505898725095e-06, "loss": 0.6999, "step": 7588 }, { "epoch": 0.574249933789868, "grad_norm": 1.9181798696517944, "learning_rate": 7.540283456553523e-06, "loss": 0.5708, "step": 7589 }, { "epoch": 0.5743256025122015, "grad_norm": 4.439153671264648, "learning_rate": 7.538061105794121e-06, "loss": 0.8357, "step": 7590 }, { "epoch": 0.5744012712345352, "grad_norm": 1.863204836845398, "learning_rate": 7.5358388465860625e-06, "loss": 0.5036, "step": 7591 }, { "epoch": 0.5744769399568689, "grad_norm": 2.4404690265655518, "learning_rate": 7.533616679068508e-06, "loss": 0.8833, "step": 7592 }, { "epoch": 0.5745526086792024, "grad_norm": 2.3636746406555176, "learning_rate": 7.53139460338062e-06, "loss": 0.6487, "step": 7593 }, { "epoch": 0.5746282774015361, "grad_norm": 2.188185691833496, "learning_rate": 7.5291726196615545e-06, "loss": 0.6516, "step": 7594 }, { "epoch": 0.5747039461238697, "grad_norm": 2.0827059745788574, "learning_rate": 7.526950728050455e-06, "loss": 0.6179, "step": 7595 }, { "epoch": 0.5747796148462033, "grad_norm": 2.1536214351654053, "learning_rate": 7.524728928686468e-06, "loss": 0.5059, "step": 7596 }, { "epoch": 0.574855283568537, "grad_norm": 1.9691141843795776, "learning_rate": 7.522507221708724e-06, "loss": 0.6105, "step": 7597 }, { "epoch": 0.5749309522908705, "grad_norm": 2.0234973430633545, "learning_rate": 7.520285607256354e-06, "loss": 0.7116, "step": 7598 }, { "epoch": 0.5750066210132042, "grad_norm": 2.0433642864227295, "learning_rate": 7.51806408546849e-06, "loss": 0.6751, "step": 7599 }, { "epoch": 0.5750822897355378, "grad_norm": 2.2238693237304688, "learning_rate": 7.515842656484246e-06, "loss": 0.5849, "step": 7600 }, { "epoch": 0.5751579584578714, "grad_norm": 2.1468698978424072, "learning_rate": 7.513621320442734e-06, "loss": 0.6573, "step": 7601 }, { "epoch": 0.5752336271802051, "grad_norm": 1.8017033338546753, "learning_rate": 7.5114000774830645e-06, "loss": 0.7018, "step": 7602 }, { "epoch": 0.5753092959025387, "grad_norm": 3.9429688453674316, "learning_rate": 7.509178927744331e-06, "loss": 0.676, "step": 7603 }, { "epoch": 0.5753849646248723, "grad_norm": 2.290649175643921, "learning_rate": 7.506957871365639e-06, "loss": 0.6806, "step": 7604 }, { "epoch": 0.575460633347206, "grad_norm": 2.842364549636841, "learning_rate": 7.504736908486076e-06, "loss": 0.8284, "step": 7605 }, { "epoch": 0.5755363020695395, "grad_norm": 1.501198172569275, "learning_rate": 7.502516039244721e-06, "loss": 0.7676, "step": 7606 }, { "epoch": 0.5756119707918732, "grad_norm": 1.9701095819473267, "learning_rate": 7.500295263780658e-06, "loss": 0.7604, "step": 7607 }, { "epoch": 0.5756876395142068, "grad_norm": 2.247490644454956, "learning_rate": 7.498074582232952e-06, "loss": 0.895, "step": 7608 }, { "epoch": 0.5757633082365404, "grad_norm": 1.9799500703811646, "learning_rate": 7.4958539947406755e-06, "loss": 0.7798, "step": 7609 }, { "epoch": 0.5758389769588741, "grad_norm": 2.8931593894958496, "learning_rate": 7.493633501442889e-06, "loss": 0.6262, "step": 7610 }, { "epoch": 0.5759146456812076, "grad_norm": 2.1218535900115967, "learning_rate": 7.4914131024786425e-06, "loss": 0.7037, "step": 7611 }, { "epoch": 0.5759903144035413, "grad_norm": 2.6001503467559814, "learning_rate": 7.4891927979869885e-06, "loss": 0.6523, "step": 7612 }, { "epoch": 0.5760659831258749, "grad_norm": 2.1940317153930664, "learning_rate": 7.486972588106963e-06, "loss": 0.6337, "step": 7613 }, { "epoch": 0.5761416518482085, "grad_norm": 1.9558513164520264, "learning_rate": 7.4847524729776135e-06, "loss": 0.7347, "step": 7614 }, { "epoch": 0.5762173205705422, "grad_norm": 1.9365533590316772, "learning_rate": 7.4825324527379625e-06, "loss": 0.7676, "step": 7615 }, { "epoch": 0.5762929892928758, "grad_norm": 2.3588614463806152, "learning_rate": 7.48031252752704e-06, "loss": 0.5735, "step": 7616 }, { "epoch": 0.5763686580152094, "grad_norm": 1.7206127643585205, "learning_rate": 7.4780926974838605e-06, "loss": 0.6839, "step": 7617 }, { "epoch": 0.5764443267375431, "grad_norm": 2.759472370147705, "learning_rate": 7.4758729627474395e-06, "loss": 0.6453, "step": 7618 }, { "epoch": 0.5765199954598766, "grad_norm": 2.1076037883758545, "learning_rate": 7.473653323456781e-06, "loss": 0.6109, "step": 7619 }, { "epoch": 0.5765956641822103, "grad_norm": 1.9081132411956787, "learning_rate": 7.471433779750889e-06, "loss": 0.7941, "step": 7620 }, { "epoch": 0.5766713329045439, "grad_norm": 1.8242427110671997, "learning_rate": 7.4692143317687595e-06, "loss": 0.6116, "step": 7621 }, { "epoch": 0.5767470016268775, "grad_norm": 2.058173179626465, "learning_rate": 7.466994979649378e-06, "loss": 0.6837, "step": 7622 }, { "epoch": 0.5768226703492112, "grad_norm": 2.6424033641815186, "learning_rate": 7.464775723531731e-06, "loss": 0.7541, "step": 7623 }, { "epoch": 0.5768983390715448, "grad_norm": 1.771959900856018, "learning_rate": 7.46255656355479e-06, "loss": 0.6146, "step": 7624 }, { "epoch": 0.5769740077938784, "grad_norm": 2.1034512519836426, "learning_rate": 7.460337499857531e-06, "loss": 0.7532, "step": 7625 }, { "epoch": 0.577049676516212, "grad_norm": 2.0289289951324463, "learning_rate": 7.4581185325789204e-06, "loss": 0.7371, "step": 7626 }, { "epoch": 0.5771253452385456, "grad_norm": 2.0849831104278564, "learning_rate": 7.455899661857912e-06, "loss": 0.6457, "step": 7627 }, { "epoch": 0.5772010139608793, "grad_norm": 1.9334015846252441, "learning_rate": 7.453680887833464e-06, "loss": 0.6986, "step": 7628 }, { "epoch": 0.5772766826832129, "grad_norm": 2.172220468521118, "learning_rate": 7.451462210644513e-06, "loss": 0.6897, "step": 7629 }, { "epoch": 0.5773523514055465, "grad_norm": 2.764195203781128, "learning_rate": 7.449243630430013e-06, "loss": 0.7162, "step": 7630 }, { "epoch": 0.5774280201278802, "grad_norm": 1.864995002746582, "learning_rate": 7.447025147328891e-06, "loss": 0.7502, "step": 7631 }, { "epoch": 0.5775036888502137, "grad_norm": 1.9428197145462036, "learning_rate": 7.444806761480079e-06, "loss": 0.7187, "step": 7632 }, { "epoch": 0.5775793575725474, "grad_norm": 1.9613124132156372, "learning_rate": 7.442588473022497e-06, "loss": 0.8163, "step": 7633 }, { "epoch": 0.577655026294881, "grad_norm": 1.6671011447906494, "learning_rate": 7.440370282095059e-06, "loss": 0.7783, "step": 7634 }, { "epoch": 0.5777306950172146, "grad_norm": 1.9171720743179321, "learning_rate": 7.438152188836682e-06, "loss": 0.8034, "step": 7635 }, { "epoch": 0.5778063637395483, "grad_norm": 2.3247179985046387, "learning_rate": 7.435934193386265e-06, "loss": 0.6332, "step": 7636 }, { "epoch": 0.5778820324618819, "grad_norm": 2.2075064182281494, "learning_rate": 7.433716295882709e-06, "loss": 0.725, "step": 7637 }, { "epoch": 0.5779577011842155, "grad_norm": 2.2133593559265137, "learning_rate": 7.431498496464904e-06, "loss": 0.8622, "step": 7638 }, { "epoch": 0.5780333699065491, "grad_norm": 1.9004621505737305, "learning_rate": 7.4292807952717325e-06, "loss": 0.6223, "step": 7639 }, { "epoch": 0.5781090386288827, "grad_norm": 2.1120524406433105, "learning_rate": 7.427063192442083e-06, "loss": 0.6237, "step": 7640 }, { "epoch": 0.5781847073512164, "grad_norm": 1.8914631605148315, "learning_rate": 7.424845688114822e-06, "loss": 0.6204, "step": 7641 }, { "epoch": 0.57826037607355, "grad_norm": 1.872819423675537, "learning_rate": 7.42262828242882e-06, "loss": 0.7482, "step": 7642 }, { "epoch": 0.5783360447958836, "grad_norm": 1.7717469930648804, "learning_rate": 7.420410975522935e-06, "loss": 0.7326, "step": 7643 }, { "epoch": 0.5784117135182173, "grad_norm": 2.0166468620300293, "learning_rate": 7.418193767536022e-06, "loss": 0.6824, "step": 7644 }, { "epoch": 0.5784873822405509, "grad_norm": 2.0506820678710938, "learning_rate": 7.4159766586069335e-06, "loss": 0.602, "step": 7645 }, { "epoch": 0.5785630509628845, "grad_norm": 1.8485995531082153, "learning_rate": 7.413759648874512e-06, "loss": 0.7065, "step": 7646 }, { "epoch": 0.5786387196852181, "grad_norm": 3.743983030319214, "learning_rate": 7.411542738477589e-06, "loss": 0.777, "step": 7647 }, { "epoch": 0.5787143884075517, "grad_norm": 2.7498581409454346, "learning_rate": 7.409325927555001e-06, "loss": 0.7437, "step": 7648 }, { "epoch": 0.5787900571298854, "grad_norm": 1.9643309116363525, "learning_rate": 7.4071092162455635e-06, "loss": 0.7352, "step": 7649 }, { "epoch": 0.578865725852219, "grad_norm": 1.9491885900497437, "learning_rate": 7.4048926046881e-06, "loss": 0.7209, "step": 7650 }, { "epoch": 0.5789413945745526, "grad_norm": 1.8065403699874878, "learning_rate": 7.402676093021424e-06, "loss": 0.728, "step": 7651 }, { "epoch": 0.5790170632968862, "grad_norm": 1.9861661195755005, "learning_rate": 7.400459681384335e-06, "loss": 0.5903, "step": 7652 }, { "epoch": 0.5790927320192198, "grad_norm": 1.8976181745529175, "learning_rate": 7.398243369915636e-06, "loss": 0.7683, "step": 7653 }, { "epoch": 0.5791684007415535, "grad_norm": 1.8493512868881226, "learning_rate": 7.396027158754114e-06, "loss": 0.5691, "step": 7654 }, { "epoch": 0.5792440694638871, "grad_norm": 1.9040534496307373, "learning_rate": 7.393811048038561e-06, "loss": 0.7016, "step": 7655 }, { "epoch": 0.5793197381862207, "grad_norm": 1.8322153091430664, "learning_rate": 7.391595037907758e-06, "loss": 0.725, "step": 7656 }, { "epoch": 0.5793954069085544, "grad_norm": 2.335909128189087, "learning_rate": 7.389379128500474e-06, "loss": 0.7147, "step": 7657 }, { "epoch": 0.579471075630888, "grad_norm": 2.1806111335754395, "learning_rate": 7.3871633199554775e-06, "loss": 0.5799, "step": 7658 }, { "epoch": 0.5795467443532216, "grad_norm": 2.703801393508911, "learning_rate": 7.384947612411532e-06, "loss": 0.7216, "step": 7659 }, { "epoch": 0.5796224130755552, "grad_norm": 2.5444979667663574, "learning_rate": 7.3827320060073886e-06, "loss": 0.692, "step": 7660 }, { "epoch": 0.5796980817978888, "grad_norm": 2.3260116577148438, "learning_rate": 7.380516500881799e-06, "loss": 0.7942, "step": 7661 }, { "epoch": 0.5797737505202225, "grad_norm": 2.3497567176818848, "learning_rate": 7.378301097173506e-06, "loss": 0.6695, "step": 7662 }, { "epoch": 0.5798494192425561, "grad_norm": 2.0174307823181152, "learning_rate": 7.376085795021241e-06, "loss": 0.6379, "step": 7663 }, { "epoch": 0.5799250879648897, "grad_norm": 3.6419637203216553, "learning_rate": 7.373870594563739e-06, "loss": 0.6858, "step": 7664 }, { "epoch": 0.5800007566872233, "grad_norm": 2.2158446311950684, "learning_rate": 7.3716554959397145e-06, "loss": 0.7634, "step": 7665 }, { "epoch": 0.580076425409557, "grad_norm": 1.666245937347412, "learning_rate": 7.369440499287893e-06, "loss": 0.5982, "step": 7666 }, { "epoch": 0.5801520941318906, "grad_norm": 2.2408547401428223, "learning_rate": 7.367225604746981e-06, "loss": 0.6844, "step": 7667 }, { "epoch": 0.5802277628542242, "grad_norm": 2.1953916549682617, "learning_rate": 7.365010812455683e-06, "loss": 0.803, "step": 7668 }, { "epoch": 0.5803034315765578, "grad_norm": 2.5317487716674805, "learning_rate": 7.362796122552698e-06, "loss": 0.7437, "step": 7669 }, { "epoch": 0.5803791002988915, "grad_norm": 2.728444814682007, "learning_rate": 7.3605815351767105e-06, "loss": 0.725, "step": 7670 }, { "epoch": 0.5804547690212251, "grad_norm": 2.354583740234375, "learning_rate": 7.358367050466411e-06, "loss": 0.7462, "step": 7671 }, { "epoch": 0.5805304377435587, "grad_norm": 1.8923773765563965, "learning_rate": 7.356152668560478e-06, "loss": 0.6499, "step": 7672 }, { "epoch": 0.5806061064658923, "grad_norm": 2.219557523727417, "learning_rate": 7.353938389597583e-06, "loss": 0.6165, "step": 7673 }, { "epoch": 0.580681775188226, "grad_norm": 2.9942805767059326, "learning_rate": 7.351724213716388e-06, "loss": 0.6961, "step": 7674 }, { "epoch": 0.5807574439105596, "grad_norm": 1.9421963691711426, "learning_rate": 7.349510141055552e-06, "loss": 0.7431, "step": 7675 }, { "epoch": 0.5808331126328932, "grad_norm": 2.3083291053771973, "learning_rate": 7.347296171753734e-06, "loss": 0.739, "step": 7676 }, { "epoch": 0.5809087813552268, "grad_norm": 2.1545681953430176, "learning_rate": 7.345082305949572e-06, "loss": 0.6407, "step": 7677 }, { "epoch": 0.5809844500775604, "grad_norm": 2.1147782802581787, "learning_rate": 7.342868543781711e-06, "loss": 0.721, "step": 7678 }, { "epoch": 0.581060118799894, "grad_norm": 2.1288297176361084, "learning_rate": 7.34065488538878e-06, "loss": 0.6971, "step": 7679 }, { "epoch": 0.5811357875222277, "grad_norm": 2.313871145248413, "learning_rate": 7.338441330909405e-06, "loss": 0.7317, "step": 7680 }, { "epoch": 0.5812114562445613, "grad_norm": 2.2980892658233643, "learning_rate": 7.336227880482211e-06, "loss": 0.6622, "step": 7681 }, { "epoch": 0.5812871249668949, "grad_norm": 1.893399715423584, "learning_rate": 7.334014534245808e-06, "loss": 0.6524, "step": 7682 }, { "epoch": 0.5813627936892286, "grad_norm": 2.0858371257781982, "learning_rate": 7.3318012923388046e-06, "loss": 0.7039, "step": 7683 }, { "epoch": 0.5814384624115622, "grad_norm": 1.9755072593688965, "learning_rate": 7.329588154899797e-06, "loss": 0.6282, "step": 7684 }, { "epoch": 0.5815141311338958, "grad_norm": 1.9997296333312988, "learning_rate": 7.327375122067382e-06, "loss": 0.5055, "step": 7685 }, { "epoch": 0.5815897998562294, "grad_norm": 2.181192636489868, "learning_rate": 7.325162193980147e-06, "loss": 0.7401, "step": 7686 }, { "epoch": 0.581665468578563, "grad_norm": 1.5383789539337158, "learning_rate": 7.322949370776675e-06, "loss": 0.7502, "step": 7687 }, { "epoch": 0.5817411373008967, "grad_norm": 2.279628276824951, "learning_rate": 7.320736652595537e-06, "loss": 0.6221, "step": 7688 }, { "epoch": 0.5818168060232303, "grad_norm": 2.0535850524902344, "learning_rate": 7.3185240395753005e-06, "loss": 0.853, "step": 7689 }, { "epoch": 0.5818924747455639, "grad_norm": 1.900155782699585, "learning_rate": 7.316311531854524e-06, "loss": 0.8605, "step": 7690 }, { "epoch": 0.5819681434678975, "grad_norm": 1.8378933668136597, "learning_rate": 7.314099129571769e-06, "loss": 0.4497, "step": 7691 }, { "epoch": 0.5820438121902312, "grad_norm": 2.2572903633117676, "learning_rate": 7.3118868328655795e-06, "loss": 0.7395, "step": 7692 }, { "epoch": 0.5821194809125648, "grad_norm": 1.9963607788085938, "learning_rate": 7.309674641874496e-06, "loss": 0.8227, "step": 7693 }, { "epoch": 0.5821951496348984, "grad_norm": 2.2713754177093506, "learning_rate": 7.307462556737054e-06, "loss": 0.667, "step": 7694 }, { "epoch": 0.582270818357232, "grad_norm": 1.7703185081481934, "learning_rate": 7.30525057759178e-06, "loss": 0.8367, "step": 7695 }, { "epoch": 0.5823464870795657, "grad_norm": 2.0329811573028564, "learning_rate": 7.3030387045771945e-06, "loss": 0.7944, "step": 7696 }, { "epoch": 0.5824221558018993, "grad_norm": 2.182164192199707, "learning_rate": 7.300826937831816e-06, "loss": 0.6249, "step": 7697 }, { "epoch": 0.5824978245242329, "grad_norm": 1.8738006353378296, "learning_rate": 7.298615277494151e-06, "loss": 0.6997, "step": 7698 }, { "epoch": 0.5825734932465665, "grad_norm": 2.295851469039917, "learning_rate": 7.2964037237027004e-06, "loss": 0.6997, "step": 7699 }, { "epoch": 0.5826491619689002, "grad_norm": 2.639524221420288, "learning_rate": 7.294192276595958e-06, "loss": 0.8636, "step": 7700 }, { "epoch": 0.5827248306912338, "grad_norm": 2.49822998046875, "learning_rate": 7.2919809363124104e-06, "loss": 0.5851, "step": 7701 }, { "epoch": 0.5828004994135674, "grad_norm": 1.8222301006317139, "learning_rate": 7.289769702990542e-06, "loss": 0.7613, "step": 7702 }, { "epoch": 0.582876168135901, "grad_norm": 1.6940174102783203, "learning_rate": 7.28755857676883e-06, "loss": 0.5668, "step": 7703 }, { "epoch": 0.5829518368582346, "grad_norm": 2.8774144649505615, "learning_rate": 7.285347557785736e-06, "loss": 0.7722, "step": 7704 }, { "epoch": 0.5830275055805683, "grad_norm": 2.2571170330047607, "learning_rate": 7.283136646179724e-06, "loss": 0.7295, "step": 7705 }, { "epoch": 0.5831031743029019, "grad_norm": 2.2378146648406982, "learning_rate": 7.2809258420892455e-06, "loss": 0.7539, "step": 7706 }, { "epoch": 0.5831788430252355, "grad_norm": 2.1192033290863037, "learning_rate": 7.278715145652754e-06, "loss": 0.7074, "step": 7707 }, { "epoch": 0.5832545117475691, "grad_norm": 2.149138927459717, "learning_rate": 7.276504557008687e-06, "loss": 0.6908, "step": 7708 }, { "epoch": 0.5833301804699028, "grad_norm": 2.1191375255584717, "learning_rate": 7.274294076295479e-06, "loss": 0.6748, "step": 7709 }, { "epoch": 0.5834058491922364, "grad_norm": 1.9595415592193604, "learning_rate": 7.27208370365156e-06, "loss": 0.8031, "step": 7710 }, { "epoch": 0.58348151791457, "grad_norm": 2.0136635303497314, "learning_rate": 7.269873439215343e-06, "loss": 0.535, "step": 7711 }, { "epoch": 0.5835571866369036, "grad_norm": 6.647026062011719, "learning_rate": 7.267663283125249e-06, "loss": 0.5824, "step": 7712 }, { "epoch": 0.5836328553592373, "grad_norm": 2.1714112758636475, "learning_rate": 7.265453235519686e-06, "loss": 0.6723, "step": 7713 }, { "epoch": 0.5837085240815709, "grad_norm": 1.932857632637024, "learning_rate": 7.26324329653705e-06, "loss": 0.768, "step": 7714 }, { "epoch": 0.5837841928039045, "grad_norm": 3.383786916732788, "learning_rate": 7.261033466315737e-06, "loss": 0.5688, "step": 7715 }, { "epoch": 0.5838598615262381, "grad_norm": 2.061591863632202, "learning_rate": 7.2588237449941274e-06, "loss": 0.4932, "step": 7716 }, { "epoch": 0.5839355302485717, "grad_norm": 1.7182115316390991, "learning_rate": 7.256614132710612e-06, "loss": 0.5914, "step": 7717 }, { "epoch": 0.5840111989709054, "grad_norm": 2.315765142440796, "learning_rate": 7.254404629603557e-06, "loss": 0.7175, "step": 7718 }, { "epoch": 0.584086867693239, "grad_norm": 2.1149778366088867, "learning_rate": 7.252195235811331e-06, "loss": 0.6716, "step": 7719 }, { "epoch": 0.5841625364155726, "grad_norm": 2.2034709453582764, "learning_rate": 7.2499859514722925e-06, "loss": 0.718, "step": 7720 }, { "epoch": 0.5842382051379063, "grad_norm": 1.7320899963378906, "learning_rate": 7.24777677672479e-06, "loss": 0.8193, "step": 7721 }, { "epoch": 0.5843138738602399, "grad_norm": 2.224959373474121, "learning_rate": 7.2455677117071785e-06, "loss": 0.6078, "step": 7722 }, { "epoch": 0.5843895425825735, "grad_norm": 1.8637721538543701, "learning_rate": 7.243358756557788e-06, "loss": 0.7856, "step": 7723 }, { "epoch": 0.5844652113049071, "grad_norm": 1.866568922996521, "learning_rate": 7.241149911414957e-06, "loss": 0.6537, "step": 7724 }, { "epoch": 0.5845408800272407, "grad_norm": 2.14298415184021, "learning_rate": 7.238941176417005e-06, "loss": 0.7189, "step": 7725 }, { "epoch": 0.5846165487495744, "grad_norm": 2.2058675289154053, "learning_rate": 7.236732551702251e-06, "loss": 0.6751, "step": 7726 }, { "epoch": 0.584692217471908, "grad_norm": 2.182589054107666, "learning_rate": 7.23452403740901e-06, "loss": 0.757, "step": 7727 }, { "epoch": 0.5847678861942416, "grad_norm": 2.33272123336792, "learning_rate": 7.232315633675584e-06, "loss": 0.6802, "step": 7728 }, { "epoch": 0.5848435549165752, "grad_norm": 1.9340473413467407, "learning_rate": 7.230107340640272e-06, "loss": 0.6658, "step": 7729 }, { "epoch": 0.5849192236389088, "grad_norm": 6.095152854919434, "learning_rate": 7.22789915844136e-06, "loss": 0.6181, "step": 7730 }, { "epoch": 0.5849948923612425, "grad_norm": 1.9372726678848267, "learning_rate": 7.225691087217132e-06, "loss": 0.6955, "step": 7731 }, { "epoch": 0.5850705610835761, "grad_norm": 2.411663055419922, "learning_rate": 7.22348312710587e-06, "loss": 0.7125, "step": 7732 }, { "epoch": 0.5851462298059097, "grad_norm": 2.0117082595825195, "learning_rate": 7.221275278245842e-06, "loss": 0.6744, "step": 7733 }, { "epoch": 0.5852218985282434, "grad_norm": 2.462001085281372, "learning_rate": 7.2190675407753075e-06, "loss": 0.7146, "step": 7734 }, { "epoch": 0.585297567250577, "grad_norm": 2.144810438156128, "learning_rate": 7.216859914832526e-06, "loss": 0.6407, "step": 7735 }, { "epoch": 0.5853732359729106, "grad_norm": 2.357346773147583, "learning_rate": 7.2146524005557416e-06, "loss": 0.6624, "step": 7736 }, { "epoch": 0.5854489046952442, "grad_norm": 3.149186134338379, "learning_rate": 7.212444998083196e-06, "loss": 0.6403, "step": 7737 }, { "epoch": 0.5855245734175778, "grad_norm": 1.9493343830108643, "learning_rate": 7.210237707553132e-06, "loss": 0.6004, "step": 7738 }, { "epoch": 0.5856002421399115, "grad_norm": 2.3722169399261475, "learning_rate": 7.208030529103768e-06, "loss": 0.7029, "step": 7739 }, { "epoch": 0.5856759108622451, "grad_norm": 1.8572362661361694, "learning_rate": 7.205823462873331e-06, "loss": 0.5583, "step": 7740 }, { "epoch": 0.5857515795845787, "grad_norm": 19.258787155151367, "learning_rate": 7.203616509000029e-06, "loss": 0.7381, "step": 7741 }, { "epoch": 0.5858272483069124, "grad_norm": 2.024709939956665, "learning_rate": 7.201409667622069e-06, "loss": 0.6997, "step": 7742 }, { "epoch": 0.5859029170292459, "grad_norm": 1.7652946710586548, "learning_rate": 7.199202938877658e-06, "loss": 0.6909, "step": 7743 }, { "epoch": 0.5859785857515796, "grad_norm": 1.8822402954101562, "learning_rate": 7.196996322904982e-06, "loss": 0.635, "step": 7744 }, { "epoch": 0.5860542544739132, "grad_norm": 1.622046947479248, "learning_rate": 7.194789819842228e-06, "loss": 0.7197, "step": 7745 }, { "epoch": 0.5861299231962468, "grad_norm": 3.179093837738037, "learning_rate": 7.1925834298275735e-06, "loss": 0.6946, "step": 7746 }, { "epoch": 0.5862055919185805, "grad_norm": 2.236264944076538, "learning_rate": 7.19037715299919e-06, "loss": 0.7203, "step": 7747 }, { "epoch": 0.5862812606409141, "grad_norm": 2.4791111946105957, "learning_rate": 7.188170989495242e-06, "loss": 0.5344, "step": 7748 }, { "epoch": 0.5863569293632477, "grad_norm": 2.7363762855529785, "learning_rate": 7.18596493945389e-06, "loss": 0.6981, "step": 7749 }, { "epoch": 0.5864325980855813, "grad_norm": 1.8051518201828003, "learning_rate": 7.183759003013277e-06, "loss": 0.6166, "step": 7750 }, { "epoch": 0.5865082668079149, "grad_norm": 2.029879331588745, "learning_rate": 7.181553180311554e-06, "loss": 0.7514, "step": 7751 }, { "epoch": 0.5865839355302486, "grad_norm": 2.204545736312866, "learning_rate": 7.1793474714868465e-06, "loss": 0.7361, "step": 7752 }, { "epoch": 0.5866596042525822, "grad_norm": 2.728564739227295, "learning_rate": 7.177141876677292e-06, "loss": 0.637, "step": 7753 }, { "epoch": 0.5867352729749158, "grad_norm": 2.3274734020233154, "learning_rate": 7.174936396021011e-06, "loss": 0.6613, "step": 7754 }, { "epoch": 0.5868109416972495, "grad_norm": 2.8488948345184326, "learning_rate": 7.172731029656113e-06, "loss": 0.8433, "step": 7755 }, { "epoch": 0.586886610419583, "grad_norm": 2.334925651550293, "learning_rate": 7.1705257777207115e-06, "loss": 0.7575, "step": 7756 }, { "epoch": 0.5869622791419167, "grad_norm": 4.486547470092773, "learning_rate": 7.168320640352898e-06, "loss": 0.8143, "step": 7757 }, { "epoch": 0.5870379478642503, "grad_norm": 1.958241581916809, "learning_rate": 7.1661156176907716e-06, "loss": 0.742, "step": 7758 }, { "epoch": 0.5871136165865839, "grad_norm": 2.2698402404785156, "learning_rate": 7.163910709872421e-06, "loss": 0.817, "step": 7759 }, { "epoch": 0.5871892853089176, "grad_norm": 2.016049861907959, "learning_rate": 7.1617059170359165e-06, "loss": 0.6541, "step": 7760 }, { "epoch": 0.5872649540312512, "grad_norm": 1.8849208354949951, "learning_rate": 7.1595012393193346e-06, "loss": 0.6868, "step": 7761 }, { "epoch": 0.5873406227535848, "grad_norm": 2.1924757957458496, "learning_rate": 7.157296676860735e-06, "loss": 0.6863, "step": 7762 }, { "epoch": 0.5874162914759185, "grad_norm": 2.1753952503204346, "learning_rate": 7.155092229798181e-06, "loss": 0.6678, "step": 7763 }, { "epoch": 0.587491960198252, "grad_norm": 1.9696418046951294, "learning_rate": 7.152887898269718e-06, "loss": 0.706, "step": 7764 }, { "epoch": 0.5875676289205857, "grad_norm": 2.0107688903808594, "learning_rate": 7.15068368241339e-06, "loss": 0.7368, "step": 7765 }, { "epoch": 0.5876432976429193, "grad_norm": 1.9954001903533936, "learning_rate": 7.14847958236723e-06, "loss": 0.6301, "step": 7766 }, { "epoch": 0.5877189663652529, "grad_norm": 1.949197769165039, "learning_rate": 7.146275598269265e-06, "loss": 0.5946, "step": 7767 }, { "epoch": 0.5877946350875866, "grad_norm": 2.207111358642578, "learning_rate": 7.144071730257521e-06, "loss": 0.6304, "step": 7768 }, { "epoch": 0.5878703038099202, "grad_norm": 1.9679584503173828, "learning_rate": 7.141867978470007e-06, "loss": 0.652, "step": 7769 }, { "epoch": 0.5879459725322538, "grad_norm": 2.0223073959350586, "learning_rate": 7.139664343044732e-06, "loss": 0.6923, "step": 7770 }, { "epoch": 0.5880216412545874, "grad_norm": 2.1896934509277344, "learning_rate": 7.137460824119691e-06, "loss": 0.7841, "step": 7771 }, { "epoch": 0.588097309976921, "grad_norm": 2.157703161239624, "learning_rate": 7.135257421832879e-06, "loss": 0.6671, "step": 7772 }, { "epoch": 0.5881729786992547, "grad_norm": 2.098355293273926, "learning_rate": 7.133054136322274e-06, "loss": 0.6282, "step": 7773 }, { "epoch": 0.5882486474215883, "grad_norm": 1.8522675037384033, "learning_rate": 7.130850967725861e-06, "loss": 0.6418, "step": 7774 }, { "epoch": 0.5883243161439219, "grad_norm": 1.7040518522262573, "learning_rate": 7.128647916181605e-06, "loss": 0.5497, "step": 7775 }, { "epoch": 0.5883999848662556, "grad_norm": 2.2610528469085693, "learning_rate": 7.126444981827471e-06, "loss": 0.6669, "step": 7776 }, { "epoch": 0.5884756535885891, "grad_norm": 2.119845390319824, "learning_rate": 7.12424216480141e-06, "loss": 0.7393, "step": 7777 }, { "epoch": 0.5885513223109228, "grad_norm": 2.3598594665527344, "learning_rate": 7.12203946524137e-06, "loss": 0.6026, "step": 7778 }, { "epoch": 0.5886269910332564, "grad_norm": 2.374350070953369, "learning_rate": 7.119836883285297e-06, "loss": 0.6199, "step": 7779 }, { "epoch": 0.58870265975559, "grad_norm": 2.1946024894714355, "learning_rate": 7.117634419071117e-06, "loss": 0.6357, "step": 7780 }, { "epoch": 0.5887783284779237, "grad_norm": 2.086524724960327, "learning_rate": 7.115432072736759e-06, "loss": 0.8439, "step": 7781 }, { "epoch": 0.5888539972002573, "grad_norm": 2.3127012252807617, "learning_rate": 7.1132298444201395e-06, "loss": 0.7654, "step": 7782 }, { "epoch": 0.5889296659225909, "grad_norm": 2.065904378890991, "learning_rate": 7.111027734259167e-06, "loss": 0.7323, "step": 7783 }, { "epoch": 0.5890053346449245, "grad_norm": 1.9671047925949097, "learning_rate": 7.108825742391752e-06, "loss": 0.7849, "step": 7784 }, { "epoch": 0.5890810033672581, "grad_norm": 2.391326665878296, "learning_rate": 7.106623868955784e-06, "loss": 0.6453, "step": 7785 }, { "epoch": 0.5891566720895918, "grad_norm": 2.2341959476470947, "learning_rate": 7.104422114089155e-06, "loss": 0.7335, "step": 7786 }, { "epoch": 0.5892323408119254, "grad_norm": 2.1001060009002686, "learning_rate": 7.1022204779297415e-06, "loss": 0.752, "step": 7787 }, { "epoch": 0.589308009534259, "grad_norm": 2.2669565677642822, "learning_rate": 7.1000189606154185e-06, "loss": 0.6614, "step": 7788 }, { "epoch": 0.5893836782565927, "grad_norm": 2.1226632595062256, "learning_rate": 7.097817562284056e-06, "loss": 0.7003, "step": 7789 }, { "epoch": 0.5894593469789262, "grad_norm": 1.9791269302368164, "learning_rate": 7.095616283073511e-06, "loss": 0.7109, "step": 7790 }, { "epoch": 0.5895350157012599, "grad_norm": 2.323878765106201, "learning_rate": 7.093415123121633e-06, "loss": 0.7033, "step": 7791 }, { "epoch": 0.5896106844235935, "grad_norm": 2.5213472843170166, "learning_rate": 7.091214082566267e-06, "loss": 0.7201, "step": 7792 }, { "epoch": 0.5896863531459271, "grad_norm": 2.2447805404663086, "learning_rate": 7.089013161545246e-06, "loss": 0.7435, "step": 7793 }, { "epoch": 0.5897620218682608, "grad_norm": 1.9220545291900635, "learning_rate": 7.086812360196404e-06, "loss": 0.7438, "step": 7794 }, { "epoch": 0.5898376905905944, "grad_norm": 2.168332815170288, "learning_rate": 7.084611678657562e-06, "loss": 0.6878, "step": 7795 }, { "epoch": 0.589913359312928, "grad_norm": 2.1411046981811523, "learning_rate": 7.082411117066529e-06, "loss": 0.7192, "step": 7796 }, { "epoch": 0.5899890280352617, "grad_norm": 2.133474111557007, "learning_rate": 7.080210675561116e-06, "loss": 0.6435, "step": 7797 }, { "epoch": 0.5900646967575952, "grad_norm": 2.075592041015625, "learning_rate": 7.078010354279117e-06, "loss": 0.7506, "step": 7798 }, { "epoch": 0.5901403654799289, "grad_norm": 1.674235224723816, "learning_rate": 7.075810153358327e-06, "loss": 0.6847, "step": 7799 }, { "epoch": 0.5902160342022625, "grad_norm": 2.099836587905884, "learning_rate": 7.073610072936532e-06, "loss": 0.778, "step": 7800 }, { "epoch": 0.5902917029245961, "grad_norm": 1.963880181312561, "learning_rate": 7.0714101131515015e-06, "loss": 0.6492, "step": 7801 }, { "epoch": 0.5903673716469298, "grad_norm": 2.0048913955688477, "learning_rate": 7.069210274141011e-06, "loss": 0.7348, "step": 7802 }, { "epoch": 0.5904430403692633, "grad_norm": 1.986953616142273, "learning_rate": 7.067010556042812e-06, "loss": 0.7612, "step": 7803 }, { "epoch": 0.590518709091597, "grad_norm": 2.7819113731384277, "learning_rate": 7.064810958994668e-06, "loss": 0.7183, "step": 7804 }, { "epoch": 0.5905943778139306, "grad_norm": 2.236011266708374, "learning_rate": 7.062611483134321e-06, "loss": 0.8888, "step": 7805 }, { "epoch": 0.5906700465362642, "grad_norm": 1.9459607601165771, "learning_rate": 7.06041212859951e-06, "loss": 0.7414, "step": 7806 }, { "epoch": 0.5907457152585979, "grad_norm": 1.7577186822891235, "learning_rate": 7.058212895527964e-06, "loss": 0.7483, "step": 7807 }, { "epoch": 0.5908213839809315, "grad_norm": 1.7476128339767456, "learning_rate": 7.056013784057404e-06, "loss": 0.6792, "step": 7808 }, { "epoch": 0.5908970527032651, "grad_norm": 2.0670974254608154, "learning_rate": 7.053814794325552e-06, "loss": 0.743, "step": 7809 }, { "epoch": 0.5909727214255988, "grad_norm": 1.867936372756958, "learning_rate": 7.051615926470112e-06, "loss": 0.7591, "step": 7810 }, { "epoch": 0.5910483901479323, "grad_norm": 2.2760121822357178, "learning_rate": 7.049417180628785e-06, "loss": 0.6799, "step": 7811 }, { "epoch": 0.591124058870266, "grad_norm": 2.5277442932128906, "learning_rate": 7.047218556939262e-06, "loss": 0.7732, "step": 7812 }, { "epoch": 0.5911997275925996, "grad_norm": 2.176264524459839, "learning_rate": 7.04502005553923e-06, "loss": 0.6876, "step": 7813 }, { "epoch": 0.5912753963149332, "grad_norm": 2.316378355026245, "learning_rate": 7.042821676566363e-06, "loss": 0.6702, "step": 7814 }, { "epoch": 0.5913510650372669, "grad_norm": 2.1952383518218994, "learning_rate": 7.040623420158334e-06, "loss": 0.6593, "step": 7815 }, { "epoch": 0.5914267337596004, "grad_norm": 2.2782609462738037, "learning_rate": 7.038425286452806e-06, "loss": 0.7548, "step": 7816 }, { "epoch": 0.5915024024819341, "grad_norm": 1.863445520401001, "learning_rate": 7.036227275587428e-06, "loss": 0.5509, "step": 7817 }, { "epoch": 0.5915780712042678, "grad_norm": 5.7330851554870605, "learning_rate": 7.034029387699853e-06, "loss": 0.6268, "step": 7818 }, { "epoch": 0.5916537399266013, "grad_norm": 1.4934370517730713, "learning_rate": 7.031831622927709e-06, "loss": 0.6594, "step": 7819 }, { "epoch": 0.591729408648935, "grad_norm": 1.9840810298919678, "learning_rate": 7.0296339814086425e-06, "loss": 0.7832, "step": 7820 }, { "epoch": 0.5918050773712686, "grad_norm": 1.9863804578781128, "learning_rate": 7.027436463280266e-06, "loss": 0.6377, "step": 7821 }, { "epoch": 0.5918807460936022, "grad_norm": 2.300100803375244, "learning_rate": 7.0252390686802e-06, "loss": 0.6932, "step": 7822 }, { "epoch": 0.5919564148159359, "grad_norm": 1.7306914329528809, "learning_rate": 7.023041797746048e-06, "loss": 0.7395, "step": 7823 }, { "epoch": 0.5920320835382694, "grad_norm": 2.2924342155456543, "learning_rate": 7.020844650615412e-06, "loss": 0.6836, "step": 7824 }, { "epoch": 0.5921077522606031, "grad_norm": 2.0509254932403564, "learning_rate": 7.018647627425889e-06, "loss": 0.7414, "step": 7825 }, { "epoch": 0.5921834209829367, "grad_norm": 1.9954980611801147, "learning_rate": 7.016450728315059e-06, "loss": 0.7232, "step": 7826 }, { "epoch": 0.5922590897052703, "grad_norm": 2.126925468444824, "learning_rate": 7.014253953420501e-06, "loss": 0.8562, "step": 7827 }, { "epoch": 0.592334758427604, "grad_norm": 2.0931129455566406, "learning_rate": 7.0120573028797814e-06, "loss": 0.5782, "step": 7828 }, { "epoch": 0.5924104271499375, "grad_norm": 1.9808735847473145, "learning_rate": 7.009860776830461e-06, "loss": 0.6848, "step": 7829 }, { "epoch": 0.5924860958722712, "grad_norm": 1.9941705465316772, "learning_rate": 7.007664375410099e-06, "loss": 0.6859, "step": 7830 }, { "epoch": 0.5925617645946049, "grad_norm": 1.9075549840927124, "learning_rate": 7.005468098756237e-06, "loss": 0.6063, "step": 7831 }, { "epoch": 0.5926374333169384, "grad_norm": 2.4272186756134033, "learning_rate": 7.003271947006415e-06, "loss": 0.7188, "step": 7832 }, { "epoch": 0.5927131020392721, "grad_norm": 2.166393518447876, "learning_rate": 7.00107592029816e-06, "loss": 0.7613, "step": 7833 }, { "epoch": 0.5927887707616057, "grad_norm": 2.0019025802612305, "learning_rate": 6.998880018768995e-06, "loss": 0.5959, "step": 7834 }, { "epoch": 0.5928644394839393, "grad_norm": 2.18587327003479, "learning_rate": 6.996684242556438e-06, "loss": 0.6934, "step": 7835 }, { "epoch": 0.592940108206273, "grad_norm": 1.79190993309021, "learning_rate": 6.9944885917979935e-06, "loss": 0.5657, "step": 7836 }, { "epoch": 0.5930157769286065, "grad_norm": 1.922041893005371, "learning_rate": 6.992293066631159e-06, "loss": 0.7778, "step": 7837 }, { "epoch": 0.5930914456509402, "grad_norm": 2.1699047088623047, "learning_rate": 6.990097667193427e-06, "loss": 0.5798, "step": 7838 }, { "epoch": 0.5931671143732739, "grad_norm": 1.9367480278015137, "learning_rate": 6.987902393622278e-06, "loss": 0.8402, "step": 7839 }, { "epoch": 0.5932427830956074, "grad_norm": 2.1018497943878174, "learning_rate": 6.985707246055189e-06, "loss": 0.6521, "step": 7840 }, { "epoch": 0.5933184518179411, "grad_norm": 1.8842015266418457, "learning_rate": 6.983512224629631e-06, "loss": 0.6046, "step": 7841 }, { "epoch": 0.5933941205402746, "grad_norm": 2.317081928253174, "learning_rate": 6.981317329483057e-06, "loss": 0.6315, "step": 7842 }, { "epoch": 0.5934697892626083, "grad_norm": 2.405687093734741, "learning_rate": 6.979122560752923e-06, "loss": 0.5215, "step": 7843 }, { "epoch": 0.593545457984942, "grad_norm": 7.7125935554504395, "learning_rate": 6.976927918576667e-06, "loss": 0.6223, "step": 7844 }, { "epoch": 0.5936211267072755, "grad_norm": 2.015296697616577, "learning_rate": 6.974733403091729e-06, "loss": 0.6515, "step": 7845 }, { "epoch": 0.5936967954296092, "grad_norm": 1.864576816558838, "learning_rate": 6.972539014435539e-06, "loss": 0.74, "step": 7846 }, { "epoch": 0.5937724641519428, "grad_norm": 2.105365514755249, "learning_rate": 6.970344752745511e-06, "loss": 0.6876, "step": 7847 }, { "epoch": 0.5938481328742764, "grad_norm": 1.9904052019119263, "learning_rate": 6.968150618159058e-06, "loss": 0.7621, "step": 7848 }, { "epoch": 0.5939238015966101, "grad_norm": 2.3770105838775635, "learning_rate": 6.965956610813589e-06, "loss": 0.5695, "step": 7849 }, { "epoch": 0.5939994703189436, "grad_norm": 2.017836570739746, "learning_rate": 6.963762730846492e-06, "loss": 0.6737, "step": 7850 }, { "epoch": 0.5940751390412773, "grad_norm": 1.9348866939544678, "learning_rate": 6.96156897839516e-06, "loss": 0.8016, "step": 7851 }, { "epoch": 0.594150807763611, "grad_norm": 1.867281198501587, "learning_rate": 6.959375353596973e-06, "loss": 0.5993, "step": 7852 }, { "epoch": 0.5942264764859445, "grad_norm": 2.012544870376587, "learning_rate": 6.957181856589301e-06, "loss": 0.5875, "step": 7853 }, { "epoch": 0.5943021452082782, "grad_norm": 2.0992937088012695, "learning_rate": 6.9549884875095095e-06, "loss": 0.5162, "step": 7854 }, { "epoch": 0.5943778139306117, "grad_norm": 1.8907424211502075, "learning_rate": 6.952795246494949e-06, "loss": 0.7365, "step": 7855 }, { "epoch": 0.5944534826529454, "grad_norm": 1.9334065914154053, "learning_rate": 6.9506021336829745e-06, "loss": 0.6326, "step": 7856 }, { "epoch": 0.5945291513752791, "grad_norm": 2.2515668869018555, "learning_rate": 6.948409149210924e-06, "loss": 0.7034, "step": 7857 }, { "epoch": 0.5946048200976126, "grad_norm": 2.1506471633911133, "learning_rate": 6.946216293216127e-06, "loss": 0.8012, "step": 7858 }, { "epoch": 0.5946804888199463, "grad_norm": 1.9353808164596558, "learning_rate": 6.944023565835911e-06, "loss": 0.6319, "step": 7859 }, { "epoch": 0.59475615754228, "grad_norm": 3.7930614948272705, "learning_rate": 6.941830967207584e-06, "loss": 0.6095, "step": 7860 }, { "epoch": 0.5948318262646135, "grad_norm": 1.9903547763824463, "learning_rate": 6.939638497468461e-06, "loss": 0.7019, "step": 7861 }, { "epoch": 0.5949074949869472, "grad_norm": 2.3188912868499756, "learning_rate": 6.937446156755841e-06, "loss": 0.8333, "step": 7862 }, { "epoch": 0.5949831637092807, "grad_norm": 2.208460569381714, "learning_rate": 6.935253945207013e-06, "loss": 0.5642, "step": 7863 }, { "epoch": 0.5950588324316144, "grad_norm": 2.100937604904175, "learning_rate": 6.93306186295926e-06, "loss": 0.6346, "step": 7864 }, { "epoch": 0.5951345011539481, "grad_norm": 2.0526506900787354, "learning_rate": 6.9308699101498565e-06, "loss": 0.6951, "step": 7865 }, { "epoch": 0.5952101698762816, "grad_norm": 2.3143303394317627, "learning_rate": 6.928678086916076e-06, "loss": 0.6788, "step": 7866 }, { "epoch": 0.5952858385986153, "grad_norm": 2.076948404312134, "learning_rate": 6.926486393395171e-06, "loss": 0.7481, "step": 7867 }, { "epoch": 0.5953615073209488, "grad_norm": 2.473203659057617, "learning_rate": 6.9242948297243975e-06, "loss": 0.5965, "step": 7868 }, { "epoch": 0.5954371760432825, "grad_norm": 2.1721701622009277, "learning_rate": 6.922103396040992e-06, "loss": 0.768, "step": 7869 }, { "epoch": 0.5955128447656162, "grad_norm": 1.6090091466903687, "learning_rate": 6.919912092482192e-06, "loss": 0.6591, "step": 7870 }, { "epoch": 0.5955885134879497, "grad_norm": 1.9884438514709473, "learning_rate": 6.917720919185227e-06, "loss": 0.7708, "step": 7871 }, { "epoch": 0.5956641822102834, "grad_norm": 2.0646588802337646, "learning_rate": 6.9155298762873115e-06, "loss": 0.6364, "step": 7872 }, { "epoch": 0.595739850932617, "grad_norm": 2.5629851818084717, "learning_rate": 6.913338963925659e-06, "loss": 0.8015, "step": 7873 }, { "epoch": 0.5958155196549506, "grad_norm": 2.499199151992798, "learning_rate": 6.9111481822374685e-06, "loss": 0.5926, "step": 7874 }, { "epoch": 0.5958911883772843, "grad_norm": 2.0954859256744385, "learning_rate": 6.908957531359932e-06, "loss": 0.7402, "step": 7875 }, { "epoch": 0.5959668570996178, "grad_norm": 1.77829110622406, "learning_rate": 6.906767011430242e-06, "loss": 0.6274, "step": 7876 }, { "epoch": 0.5960425258219515, "grad_norm": 2.359452962875366, "learning_rate": 6.904576622585572e-06, "loss": 0.6625, "step": 7877 }, { "epoch": 0.5961181945442852, "grad_norm": 2.3234505653381348, "learning_rate": 6.9023863649630894e-06, "loss": 0.8029, "step": 7878 }, { "epoch": 0.5961938632666187, "grad_norm": 2.330986261367798, "learning_rate": 6.90019623869996e-06, "loss": 0.5878, "step": 7879 }, { "epoch": 0.5962695319889524, "grad_norm": 2.1926472187042236, "learning_rate": 6.898006243933329e-06, "loss": 0.6217, "step": 7880 }, { "epoch": 0.5963452007112859, "grad_norm": 1.9008252620697021, "learning_rate": 6.8958163808003485e-06, "loss": 0.6465, "step": 7881 }, { "epoch": 0.5964208694336196, "grad_norm": 2.272428512573242, "learning_rate": 6.893626649438154e-06, "loss": 0.692, "step": 7882 }, { "epoch": 0.5964965381559533, "grad_norm": 2.106896162033081, "learning_rate": 6.891437049983869e-06, "loss": 0.5936, "step": 7883 }, { "epoch": 0.5965722068782868, "grad_norm": 2.124065399169922, "learning_rate": 6.889247582574617e-06, "loss": 0.6684, "step": 7884 }, { "epoch": 0.5966478756006205, "grad_norm": 1.6563143730163574, "learning_rate": 6.887058247347506e-06, "loss": 0.6952, "step": 7885 }, { "epoch": 0.5967235443229542, "grad_norm": 1.7926180362701416, "learning_rate": 6.884869044439644e-06, "loss": 0.7559, "step": 7886 }, { "epoch": 0.5967992130452877, "grad_norm": 2.0105221271514893, "learning_rate": 6.8826799739881235e-06, "loss": 0.6041, "step": 7887 }, { "epoch": 0.5968748817676214, "grad_norm": 2.2636537551879883, "learning_rate": 6.88049103613003e-06, "loss": 0.6275, "step": 7888 }, { "epoch": 0.5969505504899549, "grad_norm": 2.092416524887085, "learning_rate": 6.878302231002446e-06, "loss": 0.7661, "step": 7889 }, { "epoch": 0.5970262192122886, "grad_norm": 1.8432059288024902, "learning_rate": 6.876113558742437e-06, "loss": 0.6898, "step": 7890 }, { "epoch": 0.5971018879346223, "grad_norm": 1.8818315267562866, "learning_rate": 6.873925019487064e-06, "loss": 0.5599, "step": 7891 }, { "epoch": 0.5971775566569558, "grad_norm": 1.8227505683898926, "learning_rate": 6.871736613373384e-06, "loss": 0.6097, "step": 7892 }, { "epoch": 0.5972532253792895, "grad_norm": 5.854292869567871, "learning_rate": 6.869548340538444e-06, "loss": 0.7175, "step": 7893 }, { "epoch": 0.597328894101623, "grad_norm": 1.9756673574447632, "learning_rate": 6.8673602011192746e-06, "loss": 0.6218, "step": 7894 }, { "epoch": 0.5974045628239567, "grad_norm": 2.129859209060669, "learning_rate": 6.86517219525291e-06, "loss": 0.7691, "step": 7895 }, { "epoch": 0.5974802315462904, "grad_norm": 2.6184825897216797, "learning_rate": 6.862984323076363e-06, "loss": 0.784, "step": 7896 }, { "epoch": 0.5975559002686239, "grad_norm": 2.6178689002990723, "learning_rate": 6.860796584726652e-06, "loss": 0.6983, "step": 7897 }, { "epoch": 0.5976315689909576, "grad_norm": 1.5637413263320923, "learning_rate": 6.858608980340779e-06, "loss": 0.6212, "step": 7898 }, { "epoch": 0.5977072377132913, "grad_norm": 1.7278257608413696, "learning_rate": 6.856421510055736e-06, "loss": 0.6918, "step": 7899 }, { "epoch": 0.5977829064356248, "grad_norm": 2.5124382972717285, "learning_rate": 6.8542341740085136e-06, "loss": 0.772, "step": 7900 }, { "epoch": 0.5978585751579585, "grad_norm": 2.5317604541778564, "learning_rate": 6.8520469723360835e-06, "loss": 0.5569, "step": 7901 }, { "epoch": 0.597934243880292, "grad_norm": 2.7019991874694824, "learning_rate": 6.849859905175421e-06, "loss": 0.7898, "step": 7902 }, { "epoch": 0.5980099126026257, "grad_norm": 1.7270742654800415, "learning_rate": 6.847672972663488e-06, "loss": 0.8241, "step": 7903 }, { "epoch": 0.5980855813249594, "grad_norm": 2.8753139972686768, "learning_rate": 6.845486174937233e-06, "loss": 0.6372, "step": 7904 }, { "epoch": 0.5981612500472929, "grad_norm": 2.0319454669952393, "learning_rate": 6.843299512133604e-06, "loss": 0.578, "step": 7905 }, { "epoch": 0.5982369187696266, "grad_norm": 2.0023133754730225, "learning_rate": 6.841112984389529e-06, "loss": 0.6541, "step": 7906 }, { "epoch": 0.5983125874919601, "grad_norm": 2.8424692153930664, "learning_rate": 6.8389265918419485e-06, "loss": 0.6049, "step": 7907 }, { "epoch": 0.5983882562142938, "grad_norm": 2.44048810005188, "learning_rate": 6.836740334627771e-06, "loss": 0.6917, "step": 7908 }, { "epoch": 0.5984639249366275, "grad_norm": 1.9694398641586304, "learning_rate": 6.8345542128839146e-06, "loss": 0.5634, "step": 7909 }, { "epoch": 0.598539593658961, "grad_norm": 3.217658519744873, "learning_rate": 6.832368226747273e-06, "loss": 0.824, "step": 7910 }, { "epoch": 0.5986152623812947, "grad_norm": 2.072033643722534, "learning_rate": 6.830182376354744e-06, "loss": 0.6921, "step": 7911 }, { "epoch": 0.5986909311036284, "grad_norm": 2.394564628601074, "learning_rate": 6.8279966618432155e-06, "loss": 0.6915, "step": 7912 }, { "epoch": 0.5987665998259619, "grad_norm": 1.8874036073684692, "learning_rate": 6.825811083349559e-06, "loss": 0.6492, "step": 7913 }, { "epoch": 0.5988422685482956, "grad_norm": 1.7274051904678345, "learning_rate": 6.8236256410106476e-06, "loss": 0.6783, "step": 7914 }, { "epoch": 0.5989179372706291, "grad_norm": 2.2389156818389893, "learning_rate": 6.821440334963335e-06, "loss": 0.5332, "step": 7915 }, { "epoch": 0.5989936059929628, "grad_norm": 3.8984248638153076, "learning_rate": 6.819255165344475e-06, "loss": 0.6413, "step": 7916 }, { "epoch": 0.5990692747152965, "grad_norm": 2.091085433959961, "learning_rate": 6.817070132290911e-06, "loss": 0.6986, "step": 7917 }, { "epoch": 0.59914494343763, "grad_norm": 2.023153781890869, "learning_rate": 6.814885235939475e-06, "loss": 0.734, "step": 7918 }, { "epoch": 0.5992206121599637, "grad_norm": 2.2519912719726562, "learning_rate": 6.812700476426995e-06, "loss": 0.6936, "step": 7919 }, { "epoch": 0.5992962808822972, "grad_norm": 1.7816762924194336, "learning_rate": 6.810515853890283e-06, "loss": 0.7838, "step": 7920 }, { "epoch": 0.5993719496046309, "grad_norm": 2.385741710662842, "learning_rate": 6.808331368466149e-06, "loss": 0.7145, "step": 7921 }, { "epoch": 0.5994476183269646, "grad_norm": 2.6496787071228027, "learning_rate": 6.806147020291395e-06, "loss": 0.7415, "step": 7922 }, { "epoch": 0.5995232870492981, "grad_norm": 2.55653977394104, "learning_rate": 6.803962809502812e-06, "loss": 0.7278, "step": 7923 }, { "epoch": 0.5995989557716318, "grad_norm": 2.4421041011810303, "learning_rate": 6.80177873623718e-06, "loss": 0.7035, "step": 7924 }, { "epoch": 0.5996746244939655, "grad_norm": 2.765859603881836, "learning_rate": 6.799594800631275e-06, "loss": 0.5941, "step": 7925 }, { "epoch": 0.599750293216299, "grad_norm": 2.608785390853882, "learning_rate": 6.797411002821856e-06, "loss": 0.6181, "step": 7926 }, { "epoch": 0.5998259619386327, "grad_norm": 2.0030951499938965, "learning_rate": 6.795227342945686e-06, "loss": 0.6672, "step": 7927 }, { "epoch": 0.5999016306609662, "grad_norm": 1.9999091625213623, "learning_rate": 6.7930438211395136e-06, "loss": 0.7108, "step": 7928 }, { "epoch": 0.5999772993832999, "grad_norm": 2.112736463546753, "learning_rate": 6.7908604375400725e-06, "loss": 0.5911, "step": 7929 }, { "epoch": 0.6000529681056336, "grad_norm": 2.1307666301727295, "learning_rate": 6.788677192284098e-06, "loss": 0.5783, "step": 7930 }, { "epoch": 0.6001286368279671, "grad_norm": 2.141432046890259, "learning_rate": 6.7864940855083085e-06, "loss": 0.6246, "step": 7931 }, { "epoch": 0.6002043055503008, "grad_norm": 1.9539786577224731, "learning_rate": 6.784311117349416e-06, "loss": 0.79, "step": 7932 }, { "epoch": 0.6002799742726344, "grad_norm": 2.6877975463867188, "learning_rate": 6.782128287944133e-06, "loss": 0.665, "step": 7933 }, { "epoch": 0.600355642994968, "grad_norm": 2.274559259414673, "learning_rate": 6.779945597429147e-06, "loss": 0.7325, "step": 7934 }, { "epoch": 0.6004313117173017, "grad_norm": 2.1375906467437744, "learning_rate": 6.77776304594115e-06, "loss": 0.8202, "step": 7935 }, { "epoch": 0.6005069804396352, "grad_norm": 1.993807315826416, "learning_rate": 6.775580633616818e-06, "loss": 0.7036, "step": 7936 }, { "epoch": 0.6005826491619689, "grad_norm": 2.2041423320770264, "learning_rate": 6.773398360592818e-06, "loss": 0.8387, "step": 7937 }, { "epoch": 0.6006583178843026, "grad_norm": 2.223114490509033, "learning_rate": 6.771216227005818e-06, "loss": 0.6734, "step": 7938 }, { "epoch": 0.6007339866066361, "grad_norm": 1.8410695791244507, "learning_rate": 6.769034232992466e-06, "loss": 0.5136, "step": 7939 }, { "epoch": 0.6008096553289698, "grad_norm": 2.056565046310425, "learning_rate": 6.766852378689406e-06, "loss": 0.6536, "step": 7940 }, { "epoch": 0.6008853240513033, "grad_norm": 2.2308459281921387, "learning_rate": 6.764670664233275e-06, "loss": 0.6653, "step": 7941 }, { "epoch": 0.600960992773637, "grad_norm": 2.1193389892578125, "learning_rate": 6.762489089760692e-06, "loss": 0.6981, "step": 7942 }, { "epoch": 0.6010366614959707, "grad_norm": 2.764047145843506, "learning_rate": 6.760307655408282e-06, "loss": 0.7005, "step": 7943 }, { "epoch": 0.6011123302183042, "grad_norm": 2.1812756061553955, "learning_rate": 6.758126361312653e-06, "loss": 0.6901, "step": 7944 }, { "epoch": 0.6011879989406379, "grad_norm": 2.3195836544036865, "learning_rate": 6.7559452076104e-06, "loss": 0.8043, "step": 7945 }, { "epoch": 0.6012636676629715, "grad_norm": 2.256772994995117, "learning_rate": 6.753764194438118e-06, "loss": 0.7321, "step": 7946 }, { "epoch": 0.6013393363853051, "grad_norm": 1.9437758922576904, "learning_rate": 6.751583321932382e-06, "loss": 0.7759, "step": 7947 }, { "epoch": 0.6014150051076388, "grad_norm": 1.943305253982544, "learning_rate": 6.749402590229775e-06, "loss": 0.6389, "step": 7948 }, { "epoch": 0.6014906738299723, "grad_norm": 2.6814630031585693, "learning_rate": 6.747221999466858e-06, "loss": 0.7856, "step": 7949 }, { "epoch": 0.601566342552306, "grad_norm": 2.330061197280884, "learning_rate": 6.745041549780184e-06, "loss": 0.6706, "step": 7950 }, { "epoch": 0.6016420112746397, "grad_norm": 2.5710747241973877, "learning_rate": 6.742861241306301e-06, "loss": 0.7252, "step": 7951 }, { "epoch": 0.6017176799969732, "grad_norm": 2.0507359504699707, "learning_rate": 6.7406810741817464e-06, "loss": 0.7641, "step": 7952 }, { "epoch": 0.6017933487193069, "grad_norm": 2.1858019828796387, "learning_rate": 6.738501048543054e-06, "loss": 0.6429, "step": 7953 }, { "epoch": 0.6018690174416405, "grad_norm": 1.9703868627548218, "learning_rate": 6.736321164526739e-06, "loss": 0.6259, "step": 7954 }, { "epoch": 0.6019446861639741, "grad_norm": 3.4809346199035645, "learning_rate": 6.734141422269315e-06, "loss": 0.6968, "step": 7955 }, { "epoch": 0.6020203548863078, "grad_norm": 1.7865307331085205, "learning_rate": 6.731961821907283e-06, "loss": 0.6615, "step": 7956 }, { "epoch": 0.6020960236086413, "grad_norm": 2.6982762813568115, "learning_rate": 6.729782363577135e-06, "loss": 0.7117, "step": 7957 }, { "epoch": 0.602171692330975, "grad_norm": 2.4082143306732178, "learning_rate": 6.727603047415362e-06, "loss": 0.884, "step": 7958 }, { "epoch": 0.6022473610533086, "grad_norm": 1.7152711153030396, "learning_rate": 6.725423873558435e-06, "loss": 0.5956, "step": 7959 }, { "epoch": 0.6023230297756422, "grad_norm": 2.04510760307312, "learning_rate": 6.723244842142823e-06, "loss": 0.762, "step": 7960 }, { "epoch": 0.6023986984979759, "grad_norm": 2.1493642330169678, "learning_rate": 6.721065953304981e-06, "loss": 0.6102, "step": 7961 }, { "epoch": 0.6024743672203094, "grad_norm": 1.814997911453247, "learning_rate": 6.718887207181358e-06, "loss": 0.6394, "step": 7962 }, { "epoch": 0.6025500359426431, "grad_norm": 2.8531596660614014, "learning_rate": 6.716708603908399e-06, "loss": 0.635, "step": 7963 }, { "epoch": 0.6026257046649768, "grad_norm": 2.303659677505493, "learning_rate": 6.714530143622533e-06, "loss": 0.8212, "step": 7964 }, { "epoch": 0.6027013733873103, "grad_norm": 2.530139684677124, "learning_rate": 6.71235182646018e-06, "loss": 0.6074, "step": 7965 }, { "epoch": 0.602777042109644, "grad_norm": 2.0177268981933594, "learning_rate": 6.710173652557756e-06, "loss": 0.6609, "step": 7966 }, { "epoch": 0.6028527108319776, "grad_norm": 2.0074338912963867, "learning_rate": 6.707995622051663e-06, "loss": 0.764, "step": 7967 }, { "epoch": 0.6029283795543112, "grad_norm": 1.9502952098846436, "learning_rate": 6.705817735078295e-06, "loss": 0.7149, "step": 7968 }, { "epoch": 0.6030040482766449, "grad_norm": 2.4225590229034424, "learning_rate": 6.703639991774045e-06, "loss": 0.8186, "step": 7969 }, { "epoch": 0.6030797169989784, "grad_norm": 2.3697550296783447, "learning_rate": 6.701462392275284e-06, "loss": 0.7241, "step": 7970 }, { "epoch": 0.6031553857213121, "grad_norm": 1.843167781829834, "learning_rate": 6.699284936718385e-06, "loss": 0.5219, "step": 7971 }, { "epoch": 0.6032310544436457, "grad_norm": 2.5825045108795166, "learning_rate": 6.697107625239701e-06, "loss": 0.7933, "step": 7972 }, { "epoch": 0.6033067231659793, "grad_norm": 1.4268264770507812, "learning_rate": 6.694930457975585e-06, "loss": 0.8802, "step": 7973 }, { "epoch": 0.603382391888313, "grad_norm": 2.36474347114563, "learning_rate": 6.692753435062383e-06, "loss": 0.8809, "step": 7974 }, { "epoch": 0.6034580606106466, "grad_norm": 1.9146884679794312, "learning_rate": 6.6905765566364225e-06, "loss": 0.6139, "step": 7975 }, { "epoch": 0.6035337293329802, "grad_norm": 2.1779873371124268, "learning_rate": 6.688399822834028e-06, "loss": 0.6991, "step": 7976 }, { "epoch": 0.6036093980553139, "grad_norm": 2.397181272506714, "learning_rate": 6.686223233791513e-06, "loss": 0.8585, "step": 7977 }, { "epoch": 0.6036850667776474, "grad_norm": 2.285151481628418, "learning_rate": 6.68404678964518e-06, "loss": 0.7212, "step": 7978 }, { "epoch": 0.6037607354999811, "grad_norm": 2.0508203506469727, "learning_rate": 6.681870490531329e-06, "loss": 0.7258, "step": 7979 }, { "epoch": 0.6038364042223147, "grad_norm": 1.981296181678772, "learning_rate": 6.679694336586247e-06, "loss": 0.6721, "step": 7980 }, { "epoch": 0.6039120729446483, "grad_norm": 2.3068652153015137, "learning_rate": 6.6775183279462105e-06, "loss": 0.5249, "step": 7981 }, { "epoch": 0.603987741666982, "grad_norm": 2.042125940322876, "learning_rate": 6.675342464747489e-06, "loss": 0.7777, "step": 7982 }, { "epoch": 0.6040634103893155, "grad_norm": 2.7529611587524414, "learning_rate": 6.673166747126338e-06, "loss": 0.7063, "step": 7983 }, { "epoch": 0.6041390791116492, "grad_norm": 1.930591344833374, "learning_rate": 6.670991175219012e-06, "loss": 0.7152, "step": 7984 }, { "epoch": 0.6042147478339828, "grad_norm": 1.793343186378479, "learning_rate": 6.668815749161754e-06, "loss": 0.5882, "step": 7985 }, { "epoch": 0.6042904165563164, "grad_norm": 2.146716356277466, "learning_rate": 6.666640469090792e-06, "loss": 0.7066, "step": 7986 }, { "epoch": 0.6043660852786501, "grad_norm": 2.544588327407837, "learning_rate": 6.664465335142352e-06, "loss": 0.7241, "step": 7987 }, { "epoch": 0.6044417540009837, "grad_norm": 2.0014796257019043, "learning_rate": 6.662290347452644e-06, "loss": 0.6437, "step": 7988 }, { "epoch": 0.6045174227233173, "grad_norm": 2.055544376373291, "learning_rate": 6.660115506157876e-06, "loss": 0.5982, "step": 7989 }, { "epoch": 0.604593091445651, "grad_norm": 2.17427134513855, "learning_rate": 6.6579408113942466e-06, "loss": 0.6711, "step": 7990 }, { "epoch": 0.6046687601679845, "grad_norm": 2.4635872840881348, "learning_rate": 6.655766263297936e-06, "loss": 0.7088, "step": 7991 }, { "epoch": 0.6047444288903182, "grad_norm": 2.311051607131958, "learning_rate": 6.653591862005126e-06, "loss": 0.7915, "step": 7992 }, { "epoch": 0.6048200976126518, "grad_norm": 2.1790106296539307, "learning_rate": 6.65141760765198e-06, "loss": 0.5936, "step": 7993 }, { "epoch": 0.6048957663349854, "grad_norm": 2.363032341003418, "learning_rate": 6.64924350037466e-06, "loss": 0.7366, "step": 7994 }, { "epoch": 0.6049714350573191, "grad_norm": 4.0425214767456055, "learning_rate": 6.6470695403093156e-06, "loss": 0.6819, "step": 7995 }, { "epoch": 0.6050471037796527, "grad_norm": 1.7406569719314575, "learning_rate": 6.6448957275920895e-06, "loss": 0.5655, "step": 7996 }, { "epoch": 0.6051227725019863, "grad_norm": 2.071523666381836, "learning_rate": 6.642722062359109e-06, "loss": 0.6732, "step": 7997 }, { "epoch": 0.6051984412243199, "grad_norm": 2.9325242042541504, "learning_rate": 6.640548544746494e-06, "loss": 0.7125, "step": 7998 }, { "epoch": 0.6052741099466535, "grad_norm": 1.7183152437210083, "learning_rate": 6.638375174890364e-06, "loss": 0.6568, "step": 7999 }, { "epoch": 0.6053497786689872, "grad_norm": 2.2659685611724854, "learning_rate": 6.636201952926818e-06, "loss": 0.6901, "step": 8000 }, { "epoch": 0.6054254473913208, "grad_norm": 1.8365509510040283, "learning_rate": 6.634028878991954e-06, "loss": 0.6363, "step": 8001 }, { "epoch": 0.6055011161136544, "grad_norm": 2.3198299407958984, "learning_rate": 6.631855953221851e-06, "loss": 0.748, "step": 8002 }, { "epoch": 0.6055767848359881, "grad_norm": 2.4509475231170654, "learning_rate": 6.629683175752586e-06, "loss": 0.667, "step": 8003 }, { "epoch": 0.6056524535583216, "grad_norm": 2.3595855236053467, "learning_rate": 6.62751054672023e-06, "loss": 0.693, "step": 8004 }, { "epoch": 0.6057281222806553, "grad_norm": 2.1252949237823486, "learning_rate": 6.625338066260836e-06, "loss": 0.766, "step": 8005 }, { "epoch": 0.6058037910029889, "grad_norm": 1.8091083765029907, "learning_rate": 6.623165734510455e-06, "loss": 0.7842, "step": 8006 }, { "epoch": 0.6058794597253225, "grad_norm": 2.139522075653076, "learning_rate": 6.620993551605123e-06, "loss": 0.8644, "step": 8007 }, { "epoch": 0.6059551284476562, "grad_norm": 2.6988987922668457, "learning_rate": 6.618821517680869e-06, "loss": 0.7944, "step": 8008 }, { "epoch": 0.6060307971699898, "grad_norm": 1.9569789171218872, "learning_rate": 6.616649632873708e-06, "loss": 0.7234, "step": 8009 }, { "epoch": 0.6061064658923234, "grad_norm": 2.4347798824310303, "learning_rate": 6.614477897319661e-06, "loss": 0.7029, "step": 8010 }, { "epoch": 0.6061821346146571, "grad_norm": 2.1535308361053467, "learning_rate": 6.612306311154722e-06, "loss": 0.5297, "step": 8011 }, { "epoch": 0.6062578033369906, "grad_norm": 2.128608465194702, "learning_rate": 6.6101348745148865e-06, "loss": 0.5245, "step": 8012 }, { "epoch": 0.6063334720593243, "grad_norm": 2.0060644149780273, "learning_rate": 6.607963587536134e-06, "loss": 0.6084, "step": 8013 }, { "epoch": 0.6064091407816579, "grad_norm": 2.5640623569488525, "learning_rate": 6.605792450354436e-06, "loss": 0.6693, "step": 8014 }, { "epoch": 0.6064848095039915, "grad_norm": 2.0857560634613037, "learning_rate": 6.603621463105762e-06, "loss": 0.7536, "step": 8015 }, { "epoch": 0.6065604782263252, "grad_norm": 2.7970449924468994, "learning_rate": 6.601450625926061e-06, "loss": 0.8408, "step": 8016 }, { "epoch": 0.6066361469486587, "grad_norm": 2.1201984882354736, "learning_rate": 6.599279938951282e-06, "loss": 0.6348, "step": 8017 }, { "epoch": 0.6067118156709924, "grad_norm": 2.2606847286224365, "learning_rate": 6.597109402317356e-06, "loss": 0.8402, "step": 8018 }, { "epoch": 0.606787484393326, "grad_norm": 2.860076904296875, "learning_rate": 6.594939016160209e-06, "loss": 0.6749, "step": 8019 }, { "epoch": 0.6068631531156596, "grad_norm": 2.2081804275512695, "learning_rate": 6.592768780615764e-06, "loss": 0.704, "step": 8020 }, { "epoch": 0.6069388218379933, "grad_norm": 2.2822539806365967, "learning_rate": 6.590598695819921e-06, "loss": 0.6199, "step": 8021 }, { "epoch": 0.6070144905603269, "grad_norm": 2.0878994464874268, "learning_rate": 6.588428761908583e-06, "loss": 0.6372, "step": 8022 }, { "epoch": 0.6070901592826605, "grad_norm": 2.373242139816284, "learning_rate": 6.586258979017634e-06, "loss": 0.7862, "step": 8023 }, { "epoch": 0.6071658280049942, "grad_norm": 1.9555684328079224, "learning_rate": 6.584089347282954e-06, "loss": 0.676, "step": 8024 }, { "epoch": 0.6072414967273277, "grad_norm": 2.07110595703125, "learning_rate": 6.581919866840413e-06, "loss": 0.5871, "step": 8025 }, { "epoch": 0.6073171654496614, "grad_norm": 3.0451760292053223, "learning_rate": 6.579750537825874e-06, "loss": 0.6727, "step": 8026 }, { "epoch": 0.607392834171995, "grad_norm": 2.067446708679199, "learning_rate": 6.577581360375182e-06, "loss": 0.726, "step": 8027 }, { "epoch": 0.6074685028943286, "grad_norm": 1.995710849761963, "learning_rate": 6.575412334624183e-06, "loss": 0.79, "step": 8028 }, { "epoch": 0.6075441716166623, "grad_norm": 2.084068536758423, "learning_rate": 6.573243460708701e-06, "loss": 0.7256, "step": 8029 }, { "epoch": 0.6076198403389959, "grad_norm": 2.150193929672241, "learning_rate": 6.571074738764565e-06, "loss": 0.7144, "step": 8030 }, { "epoch": 0.6076955090613295, "grad_norm": 1.506453514099121, "learning_rate": 6.568906168927585e-06, "loss": 0.8205, "step": 8031 }, { "epoch": 0.6077711777836631, "grad_norm": 2.1317901611328125, "learning_rate": 6.5667377513335645e-06, "loss": 0.7269, "step": 8032 }, { "epoch": 0.6078468465059967, "grad_norm": 2.2331316471099854, "learning_rate": 6.564569486118297e-06, "loss": 0.8241, "step": 8033 }, { "epoch": 0.6079225152283304, "grad_norm": 2.03304386138916, "learning_rate": 6.562401373417562e-06, "loss": 0.6857, "step": 8034 }, { "epoch": 0.607998183950664, "grad_norm": 1.8887367248535156, "learning_rate": 6.560233413367139e-06, "loss": 0.6265, "step": 8035 }, { "epoch": 0.6080738526729976, "grad_norm": 1.7368558645248413, "learning_rate": 6.558065606102792e-06, "loss": 0.6891, "step": 8036 }, { "epoch": 0.6081495213953313, "grad_norm": 2.355769395828247, "learning_rate": 6.555897951760274e-06, "loss": 0.7196, "step": 8037 }, { "epoch": 0.6082251901176648, "grad_norm": 2.221735715866089, "learning_rate": 6.553730450475333e-06, "loss": 0.6272, "step": 8038 }, { "epoch": 0.6083008588399985, "grad_norm": 2.569798231124878, "learning_rate": 6.551563102383697e-06, "loss": 0.7951, "step": 8039 }, { "epoch": 0.6083765275623321, "grad_norm": 1.8355985879898071, "learning_rate": 6.5493959076211055e-06, "loss": 0.61, "step": 8040 }, { "epoch": 0.6084521962846657, "grad_norm": 2.0829899311065674, "learning_rate": 6.547228866323265e-06, "loss": 0.7505, "step": 8041 }, { "epoch": 0.6085278650069994, "grad_norm": 1.4479275941848755, "learning_rate": 6.54506197862589e-06, "loss": 0.827, "step": 8042 }, { "epoch": 0.608603533729333, "grad_norm": 1.3985527753829956, "learning_rate": 6.542895244664671e-06, "loss": 0.9008, "step": 8043 }, { "epoch": 0.6086792024516666, "grad_norm": 1.865014672279358, "learning_rate": 6.540728664575301e-06, "loss": 0.7863, "step": 8044 }, { "epoch": 0.6087548711740002, "grad_norm": 1.861416220664978, "learning_rate": 6.538562238493453e-06, "loss": 0.6838, "step": 8045 }, { "epoch": 0.6088305398963338, "grad_norm": 2.162048578262329, "learning_rate": 6.5363959665547996e-06, "loss": 0.8778, "step": 8046 }, { "epoch": 0.6089062086186675, "grad_norm": 2.3792169094085693, "learning_rate": 6.534229848895002e-06, "loss": 0.6775, "step": 8047 }, { "epoch": 0.6089818773410011, "grad_norm": 1.898842692375183, "learning_rate": 6.532063885649705e-06, "loss": 0.5081, "step": 8048 }, { "epoch": 0.6090575460633347, "grad_norm": 1.847794532775879, "learning_rate": 6.52989807695455e-06, "loss": 0.7093, "step": 8049 }, { "epoch": 0.6091332147856684, "grad_norm": 1.8337700366973877, "learning_rate": 6.527732422945164e-06, "loss": 0.7425, "step": 8050 }, { "epoch": 0.609208883508002, "grad_norm": 2.419373035430908, "learning_rate": 6.525566923757172e-06, "loss": 0.7282, "step": 8051 }, { "epoch": 0.6092845522303356, "grad_norm": 2.604099750518799, "learning_rate": 6.5234015795261845e-06, "loss": 0.7182, "step": 8052 }, { "epoch": 0.6093602209526692, "grad_norm": 1.8286306858062744, "learning_rate": 6.5212363903877975e-06, "loss": 0.6825, "step": 8053 }, { "epoch": 0.6094358896750028, "grad_norm": 2.197711229324341, "learning_rate": 6.519071356477606e-06, "loss": 0.6857, "step": 8054 }, { "epoch": 0.6095115583973365, "grad_norm": 2.546816110610962, "learning_rate": 6.516906477931188e-06, "loss": 0.6036, "step": 8055 }, { "epoch": 0.6095872271196701, "grad_norm": 1.9592362642288208, "learning_rate": 6.514741754884122e-06, "loss": 0.8768, "step": 8056 }, { "epoch": 0.6096628958420037, "grad_norm": 2.2540531158447266, "learning_rate": 6.512577187471963e-06, "loss": 0.8306, "step": 8057 }, { "epoch": 0.6097385645643373, "grad_norm": 2.1701200008392334, "learning_rate": 6.510412775830269e-06, "loss": 0.7949, "step": 8058 }, { "epoch": 0.609814233286671, "grad_norm": 2.3326847553253174, "learning_rate": 6.508248520094577e-06, "loss": 0.8873, "step": 8059 }, { "epoch": 0.6098899020090046, "grad_norm": 2.0343968868255615, "learning_rate": 6.506084420400419e-06, "loss": 0.6667, "step": 8060 }, { "epoch": 0.6099655707313382, "grad_norm": 2.17753529548645, "learning_rate": 6.503920476883326e-06, "loss": 0.7618, "step": 8061 }, { "epoch": 0.6100412394536718, "grad_norm": 2.2259414196014404, "learning_rate": 6.501756689678802e-06, "loss": 0.629, "step": 8062 }, { "epoch": 0.6101169081760055, "grad_norm": 2.3345701694488525, "learning_rate": 6.4995930589223575e-06, "loss": 0.74, "step": 8063 }, { "epoch": 0.6101925768983391, "grad_norm": 2.023045301437378, "learning_rate": 6.497429584749482e-06, "loss": 0.8297, "step": 8064 }, { "epoch": 0.6102682456206727, "grad_norm": 2.1104860305786133, "learning_rate": 6.495266267295658e-06, "loss": 0.8041, "step": 8065 }, { "epoch": 0.6103439143430063, "grad_norm": 2.1051814556121826, "learning_rate": 6.493103106696364e-06, "loss": 0.7396, "step": 8066 }, { "epoch": 0.6104195830653399, "grad_norm": 1.9070154428482056, "learning_rate": 6.490940103087062e-06, "loss": 0.7472, "step": 8067 }, { "epoch": 0.6104952517876736, "grad_norm": 2.5003867149353027, "learning_rate": 6.488777256603204e-06, "loss": 0.8995, "step": 8068 }, { "epoch": 0.6105709205100072, "grad_norm": 2.510374069213867, "learning_rate": 6.486614567380239e-06, "loss": 0.7385, "step": 8069 }, { "epoch": 0.6106465892323408, "grad_norm": 1.955937385559082, "learning_rate": 6.484452035553597e-06, "loss": 0.728, "step": 8070 }, { "epoch": 0.6107222579546744, "grad_norm": 1.8284022808074951, "learning_rate": 6.482289661258704e-06, "loss": 0.6269, "step": 8071 }, { "epoch": 0.610797926677008, "grad_norm": 1.7981479167938232, "learning_rate": 6.4801274446309794e-06, "loss": 0.7175, "step": 8072 }, { "epoch": 0.6108735953993417, "grad_norm": 1.8497958183288574, "learning_rate": 6.477965385805822e-06, "loss": 0.7925, "step": 8073 }, { "epoch": 0.6109492641216753, "grad_norm": 1.7783727645874023, "learning_rate": 6.475803484918631e-06, "loss": 0.6708, "step": 8074 }, { "epoch": 0.6110249328440089, "grad_norm": 1.979129433631897, "learning_rate": 6.473641742104787e-06, "loss": 0.5945, "step": 8075 }, { "epoch": 0.6111006015663426, "grad_norm": 1.9033944606781006, "learning_rate": 6.4714801574996695e-06, "loss": 0.6975, "step": 8076 }, { "epoch": 0.6111762702886762, "grad_norm": 1.668627381324768, "learning_rate": 6.469318731238645e-06, "loss": 0.6458, "step": 8077 }, { "epoch": 0.6112519390110098, "grad_norm": 2.2541165351867676, "learning_rate": 6.467157463457064e-06, "loss": 0.6398, "step": 8078 }, { "epoch": 0.6113276077333434, "grad_norm": 2.112131118774414, "learning_rate": 6.464996354290277e-06, "loss": 0.7406, "step": 8079 }, { "epoch": 0.611403276455677, "grad_norm": 1.7680984735488892, "learning_rate": 6.462835403873615e-06, "loss": 0.7407, "step": 8080 }, { "epoch": 0.6114789451780107, "grad_norm": 1.924974799156189, "learning_rate": 6.460674612342407e-06, "loss": 0.7928, "step": 8081 }, { "epoch": 0.6115546139003443, "grad_norm": 1.6907529830932617, "learning_rate": 6.458513979831969e-06, "loss": 0.7178, "step": 8082 }, { "epoch": 0.6116302826226779, "grad_norm": 2.3238351345062256, "learning_rate": 6.456353506477607e-06, "loss": 0.7008, "step": 8083 }, { "epoch": 0.6117059513450115, "grad_norm": 1.6362533569335938, "learning_rate": 6.454193192414613e-06, "loss": 0.6097, "step": 8084 }, { "epoch": 0.6117816200673452, "grad_norm": 2.3989768028259277, "learning_rate": 6.452033037778277e-06, "loss": 0.7459, "step": 8085 }, { "epoch": 0.6118572887896788, "grad_norm": 2.1705870628356934, "learning_rate": 6.449873042703871e-06, "loss": 0.6697, "step": 8086 }, { "epoch": 0.6119329575120124, "grad_norm": 2.115978240966797, "learning_rate": 6.4477132073266645e-06, "loss": 0.6262, "step": 8087 }, { "epoch": 0.612008626234346, "grad_norm": 1.8548389673233032, "learning_rate": 6.445553531781915e-06, "loss": 0.6613, "step": 8088 }, { "epoch": 0.6120842949566797, "grad_norm": 1.7452164888381958, "learning_rate": 6.443394016204861e-06, "loss": 0.6711, "step": 8089 }, { "epoch": 0.6121599636790133, "grad_norm": 1.9946900606155396, "learning_rate": 6.441234660730747e-06, "loss": 0.6328, "step": 8090 }, { "epoch": 0.6122356324013469, "grad_norm": 2.100299596786499, "learning_rate": 6.43907546549479e-06, "loss": 0.7854, "step": 8091 }, { "epoch": 0.6123113011236805, "grad_norm": 2.174600124359131, "learning_rate": 6.4369164306322125e-06, "loss": 0.7751, "step": 8092 }, { "epoch": 0.6123869698460142, "grad_norm": 1.753521203994751, "learning_rate": 6.434757556278219e-06, "loss": 0.5145, "step": 8093 }, { "epoch": 0.6124626385683478, "grad_norm": 1.6871925592422485, "learning_rate": 6.432598842568003e-06, "loss": 0.6158, "step": 8094 }, { "epoch": 0.6125383072906814, "grad_norm": 1.852492332458496, "learning_rate": 6.430440289636754e-06, "loss": 0.613, "step": 8095 }, { "epoch": 0.612613976013015, "grad_norm": 1.7599942684173584, "learning_rate": 6.428281897619638e-06, "loss": 0.8107, "step": 8096 }, { "epoch": 0.6126896447353486, "grad_norm": 2.176158905029297, "learning_rate": 6.4261236666518345e-06, "loss": 0.6481, "step": 8097 }, { "epoch": 0.6127653134576823, "grad_norm": 2.7235372066497803, "learning_rate": 6.423965596868489e-06, "loss": 0.7695, "step": 8098 }, { "epoch": 0.6128409821800159, "grad_norm": 1.6822925806045532, "learning_rate": 6.421807688404753e-06, "loss": 0.533, "step": 8099 }, { "epoch": 0.6129166509023495, "grad_norm": 1.8413187265396118, "learning_rate": 6.419649941395756e-06, "loss": 0.6069, "step": 8100 }, { "epoch": 0.6129923196246831, "grad_norm": 1.9572733640670776, "learning_rate": 6.417492355976624e-06, "loss": 0.7027, "step": 8101 }, { "epoch": 0.6130679883470168, "grad_norm": 2.1810455322265625, "learning_rate": 6.4153349322824765e-06, "loss": 0.6632, "step": 8102 }, { "epoch": 0.6131436570693504, "grad_norm": 2.011125326156616, "learning_rate": 6.413177670448413e-06, "loss": 0.7041, "step": 8103 }, { "epoch": 0.613219325791684, "grad_norm": 2.0759332180023193, "learning_rate": 6.411020570609533e-06, "loss": 0.7887, "step": 8104 }, { "epoch": 0.6132949945140176, "grad_norm": 1.7652744054794312, "learning_rate": 6.408863632900918e-06, "loss": 0.6336, "step": 8105 }, { "epoch": 0.6133706632363513, "grad_norm": 1.9956303834915161, "learning_rate": 6.406706857457639e-06, "loss": 0.7777, "step": 8106 }, { "epoch": 0.6134463319586849, "grad_norm": 2.095097780227661, "learning_rate": 6.40455024441477e-06, "loss": 0.6843, "step": 8107 }, { "epoch": 0.6135220006810185, "grad_norm": 2.287135601043701, "learning_rate": 6.402393793907355e-06, "loss": 0.7009, "step": 8108 }, { "epoch": 0.6135976694033521, "grad_norm": 1.8834456205368042, "learning_rate": 6.4002375060704465e-06, "loss": 0.6393, "step": 8109 }, { "epoch": 0.6136733381256857, "grad_norm": 4.113138198852539, "learning_rate": 6.398081381039072e-06, "loss": 0.7464, "step": 8110 }, { "epoch": 0.6137490068480194, "grad_norm": 1.7231311798095703, "learning_rate": 6.395925418948255e-06, "loss": 0.6792, "step": 8111 }, { "epoch": 0.613824675570353, "grad_norm": 3.072347640991211, "learning_rate": 6.3937696199330116e-06, "loss": 0.5752, "step": 8112 }, { "epoch": 0.6139003442926866, "grad_norm": 1.6998788118362427, "learning_rate": 6.3916139841283465e-06, "loss": 0.6967, "step": 8113 }, { "epoch": 0.6139760130150202, "grad_norm": 2.079223394393921, "learning_rate": 6.3894585116692496e-06, "loss": 0.6728, "step": 8114 }, { "epoch": 0.6140516817373539, "grad_norm": 2.595216751098633, "learning_rate": 6.387303202690705e-06, "loss": 0.6351, "step": 8115 }, { "epoch": 0.6141273504596875, "grad_norm": 2.1260061264038086, "learning_rate": 6.385148057327681e-06, "loss": 0.7492, "step": 8116 }, { "epoch": 0.6142030191820211, "grad_norm": 1.9894077777862549, "learning_rate": 6.382993075715144e-06, "loss": 0.8202, "step": 8117 }, { "epoch": 0.6142786879043547, "grad_norm": 3.4752848148345947, "learning_rate": 6.380838257988048e-06, "loss": 0.6512, "step": 8118 }, { "epoch": 0.6143543566266884, "grad_norm": 2.0392558574676514, "learning_rate": 6.378683604281329e-06, "loss": 0.6064, "step": 8119 }, { "epoch": 0.614430025349022, "grad_norm": 1.8147621154785156, "learning_rate": 6.376529114729924e-06, "loss": 0.6444, "step": 8120 }, { "epoch": 0.6145056940713556, "grad_norm": 2.527744770050049, "learning_rate": 6.374374789468749e-06, "loss": 0.6505, "step": 8121 }, { "epoch": 0.6145813627936892, "grad_norm": 2.176684856414795, "learning_rate": 6.372220628632714e-06, "loss": 0.5707, "step": 8122 }, { "epoch": 0.6146570315160228, "grad_norm": 2.0857019424438477, "learning_rate": 6.3700666323567265e-06, "loss": 0.7341, "step": 8123 }, { "epoch": 0.6147327002383565, "grad_norm": 2.4121081829071045, "learning_rate": 6.3679128007756724e-06, "loss": 0.7551, "step": 8124 }, { "epoch": 0.6148083689606901, "grad_norm": 2.668250560760498, "learning_rate": 6.365759134024433e-06, "loss": 0.7629, "step": 8125 }, { "epoch": 0.6148840376830237, "grad_norm": 1.942336916923523, "learning_rate": 6.363605632237874e-06, "loss": 0.6856, "step": 8126 }, { "epoch": 0.6149597064053574, "grad_norm": 2.963472843170166, "learning_rate": 6.361452295550856e-06, "loss": 0.684, "step": 8127 }, { "epoch": 0.615035375127691, "grad_norm": 2.0592167377471924, "learning_rate": 6.359299124098231e-06, "loss": 0.7157, "step": 8128 }, { "epoch": 0.6151110438500246, "grad_norm": 2.172013282775879, "learning_rate": 6.3571461180148395e-06, "loss": 0.6001, "step": 8129 }, { "epoch": 0.6151867125723582, "grad_norm": 2.4280693531036377, "learning_rate": 6.354993277435503e-06, "loss": 0.5956, "step": 8130 }, { "epoch": 0.6152623812946918, "grad_norm": 1.9213684797286987, "learning_rate": 6.352840602495044e-06, "loss": 0.59, "step": 8131 }, { "epoch": 0.6153380500170255, "grad_norm": 2.0935068130493164, "learning_rate": 6.350688093328266e-06, "loss": 0.6688, "step": 8132 }, { "epoch": 0.6154137187393591, "grad_norm": 3.5406651496887207, "learning_rate": 6.348535750069969e-06, "loss": 0.8106, "step": 8133 }, { "epoch": 0.6154893874616927, "grad_norm": 2.5586190223693848, "learning_rate": 6.346383572854942e-06, "loss": 0.6554, "step": 8134 }, { "epoch": 0.6155650561840263, "grad_norm": 5.290948390960693, "learning_rate": 6.344231561817956e-06, "loss": 0.5677, "step": 8135 }, { "epoch": 0.6156407249063599, "grad_norm": 9.015077590942383, "learning_rate": 6.342079717093782e-06, "loss": 0.7897, "step": 8136 }, { "epoch": 0.6157163936286936, "grad_norm": 8.069103240966797, "learning_rate": 6.339928038817168e-06, "loss": 0.741, "step": 8137 }, { "epoch": 0.6157920623510272, "grad_norm": 58.060359954833984, "learning_rate": 6.337776527122865e-06, "loss": 0.7947, "step": 8138 }, { "epoch": 0.6158677310733608, "grad_norm": 12.597784996032715, "learning_rate": 6.335625182145611e-06, "loss": 0.6853, "step": 8139 }, { "epoch": 0.6159433997956945, "grad_norm": 2.0326411724090576, "learning_rate": 6.333474004020123e-06, "loss": 0.672, "step": 8140 }, { "epoch": 0.6160190685180281, "grad_norm": 2.0402638912200928, "learning_rate": 6.331322992881118e-06, "loss": 0.7124, "step": 8141 }, { "epoch": 0.6160947372403617, "grad_norm": 4.363745212554932, "learning_rate": 6.329172148863294e-06, "loss": 0.8492, "step": 8142 }, { "epoch": 0.6161704059626953, "grad_norm": 4.418940544128418, "learning_rate": 6.327021472101355e-06, "loss": 0.6559, "step": 8143 }, { "epoch": 0.6162460746850289, "grad_norm": 2.802222728729248, "learning_rate": 6.3248709627299735e-06, "loss": 0.7396, "step": 8144 }, { "epoch": 0.6163217434073626, "grad_norm": 2.031181573867798, "learning_rate": 6.322720620883827e-06, "loss": 0.6788, "step": 8145 }, { "epoch": 0.6163974121296962, "grad_norm": 2.4163577556610107, "learning_rate": 6.320570446697574e-06, "loss": 0.8298, "step": 8146 }, { "epoch": 0.6164730808520298, "grad_norm": 2.520803928375244, "learning_rate": 6.318420440305863e-06, "loss": 0.8713, "step": 8147 }, { "epoch": 0.6165487495743635, "grad_norm": 2.569690227508545, "learning_rate": 6.316270601843342e-06, "loss": 0.5737, "step": 8148 }, { "epoch": 0.616624418296697, "grad_norm": 2.0397837162017822, "learning_rate": 6.314120931444631e-06, "loss": 0.6953, "step": 8149 }, { "epoch": 0.6167000870190307, "grad_norm": 2.84653639793396, "learning_rate": 6.31197142924436e-06, "loss": 0.7573, "step": 8150 }, { "epoch": 0.6167757557413643, "grad_norm": 2.532266616821289, "learning_rate": 6.30982209537713e-06, "loss": 0.5795, "step": 8151 }, { "epoch": 0.6168514244636979, "grad_norm": 2.0461106300354004, "learning_rate": 6.307672929977539e-06, "loss": 0.7261, "step": 8152 }, { "epoch": 0.6169270931860316, "grad_norm": 2.5015604496002197, "learning_rate": 6.3055239331801795e-06, "loss": 0.6285, "step": 8153 }, { "epoch": 0.6170027619083652, "grad_norm": 3.2419116497039795, "learning_rate": 6.303375105119626e-06, "loss": 0.618, "step": 8154 }, { "epoch": 0.6170784306306988, "grad_norm": 2.273522138595581, "learning_rate": 6.301226445930447e-06, "loss": 0.6384, "step": 8155 }, { "epoch": 0.6171540993530324, "grad_norm": 2.353877544403076, "learning_rate": 6.299077955747195e-06, "loss": 0.6086, "step": 8156 }, { "epoch": 0.617229768075366, "grad_norm": 2.2574803829193115, "learning_rate": 6.296929634704415e-06, "loss": 0.8628, "step": 8157 }, { "epoch": 0.6173054367976997, "grad_norm": 2.2511677742004395, "learning_rate": 6.294781482936646e-06, "loss": 0.8214, "step": 8158 }, { "epoch": 0.6173811055200333, "grad_norm": 2.1180849075317383, "learning_rate": 6.292633500578412e-06, "loss": 0.6905, "step": 8159 }, { "epoch": 0.6174567742423669, "grad_norm": 2.302046775817871, "learning_rate": 6.290485687764223e-06, "loss": 0.807, "step": 8160 }, { "epoch": 0.6175324429647006, "grad_norm": 2.0591933727264404, "learning_rate": 6.2883380446285865e-06, "loss": 0.7709, "step": 8161 }, { "epoch": 0.6176081116870341, "grad_norm": 2.0985355377197266, "learning_rate": 6.28619057130599e-06, "loss": 0.8037, "step": 8162 }, { "epoch": 0.6176837804093678, "grad_norm": 1.847230315208435, "learning_rate": 6.284043267930915e-06, "loss": 0.6209, "step": 8163 }, { "epoch": 0.6177594491317014, "grad_norm": 2.1725268363952637, "learning_rate": 6.28189613463784e-06, "loss": 0.7832, "step": 8164 }, { "epoch": 0.617835117854035, "grad_norm": 1.3901453018188477, "learning_rate": 6.279749171561218e-06, "loss": 0.772, "step": 8165 }, { "epoch": 0.6179107865763687, "grad_norm": 2.406770944595337, "learning_rate": 6.277602378835502e-06, "loss": 0.6068, "step": 8166 }, { "epoch": 0.6179864552987023, "grad_norm": 2.2921957969665527, "learning_rate": 6.275455756595129e-06, "loss": 0.7577, "step": 8167 }, { "epoch": 0.6180621240210359, "grad_norm": 2.531522274017334, "learning_rate": 6.273309304974528e-06, "loss": 0.5886, "step": 8168 }, { "epoch": 0.6181377927433696, "grad_norm": 2.0290911197662354, "learning_rate": 6.27116302410812e-06, "loss": 0.7469, "step": 8169 }, { "epoch": 0.6182134614657031, "grad_norm": 2.061554431915283, "learning_rate": 6.269016914130309e-06, "loss": 0.5977, "step": 8170 }, { "epoch": 0.6182891301880368, "grad_norm": 4.355343818664551, "learning_rate": 6.266870975175491e-06, "loss": 0.6287, "step": 8171 }, { "epoch": 0.6183647989103704, "grad_norm": 2.044095754623413, "learning_rate": 6.264725207378055e-06, "loss": 0.8558, "step": 8172 }, { "epoch": 0.618440467632704, "grad_norm": 1.840848684310913, "learning_rate": 6.262579610872368e-06, "loss": 0.7437, "step": 8173 }, { "epoch": 0.6185161363550377, "grad_norm": 5.799072742462158, "learning_rate": 6.260434185792803e-06, "loss": 0.7079, "step": 8174 }, { "epoch": 0.6185918050773712, "grad_norm": 1.9204273223876953, "learning_rate": 6.258288932273713e-06, "loss": 0.6698, "step": 8175 }, { "epoch": 0.6186674737997049, "grad_norm": 2.231785774230957, "learning_rate": 6.2561438504494346e-06, "loss": 0.7382, "step": 8176 }, { "epoch": 0.6187431425220385, "grad_norm": 1.8387155532836914, "learning_rate": 6.253998940454305e-06, "loss": 0.5977, "step": 8177 }, { "epoch": 0.6188188112443721, "grad_norm": 2.453481435775757, "learning_rate": 6.25185420242264e-06, "loss": 0.7128, "step": 8178 }, { "epoch": 0.6188944799667058, "grad_norm": 2.066225528717041, "learning_rate": 6.249709636488755e-06, "loss": 0.5713, "step": 8179 }, { "epoch": 0.6189701486890394, "grad_norm": 1.8709297180175781, "learning_rate": 6.2475652427869495e-06, "loss": 0.7591, "step": 8180 }, { "epoch": 0.619045817411373, "grad_norm": 2.087465763092041, "learning_rate": 6.2454210214515095e-06, "loss": 0.7528, "step": 8181 }, { "epoch": 0.6191214861337067, "grad_norm": 2.2975540161132812, "learning_rate": 6.243276972616716e-06, "loss": 0.7861, "step": 8182 }, { "epoch": 0.6191971548560402, "grad_norm": 1.8741811513900757, "learning_rate": 6.241133096416832e-06, "loss": 0.629, "step": 8183 }, { "epoch": 0.6192728235783739, "grad_norm": 1.919198751449585, "learning_rate": 6.238989392986118e-06, "loss": 0.7016, "step": 8184 }, { "epoch": 0.6193484923007075, "grad_norm": 2.3332748413085938, "learning_rate": 6.236845862458818e-06, "loss": 0.59, "step": 8185 }, { "epoch": 0.6194241610230411, "grad_norm": 2.41435170173645, "learning_rate": 6.2347025049691696e-06, "loss": 0.7076, "step": 8186 }, { "epoch": 0.6194998297453748, "grad_norm": 2.0539135932922363, "learning_rate": 6.232559320651392e-06, "loss": 0.6218, "step": 8187 }, { "epoch": 0.6195754984677083, "grad_norm": 2.1097443103790283, "learning_rate": 6.2304163096397e-06, "loss": 0.7403, "step": 8188 }, { "epoch": 0.619651167190042, "grad_norm": 2.184004306793213, "learning_rate": 6.2282734720683e-06, "loss": 0.5566, "step": 8189 }, { "epoch": 0.6197268359123757, "grad_norm": 2.4492106437683105, "learning_rate": 6.226130808071377e-06, "loss": 0.6588, "step": 8190 }, { "epoch": 0.6198025046347092, "grad_norm": 2.1081714630126953, "learning_rate": 6.2239883177831174e-06, "loss": 0.6285, "step": 8191 }, { "epoch": 0.6198781733570429, "grad_norm": 2.0451788902282715, "learning_rate": 6.221846001337686e-06, "loss": 0.7617, "step": 8192 }, { "epoch": 0.6199538420793765, "grad_norm": 2.1223862171173096, "learning_rate": 6.219703858869242e-06, "loss": 0.734, "step": 8193 }, { "epoch": 0.6200295108017101, "grad_norm": 1.9312350749969482, "learning_rate": 6.217561890511939e-06, "loss": 0.5471, "step": 8194 }, { "epoch": 0.6201051795240438, "grad_norm": 2.3495125770568848, "learning_rate": 6.215420096399907e-06, "loss": 0.7066, "step": 8195 }, { "epoch": 0.6201808482463773, "grad_norm": 2.163055181503296, "learning_rate": 6.213278476667278e-06, "loss": 0.7731, "step": 8196 }, { "epoch": 0.620256516968711, "grad_norm": 2.2514150142669678, "learning_rate": 6.211137031448162e-06, "loss": 0.6057, "step": 8197 }, { "epoch": 0.6203321856910446, "grad_norm": 1.9421865940093994, "learning_rate": 6.2089957608766664e-06, "loss": 0.6273, "step": 8198 }, { "epoch": 0.6204078544133782, "grad_norm": 1.938656210899353, "learning_rate": 6.2068546650868785e-06, "loss": 0.8241, "step": 8199 }, { "epoch": 0.6204835231357119, "grad_norm": 2.263339042663574, "learning_rate": 6.204713744212891e-06, "loss": 0.7253, "step": 8200 }, { "epoch": 0.6205591918580454, "grad_norm": 1.7081513404846191, "learning_rate": 6.202572998388768e-06, "loss": 0.5888, "step": 8201 }, { "epoch": 0.6206348605803791, "grad_norm": 2.124990940093994, "learning_rate": 6.200432427748574e-06, "loss": 0.7011, "step": 8202 }, { "epoch": 0.6207105293027128, "grad_norm": 2.4887731075286865, "learning_rate": 6.198292032426354e-06, "loss": 0.7463, "step": 8203 }, { "epoch": 0.6207861980250463, "grad_norm": 2.28210186958313, "learning_rate": 6.1961518125561485e-06, "loss": 0.7691, "step": 8204 }, { "epoch": 0.62086186674738, "grad_norm": 1.8958467245101929, "learning_rate": 6.194011768271986e-06, "loss": 0.7223, "step": 8205 }, { "epoch": 0.6209375354697136, "grad_norm": 1.9587349891662598, "learning_rate": 6.191871899707883e-06, "loss": 0.769, "step": 8206 }, { "epoch": 0.6210132041920472, "grad_norm": 2.0116994380950928, "learning_rate": 6.189732206997845e-06, "loss": 0.5752, "step": 8207 }, { "epoch": 0.6210888729143809, "grad_norm": 2.030748128890991, "learning_rate": 6.187592690275864e-06, "loss": 0.6877, "step": 8208 }, { "epoch": 0.6211645416367144, "grad_norm": 1.9845973253250122, "learning_rate": 6.185453349675923e-06, "loss": 0.6563, "step": 8209 }, { "epoch": 0.6212402103590481, "grad_norm": 2.015986680984497, "learning_rate": 6.1833141853319995e-06, "loss": 0.6908, "step": 8210 }, { "epoch": 0.6213158790813818, "grad_norm": 2.061414957046509, "learning_rate": 6.181175197378053e-06, "loss": 0.7801, "step": 8211 }, { "epoch": 0.6213915478037153, "grad_norm": 1.997130274772644, "learning_rate": 6.179036385948032e-06, "loss": 0.6684, "step": 8212 }, { "epoch": 0.621467216526049, "grad_norm": 2.0803966522216797, "learning_rate": 6.1768977511758755e-06, "loss": 0.6245, "step": 8213 }, { "epoch": 0.6215428852483825, "grad_norm": 2.1265552043914795, "learning_rate": 6.174759293195511e-06, "loss": 0.6819, "step": 8214 }, { "epoch": 0.6216185539707162, "grad_norm": 1.9517875909805298, "learning_rate": 6.1726210121408594e-06, "loss": 0.7735, "step": 8215 }, { "epoch": 0.6216942226930499, "grad_norm": 2.6221845149993896, "learning_rate": 6.170482908145827e-06, "loss": 0.7706, "step": 8216 }, { "epoch": 0.6217698914153834, "grad_norm": 2.260093927383423, "learning_rate": 6.168344981344304e-06, "loss": 0.6144, "step": 8217 }, { "epoch": 0.6218455601377171, "grad_norm": 2.432312488555908, "learning_rate": 6.166207231870179e-06, "loss": 0.8123, "step": 8218 }, { "epoch": 0.6219212288600507, "grad_norm": 1.845873236656189, "learning_rate": 6.16406965985732e-06, "loss": 0.7548, "step": 8219 }, { "epoch": 0.6219968975823843, "grad_norm": 2.7891929149627686, "learning_rate": 6.161932265439592e-06, "loss": 0.6276, "step": 8220 }, { "epoch": 0.622072566304718, "grad_norm": 1.9426125288009644, "learning_rate": 6.159795048750848e-06, "loss": 0.778, "step": 8221 }, { "epoch": 0.6221482350270515, "grad_norm": 2.255960702896118, "learning_rate": 6.157658009924922e-06, "loss": 0.7768, "step": 8222 }, { "epoch": 0.6222239037493852, "grad_norm": 2.089102029800415, "learning_rate": 6.155521149095647e-06, "loss": 0.7533, "step": 8223 }, { "epoch": 0.6222995724717189, "grad_norm": 2.0780301094055176, "learning_rate": 6.153384466396833e-06, "loss": 0.8131, "step": 8224 }, { "epoch": 0.6223752411940524, "grad_norm": 2.264507532119751, "learning_rate": 6.151247961962294e-06, "loss": 0.5875, "step": 8225 }, { "epoch": 0.6224509099163861, "grad_norm": 1.9472380876541138, "learning_rate": 6.1491116359258215e-06, "loss": 0.6929, "step": 8226 }, { "epoch": 0.6225265786387196, "grad_norm": 1.84212327003479, "learning_rate": 6.146975488421199e-06, "loss": 0.5678, "step": 8227 }, { "epoch": 0.6226022473610533, "grad_norm": 1.9093300104141235, "learning_rate": 6.144839519582201e-06, "loss": 0.739, "step": 8228 }, { "epoch": 0.622677916083387, "grad_norm": 1.707137942314148, "learning_rate": 6.142703729542581e-06, "loss": 0.6922, "step": 8229 }, { "epoch": 0.6227535848057205, "grad_norm": 1.5876680612564087, "learning_rate": 6.1405681184361e-06, "loss": 0.7791, "step": 8230 }, { "epoch": 0.6228292535280542, "grad_norm": 2.1227242946624756, "learning_rate": 6.138432686396492e-06, "loss": 0.8995, "step": 8231 }, { "epoch": 0.6229049222503878, "grad_norm": 1.9821844100952148, "learning_rate": 6.1362974335574835e-06, "loss": 0.8453, "step": 8232 }, { "epoch": 0.6229805909727214, "grad_norm": 2.247864007949829, "learning_rate": 6.134162360052793e-06, "loss": 0.7166, "step": 8233 }, { "epoch": 0.6230562596950551, "grad_norm": 2.540407657623291, "learning_rate": 6.132027466016122e-06, "loss": 0.7784, "step": 8234 }, { "epoch": 0.6231319284173886, "grad_norm": 2.323075294494629, "learning_rate": 6.129892751581171e-06, "loss": 0.6891, "step": 8235 }, { "epoch": 0.6232075971397223, "grad_norm": 1.875849723815918, "learning_rate": 6.1277582168816165e-06, "loss": 0.6888, "step": 8236 }, { "epoch": 0.623283265862056, "grad_norm": 2.02666974067688, "learning_rate": 6.125623862051135e-06, "loss": 0.7028, "step": 8237 }, { "epoch": 0.6233589345843895, "grad_norm": 1.8737157583236694, "learning_rate": 6.1234896872233815e-06, "loss": 0.9008, "step": 8238 }, { "epoch": 0.6234346033067232, "grad_norm": 1.9101803302764893, "learning_rate": 6.1213556925320105e-06, "loss": 0.6532, "step": 8239 }, { "epoch": 0.6235102720290568, "grad_norm": 2.0796594619750977, "learning_rate": 6.119221878110652e-06, "loss": 0.5757, "step": 8240 }, { "epoch": 0.6235859407513904, "grad_norm": 2.185795307159424, "learning_rate": 6.1170882440929385e-06, "loss": 0.6812, "step": 8241 }, { "epoch": 0.6236616094737241, "grad_norm": 2.2780838012695312, "learning_rate": 6.114954790612487e-06, "loss": 0.7021, "step": 8242 }, { "epoch": 0.6237372781960576, "grad_norm": 2.1631765365600586, "learning_rate": 6.112821517802896e-06, "loss": 0.6584, "step": 8243 }, { "epoch": 0.6238129469183913, "grad_norm": 2.143260955810547, "learning_rate": 6.11068842579776e-06, "loss": 0.7908, "step": 8244 }, { "epoch": 0.623888615640725, "grad_norm": 2.4261891841888428, "learning_rate": 6.108555514730655e-06, "loss": 0.704, "step": 8245 }, { "epoch": 0.6239642843630585, "grad_norm": 2.189960241317749, "learning_rate": 6.106422784735162e-06, "loss": 0.7673, "step": 8246 }, { "epoch": 0.6240399530853922, "grad_norm": 2.8708183765411377, "learning_rate": 6.104290235944831e-06, "loss": 0.6039, "step": 8247 }, { "epoch": 0.6241156218077257, "grad_norm": 1.9821953773498535, "learning_rate": 6.1021578684932136e-06, "loss": 0.6746, "step": 8248 }, { "epoch": 0.6241912905300594, "grad_norm": 2.20764422416687, "learning_rate": 6.1000256825138405e-06, "loss": 0.6322, "step": 8249 }, { "epoch": 0.6242669592523931, "grad_norm": 2.8695743083953857, "learning_rate": 6.097893678140237e-06, "loss": 0.7263, "step": 8250 }, { "epoch": 0.6243426279747266, "grad_norm": 2.6145105361938477, "learning_rate": 6.095761855505921e-06, "loss": 0.7057, "step": 8251 }, { "epoch": 0.6244182966970603, "grad_norm": 2.075453758239746, "learning_rate": 6.093630214744391e-06, "loss": 0.8061, "step": 8252 }, { "epoch": 0.624493965419394, "grad_norm": 2.094466209411621, "learning_rate": 6.091498755989139e-06, "loss": 0.613, "step": 8253 }, { "epoch": 0.6245696341417275, "grad_norm": 1.5534762144088745, "learning_rate": 6.089367479373639e-06, "loss": 0.7078, "step": 8254 }, { "epoch": 0.6246453028640612, "grad_norm": 2.0539896488189697, "learning_rate": 6.087236385031361e-06, "loss": 0.7436, "step": 8255 }, { "epoch": 0.6247209715863947, "grad_norm": 1.873143196105957, "learning_rate": 6.085105473095764e-06, "loss": 0.7195, "step": 8256 }, { "epoch": 0.6247966403087284, "grad_norm": 2.717400312423706, "learning_rate": 6.082974743700289e-06, "loss": 0.726, "step": 8257 }, { "epoch": 0.6248723090310621, "grad_norm": 2.0352890491485596, "learning_rate": 6.0808441969783714e-06, "loss": 0.727, "step": 8258 }, { "epoch": 0.6249479777533956, "grad_norm": 2.217931032180786, "learning_rate": 6.078713833063431e-06, "loss": 0.7579, "step": 8259 }, { "epoch": 0.6250236464757293, "grad_norm": 1.9803812503814697, "learning_rate": 6.0765836520888774e-06, "loss": 0.7234, "step": 8260 }, { "epoch": 0.6250993151980628, "grad_norm": 2.017169237136841, "learning_rate": 6.074453654188113e-06, "loss": 0.7283, "step": 8261 }, { "epoch": 0.6251749839203965, "grad_norm": 2.0302610397338867, "learning_rate": 6.072323839494523e-06, "loss": 0.7154, "step": 8262 }, { "epoch": 0.6252506526427302, "grad_norm": 1.8315473794937134, "learning_rate": 6.070194208141484e-06, "loss": 0.642, "step": 8263 }, { "epoch": 0.6253263213650637, "grad_norm": 2.1627883911132812, "learning_rate": 6.0680647602623605e-06, "loss": 0.7464, "step": 8264 }, { "epoch": 0.6254019900873974, "grad_norm": 1.989489197731018, "learning_rate": 6.065935495990501e-06, "loss": 0.5703, "step": 8265 }, { "epoch": 0.625477658809731, "grad_norm": 1.6930984258651733, "learning_rate": 6.063806415459253e-06, "loss": 0.5945, "step": 8266 }, { "epoch": 0.6255533275320646, "grad_norm": 1.9315162897109985, "learning_rate": 6.0616775188019444e-06, "loss": 0.6163, "step": 8267 }, { "epoch": 0.6256289962543983, "grad_norm": 1.9883739948272705, "learning_rate": 6.059548806151893e-06, "loss": 0.6101, "step": 8268 }, { "epoch": 0.6257046649767318, "grad_norm": 2.2060041427612305, "learning_rate": 6.057420277642407e-06, "loss": 0.6781, "step": 8269 }, { "epoch": 0.6257803336990655, "grad_norm": 2.501366138458252, "learning_rate": 6.055291933406778e-06, "loss": 0.6316, "step": 8270 }, { "epoch": 0.6258560024213992, "grad_norm": 1.9674981832504272, "learning_rate": 6.053163773578293e-06, "loss": 0.8069, "step": 8271 }, { "epoch": 0.6259316711437327, "grad_norm": 2.0307114124298096, "learning_rate": 6.051035798290226e-06, "loss": 0.5404, "step": 8272 }, { "epoch": 0.6260073398660664, "grad_norm": 1.978594183921814, "learning_rate": 6.048908007675834e-06, "loss": 0.6138, "step": 8273 }, { "epoch": 0.6260830085883999, "grad_norm": 3.162856340408325, "learning_rate": 6.046780401868367e-06, "loss": 0.705, "step": 8274 }, { "epoch": 0.6261586773107336, "grad_norm": 2.049156427383423, "learning_rate": 6.044652981001066e-06, "loss": 0.7661, "step": 8275 }, { "epoch": 0.6262343460330673, "grad_norm": 2.057219982147217, "learning_rate": 6.042525745207149e-06, "loss": 0.6902, "step": 8276 }, { "epoch": 0.6263100147554008, "grad_norm": 2.105268716812134, "learning_rate": 6.040398694619838e-06, "loss": 0.6733, "step": 8277 }, { "epoch": 0.6263856834777345, "grad_norm": 2.046825647354126, "learning_rate": 6.038271829372335e-06, "loss": 0.6682, "step": 8278 }, { "epoch": 0.6264613522000682, "grad_norm": 2.0353481769561768, "learning_rate": 6.036145149597828e-06, "loss": 0.7527, "step": 8279 }, { "epoch": 0.6265370209224017, "grad_norm": 1.9645310640335083, "learning_rate": 6.034018655429499e-06, "loss": 0.6571, "step": 8280 }, { "epoch": 0.6266126896447354, "grad_norm": 1.8533154726028442, "learning_rate": 6.031892347000512e-06, "loss": 0.6641, "step": 8281 }, { "epoch": 0.6266883583670689, "grad_norm": 2.540891170501709, "learning_rate": 6.029766224444028e-06, "loss": 0.7443, "step": 8282 }, { "epoch": 0.6267640270894026, "grad_norm": 2.1608104705810547, "learning_rate": 6.027640287893191e-06, "loss": 0.6818, "step": 8283 }, { "epoch": 0.6268396958117363, "grad_norm": 2.1452369689941406, "learning_rate": 6.0255145374811315e-06, "loss": 0.7741, "step": 8284 }, { "epoch": 0.6269153645340698, "grad_norm": 1.848710060119629, "learning_rate": 6.023388973340974e-06, "loss": 0.6587, "step": 8285 }, { "epoch": 0.6269910332564035, "grad_norm": 2.499025821685791, "learning_rate": 6.021263595605825e-06, "loss": 0.7964, "step": 8286 }, { "epoch": 0.627066701978737, "grad_norm": 2.207012414932251, "learning_rate": 6.019138404408783e-06, "loss": 0.6924, "step": 8287 }, { "epoch": 0.6271423707010707, "grad_norm": 2.040011405944824, "learning_rate": 6.017013399882936e-06, "loss": 0.6481, "step": 8288 }, { "epoch": 0.6272180394234044, "grad_norm": 1.5633232593536377, "learning_rate": 6.014888582161361e-06, "loss": 0.6835, "step": 8289 }, { "epoch": 0.6272937081457379, "grad_norm": 2.0602505207061768, "learning_rate": 6.012763951377116e-06, "loss": 0.8177, "step": 8290 }, { "epoch": 0.6273693768680716, "grad_norm": 1.8293778896331787, "learning_rate": 6.010639507663251e-06, "loss": 0.6229, "step": 8291 }, { "epoch": 0.6274450455904053, "grad_norm": 2.240879535675049, "learning_rate": 6.008515251152815e-06, "loss": 0.6448, "step": 8292 }, { "epoch": 0.6275207143127388, "grad_norm": 1.7623291015625, "learning_rate": 6.006391181978825e-06, "loss": 0.6318, "step": 8293 }, { "epoch": 0.6275963830350725, "grad_norm": 2.637432098388672, "learning_rate": 6.004267300274305e-06, "loss": 0.632, "step": 8294 }, { "epoch": 0.627672051757406, "grad_norm": 2.096395492553711, "learning_rate": 6.002143606172254e-06, "loss": 0.798, "step": 8295 }, { "epoch": 0.6277477204797397, "grad_norm": 2.2483010292053223, "learning_rate": 6.000020099805665e-06, "loss": 0.6926, "step": 8296 }, { "epoch": 0.6278233892020734, "grad_norm": 2.0288619995117188, "learning_rate": 5.997896781307524e-06, "loss": 0.7208, "step": 8297 }, { "epoch": 0.6278990579244069, "grad_norm": 1.9230502843856812, "learning_rate": 5.995773650810794e-06, "loss": 0.6955, "step": 8298 }, { "epoch": 0.6279747266467406, "grad_norm": 1.8316569328308105, "learning_rate": 5.993650708448437e-06, "loss": 0.5128, "step": 8299 }, { "epoch": 0.6280503953690741, "grad_norm": 2.2068562507629395, "learning_rate": 5.991527954353395e-06, "loss": 0.765, "step": 8300 }, { "epoch": 0.6281260640914078, "grad_norm": 2.418468952178955, "learning_rate": 5.9894053886586006e-06, "loss": 0.7241, "step": 8301 }, { "epoch": 0.6282017328137415, "grad_norm": 1.7980351448059082, "learning_rate": 5.987283011496981e-06, "loss": 0.6045, "step": 8302 }, { "epoch": 0.628277401536075, "grad_norm": 2.48748779296875, "learning_rate": 5.985160823001445e-06, "loss": 0.7215, "step": 8303 }, { "epoch": 0.6283530702584087, "grad_norm": 1.8230119943618774, "learning_rate": 5.983038823304886e-06, "loss": 0.705, "step": 8304 }, { "epoch": 0.6284287389807424, "grad_norm": 2.0660839080810547, "learning_rate": 5.980917012540198e-06, "loss": 0.7044, "step": 8305 }, { "epoch": 0.6285044077030759, "grad_norm": 1.6837860345840454, "learning_rate": 5.978795390840247e-06, "loss": 0.6187, "step": 8306 }, { "epoch": 0.6285800764254096, "grad_norm": 1.9054850339889526, "learning_rate": 5.976673958337902e-06, "loss": 0.6454, "step": 8307 }, { "epoch": 0.6286557451477431, "grad_norm": 2.1775870323181152, "learning_rate": 5.974552715166014e-06, "loss": 0.6592, "step": 8308 }, { "epoch": 0.6287314138700768, "grad_norm": 1.7539699077606201, "learning_rate": 5.97243166145742e-06, "loss": 0.6329, "step": 8309 }, { "epoch": 0.6288070825924105, "grad_norm": 7.603641033172607, "learning_rate": 5.970310797344949e-06, "loss": 0.6322, "step": 8310 }, { "epoch": 0.628882751314744, "grad_norm": 2.0297069549560547, "learning_rate": 5.968190122961411e-06, "loss": 0.7299, "step": 8311 }, { "epoch": 0.6289584200370777, "grad_norm": 2.0094974040985107, "learning_rate": 5.966069638439615e-06, "loss": 0.5245, "step": 8312 }, { "epoch": 0.6290340887594112, "grad_norm": 1.9231394529342651, "learning_rate": 5.963949343912353e-06, "loss": 0.6593, "step": 8313 }, { "epoch": 0.6291097574817449, "grad_norm": 25.850337982177734, "learning_rate": 5.961829239512402e-06, "loss": 0.5678, "step": 8314 }, { "epoch": 0.6291854262040786, "grad_norm": 3.02929425239563, "learning_rate": 5.959709325372531e-06, "loss": 0.5795, "step": 8315 }, { "epoch": 0.6292610949264121, "grad_norm": 1.7381452322006226, "learning_rate": 5.957589601625495e-06, "loss": 0.6775, "step": 8316 }, { "epoch": 0.6293367636487458, "grad_norm": 2.252568483352661, "learning_rate": 5.955470068404037e-06, "loss": 0.6707, "step": 8317 }, { "epoch": 0.6294124323710795, "grad_norm": 1.7029986381530762, "learning_rate": 5.953350725840891e-06, "loss": 0.6799, "step": 8318 }, { "epoch": 0.629488101093413, "grad_norm": 1.7706902027130127, "learning_rate": 5.9512315740687785e-06, "loss": 0.6089, "step": 8319 }, { "epoch": 0.6295637698157467, "grad_norm": 2.4172465801239014, "learning_rate": 5.949112613220405e-06, "loss": 0.8089, "step": 8320 }, { "epoch": 0.6296394385380802, "grad_norm": 2.203758955001831, "learning_rate": 5.946993843428469e-06, "loss": 0.6302, "step": 8321 }, { "epoch": 0.6297151072604139, "grad_norm": 2.0722339153289795, "learning_rate": 5.944875264825648e-06, "loss": 0.613, "step": 8322 }, { "epoch": 0.6297907759827476, "grad_norm": 2.695570707321167, "learning_rate": 5.942756877544623e-06, "loss": 0.7249, "step": 8323 }, { "epoch": 0.6298664447050811, "grad_norm": 2.075514554977417, "learning_rate": 5.940638681718052e-06, "loss": 0.5446, "step": 8324 }, { "epoch": 0.6299421134274148, "grad_norm": 1.8209716081619263, "learning_rate": 5.938520677478581e-06, "loss": 0.719, "step": 8325 }, { "epoch": 0.6300177821497484, "grad_norm": 2.407820224761963, "learning_rate": 5.936402864958848e-06, "loss": 0.7208, "step": 8326 }, { "epoch": 0.630093450872082, "grad_norm": 2.0046629905700684, "learning_rate": 5.934285244291473e-06, "loss": 0.7994, "step": 8327 }, { "epoch": 0.6301691195944157, "grad_norm": 2.1427841186523438, "learning_rate": 5.932167815609073e-06, "loss": 0.7415, "step": 8328 }, { "epoch": 0.6302447883167492, "grad_norm": 2.2254834175109863, "learning_rate": 5.930050579044249e-06, "loss": 0.6476, "step": 8329 }, { "epoch": 0.6303204570390829, "grad_norm": 2.195834159851074, "learning_rate": 5.927933534729585e-06, "loss": 0.5771, "step": 8330 }, { "epoch": 0.6303961257614166, "grad_norm": 1.9512183666229248, "learning_rate": 5.925816682797663e-06, "loss": 0.7415, "step": 8331 }, { "epoch": 0.6304717944837501, "grad_norm": 2.1546523571014404, "learning_rate": 5.9237000233810356e-06, "loss": 0.6585, "step": 8332 }, { "epoch": 0.6305474632060838, "grad_norm": 1.8281909227371216, "learning_rate": 5.9215835566122696e-06, "loss": 0.7334, "step": 8333 }, { "epoch": 0.6306231319284173, "grad_norm": 2.1117517948150635, "learning_rate": 5.919467282623896e-06, "loss": 0.5892, "step": 8334 }, { "epoch": 0.630698800650751, "grad_norm": 2.3042280673980713, "learning_rate": 5.917351201548447e-06, "loss": 0.6998, "step": 8335 }, { "epoch": 0.6307744693730847, "grad_norm": 2.325014114379883, "learning_rate": 5.9152353135184335e-06, "loss": 0.6966, "step": 8336 }, { "epoch": 0.6308501380954182, "grad_norm": 1.9471384286880493, "learning_rate": 5.913119618666361e-06, "loss": 0.6993, "step": 8337 }, { "epoch": 0.6309258068177519, "grad_norm": 2.687548875808716, "learning_rate": 5.911004117124724e-06, "loss": 0.7726, "step": 8338 }, { "epoch": 0.6310014755400855, "grad_norm": 1.9551631212234497, "learning_rate": 5.908888809026001e-06, "loss": 0.7378, "step": 8339 }, { "epoch": 0.6310771442624191, "grad_norm": 2.2991223335266113, "learning_rate": 5.9067736945026594e-06, "loss": 0.7404, "step": 8340 }, { "epoch": 0.6311528129847528, "grad_norm": 2.1046478748321533, "learning_rate": 5.904658773687153e-06, "loss": 0.7065, "step": 8341 }, { "epoch": 0.6312284817070863, "grad_norm": 2.240189790725708, "learning_rate": 5.902544046711922e-06, "loss": 0.6193, "step": 8342 }, { "epoch": 0.63130415042942, "grad_norm": 1.9708818197250366, "learning_rate": 5.9004295137094054e-06, "loss": 0.6508, "step": 8343 }, { "epoch": 0.6313798191517537, "grad_norm": 2.299236297607422, "learning_rate": 5.898315174812016e-06, "loss": 0.842, "step": 8344 }, { "epoch": 0.6314554878740872, "grad_norm": 2.2343852519989014, "learning_rate": 5.896201030152164e-06, "loss": 0.7665, "step": 8345 }, { "epoch": 0.6315311565964209, "grad_norm": 2.4583773612976074, "learning_rate": 5.894087079862241e-06, "loss": 0.6585, "step": 8346 }, { "epoch": 0.6316068253187545, "grad_norm": 2.2705533504486084, "learning_rate": 5.89197332407463e-06, "loss": 0.5592, "step": 8347 }, { "epoch": 0.6316824940410881, "grad_norm": 1.823162317276001, "learning_rate": 5.889859762921702e-06, "loss": 0.5589, "step": 8348 }, { "epoch": 0.6317581627634218, "grad_norm": 2.318509340286255, "learning_rate": 5.8877463965358175e-06, "loss": 0.6892, "step": 8349 }, { "epoch": 0.6318338314857553, "grad_norm": 2.149641275405884, "learning_rate": 5.885633225049318e-06, "loss": 0.789, "step": 8350 }, { "epoch": 0.631909500208089, "grad_norm": 2.2734084129333496, "learning_rate": 5.883520248594542e-06, "loss": 0.7964, "step": 8351 }, { "epoch": 0.6319851689304226, "grad_norm": 1.703250527381897, "learning_rate": 5.881407467303804e-06, "loss": 0.6932, "step": 8352 }, { "epoch": 0.6320608376527562, "grad_norm": 1.407254934310913, "learning_rate": 5.879294881309418e-06, "loss": 0.9285, "step": 8353 }, { "epoch": 0.6321365063750899, "grad_norm": 1.6375536918640137, "learning_rate": 5.877182490743683e-06, "loss": 0.5346, "step": 8354 }, { "epoch": 0.6322121750974234, "grad_norm": 2.1857407093048096, "learning_rate": 5.875070295738878e-06, "loss": 0.675, "step": 8355 }, { "epoch": 0.6322878438197571, "grad_norm": 2.0227956771850586, "learning_rate": 5.872958296427281e-06, "loss": 0.6196, "step": 8356 }, { "epoch": 0.6323635125420908, "grad_norm": 1.9548943042755127, "learning_rate": 5.870846492941147e-06, "loss": 0.8473, "step": 8357 }, { "epoch": 0.6324391812644243, "grad_norm": 1.8974775075912476, "learning_rate": 5.868734885412725e-06, "loss": 0.7865, "step": 8358 }, { "epoch": 0.632514849986758, "grad_norm": 1.963448405265808, "learning_rate": 5.866623473974256e-06, "loss": 0.6768, "step": 8359 }, { "epoch": 0.6325905187090916, "grad_norm": 2.001469135284424, "learning_rate": 5.864512258757957e-06, "loss": 0.7864, "step": 8360 }, { "epoch": 0.6326661874314252, "grad_norm": 1.9863218069076538, "learning_rate": 5.862401239896045e-06, "loss": 0.6782, "step": 8361 }, { "epoch": 0.6327418561537589, "grad_norm": 2.1429598331451416, "learning_rate": 5.8602904175207126e-06, "loss": 0.8312, "step": 8362 }, { "epoch": 0.6328175248760924, "grad_norm": 1.959979772567749, "learning_rate": 5.858179791764148e-06, "loss": 0.7098, "step": 8363 }, { "epoch": 0.6328931935984261, "grad_norm": 2.1421241760253906, "learning_rate": 5.856069362758528e-06, "loss": 0.7651, "step": 8364 }, { "epoch": 0.6329688623207597, "grad_norm": 2.1992077827453613, "learning_rate": 5.853959130636017e-06, "loss": 0.6911, "step": 8365 }, { "epoch": 0.6330445310430933, "grad_norm": 1.951259970664978, "learning_rate": 5.8518490955287564e-06, "loss": 0.7088, "step": 8366 }, { "epoch": 0.633120199765427, "grad_norm": 2.2392029762268066, "learning_rate": 5.849739257568891e-06, "loss": 0.6065, "step": 8367 }, { "epoch": 0.6331958684877605, "grad_norm": 1.755878210067749, "learning_rate": 5.847629616888538e-06, "loss": 0.5707, "step": 8368 }, { "epoch": 0.6332715372100942, "grad_norm": 1.9123753309249878, "learning_rate": 5.845520173619817e-06, "loss": 0.6594, "step": 8369 }, { "epoch": 0.6333472059324279, "grad_norm": 2.4851553440093994, "learning_rate": 5.843410927894827e-06, "loss": 0.7454, "step": 8370 }, { "epoch": 0.6334228746547614, "grad_norm": 2.0496127605438232, "learning_rate": 5.841301879845653e-06, "loss": 0.6757, "step": 8371 }, { "epoch": 0.6334985433770951, "grad_norm": 2.043626070022583, "learning_rate": 5.839193029604373e-06, "loss": 0.7011, "step": 8372 }, { "epoch": 0.6335742120994287, "grad_norm": 2.5057694911956787, "learning_rate": 5.837084377303045e-06, "loss": 0.6438, "step": 8373 }, { "epoch": 0.6336498808217623, "grad_norm": 1.9614980220794678, "learning_rate": 5.834975923073727e-06, "loss": 0.4584, "step": 8374 }, { "epoch": 0.633725549544096, "grad_norm": 1.787739872932434, "learning_rate": 5.832867667048453e-06, "loss": 0.6868, "step": 8375 }, { "epoch": 0.6338012182664295, "grad_norm": 2.6709229946136475, "learning_rate": 5.830759609359248e-06, "loss": 0.7381, "step": 8376 }, { "epoch": 0.6338768869887632, "grad_norm": 2.4086039066314697, "learning_rate": 5.828651750138128e-06, "loss": 0.6239, "step": 8377 }, { "epoch": 0.6339525557110968, "grad_norm": 2.210710287094116, "learning_rate": 5.82654408951709e-06, "loss": 0.7521, "step": 8378 }, { "epoch": 0.6340282244334304, "grad_norm": 2.0984058380126953, "learning_rate": 5.82443662762813e-06, "loss": 0.6862, "step": 8379 }, { "epoch": 0.6341038931557641, "grad_norm": 2.363299608230591, "learning_rate": 5.8223293646032166e-06, "loss": 0.6896, "step": 8380 }, { "epoch": 0.6341795618780977, "grad_norm": 1.9636515378952026, "learning_rate": 5.820222300574318e-06, "loss": 0.7168, "step": 8381 }, { "epoch": 0.6342552306004313, "grad_norm": 2.6832809448242188, "learning_rate": 5.8181154356733815e-06, "loss": 0.7862, "step": 8382 }, { "epoch": 0.634330899322765, "grad_norm": 1.9470926523208618, "learning_rate": 5.816008770032347e-06, "loss": 0.7394, "step": 8383 }, { "epoch": 0.6344065680450985, "grad_norm": 1.952431321144104, "learning_rate": 5.8139023037831446e-06, "loss": 0.7354, "step": 8384 }, { "epoch": 0.6344822367674322, "grad_norm": 2.5503687858581543, "learning_rate": 5.8117960370576845e-06, "loss": 0.7003, "step": 8385 }, { "epoch": 0.6345579054897658, "grad_norm": 2.3349881172180176, "learning_rate": 5.809689969987869e-06, "loss": 0.6624, "step": 8386 }, { "epoch": 0.6346335742120994, "grad_norm": 2.486293315887451, "learning_rate": 5.807584102705585e-06, "loss": 0.7769, "step": 8387 }, { "epoch": 0.6347092429344331, "grad_norm": 2.2371928691864014, "learning_rate": 5.805478435342707e-06, "loss": 0.7067, "step": 8388 }, { "epoch": 0.6347849116567666, "grad_norm": 2.085529088973999, "learning_rate": 5.803372968031108e-06, "loss": 0.7602, "step": 8389 }, { "epoch": 0.6348605803791003, "grad_norm": 2.1521055698394775, "learning_rate": 5.80126770090263e-06, "loss": 0.9044, "step": 8390 }, { "epoch": 0.6349362491014339, "grad_norm": 2.458247661590576, "learning_rate": 5.799162634089113e-06, "loss": 0.6094, "step": 8391 }, { "epoch": 0.6350119178237675, "grad_norm": 1.8370447158813477, "learning_rate": 5.7970577677223876e-06, "loss": 0.6742, "step": 8392 }, { "epoch": 0.6350875865461012, "grad_norm": 2.2332441806793213, "learning_rate": 5.794953101934262e-06, "loss": 0.6287, "step": 8393 }, { "epoch": 0.6351632552684348, "grad_norm": 2.1860263347625732, "learning_rate": 5.792848636856537e-06, "loss": 0.674, "step": 8394 }, { "epoch": 0.6352389239907684, "grad_norm": 2.5469307899475098, "learning_rate": 5.790744372621009e-06, "loss": 0.7648, "step": 8395 }, { "epoch": 0.6353145927131021, "grad_norm": 2.202962636947632, "learning_rate": 5.788640309359445e-06, "loss": 0.8781, "step": 8396 }, { "epoch": 0.6353902614354356, "grad_norm": 1.9034675359725952, "learning_rate": 5.786536447203615e-06, "loss": 0.4877, "step": 8397 }, { "epoch": 0.6354659301577693, "grad_norm": 1.8396954536437988, "learning_rate": 5.784432786285264e-06, "loss": 0.6477, "step": 8398 }, { "epoch": 0.6355415988801029, "grad_norm": 2.2514522075653076, "learning_rate": 5.78232932673613e-06, "loss": 0.6755, "step": 8399 }, { "epoch": 0.6356172676024365, "grad_norm": 2.3135526180267334, "learning_rate": 5.780226068687944e-06, "loss": 0.6906, "step": 8400 }, { "epoch": 0.6356929363247702, "grad_norm": 1.9569233655929565, "learning_rate": 5.778123012272415e-06, "loss": 0.7055, "step": 8401 }, { "epoch": 0.6357686050471038, "grad_norm": 2.095384359359741, "learning_rate": 5.776020157621244e-06, "loss": 0.8051, "step": 8402 }, { "epoch": 0.6358442737694374, "grad_norm": 2.363507032394409, "learning_rate": 5.773917504866118e-06, "loss": 0.6488, "step": 8403 }, { "epoch": 0.635919942491771, "grad_norm": 1.7671669721603394, "learning_rate": 5.77181505413871e-06, "loss": 0.6069, "step": 8404 }, { "epoch": 0.6359956112141046, "grad_norm": 2.2608208656311035, "learning_rate": 5.7697128055706865e-06, "loss": 0.7638, "step": 8405 }, { "epoch": 0.6360712799364383, "grad_norm": 2.832077741622925, "learning_rate": 5.767610759293697e-06, "loss": 0.5604, "step": 8406 }, { "epoch": 0.6361469486587719, "grad_norm": 1.9445099830627441, "learning_rate": 5.765508915439374e-06, "loss": 0.5337, "step": 8407 }, { "epoch": 0.6362226173811055, "grad_norm": 2.1563804149627686, "learning_rate": 5.763407274139347e-06, "loss": 0.7573, "step": 8408 }, { "epoch": 0.6362982861034392, "grad_norm": 2.05100417137146, "learning_rate": 5.761305835525221e-06, "loss": 0.5423, "step": 8409 }, { "epoch": 0.6363739548257727, "grad_norm": 2.729825258255005, "learning_rate": 5.7592045997286e-06, "loss": 0.6868, "step": 8410 }, { "epoch": 0.6364496235481064, "grad_norm": 2.297889232635498, "learning_rate": 5.757103566881071e-06, "loss": 0.8106, "step": 8411 }, { "epoch": 0.63652529227044, "grad_norm": 2.0106875896453857, "learning_rate": 5.755002737114204e-06, "loss": 0.6299, "step": 8412 }, { "epoch": 0.6366009609927736, "grad_norm": 2.3025240898132324, "learning_rate": 5.752902110559564e-06, "loss": 0.6633, "step": 8413 }, { "epoch": 0.6366766297151073, "grad_norm": 2.3724968433380127, "learning_rate": 5.75080168734869e-06, "loss": 0.6491, "step": 8414 }, { "epoch": 0.6367522984374409, "grad_norm": 2.080514430999756, "learning_rate": 5.748701467613128e-06, "loss": 0.5447, "step": 8415 }, { "epoch": 0.6368279671597745, "grad_norm": 1.9041091203689575, "learning_rate": 5.746601451484396e-06, "loss": 0.7418, "step": 8416 }, { "epoch": 0.6369036358821081, "grad_norm": 2.2937114238739014, "learning_rate": 5.744501639094003e-06, "loss": 0.9319, "step": 8417 }, { "epoch": 0.6369793046044417, "grad_norm": 1.9907230138778687, "learning_rate": 5.742402030573449e-06, "loss": 0.7111, "step": 8418 }, { "epoch": 0.6370549733267754, "grad_norm": 1.8847614526748657, "learning_rate": 5.74030262605421e-06, "loss": 0.6703, "step": 8419 }, { "epoch": 0.637130642049109, "grad_norm": 1.972623586654663, "learning_rate": 5.73820342566777e-06, "loss": 0.763, "step": 8420 }, { "epoch": 0.6372063107714426, "grad_norm": 2.680828332901001, "learning_rate": 5.736104429545579e-06, "loss": 0.7193, "step": 8421 }, { "epoch": 0.6372819794937763, "grad_norm": 2.13775372505188, "learning_rate": 5.7340056378190865e-06, "loss": 0.6998, "step": 8422 }, { "epoch": 0.6373576482161099, "grad_norm": 2.1243772506713867, "learning_rate": 5.731907050619723e-06, "loss": 0.6792, "step": 8423 }, { "epoch": 0.6374333169384435, "grad_norm": 1.8973451852798462, "learning_rate": 5.72980866807891e-06, "loss": 0.7644, "step": 8424 }, { "epoch": 0.6375089856607771, "grad_norm": 1.803795576095581, "learning_rate": 5.7277104903280575e-06, "loss": 0.6412, "step": 8425 }, { "epoch": 0.6375846543831107, "grad_norm": 1.9631472826004028, "learning_rate": 5.725612517498555e-06, "loss": 0.6546, "step": 8426 }, { "epoch": 0.6376603231054444, "grad_norm": 2.7881197929382324, "learning_rate": 5.723514749721792e-06, "loss": 0.7727, "step": 8427 }, { "epoch": 0.637735991827778, "grad_norm": 2.023376941680908, "learning_rate": 5.721417187129128e-06, "loss": 0.7832, "step": 8428 }, { "epoch": 0.6378116605501116, "grad_norm": 2.40487003326416, "learning_rate": 5.719319829851925e-06, "loss": 0.8232, "step": 8429 }, { "epoch": 0.6378873292724452, "grad_norm": 2.0782392024993896, "learning_rate": 5.717222678021528e-06, "loss": 0.5967, "step": 8430 }, { "epoch": 0.6379629979947788, "grad_norm": 2.3502237796783447, "learning_rate": 5.715125731769261e-06, "loss": 0.8443, "step": 8431 }, { "epoch": 0.6380386667171125, "grad_norm": 4.884004592895508, "learning_rate": 5.713028991226448e-06, "loss": 0.7055, "step": 8432 }, { "epoch": 0.6381143354394461, "grad_norm": 2.4166226387023926, "learning_rate": 5.71093245652439e-06, "loss": 0.6572, "step": 8433 }, { "epoch": 0.6381900041617797, "grad_norm": 1.7382185459136963, "learning_rate": 5.708836127794382e-06, "loss": 0.4822, "step": 8434 }, { "epoch": 0.6382656728841134, "grad_norm": 1.87117338180542, "learning_rate": 5.706740005167694e-06, "loss": 0.7563, "step": 8435 }, { "epoch": 0.638341341606447, "grad_norm": 2.3526854515075684, "learning_rate": 5.704644088775605e-06, "loss": 0.6714, "step": 8436 }, { "epoch": 0.6384170103287806, "grad_norm": 1.952702283859253, "learning_rate": 5.702548378749359e-06, "loss": 0.6943, "step": 8437 }, { "epoch": 0.6384926790511142, "grad_norm": 4.3784356117248535, "learning_rate": 5.7004528752202e-06, "loss": 0.5664, "step": 8438 }, { "epoch": 0.6385683477734478, "grad_norm": 2.005580425262451, "learning_rate": 5.698357578319353e-06, "loss": 0.4389, "step": 8439 }, { "epoch": 0.6386440164957815, "grad_norm": 2.7630388736724854, "learning_rate": 5.696262488178031e-06, "loss": 0.5916, "step": 8440 }, { "epoch": 0.6387196852181151, "grad_norm": 2.2635788917541504, "learning_rate": 5.694167604927441e-06, "loss": 0.8042, "step": 8441 }, { "epoch": 0.6387953539404487, "grad_norm": 2.6450791358947754, "learning_rate": 5.692072928698768e-06, "loss": 0.5544, "step": 8442 }, { "epoch": 0.6388710226627823, "grad_norm": 2.0859289169311523, "learning_rate": 5.689978459623186e-06, "loss": 0.798, "step": 8443 }, { "epoch": 0.638946691385116, "grad_norm": 2.267434597015381, "learning_rate": 5.6878841978318596e-06, "loss": 0.789, "step": 8444 }, { "epoch": 0.6390223601074496, "grad_norm": 2.525017738342285, "learning_rate": 5.6857901434559335e-06, "loss": 0.6433, "step": 8445 }, { "epoch": 0.6390980288297832, "grad_norm": 3.037821054458618, "learning_rate": 5.683696296626554e-06, "loss": 0.7781, "step": 8446 }, { "epoch": 0.6391736975521168, "grad_norm": 2.607813835144043, "learning_rate": 5.681602657474835e-06, "loss": 0.7136, "step": 8447 }, { "epoch": 0.6392493662744505, "grad_norm": 2.1024436950683594, "learning_rate": 5.679509226131894e-06, "loss": 0.6246, "step": 8448 }, { "epoch": 0.6393250349967841, "grad_norm": 2.2261359691619873, "learning_rate": 5.677416002728822e-06, "loss": 0.847, "step": 8449 }, { "epoch": 0.6394007037191177, "grad_norm": 2.5437755584716797, "learning_rate": 5.675322987396705e-06, "loss": 0.5989, "step": 8450 }, { "epoch": 0.6394763724414513, "grad_norm": 2.4765031337738037, "learning_rate": 5.673230180266618e-06, "loss": 0.6378, "step": 8451 }, { "epoch": 0.639552041163785, "grad_norm": 2.359419584274292, "learning_rate": 5.6711375814696184e-06, "loss": 0.7394, "step": 8452 }, { "epoch": 0.6396277098861186, "grad_norm": 1.8588393926620483, "learning_rate": 5.66904519113675e-06, "loss": 0.6876, "step": 8453 }, { "epoch": 0.6397033786084522, "grad_norm": 2.5035762786865234, "learning_rate": 5.666953009399045e-06, "loss": 0.8412, "step": 8454 }, { "epoch": 0.6397790473307858, "grad_norm": 2.0973055362701416, "learning_rate": 5.6648610363875196e-06, "loss": 0.6031, "step": 8455 }, { "epoch": 0.6398547160531194, "grad_norm": 1.9568322896957397, "learning_rate": 5.662769272233186e-06, "loss": 0.6001, "step": 8456 }, { "epoch": 0.639930384775453, "grad_norm": 1.8160796165466309, "learning_rate": 5.660677717067035e-06, "loss": 0.6849, "step": 8457 }, { "epoch": 0.6400060534977867, "grad_norm": 2.6608216762542725, "learning_rate": 5.658586371020046e-06, "loss": 0.8533, "step": 8458 }, { "epoch": 0.6400817222201203, "grad_norm": 1.9408966302871704, "learning_rate": 5.6564952342231875e-06, "loss": 0.584, "step": 8459 }, { "epoch": 0.6401573909424539, "grad_norm": 2.22806978225708, "learning_rate": 5.654404306807407e-06, "loss": 0.6623, "step": 8460 }, { "epoch": 0.6402330596647876, "grad_norm": 1.8229750394821167, "learning_rate": 5.652313588903652e-06, "loss": 0.6162, "step": 8461 }, { "epoch": 0.6403087283871212, "grad_norm": 2.114300489425659, "learning_rate": 5.650223080642849e-06, "loss": 0.6585, "step": 8462 }, { "epoch": 0.6403843971094548, "grad_norm": 2.9402008056640625, "learning_rate": 5.648132782155911e-06, "loss": 0.7531, "step": 8463 }, { "epoch": 0.6404600658317884, "grad_norm": 2.2352404594421387, "learning_rate": 5.646042693573738e-06, "loss": 0.7114, "step": 8464 }, { "epoch": 0.640535734554122, "grad_norm": 1.94172203540802, "learning_rate": 5.643952815027218e-06, "loss": 0.6461, "step": 8465 }, { "epoch": 0.6406114032764557, "grad_norm": 2.5948123931884766, "learning_rate": 5.6418631466472315e-06, "loss": 0.714, "step": 8466 }, { "epoch": 0.6406870719987893, "grad_norm": 1.9451491832733154, "learning_rate": 5.639773688564634e-06, "loss": 0.664, "step": 8467 }, { "epoch": 0.6407627407211229, "grad_norm": 2.0619945526123047, "learning_rate": 5.637684440910279e-06, "loss": 0.7656, "step": 8468 }, { "epoch": 0.6408384094434566, "grad_norm": 1.8974416255950928, "learning_rate": 5.635595403814996e-06, "loss": 0.6097, "step": 8469 }, { "epoch": 0.6409140781657902, "grad_norm": 3.0312063694000244, "learning_rate": 5.633506577409614e-06, "loss": 0.7523, "step": 8470 }, { "epoch": 0.6409897468881238, "grad_norm": 2.127302885055542, "learning_rate": 5.631417961824933e-06, "loss": 0.7443, "step": 8471 }, { "epoch": 0.6410654156104574, "grad_norm": 2.226922035217285, "learning_rate": 5.629329557191758e-06, "loss": 0.6491, "step": 8472 }, { "epoch": 0.641141084332791, "grad_norm": 1.9703574180603027, "learning_rate": 5.627241363640871e-06, "loss": 0.5922, "step": 8473 }, { "epoch": 0.6412167530551247, "grad_norm": 2.090078353881836, "learning_rate": 5.6251533813030355e-06, "loss": 0.6001, "step": 8474 }, { "epoch": 0.6412924217774583, "grad_norm": 2.1429638862609863, "learning_rate": 5.623065610309013e-06, "loss": 0.6735, "step": 8475 }, { "epoch": 0.6413680904997919, "grad_norm": 1.9501971006393433, "learning_rate": 5.6209780507895404e-06, "loss": 0.6364, "step": 8476 }, { "epoch": 0.6414437592221255, "grad_norm": 1.956324577331543, "learning_rate": 5.618890702875353e-06, "loss": 0.7588, "step": 8477 }, { "epoch": 0.6415194279444592, "grad_norm": 2.1515815258026123, "learning_rate": 5.616803566697168e-06, "loss": 0.7206, "step": 8478 }, { "epoch": 0.6415950966667928, "grad_norm": 2.6640117168426514, "learning_rate": 5.614716642385684e-06, "loss": 0.6887, "step": 8479 }, { "epoch": 0.6416707653891264, "grad_norm": 2.3514342308044434, "learning_rate": 5.612629930071594e-06, "loss": 0.5355, "step": 8480 }, { "epoch": 0.64174643411146, "grad_norm": 1.976243019104004, "learning_rate": 5.610543429885571e-06, "loss": 0.7735, "step": 8481 }, { "epoch": 0.6418221028337937, "grad_norm": 2.3452563285827637, "learning_rate": 5.608457141958285e-06, "loss": 0.6755, "step": 8482 }, { "epoch": 0.6418977715561273, "grad_norm": 2.0472850799560547, "learning_rate": 5.60637106642038e-06, "loss": 0.6521, "step": 8483 }, { "epoch": 0.6419734402784609, "grad_norm": 2.2305705547332764, "learning_rate": 5.6042852034024995e-06, "loss": 0.8029, "step": 8484 }, { "epoch": 0.6420491090007945, "grad_norm": 2.1921286582946777, "learning_rate": 5.602199553035258e-06, "loss": 0.6344, "step": 8485 }, { "epoch": 0.6421247777231281, "grad_norm": 2.1539828777313232, "learning_rate": 5.600114115449269e-06, "loss": 0.7381, "step": 8486 }, { "epoch": 0.6422004464454618, "grad_norm": 1.9343609809875488, "learning_rate": 5.598028890775135e-06, "loss": 0.5944, "step": 8487 }, { "epoch": 0.6422761151677954, "grad_norm": 1.9569308757781982, "learning_rate": 5.595943879143434e-06, "loss": 0.6096, "step": 8488 }, { "epoch": 0.642351783890129, "grad_norm": 2.5590083599090576, "learning_rate": 5.593859080684738e-06, "loss": 0.6341, "step": 8489 }, { "epoch": 0.6424274526124626, "grad_norm": 2.0084571838378906, "learning_rate": 5.591774495529602e-06, "loss": 0.5779, "step": 8490 }, { "epoch": 0.6425031213347963, "grad_norm": 2.7293450832366943, "learning_rate": 5.589690123808568e-06, "loss": 0.7232, "step": 8491 }, { "epoch": 0.6425787900571299, "grad_norm": 2.072709321975708, "learning_rate": 5.587605965652173e-06, "loss": 0.7979, "step": 8492 }, { "epoch": 0.6426544587794635, "grad_norm": 1.9151296615600586, "learning_rate": 5.585522021190928e-06, "loss": 0.7374, "step": 8493 }, { "epoch": 0.6427301275017971, "grad_norm": 3.357416868209839, "learning_rate": 5.583438290555337e-06, "loss": 0.6413, "step": 8494 }, { "epoch": 0.6428057962241308, "grad_norm": 2.125547409057617, "learning_rate": 5.581354773875893e-06, "loss": 0.6862, "step": 8495 }, { "epoch": 0.6428814649464644, "grad_norm": 2.053462266921997, "learning_rate": 5.579271471283065e-06, "loss": 0.7123, "step": 8496 }, { "epoch": 0.642957133668798, "grad_norm": 2.2528815269470215, "learning_rate": 5.577188382907326e-06, "loss": 0.6743, "step": 8497 }, { "epoch": 0.6430328023911316, "grad_norm": 2.1880528926849365, "learning_rate": 5.575105508879122e-06, "loss": 0.6552, "step": 8498 }, { "epoch": 0.6431084711134653, "grad_norm": 2.2088634967803955, "learning_rate": 5.573022849328886e-06, "loss": 0.8085, "step": 8499 }, { "epoch": 0.6431841398357989, "grad_norm": 2.240103244781494, "learning_rate": 5.570940404387046e-06, "loss": 0.6571, "step": 8500 }, { "epoch": 0.6432598085581325, "grad_norm": 2.2424118518829346, "learning_rate": 5.568858174184005e-06, "loss": 0.5845, "step": 8501 }, { "epoch": 0.6433354772804661, "grad_norm": 2.1710944175720215, "learning_rate": 5.566776158850164e-06, "loss": 0.7788, "step": 8502 }, { "epoch": 0.6434111460027997, "grad_norm": 2.4002442359924316, "learning_rate": 5.564694358515907e-06, "loss": 0.5717, "step": 8503 }, { "epoch": 0.6434868147251334, "grad_norm": 1.750193476676941, "learning_rate": 5.5626127733115976e-06, "loss": 0.8584, "step": 8504 }, { "epoch": 0.643562483447467, "grad_norm": 2.198309898376465, "learning_rate": 5.560531403367596e-06, "loss": 0.8146, "step": 8505 }, { "epoch": 0.6436381521698006, "grad_norm": 1.8954391479492188, "learning_rate": 5.55845024881424e-06, "loss": 0.6983, "step": 8506 }, { "epoch": 0.6437138208921342, "grad_norm": 2.1946661472320557, "learning_rate": 5.556369309781862e-06, "loss": 0.729, "step": 8507 }, { "epoch": 0.6437894896144679, "grad_norm": 2.274904727935791, "learning_rate": 5.5542885864007756e-06, "loss": 0.6662, "step": 8508 }, { "epoch": 0.6438651583368015, "grad_norm": 2.831035852432251, "learning_rate": 5.5522080788012845e-06, "loss": 0.8062, "step": 8509 }, { "epoch": 0.6439408270591351, "grad_norm": 2.2157704830169678, "learning_rate": 5.550127787113674e-06, "loss": 0.4898, "step": 8510 }, { "epoch": 0.6440164957814687, "grad_norm": 2.1222331523895264, "learning_rate": 5.548047711468221e-06, "loss": 0.7425, "step": 8511 }, { "epoch": 0.6440921645038024, "grad_norm": 2.3518216609954834, "learning_rate": 5.545967851995182e-06, "loss": 0.9132, "step": 8512 }, { "epoch": 0.644167833226136, "grad_norm": 2.1871204376220703, "learning_rate": 5.543888208824809e-06, "loss": 0.697, "step": 8513 }, { "epoch": 0.6442435019484696, "grad_norm": 1.9656989574432373, "learning_rate": 5.541808782087337e-06, "loss": 0.7371, "step": 8514 }, { "epoch": 0.6443191706708032, "grad_norm": 2.232603073120117, "learning_rate": 5.539729571912982e-06, "loss": 0.689, "step": 8515 }, { "epoch": 0.6443948393931368, "grad_norm": 3.9067957401275635, "learning_rate": 5.537650578431956e-06, "loss": 0.7104, "step": 8516 }, { "epoch": 0.6444705081154705, "grad_norm": 2.082522392272949, "learning_rate": 5.5355718017744444e-06, "loss": 0.7162, "step": 8517 }, { "epoch": 0.6445461768378041, "grad_norm": 2.175347328186035, "learning_rate": 5.533493242070634e-06, "loss": 0.7402, "step": 8518 }, { "epoch": 0.6446218455601377, "grad_norm": 1.8790175914764404, "learning_rate": 5.531414899450692e-06, "loss": 0.6263, "step": 8519 }, { "epoch": 0.6446975142824714, "grad_norm": 2.5705785751342773, "learning_rate": 5.529336774044764e-06, "loss": 0.694, "step": 8520 }, { "epoch": 0.644773183004805, "grad_norm": 1.955169916152954, "learning_rate": 5.527258865982995e-06, "loss": 0.8155, "step": 8521 }, { "epoch": 0.6448488517271386, "grad_norm": 1.9175649881362915, "learning_rate": 5.525181175395503e-06, "loss": 0.8298, "step": 8522 }, { "epoch": 0.6449245204494722, "grad_norm": 2.2717673778533936, "learning_rate": 5.523103702412411e-06, "loss": 0.7048, "step": 8523 }, { "epoch": 0.6450001891718058, "grad_norm": 2.508749485015869, "learning_rate": 5.521026447163807e-06, "loss": 0.7999, "step": 8524 }, { "epoch": 0.6450758578941395, "grad_norm": 1.875464677810669, "learning_rate": 5.5189494097797825e-06, "loss": 0.6481, "step": 8525 }, { "epoch": 0.6451515266164731, "grad_norm": 2.2824044227600098, "learning_rate": 5.516872590390402e-06, "loss": 0.6753, "step": 8526 }, { "epoch": 0.6452271953388067, "grad_norm": 2.4387025833129883, "learning_rate": 5.514795989125725e-06, "loss": 0.7616, "step": 8527 }, { "epoch": 0.6453028640611403, "grad_norm": 1.9747780561447144, "learning_rate": 5.512719606115799e-06, "loss": 0.7541, "step": 8528 }, { "epoch": 0.6453785327834739, "grad_norm": 2.339794158935547, "learning_rate": 5.510643441490649e-06, "loss": 0.7607, "step": 8529 }, { "epoch": 0.6454542015058076, "grad_norm": 2.1262335777282715, "learning_rate": 5.5085674953802945e-06, "loss": 0.5666, "step": 8530 }, { "epoch": 0.6455298702281412, "grad_norm": 1.9069366455078125, "learning_rate": 5.506491767914734e-06, "loss": 0.6467, "step": 8531 }, { "epoch": 0.6456055389504748, "grad_norm": 2.164801597595215, "learning_rate": 5.504416259223956e-06, "loss": 0.6073, "step": 8532 }, { "epoch": 0.6456812076728085, "grad_norm": 3.146503210067749, "learning_rate": 5.502340969437942e-06, "loss": 0.6192, "step": 8533 }, { "epoch": 0.6457568763951421, "grad_norm": 2.1276612281799316, "learning_rate": 5.5002658986866475e-06, "loss": 0.6759, "step": 8534 }, { "epoch": 0.6458325451174757, "grad_norm": 1.8618454933166504, "learning_rate": 5.498191047100023e-06, "loss": 0.5535, "step": 8535 }, { "epoch": 0.6459082138398093, "grad_norm": 1.86492121219635, "learning_rate": 5.496116414808002e-06, "loss": 0.6922, "step": 8536 }, { "epoch": 0.6459838825621429, "grad_norm": 1.8783886432647705, "learning_rate": 5.4940420019405e-06, "loss": 0.6147, "step": 8537 }, { "epoch": 0.6460595512844766, "grad_norm": 2.076680898666382, "learning_rate": 5.49196780862743e-06, "loss": 0.7457, "step": 8538 }, { "epoch": 0.6461352200068102, "grad_norm": 1.9781914949417114, "learning_rate": 5.489893834998683e-06, "loss": 0.7596, "step": 8539 }, { "epoch": 0.6462108887291438, "grad_norm": 3.7510299682617188, "learning_rate": 5.487820081184136e-06, "loss": 0.6868, "step": 8540 }, { "epoch": 0.6462865574514775, "grad_norm": 2.0768771171569824, "learning_rate": 5.485746547313658e-06, "loss": 0.606, "step": 8541 }, { "epoch": 0.646362226173811, "grad_norm": 1.8995541334152222, "learning_rate": 5.483673233517094e-06, "loss": 0.7812, "step": 8542 }, { "epoch": 0.6464378948961447, "grad_norm": 2.8474512100219727, "learning_rate": 5.4816001399242865e-06, "loss": 0.6534, "step": 8543 }, { "epoch": 0.6465135636184783, "grad_norm": 2.1161701679229736, "learning_rate": 5.479527266665059e-06, "loss": 0.6513, "step": 8544 }, { "epoch": 0.6465892323408119, "grad_norm": 1.9404470920562744, "learning_rate": 5.477454613869219e-06, "loss": 0.5536, "step": 8545 }, { "epoch": 0.6466649010631456, "grad_norm": 2.743656873703003, "learning_rate": 5.475382181666567e-06, "loss": 0.6976, "step": 8546 }, { "epoch": 0.6467405697854792, "grad_norm": 1.7919031381607056, "learning_rate": 5.473309970186882e-06, "loss": 0.6573, "step": 8547 }, { "epoch": 0.6468162385078128, "grad_norm": 2.278919219970703, "learning_rate": 5.4712379795599295e-06, "loss": 0.7562, "step": 8548 }, { "epoch": 0.6468919072301464, "grad_norm": 2.6098785400390625, "learning_rate": 5.469166209915472e-06, "loss": 0.7391, "step": 8549 }, { "epoch": 0.64696757595248, "grad_norm": 2.1722793579101562, "learning_rate": 5.4670946613832466e-06, "loss": 0.839, "step": 8550 }, { "epoch": 0.6470432446748137, "grad_norm": 2.256061553955078, "learning_rate": 5.465023334092981e-06, "loss": 0.6023, "step": 8551 }, { "epoch": 0.6471189133971473, "grad_norm": 2.3544907569885254, "learning_rate": 5.4629522281743846e-06, "loss": 0.6151, "step": 8552 }, { "epoch": 0.6471945821194809, "grad_norm": 1.9995858669281006, "learning_rate": 5.46088134375716e-06, "loss": 0.6645, "step": 8553 }, { "epoch": 0.6472702508418146, "grad_norm": 2.3130130767822266, "learning_rate": 5.4588106809709945e-06, "loss": 0.6585, "step": 8554 }, { "epoch": 0.6473459195641481, "grad_norm": 2.5768182277679443, "learning_rate": 5.456740239945559e-06, "loss": 0.8411, "step": 8555 }, { "epoch": 0.6474215882864818, "grad_norm": 2.34093976020813, "learning_rate": 5.454670020810507e-06, "loss": 0.7124, "step": 8556 }, { "epoch": 0.6474972570088154, "grad_norm": 3.165555477142334, "learning_rate": 5.452600023695488e-06, "loss": 0.6142, "step": 8557 }, { "epoch": 0.647572925731149, "grad_norm": 2.1847751140594482, "learning_rate": 5.450530248730125e-06, "loss": 0.6331, "step": 8558 }, { "epoch": 0.6476485944534827, "grad_norm": 2.339569568634033, "learning_rate": 5.448460696044041e-06, "loss": 0.6593, "step": 8559 }, { "epoch": 0.6477242631758163, "grad_norm": 2.989414691925049, "learning_rate": 5.446391365766837e-06, "loss": 0.6377, "step": 8560 }, { "epoch": 0.6477999318981499, "grad_norm": 2.329726457595825, "learning_rate": 5.444322258028096e-06, "loss": 0.5623, "step": 8561 }, { "epoch": 0.6478756006204835, "grad_norm": 2.963811159133911, "learning_rate": 5.442253372957399e-06, "loss": 0.816, "step": 8562 }, { "epoch": 0.6479512693428171, "grad_norm": 2.018897533416748, "learning_rate": 5.440184710684299e-06, "loss": 0.742, "step": 8563 }, { "epoch": 0.6480269380651508, "grad_norm": 2.518212080001831, "learning_rate": 5.438116271338347e-06, "loss": 0.8152, "step": 8564 }, { "epoch": 0.6481026067874844, "grad_norm": 1.4820868968963623, "learning_rate": 5.436048055049079e-06, "loss": 0.5836, "step": 8565 }, { "epoch": 0.648178275509818, "grad_norm": 2.1838934421539307, "learning_rate": 5.433980061946006e-06, "loss": 0.6756, "step": 8566 }, { "epoch": 0.6482539442321517, "grad_norm": 2.867804527282715, "learning_rate": 5.4319122921586354e-06, "loss": 0.6669, "step": 8567 }, { "epoch": 0.6483296129544852, "grad_norm": 2.323784112930298, "learning_rate": 5.429844745816454e-06, "loss": 0.7107, "step": 8568 }, { "epoch": 0.6484052816768189, "grad_norm": 2.1240692138671875, "learning_rate": 5.427777423048948e-06, "loss": 0.7573, "step": 8569 }, { "epoch": 0.6484809503991525, "grad_norm": 2.3311257362365723, "learning_rate": 5.425710323985571e-06, "loss": 0.6085, "step": 8570 }, { "epoch": 0.6485566191214861, "grad_norm": 2.233501672744751, "learning_rate": 5.423643448755776e-06, "loss": 0.7538, "step": 8571 }, { "epoch": 0.6486322878438198, "grad_norm": 2.3207247257232666, "learning_rate": 5.421576797488994e-06, "loss": 0.678, "step": 8572 }, { "epoch": 0.6487079565661534, "grad_norm": 2.2097394466400146, "learning_rate": 5.4195103703146445e-06, "loss": 0.7674, "step": 8573 }, { "epoch": 0.648783625288487, "grad_norm": 3.0667641162872314, "learning_rate": 5.41744416736214e-06, "loss": 0.6953, "step": 8574 }, { "epoch": 0.6488592940108207, "grad_norm": 2.093312978744507, "learning_rate": 5.4153781887608684e-06, "loss": 0.7736, "step": 8575 }, { "epoch": 0.6489349627331542, "grad_norm": 1.9957607984542847, "learning_rate": 5.41331243464021e-06, "loss": 0.8296, "step": 8576 }, { "epoch": 0.6490106314554879, "grad_norm": 1.9288572072982788, "learning_rate": 5.411246905129525e-06, "loss": 0.6244, "step": 8577 }, { "epoch": 0.6490863001778215, "grad_norm": 2.4149534702301025, "learning_rate": 5.409181600358165e-06, "loss": 0.7823, "step": 8578 }, { "epoch": 0.6491619689001551, "grad_norm": 2.701127052307129, "learning_rate": 5.407116520455471e-06, "loss": 0.661, "step": 8579 }, { "epoch": 0.6492376376224888, "grad_norm": 2.4020912647247314, "learning_rate": 5.405051665550759e-06, "loss": 0.7354, "step": 8580 }, { "epoch": 0.6493133063448223, "grad_norm": 2.4615938663482666, "learning_rate": 5.4029870357733405e-06, "loss": 0.6902, "step": 8581 }, { "epoch": 0.649388975067156, "grad_norm": 2.4611809253692627, "learning_rate": 5.400922631252509e-06, "loss": 0.5852, "step": 8582 }, { "epoch": 0.6494646437894896, "grad_norm": 2.5221340656280518, "learning_rate": 5.398858452117539e-06, "loss": 0.6872, "step": 8583 }, { "epoch": 0.6495403125118232, "grad_norm": 1.908057451248169, "learning_rate": 5.396794498497703e-06, "loss": 0.6117, "step": 8584 }, { "epoch": 0.6496159812341569, "grad_norm": 1.944278597831726, "learning_rate": 5.3947307705222515e-06, "loss": 0.6342, "step": 8585 }, { "epoch": 0.6496916499564905, "grad_norm": 1.9990040063858032, "learning_rate": 5.392667268320418e-06, "loss": 0.6878, "step": 8586 }, { "epoch": 0.6497673186788241, "grad_norm": 2.623224973678589, "learning_rate": 5.390603992021429e-06, "loss": 0.6674, "step": 8587 }, { "epoch": 0.6498429874011578, "grad_norm": 2.0191500186920166, "learning_rate": 5.38854094175449e-06, "loss": 0.6673, "step": 8588 }, { "epoch": 0.6499186561234913, "grad_norm": 2.417546510696411, "learning_rate": 5.386478117648798e-06, "loss": 0.5823, "step": 8589 }, { "epoch": 0.649994324845825, "grad_norm": 1.9548628330230713, "learning_rate": 5.384415519833536e-06, "loss": 0.7199, "step": 8590 }, { "epoch": 0.6500699935681586, "grad_norm": 2.064302444458008, "learning_rate": 5.382353148437866e-06, "loss": 0.6522, "step": 8591 }, { "epoch": 0.6501456622904922, "grad_norm": 2.792672872543335, "learning_rate": 5.380291003590946e-06, "loss": 0.5625, "step": 8592 }, { "epoch": 0.6502213310128259, "grad_norm": 2.0606002807617188, "learning_rate": 5.378229085421909e-06, "loss": 0.7184, "step": 8593 }, { "epoch": 0.6502969997351594, "grad_norm": 2.4181129932403564, "learning_rate": 5.376167394059879e-06, "loss": 0.7157, "step": 8594 }, { "epoch": 0.6503726684574931, "grad_norm": 2.101733922958374, "learning_rate": 5.374105929633969e-06, "loss": 0.6396, "step": 8595 }, { "epoch": 0.6504483371798268, "grad_norm": 2.896683931350708, "learning_rate": 5.372044692273275e-06, "loss": 0.749, "step": 8596 }, { "epoch": 0.6505240059021603, "grad_norm": 2.5679473876953125, "learning_rate": 5.369983682106875e-06, "loss": 0.6503, "step": 8597 }, { "epoch": 0.650599674624494, "grad_norm": 2.505889892578125, "learning_rate": 5.36792289926384e-06, "loss": 0.7238, "step": 8598 }, { "epoch": 0.6506753433468276, "grad_norm": 2.405747413635254, "learning_rate": 5.3658623438732165e-06, "loss": 0.7156, "step": 8599 }, { "epoch": 0.6507510120691612, "grad_norm": 2.227442741394043, "learning_rate": 5.363802016064049e-06, "loss": 0.5993, "step": 8600 }, { "epoch": 0.6508266807914949, "grad_norm": 2.0744035243988037, "learning_rate": 5.361741915965362e-06, "loss": 0.7058, "step": 8601 }, { "epoch": 0.6509023495138284, "grad_norm": 2.3363168239593506, "learning_rate": 5.359682043706162e-06, "loss": 0.5921, "step": 8602 }, { "epoch": 0.6509780182361621, "grad_norm": 2.573744058609009, "learning_rate": 5.357622399415448e-06, "loss": 0.7344, "step": 8603 }, { "epoch": 0.6510536869584957, "grad_norm": 2.494749069213867, "learning_rate": 5.355562983222197e-06, "loss": 0.7317, "step": 8604 }, { "epoch": 0.6511293556808293, "grad_norm": 2.3259880542755127, "learning_rate": 5.353503795255382e-06, "loss": 0.695, "step": 8605 }, { "epoch": 0.651205024403163, "grad_norm": 2.2108333110809326, "learning_rate": 5.3514448356439545e-06, "loss": 0.5293, "step": 8606 }, { "epoch": 0.6512806931254965, "grad_norm": 2.3737170696258545, "learning_rate": 5.3493861045168514e-06, "loss": 0.5965, "step": 8607 }, { "epoch": 0.6513563618478302, "grad_norm": 2.9041547775268555, "learning_rate": 5.347327602002999e-06, "loss": 0.7724, "step": 8608 }, { "epoch": 0.6514320305701639, "grad_norm": 2.1703732013702393, "learning_rate": 5.345269328231303e-06, "loss": 0.5928, "step": 8609 }, { "epoch": 0.6515076992924974, "grad_norm": 2.191357374191284, "learning_rate": 5.343211283330663e-06, "loss": 0.8274, "step": 8610 }, { "epoch": 0.6515833680148311, "grad_norm": 2.2903363704681396, "learning_rate": 5.341153467429962e-06, "loss": 0.6822, "step": 8611 }, { "epoch": 0.6516590367371647, "grad_norm": 2.7158641815185547, "learning_rate": 5.339095880658066e-06, "loss": 0.7942, "step": 8612 }, { "epoch": 0.6517347054594983, "grad_norm": 1.8598185777664185, "learning_rate": 5.3370385231438245e-06, "loss": 0.7329, "step": 8613 }, { "epoch": 0.651810374181832, "grad_norm": 2.0901763439178467, "learning_rate": 5.334981395016076e-06, "loss": 0.7677, "step": 8614 }, { "epoch": 0.6518860429041655, "grad_norm": 2.2997934818267822, "learning_rate": 5.33292449640365e-06, "loss": 0.6171, "step": 8615 }, { "epoch": 0.6519617116264992, "grad_norm": 2.0025293827056885, "learning_rate": 5.330867827435353e-06, "loss": 0.713, "step": 8616 }, { "epoch": 0.6520373803488329, "grad_norm": 2.187777042388916, "learning_rate": 5.328811388239981e-06, "loss": 0.5571, "step": 8617 }, { "epoch": 0.6521130490711664, "grad_norm": 2.3849644660949707, "learning_rate": 5.326755178946312e-06, "loss": 0.6938, "step": 8618 }, { "epoch": 0.6521887177935001, "grad_norm": 2.3532207012176514, "learning_rate": 5.324699199683113e-06, "loss": 0.7331, "step": 8619 }, { "epoch": 0.6522643865158336, "grad_norm": 2.0683131217956543, "learning_rate": 5.3226434505791405e-06, "loss": 0.6739, "step": 8620 }, { "epoch": 0.6523400552381673, "grad_norm": 1.9506670236587524, "learning_rate": 5.320587931763127e-06, "loss": 0.6907, "step": 8621 }, { "epoch": 0.652415723960501, "grad_norm": 1.8213778734207153, "learning_rate": 5.3185326433638e-06, "loss": 0.7844, "step": 8622 }, { "epoch": 0.6524913926828345, "grad_norm": 2.387977361679077, "learning_rate": 5.316477585509865e-06, "loss": 0.6507, "step": 8623 }, { "epoch": 0.6525670614051682, "grad_norm": 2.132040500640869, "learning_rate": 5.3144227583300185e-06, "loss": 0.6664, "step": 8624 }, { "epoch": 0.6526427301275018, "grad_norm": 2.443291187286377, "learning_rate": 5.312368161952933e-06, "loss": 0.5767, "step": 8625 }, { "epoch": 0.6527183988498354, "grad_norm": 2.27197265625, "learning_rate": 5.310313796507288e-06, "loss": 0.6735, "step": 8626 }, { "epoch": 0.6527940675721691, "grad_norm": 2.6916403770446777, "learning_rate": 5.308259662121724e-06, "loss": 0.6195, "step": 8627 }, { "epoch": 0.6528697362945026, "grad_norm": 2.7389891147613525, "learning_rate": 5.306205758924883e-06, "loss": 0.7494, "step": 8628 }, { "epoch": 0.6529454050168363, "grad_norm": 2.406222105026245, "learning_rate": 5.304152087045383e-06, "loss": 0.6614, "step": 8629 }, { "epoch": 0.65302107373917, "grad_norm": 2.205073595046997, "learning_rate": 5.3020986466118305e-06, "loss": 0.7074, "step": 8630 }, { "epoch": 0.6530967424615035, "grad_norm": 2.350299119949341, "learning_rate": 5.3000454377528256e-06, "loss": 0.8051, "step": 8631 }, { "epoch": 0.6531724111838372, "grad_norm": 2.4323620796203613, "learning_rate": 5.297992460596941e-06, "loss": 0.6793, "step": 8632 }, { "epoch": 0.6532480799061707, "grad_norm": 2.1624834537506104, "learning_rate": 5.295939715272742e-06, "loss": 0.7988, "step": 8633 }, { "epoch": 0.6533237486285044, "grad_norm": 2.935800075531006, "learning_rate": 5.293887201908778e-06, "loss": 0.7486, "step": 8634 }, { "epoch": 0.6533994173508381, "grad_norm": 2.3798375129699707, "learning_rate": 5.291834920633583e-06, "loss": 0.5782, "step": 8635 }, { "epoch": 0.6534750860731716, "grad_norm": 2.4022819995880127, "learning_rate": 5.289782871575682e-06, "loss": 0.666, "step": 8636 }, { "epoch": 0.6535507547955053, "grad_norm": 1.8358285427093506, "learning_rate": 5.287731054863575e-06, "loss": 0.7288, "step": 8637 }, { "epoch": 0.653626423517839, "grad_norm": 2.1910176277160645, "learning_rate": 5.28567947062576e-06, "loss": 0.8125, "step": 8638 }, { "epoch": 0.6537020922401725, "grad_norm": 2.170234203338623, "learning_rate": 5.283628118990708e-06, "loss": 0.6749, "step": 8639 }, { "epoch": 0.6537777609625062, "grad_norm": 2.4804930686950684, "learning_rate": 5.281577000086881e-06, "loss": 0.6696, "step": 8640 }, { "epoch": 0.6538534296848397, "grad_norm": 2.3019261360168457, "learning_rate": 5.279526114042731e-06, "loss": 0.5192, "step": 8641 }, { "epoch": 0.6539290984071734, "grad_norm": 2.1788456439971924, "learning_rate": 5.27747546098669e-06, "loss": 0.7353, "step": 8642 }, { "epoch": 0.6540047671295071, "grad_norm": 2.3936922550201416, "learning_rate": 5.2754250410471755e-06, "loss": 0.5753, "step": 8643 }, { "epoch": 0.6540804358518406, "grad_norm": 2.109896421432495, "learning_rate": 5.2733748543525925e-06, "loss": 0.572, "step": 8644 }, { "epoch": 0.6541561045741743, "grad_norm": 2.3399693965911865, "learning_rate": 5.271324901031326e-06, "loss": 0.6521, "step": 8645 }, { "epoch": 0.6542317732965078, "grad_norm": 2.2131054401397705, "learning_rate": 5.2692751812117576e-06, "loss": 0.6941, "step": 8646 }, { "epoch": 0.6543074420188415, "grad_norm": 2.646402597427368, "learning_rate": 5.267225695022244e-06, "loss": 0.802, "step": 8647 }, { "epoch": 0.6543831107411752, "grad_norm": 1.9599591493606567, "learning_rate": 5.26517644259113e-06, "loss": 0.8231, "step": 8648 }, { "epoch": 0.6544587794635087, "grad_norm": 2.2101340293884277, "learning_rate": 5.263127424046747e-06, "loss": 0.7479, "step": 8649 }, { "epoch": 0.6545344481858424, "grad_norm": 2.379575252532959, "learning_rate": 5.26107863951741e-06, "loss": 0.6335, "step": 8650 }, { "epoch": 0.654610116908176, "grad_norm": 2.6849305629730225, "learning_rate": 5.259030089131421e-06, "loss": 0.7674, "step": 8651 }, { "epoch": 0.6546857856305096, "grad_norm": 2.3194165229797363, "learning_rate": 5.256981773017071e-06, "loss": 0.7081, "step": 8652 }, { "epoch": 0.6547614543528433, "grad_norm": 5.000668048858643, "learning_rate": 5.254933691302628e-06, "loss": 0.737, "step": 8653 }, { "epoch": 0.6548371230751768, "grad_norm": 2.265462875366211, "learning_rate": 5.252885844116347e-06, "loss": 0.5589, "step": 8654 }, { "epoch": 0.6549127917975105, "grad_norm": 2.609497308731079, "learning_rate": 5.250838231586477e-06, "loss": 0.7, "step": 8655 }, { "epoch": 0.6549884605198442, "grad_norm": 2.6671085357666016, "learning_rate": 5.248790853841241e-06, "loss": 0.6093, "step": 8656 }, { "epoch": 0.6550641292421777, "grad_norm": 2.3832755088806152, "learning_rate": 5.2467437110088565e-06, "loss": 0.7668, "step": 8657 }, { "epoch": 0.6551397979645114, "grad_norm": 2.164400815963745, "learning_rate": 5.24469680321752e-06, "loss": 0.6777, "step": 8658 }, { "epoch": 0.6552154666868449, "grad_norm": 1.7403801679611206, "learning_rate": 5.242650130595418e-06, "loss": 0.6456, "step": 8659 }, { "epoch": 0.6552911354091786, "grad_norm": 2.5465121269226074, "learning_rate": 5.240603693270712e-06, "loss": 0.7281, "step": 8660 }, { "epoch": 0.6553668041315123, "grad_norm": 2.5230278968811035, "learning_rate": 5.238557491371566e-06, "loss": 0.7588, "step": 8661 }, { "epoch": 0.6554424728538458, "grad_norm": 1.767684817314148, "learning_rate": 5.236511525026118e-06, "loss": 0.824, "step": 8662 }, { "epoch": 0.6555181415761795, "grad_norm": 2.1746935844421387, "learning_rate": 5.2344657943624876e-06, "loss": 0.712, "step": 8663 }, { "epoch": 0.6555938102985132, "grad_norm": 2.5205862522125244, "learning_rate": 5.232420299508789e-06, "loss": 0.7244, "step": 8664 }, { "epoch": 0.6556694790208467, "grad_norm": 2.556107759475708, "learning_rate": 5.230375040593117e-06, "loss": 0.7721, "step": 8665 }, { "epoch": 0.6557451477431804, "grad_norm": 2.134599208831787, "learning_rate": 5.228330017743552e-06, "loss": 0.7349, "step": 8666 }, { "epoch": 0.6558208164655139, "grad_norm": 2.397552251815796, "learning_rate": 5.226285231088161e-06, "loss": 0.6472, "step": 8667 }, { "epoch": 0.6558964851878476, "grad_norm": 2.927499532699585, "learning_rate": 5.224240680754993e-06, "loss": 0.5487, "step": 8668 }, { "epoch": 0.6559721539101813, "grad_norm": 1.7893126010894775, "learning_rate": 5.222196366872091e-06, "loss": 0.6124, "step": 8669 }, { "epoch": 0.6560478226325148, "grad_norm": 1.9689534902572632, "learning_rate": 5.220152289567468e-06, "loss": 0.7077, "step": 8670 }, { "epoch": 0.6561234913548485, "grad_norm": 2.465264081954956, "learning_rate": 5.218108448969129e-06, "loss": 0.6206, "step": 8671 }, { "epoch": 0.656199160077182, "grad_norm": 2.3276827335357666, "learning_rate": 5.216064845205075e-06, "loss": 0.8604, "step": 8672 }, { "epoch": 0.6562748287995157, "grad_norm": 3.2658963203430176, "learning_rate": 5.214021478403283e-06, "loss": 0.6325, "step": 8673 }, { "epoch": 0.6563504975218494, "grad_norm": 2.2174007892608643, "learning_rate": 5.211978348691708e-06, "loss": 0.7559, "step": 8674 }, { "epoch": 0.6564261662441829, "grad_norm": 2.120908260345459, "learning_rate": 5.209935456198301e-06, "loss": 0.7938, "step": 8675 }, { "epoch": 0.6565018349665166, "grad_norm": 2.1590099334716797, "learning_rate": 5.207892801050993e-06, "loss": 0.6541, "step": 8676 }, { "epoch": 0.6565775036888503, "grad_norm": 2.1857635974884033, "learning_rate": 5.205850383377704e-06, "loss": 0.6656, "step": 8677 }, { "epoch": 0.6566531724111838, "grad_norm": 2.734827995300293, "learning_rate": 5.2038082033063365e-06, "loss": 0.7115, "step": 8678 }, { "epoch": 0.6567288411335175, "grad_norm": 1.769230604171753, "learning_rate": 5.201766260964777e-06, "loss": 0.585, "step": 8679 }, { "epoch": 0.656804509855851, "grad_norm": 1.630325198173523, "learning_rate": 5.199724556480902e-06, "loss": 0.7843, "step": 8680 }, { "epoch": 0.6568801785781847, "grad_norm": 2.335381269454956, "learning_rate": 5.19768308998256e-06, "loss": 0.5799, "step": 8681 }, { "epoch": 0.6569558473005184, "grad_norm": 2.884042739868164, "learning_rate": 5.1956418615976054e-06, "loss": 0.6448, "step": 8682 }, { "epoch": 0.6570315160228519, "grad_norm": 2.7924041748046875, "learning_rate": 5.193600871453866e-06, "loss": 0.8123, "step": 8683 }, { "epoch": 0.6571071847451856, "grad_norm": 2.314347267150879, "learning_rate": 5.191560119679147e-06, "loss": 0.7676, "step": 8684 }, { "epoch": 0.6571828534675191, "grad_norm": 2.933027744293213, "learning_rate": 5.189519606401252e-06, "loss": 0.7057, "step": 8685 }, { "epoch": 0.6572585221898528, "grad_norm": 2.0092270374298096, "learning_rate": 5.18747933174796e-06, "loss": 0.7644, "step": 8686 }, { "epoch": 0.6573341909121865, "grad_norm": 3.9684855937957764, "learning_rate": 5.18543929584705e-06, "loss": 0.6298, "step": 8687 }, { "epoch": 0.65740985963452, "grad_norm": 3.053493022918701, "learning_rate": 5.183399498826266e-06, "loss": 0.7078, "step": 8688 }, { "epoch": 0.6574855283568537, "grad_norm": 2.5908734798431396, "learning_rate": 5.18135994081335e-06, "loss": 0.6395, "step": 8689 }, { "epoch": 0.6575611970791874, "grad_norm": 2.060353994369507, "learning_rate": 5.179320621936025e-06, "loss": 0.5517, "step": 8690 }, { "epoch": 0.6576368658015209, "grad_norm": 2.310406446456909, "learning_rate": 5.177281542322e-06, "loss": 0.7194, "step": 8691 }, { "epoch": 0.6577125345238546, "grad_norm": 2.261384963989258, "learning_rate": 5.175242702098969e-06, "loss": 0.5707, "step": 8692 }, { "epoch": 0.6577882032461881, "grad_norm": 2.6075448989868164, "learning_rate": 5.173204101394612e-06, "loss": 0.6743, "step": 8693 }, { "epoch": 0.6578638719685218, "grad_norm": 2.4900269508361816, "learning_rate": 5.1711657403365935e-06, "loss": 0.7795, "step": 8694 }, { "epoch": 0.6579395406908555, "grad_norm": 3.0863840579986572, "learning_rate": 5.169127619052558e-06, "loss": 0.6771, "step": 8695 }, { "epoch": 0.658015209413189, "grad_norm": 2.687215566635132, "learning_rate": 5.167089737670137e-06, "loss": 0.7066, "step": 8696 }, { "epoch": 0.6580908781355227, "grad_norm": 2.019657850265503, "learning_rate": 5.16505209631696e-06, "loss": 0.7545, "step": 8697 }, { "epoch": 0.6581665468578564, "grad_norm": 2.119903564453125, "learning_rate": 5.163014695120623e-06, "loss": 0.8052, "step": 8698 }, { "epoch": 0.6582422155801899, "grad_norm": 8.88284683227539, "learning_rate": 5.160977534208716e-06, "loss": 0.8043, "step": 8699 }, { "epoch": 0.6583178843025236, "grad_norm": 1.7967544794082642, "learning_rate": 5.158940613708812e-06, "loss": 0.7364, "step": 8700 }, { "epoch": 0.6583935530248571, "grad_norm": 2.3100337982177734, "learning_rate": 5.15690393374847e-06, "loss": 0.6598, "step": 8701 }, { "epoch": 0.6584692217471908, "grad_norm": 2.662794589996338, "learning_rate": 5.154867494455234e-06, "loss": 0.5787, "step": 8702 }, { "epoch": 0.6585448904695245, "grad_norm": 2.560645341873169, "learning_rate": 5.152831295956632e-06, "loss": 0.6013, "step": 8703 }, { "epoch": 0.658620559191858, "grad_norm": 2.0240793228149414, "learning_rate": 5.150795338380178e-06, "loss": 0.7068, "step": 8704 }, { "epoch": 0.6586962279141917, "grad_norm": 2.2594637870788574, "learning_rate": 5.1487596218533735e-06, "loss": 0.656, "step": 8705 }, { "epoch": 0.6587718966365252, "grad_norm": 3.0724432468414307, "learning_rate": 5.146724146503693e-06, "loss": 0.8438, "step": 8706 }, { "epoch": 0.6588475653588589, "grad_norm": 2.630356788635254, "learning_rate": 5.144688912458607e-06, "loss": 0.5843, "step": 8707 }, { "epoch": 0.6589232340811926, "grad_norm": 2.645566701889038, "learning_rate": 5.142653919845578e-06, "loss": 0.6826, "step": 8708 }, { "epoch": 0.6589989028035261, "grad_norm": 2.8515145778656006, "learning_rate": 5.140619168792033e-06, "loss": 0.6833, "step": 8709 }, { "epoch": 0.6590745715258598, "grad_norm": 3.5823323726654053, "learning_rate": 5.138584659425398e-06, "loss": 0.679, "step": 8710 }, { "epoch": 0.6591502402481935, "grad_norm": 2.3689839839935303, "learning_rate": 5.136550391873082e-06, "loss": 0.6783, "step": 8711 }, { "epoch": 0.659225908970527, "grad_norm": 2.3048954010009766, "learning_rate": 5.134516366262475e-06, "loss": 0.8017, "step": 8712 }, { "epoch": 0.6593015776928607, "grad_norm": 3.267667531967163, "learning_rate": 5.1324825827209564e-06, "loss": 0.7462, "step": 8713 }, { "epoch": 0.6593772464151942, "grad_norm": 2.637197732925415, "learning_rate": 5.130449041375887e-06, "loss": 0.487, "step": 8714 }, { "epoch": 0.6594529151375279, "grad_norm": 2.3977911472320557, "learning_rate": 5.128415742354615e-06, "loss": 0.6639, "step": 8715 }, { "epoch": 0.6595285838598616, "grad_norm": 2.283331871032715, "learning_rate": 5.126382685784475e-06, "loss": 0.7063, "step": 8716 }, { "epoch": 0.6596042525821951, "grad_norm": 2.826462507247925, "learning_rate": 5.124349871792772e-06, "loss": 0.6383, "step": 8717 }, { "epoch": 0.6596799213045288, "grad_norm": 3.0163345336914062, "learning_rate": 5.122317300506819e-06, "loss": 0.6715, "step": 8718 }, { "epoch": 0.6597555900268623, "grad_norm": 2.01212215423584, "learning_rate": 5.1202849720539035e-06, "loss": 0.698, "step": 8719 }, { "epoch": 0.659831258749196, "grad_norm": 2.5961616039276123, "learning_rate": 5.118252886561287e-06, "loss": 0.6204, "step": 8720 }, { "epoch": 0.6599069274715297, "grad_norm": 2.244570255279541, "learning_rate": 5.11622104415623e-06, "loss": 0.6391, "step": 8721 }, { "epoch": 0.6599825961938632, "grad_norm": 2.480987787246704, "learning_rate": 5.114189444965974e-06, "loss": 0.869, "step": 8722 }, { "epoch": 0.6600582649161969, "grad_norm": 2.770308256149292, "learning_rate": 5.112158089117742e-06, "loss": 0.7924, "step": 8723 }, { "epoch": 0.6601339336385306, "grad_norm": 2.041288375854492, "learning_rate": 5.110126976738745e-06, "loss": 0.715, "step": 8724 }, { "epoch": 0.6602096023608641, "grad_norm": 2.9713993072509766, "learning_rate": 5.108096107956178e-06, "loss": 0.6904, "step": 8725 }, { "epoch": 0.6602852710831978, "grad_norm": 2.7926697731018066, "learning_rate": 5.106065482897225e-06, "loss": 0.6128, "step": 8726 }, { "epoch": 0.6603609398055313, "grad_norm": 2.216324806213379, "learning_rate": 5.104035101689038e-06, "loss": 0.7352, "step": 8727 }, { "epoch": 0.660436608527865, "grad_norm": 2.27839732170105, "learning_rate": 5.1020049644587795e-06, "loss": 0.6477, "step": 8728 }, { "epoch": 0.6605122772501987, "grad_norm": 1.9538133144378662, "learning_rate": 5.0999750713335745e-06, "loss": 0.6509, "step": 8729 }, { "epoch": 0.6605879459725322, "grad_norm": 2.1575965881347656, "learning_rate": 5.097945422440551e-06, "loss": 0.7347, "step": 8730 }, { "epoch": 0.6606636146948659, "grad_norm": 2.204331398010254, "learning_rate": 5.095916017906802e-06, "loss": 0.7395, "step": 8731 }, { "epoch": 0.6607392834171995, "grad_norm": 3.217972993850708, "learning_rate": 5.093886857859415e-06, "loss": 0.5874, "step": 8732 }, { "epoch": 0.6608149521395331, "grad_norm": 2.131350040435791, "learning_rate": 5.0918579424254736e-06, "loss": 0.7008, "step": 8733 }, { "epoch": 0.6608906208618668, "grad_norm": 2.436474323272705, "learning_rate": 5.089829271732025e-06, "loss": 0.6264, "step": 8734 }, { "epoch": 0.6609662895842003, "grad_norm": 1.924094796180725, "learning_rate": 5.087800845906116e-06, "loss": 0.5002, "step": 8735 }, { "epoch": 0.661041958306534, "grad_norm": 2.152076005935669, "learning_rate": 5.085772665074771e-06, "loss": 0.7399, "step": 8736 }, { "epoch": 0.6611176270288677, "grad_norm": 2.1113076210021973, "learning_rate": 5.083744729365001e-06, "loss": 0.7238, "step": 8737 }, { "epoch": 0.6611932957512012, "grad_norm": 3.3278396129608154, "learning_rate": 5.081717038903803e-06, "loss": 0.664, "step": 8738 }, { "epoch": 0.6612689644735349, "grad_norm": 2.1845004558563232, "learning_rate": 5.079689593818156e-06, "loss": 0.6055, "step": 8739 }, { "epoch": 0.6613446331958684, "grad_norm": 2.576305627822876, "learning_rate": 5.0776623942350324e-06, "loss": 0.6543, "step": 8740 }, { "epoch": 0.6614203019182021, "grad_norm": 2.260627269744873, "learning_rate": 5.075635440281372e-06, "loss": 0.7901, "step": 8741 }, { "epoch": 0.6614959706405358, "grad_norm": 2.078648805618286, "learning_rate": 5.073608732084113e-06, "loss": 0.6235, "step": 8742 }, { "epoch": 0.6615716393628693, "grad_norm": 4.44390869140625, "learning_rate": 5.0715822697701704e-06, "loss": 0.7602, "step": 8743 }, { "epoch": 0.661647308085203, "grad_norm": 2.401336193084717, "learning_rate": 5.06955605346646e-06, "loss": 0.5624, "step": 8744 }, { "epoch": 0.6617229768075366, "grad_norm": 2.3435049057006836, "learning_rate": 5.067530083299858e-06, "loss": 0.6278, "step": 8745 }, { "epoch": 0.6617986455298702, "grad_norm": 3.2491648197174072, "learning_rate": 5.065504359397241e-06, "loss": 0.6314, "step": 8746 }, { "epoch": 0.6618743142522039, "grad_norm": 1.942625641822815, "learning_rate": 5.063478881885468e-06, "loss": 0.8968, "step": 8747 }, { "epoch": 0.6619499829745374, "grad_norm": 3.2278025150299072, "learning_rate": 5.0614536508913785e-06, "loss": 0.6857, "step": 8748 }, { "epoch": 0.6620256516968711, "grad_norm": 2.8973255157470703, "learning_rate": 5.059428666541801e-06, "loss": 0.7619, "step": 8749 }, { "epoch": 0.6621013204192048, "grad_norm": 1.850770354270935, "learning_rate": 5.057403928963545e-06, "loss": 0.591, "step": 8750 }, { "epoch": 0.6621769891415383, "grad_norm": 1.6158236265182495, "learning_rate": 5.055379438283411e-06, "loss": 0.5541, "step": 8751 }, { "epoch": 0.662252657863872, "grad_norm": 2.043518304824829, "learning_rate": 5.053355194628172e-06, "loss": 0.7462, "step": 8752 }, { "epoch": 0.6623283265862056, "grad_norm": 1.9455914497375488, "learning_rate": 5.05133119812459e-06, "loss": 0.7079, "step": 8753 }, { "epoch": 0.6624039953085392, "grad_norm": 2.781599283218384, "learning_rate": 5.0493074488994296e-06, "loss": 0.628, "step": 8754 }, { "epoch": 0.6624796640308729, "grad_norm": 1.801193356513977, "learning_rate": 5.04728394707941e-06, "loss": 0.6794, "step": 8755 }, { "epoch": 0.6625553327532064, "grad_norm": 2.097200632095337, "learning_rate": 5.045260692791256e-06, "loss": 0.6512, "step": 8756 }, { "epoch": 0.6626310014755401, "grad_norm": 2.374746322631836, "learning_rate": 5.0432376861616655e-06, "loss": 0.7123, "step": 8757 }, { "epoch": 0.6627066701978737, "grad_norm": 2.2179994583129883, "learning_rate": 5.0412149273173305e-06, "loss": 0.5607, "step": 8758 }, { "epoch": 0.6627823389202073, "grad_norm": 1.721039056777954, "learning_rate": 5.039192416384922e-06, "loss": 0.7004, "step": 8759 }, { "epoch": 0.662858007642541, "grad_norm": 2.0622527599334717, "learning_rate": 5.037170153491093e-06, "loss": 0.5236, "step": 8760 }, { "epoch": 0.6629336763648745, "grad_norm": 2.1661341190338135, "learning_rate": 5.035148138762487e-06, "loss": 0.6125, "step": 8761 }, { "epoch": 0.6630093450872082, "grad_norm": 2.070807695388794, "learning_rate": 5.033126372325733e-06, "loss": 0.7534, "step": 8762 }, { "epoch": 0.6630850138095419, "grad_norm": 1.9332561492919922, "learning_rate": 5.031104854307428e-06, "loss": 0.6172, "step": 8763 }, { "epoch": 0.6631606825318754, "grad_norm": 2.211865186691284, "learning_rate": 5.029083584834179e-06, "loss": 0.8465, "step": 8764 }, { "epoch": 0.6632363512542091, "grad_norm": 6.03785514831543, "learning_rate": 5.027062564032561e-06, "loss": 0.6893, "step": 8765 }, { "epoch": 0.6633120199765427, "grad_norm": 1.8481940031051636, "learning_rate": 5.025041792029133e-06, "loss": 0.722, "step": 8766 }, { "epoch": 0.6633876886988763, "grad_norm": 1.6104676723480225, "learning_rate": 5.023021268950444e-06, "loss": 0.6848, "step": 8767 }, { "epoch": 0.66346335742121, "grad_norm": 2.4193434715270996, "learning_rate": 5.021000994923026e-06, "loss": 0.7977, "step": 8768 }, { "epoch": 0.6635390261435435, "grad_norm": 2.3450634479522705, "learning_rate": 5.018980970073395e-06, "loss": 0.6101, "step": 8769 }, { "epoch": 0.6636146948658772, "grad_norm": 1.899941325187683, "learning_rate": 5.016961194528053e-06, "loss": 0.5525, "step": 8770 }, { "epoch": 0.6636903635882108, "grad_norm": 1.9693715572357178, "learning_rate": 5.014941668413483e-06, "loss": 0.8298, "step": 8771 }, { "epoch": 0.6637660323105444, "grad_norm": 2.609485626220703, "learning_rate": 5.012922391856156e-06, "loss": 0.6256, "step": 8772 }, { "epoch": 0.6638417010328781, "grad_norm": 2.0286991596221924, "learning_rate": 5.010903364982523e-06, "loss": 0.8595, "step": 8773 }, { "epoch": 0.6639173697552117, "grad_norm": 1.6877880096435547, "learning_rate": 5.008884587919025e-06, "loss": 0.6734, "step": 8774 }, { "epoch": 0.6639930384775453, "grad_norm": 2.174236297607422, "learning_rate": 5.006866060792081e-06, "loss": 0.6317, "step": 8775 }, { "epoch": 0.664068707199879, "grad_norm": 1.8174835443496704, "learning_rate": 5.004847783728106e-06, "loss": 0.6284, "step": 8776 }, { "epoch": 0.6641443759222125, "grad_norm": 2.0437092781066895, "learning_rate": 5.002829756853479e-06, "loss": 0.7169, "step": 8777 }, { "epoch": 0.6642200446445462, "grad_norm": 2.2291433811187744, "learning_rate": 5.000811980294578e-06, "loss": 0.6002, "step": 8778 }, { "epoch": 0.6642957133668798, "grad_norm": 2.1589958667755127, "learning_rate": 4.998794454177773e-06, "loss": 0.7183, "step": 8779 }, { "epoch": 0.6643713820892134, "grad_norm": 1.941250205039978, "learning_rate": 4.996777178629397e-06, "loss": 0.6259, "step": 8780 }, { "epoch": 0.6644470508115471, "grad_norm": 1.7236140966415405, "learning_rate": 4.994760153775782e-06, "loss": 0.6895, "step": 8781 }, { "epoch": 0.6645227195338806, "grad_norm": 2.2558975219726562, "learning_rate": 4.992743379743242e-06, "loss": 0.6669, "step": 8782 }, { "epoch": 0.6645983882562143, "grad_norm": 1.9559904336929321, "learning_rate": 4.990726856658075e-06, "loss": 0.6926, "step": 8783 }, { "epoch": 0.6646740569785479, "grad_norm": 2.2078473567962646, "learning_rate": 4.988710584646552e-06, "loss": 0.7465, "step": 8784 }, { "epoch": 0.6647497257008815, "grad_norm": 2.152083396911621, "learning_rate": 4.986694563834951e-06, "loss": 0.7085, "step": 8785 }, { "epoch": 0.6648253944232152, "grad_norm": 2.4791300296783447, "learning_rate": 4.98467879434952e-06, "loss": 0.6156, "step": 8786 }, { "epoch": 0.6649010631455488, "grad_norm": 2.4543418884277344, "learning_rate": 4.982663276316487e-06, "loss": 0.6784, "step": 8787 }, { "epoch": 0.6649767318678824, "grad_norm": 9.999984741210938, "learning_rate": 4.980648009862073e-06, "loss": 0.6158, "step": 8788 }, { "epoch": 0.6650524005902161, "grad_norm": 2.0445075035095215, "learning_rate": 4.978632995112476e-06, "loss": 0.765, "step": 8789 }, { "epoch": 0.6651280693125496, "grad_norm": 2.165191411972046, "learning_rate": 4.976618232193895e-06, "loss": 0.6143, "step": 8790 }, { "epoch": 0.6652037380348833, "grad_norm": 1.9150837659835815, "learning_rate": 4.974603721232492e-06, "loss": 0.5768, "step": 8791 }, { "epoch": 0.6652794067572169, "grad_norm": 2.5317835807800293, "learning_rate": 4.972589462354423e-06, "loss": 0.7633, "step": 8792 }, { "epoch": 0.6653550754795505, "grad_norm": 2.1504557132720947, "learning_rate": 4.970575455685826e-06, "loss": 0.6546, "step": 8793 }, { "epoch": 0.6654307442018842, "grad_norm": 1.821834683418274, "learning_rate": 4.968561701352829e-06, "loss": 0.7508, "step": 8794 }, { "epoch": 0.6655064129242177, "grad_norm": 2.307339668273926, "learning_rate": 4.966548199481536e-06, "loss": 0.5994, "step": 8795 }, { "epoch": 0.6655820816465514, "grad_norm": 2.067732334136963, "learning_rate": 4.964534950198041e-06, "loss": 0.6737, "step": 8796 }, { "epoch": 0.665657750368885, "grad_norm": 2.2294864654541016, "learning_rate": 4.962521953628425e-06, "loss": 0.6829, "step": 8797 }, { "epoch": 0.6657334190912186, "grad_norm": 2.271959066390991, "learning_rate": 4.960509209898737e-06, "loss": 0.6612, "step": 8798 }, { "epoch": 0.6658090878135523, "grad_norm": 2.435523271560669, "learning_rate": 4.958496719135024e-06, "loss": 0.7108, "step": 8799 }, { "epoch": 0.6658847565358859, "grad_norm": 2.1555373668670654, "learning_rate": 4.956484481463328e-06, "loss": 0.741, "step": 8800 }, { "epoch": 0.6659604252582195, "grad_norm": 2.427854061126709, "learning_rate": 4.954472497009647e-06, "loss": 0.9206, "step": 8801 }, { "epoch": 0.6660360939805532, "grad_norm": 2.376939296722412, "learning_rate": 4.952460765899982e-06, "loss": 0.677, "step": 8802 }, { "epoch": 0.6661117627028867, "grad_norm": 2.0379579067230225, "learning_rate": 4.950449288260316e-06, "loss": 0.6682, "step": 8803 }, { "epoch": 0.6661874314252204, "grad_norm": 1.950374722480774, "learning_rate": 4.948438064216615e-06, "loss": 0.5976, "step": 8804 }, { "epoch": 0.666263100147554, "grad_norm": 1.9691082239151, "learning_rate": 4.946427093894825e-06, "loss": 0.6487, "step": 8805 }, { "epoch": 0.6663387688698876, "grad_norm": 2.2391226291656494, "learning_rate": 4.944416377420881e-06, "loss": 0.7114, "step": 8806 }, { "epoch": 0.6664144375922213, "grad_norm": 1.9541027545928955, "learning_rate": 4.942405914920701e-06, "loss": 0.5922, "step": 8807 }, { "epoch": 0.6664901063145549, "grad_norm": 2.247073173522949, "learning_rate": 4.94039570652019e-06, "loss": 0.7678, "step": 8808 }, { "epoch": 0.6665657750368885, "grad_norm": 2.149578094482422, "learning_rate": 4.938385752345224e-06, "loss": 0.7262, "step": 8809 }, { "epoch": 0.6666414437592221, "grad_norm": 2.3252670764923096, "learning_rate": 4.936376052521682e-06, "loss": 0.6556, "step": 8810 }, { "epoch": 0.6667171124815557, "grad_norm": 1.9192390441894531, "learning_rate": 4.934366607175419e-06, "loss": 0.6206, "step": 8811 }, { "epoch": 0.6667927812038894, "grad_norm": 2.2456088066101074, "learning_rate": 4.932357416432264e-06, "loss": 0.6447, "step": 8812 }, { "epoch": 0.666868449926223, "grad_norm": 2.371447801589966, "learning_rate": 4.930348480418045e-06, "loss": 0.6276, "step": 8813 }, { "epoch": 0.6669441186485566, "grad_norm": 2.4890329837799072, "learning_rate": 4.928339799258567e-06, "loss": 0.5793, "step": 8814 }, { "epoch": 0.6670197873708903, "grad_norm": 2.112485885620117, "learning_rate": 4.926331373079619e-06, "loss": 0.6851, "step": 8815 }, { "epoch": 0.6670954560932238, "grad_norm": 2.055906295776367, "learning_rate": 4.9243232020069775e-06, "loss": 0.4433, "step": 8816 }, { "epoch": 0.6671711248155575, "grad_norm": 2.641813278198242, "learning_rate": 4.9223152861664e-06, "loss": 0.7469, "step": 8817 }, { "epoch": 0.6672467935378911, "grad_norm": 2.035452127456665, "learning_rate": 4.920307625683626e-06, "loss": 0.5807, "step": 8818 }, { "epoch": 0.6673224622602247, "grad_norm": 2.3828279972076416, "learning_rate": 4.9183002206843894e-06, "loss": 0.8857, "step": 8819 }, { "epoch": 0.6673981309825584, "grad_norm": 2.7702672481536865, "learning_rate": 4.916293071294386e-06, "loss": 0.5946, "step": 8820 }, { "epoch": 0.667473799704892, "grad_norm": 2.4469079971313477, "learning_rate": 4.914286177639324e-06, "loss": 0.6725, "step": 8821 }, { "epoch": 0.6675494684272256, "grad_norm": 2.4644572734832764, "learning_rate": 4.912279539844879e-06, "loss": 0.7755, "step": 8822 }, { "epoch": 0.6676251371495592, "grad_norm": 2.4523849487304688, "learning_rate": 4.9102731580367075e-06, "loss": 0.7279, "step": 8823 }, { "epoch": 0.6677008058718928, "grad_norm": 2.323460340499878, "learning_rate": 4.908267032340458e-06, "loss": 0.594, "step": 8824 }, { "epoch": 0.6677764745942265, "grad_norm": 2.83245849609375, "learning_rate": 4.906261162881761e-06, "loss": 0.7527, "step": 8825 }, { "epoch": 0.6678521433165601, "grad_norm": 3.085604429244995, "learning_rate": 4.9042555497862314e-06, "loss": 0.659, "step": 8826 }, { "epoch": 0.6679278120388937, "grad_norm": 2.575090169906616, "learning_rate": 4.902250193179466e-06, "loss": 0.6034, "step": 8827 }, { "epoch": 0.6680034807612274, "grad_norm": 2.862489700317383, "learning_rate": 4.900245093187049e-06, "loss": 0.7112, "step": 8828 }, { "epoch": 0.668079149483561, "grad_norm": 2.3818016052246094, "learning_rate": 4.898240249934546e-06, "loss": 0.7612, "step": 8829 }, { "epoch": 0.6681548182058946, "grad_norm": 2.6290574073791504, "learning_rate": 4.896235663547498e-06, "loss": 0.6276, "step": 8830 }, { "epoch": 0.6682304869282282, "grad_norm": 1.9389046430587769, "learning_rate": 4.89423133415145e-06, "loss": 0.5628, "step": 8831 }, { "epoch": 0.6683061556505618, "grad_norm": 2.1501002311706543, "learning_rate": 4.8922272618719154e-06, "loss": 0.776, "step": 8832 }, { "epoch": 0.6683818243728955, "grad_norm": 1.9380501508712769, "learning_rate": 4.8902234468344e-06, "loss": 0.5632, "step": 8833 }, { "epoch": 0.6684574930952291, "grad_norm": 1.943785309791565, "learning_rate": 4.888219889164381e-06, "loss": 0.6471, "step": 8834 }, { "epoch": 0.6685331618175627, "grad_norm": 2.238030433654785, "learning_rate": 4.886216588987328e-06, "loss": 0.5461, "step": 8835 }, { "epoch": 0.6686088305398963, "grad_norm": 2.7209441661834717, "learning_rate": 4.884213546428706e-06, "loss": 0.6858, "step": 8836 }, { "epoch": 0.66868449926223, "grad_norm": 1.9361830949783325, "learning_rate": 4.882210761613938e-06, "loss": 0.627, "step": 8837 }, { "epoch": 0.6687601679845636, "grad_norm": 2.493215799331665, "learning_rate": 4.880208234668452e-06, "loss": 0.6585, "step": 8838 }, { "epoch": 0.6688358367068972, "grad_norm": 2.2090611457824707, "learning_rate": 4.878205965717652e-06, "loss": 0.7604, "step": 8839 }, { "epoch": 0.6689115054292308, "grad_norm": 1.8827425241470337, "learning_rate": 4.8762039548869245e-06, "loss": 0.6121, "step": 8840 }, { "epoch": 0.6689871741515645, "grad_norm": 1.9385136365890503, "learning_rate": 4.8742022023016445e-06, "loss": 0.7205, "step": 8841 }, { "epoch": 0.6690628428738981, "grad_norm": 2.260593891143799, "learning_rate": 4.8722007080871675e-06, "loss": 0.626, "step": 8842 }, { "epoch": 0.6691385115962317, "grad_norm": 1.950851321220398, "learning_rate": 4.870199472368835e-06, "loss": 0.665, "step": 8843 }, { "epoch": 0.6692141803185653, "grad_norm": 3.029724597930908, "learning_rate": 4.868198495271966e-06, "loss": 0.7195, "step": 8844 }, { "epoch": 0.6692898490408989, "grad_norm": 2.3940155506134033, "learning_rate": 4.866197776921867e-06, "loss": 0.5533, "step": 8845 }, { "epoch": 0.6693655177632326, "grad_norm": 1.612696647644043, "learning_rate": 4.864197317443839e-06, "loss": 0.7753, "step": 8846 }, { "epoch": 0.6694411864855662, "grad_norm": 1.8332983255386353, "learning_rate": 4.8621971169631535e-06, "loss": 0.6191, "step": 8847 }, { "epoch": 0.6695168552078998, "grad_norm": 1.9380171298980713, "learning_rate": 4.8601971756050645e-06, "loss": 0.6095, "step": 8848 }, { "epoch": 0.6695925239302334, "grad_norm": 2.1490824222564697, "learning_rate": 4.858197493494819e-06, "loss": 0.7483, "step": 8849 }, { "epoch": 0.669668192652567, "grad_norm": 2.265101909637451, "learning_rate": 4.8561980707576415e-06, "loss": 0.6927, "step": 8850 }, { "epoch": 0.6697438613749007, "grad_norm": 1.6495442390441895, "learning_rate": 4.8541989075187446e-06, "loss": 0.5957, "step": 8851 }, { "epoch": 0.6698195300972343, "grad_norm": 2.1201844215393066, "learning_rate": 4.852200003903321e-06, "loss": 0.662, "step": 8852 }, { "epoch": 0.6698951988195679, "grad_norm": 2.0578794479370117, "learning_rate": 4.850201360036548e-06, "loss": 0.7122, "step": 8853 }, { "epoch": 0.6699708675419016, "grad_norm": 2.0692341327667236, "learning_rate": 4.848202976043593e-06, "loss": 0.7108, "step": 8854 }, { "epoch": 0.6700465362642352, "grad_norm": 2.5348756313323975, "learning_rate": 4.846204852049588e-06, "loss": 0.6537, "step": 8855 }, { "epoch": 0.6701222049865688, "grad_norm": 2.185525417327881, "learning_rate": 4.844206988179674e-06, "loss": 0.6937, "step": 8856 }, { "epoch": 0.6701978737089024, "grad_norm": 2.0344419479370117, "learning_rate": 4.842209384558962e-06, "loss": 0.535, "step": 8857 }, { "epoch": 0.670273542431236, "grad_norm": 2.430760145187378, "learning_rate": 4.840212041312545e-06, "loss": 0.8704, "step": 8858 }, { "epoch": 0.6703492111535697, "grad_norm": 2.276468276977539, "learning_rate": 4.838214958565503e-06, "loss": 0.8045, "step": 8859 }, { "epoch": 0.6704248798759033, "grad_norm": 2.0148627758026123, "learning_rate": 4.836218136442902e-06, "loss": 0.6484, "step": 8860 }, { "epoch": 0.6705005485982369, "grad_norm": 1.9523584842681885, "learning_rate": 4.834221575069788e-06, "loss": 0.7177, "step": 8861 }, { "epoch": 0.6705762173205705, "grad_norm": 1.8609076738357544, "learning_rate": 4.8322252745711925e-06, "loss": 0.8153, "step": 8862 }, { "epoch": 0.6706518860429042, "grad_norm": 2.0499250888824463, "learning_rate": 4.83022923507213e-06, "loss": 0.7258, "step": 8863 }, { "epoch": 0.6707275547652378, "grad_norm": 2.329328775405884, "learning_rate": 4.8282334566976e-06, "loss": 0.7621, "step": 8864 }, { "epoch": 0.6708032234875714, "grad_norm": 2.1345584392547607, "learning_rate": 4.8262379395725885e-06, "loss": 0.564, "step": 8865 }, { "epoch": 0.670878892209905, "grad_norm": 1.9283918142318726, "learning_rate": 4.82424268382205e-06, "loss": 0.6522, "step": 8866 }, { "epoch": 0.6709545609322387, "grad_norm": 2.036198377609253, "learning_rate": 4.822247689570943e-06, "loss": 0.7945, "step": 8867 }, { "epoch": 0.6710302296545723, "grad_norm": 2.039332389831543, "learning_rate": 4.8202529569442015e-06, "loss": 0.6903, "step": 8868 }, { "epoch": 0.6711058983769059, "grad_norm": 2.211557149887085, "learning_rate": 4.818258486066736e-06, "loss": 0.6866, "step": 8869 }, { "epoch": 0.6711815670992395, "grad_norm": 3.0144009590148926, "learning_rate": 4.816264277063449e-06, "loss": 0.6603, "step": 8870 }, { "epoch": 0.6712572358215732, "grad_norm": 2.047494649887085, "learning_rate": 4.814270330059226e-06, "loss": 0.6691, "step": 8871 }, { "epoch": 0.6713329045439068, "grad_norm": 2.4948368072509766, "learning_rate": 4.812276645178932e-06, "loss": 0.6757, "step": 8872 }, { "epoch": 0.6714085732662404, "grad_norm": 2.7353858947753906, "learning_rate": 4.8102832225474194e-06, "loss": 0.6022, "step": 8873 }, { "epoch": 0.671484241988574, "grad_norm": 2.2280282974243164, "learning_rate": 4.8082900622895226e-06, "loss": 0.585, "step": 8874 }, { "epoch": 0.6715599107109076, "grad_norm": 1.8477840423583984, "learning_rate": 4.806297164530059e-06, "loss": 0.8268, "step": 8875 }, { "epoch": 0.6716355794332413, "grad_norm": 2.323336601257324, "learning_rate": 4.804304529393834e-06, "loss": 0.6874, "step": 8876 }, { "epoch": 0.6717112481555749, "grad_norm": 2.1553761959075928, "learning_rate": 4.8023121570056265e-06, "loss": 0.6259, "step": 8877 }, { "epoch": 0.6717869168779085, "grad_norm": 2.515099287033081, "learning_rate": 4.800320047490211e-06, "loss": 0.8223, "step": 8878 }, { "epoch": 0.6718625856002421, "grad_norm": 2.2152724266052246, "learning_rate": 4.798328200972339e-06, "loss": 0.5351, "step": 8879 }, { "epoch": 0.6719382543225758, "grad_norm": 2.307845115661621, "learning_rate": 4.7963366175767425e-06, "loss": 0.6745, "step": 8880 }, { "epoch": 0.6720139230449094, "grad_norm": 2.3008527755737305, "learning_rate": 4.79434529742814e-06, "loss": 0.7065, "step": 8881 }, { "epoch": 0.672089591767243, "grad_norm": 2.3067424297332764, "learning_rate": 4.792354240651245e-06, "loss": 0.6639, "step": 8882 }, { "epoch": 0.6721652604895766, "grad_norm": 2.0410807132720947, "learning_rate": 4.790363447370733e-06, "loss": 0.6769, "step": 8883 }, { "epoch": 0.6722409292119103, "grad_norm": 1.8702601194381714, "learning_rate": 4.788372917711276e-06, "loss": 0.7758, "step": 8884 }, { "epoch": 0.6723165979342439, "grad_norm": 1.8784395456314087, "learning_rate": 4.78638265179753e-06, "loss": 0.7677, "step": 8885 }, { "epoch": 0.6723922666565775, "grad_norm": 2.5597028732299805, "learning_rate": 4.784392649754131e-06, "loss": 0.7775, "step": 8886 }, { "epoch": 0.6724679353789111, "grad_norm": 2.01203989982605, "learning_rate": 4.782402911705699e-06, "loss": 0.8557, "step": 8887 }, { "epoch": 0.6725436041012447, "grad_norm": 2.301677942276001, "learning_rate": 4.780413437776838e-06, "loss": 0.7891, "step": 8888 }, { "epoch": 0.6726192728235784, "grad_norm": 1.7823597192764282, "learning_rate": 4.778424228092136e-06, "loss": 0.5104, "step": 8889 }, { "epoch": 0.672694941545912, "grad_norm": 1.991434097290039, "learning_rate": 4.776435282776166e-06, "loss": 0.7134, "step": 8890 }, { "epoch": 0.6727706102682456, "grad_norm": 2.2954578399658203, "learning_rate": 4.774446601953472e-06, "loss": 0.7555, "step": 8891 }, { "epoch": 0.6728462789905792, "grad_norm": 2.434096097946167, "learning_rate": 4.772458185748603e-06, "loss": 0.5947, "step": 8892 }, { "epoch": 0.6729219477129129, "grad_norm": 2.205364942550659, "learning_rate": 4.770470034286079e-06, "loss": 0.601, "step": 8893 }, { "epoch": 0.6729976164352465, "grad_norm": 2.9410433769226074, "learning_rate": 4.768482147690398e-06, "loss": 0.8037, "step": 8894 }, { "epoch": 0.6730732851575801, "grad_norm": 2.166212320327759, "learning_rate": 4.766494526086052e-06, "loss": 0.7444, "step": 8895 }, { "epoch": 0.6731489538799137, "grad_norm": 1.7446627616882324, "learning_rate": 4.76450716959751e-06, "loss": 0.7428, "step": 8896 }, { "epoch": 0.6732246226022474, "grad_norm": 2.243112802505493, "learning_rate": 4.762520078349229e-06, "loss": 0.5938, "step": 8897 }, { "epoch": 0.673300291324581, "grad_norm": 2.0831973552703857, "learning_rate": 4.760533252465647e-06, "loss": 0.683, "step": 8898 }, { "epoch": 0.6733759600469146, "grad_norm": 2.1453609466552734, "learning_rate": 4.7585466920711845e-06, "loss": 0.6909, "step": 8899 }, { "epoch": 0.6734516287692482, "grad_norm": 2.366060733795166, "learning_rate": 4.756560397290251e-06, "loss": 0.7826, "step": 8900 }, { "epoch": 0.6735272974915818, "grad_norm": 2.3025095462799072, "learning_rate": 4.754574368247225e-06, "loss": 0.6098, "step": 8901 }, { "epoch": 0.6736029662139155, "grad_norm": 1.991722822189331, "learning_rate": 4.752588605066481e-06, "loss": 0.7002, "step": 8902 }, { "epoch": 0.6736786349362491, "grad_norm": 2.3091063499450684, "learning_rate": 4.75060310787238e-06, "loss": 0.8689, "step": 8903 }, { "epoch": 0.6737543036585827, "grad_norm": 2.582026243209839, "learning_rate": 4.748617876789259e-06, "loss": 0.759, "step": 8904 }, { "epoch": 0.6738299723809164, "grad_norm": 2.0004804134368896, "learning_rate": 4.746632911941435e-06, "loss": 0.7951, "step": 8905 }, { "epoch": 0.67390564110325, "grad_norm": 2.1170551776885986, "learning_rate": 4.744648213453215e-06, "loss": 0.6839, "step": 8906 }, { "epoch": 0.6739813098255836, "grad_norm": 2.757847785949707, "learning_rate": 4.742663781448887e-06, "loss": 0.6751, "step": 8907 }, { "epoch": 0.6740569785479172, "grad_norm": 1.556501865386963, "learning_rate": 4.740679616052722e-06, "loss": 0.7912, "step": 8908 }, { "epoch": 0.6741326472702508, "grad_norm": 2.0596907138824463, "learning_rate": 4.7386957173889775e-06, "loss": 0.7228, "step": 8909 }, { "epoch": 0.6742083159925845, "grad_norm": 2.1540966033935547, "learning_rate": 4.736712085581889e-06, "loss": 0.7179, "step": 8910 }, { "epoch": 0.6742839847149181, "grad_norm": 2.1931824684143066, "learning_rate": 4.734728720755683e-06, "loss": 0.717, "step": 8911 }, { "epoch": 0.6743596534372517, "grad_norm": 2.3688266277313232, "learning_rate": 4.732745623034552e-06, "loss": 0.6503, "step": 8912 }, { "epoch": 0.6744353221595853, "grad_norm": 2.4349288940429688, "learning_rate": 4.730762792542696e-06, "loss": 0.5946, "step": 8913 }, { "epoch": 0.6745109908819189, "grad_norm": 2.7900352478027344, "learning_rate": 4.728780229404286e-06, "loss": 0.6437, "step": 8914 }, { "epoch": 0.6745866596042526, "grad_norm": 3.1007180213928223, "learning_rate": 4.726797933743469e-06, "loss": 0.7885, "step": 8915 }, { "epoch": 0.6746623283265862, "grad_norm": 1.8395260572433472, "learning_rate": 4.724815905684387e-06, "loss": 0.5847, "step": 8916 }, { "epoch": 0.6747379970489198, "grad_norm": 2.2980258464813232, "learning_rate": 4.722834145351159e-06, "loss": 0.6564, "step": 8917 }, { "epoch": 0.6748136657712535, "grad_norm": 2.2813050746917725, "learning_rate": 4.7208526528678934e-06, "loss": 0.6424, "step": 8918 }, { "epoch": 0.6748893344935871, "grad_norm": 3.0813608169555664, "learning_rate": 4.7188714283586735e-06, "loss": 0.6765, "step": 8919 }, { "epoch": 0.6749650032159207, "grad_norm": 3.065865993499756, "learning_rate": 4.716890471947572e-06, "loss": 0.6069, "step": 8920 }, { "epoch": 0.6750406719382543, "grad_norm": 2.9707062244415283, "learning_rate": 4.7149097837586425e-06, "loss": 0.7634, "step": 8921 }, { "epoch": 0.6751163406605879, "grad_norm": 2.9453794956207275, "learning_rate": 4.712929363915923e-06, "loss": 0.7505, "step": 8922 }, { "epoch": 0.6751920093829216, "grad_norm": 1.7816321849822998, "learning_rate": 4.710949212543431e-06, "loss": 0.6617, "step": 8923 }, { "epoch": 0.6752676781052552, "grad_norm": 1.8292231559753418, "learning_rate": 4.7089693297651725e-06, "loss": 0.7435, "step": 8924 }, { "epoch": 0.6753433468275888, "grad_norm": 2.4246504306793213, "learning_rate": 4.706989715705137e-06, "loss": 0.5491, "step": 8925 }, { "epoch": 0.6754190155499225, "grad_norm": 2.1744165420532227, "learning_rate": 4.705010370487287e-06, "loss": 0.7401, "step": 8926 }, { "epoch": 0.6754946842722561, "grad_norm": 1.9293251037597656, "learning_rate": 4.703031294235576e-06, "loss": 0.6611, "step": 8927 }, { "epoch": 0.6755703529945897, "grad_norm": 2.0167737007141113, "learning_rate": 4.701052487073951e-06, "loss": 0.6623, "step": 8928 }, { "epoch": 0.6756460217169233, "grad_norm": 2.362187147140503, "learning_rate": 4.69907394912632e-06, "loss": 0.8366, "step": 8929 }, { "epoch": 0.6757216904392569, "grad_norm": 2.09653377532959, "learning_rate": 4.697095680516588e-06, "loss": 0.757, "step": 8930 }, { "epoch": 0.6757973591615906, "grad_norm": 2.5158259868621826, "learning_rate": 4.695117681368643e-06, "loss": 0.6652, "step": 8931 }, { "epoch": 0.6758730278839242, "grad_norm": 2.2666311264038086, "learning_rate": 4.693139951806352e-06, "loss": 0.728, "step": 8932 }, { "epoch": 0.6759486966062578, "grad_norm": 2.03359055519104, "learning_rate": 4.691162491953568e-06, "loss": 0.772, "step": 8933 }, { "epoch": 0.6760243653285914, "grad_norm": 2.2918758392333984, "learning_rate": 4.689185301934124e-06, "loss": 0.7318, "step": 8934 }, { "epoch": 0.676100034050925, "grad_norm": 2.3821206092834473, "learning_rate": 4.6872083818718404e-06, "loss": 0.678, "step": 8935 }, { "epoch": 0.6761757027732587, "grad_norm": 1.8658883571624756, "learning_rate": 4.685231731890521e-06, "loss": 0.7425, "step": 8936 }, { "epoch": 0.6762513714955923, "grad_norm": 1.821655511856079, "learning_rate": 4.6832553521139415e-06, "loss": 0.6313, "step": 8937 }, { "epoch": 0.6763270402179259, "grad_norm": 2.3317677974700928, "learning_rate": 4.6812792426658715e-06, "loss": 0.8466, "step": 8938 }, { "epoch": 0.6764027089402596, "grad_norm": 2.308093547821045, "learning_rate": 4.679303403670069e-06, "loss": 0.7643, "step": 8939 }, { "epoch": 0.6764783776625932, "grad_norm": 2.0288562774658203, "learning_rate": 4.67732783525026e-06, "loss": 0.6358, "step": 8940 }, { "epoch": 0.6765540463849268, "grad_norm": 2.001481771469116, "learning_rate": 4.675352537530162e-06, "loss": 0.5445, "step": 8941 }, { "epoch": 0.6766297151072604, "grad_norm": 2.197216749191284, "learning_rate": 4.673377510633478e-06, "loss": 0.7168, "step": 8942 }, { "epoch": 0.676705383829594, "grad_norm": 3.374070882797241, "learning_rate": 4.671402754683887e-06, "loss": 0.7088, "step": 8943 }, { "epoch": 0.6767810525519277, "grad_norm": 2.1551625728607178, "learning_rate": 4.669428269805055e-06, "loss": 0.7868, "step": 8944 }, { "epoch": 0.6768567212742613, "grad_norm": 2.4078245162963867, "learning_rate": 4.6674540561206336e-06, "loss": 0.8625, "step": 8945 }, { "epoch": 0.6769323899965949, "grad_norm": 2.43843674659729, "learning_rate": 4.665480113754253e-06, "loss": 0.6372, "step": 8946 }, { "epoch": 0.6770080587189286, "grad_norm": 6.045653343200684, "learning_rate": 4.663506442829526e-06, "loss": 0.7967, "step": 8947 }, { "epoch": 0.6770837274412621, "grad_norm": 1.9134116172790527, "learning_rate": 4.661533043470047e-06, "loss": 0.675, "step": 8948 }, { "epoch": 0.6771593961635958, "grad_norm": 2.1514625549316406, "learning_rate": 4.659559915799406e-06, "loss": 0.7456, "step": 8949 }, { "epoch": 0.6772350648859294, "grad_norm": 2.0664405822753906, "learning_rate": 4.657587059941163e-06, "loss": 0.6689, "step": 8950 }, { "epoch": 0.677310733608263, "grad_norm": 2.511876344680786, "learning_rate": 4.655614476018862e-06, "loss": 0.7499, "step": 8951 }, { "epoch": 0.6773864023305967, "grad_norm": 2.2233481407165527, "learning_rate": 4.653642164156032e-06, "loss": 0.695, "step": 8952 }, { "epoch": 0.6774620710529303, "grad_norm": 4.162423610687256, "learning_rate": 4.651670124476189e-06, "loss": 0.5902, "step": 8953 }, { "epoch": 0.6775377397752639, "grad_norm": 2.2912869453430176, "learning_rate": 4.649698357102826e-06, "loss": 0.9048, "step": 8954 }, { "epoch": 0.6776134084975975, "grad_norm": 2.0150766372680664, "learning_rate": 4.647726862159423e-06, "loss": 0.6542, "step": 8955 }, { "epoch": 0.6776890772199311, "grad_norm": 2.4221763610839844, "learning_rate": 4.6457556397694415e-06, "loss": 0.7111, "step": 8956 }, { "epoch": 0.6777647459422648, "grad_norm": 2.3624653816223145, "learning_rate": 4.643784690056328e-06, "loss": 0.6624, "step": 8957 }, { "epoch": 0.6778404146645984, "grad_norm": 2.5852222442626953, "learning_rate": 4.641814013143499e-06, "loss": 0.7025, "step": 8958 }, { "epoch": 0.677916083386932, "grad_norm": 2.3841378688812256, "learning_rate": 4.639843609154379e-06, "loss": 0.6945, "step": 8959 }, { "epoch": 0.6779917521092657, "grad_norm": 2.0488786697387695, "learning_rate": 4.637873478212354e-06, "loss": 0.7785, "step": 8960 }, { "epoch": 0.6780674208315992, "grad_norm": 2.140420436859131, "learning_rate": 4.6359036204408e-06, "loss": 0.5558, "step": 8961 }, { "epoch": 0.6781430895539329, "grad_norm": 2.20794939994812, "learning_rate": 4.633934035963076e-06, "loss": 0.7389, "step": 8962 }, { "epoch": 0.6782187582762665, "grad_norm": 2.0823874473571777, "learning_rate": 4.631964724902521e-06, "loss": 0.5781, "step": 8963 }, { "epoch": 0.6782944269986001, "grad_norm": 2.717106580734253, "learning_rate": 4.629995687382469e-06, "loss": 0.7518, "step": 8964 }, { "epoch": 0.6783700957209338, "grad_norm": 1.9206687211990356, "learning_rate": 4.6280269235262175e-06, "loss": 0.6779, "step": 8965 }, { "epoch": 0.6784457644432674, "grad_norm": 2.1041131019592285, "learning_rate": 4.626058433457062e-06, "loss": 0.6477, "step": 8966 }, { "epoch": 0.678521433165601, "grad_norm": 1.8171806335449219, "learning_rate": 4.624090217298274e-06, "loss": 0.7458, "step": 8967 }, { "epoch": 0.6785971018879347, "grad_norm": 2.141724109649658, "learning_rate": 4.62212227517311e-06, "loss": 0.599, "step": 8968 }, { "epoch": 0.6786727706102682, "grad_norm": 2.140650510787964, "learning_rate": 4.620154607204809e-06, "loss": 0.6146, "step": 8969 }, { "epoch": 0.6787484393326019, "grad_norm": 1.9559601545333862, "learning_rate": 4.618187213516592e-06, "loss": 0.7644, "step": 8970 }, { "epoch": 0.6788241080549355, "grad_norm": 2.512819528579712, "learning_rate": 4.616220094231669e-06, "loss": 0.6922, "step": 8971 }, { "epoch": 0.6788997767772691, "grad_norm": 1.995936393737793, "learning_rate": 4.614253249473218e-06, "loss": 0.4985, "step": 8972 }, { "epoch": 0.6789754454996028, "grad_norm": 1.9954118728637695, "learning_rate": 4.612286679364414e-06, "loss": 0.6756, "step": 8973 }, { "epoch": 0.6790511142219363, "grad_norm": 2.6867809295654297, "learning_rate": 4.610320384028409e-06, "loss": 0.6245, "step": 8974 }, { "epoch": 0.67912678294427, "grad_norm": 2.2337684631347656, "learning_rate": 4.60835436358834e-06, "loss": 0.7234, "step": 8975 }, { "epoch": 0.6792024516666036, "grad_norm": 2.338660478591919, "learning_rate": 4.606388618167325e-06, "loss": 0.6593, "step": 8976 }, { "epoch": 0.6792781203889372, "grad_norm": 2.2352778911590576, "learning_rate": 4.604423147888467e-06, "loss": 0.6932, "step": 8977 }, { "epoch": 0.6793537891112709, "grad_norm": 1.7522343397140503, "learning_rate": 4.6024579528748465e-06, "loss": 0.706, "step": 8978 }, { "epoch": 0.6794294578336045, "grad_norm": 2.317509412765503, "learning_rate": 4.600493033249532e-06, "loss": 0.6231, "step": 8979 }, { "epoch": 0.6795051265559381, "grad_norm": 1.9539676904678345, "learning_rate": 4.598528389135574e-06, "loss": 0.7355, "step": 8980 }, { "epoch": 0.6795807952782718, "grad_norm": 1.9069515466690063, "learning_rate": 4.5965640206560055e-06, "loss": 0.6524, "step": 8981 }, { "epoch": 0.6796564640006053, "grad_norm": 2.012300729751587, "learning_rate": 4.594599927933843e-06, "loss": 0.6192, "step": 8982 }, { "epoch": 0.679732132722939, "grad_norm": 2.2685201168060303, "learning_rate": 4.59263611109208e-06, "loss": 0.7487, "step": 8983 }, { "epoch": 0.6798078014452726, "grad_norm": 2.114442825317383, "learning_rate": 4.5906725702536925e-06, "loss": 0.6905, "step": 8984 }, { "epoch": 0.6798834701676062, "grad_norm": 1.9639710187911987, "learning_rate": 4.588709305541659e-06, "loss": 0.6025, "step": 8985 }, { "epoch": 0.6799591388899399, "grad_norm": 1.5509191751480103, "learning_rate": 4.586746317078913e-06, "loss": 0.8064, "step": 8986 }, { "epoch": 0.6800348076122734, "grad_norm": 2.1551706790924072, "learning_rate": 4.584783604988387e-06, "loss": 0.6756, "step": 8987 }, { "epoch": 0.6801104763346071, "grad_norm": 2.119821548461914, "learning_rate": 4.5828211693929915e-06, "loss": 0.8007, "step": 8988 }, { "epoch": 0.6801861450569407, "grad_norm": 2.0947601795196533, "learning_rate": 4.580859010415622e-06, "loss": 0.6009, "step": 8989 }, { "epoch": 0.6802618137792743, "grad_norm": 1.9060765504837036, "learning_rate": 4.5788971281791535e-06, "loss": 0.6957, "step": 8990 }, { "epoch": 0.680337482501608, "grad_norm": 2.3293838500976562, "learning_rate": 4.576935522806447e-06, "loss": 0.552, "step": 8991 }, { "epoch": 0.6804131512239416, "grad_norm": 2.8745076656341553, "learning_rate": 4.574974194420344e-06, "loss": 0.6307, "step": 8992 }, { "epoch": 0.6804888199462752, "grad_norm": 2.3362932205200195, "learning_rate": 4.573013143143672e-06, "loss": 0.6628, "step": 8993 }, { "epoch": 0.6805644886686089, "grad_norm": 2.5250635147094727, "learning_rate": 4.5710523690992296e-06, "loss": 0.6863, "step": 8994 }, { "epoch": 0.6806401573909424, "grad_norm": 2.553046226501465, "learning_rate": 4.569091872409816e-06, "loss": 0.7694, "step": 8995 }, { "epoch": 0.6807158261132761, "grad_norm": 2.729386806488037, "learning_rate": 4.567131653198204e-06, "loss": 0.7139, "step": 8996 }, { "epoch": 0.6807914948356097, "grad_norm": 1.8493585586547852, "learning_rate": 4.5651717115871415e-06, "loss": 0.6534, "step": 8997 }, { "epoch": 0.6808671635579433, "grad_norm": 2.3107059001922607, "learning_rate": 4.563212047699371e-06, "loss": 0.6176, "step": 8998 }, { "epoch": 0.680942832280277, "grad_norm": 1.6899579763412476, "learning_rate": 4.561252661657613e-06, "loss": 0.6259, "step": 8999 }, { "epoch": 0.6810185010026105, "grad_norm": 2.0711593627929688, "learning_rate": 4.559293553584569e-06, "loss": 0.6786, "step": 9000 }, { "epoch": 0.6810941697249442, "grad_norm": 1.8845760822296143, "learning_rate": 4.557334723602927e-06, "loss": 0.6397, "step": 9001 }, { "epoch": 0.6811698384472779, "grad_norm": 2.040178060531616, "learning_rate": 4.555376171835352e-06, "loss": 0.5297, "step": 9002 }, { "epoch": 0.6812455071696114, "grad_norm": 2.0751795768737793, "learning_rate": 4.5534178984045e-06, "loss": 0.6987, "step": 9003 }, { "epoch": 0.6813211758919451, "grad_norm": 2.1096200942993164, "learning_rate": 4.551459903432997e-06, "loss": 0.7418, "step": 9004 }, { "epoch": 0.6813968446142787, "grad_norm": 2.9717164039611816, "learning_rate": 4.549502187043465e-06, "loss": 0.6486, "step": 9005 }, { "epoch": 0.6814725133366123, "grad_norm": 1.7609609365463257, "learning_rate": 4.5475447493585004e-06, "loss": 0.7631, "step": 9006 }, { "epoch": 0.681548182058946, "grad_norm": 2.13960599899292, "learning_rate": 4.545587590500689e-06, "loss": 0.7899, "step": 9007 }, { "epoch": 0.6816238507812795, "grad_norm": 1.9353188276290894, "learning_rate": 4.543630710592585e-06, "loss": 0.6638, "step": 9008 }, { "epoch": 0.6816995195036132, "grad_norm": 1.8156839609146118, "learning_rate": 4.5416741097567385e-06, "loss": 0.7883, "step": 9009 }, { "epoch": 0.6817751882259468, "grad_norm": 1.8130958080291748, "learning_rate": 4.539717788115684e-06, "loss": 0.618, "step": 9010 }, { "epoch": 0.6818508569482804, "grad_norm": 1.71598219871521, "learning_rate": 4.537761745791925e-06, "loss": 0.627, "step": 9011 }, { "epoch": 0.6819265256706141, "grad_norm": 2.342985153198242, "learning_rate": 4.535805982907958e-06, "loss": 0.7325, "step": 9012 }, { "epoch": 0.6820021943929476, "grad_norm": 1.825118064880371, "learning_rate": 4.53385049958626e-06, "loss": 0.6055, "step": 9013 }, { "epoch": 0.6820778631152813, "grad_norm": 1.9702906608581543, "learning_rate": 4.531895295949292e-06, "loss": 0.7337, "step": 9014 }, { "epoch": 0.682153531837615, "grad_norm": 2.108592987060547, "learning_rate": 4.529940372119486e-06, "loss": 0.635, "step": 9015 }, { "epoch": 0.6822292005599485, "grad_norm": 1.7487767934799194, "learning_rate": 4.5279857282192735e-06, "loss": 0.6604, "step": 9016 }, { "epoch": 0.6823048692822822, "grad_norm": 2.037961721420288, "learning_rate": 4.5260313643710625e-06, "loss": 0.6136, "step": 9017 }, { "epoch": 0.6823805380046158, "grad_norm": 2.4088294506073, "learning_rate": 4.524077280697237e-06, "loss": 0.6478, "step": 9018 }, { "epoch": 0.6824562067269494, "grad_norm": 2.065066337585449, "learning_rate": 4.522123477320167e-06, "loss": 0.6117, "step": 9019 }, { "epoch": 0.6825318754492831, "grad_norm": 3.416294813156128, "learning_rate": 4.520169954362204e-06, "loss": 0.76, "step": 9020 }, { "epoch": 0.6826075441716166, "grad_norm": 2.648378610610962, "learning_rate": 4.518216711945697e-06, "loss": 0.7329, "step": 9021 }, { "epoch": 0.6826832128939503, "grad_norm": 2.742499589920044, "learning_rate": 4.516263750192951e-06, "loss": 0.8434, "step": 9022 }, { "epoch": 0.682758881616284, "grad_norm": 1.814097285270691, "learning_rate": 4.514311069226272e-06, "loss": 0.7289, "step": 9023 }, { "epoch": 0.6828345503386175, "grad_norm": 1.8923470973968506, "learning_rate": 4.5123586691679405e-06, "loss": 0.7187, "step": 9024 }, { "epoch": 0.6829102190609512, "grad_norm": 2.0584118366241455, "learning_rate": 4.510406550140226e-06, "loss": 0.7942, "step": 9025 }, { "epoch": 0.6829858877832847, "grad_norm": 2.10097074508667, "learning_rate": 4.508454712265373e-06, "loss": 0.6773, "step": 9026 }, { "epoch": 0.6830615565056184, "grad_norm": 2.169400453567505, "learning_rate": 4.506503155665613e-06, "loss": 0.492, "step": 9027 }, { "epoch": 0.6831372252279521, "grad_norm": 2.034270763397217, "learning_rate": 4.5045518804631635e-06, "loss": 0.6998, "step": 9028 }, { "epoch": 0.6832128939502856, "grad_norm": 3.989406108856201, "learning_rate": 4.502600886780212e-06, "loss": 0.6633, "step": 9029 }, { "epoch": 0.6832885626726193, "grad_norm": 2.1138885021209717, "learning_rate": 4.500650174738935e-06, "loss": 0.674, "step": 9030 }, { "epoch": 0.683364231394953, "grad_norm": 1.778512954711914, "learning_rate": 4.498699744461504e-06, "loss": 0.6803, "step": 9031 }, { "epoch": 0.6834399001172865, "grad_norm": 2.0274288654327393, "learning_rate": 4.496749596070052e-06, "loss": 0.7654, "step": 9032 }, { "epoch": 0.6835155688396202, "grad_norm": 1.8345074653625488, "learning_rate": 4.494799729686703e-06, "loss": 0.7808, "step": 9033 }, { "epoch": 0.6835912375619537, "grad_norm": 1.9967671632766724, "learning_rate": 4.492850145433567e-06, "loss": 0.5957, "step": 9034 }, { "epoch": 0.6836669062842874, "grad_norm": 2.4879066944122314, "learning_rate": 4.490900843432734e-06, "loss": 0.6603, "step": 9035 }, { "epoch": 0.6837425750066211, "grad_norm": 1.9220361709594727, "learning_rate": 4.488951823806274e-06, "loss": 0.6626, "step": 9036 }, { "epoch": 0.6838182437289546, "grad_norm": 2.3953804969787598, "learning_rate": 4.487003086676241e-06, "loss": 0.7222, "step": 9037 }, { "epoch": 0.6838939124512883, "grad_norm": 4.208496570587158, "learning_rate": 4.485054632164672e-06, "loss": 0.6668, "step": 9038 }, { "epoch": 0.6839695811736218, "grad_norm": 1.9624886512756348, "learning_rate": 4.483106460393587e-06, "loss": 0.6975, "step": 9039 }, { "epoch": 0.6840452498959555, "grad_norm": 1.8978896141052246, "learning_rate": 4.481158571484981e-06, "loss": 0.5826, "step": 9040 }, { "epoch": 0.6841209186182892, "grad_norm": 2.2070109844207764, "learning_rate": 4.479210965560841e-06, "loss": 0.743, "step": 9041 }, { "epoch": 0.6841965873406227, "grad_norm": 2.3264880180358887, "learning_rate": 4.477263642743137e-06, "loss": 0.7172, "step": 9042 }, { "epoch": 0.6842722560629564, "grad_norm": 2.088231086730957, "learning_rate": 4.475316603153809e-06, "loss": 0.6427, "step": 9043 }, { "epoch": 0.68434792478529, "grad_norm": 2.185490131378174, "learning_rate": 4.47336984691479e-06, "loss": 0.7606, "step": 9044 }, { "epoch": 0.6844235935076236, "grad_norm": 2.1886022090911865, "learning_rate": 4.4714233741479914e-06, "loss": 0.5061, "step": 9045 }, { "epoch": 0.6844992622299573, "grad_norm": 2.4206368923187256, "learning_rate": 4.46947718497531e-06, "loss": 0.7002, "step": 9046 }, { "epoch": 0.6845749309522908, "grad_norm": 2.282135009765625, "learning_rate": 4.467531279518619e-06, "loss": 0.7932, "step": 9047 }, { "epoch": 0.6846505996746245, "grad_norm": 2.5050301551818848, "learning_rate": 4.465585657899779e-06, "loss": 0.5978, "step": 9048 }, { "epoch": 0.6847262683969582, "grad_norm": 2.1457459926605225, "learning_rate": 4.463640320240636e-06, "loss": 0.6758, "step": 9049 }, { "epoch": 0.6848019371192917, "grad_norm": 1.7627110481262207, "learning_rate": 4.4616952666630036e-06, "loss": 0.7674, "step": 9050 }, { "epoch": 0.6848776058416254, "grad_norm": 2.0747087001800537, "learning_rate": 4.4597504972886895e-06, "loss": 0.6957, "step": 9051 }, { "epoch": 0.6849532745639589, "grad_norm": 1.8389160633087158, "learning_rate": 4.457806012239488e-06, "loss": 0.6881, "step": 9052 }, { "epoch": 0.6850289432862926, "grad_norm": 3.18410325050354, "learning_rate": 4.455861811637168e-06, "loss": 0.7342, "step": 9053 }, { "epoch": 0.6851046120086263, "grad_norm": 1.8247333765029907, "learning_rate": 4.453917895603476e-06, "loss": 0.5705, "step": 9054 }, { "epoch": 0.6851802807309598, "grad_norm": 1.9525268077850342, "learning_rate": 4.451974264260148e-06, "loss": 0.7596, "step": 9055 }, { "epoch": 0.6852559494532935, "grad_norm": 1.9142605066299438, "learning_rate": 4.450030917728903e-06, "loss": 0.6349, "step": 9056 }, { "epoch": 0.6853316181756272, "grad_norm": 1.9247804880142212, "learning_rate": 4.448087856131438e-06, "loss": 0.5992, "step": 9057 }, { "epoch": 0.6854072868979607, "grad_norm": 2.2341079711914062, "learning_rate": 4.446145079589434e-06, "loss": 0.8441, "step": 9058 }, { "epoch": 0.6854829556202944, "grad_norm": 2.2383766174316406, "learning_rate": 4.444202588224554e-06, "loss": 0.5673, "step": 9059 }, { "epoch": 0.6855586243426279, "grad_norm": 1.9901219606399536, "learning_rate": 4.442260382158447e-06, "loss": 0.7313, "step": 9060 }, { "epoch": 0.6856342930649616, "grad_norm": 2.1193864345550537, "learning_rate": 4.440318461512729e-06, "loss": 0.7233, "step": 9061 }, { "epoch": 0.6857099617872953, "grad_norm": 2.3127729892730713, "learning_rate": 4.438376826409021e-06, "loss": 0.656, "step": 9062 }, { "epoch": 0.6857856305096288, "grad_norm": 1.5982202291488647, "learning_rate": 4.4364354769689125e-06, "loss": 0.7635, "step": 9063 }, { "epoch": 0.6858612992319625, "grad_norm": 1.8591372966766357, "learning_rate": 4.434494413313972e-06, "loss": 0.6837, "step": 9064 }, { "epoch": 0.685936967954296, "grad_norm": 2.3256266117095947, "learning_rate": 4.432553635565758e-06, "loss": 0.8136, "step": 9065 }, { "epoch": 0.6860126366766297, "grad_norm": 1.8938707113265991, "learning_rate": 4.430613143845805e-06, "loss": 0.7994, "step": 9066 }, { "epoch": 0.6860883053989634, "grad_norm": 1.8198975324630737, "learning_rate": 4.428672938275642e-06, "loss": 0.7606, "step": 9067 }, { "epoch": 0.6861639741212969, "grad_norm": 2.892153739929199, "learning_rate": 4.4267330189767624e-06, "loss": 0.714, "step": 9068 }, { "epoch": 0.6862396428436306, "grad_norm": 1.9913108348846436, "learning_rate": 4.424793386070653e-06, "loss": 0.5629, "step": 9069 }, { "epoch": 0.6863153115659643, "grad_norm": 2.234666347503662, "learning_rate": 4.4228540396787795e-06, "loss": 0.786, "step": 9070 }, { "epoch": 0.6863909802882978, "grad_norm": 2.110463857650757, "learning_rate": 4.4209149799225905e-06, "loss": 0.657, "step": 9071 }, { "epoch": 0.6864666490106315, "grad_norm": 2.373729705810547, "learning_rate": 4.418976206923516e-06, "loss": 0.672, "step": 9072 }, { "epoch": 0.686542317732965, "grad_norm": 1.475502371788025, "learning_rate": 4.4170377208029684e-06, "loss": 0.8645, "step": 9073 }, { "epoch": 0.6866179864552987, "grad_norm": 2.1476123332977295, "learning_rate": 4.415099521682345e-06, "loss": 0.7975, "step": 9074 }, { "epoch": 0.6866936551776324, "grad_norm": 2.0400941371917725, "learning_rate": 4.4131616096830155e-06, "loss": 0.7912, "step": 9075 }, { "epoch": 0.6867693238999659, "grad_norm": 1.941752314567566, "learning_rate": 4.411223984926338e-06, "loss": 0.6795, "step": 9076 }, { "epoch": 0.6868449926222996, "grad_norm": 2.0431621074676514, "learning_rate": 4.409286647533664e-06, "loss": 0.7254, "step": 9077 }, { "epoch": 0.6869206613446331, "grad_norm": 1.969773530960083, "learning_rate": 4.407349597626304e-06, "loss": 0.7392, "step": 9078 }, { "epoch": 0.6869963300669668, "grad_norm": 2.117180109024048, "learning_rate": 4.4054128353255676e-06, "loss": 0.7551, "step": 9079 }, { "epoch": 0.6870719987893005, "grad_norm": 2.118661403656006, "learning_rate": 4.403476360752739e-06, "loss": 0.7525, "step": 9080 }, { "epoch": 0.687147667511634, "grad_norm": 1.820186972618103, "learning_rate": 4.401540174029088e-06, "loss": 0.5936, "step": 9081 }, { "epoch": 0.6872233362339677, "grad_norm": 2.435248851776123, "learning_rate": 4.399604275275865e-06, "loss": 0.6549, "step": 9082 }, { "epoch": 0.6872990049563014, "grad_norm": 2.5884740352630615, "learning_rate": 4.397668664614301e-06, "loss": 0.6127, "step": 9083 }, { "epoch": 0.6873746736786349, "grad_norm": 2.3123669624328613, "learning_rate": 4.395733342165612e-06, "loss": 0.5723, "step": 9084 }, { "epoch": 0.6874503424009686, "grad_norm": 2.0741138458251953, "learning_rate": 4.393798308050996e-06, "loss": 0.6145, "step": 9085 }, { "epoch": 0.6875260111233021, "grad_norm": 2.0065953731536865, "learning_rate": 4.3918635623916214e-06, "loss": 0.7152, "step": 9086 }, { "epoch": 0.6876016798456358, "grad_norm": 2.173222064971924, "learning_rate": 4.389929105308658e-06, "loss": 0.7504, "step": 9087 }, { "epoch": 0.6876773485679695, "grad_norm": 2.4735090732574463, "learning_rate": 4.3879949369232486e-06, "loss": 0.68, "step": 9088 }, { "epoch": 0.687753017290303, "grad_norm": 2.0955440998077393, "learning_rate": 4.38606105735651e-06, "loss": 0.6141, "step": 9089 }, { "epoch": 0.6878286860126367, "grad_norm": 1.9914036989212036, "learning_rate": 4.3841274667295524e-06, "loss": 0.6674, "step": 9090 }, { "epoch": 0.6879043547349702, "grad_norm": 1.948864221572876, "learning_rate": 4.3821941651634605e-06, "loss": 0.5495, "step": 9091 }, { "epoch": 0.6879800234573039, "grad_norm": 2.403665542602539, "learning_rate": 4.380261152779307e-06, "loss": 0.6433, "step": 9092 }, { "epoch": 0.6880556921796376, "grad_norm": 1.9592474699020386, "learning_rate": 4.378328429698142e-06, "loss": 0.6297, "step": 9093 }, { "epoch": 0.6881313609019711, "grad_norm": 1.8239825963974, "learning_rate": 4.3763959960409985e-06, "loss": 0.6765, "step": 9094 }, { "epoch": 0.6882070296243048, "grad_norm": 1.8471215963363647, "learning_rate": 4.3744638519288915e-06, "loss": 0.7255, "step": 9095 }, { "epoch": 0.6882826983466385, "grad_norm": 1.9686150550842285, "learning_rate": 4.372531997482823e-06, "loss": 0.6088, "step": 9096 }, { "epoch": 0.688358367068972, "grad_norm": 2.405791997909546, "learning_rate": 4.370600432823762e-06, "loss": 0.8179, "step": 9097 }, { "epoch": 0.6884340357913057, "grad_norm": 2.5450029373168945, "learning_rate": 4.368669158072678e-06, "loss": 0.5827, "step": 9098 }, { "epoch": 0.6885097045136392, "grad_norm": 2.776323080062866, "learning_rate": 4.366738173350514e-06, "loss": 0.7119, "step": 9099 }, { "epoch": 0.6885853732359729, "grad_norm": 2.0069100856781006, "learning_rate": 4.364807478778188e-06, "loss": 0.7055, "step": 9100 }, { "epoch": 0.6886610419583066, "grad_norm": 1.881259799003601, "learning_rate": 4.362877074476611e-06, "loss": 0.6521, "step": 9101 }, { "epoch": 0.6887367106806401, "grad_norm": 2.377471923828125, "learning_rate": 4.3609469605666686e-06, "loss": 0.6983, "step": 9102 }, { "epoch": 0.6888123794029738, "grad_norm": 3.351958751678467, "learning_rate": 4.359017137169231e-06, "loss": 0.7147, "step": 9103 }, { "epoch": 0.6888880481253074, "grad_norm": 2.346928119659424, "learning_rate": 4.3570876044051525e-06, "loss": 0.6571, "step": 9104 }, { "epoch": 0.688963716847641, "grad_norm": 1.908215880393982, "learning_rate": 4.355158362395264e-06, "loss": 0.7263, "step": 9105 }, { "epoch": 0.6890393855699747, "grad_norm": 2.136303186416626, "learning_rate": 4.353229411260387e-06, "loss": 0.732, "step": 9106 }, { "epoch": 0.6891150542923082, "grad_norm": 2.8035433292388916, "learning_rate": 4.351300751121307e-06, "loss": 0.6906, "step": 9107 }, { "epoch": 0.6891907230146419, "grad_norm": 1.8425335884094238, "learning_rate": 4.3493723820988125e-06, "loss": 0.6579, "step": 9108 }, { "epoch": 0.6892663917369756, "grad_norm": 1.5369822978973389, "learning_rate": 4.347444304313661e-06, "loss": 0.5209, "step": 9109 }, { "epoch": 0.6893420604593091, "grad_norm": 1.7258330583572388, "learning_rate": 4.345516517886599e-06, "loss": 0.7984, "step": 9110 }, { "epoch": 0.6894177291816428, "grad_norm": 2.101191759109497, "learning_rate": 4.343589022938344e-06, "loss": 0.6611, "step": 9111 }, { "epoch": 0.6894933979039763, "grad_norm": 1.9967455863952637, "learning_rate": 4.341661819589601e-06, "loss": 0.7261, "step": 9112 }, { "epoch": 0.68956906662631, "grad_norm": 1.7132256031036377, "learning_rate": 4.339734907961069e-06, "loss": 0.6795, "step": 9113 }, { "epoch": 0.6896447353486437, "grad_norm": 1.7403509616851807, "learning_rate": 4.337808288173407e-06, "loss": 0.6901, "step": 9114 }, { "epoch": 0.6897204040709772, "grad_norm": 2.2544641494750977, "learning_rate": 4.335881960347269e-06, "loss": 0.5939, "step": 9115 }, { "epoch": 0.6897960727933109, "grad_norm": 2.149181842803955, "learning_rate": 4.333955924603288e-06, "loss": 0.8534, "step": 9116 }, { "epoch": 0.6898717415156445, "grad_norm": 1.9814257621765137, "learning_rate": 4.332030181062079e-06, "loss": 0.7261, "step": 9117 }, { "epoch": 0.6899474102379781, "grad_norm": 2.3098654747009277, "learning_rate": 4.3301047298442385e-06, "loss": 0.7357, "step": 9118 }, { "epoch": 0.6900230789603118, "grad_norm": 1.9268317222595215, "learning_rate": 4.3281795710703436e-06, "loss": 0.6553, "step": 9119 }, { "epoch": 0.6900987476826453, "grad_norm": 1.951817274093628, "learning_rate": 4.326254704860959e-06, "loss": 0.7475, "step": 9120 }, { "epoch": 0.690174416404979, "grad_norm": 2.312189817428589, "learning_rate": 4.324330131336617e-06, "loss": 0.5805, "step": 9121 }, { "epoch": 0.6902500851273127, "grad_norm": 1.6239818334579468, "learning_rate": 4.322405850617842e-06, "loss": 0.8134, "step": 9122 }, { "epoch": 0.6903257538496462, "grad_norm": 2.053333044052124, "learning_rate": 4.320481862825146e-06, "loss": 0.6272, "step": 9123 }, { "epoch": 0.6904014225719799, "grad_norm": 2.048210859298706, "learning_rate": 4.318558168079012e-06, "loss": 0.6803, "step": 9124 }, { "epoch": 0.6904770912943134, "grad_norm": 2.4473769664764404, "learning_rate": 4.316634766499906e-06, "loss": 0.5872, "step": 9125 }, { "epoch": 0.6905527600166471, "grad_norm": 2.2394003868103027, "learning_rate": 4.314711658208278e-06, "loss": 0.7076, "step": 9126 }, { "epoch": 0.6906284287389808, "grad_norm": 1.7375953197479248, "learning_rate": 4.31278884332456e-06, "loss": 0.672, "step": 9127 }, { "epoch": 0.6907040974613143, "grad_norm": 1.8307822942733765, "learning_rate": 4.3108663219691656e-06, "loss": 0.8224, "step": 9128 }, { "epoch": 0.690779766183648, "grad_norm": 1.7547332048416138, "learning_rate": 4.308944094262488e-06, "loss": 0.6712, "step": 9129 }, { "epoch": 0.6908554349059816, "grad_norm": 2.497670888900757, "learning_rate": 4.3070221603249036e-06, "loss": 0.753, "step": 9130 }, { "epoch": 0.6909311036283152, "grad_norm": 1.989039421081543, "learning_rate": 4.305100520276775e-06, "loss": 0.7124, "step": 9131 }, { "epoch": 0.6910067723506489, "grad_norm": 1.7251970767974854, "learning_rate": 4.303179174238433e-06, "loss": 0.6056, "step": 9132 }, { "epoch": 0.6910824410729824, "grad_norm": 2.441929578781128, "learning_rate": 4.301258122330198e-06, "loss": 0.6691, "step": 9133 }, { "epoch": 0.6911581097953161, "grad_norm": 2.3875224590301514, "learning_rate": 4.299337364672385e-06, "loss": 0.7165, "step": 9134 }, { "epoch": 0.6912337785176498, "grad_norm": 1.9688384532928467, "learning_rate": 4.297416901385267e-06, "loss": 0.7152, "step": 9135 }, { "epoch": 0.6913094472399833, "grad_norm": 2.073638677597046, "learning_rate": 4.295496732589112e-06, "loss": 0.6398, "step": 9136 }, { "epoch": 0.691385115962317, "grad_norm": 2.102508068084717, "learning_rate": 4.293576858404167e-06, "loss": 0.6271, "step": 9137 }, { "epoch": 0.6914607846846506, "grad_norm": 2.1116816997528076, "learning_rate": 4.2916572789506625e-06, "loss": 0.5456, "step": 9138 }, { "epoch": 0.6915364534069842, "grad_norm": 2.1573615074157715, "learning_rate": 4.2897379943488075e-06, "loss": 0.8615, "step": 9139 }, { "epoch": 0.6916121221293179, "grad_norm": 1.9969463348388672, "learning_rate": 4.2878190047187944e-06, "loss": 0.6516, "step": 9140 }, { "epoch": 0.6916877908516514, "grad_norm": 1.9518414735794067, "learning_rate": 4.285900310180796e-06, "loss": 0.7213, "step": 9141 }, { "epoch": 0.6917634595739851, "grad_norm": 1.909786581993103, "learning_rate": 4.283981910854971e-06, "loss": 0.6096, "step": 9142 }, { "epoch": 0.6918391282963187, "grad_norm": 2.240398406982422, "learning_rate": 4.2820638068614455e-06, "loss": 0.6252, "step": 9143 }, { "epoch": 0.6919147970186523, "grad_norm": 2.16809344291687, "learning_rate": 4.280145998320347e-06, "loss": 0.5408, "step": 9144 }, { "epoch": 0.691990465740986, "grad_norm": 2.0366976261138916, "learning_rate": 4.278228485351776e-06, "loss": 0.757, "step": 9145 }, { "epoch": 0.6920661344633195, "grad_norm": 1.8961420059204102, "learning_rate": 4.276311268075806e-06, "loss": 0.7155, "step": 9146 }, { "epoch": 0.6921418031856532, "grad_norm": 1.6012108325958252, "learning_rate": 4.274394346612502e-06, "loss": 0.6259, "step": 9147 }, { "epoch": 0.6922174719079869, "grad_norm": 2.297229290008545, "learning_rate": 4.272477721081908e-06, "loss": 0.6098, "step": 9148 }, { "epoch": 0.6922931406303204, "grad_norm": 1.7989269495010376, "learning_rate": 4.270561391604051e-06, "loss": 0.6358, "step": 9149 }, { "epoch": 0.6923688093526541, "grad_norm": 2.2176966667175293, "learning_rate": 4.268645358298935e-06, "loss": 0.668, "step": 9150 }, { "epoch": 0.6924444780749877, "grad_norm": 2.412024974822998, "learning_rate": 4.266729621286552e-06, "loss": 0.7053, "step": 9151 }, { "epoch": 0.6925201467973213, "grad_norm": 2.2924671173095703, "learning_rate": 4.2648141806868705e-06, "loss": 0.6765, "step": 9152 }, { "epoch": 0.692595815519655, "grad_norm": 3.1433582305908203, "learning_rate": 4.262899036619835e-06, "loss": 0.7652, "step": 9153 }, { "epoch": 0.6926714842419885, "grad_norm": 2.3667728900909424, "learning_rate": 4.2609841892053865e-06, "loss": 0.7888, "step": 9154 }, { "epoch": 0.6927471529643222, "grad_norm": 2.2989847660064697, "learning_rate": 4.259069638563436e-06, "loss": 0.8005, "step": 9155 }, { "epoch": 0.6928228216866558, "grad_norm": 2.9312691688537598, "learning_rate": 4.257155384813883e-06, "loss": 0.6623, "step": 9156 }, { "epoch": 0.6928984904089894, "grad_norm": 1.8132741451263428, "learning_rate": 4.255241428076595e-06, "loss": 0.7629, "step": 9157 }, { "epoch": 0.6929741591313231, "grad_norm": 2.117753505706787, "learning_rate": 4.253327768471433e-06, "loss": 0.827, "step": 9158 }, { "epoch": 0.6930498278536567, "grad_norm": 2.0779871940612793, "learning_rate": 4.2514144061182446e-06, "loss": 0.6522, "step": 9159 }, { "epoch": 0.6931254965759903, "grad_norm": 2.123748302459717, "learning_rate": 4.249501341136843e-06, "loss": 0.7253, "step": 9160 }, { "epoch": 0.693201165298324, "grad_norm": 2.4232475757598877, "learning_rate": 4.24758857364703e-06, "loss": 0.6461, "step": 9161 }, { "epoch": 0.6932768340206575, "grad_norm": 2.0096006393432617, "learning_rate": 4.2456761037685936e-06, "loss": 0.6071, "step": 9162 }, { "epoch": 0.6933525027429912, "grad_norm": 3.09159255027771, "learning_rate": 4.243763931621296e-06, "loss": 0.5963, "step": 9163 }, { "epoch": 0.6934281714653248, "grad_norm": 2.0967772006988525, "learning_rate": 4.241852057324885e-06, "loss": 0.6053, "step": 9164 }, { "epoch": 0.6935038401876584, "grad_norm": 2.970052719116211, "learning_rate": 4.239940480999087e-06, "loss": 0.6578, "step": 9165 }, { "epoch": 0.6935795089099921, "grad_norm": 2.1014604568481445, "learning_rate": 4.238029202763617e-06, "loss": 0.5055, "step": 9166 }, { "epoch": 0.6936551776323256, "grad_norm": 2.0818746089935303, "learning_rate": 4.2361182227381556e-06, "loss": 0.7098, "step": 9167 }, { "epoch": 0.6937308463546593, "grad_norm": 2.38274884223938, "learning_rate": 4.23420754104238e-06, "loss": 0.6155, "step": 9168 }, { "epoch": 0.693806515076993, "grad_norm": 1.9527738094329834, "learning_rate": 4.232297157795939e-06, "loss": 0.6962, "step": 9169 }, { "epoch": 0.6938821837993265, "grad_norm": 2.1649208068847656, "learning_rate": 4.230387073118477e-06, "loss": 0.7543, "step": 9170 }, { "epoch": 0.6939578525216602, "grad_norm": 3.0182876586914062, "learning_rate": 4.228477287129601e-06, "loss": 0.9058, "step": 9171 }, { "epoch": 0.6940335212439938, "grad_norm": 2.083481788635254, "learning_rate": 4.226567799948909e-06, "loss": 0.6472, "step": 9172 }, { "epoch": 0.6941091899663274, "grad_norm": 2.3242437839508057, "learning_rate": 4.224658611695981e-06, "loss": 0.632, "step": 9173 }, { "epoch": 0.6941848586886611, "grad_norm": 2.4421794414520264, "learning_rate": 4.222749722490377e-06, "loss": 0.6659, "step": 9174 }, { "epoch": 0.6942605274109946, "grad_norm": 2.1332194805145264, "learning_rate": 4.220841132451636e-06, "loss": 0.6021, "step": 9175 }, { "epoch": 0.6943361961333283, "grad_norm": 2.1453001499176025, "learning_rate": 4.218932841699281e-06, "loss": 0.7119, "step": 9176 }, { "epoch": 0.6944118648556619, "grad_norm": 2.6229517459869385, "learning_rate": 4.2170248503528195e-06, "loss": 0.6428, "step": 9177 }, { "epoch": 0.6944875335779955, "grad_norm": 1.7247917652130127, "learning_rate": 4.215117158531727e-06, "loss": 0.7384, "step": 9178 }, { "epoch": 0.6945632023003292, "grad_norm": 1.925907850265503, "learning_rate": 4.213209766355471e-06, "loss": 0.6112, "step": 9179 }, { "epoch": 0.6946388710226628, "grad_norm": 2.0717689990997314, "learning_rate": 4.21130267394351e-06, "loss": 0.7033, "step": 9180 }, { "epoch": 0.6947145397449964, "grad_norm": 1.7757562398910522, "learning_rate": 4.209395881415259e-06, "loss": 0.5708, "step": 9181 }, { "epoch": 0.6947902084673301, "grad_norm": 2.3035130500793457, "learning_rate": 4.207489388890133e-06, "loss": 0.6394, "step": 9182 }, { "epoch": 0.6948658771896636, "grad_norm": 2.059298515319824, "learning_rate": 4.205583196487524e-06, "loss": 0.7353, "step": 9183 }, { "epoch": 0.6949415459119973, "grad_norm": 1.9274907112121582, "learning_rate": 4.2036773043268005e-06, "loss": 0.7174, "step": 9184 }, { "epoch": 0.6950172146343309, "grad_norm": 2.086277961730957, "learning_rate": 4.201771712527318e-06, "loss": 0.6836, "step": 9185 }, { "epoch": 0.6950928833566645, "grad_norm": 2.223573684692383, "learning_rate": 4.19986642120841e-06, "loss": 0.6575, "step": 9186 }, { "epoch": 0.6951685520789982, "grad_norm": 3.5324819087982178, "learning_rate": 4.197961430489393e-06, "loss": 0.7639, "step": 9187 }, { "epoch": 0.6952442208013317, "grad_norm": 3.183359146118164, "learning_rate": 4.1960567404895644e-06, "loss": 0.6029, "step": 9188 }, { "epoch": 0.6953198895236654, "grad_norm": 1.7968335151672363, "learning_rate": 4.194152351328196e-06, "loss": 0.8075, "step": 9189 }, { "epoch": 0.695395558245999, "grad_norm": 1.813084602355957, "learning_rate": 4.192248263124554e-06, "loss": 0.7122, "step": 9190 }, { "epoch": 0.6954712269683326, "grad_norm": 2.4125254154205322, "learning_rate": 4.1903444759978785e-06, "loss": 0.7781, "step": 9191 }, { "epoch": 0.6955468956906663, "grad_norm": 2.2479751110076904, "learning_rate": 4.188440990067385e-06, "loss": 0.671, "step": 9192 }, { "epoch": 0.6956225644129999, "grad_norm": 2.125385284423828, "learning_rate": 4.18653780545228e-06, "loss": 0.6195, "step": 9193 }, { "epoch": 0.6956982331353335, "grad_norm": 2.0010180473327637, "learning_rate": 4.184634922271746e-06, "loss": 0.712, "step": 9194 }, { "epoch": 0.6957739018576672, "grad_norm": 2.053400754928589, "learning_rate": 4.182732340644948e-06, "loss": 0.538, "step": 9195 }, { "epoch": 0.6958495705800007, "grad_norm": 2.342834949493408, "learning_rate": 4.180830060691031e-06, "loss": 0.6268, "step": 9196 }, { "epoch": 0.6959252393023344, "grad_norm": 2.2274081707000732, "learning_rate": 4.178928082529123e-06, "loss": 0.7135, "step": 9197 }, { "epoch": 0.696000908024668, "grad_norm": 2.246617317199707, "learning_rate": 4.177026406278332e-06, "loss": 0.684, "step": 9198 }, { "epoch": 0.6960765767470016, "grad_norm": 2.258347988128662, "learning_rate": 4.1751250320577475e-06, "loss": 0.7247, "step": 9199 }, { "epoch": 0.6961522454693353, "grad_norm": 1.881675362586975, "learning_rate": 4.173223959986437e-06, "loss": 0.733, "step": 9200 }, { "epoch": 0.6962279141916689, "grad_norm": 3.7385098934173584, "learning_rate": 4.171323190183455e-06, "loss": 0.585, "step": 9201 }, { "epoch": 0.6963035829140025, "grad_norm": 2.185427665710449, "learning_rate": 4.1694227227678365e-06, "loss": 0.6836, "step": 9202 }, { "epoch": 0.6963792516363361, "grad_norm": 2.1207618713378906, "learning_rate": 4.167522557858588e-06, "loss": 0.6311, "step": 9203 }, { "epoch": 0.6964549203586697, "grad_norm": 1.9281233549118042, "learning_rate": 4.165622695574704e-06, "loss": 0.8161, "step": 9204 }, { "epoch": 0.6965305890810034, "grad_norm": 2.0313522815704346, "learning_rate": 4.163723136035168e-06, "loss": 0.5862, "step": 9205 }, { "epoch": 0.696606257803337, "grad_norm": 2.408750534057617, "learning_rate": 4.161823879358929e-06, "loss": 0.6848, "step": 9206 }, { "epoch": 0.6966819265256706, "grad_norm": 1.5770151615142822, "learning_rate": 4.159924925664927e-06, "loss": 0.5911, "step": 9207 }, { "epoch": 0.6967575952480043, "grad_norm": 2.710477113723755, "learning_rate": 4.158026275072082e-06, "loss": 0.7804, "step": 9208 }, { "epoch": 0.6968332639703378, "grad_norm": 1.9035274982452393, "learning_rate": 4.156127927699294e-06, "loss": 0.6249, "step": 9209 }, { "epoch": 0.6969089326926715, "grad_norm": 1.7576590776443481, "learning_rate": 4.154229883665437e-06, "loss": 0.5942, "step": 9210 }, { "epoch": 0.6969846014150051, "grad_norm": 1.8701988458633423, "learning_rate": 4.152332143089381e-06, "loss": 0.601, "step": 9211 }, { "epoch": 0.6970602701373387, "grad_norm": 2.1016006469726562, "learning_rate": 4.150434706089965e-06, "loss": 0.643, "step": 9212 }, { "epoch": 0.6971359388596724, "grad_norm": 2.1549484729766846, "learning_rate": 4.148537572786016e-06, "loss": 0.734, "step": 9213 }, { "epoch": 0.697211607582006, "grad_norm": 2.0905442237854004, "learning_rate": 4.146640743296333e-06, "loss": 0.6454, "step": 9214 }, { "epoch": 0.6972872763043396, "grad_norm": 1.8356784582138062, "learning_rate": 4.144744217739701e-06, "loss": 0.6778, "step": 9215 }, { "epoch": 0.6973629450266732, "grad_norm": 2.0387625694274902, "learning_rate": 4.142847996234896e-06, "loss": 0.6613, "step": 9216 }, { "epoch": 0.6974386137490068, "grad_norm": 1.5910100936889648, "learning_rate": 4.140952078900658e-06, "loss": 0.5748, "step": 9217 }, { "epoch": 0.6975142824713405, "grad_norm": 1.9364255666732788, "learning_rate": 4.139056465855714e-06, "loss": 0.6595, "step": 9218 }, { "epoch": 0.6975899511936741, "grad_norm": 1.8187669515609741, "learning_rate": 4.137161157218779e-06, "loss": 0.5342, "step": 9219 }, { "epoch": 0.6976656199160077, "grad_norm": 1.753604769706726, "learning_rate": 4.135266153108539e-06, "loss": 0.6342, "step": 9220 }, { "epoch": 0.6977412886383414, "grad_norm": 2.2299137115478516, "learning_rate": 4.133371453643668e-06, "loss": 0.6365, "step": 9221 }, { "epoch": 0.697816957360675, "grad_norm": 2.1055357456207275, "learning_rate": 4.131477058942816e-06, "loss": 0.8319, "step": 9222 }, { "epoch": 0.6978926260830086, "grad_norm": 3.087947368621826, "learning_rate": 4.12958296912462e-06, "loss": 0.6226, "step": 9223 }, { "epoch": 0.6979682948053422, "grad_norm": 1.8325108289718628, "learning_rate": 4.127689184307691e-06, "loss": 0.6056, "step": 9224 }, { "epoch": 0.6980439635276758, "grad_norm": 2.2946414947509766, "learning_rate": 4.1257957046106185e-06, "loss": 0.6827, "step": 9225 }, { "epoch": 0.6981196322500095, "grad_norm": 2.5224714279174805, "learning_rate": 4.1239025301519875e-06, "loss": 0.7167, "step": 9226 }, { "epoch": 0.6981953009723431, "grad_norm": 1.8570104837417603, "learning_rate": 4.122009661050355e-06, "loss": 0.6781, "step": 9227 }, { "epoch": 0.6982709696946767, "grad_norm": 2.250521183013916, "learning_rate": 4.120117097424252e-06, "loss": 0.6597, "step": 9228 }, { "epoch": 0.6983466384170103, "grad_norm": 2.871605157852173, "learning_rate": 4.118224839392199e-06, "loss": 0.6728, "step": 9229 }, { "epoch": 0.698422307139344, "grad_norm": 2.111581325531006, "learning_rate": 4.116332887072697e-06, "loss": 0.7846, "step": 9230 }, { "epoch": 0.6984979758616776, "grad_norm": 2.0724074840545654, "learning_rate": 4.1144412405842245e-06, "loss": 0.6152, "step": 9231 }, { "epoch": 0.6985736445840112, "grad_norm": 1.961753487586975, "learning_rate": 4.112549900045244e-06, "loss": 0.7844, "step": 9232 }, { "epoch": 0.6986493133063448, "grad_norm": 1.6883488893508911, "learning_rate": 4.1106588655741965e-06, "loss": 0.7343, "step": 9233 }, { "epoch": 0.6987249820286785, "grad_norm": 1.871146559715271, "learning_rate": 4.108768137289507e-06, "loss": 0.5357, "step": 9234 }, { "epoch": 0.698800650751012, "grad_norm": 2.21620774269104, "learning_rate": 4.106877715309572e-06, "loss": 0.6735, "step": 9235 }, { "epoch": 0.6988763194733457, "grad_norm": 1.7150919437408447, "learning_rate": 4.104987599752783e-06, "loss": 0.5634, "step": 9236 }, { "epoch": 0.6989519881956793, "grad_norm": 2.08954119682312, "learning_rate": 4.103097790737507e-06, "loss": 0.6117, "step": 9237 }, { "epoch": 0.6990276569180129, "grad_norm": 2.288553237915039, "learning_rate": 4.101208288382082e-06, "loss": 0.7672, "step": 9238 }, { "epoch": 0.6991033256403466, "grad_norm": 1.9526705741882324, "learning_rate": 4.099319092804839e-06, "loss": 0.7364, "step": 9239 }, { "epoch": 0.6991789943626802, "grad_norm": 2.6329140663146973, "learning_rate": 4.097430204124082e-06, "loss": 0.7055, "step": 9240 }, { "epoch": 0.6992546630850138, "grad_norm": 2.0371086597442627, "learning_rate": 4.095541622458108e-06, "loss": 0.6239, "step": 9241 }, { "epoch": 0.6993303318073474, "grad_norm": 1.7768152952194214, "learning_rate": 4.093653347925178e-06, "loss": 0.651, "step": 9242 }, { "epoch": 0.699406000529681, "grad_norm": 1.7975434064865112, "learning_rate": 4.091765380643544e-06, "loss": 0.6697, "step": 9243 }, { "epoch": 0.6994816692520147, "grad_norm": 1.8695652484893799, "learning_rate": 4.089877720731438e-06, "loss": 0.768, "step": 9244 }, { "epoch": 0.6995573379743483, "grad_norm": 2.318563222885132, "learning_rate": 4.087990368307073e-06, "loss": 0.6121, "step": 9245 }, { "epoch": 0.6996330066966819, "grad_norm": 1.9575533866882324, "learning_rate": 4.0861033234886305e-06, "loss": 0.7827, "step": 9246 }, { "epoch": 0.6997086754190156, "grad_norm": 2.9256958961486816, "learning_rate": 4.084216586394297e-06, "loss": 0.5644, "step": 9247 }, { "epoch": 0.6997843441413492, "grad_norm": 2.3860838413238525, "learning_rate": 4.082330157142222e-06, "loss": 0.8408, "step": 9248 }, { "epoch": 0.6998600128636828, "grad_norm": 1.9278440475463867, "learning_rate": 4.080444035850536e-06, "loss": 0.6105, "step": 9249 }, { "epoch": 0.6999356815860164, "grad_norm": 2.849597215652466, "learning_rate": 4.0785582226373555e-06, "loss": 0.6557, "step": 9250 }, { "epoch": 0.70001135030835, "grad_norm": 2.0541129112243652, "learning_rate": 4.076672717620778e-06, "loss": 0.6789, "step": 9251 }, { "epoch": 0.7000870190306837, "grad_norm": 1.8564491271972656, "learning_rate": 4.074787520918878e-06, "loss": 0.6847, "step": 9252 }, { "epoch": 0.7001626877530173, "grad_norm": 1.949328899383545, "learning_rate": 4.072902632649714e-06, "loss": 0.7276, "step": 9253 }, { "epoch": 0.7002383564753509, "grad_norm": 1.7822113037109375, "learning_rate": 4.071018052931323e-06, "loss": 0.698, "step": 9254 }, { "epoch": 0.7003140251976845, "grad_norm": 2.231820821762085, "learning_rate": 4.069133781881727e-06, "loss": 0.601, "step": 9255 }, { "epoch": 0.7003896939200182, "grad_norm": 2.3078548908233643, "learning_rate": 4.067249819618916e-06, "loss": 0.646, "step": 9256 }, { "epoch": 0.7004653626423518, "grad_norm": 2.0806334018707275, "learning_rate": 4.065366166260878e-06, "loss": 0.6797, "step": 9257 }, { "epoch": 0.7005410313646854, "grad_norm": 1.993569254875183, "learning_rate": 4.063482821925572e-06, "loss": 0.6611, "step": 9258 }, { "epoch": 0.700616700087019, "grad_norm": 2.1819117069244385, "learning_rate": 4.061599786730941e-06, "loss": 0.6336, "step": 9259 }, { "epoch": 0.7006923688093527, "grad_norm": 2.2110774517059326, "learning_rate": 4.059717060794902e-06, "loss": 0.5346, "step": 9260 }, { "epoch": 0.7007680375316863, "grad_norm": 2.1333065032958984, "learning_rate": 4.057834644235355e-06, "loss": 0.7319, "step": 9261 }, { "epoch": 0.7008437062540199, "grad_norm": 1.8538813591003418, "learning_rate": 4.055952537170195e-06, "loss": 0.6036, "step": 9262 }, { "epoch": 0.7009193749763535, "grad_norm": 2.097684144973755, "learning_rate": 4.054070739717274e-06, "loss": 0.7522, "step": 9263 }, { "epoch": 0.7009950436986871, "grad_norm": 1.6460198163986206, "learning_rate": 4.05218925199444e-06, "loss": 0.533, "step": 9264 }, { "epoch": 0.7010707124210208, "grad_norm": 4.016617774963379, "learning_rate": 4.05030807411952e-06, "loss": 0.5638, "step": 9265 }, { "epoch": 0.7011463811433544, "grad_norm": 1.8979525566101074, "learning_rate": 4.048427206210316e-06, "loss": 0.6731, "step": 9266 }, { "epoch": 0.701222049865688, "grad_norm": 2.001255989074707, "learning_rate": 4.046546648384616e-06, "loss": 0.5913, "step": 9267 }, { "epoch": 0.7012977185880216, "grad_norm": 2.3916757106781006, "learning_rate": 4.044666400760186e-06, "loss": 0.5653, "step": 9268 }, { "epoch": 0.7013733873103553, "grad_norm": 1.7634689807891846, "learning_rate": 4.042786463454778e-06, "loss": 0.6478, "step": 9269 }, { "epoch": 0.7014490560326889, "grad_norm": 2.492938280105591, "learning_rate": 4.04090683658611e-06, "loss": 0.6283, "step": 9270 }, { "epoch": 0.7015247247550225, "grad_norm": 1.7969627380371094, "learning_rate": 4.039027520271894e-06, "loss": 0.6721, "step": 9271 }, { "epoch": 0.7016003934773561, "grad_norm": 1.9584459066390991, "learning_rate": 4.037148514629823e-06, "loss": 0.5983, "step": 9272 }, { "epoch": 0.7016760621996898, "grad_norm": 2.0558922290802, "learning_rate": 4.035269819777567e-06, "loss": 0.7428, "step": 9273 }, { "epoch": 0.7017517309220234, "grad_norm": 1.8677301406860352, "learning_rate": 4.03339143583277e-06, "loss": 0.8003, "step": 9274 }, { "epoch": 0.701827399644357, "grad_norm": 2.25616192817688, "learning_rate": 4.0315133629130645e-06, "loss": 0.6491, "step": 9275 }, { "epoch": 0.7019030683666906, "grad_norm": 2.1559536457061768, "learning_rate": 4.029635601136063e-06, "loss": 0.6757, "step": 9276 }, { "epoch": 0.7019787370890243, "grad_norm": 1.8619732856750488, "learning_rate": 4.027758150619356e-06, "loss": 0.5132, "step": 9277 }, { "epoch": 0.7020544058113579, "grad_norm": 1.7390613555908203, "learning_rate": 4.0258810114805156e-06, "loss": 0.6006, "step": 9278 }, { "epoch": 0.7021300745336915, "grad_norm": 2.2929675579071045, "learning_rate": 4.024004183837095e-06, "loss": 0.605, "step": 9279 }, { "epoch": 0.7022057432560251, "grad_norm": 2.116135835647583, "learning_rate": 4.022127667806629e-06, "loss": 0.8126, "step": 9280 }, { "epoch": 0.7022814119783587, "grad_norm": 2.891150951385498, "learning_rate": 4.020251463506623e-06, "loss": 0.7529, "step": 9281 }, { "epoch": 0.7023570807006924, "grad_norm": 2.0369067192077637, "learning_rate": 4.01837557105458e-06, "loss": 0.7471, "step": 9282 }, { "epoch": 0.702432749423026, "grad_norm": 2.0991392135620117, "learning_rate": 4.016499990567975e-06, "loss": 0.8235, "step": 9283 }, { "epoch": 0.7025084181453596, "grad_norm": 2.3228790760040283, "learning_rate": 4.014624722164255e-06, "loss": 0.5768, "step": 9284 }, { "epoch": 0.7025840868676932, "grad_norm": 2.1848552227020264, "learning_rate": 4.01274976596086e-06, "loss": 0.6158, "step": 9285 }, { "epoch": 0.7026597555900269, "grad_norm": 2.320488452911377, "learning_rate": 4.0108751220752065e-06, "loss": 0.7085, "step": 9286 }, { "epoch": 0.7027354243123605, "grad_norm": 3.8415627479553223, "learning_rate": 4.009000790624687e-06, "loss": 0.7112, "step": 9287 }, { "epoch": 0.7028110930346941, "grad_norm": 2.0608065128326416, "learning_rate": 4.007126771726684e-06, "loss": 0.6086, "step": 9288 }, { "epoch": 0.7028867617570277, "grad_norm": 1.4849302768707275, "learning_rate": 4.005253065498549e-06, "loss": 0.6349, "step": 9289 }, { "epoch": 0.7029624304793614, "grad_norm": 3.0800068378448486, "learning_rate": 4.003379672057622e-06, "loss": 0.7289, "step": 9290 }, { "epoch": 0.703038099201695, "grad_norm": 2.145936965942383, "learning_rate": 4.001506591521226e-06, "loss": 0.7398, "step": 9291 }, { "epoch": 0.7031137679240286, "grad_norm": 1.8864727020263672, "learning_rate": 3.999633824006647e-06, "loss": 0.6183, "step": 9292 }, { "epoch": 0.7031894366463622, "grad_norm": 1.6764299869537354, "learning_rate": 3.997761369631174e-06, "loss": 0.6655, "step": 9293 }, { "epoch": 0.7032651053686958, "grad_norm": 1.7775150537490845, "learning_rate": 3.9958892285120674e-06, "loss": 0.5845, "step": 9294 }, { "epoch": 0.7033407740910295, "grad_norm": 1.978812575340271, "learning_rate": 3.994017400766558e-06, "loss": 0.7968, "step": 9295 }, { "epoch": 0.7034164428133631, "grad_norm": 2.058699369430542, "learning_rate": 3.992145886511871e-06, "loss": 0.6392, "step": 9296 }, { "epoch": 0.7034921115356967, "grad_norm": 1.882474422454834, "learning_rate": 3.990274685865206e-06, "loss": 0.5851, "step": 9297 }, { "epoch": 0.7035677802580304, "grad_norm": 1.9979125261306763, "learning_rate": 3.988403798943743e-06, "loss": 0.8891, "step": 9298 }, { "epoch": 0.703643448980364, "grad_norm": 1.9912859201431274, "learning_rate": 3.986533225864645e-06, "loss": 0.6231, "step": 9299 }, { "epoch": 0.7037191177026976, "grad_norm": 2.228140115737915, "learning_rate": 3.984662966745051e-06, "loss": 0.8467, "step": 9300 }, { "epoch": 0.7037947864250312, "grad_norm": 1.6897190809249878, "learning_rate": 3.982793021702084e-06, "loss": 0.7131, "step": 9301 }, { "epoch": 0.7038704551473648, "grad_norm": 2.1993837356567383, "learning_rate": 3.980923390852844e-06, "loss": 0.7983, "step": 9302 }, { "epoch": 0.7039461238696985, "grad_norm": 2.0132064819335938, "learning_rate": 3.979054074314417e-06, "loss": 0.7792, "step": 9303 }, { "epoch": 0.7040217925920321, "grad_norm": 2.138044595718384, "learning_rate": 3.977185072203862e-06, "loss": 0.692, "step": 9304 }, { "epoch": 0.7040974613143657, "grad_norm": 1.7573822736740112, "learning_rate": 3.975316384638228e-06, "loss": 0.5628, "step": 9305 }, { "epoch": 0.7041731300366993, "grad_norm": 1.9482179880142212, "learning_rate": 3.97344801173453e-06, "loss": 0.4971, "step": 9306 }, { "epoch": 0.7042487987590329, "grad_norm": 2.954737424850464, "learning_rate": 3.971579953609772e-06, "loss": 0.8212, "step": 9307 }, { "epoch": 0.7043244674813666, "grad_norm": 1.92641019821167, "learning_rate": 3.9697122103809475e-06, "loss": 0.8241, "step": 9308 }, { "epoch": 0.7044001362037002, "grad_norm": 2.1319527626037598, "learning_rate": 3.967844782165012e-06, "loss": 0.6169, "step": 9309 }, { "epoch": 0.7044758049260338, "grad_norm": 1.711698293685913, "learning_rate": 3.9659776690789104e-06, "loss": 0.6958, "step": 9310 }, { "epoch": 0.7045514736483675, "grad_norm": 2.079453468322754, "learning_rate": 3.9641108712395714e-06, "loss": 0.6348, "step": 9311 }, { "epoch": 0.7046271423707011, "grad_norm": 3.2162301540374756, "learning_rate": 3.962244388763896e-06, "loss": 0.6935, "step": 9312 }, { "epoch": 0.7047028110930347, "grad_norm": 1.953348159790039, "learning_rate": 3.960378221768772e-06, "loss": 0.586, "step": 9313 }, { "epoch": 0.7047784798153683, "grad_norm": 2.384439468383789, "learning_rate": 3.958512370371063e-06, "loss": 0.6983, "step": 9314 }, { "epoch": 0.7048541485377019, "grad_norm": 1.7614166736602783, "learning_rate": 3.956646834687616e-06, "loss": 0.7052, "step": 9315 }, { "epoch": 0.7049298172600356, "grad_norm": 5.346411228179932, "learning_rate": 3.95478161483526e-06, "loss": 0.7654, "step": 9316 }, { "epoch": 0.7050054859823692, "grad_norm": 2.2105648517608643, "learning_rate": 3.9529167109307915e-06, "loss": 0.7921, "step": 9317 }, { "epoch": 0.7050811547047028, "grad_norm": 2.0886669158935547, "learning_rate": 3.951052123091005e-06, "loss": 0.6371, "step": 9318 }, { "epoch": 0.7051568234270364, "grad_norm": 1.895363450050354, "learning_rate": 3.949187851432667e-06, "loss": 0.6704, "step": 9319 }, { "epoch": 0.70523249214937, "grad_norm": 3.5013060569763184, "learning_rate": 3.947323896072521e-06, "loss": 0.6872, "step": 9320 }, { "epoch": 0.7053081608717037, "grad_norm": 1.9184173345565796, "learning_rate": 3.945460257127294e-06, "loss": 0.665, "step": 9321 }, { "epoch": 0.7053838295940373, "grad_norm": 1.5872775316238403, "learning_rate": 3.943596934713695e-06, "loss": 0.8925, "step": 9322 }, { "epoch": 0.7054594983163709, "grad_norm": 2.0756430625915527, "learning_rate": 3.9417339289484085e-06, "loss": 0.7668, "step": 9323 }, { "epoch": 0.7055351670387046, "grad_norm": 2.210831642150879, "learning_rate": 3.939871239948105e-06, "loss": 0.697, "step": 9324 }, { "epoch": 0.7056108357610382, "grad_norm": 1.8079180717468262, "learning_rate": 3.93800886782943e-06, "loss": 0.749, "step": 9325 }, { "epoch": 0.7056865044833718, "grad_norm": 2.1431519985198975, "learning_rate": 3.936146812709017e-06, "loss": 0.7066, "step": 9326 }, { "epoch": 0.7057621732057054, "grad_norm": 1.8611176013946533, "learning_rate": 3.934285074703465e-06, "loss": 0.6619, "step": 9327 }, { "epoch": 0.705837841928039, "grad_norm": 6.968406677246094, "learning_rate": 3.932423653929362e-06, "loss": 0.5841, "step": 9328 }, { "epoch": 0.7059135106503727, "grad_norm": 2.0079822540283203, "learning_rate": 3.930562550503284e-06, "loss": 0.568, "step": 9329 }, { "epoch": 0.7059891793727063, "grad_norm": 2.0030272006988525, "learning_rate": 3.92870176454178e-06, "loss": 0.6616, "step": 9330 }, { "epoch": 0.7060648480950399, "grad_norm": 2.209120273590088, "learning_rate": 3.926841296161369e-06, "loss": 0.6471, "step": 9331 }, { "epoch": 0.7061405168173736, "grad_norm": 2.2087392807006836, "learning_rate": 3.924981145478567e-06, "loss": 0.6976, "step": 9332 }, { "epoch": 0.7062161855397071, "grad_norm": 2.025752067565918, "learning_rate": 3.923121312609859e-06, "loss": 0.6718, "step": 9333 }, { "epoch": 0.7062918542620408, "grad_norm": 2.695591926574707, "learning_rate": 3.921261797671714e-06, "loss": 0.7283, "step": 9334 }, { "epoch": 0.7063675229843744, "grad_norm": 2.5060596466064453, "learning_rate": 3.9194026007805834e-06, "loss": 0.7901, "step": 9335 }, { "epoch": 0.706443191706708, "grad_norm": 1.7619905471801758, "learning_rate": 3.917543722052894e-06, "loss": 0.5261, "step": 9336 }, { "epoch": 0.7065188604290417, "grad_norm": 2.090834379196167, "learning_rate": 3.915685161605058e-06, "loss": 0.6204, "step": 9337 }, { "epoch": 0.7065945291513753, "grad_norm": 1.739188313484192, "learning_rate": 3.913826919553457e-06, "loss": 0.6318, "step": 9338 }, { "epoch": 0.7066701978737089, "grad_norm": 2.2699122428894043, "learning_rate": 3.911968996014467e-06, "loss": 0.727, "step": 9339 }, { "epoch": 0.7067458665960425, "grad_norm": 1.8585529327392578, "learning_rate": 3.910111391104438e-06, "loss": 0.6699, "step": 9340 }, { "epoch": 0.7068215353183761, "grad_norm": 2.3249197006225586, "learning_rate": 3.908254104939695e-06, "loss": 0.5889, "step": 9341 }, { "epoch": 0.7068972040407098, "grad_norm": 1.8129733800888062, "learning_rate": 3.906397137636547e-06, "loss": 0.6422, "step": 9342 }, { "epoch": 0.7069728727630434, "grad_norm": 1.9523649215698242, "learning_rate": 3.9045404893112815e-06, "loss": 0.6836, "step": 9343 }, { "epoch": 0.707048541485377, "grad_norm": 1.462280035018921, "learning_rate": 3.902684160080179e-06, "loss": 0.5477, "step": 9344 }, { "epoch": 0.7071242102077107, "grad_norm": 2.921168327331543, "learning_rate": 3.900828150059477e-06, "loss": 0.5474, "step": 9345 }, { "epoch": 0.7071998789300442, "grad_norm": 1.826545238494873, "learning_rate": 3.898972459365409e-06, "loss": 0.7442, "step": 9346 }, { "epoch": 0.7072755476523779, "grad_norm": 1.8657852411270142, "learning_rate": 3.897117088114185e-06, "loss": 0.6929, "step": 9347 }, { "epoch": 0.7073512163747115, "grad_norm": 2.4558889865875244, "learning_rate": 3.895262036421993e-06, "loss": 0.842, "step": 9348 }, { "epoch": 0.7074268850970451, "grad_norm": 1.9185665845870972, "learning_rate": 3.893407304405003e-06, "loss": 0.6258, "step": 9349 }, { "epoch": 0.7075025538193788, "grad_norm": 2.26891827583313, "learning_rate": 3.891552892179365e-06, "loss": 0.7757, "step": 9350 }, { "epoch": 0.7075782225417124, "grad_norm": 2.160792589187622, "learning_rate": 3.88969879986121e-06, "loss": 0.6637, "step": 9351 }, { "epoch": 0.707653891264046, "grad_norm": 2.357847213745117, "learning_rate": 3.887845027566642e-06, "loss": 0.6997, "step": 9352 }, { "epoch": 0.7077295599863797, "grad_norm": 1.9302562475204468, "learning_rate": 3.8859915754117505e-06, "loss": 0.8658, "step": 9353 }, { "epoch": 0.7078052287087132, "grad_norm": 2.308100700378418, "learning_rate": 3.884138443512612e-06, "loss": 0.6944, "step": 9354 }, { "epoch": 0.7078808974310469, "grad_norm": 2.170538902282715, "learning_rate": 3.882285631985269e-06, "loss": 0.6771, "step": 9355 }, { "epoch": 0.7079565661533805, "grad_norm": 1.9224172830581665, "learning_rate": 3.880433140945753e-06, "loss": 0.592, "step": 9356 }, { "epoch": 0.7080322348757141, "grad_norm": 1.849142074584961, "learning_rate": 3.878580970510071e-06, "loss": 0.5242, "step": 9357 }, { "epoch": 0.7081079035980478, "grad_norm": 1.9761168956756592, "learning_rate": 3.876729120794215e-06, "loss": 0.706, "step": 9358 }, { "epoch": 0.7081835723203813, "grad_norm": 2.217850685119629, "learning_rate": 3.87487759191415e-06, "loss": 0.6843, "step": 9359 }, { "epoch": 0.708259241042715, "grad_norm": 2.1054461002349854, "learning_rate": 3.873026383985828e-06, "loss": 0.7485, "step": 9360 }, { "epoch": 0.7083349097650486, "grad_norm": 1.964118480682373, "learning_rate": 3.871175497125176e-06, "loss": 0.5514, "step": 9361 }, { "epoch": 0.7084105784873822, "grad_norm": 1.676416039466858, "learning_rate": 3.869324931448107e-06, "loss": 0.587, "step": 9362 }, { "epoch": 0.7084862472097159, "grad_norm": 1.9723479747772217, "learning_rate": 3.867474687070502e-06, "loss": 0.697, "step": 9363 }, { "epoch": 0.7085619159320495, "grad_norm": 2.1898975372314453, "learning_rate": 3.865624764108229e-06, "loss": 0.6789, "step": 9364 }, { "epoch": 0.7086375846543831, "grad_norm": 2.4142651557922363, "learning_rate": 3.863775162677147e-06, "loss": 0.7163, "step": 9365 }, { "epoch": 0.7087132533767168, "grad_norm": 2.3115365505218506, "learning_rate": 3.8619258828930725e-06, "loss": 0.5373, "step": 9366 }, { "epoch": 0.7087889220990503, "grad_norm": 2.0673863887786865, "learning_rate": 3.860076924871818e-06, "loss": 0.5509, "step": 9367 }, { "epoch": 0.708864590821384, "grad_norm": 2.3030877113342285, "learning_rate": 3.8582282887291724e-06, "loss": 0.754, "step": 9368 }, { "epoch": 0.7089402595437176, "grad_norm": 2.19999361038208, "learning_rate": 3.856379974580901e-06, "loss": 0.6528, "step": 9369 }, { "epoch": 0.7090159282660512, "grad_norm": 2.0521130561828613, "learning_rate": 3.854531982542751e-06, "loss": 0.7486, "step": 9370 }, { "epoch": 0.7090915969883849, "grad_norm": 1.9347343444824219, "learning_rate": 3.852684312730452e-06, "loss": 0.7396, "step": 9371 }, { "epoch": 0.7091672657107184, "grad_norm": 5.190321922302246, "learning_rate": 3.850836965259713e-06, "loss": 0.6111, "step": 9372 }, { "epoch": 0.7092429344330521, "grad_norm": 1.797953724861145, "learning_rate": 3.848989940246214e-06, "loss": 0.6532, "step": 9373 }, { "epoch": 0.7093186031553858, "grad_norm": 2.0552330017089844, "learning_rate": 3.847143237805622e-06, "loss": 0.6478, "step": 9374 }, { "epoch": 0.7093942718777193, "grad_norm": 2.1826653480529785, "learning_rate": 3.845296858053591e-06, "loss": 0.674, "step": 9375 }, { "epoch": 0.709469940600053, "grad_norm": 2.6305980682373047, "learning_rate": 3.8434508011057456e-06, "loss": 0.7174, "step": 9376 }, { "epoch": 0.7095456093223866, "grad_norm": 2.358619451522827, "learning_rate": 3.841605067077686e-06, "loss": 0.7223, "step": 9377 }, { "epoch": 0.7096212780447202, "grad_norm": 1.9301419258117676, "learning_rate": 3.839759656085001e-06, "loss": 0.6305, "step": 9378 }, { "epoch": 0.7096969467670539, "grad_norm": 1.9100176095962524, "learning_rate": 3.8379145682432565e-06, "loss": 0.6708, "step": 9379 }, { "epoch": 0.7097726154893874, "grad_norm": 2.4053359031677246, "learning_rate": 3.836069803667998e-06, "loss": 0.6106, "step": 9380 }, { "epoch": 0.7098482842117211, "grad_norm": 4.036832332611084, "learning_rate": 3.834225362474753e-06, "loss": 0.6419, "step": 9381 }, { "epoch": 0.7099239529340547, "grad_norm": 2.3508119583129883, "learning_rate": 3.8323812447790205e-06, "loss": 0.8037, "step": 9382 }, { "epoch": 0.7099996216563883, "grad_norm": 2.4832112789154053, "learning_rate": 3.830537450696293e-06, "loss": 0.6607, "step": 9383 }, { "epoch": 0.710075290378722, "grad_norm": 1.9314616918563843, "learning_rate": 3.828693980342024e-06, "loss": 0.7162, "step": 9384 }, { "epoch": 0.7101509591010555, "grad_norm": 2.2306244373321533, "learning_rate": 3.826850833831668e-06, "loss": 0.7208, "step": 9385 }, { "epoch": 0.7102266278233892, "grad_norm": 2.328071355819702, "learning_rate": 3.825008011280648e-06, "loss": 0.6431, "step": 9386 }, { "epoch": 0.7103022965457229, "grad_norm": 1.6222195625305176, "learning_rate": 3.823165512804361e-06, "loss": 0.6632, "step": 9387 }, { "epoch": 0.7103779652680564, "grad_norm": 2.752122640609741, "learning_rate": 3.821323338518193e-06, "loss": 0.7188, "step": 9388 }, { "epoch": 0.7104536339903901, "grad_norm": 1.881763219833374, "learning_rate": 3.819481488537504e-06, "loss": 0.6389, "step": 9389 }, { "epoch": 0.7105293027127237, "grad_norm": 1.7420378923416138, "learning_rate": 3.817639962977646e-06, "loss": 0.7323, "step": 9390 }, { "epoch": 0.7106049714350573, "grad_norm": 2.1217947006225586, "learning_rate": 3.815798761953933e-06, "loss": 0.7075, "step": 9391 }, { "epoch": 0.710680640157391, "grad_norm": 2.142434597015381, "learning_rate": 3.813957885581669e-06, "loss": 0.7825, "step": 9392 }, { "epoch": 0.7107563088797245, "grad_norm": 2.093893051147461, "learning_rate": 3.8121173339761356e-06, "loss": 0.6228, "step": 9393 }, { "epoch": 0.7108319776020582, "grad_norm": 2.101154088973999, "learning_rate": 3.8102771072525944e-06, "loss": 0.8039, "step": 9394 }, { "epoch": 0.7109076463243919, "grad_norm": 1.9551880359649658, "learning_rate": 3.8084372055262866e-06, "loss": 0.7045, "step": 9395 }, { "epoch": 0.7109833150467254, "grad_norm": 1.807440996170044, "learning_rate": 3.8065976289124328e-06, "loss": 0.6904, "step": 9396 }, { "epoch": 0.7110589837690591, "grad_norm": 1.9995956420898438, "learning_rate": 3.8047583775262367e-06, "loss": 0.7355, "step": 9397 }, { "epoch": 0.7111346524913927, "grad_norm": 2.0244057178497314, "learning_rate": 3.80291945148287e-06, "loss": 0.6372, "step": 9398 }, { "epoch": 0.7112103212137263, "grad_norm": 2.710584878921509, "learning_rate": 3.801080850897497e-06, "loss": 0.8311, "step": 9399 }, { "epoch": 0.71128598993606, "grad_norm": 1.9999345541000366, "learning_rate": 3.7992425758852565e-06, "loss": 0.6724, "step": 9400 }, { "epoch": 0.7113616586583935, "grad_norm": 2.0285205841064453, "learning_rate": 3.7974046265612676e-06, "loss": 0.6539, "step": 9401 }, { "epoch": 0.7114373273807272, "grad_norm": 1.9455727338790894, "learning_rate": 3.795567003040628e-06, "loss": 0.472, "step": 9402 }, { "epoch": 0.7115129961030608, "grad_norm": 2.234898090362549, "learning_rate": 3.7937297054384152e-06, "loss": 0.679, "step": 9403 }, { "epoch": 0.7115886648253944, "grad_norm": 2.2903592586517334, "learning_rate": 3.791892733869688e-06, "loss": 0.5767, "step": 9404 }, { "epoch": 0.7116643335477281, "grad_norm": 2.1868555545806885, "learning_rate": 3.790056088449483e-06, "loss": 0.6769, "step": 9405 }, { "epoch": 0.7117400022700616, "grad_norm": 2.426342725753784, "learning_rate": 3.7882197692928168e-06, "loss": 0.6697, "step": 9406 }, { "epoch": 0.7118156709923953, "grad_norm": 3.310873031616211, "learning_rate": 3.786383776514685e-06, "loss": 0.6237, "step": 9407 }, { "epoch": 0.711891339714729, "grad_norm": 2.320218324661255, "learning_rate": 3.784548110230068e-06, "loss": 0.647, "step": 9408 }, { "epoch": 0.7119670084370625, "grad_norm": 1.9522889852523804, "learning_rate": 3.7827127705539136e-06, "loss": 0.7945, "step": 9409 }, { "epoch": 0.7120426771593962, "grad_norm": 1.851012110710144, "learning_rate": 3.7808777576011564e-06, "loss": 0.5536, "step": 9410 }, { "epoch": 0.7121183458817298, "grad_norm": 1.8848350048065186, "learning_rate": 3.7790430714867223e-06, "loss": 0.6775, "step": 9411 }, { "epoch": 0.7121940146040634, "grad_norm": 1.8779957294464111, "learning_rate": 3.777208712325493e-06, "loss": 0.7499, "step": 9412 }, { "epoch": 0.7122696833263971, "grad_norm": 1.9439111948013306, "learning_rate": 3.775374680232348e-06, "loss": 0.6631, "step": 9413 }, { "epoch": 0.7123453520487306, "grad_norm": 2.2685723304748535, "learning_rate": 3.773540975322138e-06, "loss": 0.6203, "step": 9414 }, { "epoch": 0.7124210207710643, "grad_norm": 2.156620979309082, "learning_rate": 3.7717075977096973e-06, "loss": 0.6294, "step": 9415 }, { "epoch": 0.712496689493398, "grad_norm": 2.185917854309082, "learning_rate": 3.7698745475098365e-06, "loss": 0.7808, "step": 9416 }, { "epoch": 0.7125723582157315, "grad_norm": 1.884772777557373, "learning_rate": 3.768041824837349e-06, "loss": 0.6747, "step": 9417 }, { "epoch": 0.7126480269380652, "grad_norm": 1.9386128187179565, "learning_rate": 3.766209429807004e-06, "loss": 0.656, "step": 9418 }, { "epoch": 0.7127236956603987, "grad_norm": 1.856753945350647, "learning_rate": 3.764377362533556e-06, "loss": 0.4882, "step": 9419 }, { "epoch": 0.7127993643827324, "grad_norm": 2.4401142597198486, "learning_rate": 3.762545623131724e-06, "loss": 0.654, "step": 9420 }, { "epoch": 0.7128750331050661, "grad_norm": 1.9322893619537354, "learning_rate": 3.7607142117162297e-06, "loss": 0.7618, "step": 9421 }, { "epoch": 0.7129507018273996, "grad_norm": 2.7075653076171875, "learning_rate": 3.7588831284017608e-06, "loss": 0.6788, "step": 9422 }, { "epoch": 0.7130263705497333, "grad_norm": 1.963377833366394, "learning_rate": 3.757052373302978e-06, "loss": 0.6509, "step": 9423 }, { "epoch": 0.713102039272067, "grad_norm": 1.9781228303909302, "learning_rate": 3.7552219465345335e-06, "loss": 0.5682, "step": 9424 }, { "epoch": 0.7131777079944005, "grad_norm": 1.9401733875274658, "learning_rate": 3.7533918482110544e-06, "loss": 0.5823, "step": 9425 }, { "epoch": 0.7132533767167342, "grad_norm": 1.7262191772460938, "learning_rate": 3.7515620784471475e-06, "loss": 0.6593, "step": 9426 }, { "epoch": 0.7133290454390677, "grad_norm": 2.252978801727295, "learning_rate": 3.7497326373573983e-06, "loss": 0.7555, "step": 9427 }, { "epoch": 0.7134047141614014, "grad_norm": 2.0870866775512695, "learning_rate": 3.747903525056374e-06, "loss": 0.6717, "step": 9428 }, { "epoch": 0.713480382883735, "grad_norm": 2.1834726333618164, "learning_rate": 3.746074741658621e-06, "loss": 0.6464, "step": 9429 }, { "epoch": 0.7135560516060686, "grad_norm": 2.443652629852295, "learning_rate": 3.744246287278654e-06, "loss": 0.5819, "step": 9430 }, { "epoch": 0.7136317203284023, "grad_norm": 1.8211729526519775, "learning_rate": 3.742418162030987e-06, "loss": 0.6914, "step": 9431 }, { "epoch": 0.7137073890507358, "grad_norm": 3.0865557193756104, "learning_rate": 3.740590366030099e-06, "loss": 0.7489, "step": 9432 }, { "epoch": 0.7137830577730695, "grad_norm": 1.6434742212295532, "learning_rate": 3.738762899390458e-06, "loss": 0.6222, "step": 9433 }, { "epoch": 0.7138587264954032, "grad_norm": 1.8651151657104492, "learning_rate": 3.736935762226497e-06, "loss": 0.5819, "step": 9434 }, { "epoch": 0.7139343952177367, "grad_norm": 2.248553514480591, "learning_rate": 3.7351089546526386e-06, "loss": 0.8123, "step": 9435 }, { "epoch": 0.7140100639400704, "grad_norm": 2.241312265396118, "learning_rate": 3.7332824767832927e-06, "loss": 0.8631, "step": 9436 }, { "epoch": 0.714085732662404, "grad_norm": 2.0975382328033447, "learning_rate": 3.731456328732829e-06, "loss": 0.6104, "step": 9437 }, { "epoch": 0.7141614013847376, "grad_norm": 1.8900827169418335, "learning_rate": 3.729630510615611e-06, "loss": 0.8335, "step": 9438 }, { "epoch": 0.7142370701070713, "grad_norm": 2.051609992980957, "learning_rate": 3.7278050225459774e-06, "loss": 0.6891, "step": 9439 }, { "epoch": 0.7143127388294048, "grad_norm": 2.2935402393341064, "learning_rate": 3.7259798646382476e-06, "loss": 0.5638, "step": 9440 }, { "epoch": 0.7143884075517385, "grad_norm": 2.171649217605591, "learning_rate": 3.724155037006711e-06, "loss": 0.7834, "step": 9441 }, { "epoch": 0.7144640762740722, "grad_norm": 2.2171740531921387, "learning_rate": 3.7223305397656537e-06, "loss": 0.5864, "step": 9442 }, { "epoch": 0.7145397449964057, "grad_norm": 2.0101137161254883, "learning_rate": 3.7205063730293306e-06, "loss": 0.5468, "step": 9443 }, { "epoch": 0.7146154137187394, "grad_norm": 1.868871808052063, "learning_rate": 3.718682536911972e-06, "loss": 0.6371, "step": 9444 }, { "epoch": 0.7146910824410729, "grad_norm": 1.9993494749069214, "learning_rate": 3.716859031527794e-06, "loss": 0.8044, "step": 9445 }, { "epoch": 0.7147667511634066, "grad_norm": 2.727073907852173, "learning_rate": 3.715035856990989e-06, "loss": 0.6426, "step": 9446 }, { "epoch": 0.7148424198857403, "grad_norm": 1.9772788286209106, "learning_rate": 3.7132130134157373e-06, "loss": 0.6816, "step": 9447 }, { "epoch": 0.7149180886080738, "grad_norm": 1.7466607093811035, "learning_rate": 3.7113905009161843e-06, "loss": 0.5874, "step": 9448 }, { "epoch": 0.7149937573304075, "grad_norm": 2.1143674850463867, "learning_rate": 3.7095683196064624e-06, "loss": 0.7799, "step": 9449 }, { "epoch": 0.7150694260527412, "grad_norm": 2.3886454105377197, "learning_rate": 3.707746469600685e-06, "loss": 0.5778, "step": 9450 }, { "epoch": 0.7151450947750747, "grad_norm": 1.7100965976715088, "learning_rate": 3.7059249510129392e-06, "loss": 0.6333, "step": 9451 }, { "epoch": 0.7152207634974084, "grad_norm": 2.1997292041778564, "learning_rate": 3.7041037639572976e-06, "loss": 0.698, "step": 9452 }, { "epoch": 0.7152964322197419, "grad_norm": 2.3083553314208984, "learning_rate": 3.7022829085478066e-06, "loss": 0.6241, "step": 9453 }, { "epoch": 0.7153721009420756, "grad_norm": 2.3041303157806396, "learning_rate": 3.7004623848984977e-06, "loss": 0.6932, "step": 9454 }, { "epoch": 0.7154477696644093, "grad_norm": 1.8793962001800537, "learning_rate": 3.698642193123373e-06, "loss": 0.5792, "step": 9455 }, { "epoch": 0.7155234383867428, "grad_norm": 2.057422161102295, "learning_rate": 3.696822333336416e-06, "loss": 0.6926, "step": 9456 }, { "epoch": 0.7155991071090765, "grad_norm": 1.9454712867736816, "learning_rate": 3.695002805651605e-06, "loss": 0.6216, "step": 9457 }, { "epoch": 0.71567477583141, "grad_norm": 2.082719326019287, "learning_rate": 3.693183610182873e-06, "loss": 0.6695, "step": 9458 }, { "epoch": 0.7157504445537437, "grad_norm": 3.1850838661193848, "learning_rate": 3.691364747044147e-06, "loss": 0.7003, "step": 9459 }, { "epoch": 0.7158261132760774, "grad_norm": 2.776329517364502, "learning_rate": 3.6895462163493316e-06, "loss": 0.6318, "step": 9460 }, { "epoch": 0.7159017819984109, "grad_norm": 1.9540523290634155, "learning_rate": 3.6877280182123084e-06, "loss": 0.7524, "step": 9461 }, { "epoch": 0.7159774507207446, "grad_norm": 2.246755599975586, "learning_rate": 3.6859101527469375e-06, "loss": 0.6448, "step": 9462 }, { "epoch": 0.7160531194430783, "grad_norm": 1.626090168952942, "learning_rate": 3.684092620067062e-06, "loss": 0.6897, "step": 9463 }, { "epoch": 0.7161287881654118, "grad_norm": 2.5425055027008057, "learning_rate": 3.6822754202864992e-06, "loss": 0.7054, "step": 9464 }, { "epoch": 0.7162044568877455, "grad_norm": 2.3118436336517334, "learning_rate": 3.680458553519053e-06, "loss": 0.6208, "step": 9465 }, { "epoch": 0.716280125610079, "grad_norm": 2.1892473697662354, "learning_rate": 3.678642019878491e-06, "loss": 0.6901, "step": 9466 }, { "epoch": 0.7163557943324127, "grad_norm": 1.977307915687561, "learning_rate": 3.676825819478581e-06, "loss": 0.7049, "step": 9467 }, { "epoch": 0.7164314630547464, "grad_norm": 2.380528450012207, "learning_rate": 3.6750099524330575e-06, "loss": 0.6314, "step": 9468 }, { "epoch": 0.7165071317770799, "grad_norm": 2.11545729637146, "learning_rate": 3.6731944188556317e-06, "loss": 0.7005, "step": 9469 }, { "epoch": 0.7165828004994136, "grad_norm": 2.1556060314178467, "learning_rate": 3.6713792188599997e-06, "loss": 0.6029, "step": 9470 }, { "epoch": 0.7166584692217471, "grad_norm": 1.7340325117111206, "learning_rate": 3.669564352559837e-06, "loss": 0.6378, "step": 9471 }, { "epoch": 0.7167341379440808, "grad_norm": 1.669938325881958, "learning_rate": 3.6677498200687934e-06, "loss": 0.709, "step": 9472 }, { "epoch": 0.7168098066664145, "grad_norm": 2.10733699798584, "learning_rate": 3.6659356215005036e-06, "loss": 0.728, "step": 9473 }, { "epoch": 0.716885475388748, "grad_norm": 1.7881940603256226, "learning_rate": 3.6641217569685783e-06, "loss": 0.5225, "step": 9474 }, { "epoch": 0.7169611441110817, "grad_norm": 1.5777084827423096, "learning_rate": 3.6623082265866098e-06, "loss": 0.5302, "step": 9475 }, { "epoch": 0.7170368128334154, "grad_norm": 2.004207134246826, "learning_rate": 3.66049503046816e-06, "loss": 0.8483, "step": 9476 }, { "epoch": 0.7171124815557489, "grad_norm": 1.8468573093414307, "learning_rate": 3.658682168726779e-06, "loss": 0.7662, "step": 9477 }, { "epoch": 0.7171881502780826, "grad_norm": 1.8332854509353638, "learning_rate": 3.6568696414760007e-06, "loss": 0.5098, "step": 9478 }, { "epoch": 0.7172638190004161, "grad_norm": 1.9783474206924438, "learning_rate": 3.6550574488293284e-06, "loss": 0.5637, "step": 9479 }, { "epoch": 0.7173394877227498, "grad_norm": 1.8308309316635132, "learning_rate": 3.6532455909002453e-06, "loss": 0.7741, "step": 9480 }, { "epoch": 0.7174151564450835, "grad_norm": 2.1236815452575684, "learning_rate": 3.6514340678022155e-06, "loss": 0.6946, "step": 9481 }, { "epoch": 0.717490825167417, "grad_norm": 2.0192248821258545, "learning_rate": 3.649622879648684e-06, "loss": 0.6707, "step": 9482 }, { "epoch": 0.7175664938897507, "grad_norm": 2.7706687450408936, "learning_rate": 3.647812026553073e-06, "loss": 0.6171, "step": 9483 }, { "epoch": 0.7176421626120842, "grad_norm": 2.084230661392212, "learning_rate": 3.6460015086287838e-06, "loss": 0.6501, "step": 9484 }, { "epoch": 0.7177178313344179, "grad_norm": 2.3443830013275146, "learning_rate": 3.6441913259891964e-06, "loss": 0.6697, "step": 9485 }, { "epoch": 0.7177935000567516, "grad_norm": 2.1774091720581055, "learning_rate": 3.6423814787476756e-06, "loss": 0.6251, "step": 9486 }, { "epoch": 0.7178691687790851, "grad_norm": 1.484096646308899, "learning_rate": 3.640571967017548e-06, "loss": 0.6349, "step": 9487 }, { "epoch": 0.7179448375014188, "grad_norm": 1.9635212421417236, "learning_rate": 3.638762790912142e-06, "loss": 0.5859, "step": 9488 }, { "epoch": 0.7180205062237525, "grad_norm": 1.664218544960022, "learning_rate": 3.636953950544753e-06, "loss": 0.7502, "step": 9489 }, { "epoch": 0.718096174946086, "grad_norm": 1.7915072441101074, "learning_rate": 3.635145446028651e-06, "loss": 0.6983, "step": 9490 }, { "epoch": 0.7181718436684197, "grad_norm": 2.0278759002685547, "learning_rate": 3.6333372774770926e-06, "loss": 0.6947, "step": 9491 }, { "epoch": 0.7182475123907532, "grad_norm": 1.7070521116256714, "learning_rate": 3.631529445003309e-06, "loss": 0.6624, "step": 9492 }, { "epoch": 0.7183231811130869, "grad_norm": 1.7862852811813354, "learning_rate": 3.629721948720522e-06, "loss": 0.6233, "step": 9493 }, { "epoch": 0.7183988498354206, "grad_norm": 1.671974778175354, "learning_rate": 3.6279147887419135e-06, "loss": 0.7435, "step": 9494 }, { "epoch": 0.7184745185577541, "grad_norm": 2.4894473552703857, "learning_rate": 3.6261079651806546e-06, "loss": 0.6102, "step": 9495 }, { "epoch": 0.7185501872800878, "grad_norm": 2.10819673538208, "learning_rate": 3.624301478149897e-06, "loss": 0.6282, "step": 9496 }, { "epoch": 0.7186258560024213, "grad_norm": 2.059037923812866, "learning_rate": 3.6224953277627686e-06, "loss": 0.6832, "step": 9497 }, { "epoch": 0.718701524724755, "grad_norm": 2.2662110328674316, "learning_rate": 3.620689514132375e-06, "loss": 0.6392, "step": 9498 }, { "epoch": 0.7187771934470887, "grad_norm": 1.8977314233779907, "learning_rate": 3.6188840373718028e-06, "loss": 0.6685, "step": 9499 }, { "epoch": 0.7188528621694222, "grad_norm": 1.8836039304733276, "learning_rate": 3.617078897594121e-06, "loss": 0.7153, "step": 9500 }, { "epoch": 0.7189285308917559, "grad_norm": 1.4642912149429321, "learning_rate": 3.6152740949123648e-06, "loss": 0.5909, "step": 9501 }, { "epoch": 0.7190041996140896, "grad_norm": 1.8887202739715576, "learning_rate": 3.6134696294395585e-06, "loss": 0.726, "step": 9502 }, { "epoch": 0.7190798683364231, "grad_norm": 2.040818214416504, "learning_rate": 3.6116655012887122e-06, "loss": 0.6889, "step": 9503 }, { "epoch": 0.7191555370587568, "grad_norm": 1.7351603507995605, "learning_rate": 3.6098617105727973e-06, "loss": 0.7366, "step": 9504 }, { "epoch": 0.7192312057810903, "grad_norm": 1.4561235904693604, "learning_rate": 3.608058257404776e-06, "loss": 0.6087, "step": 9505 }, { "epoch": 0.719306874503424, "grad_norm": 3.0228309631347656, "learning_rate": 3.606255141897586e-06, "loss": 0.8051, "step": 9506 }, { "epoch": 0.7193825432257577, "grad_norm": 1.730713129043579, "learning_rate": 3.6044523641641448e-06, "loss": 0.685, "step": 9507 }, { "epoch": 0.7194582119480912, "grad_norm": 2.0511322021484375, "learning_rate": 3.6026499243173475e-06, "loss": 0.7083, "step": 9508 }, { "epoch": 0.7195338806704249, "grad_norm": 2.072368860244751, "learning_rate": 3.6008478224700685e-06, "loss": 0.6813, "step": 9509 }, { "epoch": 0.7196095493927585, "grad_norm": 1.9627420902252197, "learning_rate": 3.5990460587351625e-06, "loss": 0.7139, "step": 9510 }, { "epoch": 0.7196852181150921, "grad_norm": 2.194145441055298, "learning_rate": 3.5972446332254646e-06, "loss": 0.5347, "step": 9511 }, { "epoch": 0.7197608868374258, "grad_norm": 1.782472848892212, "learning_rate": 3.595443546053776e-06, "loss": 0.6956, "step": 9512 }, { "epoch": 0.7198365555597593, "grad_norm": 1.7157336473464966, "learning_rate": 3.5936427973328957e-06, "loss": 0.7086, "step": 9513 }, { "epoch": 0.719912224282093, "grad_norm": 2.2964377403259277, "learning_rate": 3.591842387175593e-06, "loss": 0.7328, "step": 9514 }, { "epoch": 0.7199878930044267, "grad_norm": 1.782142996788025, "learning_rate": 3.590042315694609e-06, "loss": 0.7607, "step": 9515 }, { "epoch": 0.7200635617267602, "grad_norm": 2.2905352115631104, "learning_rate": 3.588242583002674e-06, "loss": 0.6966, "step": 9516 }, { "epoch": 0.7201392304490939, "grad_norm": 1.8695130348205566, "learning_rate": 3.5864431892124913e-06, "loss": 0.6522, "step": 9517 }, { "epoch": 0.7202148991714274, "grad_norm": 1.8097631931304932, "learning_rate": 3.5846441344367456e-06, "loss": 0.6304, "step": 9518 }, { "epoch": 0.7202905678937611, "grad_norm": 4.14026403427124, "learning_rate": 3.5828454187881e-06, "loss": 0.5817, "step": 9519 }, { "epoch": 0.7203662366160948, "grad_norm": 1.8867014646530151, "learning_rate": 3.581047042379195e-06, "loss": 0.5896, "step": 9520 }, { "epoch": 0.7204419053384283, "grad_norm": 2.1675286293029785, "learning_rate": 3.579249005322652e-06, "loss": 0.7204, "step": 9521 }, { "epoch": 0.720517574060762, "grad_norm": 2.6543667316436768, "learning_rate": 3.577451307731071e-06, "loss": 0.7004, "step": 9522 }, { "epoch": 0.7205932427830956, "grad_norm": 2.085458517074585, "learning_rate": 3.575653949717022e-06, "loss": 0.645, "step": 9523 }, { "epoch": 0.7206689115054292, "grad_norm": 2.0255820751190186, "learning_rate": 3.5738569313930702e-06, "loss": 0.6807, "step": 9524 }, { "epoch": 0.7207445802277629, "grad_norm": 2.1324872970581055, "learning_rate": 3.572060252871752e-06, "loss": 0.6868, "step": 9525 }, { "epoch": 0.7208202489500964, "grad_norm": 1.980837345123291, "learning_rate": 3.570263914265572e-06, "loss": 0.5308, "step": 9526 }, { "epoch": 0.7208959176724301, "grad_norm": 2.0542397499084473, "learning_rate": 3.5684679156870284e-06, "loss": 0.6264, "step": 9527 }, { "epoch": 0.7209715863947638, "grad_norm": 2.3621749877929688, "learning_rate": 3.5666722572485916e-06, "loss": 0.7101, "step": 9528 }, { "epoch": 0.7210472551170973, "grad_norm": 2.299753189086914, "learning_rate": 3.564876939062711e-06, "loss": 0.6421, "step": 9529 }, { "epoch": 0.721122923839431, "grad_norm": 1.9980701208114624, "learning_rate": 3.5630819612418172e-06, "loss": 0.7892, "step": 9530 }, { "epoch": 0.7211985925617646, "grad_norm": 1.7415344715118408, "learning_rate": 3.5612873238983153e-06, "loss": 0.6191, "step": 9531 }, { "epoch": 0.7212742612840982, "grad_norm": 2.2024734020233154, "learning_rate": 3.5594930271445946e-06, "loss": 0.6404, "step": 9532 }, { "epoch": 0.7213499300064319, "grad_norm": 2.5525379180908203, "learning_rate": 3.557699071093012e-06, "loss": 0.7167, "step": 9533 }, { "epoch": 0.7214255987287654, "grad_norm": 2.1622018814086914, "learning_rate": 3.5559054558559193e-06, "loss": 0.6134, "step": 9534 }, { "epoch": 0.7215012674510991, "grad_norm": 2.7653920650482178, "learning_rate": 3.5541121815456345e-06, "loss": 0.6996, "step": 9535 }, { "epoch": 0.7215769361734327, "grad_norm": 2.7878835201263428, "learning_rate": 3.5523192482744618e-06, "loss": 0.6121, "step": 9536 }, { "epoch": 0.7216526048957663, "grad_norm": 1.8439620733261108, "learning_rate": 3.5505266561546753e-06, "loss": 0.6619, "step": 9537 }, { "epoch": 0.7217282736181, "grad_norm": 2.6026670932769775, "learning_rate": 3.5487344052985323e-06, "loss": 0.7525, "step": 9538 }, { "epoch": 0.7218039423404335, "grad_norm": 2.118645191192627, "learning_rate": 3.5469424958182783e-06, "loss": 0.7478, "step": 9539 }, { "epoch": 0.7218796110627672, "grad_norm": 1.8370145559310913, "learning_rate": 3.5451509278261196e-06, "loss": 0.7044, "step": 9540 }, { "epoch": 0.7219552797851009, "grad_norm": 2.0463836193084717, "learning_rate": 3.543359701434254e-06, "loss": 0.6612, "step": 9541 }, { "epoch": 0.7220309485074344, "grad_norm": 1.8631353378295898, "learning_rate": 3.5415688167548513e-06, "loss": 0.7544, "step": 9542 }, { "epoch": 0.7221066172297681, "grad_norm": 2.5322537422180176, "learning_rate": 3.5397782739000647e-06, "loss": 0.7171, "step": 9543 }, { "epoch": 0.7221822859521017, "grad_norm": 1.8810664415359497, "learning_rate": 3.5379880729820227e-06, "loss": 0.5414, "step": 9544 }, { "epoch": 0.7222579546744353, "grad_norm": 2.0911777019500732, "learning_rate": 3.536198214112834e-06, "loss": 0.6315, "step": 9545 }, { "epoch": 0.722333623396769, "grad_norm": 2.1385138034820557, "learning_rate": 3.534408697404588e-06, "loss": 0.7681, "step": 9546 }, { "epoch": 0.7224092921191025, "grad_norm": 2.2795066833496094, "learning_rate": 3.5326195229693447e-06, "loss": 0.7531, "step": 9547 }, { "epoch": 0.7224849608414362, "grad_norm": 2.068978786468506, "learning_rate": 3.5308306909191467e-06, "loss": 0.6324, "step": 9548 }, { "epoch": 0.7225606295637698, "grad_norm": 2.281078577041626, "learning_rate": 3.5290422013660234e-06, "loss": 0.751, "step": 9549 }, { "epoch": 0.7226362982861034, "grad_norm": 2.842747449874878, "learning_rate": 3.5272540544219766e-06, "loss": 0.5912, "step": 9550 }, { "epoch": 0.7227119670084371, "grad_norm": 1.6664164066314697, "learning_rate": 3.5254662501989788e-06, "loss": 0.5474, "step": 9551 }, { "epoch": 0.7227876357307707, "grad_norm": 2.199381113052368, "learning_rate": 3.5236787888089905e-06, "loss": 0.6749, "step": 9552 }, { "epoch": 0.7228633044531043, "grad_norm": 1.8498841524124146, "learning_rate": 3.5218916703639495e-06, "loss": 0.6779, "step": 9553 }, { "epoch": 0.722938973175438, "grad_norm": 2.030409336090088, "learning_rate": 3.5201048949757702e-06, "loss": 0.7235, "step": 9554 }, { "epoch": 0.7230146418977715, "grad_norm": 2.3380861282348633, "learning_rate": 3.5183184627563463e-06, "loss": 0.6836, "step": 9555 }, { "epoch": 0.7230903106201052, "grad_norm": 2.5878119468688965, "learning_rate": 3.5165323738175504e-06, "loss": 0.6228, "step": 9556 }, { "epoch": 0.7231659793424388, "grad_norm": 2.390239953994751, "learning_rate": 3.514746628271236e-06, "loss": 0.4656, "step": 9557 }, { "epoch": 0.7232416480647724, "grad_norm": 2.0692977905273438, "learning_rate": 3.512961226229227e-06, "loss": 0.6703, "step": 9558 }, { "epoch": 0.7233173167871061, "grad_norm": 2.3725948333740234, "learning_rate": 3.511176167803329e-06, "loss": 0.665, "step": 9559 }, { "epoch": 0.7233929855094396, "grad_norm": 2.082200288772583, "learning_rate": 3.509391453105339e-06, "loss": 0.6218, "step": 9560 }, { "epoch": 0.7234686542317733, "grad_norm": 1.8738415241241455, "learning_rate": 3.5076070822470115e-06, "loss": 0.7547, "step": 9561 }, { "epoch": 0.7235443229541069, "grad_norm": 2.0773375034332275, "learning_rate": 3.5058230553400937e-06, "loss": 0.6218, "step": 9562 }, { "epoch": 0.7236199916764405, "grad_norm": 2.684323310852051, "learning_rate": 3.504039372496306e-06, "loss": 0.6819, "step": 9563 }, { "epoch": 0.7236956603987742, "grad_norm": 2.242973804473877, "learning_rate": 3.502256033827349e-06, "loss": 0.7311, "step": 9564 }, { "epoch": 0.7237713291211078, "grad_norm": 1.914873480796814, "learning_rate": 3.5004730394449014e-06, "loss": 0.6217, "step": 9565 }, { "epoch": 0.7238469978434414, "grad_norm": 3.037616729736328, "learning_rate": 3.498690389460619e-06, "loss": 0.7967, "step": 9566 }, { "epoch": 0.7239226665657751, "grad_norm": 1.9221965074539185, "learning_rate": 3.4969080839861388e-06, "loss": 0.6185, "step": 9567 }, { "epoch": 0.7239983352881086, "grad_norm": 1.7986969947814941, "learning_rate": 3.495126123133075e-06, "loss": 0.5751, "step": 9568 }, { "epoch": 0.7240740040104423, "grad_norm": 2.0456697940826416, "learning_rate": 3.4933445070130137e-06, "loss": 0.7111, "step": 9569 }, { "epoch": 0.7241496727327759, "grad_norm": 2.568084716796875, "learning_rate": 3.4915632357375322e-06, "loss": 0.7023, "step": 9570 }, { "epoch": 0.7242253414551095, "grad_norm": 1.8491854667663574, "learning_rate": 3.489782309418181e-06, "loss": 0.5003, "step": 9571 }, { "epoch": 0.7243010101774432, "grad_norm": 2.522088050842285, "learning_rate": 3.4880017281664807e-06, "loss": 0.6625, "step": 9572 }, { "epoch": 0.7243766788997767, "grad_norm": 2.015510082244873, "learning_rate": 3.4862214920939396e-06, "loss": 0.6182, "step": 9573 }, { "epoch": 0.7244523476221104, "grad_norm": 1.839280366897583, "learning_rate": 3.4844416013120436e-06, "loss": 0.6601, "step": 9574 }, { "epoch": 0.724528016344444, "grad_norm": 1.9909266233444214, "learning_rate": 3.4826620559322523e-06, "loss": 0.7079, "step": 9575 }, { "epoch": 0.7246036850667776, "grad_norm": 2.2563157081604004, "learning_rate": 3.480882856066009e-06, "loss": 0.5589, "step": 9576 }, { "epoch": 0.7246793537891113, "grad_norm": 2.3766355514526367, "learning_rate": 3.4791040018247334e-06, "loss": 0.6712, "step": 9577 }, { "epoch": 0.7247550225114449, "grad_norm": 2.32324481010437, "learning_rate": 3.477325493319824e-06, "loss": 0.7717, "step": 9578 }, { "epoch": 0.7248306912337785, "grad_norm": 1.881474256515503, "learning_rate": 3.4755473306626482e-06, "loss": 0.6536, "step": 9579 }, { "epoch": 0.7249063599561122, "grad_norm": 2.3308231830596924, "learning_rate": 3.4737695139645697e-06, "loss": 0.6384, "step": 9580 }, { "epoch": 0.7249820286784457, "grad_norm": 2.173731803894043, "learning_rate": 3.471992043336919e-06, "loss": 0.6587, "step": 9581 }, { "epoch": 0.7250576974007794, "grad_norm": 2.35199236869812, "learning_rate": 3.4702149188910087e-06, "loss": 0.6212, "step": 9582 }, { "epoch": 0.725133366123113, "grad_norm": 2.924612522125244, "learning_rate": 3.468438140738123e-06, "loss": 0.7118, "step": 9583 }, { "epoch": 0.7252090348454466, "grad_norm": 2.093873977661133, "learning_rate": 3.46666170898953e-06, "loss": 0.6152, "step": 9584 }, { "epoch": 0.7252847035677803, "grad_norm": 2.7379560470581055, "learning_rate": 3.4648856237564827e-06, "loss": 0.7422, "step": 9585 }, { "epoch": 0.7253603722901139, "grad_norm": 2.0272998809814453, "learning_rate": 3.463109885150198e-06, "loss": 0.6245, "step": 9586 }, { "epoch": 0.7254360410124475, "grad_norm": 2.0299673080444336, "learning_rate": 3.4613344932818797e-06, "loss": 0.7292, "step": 9587 }, { "epoch": 0.7255117097347811, "grad_norm": 2.6118695735931396, "learning_rate": 3.459559448262711e-06, "loss": 0.6669, "step": 9588 }, { "epoch": 0.7255873784571147, "grad_norm": 1.9721378087997437, "learning_rate": 3.457784750203849e-06, "loss": 0.6908, "step": 9589 }, { "epoch": 0.7256630471794484, "grad_norm": 2.455974817276001, "learning_rate": 3.456010399216431e-06, "loss": 0.8746, "step": 9590 }, { "epoch": 0.725738715901782, "grad_norm": 1.8864761590957642, "learning_rate": 3.454236395411574e-06, "loss": 0.6697, "step": 9591 }, { "epoch": 0.7258143846241156, "grad_norm": 1.9574358463287354, "learning_rate": 3.4524627389003745e-06, "loss": 0.7325, "step": 9592 }, { "epoch": 0.7258900533464493, "grad_norm": 1.9608203172683716, "learning_rate": 3.450689429793897e-06, "loss": 0.7059, "step": 9593 }, { "epoch": 0.7259657220687828, "grad_norm": 2.011075496673584, "learning_rate": 3.4489164682031966e-06, "loss": 0.6755, "step": 9594 }, { "epoch": 0.7260413907911165, "grad_norm": 1.925155520439148, "learning_rate": 3.4471438542392987e-06, "loss": 0.8509, "step": 9595 }, { "epoch": 0.7261170595134501, "grad_norm": 1.9459024667739868, "learning_rate": 3.4453715880132183e-06, "loss": 0.6895, "step": 9596 }, { "epoch": 0.7261927282357837, "grad_norm": 1.7830241918563843, "learning_rate": 3.4435996696359328e-06, "loss": 0.6713, "step": 9597 }, { "epoch": 0.7262683969581174, "grad_norm": 2.3200533390045166, "learning_rate": 3.441828099218406e-06, "loss": 0.7796, "step": 9598 }, { "epoch": 0.726344065680451, "grad_norm": 2.053757429122925, "learning_rate": 3.4400568768715827e-06, "loss": 0.6481, "step": 9599 }, { "epoch": 0.7264197344027846, "grad_norm": 2.283618688583374, "learning_rate": 3.4382860027063798e-06, "loss": 0.7214, "step": 9600 }, { "epoch": 0.7264954031251182, "grad_norm": 2.036465644836426, "learning_rate": 3.436515476833696e-06, "loss": 0.6602, "step": 9601 }, { "epoch": 0.7265710718474518, "grad_norm": 1.9989351034164429, "learning_rate": 3.434745299364408e-06, "loss": 0.6376, "step": 9602 }, { "epoch": 0.7266467405697855, "grad_norm": 1.9617687463760376, "learning_rate": 3.4329754704093725e-06, "loss": 0.5082, "step": 9603 }, { "epoch": 0.7267224092921191, "grad_norm": 2.9853837490081787, "learning_rate": 3.431205990079416e-06, "loss": 0.7962, "step": 9604 }, { "epoch": 0.7267980780144527, "grad_norm": 2.0819427967071533, "learning_rate": 3.4294368584853484e-06, "loss": 0.6982, "step": 9605 }, { "epoch": 0.7268737467367864, "grad_norm": 2.134868621826172, "learning_rate": 3.4276680757379687e-06, "loss": 0.6123, "step": 9606 }, { "epoch": 0.72694941545912, "grad_norm": 2.1825947761535645, "learning_rate": 3.425899641948035e-06, "loss": 0.7086, "step": 9607 }, { "epoch": 0.7270250841814536, "grad_norm": 2.3707220554351807, "learning_rate": 3.4241315572262933e-06, "loss": 0.8065, "step": 9608 }, { "epoch": 0.7271007529037872, "grad_norm": 1.2709568738937378, "learning_rate": 3.4223638216834683e-06, "loss": 0.8368, "step": 9609 }, { "epoch": 0.7271764216261208, "grad_norm": 2.1164627075195312, "learning_rate": 3.4205964354302608e-06, "loss": 0.7194, "step": 9610 }, { "epoch": 0.7272520903484545, "grad_norm": 1.7704885005950928, "learning_rate": 3.4188293985773507e-06, "loss": 0.6807, "step": 9611 }, { "epoch": 0.7273277590707881, "grad_norm": 1.7768155336380005, "learning_rate": 3.417062711235396e-06, "loss": 0.6752, "step": 9612 }, { "epoch": 0.7274034277931217, "grad_norm": 1.9417698383331299, "learning_rate": 3.415296373515031e-06, "loss": 0.7535, "step": 9613 }, { "epoch": 0.7274790965154553, "grad_norm": 2.049741506576538, "learning_rate": 3.413530385526874e-06, "loss": 0.7368, "step": 9614 }, { "epoch": 0.727554765237789, "grad_norm": 1.6550544500350952, "learning_rate": 3.411764747381506e-06, "loss": 0.6998, "step": 9615 }, { "epoch": 0.7276304339601226, "grad_norm": 1.9627418518066406, "learning_rate": 3.409999459189508e-06, "loss": 0.6864, "step": 9616 }, { "epoch": 0.7277061026824562, "grad_norm": 2.080371379852295, "learning_rate": 3.4082345210614273e-06, "loss": 0.6129, "step": 9617 }, { "epoch": 0.7277817714047898, "grad_norm": 1.9414567947387695, "learning_rate": 3.406469933107783e-06, "loss": 0.6578, "step": 9618 }, { "epoch": 0.7278574401271235, "grad_norm": 2.097715139389038, "learning_rate": 3.404705695439083e-06, "loss": 0.6798, "step": 9619 }, { "epoch": 0.7279331088494571, "grad_norm": 2.0292246341705322, "learning_rate": 3.40294180816581e-06, "loss": 0.6346, "step": 9620 }, { "epoch": 0.7280087775717907, "grad_norm": 2.0286881923675537, "learning_rate": 3.401178271398425e-06, "loss": 0.7645, "step": 9621 }, { "epoch": 0.7280844462941243, "grad_norm": 2.190192461013794, "learning_rate": 3.3994150852473645e-06, "loss": 0.6803, "step": 9622 }, { "epoch": 0.7281601150164579, "grad_norm": 2.6516058444976807, "learning_rate": 3.3976522498230454e-06, "loss": 0.9133, "step": 9623 }, { "epoch": 0.7282357837387916, "grad_norm": 1.7994333505630493, "learning_rate": 3.395889765235864e-06, "loss": 0.7207, "step": 9624 }, { "epoch": 0.7283114524611252, "grad_norm": 2.710233211517334, "learning_rate": 3.3941276315961903e-06, "loss": 0.6214, "step": 9625 }, { "epoch": 0.7283871211834588, "grad_norm": 2.217609167098999, "learning_rate": 3.3923658490143767e-06, "loss": 0.6707, "step": 9626 }, { "epoch": 0.7284627899057925, "grad_norm": 2.534865379333496, "learning_rate": 3.3906044176007505e-06, "loss": 0.8433, "step": 9627 }, { "epoch": 0.728538458628126, "grad_norm": 2.2182860374450684, "learning_rate": 3.3888433374656217e-06, "loss": 0.7009, "step": 9628 }, { "epoch": 0.7286141273504597, "grad_norm": 2.057269811630249, "learning_rate": 3.387082608719268e-06, "loss": 0.7962, "step": 9629 }, { "epoch": 0.7286897960727933, "grad_norm": 2.081799268722534, "learning_rate": 3.385322231471954e-06, "loss": 0.6249, "step": 9630 }, { "epoch": 0.7287654647951269, "grad_norm": 2.1988329887390137, "learning_rate": 3.383562205833927e-06, "loss": 0.8234, "step": 9631 }, { "epoch": 0.7288411335174606, "grad_norm": 2.533674716949463, "learning_rate": 3.381802531915398e-06, "loss": 0.6977, "step": 9632 }, { "epoch": 0.7289168022397942, "grad_norm": 1.9693000316619873, "learning_rate": 3.380043209826566e-06, "loss": 0.5226, "step": 9633 }, { "epoch": 0.7289924709621278, "grad_norm": 2.4341700077056885, "learning_rate": 3.3782842396776048e-06, "loss": 0.6874, "step": 9634 }, { "epoch": 0.7290681396844614, "grad_norm": 2.3296284675598145, "learning_rate": 3.3765256215786707e-06, "loss": 0.4436, "step": 9635 }, { "epoch": 0.729143808406795, "grad_norm": 1.8959673643112183, "learning_rate": 3.374767355639885e-06, "loss": 0.6406, "step": 9636 }, { "epoch": 0.7292194771291287, "grad_norm": 2.5320215225219727, "learning_rate": 3.373009441971364e-06, "loss": 0.7049, "step": 9637 }, { "epoch": 0.7292951458514623, "grad_norm": 2.963879346847534, "learning_rate": 3.3712518806831915e-06, "loss": 0.6362, "step": 9638 }, { "epoch": 0.7293708145737959, "grad_norm": 1.864016056060791, "learning_rate": 3.3694946718854357e-06, "loss": 0.5834, "step": 9639 }, { "epoch": 0.7294464832961296, "grad_norm": 2.2070538997650146, "learning_rate": 3.3677378156881313e-06, "loss": 0.64, "step": 9640 }, { "epoch": 0.7295221520184632, "grad_norm": 2.1202077865600586, "learning_rate": 3.3659813122012987e-06, "loss": 0.619, "step": 9641 }, { "epoch": 0.7295978207407968, "grad_norm": 1.996546983718872, "learning_rate": 3.364225161534945e-06, "loss": 0.5529, "step": 9642 }, { "epoch": 0.7296734894631304, "grad_norm": 1.7262141704559326, "learning_rate": 3.362469363799037e-06, "loss": 0.6483, "step": 9643 }, { "epoch": 0.729749158185464, "grad_norm": 2.2432174682617188, "learning_rate": 3.360713919103532e-06, "loss": 0.6979, "step": 9644 }, { "epoch": 0.7298248269077977, "grad_norm": 2.1169657707214355, "learning_rate": 3.35895882755836e-06, "loss": 0.7512, "step": 9645 }, { "epoch": 0.7299004956301313, "grad_norm": 2.215263843536377, "learning_rate": 3.357204089273432e-06, "loss": 0.6911, "step": 9646 }, { "epoch": 0.7299761643524649, "grad_norm": 2.0325393676757812, "learning_rate": 3.3554497043586354e-06, "loss": 0.6089, "step": 9647 }, { "epoch": 0.7300518330747985, "grad_norm": 2.492884874343872, "learning_rate": 3.353695672923835e-06, "loss": 0.6432, "step": 9648 }, { "epoch": 0.7301275017971322, "grad_norm": 1.838275671005249, "learning_rate": 3.351941995078877e-06, "loss": 0.6128, "step": 9649 }, { "epoch": 0.7302031705194658, "grad_norm": 2.5768980979919434, "learning_rate": 3.3501886709335755e-06, "loss": 0.542, "step": 9650 }, { "epoch": 0.7302788392417994, "grad_norm": 2.3133151531219482, "learning_rate": 3.3484357005977307e-06, "loss": 0.6435, "step": 9651 }, { "epoch": 0.730354507964133, "grad_norm": 2.1591763496398926, "learning_rate": 3.346683084181125e-06, "loss": 0.8351, "step": 9652 }, { "epoch": 0.7304301766864667, "grad_norm": 2.5849671363830566, "learning_rate": 3.344930821793512e-06, "loss": 0.5672, "step": 9653 }, { "epoch": 0.7305058454088003, "grad_norm": 2.141481876373291, "learning_rate": 3.343178913544619e-06, "loss": 0.6263, "step": 9654 }, { "epoch": 0.7305815141311339, "grad_norm": 2.7578744888305664, "learning_rate": 3.341427359544158e-06, "loss": 0.7468, "step": 9655 }, { "epoch": 0.7306571828534675, "grad_norm": 1.959076166152954, "learning_rate": 3.339676159901819e-06, "loss": 0.582, "step": 9656 }, { "epoch": 0.7307328515758011, "grad_norm": 2.0008225440979004, "learning_rate": 3.3379253147272654e-06, "loss": 0.6107, "step": 9657 }, { "epoch": 0.7308085202981348, "grad_norm": 2.1886539459228516, "learning_rate": 3.336174824130143e-06, "loss": 0.7106, "step": 9658 }, { "epoch": 0.7308841890204684, "grad_norm": 2.4869959354400635, "learning_rate": 3.334424688220071e-06, "loss": 0.7828, "step": 9659 }, { "epoch": 0.730959857742802, "grad_norm": 3.1968321800231934, "learning_rate": 3.3326749071066546e-06, "loss": 0.6548, "step": 9660 }, { "epoch": 0.7310355264651356, "grad_norm": 2.0156288146972656, "learning_rate": 3.330925480899458e-06, "loss": 0.7084, "step": 9661 }, { "epoch": 0.7311111951874693, "grad_norm": 2.093147039413452, "learning_rate": 3.329176409708048e-06, "loss": 0.716, "step": 9662 }, { "epoch": 0.7311868639098029, "grad_norm": 1.8537280559539795, "learning_rate": 3.3274276936419558e-06, "loss": 0.7604, "step": 9663 }, { "epoch": 0.7312625326321365, "grad_norm": 1.8829224109649658, "learning_rate": 3.325679332810685e-06, "loss": 0.5923, "step": 9664 }, { "epoch": 0.7313382013544701, "grad_norm": 2.2655227184295654, "learning_rate": 3.323931327323727e-06, "loss": 0.6448, "step": 9665 }, { "epoch": 0.7314138700768038, "grad_norm": 2.4388043880462646, "learning_rate": 3.322183677290546e-06, "loss": 0.6538, "step": 9666 }, { "epoch": 0.7314895387991374, "grad_norm": 2.1966893672943115, "learning_rate": 3.3204363828205933e-06, "loss": 0.609, "step": 9667 }, { "epoch": 0.731565207521471, "grad_norm": 1.9812705516815186, "learning_rate": 3.318689444023281e-06, "loss": 0.6558, "step": 9668 }, { "epoch": 0.7316408762438046, "grad_norm": 2.1352076530456543, "learning_rate": 3.3169428610080107e-06, "loss": 0.6868, "step": 9669 }, { "epoch": 0.7317165449661382, "grad_norm": 1.9275273084640503, "learning_rate": 3.315196633884161e-06, "loss": 0.663, "step": 9670 }, { "epoch": 0.7317922136884719, "grad_norm": 2.548799991607666, "learning_rate": 3.3134507627610867e-06, "loss": 0.743, "step": 9671 }, { "epoch": 0.7318678824108055, "grad_norm": 1.8957780599594116, "learning_rate": 3.311705247748113e-06, "loss": 0.5394, "step": 9672 }, { "epoch": 0.7319435511331391, "grad_norm": 1.761271595954895, "learning_rate": 3.3099600889545576e-06, "loss": 0.7391, "step": 9673 }, { "epoch": 0.7320192198554727, "grad_norm": 1.8656989336013794, "learning_rate": 3.308215286489708e-06, "loss": 0.5925, "step": 9674 }, { "epoch": 0.7320948885778064, "grad_norm": 2.2291691303253174, "learning_rate": 3.306470840462824e-06, "loss": 0.6399, "step": 9675 }, { "epoch": 0.73217055730014, "grad_norm": 2.45021390914917, "learning_rate": 3.304726750983151e-06, "loss": 0.7225, "step": 9676 }, { "epoch": 0.7322462260224736, "grad_norm": 1.7993860244750977, "learning_rate": 3.30298301815991e-06, "loss": 0.6022, "step": 9677 }, { "epoch": 0.7323218947448072, "grad_norm": 2.358670234680176, "learning_rate": 3.301239642102298e-06, "loss": 0.691, "step": 9678 }, { "epoch": 0.7323975634671409, "grad_norm": 4.461367130279541, "learning_rate": 3.2994966229194917e-06, "loss": 0.6848, "step": 9679 }, { "epoch": 0.7324732321894745, "grad_norm": 2.1369030475616455, "learning_rate": 3.297753960720645e-06, "loss": 0.7066, "step": 9680 }, { "epoch": 0.7325489009118081, "grad_norm": 2.010079860687256, "learning_rate": 3.296011655614891e-06, "loss": 0.7084, "step": 9681 }, { "epoch": 0.7326245696341417, "grad_norm": 2.3091893196105957, "learning_rate": 3.2942697077113305e-06, "loss": 0.7503, "step": 9682 }, { "epoch": 0.7327002383564754, "grad_norm": 2.3691303730010986, "learning_rate": 3.292528117119058e-06, "loss": 0.6997, "step": 9683 }, { "epoch": 0.732775907078809, "grad_norm": 3.361497402191162, "learning_rate": 3.2907868839471364e-06, "loss": 0.7454, "step": 9684 }, { "epoch": 0.7328515758011426, "grad_norm": 2.057619571685791, "learning_rate": 3.2890460083046072e-06, "loss": 0.7054, "step": 9685 }, { "epoch": 0.7329272445234762, "grad_norm": 2.1395699977874756, "learning_rate": 3.2873054903004863e-06, "loss": 0.5957, "step": 9686 }, { "epoch": 0.7330029132458098, "grad_norm": 1.947824478149414, "learning_rate": 3.28556533004377e-06, "loss": 0.5955, "step": 9687 }, { "epoch": 0.7330785819681435, "grad_norm": 2.4234938621520996, "learning_rate": 3.283825527643441e-06, "loss": 0.7185, "step": 9688 }, { "epoch": 0.7331542506904771, "grad_norm": 4.633688926696777, "learning_rate": 3.282086083208443e-06, "loss": 0.7757, "step": 9689 }, { "epoch": 0.7332299194128107, "grad_norm": 2.446262836456299, "learning_rate": 3.280346996847709e-06, "loss": 0.655, "step": 9690 }, { "epoch": 0.7333055881351443, "grad_norm": 1.894422173500061, "learning_rate": 3.2786082686701447e-06, "loss": 0.7366, "step": 9691 }, { "epoch": 0.733381256857478, "grad_norm": 2.1454946994781494, "learning_rate": 3.2768698987846356e-06, "loss": 0.5931, "step": 9692 }, { "epoch": 0.7334569255798116, "grad_norm": 2.107937812805176, "learning_rate": 3.2751318873000444e-06, "loss": 0.5901, "step": 9693 }, { "epoch": 0.7335325943021452, "grad_norm": 1.8799843788146973, "learning_rate": 3.2733942343252114e-06, "loss": 0.7529, "step": 9694 }, { "epoch": 0.7336082630244788, "grad_norm": 2.41536808013916, "learning_rate": 3.271656939968957e-06, "loss": 0.7178, "step": 9695 }, { "epoch": 0.7336839317468125, "grad_norm": 2.3177335262298584, "learning_rate": 3.2699200043400684e-06, "loss": 0.6441, "step": 9696 }, { "epoch": 0.7337596004691461, "grad_norm": 1.8029228448867798, "learning_rate": 3.2681834275473205e-06, "loss": 0.6193, "step": 9697 }, { "epoch": 0.7338352691914797, "grad_norm": 1.7246633768081665, "learning_rate": 3.2664472096994678e-06, "loss": 0.5477, "step": 9698 }, { "epoch": 0.7339109379138133, "grad_norm": 2.265120029449463, "learning_rate": 3.2647113509052387e-06, "loss": 0.7033, "step": 9699 }, { "epoch": 0.7339866066361469, "grad_norm": 2.030282974243164, "learning_rate": 3.2629758512733326e-06, "loss": 0.6291, "step": 9700 }, { "epoch": 0.7340622753584806, "grad_norm": 2.392416477203369, "learning_rate": 3.261240710912433e-06, "loss": 0.7904, "step": 9701 }, { "epoch": 0.7341379440808142, "grad_norm": 2.1410059928894043, "learning_rate": 3.2595059299312027e-06, "loss": 0.5866, "step": 9702 }, { "epoch": 0.7342136128031478, "grad_norm": 2.2164275646209717, "learning_rate": 3.2577715084382777e-06, "loss": 0.7813, "step": 9703 }, { "epoch": 0.7342892815254815, "grad_norm": 2.500359535217285, "learning_rate": 3.256037446542273e-06, "loss": 0.7013, "step": 9704 }, { "epoch": 0.7343649502478151, "grad_norm": 2.0464277267456055, "learning_rate": 3.2543037443517825e-06, "loss": 0.6824, "step": 9705 }, { "epoch": 0.7344406189701487, "grad_norm": 1.9826641082763672, "learning_rate": 3.252570401975377e-06, "loss": 0.6748, "step": 9706 }, { "epoch": 0.7345162876924823, "grad_norm": 2.0672097206115723, "learning_rate": 3.250837419521598e-06, "loss": 0.5698, "step": 9707 }, { "epoch": 0.7345919564148159, "grad_norm": 1.9913432598114014, "learning_rate": 3.2491047970989765e-06, "loss": 0.7454, "step": 9708 }, { "epoch": 0.7346676251371496, "grad_norm": 1.714163064956665, "learning_rate": 3.2473725348160173e-06, "loss": 0.5349, "step": 9709 }, { "epoch": 0.7347432938594832, "grad_norm": 1.9784096479415894, "learning_rate": 3.2456406327811926e-06, "loss": 0.6531, "step": 9710 }, { "epoch": 0.7348189625818168, "grad_norm": 2.2186923027038574, "learning_rate": 3.243909091102964e-06, "loss": 0.695, "step": 9711 }, { "epoch": 0.7348946313041504, "grad_norm": 2.7318224906921387, "learning_rate": 3.2421779098897644e-06, "loss": 0.7293, "step": 9712 }, { "epoch": 0.734970300026484, "grad_norm": 2.105350971221924, "learning_rate": 3.240447089250008e-06, "loss": 0.6585, "step": 9713 }, { "epoch": 0.7350459687488177, "grad_norm": 2.2211616039276123, "learning_rate": 3.2387166292920837e-06, "loss": 0.7232, "step": 9714 }, { "epoch": 0.7351216374711513, "grad_norm": 2.1771297454833984, "learning_rate": 3.2369865301243573e-06, "loss": 0.5941, "step": 9715 }, { "epoch": 0.7351973061934849, "grad_norm": 30.021831512451172, "learning_rate": 3.2352567918551753e-06, "loss": 0.7043, "step": 9716 }, { "epoch": 0.7352729749158186, "grad_norm": 1.4489507675170898, "learning_rate": 3.233527414592861e-06, "loss": 0.7254, "step": 9717 }, { "epoch": 0.7353486436381522, "grad_norm": 2.0714879035949707, "learning_rate": 3.231798398445705e-06, "loss": 0.7017, "step": 9718 }, { "epoch": 0.7354243123604858, "grad_norm": 2.2503437995910645, "learning_rate": 3.230069743521993e-06, "loss": 0.7195, "step": 9719 }, { "epoch": 0.7354999810828194, "grad_norm": 1.817765235900879, "learning_rate": 3.2283414499299786e-06, "loss": 0.6089, "step": 9720 }, { "epoch": 0.735575649805153, "grad_norm": 1.9589232206344604, "learning_rate": 3.2266135177778883e-06, "loss": 0.7062, "step": 9721 }, { "epoch": 0.7356513185274867, "grad_norm": 2.7293508052825928, "learning_rate": 3.224885947173932e-06, "loss": 0.7512, "step": 9722 }, { "epoch": 0.7357269872498203, "grad_norm": 2.42242169380188, "learning_rate": 3.223158738226297e-06, "loss": 0.8047, "step": 9723 }, { "epoch": 0.7358026559721539, "grad_norm": 2.5627288818359375, "learning_rate": 3.221431891043146e-06, "loss": 0.7915, "step": 9724 }, { "epoch": 0.7358783246944876, "grad_norm": 1.7673484086990356, "learning_rate": 3.2197054057326203e-06, "loss": 0.7325, "step": 9725 }, { "epoch": 0.7359539934168211, "grad_norm": 2.0500118732452393, "learning_rate": 3.217979282402839e-06, "loss": 0.6227, "step": 9726 }, { "epoch": 0.7360296621391548, "grad_norm": 2.1804354190826416, "learning_rate": 3.216253521161894e-06, "loss": 0.6206, "step": 9727 }, { "epoch": 0.7361053308614884, "grad_norm": 1.8224960565567017, "learning_rate": 3.214528122117862e-06, "loss": 0.7576, "step": 9728 }, { "epoch": 0.736180999583822, "grad_norm": 2.191704750061035, "learning_rate": 3.212803085378792e-06, "loss": 0.6808, "step": 9729 }, { "epoch": 0.7362566683061557, "grad_norm": 1.6620792150497437, "learning_rate": 3.2110784110527098e-06, "loss": 0.771, "step": 9730 }, { "epoch": 0.7363323370284893, "grad_norm": 2.0463523864746094, "learning_rate": 3.2093540992476243e-06, "loss": 0.5801, "step": 9731 }, { "epoch": 0.7364080057508229, "grad_norm": 1.8782941102981567, "learning_rate": 3.207630150071512e-06, "loss": 0.778, "step": 9732 }, { "epoch": 0.7364836744731565, "grad_norm": 2.127807378768921, "learning_rate": 3.205906563632331e-06, "loss": 0.7317, "step": 9733 }, { "epoch": 0.7365593431954901, "grad_norm": 2.123108386993408, "learning_rate": 3.2041833400380274e-06, "loss": 0.5925, "step": 9734 }, { "epoch": 0.7366350119178238, "grad_norm": 1.8136372566223145, "learning_rate": 3.202460479396505e-06, "loss": 0.7108, "step": 9735 }, { "epoch": 0.7367106806401574, "grad_norm": 2.5573692321777344, "learning_rate": 3.200737981815661e-06, "loss": 0.8463, "step": 9736 }, { "epoch": 0.736786349362491, "grad_norm": 2.7275161743164062, "learning_rate": 3.19901584740336e-06, "loss": 0.6705, "step": 9737 }, { "epoch": 0.7368620180848247, "grad_norm": 1.9936103820800781, "learning_rate": 3.1972940762674494e-06, "loss": 0.6206, "step": 9738 }, { "epoch": 0.7369376868071582, "grad_norm": 1.7031792402267456, "learning_rate": 3.195572668515753e-06, "loss": 0.6619, "step": 9739 }, { "epoch": 0.7370133555294919, "grad_norm": 1.8480082750320435, "learning_rate": 3.193851624256069e-06, "loss": 0.6239, "step": 9740 }, { "epoch": 0.7370890242518255, "grad_norm": 1.9207595586776733, "learning_rate": 3.192130943596176e-06, "loss": 0.8244, "step": 9741 }, { "epoch": 0.7371646929741591, "grad_norm": 1.9544023275375366, "learning_rate": 3.190410626643831e-06, "loss": 0.6302, "step": 9742 }, { "epoch": 0.7372403616964928, "grad_norm": 2.287046194076538, "learning_rate": 3.188690673506757e-06, "loss": 0.5985, "step": 9743 }, { "epoch": 0.7373160304188264, "grad_norm": 2.2530996799468994, "learning_rate": 3.186971084292673e-06, "loss": 0.7136, "step": 9744 }, { "epoch": 0.73739169914116, "grad_norm": 2.2167491912841797, "learning_rate": 3.1852518591092636e-06, "loss": 0.6572, "step": 9745 }, { "epoch": 0.7374673678634937, "grad_norm": 1.5545406341552734, "learning_rate": 3.1835329980641866e-06, "loss": 0.7841, "step": 9746 }, { "epoch": 0.7375430365858272, "grad_norm": 2.0609633922576904, "learning_rate": 3.181814501265086e-06, "loss": 0.6042, "step": 9747 }, { "epoch": 0.7376187053081609, "grad_norm": 2.269827365875244, "learning_rate": 3.18009636881958e-06, "loss": 0.8208, "step": 9748 }, { "epoch": 0.7376943740304945, "grad_norm": 2.116123914718628, "learning_rate": 3.178378600835264e-06, "loss": 0.693, "step": 9749 }, { "epoch": 0.7377700427528281, "grad_norm": 2.1836884021759033, "learning_rate": 3.176661197419708e-06, "loss": 0.707, "step": 9750 }, { "epoch": 0.7378457114751618, "grad_norm": 1.9921830892562866, "learning_rate": 3.1749441586804633e-06, "loss": 0.58, "step": 9751 }, { "epoch": 0.7379213801974953, "grad_norm": 2.1456127166748047, "learning_rate": 3.173227484725059e-06, "loss": 0.6073, "step": 9752 }, { "epoch": 0.737997048919829, "grad_norm": 2.2834341526031494, "learning_rate": 3.1715111756609924e-06, "loss": 0.6229, "step": 9753 }, { "epoch": 0.7380727176421626, "grad_norm": 2.3917596340179443, "learning_rate": 3.1697952315957453e-06, "loss": 0.7978, "step": 9754 }, { "epoch": 0.7381483863644962, "grad_norm": 2.0401668548583984, "learning_rate": 3.1680796526367804e-06, "loss": 0.7177, "step": 9755 }, { "epoch": 0.7382240550868299, "grad_norm": 2.360987424850464, "learning_rate": 3.1663644388915333e-06, "loss": 0.7348, "step": 9756 }, { "epoch": 0.7382997238091635, "grad_norm": 2.4300918579101562, "learning_rate": 3.1646495904674113e-06, "loss": 0.667, "step": 9757 }, { "epoch": 0.7383753925314971, "grad_norm": 2.076064109802246, "learning_rate": 3.162935107471805e-06, "loss": 0.6606, "step": 9758 }, { "epoch": 0.7384510612538308, "grad_norm": 2.9375874996185303, "learning_rate": 3.1612209900120817e-06, "loss": 0.6929, "step": 9759 }, { "epoch": 0.7385267299761643, "grad_norm": 2.2996110916137695, "learning_rate": 3.159507238195584e-06, "loss": 0.6716, "step": 9760 }, { "epoch": 0.738602398698498, "grad_norm": 1.980484127998352, "learning_rate": 3.1577938521296352e-06, "loss": 0.6685, "step": 9761 }, { "epoch": 0.7386780674208316, "grad_norm": 3.1064846515655518, "learning_rate": 3.1560808319215305e-06, "loss": 0.7042, "step": 9762 }, { "epoch": 0.7387537361431652, "grad_norm": 2.007359743118286, "learning_rate": 3.154368177678548e-06, "loss": 0.6777, "step": 9763 }, { "epoch": 0.7388294048654989, "grad_norm": 2.1503567695617676, "learning_rate": 3.1526558895079316e-06, "loss": 0.6476, "step": 9764 }, { "epoch": 0.7389050735878324, "grad_norm": 1.7849518060684204, "learning_rate": 3.15094396751692e-06, "loss": 0.5983, "step": 9765 }, { "epoch": 0.7389807423101661, "grad_norm": 1.9997239112854004, "learning_rate": 3.1492324118127173e-06, "loss": 0.69, "step": 9766 }, { "epoch": 0.7390564110324997, "grad_norm": 2.347898244857788, "learning_rate": 3.147521222502502e-06, "loss": 0.7001, "step": 9767 }, { "epoch": 0.7391320797548333, "grad_norm": 2.870927095413208, "learning_rate": 3.145810399693437e-06, "loss": 0.692, "step": 9768 }, { "epoch": 0.739207748477167, "grad_norm": 2.2380945682525635, "learning_rate": 3.1440999434926564e-06, "loss": 0.7641, "step": 9769 }, { "epoch": 0.7392834171995006, "grad_norm": 1.807690143585205, "learning_rate": 3.1423898540072832e-06, "loss": 0.6217, "step": 9770 }, { "epoch": 0.7393590859218342, "grad_norm": 2.3498446941375732, "learning_rate": 3.140680131344401e-06, "loss": 0.6596, "step": 9771 }, { "epoch": 0.7394347546441679, "grad_norm": 2.41398024559021, "learning_rate": 3.13897077561108e-06, "loss": 0.7398, "step": 9772 }, { "epoch": 0.7395104233665014, "grad_norm": 2.0578386783599854, "learning_rate": 3.137261786914366e-06, "loss": 0.7848, "step": 9773 }, { "epoch": 0.7395860920888351, "grad_norm": 2.1671581268310547, "learning_rate": 3.1355531653612802e-06, "loss": 0.562, "step": 9774 }, { "epoch": 0.7396617608111687, "grad_norm": 2.2448394298553467, "learning_rate": 3.1338449110588247e-06, "loss": 0.7788, "step": 9775 }, { "epoch": 0.7397374295335023, "grad_norm": 1.992663025856018, "learning_rate": 3.132137024113973e-06, "loss": 0.7574, "step": 9776 }, { "epoch": 0.739813098255836, "grad_norm": 3.92378830909729, "learning_rate": 3.1304295046336836e-06, "loss": 0.5947, "step": 9777 }, { "epoch": 0.7398887669781695, "grad_norm": 1.903420090675354, "learning_rate": 3.12872235272488e-06, "loss": 0.6038, "step": 9778 }, { "epoch": 0.7399644357005032, "grad_norm": 2.0623772144317627, "learning_rate": 3.1270155684944695e-06, "loss": 0.7105, "step": 9779 }, { "epoch": 0.7400401044228369, "grad_norm": 2.4272897243499756, "learning_rate": 3.125309152049346e-06, "loss": 0.6364, "step": 9780 }, { "epoch": 0.7401157731451704, "grad_norm": 2.0178956985473633, "learning_rate": 3.1236031034963617e-06, "loss": 0.7385, "step": 9781 }, { "epoch": 0.7401914418675041, "grad_norm": 1.974817156791687, "learning_rate": 3.1218974229423575e-06, "loss": 0.5617, "step": 9782 }, { "epoch": 0.7402671105898377, "grad_norm": 2.286247968673706, "learning_rate": 3.1201921104941478e-06, "loss": 0.6671, "step": 9783 }, { "epoch": 0.7403427793121713, "grad_norm": 2.1284759044647217, "learning_rate": 3.118487166258527e-06, "loss": 0.6746, "step": 9784 }, { "epoch": 0.740418448034505, "grad_norm": 2.2996256351470947, "learning_rate": 3.1167825903422616e-06, "loss": 0.6687, "step": 9785 }, { "epoch": 0.7404941167568385, "grad_norm": 2.304643154144287, "learning_rate": 3.1150783828521005e-06, "loss": 0.6445, "step": 9786 }, { "epoch": 0.7405697854791722, "grad_norm": 2.089303731918335, "learning_rate": 3.1133745438947643e-06, "loss": 0.5833, "step": 9787 }, { "epoch": 0.7406454542015058, "grad_norm": 2.256558895111084, "learning_rate": 3.1116710735769567e-06, "loss": 0.7369, "step": 9788 }, { "epoch": 0.7407211229238394, "grad_norm": 2.6049532890319824, "learning_rate": 3.109967972005349e-06, "loss": 0.5936, "step": 9789 }, { "epoch": 0.7407967916461731, "grad_norm": 2.2916321754455566, "learning_rate": 3.1082652392865946e-06, "loss": 0.6695, "step": 9790 }, { "epoch": 0.7408724603685066, "grad_norm": 2.5603201389312744, "learning_rate": 3.1065628755273324e-06, "loss": 0.5951, "step": 9791 }, { "epoch": 0.7409481290908403, "grad_norm": 2.134892225265503, "learning_rate": 3.1048608808341624e-06, "loss": 0.7521, "step": 9792 }, { "epoch": 0.741023797813174, "grad_norm": 10.978889465332031, "learning_rate": 3.103159255313671e-06, "loss": 0.6364, "step": 9793 }, { "epoch": 0.7410994665355075, "grad_norm": 1.8416504859924316, "learning_rate": 3.10145799907242e-06, "loss": 0.5399, "step": 9794 }, { "epoch": 0.7411751352578412, "grad_norm": 2.196185350418091, "learning_rate": 3.099757112216947e-06, "loss": 0.6477, "step": 9795 }, { "epoch": 0.7412508039801748, "grad_norm": 2.4861955642700195, "learning_rate": 3.098056594853767e-06, "loss": 0.5316, "step": 9796 }, { "epoch": 0.7413264727025084, "grad_norm": 2.2695884704589844, "learning_rate": 3.0963564470893736e-06, "loss": 0.7883, "step": 9797 }, { "epoch": 0.7414021414248421, "grad_norm": 2.2510933876037598, "learning_rate": 3.094656669030236e-06, "loss": 0.7622, "step": 9798 }, { "epoch": 0.7414778101471756, "grad_norm": 2.05856990814209, "learning_rate": 3.0929572607827946e-06, "loss": 0.5341, "step": 9799 }, { "epoch": 0.7415534788695093, "grad_norm": 2.074747323989868, "learning_rate": 3.0912582224534737e-06, "loss": 0.6792, "step": 9800 }, { "epoch": 0.741629147591843, "grad_norm": 2.0920283794403076, "learning_rate": 3.089559554148676e-06, "loss": 0.7247, "step": 9801 }, { "epoch": 0.7417048163141765, "grad_norm": 2.252413034439087, "learning_rate": 3.0878612559747785e-06, "loss": 0.6384, "step": 9802 }, { "epoch": 0.7417804850365102, "grad_norm": 1.887231707572937, "learning_rate": 3.0861633280381293e-06, "loss": 0.5092, "step": 9803 }, { "epoch": 0.7418561537588437, "grad_norm": 2.2161378860473633, "learning_rate": 3.08446577044506e-06, "loss": 0.8336, "step": 9804 }, { "epoch": 0.7419318224811774, "grad_norm": 2.0824790000915527, "learning_rate": 3.082768583301876e-06, "loss": 0.7406, "step": 9805 }, { "epoch": 0.7420074912035111, "grad_norm": 2.0326271057128906, "learning_rate": 3.0810717667148635e-06, "loss": 0.6042, "step": 9806 }, { "epoch": 0.7420831599258446, "grad_norm": 1.864272117614746, "learning_rate": 3.07937532079028e-06, "loss": 0.6879, "step": 9807 }, { "epoch": 0.7421588286481783, "grad_norm": 2.2721335887908936, "learning_rate": 3.0776792456343648e-06, "loss": 0.7037, "step": 9808 }, { "epoch": 0.742234497370512, "grad_norm": 1.7374581098556519, "learning_rate": 3.0759835413533324e-06, "loss": 0.6843, "step": 9809 }, { "epoch": 0.7423101660928455, "grad_norm": 2.2523179054260254, "learning_rate": 3.0742882080533656e-06, "loss": 0.705, "step": 9810 }, { "epoch": 0.7423858348151792, "grad_norm": 1.5839877128601074, "learning_rate": 3.0725932458406395e-06, "loss": 0.7204, "step": 9811 }, { "epoch": 0.7424615035375127, "grad_norm": 2.310640335083008, "learning_rate": 3.0708986548212998e-06, "loss": 0.635, "step": 9812 }, { "epoch": 0.7425371722598464, "grad_norm": 2.1407198905944824, "learning_rate": 3.06920443510146e-06, "loss": 0.6782, "step": 9813 }, { "epoch": 0.7426128409821801, "grad_norm": 2.077183246612549, "learning_rate": 3.067510586787221e-06, "loss": 0.7058, "step": 9814 }, { "epoch": 0.7426885097045136, "grad_norm": 1.9576934576034546, "learning_rate": 3.065817109984654e-06, "loss": 0.5691, "step": 9815 }, { "epoch": 0.7427641784268473, "grad_norm": 2.2817611694335938, "learning_rate": 3.0641240047998196e-06, "loss": 0.7766, "step": 9816 }, { "epoch": 0.7428398471491808, "grad_norm": 2.1753251552581787, "learning_rate": 3.062431271338736e-06, "loss": 0.6152, "step": 9817 }, { "epoch": 0.7429155158715145, "grad_norm": 2.1816024780273438, "learning_rate": 3.0607389097074095e-06, "loss": 0.6559, "step": 9818 }, { "epoch": 0.7429911845938482, "grad_norm": 2.068418264389038, "learning_rate": 3.059046920011823e-06, "loss": 0.7292, "step": 9819 }, { "epoch": 0.7430668533161817, "grad_norm": 2.037598133087158, "learning_rate": 3.057355302357934e-06, "loss": 0.5673, "step": 9820 }, { "epoch": 0.7431425220385154, "grad_norm": 2.198431968688965, "learning_rate": 3.055664056851677e-06, "loss": 0.6868, "step": 9821 }, { "epoch": 0.743218190760849, "grad_norm": 2.101435899734497, "learning_rate": 3.0539731835989625e-06, "loss": 0.6842, "step": 9822 }, { "epoch": 0.7432938594831826, "grad_norm": 2.1221351623535156, "learning_rate": 3.052282682705682e-06, "loss": 0.6233, "step": 9823 }, { "epoch": 0.7433695282055163, "grad_norm": 1.9273860454559326, "learning_rate": 3.0505925542776946e-06, "loss": 0.6363, "step": 9824 }, { "epoch": 0.7434451969278498, "grad_norm": 2.380946159362793, "learning_rate": 3.048902798420844e-06, "loss": 0.665, "step": 9825 }, { "epoch": 0.7435208656501835, "grad_norm": 2.2486279010772705, "learning_rate": 3.047213415240948e-06, "loss": 0.6321, "step": 9826 }, { "epoch": 0.7435965343725172, "grad_norm": 1.9640283584594727, "learning_rate": 3.0455244048438014e-06, "loss": 0.5942, "step": 9827 }, { "epoch": 0.7436722030948507, "grad_norm": 2.507197141647339, "learning_rate": 3.043835767335177e-06, "loss": 0.7769, "step": 9828 }, { "epoch": 0.7437478718171844, "grad_norm": 2.5422580242156982, "learning_rate": 3.0421475028208205e-06, "loss": 0.7886, "step": 9829 }, { "epoch": 0.7438235405395179, "grad_norm": 2.2974729537963867, "learning_rate": 3.0404596114064573e-06, "loss": 0.4964, "step": 9830 }, { "epoch": 0.7438992092618516, "grad_norm": 2.1887059211730957, "learning_rate": 3.038772093197789e-06, "loss": 0.7204, "step": 9831 }, { "epoch": 0.7439748779841853, "grad_norm": 2.602665901184082, "learning_rate": 3.0370849483004927e-06, "loss": 0.5673, "step": 9832 }, { "epoch": 0.7440505467065188, "grad_norm": 2.0209755897521973, "learning_rate": 3.0353981768202243e-06, "loss": 0.6575, "step": 9833 }, { "epoch": 0.7441262154288525, "grad_norm": 2.425705671310425, "learning_rate": 3.033711778862616e-06, "loss": 0.6502, "step": 9834 }, { "epoch": 0.7442018841511862, "grad_norm": 3.1738603115081787, "learning_rate": 3.032025754533271e-06, "loss": 0.5545, "step": 9835 }, { "epoch": 0.7442775528735197, "grad_norm": 1.8896595239639282, "learning_rate": 3.0303401039377725e-06, "loss": 0.5624, "step": 9836 }, { "epoch": 0.7443532215958534, "grad_norm": 2.21313738822937, "learning_rate": 3.0286548271816916e-06, "loss": 0.6534, "step": 9837 }, { "epoch": 0.7444288903181869, "grad_norm": 2.004441499710083, "learning_rate": 3.0269699243705555e-06, "loss": 0.7336, "step": 9838 }, { "epoch": 0.7445045590405206, "grad_norm": 2.018430233001709, "learning_rate": 3.025285395609882e-06, "loss": 0.6419, "step": 9839 }, { "epoch": 0.7445802277628543, "grad_norm": 2.3819639682769775, "learning_rate": 3.0236012410051617e-06, "loss": 0.6499, "step": 9840 }, { "epoch": 0.7446558964851878, "grad_norm": 2.396756172180176, "learning_rate": 3.0219174606618614e-06, "loss": 0.7293, "step": 9841 }, { "epoch": 0.7447315652075215, "grad_norm": 1.8934662342071533, "learning_rate": 3.0202340546854254e-06, "loss": 0.6671, "step": 9842 }, { "epoch": 0.744807233929855, "grad_norm": 2.2285642623901367, "learning_rate": 3.0185510231812736e-06, "loss": 0.5863, "step": 9843 }, { "epoch": 0.7448829026521887, "grad_norm": 1.9590516090393066, "learning_rate": 3.0168683662548037e-06, "loss": 0.57, "step": 9844 }, { "epoch": 0.7449585713745224, "grad_norm": 2.253278970718384, "learning_rate": 3.0151860840113916e-06, "loss": 0.6678, "step": 9845 }, { "epoch": 0.7450342400968559, "grad_norm": 1.9592149257659912, "learning_rate": 3.0135041765563778e-06, "loss": 0.7633, "step": 9846 }, { "epoch": 0.7451099088191896, "grad_norm": 2.0090091228485107, "learning_rate": 3.011822643995098e-06, "loss": 0.7217, "step": 9847 }, { "epoch": 0.7451855775415233, "grad_norm": 2.6432924270629883, "learning_rate": 3.0101414864328547e-06, "loss": 0.6495, "step": 9848 }, { "epoch": 0.7452612462638568, "grad_norm": 2.1458330154418945, "learning_rate": 3.0084607039749234e-06, "loss": 0.556, "step": 9849 }, { "epoch": 0.7453369149861905, "grad_norm": 1.9538377523422241, "learning_rate": 3.006780296726561e-06, "loss": 0.582, "step": 9850 }, { "epoch": 0.745412583708524, "grad_norm": 1.9687731266021729, "learning_rate": 3.0051002647930002e-06, "loss": 0.6966, "step": 9851 }, { "epoch": 0.7454882524308577, "grad_norm": 2.3369882106781006, "learning_rate": 3.0034206082794515e-06, "loss": 0.5864, "step": 9852 }, { "epoch": 0.7455639211531914, "grad_norm": 2.699866533279419, "learning_rate": 3.0017413272911e-06, "loss": 0.7418, "step": 9853 }, { "epoch": 0.7456395898755249, "grad_norm": 1.819517731666565, "learning_rate": 3.000062421933107e-06, "loss": 0.5972, "step": 9854 }, { "epoch": 0.7457152585978586, "grad_norm": 2.184372663497925, "learning_rate": 2.9983838923106146e-06, "loss": 0.6785, "step": 9855 }, { "epoch": 0.7457909273201923, "grad_norm": 1.910994291305542, "learning_rate": 2.996705738528728e-06, "loss": 0.6254, "step": 9856 }, { "epoch": 0.7458665960425258, "grad_norm": 5.832062244415283, "learning_rate": 2.995027960692548e-06, "loss": 0.5108, "step": 9857 }, { "epoch": 0.7459422647648595, "grad_norm": 4.642062664031982, "learning_rate": 2.9933505589071393e-06, "loss": 0.7348, "step": 9858 }, { "epoch": 0.746017933487193, "grad_norm": 2.5428013801574707, "learning_rate": 2.9916735332775504e-06, "loss": 0.6369, "step": 9859 }, { "epoch": 0.7460936022095267, "grad_norm": 1.667399525642395, "learning_rate": 2.989996883908794e-06, "loss": 0.594, "step": 9860 }, { "epoch": 0.7461692709318604, "grad_norm": 3.054075002670288, "learning_rate": 2.9883206109058685e-06, "loss": 0.7789, "step": 9861 }, { "epoch": 0.7462449396541939, "grad_norm": 2.8136708736419678, "learning_rate": 2.9866447143737572e-06, "loss": 0.5894, "step": 9862 }, { "epoch": 0.7463206083765276, "grad_norm": 2.113799571990967, "learning_rate": 2.9849691944174e-06, "loss": 0.5714, "step": 9863 }, { "epoch": 0.7463962770988611, "grad_norm": 1.9772562980651855, "learning_rate": 2.983294051141727e-06, "loss": 0.6968, "step": 9864 }, { "epoch": 0.7464719458211948, "grad_norm": 2.416429042816162, "learning_rate": 2.9816192846516415e-06, "loss": 0.6939, "step": 9865 }, { "epoch": 0.7465476145435285, "grad_norm": 1.8433407545089722, "learning_rate": 2.9799448950520247e-06, "loss": 0.5994, "step": 9866 }, { "epoch": 0.746623283265862, "grad_norm": 2.278648853302002, "learning_rate": 2.978270882447723e-06, "loss": 0.7247, "step": 9867 }, { "epoch": 0.7466989519881957, "grad_norm": 2.0349230766296387, "learning_rate": 2.976597246943579e-06, "loss": 0.7355, "step": 9868 }, { "epoch": 0.7467746207105294, "grad_norm": 1.8349040746688843, "learning_rate": 2.974923988644401e-06, "loss": 0.7532, "step": 9869 }, { "epoch": 0.7468502894328629, "grad_norm": 2.4737563133239746, "learning_rate": 2.973251107654966e-06, "loss": 0.6121, "step": 9870 }, { "epoch": 0.7469259581551966, "grad_norm": 2.696403741836548, "learning_rate": 2.9715786040800403e-06, "loss": 0.7659, "step": 9871 }, { "epoch": 0.7470016268775301, "grad_norm": 2.2334213256835938, "learning_rate": 2.969906478024358e-06, "loss": 0.7986, "step": 9872 }, { "epoch": 0.7470772955998638, "grad_norm": 1.8761075735092163, "learning_rate": 2.9682347295926405e-06, "loss": 0.6269, "step": 9873 }, { "epoch": 0.7471529643221975, "grad_norm": 2.75467586517334, "learning_rate": 2.9665633588895718e-06, "loss": 0.6236, "step": 9874 }, { "epoch": 0.747228633044531, "grad_norm": 2.219914197921753, "learning_rate": 2.964892366019819e-06, "loss": 0.6861, "step": 9875 }, { "epoch": 0.7473043017668647, "grad_norm": 1.8167731761932373, "learning_rate": 2.9632217510880267e-06, "loss": 0.6217, "step": 9876 }, { "epoch": 0.7473799704891982, "grad_norm": 1.8947856426239014, "learning_rate": 2.9615515141988137e-06, "loss": 0.65, "step": 9877 }, { "epoch": 0.7474556392115319, "grad_norm": 2.316633701324463, "learning_rate": 2.959881655456775e-06, "loss": 0.7939, "step": 9878 }, { "epoch": 0.7475313079338656, "grad_norm": 2.0706794261932373, "learning_rate": 2.9582121749664843e-06, "loss": 0.7122, "step": 9879 }, { "epoch": 0.7476069766561991, "grad_norm": 2.491586446762085, "learning_rate": 2.956543072832491e-06, "loss": 0.6208, "step": 9880 }, { "epoch": 0.7476826453785328, "grad_norm": 1.9248894453048706, "learning_rate": 2.954874349159314e-06, "loss": 0.5814, "step": 9881 }, { "epoch": 0.7477583141008665, "grad_norm": 2.1573891639709473, "learning_rate": 2.9532060040514544e-06, "loss": 0.7211, "step": 9882 }, { "epoch": 0.7478339828232, "grad_norm": 2.213338613510132, "learning_rate": 2.9515380376133995e-06, "loss": 0.5858, "step": 9883 }, { "epoch": 0.7479096515455337, "grad_norm": 2.1346771717071533, "learning_rate": 2.9498704499495923e-06, "loss": 0.6022, "step": 9884 }, { "epoch": 0.7479853202678672, "grad_norm": 2.2232656478881836, "learning_rate": 2.9482032411644665e-06, "loss": 0.5621, "step": 9885 }, { "epoch": 0.7480609889902009, "grad_norm": 2.403750419616699, "learning_rate": 2.946536411362427e-06, "loss": 0.6615, "step": 9886 }, { "epoch": 0.7481366577125346, "grad_norm": 2.009737491607666, "learning_rate": 2.9448699606478564e-06, "loss": 0.7192, "step": 9887 }, { "epoch": 0.7482123264348681, "grad_norm": 1.6389305591583252, "learning_rate": 2.943203889125114e-06, "loss": 0.6288, "step": 9888 }, { "epoch": 0.7482879951572018, "grad_norm": 2.2867612838745117, "learning_rate": 2.941538196898534e-06, "loss": 0.8133, "step": 9889 }, { "epoch": 0.7483636638795353, "grad_norm": 3.108665704727173, "learning_rate": 2.939872884072428e-06, "loss": 0.5923, "step": 9890 }, { "epoch": 0.748439332601869, "grad_norm": 1.8590794801712036, "learning_rate": 2.9382079507510856e-06, "loss": 0.5962, "step": 9891 }, { "epoch": 0.7485150013242027, "grad_norm": 2.1245317459106445, "learning_rate": 2.9365433970387614e-06, "loss": 0.6682, "step": 9892 }, { "epoch": 0.7485906700465362, "grad_norm": 2.0722525119781494, "learning_rate": 2.9348792230397044e-06, "loss": 0.616, "step": 9893 }, { "epoch": 0.7486663387688699, "grad_norm": 2.0355629920959473, "learning_rate": 2.9332154288581305e-06, "loss": 0.7896, "step": 9894 }, { "epoch": 0.7487420074912036, "grad_norm": 2.689260244369507, "learning_rate": 2.9315520145982257e-06, "loss": 0.6665, "step": 9895 }, { "epoch": 0.7488176762135371, "grad_norm": 1.7102781534194946, "learning_rate": 2.929888980364161e-06, "loss": 0.6444, "step": 9896 }, { "epoch": 0.7488933449358708, "grad_norm": 2.3687448501586914, "learning_rate": 2.9282263262600825e-06, "loss": 0.8416, "step": 9897 }, { "epoch": 0.7489690136582043, "grad_norm": 2.313998222351074, "learning_rate": 2.926564052390109e-06, "loss": 0.6892, "step": 9898 }, { "epoch": 0.749044682380538, "grad_norm": 1.732693076133728, "learning_rate": 2.9249021588583393e-06, "loss": 0.6822, "step": 9899 }, { "epoch": 0.7491203511028717, "grad_norm": 2.4626896381378174, "learning_rate": 2.9232406457688444e-06, "loss": 0.5485, "step": 9900 }, { "epoch": 0.7491960198252052, "grad_norm": 2.252591133117676, "learning_rate": 2.9215795132256786e-06, "loss": 0.7695, "step": 9901 }, { "epoch": 0.7492716885475389, "grad_norm": 1.9376341104507446, "learning_rate": 2.9199187613328577e-06, "loss": 0.6194, "step": 9902 }, { "epoch": 0.7493473572698724, "grad_norm": 2.1779584884643555, "learning_rate": 2.9182583901943925e-06, "loss": 0.7618, "step": 9903 }, { "epoch": 0.7494230259922061, "grad_norm": 2.131627321243286, "learning_rate": 2.9165983999142577e-06, "loss": 0.7612, "step": 9904 }, { "epoch": 0.7494986947145398, "grad_norm": 2.43209171295166, "learning_rate": 2.9149387905964096e-06, "loss": 0.5466, "step": 9905 }, { "epoch": 0.7495743634368733, "grad_norm": 2.0824105739593506, "learning_rate": 2.9132795623447736e-06, "loss": 0.8629, "step": 9906 }, { "epoch": 0.749650032159207, "grad_norm": 2.1721785068511963, "learning_rate": 2.9116207152632575e-06, "loss": 0.6502, "step": 9907 }, { "epoch": 0.7497257008815407, "grad_norm": 1.9518764019012451, "learning_rate": 2.909962249455746e-06, "loss": 0.7207, "step": 9908 }, { "epoch": 0.7498013696038742, "grad_norm": 3.3193180561065674, "learning_rate": 2.908304165026094e-06, "loss": 0.7037, "step": 9909 }, { "epoch": 0.7498770383262079, "grad_norm": 2.249847412109375, "learning_rate": 2.906646462078139e-06, "loss": 0.8591, "step": 9910 }, { "epoch": 0.7499527070485414, "grad_norm": 1.7684543132781982, "learning_rate": 2.904989140715691e-06, "loss": 0.6102, "step": 9911 }, { "epoch": 0.7500283757708751, "grad_norm": 2.1191229820251465, "learning_rate": 2.9033322010425397e-06, "loss": 0.6022, "step": 9912 }, { "epoch": 0.7501040444932088, "grad_norm": 2.044253349304199, "learning_rate": 2.901675643162439e-06, "loss": 0.6631, "step": 9913 }, { "epoch": 0.7501797132155423, "grad_norm": 2.230672597885132, "learning_rate": 2.9000194671791366e-06, "loss": 0.7228, "step": 9914 }, { "epoch": 0.750255381937876, "grad_norm": 4.245325088500977, "learning_rate": 2.898363673196348e-06, "loss": 0.7393, "step": 9915 }, { "epoch": 0.7503310506602096, "grad_norm": 2.3195321559906006, "learning_rate": 2.896708261317758e-06, "loss": 0.5678, "step": 9916 }, { "epoch": 0.7504067193825432, "grad_norm": 2.411245346069336, "learning_rate": 2.8950532316470373e-06, "loss": 0.7304, "step": 9917 }, { "epoch": 0.7504823881048769, "grad_norm": 3.026913642883301, "learning_rate": 2.893398584287826e-06, "loss": 0.6865, "step": 9918 }, { "epoch": 0.7505580568272104, "grad_norm": 1.9332554340362549, "learning_rate": 2.8917443193437524e-06, "loss": 0.6483, "step": 9919 }, { "epoch": 0.7506337255495441, "grad_norm": 1.6384657621383667, "learning_rate": 2.890090436918403e-06, "loss": 0.659, "step": 9920 }, { "epoch": 0.7507093942718778, "grad_norm": 2.61690354347229, "learning_rate": 2.888436937115353e-06, "loss": 0.6423, "step": 9921 }, { "epoch": 0.7507850629942113, "grad_norm": 1.9042266607284546, "learning_rate": 2.886783820038149e-06, "loss": 0.5829, "step": 9922 }, { "epoch": 0.750860731716545, "grad_norm": 2.1988930702209473, "learning_rate": 2.885131085790314e-06, "loss": 0.6954, "step": 9923 }, { "epoch": 0.7509364004388785, "grad_norm": 2.5003511905670166, "learning_rate": 2.8834787344753483e-06, "loss": 0.5322, "step": 9924 }, { "epoch": 0.7510120691612122, "grad_norm": 2.1539666652679443, "learning_rate": 2.8818267661967285e-06, "loss": 0.7318, "step": 9925 }, { "epoch": 0.7510877378835459, "grad_norm": 1.925028681755066, "learning_rate": 2.8801751810579074e-06, "loss": 0.6704, "step": 9926 }, { "epoch": 0.7511634066058794, "grad_norm": 1.9884802103042603, "learning_rate": 2.8785239791623075e-06, "loss": 0.6807, "step": 9927 }, { "epoch": 0.7512390753282131, "grad_norm": 2.154848575592041, "learning_rate": 2.8768731606133323e-06, "loss": 0.6473, "step": 9928 }, { "epoch": 0.7513147440505467, "grad_norm": 2.2960104942321777, "learning_rate": 2.8752227255143707e-06, "loss": 0.6503, "step": 9929 }, { "epoch": 0.7513904127728803, "grad_norm": 2.1243772506713867, "learning_rate": 2.873572673968768e-06, "loss": 0.9263, "step": 9930 }, { "epoch": 0.751466081495214, "grad_norm": 4.902968406677246, "learning_rate": 2.8719230060798606e-06, "loss": 0.7779, "step": 9931 }, { "epoch": 0.7515417502175475, "grad_norm": 2.171704053878784, "learning_rate": 2.870273721950955e-06, "loss": 0.6809, "step": 9932 }, { "epoch": 0.7516174189398812, "grad_norm": 2.409769296646118, "learning_rate": 2.868624821685335e-06, "loss": 0.6696, "step": 9933 }, { "epoch": 0.7516930876622149, "grad_norm": 2.15291690826416, "learning_rate": 2.8669763053862595e-06, "loss": 0.6879, "step": 9934 }, { "epoch": 0.7517687563845484, "grad_norm": 2.0499536991119385, "learning_rate": 2.8653281731569645e-06, "loss": 0.5733, "step": 9935 }, { "epoch": 0.7518444251068821, "grad_norm": 1.5915296077728271, "learning_rate": 2.8636804251006612e-06, "loss": 0.593, "step": 9936 }, { "epoch": 0.7519200938292157, "grad_norm": 2.1210434436798096, "learning_rate": 2.862033061320541e-06, "loss": 0.5076, "step": 9937 }, { "epoch": 0.7519957625515493, "grad_norm": 2.2604899406433105, "learning_rate": 2.8603860819197558e-06, "loss": 0.6894, "step": 9938 }, { "epoch": 0.752071431273883, "grad_norm": 2.7228496074676514, "learning_rate": 2.8587394870014557e-06, "loss": 0.7777, "step": 9939 }, { "epoch": 0.7521470999962165, "grad_norm": 1.9101241827011108, "learning_rate": 2.857093276668755e-06, "loss": 0.6761, "step": 9940 }, { "epoch": 0.7522227687185502, "grad_norm": 1.973021388053894, "learning_rate": 2.8554474510247377e-06, "loss": 0.4929, "step": 9941 }, { "epoch": 0.7522984374408838, "grad_norm": 1.9737138748168945, "learning_rate": 2.8538020101724762e-06, "loss": 0.6213, "step": 9942 }, { "epoch": 0.7523741061632174, "grad_norm": 2.246549606323242, "learning_rate": 2.852156954215012e-06, "loss": 0.7567, "step": 9943 }, { "epoch": 0.7524497748855511, "grad_norm": 1.7363240718841553, "learning_rate": 2.850512283255364e-06, "loss": 0.7059, "step": 9944 }, { "epoch": 0.7525254436078846, "grad_norm": 2.246518135070801, "learning_rate": 2.8488679973965264e-06, "loss": 0.8108, "step": 9945 }, { "epoch": 0.7526011123302183, "grad_norm": 1.421762466430664, "learning_rate": 2.84722409674147e-06, "loss": 0.7699, "step": 9946 }, { "epoch": 0.752676781052552, "grad_norm": 2.417525291442871, "learning_rate": 2.8455805813931415e-06, "loss": 0.6468, "step": 9947 }, { "epoch": 0.7527524497748855, "grad_norm": 2.013603687286377, "learning_rate": 2.8439374514544645e-06, "loss": 0.7207, "step": 9948 }, { "epoch": 0.7528281184972192, "grad_norm": 2.3147010803222656, "learning_rate": 2.8422947070283305e-06, "loss": 0.6962, "step": 9949 }, { "epoch": 0.7529037872195528, "grad_norm": 2.1023120880126953, "learning_rate": 2.840652348217622e-06, "loss": 0.7563, "step": 9950 }, { "epoch": 0.7529794559418864, "grad_norm": 2.0633652210235596, "learning_rate": 2.8390103751251867e-06, "loss": 0.5911, "step": 9951 }, { "epoch": 0.7530551246642201, "grad_norm": 2.3509156703948975, "learning_rate": 2.8373687878538466e-06, "loss": 0.7062, "step": 9952 }, { "epoch": 0.7531307933865536, "grad_norm": 2.152987480163574, "learning_rate": 2.8357275865064056e-06, "loss": 0.6786, "step": 9953 }, { "epoch": 0.7532064621088873, "grad_norm": 2.2282655239105225, "learning_rate": 2.834086771185641e-06, "loss": 0.648, "step": 9954 }, { "epoch": 0.7532821308312209, "grad_norm": 3.348574638366699, "learning_rate": 2.8324463419943045e-06, "loss": 0.6576, "step": 9955 }, { "epoch": 0.7533577995535545, "grad_norm": 1.9638891220092773, "learning_rate": 2.8308062990351275e-06, "loss": 0.6466, "step": 9956 }, { "epoch": 0.7534334682758882, "grad_norm": 2.2350552082061768, "learning_rate": 2.8291666424108125e-06, "loss": 0.6518, "step": 9957 }, { "epoch": 0.7535091369982218, "grad_norm": 1.747753620147705, "learning_rate": 2.827527372224046e-06, "loss": 0.756, "step": 9958 }, { "epoch": 0.7535848057205554, "grad_norm": 2.364982843399048, "learning_rate": 2.8258884885774716e-06, "loss": 0.8052, "step": 9959 }, { "epoch": 0.7536604744428891, "grad_norm": 1.823944330215454, "learning_rate": 2.8242499915737346e-06, "loss": 0.5839, "step": 9960 }, { "epoch": 0.7537361431652226, "grad_norm": 1.797965168952942, "learning_rate": 2.822611881315437e-06, "loss": 0.6997, "step": 9961 }, { "epoch": 0.7538118118875563, "grad_norm": 3.2685093879699707, "learning_rate": 2.8209741579051656e-06, "loss": 0.8114, "step": 9962 }, { "epoch": 0.7538874806098899, "grad_norm": 2.2074337005615234, "learning_rate": 2.8193368214454753e-06, "loss": 0.6622, "step": 9963 }, { "epoch": 0.7539631493322235, "grad_norm": 3.020928382873535, "learning_rate": 2.8176998720389014e-06, "loss": 0.7776, "step": 9964 }, { "epoch": 0.7540388180545572, "grad_norm": 2.763895034790039, "learning_rate": 2.816063309787964e-06, "loss": 0.7328, "step": 9965 }, { "epoch": 0.7541144867768907, "grad_norm": 2.075176477432251, "learning_rate": 2.8144271347951395e-06, "loss": 0.6773, "step": 9966 }, { "epoch": 0.7541901554992244, "grad_norm": 2.109731912612915, "learning_rate": 2.8127913471628942e-06, "loss": 0.5774, "step": 9967 }, { "epoch": 0.754265824221558, "grad_norm": 1.6521979570388794, "learning_rate": 2.811155946993668e-06, "loss": 0.7379, "step": 9968 }, { "epoch": 0.7543414929438916, "grad_norm": 2.2940516471862793, "learning_rate": 2.809520934389872e-06, "loss": 0.611, "step": 9969 }, { "epoch": 0.7544171616662253, "grad_norm": 3.290804862976074, "learning_rate": 2.8078863094538983e-06, "loss": 0.678, "step": 9970 }, { "epoch": 0.7544928303885589, "grad_norm": 1.9227198362350464, "learning_rate": 2.8062520722881114e-06, "loss": 0.659, "step": 9971 }, { "epoch": 0.7545684991108925, "grad_norm": 2.2773373126983643, "learning_rate": 2.8046182229948555e-06, "loss": 0.5751, "step": 9972 }, { "epoch": 0.7546441678332262, "grad_norm": 2.041769504547119, "learning_rate": 2.802984761676443e-06, "loss": 0.6616, "step": 9973 }, { "epoch": 0.7547198365555597, "grad_norm": 2.143829107284546, "learning_rate": 2.8013516884351637e-06, "loss": 0.7292, "step": 9974 }, { "epoch": 0.7547955052778934, "grad_norm": 1.9663937091827393, "learning_rate": 2.7997190033732943e-06, "loss": 0.5109, "step": 9975 }, { "epoch": 0.754871174000227, "grad_norm": 2.2332675457000732, "learning_rate": 2.7980867065930774e-06, "loss": 0.6587, "step": 9976 }, { "epoch": 0.7549468427225606, "grad_norm": 2.470423460006714, "learning_rate": 2.796454798196729e-06, "loss": 0.5856, "step": 9977 }, { "epoch": 0.7550225114448943, "grad_norm": 2.361738681793213, "learning_rate": 2.7948232782864444e-06, "loss": 0.6109, "step": 9978 }, { "epoch": 0.7550981801672279, "grad_norm": 2.0614898204803467, "learning_rate": 2.793192146964397e-06, "loss": 0.5809, "step": 9979 }, { "epoch": 0.7551738488895615, "grad_norm": 2.270296096801758, "learning_rate": 2.791561404332731e-06, "loss": 0.6174, "step": 9980 }, { "epoch": 0.7552495176118951, "grad_norm": 7.146130561828613, "learning_rate": 2.7899310504935724e-06, "loss": 0.5779, "step": 9981 }, { "epoch": 0.7553251863342287, "grad_norm": 2.3047327995300293, "learning_rate": 2.788301085549016e-06, "loss": 0.7432, "step": 9982 }, { "epoch": 0.7554008550565624, "grad_norm": 2.4408271312713623, "learning_rate": 2.78667150960114e-06, "loss": 0.6569, "step": 9983 }, { "epoch": 0.755476523778896, "grad_norm": 2.420197010040283, "learning_rate": 2.785042322751987e-06, "loss": 0.8204, "step": 9984 }, { "epoch": 0.7555521925012296, "grad_norm": 5.947091579437256, "learning_rate": 2.7834135251035825e-06, "loss": 0.7756, "step": 9985 }, { "epoch": 0.7556278612235633, "grad_norm": 2.492147445678711, "learning_rate": 2.781785116757936e-06, "loss": 0.6647, "step": 9986 }, { "epoch": 0.7557035299458968, "grad_norm": 2.13313889503479, "learning_rate": 2.780157097817015e-06, "loss": 0.7473, "step": 9987 }, { "epoch": 0.7557791986682305, "grad_norm": 7.684272289276123, "learning_rate": 2.778529468382774e-06, "loss": 0.6594, "step": 9988 }, { "epoch": 0.7558548673905641, "grad_norm": 1.934888482093811, "learning_rate": 2.7769022285571394e-06, "loss": 0.5884, "step": 9989 }, { "epoch": 0.7559305361128977, "grad_norm": 2.01039457321167, "learning_rate": 2.7752753784420167e-06, "loss": 0.6648, "step": 9990 }, { "epoch": 0.7560062048352314, "grad_norm": 2.073888063430786, "learning_rate": 2.7736489181392825e-06, "loss": 0.6572, "step": 9991 }, { "epoch": 0.756081873557565, "grad_norm": 1.679289698600769, "learning_rate": 2.772022847750791e-06, "loss": 0.8544, "step": 9992 }, { "epoch": 0.7561575422798986, "grad_norm": 1.9221335649490356, "learning_rate": 2.7703971673783728e-06, "loss": 0.7504, "step": 9993 }, { "epoch": 0.7562332110022322, "grad_norm": 2.1600656509399414, "learning_rate": 2.768771877123836e-06, "loss": 0.7859, "step": 9994 }, { "epoch": 0.7563088797245658, "grad_norm": 2.2807884216308594, "learning_rate": 2.7671469770889522e-06, "loss": 0.6178, "step": 9995 }, { "epoch": 0.7563845484468995, "grad_norm": 2.230642557144165, "learning_rate": 2.765522467375487e-06, "loss": 0.5125, "step": 9996 }, { "epoch": 0.7564602171692331, "grad_norm": 1.8637601137161255, "learning_rate": 2.7638983480851724e-06, "loss": 0.6613, "step": 9997 }, { "epoch": 0.7565358858915667, "grad_norm": 2.0826594829559326, "learning_rate": 2.7622746193197115e-06, "loss": 0.8624, "step": 9998 }, { "epoch": 0.7566115546139004, "grad_norm": 2.7239291667938232, "learning_rate": 2.7606512811807885e-06, "loss": 0.7223, "step": 9999 }, { "epoch": 0.756687223336234, "grad_norm": 1.7906194925308228, "learning_rate": 2.7590283337700626e-06, "loss": 0.8105, "step": 10000 }, { "epoch": 0.7567628920585676, "grad_norm": 2.736328125, "learning_rate": 2.757405777189168e-06, "loss": 0.8085, "step": 10001 }, { "epoch": 0.7568385607809012, "grad_norm": 3.0120749473571777, "learning_rate": 2.7557836115397153e-06, "loss": 0.5936, "step": 10002 }, { "epoch": 0.7569142295032348, "grad_norm": 2.22469425201416, "learning_rate": 2.754161836923289e-06, "loss": 0.4523, "step": 10003 }, { "epoch": 0.7569898982255685, "grad_norm": 2.1429076194763184, "learning_rate": 2.7525404534414494e-06, "loss": 0.5562, "step": 10004 }, { "epoch": 0.7570655669479021, "grad_norm": 2.4758174419403076, "learning_rate": 2.750919461195734e-06, "loss": 0.6039, "step": 10005 }, { "epoch": 0.7571412356702357, "grad_norm": 1.938726782798767, "learning_rate": 2.749298860287653e-06, "loss": 0.6375, "step": 10006 }, { "epoch": 0.7572169043925693, "grad_norm": 2.25929594039917, "learning_rate": 2.7476786508186953e-06, "loss": 0.7259, "step": 10007 }, { "epoch": 0.757292573114903, "grad_norm": 1.9672093391418457, "learning_rate": 2.7460588328903265e-06, "loss": 0.6031, "step": 10008 }, { "epoch": 0.7573682418372366, "grad_norm": 2.4729254245758057, "learning_rate": 2.7444394066039776e-06, "loss": 0.6826, "step": 10009 }, { "epoch": 0.7574439105595702, "grad_norm": 2.6728367805480957, "learning_rate": 2.742820372061063e-06, "loss": 0.6732, "step": 10010 }, { "epoch": 0.7575195792819038, "grad_norm": 2.688096523284912, "learning_rate": 2.7412017293629802e-06, "loss": 0.7156, "step": 10011 }, { "epoch": 0.7575952480042375, "grad_norm": 3.0258054733276367, "learning_rate": 2.7395834786110872e-06, "loss": 0.5589, "step": 10012 }, { "epoch": 0.757670916726571, "grad_norm": 1.7880308628082275, "learning_rate": 2.7379656199067244e-06, "loss": 0.6092, "step": 10013 }, { "epoch": 0.7577465854489047, "grad_norm": 2.20332407951355, "learning_rate": 2.736348153351208e-06, "loss": 0.9061, "step": 10014 }, { "epoch": 0.7578222541712383, "grad_norm": 1.8708069324493408, "learning_rate": 2.73473107904583e-06, "loss": 0.6117, "step": 10015 }, { "epoch": 0.7578979228935719, "grad_norm": 2.2508816719055176, "learning_rate": 2.7331143970918554e-06, "loss": 0.8897, "step": 10016 }, { "epoch": 0.7579735916159056, "grad_norm": 2.168473243713379, "learning_rate": 2.7314981075905277e-06, "loss": 0.6392, "step": 10017 }, { "epoch": 0.7580492603382392, "grad_norm": 1.8958176374435425, "learning_rate": 2.729882210643066e-06, "loss": 0.631, "step": 10018 }, { "epoch": 0.7581249290605728, "grad_norm": 2.8258678913116455, "learning_rate": 2.7282667063506567e-06, "loss": 0.6969, "step": 10019 }, { "epoch": 0.7582005977829064, "grad_norm": 1.941758155822754, "learning_rate": 2.7266515948144726e-06, "loss": 0.6123, "step": 10020 }, { "epoch": 0.75827626650524, "grad_norm": 2.0148000717163086, "learning_rate": 2.7250368761356524e-06, "loss": 0.7288, "step": 10021 }, { "epoch": 0.7583519352275737, "grad_norm": 2.3907501697540283, "learning_rate": 2.723422550415325e-06, "loss": 0.7616, "step": 10022 }, { "epoch": 0.7584276039499073, "grad_norm": 2.1037983894348145, "learning_rate": 2.7218086177545744e-06, "loss": 0.6279, "step": 10023 }, { "epoch": 0.7585032726722409, "grad_norm": 1.8898531198501587, "learning_rate": 2.7201950782544758e-06, "loss": 0.6425, "step": 10024 }, { "epoch": 0.7585789413945746, "grad_norm": 2.113624334335327, "learning_rate": 2.7185819320160714e-06, "loss": 0.6748, "step": 10025 }, { "epoch": 0.7586546101169082, "grad_norm": 2.033940553665161, "learning_rate": 2.7169691791403844e-06, "loss": 0.5771, "step": 10026 }, { "epoch": 0.7587302788392418, "grad_norm": 2.1298940181732178, "learning_rate": 2.715356819728408e-06, "loss": 0.7522, "step": 10027 }, { "epoch": 0.7588059475615754, "grad_norm": 2.124619960784912, "learning_rate": 2.7137448538811158e-06, "loss": 0.6404, "step": 10028 }, { "epoch": 0.758881616283909, "grad_norm": 1.9840469360351562, "learning_rate": 2.712133281699454e-06, "loss": 0.5566, "step": 10029 }, { "epoch": 0.7589572850062427, "grad_norm": 2.5573880672454834, "learning_rate": 2.710522103284342e-06, "loss": 0.7079, "step": 10030 }, { "epoch": 0.7590329537285763, "grad_norm": 2.176572561264038, "learning_rate": 2.7089113187366758e-06, "loss": 0.5414, "step": 10031 }, { "epoch": 0.7591086224509099, "grad_norm": 2.228116273880005, "learning_rate": 2.7073009281573362e-06, "loss": 0.7058, "step": 10032 }, { "epoch": 0.7591842911732435, "grad_norm": 2.373213052749634, "learning_rate": 2.705690931647162e-06, "loss": 0.7606, "step": 10033 }, { "epoch": 0.7592599598955772, "grad_norm": 1.8033760786056519, "learning_rate": 2.704081329306981e-06, "loss": 0.716, "step": 10034 }, { "epoch": 0.7593356286179108, "grad_norm": 2.362074375152588, "learning_rate": 2.70247212123759e-06, "loss": 0.7064, "step": 10035 }, { "epoch": 0.7594112973402444, "grad_norm": 2.1984481811523438, "learning_rate": 2.700863307539763e-06, "loss": 0.7345, "step": 10036 }, { "epoch": 0.759486966062578, "grad_norm": 3.2609875202178955, "learning_rate": 2.699254888314251e-06, "loss": 0.7909, "step": 10037 }, { "epoch": 0.7595626347849117, "grad_norm": 1.9195688962936401, "learning_rate": 2.697646863661776e-06, "loss": 0.6074, "step": 10038 }, { "epoch": 0.7596383035072453, "grad_norm": 2.3153951168060303, "learning_rate": 2.6960392336830385e-06, "loss": 0.7519, "step": 10039 }, { "epoch": 0.7597139722295789, "grad_norm": 2.753959894180298, "learning_rate": 2.6944319984787166e-06, "loss": 0.6649, "step": 10040 }, { "epoch": 0.7597896409519125, "grad_norm": 2.4450736045837402, "learning_rate": 2.692825158149452e-06, "loss": 0.7087, "step": 10041 }, { "epoch": 0.7598653096742461, "grad_norm": 1.9317896366119385, "learning_rate": 2.691218712795879e-06, "loss": 0.5861, "step": 10042 }, { "epoch": 0.7599409783965798, "grad_norm": 2.266354560852051, "learning_rate": 2.689612662518598e-06, "loss": 0.6434, "step": 10043 }, { "epoch": 0.7600166471189134, "grad_norm": 1.8731240034103394, "learning_rate": 2.6880070074181794e-06, "loss": 0.5763, "step": 10044 }, { "epoch": 0.760092315841247, "grad_norm": 2.1654038429260254, "learning_rate": 2.6864017475951778e-06, "loss": 0.5699, "step": 10045 }, { "epoch": 0.7601679845635806, "grad_norm": 2.76196026802063, "learning_rate": 2.6847968831501187e-06, "loss": 0.6435, "step": 10046 }, { "epoch": 0.7602436532859143, "grad_norm": 2.265397548675537, "learning_rate": 2.6831924141835052e-06, "loss": 0.6767, "step": 10047 }, { "epoch": 0.7603193220082479, "grad_norm": 1.6206222772598267, "learning_rate": 2.6815883407958136e-06, "loss": 0.6914, "step": 10048 }, { "epoch": 0.7603949907305815, "grad_norm": 2.7728967666625977, "learning_rate": 2.6799846630874965e-06, "loss": 0.7587, "step": 10049 }, { "epoch": 0.7604706594529151, "grad_norm": 2.363548517227173, "learning_rate": 2.678381381158981e-06, "loss": 0.7465, "step": 10050 }, { "epoch": 0.7605463281752488, "grad_norm": 1.4986516237258911, "learning_rate": 2.67677849511067e-06, "loss": 0.5781, "step": 10051 }, { "epoch": 0.7606219968975824, "grad_norm": 2.2090630531311035, "learning_rate": 2.6751760050429415e-06, "loss": 0.7364, "step": 10052 }, { "epoch": 0.760697665619916, "grad_norm": 2.0763497352600098, "learning_rate": 2.673573911056148e-06, "loss": 0.9323, "step": 10053 }, { "epoch": 0.7607733343422496, "grad_norm": 1.6616204977035522, "learning_rate": 2.6719722132506225e-06, "loss": 0.6069, "step": 10054 }, { "epoch": 0.7608490030645833, "grad_norm": 2.090778112411499, "learning_rate": 2.67037091172666e-06, "loss": 0.676, "step": 10055 }, { "epoch": 0.7609246717869169, "grad_norm": 1.8353502750396729, "learning_rate": 2.6687700065845417e-06, "loss": 0.4939, "step": 10056 }, { "epoch": 0.7610003405092505, "grad_norm": 1.8233598470687866, "learning_rate": 2.667169497924528e-06, "loss": 0.5752, "step": 10057 }, { "epoch": 0.7610760092315841, "grad_norm": 1.976509928703308, "learning_rate": 2.6655693858468413e-06, "loss": 0.6282, "step": 10058 }, { "epoch": 0.7611516779539177, "grad_norm": 3.537311553955078, "learning_rate": 2.6639696704516876e-06, "loss": 0.7171, "step": 10059 }, { "epoch": 0.7612273466762514, "grad_norm": 2.408458709716797, "learning_rate": 2.6623703518392456e-06, "loss": 0.733, "step": 10060 }, { "epoch": 0.761303015398585, "grad_norm": 1.9443494081497192, "learning_rate": 2.6607714301096737e-06, "loss": 0.6078, "step": 10061 }, { "epoch": 0.7613786841209186, "grad_norm": 2.5981321334838867, "learning_rate": 2.659172905363094e-06, "loss": 0.7041, "step": 10062 }, { "epoch": 0.7614543528432522, "grad_norm": 2.7510721683502197, "learning_rate": 2.657574777699617e-06, "loss": 0.7332, "step": 10063 }, { "epoch": 0.7615300215655859, "grad_norm": 2.1146605014801025, "learning_rate": 2.6559770472193217e-06, "loss": 0.6687, "step": 10064 }, { "epoch": 0.7616056902879195, "grad_norm": 2.1599109172821045, "learning_rate": 2.654379714022266e-06, "loss": 0.6745, "step": 10065 }, { "epoch": 0.7616813590102531, "grad_norm": 2.144578695297241, "learning_rate": 2.6527827782084733e-06, "loss": 0.6877, "step": 10066 }, { "epoch": 0.7617570277325867, "grad_norm": 3.8134841918945312, "learning_rate": 2.6511862398779495e-06, "loss": 0.6743, "step": 10067 }, { "epoch": 0.7618326964549204, "grad_norm": 1.7415173053741455, "learning_rate": 2.6495900991306847e-06, "loss": 0.6796, "step": 10068 }, { "epoch": 0.761908365177254, "grad_norm": 2.2260866165161133, "learning_rate": 2.647994356066624e-06, "loss": 0.7098, "step": 10069 }, { "epoch": 0.7619840338995876, "grad_norm": 2.0269641876220703, "learning_rate": 2.6463990107857016e-06, "loss": 0.7222, "step": 10070 }, { "epoch": 0.7620597026219212, "grad_norm": 2.0193490982055664, "learning_rate": 2.6448040633878226e-06, "loss": 0.6655, "step": 10071 }, { "epoch": 0.7621353713442548, "grad_norm": 2.8648266792297363, "learning_rate": 2.6432095139728695e-06, "loss": 0.6558, "step": 10072 }, { "epoch": 0.7622110400665885, "grad_norm": 2.253610610961914, "learning_rate": 2.641615362640696e-06, "loss": 0.5851, "step": 10073 }, { "epoch": 0.7622867087889221, "grad_norm": 2.2244420051574707, "learning_rate": 2.6400216094911348e-06, "loss": 0.6313, "step": 10074 }, { "epoch": 0.7623623775112557, "grad_norm": 2.7857227325439453, "learning_rate": 2.638428254623993e-06, "loss": 0.7529, "step": 10075 }, { "epoch": 0.7624380462335894, "grad_norm": 1.7802857160568237, "learning_rate": 2.636835298139048e-06, "loss": 0.616, "step": 10076 }, { "epoch": 0.762513714955923, "grad_norm": 2.1480860710144043, "learning_rate": 2.635242740136054e-06, "loss": 0.6676, "step": 10077 }, { "epoch": 0.7625893836782566, "grad_norm": 1.7942516803741455, "learning_rate": 2.6336505807147486e-06, "loss": 0.6174, "step": 10078 }, { "epoch": 0.7626650524005902, "grad_norm": 1.9763191938400269, "learning_rate": 2.6320588199748383e-06, "loss": 0.7353, "step": 10079 }, { "epoch": 0.7627407211229238, "grad_norm": 1.8652597665786743, "learning_rate": 2.6304674580159983e-06, "loss": 0.7269, "step": 10080 }, { "epoch": 0.7628163898452575, "grad_norm": 1.8752639293670654, "learning_rate": 2.628876494937888e-06, "loss": 0.6001, "step": 10081 }, { "epoch": 0.7628920585675911, "grad_norm": 1.8885382413864136, "learning_rate": 2.6272859308401375e-06, "loss": 0.6663, "step": 10082 }, { "epoch": 0.7629677272899247, "grad_norm": 2.4886248111724854, "learning_rate": 2.6256957658223537e-06, "loss": 0.7086, "step": 10083 }, { "epoch": 0.7630433960122583, "grad_norm": 2.2221665382385254, "learning_rate": 2.6241059999841183e-06, "loss": 0.6899, "step": 10084 }, { "epoch": 0.7631190647345919, "grad_norm": 1.9292700290679932, "learning_rate": 2.6225166334249877e-06, "loss": 0.8171, "step": 10085 }, { "epoch": 0.7631947334569256, "grad_norm": 2.5671818256378174, "learning_rate": 2.620927666244496e-06, "loss": 0.6218, "step": 10086 }, { "epoch": 0.7632704021792592, "grad_norm": 2.0714309215545654, "learning_rate": 2.6193390985421403e-06, "loss": 0.6615, "step": 10087 }, { "epoch": 0.7633460709015928, "grad_norm": 1.8017868995666504, "learning_rate": 2.6177509304174105e-06, "loss": 0.6723, "step": 10088 }, { "epoch": 0.7634217396239265, "grad_norm": 1.793261170387268, "learning_rate": 2.616163161969762e-06, "loss": 0.5502, "step": 10089 }, { "epoch": 0.7634974083462601, "grad_norm": 2.2344236373901367, "learning_rate": 2.614575793298622e-06, "loss": 0.6795, "step": 10090 }, { "epoch": 0.7635730770685937, "grad_norm": 1.8927500247955322, "learning_rate": 2.612988824503399e-06, "loss": 0.8756, "step": 10091 }, { "epoch": 0.7636487457909273, "grad_norm": 3.597015142440796, "learning_rate": 2.6114022556834717e-06, "loss": 0.6986, "step": 10092 }, { "epoch": 0.7637244145132609, "grad_norm": 2.1370482444763184, "learning_rate": 2.6098160869382026e-06, "loss": 0.6671, "step": 10093 }, { "epoch": 0.7638000832355946, "grad_norm": 2.2916955947875977, "learning_rate": 2.6082303183669164e-06, "loss": 0.5868, "step": 10094 }, { "epoch": 0.7638757519579282, "grad_norm": 1.7020882368087769, "learning_rate": 2.606644950068921e-06, "loss": 0.5157, "step": 10095 }, { "epoch": 0.7639514206802618, "grad_norm": 1.9924485683441162, "learning_rate": 2.6050599821434985e-06, "loss": 0.6293, "step": 10096 }, { "epoch": 0.7640270894025954, "grad_norm": 2.115015983581543, "learning_rate": 2.603475414689905e-06, "loss": 0.7781, "step": 10097 }, { "epoch": 0.7641027581249291, "grad_norm": 2.8572585582733154, "learning_rate": 2.6018912478073657e-06, "loss": 0.5423, "step": 10098 }, { "epoch": 0.7641784268472627, "grad_norm": 1.8709588050842285, "learning_rate": 2.600307481595092e-06, "loss": 0.7225, "step": 10099 }, { "epoch": 0.7642540955695963, "grad_norm": 2.634243965148926, "learning_rate": 2.5987241161522665e-06, "loss": 0.837, "step": 10100 }, { "epoch": 0.7643297642919299, "grad_norm": 1.8500239849090576, "learning_rate": 2.597141151578038e-06, "loss": 0.6693, "step": 10101 }, { "epoch": 0.7644054330142636, "grad_norm": 1.8766469955444336, "learning_rate": 2.5955585879715396e-06, "loss": 0.6458, "step": 10102 }, { "epoch": 0.7644811017365972, "grad_norm": 2.2552802562713623, "learning_rate": 2.5939764254318767e-06, "loss": 0.7068, "step": 10103 }, { "epoch": 0.7645567704589308, "grad_norm": 2.3717575073242188, "learning_rate": 2.5923946640581307e-06, "loss": 0.685, "step": 10104 }, { "epoch": 0.7646324391812644, "grad_norm": 2.0326504707336426, "learning_rate": 2.590813303949355e-06, "loss": 0.8291, "step": 10105 }, { "epoch": 0.764708107903598, "grad_norm": 2.752199172973633, "learning_rate": 2.5892323452045797e-06, "loss": 0.7143, "step": 10106 }, { "epoch": 0.7647837766259317, "grad_norm": 2.53975510597229, "learning_rate": 2.5876517879228106e-06, "loss": 0.8028, "step": 10107 }, { "epoch": 0.7648594453482653, "grad_norm": 1.4467507600784302, "learning_rate": 2.5860716322030263e-06, "loss": 0.7469, "step": 10108 }, { "epoch": 0.7649351140705989, "grad_norm": 2.353743553161621, "learning_rate": 2.5844918781441815e-06, "loss": 0.5224, "step": 10109 }, { "epoch": 0.7650107827929326, "grad_norm": 2.355583906173706, "learning_rate": 2.582912525845205e-06, "loss": 0.7163, "step": 10110 }, { "epoch": 0.7650864515152662, "grad_norm": 1.8890095949172974, "learning_rate": 2.5813335754050047e-06, "loss": 0.5741, "step": 10111 }, { "epoch": 0.7651621202375998, "grad_norm": 2.1098287105560303, "learning_rate": 2.5797550269224544e-06, "loss": 0.6586, "step": 10112 }, { "epoch": 0.7652377889599334, "grad_norm": 2.207130193710327, "learning_rate": 2.5781768804964063e-06, "loss": 0.7282, "step": 10113 }, { "epoch": 0.765313457682267, "grad_norm": 2.3205008506774902, "learning_rate": 2.576599136225698e-06, "loss": 0.6441, "step": 10114 }, { "epoch": 0.7653891264046007, "grad_norm": 2.5144574642181396, "learning_rate": 2.5750217942091252e-06, "loss": 0.6996, "step": 10115 }, { "epoch": 0.7654647951269343, "grad_norm": 2.885598659515381, "learning_rate": 2.573444854545468e-06, "loss": 0.5839, "step": 10116 }, { "epoch": 0.7655404638492679, "grad_norm": 1.7929126024246216, "learning_rate": 2.571868317333481e-06, "loss": 0.4543, "step": 10117 }, { "epoch": 0.7656161325716015, "grad_norm": 2.23551082611084, "learning_rate": 2.5702921826718902e-06, "loss": 0.7682, "step": 10118 }, { "epoch": 0.7656918012939351, "grad_norm": 2.1573288440704346, "learning_rate": 2.5687164506593993e-06, "loss": 0.6006, "step": 10119 }, { "epoch": 0.7657674700162688, "grad_norm": 3.274181842803955, "learning_rate": 2.5671411213946864e-06, "loss": 0.5829, "step": 10120 }, { "epoch": 0.7658431387386024, "grad_norm": 2.04793381690979, "learning_rate": 2.565566194976402e-06, "loss": 0.5535, "step": 10121 }, { "epoch": 0.765918807460936, "grad_norm": 2.4527783393859863, "learning_rate": 2.5639916715031764e-06, "loss": 0.6092, "step": 10122 }, { "epoch": 0.7659944761832697, "grad_norm": 2.0146374702453613, "learning_rate": 2.5624175510736047e-06, "loss": 0.7187, "step": 10123 }, { "epoch": 0.7660701449056033, "grad_norm": 2.2973668575286865, "learning_rate": 2.5608438337862695e-06, "loss": 0.7676, "step": 10124 }, { "epoch": 0.7661458136279369, "grad_norm": 2.033846139907837, "learning_rate": 2.559270519739723e-06, "loss": 0.6969, "step": 10125 }, { "epoch": 0.7662214823502705, "grad_norm": 2.0946013927459717, "learning_rate": 2.5576976090324856e-06, "loss": 0.7496, "step": 10126 }, { "epoch": 0.7662971510726041, "grad_norm": 1.880980372428894, "learning_rate": 2.556125101763061e-06, "loss": 0.784, "step": 10127 }, { "epoch": 0.7663728197949378, "grad_norm": 1.8983718156814575, "learning_rate": 2.554552998029924e-06, "loss": 0.6489, "step": 10128 }, { "epoch": 0.7664484885172714, "grad_norm": 2.347182512283325, "learning_rate": 2.552981297931526e-06, "loss": 0.6194, "step": 10129 }, { "epoch": 0.766524157239605, "grad_norm": 3.0601274967193604, "learning_rate": 2.5514100015662915e-06, "loss": 0.5625, "step": 10130 }, { "epoch": 0.7665998259619387, "grad_norm": 2.182065725326538, "learning_rate": 2.5498391090326193e-06, "loss": 0.7296, "step": 10131 }, { "epoch": 0.7666754946842722, "grad_norm": 1.8279647827148438, "learning_rate": 2.5482686204288874e-06, "loss": 0.6273, "step": 10132 }, { "epoch": 0.7667511634066059, "grad_norm": 2.4019768238067627, "learning_rate": 2.5466985358534365e-06, "loss": 0.6006, "step": 10133 }, { "epoch": 0.7668268321289395, "grad_norm": 1.9945967197418213, "learning_rate": 2.5451288554045986e-06, "loss": 0.8876, "step": 10134 }, { "epoch": 0.7669025008512731, "grad_norm": 2.012547254562378, "learning_rate": 2.5435595791806693e-06, "loss": 0.6557, "step": 10135 }, { "epoch": 0.7669781695736068, "grad_norm": 2.253406047821045, "learning_rate": 2.541990707279925e-06, "loss": 0.599, "step": 10136 }, { "epoch": 0.7670538382959404, "grad_norm": 1.9846205711364746, "learning_rate": 2.5404222398006072e-06, "loss": 0.6009, "step": 10137 }, { "epoch": 0.767129507018274, "grad_norm": 2.01389741897583, "learning_rate": 2.538854176840941e-06, "loss": 0.6161, "step": 10138 }, { "epoch": 0.7672051757406076, "grad_norm": 1.8462287187576294, "learning_rate": 2.537286518499125e-06, "loss": 0.7132, "step": 10139 }, { "epoch": 0.7672808444629412, "grad_norm": 2.2837891578674316, "learning_rate": 2.5357192648733296e-06, "loss": 0.7349, "step": 10140 }, { "epoch": 0.7673565131852749, "grad_norm": 2.81986403465271, "learning_rate": 2.534152416061703e-06, "loss": 0.6632, "step": 10141 }, { "epoch": 0.7674321819076085, "grad_norm": 2.114987850189209, "learning_rate": 2.5325859721623636e-06, "loss": 0.6577, "step": 10142 }, { "epoch": 0.7675078506299421, "grad_norm": 2.2492518424987793, "learning_rate": 2.5310199332734123e-06, "loss": 0.7331, "step": 10143 }, { "epoch": 0.7675835193522758, "grad_norm": 2.2491164207458496, "learning_rate": 2.52945429949291e-06, "loss": 0.6299, "step": 10144 }, { "epoch": 0.7676591880746093, "grad_norm": 4.50223445892334, "learning_rate": 2.527889070918911e-06, "loss": 0.6815, "step": 10145 }, { "epoch": 0.767734856796943, "grad_norm": 1.7851481437683105, "learning_rate": 2.526324247649435e-06, "loss": 0.5633, "step": 10146 }, { "epoch": 0.7678105255192766, "grad_norm": 2.163814067840576, "learning_rate": 2.5247598297824694e-06, "loss": 0.6235, "step": 10147 }, { "epoch": 0.7678861942416102, "grad_norm": 2.0847737789154053, "learning_rate": 2.523195817415987e-06, "loss": 0.6388, "step": 10148 }, { "epoch": 0.7679618629639439, "grad_norm": 1.7596197128295898, "learning_rate": 2.5216322106479305e-06, "loss": 0.6007, "step": 10149 }, { "epoch": 0.7680375316862775, "grad_norm": 3.082669973373413, "learning_rate": 2.52006900957622e-06, "loss": 0.6749, "step": 10150 }, { "epoch": 0.7681132004086111, "grad_norm": 2.0572025775909424, "learning_rate": 2.518506214298745e-06, "loss": 0.737, "step": 10151 }, { "epoch": 0.7681888691309448, "grad_norm": 2.777792453765869, "learning_rate": 2.5169438249133753e-06, "loss": 0.7714, "step": 10152 }, { "epoch": 0.7682645378532783, "grad_norm": 2.290621280670166, "learning_rate": 2.515381841517952e-06, "loss": 0.5603, "step": 10153 }, { "epoch": 0.768340206575612, "grad_norm": 2.1354238986968994, "learning_rate": 2.5138202642102922e-06, "loss": 0.762, "step": 10154 }, { "epoch": 0.7684158752979456, "grad_norm": 1.9276994466781616, "learning_rate": 2.512259093088186e-06, "loss": 0.7416, "step": 10155 }, { "epoch": 0.7684915440202792, "grad_norm": 1.9895371198654175, "learning_rate": 2.5106983282493985e-06, "loss": 0.564, "step": 10156 }, { "epoch": 0.7685672127426129, "grad_norm": 3.0279078483581543, "learning_rate": 2.5091379697916745e-06, "loss": 0.6865, "step": 10157 }, { "epoch": 0.7686428814649464, "grad_norm": 1.848624348640442, "learning_rate": 2.5075780178127215e-06, "loss": 0.6344, "step": 10158 }, { "epoch": 0.7687185501872801, "grad_norm": 1.8406096696853638, "learning_rate": 2.506018472410229e-06, "loss": 0.6756, "step": 10159 }, { "epoch": 0.7687942189096137, "grad_norm": 9.052090644836426, "learning_rate": 2.5044593336818697e-06, "loss": 0.5753, "step": 10160 }, { "epoch": 0.7688698876319473, "grad_norm": 2.3040497303009033, "learning_rate": 2.502900601725274e-06, "loss": 0.5959, "step": 10161 }, { "epoch": 0.768945556354281, "grad_norm": 2.6110458374023438, "learning_rate": 2.501342276638056e-06, "loss": 0.8063, "step": 10162 }, { "epoch": 0.7690212250766146, "grad_norm": 2.4658761024475098, "learning_rate": 2.4997843585178035e-06, "loss": 0.6493, "step": 10163 }, { "epoch": 0.7690968937989482, "grad_norm": 2.334641456604004, "learning_rate": 2.4982268474620786e-06, "loss": 0.7169, "step": 10164 }, { "epoch": 0.7691725625212819, "grad_norm": 1.93704354763031, "learning_rate": 2.4966697435684195e-06, "loss": 0.6254, "step": 10165 }, { "epoch": 0.7692482312436154, "grad_norm": 2.5095372200012207, "learning_rate": 2.495113046934334e-06, "loss": 0.7474, "step": 10166 }, { "epoch": 0.7693238999659491, "grad_norm": 2.160459041595459, "learning_rate": 2.4935567576573085e-06, "loss": 0.7978, "step": 10167 }, { "epoch": 0.7693995686882827, "grad_norm": 2.0386502742767334, "learning_rate": 2.4920008758348072e-06, "loss": 0.696, "step": 10168 }, { "epoch": 0.7694752374106163, "grad_norm": 2.827305555343628, "learning_rate": 2.4904454015642546e-06, "loss": 0.6549, "step": 10169 }, { "epoch": 0.76955090613295, "grad_norm": 1.8038434982299805, "learning_rate": 2.4888903349430677e-06, "loss": 0.658, "step": 10170 }, { "epoch": 0.7696265748552835, "grad_norm": 3.055170774459839, "learning_rate": 2.48733567606863e-06, "loss": 0.657, "step": 10171 }, { "epoch": 0.7697022435776172, "grad_norm": 3.3407459259033203, "learning_rate": 2.485781425038294e-06, "loss": 0.6095, "step": 10172 }, { "epoch": 0.7697779122999509, "grad_norm": 2.0380611419677734, "learning_rate": 2.484227581949396e-06, "loss": 0.5941, "step": 10173 }, { "epoch": 0.7698535810222844, "grad_norm": 2.155287504196167, "learning_rate": 2.4826741468992407e-06, "loss": 0.7884, "step": 10174 }, { "epoch": 0.7699292497446181, "grad_norm": 2.009636163711548, "learning_rate": 2.4811211199851102e-06, "loss": 0.6486, "step": 10175 }, { "epoch": 0.7700049184669517, "grad_norm": 2.339906930923462, "learning_rate": 2.479568501304259e-06, "loss": 0.4735, "step": 10176 }, { "epoch": 0.7700805871892853, "grad_norm": 2.612977981567383, "learning_rate": 2.4780162909539178e-06, "loss": 0.7915, "step": 10177 }, { "epoch": 0.770156255911619, "grad_norm": 2.249410629272461, "learning_rate": 2.4764644890312947e-06, "loss": 0.6992, "step": 10178 }, { "epoch": 0.7702319246339525, "grad_norm": 1.8891581296920776, "learning_rate": 2.474913095633562e-06, "loss": 0.6526, "step": 10179 }, { "epoch": 0.7703075933562862, "grad_norm": 3.0311803817749023, "learning_rate": 2.473362110857873e-06, "loss": 0.7193, "step": 10180 }, { "epoch": 0.7703832620786198, "grad_norm": 2.6356663703918457, "learning_rate": 2.4718115348013604e-06, "loss": 0.6858, "step": 10181 }, { "epoch": 0.7704589308009534, "grad_norm": 2.00958514213562, "learning_rate": 2.4702613675611284e-06, "loss": 0.7983, "step": 10182 }, { "epoch": 0.7705345995232871, "grad_norm": 2.9431421756744385, "learning_rate": 2.468711609234246e-06, "loss": 0.6924, "step": 10183 }, { "epoch": 0.7706102682456206, "grad_norm": 1.6894335746765137, "learning_rate": 2.467162259917767e-06, "loss": 0.5111, "step": 10184 }, { "epoch": 0.7706859369679543, "grad_norm": 2.004911184310913, "learning_rate": 2.4656133197087166e-06, "loss": 0.6717, "step": 10185 }, { "epoch": 0.770761605690288, "grad_norm": 2.340867280960083, "learning_rate": 2.4640647887040957e-06, "loss": 0.8315, "step": 10186 }, { "epoch": 0.7708372744126215, "grad_norm": 2.400015115737915, "learning_rate": 2.4625166670008777e-06, "loss": 0.691, "step": 10187 }, { "epoch": 0.7709129431349552, "grad_norm": 2.0905392169952393, "learning_rate": 2.46096895469601e-06, "loss": 0.6846, "step": 10188 }, { "epoch": 0.7709886118572888, "grad_norm": 2.421705961227417, "learning_rate": 2.45942165188642e-06, "loss": 0.622, "step": 10189 }, { "epoch": 0.7710642805796224, "grad_norm": 2.1648590564727783, "learning_rate": 2.457874758668995e-06, "loss": 0.7249, "step": 10190 }, { "epoch": 0.7711399493019561, "grad_norm": 2.4879276752471924, "learning_rate": 2.4563282751406145e-06, "loss": 0.7729, "step": 10191 }, { "epoch": 0.7712156180242896, "grad_norm": 1.9964536428451538, "learning_rate": 2.4547822013981253e-06, "loss": 0.735, "step": 10192 }, { "epoch": 0.7712912867466233, "grad_norm": 2.6276025772094727, "learning_rate": 2.4532365375383423e-06, "loss": 0.6983, "step": 10193 }, { "epoch": 0.771366955468957, "grad_norm": 2.3542721271514893, "learning_rate": 2.451691283658061e-06, "loss": 0.751, "step": 10194 }, { "epoch": 0.7714426241912905, "grad_norm": 1.8065311908721924, "learning_rate": 2.4501464398540494e-06, "loss": 0.67, "step": 10195 }, { "epoch": 0.7715182929136242, "grad_norm": 1.8417807817459106, "learning_rate": 2.4486020062230577e-06, "loss": 0.6552, "step": 10196 }, { "epoch": 0.7715939616359577, "grad_norm": 2.1254916191101074, "learning_rate": 2.4470579828617955e-06, "loss": 0.6009, "step": 10197 }, { "epoch": 0.7716696303582914, "grad_norm": 2.3638763427734375, "learning_rate": 2.4455143698669573e-06, "loss": 0.6551, "step": 10198 }, { "epoch": 0.7717452990806251, "grad_norm": 2.0712759494781494, "learning_rate": 2.4439711673352094e-06, "loss": 0.6991, "step": 10199 }, { "epoch": 0.7718209678029586, "grad_norm": 1.8671433925628662, "learning_rate": 2.4424283753631906e-06, "loss": 0.7074, "step": 10200 }, { "epoch": 0.7718966365252923, "grad_norm": 2.3705897331237793, "learning_rate": 2.4408859940475177e-06, "loss": 0.6934, "step": 10201 }, { "epoch": 0.771972305247626, "grad_norm": 2.1461682319641113, "learning_rate": 2.4393440234847788e-06, "loss": 0.6653, "step": 10202 }, { "epoch": 0.7720479739699595, "grad_norm": 3.3979363441467285, "learning_rate": 2.4378024637715394e-06, "loss": 0.509, "step": 10203 }, { "epoch": 0.7721236426922932, "grad_norm": 1.9009852409362793, "learning_rate": 2.4362613150043307e-06, "loss": 0.657, "step": 10204 }, { "epoch": 0.7721993114146267, "grad_norm": 2.3193490505218506, "learning_rate": 2.4347205772796663e-06, "loss": 0.8041, "step": 10205 }, { "epoch": 0.7722749801369604, "grad_norm": 2.223050355911255, "learning_rate": 2.4331802506940397e-06, "loss": 0.6525, "step": 10206 }, { "epoch": 0.772350648859294, "grad_norm": 2.456544876098633, "learning_rate": 2.4316403353439026e-06, "loss": 0.6296, "step": 10207 }, { "epoch": 0.7724263175816276, "grad_norm": 2.209892511367798, "learning_rate": 2.430100831325692e-06, "loss": 0.6408, "step": 10208 }, { "epoch": 0.7725019863039613, "grad_norm": 2.1688029766082764, "learning_rate": 2.428561738735817e-06, "loss": 0.8956, "step": 10209 }, { "epoch": 0.7725776550262948, "grad_norm": 2.3047657012939453, "learning_rate": 2.4270230576706603e-06, "loss": 0.6141, "step": 10210 }, { "epoch": 0.7726533237486285, "grad_norm": 2.0687978267669678, "learning_rate": 2.42548478822658e-06, "loss": 0.5368, "step": 10211 }, { "epoch": 0.7727289924709622, "grad_norm": 3.177206039428711, "learning_rate": 2.4239469304999065e-06, "loss": 0.7155, "step": 10212 }, { "epoch": 0.7728046611932957, "grad_norm": 1.9328745603561401, "learning_rate": 2.4224094845869464e-06, "loss": 0.7287, "step": 10213 }, { "epoch": 0.7728803299156294, "grad_norm": 2.771430492401123, "learning_rate": 2.420872450583981e-06, "loss": 0.6553, "step": 10214 }, { "epoch": 0.772955998637963, "grad_norm": 2.3271167278289795, "learning_rate": 2.419335828587259e-06, "loss": 0.5808, "step": 10215 }, { "epoch": 0.7730316673602966, "grad_norm": 2.1030850410461426, "learning_rate": 2.4177996186930102e-06, "loss": 0.6993, "step": 10216 }, { "epoch": 0.7731073360826303, "grad_norm": 2.1259703636169434, "learning_rate": 2.4162638209974437e-06, "loss": 0.8063, "step": 10217 }, { "epoch": 0.7731830048049638, "grad_norm": 1.9268971681594849, "learning_rate": 2.414728435596728e-06, "loss": 0.7061, "step": 10218 }, { "epoch": 0.7732586735272975, "grad_norm": 2.1795198917388916, "learning_rate": 2.413193462587017e-06, "loss": 0.6964, "step": 10219 }, { "epoch": 0.7733343422496312, "grad_norm": 1.8316737413406372, "learning_rate": 2.4116589020644367e-06, "loss": 0.6009, "step": 10220 }, { "epoch": 0.7734100109719647, "grad_norm": 2.2390308380126953, "learning_rate": 2.4101247541250833e-06, "loss": 0.5806, "step": 10221 }, { "epoch": 0.7734856796942984, "grad_norm": 2.265646457672119, "learning_rate": 2.408591018865034e-06, "loss": 0.701, "step": 10222 }, { "epoch": 0.7735613484166319, "grad_norm": 1.9274543523788452, "learning_rate": 2.407057696380334e-06, "loss": 0.624, "step": 10223 }, { "epoch": 0.7736370171389656, "grad_norm": 1.9996583461761475, "learning_rate": 2.4055247867670044e-06, "loss": 0.663, "step": 10224 }, { "epoch": 0.7737126858612993, "grad_norm": 2.1675891876220703, "learning_rate": 2.4039922901210444e-06, "loss": 0.6858, "step": 10225 }, { "epoch": 0.7737883545836328, "grad_norm": 2.129465103149414, "learning_rate": 2.4024602065384162e-06, "loss": 0.598, "step": 10226 }, { "epoch": 0.7738640233059665, "grad_norm": 2.162490129470825, "learning_rate": 2.4009285361150723e-06, "loss": 0.7343, "step": 10227 }, { "epoch": 0.7739396920283002, "grad_norm": 2.1317222118377686, "learning_rate": 2.39939727894693e-06, "loss": 0.7082, "step": 10228 }, { "epoch": 0.7740153607506337, "grad_norm": 2.5465569496154785, "learning_rate": 2.3978664351298754e-06, "loss": 0.616, "step": 10229 }, { "epoch": 0.7740910294729674, "grad_norm": 2.2197041511535645, "learning_rate": 2.396336004759779e-06, "loss": 0.7131, "step": 10230 }, { "epoch": 0.7741666981953009, "grad_norm": 1.9501080513000488, "learning_rate": 2.39480598793248e-06, "loss": 0.6171, "step": 10231 }, { "epoch": 0.7742423669176346, "grad_norm": 1.9470248222351074, "learning_rate": 2.393276384743795e-06, "loss": 0.6346, "step": 10232 }, { "epoch": 0.7743180356399683, "grad_norm": 2.145937442779541, "learning_rate": 2.3917471952895117e-06, "loss": 0.6556, "step": 10233 }, { "epoch": 0.7743937043623018, "grad_norm": 1.949892520904541, "learning_rate": 2.3902184196653922e-06, "loss": 0.6536, "step": 10234 }, { "epoch": 0.7744693730846355, "grad_norm": 2.531683921813965, "learning_rate": 2.3886900579671765e-06, "loss": 0.6373, "step": 10235 }, { "epoch": 0.774545041806969, "grad_norm": 1.8003000020980835, "learning_rate": 2.3871621102905676e-06, "loss": 0.6487, "step": 10236 }, { "epoch": 0.7746207105293027, "grad_norm": 2.1588857173919678, "learning_rate": 2.385634576731258e-06, "loss": 0.7224, "step": 10237 }, { "epoch": 0.7746963792516364, "grad_norm": 1.7768288850784302, "learning_rate": 2.3841074573849058e-06, "loss": 0.6877, "step": 10238 }, { "epoch": 0.7747720479739699, "grad_norm": 2.2642805576324463, "learning_rate": 2.382580752347145e-06, "loss": 0.621, "step": 10239 }, { "epoch": 0.7748477166963036, "grad_norm": 1.7709468603134155, "learning_rate": 2.381054461713579e-06, "loss": 0.7062, "step": 10240 }, { "epoch": 0.7749233854186373, "grad_norm": 2.4417378902435303, "learning_rate": 2.3795285855797874e-06, "loss": 0.6432, "step": 10241 }, { "epoch": 0.7749990541409708, "grad_norm": 2.286708354949951, "learning_rate": 2.3780031240413338e-06, "loss": 0.6124, "step": 10242 }, { "epoch": 0.7750747228633045, "grad_norm": 2.1248910427093506, "learning_rate": 2.376478077193741e-06, "loss": 0.6614, "step": 10243 }, { "epoch": 0.775150391585638, "grad_norm": 3.0439376831054688, "learning_rate": 2.3749534451325134e-06, "loss": 0.8119, "step": 10244 }, { "epoch": 0.7752260603079717, "grad_norm": 2.391871213912964, "learning_rate": 2.37342922795313e-06, "loss": 0.6768, "step": 10245 }, { "epoch": 0.7753017290303054, "grad_norm": 2.5745506286621094, "learning_rate": 2.3719054257510398e-06, "loss": 0.7174, "step": 10246 }, { "epoch": 0.7753773977526389, "grad_norm": 2.4691545963287354, "learning_rate": 2.370382038621671e-06, "loss": 0.7401, "step": 10247 }, { "epoch": 0.7754530664749726, "grad_norm": 2.357771396636963, "learning_rate": 2.368859066660421e-06, "loss": 0.7197, "step": 10248 }, { "epoch": 0.7755287351973061, "grad_norm": 2.1640098094940186, "learning_rate": 2.3673365099626673e-06, "loss": 0.5828, "step": 10249 }, { "epoch": 0.7756044039196398, "grad_norm": 1.9653394222259521, "learning_rate": 2.365814368623751e-06, "loss": 0.6857, "step": 10250 }, { "epoch": 0.7756800726419735, "grad_norm": 2.2896316051483154, "learning_rate": 2.364292642738996e-06, "loss": 0.6718, "step": 10251 }, { "epoch": 0.775755741364307, "grad_norm": 2.249856948852539, "learning_rate": 2.3627713324036957e-06, "loss": 0.588, "step": 10252 }, { "epoch": 0.7758314100866407, "grad_norm": 1.9826165437698364, "learning_rate": 2.3612504377131283e-06, "loss": 0.6545, "step": 10253 }, { "epoch": 0.7759070788089744, "grad_norm": 3.986067295074463, "learning_rate": 2.359729958762527e-06, "loss": 0.7375, "step": 10254 }, { "epoch": 0.7759827475313079, "grad_norm": 2.1496474742889404, "learning_rate": 2.3582098956471134e-06, "loss": 0.7683, "step": 10255 }, { "epoch": 0.7760584162536416, "grad_norm": 2.206681966781616, "learning_rate": 2.3566902484620785e-06, "loss": 0.5398, "step": 10256 }, { "epoch": 0.7761340849759751, "grad_norm": 2.004941940307617, "learning_rate": 2.355171017302587e-06, "loss": 0.7467, "step": 10257 }, { "epoch": 0.7762097536983088, "grad_norm": 2.200032949447632, "learning_rate": 2.353652202263778e-06, "loss": 0.6155, "step": 10258 }, { "epoch": 0.7762854224206425, "grad_norm": 1.9743727445602417, "learning_rate": 2.352133803440765e-06, "loss": 0.5706, "step": 10259 }, { "epoch": 0.776361091142976, "grad_norm": 1.8890496492385864, "learning_rate": 2.350615820928639e-06, "loss": 0.578, "step": 10260 }, { "epoch": 0.7764367598653097, "grad_norm": 2.166748523712158, "learning_rate": 2.3490982548224532e-06, "loss": 0.7314, "step": 10261 }, { "epoch": 0.7765124285876432, "grad_norm": 2.333298683166504, "learning_rate": 2.3475811052172434e-06, "loss": 0.7265, "step": 10262 }, { "epoch": 0.7765880973099769, "grad_norm": 2.2891969680786133, "learning_rate": 2.3460643722080277e-06, "loss": 0.6929, "step": 10263 }, { "epoch": 0.7766637660323106, "grad_norm": 2.030637741088867, "learning_rate": 2.344548055889779e-06, "loss": 0.7014, "step": 10264 }, { "epoch": 0.7767394347546441, "grad_norm": 2.0292556285858154, "learning_rate": 2.3430321563574577e-06, "loss": 0.6334, "step": 10265 }, { "epoch": 0.7768151034769778, "grad_norm": 2.329683542251587, "learning_rate": 2.3415166737059937e-06, "loss": 0.7243, "step": 10266 }, { "epoch": 0.7768907721993115, "grad_norm": 2.4206478595733643, "learning_rate": 2.340001608030292e-06, "loss": 0.604, "step": 10267 }, { "epoch": 0.776966440921645, "grad_norm": 2.233008861541748, "learning_rate": 2.3384869594252304e-06, "loss": 0.7065, "step": 10268 }, { "epoch": 0.7770421096439787, "grad_norm": 1.844909906387329, "learning_rate": 2.336972727985662e-06, "loss": 0.7302, "step": 10269 }, { "epoch": 0.7771177783663122, "grad_norm": 1.7668637037277222, "learning_rate": 2.335458913806411e-06, "loss": 0.6437, "step": 10270 }, { "epoch": 0.7771934470886459, "grad_norm": 2.1801650524139404, "learning_rate": 2.3339455169822822e-06, "loss": 0.8086, "step": 10271 }, { "epoch": 0.7772691158109796, "grad_norm": 1.8977692127227783, "learning_rate": 2.33243253760804e-06, "loss": 0.6991, "step": 10272 }, { "epoch": 0.7773447845333131, "grad_norm": 2.6029887199401855, "learning_rate": 2.3309199757784408e-06, "loss": 0.6931, "step": 10273 }, { "epoch": 0.7774204532556468, "grad_norm": 2.1565604209899902, "learning_rate": 2.3294078315882057e-06, "loss": 0.66, "step": 10274 }, { "epoch": 0.7774961219779803, "grad_norm": 2.308840751647949, "learning_rate": 2.3278961051320257e-06, "loss": 0.6124, "step": 10275 }, { "epoch": 0.777571790700314, "grad_norm": 2.039461851119995, "learning_rate": 2.3263847965045705e-06, "loss": 0.5688, "step": 10276 }, { "epoch": 0.7776474594226477, "grad_norm": 2.1434340476989746, "learning_rate": 2.324873905800485e-06, "loss": 0.656, "step": 10277 }, { "epoch": 0.7777231281449812, "grad_norm": 1.743505597114563, "learning_rate": 2.323363433114385e-06, "loss": 0.6187, "step": 10278 }, { "epoch": 0.7777987968673149, "grad_norm": 2.4365437030792236, "learning_rate": 2.321853378540862e-06, "loss": 0.7503, "step": 10279 }, { "epoch": 0.7778744655896486, "grad_norm": 2.0435638427734375, "learning_rate": 2.3203437421744804e-06, "loss": 0.7011, "step": 10280 }, { "epoch": 0.7779501343119821, "grad_norm": 2.0592703819274902, "learning_rate": 2.318834524109781e-06, "loss": 0.6205, "step": 10281 }, { "epoch": 0.7780258030343158, "grad_norm": 2.7824881076812744, "learning_rate": 2.3173257244412673e-06, "loss": 0.5982, "step": 10282 }, { "epoch": 0.7781014717566493, "grad_norm": 2.0062973499298096, "learning_rate": 2.3158173432634347e-06, "loss": 0.6368, "step": 10283 }, { "epoch": 0.778177140478983, "grad_norm": 2.447920322418213, "learning_rate": 2.314309380670739e-06, "loss": 0.7854, "step": 10284 }, { "epoch": 0.7782528092013167, "grad_norm": 2.387455463409424, "learning_rate": 2.312801836757616e-06, "loss": 0.6015, "step": 10285 }, { "epoch": 0.7783284779236502, "grad_norm": 2.1444883346557617, "learning_rate": 2.3112947116184693e-06, "loss": 0.5855, "step": 10286 }, { "epoch": 0.7784041466459839, "grad_norm": 2.148451328277588, "learning_rate": 2.3097880053476777e-06, "loss": 0.6432, "step": 10287 }, { "epoch": 0.7784798153683175, "grad_norm": 2.473336696624756, "learning_rate": 2.308281718039607e-06, "loss": 0.6729, "step": 10288 }, { "epoch": 0.7785554840906511, "grad_norm": 2.9266908168792725, "learning_rate": 2.306775849788575e-06, "loss": 0.5335, "step": 10289 }, { "epoch": 0.7786311528129848, "grad_norm": 2.390139102935791, "learning_rate": 2.3052704006888876e-06, "loss": 0.6986, "step": 10290 }, { "epoch": 0.7787068215353183, "grad_norm": 2.233603000640869, "learning_rate": 2.3037653708348215e-06, "loss": 0.6058, "step": 10291 }, { "epoch": 0.778782490257652, "grad_norm": 2.33750057220459, "learning_rate": 2.302260760320629e-06, "loss": 0.769, "step": 10292 }, { "epoch": 0.7788581589799857, "grad_norm": 2.0690042972564697, "learning_rate": 2.3007565692405256e-06, "loss": 0.6749, "step": 10293 }, { "epoch": 0.7789338277023192, "grad_norm": 2.1784255504608154, "learning_rate": 2.2992527976887156e-06, "loss": 0.5672, "step": 10294 }, { "epoch": 0.7790094964246529, "grad_norm": 1.8492722511291504, "learning_rate": 2.2977494457593715e-06, "loss": 0.7427, "step": 10295 }, { "epoch": 0.7790851651469864, "grad_norm": 2.1456425189971924, "learning_rate": 2.2962465135466325e-06, "loss": 0.6621, "step": 10296 }, { "epoch": 0.7791608338693201, "grad_norm": 2.1436009407043457, "learning_rate": 2.294744001144619e-06, "loss": 0.7521, "step": 10297 }, { "epoch": 0.7792365025916538, "grad_norm": 2.698065996170044, "learning_rate": 2.2932419086474206e-06, "loss": 0.7116, "step": 10298 }, { "epoch": 0.7793121713139873, "grad_norm": 2.1838340759277344, "learning_rate": 2.291740236149112e-06, "loss": 0.6111, "step": 10299 }, { "epoch": 0.779387840036321, "grad_norm": 2.1678380966186523, "learning_rate": 2.290238983743724e-06, "loss": 0.5987, "step": 10300 }, { "epoch": 0.7794635087586546, "grad_norm": 2.3915209770202637, "learning_rate": 2.288738151525273e-06, "loss": 0.5449, "step": 10301 }, { "epoch": 0.7795391774809882, "grad_norm": 2.2159979343414307, "learning_rate": 2.2872377395877457e-06, "loss": 0.6592, "step": 10302 }, { "epoch": 0.7796148462033219, "grad_norm": 2.2043135166168213, "learning_rate": 2.285737748025103e-06, "loss": 0.671, "step": 10303 }, { "epoch": 0.7796905149256554, "grad_norm": 2.210493564605713, "learning_rate": 2.2842381769312798e-06, "loss": 0.589, "step": 10304 }, { "epoch": 0.7797661836479891, "grad_norm": 2.3865721225738525, "learning_rate": 2.282739026400182e-06, "loss": 0.6478, "step": 10305 }, { "epoch": 0.7798418523703228, "grad_norm": 1.9948359727859497, "learning_rate": 2.2812402965256957e-06, "loss": 0.7697, "step": 10306 }, { "epoch": 0.7799175210926563, "grad_norm": 3.1462361812591553, "learning_rate": 2.27974198740167e-06, "loss": 0.6498, "step": 10307 }, { "epoch": 0.77999318981499, "grad_norm": 1.8765848875045776, "learning_rate": 2.278244099121936e-06, "loss": 0.6286, "step": 10308 }, { "epoch": 0.7800688585373236, "grad_norm": 4.639185905456543, "learning_rate": 2.276746631780301e-06, "loss": 0.5139, "step": 10309 }, { "epoch": 0.7801445272596572, "grad_norm": 2.111081838607788, "learning_rate": 2.2752495854705357e-06, "loss": 0.6906, "step": 10310 }, { "epoch": 0.7802201959819909, "grad_norm": 6.2167744636535645, "learning_rate": 2.2737529602863918e-06, "loss": 0.8498, "step": 10311 }, { "epoch": 0.7802958647043244, "grad_norm": 2.314579486846924, "learning_rate": 2.2722567563215922e-06, "loss": 0.745, "step": 10312 }, { "epoch": 0.7803715334266581, "grad_norm": 1.908983826637268, "learning_rate": 2.270760973669836e-06, "loss": 0.6662, "step": 10313 }, { "epoch": 0.7804472021489917, "grad_norm": 1.937185287475586, "learning_rate": 2.269265612424791e-06, "loss": 0.5662, "step": 10314 }, { "epoch": 0.7805228708713253, "grad_norm": 2.0278525352478027, "learning_rate": 2.2677706726801044e-06, "loss": 0.8562, "step": 10315 }, { "epoch": 0.780598539593659, "grad_norm": 2.70788311958313, "learning_rate": 2.266276154529393e-06, "loss": 0.8197, "step": 10316 }, { "epoch": 0.7806742083159925, "grad_norm": 1.852952480316162, "learning_rate": 2.2647820580662505e-06, "loss": 0.7382, "step": 10317 }, { "epoch": 0.7807498770383262, "grad_norm": 2.082524299621582, "learning_rate": 2.263288383384234e-06, "loss": 0.6123, "step": 10318 }, { "epoch": 0.7808255457606599, "grad_norm": 2.4691317081451416, "learning_rate": 2.2617951305768917e-06, "loss": 0.7913, "step": 10319 }, { "epoch": 0.7809012144829934, "grad_norm": 2.056469678878784, "learning_rate": 2.2603022997377337e-06, "loss": 0.7534, "step": 10320 }, { "epoch": 0.7809768832053271, "grad_norm": 2.364788293838501, "learning_rate": 2.2588098909602435e-06, "loss": 0.6309, "step": 10321 }, { "epoch": 0.7810525519276607, "grad_norm": 2.2668612003326416, "learning_rate": 2.2573179043378803e-06, "loss": 0.7426, "step": 10322 }, { "epoch": 0.7811282206499943, "grad_norm": 2.3786261081695557, "learning_rate": 2.255826339964079e-06, "loss": 0.6801, "step": 10323 }, { "epoch": 0.781203889372328, "grad_norm": 2.557690382003784, "learning_rate": 2.254335197932246e-06, "loss": 0.5807, "step": 10324 }, { "epoch": 0.7812795580946615, "grad_norm": 2.1861777305603027, "learning_rate": 2.25284447833576e-06, "loss": 0.6966, "step": 10325 }, { "epoch": 0.7813552268169952, "grad_norm": 2.943876028060913, "learning_rate": 2.251354181267977e-06, "loss": 0.6657, "step": 10326 }, { "epoch": 0.7814308955393289, "grad_norm": 1.9715664386749268, "learning_rate": 2.249864306822222e-06, "loss": 0.7396, "step": 10327 }, { "epoch": 0.7815065642616624, "grad_norm": 1.953696608543396, "learning_rate": 2.248374855091797e-06, "loss": 0.52, "step": 10328 }, { "epoch": 0.7815822329839961, "grad_norm": 2.13394832611084, "learning_rate": 2.246885826169975e-06, "loss": 0.6673, "step": 10329 }, { "epoch": 0.7816579017063296, "grad_norm": 1.9963359832763672, "learning_rate": 2.2453972201500055e-06, "loss": 0.6992, "step": 10330 }, { "epoch": 0.7817335704286633, "grad_norm": 2.229747772216797, "learning_rate": 2.243909037125112e-06, "loss": 0.8009, "step": 10331 }, { "epoch": 0.781809239150997, "grad_norm": 1.8210551738739014, "learning_rate": 2.2424212771884842e-06, "loss": 0.7221, "step": 10332 }, { "epoch": 0.7818849078733305, "grad_norm": 2.274820566177368, "learning_rate": 2.2409339404332924e-06, "loss": 0.8382, "step": 10333 }, { "epoch": 0.7819605765956642, "grad_norm": 2.1577939987182617, "learning_rate": 2.2394470269526785e-06, "loss": 0.7821, "step": 10334 }, { "epoch": 0.7820362453179978, "grad_norm": 1.9666999578475952, "learning_rate": 2.2379605368397578e-06, "loss": 0.7119, "step": 10335 }, { "epoch": 0.7821119140403314, "grad_norm": 2.317742109298706, "learning_rate": 2.2364744701876195e-06, "loss": 0.5406, "step": 10336 }, { "epoch": 0.7821875827626651, "grad_norm": 2.2449235916137695, "learning_rate": 2.234988827089326e-06, "loss": 0.6204, "step": 10337 }, { "epoch": 0.7822632514849986, "grad_norm": 2.313387632369995, "learning_rate": 2.2335036076379153e-06, "loss": 0.7333, "step": 10338 }, { "epoch": 0.7823389202073323, "grad_norm": 2.1615755558013916, "learning_rate": 2.2320188119263895e-06, "loss": 0.7058, "step": 10339 }, { "epoch": 0.782414588929666, "grad_norm": 2.0965301990509033, "learning_rate": 2.230534440047738e-06, "loss": 0.5865, "step": 10340 }, { "epoch": 0.7824902576519995, "grad_norm": 2.1409988403320312, "learning_rate": 2.2290504920949155e-06, "loss": 0.643, "step": 10341 }, { "epoch": 0.7825659263743332, "grad_norm": 2.249452590942383, "learning_rate": 2.2275669681608534e-06, "loss": 0.6476, "step": 10342 }, { "epoch": 0.7826415950966668, "grad_norm": 2.122205972671509, "learning_rate": 2.22608386833845e-06, "loss": 0.7286, "step": 10343 }, { "epoch": 0.7827172638190004, "grad_norm": 2.537824869155884, "learning_rate": 2.224601192720581e-06, "loss": 0.7216, "step": 10344 }, { "epoch": 0.7827929325413341, "grad_norm": 2.4190261363983154, "learning_rate": 2.2231189414001053e-06, "loss": 0.544, "step": 10345 }, { "epoch": 0.7828686012636676, "grad_norm": 2.434654712677002, "learning_rate": 2.221637114469837e-06, "loss": 0.7063, "step": 10346 }, { "epoch": 0.7829442699860013, "grad_norm": 2.0389528274536133, "learning_rate": 2.2201557120225783e-06, "loss": 0.5486, "step": 10347 }, { "epoch": 0.7830199387083349, "grad_norm": 2.8061161041259766, "learning_rate": 2.2186747341510968e-06, "loss": 0.6029, "step": 10348 }, { "epoch": 0.7830956074306685, "grad_norm": 1.9045759439468384, "learning_rate": 2.2171941809481367e-06, "loss": 0.7342, "step": 10349 }, { "epoch": 0.7831712761530022, "grad_norm": 2.0165064334869385, "learning_rate": 2.2157140525064155e-06, "loss": 0.7228, "step": 10350 }, { "epoch": 0.7832469448753357, "grad_norm": 2.1648142337799072, "learning_rate": 2.214234348918623e-06, "loss": 0.7189, "step": 10351 }, { "epoch": 0.7833226135976694, "grad_norm": 1.7740191221237183, "learning_rate": 2.2127550702774267e-06, "loss": 0.6009, "step": 10352 }, { "epoch": 0.7833982823200031, "grad_norm": 2.149911642074585, "learning_rate": 2.2112762166754567e-06, "loss": 0.7393, "step": 10353 }, { "epoch": 0.7834739510423366, "grad_norm": 2.2649195194244385, "learning_rate": 2.209797788205326e-06, "loss": 0.657, "step": 10354 }, { "epoch": 0.7835496197646703, "grad_norm": 2.4505796432495117, "learning_rate": 2.208319784959622e-06, "loss": 0.569, "step": 10355 }, { "epoch": 0.7836252884870039, "grad_norm": 2.0467987060546875, "learning_rate": 2.2068422070309032e-06, "loss": 0.8597, "step": 10356 }, { "epoch": 0.7837009572093375, "grad_norm": 2.103842258453369, "learning_rate": 2.2053650545116936e-06, "loss": 0.6934, "step": 10357 }, { "epoch": 0.7837766259316712, "grad_norm": 1.9594662189483643, "learning_rate": 2.2038883274945015e-06, "loss": 0.6439, "step": 10358 }, { "epoch": 0.7838522946540047, "grad_norm": 2.5224552154541016, "learning_rate": 2.2024120260718035e-06, "loss": 0.6937, "step": 10359 }, { "epoch": 0.7839279633763384, "grad_norm": 1.9823088645935059, "learning_rate": 2.2009361503360506e-06, "loss": 0.5863, "step": 10360 }, { "epoch": 0.784003632098672, "grad_norm": 2.8926243782043457, "learning_rate": 2.199460700379666e-06, "loss": 0.6975, "step": 10361 }, { "epoch": 0.7840793008210056, "grad_norm": 2.1695239543914795, "learning_rate": 2.1979856762950488e-06, "loss": 0.5354, "step": 10362 }, { "epoch": 0.7841549695433393, "grad_norm": 4.557194709777832, "learning_rate": 2.196511078174571e-06, "loss": 0.7417, "step": 10363 }, { "epoch": 0.7842306382656729, "grad_norm": 2.158311605453491, "learning_rate": 2.19503690611057e-06, "loss": 0.6356, "step": 10364 }, { "epoch": 0.7843063069880065, "grad_norm": 2.076812505722046, "learning_rate": 2.1935631601953705e-06, "loss": 0.6391, "step": 10365 }, { "epoch": 0.7843819757103402, "grad_norm": 2.2256968021392822, "learning_rate": 2.192089840521263e-06, "loss": 0.5693, "step": 10366 }, { "epoch": 0.7844576444326737, "grad_norm": 3.5892693996429443, "learning_rate": 2.1906169471805065e-06, "loss": 0.6821, "step": 10367 }, { "epoch": 0.7845333131550074, "grad_norm": 2.188708543777466, "learning_rate": 2.1891444802653406e-06, "loss": 0.6475, "step": 10368 }, { "epoch": 0.784608981877341, "grad_norm": 2.307600975036621, "learning_rate": 2.187672439867977e-06, "loss": 0.7261, "step": 10369 }, { "epoch": 0.7846846505996746, "grad_norm": 1.9995859861373901, "learning_rate": 2.1862008260805987e-06, "loss": 0.6297, "step": 10370 }, { "epoch": 0.7847603193220083, "grad_norm": 2.0694448947906494, "learning_rate": 2.184729638995363e-06, "loss": 0.6715, "step": 10371 }, { "epoch": 0.7848359880443418, "grad_norm": 2.0987765789031982, "learning_rate": 2.1832588787044003e-06, "loss": 0.6215, "step": 10372 }, { "epoch": 0.7849116567666755, "grad_norm": 2.225297689437866, "learning_rate": 2.1817885452998156e-06, "loss": 0.5915, "step": 10373 }, { "epoch": 0.7849873254890091, "grad_norm": 3.953972101211548, "learning_rate": 2.1803186388736867e-06, "loss": 0.789, "step": 10374 }, { "epoch": 0.7850629942113427, "grad_norm": 2.2498698234558105, "learning_rate": 2.1788491595180567e-06, "loss": 0.6853, "step": 10375 }, { "epoch": 0.7851386629336764, "grad_norm": 2.0867936611175537, "learning_rate": 2.177380107324958e-06, "loss": 0.7266, "step": 10376 }, { "epoch": 0.78521433165601, "grad_norm": 2.7368338108062744, "learning_rate": 2.175911482386386e-06, "loss": 0.6268, "step": 10377 }, { "epoch": 0.7852900003783436, "grad_norm": 2.203387975692749, "learning_rate": 2.174443284794307e-06, "loss": 0.7792, "step": 10378 }, { "epoch": 0.7853656691006773, "grad_norm": 1.9313303232192993, "learning_rate": 2.1729755146406653e-06, "loss": 0.7007, "step": 10379 }, { "epoch": 0.7854413378230108, "grad_norm": 2.8301777839660645, "learning_rate": 2.171508172017378e-06, "loss": 0.7817, "step": 10380 }, { "epoch": 0.7855170065453445, "grad_norm": 2.140004873275757, "learning_rate": 2.170041257016336e-06, "loss": 0.7564, "step": 10381 }, { "epoch": 0.7855926752676781, "grad_norm": 9.439233779907227, "learning_rate": 2.1685747697294005e-06, "loss": 0.6332, "step": 10382 }, { "epoch": 0.7856683439900117, "grad_norm": 3.0512073040008545, "learning_rate": 2.167108710248408e-06, "loss": 0.6822, "step": 10383 }, { "epoch": 0.7857440127123454, "grad_norm": 2.452768564224243, "learning_rate": 2.165643078665172e-06, "loss": 0.6007, "step": 10384 }, { "epoch": 0.785819681434679, "grad_norm": 2.016571044921875, "learning_rate": 2.1641778750714653e-06, "loss": 0.6412, "step": 10385 }, { "epoch": 0.7858953501570126, "grad_norm": 2.0821337699890137, "learning_rate": 2.162713099559053e-06, "loss": 0.6281, "step": 10386 }, { "epoch": 0.7859710188793462, "grad_norm": 1.9462223052978516, "learning_rate": 2.16124875221966e-06, "loss": 0.6654, "step": 10387 }, { "epoch": 0.7860466876016798, "grad_norm": 1.9950424432754517, "learning_rate": 2.1597848331449925e-06, "loss": 0.6193, "step": 10388 }, { "epoch": 0.7861223563240135, "grad_norm": 2.3755111694335938, "learning_rate": 2.1583213424267207e-06, "loss": 0.6631, "step": 10389 }, { "epoch": 0.7861980250463471, "grad_norm": 2.3503427505493164, "learning_rate": 2.1568582801564918e-06, "loss": 0.7469, "step": 10390 }, { "epoch": 0.7862736937686807, "grad_norm": 2.1532740592956543, "learning_rate": 2.1553956464259367e-06, "loss": 0.675, "step": 10391 }, { "epoch": 0.7863493624910144, "grad_norm": 3.5984179973602295, "learning_rate": 2.153933441326641e-06, "loss": 0.5442, "step": 10392 }, { "epoch": 0.786425031213348, "grad_norm": 1.983961820602417, "learning_rate": 2.1524716649501764e-06, "loss": 0.6268, "step": 10393 }, { "epoch": 0.7865006999356816, "grad_norm": 2.3665051460266113, "learning_rate": 2.151010317388083e-06, "loss": 0.6089, "step": 10394 }, { "epoch": 0.7865763686580152, "grad_norm": 3.3090527057647705, "learning_rate": 2.1495493987318773e-06, "loss": 0.7426, "step": 10395 }, { "epoch": 0.7866520373803488, "grad_norm": 2.405433416366577, "learning_rate": 2.148088909073044e-06, "loss": 0.7821, "step": 10396 }, { "epoch": 0.7867277061026825, "grad_norm": 2.0059757232666016, "learning_rate": 2.1466288485030456e-06, "loss": 0.6236, "step": 10397 }, { "epoch": 0.786803374825016, "grad_norm": 2.419422149658203, "learning_rate": 2.145169217113317e-06, "loss": 0.7572, "step": 10398 }, { "epoch": 0.7868790435473497, "grad_norm": 3.1536026000976562, "learning_rate": 2.143710014995261e-06, "loss": 0.6652, "step": 10399 }, { "epoch": 0.7869547122696833, "grad_norm": 2.279188394546509, "learning_rate": 2.142251242240258e-06, "loss": 0.6332, "step": 10400 }, { "epoch": 0.7870303809920169, "grad_norm": 2.087526321411133, "learning_rate": 2.1407928989396655e-06, "loss": 0.5919, "step": 10401 }, { "epoch": 0.7871060497143506, "grad_norm": 3.789907693862915, "learning_rate": 2.1393349851848084e-06, "loss": 0.6605, "step": 10402 }, { "epoch": 0.7871817184366842, "grad_norm": 2.147244930267334, "learning_rate": 2.1378775010669824e-06, "loss": 0.7815, "step": 10403 }, { "epoch": 0.7872573871590178, "grad_norm": 3.6964704990386963, "learning_rate": 2.1364204466774623e-06, "loss": 0.4579, "step": 10404 }, { "epoch": 0.7873330558813515, "grad_norm": 2.3750951290130615, "learning_rate": 2.134963822107494e-06, "loss": 0.6549, "step": 10405 }, { "epoch": 0.787408724603685, "grad_norm": 2.594712972640991, "learning_rate": 2.1335076274482954e-06, "loss": 0.6412, "step": 10406 }, { "epoch": 0.7874843933260187, "grad_norm": 2.0209412574768066, "learning_rate": 2.132051862791057e-06, "loss": 0.7044, "step": 10407 }, { "epoch": 0.7875600620483523, "grad_norm": 2.0428459644317627, "learning_rate": 2.130596528226945e-06, "loss": 0.6416, "step": 10408 }, { "epoch": 0.7876357307706859, "grad_norm": 2.461974859237671, "learning_rate": 2.1291416238470994e-06, "loss": 0.7138, "step": 10409 }, { "epoch": 0.7877113994930196, "grad_norm": 1.9830410480499268, "learning_rate": 2.127687149742626e-06, "loss": 0.6454, "step": 10410 }, { "epoch": 0.7877870682153532, "grad_norm": 2.3147575855255127, "learning_rate": 2.126233106004608e-06, "loss": 0.7328, "step": 10411 }, { "epoch": 0.7878627369376868, "grad_norm": 2.058706283569336, "learning_rate": 2.124779492724111e-06, "loss": 0.6221, "step": 10412 }, { "epoch": 0.7879384056600204, "grad_norm": 2.767449140548706, "learning_rate": 2.1233263099921565e-06, "loss": 0.6106, "step": 10413 }, { "epoch": 0.788014074382354, "grad_norm": 2.3518853187561035, "learning_rate": 2.12187355789975e-06, "loss": 0.7009, "step": 10414 }, { "epoch": 0.7880897431046877, "grad_norm": 1.9216002225875854, "learning_rate": 2.1204212365378685e-06, "loss": 0.7082, "step": 10415 }, { "epoch": 0.7881654118270213, "grad_norm": 2.5170297622680664, "learning_rate": 2.1189693459974597e-06, "loss": 0.6601, "step": 10416 }, { "epoch": 0.7882410805493549, "grad_norm": 3.180408000946045, "learning_rate": 2.117517886369447e-06, "loss": 0.8731, "step": 10417 }, { "epoch": 0.7883167492716886, "grad_norm": 1.9902843236923218, "learning_rate": 2.116066857744725e-06, "loss": 0.6012, "step": 10418 }, { "epoch": 0.7883924179940222, "grad_norm": 2.8387811183929443, "learning_rate": 2.1146162602141614e-06, "loss": 0.6855, "step": 10419 }, { "epoch": 0.7884680867163558, "grad_norm": 2.100433349609375, "learning_rate": 2.1131660938685998e-06, "loss": 0.6094, "step": 10420 }, { "epoch": 0.7885437554386894, "grad_norm": 2.3717706203460693, "learning_rate": 2.1117163587988477e-06, "loss": 0.6863, "step": 10421 }, { "epoch": 0.788619424161023, "grad_norm": 1.9035117626190186, "learning_rate": 2.1102670550956986e-06, "loss": 0.645, "step": 10422 }, { "epoch": 0.7886950928833567, "grad_norm": 2.151751756668091, "learning_rate": 2.108818182849914e-06, "loss": 0.7161, "step": 10423 }, { "epoch": 0.7887707616056903, "grad_norm": 2.054111957550049, "learning_rate": 2.10736974215222e-06, "loss": 0.6702, "step": 10424 }, { "epoch": 0.7888464303280239, "grad_norm": 2.213700532913208, "learning_rate": 2.1059217330933273e-06, "loss": 0.5848, "step": 10425 }, { "epoch": 0.7889220990503575, "grad_norm": 1.8522429466247559, "learning_rate": 2.104474155763913e-06, "loss": 0.6449, "step": 10426 }, { "epoch": 0.7889977677726911, "grad_norm": 2.314577341079712, "learning_rate": 2.1030270102546303e-06, "loss": 0.7704, "step": 10427 }, { "epoch": 0.7890734364950248, "grad_norm": 2.8026366233825684, "learning_rate": 2.1015802966561037e-06, "loss": 0.5235, "step": 10428 }, { "epoch": 0.7891491052173584, "grad_norm": 2.0576069355010986, "learning_rate": 2.100134015058931e-06, "loss": 0.6654, "step": 10429 }, { "epoch": 0.789224773939692, "grad_norm": 2.624588966369629, "learning_rate": 2.098688165553683e-06, "loss": 0.8377, "step": 10430 }, { "epoch": 0.7893004426620257, "grad_norm": 1.8975677490234375, "learning_rate": 2.0972427482309034e-06, "loss": 0.8135, "step": 10431 }, { "epoch": 0.7893761113843593, "grad_norm": 2.2255985736846924, "learning_rate": 2.09579776318111e-06, "loss": 0.7366, "step": 10432 }, { "epoch": 0.7894517801066929, "grad_norm": 2.165253162384033, "learning_rate": 2.0943532104947906e-06, "loss": 0.6496, "step": 10433 }, { "epoch": 0.7895274488290265, "grad_norm": 2.220125436782837, "learning_rate": 2.0929090902624117e-06, "loss": 0.697, "step": 10434 }, { "epoch": 0.7896031175513601, "grad_norm": 1.6284946203231812, "learning_rate": 2.0914654025744034e-06, "loss": 0.589, "step": 10435 }, { "epoch": 0.7896787862736938, "grad_norm": 1.925709843635559, "learning_rate": 2.090022147521174e-06, "loss": 0.6547, "step": 10436 }, { "epoch": 0.7897544549960274, "grad_norm": 2.4849398136138916, "learning_rate": 2.088579325193112e-06, "loss": 0.5531, "step": 10437 }, { "epoch": 0.789830123718361, "grad_norm": 1.8222843408584595, "learning_rate": 2.0871369356805653e-06, "loss": 0.6774, "step": 10438 }, { "epoch": 0.7899057924406946, "grad_norm": 2.412240505218506, "learning_rate": 2.085694979073861e-06, "loss": 0.7183, "step": 10439 }, { "epoch": 0.7899814611630283, "grad_norm": 2.244049549102783, "learning_rate": 2.084253455463302e-06, "loss": 0.6786, "step": 10440 }, { "epoch": 0.7900571298853619, "grad_norm": 3.5137126445770264, "learning_rate": 2.0828123649391594e-06, "loss": 0.6426, "step": 10441 }, { "epoch": 0.7901327986076955, "grad_norm": 2.3122079372406006, "learning_rate": 2.0813717075916797e-06, "loss": 0.6122, "step": 10442 }, { "epoch": 0.7902084673300291, "grad_norm": 2.5689215660095215, "learning_rate": 2.0799314835110808e-06, "loss": 0.7128, "step": 10443 }, { "epoch": 0.7902841360523628, "grad_norm": 1.6637498140335083, "learning_rate": 2.0784916927875547e-06, "loss": 0.6082, "step": 10444 }, { "epoch": 0.7903598047746964, "grad_norm": 2.4732978343963623, "learning_rate": 2.0770523355112686e-06, "loss": 0.7996, "step": 10445 }, { "epoch": 0.79043547349703, "grad_norm": 1.848886251449585, "learning_rate": 2.075613411772353e-06, "loss": 0.6386, "step": 10446 }, { "epoch": 0.7905111422193636, "grad_norm": 2.2179338932037354, "learning_rate": 2.074174921660921e-06, "loss": 0.7844, "step": 10447 }, { "epoch": 0.7905868109416972, "grad_norm": 2.1151363849639893, "learning_rate": 2.0727368652670605e-06, "loss": 0.6207, "step": 10448 }, { "epoch": 0.7906624796640309, "grad_norm": 1.6570191383361816, "learning_rate": 2.07129924268082e-06, "loss": 0.5132, "step": 10449 }, { "epoch": 0.7907381483863645, "grad_norm": 2.8186240196228027, "learning_rate": 2.069862053992231e-06, "loss": 0.7471, "step": 10450 }, { "epoch": 0.7908138171086981, "grad_norm": 2.399502992630005, "learning_rate": 2.0684252992912963e-06, "loss": 0.7079, "step": 10451 }, { "epoch": 0.7908894858310317, "grad_norm": 2.391279935836792, "learning_rate": 2.0669889786679883e-06, "loss": 0.7076, "step": 10452 }, { "epoch": 0.7909651545533654, "grad_norm": 2.1151928901672363, "learning_rate": 2.065553092212254e-06, "loss": 0.6934, "step": 10453 }, { "epoch": 0.791040823275699, "grad_norm": 1.9356576204299927, "learning_rate": 2.0641176400140136e-06, "loss": 0.5707, "step": 10454 }, { "epoch": 0.7911164919980326, "grad_norm": 2.005326986312866, "learning_rate": 2.0626826221631627e-06, "loss": 0.7772, "step": 10455 }, { "epoch": 0.7911921607203662, "grad_norm": 2.1144111156463623, "learning_rate": 2.0612480387495613e-06, "loss": 0.6407, "step": 10456 }, { "epoch": 0.7912678294426999, "grad_norm": 1.8899532556533813, "learning_rate": 2.0598138898630487e-06, "loss": 0.7053, "step": 10457 }, { "epoch": 0.7913434981650335, "grad_norm": 2.0663957595825195, "learning_rate": 2.0583801755934396e-06, "loss": 0.5835, "step": 10458 }, { "epoch": 0.7914191668873671, "grad_norm": 2.444054126739502, "learning_rate": 2.0569468960305178e-06, "loss": 0.6798, "step": 10459 }, { "epoch": 0.7914948356097007, "grad_norm": 2.1964471340179443, "learning_rate": 2.055514051264036e-06, "loss": 0.7087, "step": 10460 }, { "epoch": 0.7915705043320344, "grad_norm": 2.126030683517456, "learning_rate": 2.0540816413837256e-06, "loss": 0.6266, "step": 10461 }, { "epoch": 0.791646173054368, "grad_norm": 2.224245071411133, "learning_rate": 2.052649666479289e-06, "loss": 0.6503, "step": 10462 }, { "epoch": 0.7917218417767016, "grad_norm": 2.307697057723999, "learning_rate": 2.0512181266404004e-06, "loss": 0.6489, "step": 10463 }, { "epoch": 0.7917975104990352, "grad_norm": 2.7697362899780273, "learning_rate": 2.0497870219567073e-06, "loss": 0.6928, "step": 10464 }, { "epoch": 0.7918731792213688, "grad_norm": 2.193354368209839, "learning_rate": 2.048356352517831e-06, "loss": 0.7207, "step": 10465 }, { "epoch": 0.7919488479437025, "grad_norm": 2.0516974925994873, "learning_rate": 2.0469261184133664e-06, "loss": 0.6624, "step": 10466 }, { "epoch": 0.7920245166660361, "grad_norm": 2.0919811725616455, "learning_rate": 2.0454963197328724e-06, "loss": 0.6431, "step": 10467 }, { "epoch": 0.7921001853883697, "grad_norm": 2.1068317890167236, "learning_rate": 2.044066956565895e-06, "loss": 0.6878, "step": 10468 }, { "epoch": 0.7921758541107033, "grad_norm": 2.4329023361206055, "learning_rate": 2.0426380290019456e-06, "loss": 0.778, "step": 10469 }, { "epoch": 0.792251522833037, "grad_norm": 2.205319881439209, "learning_rate": 2.0412095371305034e-06, "loss": 0.7374, "step": 10470 }, { "epoch": 0.7923271915553706, "grad_norm": 1.9677940607070923, "learning_rate": 2.0397814810410265e-06, "loss": 0.6473, "step": 10471 }, { "epoch": 0.7924028602777042, "grad_norm": 2.6336700916290283, "learning_rate": 2.038353860822944e-06, "loss": 0.6716, "step": 10472 }, { "epoch": 0.7924785290000378, "grad_norm": 1.8802638053894043, "learning_rate": 2.0369266765656644e-06, "loss": 0.7585, "step": 10473 }, { "epoch": 0.7925541977223715, "grad_norm": 4.260136127471924, "learning_rate": 2.035499928358554e-06, "loss": 0.6322, "step": 10474 }, { "epoch": 0.7926298664447051, "grad_norm": 1.8927791118621826, "learning_rate": 2.034073616290965e-06, "loss": 0.5409, "step": 10475 }, { "epoch": 0.7927055351670387, "grad_norm": 2.0177175998687744, "learning_rate": 2.0326477404522163e-06, "loss": 0.6721, "step": 10476 }, { "epoch": 0.7927812038893723, "grad_norm": 2.187217950820923, "learning_rate": 2.031222300931601e-06, "loss": 0.7119, "step": 10477 }, { "epoch": 0.7928568726117059, "grad_norm": 1.9448692798614502, "learning_rate": 2.029797297818385e-06, "loss": 0.5877, "step": 10478 }, { "epoch": 0.7929325413340396, "grad_norm": 2.5591869354248047, "learning_rate": 2.0283727312018075e-06, "loss": 0.5605, "step": 10479 }, { "epoch": 0.7930082100563732, "grad_norm": 2.923570394515991, "learning_rate": 2.02694860117108e-06, "loss": 0.6858, "step": 10480 }, { "epoch": 0.7930838787787068, "grad_norm": 1.8390942811965942, "learning_rate": 2.0255249078153825e-06, "loss": 0.719, "step": 10481 }, { "epoch": 0.7931595475010405, "grad_norm": 2.0713329315185547, "learning_rate": 2.0241016512238716e-06, "loss": 0.7737, "step": 10482 }, { "epoch": 0.7932352162233741, "grad_norm": 1.8896489143371582, "learning_rate": 2.0226788314856824e-06, "loss": 0.5676, "step": 10483 }, { "epoch": 0.7933108849457077, "grad_norm": 2.4428815841674805, "learning_rate": 2.021256448689909e-06, "loss": 0.6226, "step": 10484 }, { "epoch": 0.7933865536680413, "grad_norm": 2.0781445503234863, "learning_rate": 2.01983450292563e-06, "loss": 0.63, "step": 10485 }, { "epoch": 0.7934622223903749, "grad_norm": 2.051621198654175, "learning_rate": 2.0184129942818912e-06, "loss": 0.6363, "step": 10486 }, { "epoch": 0.7935378911127086, "grad_norm": 2.4698948860168457, "learning_rate": 2.0169919228477136e-06, "loss": 0.6365, "step": 10487 }, { "epoch": 0.7936135598350422, "grad_norm": 2.3092644214630127, "learning_rate": 2.0155712887120822e-06, "loss": 0.6834, "step": 10488 }, { "epoch": 0.7936892285573758, "grad_norm": 1.9739770889282227, "learning_rate": 2.01415109196397e-06, "loss": 0.5711, "step": 10489 }, { "epoch": 0.7937648972797094, "grad_norm": 2.368990659713745, "learning_rate": 2.0127313326923118e-06, "loss": 0.6306, "step": 10490 }, { "epoch": 0.793840566002043, "grad_norm": 1.8285222053527832, "learning_rate": 2.01131201098602e-06, "loss": 0.7374, "step": 10491 }, { "epoch": 0.7939162347243767, "grad_norm": 2.3828883171081543, "learning_rate": 2.0098931269339706e-06, "loss": 0.694, "step": 10492 }, { "epoch": 0.7939919034467103, "grad_norm": 2.278015375137329, "learning_rate": 2.00847468062502e-06, "loss": 0.6483, "step": 10493 }, { "epoch": 0.7940675721690439, "grad_norm": 2.0421197414398193, "learning_rate": 2.0070566721480044e-06, "loss": 0.6897, "step": 10494 }, { "epoch": 0.7941432408913776, "grad_norm": 1.976413369178772, "learning_rate": 2.005639101591714e-06, "loss": 0.6678, "step": 10495 }, { "epoch": 0.7942189096137112, "grad_norm": 1.767219066619873, "learning_rate": 2.0042219690449255e-06, "loss": 0.6366, "step": 10496 }, { "epoch": 0.7942945783360448, "grad_norm": 3.3812007904052734, "learning_rate": 2.002805274596386e-06, "loss": 0.5554, "step": 10497 }, { "epoch": 0.7943702470583784, "grad_norm": 2.365610361099243, "learning_rate": 2.0013890183348107e-06, "loss": 0.778, "step": 10498 }, { "epoch": 0.794445915780712, "grad_norm": 1.9793906211853027, "learning_rate": 1.9999732003488917e-06, "loss": 0.7294, "step": 10499 }, { "epoch": 0.7945215845030457, "grad_norm": 1.898956060409546, "learning_rate": 1.9985578207272914e-06, "loss": 0.649, "step": 10500 }, { "epoch": 0.7945972532253793, "grad_norm": 1.8723238706588745, "learning_rate": 1.997142879558649e-06, "loss": 0.584, "step": 10501 }, { "epoch": 0.7946729219477129, "grad_norm": 2.4054598808288574, "learning_rate": 1.9957283769315654e-06, "loss": 0.7476, "step": 10502 }, { "epoch": 0.7947485906700466, "grad_norm": 2.132002830505371, "learning_rate": 1.994314312934624e-06, "loss": 0.8334, "step": 10503 }, { "epoch": 0.7948242593923801, "grad_norm": 2.4048826694488525, "learning_rate": 1.9929006876563824e-06, "loss": 0.6341, "step": 10504 }, { "epoch": 0.7948999281147138, "grad_norm": 6.029483318328857, "learning_rate": 1.991487501185365e-06, "loss": 0.695, "step": 10505 }, { "epoch": 0.7949755968370474, "grad_norm": 2.2719666957855225, "learning_rate": 1.9900747536100666e-06, "loss": 0.6664, "step": 10506 }, { "epoch": 0.795051265559381, "grad_norm": 2.24306058883667, "learning_rate": 1.9886624450189597e-06, "loss": 0.6445, "step": 10507 }, { "epoch": 0.7951269342817147, "grad_norm": 2.079287052154541, "learning_rate": 1.9872505755004876e-06, "loss": 0.6169, "step": 10508 }, { "epoch": 0.7952026030040483, "grad_norm": 1.9773471355438232, "learning_rate": 1.985839145143068e-06, "loss": 0.5757, "step": 10509 }, { "epoch": 0.7952782717263819, "grad_norm": 1.859931468963623, "learning_rate": 1.984428154035086e-06, "loss": 0.655, "step": 10510 }, { "epoch": 0.7953539404487155, "grad_norm": 1.6292186975479126, "learning_rate": 1.983017602264904e-06, "loss": 0.7832, "step": 10511 }, { "epoch": 0.7954296091710491, "grad_norm": 1.8309117555618286, "learning_rate": 1.981607489920859e-06, "loss": 0.6333, "step": 10512 }, { "epoch": 0.7955052778933828, "grad_norm": 1.76371169090271, "learning_rate": 1.9801978170912485e-06, "loss": 0.5041, "step": 10513 }, { "epoch": 0.7955809466157164, "grad_norm": 1.981345534324646, "learning_rate": 1.978788583864357e-06, "loss": 0.6277, "step": 10514 }, { "epoch": 0.79565661533805, "grad_norm": 2.2104129791259766, "learning_rate": 1.9773797903284367e-06, "loss": 0.6721, "step": 10515 }, { "epoch": 0.7957322840603837, "grad_norm": 2.0217669010162354, "learning_rate": 1.975971436571705e-06, "loss": 0.585, "step": 10516 }, { "epoch": 0.7958079527827172, "grad_norm": 2.435241222381592, "learning_rate": 1.97456352268236e-06, "loss": 0.6876, "step": 10517 }, { "epoch": 0.7958836215050509, "grad_norm": 2.3520796298980713, "learning_rate": 1.973156048748569e-06, "loss": 0.4607, "step": 10518 }, { "epoch": 0.7959592902273845, "grad_norm": 2.4293181896209717, "learning_rate": 1.9717490148584775e-06, "loss": 0.6131, "step": 10519 }, { "epoch": 0.7960349589497181, "grad_norm": 2.50989031791687, "learning_rate": 1.9703424211001926e-06, "loss": 0.7233, "step": 10520 }, { "epoch": 0.7961106276720518, "grad_norm": 2.3708267211914062, "learning_rate": 1.968936267561803e-06, "loss": 0.7077, "step": 10521 }, { "epoch": 0.7961862963943854, "grad_norm": 1.8803597688674927, "learning_rate": 1.9675305543313647e-06, "loss": 0.632, "step": 10522 }, { "epoch": 0.796261965116719, "grad_norm": 1.908509373664856, "learning_rate": 1.9661252814969117e-06, "loss": 0.6277, "step": 10523 }, { "epoch": 0.7963376338390526, "grad_norm": 2.45629620552063, "learning_rate": 1.964720449146439e-06, "loss": 0.6072, "step": 10524 }, { "epoch": 0.7964133025613862, "grad_norm": 3.3856143951416016, "learning_rate": 1.9633160573679287e-06, "loss": 0.6671, "step": 10525 }, { "epoch": 0.7964889712837199, "grad_norm": 2.470855474472046, "learning_rate": 1.9619121062493283e-06, "loss": 0.6902, "step": 10526 }, { "epoch": 0.7965646400060535, "grad_norm": 2.5444891452789307, "learning_rate": 1.960508595878554e-06, "loss": 0.5386, "step": 10527 }, { "epoch": 0.7966403087283871, "grad_norm": 2.180934429168701, "learning_rate": 1.9591055263434998e-06, "loss": 0.605, "step": 10528 }, { "epoch": 0.7967159774507208, "grad_norm": 2.0110931396484375, "learning_rate": 1.9577028977320297e-06, "loss": 0.6318, "step": 10529 }, { "epoch": 0.7967916461730543, "grad_norm": 2.564516544342041, "learning_rate": 1.9563007101319826e-06, "loss": 0.7039, "step": 10530 }, { "epoch": 0.796867314895388, "grad_norm": 2.044375419616699, "learning_rate": 1.9548989636311673e-06, "loss": 0.7169, "step": 10531 }, { "epoch": 0.7969429836177216, "grad_norm": 2.060332775115967, "learning_rate": 1.9534976583173652e-06, "loss": 0.5094, "step": 10532 }, { "epoch": 0.7970186523400552, "grad_norm": 2.0547759532928467, "learning_rate": 1.9520967942783307e-06, "loss": 0.6053, "step": 10533 }, { "epoch": 0.7970943210623889, "grad_norm": 2.1505582332611084, "learning_rate": 1.950696371601791e-06, "loss": 0.6574, "step": 10534 }, { "epoch": 0.7971699897847225, "grad_norm": 2.3827664852142334, "learning_rate": 1.949296390375445e-06, "loss": 0.5319, "step": 10535 }, { "epoch": 0.7972456585070561, "grad_norm": 2.6274614334106445, "learning_rate": 1.947896850686963e-06, "loss": 0.8266, "step": 10536 }, { "epoch": 0.7973213272293898, "grad_norm": 2.0706329345703125, "learning_rate": 1.946497752623993e-06, "loss": 0.7172, "step": 10537 }, { "epoch": 0.7973969959517233, "grad_norm": 2.9049019813537598, "learning_rate": 1.945099096274144e-06, "loss": 0.7605, "step": 10538 }, { "epoch": 0.797472664674057, "grad_norm": 2.054645299911499, "learning_rate": 1.943700881725006e-06, "loss": 0.6115, "step": 10539 }, { "epoch": 0.7975483333963906, "grad_norm": 2.4279866218566895, "learning_rate": 1.9423031090641456e-06, "loss": 0.6093, "step": 10540 }, { "epoch": 0.7976240021187242, "grad_norm": 2.322181224822998, "learning_rate": 1.9409057783790908e-06, "loss": 0.7224, "step": 10541 }, { "epoch": 0.7976996708410579, "grad_norm": 2.0702531337738037, "learning_rate": 1.9395088897573463e-06, "loss": 0.8073, "step": 10542 }, { "epoch": 0.7977753395633914, "grad_norm": 3.023908853530884, "learning_rate": 1.9381124432863933e-06, "loss": 0.6391, "step": 10543 }, { "epoch": 0.7978510082857251, "grad_norm": 2.054307699203491, "learning_rate": 1.936716439053679e-06, "loss": 0.7798, "step": 10544 }, { "epoch": 0.7979266770080587, "grad_norm": 2.4131476879119873, "learning_rate": 1.935320877146627e-06, "loss": 0.7464, "step": 10545 }, { "epoch": 0.7980023457303923, "grad_norm": 2.163196086883545, "learning_rate": 1.9339257576526325e-06, "loss": 0.7076, "step": 10546 }, { "epoch": 0.798078014452726, "grad_norm": 2.453834056854248, "learning_rate": 1.9325310806590596e-06, "loss": 0.7057, "step": 10547 }, { "epoch": 0.7981536831750596, "grad_norm": 1.4434751272201538, "learning_rate": 1.9311368462532536e-06, "loss": 0.8129, "step": 10548 }, { "epoch": 0.7982293518973932, "grad_norm": 2.3839449882507324, "learning_rate": 1.929743054522516e-06, "loss": 0.7117, "step": 10549 }, { "epoch": 0.7983050206197269, "grad_norm": 2.3013927936553955, "learning_rate": 1.9283497055541383e-06, "loss": 0.6521, "step": 10550 }, { "epoch": 0.7983806893420604, "grad_norm": 2.079742670059204, "learning_rate": 1.926956799435378e-06, "loss": 0.5988, "step": 10551 }, { "epoch": 0.7984563580643941, "grad_norm": 2.3923838138580322, "learning_rate": 1.9255643362534573e-06, "loss": 0.6227, "step": 10552 }, { "epoch": 0.7985320267867277, "grad_norm": 2.4555153846740723, "learning_rate": 1.9241723160955793e-06, "loss": 0.6377, "step": 10553 }, { "epoch": 0.7986076955090613, "grad_norm": 1.8996168375015259, "learning_rate": 1.9227807390489167e-06, "loss": 0.6329, "step": 10554 }, { "epoch": 0.798683364231395, "grad_norm": 2.6470742225646973, "learning_rate": 1.9213896052006145e-06, "loss": 0.7298, "step": 10555 }, { "epoch": 0.7987590329537286, "grad_norm": 1.7246336936950684, "learning_rate": 1.9199989146377903e-06, "loss": 0.6901, "step": 10556 }, { "epoch": 0.7988347016760622, "grad_norm": 3.4669127464294434, "learning_rate": 1.918608667447534e-06, "loss": 0.5589, "step": 10557 }, { "epoch": 0.7989103703983959, "grad_norm": 2.552844762802124, "learning_rate": 1.9172188637169087e-06, "loss": 0.5577, "step": 10558 }, { "epoch": 0.7989860391207294, "grad_norm": 3.0051169395446777, "learning_rate": 1.9158295035329425e-06, "loss": 0.5776, "step": 10559 }, { "epoch": 0.7990617078430631, "grad_norm": 2.2740957736968994, "learning_rate": 1.9144405869826475e-06, "loss": 0.6686, "step": 10560 }, { "epoch": 0.7991373765653967, "grad_norm": 2.724806308746338, "learning_rate": 1.9130521141530013e-06, "loss": 0.6296, "step": 10561 }, { "epoch": 0.7992130452877303, "grad_norm": 2.1365087032318115, "learning_rate": 1.9116640851309554e-06, "loss": 0.7783, "step": 10562 }, { "epoch": 0.799288714010064, "grad_norm": 2.0151805877685547, "learning_rate": 1.9102765000034293e-06, "loss": 0.6014, "step": 10563 }, { "epoch": 0.7993643827323975, "grad_norm": 1.9399892091751099, "learning_rate": 1.9088893588573187e-06, "loss": 0.6421, "step": 10564 }, { "epoch": 0.7994400514547312, "grad_norm": 2.5492935180664062, "learning_rate": 1.9075026617794924e-06, "loss": 0.8427, "step": 10565 }, { "epoch": 0.7995157201770648, "grad_norm": 2.4646923542022705, "learning_rate": 1.9061164088567896e-06, "loss": 0.5545, "step": 10566 }, { "epoch": 0.7995913888993984, "grad_norm": 1.8194031715393066, "learning_rate": 1.9047306001760213e-06, "loss": 0.6374, "step": 10567 }, { "epoch": 0.7996670576217321, "grad_norm": 1.9019348621368408, "learning_rate": 1.9033452358239716e-06, "loss": 0.6827, "step": 10568 }, { "epoch": 0.7997427263440657, "grad_norm": 2.4975194931030273, "learning_rate": 1.9019603158873995e-06, "loss": 0.7033, "step": 10569 }, { "epoch": 0.7998183950663993, "grad_norm": 3.534217596054077, "learning_rate": 1.9005758404530242e-06, "loss": 0.6896, "step": 10570 }, { "epoch": 0.799894063788733, "grad_norm": 1.9258010387420654, "learning_rate": 1.8991918096075558e-06, "loss": 0.6669, "step": 10571 }, { "epoch": 0.7999697325110665, "grad_norm": 1.901734709739685, "learning_rate": 1.8978082234376657e-06, "loss": 0.6585, "step": 10572 }, { "epoch": 0.8000454012334002, "grad_norm": 2.1467180252075195, "learning_rate": 1.8964250820299927e-06, "loss": 0.5655, "step": 10573 }, { "epoch": 0.8001210699557338, "grad_norm": 2.114226818084717, "learning_rate": 1.8950423854711563e-06, "loss": 0.6701, "step": 10574 }, { "epoch": 0.8001967386780674, "grad_norm": 2.7017455101013184, "learning_rate": 1.8936601338477445e-06, "loss": 0.5554, "step": 10575 }, { "epoch": 0.8002724074004011, "grad_norm": 1.8943901062011719, "learning_rate": 1.8922783272463251e-06, "loss": 0.5889, "step": 10576 }, { "epoch": 0.8003480761227346, "grad_norm": 2.2416491508483887, "learning_rate": 1.8908969657534225e-06, "loss": 0.7364, "step": 10577 }, { "epoch": 0.8004237448450683, "grad_norm": 2.565739154815674, "learning_rate": 1.889516049455546e-06, "loss": 0.772, "step": 10578 }, { "epoch": 0.800499413567402, "grad_norm": 2.755770683288574, "learning_rate": 1.888135578439172e-06, "loss": 0.6644, "step": 10579 }, { "epoch": 0.8005750822897355, "grad_norm": 1.84762704372406, "learning_rate": 1.8867555527907516e-06, "loss": 0.6432, "step": 10580 }, { "epoch": 0.8006507510120692, "grad_norm": 2.085479497909546, "learning_rate": 1.8853759725967045e-06, "loss": 0.7186, "step": 10581 }, { "epoch": 0.8007264197344028, "grad_norm": 1.9291927814483643, "learning_rate": 1.8839968379434267e-06, "loss": 0.5889, "step": 10582 }, { "epoch": 0.8008020884567364, "grad_norm": 2.066859483718872, "learning_rate": 1.8826181489172843e-06, "loss": 0.6291, "step": 10583 }, { "epoch": 0.8008777571790701, "grad_norm": 2.1499745845794678, "learning_rate": 1.8812399056046118e-06, "loss": 0.5774, "step": 10584 }, { "epoch": 0.8009534259014036, "grad_norm": 2.3581552505493164, "learning_rate": 1.8798621080917184e-06, "loss": 0.632, "step": 10585 }, { "epoch": 0.8010290946237373, "grad_norm": 2.0463876724243164, "learning_rate": 1.8784847564648952e-06, "loss": 0.7999, "step": 10586 }, { "epoch": 0.801104763346071, "grad_norm": 1.8345085382461548, "learning_rate": 1.877107850810387e-06, "loss": 0.6854, "step": 10587 }, { "epoch": 0.8011804320684045, "grad_norm": 2.0658600330352783, "learning_rate": 1.8757313912144227e-06, "loss": 0.6487, "step": 10588 }, { "epoch": 0.8012561007907382, "grad_norm": 2.1300668716430664, "learning_rate": 1.874355377763203e-06, "loss": 0.8562, "step": 10589 }, { "epoch": 0.8013317695130717, "grad_norm": 2.3721659183502197, "learning_rate": 1.8729798105428951e-06, "loss": 0.6192, "step": 10590 }, { "epoch": 0.8014074382354054, "grad_norm": 1.95151686668396, "learning_rate": 1.8716046896396437e-06, "loss": 0.6669, "step": 10591 }, { "epoch": 0.801483106957739, "grad_norm": 2.4539577960968018, "learning_rate": 1.8702300151395627e-06, "loss": 0.8109, "step": 10592 }, { "epoch": 0.8015587756800726, "grad_norm": 2.4067583084106445, "learning_rate": 1.8688557871287382e-06, "loss": 0.6994, "step": 10593 }, { "epoch": 0.8016344444024063, "grad_norm": 2.889045476913452, "learning_rate": 1.8674820056932325e-06, "loss": 0.6797, "step": 10594 }, { "epoch": 0.8017101131247399, "grad_norm": 2.1616549491882324, "learning_rate": 1.8661086709190677e-06, "loss": 0.7378, "step": 10595 }, { "epoch": 0.8017857818470735, "grad_norm": 1.9277255535125732, "learning_rate": 1.864735782892254e-06, "loss": 0.6307, "step": 10596 }, { "epoch": 0.8018614505694072, "grad_norm": 2.1379098892211914, "learning_rate": 1.8633633416987667e-06, "loss": 0.5359, "step": 10597 }, { "epoch": 0.8019371192917407, "grad_norm": 2.582929849624634, "learning_rate": 1.861991347424547e-06, "loss": 0.6166, "step": 10598 }, { "epoch": 0.8020127880140744, "grad_norm": 2.745823383331299, "learning_rate": 1.8606198001555162e-06, "loss": 0.7859, "step": 10599 }, { "epoch": 0.802088456736408, "grad_norm": 1.990330696105957, "learning_rate": 1.8592486999775644e-06, "loss": 0.7426, "step": 10600 }, { "epoch": 0.8021641254587416, "grad_norm": 2.0357511043548584, "learning_rate": 1.8578780469765562e-06, "loss": 0.8174, "step": 10601 }, { "epoch": 0.8022397941810753, "grad_norm": 1.937957763671875, "learning_rate": 1.8565078412383238e-06, "loss": 0.8851, "step": 10602 }, { "epoch": 0.8023154629034088, "grad_norm": 2.319841146469116, "learning_rate": 1.8551380828486765e-06, "loss": 0.6238, "step": 10603 }, { "epoch": 0.8023911316257425, "grad_norm": 2.1866133213043213, "learning_rate": 1.8537687718933928e-06, "loss": 0.7628, "step": 10604 }, { "epoch": 0.8024668003480762, "grad_norm": 2.112520933151245, "learning_rate": 1.852399908458221e-06, "loss": 0.565, "step": 10605 }, { "epoch": 0.8025424690704097, "grad_norm": 2.1112277507781982, "learning_rate": 1.8510314926288826e-06, "loss": 0.7111, "step": 10606 }, { "epoch": 0.8026181377927434, "grad_norm": 2.6001243591308594, "learning_rate": 1.8496635244910772e-06, "loss": 0.6784, "step": 10607 }, { "epoch": 0.802693806515077, "grad_norm": 2.205632448196411, "learning_rate": 1.84829600413047e-06, "loss": 0.6225, "step": 10608 }, { "epoch": 0.8027694752374106, "grad_norm": 2.1408543586730957, "learning_rate": 1.8469289316326977e-06, "loss": 0.6406, "step": 10609 }, { "epoch": 0.8028451439597443, "grad_norm": 2.142906904220581, "learning_rate": 1.8455623070833706e-06, "loss": 0.7465, "step": 10610 }, { "epoch": 0.8029208126820778, "grad_norm": 2.3650617599487305, "learning_rate": 1.8441961305680726e-06, "loss": 0.6459, "step": 10611 }, { "epoch": 0.8029964814044115, "grad_norm": 2.328948497772217, "learning_rate": 1.842830402172357e-06, "loss": 0.7611, "step": 10612 }, { "epoch": 0.8030721501267452, "grad_norm": 1.9152311086654663, "learning_rate": 1.8414651219817513e-06, "loss": 0.5491, "step": 10613 }, { "epoch": 0.8031478188490787, "grad_norm": 3.1804325580596924, "learning_rate": 1.8401002900817533e-06, "loss": 0.6511, "step": 10614 }, { "epoch": 0.8032234875714124, "grad_norm": 2.4679126739501953, "learning_rate": 1.8387359065578344e-06, "loss": 0.6399, "step": 10615 }, { "epoch": 0.8032991562937459, "grad_norm": 2.6551365852355957, "learning_rate": 1.8373719714954315e-06, "loss": 0.7862, "step": 10616 }, { "epoch": 0.8033748250160796, "grad_norm": 1.9284342527389526, "learning_rate": 1.8360084849799643e-06, "loss": 0.6186, "step": 10617 }, { "epoch": 0.8034504937384133, "grad_norm": 2.3558826446533203, "learning_rate": 1.8346454470968194e-06, "loss": 0.7019, "step": 10618 }, { "epoch": 0.8035261624607468, "grad_norm": 2.2871432304382324, "learning_rate": 1.8332828579313505e-06, "loss": 0.7563, "step": 10619 }, { "epoch": 0.8036018311830805, "grad_norm": 2.3927693367004395, "learning_rate": 1.8319207175688881e-06, "loss": 0.7131, "step": 10620 }, { "epoch": 0.8036774999054141, "grad_norm": 2.0817835330963135, "learning_rate": 1.8305590260947336e-06, "loss": 0.6026, "step": 10621 }, { "epoch": 0.8037531686277477, "grad_norm": 2.117398738861084, "learning_rate": 1.8291977835941651e-06, "loss": 0.6481, "step": 10622 }, { "epoch": 0.8038288373500814, "grad_norm": 2.352769136428833, "learning_rate": 1.827836990152423e-06, "loss": 0.6245, "step": 10623 }, { "epoch": 0.8039045060724149, "grad_norm": 1.9620620012283325, "learning_rate": 1.8264766458547258e-06, "loss": 0.8225, "step": 10624 }, { "epoch": 0.8039801747947486, "grad_norm": 1.7931766510009766, "learning_rate": 1.8251167507862633e-06, "loss": 0.5747, "step": 10625 }, { "epoch": 0.8040558435170823, "grad_norm": 2.456925630569458, "learning_rate": 1.8237573050321955e-06, "loss": 0.6384, "step": 10626 }, { "epoch": 0.8041315122394158, "grad_norm": 2.1616482734680176, "learning_rate": 1.8223983086776574e-06, "loss": 0.6522, "step": 10627 }, { "epoch": 0.8042071809617495, "grad_norm": 1.8963215351104736, "learning_rate": 1.8210397618077507e-06, "loss": 0.679, "step": 10628 }, { "epoch": 0.804282849684083, "grad_norm": 2.105846643447876, "learning_rate": 1.8196816645075575e-06, "loss": 0.7072, "step": 10629 }, { "epoch": 0.8043585184064167, "grad_norm": 2.052081823348999, "learning_rate": 1.8183240168621198e-06, "loss": 0.5888, "step": 10630 }, { "epoch": 0.8044341871287504, "grad_norm": 2.6387953758239746, "learning_rate": 1.8169668189564574e-06, "loss": 0.5963, "step": 10631 }, { "epoch": 0.8045098558510839, "grad_norm": 3.556173801422119, "learning_rate": 1.8156100708755705e-06, "loss": 0.6972, "step": 10632 }, { "epoch": 0.8045855245734176, "grad_norm": 2.080299139022827, "learning_rate": 1.8142537727044158e-06, "loss": 0.5975, "step": 10633 }, { "epoch": 0.8046611932957513, "grad_norm": 2.5786876678466797, "learning_rate": 1.812897924527932e-06, "loss": 0.64, "step": 10634 }, { "epoch": 0.8047368620180848, "grad_norm": 3.127788543701172, "learning_rate": 1.8115425264310257e-06, "loss": 0.7167, "step": 10635 }, { "epoch": 0.8048125307404185, "grad_norm": 1.6849464178085327, "learning_rate": 1.810187578498577e-06, "loss": 0.6677, "step": 10636 }, { "epoch": 0.804888199462752, "grad_norm": 2.1735470294952393, "learning_rate": 1.8088330808154364e-06, "loss": 0.6236, "step": 10637 }, { "epoch": 0.8049638681850857, "grad_norm": 2.128405809402466, "learning_rate": 1.8074790334664275e-06, "loss": 0.6748, "step": 10638 }, { "epoch": 0.8050395369074194, "grad_norm": 2.5104458332061768, "learning_rate": 1.806125436536345e-06, "loss": 0.6915, "step": 10639 }, { "epoch": 0.8051152056297529, "grad_norm": 2.0362462997436523, "learning_rate": 1.8047722901099575e-06, "loss": 0.5961, "step": 10640 }, { "epoch": 0.8051908743520866, "grad_norm": 3.8204987049102783, "learning_rate": 1.803419594271999e-06, "loss": 0.6853, "step": 10641 }, { "epoch": 0.8052665430744201, "grad_norm": 2.5584213733673096, "learning_rate": 1.80206734910718e-06, "loss": 0.8556, "step": 10642 }, { "epoch": 0.8053422117967538, "grad_norm": 2.950486898422241, "learning_rate": 1.800715554700189e-06, "loss": 0.7538, "step": 10643 }, { "epoch": 0.8054178805190875, "grad_norm": 1.961512565612793, "learning_rate": 1.7993642111356726e-06, "loss": 0.6005, "step": 10644 }, { "epoch": 0.805493549241421, "grad_norm": 2.0798726081848145, "learning_rate": 1.7980133184982597e-06, "loss": 0.7188, "step": 10645 }, { "epoch": 0.8055692179637547, "grad_norm": 2.015023946762085, "learning_rate": 1.796662876872547e-06, "loss": 0.6464, "step": 10646 }, { "epoch": 0.8056448866860884, "grad_norm": 2.150325059890747, "learning_rate": 1.7953128863431025e-06, "loss": 0.6864, "step": 10647 }, { "epoch": 0.8057205554084219, "grad_norm": 2.242133617401123, "learning_rate": 1.7939633469944687e-06, "loss": 0.5954, "step": 10648 }, { "epoch": 0.8057962241307556, "grad_norm": 2.3537936210632324, "learning_rate": 1.792614258911157e-06, "loss": 0.734, "step": 10649 }, { "epoch": 0.8058718928530891, "grad_norm": 4.886502742767334, "learning_rate": 1.7912656221776517e-06, "loss": 0.5386, "step": 10650 }, { "epoch": 0.8059475615754228, "grad_norm": 2.2850396633148193, "learning_rate": 1.7899174368784116e-06, "loss": 0.7146, "step": 10651 }, { "epoch": 0.8060232302977565, "grad_norm": 1.9249435663223267, "learning_rate": 1.7885697030978569e-06, "loss": 0.7584, "step": 10652 }, { "epoch": 0.80609889902009, "grad_norm": 1.7427829504013062, "learning_rate": 1.787222420920394e-06, "loss": 0.694, "step": 10653 }, { "epoch": 0.8061745677424237, "grad_norm": 2.255074977874756, "learning_rate": 1.7858755904303947e-06, "loss": 0.7371, "step": 10654 }, { "epoch": 0.8062502364647572, "grad_norm": 2.0775415897369385, "learning_rate": 1.7845292117121972e-06, "loss": 0.601, "step": 10655 }, { "epoch": 0.8063259051870909, "grad_norm": 3.3672308921813965, "learning_rate": 1.7831832848501183e-06, "loss": 0.6938, "step": 10656 }, { "epoch": 0.8064015739094246, "grad_norm": 2.231694459915161, "learning_rate": 1.7818378099284435e-06, "loss": 0.6689, "step": 10657 }, { "epoch": 0.8064772426317581, "grad_norm": 2.523862838745117, "learning_rate": 1.7804927870314314e-06, "loss": 0.6083, "step": 10658 }, { "epoch": 0.8065529113540918, "grad_norm": 2.5286412239074707, "learning_rate": 1.7791482162433126e-06, "loss": 0.7218, "step": 10659 }, { "epoch": 0.8066285800764255, "grad_norm": 1.992052674293518, "learning_rate": 1.7778040976482867e-06, "loss": 0.7306, "step": 10660 }, { "epoch": 0.806704248798759, "grad_norm": 2.2717790603637695, "learning_rate": 1.7764604313305307e-06, "loss": 0.7695, "step": 10661 }, { "epoch": 0.8067799175210927, "grad_norm": 2.3762831687927246, "learning_rate": 1.7751172173741807e-06, "loss": 0.6682, "step": 10662 }, { "epoch": 0.8068555862434262, "grad_norm": 1.9005999565124512, "learning_rate": 1.773774455863361e-06, "loss": 0.7671, "step": 10663 }, { "epoch": 0.8069312549657599, "grad_norm": 1.9350870847702026, "learning_rate": 1.772432146882158e-06, "loss": 0.5733, "step": 10664 }, { "epoch": 0.8070069236880936, "grad_norm": 1.9355764389038086, "learning_rate": 1.7710902905146324e-06, "loss": 0.5401, "step": 10665 }, { "epoch": 0.8070825924104271, "grad_norm": 8.177988052368164, "learning_rate": 1.7697488868448123e-06, "loss": 0.6673, "step": 10666 }, { "epoch": 0.8071582611327608, "grad_norm": 2.334674835205078, "learning_rate": 1.7684079359567002e-06, "loss": 0.6466, "step": 10667 }, { "epoch": 0.8072339298550943, "grad_norm": 1.965072751045227, "learning_rate": 1.7670674379342773e-06, "loss": 0.6563, "step": 10668 }, { "epoch": 0.807309598577428, "grad_norm": 2.178422451019287, "learning_rate": 1.7657273928614828e-06, "loss": 0.7631, "step": 10669 }, { "epoch": 0.8073852672997617, "grad_norm": 2.614917755126953, "learning_rate": 1.7643878008222373e-06, "loss": 0.63, "step": 10670 }, { "epoch": 0.8074609360220952, "grad_norm": 2.7337183952331543, "learning_rate": 1.7630486619004313e-06, "loss": 0.7156, "step": 10671 }, { "epoch": 0.8075366047444289, "grad_norm": 2.084549903869629, "learning_rate": 1.7617099761799246e-06, "loss": 0.49, "step": 10672 }, { "epoch": 0.8076122734667626, "grad_norm": 2.35610294342041, "learning_rate": 1.7603717437445506e-06, "loss": 0.6401, "step": 10673 }, { "epoch": 0.8076879421890961, "grad_norm": 2.1338629722595215, "learning_rate": 1.7590339646781149e-06, "loss": 0.5452, "step": 10674 }, { "epoch": 0.8077636109114298, "grad_norm": 2.5002894401550293, "learning_rate": 1.7576966390643935e-06, "loss": 0.6739, "step": 10675 }, { "epoch": 0.8078392796337633, "grad_norm": 2.1118812561035156, "learning_rate": 1.7563597669871315e-06, "loss": 0.5796, "step": 10676 }, { "epoch": 0.807914948356097, "grad_norm": 2.3569159507751465, "learning_rate": 1.7550233485300469e-06, "loss": 0.7535, "step": 10677 }, { "epoch": 0.8079906170784307, "grad_norm": 1.8390127420425415, "learning_rate": 1.7536873837768358e-06, "loss": 0.6276, "step": 10678 }, { "epoch": 0.8080662858007642, "grad_norm": 2.113814115524292, "learning_rate": 1.7523518728111603e-06, "loss": 0.5618, "step": 10679 }, { "epoch": 0.8081419545230979, "grad_norm": 2.1201260089874268, "learning_rate": 1.7510168157166506e-06, "loss": 0.6731, "step": 10680 }, { "epoch": 0.8082176232454314, "grad_norm": 2.6762893199920654, "learning_rate": 1.7496822125769133e-06, "loss": 0.5319, "step": 10681 }, { "epoch": 0.8082932919677651, "grad_norm": 2.3151755332946777, "learning_rate": 1.7483480634755262e-06, "loss": 0.7085, "step": 10682 }, { "epoch": 0.8083689606900988, "grad_norm": 2.3955535888671875, "learning_rate": 1.7470143684960382e-06, "loss": 0.6417, "step": 10683 }, { "epoch": 0.8084446294124323, "grad_norm": 2.628687858581543, "learning_rate": 1.7456811277219693e-06, "loss": 0.5682, "step": 10684 }, { "epoch": 0.808520298134766, "grad_norm": 2.5121073722839355, "learning_rate": 1.7443483412368119e-06, "loss": 0.675, "step": 10685 }, { "epoch": 0.8085959668570997, "grad_norm": 2.235436201095581, "learning_rate": 1.7430160091240313e-06, "loss": 0.599, "step": 10686 }, { "epoch": 0.8086716355794332, "grad_norm": 2.1594743728637695, "learning_rate": 1.7416841314670577e-06, "loss": 0.6628, "step": 10687 }, { "epoch": 0.8087473043017669, "grad_norm": 2.264970064163208, "learning_rate": 1.7403527083492974e-06, "loss": 0.5556, "step": 10688 }, { "epoch": 0.8088229730241004, "grad_norm": 2.473870038986206, "learning_rate": 1.7390217398541355e-06, "loss": 0.6411, "step": 10689 }, { "epoch": 0.8088986417464341, "grad_norm": 2.277695894241333, "learning_rate": 1.7376912260649158e-06, "loss": 0.6542, "step": 10690 }, { "epoch": 0.8089743104687678, "grad_norm": 2.1290292739868164, "learning_rate": 1.73636116706496e-06, "loss": 0.6924, "step": 10691 }, { "epoch": 0.8090499791911013, "grad_norm": 2.1614058017730713, "learning_rate": 1.7350315629375611e-06, "loss": 0.588, "step": 10692 }, { "epoch": 0.809125647913435, "grad_norm": 1.7077982425689697, "learning_rate": 1.733702413765984e-06, "loss": 0.5756, "step": 10693 }, { "epoch": 0.8092013166357686, "grad_norm": 2.483567953109741, "learning_rate": 1.7323737196334635e-06, "loss": 0.6524, "step": 10694 }, { "epoch": 0.8092769853581022, "grad_norm": 2.2687125205993652, "learning_rate": 1.7310454806232077e-06, "loss": 0.6016, "step": 10695 }, { "epoch": 0.8093526540804359, "grad_norm": 2.1247775554656982, "learning_rate": 1.7297176968183935e-06, "loss": 0.6076, "step": 10696 }, { "epoch": 0.8094283228027694, "grad_norm": 2.6836366653442383, "learning_rate": 1.7283903683021748e-06, "loss": 0.6584, "step": 10697 }, { "epoch": 0.8095039915251031, "grad_norm": 2.096525192260742, "learning_rate": 1.7270634951576667e-06, "loss": 0.5641, "step": 10698 }, { "epoch": 0.8095796602474368, "grad_norm": 2.056962251663208, "learning_rate": 1.7257370774679675e-06, "loss": 0.6351, "step": 10699 }, { "epoch": 0.8096553289697703, "grad_norm": 2.368328809738159, "learning_rate": 1.7244111153161425e-06, "loss": 0.696, "step": 10700 }, { "epoch": 0.809730997692104, "grad_norm": 2.048150062561035, "learning_rate": 1.7230856087852236e-06, "loss": 0.6948, "step": 10701 }, { "epoch": 0.8098066664144375, "grad_norm": 3.076840400695801, "learning_rate": 1.7217605579582204e-06, "loss": 0.6797, "step": 10702 }, { "epoch": 0.8098823351367712, "grad_norm": 1.980602502822876, "learning_rate": 1.7204359629181112e-06, "loss": 0.6077, "step": 10703 }, { "epoch": 0.8099580038591049, "grad_norm": 2.4633147716522217, "learning_rate": 1.719111823747847e-06, "loss": 0.6029, "step": 10704 }, { "epoch": 0.8100336725814384, "grad_norm": 6.110641956329346, "learning_rate": 1.7177881405303505e-06, "loss": 0.7389, "step": 10705 }, { "epoch": 0.8101093413037721, "grad_norm": 2.669058322906494, "learning_rate": 1.716464913348514e-06, "loss": 0.6563, "step": 10706 }, { "epoch": 0.8101850100261057, "grad_norm": 2.2113122940063477, "learning_rate": 1.7151421422852045e-06, "loss": 0.6166, "step": 10707 }, { "epoch": 0.8102606787484393, "grad_norm": 2.3939402103424072, "learning_rate": 1.7138198274232508e-06, "loss": 0.618, "step": 10708 }, { "epoch": 0.810336347470773, "grad_norm": 2.17268967628479, "learning_rate": 1.7124979688454684e-06, "loss": 0.8182, "step": 10709 }, { "epoch": 0.8104120161931065, "grad_norm": 9.700509071350098, "learning_rate": 1.7111765666346343e-06, "loss": 0.6712, "step": 10710 }, { "epoch": 0.8104876849154402, "grad_norm": 2.0205531120300293, "learning_rate": 1.7098556208735011e-06, "loss": 0.7657, "step": 10711 }, { "epoch": 0.8105633536377739, "grad_norm": 1.8378758430480957, "learning_rate": 1.708535131644785e-06, "loss": 0.6912, "step": 10712 }, { "epoch": 0.8106390223601074, "grad_norm": 1.6077888011932373, "learning_rate": 1.7072150990311805e-06, "loss": 0.6132, "step": 10713 }, { "epoch": 0.8107146910824411, "grad_norm": 1.9418666362762451, "learning_rate": 1.7058955231153598e-06, "loss": 0.6918, "step": 10714 }, { "epoch": 0.8107903598047747, "grad_norm": 2.010127544403076, "learning_rate": 1.7045764039799502e-06, "loss": 0.6395, "step": 10715 }, { "epoch": 0.8108660285271083, "grad_norm": 1.9228168725967407, "learning_rate": 1.7032577417075624e-06, "loss": 0.7397, "step": 10716 }, { "epoch": 0.810941697249442, "grad_norm": 2.564603328704834, "learning_rate": 1.7019395363807748e-06, "loss": 0.7028, "step": 10717 }, { "epoch": 0.8110173659717755, "grad_norm": 2.034059762954712, "learning_rate": 1.7006217880821414e-06, "loss": 0.6505, "step": 10718 }, { "epoch": 0.8110930346941092, "grad_norm": 2.007988691329956, "learning_rate": 1.6993044968941754e-06, "loss": 0.7674, "step": 10719 }, { "epoch": 0.8111687034164428, "grad_norm": 2.383240222930908, "learning_rate": 1.6979876628993777e-06, "loss": 0.7798, "step": 10720 }, { "epoch": 0.8112443721387764, "grad_norm": 2.532583236694336, "learning_rate": 1.6966712861802135e-06, "loss": 0.7291, "step": 10721 }, { "epoch": 0.8113200408611101, "grad_norm": 2.3727943897247314, "learning_rate": 1.6953553668191115e-06, "loss": 0.6543, "step": 10722 }, { "epoch": 0.8113957095834436, "grad_norm": 2.5050201416015625, "learning_rate": 1.6940399048984833e-06, "loss": 0.5767, "step": 10723 }, { "epoch": 0.8114713783057773, "grad_norm": 2.1862447261810303, "learning_rate": 1.6927249005007034e-06, "loss": 0.7536, "step": 10724 }, { "epoch": 0.811547047028111, "grad_norm": 2.4321300983428955, "learning_rate": 1.6914103537081305e-06, "loss": 0.7764, "step": 10725 }, { "epoch": 0.8116227157504445, "grad_norm": 2.240424871444702, "learning_rate": 1.6900962646030772e-06, "loss": 0.6716, "step": 10726 }, { "epoch": 0.8116983844727782, "grad_norm": 2.253845453262329, "learning_rate": 1.6887826332678393e-06, "loss": 0.6926, "step": 10727 }, { "epoch": 0.8117740531951118, "grad_norm": 1.9651557207107544, "learning_rate": 1.6874694597846795e-06, "loss": 0.7082, "step": 10728 }, { "epoch": 0.8118497219174454, "grad_norm": 2.718416929244995, "learning_rate": 1.686156744235834e-06, "loss": 0.7596, "step": 10729 }, { "epoch": 0.8119253906397791, "grad_norm": 2.3345022201538086, "learning_rate": 1.6848444867035093e-06, "loss": 0.6771, "step": 10730 }, { "epoch": 0.8120010593621126, "grad_norm": 2.0901668071746826, "learning_rate": 1.6835326872698826e-06, "loss": 0.6624, "step": 10731 }, { "epoch": 0.8120767280844463, "grad_norm": 2.0496819019317627, "learning_rate": 1.6822213460171061e-06, "loss": 0.6007, "step": 10732 }, { "epoch": 0.8121523968067799, "grad_norm": 1.9577654600143433, "learning_rate": 1.6809104630272944e-06, "loss": 0.8049, "step": 10733 }, { "epoch": 0.8122280655291135, "grad_norm": 2.862490177154541, "learning_rate": 1.6796000383825414e-06, "loss": 0.6048, "step": 10734 }, { "epoch": 0.8123037342514472, "grad_norm": 2.0185632705688477, "learning_rate": 1.6782900721649146e-06, "loss": 0.67, "step": 10735 }, { "epoch": 0.8123794029737808, "grad_norm": 2.023618221282959, "learning_rate": 1.6769805644564426e-06, "loss": 0.599, "step": 10736 }, { "epoch": 0.8124550716961144, "grad_norm": 2.3943023681640625, "learning_rate": 1.6756715153391327e-06, "loss": 0.5855, "step": 10737 }, { "epoch": 0.8125307404184481, "grad_norm": 2.002091407775879, "learning_rate": 1.6743629248949631e-06, "loss": 0.7371, "step": 10738 }, { "epoch": 0.8126064091407816, "grad_norm": 2.1737380027770996, "learning_rate": 1.6730547932058806e-06, "loss": 0.5976, "step": 10739 }, { "epoch": 0.8126820778631153, "grad_norm": 2.151082992553711, "learning_rate": 1.6717471203538053e-06, "loss": 0.7263, "step": 10740 }, { "epoch": 0.8127577465854489, "grad_norm": 2.34664249420166, "learning_rate": 1.670439906420628e-06, "loss": 0.6413, "step": 10741 }, { "epoch": 0.8128334153077825, "grad_norm": 2.2073299884796143, "learning_rate": 1.66913315148821e-06, "loss": 0.7334, "step": 10742 }, { "epoch": 0.8129090840301162, "grad_norm": 1.9917361736297607, "learning_rate": 1.667826855638388e-06, "loss": 0.6073, "step": 10743 }, { "epoch": 0.8129847527524497, "grad_norm": 2.251788854598999, "learning_rate": 1.6665210189529585e-06, "loss": 0.6817, "step": 10744 }, { "epoch": 0.8130604214747834, "grad_norm": 2.446361541748047, "learning_rate": 1.6652156415137041e-06, "loss": 0.7737, "step": 10745 }, { "epoch": 0.813136090197117, "grad_norm": 2.6486692428588867, "learning_rate": 1.6639107234023723e-06, "loss": 0.5922, "step": 10746 }, { "epoch": 0.8132117589194506, "grad_norm": 2.2547566890716553, "learning_rate": 1.662606264700676e-06, "loss": 0.6869, "step": 10747 }, { "epoch": 0.8132874276417843, "grad_norm": 2.7549359798431396, "learning_rate": 1.6613022654903086e-06, "loss": 0.5634, "step": 10748 }, { "epoch": 0.8133630963641179, "grad_norm": 2.0122973918914795, "learning_rate": 1.6599987258529288e-06, "loss": 0.5678, "step": 10749 }, { "epoch": 0.8134387650864515, "grad_norm": 2.138713836669922, "learning_rate": 1.6586956458701685e-06, "loss": 0.7879, "step": 10750 }, { "epoch": 0.8135144338087852, "grad_norm": 2.357614040374756, "learning_rate": 1.6573930256236323e-06, "loss": 0.6622, "step": 10751 }, { "epoch": 0.8135901025311187, "grad_norm": 2.2221148014068604, "learning_rate": 1.656090865194894e-06, "loss": 0.79, "step": 10752 }, { "epoch": 0.8136657712534524, "grad_norm": 2.7023918628692627, "learning_rate": 1.654789164665499e-06, "loss": 0.716, "step": 10753 }, { "epoch": 0.813741439975786, "grad_norm": 2.392548084259033, "learning_rate": 1.6534879241169625e-06, "loss": 0.6097, "step": 10754 }, { "epoch": 0.8138171086981196, "grad_norm": 2.4918103218078613, "learning_rate": 1.6521871436307754e-06, "loss": 0.5744, "step": 10755 }, { "epoch": 0.8138927774204533, "grad_norm": 1.715964913368225, "learning_rate": 1.6508868232883932e-06, "loss": 0.614, "step": 10756 }, { "epoch": 0.8139684461427869, "grad_norm": 1.9560282230377197, "learning_rate": 1.649586963171252e-06, "loss": 0.6038, "step": 10757 }, { "epoch": 0.8140441148651205, "grad_norm": 1.8922325372695923, "learning_rate": 1.6482875633607465e-06, "loss": 0.6643, "step": 10758 }, { "epoch": 0.8141197835874541, "grad_norm": 2.1275534629821777, "learning_rate": 1.6469886239382518e-06, "loss": 0.7323, "step": 10759 }, { "epoch": 0.8141954523097877, "grad_norm": 2.745668649673462, "learning_rate": 1.6456901449851118e-06, "loss": 0.655, "step": 10760 }, { "epoch": 0.8142711210321214, "grad_norm": 2.2803080081939697, "learning_rate": 1.6443921265826423e-06, "loss": 0.5338, "step": 10761 }, { "epoch": 0.814346789754455, "grad_norm": 2.5156054496765137, "learning_rate": 1.6430945688121284e-06, "loss": 0.6988, "step": 10762 }, { "epoch": 0.8144224584767886, "grad_norm": 1.8427619934082031, "learning_rate": 1.6417974717548272e-06, "loss": 0.5556, "step": 10763 }, { "epoch": 0.8144981271991223, "grad_norm": 2.746143341064453, "learning_rate": 1.6405008354919705e-06, "loss": 0.7378, "step": 10764 }, { "epoch": 0.8145737959214558, "grad_norm": 2.229966163635254, "learning_rate": 1.6392046601047505e-06, "loss": 0.7431, "step": 10765 }, { "epoch": 0.8146494646437895, "grad_norm": 3.1086843013763428, "learning_rate": 1.637908945674344e-06, "loss": 0.7127, "step": 10766 }, { "epoch": 0.8147251333661231, "grad_norm": 2.0336904525756836, "learning_rate": 1.6366136922818926e-06, "loss": 0.6288, "step": 10767 }, { "epoch": 0.8148008020884567, "grad_norm": 2.8491573333740234, "learning_rate": 1.635318900008509e-06, "loss": 0.7268, "step": 10768 }, { "epoch": 0.8148764708107904, "grad_norm": 2.5054476261138916, "learning_rate": 1.6340245689352744e-06, "loss": 0.7421, "step": 10769 }, { "epoch": 0.814952139533124, "grad_norm": 2.3975508213043213, "learning_rate": 1.6327306991432431e-06, "loss": 0.6014, "step": 10770 }, { "epoch": 0.8150278082554576, "grad_norm": 1.5673209428787231, "learning_rate": 1.6314372907134484e-06, "loss": 0.8127, "step": 10771 }, { "epoch": 0.8151034769777912, "grad_norm": 1.5236127376556396, "learning_rate": 1.630144343726882e-06, "loss": 0.6618, "step": 10772 }, { "epoch": 0.8151791457001248, "grad_norm": 2.371945858001709, "learning_rate": 1.6288518582645128e-06, "loss": 0.7702, "step": 10773 }, { "epoch": 0.8152548144224585, "grad_norm": 3.2651684284210205, "learning_rate": 1.6275598344072825e-06, "loss": 0.53, "step": 10774 }, { "epoch": 0.8153304831447921, "grad_norm": 2.621706008911133, "learning_rate": 1.6262682722360997e-06, "loss": 0.6125, "step": 10775 }, { "epoch": 0.8154061518671257, "grad_norm": 2.606985092163086, "learning_rate": 1.6249771718318475e-06, "loss": 0.7299, "step": 10776 }, { "epoch": 0.8154818205894594, "grad_norm": 2.2531168460845947, "learning_rate": 1.6236865332753782e-06, "loss": 0.6822, "step": 10777 }, { "epoch": 0.815557489311793, "grad_norm": 1.9697020053863525, "learning_rate": 1.6223963566475195e-06, "loss": 0.5334, "step": 10778 }, { "epoch": 0.8156331580341266, "grad_norm": 2.3771908283233643, "learning_rate": 1.6211066420290594e-06, "loss": 0.6614, "step": 10779 }, { "epoch": 0.8157088267564602, "grad_norm": 2.4156370162963867, "learning_rate": 1.6198173895007665e-06, "loss": 0.6298, "step": 10780 }, { "epoch": 0.8157844954787938, "grad_norm": 4.180350303649902, "learning_rate": 1.6185285991433812e-06, "loss": 0.7348, "step": 10781 }, { "epoch": 0.8158601642011275, "grad_norm": 2.094904899597168, "learning_rate": 1.6172402710376108e-06, "loss": 0.6079, "step": 10782 }, { "epoch": 0.8159358329234611, "grad_norm": 2.2320287227630615, "learning_rate": 1.6159524052641319e-06, "loss": 0.7023, "step": 10783 }, { "epoch": 0.8160115016457947, "grad_norm": 2.9694509506225586, "learning_rate": 1.6146650019035967e-06, "loss": 0.7397, "step": 10784 }, { "epoch": 0.8160871703681284, "grad_norm": 5.22353982925415, "learning_rate": 1.6133780610366253e-06, "loss": 0.6958, "step": 10785 }, { "epoch": 0.816162839090462, "grad_norm": 2.167146682739258, "learning_rate": 1.6120915827438116e-06, "loss": 0.8393, "step": 10786 }, { "epoch": 0.8162385078127956, "grad_norm": 2.2224442958831787, "learning_rate": 1.6108055671057176e-06, "loss": 0.5897, "step": 10787 }, { "epoch": 0.8163141765351292, "grad_norm": 2.2738425731658936, "learning_rate": 1.6095200142028796e-06, "loss": 0.6285, "step": 10788 }, { "epoch": 0.8163898452574628, "grad_norm": 2.0885982513427734, "learning_rate": 1.6082349241158033e-06, "loss": 0.6971, "step": 10789 }, { "epoch": 0.8164655139797965, "grad_norm": 1.7838362455368042, "learning_rate": 1.6069502969249595e-06, "loss": 0.6409, "step": 10790 }, { "epoch": 0.81654118270213, "grad_norm": 2.1843435764312744, "learning_rate": 1.6056661327108026e-06, "loss": 0.7144, "step": 10791 }, { "epoch": 0.8166168514244637, "grad_norm": 2.814628839492798, "learning_rate": 1.6043824315537513e-06, "loss": 0.5826, "step": 10792 }, { "epoch": 0.8166925201467973, "grad_norm": 1.8284931182861328, "learning_rate": 1.6030991935341905e-06, "loss": 0.6479, "step": 10793 }, { "epoch": 0.8167681888691309, "grad_norm": 2.4346306324005127, "learning_rate": 1.6018164187324818e-06, "loss": 0.7205, "step": 10794 }, { "epoch": 0.8168438575914646, "grad_norm": 2.5444185733795166, "learning_rate": 1.6005341072289578e-06, "loss": 0.8004, "step": 10795 }, { "epoch": 0.8169195263137982, "grad_norm": 3.5385169982910156, "learning_rate": 1.5992522591039204e-06, "loss": 0.7806, "step": 10796 }, { "epoch": 0.8169951950361318, "grad_norm": 2.1103341579437256, "learning_rate": 1.5979708744376443e-06, "loss": 0.6149, "step": 10797 }, { "epoch": 0.8170708637584655, "grad_norm": 2.047963857650757, "learning_rate": 1.5966899533103725e-06, "loss": 0.6581, "step": 10798 }, { "epoch": 0.817146532480799, "grad_norm": 2.359651565551758, "learning_rate": 1.5954094958023217e-06, "loss": 0.6508, "step": 10799 }, { "epoch": 0.8172222012031327, "grad_norm": 2.0635018348693848, "learning_rate": 1.5941295019936786e-06, "loss": 0.7374, "step": 10800 }, { "epoch": 0.8172978699254663, "grad_norm": 1.9403828382492065, "learning_rate": 1.5928499719645964e-06, "loss": 0.6434, "step": 10801 }, { "epoch": 0.8173735386477999, "grad_norm": 2.187650442123413, "learning_rate": 1.5915709057952078e-06, "loss": 0.7436, "step": 10802 }, { "epoch": 0.8174492073701336, "grad_norm": 2.316969871520996, "learning_rate": 1.5902923035656138e-06, "loss": 0.6399, "step": 10803 }, { "epoch": 0.8175248760924672, "grad_norm": 2.0138914585113525, "learning_rate": 1.5890141653558796e-06, "loss": 0.7807, "step": 10804 }, { "epoch": 0.8176005448148008, "grad_norm": 2.207831382751465, "learning_rate": 1.5877364912460476e-06, "loss": 0.5119, "step": 10805 }, { "epoch": 0.8176762135371344, "grad_norm": 2.2698066234588623, "learning_rate": 1.586459281316131e-06, "loss": 0.7117, "step": 10806 }, { "epoch": 0.817751882259468, "grad_norm": 2.2364702224731445, "learning_rate": 1.5851825356461133e-06, "loss": 0.6974, "step": 10807 }, { "epoch": 0.8178275509818017, "grad_norm": 1.6945065259933472, "learning_rate": 1.583906254315947e-06, "loss": 0.6608, "step": 10808 }, { "epoch": 0.8179032197041353, "grad_norm": 1.855660319328308, "learning_rate": 1.5826304374055573e-06, "loss": 0.5783, "step": 10809 }, { "epoch": 0.8179788884264689, "grad_norm": 2.5465874671936035, "learning_rate": 1.5813550849948433e-06, "loss": 0.6822, "step": 10810 }, { "epoch": 0.8180545571488026, "grad_norm": 2.3344638347625732, "learning_rate": 1.580080197163663e-06, "loss": 0.6158, "step": 10811 }, { "epoch": 0.8181302258711362, "grad_norm": 4.9126877784729, "learning_rate": 1.578805773991863e-06, "loss": 0.6967, "step": 10812 }, { "epoch": 0.8182058945934698, "grad_norm": 2.2319014072418213, "learning_rate": 1.577531815559248e-06, "loss": 0.6559, "step": 10813 }, { "epoch": 0.8182815633158034, "grad_norm": 5.378940582275391, "learning_rate": 1.5762583219456002e-06, "loss": 0.762, "step": 10814 }, { "epoch": 0.818357232038137, "grad_norm": 3.3257508277893066, "learning_rate": 1.574985293230666e-06, "loss": 0.7037, "step": 10815 }, { "epoch": 0.8184329007604707, "grad_norm": 2.033773899078369, "learning_rate": 1.5737127294941647e-06, "loss": 0.6447, "step": 10816 }, { "epoch": 0.8185085694828043, "grad_norm": 2.516923666000366, "learning_rate": 1.5724406308157973e-06, "loss": 0.546, "step": 10817 }, { "epoch": 0.8185842382051379, "grad_norm": 1.9066401720046997, "learning_rate": 1.5711689972752181e-06, "loss": 0.5628, "step": 10818 }, { "epoch": 0.8186599069274715, "grad_norm": 2.1507344245910645, "learning_rate": 1.5698978289520646e-06, "loss": 0.6484, "step": 10819 }, { "epoch": 0.8187355756498051, "grad_norm": 2.1909475326538086, "learning_rate": 1.568627125925941e-06, "loss": 0.5734, "step": 10820 }, { "epoch": 0.8188112443721388, "grad_norm": 2.0895121097564697, "learning_rate": 1.5673568882764225e-06, "loss": 0.5795, "step": 10821 }, { "epoch": 0.8188869130944724, "grad_norm": 1.9548548460006714, "learning_rate": 1.5660871160830558e-06, "loss": 0.6567, "step": 10822 }, { "epoch": 0.818962581816806, "grad_norm": 2.28955340385437, "learning_rate": 1.564817809425358e-06, "loss": 0.7006, "step": 10823 }, { "epoch": 0.8190382505391397, "grad_norm": 2.538539409637451, "learning_rate": 1.5635489683828196e-06, "loss": 0.5051, "step": 10824 }, { "epoch": 0.8191139192614733, "grad_norm": 2.679903984069824, "learning_rate": 1.5622805930348953e-06, "loss": 0.667, "step": 10825 }, { "epoch": 0.8191895879838069, "grad_norm": 2.0182337760925293, "learning_rate": 1.5610126834610141e-06, "loss": 0.6008, "step": 10826 }, { "epoch": 0.8192652567061405, "grad_norm": 1.927375316619873, "learning_rate": 1.5597452397405818e-06, "loss": 0.612, "step": 10827 }, { "epoch": 0.8193409254284741, "grad_norm": 2.192244291305542, "learning_rate": 1.5584782619529688e-06, "loss": 0.6674, "step": 10828 }, { "epoch": 0.8194165941508078, "grad_norm": 2.570380449295044, "learning_rate": 1.5572117501775148e-06, "loss": 0.4887, "step": 10829 }, { "epoch": 0.8194922628731414, "grad_norm": 2.119783401489258, "learning_rate": 1.555945704493533e-06, "loss": 0.7094, "step": 10830 }, { "epoch": 0.819567931595475, "grad_norm": 2.8816754817962646, "learning_rate": 1.5546801249803083e-06, "loss": 0.7619, "step": 10831 }, { "epoch": 0.8196436003178086, "grad_norm": 1.987670660018921, "learning_rate": 1.5534150117170953e-06, "loss": 0.6066, "step": 10832 }, { "epoch": 0.8197192690401423, "grad_norm": 2.288383722305298, "learning_rate": 1.5521503647831193e-06, "loss": 0.65, "step": 10833 }, { "epoch": 0.8197949377624759, "grad_norm": 1.7874622344970703, "learning_rate": 1.5508861842575773e-06, "loss": 0.5712, "step": 10834 }, { "epoch": 0.8198706064848095, "grad_norm": 2.939530372619629, "learning_rate": 1.549622470219638e-06, "loss": 0.6617, "step": 10835 }, { "epoch": 0.8199462752071431, "grad_norm": 2.5253098011016846, "learning_rate": 1.5483592227484347e-06, "loss": 0.8274, "step": 10836 }, { "epoch": 0.8200219439294768, "grad_norm": 2.590799570083618, "learning_rate": 1.5470964419230754e-06, "loss": 0.7798, "step": 10837 }, { "epoch": 0.8200976126518104, "grad_norm": 2.3619234561920166, "learning_rate": 1.5458341278226478e-06, "loss": 0.6679, "step": 10838 }, { "epoch": 0.820173281374144, "grad_norm": 2.0205016136169434, "learning_rate": 1.544572280526195e-06, "loss": 0.6351, "step": 10839 }, { "epoch": 0.8202489500964776, "grad_norm": 2.110157012939453, "learning_rate": 1.543310900112738e-06, "loss": 0.6308, "step": 10840 }, { "epoch": 0.8203246188188112, "grad_norm": 2.0330491065979004, "learning_rate": 1.5420499866612723e-06, "loss": 0.7454, "step": 10841 }, { "epoch": 0.8204002875411449, "grad_norm": 2.687309980392456, "learning_rate": 1.5407895402507574e-06, "loss": 0.78, "step": 10842 }, { "epoch": 0.8204759562634785, "grad_norm": 3.1039085388183594, "learning_rate": 1.5395295609601274e-06, "loss": 0.6781, "step": 10843 }, { "epoch": 0.8205516249858121, "grad_norm": 2.10479474067688, "learning_rate": 1.538270048868286e-06, "loss": 0.6809, "step": 10844 }, { "epoch": 0.8206272937081457, "grad_norm": 5.9827752113342285, "learning_rate": 1.5370110040541093e-06, "loss": 0.6575, "step": 10845 }, { "epoch": 0.8207029624304794, "grad_norm": 2.722191572189331, "learning_rate": 1.535752426596444e-06, "loss": 0.6425, "step": 10846 }, { "epoch": 0.820778631152813, "grad_norm": 2.3565070629119873, "learning_rate": 1.534494316574099e-06, "loss": 0.6546, "step": 10847 }, { "epoch": 0.8208542998751466, "grad_norm": 2.351691961288452, "learning_rate": 1.5332366740658685e-06, "loss": 0.6076, "step": 10848 }, { "epoch": 0.8209299685974802, "grad_norm": 3.5153005123138428, "learning_rate": 1.5319794991505105e-06, "loss": 0.6766, "step": 10849 }, { "epoch": 0.8210056373198139, "grad_norm": 2.6627254486083984, "learning_rate": 1.530722791906748e-06, "loss": 0.6439, "step": 10850 }, { "epoch": 0.8210813060421475, "grad_norm": 3.6057939529418945, "learning_rate": 1.5294665524132828e-06, "loss": 0.6652, "step": 10851 }, { "epoch": 0.8211569747644811, "grad_norm": 1.9653656482696533, "learning_rate": 1.5282107807487854e-06, "loss": 0.7099, "step": 10852 }, { "epoch": 0.8212326434868147, "grad_norm": 2.3477768898010254, "learning_rate": 1.5269554769918955e-06, "loss": 0.6548, "step": 10853 }, { "epoch": 0.8213083122091484, "grad_norm": 2.1630735397338867, "learning_rate": 1.5257006412212244e-06, "loss": 0.6972, "step": 10854 }, { "epoch": 0.821383980931482, "grad_norm": 1.8949837684631348, "learning_rate": 1.524446273515353e-06, "loss": 0.5324, "step": 10855 }, { "epoch": 0.8214596496538156, "grad_norm": 2.782655954360962, "learning_rate": 1.523192373952836e-06, "loss": 0.6504, "step": 10856 }, { "epoch": 0.8215353183761492, "grad_norm": 2.5171873569488525, "learning_rate": 1.5219389426121952e-06, "loss": 0.6931, "step": 10857 }, { "epoch": 0.8216109870984828, "grad_norm": 2.055389404296875, "learning_rate": 1.5206859795719249e-06, "loss": 0.6662, "step": 10858 }, { "epoch": 0.8216866558208165, "grad_norm": 2.1020753383636475, "learning_rate": 1.5194334849104892e-06, "loss": 0.7256, "step": 10859 }, { "epoch": 0.8217623245431501, "grad_norm": 2.210233211517334, "learning_rate": 1.5181814587063255e-06, "loss": 0.7089, "step": 10860 }, { "epoch": 0.8218379932654837, "grad_norm": 1.9540194272994995, "learning_rate": 1.5169299010378372e-06, "loss": 0.6508, "step": 10861 }, { "epoch": 0.8219136619878173, "grad_norm": 2.1212716102600098, "learning_rate": 1.5156788119833983e-06, "loss": 0.6668, "step": 10862 }, { "epoch": 0.821989330710151, "grad_norm": 2.4180808067321777, "learning_rate": 1.5144281916213645e-06, "loss": 0.6415, "step": 10863 }, { "epoch": 0.8220649994324846, "grad_norm": 2.248098373413086, "learning_rate": 1.5131780400300459e-06, "loss": 0.6769, "step": 10864 }, { "epoch": 0.8221406681548182, "grad_norm": 2.054067611694336, "learning_rate": 1.5119283572877336e-06, "loss": 0.8053, "step": 10865 }, { "epoch": 0.8222163368771518, "grad_norm": 2.5258889198303223, "learning_rate": 1.5106791434726876e-06, "loss": 0.691, "step": 10866 }, { "epoch": 0.8222920055994855, "grad_norm": 2.0589208602905273, "learning_rate": 1.509430398663137e-06, "loss": 0.6954, "step": 10867 }, { "epoch": 0.8223676743218191, "grad_norm": 2.6181116104125977, "learning_rate": 1.5081821229372813e-06, "loss": 0.7106, "step": 10868 }, { "epoch": 0.8224433430441527, "grad_norm": 2.312509059906006, "learning_rate": 1.5069343163732939e-06, "loss": 0.6206, "step": 10869 }, { "epoch": 0.8225190117664863, "grad_norm": 2.3199472427368164, "learning_rate": 1.5056869790493144e-06, "loss": 0.7401, "step": 10870 }, { "epoch": 0.8225946804888199, "grad_norm": 2.136983871459961, "learning_rate": 1.5044401110434582e-06, "loss": 0.6275, "step": 10871 }, { "epoch": 0.8226703492111536, "grad_norm": 1.8843696117401123, "learning_rate": 1.503193712433803e-06, "loss": 0.6358, "step": 10872 }, { "epoch": 0.8227460179334872, "grad_norm": 2.7073588371276855, "learning_rate": 1.5019477832984042e-06, "loss": 0.8258, "step": 10873 }, { "epoch": 0.8228216866558208, "grad_norm": 2.6563751697540283, "learning_rate": 1.5007023237152905e-06, "loss": 0.5735, "step": 10874 }, { "epoch": 0.8228973553781544, "grad_norm": 2.3090715408325195, "learning_rate": 1.4994573337624505e-06, "loss": 0.661, "step": 10875 }, { "epoch": 0.8229730241004881, "grad_norm": 2.4490222930908203, "learning_rate": 1.4982128135178528e-06, "loss": 0.6891, "step": 10876 }, { "epoch": 0.8230486928228217, "grad_norm": 2.2669460773468018, "learning_rate": 1.496968763059431e-06, "loss": 0.6218, "step": 10877 }, { "epoch": 0.8231243615451553, "grad_norm": 2.8864951133728027, "learning_rate": 1.4957251824650948e-06, "loss": 0.474, "step": 10878 }, { "epoch": 0.8232000302674889, "grad_norm": 2.1421849727630615, "learning_rate": 1.4944820718127179e-06, "loss": 0.6624, "step": 10879 }, { "epoch": 0.8232756989898226, "grad_norm": 2.4659669399261475, "learning_rate": 1.49323943118015e-06, "loss": 0.6561, "step": 10880 }, { "epoch": 0.8233513677121562, "grad_norm": 1.961225986480713, "learning_rate": 1.4919972606452113e-06, "loss": 0.6731, "step": 10881 }, { "epoch": 0.8234270364344898, "grad_norm": 2.310224771499634, "learning_rate": 1.4907555602856849e-06, "loss": 0.7196, "step": 10882 }, { "epoch": 0.8235027051568234, "grad_norm": 2.2171401977539062, "learning_rate": 1.4895143301793321e-06, "loss": 0.8323, "step": 10883 }, { "epoch": 0.823578373879157, "grad_norm": 3.4674880504608154, "learning_rate": 1.4882735704038853e-06, "loss": 0.6263, "step": 10884 }, { "epoch": 0.8236540426014907, "grad_norm": 2.4391887187957764, "learning_rate": 1.4870332810370457e-06, "loss": 0.7007, "step": 10885 }, { "epoch": 0.8237297113238243, "grad_norm": 2.19388747215271, "learning_rate": 1.48579346215648e-06, "loss": 0.744, "step": 10886 }, { "epoch": 0.8238053800461579, "grad_norm": 2.4731009006500244, "learning_rate": 1.4845541138398313e-06, "loss": 0.6056, "step": 10887 }, { "epoch": 0.8238810487684916, "grad_norm": 1.8592519760131836, "learning_rate": 1.4833152361647122e-06, "loss": 0.642, "step": 10888 }, { "epoch": 0.8239567174908252, "grad_norm": 1.9034373760223389, "learning_rate": 1.4820768292087048e-06, "loss": 0.7955, "step": 10889 }, { "epoch": 0.8240323862131588, "grad_norm": 2.9963486194610596, "learning_rate": 1.480838893049362e-06, "loss": 0.7364, "step": 10890 }, { "epoch": 0.8241080549354924, "grad_norm": 2.4194231033325195, "learning_rate": 1.4796014277642077e-06, "loss": 0.638, "step": 10891 }, { "epoch": 0.824183723657826, "grad_norm": 2.1171884536743164, "learning_rate": 1.4783644334307374e-06, "loss": 0.7346, "step": 10892 }, { "epoch": 0.8242593923801597, "grad_norm": 2.5294923782348633, "learning_rate": 1.4771279101264106e-06, "loss": 0.5958, "step": 10893 }, { "epoch": 0.8243350611024933, "grad_norm": 2.560204029083252, "learning_rate": 1.4758918579286686e-06, "loss": 0.7642, "step": 10894 }, { "epoch": 0.8244107298248269, "grad_norm": 2.2272257804870605, "learning_rate": 1.4746562769149163e-06, "loss": 0.6933, "step": 10895 }, { "epoch": 0.8244863985471605, "grad_norm": 2.733365774154663, "learning_rate": 1.473421167162525e-06, "loss": 0.6733, "step": 10896 }, { "epoch": 0.8245620672694941, "grad_norm": 2.285641670227051, "learning_rate": 1.4721865287488448e-06, "loss": 0.6461, "step": 10897 }, { "epoch": 0.8246377359918278, "grad_norm": 2.4050447940826416, "learning_rate": 1.4709523617511898e-06, "loss": 0.6534, "step": 10898 }, { "epoch": 0.8247134047141614, "grad_norm": 2.5170106887817383, "learning_rate": 1.4697186662468542e-06, "loss": 0.5798, "step": 10899 }, { "epoch": 0.824789073436495, "grad_norm": 2.0948894023895264, "learning_rate": 1.4684854423130891e-06, "loss": 0.7217, "step": 10900 }, { "epoch": 0.8248647421588287, "grad_norm": 2.6201298236846924, "learning_rate": 1.467252690027126e-06, "loss": 0.6472, "step": 10901 }, { "epoch": 0.8249404108811623, "grad_norm": 2.4255621433258057, "learning_rate": 1.466020409466163e-06, "loss": 0.7014, "step": 10902 }, { "epoch": 0.8250160796034959, "grad_norm": 2.1704020500183105, "learning_rate": 1.4647886007073692e-06, "loss": 0.6657, "step": 10903 }, { "epoch": 0.8250917483258295, "grad_norm": 2.4163525104522705, "learning_rate": 1.463557263827886e-06, "loss": 0.7631, "step": 10904 }, { "epoch": 0.8251674170481631, "grad_norm": 2.173043966293335, "learning_rate": 1.4623263989048226e-06, "loss": 0.7403, "step": 10905 }, { "epoch": 0.8252430857704968, "grad_norm": 2.1846542358398438, "learning_rate": 1.4610960060152616e-06, "loss": 0.7112, "step": 10906 }, { "epoch": 0.8253187544928304, "grad_norm": 2.4031577110290527, "learning_rate": 1.4598660852362505e-06, "loss": 0.6655, "step": 10907 }, { "epoch": 0.825394423215164, "grad_norm": 3.223851442337036, "learning_rate": 1.4586366366448113e-06, "loss": 0.6503, "step": 10908 }, { "epoch": 0.8254700919374977, "grad_norm": 2.421539545059204, "learning_rate": 1.4574076603179413e-06, "loss": 0.6976, "step": 10909 }, { "epoch": 0.8255457606598312, "grad_norm": 3.1366801261901855, "learning_rate": 1.4561791563325965e-06, "loss": 0.62, "step": 10910 }, { "epoch": 0.8256214293821649, "grad_norm": 2.170020818710327, "learning_rate": 1.454951124765714e-06, "loss": 0.66, "step": 10911 }, { "epoch": 0.8256970981044985, "grad_norm": 2.331679344177246, "learning_rate": 1.4537235656941952e-06, "loss": 0.6694, "step": 10912 }, { "epoch": 0.8257727668268321, "grad_norm": 1.9958034753799438, "learning_rate": 1.4524964791949157e-06, "loss": 0.6669, "step": 10913 }, { "epoch": 0.8258484355491658, "grad_norm": 1.878063440322876, "learning_rate": 1.4512698653447153e-06, "loss": 0.6129, "step": 10914 }, { "epoch": 0.8259241042714994, "grad_norm": 2.260777711868286, "learning_rate": 1.450043724220413e-06, "loss": 0.6623, "step": 10915 }, { "epoch": 0.825999772993833, "grad_norm": 2.2355360984802246, "learning_rate": 1.4488180558987921e-06, "loss": 0.5674, "step": 10916 }, { "epoch": 0.8260754417161666, "grad_norm": 2.1847424507141113, "learning_rate": 1.4475928604566107e-06, "loss": 0.7828, "step": 10917 }, { "epoch": 0.8261511104385002, "grad_norm": 2.2542574405670166, "learning_rate": 1.4463681379705883e-06, "loss": 0.804, "step": 10918 }, { "epoch": 0.8262267791608339, "grad_norm": 1.649489402770996, "learning_rate": 1.4451438885174242e-06, "loss": 0.7146, "step": 10919 }, { "epoch": 0.8263024478831675, "grad_norm": 2.6824514865875244, "learning_rate": 1.4439201121737882e-06, "loss": 0.6701, "step": 10920 }, { "epoch": 0.8263781166055011, "grad_norm": 3.05550217628479, "learning_rate": 1.4426968090163127e-06, "loss": 0.4885, "step": 10921 }, { "epoch": 0.8264537853278348, "grad_norm": 2.190661907196045, "learning_rate": 1.4414739791216062e-06, "loss": 0.5637, "step": 10922 }, { "epoch": 0.8265294540501683, "grad_norm": 2.1329610347747803, "learning_rate": 1.4402516225662454e-06, "loss": 0.6271, "step": 10923 }, { "epoch": 0.826605122772502, "grad_norm": 2.2187185287475586, "learning_rate": 1.43902973942678e-06, "loss": 0.7154, "step": 10924 }, { "epoch": 0.8266807914948356, "grad_norm": 2.099266529083252, "learning_rate": 1.4378083297797278e-06, "loss": 0.5802, "step": 10925 }, { "epoch": 0.8267564602171692, "grad_norm": 2.433722734451294, "learning_rate": 1.4365873937015758e-06, "loss": 0.7381, "step": 10926 }, { "epoch": 0.8268321289395029, "grad_norm": 2.2790136337280273, "learning_rate": 1.4353669312687878e-06, "loss": 0.6474, "step": 10927 }, { "epoch": 0.8269077976618365, "grad_norm": 2.4600353240966797, "learning_rate": 1.4341469425577866e-06, "loss": 0.7024, "step": 10928 }, { "epoch": 0.8269834663841701, "grad_norm": 2.4430034160614014, "learning_rate": 1.432927427644973e-06, "loss": 0.6797, "step": 10929 }, { "epoch": 0.8270591351065038, "grad_norm": 6.050069332122803, "learning_rate": 1.431708386606721e-06, "loss": 0.6611, "step": 10930 }, { "epoch": 0.8271348038288373, "grad_norm": 2.662050485610962, "learning_rate": 1.4304898195193705e-06, "loss": 0.6946, "step": 10931 }, { "epoch": 0.827210472551171, "grad_norm": 2.608130693435669, "learning_rate": 1.4292717264592286e-06, "loss": 0.7405, "step": 10932 }, { "epoch": 0.8272861412735046, "grad_norm": 2.265187978744507, "learning_rate": 1.428054107502577e-06, "loss": 0.7296, "step": 10933 }, { "epoch": 0.8273618099958382, "grad_norm": 1.9278501272201538, "learning_rate": 1.426836962725669e-06, "loss": 0.594, "step": 10934 }, { "epoch": 0.8274374787181719, "grad_norm": 2.370166540145874, "learning_rate": 1.4256202922047243e-06, "loss": 0.5605, "step": 10935 }, { "epoch": 0.8275131474405054, "grad_norm": 2.0922703742980957, "learning_rate": 1.4244040960159356e-06, "loss": 0.5532, "step": 10936 }, { "epoch": 0.8275888161628391, "grad_norm": 2.2597086429595947, "learning_rate": 1.423188374235464e-06, "loss": 0.609, "step": 10937 }, { "epoch": 0.8276644848851727, "grad_norm": 2.991779088973999, "learning_rate": 1.4219731269394455e-06, "loss": 0.6567, "step": 10938 }, { "epoch": 0.8277401536075063, "grad_norm": 1.8932214975357056, "learning_rate": 1.4207583542039767e-06, "loss": 0.5854, "step": 10939 }, { "epoch": 0.82781582232984, "grad_norm": 2.025179147720337, "learning_rate": 1.4195440561051349e-06, "loss": 0.7445, "step": 10940 }, { "epoch": 0.8278914910521736, "grad_norm": 2.18306827545166, "learning_rate": 1.4183302327189654e-06, "loss": 0.7864, "step": 10941 }, { "epoch": 0.8279671597745072, "grad_norm": 2.069181203842163, "learning_rate": 1.4171168841214762e-06, "loss": 0.6587, "step": 10942 }, { "epoch": 0.8280428284968409, "grad_norm": 1.9649893045425415, "learning_rate": 1.4159040103886545e-06, "loss": 0.6386, "step": 10943 }, { "epoch": 0.8281184972191744, "grad_norm": 2.0835180282592773, "learning_rate": 1.4146916115964507e-06, "loss": 0.5745, "step": 10944 }, { "epoch": 0.8281941659415081, "grad_norm": 1.8389742374420166, "learning_rate": 1.413479687820796e-06, "loss": 0.6632, "step": 10945 }, { "epoch": 0.8282698346638417, "grad_norm": 2.533998489379883, "learning_rate": 1.4122682391375796e-06, "loss": 0.6618, "step": 10946 }, { "epoch": 0.8283455033861753, "grad_norm": 6.8104963302612305, "learning_rate": 1.411057265622668e-06, "loss": 0.7536, "step": 10947 }, { "epoch": 0.828421172108509, "grad_norm": 2.065166473388672, "learning_rate": 1.4098467673518954e-06, "loss": 0.5601, "step": 10948 }, { "epoch": 0.8284968408308425, "grad_norm": 2.1348156929016113, "learning_rate": 1.4086367444010704e-06, "loss": 0.7066, "step": 10949 }, { "epoch": 0.8285725095531762, "grad_norm": 2.0122158527374268, "learning_rate": 1.4074271968459609e-06, "loss": 0.6169, "step": 10950 }, { "epoch": 0.8286481782755099, "grad_norm": 2.3869214057922363, "learning_rate": 1.4062181247623206e-06, "loss": 0.651, "step": 10951 }, { "epoch": 0.8287238469978434, "grad_norm": 2.8264942169189453, "learning_rate": 1.4050095282258642e-06, "loss": 0.6669, "step": 10952 }, { "epoch": 0.8287995157201771, "grad_norm": 2.2256650924682617, "learning_rate": 1.4038014073122747e-06, "loss": 0.6945, "step": 10953 }, { "epoch": 0.8288751844425107, "grad_norm": 2.1724610328674316, "learning_rate": 1.40259376209721e-06, "loss": 0.618, "step": 10954 }, { "epoch": 0.8289508531648443, "grad_norm": 2.1436386108398438, "learning_rate": 1.401386592656297e-06, "loss": 0.6178, "step": 10955 }, { "epoch": 0.829026521887178, "grad_norm": 1.8904942274093628, "learning_rate": 1.4001798990651317e-06, "loss": 0.565, "step": 10956 }, { "epoch": 0.8291021906095115, "grad_norm": 1.9519450664520264, "learning_rate": 1.3989736813992826e-06, "loss": 0.6762, "step": 10957 }, { "epoch": 0.8291778593318452, "grad_norm": 2.116001605987549, "learning_rate": 1.3977679397342863e-06, "loss": 0.7516, "step": 10958 }, { "epoch": 0.8292535280541788, "grad_norm": 2.4981284141540527, "learning_rate": 1.3965626741456495e-06, "loss": 0.6231, "step": 10959 }, { "epoch": 0.8293291967765124, "grad_norm": 2.15065860748291, "learning_rate": 1.3953578847088513e-06, "loss": 0.5323, "step": 10960 }, { "epoch": 0.8294048654988461, "grad_norm": 2.2730371952056885, "learning_rate": 1.394153571499339e-06, "loss": 0.7206, "step": 10961 }, { "epoch": 0.8294805342211796, "grad_norm": 2.4419403076171875, "learning_rate": 1.3929497345925299e-06, "loss": 0.667, "step": 10962 }, { "epoch": 0.8295562029435133, "grad_norm": 2.3964638710021973, "learning_rate": 1.3917463740638146e-06, "loss": 0.7303, "step": 10963 }, { "epoch": 0.829631871665847, "grad_norm": 2.3041999340057373, "learning_rate": 1.3905434899885471e-06, "loss": 0.657, "step": 10964 }, { "epoch": 0.8297075403881805, "grad_norm": 2.5561516284942627, "learning_rate": 1.389341082442057e-06, "loss": 0.6983, "step": 10965 }, { "epoch": 0.8297832091105142, "grad_norm": 1.8878254890441895, "learning_rate": 1.3881391514996473e-06, "loss": 0.6237, "step": 10966 }, { "epoch": 0.8298588778328478, "grad_norm": 3.115190029144287, "learning_rate": 1.3869376972365825e-06, "loss": 0.798, "step": 10967 }, { "epoch": 0.8299345465551814, "grad_norm": 2.7332189083099365, "learning_rate": 1.3857367197281024e-06, "loss": 0.6883, "step": 10968 }, { "epoch": 0.8300102152775151, "grad_norm": 2.638230562210083, "learning_rate": 1.3845362190494161e-06, "loss": 0.5863, "step": 10969 }, { "epoch": 0.8300858839998486, "grad_norm": 2.4270436763763428, "learning_rate": 1.3833361952757031e-06, "loss": 0.6456, "step": 10970 }, { "epoch": 0.8301615527221823, "grad_norm": 1.9929462671279907, "learning_rate": 1.3821366484821138e-06, "loss": 0.6827, "step": 10971 }, { "epoch": 0.830237221444516, "grad_norm": 4.07589864730835, "learning_rate": 1.3809375787437656e-06, "loss": 0.6311, "step": 10972 }, { "epoch": 0.8303128901668495, "grad_norm": 2.5167202949523926, "learning_rate": 1.3797389861357507e-06, "loss": 0.7506, "step": 10973 }, { "epoch": 0.8303885588891832, "grad_norm": 1.9688563346862793, "learning_rate": 1.378540870733128e-06, "loss": 0.5853, "step": 10974 }, { "epoch": 0.8304642276115167, "grad_norm": 2.244810104370117, "learning_rate": 1.3773432326109234e-06, "loss": 0.6253, "step": 10975 }, { "epoch": 0.8305398963338504, "grad_norm": 1.8359615802764893, "learning_rate": 1.376146071844142e-06, "loss": 0.7278, "step": 10976 }, { "epoch": 0.8306155650561841, "grad_norm": 2.00067138671875, "learning_rate": 1.374949388507754e-06, "loss": 0.6246, "step": 10977 }, { "epoch": 0.8306912337785176, "grad_norm": 2.020059108734131, "learning_rate": 1.3737531826766962e-06, "loss": 0.6065, "step": 10978 }, { "epoch": 0.8307669025008513, "grad_norm": 2.200312614440918, "learning_rate": 1.3725574544258797e-06, "loss": 0.7528, "step": 10979 }, { "epoch": 0.830842571223185, "grad_norm": 2.1670212745666504, "learning_rate": 1.3713622038301856e-06, "loss": 0.6273, "step": 10980 }, { "epoch": 0.8309182399455185, "grad_norm": 2.5770716667175293, "learning_rate": 1.3701674309644652e-06, "loss": 0.6216, "step": 10981 }, { "epoch": 0.8309939086678522, "grad_norm": 3.1510024070739746, "learning_rate": 1.3689731359035375e-06, "loss": 0.6588, "step": 10982 }, { "epoch": 0.8310695773901857, "grad_norm": 3.46155047416687, "learning_rate": 1.3677793187221936e-06, "loss": 0.5913, "step": 10983 }, { "epoch": 0.8311452461125194, "grad_norm": 1.877045750617981, "learning_rate": 1.3665859794951969e-06, "loss": 0.5908, "step": 10984 }, { "epoch": 0.831220914834853, "grad_norm": 2.1516778469085693, "learning_rate": 1.3653931182972716e-06, "loss": 0.675, "step": 10985 }, { "epoch": 0.8312965835571866, "grad_norm": 2.5128893852233887, "learning_rate": 1.3642007352031238e-06, "loss": 0.7084, "step": 10986 }, { "epoch": 0.8313722522795203, "grad_norm": 2.5535311698913574, "learning_rate": 1.3630088302874237e-06, "loss": 0.7207, "step": 10987 }, { "epoch": 0.8314479210018538, "grad_norm": 2.579092502593994, "learning_rate": 1.3618174036248138e-06, "loss": 0.6827, "step": 10988 }, { "epoch": 0.8315235897241875, "grad_norm": 2.279123306274414, "learning_rate": 1.3606264552899005e-06, "loss": 0.7558, "step": 10989 }, { "epoch": 0.8315992584465212, "grad_norm": 2.163329839706421, "learning_rate": 1.359435985357268e-06, "loss": 0.6043, "step": 10990 }, { "epoch": 0.8316749271688547, "grad_norm": 2.061577320098877, "learning_rate": 1.3582459939014655e-06, "loss": 0.7602, "step": 10991 }, { "epoch": 0.8317505958911884, "grad_norm": 2.2457826137542725, "learning_rate": 1.3570564809970164e-06, "loss": 0.6957, "step": 10992 }, { "epoch": 0.831826264613522, "grad_norm": 2.5345616340637207, "learning_rate": 1.3558674467184096e-06, "loss": 0.6992, "step": 10993 }, { "epoch": 0.8319019333358556, "grad_norm": 1.9230178594589233, "learning_rate": 1.354678891140108e-06, "loss": 0.7109, "step": 10994 }, { "epoch": 0.8319776020581893, "grad_norm": 1.9315879344940186, "learning_rate": 1.3534908143365452e-06, "loss": 0.6629, "step": 10995 }, { "epoch": 0.8320532707805228, "grad_norm": 2.1779463291168213, "learning_rate": 1.352303216382114e-06, "loss": 0.7043, "step": 10996 }, { "epoch": 0.8321289395028565, "grad_norm": 1.9600178003311157, "learning_rate": 1.3511160973511935e-06, "loss": 0.7524, "step": 10997 }, { "epoch": 0.8322046082251902, "grad_norm": 4.411078929901123, "learning_rate": 1.3499294573181253e-06, "loss": 0.749, "step": 10998 }, { "epoch": 0.8322802769475237, "grad_norm": 3.7787492275238037, "learning_rate": 1.3487432963572152e-06, "loss": 0.5859, "step": 10999 }, { "epoch": 0.8323559456698574, "grad_norm": 2.2628886699676514, "learning_rate": 1.3475576145427465e-06, "loss": 0.7292, "step": 11000 }, { "epoch": 0.8324316143921909, "grad_norm": 2.6907804012298584, "learning_rate": 1.346372411948969e-06, "loss": 0.6431, "step": 11001 }, { "epoch": 0.8325072831145246, "grad_norm": 3.0805437564849854, "learning_rate": 1.3451876886501101e-06, "loss": 0.5905, "step": 11002 }, { "epoch": 0.8325829518368583, "grad_norm": 2.1811044216156006, "learning_rate": 1.344003444720356e-06, "loss": 0.593, "step": 11003 }, { "epoch": 0.8326586205591918, "grad_norm": 2.125622272491455, "learning_rate": 1.3428196802338676e-06, "loss": 0.5257, "step": 11004 }, { "epoch": 0.8327342892815255, "grad_norm": 3.8492448329925537, "learning_rate": 1.3416363952647772e-06, "loss": 0.6362, "step": 11005 }, { "epoch": 0.8328099580038592, "grad_norm": 2.9568638801574707, "learning_rate": 1.340453589887185e-06, "loss": 0.655, "step": 11006 }, { "epoch": 0.8328856267261927, "grad_norm": 2.092532157897949, "learning_rate": 1.3392712641751645e-06, "loss": 0.6265, "step": 11007 }, { "epoch": 0.8329612954485264, "grad_norm": 2.0288496017456055, "learning_rate": 1.3380894182027548e-06, "loss": 0.5884, "step": 11008 }, { "epoch": 0.8330369641708599, "grad_norm": 2.5260086059570312, "learning_rate": 1.336908052043969e-06, "loss": 0.6618, "step": 11009 }, { "epoch": 0.8331126328931936, "grad_norm": 2.303161382675171, "learning_rate": 1.3357271657727847e-06, "loss": 0.6813, "step": 11010 }, { "epoch": 0.8331883016155273, "grad_norm": 2.152733087539673, "learning_rate": 1.334546759463152e-06, "loss": 0.5689, "step": 11011 }, { "epoch": 0.8332639703378608, "grad_norm": 2.2623064517974854, "learning_rate": 1.3333668331889998e-06, "loss": 0.6485, "step": 11012 }, { "epoch": 0.8333396390601945, "grad_norm": 2.2851810455322266, "learning_rate": 1.3321873870242097e-06, "loss": 0.6046, "step": 11013 }, { "epoch": 0.8334153077825281, "grad_norm": 1.8821451663970947, "learning_rate": 1.3310084210426468e-06, "loss": 0.8392, "step": 11014 }, { "epoch": 0.8334909765048617, "grad_norm": 2.2863385677337646, "learning_rate": 1.3298299353181411e-06, "loss": 0.6131, "step": 11015 }, { "epoch": 0.8335666452271954, "grad_norm": 2.0595953464508057, "learning_rate": 1.3286519299244936e-06, "loss": 0.7144, "step": 11016 }, { "epoch": 0.8336423139495289, "grad_norm": 1.9019546508789062, "learning_rate": 1.3274744049354739e-06, "loss": 0.574, "step": 11017 }, { "epoch": 0.8337179826718626, "grad_norm": 2.477344036102295, "learning_rate": 1.3262973604248235e-06, "loss": 0.6929, "step": 11018 }, { "epoch": 0.8337936513941963, "grad_norm": 2.8345818519592285, "learning_rate": 1.325120796466251e-06, "loss": 0.6346, "step": 11019 }, { "epoch": 0.8338693201165298, "grad_norm": 2.6509807109832764, "learning_rate": 1.323944713133441e-06, "loss": 0.6845, "step": 11020 }, { "epoch": 0.8339449888388635, "grad_norm": 2.0513241291046143, "learning_rate": 1.322769110500036e-06, "loss": 0.5085, "step": 11021 }, { "epoch": 0.834020657561197, "grad_norm": 2.247509002685547, "learning_rate": 1.3215939886396625e-06, "loss": 0.7244, "step": 11022 }, { "epoch": 0.8340963262835307, "grad_norm": 2.2295756340026855, "learning_rate": 1.3204193476259096e-06, "loss": 0.6197, "step": 11023 }, { "epoch": 0.8341719950058644, "grad_norm": 2.411051034927368, "learning_rate": 1.3192451875323353e-06, "loss": 0.6679, "step": 11024 }, { "epoch": 0.8342476637281979, "grad_norm": 2.1455461978912354, "learning_rate": 1.3180715084324689e-06, "loss": 0.6545, "step": 11025 }, { "epoch": 0.8343233324505316, "grad_norm": 2.34451961517334, "learning_rate": 1.3168983103998115e-06, "loss": 0.6976, "step": 11026 }, { "epoch": 0.8343990011728653, "grad_norm": 2.02919602394104, "learning_rate": 1.3157255935078313e-06, "loss": 0.5394, "step": 11027 }, { "epoch": 0.8344746698951988, "grad_norm": 2.013793468475342, "learning_rate": 1.3145533578299699e-06, "loss": 0.8555, "step": 11028 }, { "epoch": 0.8345503386175325, "grad_norm": 2.240281581878662, "learning_rate": 1.3133816034396343e-06, "loss": 0.6576, "step": 11029 }, { "epoch": 0.834626007339866, "grad_norm": 1.8225212097167969, "learning_rate": 1.3122103304102057e-06, "loss": 0.7601, "step": 11030 }, { "epoch": 0.8347016760621997, "grad_norm": 2.2404656410217285, "learning_rate": 1.3110395388150296e-06, "loss": 0.6523, "step": 11031 }, { "epoch": 0.8347773447845334, "grad_norm": 1.9908554553985596, "learning_rate": 1.3098692287274252e-06, "loss": 0.5598, "step": 11032 }, { "epoch": 0.8348530135068669, "grad_norm": 2.0587451457977295, "learning_rate": 1.3086994002206843e-06, "loss": 0.5893, "step": 11033 }, { "epoch": 0.8349286822292006, "grad_norm": 2.665961503982544, "learning_rate": 1.3075300533680657e-06, "loss": 0.7025, "step": 11034 }, { "epoch": 0.8350043509515341, "grad_norm": 2.2750911712646484, "learning_rate": 1.3063611882427943e-06, "loss": 0.8163, "step": 11035 }, { "epoch": 0.8350800196738678, "grad_norm": 2.3275375366210938, "learning_rate": 1.3051928049180683e-06, "loss": 0.5284, "step": 11036 }, { "epoch": 0.8351556883962015, "grad_norm": 1.4327739477157593, "learning_rate": 1.304024903467057e-06, "loss": 0.7461, "step": 11037 }, { "epoch": 0.835231357118535, "grad_norm": 3.3276166915893555, "learning_rate": 1.3028574839628995e-06, "loss": 0.8109, "step": 11038 }, { "epoch": 0.8353070258408687, "grad_norm": 1.9185841083526611, "learning_rate": 1.3016905464787009e-06, "loss": 0.6655, "step": 11039 }, { "epoch": 0.8353826945632024, "grad_norm": 1.899623990058899, "learning_rate": 1.3005240910875395e-06, "loss": 0.6141, "step": 11040 }, { "epoch": 0.8354583632855359, "grad_norm": 2.365811586380005, "learning_rate": 1.2993581178624644e-06, "loss": 0.6686, "step": 11041 }, { "epoch": 0.8355340320078696, "grad_norm": 2.494126558303833, "learning_rate": 1.298192626876488e-06, "loss": 0.6618, "step": 11042 }, { "epoch": 0.8356097007302031, "grad_norm": 2.977726459503174, "learning_rate": 1.2970276182026006e-06, "loss": 0.6845, "step": 11043 }, { "epoch": 0.8356853694525368, "grad_norm": 2.094541549682617, "learning_rate": 1.2958630919137614e-06, "loss": 0.5355, "step": 11044 }, { "epoch": 0.8357610381748705, "grad_norm": 1.4946520328521729, "learning_rate": 1.2946990480828904e-06, "loss": 0.9194, "step": 11045 }, { "epoch": 0.835836706897204, "grad_norm": 2.1046907901763916, "learning_rate": 1.293535486782888e-06, "loss": 0.7676, "step": 11046 }, { "epoch": 0.8359123756195377, "grad_norm": 2.2517387866973877, "learning_rate": 1.2923724080866165e-06, "loss": 0.5016, "step": 11047 }, { "epoch": 0.8359880443418712, "grad_norm": 3.1959762573242188, "learning_rate": 1.2912098120669186e-06, "loss": 0.633, "step": 11048 }, { "epoch": 0.8360637130642049, "grad_norm": 2.66766095161438, "learning_rate": 1.2900476987965934e-06, "loss": 0.6543, "step": 11049 }, { "epoch": 0.8361393817865386, "grad_norm": 2.5261588096618652, "learning_rate": 1.2888860683484182e-06, "loss": 0.6519, "step": 11050 }, { "epoch": 0.8362150505088721, "grad_norm": 2.3242642879486084, "learning_rate": 1.2877249207951384e-06, "loss": 0.6523, "step": 11051 }, { "epoch": 0.8362907192312058, "grad_norm": 2.110452890396118, "learning_rate": 1.2865642562094692e-06, "loss": 0.658, "step": 11052 }, { "epoch": 0.8363663879535395, "grad_norm": 2.2709860801696777, "learning_rate": 1.285404074664094e-06, "loss": 0.796, "step": 11053 }, { "epoch": 0.836442056675873, "grad_norm": 1.7346996068954468, "learning_rate": 1.284244376231667e-06, "loss": 0.5757, "step": 11054 }, { "epoch": 0.8365177253982067, "grad_norm": 2.3549299240112305, "learning_rate": 1.283085160984816e-06, "loss": 0.8578, "step": 11055 }, { "epoch": 0.8365933941205402, "grad_norm": 2.3999102115631104, "learning_rate": 1.2819264289961293e-06, "loss": 0.5272, "step": 11056 }, { "epoch": 0.8366690628428739, "grad_norm": 2.2719616889953613, "learning_rate": 1.2807681803381701e-06, "loss": 0.7264, "step": 11057 }, { "epoch": 0.8367447315652076, "grad_norm": 2.6402430534362793, "learning_rate": 1.2796104150834793e-06, "loss": 0.6027, "step": 11058 }, { "epoch": 0.8368204002875411, "grad_norm": 2.883604049682617, "learning_rate": 1.2784531333045529e-06, "loss": 0.8013, "step": 11059 }, { "epoch": 0.8368960690098748, "grad_norm": 1.8437893390655518, "learning_rate": 1.277296335073866e-06, "loss": 0.7198, "step": 11060 }, { "epoch": 0.8369717377322083, "grad_norm": 2.287963628768921, "learning_rate": 1.2761400204638605e-06, "loss": 0.7142, "step": 11061 }, { "epoch": 0.837047406454542, "grad_norm": 2.6593968868255615, "learning_rate": 1.2749841895469497e-06, "loss": 0.5975, "step": 11062 }, { "epoch": 0.8371230751768757, "grad_norm": 2.181272268295288, "learning_rate": 1.2738288423955146e-06, "loss": 0.6539, "step": 11063 }, { "epoch": 0.8371987438992092, "grad_norm": 2.1479294300079346, "learning_rate": 1.2726739790819062e-06, "loss": 0.6179, "step": 11064 }, { "epoch": 0.8372744126215429, "grad_norm": 2.6610589027404785, "learning_rate": 1.2715195996784468e-06, "loss": 0.696, "step": 11065 }, { "epoch": 0.8373500813438766, "grad_norm": 2.6823954582214355, "learning_rate": 1.2703657042574284e-06, "loss": 0.6255, "step": 11066 }, { "epoch": 0.8374257500662101, "grad_norm": 2.4057936668395996, "learning_rate": 1.2692122928911085e-06, "loss": 0.5914, "step": 11067 }, { "epoch": 0.8375014187885438, "grad_norm": 1.9947429895401, "learning_rate": 1.268059365651718e-06, "loss": 0.6166, "step": 11068 }, { "epoch": 0.8375770875108773, "grad_norm": 2.460395574569702, "learning_rate": 1.2669069226114614e-06, "loss": 0.6943, "step": 11069 }, { "epoch": 0.837652756233211, "grad_norm": 2.2644882202148438, "learning_rate": 1.2657549638425028e-06, "loss": 0.6663, "step": 11070 }, { "epoch": 0.8377284249555447, "grad_norm": 2.3289754390716553, "learning_rate": 1.2646034894169848e-06, "loss": 0.6335, "step": 11071 }, { "epoch": 0.8378040936778782, "grad_norm": 2.1823723316192627, "learning_rate": 1.2634524994070152e-06, "loss": 0.7363, "step": 11072 }, { "epoch": 0.8378797624002119, "grad_norm": 2.479443311691284, "learning_rate": 1.2623019938846735e-06, "loss": 0.705, "step": 11073 }, { "epoch": 0.8379554311225454, "grad_norm": 2.303396701812744, "learning_rate": 1.2611519729220074e-06, "loss": 0.8203, "step": 11074 }, { "epoch": 0.8380310998448791, "grad_norm": 2.0928168296813965, "learning_rate": 1.2600024365910352e-06, "loss": 0.6169, "step": 11075 }, { "epoch": 0.8381067685672128, "grad_norm": 2.577695608139038, "learning_rate": 1.258853384963745e-06, "loss": 0.6813, "step": 11076 }, { "epoch": 0.8381824372895463, "grad_norm": 1.9073697328567505, "learning_rate": 1.2577048181120954e-06, "loss": 0.7995, "step": 11077 }, { "epoch": 0.83825810601188, "grad_norm": 2.2130842208862305, "learning_rate": 1.256556736108007e-06, "loss": 0.6344, "step": 11078 }, { "epoch": 0.8383337747342137, "grad_norm": 2.398723840713501, "learning_rate": 1.2554091390233841e-06, "loss": 0.7178, "step": 11079 }, { "epoch": 0.8384094434565472, "grad_norm": 2.5538170337677, "learning_rate": 1.2542620269300912e-06, "loss": 0.7334, "step": 11080 }, { "epoch": 0.8384851121788809, "grad_norm": 2.482813596725464, "learning_rate": 1.253115399899962e-06, "loss": 0.6849, "step": 11081 }, { "epoch": 0.8385607809012144, "grad_norm": 2.486558198928833, "learning_rate": 1.2519692580048022e-06, "loss": 0.6309, "step": 11082 }, { "epoch": 0.8386364496235481, "grad_norm": 2.245243549346924, "learning_rate": 1.250823601316388e-06, "loss": 0.7787, "step": 11083 }, { "epoch": 0.8387121183458818, "grad_norm": 2.0628883838653564, "learning_rate": 1.2496784299064634e-06, "loss": 0.6715, "step": 11084 }, { "epoch": 0.8387877870682153, "grad_norm": 1.9107873439788818, "learning_rate": 1.2485337438467425e-06, "loss": 0.5899, "step": 11085 }, { "epoch": 0.838863455790549, "grad_norm": 2.893704891204834, "learning_rate": 1.2473895432089116e-06, "loss": 0.7503, "step": 11086 }, { "epoch": 0.8389391245128826, "grad_norm": 2.1818957328796387, "learning_rate": 1.246245828064623e-06, "loss": 0.6855, "step": 11087 }, { "epoch": 0.8390147932352162, "grad_norm": 2.1986918449401855, "learning_rate": 1.2451025984854952e-06, "loss": 0.7521, "step": 11088 }, { "epoch": 0.8390904619575499, "grad_norm": 2.2782137393951416, "learning_rate": 1.2439598545431285e-06, "loss": 0.6727, "step": 11089 }, { "epoch": 0.8391661306798834, "grad_norm": 2.482480764389038, "learning_rate": 1.2428175963090803e-06, "loss": 0.7597, "step": 11090 }, { "epoch": 0.8392417994022171, "grad_norm": 2.7252655029296875, "learning_rate": 1.2416758238548872e-06, "loss": 0.5514, "step": 11091 }, { "epoch": 0.8393174681245508, "grad_norm": 2.0488505363464355, "learning_rate": 1.2405345372520447e-06, "loss": 0.589, "step": 11092 }, { "epoch": 0.8393931368468843, "grad_norm": 2.3147764205932617, "learning_rate": 1.2393937365720247e-06, "loss": 0.6551, "step": 11093 }, { "epoch": 0.839468805569218, "grad_norm": 1.9987397193908691, "learning_rate": 1.2382534218862738e-06, "loss": 0.8516, "step": 11094 }, { "epoch": 0.8395444742915515, "grad_norm": 1.991015911102295, "learning_rate": 1.2371135932661967e-06, "loss": 0.5578, "step": 11095 }, { "epoch": 0.8396201430138852, "grad_norm": 2.4074630737304688, "learning_rate": 1.235974250783174e-06, "loss": 0.8004, "step": 11096 }, { "epoch": 0.8396958117362189, "grad_norm": 1.9555567502975464, "learning_rate": 1.234835394508556e-06, "loss": 0.7125, "step": 11097 }, { "epoch": 0.8397714804585524, "grad_norm": 2.148538827896118, "learning_rate": 1.2336970245136604e-06, "loss": 0.6414, "step": 11098 }, { "epoch": 0.8398471491808861, "grad_norm": 2.0188326835632324, "learning_rate": 1.2325591408697773e-06, "loss": 0.6591, "step": 11099 }, { "epoch": 0.8399228179032197, "grad_norm": 2.890838146209717, "learning_rate": 1.2314217436481636e-06, "loss": 0.6491, "step": 11100 }, { "epoch": 0.8399984866255533, "grad_norm": 1.9198577404022217, "learning_rate": 1.2302848329200484e-06, "loss": 0.5948, "step": 11101 }, { "epoch": 0.840074155347887, "grad_norm": 12.436066627502441, "learning_rate": 1.2291484087566258e-06, "loss": 0.7092, "step": 11102 }, { "epoch": 0.8401498240702205, "grad_norm": 2.7914984226226807, "learning_rate": 1.2280124712290618e-06, "loss": 0.6803, "step": 11103 }, { "epoch": 0.8402254927925542, "grad_norm": 2.818707227706909, "learning_rate": 1.2268770204084955e-06, "loss": 0.7888, "step": 11104 }, { "epoch": 0.8403011615148879, "grad_norm": 2.656458854675293, "learning_rate": 1.225742056366035e-06, "loss": 0.6282, "step": 11105 }, { "epoch": 0.8403768302372214, "grad_norm": 2.1726672649383545, "learning_rate": 1.2246075791727494e-06, "loss": 0.6697, "step": 11106 }, { "epoch": 0.8404524989595551, "grad_norm": 2.160932779312134, "learning_rate": 1.223473588899685e-06, "loss": 0.7079, "step": 11107 }, { "epoch": 0.8405281676818886, "grad_norm": 1.9137705564498901, "learning_rate": 1.222340085617858e-06, "loss": 0.6773, "step": 11108 }, { "epoch": 0.8406038364042223, "grad_norm": 2.2129149436950684, "learning_rate": 1.2212070693982505e-06, "loss": 0.7601, "step": 11109 }, { "epoch": 0.840679505126556, "grad_norm": 1.9030208587646484, "learning_rate": 1.2200745403118159e-06, "loss": 0.7088, "step": 11110 }, { "epoch": 0.8407551738488895, "grad_norm": 4.24271821975708, "learning_rate": 1.2189424984294774e-06, "loss": 0.6975, "step": 11111 }, { "epoch": 0.8408308425712232, "grad_norm": 2.1732661724090576, "learning_rate": 1.217810943822128e-06, "loss": 0.7062, "step": 11112 }, { "epoch": 0.8409065112935568, "grad_norm": 2.135040521621704, "learning_rate": 1.2166798765606255e-06, "loss": 0.5736, "step": 11113 }, { "epoch": 0.8409821800158904, "grad_norm": 1.6705262660980225, "learning_rate": 1.2155492967158019e-06, "loss": 0.5572, "step": 11114 }, { "epoch": 0.8410578487382241, "grad_norm": 2.2693963050842285, "learning_rate": 1.2144192043584637e-06, "loss": 0.6938, "step": 11115 }, { "epoch": 0.8411335174605576, "grad_norm": 2.54014253616333, "learning_rate": 1.2132895995593742e-06, "loss": 0.6641, "step": 11116 }, { "epoch": 0.8412091861828913, "grad_norm": 1.9177050590515137, "learning_rate": 1.212160482389275e-06, "loss": 0.6905, "step": 11117 }, { "epoch": 0.841284854905225, "grad_norm": 2.4678313732147217, "learning_rate": 1.2110318529188764e-06, "loss": 0.8327, "step": 11118 }, { "epoch": 0.8413605236275585, "grad_norm": 1.8306165933609009, "learning_rate": 1.209903711218855e-06, "loss": 0.6428, "step": 11119 }, { "epoch": 0.8414361923498922, "grad_norm": 10.050545692443848, "learning_rate": 1.208776057359859e-06, "loss": 0.5172, "step": 11120 }, { "epoch": 0.8415118610722258, "grad_norm": 2.323607921600342, "learning_rate": 1.207648891412507e-06, "loss": 0.7041, "step": 11121 }, { "epoch": 0.8415875297945594, "grad_norm": 2.1820437908172607, "learning_rate": 1.206522213447384e-06, "loss": 0.727, "step": 11122 }, { "epoch": 0.8416631985168931, "grad_norm": 2.826960563659668, "learning_rate": 1.2053960235350498e-06, "loss": 0.6435, "step": 11123 }, { "epoch": 0.8417388672392266, "grad_norm": 2.4186246395111084, "learning_rate": 1.2042703217460235e-06, "loss": 0.6031, "step": 11124 }, { "epoch": 0.8418145359615603, "grad_norm": 1.740941047668457, "learning_rate": 1.2031451081508057e-06, "loss": 0.619, "step": 11125 }, { "epoch": 0.8418902046838939, "grad_norm": 2.0095555782318115, "learning_rate": 1.2020203828198617e-06, "loss": 0.7084, "step": 11126 }, { "epoch": 0.8419658734062275, "grad_norm": 2.2118263244628906, "learning_rate": 1.2008961458236206e-06, "loss": 0.6634, "step": 11127 }, { "epoch": 0.8420415421285612, "grad_norm": 2.157186269760132, "learning_rate": 1.1997723972324888e-06, "loss": 0.62, "step": 11128 }, { "epoch": 0.8421172108508947, "grad_norm": 2.188436985015869, "learning_rate": 1.198649137116838e-06, "loss": 0.6341, "step": 11129 }, { "epoch": 0.8421928795732284, "grad_norm": 2.3916919231414795, "learning_rate": 1.197526365547011e-06, "loss": 0.6438, "step": 11130 }, { "epoch": 0.8422685482955621, "grad_norm": 2.1905405521392822, "learning_rate": 1.1964040825933196e-06, "loss": 0.7768, "step": 11131 }, { "epoch": 0.8423442170178956, "grad_norm": 2.423164129257202, "learning_rate": 1.1952822883260445e-06, "loss": 0.6209, "step": 11132 }, { "epoch": 0.8424198857402293, "grad_norm": 2.417698621749878, "learning_rate": 1.1941609828154374e-06, "loss": 0.7285, "step": 11133 }, { "epoch": 0.8424955544625629, "grad_norm": 2.6083669662475586, "learning_rate": 1.1930401661317124e-06, "loss": 0.6182, "step": 11134 }, { "epoch": 0.8425712231848965, "grad_norm": 2.146847724914551, "learning_rate": 1.1919198383450663e-06, "loss": 0.6415, "step": 11135 }, { "epoch": 0.8426468919072302, "grad_norm": 2.49105167388916, "learning_rate": 1.190799999525653e-06, "loss": 0.6844, "step": 11136 }, { "epoch": 0.8427225606295637, "grad_norm": 2.345392942428589, "learning_rate": 1.189680649743604e-06, "loss": 0.56, "step": 11137 }, { "epoch": 0.8427982293518974, "grad_norm": 2.192192316055298, "learning_rate": 1.1885617890690128e-06, "loss": 0.6248, "step": 11138 }, { "epoch": 0.842873898074231, "grad_norm": 3.144986867904663, "learning_rate": 1.1874434175719458e-06, "loss": 0.6772, "step": 11139 }, { "epoch": 0.8429495667965646, "grad_norm": 1.7006821632385254, "learning_rate": 1.1863255353224444e-06, "loss": 0.5736, "step": 11140 }, { "epoch": 0.8430252355188983, "grad_norm": 3.1362383365631104, "learning_rate": 1.1852081423905087e-06, "loss": 0.6766, "step": 11141 }, { "epoch": 0.8431009042412319, "grad_norm": 2.2844927310943604, "learning_rate": 1.1840912388461152e-06, "loss": 0.6861, "step": 11142 }, { "epoch": 0.8431765729635655, "grad_norm": 2.2476208209991455, "learning_rate": 1.1829748247592082e-06, "loss": 0.7521, "step": 11143 }, { "epoch": 0.8432522416858992, "grad_norm": 2.138150215148926, "learning_rate": 1.181858900199702e-06, "loss": 0.6996, "step": 11144 }, { "epoch": 0.8433279104082327, "grad_norm": 2.889930009841919, "learning_rate": 1.1807434652374754e-06, "loss": 0.6901, "step": 11145 }, { "epoch": 0.8434035791305664, "grad_norm": 1.7074170112609863, "learning_rate": 1.1796285199423857e-06, "loss": 0.6359, "step": 11146 }, { "epoch": 0.8434792478529, "grad_norm": 2.217623710632324, "learning_rate": 1.178514064384254e-06, "loss": 0.6327, "step": 11147 }, { "epoch": 0.8435549165752336, "grad_norm": 1.8675048351287842, "learning_rate": 1.1774000986328665e-06, "loss": 0.5452, "step": 11148 }, { "epoch": 0.8436305852975673, "grad_norm": 1.739334225654602, "learning_rate": 1.1762866227579872e-06, "loss": 0.6498, "step": 11149 }, { "epoch": 0.8437062540199008, "grad_norm": 1.5697089433670044, "learning_rate": 1.1751736368293417e-06, "loss": 0.5786, "step": 11150 }, { "epoch": 0.8437819227422345, "grad_norm": 2.253666639328003, "learning_rate": 1.1740611409166368e-06, "loss": 0.7211, "step": 11151 }, { "epoch": 0.8438575914645681, "grad_norm": 3.369987964630127, "learning_rate": 1.172949135089532e-06, "loss": 0.595, "step": 11152 }, { "epoch": 0.8439332601869017, "grad_norm": 2.647265672683716, "learning_rate": 1.171837619417669e-06, "loss": 0.6779, "step": 11153 }, { "epoch": 0.8440089289092354, "grad_norm": 2.462435483932495, "learning_rate": 1.1707265939706543e-06, "loss": 0.6241, "step": 11154 }, { "epoch": 0.844084597631569, "grad_norm": 2.3992083072662354, "learning_rate": 1.1696160588180617e-06, "loss": 0.7099, "step": 11155 }, { "epoch": 0.8441602663539026, "grad_norm": 2.0883431434631348, "learning_rate": 1.1685060140294388e-06, "loss": 0.7162, "step": 11156 }, { "epoch": 0.8442359350762363, "grad_norm": 2.320479393005371, "learning_rate": 1.1673964596742994e-06, "loss": 0.6615, "step": 11157 }, { "epoch": 0.8443116037985698, "grad_norm": 2.3560616970062256, "learning_rate": 1.1662873958221294e-06, "loss": 0.6776, "step": 11158 }, { "epoch": 0.8443872725209035, "grad_norm": 2.070371389389038, "learning_rate": 1.165178822542378e-06, "loss": 0.5541, "step": 11159 }, { "epoch": 0.8444629412432371, "grad_norm": 7.825291633605957, "learning_rate": 1.164070739904468e-06, "loss": 0.8369, "step": 11160 }, { "epoch": 0.8445386099655707, "grad_norm": 2.257880687713623, "learning_rate": 1.1629631479777953e-06, "loss": 0.7629, "step": 11161 }, { "epoch": 0.8446142786879044, "grad_norm": 2.6338634490966797, "learning_rate": 1.161856046831718e-06, "loss": 0.7076, "step": 11162 }, { "epoch": 0.844689947410238, "grad_norm": 2.4323246479034424, "learning_rate": 1.1607494365355664e-06, "loss": 0.7328, "step": 11163 }, { "epoch": 0.8447656161325716, "grad_norm": 2.372086524963379, "learning_rate": 1.1596433171586389e-06, "loss": 0.7085, "step": 11164 }, { "epoch": 0.8448412848549052, "grad_norm": 2.4352121353149414, "learning_rate": 1.1585376887702074e-06, "loss": 0.7244, "step": 11165 }, { "epoch": 0.8449169535772388, "grad_norm": 1.8406596183776855, "learning_rate": 1.1574325514395073e-06, "loss": 0.6614, "step": 11166 }, { "epoch": 0.8449926222995725, "grad_norm": 2.4071455001831055, "learning_rate": 1.1563279052357464e-06, "loss": 0.8433, "step": 11167 }, { "epoch": 0.8450682910219061, "grad_norm": 1.97054123878479, "learning_rate": 1.1552237502281023e-06, "loss": 0.624, "step": 11168 }, { "epoch": 0.8451439597442397, "grad_norm": 2.932701826095581, "learning_rate": 1.1541200864857225e-06, "loss": 0.6386, "step": 11169 }, { "epoch": 0.8452196284665734, "grad_norm": 2.2148234844207764, "learning_rate": 1.153016914077714e-06, "loss": 0.6634, "step": 11170 }, { "epoch": 0.845295297188907, "grad_norm": 2.325047254562378, "learning_rate": 1.1519142330731705e-06, "loss": 0.6842, "step": 11171 }, { "epoch": 0.8453709659112406, "grad_norm": 2.387718677520752, "learning_rate": 1.1508120435411416e-06, "loss": 0.6088, "step": 11172 }, { "epoch": 0.8454466346335742, "grad_norm": 1.7872700691223145, "learning_rate": 1.149710345550649e-06, "loss": 0.6071, "step": 11173 }, { "epoch": 0.8455223033559078, "grad_norm": 1.974530577659607, "learning_rate": 1.148609139170685e-06, "loss": 0.7307, "step": 11174 }, { "epoch": 0.8455979720782415, "grad_norm": 2.9320738315582275, "learning_rate": 1.147508424470212e-06, "loss": 0.6461, "step": 11175 }, { "epoch": 0.845673640800575, "grad_norm": 2.1946792602539062, "learning_rate": 1.146408201518159e-06, "loss": 0.8129, "step": 11176 }, { "epoch": 0.8457493095229087, "grad_norm": 2.6159780025482178, "learning_rate": 1.1453084703834259e-06, "loss": 0.7393, "step": 11177 }, { "epoch": 0.8458249782452423, "grad_norm": 2.3954858779907227, "learning_rate": 1.1442092311348814e-06, "loss": 0.6058, "step": 11178 }, { "epoch": 0.8459006469675759, "grad_norm": 2.413275957107544, "learning_rate": 1.1431104838413637e-06, "loss": 0.6777, "step": 11179 }, { "epoch": 0.8459763156899096, "grad_norm": 1.921155333518982, "learning_rate": 1.1420122285716798e-06, "loss": 0.5244, "step": 11180 }, { "epoch": 0.8460519844122432, "grad_norm": 2.647214651107788, "learning_rate": 1.1409144653946064e-06, "loss": 0.6092, "step": 11181 }, { "epoch": 0.8461276531345768, "grad_norm": 3.575390338897705, "learning_rate": 1.1398171943788878e-06, "loss": 0.6721, "step": 11182 }, { "epoch": 0.8462033218569105, "grad_norm": 2.585902452468872, "learning_rate": 1.1387204155932418e-06, "loss": 0.6889, "step": 11183 }, { "epoch": 0.846278990579244, "grad_norm": 2.851043462753296, "learning_rate": 1.1376241291063476e-06, "loss": 0.6995, "step": 11184 }, { "epoch": 0.8463546593015777, "grad_norm": 1.9983781576156616, "learning_rate": 1.1365283349868602e-06, "loss": 0.6446, "step": 11185 }, { "epoch": 0.8464303280239113, "grad_norm": 2.99881911277771, "learning_rate": 1.1354330333034028e-06, "loss": 0.7288, "step": 11186 }, { "epoch": 0.8465059967462449, "grad_norm": 2.3670239448547363, "learning_rate": 1.1343382241245656e-06, "loss": 0.7242, "step": 11187 }, { "epoch": 0.8465816654685786, "grad_norm": 2.0711822509765625, "learning_rate": 1.1332439075189095e-06, "loss": 0.6653, "step": 11188 }, { "epoch": 0.8466573341909122, "grad_norm": 1.839362382888794, "learning_rate": 1.132150083554964e-06, "loss": 0.6028, "step": 11189 }, { "epoch": 0.8467330029132458, "grad_norm": 2.130958080291748, "learning_rate": 1.1310567523012298e-06, "loss": 0.7051, "step": 11190 }, { "epoch": 0.8468086716355794, "grad_norm": 2.0340983867645264, "learning_rate": 1.1299639138261687e-06, "loss": 0.6712, "step": 11191 }, { "epoch": 0.846884340357913, "grad_norm": 2.854250192642212, "learning_rate": 1.1288715681982247e-06, "loss": 0.6333, "step": 11192 }, { "epoch": 0.8469600090802467, "grad_norm": 2.552464485168457, "learning_rate": 1.127779715485802e-06, "loss": 0.6445, "step": 11193 }, { "epoch": 0.8470356778025803, "grad_norm": 2.156697988510132, "learning_rate": 1.1266883557572762e-06, "loss": 0.6876, "step": 11194 }, { "epoch": 0.8471113465249139, "grad_norm": 1.9202338457107544, "learning_rate": 1.1255974890809892e-06, "loss": 0.8281, "step": 11195 }, { "epoch": 0.8471870152472476, "grad_norm": 2.571288585662842, "learning_rate": 1.1245071155252547e-06, "loss": 0.7569, "step": 11196 }, { "epoch": 0.8472626839695812, "grad_norm": 2.219005823135376, "learning_rate": 1.1234172351583611e-06, "loss": 0.5319, "step": 11197 }, { "epoch": 0.8473383526919148, "grad_norm": 1.8485450744628906, "learning_rate": 1.1223278480485535e-06, "loss": 0.5917, "step": 11198 }, { "epoch": 0.8474140214142484, "grad_norm": 2.1197338104248047, "learning_rate": 1.1212389542640566e-06, "loss": 0.6706, "step": 11199 }, { "epoch": 0.847489690136582, "grad_norm": 2.1624670028686523, "learning_rate": 1.1201505538730586e-06, "loss": 0.6419, "step": 11200 }, { "epoch": 0.8475653588589157, "grad_norm": 6.582333087921143, "learning_rate": 1.1190626469437192e-06, "loss": 0.6285, "step": 11201 }, { "epoch": 0.8476410275812493, "grad_norm": 2.2934112548828125, "learning_rate": 1.117975233544168e-06, "loss": 0.6322, "step": 11202 }, { "epoch": 0.8477166963035829, "grad_norm": 2.3044066429138184, "learning_rate": 1.1168883137425003e-06, "loss": 0.6555, "step": 11203 }, { "epoch": 0.8477923650259165, "grad_norm": 2.2896509170532227, "learning_rate": 1.1158018876067855e-06, "loss": 0.6963, "step": 11204 }, { "epoch": 0.8478680337482501, "grad_norm": 3.5141971111297607, "learning_rate": 1.1147159552050557e-06, "loss": 0.6393, "step": 11205 }, { "epoch": 0.8479437024705838, "grad_norm": 2.2190842628479004, "learning_rate": 1.113630516605315e-06, "loss": 0.666, "step": 11206 }, { "epoch": 0.8480193711929174, "grad_norm": 1.9215750694274902, "learning_rate": 1.1125455718755402e-06, "loss": 0.4681, "step": 11207 }, { "epoch": 0.848095039915251, "grad_norm": 1.961254596710205, "learning_rate": 1.1114611210836752e-06, "loss": 0.6418, "step": 11208 }, { "epoch": 0.8481707086375847, "grad_norm": 1.8327749967575073, "learning_rate": 1.1103771642976272e-06, "loss": 0.7484, "step": 11209 }, { "epoch": 0.8482463773599183, "grad_norm": 2.4051928520202637, "learning_rate": 1.1092937015852793e-06, "loss": 0.6549, "step": 11210 }, { "epoch": 0.8483220460822519, "grad_norm": 2.0945074558258057, "learning_rate": 1.108210733014482e-06, "loss": 0.6683, "step": 11211 }, { "epoch": 0.8483977148045855, "grad_norm": 2.168553113937378, "learning_rate": 1.1071282586530533e-06, "loss": 0.7284, "step": 11212 }, { "epoch": 0.8484733835269191, "grad_norm": 2.0133745670318604, "learning_rate": 1.1060462785687816e-06, "loss": 0.6258, "step": 11213 }, { "epoch": 0.8485490522492528, "grad_norm": 2.4811995029449463, "learning_rate": 1.104964792829424e-06, "loss": 0.6499, "step": 11214 }, { "epoch": 0.8486247209715864, "grad_norm": 2.246004343032837, "learning_rate": 1.1038838015027091e-06, "loss": 0.7389, "step": 11215 }, { "epoch": 0.84870038969392, "grad_norm": 3.04372501373291, "learning_rate": 1.1028033046563251e-06, "loss": 0.7067, "step": 11216 }, { "epoch": 0.8487760584162536, "grad_norm": 4.410548210144043, "learning_rate": 1.1017233023579434e-06, "loss": 0.7638, "step": 11217 }, { "epoch": 0.8488517271385873, "grad_norm": 2.3239493370056152, "learning_rate": 1.1006437946751964e-06, "loss": 0.6129, "step": 11218 }, { "epoch": 0.8489273958609209, "grad_norm": 1.7490601539611816, "learning_rate": 1.0995647816756827e-06, "loss": 0.6007, "step": 11219 }, { "epoch": 0.8490030645832545, "grad_norm": 4.648648738861084, "learning_rate": 1.0984862634269753e-06, "loss": 0.5775, "step": 11220 }, { "epoch": 0.8490787333055881, "grad_norm": 1.797368049621582, "learning_rate": 1.0974082399966151e-06, "loss": 0.6743, "step": 11221 }, { "epoch": 0.8491544020279218, "grad_norm": 2.3413918018341064, "learning_rate": 1.0963307114521103e-06, "loss": 0.9284, "step": 11222 }, { "epoch": 0.8492300707502554, "grad_norm": 2.0559816360473633, "learning_rate": 1.0952536778609407e-06, "loss": 0.6177, "step": 11223 }, { "epoch": 0.849305739472589, "grad_norm": 1.990634560585022, "learning_rate": 1.0941771392905526e-06, "loss": 0.5489, "step": 11224 }, { "epoch": 0.8493814081949226, "grad_norm": 1.8581657409667969, "learning_rate": 1.0931010958083619e-06, "loss": 0.6661, "step": 11225 }, { "epoch": 0.8494570769172562, "grad_norm": 3.590940475463867, "learning_rate": 1.0920255474817577e-06, "loss": 0.5803, "step": 11226 }, { "epoch": 0.8495327456395899, "grad_norm": 1.9839054346084595, "learning_rate": 1.0909504943780863e-06, "loss": 0.7265, "step": 11227 }, { "epoch": 0.8496084143619235, "grad_norm": 2.0688846111297607, "learning_rate": 1.0898759365646786e-06, "loss": 0.6612, "step": 11228 }, { "epoch": 0.8496840830842571, "grad_norm": 2.5211896896362305, "learning_rate": 1.0888018741088258e-06, "loss": 0.5971, "step": 11229 }, { "epoch": 0.8497597518065907, "grad_norm": 1.9265793561935425, "learning_rate": 1.0877283070777852e-06, "loss": 0.7328, "step": 11230 }, { "epoch": 0.8498354205289244, "grad_norm": 2.134287118911743, "learning_rate": 1.08665523553879e-06, "loss": 0.6763, "step": 11231 }, { "epoch": 0.849911089251258, "grad_norm": 2.190967082977295, "learning_rate": 1.0855826595590385e-06, "loss": 0.7203, "step": 11232 }, { "epoch": 0.8499867579735916, "grad_norm": 2.094014883041382, "learning_rate": 1.0845105792056989e-06, "loss": 0.7426, "step": 11233 }, { "epoch": 0.8500624266959252, "grad_norm": 2.0137734413146973, "learning_rate": 1.0834389945459096e-06, "loss": 0.6875, "step": 11234 }, { "epoch": 0.8501380954182589, "grad_norm": 2.131049156188965, "learning_rate": 1.0823679056467746e-06, "loss": 0.6814, "step": 11235 }, { "epoch": 0.8502137641405925, "grad_norm": 2.530545949935913, "learning_rate": 1.0812973125753708e-06, "loss": 0.5937, "step": 11236 }, { "epoch": 0.8502894328629261, "grad_norm": 2.1850709915161133, "learning_rate": 1.080227215398741e-06, "loss": 0.7354, "step": 11237 }, { "epoch": 0.8503651015852597, "grad_norm": 3.4205210208892822, "learning_rate": 1.0791576141838997e-06, "loss": 0.6784, "step": 11238 }, { "epoch": 0.8504407703075934, "grad_norm": 1.7098037004470825, "learning_rate": 1.0780885089978268e-06, "loss": 0.6278, "step": 11239 }, { "epoch": 0.850516439029927, "grad_norm": 2.0882728099823, "learning_rate": 1.0770198999074763e-06, "loss": 0.5592, "step": 11240 }, { "epoch": 0.8505921077522606, "grad_norm": 1.8970396518707275, "learning_rate": 1.0759517869797636e-06, "loss": 0.577, "step": 11241 }, { "epoch": 0.8506677764745942, "grad_norm": 2.3336918354034424, "learning_rate": 1.0748841702815775e-06, "loss": 0.6625, "step": 11242 }, { "epoch": 0.8507434451969278, "grad_norm": 2.1653034687042236, "learning_rate": 1.0738170498797813e-06, "loss": 0.7601, "step": 11243 }, { "epoch": 0.8508191139192615, "grad_norm": 2.735419988632202, "learning_rate": 1.0727504258411958e-06, "loss": 0.7221, "step": 11244 }, { "epoch": 0.8508947826415951, "grad_norm": 2.724544048309326, "learning_rate": 1.0716842982326182e-06, "loss": 0.6285, "step": 11245 }, { "epoch": 0.8509704513639287, "grad_norm": 1.9268399477005005, "learning_rate": 1.0706186671208144e-06, "loss": 0.6552, "step": 11246 }, { "epoch": 0.8510461200862623, "grad_norm": 3.0010483264923096, "learning_rate": 1.069553532572515e-06, "loss": 0.6105, "step": 11247 }, { "epoch": 0.851121788808596, "grad_norm": 2.1853091716766357, "learning_rate": 1.0684888946544244e-06, "loss": 0.6504, "step": 11248 }, { "epoch": 0.8511974575309296, "grad_norm": 2.737804412841797, "learning_rate": 1.0674247534332125e-06, "loss": 0.6128, "step": 11249 }, { "epoch": 0.8512731262532632, "grad_norm": 2.3587794303894043, "learning_rate": 1.066361108975522e-06, "loss": 0.6185, "step": 11250 }, { "epoch": 0.8513487949755968, "grad_norm": 1.3188940286636353, "learning_rate": 1.0652979613479574e-06, "loss": 0.7213, "step": 11251 }, { "epoch": 0.8514244636979305, "grad_norm": 2.1479740142822266, "learning_rate": 1.0642353106170956e-06, "loss": 0.4939, "step": 11252 }, { "epoch": 0.8515001324202641, "grad_norm": 2.823068141937256, "learning_rate": 1.0631731568494884e-06, "loss": 0.7325, "step": 11253 }, { "epoch": 0.8515758011425977, "grad_norm": 1.8597098588943481, "learning_rate": 1.0621115001116516e-06, "loss": 0.5473, "step": 11254 }, { "epoch": 0.8516514698649313, "grad_norm": 2.1567814350128174, "learning_rate": 1.0610503404700639e-06, "loss": 0.7219, "step": 11255 }, { "epoch": 0.851727138587265, "grad_norm": 2.5054931640625, "learning_rate": 1.0599896779911822e-06, "loss": 0.7023, "step": 11256 }, { "epoch": 0.8518028073095986, "grad_norm": 2.3276219367980957, "learning_rate": 1.0589295127414283e-06, "loss": 0.661, "step": 11257 }, { "epoch": 0.8518784760319322, "grad_norm": 2.0778353214263916, "learning_rate": 1.0578698447871923e-06, "loss": 0.7084, "step": 11258 }, { "epoch": 0.8519541447542658, "grad_norm": 2.253126621246338, "learning_rate": 1.056810674194835e-06, "loss": 0.6965, "step": 11259 }, { "epoch": 0.8520298134765995, "grad_norm": 2.866034984588623, "learning_rate": 1.0557520010306842e-06, "loss": 0.7603, "step": 11260 }, { "epoch": 0.8521054821989331, "grad_norm": 2.482037305831909, "learning_rate": 1.0546938253610389e-06, "loss": 0.5681, "step": 11261 }, { "epoch": 0.8521811509212667, "grad_norm": 1.911925196647644, "learning_rate": 1.0536361472521644e-06, "loss": 0.7476, "step": 11262 }, { "epoch": 0.8522568196436003, "grad_norm": 2.388798236846924, "learning_rate": 1.0525789667702935e-06, "loss": 0.6213, "step": 11263 }, { "epoch": 0.8523324883659339, "grad_norm": 2.2827963829040527, "learning_rate": 1.051522283981636e-06, "loss": 0.7701, "step": 11264 }, { "epoch": 0.8524081570882676, "grad_norm": 2.937349796295166, "learning_rate": 1.0504660989523604e-06, "loss": 0.7427, "step": 11265 }, { "epoch": 0.8524838258106012, "grad_norm": 1.9935485124588013, "learning_rate": 1.0494104117486086e-06, "loss": 0.6395, "step": 11266 }, { "epoch": 0.8525594945329348, "grad_norm": 1.938444972038269, "learning_rate": 1.0483552224364936e-06, "loss": 0.639, "step": 11267 }, { "epoch": 0.8526351632552684, "grad_norm": 2.6357712745666504, "learning_rate": 1.047300531082092e-06, "loss": 0.7034, "step": 11268 }, { "epoch": 0.8527108319776021, "grad_norm": 1.8678675889968872, "learning_rate": 1.0462463377514543e-06, "loss": 0.7342, "step": 11269 }, { "epoch": 0.8527865006999357, "grad_norm": 2.1463828086853027, "learning_rate": 1.045192642510596e-06, "loss": 0.6495, "step": 11270 }, { "epoch": 0.8528621694222693, "grad_norm": 1.9937474727630615, "learning_rate": 1.0441394454255035e-06, "loss": 0.6389, "step": 11271 }, { "epoch": 0.8529378381446029, "grad_norm": 2.2576024532318115, "learning_rate": 1.0430867465621328e-06, "loss": 0.676, "step": 11272 }, { "epoch": 0.8530135068669366, "grad_norm": 2.1842191219329834, "learning_rate": 1.0420345459864023e-06, "loss": 0.5946, "step": 11273 }, { "epoch": 0.8530891755892702, "grad_norm": 1.927615761756897, "learning_rate": 1.0409828437642092e-06, "loss": 0.6436, "step": 11274 }, { "epoch": 0.8531648443116038, "grad_norm": 3.0947635173797607, "learning_rate": 1.039931639961416e-06, "loss": 0.5957, "step": 11275 }, { "epoch": 0.8532405130339374, "grad_norm": 2.468949556350708, "learning_rate": 1.0388809346438467e-06, "loss": 0.618, "step": 11276 }, { "epoch": 0.853316181756271, "grad_norm": 3.7624311447143555, "learning_rate": 1.037830727877303e-06, "loss": 0.6508, "step": 11277 }, { "epoch": 0.8533918504786047, "grad_norm": 1.588759183883667, "learning_rate": 1.036781019727552e-06, "loss": 0.6209, "step": 11278 }, { "epoch": 0.8534675192009383, "grad_norm": 2.215994119644165, "learning_rate": 1.0357318102603293e-06, "loss": 0.6864, "step": 11279 }, { "epoch": 0.8535431879232719, "grad_norm": 2.4442219734191895, "learning_rate": 1.0346830995413405e-06, "loss": 0.5362, "step": 11280 }, { "epoch": 0.8536188566456056, "grad_norm": 1.9089670181274414, "learning_rate": 1.0336348876362587e-06, "loss": 0.6868, "step": 11281 }, { "epoch": 0.8536945253679392, "grad_norm": 2.9302616119384766, "learning_rate": 1.0325871746107266e-06, "loss": 0.8656, "step": 11282 }, { "epoch": 0.8537701940902728, "grad_norm": 2.2180144786834717, "learning_rate": 1.0315399605303558e-06, "loss": 0.5997, "step": 11283 }, { "epoch": 0.8538458628126064, "grad_norm": 2.2152187824249268, "learning_rate": 1.0304932454607254e-06, "loss": 0.675, "step": 11284 }, { "epoch": 0.85392153153494, "grad_norm": 1.842807412147522, "learning_rate": 1.0294470294673846e-06, "loss": 0.6631, "step": 11285 }, { "epoch": 0.8539972002572737, "grad_norm": 2.476421356201172, "learning_rate": 1.0284013126158527e-06, "loss": 0.5843, "step": 11286 }, { "epoch": 0.8540728689796073, "grad_norm": 2.07633113861084, "learning_rate": 1.0273560949716123e-06, "loss": 0.6895, "step": 11287 }, { "epoch": 0.8541485377019409, "grad_norm": 2.113013744354248, "learning_rate": 1.026311376600117e-06, "loss": 0.7096, "step": 11288 }, { "epoch": 0.8542242064242745, "grad_norm": 1.9778804779052734, "learning_rate": 1.0252671575667984e-06, "loss": 0.5172, "step": 11289 }, { "epoch": 0.8542998751466081, "grad_norm": 2.0087730884552, "learning_rate": 1.0242234379370402e-06, "loss": 0.5877, "step": 11290 }, { "epoch": 0.8543755438689418, "grad_norm": 2.063204288482666, "learning_rate": 1.0231802177762084e-06, "loss": 0.6326, "step": 11291 }, { "epoch": 0.8544512125912754, "grad_norm": 2.7657787799835205, "learning_rate": 1.0221374971496316e-06, "loss": 0.7622, "step": 11292 }, { "epoch": 0.854526881313609, "grad_norm": 1.7157849073410034, "learning_rate": 1.0210952761226075e-06, "loss": 0.7045, "step": 11293 }, { "epoch": 0.8546025500359427, "grad_norm": 2.532585620880127, "learning_rate": 1.020053554760405e-06, "loss": 0.5766, "step": 11294 }, { "epoch": 0.8546782187582763, "grad_norm": 2.225200891494751, "learning_rate": 1.0190123331282586e-06, "loss": 0.6826, "step": 11295 }, { "epoch": 0.8547538874806099, "grad_norm": 2.5582962036132812, "learning_rate": 1.0179716112913737e-06, "loss": 0.6516, "step": 11296 }, { "epoch": 0.8548295562029435, "grad_norm": 1.9359956979751587, "learning_rate": 1.0169313893149256e-06, "loss": 0.6565, "step": 11297 }, { "epoch": 0.8549052249252771, "grad_norm": 1.8306528329849243, "learning_rate": 1.0158916672640524e-06, "loss": 0.7271, "step": 11298 }, { "epoch": 0.8549808936476108, "grad_norm": 2.077057123184204, "learning_rate": 1.0148524452038643e-06, "loss": 0.6396, "step": 11299 }, { "epoch": 0.8550565623699444, "grad_norm": 2.082470655441284, "learning_rate": 1.0138137231994477e-06, "loss": 0.7199, "step": 11300 }, { "epoch": 0.855132231092278, "grad_norm": 1.7077547311782837, "learning_rate": 1.0127755013158432e-06, "loss": 0.679, "step": 11301 }, { "epoch": 0.8552078998146116, "grad_norm": 2.193718671798706, "learning_rate": 1.0117377796180712e-06, "loss": 0.7218, "step": 11302 }, { "epoch": 0.8552835685369452, "grad_norm": 2.146713972091675, "learning_rate": 1.010700558171117e-06, "loss": 0.8642, "step": 11303 }, { "epoch": 0.8553592372592789, "grad_norm": 2.3448047637939453, "learning_rate": 1.0096638370399347e-06, "loss": 0.6397, "step": 11304 }, { "epoch": 0.8554349059816125, "grad_norm": 2.3762691020965576, "learning_rate": 1.0086276162894462e-06, "loss": 0.5452, "step": 11305 }, { "epoch": 0.8555105747039461, "grad_norm": 2.4727251529693604, "learning_rate": 1.0075918959845437e-06, "loss": 0.7184, "step": 11306 }, { "epoch": 0.8555862434262798, "grad_norm": 2.2382912635803223, "learning_rate": 1.0065566761900882e-06, "loss": 0.5858, "step": 11307 }, { "epoch": 0.8556619121486134, "grad_norm": 2.0968942642211914, "learning_rate": 1.005521956970907e-06, "loss": 0.6225, "step": 11308 }, { "epoch": 0.855737580870947, "grad_norm": 2.303739309310913, "learning_rate": 1.0044877383917962e-06, "loss": 0.7995, "step": 11309 }, { "epoch": 0.8558132495932806, "grad_norm": 2.176666498184204, "learning_rate": 1.003454020517525e-06, "loss": 0.6295, "step": 11310 }, { "epoch": 0.8558889183156142, "grad_norm": 2.0503792762756348, "learning_rate": 1.0024208034128285e-06, "loss": 0.599, "step": 11311 }, { "epoch": 0.8559645870379479, "grad_norm": 2.041576385498047, "learning_rate": 1.0013880871424082e-06, "loss": 0.6261, "step": 11312 }, { "epoch": 0.8560402557602815, "grad_norm": 2.608793258666992, "learning_rate": 1.0003558717709352e-06, "loss": 0.5635, "step": 11313 }, { "epoch": 0.8561159244826151, "grad_norm": 2.738176107406616, "learning_rate": 9.99324157363053e-07, "loss": 0.7301, "step": 11314 }, { "epoch": 0.8561915932049488, "grad_norm": 2.0771257877349854, "learning_rate": 9.982929439833684e-07, "loss": 0.6853, "step": 11315 }, { "epoch": 0.8562672619272823, "grad_norm": 2.2196590900421143, "learning_rate": 9.972622316964602e-07, "loss": 0.5945, "step": 11316 }, { "epoch": 0.856342930649616, "grad_norm": 2.066857099533081, "learning_rate": 9.962320205668747e-07, "loss": 0.6929, "step": 11317 }, { "epoch": 0.8564185993719496, "grad_norm": 3.5094408988952637, "learning_rate": 9.9520231065913e-07, "loss": 0.7241, "step": 11318 }, { "epoch": 0.8564942680942832, "grad_norm": 2.093404531478882, "learning_rate": 9.94173102037703e-07, "loss": 0.6483, "step": 11319 }, { "epoch": 0.8565699368166169, "grad_norm": 2.078911304473877, "learning_rate": 9.931443947670527e-07, "loss": 0.6671, "step": 11320 }, { "epoch": 0.8566456055389505, "grad_norm": 1.9345077276229858, "learning_rate": 9.92116188911599e-07, "loss": 0.665, "step": 11321 }, { "epoch": 0.8567212742612841, "grad_norm": 1.7748900651931763, "learning_rate": 9.91088484535729e-07, "loss": 0.6294, "step": 11322 }, { "epoch": 0.8567969429836177, "grad_norm": 2.2130126953125, "learning_rate": 9.900612817038015e-07, "loss": 0.637, "step": 11323 }, { "epoch": 0.8568726117059513, "grad_norm": 2.3196053504943848, "learning_rate": 9.890345804801428e-07, "loss": 0.5467, "step": 11324 }, { "epoch": 0.856948280428285, "grad_norm": 3.2533650398254395, "learning_rate": 9.880083809290526e-07, "loss": 0.6086, "step": 11325 }, { "epoch": 0.8570239491506186, "grad_norm": 2.1937711238861084, "learning_rate": 9.869826831147895e-07, "loss": 0.5794, "step": 11326 }, { "epoch": 0.8570996178729522, "grad_norm": 2.005005121231079, "learning_rate": 9.85957487101588e-07, "loss": 0.5671, "step": 11327 }, { "epoch": 0.8571752865952859, "grad_norm": 2.504263401031494, "learning_rate": 9.84932792953649e-07, "loss": 0.6214, "step": 11328 }, { "epoch": 0.8572509553176194, "grad_norm": 2.071230888366699, "learning_rate": 9.83908600735142e-07, "loss": 0.6633, "step": 11329 }, { "epoch": 0.8573266240399531, "grad_norm": 2.571350574493408, "learning_rate": 9.828849105102067e-07, "loss": 0.6164, "step": 11330 }, { "epoch": 0.8574022927622867, "grad_norm": 2.430696725845337, "learning_rate": 9.81861722342948e-07, "loss": 0.6413, "step": 11331 }, { "epoch": 0.8574779614846203, "grad_norm": 3.106407403945923, "learning_rate": 9.80839036297444e-07, "loss": 0.5968, "step": 11332 }, { "epoch": 0.857553630206954, "grad_norm": 2.656306505203247, "learning_rate": 9.798168524377353e-07, "loss": 0.621, "step": 11333 }, { "epoch": 0.8576292989292876, "grad_norm": 2.564502000808716, "learning_rate": 9.787951708278334e-07, "loss": 0.8049, "step": 11334 }, { "epoch": 0.8577049676516212, "grad_norm": 2.267310619354248, "learning_rate": 9.77773991531726e-07, "loss": 0.724, "step": 11335 }, { "epoch": 0.8577806363739549, "grad_norm": 2.382852554321289, "learning_rate": 9.767533146133558e-07, "loss": 0.6997, "step": 11336 }, { "epoch": 0.8578563050962884, "grad_norm": 3.9764015674591064, "learning_rate": 9.757331401366431e-07, "loss": 0.5756, "step": 11337 }, { "epoch": 0.8579319738186221, "grad_norm": 2.423640727996826, "learning_rate": 9.747134681654754e-07, "loss": 0.6611, "step": 11338 }, { "epoch": 0.8580076425409557, "grad_norm": 1.80607008934021, "learning_rate": 9.736942987637069e-07, "loss": 0.7413, "step": 11339 }, { "epoch": 0.8580833112632893, "grad_norm": 2.182513952255249, "learning_rate": 9.726756319951625e-07, "loss": 0.81, "step": 11340 }, { "epoch": 0.858158979985623, "grad_norm": 2.507810592651367, "learning_rate": 9.716574679236322e-07, "loss": 0.6897, "step": 11341 }, { "epoch": 0.8582346487079565, "grad_norm": 2.9260153770446777, "learning_rate": 9.706398066128786e-07, "loss": 0.6551, "step": 11342 }, { "epoch": 0.8583103174302902, "grad_norm": 1.9764418601989746, "learning_rate": 9.696226481266323e-07, "loss": 0.7505, "step": 11343 }, { "epoch": 0.8583859861526238, "grad_norm": 2.561077356338501, "learning_rate": 9.68605992528588e-07, "loss": 0.6391, "step": 11344 }, { "epoch": 0.8584616548749574, "grad_norm": 2.1961679458618164, "learning_rate": 9.675898398824107e-07, "loss": 0.7866, "step": 11345 }, { "epoch": 0.8585373235972911, "grad_norm": 4.30953311920166, "learning_rate": 9.66574190251741e-07, "loss": 0.7338, "step": 11346 }, { "epoch": 0.8586129923196247, "grad_norm": 5.359286308288574, "learning_rate": 9.655590437001774e-07, "loss": 0.6028, "step": 11347 }, { "epoch": 0.8586886610419583, "grad_norm": 2.5870842933654785, "learning_rate": 9.645444002912923e-07, "loss": 0.6086, "step": 11348 }, { "epoch": 0.858764329764292, "grad_norm": 2.1643459796905518, "learning_rate": 9.63530260088627e-07, "loss": 0.6189, "step": 11349 }, { "epoch": 0.8588399984866255, "grad_norm": 2.0352864265441895, "learning_rate": 9.625166231556905e-07, "loss": 0.6887, "step": 11350 }, { "epoch": 0.8589156672089592, "grad_norm": 2.4447758197784424, "learning_rate": 9.615034895559582e-07, "loss": 0.821, "step": 11351 }, { "epoch": 0.8589913359312928, "grad_norm": 1.9808082580566406, "learning_rate": 9.604908593528783e-07, "loss": 0.8302, "step": 11352 }, { "epoch": 0.8590670046536264, "grad_norm": 2.222776174545288, "learning_rate": 9.594787326098629e-07, "loss": 0.6017, "step": 11353 }, { "epoch": 0.8591426733759601, "grad_norm": 2.0380992889404297, "learning_rate": 9.584671093902976e-07, "loss": 0.6517, "step": 11354 }, { "epoch": 0.8592183420982936, "grad_norm": 2.0588114261627197, "learning_rate": 9.574559897575285e-07, "loss": 0.768, "step": 11355 }, { "epoch": 0.8592940108206273, "grad_norm": 2.0195462703704834, "learning_rate": 9.564453737748789e-07, "loss": 0.5548, "step": 11356 }, { "epoch": 0.859369679542961, "grad_norm": 2.724370002746582, "learning_rate": 9.554352615056375e-07, "loss": 0.7525, "step": 11357 }, { "epoch": 0.8594453482652945, "grad_norm": 4.9696855545043945, "learning_rate": 9.544256530130582e-07, "loss": 0.5823, "step": 11358 }, { "epoch": 0.8595210169876282, "grad_norm": 7.338983058929443, "learning_rate": 9.534165483603669e-07, "loss": 0.6556, "step": 11359 }, { "epoch": 0.8595966857099618, "grad_norm": 2.392385244369507, "learning_rate": 9.524079476107569e-07, "loss": 0.6524, "step": 11360 }, { "epoch": 0.8596723544322954, "grad_norm": 2.5599746704101562, "learning_rate": 9.513998508273906e-07, "loss": 0.6916, "step": 11361 }, { "epoch": 0.8597480231546291, "grad_norm": 2.130995750427246, "learning_rate": 9.503922580733985e-07, "loss": 0.6197, "step": 11362 }, { "epoch": 0.8598236918769626, "grad_norm": 1.7371002435684204, "learning_rate": 9.493851694118781e-07, "loss": 0.8717, "step": 11363 }, { "epoch": 0.8598993605992963, "grad_norm": 2.7299606800079346, "learning_rate": 9.483785849058991e-07, "loss": 0.7653, "step": 11364 }, { "epoch": 0.85997502932163, "grad_norm": 2.7302086353302, "learning_rate": 9.47372504618491e-07, "loss": 0.6537, "step": 11365 }, { "epoch": 0.8600506980439635, "grad_norm": 4.3424153327941895, "learning_rate": 9.463669286126653e-07, "loss": 0.6663, "step": 11366 }, { "epoch": 0.8601263667662972, "grad_norm": 2.4264373779296875, "learning_rate": 9.453618569513898e-07, "loss": 0.7241, "step": 11367 }, { "epoch": 0.8602020354886307, "grad_norm": 2.623892307281494, "learning_rate": 9.443572896976091e-07, "loss": 0.6928, "step": 11368 }, { "epoch": 0.8602777042109644, "grad_norm": 2.2946858406066895, "learning_rate": 9.433532269142278e-07, "loss": 0.6384, "step": 11369 }, { "epoch": 0.860353372933298, "grad_norm": 2.276153326034546, "learning_rate": 9.423496686641248e-07, "loss": 0.7294, "step": 11370 }, { "epoch": 0.8604290416556316, "grad_norm": 1.7452298402786255, "learning_rate": 9.413466150101505e-07, "loss": 0.7299, "step": 11371 }, { "epoch": 0.8605047103779653, "grad_norm": 3.4095396995544434, "learning_rate": 9.403440660151139e-07, "loss": 0.641, "step": 11372 }, { "epoch": 0.8605803791002989, "grad_norm": 3.2750163078308105, "learning_rate": 9.393420217417997e-07, "loss": 0.7713, "step": 11373 }, { "epoch": 0.8606560478226325, "grad_norm": 2.141160726547241, "learning_rate": 9.383404822529598e-07, "loss": 0.6203, "step": 11374 }, { "epoch": 0.8607317165449662, "grad_norm": 2.0613651275634766, "learning_rate": 9.373394476113149e-07, "loss": 0.5827, "step": 11375 }, { "epoch": 0.8608073852672997, "grad_norm": 2.971005916595459, "learning_rate": 9.363389178795488e-07, "loss": 0.8215, "step": 11376 }, { "epoch": 0.8608830539896334, "grad_norm": 2.2407732009887695, "learning_rate": 9.353388931203216e-07, "loss": 0.6456, "step": 11377 }, { "epoch": 0.860958722711967, "grad_norm": 2.820349931716919, "learning_rate": 9.343393733962601e-07, "loss": 0.7552, "step": 11378 }, { "epoch": 0.8610343914343006, "grad_norm": 2.415344476699829, "learning_rate": 9.333403587699511e-07, "loss": 0.712, "step": 11379 }, { "epoch": 0.8611100601566343, "grad_norm": 2.1280593872070312, "learning_rate": 9.323418493039609e-07, "loss": 0.518, "step": 11380 }, { "epoch": 0.8611857288789678, "grad_norm": 2.12552547454834, "learning_rate": 9.31343845060818e-07, "loss": 0.6181, "step": 11381 }, { "epoch": 0.8612613976013015, "grad_norm": 2.4220640659332275, "learning_rate": 9.303463461030208e-07, "loss": 0.632, "step": 11382 }, { "epoch": 0.8613370663236352, "grad_norm": 1.9439997673034668, "learning_rate": 9.293493524930352e-07, "loss": 0.511, "step": 11383 }, { "epoch": 0.8614127350459687, "grad_norm": 2.1692566871643066, "learning_rate": 9.283528642932972e-07, "loss": 0.7734, "step": 11384 }, { "epoch": 0.8614884037683024, "grad_norm": 2.1930227279663086, "learning_rate": 9.27356881566209e-07, "loss": 0.512, "step": 11385 }, { "epoch": 0.861564072490636, "grad_norm": 2.2688231468200684, "learning_rate": 9.263614043741437e-07, "loss": 0.7379, "step": 11386 }, { "epoch": 0.8616397412129696, "grad_norm": 1.992141842842102, "learning_rate": 9.253664327794402e-07, "loss": 0.7816, "step": 11387 }, { "epoch": 0.8617154099353033, "grad_norm": 1.8397136926651, "learning_rate": 9.243719668444079e-07, "loss": 0.6154, "step": 11388 }, { "epoch": 0.8617910786576368, "grad_norm": 2.1748602390289307, "learning_rate": 9.23378006631324e-07, "loss": 0.7455, "step": 11389 }, { "epoch": 0.8618667473799705, "grad_norm": 2.877929925918579, "learning_rate": 9.223845522024305e-07, "loss": 0.7016, "step": 11390 }, { "epoch": 0.8619424161023042, "grad_norm": 2.6606006622314453, "learning_rate": 9.213916036199409e-07, "loss": 0.7219, "step": 11391 }, { "epoch": 0.8620180848246377, "grad_norm": 2.1790380477905273, "learning_rate": 9.203991609460422e-07, "loss": 0.6841, "step": 11392 }, { "epoch": 0.8620937535469714, "grad_norm": 2.446531295776367, "learning_rate": 9.19407224242879e-07, "loss": 0.63, "step": 11393 }, { "epoch": 0.8621694222693049, "grad_norm": 2.1365866661071777, "learning_rate": 9.184157935725702e-07, "loss": 0.7192, "step": 11394 }, { "epoch": 0.8622450909916386, "grad_norm": 3.078911304473877, "learning_rate": 9.174248689972045e-07, "loss": 0.7079, "step": 11395 }, { "epoch": 0.8623207597139723, "grad_norm": 1.73885977268219, "learning_rate": 9.164344505788351e-07, "loss": 0.7938, "step": 11396 }, { "epoch": 0.8623964284363058, "grad_norm": 2.480379581451416, "learning_rate": 9.154445383794863e-07, "loss": 0.5902, "step": 11397 }, { "epoch": 0.8624720971586395, "grad_norm": 2.1541407108306885, "learning_rate": 9.144551324611486e-07, "loss": 0.6969, "step": 11398 }, { "epoch": 0.8625477658809731, "grad_norm": 2.4464011192321777, "learning_rate": 9.134662328857826e-07, "loss": 0.6242, "step": 11399 }, { "epoch": 0.8626234346033067, "grad_norm": 2.167466640472412, "learning_rate": 9.124778397153175e-07, "loss": 0.6878, "step": 11400 }, { "epoch": 0.8626991033256404, "grad_norm": 2.1103248596191406, "learning_rate": 9.114899530116459e-07, "loss": 0.6499, "step": 11401 }, { "epoch": 0.8627747720479739, "grad_norm": 1.9201300144195557, "learning_rate": 9.105025728366354e-07, "loss": 0.6265, "step": 11402 }, { "epoch": 0.8628504407703076, "grad_norm": 2.0732924938201904, "learning_rate": 9.095156992521204e-07, "loss": 0.6943, "step": 11403 }, { "epoch": 0.8629261094926413, "grad_norm": 2.583040952682495, "learning_rate": 9.085293323198995e-07, "loss": 0.6831, "step": 11404 }, { "epoch": 0.8630017782149748, "grad_norm": 2.722351312637329, "learning_rate": 9.075434721017414e-07, "loss": 0.6832, "step": 11405 }, { "epoch": 0.8630774469373085, "grad_norm": 2.765360116958618, "learning_rate": 9.065581186593855e-07, "loss": 0.6247, "step": 11406 }, { "epoch": 0.863153115659642, "grad_norm": 2.3917152881622314, "learning_rate": 9.055732720545377e-07, "loss": 0.647, "step": 11407 }, { "epoch": 0.8632287843819757, "grad_norm": 2.091081380844116, "learning_rate": 9.045889323488724e-07, "loss": 0.5273, "step": 11408 }, { "epoch": 0.8633044531043094, "grad_norm": 2.154362201690674, "learning_rate": 9.036050996040325e-07, "loss": 0.5733, "step": 11409 }, { "epoch": 0.8633801218266429, "grad_norm": 1.648404836654663, "learning_rate": 9.026217738816286e-07, "loss": 0.4534, "step": 11410 }, { "epoch": 0.8634557905489766, "grad_norm": 2.2857422828674316, "learning_rate": 9.016389552432365e-07, "loss": 0.6723, "step": 11411 }, { "epoch": 0.8635314592713103, "grad_norm": 2.5683135986328125, "learning_rate": 9.006566437504079e-07, "loss": 0.6025, "step": 11412 }, { "epoch": 0.8636071279936438, "grad_norm": 5.81996488571167, "learning_rate": 8.996748394646567e-07, "loss": 0.7606, "step": 11413 }, { "epoch": 0.8636827967159775, "grad_norm": 2.0632705688476562, "learning_rate": 8.986935424474686e-07, "loss": 0.4855, "step": 11414 }, { "epoch": 0.863758465438311, "grad_norm": 2.0599944591522217, "learning_rate": 8.977127527602925e-07, "loss": 0.6972, "step": 11415 }, { "epoch": 0.8638341341606447, "grad_norm": 2.6218225955963135, "learning_rate": 8.967324704645483e-07, "loss": 0.7039, "step": 11416 }, { "epoch": 0.8639098028829784, "grad_norm": 2.5727875232696533, "learning_rate": 8.957526956216269e-07, "loss": 0.7001, "step": 11417 }, { "epoch": 0.8639854716053119, "grad_norm": 2.259443998336792, "learning_rate": 8.947734282928841e-07, "loss": 0.6443, "step": 11418 }, { "epoch": 0.8640611403276456, "grad_norm": 2.0039772987365723, "learning_rate": 8.93794668539645e-07, "loss": 0.6373, "step": 11419 }, { "epoch": 0.8641368090499791, "grad_norm": 3.764800548553467, "learning_rate": 8.928164164232015e-07, "loss": 0.5713, "step": 11420 }, { "epoch": 0.8642124777723128, "grad_norm": 2.341893196105957, "learning_rate": 8.918386720048185e-07, "loss": 0.6677, "step": 11421 }, { "epoch": 0.8642881464946465, "grad_norm": 2.431959867477417, "learning_rate": 8.908614353457182e-07, "loss": 0.7105, "step": 11422 }, { "epoch": 0.86436381521698, "grad_norm": 2.6695075035095215, "learning_rate": 8.898847065071055e-07, "loss": 0.7028, "step": 11423 }, { "epoch": 0.8644394839393137, "grad_norm": 2.1305665969848633, "learning_rate": 8.889084855501456e-07, "loss": 0.6387, "step": 11424 }, { "epoch": 0.8645151526616474, "grad_norm": 2.250269889831543, "learning_rate": 8.879327725359684e-07, "loss": 0.6831, "step": 11425 }, { "epoch": 0.8645908213839809, "grad_norm": 2.085334300994873, "learning_rate": 8.869575675256794e-07, "loss": 0.8023, "step": 11426 }, { "epoch": 0.8646664901063146, "grad_norm": 2.079393148422241, "learning_rate": 8.859828705803463e-07, "loss": 0.5462, "step": 11427 }, { "epoch": 0.8647421588286481, "grad_norm": 2.8435254096984863, "learning_rate": 8.850086817610126e-07, "loss": 0.6934, "step": 11428 }, { "epoch": 0.8648178275509818, "grad_norm": 2.8293187618255615, "learning_rate": 8.840350011286813e-07, "loss": 0.6062, "step": 11429 }, { "epoch": 0.8648934962733155, "grad_norm": 1.95986008644104, "learning_rate": 8.830618287443277e-07, "loss": 0.6402, "step": 11430 }, { "epoch": 0.864969164995649, "grad_norm": 1.9640858173370361, "learning_rate": 8.820891646688961e-07, "loss": 0.7183, "step": 11431 }, { "epoch": 0.8650448337179827, "grad_norm": 2.166637659072876, "learning_rate": 8.811170089632977e-07, "loss": 0.7106, "step": 11432 }, { "epoch": 0.8651205024403162, "grad_norm": 2.3195998668670654, "learning_rate": 8.801453616884119e-07, "loss": 0.605, "step": 11433 }, { "epoch": 0.8651961711626499, "grad_norm": 2.1643240451812744, "learning_rate": 8.791742229050869e-07, "loss": 0.5808, "step": 11434 }, { "epoch": 0.8652718398849836, "grad_norm": 2.442021131515503, "learning_rate": 8.782035926741381e-07, "loss": 0.6294, "step": 11435 }, { "epoch": 0.8653475086073171, "grad_norm": 2.4725868701934814, "learning_rate": 8.772334710563489e-07, "loss": 0.5829, "step": 11436 }, { "epoch": 0.8654231773296508, "grad_norm": 2.438480854034424, "learning_rate": 8.762638581124707e-07, "loss": 0.6427, "step": 11437 }, { "epoch": 0.8654988460519845, "grad_norm": 2.3041131496429443, "learning_rate": 8.752947539032268e-07, "loss": 0.564, "step": 11438 }, { "epoch": 0.865574514774318, "grad_norm": 1.981211543083191, "learning_rate": 8.74326158489304e-07, "loss": 0.7893, "step": 11439 }, { "epoch": 0.8656501834966517, "grad_norm": 4.369067668914795, "learning_rate": 8.733580719313574e-07, "loss": 0.6974, "step": 11440 }, { "epoch": 0.8657258522189852, "grad_norm": 2.5769243240356445, "learning_rate": 8.723904942900137e-07, "loss": 0.5311, "step": 11441 }, { "epoch": 0.8658015209413189, "grad_norm": 2.2570548057556152, "learning_rate": 8.714234256258654e-07, "loss": 0.6526, "step": 11442 }, { "epoch": 0.8658771896636526, "grad_norm": 1.7594521045684814, "learning_rate": 8.704568659994721e-07, "loss": 0.6366, "step": 11443 }, { "epoch": 0.8659528583859861, "grad_norm": 1.9956740140914917, "learning_rate": 8.694908154713652e-07, "loss": 0.6423, "step": 11444 }, { "epoch": 0.8660285271083198, "grad_norm": 2.2370998859405518, "learning_rate": 8.685252741020405e-07, "loss": 0.6358, "step": 11445 }, { "epoch": 0.8661041958306533, "grad_norm": 2.284075975418091, "learning_rate": 8.675602419519646e-07, "loss": 0.6383, "step": 11446 }, { "epoch": 0.866179864552987, "grad_norm": 2.2030980587005615, "learning_rate": 8.665957190815671e-07, "loss": 0.5712, "step": 11447 }, { "epoch": 0.8662555332753207, "grad_norm": 2.6359593868255615, "learning_rate": 8.656317055512537e-07, "loss": 0.6499, "step": 11448 }, { "epoch": 0.8663312019976542, "grad_norm": 14.77769660949707, "learning_rate": 8.646682014213941e-07, "loss": 0.8428, "step": 11449 }, { "epoch": 0.8664068707199879, "grad_norm": 2.7199504375457764, "learning_rate": 8.637052067523231e-07, "loss": 0.6286, "step": 11450 }, { "epoch": 0.8664825394423216, "grad_norm": 1.821079134941101, "learning_rate": 8.627427216043474e-07, "loss": 0.578, "step": 11451 }, { "epoch": 0.8665582081646551, "grad_norm": 2.893192768096924, "learning_rate": 8.617807460377428e-07, "loss": 0.7833, "step": 11452 }, { "epoch": 0.8666338768869888, "grad_norm": 2.2512125968933105, "learning_rate": 8.608192801127491e-07, "loss": 0.7754, "step": 11453 }, { "epoch": 0.8667095456093223, "grad_norm": 1.8546165227890015, "learning_rate": 8.598583238895782e-07, "loss": 0.457, "step": 11454 }, { "epoch": 0.866785214331656, "grad_norm": 2.225909471511841, "learning_rate": 8.588978774284069e-07, "loss": 0.6653, "step": 11455 }, { "epoch": 0.8668608830539897, "grad_norm": 2.2338666915893555, "learning_rate": 8.57937940789382e-07, "loss": 0.6326, "step": 11456 }, { "epoch": 0.8669365517763232, "grad_norm": 2.0527563095092773, "learning_rate": 8.569785140326197e-07, "loss": 0.8369, "step": 11457 }, { "epoch": 0.8670122204986569, "grad_norm": 3.0348801612854004, "learning_rate": 8.560195972181965e-07, "loss": 0.705, "step": 11458 }, { "epoch": 0.8670878892209904, "grad_norm": 2.616718292236328, "learning_rate": 8.550611904061698e-07, "loss": 0.7044, "step": 11459 }, { "epoch": 0.8671635579433241, "grad_norm": 2.5653347969055176, "learning_rate": 8.541032936565564e-07, "loss": 0.7478, "step": 11460 }, { "epoch": 0.8672392266656578, "grad_norm": 2.5321779251098633, "learning_rate": 8.531459070293403e-07, "loss": 0.5518, "step": 11461 }, { "epoch": 0.8673148953879913, "grad_norm": 2.7409920692443848, "learning_rate": 8.521890305844775e-07, "loss": 0.6368, "step": 11462 }, { "epoch": 0.867390564110325, "grad_norm": 2.6878130435943604, "learning_rate": 8.512326643818912e-07, "loss": 0.6114, "step": 11463 }, { "epoch": 0.8674662328326587, "grad_norm": 2.02453351020813, "learning_rate": 8.502768084814714e-07, "loss": 0.6421, "step": 11464 }, { "epoch": 0.8675419015549922, "grad_norm": 2.029902696609497, "learning_rate": 8.493214629430773e-07, "loss": 0.6868, "step": 11465 }, { "epoch": 0.8676175702773259, "grad_norm": 2.0235416889190674, "learning_rate": 8.483666278265348e-07, "loss": 0.6933, "step": 11466 }, { "epoch": 0.8676932389996594, "grad_norm": 1.4413191080093384, "learning_rate": 8.474123031916425e-07, "loss": 0.7743, "step": 11467 }, { "epoch": 0.8677689077219931, "grad_norm": 2.967958688735962, "learning_rate": 8.464584890981572e-07, "loss": 0.7331, "step": 11468 }, { "epoch": 0.8678445764443268, "grad_norm": 1.6408123970031738, "learning_rate": 8.455051856058142e-07, "loss": 0.6527, "step": 11469 }, { "epoch": 0.8679202451666603, "grad_norm": 2.9439444541931152, "learning_rate": 8.44552392774311e-07, "loss": 0.6358, "step": 11470 }, { "epoch": 0.867995913888994, "grad_norm": 2.3100738525390625, "learning_rate": 8.436001106633165e-07, "loss": 0.7387, "step": 11471 }, { "epoch": 0.8680715826113276, "grad_norm": 2.5663743019104004, "learning_rate": 8.426483393324633e-07, "loss": 0.658, "step": 11472 }, { "epoch": 0.8681472513336612, "grad_norm": 3.1118669509887695, "learning_rate": 8.416970788413527e-07, "loss": 0.7758, "step": 11473 }, { "epoch": 0.8682229200559949, "grad_norm": 2.1538186073303223, "learning_rate": 8.407463292495617e-07, "loss": 0.6391, "step": 11474 }, { "epoch": 0.8682985887783284, "grad_norm": 2.083836078643799, "learning_rate": 8.39796090616625e-07, "loss": 0.6682, "step": 11475 }, { "epoch": 0.8683742575006621, "grad_norm": 2.3815724849700928, "learning_rate": 8.38846363002049e-07, "loss": 0.4637, "step": 11476 }, { "epoch": 0.8684499262229958, "grad_norm": 1.9574769735336304, "learning_rate": 8.378971464653112e-07, "loss": 0.7101, "step": 11477 }, { "epoch": 0.8685255949453293, "grad_norm": 2.0645639896392822, "learning_rate": 8.369484410658537e-07, "loss": 0.5948, "step": 11478 }, { "epoch": 0.868601263667663, "grad_norm": 2.0333096981048584, "learning_rate": 8.360002468630862e-07, "loss": 0.6257, "step": 11479 }, { "epoch": 0.8686769323899965, "grad_norm": 2.2880470752716064, "learning_rate": 8.350525639163892e-07, "loss": 0.7798, "step": 11480 }, { "epoch": 0.8687526011123302, "grad_norm": 2.19171404838562, "learning_rate": 8.341053922851111e-07, "loss": 0.7149, "step": 11481 }, { "epoch": 0.8688282698346639, "grad_norm": 2.084751605987549, "learning_rate": 8.331587320285638e-07, "loss": 0.5851, "step": 11482 }, { "epoch": 0.8689039385569974, "grad_norm": 2.067612886428833, "learning_rate": 8.322125832060294e-07, "loss": 0.6274, "step": 11483 }, { "epoch": 0.8689796072793311, "grad_norm": 2.2991397380828857, "learning_rate": 8.312669458767629e-07, "loss": 0.5268, "step": 11484 }, { "epoch": 0.8690552760016648, "grad_norm": 2.2263104915618896, "learning_rate": 8.303218200999817e-07, "loss": 0.642, "step": 11485 }, { "epoch": 0.8691309447239983, "grad_norm": 2.2175941467285156, "learning_rate": 8.293772059348716e-07, "loss": 0.5625, "step": 11486 }, { "epoch": 0.869206613446332, "grad_norm": 2.1039774417877197, "learning_rate": 8.28433103440587e-07, "loss": 0.6794, "step": 11487 }, { "epoch": 0.8692822821686655, "grad_norm": 2.4359757900238037, "learning_rate": 8.27489512676252e-07, "loss": 0.5836, "step": 11488 }, { "epoch": 0.8693579508909992, "grad_norm": 2.079524278640747, "learning_rate": 8.265464337009572e-07, "loss": 0.6959, "step": 11489 }, { "epoch": 0.8694336196133329, "grad_norm": 2.333555221557617, "learning_rate": 8.256038665737602e-07, "loss": 0.766, "step": 11490 }, { "epoch": 0.8695092883356664, "grad_norm": 2.577230215072632, "learning_rate": 8.246618113536889e-07, "loss": 0.6448, "step": 11491 }, { "epoch": 0.8695849570580001, "grad_norm": 2.1624553203582764, "learning_rate": 8.237202680997381e-07, "loss": 0.6679, "step": 11492 }, { "epoch": 0.8696606257803337, "grad_norm": 2.065601110458374, "learning_rate": 8.227792368708686e-07, "loss": 0.5157, "step": 11493 }, { "epoch": 0.8697362945026673, "grad_norm": 2.3269245624542236, "learning_rate": 8.218387177260094e-07, "loss": 0.7201, "step": 11494 }, { "epoch": 0.869811963225001, "grad_norm": 2.024136543273926, "learning_rate": 8.208987107240642e-07, "loss": 0.5877, "step": 11495 }, { "epoch": 0.8698876319473345, "grad_norm": 2.4356894493103027, "learning_rate": 8.19959215923895e-07, "loss": 0.6413, "step": 11496 }, { "epoch": 0.8699633006696682, "grad_norm": 2.5709826946258545, "learning_rate": 8.190202333843368e-07, "loss": 0.7845, "step": 11497 }, { "epoch": 0.8700389693920019, "grad_norm": 2.712878704071045, "learning_rate": 8.180817631641923e-07, "loss": 0.623, "step": 11498 }, { "epoch": 0.8701146381143354, "grad_norm": 3.3466601371765137, "learning_rate": 8.171438053222318e-07, "loss": 0.6294, "step": 11499 }, { "epoch": 0.8701903068366691, "grad_norm": 2.0400924682617188, "learning_rate": 8.162063599171923e-07, "loss": 0.6766, "step": 11500 }, { "epoch": 0.8702659755590026, "grad_norm": 2.169499397277832, "learning_rate": 8.152694270077796e-07, "loss": 0.6908, "step": 11501 }, { "epoch": 0.8703416442813363, "grad_norm": 2.0179340839385986, "learning_rate": 8.143330066526689e-07, "loss": 0.7238, "step": 11502 }, { "epoch": 0.87041731300367, "grad_norm": 2.6831698417663574, "learning_rate": 8.133970989105024e-07, "loss": 0.6951, "step": 11503 }, { "epoch": 0.8704929817260035, "grad_norm": 2.6158101558685303, "learning_rate": 8.12461703839884e-07, "loss": 0.561, "step": 11504 }, { "epoch": 0.8705686504483372, "grad_norm": 2.133500337600708, "learning_rate": 8.115268214993981e-07, "loss": 0.7176, "step": 11505 }, { "epoch": 0.8706443191706708, "grad_norm": 2.4899628162384033, "learning_rate": 8.105924519475886e-07, "loss": 0.7863, "step": 11506 }, { "epoch": 0.8707199878930044, "grad_norm": 2.085965394973755, "learning_rate": 8.096585952429668e-07, "loss": 0.6698, "step": 11507 }, { "epoch": 0.8707956566153381, "grad_norm": 2.165194272994995, "learning_rate": 8.08725251444013e-07, "loss": 0.7282, "step": 11508 }, { "epoch": 0.8708713253376716, "grad_norm": 2.1750943660736084, "learning_rate": 8.077924206091794e-07, "loss": 0.7459, "step": 11509 }, { "epoch": 0.8709469940600053, "grad_norm": 2.4543588161468506, "learning_rate": 8.068601027968802e-07, "loss": 0.7384, "step": 11510 }, { "epoch": 0.871022662782339, "grad_norm": 2.087395668029785, "learning_rate": 8.059282980655007e-07, "loss": 0.5855, "step": 11511 }, { "epoch": 0.8710983315046725, "grad_norm": 2.3441057205200195, "learning_rate": 8.049970064733953e-07, "loss": 0.7226, "step": 11512 }, { "epoch": 0.8711740002270062, "grad_norm": 1.9657081365585327, "learning_rate": 8.040662280788844e-07, "loss": 0.631, "step": 11513 }, { "epoch": 0.8712496689493398, "grad_norm": 3.1376612186431885, "learning_rate": 8.031359629402512e-07, "loss": 0.7256, "step": 11514 }, { "epoch": 0.8713253376716734, "grad_norm": 2.253770112991333, "learning_rate": 8.022062111157583e-07, "loss": 0.5113, "step": 11515 }, { "epoch": 0.8714010063940071, "grad_norm": 2.1332924365997314, "learning_rate": 8.01276972663627e-07, "loss": 0.6469, "step": 11516 }, { "epoch": 0.8714766751163406, "grad_norm": 2.4022717475891113, "learning_rate": 8.003482476420517e-07, "loss": 0.6562, "step": 11517 }, { "epoch": 0.8715523438386743, "grad_norm": 2.587195634841919, "learning_rate": 7.99420036109188e-07, "loss": 0.7556, "step": 11518 }, { "epoch": 0.8716280125610079, "grad_norm": 2.437520742416382, "learning_rate": 7.984923381231634e-07, "loss": 0.8001, "step": 11519 }, { "epoch": 0.8717036812833415, "grad_norm": 2.373514413833618, "learning_rate": 7.975651537420793e-07, "loss": 0.6267, "step": 11520 }, { "epoch": 0.8717793500056752, "grad_norm": 1.9270378351211548, "learning_rate": 7.966384830239933e-07, "loss": 0.5706, "step": 11521 }, { "epoch": 0.8718550187280087, "grad_norm": 2.1025631427764893, "learning_rate": 7.957123260269391e-07, "loss": 0.6488, "step": 11522 }, { "epoch": 0.8719306874503424, "grad_norm": 2.036912441253662, "learning_rate": 7.947866828089142e-07, "loss": 0.6159, "step": 11523 }, { "epoch": 0.8720063561726761, "grad_norm": 2.3144853115081787, "learning_rate": 7.938615534278862e-07, "loss": 0.7151, "step": 11524 }, { "epoch": 0.8720820248950096, "grad_norm": 2.1579651832580566, "learning_rate": 7.929369379417899e-07, "loss": 0.6715, "step": 11525 }, { "epoch": 0.8721576936173433, "grad_norm": 2.2118868827819824, "learning_rate": 7.920128364085268e-07, "loss": 0.6239, "step": 11526 }, { "epoch": 0.8722333623396769, "grad_norm": 2.359009265899658, "learning_rate": 7.910892488859698e-07, "loss": 0.6618, "step": 11527 }, { "epoch": 0.8723090310620105, "grad_norm": 2.0738399028778076, "learning_rate": 7.901661754319534e-07, "loss": 0.5986, "step": 11528 }, { "epoch": 0.8723846997843442, "grad_norm": 3.8261353969573975, "learning_rate": 7.892436161042826e-07, "loss": 0.7576, "step": 11529 }, { "epoch": 0.8724603685066777, "grad_norm": 1.8722835779190063, "learning_rate": 7.883215709607351e-07, "loss": 0.7365, "step": 11530 }, { "epoch": 0.8725360372290114, "grad_norm": 1.9653728008270264, "learning_rate": 7.874000400590526e-07, "loss": 0.6152, "step": 11531 }, { "epoch": 0.872611705951345, "grad_norm": 3.910702705383301, "learning_rate": 7.864790234569411e-07, "loss": 0.5375, "step": 11532 }, { "epoch": 0.8726873746736786, "grad_norm": 1.5178172588348389, "learning_rate": 7.855585212120783e-07, "loss": 0.7787, "step": 11533 }, { "epoch": 0.8727630433960123, "grad_norm": 2.157939910888672, "learning_rate": 7.846385333821103e-07, "loss": 0.5934, "step": 11534 }, { "epoch": 0.8728387121183458, "grad_norm": 2.1008949279785156, "learning_rate": 7.837190600246489e-07, "loss": 0.7366, "step": 11535 }, { "epoch": 0.8729143808406795, "grad_norm": 2.356288433074951, "learning_rate": 7.82800101197274e-07, "loss": 0.5139, "step": 11536 }, { "epoch": 0.8729900495630132, "grad_norm": 1.809434413909912, "learning_rate": 7.818816569575346e-07, "loss": 0.5801, "step": 11537 }, { "epoch": 0.8730657182853467, "grad_norm": 4.456824779510498, "learning_rate": 7.809637273629486e-07, "loss": 0.732, "step": 11538 }, { "epoch": 0.8731413870076804, "grad_norm": 2.023094415664673, "learning_rate": 7.800463124709952e-07, "loss": 0.4706, "step": 11539 }, { "epoch": 0.873217055730014, "grad_norm": 2.1191153526306152, "learning_rate": 7.791294123391274e-07, "loss": 0.6217, "step": 11540 }, { "epoch": 0.8732927244523476, "grad_norm": 3.088059186935425, "learning_rate": 7.782130270247681e-07, "loss": 0.727, "step": 11541 }, { "epoch": 0.8733683931746813, "grad_norm": 2.3737430572509766, "learning_rate": 7.772971565852997e-07, "loss": 0.6377, "step": 11542 }, { "epoch": 0.8734440618970148, "grad_norm": 3.2077624797821045, "learning_rate": 7.76381801078079e-07, "loss": 0.7562, "step": 11543 }, { "epoch": 0.8735197306193485, "grad_norm": 2.7162673473358154, "learning_rate": 7.754669605604284e-07, "loss": 0.5585, "step": 11544 }, { "epoch": 0.8735953993416821, "grad_norm": 1.9874165058135986, "learning_rate": 7.745526350896388e-07, "loss": 0.695, "step": 11545 }, { "epoch": 0.8736710680640157, "grad_norm": 2.2441554069519043, "learning_rate": 7.736388247229667e-07, "loss": 0.716, "step": 11546 }, { "epoch": 0.8737467367863494, "grad_norm": 2.2191619873046875, "learning_rate": 7.727255295176391e-07, "loss": 0.725, "step": 11547 }, { "epoch": 0.873822405508683, "grad_norm": 2.3463375568389893, "learning_rate": 7.718127495308483e-07, "loss": 0.7155, "step": 11548 }, { "epoch": 0.8738980742310166, "grad_norm": 2.282282590866089, "learning_rate": 7.709004848197588e-07, "loss": 0.8187, "step": 11549 }, { "epoch": 0.8739737429533503, "grad_norm": 2.2139830589294434, "learning_rate": 7.699887354414935e-07, "loss": 0.7518, "step": 11550 }, { "epoch": 0.8740494116756838, "grad_norm": 2.0290632247924805, "learning_rate": 7.69077501453154e-07, "loss": 0.6057, "step": 11551 }, { "epoch": 0.8741250803980175, "grad_norm": 1.9807419776916504, "learning_rate": 7.681667829118057e-07, "loss": 0.6753, "step": 11552 }, { "epoch": 0.8742007491203511, "grad_norm": 2.6558451652526855, "learning_rate": 7.672565798744757e-07, "loss": 0.6585, "step": 11553 }, { "epoch": 0.8742764178426847, "grad_norm": 2.189072847366333, "learning_rate": 7.663468923981677e-07, "loss": 0.6509, "step": 11554 }, { "epoch": 0.8743520865650184, "grad_norm": 4.953122138977051, "learning_rate": 7.654377205398479e-07, "loss": 0.7143, "step": 11555 }, { "epoch": 0.874427755287352, "grad_norm": 1.9904096126556396, "learning_rate": 7.64529064356451e-07, "loss": 0.7092, "step": 11556 }, { "epoch": 0.8745034240096856, "grad_norm": 2.697661876678467, "learning_rate": 7.636209239048823e-07, "loss": 0.6954, "step": 11557 }, { "epoch": 0.8745790927320192, "grad_norm": 2.0726523399353027, "learning_rate": 7.627132992420103e-07, "loss": 0.7002, "step": 11558 }, { "epoch": 0.8746547614543528, "grad_norm": 3.2430927753448486, "learning_rate": 7.618061904246736e-07, "loss": 0.6645, "step": 11559 }, { "epoch": 0.8747304301766865, "grad_norm": 1.8486747741699219, "learning_rate": 7.608995975096797e-07, "loss": 0.6222, "step": 11560 }, { "epoch": 0.8748060988990201, "grad_norm": 1.8390332460403442, "learning_rate": 7.599935205538003e-07, "loss": 0.6021, "step": 11561 }, { "epoch": 0.8748817676213537, "grad_norm": 2.2097909450531006, "learning_rate": 7.590879596137789e-07, "loss": 0.7386, "step": 11562 }, { "epoch": 0.8749574363436874, "grad_norm": 2.2231502532958984, "learning_rate": 7.581829147463252e-07, "loss": 0.7924, "step": 11563 }, { "epoch": 0.8750331050660209, "grad_norm": 3.517443895339966, "learning_rate": 7.572783860081139e-07, "loss": 0.7055, "step": 11564 }, { "epoch": 0.8751087737883546, "grad_norm": 2.743074417114258, "learning_rate": 7.563743734557877e-07, "loss": 0.6501, "step": 11565 }, { "epoch": 0.8751844425106882, "grad_norm": 2.1922786235809326, "learning_rate": 7.554708771459651e-07, "loss": 0.7249, "step": 11566 }, { "epoch": 0.8752601112330218, "grad_norm": 2.300901174545288, "learning_rate": 7.5456789713522e-07, "loss": 0.6253, "step": 11567 }, { "epoch": 0.8753357799553555, "grad_norm": 2.120061159133911, "learning_rate": 7.536654334801022e-07, "loss": 0.6272, "step": 11568 }, { "epoch": 0.875411448677689, "grad_norm": 2.244152307510376, "learning_rate": 7.527634862371274e-07, "loss": 0.7696, "step": 11569 }, { "epoch": 0.8754871174000227, "grad_norm": 2.225641965866089, "learning_rate": 7.518620554627785e-07, "loss": 0.598, "step": 11570 }, { "epoch": 0.8755627861223563, "grad_norm": 2.236870288848877, "learning_rate": 7.509611412135034e-07, "loss": 0.7316, "step": 11571 }, { "epoch": 0.8756384548446899, "grad_norm": 2.152217149734497, "learning_rate": 7.500607435457238e-07, "loss": 0.7231, "step": 11572 }, { "epoch": 0.8757141235670236, "grad_norm": 2.509950637817383, "learning_rate": 7.491608625158226e-07, "loss": 0.6495, "step": 11573 }, { "epoch": 0.8757897922893572, "grad_norm": 2.5385403633117676, "learning_rate": 7.482614981801579e-07, "loss": 0.652, "step": 11574 }, { "epoch": 0.8758654610116908, "grad_norm": 2.698321580886841, "learning_rate": 7.473626505950445e-07, "loss": 0.7599, "step": 11575 }, { "epoch": 0.8759411297340245, "grad_norm": 2.6774983406066895, "learning_rate": 7.464643198167735e-07, "loss": 0.6847, "step": 11576 }, { "epoch": 0.876016798456358, "grad_norm": 2.8981354236602783, "learning_rate": 7.455665059016056e-07, "loss": 0.7735, "step": 11577 }, { "epoch": 0.8760924671786917, "grad_norm": 1.9417154788970947, "learning_rate": 7.446692089057583e-07, "loss": 0.5627, "step": 11578 }, { "epoch": 0.8761681359010253, "grad_norm": 2.02998948097229, "learning_rate": 7.437724288854273e-07, "loss": 0.6576, "step": 11579 }, { "epoch": 0.8762438046233589, "grad_norm": 2.600839614868164, "learning_rate": 7.428761658967697e-07, "loss": 0.7377, "step": 11580 }, { "epoch": 0.8763194733456926, "grad_norm": 2.005692720413208, "learning_rate": 7.419804199959138e-07, "loss": 0.5753, "step": 11581 }, { "epoch": 0.8763951420680262, "grad_norm": 2.1509876251220703, "learning_rate": 7.410851912389536e-07, "loss": 0.7596, "step": 11582 }, { "epoch": 0.8764708107903598, "grad_norm": 2.080975294113159, "learning_rate": 7.401904796819512e-07, "loss": 0.5581, "step": 11583 }, { "epoch": 0.8765464795126934, "grad_norm": 2.399707317352295, "learning_rate": 7.392962853809388e-07, "loss": 0.4842, "step": 11584 }, { "epoch": 0.876622148235027, "grad_norm": 2.309359550476074, "learning_rate": 7.384026083919087e-07, "loss": 0.6132, "step": 11585 }, { "epoch": 0.8766978169573607, "grad_norm": 2.053427219390869, "learning_rate": 7.375094487708281e-07, "loss": 0.6819, "step": 11586 }, { "epoch": 0.8767734856796943, "grad_norm": 1.9425896406173706, "learning_rate": 7.366168065736302e-07, "loss": 0.6406, "step": 11587 }, { "epoch": 0.8768491544020279, "grad_norm": 2.2914395332336426, "learning_rate": 7.357246818562174e-07, "loss": 0.6731, "step": 11588 }, { "epoch": 0.8769248231243616, "grad_norm": 1.905128836631775, "learning_rate": 7.348330746744529e-07, "loss": 0.6197, "step": 11589 }, { "epoch": 0.8770004918466952, "grad_norm": 2.3496506214141846, "learning_rate": 7.339419850841741e-07, "loss": 0.6112, "step": 11590 }, { "epoch": 0.8770761605690288, "grad_norm": 2.184185743331909, "learning_rate": 7.330514131411843e-07, "loss": 0.4918, "step": 11591 }, { "epoch": 0.8771518292913624, "grad_norm": 2.560593843460083, "learning_rate": 7.321613589012529e-07, "loss": 0.6898, "step": 11592 }, { "epoch": 0.877227498013696, "grad_norm": 1.9485561847686768, "learning_rate": 7.312718224201194e-07, "loss": 0.6156, "step": 11593 }, { "epoch": 0.8773031667360297, "grad_norm": 2.1661360263824463, "learning_rate": 7.303828037534881e-07, "loss": 0.5933, "step": 11594 }, { "epoch": 0.8773788354583633, "grad_norm": 2.1465418338775635, "learning_rate": 7.294943029570345e-07, "loss": 0.5613, "step": 11595 }, { "epoch": 0.8774545041806969, "grad_norm": 2.0607333183288574, "learning_rate": 7.286063200863953e-07, "loss": 0.4852, "step": 11596 }, { "epoch": 0.8775301729030305, "grad_norm": 2.41105580329895, "learning_rate": 7.277188551971817e-07, "loss": 0.6185, "step": 11597 }, { "epoch": 0.8776058416253641, "grad_norm": 1.9282902479171753, "learning_rate": 7.268319083449715e-07, "loss": 0.506, "step": 11598 }, { "epoch": 0.8776815103476978, "grad_norm": 2.118765354156494, "learning_rate": 7.259454795853041e-07, "loss": 0.856, "step": 11599 }, { "epoch": 0.8777571790700314, "grad_norm": 2.2869462966918945, "learning_rate": 7.250595689736921e-07, "loss": 0.6693, "step": 11600 }, { "epoch": 0.877832847792365, "grad_norm": 2.2871768474578857, "learning_rate": 7.241741765656124e-07, "loss": 0.6692, "step": 11601 }, { "epoch": 0.8779085165146987, "grad_norm": 2.093322992324829, "learning_rate": 7.232893024165172e-07, "loss": 0.6427, "step": 11602 }, { "epoch": 0.8779841852370323, "grad_norm": 1.7536505460739136, "learning_rate": 7.224049465818136e-07, "loss": 0.646, "step": 11603 }, { "epoch": 0.8780598539593659, "grad_norm": 2.1361818313598633, "learning_rate": 7.215211091168859e-07, "loss": 0.7456, "step": 11604 }, { "epoch": 0.8781355226816995, "grad_norm": 2.706083297729492, "learning_rate": 7.206377900770812e-07, "loss": 0.5506, "step": 11605 }, { "epoch": 0.8782111914040331, "grad_norm": 2.02065372467041, "learning_rate": 7.19754989517718e-07, "loss": 0.6444, "step": 11606 }, { "epoch": 0.8782868601263668, "grad_norm": 2.013063669204712, "learning_rate": 7.188727074940781e-07, "loss": 0.7695, "step": 11607 }, { "epoch": 0.8783625288487004, "grad_norm": 1.6234900951385498, "learning_rate": 7.179909440614135e-07, "loss": 0.8568, "step": 11608 }, { "epoch": 0.878438197571034, "grad_norm": 2.600998878479004, "learning_rate": 7.171096992749458e-07, "loss": 0.5912, "step": 11609 }, { "epoch": 0.8785138662933676, "grad_norm": 2.169203281402588, "learning_rate": 7.162289731898561e-07, "loss": 0.6923, "step": 11610 }, { "epoch": 0.8785895350157013, "grad_norm": 2.445180892944336, "learning_rate": 7.153487658613019e-07, "loss": 0.5911, "step": 11611 }, { "epoch": 0.8786652037380349, "grad_norm": 2.2180263996124268, "learning_rate": 7.144690773444034e-07, "loss": 0.6728, "step": 11612 }, { "epoch": 0.8787408724603685, "grad_norm": 2.4425292015075684, "learning_rate": 7.135899076942506e-07, "loss": 0.6479, "step": 11613 }, { "epoch": 0.8788165411827021, "grad_norm": 2.1483867168426514, "learning_rate": 7.127112569658982e-07, "loss": 0.6243, "step": 11614 }, { "epoch": 0.8788922099050358, "grad_norm": 1.742142677307129, "learning_rate": 7.118331252143724e-07, "loss": 0.6261, "step": 11615 }, { "epoch": 0.8789678786273694, "grad_norm": 2.120514392852783, "learning_rate": 7.109555124946641e-07, "loss": 0.7177, "step": 11616 }, { "epoch": 0.879043547349703, "grad_norm": 2.504844903945923, "learning_rate": 7.100784188617293e-07, "loss": 0.5731, "step": 11617 }, { "epoch": 0.8791192160720366, "grad_norm": 2.5175814628601074, "learning_rate": 7.092018443704971e-07, "loss": 0.6151, "step": 11618 }, { "epoch": 0.8791948847943702, "grad_norm": 2.128124713897705, "learning_rate": 7.083257890758618e-07, "loss": 0.6207, "step": 11619 }, { "epoch": 0.8792705535167039, "grad_norm": 1.736514687538147, "learning_rate": 7.074502530326862e-07, "loss": 0.6287, "step": 11620 }, { "epoch": 0.8793462222390375, "grad_norm": 6.643167018890381, "learning_rate": 7.065752362957955e-07, "loss": 0.6171, "step": 11621 }, { "epoch": 0.8794218909613711, "grad_norm": 1.8215636014938354, "learning_rate": 7.057007389199851e-07, "loss": 0.6954, "step": 11622 }, { "epoch": 0.8794975596837047, "grad_norm": 2.094165325164795, "learning_rate": 7.048267609600249e-07, "loss": 0.56, "step": 11623 }, { "epoch": 0.8795732284060384, "grad_norm": 1.876628041267395, "learning_rate": 7.039533024706424e-07, "loss": 0.7134, "step": 11624 }, { "epoch": 0.879648897128372, "grad_norm": 2.3218555450439453, "learning_rate": 7.030803635065356e-07, "loss": 0.7137, "step": 11625 }, { "epoch": 0.8797245658507056, "grad_norm": 3.562688112258911, "learning_rate": 7.022079441223718e-07, "loss": 0.6009, "step": 11626 }, { "epoch": 0.8798002345730392, "grad_norm": 3.2988367080688477, "learning_rate": 7.013360443727855e-07, "loss": 0.5274, "step": 11627 }, { "epoch": 0.8798759032953729, "grad_norm": 2.6240482330322266, "learning_rate": 7.004646643123769e-07, "loss": 0.7412, "step": 11628 }, { "epoch": 0.8799515720177065, "grad_norm": 3.6613142490386963, "learning_rate": 6.995938039957153e-07, "loss": 0.6822, "step": 11629 }, { "epoch": 0.8800272407400401, "grad_norm": 1.9795455932617188, "learning_rate": 6.987234634773381e-07, "loss": 0.6066, "step": 11630 }, { "epoch": 0.8801029094623737, "grad_norm": 2.0627286434173584, "learning_rate": 6.978536428117447e-07, "loss": 0.6163, "step": 11631 }, { "epoch": 0.8801785781847073, "grad_norm": 2.3510892391204834, "learning_rate": 6.969843420534085e-07, "loss": 0.7384, "step": 11632 }, { "epoch": 0.880254246907041, "grad_norm": 2.18217396736145, "learning_rate": 6.961155612567681e-07, "loss": 0.5873, "step": 11633 }, { "epoch": 0.8803299156293746, "grad_norm": 2.205120086669922, "learning_rate": 6.952473004762319e-07, "loss": 0.6898, "step": 11634 }, { "epoch": 0.8804055843517082, "grad_norm": 2.0564920902252197, "learning_rate": 6.943795597661683e-07, "loss": 0.5414, "step": 11635 }, { "epoch": 0.8804812530740418, "grad_norm": 2.376497983932495, "learning_rate": 6.935123391809209e-07, "loss": 0.8052, "step": 11636 }, { "epoch": 0.8805569217963755, "grad_norm": 2.4886834621429443, "learning_rate": 6.926456387747964e-07, "loss": 0.6784, "step": 11637 }, { "epoch": 0.8806325905187091, "grad_norm": 2.1387746334075928, "learning_rate": 6.917794586020722e-07, "loss": 0.6157, "step": 11638 }, { "epoch": 0.8807082592410427, "grad_norm": 3.309083938598633, "learning_rate": 6.909137987169899e-07, "loss": 0.761, "step": 11639 }, { "epoch": 0.8807839279633763, "grad_norm": 2.392918109893799, "learning_rate": 6.900486591737603e-07, "loss": 0.7521, "step": 11640 }, { "epoch": 0.88085959668571, "grad_norm": 2.2521510124206543, "learning_rate": 6.891840400265629e-07, "loss": 0.6259, "step": 11641 }, { "epoch": 0.8809352654080436, "grad_norm": 2.2190327644348145, "learning_rate": 6.883199413295384e-07, "loss": 0.7168, "step": 11642 }, { "epoch": 0.8810109341303772, "grad_norm": 1.9417771100997925, "learning_rate": 6.874563631368037e-07, "loss": 0.7122, "step": 11643 }, { "epoch": 0.8810866028527108, "grad_norm": 2.9581193923950195, "learning_rate": 6.865933055024394e-07, "loss": 0.6767, "step": 11644 }, { "epoch": 0.8811622715750445, "grad_norm": 3.3201589584350586, "learning_rate": 6.857307684804902e-07, "loss": 0.7296, "step": 11645 }, { "epoch": 0.8812379402973781, "grad_norm": 2.456382989883423, "learning_rate": 6.848687521249711e-07, "loss": 0.6919, "step": 11646 }, { "epoch": 0.8813136090197117, "grad_norm": 2.552457332611084, "learning_rate": 6.840072564898647e-07, "loss": 0.7836, "step": 11647 }, { "epoch": 0.8813892777420453, "grad_norm": 2.111968755722046, "learning_rate": 6.831462816291219e-07, "loss": 0.5549, "step": 11648 }, { "epoch": 0.8814649464643789, "grad_norm": 3.1068809032440186, "learning_rate": 6.822858275966585e-07, "loss": 0.6755, "step": 11649 }, { "epoch": 0.8815406151867126, "grad_norm": 2.4133050441741943, "learning_rate": 6.814258944463598e-07, "loss": 0.6294, "step": 11650 }, { "epoch": 0.8816162839090462, "grad_norm": 2.4189083576202393, "learning_rate": 6.805664822320762e-07, "loss": 0.6289, "step": 11651 }, { "epoch": 0.8816919526313798, "grad_norm": 1.988824486732483, "learning_rate": 6.797075910076299e-07, "loss": 0.6044, "step": 11652 }, { "epoch": 0.8817676213537134, "grad_norm": 2.337367534637451, "learning_rate": 6.788492208268029e-07, "loss": 0.6373, "step": 11653 }, { "epoch": 0.8818432900760471, "grad_norm": 4.022536277770996, "learning_rate": 6.779913717433521e-07, "loss": 0.6707, "step": 11654 }, { "epoch": 0.8819189587983807, "grad_norm": 2.011725902557373, "learning_rate": 6.771340438109996e-07, "loss": 0.7432, "step": 11655 }, { "epoch": 0.8819946275207143, "grad_norm": 1.9914313554763794, "learning_rate": 6.762772370834324e-07, "loss": 0.5172, "step": 11656 }, { "epoch": 0.8820702962430479, "grad_norm": 1.9742159843444824, "learning_rate": 6.754209516143058e-07, "loss": 0.6344, "step": 11657 }, { "epoch": 0.8821459649653816, "grad_norm": 2.7394680976867676, "learning_rate": 6.745651874572445e-07, "loss": 0.6957, "step": 11658 }, { "epoch": 0.8822216336877152, "grad_norm": 2.470006227493286, "learning_rate": 6.737099446658389e-07, "loss": 0.7392, "step": 11659 }, { "epoch": 0.8822973024100488, "grad_norm": 2.4756081104278564, "learning_rate": 6.728552232936471e-07, "loss": 0.7639, "step": 11660 }, { "epoch": 0.8823729711323824, "grad_norm": 2.942502021789551, "learning_rate": 6.720010233941943e-07, "loss": 0.6419, "step": 11661 }, { "epoch": 0.882448639854716, "grad_norm": 1.9842886924743652, "learning_rate": 6.711473450209737e-07, "loss": 0.7802, "step": 11662 }, { "epoch": 0.8825243085770497, "grad_norm": 1.7675484418869019, "learning_rate": 6.702941882274446e-07, "loss": 0.6591, "step": 11663 }, { "epoch": 0.8825999772993833, "grad_norm": 2.0675406455993652, "learning_rate": 6.694415530670351e-07, "loss": 0.6647, "step": 11664 }, { "epoch": 0.8826756460217169, "grad_norm": 2.01813006401062, "learning_rate": 6.685894395931396e-07, "loss": 0.631, "step": 11665 }, { "epoch": 0.8827513147440506, "grad_norm": 1.776025414466858, "learning_rate": 6.677378478591225e-07, "loss": 0.819, "step": 11666 }, { "epoch": 0.8828269834663842, "grad_norm": 2.657186269760132, "learning_rate": 6.668867779183099e-07, "loss": 0.6946, "step": 11667 }, { "epoch": 0.8829026521887178, "grad_norm": 2.1002979278564453, "learning_rate": 6.660362298239985e-07, "loss": 0.7027, "step": 11668 }, { "epoch": 0.8829783209110514, "grad_norm": 1.9301815032958984, "learning_rate": 6.651862036294554e-07, "loss": 0.7285, "step": 11669 }, { "epoch": 0.883053989633385, "grad_norm": 2.0952084064483643, "learning_rate": 6.6433669938791e-07, "loss": 0.652, "step": 11670 }, { "epoch": 0.8831296583557187, "grad_norm": 2.0233519077301025, "learning_rate": 6.634877171525611e-07, "loss": 0.6332, "step": 11671 }, { "epoch": 0.8832053270780523, "grad_norm": 2.134343385696411, "learning_rate": 6.626392569765738e-07, "loss": 0.7207, "step": 11672 }, { "epoch": 0.8832809958003859, "grad_norm": 1.9051159620285034, "learning_rate": 6.617913189130837e-07, "loss": 0.6636, "step": 11673 }, { "epoch": 0.8833566645227195, "grad_norm": 3.0139591693878174, "learning_rate": 6.609439030151905e-07, "loss": 0.5951, "step": 11674 }, { "epoch": 0.8834323332450531, "grad_norm": 2.278470277786255, "learning_rate": 6.600970093359605e-07, "loss": 0.519, "step": 11675 }, { "epoch": 0.8835080019673868, "grad_norm": 2.205688953399658, "learning_rate": 6.592506379284314e-07, "loss": 0.7323, "step": 11676 }, { "epoch": 0.8835836706897204, "grad_norm": 2.893535852432251, "learning_rate": 6.584047888456058e-07, "loss": 0.6726, "step": 11677 }, { "epoch": 0.883659339412054, "grad_norm": 3.5614638328552246, "learning_rate": 6.575594621404494e-07, "loss": 0.7368, "step": 11678 }, { "epoch": 0.8837350081343877, "grad_norm": 2.1940793991088867, "learning_rate": 6.567146578659037e-07, "loss": 0.694, "step": 11679 }, { "epoch": 0.8838106768567213, "grad_norm": 2.0482017993927, "learning_rate": 6.558703760748725e-07, "loss": 0.6786, "step": 11680 }, { "epoch": 0.8838863455790549, "grad_norm": 1.887109637260437, "learning_rate": 6.550266168202263e-07, "loss": 0.7625, "step": 11681 }, { "epoch": 0.8839620143013885, "grad_norm": 1.6695749759674072, "learning_rate": 6.541833801548032e-07, "loss": 0.6128, "step": 11682 }, { "epoch": 0.8840376830237221, "grad_norm": 2.545053720474243, "learning_rate": 6.533406661314107e-07, "loss": 0.6178, "step": 11683 }, { "epoch": 0.8841133517460558, "grad_norm": 2.606736183166504, "learning_rate": 6.524984748028226e-07, "loss": 0.7046, "step": 11684 }, { "epoch": 0.8841890204683894, "grad_norm": 2.562772512435913, "learning_rate": 6.516568062217777e-07, "loss": 0.5824, "step": 11685 }, { "epoch": 0.884264689190723, "grad_norm": 2.3864822387695312, "learning_rate": 6.50815660440987e-07, "loss": 0.7149, "step": 11686 }, { "epoch": 0.8843403579130567, "grad_norm": 1.7921406030654907, "learning_rate": 6.499750375131251e-07, "loss": 0.7122, "step": 11687 }, { "epoch": 0.8844160266353902, "grad_norm": 2.3052492141723633, "learning_rate": 6.491349374908321e-07, "loss": 0.6496, "step": 11688 }, { "epoch": 0.8844916953577239, "grad_norm": 1.8889057636260986, "learning_rate": 6.482953604267179e-07, "loss": 0.5084, "step": 11689 }, { "epoch": 0.8845673640800575, "grad_norm": 2.0114352703094482, "learning_rate": 6.474563063733615e-07, "loss": 0.7584, "step": 11690 }, { "epoch": 0.8846430328023911, "grad_norm": 3.2486960887908936, "learning_rate": 6.466177753833097e-07, "loss": 0.6731, "step": 11691 }, { "epoch": 0.8847187015247248, "grad_norm": 1.9725160598754883, "learning_rate": 6.457797675090685e-07, "loss": 0.6108, "step": 11692 }, { "epoch": 0.8847943702470584, "grad_norm": 2.553386926651001, "learning_rate": 6.449422828031191e-07, "loss": 0.5285, "step": 11693 }, { "epoch": 0.884870038969392, "grad_norm": 1.3259893655776978, "learning_rate": 6.441053213179074e-07, "loss": 0.7945, "step": 11694 }, { "epoch": 0.8849457076917256, "grad_norm": 2.1485307216644287, "learning_rate": 6.432688831058464e-07, "loss": 0.534, "step": 11695 }, { "epoch": 0.8850213764140592, "grad_norm": 1.817466139793396, "learning_rate": 6.424329682193174e-07, "loss": 0.7578, "step": 11696 }, { "epoch": 0.8850970451363929, "grad_norm": 2.9842936992645264, "learning_rate": 6.415975767106674e-07, "loss": 0.8272, "step": 11697 }, { "epoch": 0.8851727138587265, "grad_norm": 2.0747790336608887, "learning_rate": 6.407627086322136e-07, "loss": 0.7141, "step": 11698 }, { "epoch": 0.8852483825810601, "grad_norm": 2.0690724849700928, "learning_rate": 6.399283640362322e-07, "loss": 0.6518, "step": 11699 }, { "epoch": 0.8853240513033938, "grad_norm": 1.9558627605438232, "learning_rate": 6.390945429749784e-07, "loss": 0.689, "step": 11700 }, { "epoch": 0.8853997200257273, "grad_norm": 2.246373414993286, "learning_rate": 6.382612455006684e-07, "loss": 0.6619, "step": 11701 }, { "epoch": 0.885475388748061, "grad_norm": 2.5528721809387207, "learning_rate": 6.374284716654823e-07, "loss": 0.6938, "step": 11702 }, { "epoch": 0.8855510574703946, "grad_norm": 3.1837098598480225, "learning_rate": 6.365962215215737e-07, "loss": 0.7115, "step": 11703 }, { "epoch": 0.8856267261927282, "grad_norm": 1.9520882368087769, "learning_rate": 6.357644951210588e-07, "loss": 0.6254, "step": 11704 }, { "epoch": 0.8857023949150619, "grad_norm": 2.3639538288116455, "learning_rate": 6.349332925160267e-07, "loss": 0.8559, "step": 11705 }, { "epoch": 0.8857780636373955, "grad_norm": 2.534674644470215, "learning_rate": 6.341026137585271e-07, "loss": 0.7494, "step": 11706 }, { "epoch": 0.8858537323597291, "grad_norm": 2.048832416534424, "learning_rate": 6.332724589005792e-07, "loss": 0.7289, "step": 11707 }, { "epoch": 0.8859294010820628, "grad_norm": 2.1212267875671387, "learning_rate": 6.324428279941724e-07, "loss": 0.7757, "step": 11708 }, { "epoch": 0.8860050698043963, "grad_norm": 2.0502870082855225, "learning_rate": 6.316137210912593e-07, "loss": 0.7043, "step": 11709 }, { "epoch": 0.88608073852673, "grad_norm": 2.497807502746582, "learning_rate": 6.307851382437612e-07, "loss": 0.653, "step": 11710 }, { "epoch": 0.8861564072490636, "grad_norm": 1.9462158679962158, "learning_rate": 6.299570795035676e-07, "loss": 0.6508, "step": 11711 }, { "epoch": 0.8862320759713972, "grad_norm": 3.3071272373199463, "learning_rate": 6.291295449225352e-07, "loss": 0.7087, "step": 11712 }, { "epoch": 0.8863077446937309, "grad_norm": 2.035278797149658, "learning_rate": 6.283025345524833e-07, "loss": 0.6981, "step": 11713 }, { "epoch": 0.8863834134160645, "grad_norm": 1.910923719406128, "learning_rate": 6.274760484452027e-07, "loss": 0.5813, "step": 11714 }, { "epoch": 0.8864590821383981, "grad_norm": 1.7221661806106567, "learning_rate": 6.266500866524558e-07, "loss": 0.8203, "step": 11715 }, { "epoch": 0.8865347508607317, "grad_norm": 2.013583183288574, "learning_rate": 6.258246492259604e-07, "loss": 0.7452, "step": 11716 }, { "epoch": 0.8866104195830653, "grad_norm": 2.711366653442383, "learning_rate": 6.24999736217412e-07, "loss": 0.7126, "step": 11717 }, { "epoch": 0.886686088305399, "grad_norm": 2.107581853866577, "learning_rate": 6.241753476784674e-07, "loss": 0.655, "step": 11718 }, { "epoch": 0.8867617570277326, "grad_norm": 2.0487051010131836, "learning_rate": 6.233514836607533e-07, "loss": 0.6132, "step": 11719 }, { "epoch": 0.8868374257500662, "grad_norm": 2.098557949066162, "learning_rate": 6.225281442158633e-07, "loss": 0.6881, "step": 11720 }, { "epoch": 0.8869130944723999, "grad_norm": 2.5246660709381104, "learning_rate": 6.217053293953562e-07, "loss": 0.6164, "step": 11721 }, { "epoch": 0.8869887631947334, "grad_norm": 2.3522937297821045, "learning_rate": 6.208830392507609e-07, "loss": 0.661, "step": 11722 }, { "epoch": 0.8870644319170671, "grad_norm": 2.151280164718628, "learning_rate": 6.20061273833572e-07, "loss": 0.7885, "step": 11723 }, { "epoch": 0.8871401006394007, "grad_norm": 2.2462844848632812, "learning_rate": 6.192400331952486e-07, "loss": 0.7719, "step": 11724 }, { "epoch": 0.8872157693617343, "grad_norm": 2.168433427810669, "learning_rate": 6.184193173872194e-07, "loss": 0.5961, "step": 11725 }, { "epoch": 0.887291438084068, "grad_norm": 2.1609530448913574, "learning_rate": 6.175991264608853e-07, "loss": 0.8232, "step": 11726 }, { "epoch": 0.8873671068064016, "grad_norm": 1.7684580087661743, "learning_rate": 6.167794604676032e-07, "loss": 0.7733, "step": 11727 }, { "epoch": 0.8874427755287352, "grad_norm": 2.24383807182312, "learning_rate": 6.15960319458707e-07, "loss": 0.682, "step": 11728 }, { "epoch": 0.8875184442510688, "grad_norm": 2.093867540359497, "learning_rate": 6.151417034854928e-07, "loss": 0.6278, "step": 11729 }, { "epoch": 0.8875941129734024, "grad_norm": 1.9309519529342651, "learning_rate": 6.143236125992245e-07, "loss": 0.6813, "step": 11730 }, { "epoch": 0.8876697816957361, "grad_norm": 2.1042773723602295, "learning_rate": 6.135060468511352e-07, "loss": 0.6952, "step": 11731 }, { "epoch": 0.8877454504180697, "grad_norm": 4.267055988311768, "learning_rate": 6.126890062924218e-07, "loss": 0.685, "step": 11732 }, { "epoch": 0.8878211191404033, "grad_norm": 1.7628538608551025, "learning_rate": 6.118724909742515e-07, "loss": 0.5026, "step": 11733 }, { "epoch": 0.887896787862737, "grad_norm": 1.8866184949874878, "learning_rate": 6.110565009477555e-07, "loss": 0.5225, "step": 11734 }, { "epoch": 0.8879724565850705, "grad_norm": 2.075368881225586, "learning_rate": 6.102410362640336e-07, "loss": 0.6586, "step": 11735 }, { "epoch": 0.8880481253074042, "grad_norm": 3.18533992767334, "learning_rate": 6.094260969741542e-07, "loss": 0.7591, "step": 11736 }, { "epoch": 0.8881237940297378, "grad_norm": 2.374406576156616, "learning_rate": 6.086116831291534e-07, "loss": 0.8184, "step": 11737 }, { "epoch": 0.8881994627520714, "grad_norm": 2.989527940750122, "learning_rate": 6.077977947800284e-07, "loss": 0.8288, "step": 11738 }, { "epoch": 0.8882751314744051, "grad_norm": 2.4953837394714355, "learning_rate": 6.069844319777485e-07, "loss": 0.6191, "step": 11739 }, { "epoch": 0.8883508001967387, "grad_norm": 2.7576467990875244, "learning_rate": 6.061715947732508e-07, "loss": 0.5426, "step": 11740 }, { "epoch": 0.8884264689190723, "grad_norm": 2.4133262634277344, "learning_rate": 6.053592832174357e-07, "loss": 0.6257, "step": 11741 }, { "epoch": 0.888502137641406, "grad_norm": 2.3025264739990234, "learning_rate": 6.045474973611746e-07, "loss": 0.6992, "step": 11742 }, { "epoch": 0.8885778063637395, "grad_norm": 2.0357069969177246, "learning_rate": 6.037362372553026e-07, "loss": 0.6468, "step": 11743 }, { "epoch": 0.8886534750860732, "grad_norm": 2.5537617206573486, "learning_rate": 6.029255029506262e-07, "loss": 0.6608, "step": 11744 }, { "epoch": 0.8887291438084068, "grad_norm": 2.2393789291381836, "learning_rate": 6.021152944979118e-07, "loss": 0.6493, "step": 11745 }, { "epoch": 0.8888048125307404, "grad_norm": 2.217641830444336, "learning_rate": 6.013056119479008e-07, "loss": 0.7603, "step": 11746 }, { "epoch": 0.8888804812530741, "grad_norm": 2.554548978805542, "learning_rate": 6.004964553512986e-07, "loss": 0.7148, "step": 11747 }, { "epoch": 0.8889561499754076, "grad_norm": 1.9256244897842407, "learning_rate": 5.996878247587737e-07, "loss": 0.7035, "step": 11748 }, { "epoch": 0.8890318186977413, "grad_norm": 4.53993034362793, "learning_rate": 5.988797202209676e-07, "loss": 0.4874, "step": 11749 }, { "epoch": 0.889107487420075, "grad_norm": 2.335663080215454, "learning_rate": 5.980721417884838e-07, "loss": 0.7383, "step": 11750 }, { "epoch": 0.8891831561424085, "grad_norm": 2.05330228805542, "learning_rate": 5.972650895119018e-07, "loss": 0.7447, "step": 11751 }, { "epoch": 0.8892588248647422, "grad_norm": 2.6175127029418945, "learning_rate": 5.964585634417553e-07, "loss": 0.6698, "step": 11752 }, { "epoch": 0.8893344935870758, "grad_norm": 1.903134822845459, "learning_rate": 5.956525636285538e-07, "loss": 0.623, "step": 11753 }, { "epoch": 0.8894101623094094, "grad_norm": 2.384854316711426, "learning_rate": 5.94847090122772e-07, "loss": 0.6605, "step": 11754 }, { "epoch": 0.8894858310317431, "grad_norm": 3.7998368740081787, "learning_rate": 5.940421429748514e-07, "loss": 0.627, "step": 11755 }, { "epoch": 0.8895614997540766, "grad_norm": 2.438006639480591, "learning_rate": 5.932377222351987e-07, "loss": 0.5972, "step": 11756 }, { "epoch": 0.8896371684764103, "grad_norm": 2.115206718444824, "learning_rate": 5.924338279541919e-07, "loss": 0.5647, "step": 11757 }, { "epoch": 0.8897128371987439, "grad_norm": 2.2133545875549316, "learning_rate": 5.916304601821733e-07, "loss": 0.6166, "step": 11758 }, { "epoch": 0.8897885059210775, "grad_norm": 2.2478818893432617, "learning_rate": 5.90827618969449e-07, "loss": 0.6041, "step": 11759 }, { "epoch": 0.8898641746434112, "grad_norm": 2.2799623012542725, "learning_rate": 5.900253043662977e-07, "loss": 0.59, "step": 11760 }, { "epoch": 0.8899398433657447, "grad_norm": 2.2203476428985596, "learning_rate": 5.89223516422965e-07, "loss": 0.6151, "step": 11761 }, { "epoch": 0.8900155120880784, "grad_norm": 3.3624329566955566, "learning_rate": 5.88422255189658e-07, "loss": 0.7139, "step": 11762 }, { "epoch": 0.890091180810412, "grad_norm": 1.922843337059021, "learning_rate": 5.876215207165554e-07, "loss": 0.6256, "step": 11763 }, { "epoch": 0.8901668495327456, "grad_norm": 2.0801382064819336, "learning_rate": 5.868213130538032e-07, "loss": 0.6623, "step": 11764 }, { "epoch": 0.8902425182550793, "grad_norm": 1.8652883768081665, "learning_rate": 5.860216322515112e-07, "loss": 0.6678, "step": 11765 }, { "epoch": 0.8903181869774129, "grad_norm": 1.8340070247650146, "learning_rate": 5.852224783597584e-07, "loss": 0.7897, "step": 11766 }, { "epoch": 0.8903938556997465, "grad_norm": 2.2345545291900635, "learning_rate": 5.844238514285908e-07, "loss": 0.6383, "step": 11767 }, { "epoch": 0.8904695244220802, "grad_norm": 3.453942060470581, "learning_rate": 5.836257515080213e-07, "loss": 0.5833, "step": 11768 }, { "epoch": 0.8905451931444137, "grad_norm": 1.9211537837982178, "learning_rate": 5.82828178648031e-07, "loss": 0.6603, "step": 11769 }, { "epoch": 0.8906208618667474, "grad_norm": 1.9571908712387085, "learning_rate": 5.82031132898562e-07, "loss": 0.5726, "step": 11770 }, { "epoch": 0.890696530589081, "grad_norm": 2.344529151916504, "learning_rate": 5.812346143095303e-07, "loss": 0.7352, "step": 11771 }, { "epoch": 0.8907721993114146, "grad_norm": 1.8455153703689575, "learning_rate": 5.80438622930818e-07, "loss": 0.6122, "step": 11772 }, { "epoch": 0.8908478680337483, "grad_norm": 2.3054256439208984, "learning_rate": 5.796431588122711e-07, "loss": 0.5951, "step": 11773 }, { "epoch": 0.8909235367560818, "grad_norm": 2.202211618423462, "learning_rate": 5.788482220037041e-07, "loss": 0.5783, "step": 11774 }, { "epoch": 0.8909992054784155, "grad_norm": 3.3458003997802734, "learning_rate": 5.780538125548977e-07, "loss": 0.6301, "step": 11775 }, { "epoch": 0.8910748742007492, "grad_norm": 6.680607795715332, "learning_rate": 5.772599305156026e-07, "loss": 0.6437, "step": 11776 }, { "epoch": 0.8911505429230827, "grad_norm": 2.2788503170013428, "learning_rate": 5.764665759355326e-07, "loss": 0.5404, "step": 11777 }, { "epoch": 0.8912262116454164, "grad_norm": 2.148418426513672, "learning_rate": 5.756737488643713e-07, "loss": 0.6569, "step": 11778 }, { "epoch": 0.89130188036775, "grad_norm": 2.1473772525787354, "learning_rate": 5.748814493517668e-07, "loss": 0.4821, "step": 11779 }, { "epoch": 0.8913775490900836, "grad_norm": 2.9149558544158936, "learning_rate": 5.740896774473374e-07, "loss": 0.7051, "step": 11780 }, { "epoch": 0.8914532178124173, "grad_norm": 2.491338014602661, "learning_rate": 5.732984332006625e-07, "loss": 0.6287, "step": 11781 }, { "epoch": 0.8915288865347508, "grad_norm": 1.9785722494125366, "learning_rate": 5.725077166612966e-07, "loss": 0.5939, "step": 11782 }, { "epoch": 0.8916045552570845, "grad_norm": 2.247680187225342, "learning_rate": 5.717175278787568e-07, "loss": 0.7237, "step": 11783 }, { "epoch": 0.8916802239794182, "grad_norm": 2.0729784965515137, "learning_rate": 5.709278669025236e-07, "loss": 0.6251, "step": 11784 }, { "epoch": 0.8917558927017517, "grad_norm": 8.808799743652344, "learning_rate": 5.701387337820506e-07, "loss": 0.6845, "step": 11785 }, { "epoch": 0.8918315614240854, "grad_norm": 2.378300666809082, "learning_rate": 5.693501285667561e-07, "loss": 0.6736, "step": 11786 }, { "epoch": 0.8919072301464189, "grad_norm": 1.9735015630722046, "learning_rate": 5.685620513060238e-07, "loss": 0.648, "step": 11787 }, { "epoch": 0.8919828988687526, "grad_norm": 2.113264799118042, "learning_rate": 5.67774502049207e-07, "loss": 0.6149, "step": 11788 }, { "epoch": 0.8920585675910863, "grad_norm": 2.309506893157959, "learning_rate": 5.669874808456244e-07, "loss": 0.8259, "step": 11789 }, { "epoch": 0.8921342363134198, "grad_norm": 2.5161781311035156, "learning_rate": 5.662009877445614e-07, "loss": 0.6214, "step": 11790 }, { "epoch": 0.8922099050357535, "grad_norm": 1.9568015336990356, "learning_rate": 5.654150227952688e-07, "loss": 0.6697, "step": 11791 }, { "epoch": 0.8922855737580871, "grad_norm": 2.603675127029419, "learning_rate": 5.646295860469701e-07, "loss": 0.6682, "step": 11792 }, { "epoch": 0.8923612424804207, "grad_norm": 2.497239589691162, "learning_rate": 5.63844677548849e-07, "loss": 0.6721, "step": 11793 }, { "epoch": 0.8924369112027544, "grad_norm": 2.2003486156463623, "learning_rate": 5.630602973500622e-07, "loss": 0.6348, "step": 11794 }, { "epoch": 0.8925125799250879, "grad_norm": 2.175077199935913, "learning_rate": 5.622764454997265e-07, "loss": 0.7373, "step": 11795 }, { "epoch": 0.8925882486474216, "grad_norm": 2.1149489879608154, "learning_rate": 5.614931220469294e-07, "loss": 0.5557, "step": 11796 }, { "epoch": 0.8926639173697553, "grad_norm": 2.437793493270874, "learning_rate": 5.607103270407288e-07, "loss": 0.8061, "step": 11797 }, { "epoch": 0.8927395860920888, "grad_norm": 2.231208562850952, "learning_rate": 5.599280605301424e-07, "loss": 0.6489, "step": 11798 }, { "epoch": 0.8928152548144225, "grad_norm": 1.8926862478256226, "learning_rate": 5.591463225641592e-07, "loss": 0.7406, "step": 11799 }, { "epoch": 0.892890923536756, "grad_norm": 3.975660562515259, "learning_rate": 5.583651131917338e-07, "loss": 0.6159, "step": 11800 }, { "epoch": 0.8929665922590897, "grad_norm": 1.7848179340362549, "learning_rate": 5.575844324617914e-07, "loss": 0.6479, "step": 11801 }, { "epoch": 0.8930422609814234, "grad_norm": 2.325378656387329, "learning_rate": 5.568042804232135e-07, "loss": 0.7373, "step": 11802 }, { "epoch": 0.8931179297037569, "grad_norm": 2.4571590423583984, "learning_rate": 5.560246571248623e-07, "loss": 0.8312, "step": 11803 }, { "epoch": 0.8931935984260906, "grad_norm": 2.313886880874634, "learning_rate": 5.552455626155596e-07, "loss": 0.7574, "step": 11804 }, { "epoch": 0.8932692671484243, "grad_norm": 3.1456427574157715, "learning_rate": 5.544669969440924e-07, "loss": 0.5273, "step": 11805 }, { "epoch": 0.8933449358707578, "grad_norm": 2.003390073776245, "learning_rate": 5.536889601592178e-07, "loss": 0.7509, "step": 11806 }, { "epoch": 0.8934206045930915, "grad_norm": 2.1938109397888184, "learning_rate": 5.529114523096576e-07, "loss": 0.627, "step": 11807 }, { "epoch": 0.893496273315425, "grad_norm": 3.804588794708252, "learning_rate": 5.521344734441061e-07, "loss": 0.6258, "step": 11808 }, { "epoch": 0.8935719420377587, "grad_norm": 5.310543060302734, "learning_rate": 5.513580236112163e-07, "loss": 0.743, "step": 11809 }, { "epoch": 0.8936476107600924, "grad_norm": 2.3194751739501953, "learning_rate": 5.505821028596133e-07, "loss": 0.8416, "step": 11810 }, { "epoch": 0.8937232794824259, "grad_norm": 2.0355045795440674, "learning_rate": 5.498067112378881e-07, "loss": 0.7552, "step": 11811 }, { "epoch": 0.8937989482047596, "grad_norm": 2.1219053268432617, "learning_rate": 5.490318487945971e-07, "loss": 0.6533, "step": 11812 }, { "epoch": 0.8938746169270931, "grad_norm": 2.342350959777832, "learning_rate": 5.482575155782663e-07, "loss": 0.6366, "step": 11813 }, { "epoch": 0.8939502856494268, "grad_norm": 2.078801393508911, "learning_rate": 5.47483711637386e-07, "loss": 0.7247, "step": 11814 }, { "epoch": 0.8940259543717605, "grad_norm": 2.194209337234497, "learning_rate": 5.467104370204153e-07, "loss": 0.5571, "step": 11815 }, { "epoch": 0.894101623094094, "grad_norm": 2.5547542572021484, "learning_rate": 5.459376917757776e-07, "loss": 0.8075, "step": 11816 }, { "epoch": 0.8941772918164277, "grad_norm": 2.040752410888672, "learning_rate": 5.451654759518632e-07, "loss": 0.5734, "step": 11817 }, { "epoch": 0.8942529605387614, "grad_norm": 2.9403791427612305, "learning_rate": 5.443937895970364e-07, "loss": 0.694, "step": 11818 }, { "epoch": 0.8943286292610949, "grad_norm": 2.2212417125701904, "learning_rate": 5.436226327596176e-07, "loss": 0.5364, "step": 11819 }, { "epoch": 0.8944042979834286, "grad_norm": 2.305257558822632, "learning_rate": 5.428520054879009e-07, "loss": 0.5975, "step": 11820 }, { "epoch": 0.8944799667057621, "grad_norm": 2.220174551010132, "learning_rate": 5.42081907830145e-07, "loss": 0.7507, "step": 11821 }, { "epoch": 0.8945556354280958, "grad_norm": 2.3511104583740234, "learning_rate": 5.413123398345761e-07, "loss": 0.7957, "step": 11822 }, { "epoch": 0.8946313041504295, "grad_norm": 2.9117751121520996, "learning_rate": 5.405433015493879e-07, "loss": 0.6301, "step": 11823 }, { "epoch": 0.894706972872763, "grad_norm": 2.1860318183898926, "learning_rate": 5.397747930227386e-07, "loss": 0.7152, "step": 11824 }, { "epoch": 0.8947826415950967, "grad_norm": 2.775752067565918, "learning_rate": 5.39006814302756e-07, "loss": 0.7394, "step": 11825 }, { "epoch": 0.8948583103174302, "grad_norm": 2.653651475906372, "learning_rate": 5.382393654375344e-07, "loss": 0.6355, "step": 11826 }, { "epoch": 0.8949339790397639, "grad_norm": 1.9705297946929932, "learning_rate": 5.374724464751294e-07, "loss": 0.5801, "step": 11827 }, { "epoch": 0.8950096477620976, "grad_norm": 1.8360956907272339, "learning_rate": 5.367060574635726e-07, "loss": 0.7124, "step": 11828 }, { "epoch": 0.8950853164844311, "grad_norm": 2.0758135318756104, "learning_rate": 5.359401984508566e-07, "loss": 0.7395, "step": 11829 }, { "epoch": 0.8951609852067648, "grad_norm": 2.226825475692749, "learning_rate": 5.351748694849411e-07, "loss": 0.6466, "step": 11830 }, { "epoch": 0.8952366539290985, "grad_norm": 1.9071615934371948, "learning_rate": 5.344100706137527e-07, "loss": 0.708, "step": 11831 }, { "epoch": 0.895312322651432, "grad_norm": 2.7298760414123535, "learning_rate": 5.336458018851881e-07, "loss": 0.6113, "step": 11832 }, { "epoch": 0.8953879913737657, "grad_norm": 2.248567581176758, "learning_rate": 5.32882063347106e-07, "loss": 0.5838, "step": 11833 }, { "epoch": 0.8954636600960992, "grad_norm": 2.2924344539642334, "learning_rate": 5.321188550473351e-07, "loss": 0.5261, "step": 11834 }, { "epoch": 0.8955393288184329, "grad_norm": 1.8909533023834229, "learning_rate": 5.313561770336704e-07, "loss": 0.5217, "step": 11835 }, { "epoch": 0.8956149975407666, "grad_norm": 3.445955514907837, "learning_rate": 5.305940293538733e-07, "loss": 0.6061, "step": 11836 }, { "epoch": 0.8956906662631001, "grad_norm": 1.8945612907409668, "learning_rate": 5.2983241205567e-07, "loss": 0.6668, "step": 11837 }, { "epoch": 0.8957663349854338, "grad_norm": 2.5115628242492676, "learning_rate": 5.290713251867571e-07, "loss": 0.5408, "step": 11838 }, { "epoch": 0.8958420037077673, "grad_norm": 2.304194688796997, "learning_rate": 5.283107687947967e-07, "loss": 0.5717, "step": 11839 }, { "epoch": 0.895917672430101, "grad_norm": 2.0823569297790527, "learning_rate": 5.275507429274185e-07, "loss": 0.6496, "step": 11840 }, { "epoch": 0.8959933411524347, "grad_norm": 2.2738540172576904, "learning_rate": 5.267912476322134e-07, "loss": 0.5803, "step": 11841 }, { "epoch": 0.8960690098747682, "grad_norm": 1.8352035284042358, "learning_rate": 5.260322829567465e-07, "loss": 0.6032, "step": 11842 }, { "epoch": 0.8961446785971019, "grad_norm": 2.4070634841918945, "learning_rate": 5.252738489485467e-07, "loss": 0.6438, "step": 11843 }, { "epoch": 0.8962203473194356, "grad_norm": 2.4445745944976807, "learning_rate": 5.245159456551092e-07, "loss": 0.6391, "step": 11844 }, { "epoch": 0.8962960160417691, "grad_norm": 2.533707857131958, "learning_rate": 5.237585731238958e-07, "loss": 0.6541, "step": 11845 }, { "epoch": 0.8963716847641028, "grad_norm": 2.023911476135254, "learning_rate": 5.230017314023366e-07, "loss": 0.667, "step": 11846 }, { "epoch": 0.8964473534864363, "grad_norm": 2.331500768661499, "learning_rate": 5.222454205378277e-07, "loss": 0.7697, "step": 11847 }, { "epoch": 0.89652302220877, "grad_norm": 2.1052236557006836, "learning_rate": 5.214896405777281e-07, "loss": 0.6318, "step": 11848 }, { "epoch": 0.8965986909311037, "grad_norm": 1.6714646816253662, "learning_rate": 5.207343915693713e-07, "loss": 0.6142, "step": 11849 }, { "epoch": 0.8966743596534372, "grad_norm": 1.8316816091537476, "learning_rate": 5.199796735600541e-07, "loss": 0.63, "step": 11850 }, { "epoch": 0.8967500283757709, "grad_norm": 1.962476134300232, "learning_rate": 5.19225486597036e-07, "loss": 0.5936, "step": 11851 }, { "epoch": 0.8968256970981044, "grad_norm": 2.385847330093384, "learning_rate": 5.184718307275479e-07, "loss": 0.6865, "step": 11852 }, { "epoch": 0.8969013658204381, "grad_norm": 6.628912925720215, "learning_rate": 5.177187059987842e-07, "loss": 0.7063, "step": 11853 }, { "epoch": 0.8969770345427718, "grad_norm": 2.5971126556396484, "learning_rate": 5.169661124579143e-07, "loss": 0.7106, "step": 11854 }, { "epoch": 0.8970527032651053, "grad_norm": 1.8385004997253418, "learning_rate": 5.162140501520612e-07, "loss": 0.5338, "step": 11855 }, { "epoch": 0.897128371987439, "grad_norm": 1.9331872463226318, "learning_rate": 5.154625191283256e-07, "loss": 0.7039, "step": 11856 }, { "epoch": 0.8972040407097727, "grad_norm": 2.373424530029297, "learning_rate": 5.147115194337685e-07, "loss": 0.7406, "step": 11857 }, { "epoch": 0.8972797094321062, "grad_norm": 2.4697651863098145, "learning_rate": 5.139610511154204e-07, "loss": 0.7292, "step": 11858 }, { "epoch": 0.8973553781544399, "grad_norm": 2.0015337467193604, "learning_rate": 5.132111142202799e-07, "loss": 0.6327, "step": 11859 }, { "epoch": 0.8974310468767734, "grad_norm": 2.549948215484619, "learning_rate": 5.124617087953082e-07, "loss": 0.7159, "step": 11860 }, { "epoch": 0.8975067155991071, "grad_norm": 1.9285160303115845, "learning_rate": 5.117128348874368e-07, "loss": 0.6433, "step": 11861 }, { "epoch": 0.8975823843214408, "grad_norm": 2.8049867153167725, "learning_rate": 5.109644925435622e-07, "loss": 0.7273, "step": 11862 }, { "epoch": 0.8976580530437743, "grad_norm": 6.72634744644165, "learning_rate": 5.10216681810546e-07, "loss": 0.7476, "step": 11863 }, { "epoch": 0.897733721766108, "grad_norm": 2.0858564376831055, "learning_rate": 5.094694027352227e-07, "loss": 0.6723, "step": 11864 }, { "epoch": 0.8978093904884415, "grad_norm": 2.3242621421813965, "learning_rate": 5.087226553643868e-07, "loss": 0.6651, "step": 11865 }, { "epoch": 0.8978850592107752, "grad_norm": 2.657841205596924, "learning_rate": 5.079764397448019e-07, "loss": 0.7593, "step": 11866 }, { "epoch": 0.8979607279331089, "grad_norm": 1.891689658164978, "learning_rate": 5.072307559231986e-07, "loss": 0.6064, "step": 11867 }, { "epoch": 0.8980363966554424, "grad_norm": 2.2270796298980713, "learning_rate": 5.064856039462747e-07, "loss": 0.712, "step": 11868 }, { "epoch": 0.8981120653777761, "grad_norm": 2.0540242195129395, "learning_rate": 5.057409838606928e-07, "loss": 0.6487, "step": 11869 }, { "epoch": 0.8981877341001098, "grad_norm": 2.1607933044433594, "learning_rate": 5.049968957130855e-07, "loss": 0.5223, "step": 11870 }, { "epoch": 0.8982634028224433, "grad_norm": 2.262275457382202, "learning_rate": 5.042533395500475e-07, "loss": 0.7798, "step": 11871 }, { "epoch": 0.898339071544777, "grad_norm": 2.3458058834075928, "learning_rate": 5.035103154181458e-07, "loss": 0.7041, "step": 11872 }, { "epoch": 0.8984147402671105, "grad_norm": 2.098083257675171, "learning_rate": 5.02767823363907e-07, "loss": 0.6414, "step": 11873 }, { "epoch": 0.8984904089894442, "grad_norm": 2.3881022930145264, "learning_rate": 5.020258634338309e-07, "loss": 0.7463, "step": 11874 }, { "epoch": 0.8985660777117779, "grad_norm": 1.8996672630310059, "learning_rate": 5.012844356743834e-07, "loss": 0.5266, "step": 11875 }, { "epoch": 0.8986417464341114, "grad_norm": 2.157325506210327, "learning_rate": 5.005435401319904e-07, "loss": 0.6429, "step": 11876 }, { "epoch": 0.8987174151564451, "grad_norm": 2.011565685272217, "learning_rate": 4.998031768530525e-07, "loss": 0.6239, "step": 11877 }, { "epoch": 0.8987930838787787, "grad_norm": 2.1313388347625732, "learning_rate": 4.99063345883932e-07, "loss": 0.6479, "step": 11878 }, { "epoch": 0.8988687526011123, "grad_norm": 2.972902536392212, "learning_rate": 4.983240472709606e-07, "loss": 0.7627, "step": 11879 }, { "epoch": 0.898944421323446, "grad_norm": 2.6932406425476074, "learning_rate": 4.975852810604343e-07, "loss": 0.8005, "step": 11880 }, { "epoch": 0.8990200900457795, "grad_norm": 2.0103864669799805, "learning_rate": 4.968470472986182e-07, "loss": 0.5966, "step": 11881 }, { "epoch": 0.8990957587681132, "grad_norm": 2.020077705383301, "learning_rate": 4.961093460317422e-07, "loss": 0.7393, "step": 11882 }, { "epoch": 0.8991714274904469, "grad_norm": 2.478301763534546, "learning_rate": 4.953721773060064e-07, "loss": 0.5821, "step": 11883 }, { "epoch": 0.8992470962127804, "grad_norm": 2.066504955291748, "learning_rate": 4.946355411675688e-07, "loss": 0.5965, "step": 11884 }, { "epoch": 0.8993227649351141, "grad_norm": 2.5086374282836914, "learning_rate": 4.938994376625646e-07, "loss": 0.7674, "step": 11885 }, { "epoch": 0.8993984336574476, "grad_norm": 2.390106201171875, "learning_rate": 4.931638668370909e-07, "loss": 0.604, "step": 11886 }, { "epoch": 0.8994741023797813, "grad_norm": 2.292848825454712, "learning_rate": 4.924288287372089e-07, "loss": 0.8054, "step": 11887 }, { "epoch": 0.899549771102115, "grad_norm": 2.5886833667755127, "learning_rate": 4.916943234089506e-07, "loss": 0.6667, "step": 11888 }, { "epoch": 0.8996254398244485, "grad_norm": 2.0663654804229736, "learning_rate": 4.909603508983124e-07, "loss": 0.7111, "step": 11889 }, { "epoch": 0.8997011085467822, "grad_norm": 2.1520442962646484, "learning_rate": 4.902269112512594e-07, "loss": 0.7455, "step": 11890 }, { "epoch": 0.8997767772691158, "grad_norm": 2.241392135620117, "learning_rate": 4.894940045137209e-07, "loss": 0.6856, "step": 11891 }, { "epoch": 0.8998524459914494, "grad_norm": 2.823662757873535, "learning_rate": 4.887616307315943e-07, "loss": 0.6895, "step": 11892 }, { "epoch": 0.8999281147137831, "grad_norm": 2.172192335128784, "learning_rate": 4.880297899507438e-07, "loss": 0.7223, "step": 11893 }, { "epoch": 0.9000037834361166, "grad_norm": 2.1436941623687744, "learning_rate": 4.872984822169967e-07, "loss": 0.5677, "step": 11894 }, { "epoch": 0.9000794521584503, "grad_norm": 2.2516303062438965, "learning_rate": 4.865677075761534e-07, "loss": 0.8164, "step": 11895 }, { "epoch": 0.900155120880784, "grad_norm": 3.5125577449798584, "learning_rate": 4.858374660739764e-07, "loss": 0.6542, "step": 11896 }, { "epoch": 0.9002307896031175, "grad_norm": 2.0232994556427, "learning_rate": 4.85107757756196e-07, "loss": 0.5626, "step": 11897 }, { "epoch": 0.9003064583254512, "grad_norm": 2.3365819454193115, "learning_rate": 4.843785826685076e-07, "loss": 0.7357, "step": 11898 }, { "epoch": 0.9003821270477848, "grad_norm": 1.9569621086120605, "learning_rate": 4.836499408565738e-07, "loss": 0.7641, "step": 11899 }, { "epoch": 0.9004577957701184, "grad_norm": 2.693556547164917, "learning_rate": 4.829218323660282e-07, "loss": 0.6307, "step": 11900 }, { "epoch": 0.9005334644924521, "grad_norm": 1.9152913093566895, "learning_rate": 4.821942572424641e-07, "loss": 0.5283, "step": 11901 }, { "epoch": 0.9006091332147856, "grad_norm": 8.86640739440918, "learning_rate": 4.81467215531445e-07, "loss": 0.7087, "step": 11902 }, { "epoch": 0.9006848019371193, "grad_norm": 2.024935722351074, "learning_rate": 4.807407072785018e-07, "loss": 0.657, "step": 11903 }, { "epoch": 0.9007604706594529, "grad_norm": 2.7039053440093994, "learning_rate": 4.800147325291298e-07, "loss": 0.6988, "step": 11904 }, { "epoch": 0.9008361393817865, "grad_norm": 2.0335659980773926, "learning_rate": 4.792892913287927e-07, "loss": 0.5938, "step": 11905 }, { "epoch": 0.9009118081041202, "grad_norm": 2.8437318801879883, "learning_rate": 4.785643837229183e-07, "loss": 0.6922, "step": 11906 }, { "epoch": 0.9009874768264537, "grad_norm": 2.2634239196777344, "learning_rate": 4.778400097569062e-07, "loss": 0.7243, "step": 11907 }, { "epoch": 0.9010631455487874, "grad_norm": 1.859001636505127, "learning_rate": 4.771161694761152e-07, "loss": 0.8049, "step": 11908 }, { "epoch": 0.9011388142711211, "grad_norm": 2.409040927886963, "learning_rate": 4.763928629258748e-07, "loss": 0.5497, "step": 11909 }, { "epoch": 0.9012144829934546, "grad_norm": 2.5353803634643555, "learning_rate": 4.75670090151483e-07, "loss": 0.7495, "step": 11910 }, { "epoch": 0.9012901517157883, "grad_norm": 4.32592248916626, "learning_rate": 4.749478511982025e-07, "loss": 0.5614, "step": 11911 }, { "epoch": 0.9013658204381219, "grad_norm": 1.791941523551941, "learning_rate": 4.7422614611126013e-07, "loss": 0.5837, "step": 11912 }, { "epoch": 0.9014414891604555, "grad_norm": 1.7201472520828247, "learning_rate": 4.7350497493585175e-07, "loss": 0.7207, "step": 11913 }, { "epoch": 0.9015171578827892, "grad_norm": 2.2074570655822754, "learning_rate": 4.7278433771714027e-07, "loss": 0.5863, "step": 11914 }, { "epoch": 0.9015928266051227, "grad_norm": 1.8226673603057861, "learning_rate": 4.720642345002535e-07, "loss": 0.5865, "step": 11915 }, { "epoch": 0.9016684953274564, "grad_norm": 2.0403738021850586, "learning_rate": 4.7134466533028643e-07, "loss": 0.694, "step": 11916 }, { "epoch": 0.90174416404979, "grad_norm": 2.33229398727417, "learning_rate": 4.70625630252303e-07, "loss": 0.7215, "step": 11917 }, { "epoch": 0.9018198327721236, "grad_norm": 2.150709629058838, "learning_rate": 4.6990712931133015e-07, "loss": 0.7984, "step": 11918 }, { "epoch": 0.9018955014944573, "grad_norm": 9.452735900878906, "learning_rate": 4.69189162552361e-07, "loss": 0.5954, "step": 11919 }, { "epoch": 0.9019711702167909, "grad_norm": 2.4279088973999023, "learning_rate": 4.6847173002035747e-07, "loss": 0.6056, "step": 11920 }, { "epoch": 0.9020468389391245, "grad_norm": 2.4450533390045166, "learning_rate": 4.677548317602517e-07, "loss": 0.6681, "step": 11921 }, { "epoch": 0.9021225076614582, "grad_norm": 3.3857312202453613, "learning_rate": 4.670384678169337e-07, "loss": 0.5885, "step": 11922 }, { "epoch": 0.9021981763837917, "grad_norm": 2.306729793548584, "learning_rate": 4.6632263823526467e-07, "loss": 0.7093, "step": 11923 }, { "epoch": 0.9022738451061254, "grad_norm": 1.9452950954437256, "learning_rate": 4.656073430600747e-07, "loss": 0.6975, "step": 11924 }, { "epoch": 0.902349513828459, "grad_norm": 2.4310667514801025, "learning_rate": 4.6489258233615596e-07, "loss": 0.6344, "step": 11925 }, { "epoch": 0.9024251825507926, "grad_norm": 2.114664077758789, "learning_rate": 4.6417835610826863e-07, "loss": 0.7136, "step": 11926 }, { "epoch": 0.9025008512731263, "grad_norm": 2.3902628421783447, "learning_rate": 4.63464664421142e-07, "loss": 0.5602, "step": 11927 }, { "epoch": 0.9025765199954598, "grad_norm": 2.4101219177246094, "learning_rate": 4.6275150731946827e-07, "loss": 0.576, "step": 11928 }, { "epoch": 0.9026521887177935, "grad_norm": 5.150691509246826, "learning_rate": 4.620388848479087e-07, "loss": 0.7431, "step": 11929 }, { "epoch": 0.9027278574401271, "grad_norm": 2.332685708999634, "learning_rate": 4.613267970510876e-07, "loss": 0.67, "step": 11930 }, { "epoch": 0.9028035261624607, "grad_norm": 2.803213119506836, "learning_rate": 4.606152439736003e-07, "loss": 0.6053, "step": 11931 }, { "epoch": 0.9028791948847944, "grad_norm": 2.4882402420043945, "learning_rate": 4.5990422566000824e-07, "loss": 0.6151, "step": 11932 }, { "epoch": 0.902954863607128, "grad_norm": 2.111898422241211, "learning_rate": 4.591937421548337e-07, "loss": 0.7308, "step": 11933 }, { "epoch": 0.9030305323294616, "grad_norm": 1.9006226062774658, "learning_rate": 4.584837935025721e-07, "loss": 0.667, "step": 11934 }, { "epoch": 0.9031062010517953, "grad_norm": 2.327817678451538, "learning_rate": 4.5777437974768186e-07, "loss": 0.6714, "step": 11935 }, { "epoch": 0.9031818697741288, "grad_norm": 2.1458778381347656, "learning_rate": 4.5706550093458856e-07, "loss": 0.744, "step": 11936 }, { "epoch": 0.9032575384964625, "grad_norm": 1.835634708404541, "learning_rate": 4.5635715710768457e-07, "loss": 0.6296, "step": 11937 }, { "epoch": 0.9033332072187961, "grad_norm": 2.9382152557373047, "learning_rate": 4.5564934831132844e-07, "loss": 0.6187, "step": 11938 }, { "epoch": 0.9034088759411297, "grad_norm": 2.5697879791259766, "learning_rate": 4.5494207458984773e-07, "loss": 0.698, "step": 11939 }, { "epoch": 0.9034845446634634, "grad_norm": 2.2090299129486084, "learning_rate": 4.5423533598752997e-07, "loss": 0.6213, "step": 11940 }, { "epoch": 0.903560213385797, "grad_norm": 3.206984043121338, "learning_rate": 4.5352913254863683e-07, "loss": 0.5154, "step": 11941 }, { "epoch": 0.9036358821081306, "grad_norm": 3.334904670715332, "learning_rate": 4.5282346431739285e-07, "loss": 0.5593, "step": 11942 }, { "epoch": 0.9037115508304643, "grad_norm": 2.202554225921631, "learning_rate": 4.5211833133798873e-07, "loss": 0.6092, "step": 11943 }, { "epoch": 0.9037872195527978, "grad_norm": 2.342474937438965, "learning_rate": 4.5141373365458116e-07, "loss": 0.658, "step": 11944 }, { "epoch": 0.9038628882751315, "grad_norm": 2.030301094055176, "learning_rate": 4.5070967131129383e-07, "loss": 0.4765, "step": 11945 }, { "epoch": 0.9039385569974651, "grad_norm": 1.962053656578064, "learning_rate": 4.500061443522214e-07, "loss": 0.5477, "step": 11946 }, { "epoch": 0.9040142257197987, "grad_norm": 2.112226724624634, "learning_rate": 4.4930315282141574e-07, "loss": 0.4979, "step": 11947 }, { "epoch": 0.9040898944421324, "grad_norm": 2.1766979694366455, "learning_rate": 4.486006967629046e-07, "loss": 0.7071, "step": 11948 }, { "epoch": 0.904165563164466, "grad_norm": 2.6591641902923584, "learning_rate": 4.478987762206748e-07, "loss": 0.6703, "step": 11949 }, { "epoch": 0.9042412318867996, "grad_norm": 2.473120927810669, "learning_rate": 4.471973912386861e-07, "loss": 0.5526, "step": 11950 }, { "epoch": 0.9043169006091332, "grad_norm": 2.2571189403533936, "learning_rate": 4.464965418608584e-07, "loss": 0.6613, "step": 11951 }, { "epoch": 0.9043925693314668, "grad_norm": 2.007202386856079, "learning_rate": 4.4579622813108365e-07, "loss": 0.4949, "step": 11952 }, { "epoch": 0.9044682380538005, "grad_norm": 2.1282033920288086, "learning_rate": 4.4509645009321774e-07, "loss": 0.6742, "step": 11953 }, { "epoch": 0.904543906776134, "grad_norm": 2.2609729766845703, "learning_rate": 4.443972077910806e-07, "loss": 0.6759, "step": 11954 }, { "epoch": 0.9046195754984677, "grad_norm": 2.033329486846924, "learning_rate": 4.436985012684612e-07, "loss": 0.6447, "step": 11955 }, { "epoch": 0.9046952442208014, "grad_norm": 2.3120713233947754, "learning_rate": 4.430003305691176e-07, "loss": 0.741, "step": 11956 }, { "epoch": 0.9047709129431349, "grad_norm": 2.225062847137451, "learning_rate": 4.423026957367707e-07, "loss": 0.5616, "step": 11957 }, { "epoch": 0.9048465816654686, "grad_norm": 1.9412503242492676, "learning_rate": 4.416055968151077e-07, "loss": 0.6355, "step": 11958 }, { "epoch": 0.9049222503878022, "grad_norm": 1.9755481481552124, "learning_rate": 4.409090338477826e-07, "loss": 0.6126, "step": 11959 }, { "epoch": 0.9049979191101358, "grad_norm": 2.4795279502868652, "learning_rate": 4.4021300687841747e-07, "loss": 0.6847, "step": 11960 }, { "epoch": 0.9050735878324695, "grad_norm": 2.1052944660186768, "learning_rate": 4.395175159505995e-07, "loss": 0.6671, "step": 11961 }, { "epoch": 0.905149256554803, "grad_norm": 3.0458271503448486, "learning_rate": 4.3882256110788286e-07, "loss": 0.6731, "step": 11962 }, { "epoch": 0.9052249252771367, "grad_norm": 3.665377378463745, "learning_rate": 4.3812814239378774e-07, "loss": 0.7169, "step": 11963 }, { "epoch": 0.9053005939994703, "grad_norm": 1.8927977085113525, "learning_rate": 4.374342598518013e-07, "loss": 0.6032, "step": 11964 }, { "epoch": 0.9053762627218039, "grad_norm": 1.916894555091858, "learning_rate": 4.367409135253758e-07, "loss": 0.6076, "step": 11965 }, { "epoch": 0.9054519314441376, "grad_norm": 1.8118383884429932, "learning_rate": 4.3604810345792956e-07, "loss": 0.7221, "step": 11966 }, { "epoch": 0.9055276001664712, "grad_norm": 1.7049998044967651, "learning_rate": 4.353558296928528e-07, "loss": 0.6027, "step": 11967 }, { "epoch": 0.9056032688888048, "grad_norm": 1.889378547668457, "learning_rate": 4.346640922734949e-07, "loss": 0.6484, "step": 11968 }, { "epoch": 0.9056789376111385, "grad_norm": 2.0997726917266846, "learning_rate": 4.339728912431742e-07, "loss": 0.6673, "step": 11969 }, { "epoch": 0.905754606333472, "grad_norm": 3.0155575275421143, "learning_rate": 4.33282226645177e-07, "loss": 0.7523, "step": 11970 }, { "epoch": 0.9058302750558057, "grad_norm": 2.474846839904785, "learning_rate": 4.3259209852275583e-07, "loss": 0.6474, "step": 11971 }, { "epoch": 0.9059059437781393, "grad_norm": 2.1822690963745117, "learning_rate": 4.31902506919127e-07, "loss": 0.6469, "step": 11972 }, { "epoch": 0.9059816125004729, "grad_norm": 2.6391453742980957, "learning_rate": 4.312134518774761e-07, "loss": 0.7557, "step": 11973 }, { "epoch": 0.9060572812228066, "grad_norm": 1.6580966711044312, "learning_rate": 4.3052493344095346e-07, "loss": 0.8282, "step": 11974 }, { "epoch": 0.9061329499451402, "grad_norm": 2.3595659732818604, "learning_rate": 4.298369516526777e-07, "loss": 0.6902, "step": 11975 }, { "epoch": 0.9062086186674738, "grad_norm": 2.4112627506256104, "learning_rate": 4.2914950655572827e-07, "loss": 0.66, "step": 11976 }, { "epoch": 0.9062842873898074, "grad_norm": 2.256720781326294, "learning_rate": 4.284625981931608e-07, "loss": 0.7697, "step": 11977 }, { "epoch": 0.906359956112141, "grad_norm": 2.7008187770843506, "learning_rate": 4.277762266079899e-07, "loss": 0.5343, "step": 11978 }, { "epoch": 0.9064356248344747, "grad_norm": 2.01400089263916, "learning_rate": 4.270903918431961e-07, "loss": 0.5767, "step": 11979 }, { "epoch": 0.9065112935568083, "grad_norm": 2.3042778968811035, "learning_rate": 4.264050939417301e-07, "loss": 0.789, "step": 11980 }, { "epoch": 0.9065869622791419, "grad_norm": 2.1162967681884766, "learning_rate": 4.2572033294650756e-07, "loss": 0.6247, "step": 11981 }, { "epoch": 0.9066626310014756, "grad_norm": 1.9995644092559814, "learning_rate": 4.2503610890041023e-07, "loss": 0.6561, "step": 11982 }, { "epoch": 0.9067382997238091, "grad_norm": 2.2234203815460205, "learning_rate": 4.2435242184628677e-07, "loss": 0.719, "step": 11983 }, { "epoch": 0.9068139684461428, "grad_norm": 2.4800000190734863, "learning_rate": 4.236692718269519e-07, "loss": 0.635, "step": 11984 }, { "epoch": 0.9068896371684764, "grad_norm": 2.198791742324829, "learning_rate": 4.229866588851855e-07, "loss": 0.5428, "step": 11985 }, { "epoch": 0.90696530589081, "grad_norm": 2.6701927185058594, "learning_rate": 4.2230458306373634e-07, "loss": 0.6647, "step": 11986 }, { "epoch": 0.9070409746131437, "grad_norm": 2.1392929553985596, "learning_rate": 4.216230444053182e-07, "loss": 0.6216, "step": 11987 }, { "epoch": 0.9071166433354773, "grad_norm": 2.303083658218384, "learning_rate": 4.2094204295261095e-07, "loss": 0.7002, "step": 11988 }, { "epoch": 0.9071923120578109, "grad_norm": 2.640005111694336, "learning_rate": 4.2026157874826254e-07, "loss": 0.734, "step": 11989 }, { "epoch": 0.9072679807801445, "grad_norm": 2.1459431648254395, "learning_rate": 4.1958165183488185e-07, "loss": 0.7214, "step": 11990 }, { "epoch": 0.9073436495024781, "grad_norm": 2.6311416625976562, "learning_rate": 4.189022622550508e-07, "loss": 0.6757, "step": 11991 }, { "epoch": 0.9074193182248118, "grad_norm": 2.1436285972595215, "learning_rate": 4.1822341005131636e-07, "loss": 0.7854, "step": 11992 }, { "epoch": 0.9074949869471454, "grad_norm": 2.0433895587921143, "learning_rate": 4.1754509526618754e-07, "loss": 0.6659, "step": 11993 }, { "epoch": 0.907570655669479, "grad_norm": 1.8191251754760742, "learning_rate": 4.1686731794214337e-07, "loss": 0.6189, "step": 11994 }, { "epoch": 0.9076463243918127, "grad_norm": 2.218461751937866, "learning_rate": 4.161900781216299e-07, "loss": 0.5692, "step": 11995 }, { "epoch": 0.9077219931141463, "grad_norm": 2.302450656890869, "learning_rate": 4.1551337584705815e-07, "loss": 0.5076, "step": 11996 }, { "epoch": 0.9077976618364799, "grad_norm": 2.4182517528533936, "learning_rate": 4.148372111608023e-07, "loss": 0.6981, "step": 11997 }, { "epoch": 0.9078733305588135, "grad_norm": 3.4762744903564453, "learning_rate": 4.1416158410520845e-07, "loss": 0.5797, "step": 11998 }, { "epoch": 0.9079489992811471, "grad_norm": 2.167374610900879, "learning_rate": 4.1348649472258673e-07, "loss": 0.6399, "step": 11999 }, { "epoch": 0.9080246680034808, "grad_norm": 2.449777841567993, "learning_rate": 4.128119430552133e-07, "loss": 0.6094, "step": 12000 }, { "epoch": 0.9081003367258144, "grad_norm": 2.1977956295013428, "learning_rate": 4.1213792914533046e-07, "loss": 0.6119, "step": 12001 }, { "epoch": 0.908176005448148, "grad_norm": 1.5713450908660889, "learning_rate": 4.1146445303514537e-07, "loss": 0.8275, "step": 12002 }, { "epoch": 0.9082516741704816, "grad_norm": 2.628474235534668, "learning_rate": 4.107915147668363e-07, "loss": 0.8309, "step": 12003 }, { "epoch": 0.9083273428928152, "grad_norm": 2.199763774871826, "learning_rate": 4.1011911438254357e-07, "loss": 0.6514, "step": 12004 }, { "epoch": 0.9084030116151489, "grad_norm": 5.241161823272705, "learning_rate": 4.094472519243745e-07, "loss": 0.7267, "step": 12005 }, { "epoch": 0.9084786803374825, "grad_norm": 2.1290805339813232, "learning_rate": 4.087759274344034e-07, "loss": 0.6428, "step": 12006 }, { "epoch": 0.9085543490598161, "grad_norm": 1.9543763399124146, "learning_rate": 4.0810514095467164e-07, "loss": 0.6201, "step": 12007 }, { "epoch": 0.9086300177821498, "grad_norm": 2.172839879989624, "learning_rate": 4.074348925271847e-07, "loss": 0.739, "step": 12008 }, { "epoch": 0.9087056865044834, "grad_norm": 2.1397907733917236, "learning_rate": 4.067651821939169e-07, "loss": 0.6348, "step": 12009 }, { "epoch": 0.908781355226817, "grad_norm": 2.9188485145568848, "learning_rate": 4.0609600999680875e-07, "loss": 0.687, "step": 12010 }, { "epoch": 0.9088570239491506, "grad_norm": 2.0263235569000244, "learning_rate": 4.054273759777627e-07, "loss": 0.677, "step": 12011 }, { "epoch": 0.9089326926714842, "grad_norm": 2.360248327255249, "learning_rate": 4.047592801786523e-07, "loss": 0.6305, "step": 12012 }, { "epoch": 0.9090083613938179, "grad_norm": 2.5668838024139404, "learning_rate": 4.04091722641317e-07, "loss": 0.7643, "step": 12013 }, { "epoch": 0.9090840301161515, "grad_norm": 2.728790521621704, "learning_rate": 4.0342470340756145e-07, "loss": 0.5854, "step": 12014 }, { "epoch": 0.9091596988384851, "grad_norm": 2.55326771736145, "learning_rate": 4.0275822251915517e-07, "loss": 0.7622, "step": 12015 }, { "epoch": 0.9092353675608187, "grad_norm": 2.370495080947876, "learning_rate": 4.0209228001783484e-07, "loss": 0.7064, "step": 12016 }, { "epoch": 0.9093110362831524, "grad_norm": 1.9629454612731934, "learning_rate": 4.0142687594530604e-07, "loss": 0.686, "step": 12017 }, { "epoch": 0.909386705005486, "grad_norm": 2.457399368286133, "learning_rate": 4.0076201034323647e-07, "loss": 0.5467, "step": 12018 }, { "epoch": 0.9094623737278196, "grad_norm": 1.8854044675827026, "learning_rate": 4.000976832532638e-07, "loss": 0.6292, "step": 12019 }, { "epoch": 0.9095380424501532, "grad_norm": 2.935149908065796, "learning_rate": 3.994338947169888e-07, "loss": 0.5731, "step": 12020 }, { "epoch": 0.9096137111724869, "grad_norm": 2.078005790710449, "learning_rate": 3.987706447759831e-07, "loss": 0.5449, "step": 12021 }, { "epoch": 0.9096893798948205, "grad_norm": 2.3061161041259766, "learning_rate": 3.9810793347177663e-07, "loss": 0.5563, "step": 12022 }, { "epoch": 0.9097650486171541, "grad_norm": 2.2766451835632324, "learning_rate": 3.9744576084587413e-07, "loss": 0.6867, "step": 12023 }, { "epoch": 0.9098407173394877, "grad_norm": 2.109184503555298, "learning_rate": 3.967841269397434e-07, "loss": 0.5578, "step": 12024 }, { "epoch": 0.9099163860618213, "grad_norm": 2.568835735321045, "learning_rate": 3.9612303179481634e-07, "loss": 0.6472, "step": 12025 }, { "epoch": 0.909992054784155, "grad_norm": 2.184821128845215, "learning_rate": 3.9546247545249284e-07, "loss": 0.753, "step": 12026 }, { "epoch": 0.9100677235064886, "grad_norm": 1.9900933504104614, "learning_rate": 3.948024579541377e-07, "loss": 0.5763, "step": 12027 }, { "epoch": 0.9101433922288222, "grad_norm": 2.366878032684326, "learning_rate": 3.94142979341089e-07, "loss": 0.6965, "step": 12028 }, { "epoch": 0.9102190609511558, "grad_norm": 2.603574275970459, "learning_rate": 3.934840396546396e-07, "loss": 0.7252, "step": 12029 }, { "epoch": 0.9102947296734895, "grad_norm": 2.070781707763672, "learning_rate": 3.928256389360566e-07, "loss": 0.5665, "step": 12030 }, { "epoch": 0.9103703983958231, "grad_norm": 1.9723803997039795, "learning_rate": 3.921677772265709e-07, "loss": 0.7802, "step": 12031 }, { "epoch": 0.9104460671181567, "grad_norm": 2.6558046340942383, "learning_rate": 3.915104545673807e-07, "loss": 0.6873, "step": 12032 }, { "epoch": 0.9105217358404903, "grad_norm": 2.883211374282837, "learning_rate": 3.9085367099964786e-07, "loss": 0.7914, "step": 12033 }, { "epoch": 0.910597404562824, "grad_norm": 2.342439651489258, "learning_rate": 3.9019742656450465e-07, "loss": 0.6175, "step": 12034 }, { "epoch": 0.9106730732851576, "grad_norm": 2.299743413925171, "learning_rate": 3.895417213030471e-07, "loss": 0.7706, "step": 12035 }, { "epoch": 0.9107487420074912, "grad_norm": 2.087432861328125, "learning_rate": 3.8888655525633544e-07, "loss": 0.6474, "step": 12036 }, { "epoch": 0.9108244107298248, "grad_norm": 1.8561009168624878, "learning_rate": 3.882319284653988e-07, "loss": 0.7941, "step": 12037 }, { "epoch": 0.9109000794521585, "grad_norm": 1.991654634475708, "learning_rate": 3.8757784097123236e-07, "loss": 0.6651, "step": 12038 }, { "epoch": 0.9109757481744921, "grad_norm": 2.475132942199707, "learning_rate": 3.8692429281479845e-07, "loss": 0.6396, "step": 12039 }, { "epoch": 0.9110514168968257, "grad_norm": 1.9189382791519165, "learning_rate": 3.8627128403702326e-07, "loss": 0.7264, "step": 12040 }, { "epoch": 0.9111270856191593, "grad_norm": 2.612868547439575, "learning_rate": 3.856188146788001e-07, "loss": 0.6025, "step": 12041 }, { "epoch": 0.9112027543414929, "grad_norm": 2.8287506103515625, "learning_rate": 3.849668847809903e-07, "loss": 0.5975, "step": 12042 }, { "epoch": 0.9112784230638266, "grad_norm": 2.0225670337677, "learning_rate": 3.8431549438441616e-07, "loss": 0.6964, "step": 12043 }, { "epoch": 0.9113540917861602, "grad_norm": 2.5549564361572266, "learning_rate": 3.8366464352987405e-07, "loss": 0.5845, "step": 12044 }, { "epoch": 0.9114297605084938, "grad_norm": 2.6276886463165283, "learning_rate": 3.8301433225811945e-07, "loss": 0.8134, "step": 12045 }, { "epoch": 0.9115054292308274, "grad_norm": 2.4634835720062256, "learning_rate": 3.8236456060987967e-07, "loss": 0.6734, "step": 12046 }, { "epoch": 0.9115810979531611, "grad_norm": 2.0669867992401123, "learning_rate": 3.8171532862584326e-07, "loss": 0.6143, "step": 12047 }, { "epoch": 0.9116567666754947, "grad_norm": 2.2459287643432617, "learning_rate": 3.810666363466666e-07, "loss": 0.6493, "step": 12048 }, { "epoch": 0.9117324353978283, "grad_norm": 1.9273678064346313, "learning_rate": 3.8041848381297626e-07, "loss": 0.6154, "step": 12049 }, { "epoch": 0.9118081041201619, "grad_norm": 2.385669231414795, "learning_rate": 3.797708710653588e-07, "loss": 0.6914, "step": 12050 }, { "epoch": 0.9118837728424956, "grad_norm": 2.3687374591827393, "learning_rate": 3.791237981443697e-07, "loss": 0.8302, "step": 12051 }, { "epoch": 0.9119594415648292, "grad_norm": 2.349445343017578, "learning_rate": 3.784772650905326e-07, "loss": 0.6558, "step": 12052 }, { "epoch": 0.9120351102871628, "grad_norm": 2.4349260330200195, "learning_rate": 3.778312719443341e-07, "loss": 0.7113, "step": 12053 }, { "epoch": 0.9121107790094964, "grad_norm": 2.196366786956787, "learning_rate": 3.771858187462288e-07, "loss": 0.6412, "step": 12054 }, { "epoch": 0.91218644773183, "grad_norm": 2.005478620529175, "learning_rate": 3.7654090553663747e-07, "loss": 0.5158, "step": 12055 }, { "epoch": 0.9122621164541637, "grad_norm": 2.5596768856048584, "learning_rate": 3.758965323559467e-07, "loss": 0.7028, "step": 12056 }, { "epoch": 0.9123377851764973, "grad_norm": 2.0070645809173584, "learning_rate": 3.752526992445082e-07, "loss": 0.5788, "step": 12057 }, { "epoch": 0.9124134538988309, "grad_norm": 2.1916732788085938, "learning_rate": 3.7460940624263985e-07, "loss": 0.7334, "step": 12058 }, { "epoch": 0.9124891226211646, "grad_norm": 2.4893784523010254, "learning_rate": 3.739666533906303e-07, "loss": 0.7867, "step": 12059 }, { "epoch": 0.9125647913434982, "grad_norm": 2.1541330814361572, "learning_rate": 3.733244407287294e-07, "loss": 0.6831, "step": 12060 }, { "epoch": 0.9126404600658318, "grad_norm": 2.3403072357177734, "learning_rate": 3.72682768297153e-07, "loss": 0.7134, "step": 12061 }, { "epoch": 0.9127161287881654, "grad_norm": 2.0377376079559326, "learning_rate": 3.720416361360859e-07, "loss": 0.6679, "step": 12062 }, { "epoch": 0.912791797510499, "grad_norm": 2.7400593757629395, "learning_rate": 3.71401044285678e-07, "loss": 0.7331, "step": 12063 }, { "epoch": 0.9128674662328327, "grad_norm": 1.9614194631576538, "learning_rate": 3.7076099278604527e-07, "loss": 0.57, "step": 12064 }, { "epoch": 0.9129431349551663, "grad_norm": 1.974969506263733, "learning_rate": 3.7012148167726855e-07, "loss": 0.4305, "step": 12065 }, { "epoch": 0.9130188036774999, "grad_norm": 2.233001470565796, "learning_rate": 3.694825109993979e-07, "loss": 0.74, "step": 12066 }, { "epoch": 0.9130944723998335, "grad_norm": 2.316767454147339, "learning_rate": 3.688440807924472e-07, "loss": 0.8145, "step": 12067 }, { "epoch": 0.9131701411221671, "grad_norm": 1.535016417503357, "learning_rate": 3.682061910963956e-07, "loss": 0.7931, "step": 12068 }, { "epoch": 0.9132458098445008, "grad_norm": 1.9249364137649536, "learning_rate": 3.6756884195119114e-07, "loss": 0.6361, "step": 12069 }, { "epoch": 0.9133214785668344, "grad_norm": 2.287426471710205, "learning_rate": 3.669320333967477e-07, "loss": 0.6633, "step": 12070 }, { "epoch": 0.913397147289168, "grad_norm": 2.515241861343384, "learning_rate": 3.662957654729416e-07, "loss": 0.7616, "step": 12071 }, { "epoch": 0.9134728160115017, "grad_norm": 2.161454677581787, "learning_rate": 3.656600382196199e-07, "loss": 0.7475, "step": 12072 }, { "epoch": 0.9135484847338353, "grad_norm": 1.951395869255066, "learning_rate": 3.650248516765937e-07, "loss": 0.77, "step": 12073 }, { "epoch": 0.9136241534561689, "grad_norm": 2.1375932693481445, "learning_rate": 3.6439020588364023e-07, "loss": 0.5793, "step": 12074 }, { "epoch": 0.9136998221785025, "grad_norm": 2.560678720474243, "learning_rate": 3.637561008805027e-07, "loss": 0.6161, "step": 12075 }, { "epoch": 0.9137754909008361, "grad_norm": 2.45676851272583, "learning_rate": 3.631225367068913e-07, "loss": 0.7046, "step": 12076 }, { "epoch": 0.9138511596231698, "grad_norm": 2.3351166248321533, "learning_rate": 3.6248951340248136e-07, "loss": 0.6914, "step": 12077 }, { "epoch": 0.9139268283455034, "grad_norm": 2.179896354675293, "learning_rate": 3.6185703100691615e-07, "loss": 0.6828, "step": 12078 }, { "epoch": 0.914002497067837, "grad_norm": 2.130155563354492, "learning_rate": 3.6122508955980094e-07, "loss": 0.6875, "step": 12079 }, { "epoch": 0.9140781657901706, "grad_norm": 2.144454002380371, "learning_rate": 3.6059368910071313e-07, "loss": 0.702, "step": 12080 }, { "epoch": 0.9141538345125042, "grad_norm": 1.5945395231246948, "learning_rate": 3.5996282966919303e-07, "loss": 0.7236, "step": 12081 }, { "epoch": 0.9142295032348379, "grad_norm": 1.7243108749389648, "learning_rate": 3.593325113047441e-07, "loss": 0.5943, "step": 12082 }, { "epoch": 0.9143051719571715, "grad_norm": 2.1047885417938232, "learning_rate": 3.5870273404684073e-07, "loss": 0.7484, "step": 12083 }, { "epoch": 0.9143808406795051, "grad_norm": 2.1068966388702393, "learning_rate": 3.580734979349214e-07, "loss": 0.7103, "step": 12084 }, { "epoch": 0.9144565094018388, "grad_norm": 3.1981070041656494, "learning_rate": 3.5744480300839156e-07, "loss": 0.6087, "step": 12085 }, { "epoch": 0.9145321781241724, "grad_norm": 3.288362741470337, "learning_rate": 3.5681664930662075e-07, "loss": 0.5641, "step": 12086 }, { "epoch": 0.914607846846506, "grad_norm": 2.8169796466827393, "learning_rate": 3.5618903686894745e-07, "loss": 0.6044, "step": 12087 }, { "epoch": 0.9146835155688396, "grad_norm": 2.2789206504821777, "learning_rate": 3.5556196573467426e-07, "loss": 0.6518, "step": 12088 }, { "epoch": 0.9147591842911732, "grad_norm": 2.204268455505371, "learning_rate": 3.5493543594306974e-07, "loss": 0.6927, "step": 12089 }, { "epoch": 0.9148348530135069, "grad_norm": 2.291841983795166, "learning_rate": 3.5430944753336956e-07, "loss": 0.7375, "step": 12090 }, { "epoch": 0.9149105217358405, "grad_norm": 2.3435380458831787, "learning_rate": 3.5368400054477637e-07, "loss": 0.6929, "step": 12091 }, { "epoch": 0.9149861904581741, "grad_norm": 2.182016134262085, "learning_rate": 3.530590950164567e-07, "loss": 0.7357, "step": 12092 }, { "epoch": 0.9150618591805078, "grad_norm": 2.184502363204956, "learning_rate": 3.524347309875434e-07, "loss": 0.7398, "step": 12093 }, { "epoch": 0.9151375279028413, "grad_norm": 6.090785026550293, "learning_rate": 3.5181090849713617e-07, "loss": 0.6725, "step": 12094 }, { "epoch": 0.915213196625175, "grad_norm": 2.206554412841797, "learning_rate": 3.511876275843037e-07, "loss": 0.6784, "step": 12095 }, { "epoch": 0.9152888653475086, "grad_norm": 2.320699453353882, "learning_rate": 3.5056488828807377e-07, "loss": 0.6322, "step": 12096 }, { "epoch": 0.9153645340698422, "grad_norm": 2.412203311920166, "learning_rate": 3.4994269064744624e-07, "loss": 0.6949, "step": 12097 }, { "epoch": 0.9154402027921759, "grad_norm": 2.6732378005981445, "learning_rate": 3.493210347013859e-07, "loss": 0.7427, "step": 12098 }, { "epoch": 0.9155158715145095, "grad_norm": 1.9151742458343506, "learning_rate": 3.486999204888216e-07, "loss": 0.6351, "step": 12099 }, { "epoch": 0.9155915402368431, "grad_norm": 1.943975567817688, "learning_rate": 3.480793480486493e-07, "loss": 0.614, "step": 12100 }, { "epoch": 0.9156672089591767, "grad_norm": 2.587817907333374, "learning_rate": 3.474593174197328e-07, "loss": 0.6968, "step": 12101 }, { "epoch": 0.9157428776815103, "grad_norm": 3.2089931964874268, "learning_rate": 3.4683982864090013e-07, "loss": 0.5855, "step": 12102 }, { "epoch": 0.915818546403844, "grad_norm": 1.9807599782943726, "learning_rate": 3.462208817509452e-07, "loss": 0.5766, "step": 12103 }, { "epoch": 0.9158942151261776, "grad_norm": 2.119952917098999, "learning_rate": 3.456024767886261e-07, "loss": 0.6788, "step": 12104 }, { "epoch": 0.9159698838485112, "grad_norm": 2.423600196838379, "learning_rate": 3.4498461379267277e-07, "loss": 0.631, "step": 12105 }, { "epoch": 0.9160455525708449, "grad_norm": 1.9820013046264648, "learning_rate": 3.4436729280177823e-07, "loss": 0.8097, "step": 12106 }, { "epoch": 0.9161212212931784, "grad_norm": 1.9202264547348022, "learning_rate": 3.4375051385459864e-07, "loss": 0.5799, "step": 12107 }, { "epoch": 0.9161968900155121, "grad_norm": 2.3052821159362793, "learning_rate": 3.431342769897591e-07, "loss": 0.7043, "step": 12108 }, { "epoch": 0.9162725587378457, "grad_norm": 2.5394515991210938, "learning_rate": 3.4251858224585064e-07, "loss": 0.6048, "step": 12109 }, { "epoch": 0.9163482274601793, "grad_norm": 2.8350353240966797, "learning_rate": 3.419034296614305e-07, "loss": 0.5312, "step": 12110 }, { "epoch": 0.916423896182513, "grad_norm": 2.1664719581604004, "learning_rate": 3.4128881927502086e-07, "loss": 0.598, "step": 12111 }, { "epoch": 0.9164995649048466, "grad_norm": 2.737666606903076, "learning_rate": 3.406747511251119e-07, "loss": 0.7566, "step": 12112 }, { "epoch": 0.9165752336271802, "grad_norm": 2.4136788845062256, "learning_rate": 3.4006122525015793e-07, "loss": 0.745, "step": 12113 }, { "epoch": 0.9166509023495139, "grad_norm": 2.2882440090179443, "learning_rate": 3.3944824168857914e-07, "loss": 0.7246, "step": 12114 }, { "epoch": 0.9167265710718474, "grad_norm": 3.440190553665161, "learning_rate": 3.3883580047876186e-07, "loss": 0.6751, "step": 12115 }, { "epoch": 0.9168022397941811, "grad_norm": 2.7280311584472656, "learning_rate": 3.3822390165906134e-07, "loss": 0.7142, "step": 12116 }, { "epoch": 0.9168779085165147, "grad_norm": 2.077730417251587, "learning_rate": 3.376125452677971e-07, "loss": 0.6187, "step": 12117 }, { "epoch": 0.9169535772388483, "grad_norm": 2.064181327819824, "learning_rate": 3.370017313432513e-07, "loss": 0.6946, "step": 12118 }, { "epoch": 0.917029245961182, "grad_norm": 2.3035082817077637, "learning_rate": 3.3639145992367647e-07, "loss": 0.6829, "step": 12119 }, { "epoch": 0.9171049146835155, "grad_norm": 1.866401195526123, "learning_rate": 3.3578173104729005e-07, "loss": 0.6442, "step": 12120 }, { "epoch": 0.9171805834058492, "grad_norm": 2.719041585922241, "learning_rate": 3.3517254475227544e-07, "loss": 0.7242, "step": 12121 }, { "epoch": 0.9172562521281828, "grad_norm": 2.2494893074035645, "learning_rate": 3.345639010767811e-07, "loss": 0.701, "step": 12122 }, { "epoch": 0.9173319208505164, "grad_norm": 2.3660080432891846, "learning_rate": 3.3395580005892365e-07, "loss": 0.7243, "step": 12123 }, { "epoch": 0.9174075895728501, "grad_norm": 2.231206178665161, "learning_rate": 3.333482417367836e-07, "loss": 0.6173, "step": 12124 }, { "epoch": 0.9174832582951837, "grad_norm": 2.3385729789733887, "learning_rate": 3.327412261484064e-07, "loss": 0.7117, "step": 12125 }, { "epoch": 0.9175589270175173, "grad_norm": 2.6792593002319336, "learning_rate": 3.3213475333180777e-07, "loss": 0.7404, "step": 12126 }, { "epoch": 0.917634595739851, "grad_norm": 2.788846492767334, "learning_rate": 3.315288233249663e-07, "loss": 0.7583, "step": 12127 }, { "epoch": 0.9177102644621845, "grad_norm": 1.883687973022461, "learning_rate": 3.3092343616582753e-07, "loss": 0.7129, "step": 12128 }, { "epoch": 0.9177859331845182, "grad_norm": 2.291748285293579, "learning_rate": 3.303185918923013e-07, "loss": 0.6957, "step": 12129 }, { "epoch": 0.9178616019068518, "grad_norm": 1.4826828241348267, "learning_rate": 3.297142905422652e-07, "loss": 0.6491, "step": 12130 }, { "epoch": 0.9179372706291854, "grad_norm": 2.145691394805908, "learning_rate": 3.29110532153566e-07, "loss": 0.7542, "step": 12131 }, { "epoch": 0.9180129393515191, "grad_norm": 2.223241090774536, "learning_rate": 3.2850731676400945e-07, "loss": 0.615, "step": 12132 }, { "epoch": 0.9180886080738526, "grad_norm": 4.687051773071289, "learning_rate": 3.2790464441137037e-07, "loss": 0.7339, "step": 12133 }, { "epoch": 0.9181642767961863, "grad_norm": 2.535534620285034, "learning_rate": 3.273025151333925e-07, "loss": 0.7676, "step": 12134 }, { "epoch": 0.91823994551852, "grad_norm": 2.9152448177337646, "learning_rate": 3.267009289677817e-07, "loss": 0.6491, "step": 12135 }, { "epoch": 0.9183156142408535, "grad_norm": 2.461850881576538, "learning_rate": 3.2609988595221183e-07, "loss": 0.612, "step": 12136 }, { "epoch": 0.9183912829631872, "grad_norm": 2.5751256942749023, "learning_rate": 3.254993861243218e-07, "loss": 0.4901, "step": 12137 }, { "epoch": 0.9184669516855208, "grad_norm": 2.4287333488464355, "learning_rate": 3.248994295217176e-07, "loss": 0.6079, "step": 12138 }, { "epoch": 0.9185426204078544, "grad_norm": 2.0941126346588135, "learning_rate": 3.24300016181969e-07, "loss": 0.7313, "step": 12139 }, { "epoch": 0.9186182891301881, "grad_norm": 2.2236790657043457, "learning_rate": 3.2370114614261313e-07, "loss": 0.9, "step": 12140 }, { "epoch": 0.9186939578525216, "grad_norm": 2.147141933441162, "learning_rate": 3.231028194411569e-07, "loss": 0.6299, "step": 12141 }, { "epoch": 0.9187696265748553, "grad_norm": 2.7429134845733643, "learning_rate": 3.2250503611506444e-07, "loss": 0.8518, "step": 12142 }, { "epoch": 0.918845295297189, "grad_norm": 2.0502939224243164, "learning_rate": 3.2190779620177267e-07, "loss": 0.5715, "step": 12143 }, { "epoch": 0.9189209640195225, "grad_norm": 2.8797767162323, "learning_rate": 3.213110997386838e-07, "loss": 0.6093, "step": 12144 }, { "epoch": 0.9189966327418562, "grad_norm": 2.793109655380249, "learning_rate": 3.2071494676316484e-07, "loss": 0.7734, "step": 12145 }, { "epoch": 0.9190723014641897, "grad_norm": 2.135164976119995, "learning_rate": 3.2011933731254697e-07, "loss": 0.6241, "step": 12146 }, { "epoch": 0.9191479701865234, "grad_norm": 1.7029752731323242, "learning_rate": 3.1952427142413033e-07, "loss": 0.6607, "step": 12147 }, { "epoch": 0.919223638908857, "grad_norm": 6.995512008666992, "learning_rate": 3.1892974913518016e-07, "loss": 0.7122, "step": 12148 }, { "epoch": 0.9192993076311906, "grad_norm": 2.4727792739868164, "learning_rate": 3.183357704829286e-07, "loss": 0.7127, "step": 12149 }, { "epoch": 0.9193749763535243, "grad_norm": 1.784459114074707, "learning_rate": 3.1774233550457e-07, "loss": 0.5731, "step": 12150 }, { "epoch": 0.9194506450758579, "grad_norm": 1.7401350736618042, "learning_rate": 3.1714944423726653e-07, "loss": 0.6354, "step": 12151 }, { "epoch": 0.9195263137981915, "grad_norm": 2.0081143379211426, "learning_rate": 3.165570967181506e-07, "loss": 0.7686, "step": 12152 }, { "epoch": 0.9196019825205252, "grad_norm": 1.8823308944702148, "learning_rate": 3.1596529298431445e-07, "loss": 0.6569, "step": 12153 }, { "epoch": 0.9196776512428587, "grad_norm": 2.5882723331451416, "learning_rate": 3.1537403307281843e-07, "loss": 0.5767, "step": 12154 }, { "epoch": 0.9197533199651924, "grad_norm": 2.073834180831909, "learning_rate": 3.14783317020691e-07, "loss": 0.7049, "step": 12155 }, { "epoch": 0.919828988687526, "grad_norm": 2.439730644226074, "learning_rate": 3.1419314486492245e-07, "loss": 0.6684, "step": 12156 }, { "epoch": 0.9199046574098596, "grad_norm": 2.621870756149292, "learning_rate": 3.136035166424733e-07, "loss": 0.7028, "step": 12157 }, { "epoch": 0.9199803261321933, "grad_norm": 2.3154456615448, "learning_rate": 3.1301443239026705e-07, "loss": 0.7616, "step": 12158 }, { "epoch": 0.9200559948545268, "grad_norm": 2.4377593994140625, "learning_rate": 3.1242589214519513e-07, "loss": 0.5677, "step": 12159 }, { "epoch": 0.9201316635768605, "grad_norm": 2.1143412590026855, "learning_rate": 3.1183789594411203e-07, "loss": 0.591, "step": 12160 }, { "epoch": 0.9202073322991942, "grad_norm": 1.7824926376342773, "learning_rate": 3.112504438238394e-07, "loss": 0.6487, "step": 12161 }, { "epoch": 0.9202830010215277, "grad_norm": 2.3391568660736084, "learning_rate": 3.106635358211687e-07, "loss": 0.6635, "step": 12162 }, { "epoch": 0.9203586697438614, "grad_norm": 2.347287178039551, "learning_rate": 3.100771719728526e-07, "loss": 0.6899, "step": 12163 }, { "epoch": 0.920434338466195, "grad_norm": 2.611984968185425, "learning_rate": 3.0949135231560864e-07, "loss": 0.5357, "step": 12164 }, { "epoch": 0.9205100071885286, "grad_norm": 2.079094886779785, "learning_rate": 3.089060768861256e-07, "loss": 0.6983, "step": 12165 }, { "epoch": 0.9205856759108623, "grad_norm": 2.0612375736236572, "learning_rate": 3.0832134572105507e-07, "loss": 0.7183, "step": 12166 }, { "epoch": 0.9206613446331958, "grad_norm": 2.252366542816162, "learning_rate": 3.0773715885701284e-07, "loss": 0.6214, "step": 12167 }, { "epoch": 0.9207370133555295, "grad_norm": 2.7472410202026367, "learning_rate": 3.071535163305845e-07, "loss": 0.6864, "step": 12168 }, { "epoch": 0.9208126820778632, "grad_norm": 2.145517349243164, "learning_rate": 3.0657041817831897e-07, "loss": 0.6973, "step": 12169 }, { "epoch": 0.9208883508001967, "grad_norm": 1.8512307405471802, "learning_rate": 3.05987864436733e-07, "loss": 0.7021, "step": 12170 }, { "epoch": 0.9209640195225304, "grad_norm": 2.517260789871216, "learning_rate": 3.054058551423053e-07, "loss": 0.6665, "step": 12171 }, { "epoch": 0.9210396882448639, "grad_norm": 2.0453529357910156, "learning_rate": 3.048243903314849e-07, "loss": 0.6017, "step": 12172 }, { "epoch": 0.9211153569671976, "grad_norm": 2.419189453125, "learning_rate": 3.0424347004068555e-07, "loss": 0.5802, "step": 12173 }, { "epoch": 0.9211910256895313, "grad_norm": 2.2323241233825684, "learning_rate": 3.0366309430628516e-07, "loss": 0.7098, "step": 12174 }, { "epoch": 0.9212666944118648, "grad_norm": 2.0994818210601807, "learning_rate": 3.0308326316462966e-07, "loss": 0.6425, "step": 12175 }, { "epoch": 0.9213423631341985, "grad_norm": 1.8704789876937866, "learning_rate": 3.02503976652027e-07, "loss": 0.558, "step": 12176 }, { "epoch": 0.9214180318565321, "grad_norm": 2.0065083503723145, "learning_rate": 3.019252348047602e-07, "loss": 0.623, "step": 12177 }, { "epoch": 0.9214937005788657, "grad_norm": 2.0805394649505615, "learning_rate": 3.0134703765906626e-07, "loss": 0.6629, "step": 12178 }, { "epoch": 0.9215693693011994, "grad_norm": 2.2282912731170654, "learning_rate": 3.007693852511552e-07, "loss": 0.7508, "step": 12179 }, { "epoch": 0.9216450380235329, "grad_norm": 2.3103513717651367, "learning_rate": 3.0019227761720304e-07, "loss": 0.6449, "step": 12180 }, { "epoch": 0.9217207067458666, "grad_norm": 2.332411050796509, "learning_rate": 2.9961571479334794e-07, "loss": 0.5846, "step": 12181 }, { "epoch": 0.9217963754682003, "grad_norm": 1.9255980253219604, "learning_rate": 2.99039696815698e-07, "loss": 0.6451, "step": 12182 }, { "epoch": 0.9218720441905338, "grad_norm": 4.481695652008057, "learning_rate": 2.9846422372032434e-07, "loss": 0.5743, "step": 12183 }, { "epoch": 0.9219477129128675, "grad_norm": 2.167587995529175, "learning_rate": 2.9788929554326614e-07, "loss": 0.7151, "step": 12184 }, { "epoch": 0.9220233816352011, "grad_norm": 2.3407418727874756, "learning_rate": 2.9731491232052466e-07, "loss": 0.6573, "step": 12185 }, { "epoch": 0.9220990503575347, "grad_norm": 2.2735655307769775, "learning_rate": 2.9674107408807107e-07, "loss": 0.6146, "step": 12186 }, { "epoch": 0.9221747190798684, "grad_norm": 2.0419833660125732, "learning_rate": 2.961677808818436e-07, "loss": 0.6306, "step": 12187 }, { "epoch": 0.9222503878022019, "grad_norm": 2.499739646911621, "learning_rate": 2.955950327377396e-07, "loss": 0.6939, "step": 12188 }, { "epoch": 0.9223260565245356, "grad_norm": 2.449592351913452, "learning_rate": 2.950228296916283e-07, "loss": 0.834, "step": 12189 }, { "epoch": 0.9224017252468693, "grad_norm": 2.2888007164001465, "learning_rate": 2.944511717793421e-07, "loss": 0.6675, "step": 12190 }, { "epoch": 0.9224773939692028, "grad_norm": 2.9520766735076904, "learning_rate": 2.938800590366814e-07, "loss": 0.7078, "step": 12191 }, { "epoch": 0.9225530626915365, "grad_norm": 2.2078518867492676, "learning_rate": 2.9330949149941044e-07, "loss": 0.6119, "step": 12192 }, { "epoch": 0.92262873141387, "grad_norm": 2.88496470451355, "learning_rate": 2.927394692032598e-07, "loss": 0.6607, "step": 12193 }, { "epoch": 0.9227044001362037, "grad_norm": 2.3544445037841797, "learning_rate": 2.921699921839258e-07, "loss": 0.7694, "step": 12194 }, { "epoch": 0.9227800688585374, "grad_norm": 2.2602198123931885, "learning_rate": 2.91601060477073e-07, "loss": 0.7182, "step": 12195 }, { "epoch": 0.9228557375808709, "grad_norm": 2.0758039951324463, "learning_rate": 2.910326741183269e-07, "loss": 0.6234, "step": 12196 }, { "epoch": 0.9229314063032046, "grad_norm": 2.403632402420044, "learning_rate": 2.9046483314328296e-07, "loss": 0.6832, "step": 12197 }, { "epoch": 0.9230070750255382, "grad_norm": 2.320176362991333, "learning_rate": 2.898975375875018e-07, "loss": 0.7362, "step": 12198 }, { "epoch": 0.9230827437478718, "grad_norm": 2.266352891921997, "learning_rate": 2.89330787486508e-07, "loss": 0.6202, "step": 12199 }, { "epoch": 0.9231584124702055, "grad_norm": 2.6983745098114014, "learning_rate": 2.887645828757951e-07, "loss": 0.6643, "step": 12200 }, { "epoch": 0.923234081192539, "grad_norm": 3.239264488220215, "learning_rate": 2.881989237908188e-07, "loss": 0.6703, "step": 12201 }, { "epoch": 0.9233097499148727, "grad_norm": 1.9743820428848267, "learning_rate": 2.876338102670028e-07, "loss": 0.6001, "step": 12202 }, { "epoch": 0.9233854186372064, "grad_norm": 2.7023541927337646, "learning_rate": 2.8706924233973765e-07, "loss": 0.6539, "step": 12203 }, { "epoch": 0.9234610873595399, "grad_norm": 1.8627772331237793, "learning_rate": 2.865052200443772e-07, "loss": 0.6342, "step": 12204 }, { "epoch": 0.9235367560818736, "grad_norm": 2.2735352516174316, "learning_rate": 2.8594174341624216e-07, "loss": 0.6798, "step": 12205 }, { "epoch": 0.9236124248042071, "grad_norm": 2.6565213203430176, "learning_rate": 2.8537881249062225e-07, "loss": 0.6429, "step": 12206 }, { "epoch": 0.9236880935265408, "grad_norm": 1.921863317489624, "learning_rate": 2.8481642730276434e-07, "loss": 0.5624, "step": 12207 }, { "epoch": 0.9237637622488745, "grad_norm": 2.8346986770629883, "learning_rate": 2.8425458788789126e-07, "loss": 0.6731, "step": 12208 }, { "epoch": 0.923839430971208, "grad_norm": 1.9833852052688599, "learning_rate": 2.8369329428118784e-07, "loss": 0.7168, "step": 12209 }, { "epoch": 0.9239150996935417, "grad_norm": 2.3957440853118896, "learning_rate": 2.8313254651779997e-07, "loss": 0.7039, "step": 12210 }, { "epoch": 0.9239907684158754, "grad_norm": 2.2904884815216064, "learning_rate": 2.8257234463284653e-07, "loss": 0.7522, "step": 12211 }, { "epoch": 0.9240664371382089, "grad_norm": 2.137908458709717, "learning_rate": 2.820126886614085e-07, "loss": 0.5352, "step": 12212 }, { "epoch": 0.9241421058605426, "grad_norm": 2.9669129848480225, "learning_rate": 2.814535786385338e-07, "loss": 0.6325, "step": 12213 }, { "epoch": 0.9242177745828761, "grad_norm": 4.183269500732422, "learning_rate": 2.808950145992345e-07, "loss": 0.7076, "step": 12214 }, { "epoch": 0.9242934433052098, "grad_norm": 1.7913990020751953, "learning_rate": 2.8033699657849056e-07, "loss": 0.6799, "step": 12215 }, { "epoch": 0.9243691120275435, "grad_norm": 1.807224988937378, "learning_rate": 2.79779524611248e-07, "loss": 0.5429, "step": 12216 }, { "epoch": 0.924444780749877, "grad_norm": 3.026785135269165, "learning_rate": 2.7922259873241397e-07, "loss": 0.6853, "step": 12217 }, { "epoch": 0.9245204494722107, "grad_norm": 2.0761656761169434, "learning_rate": 2.786662189768685e-07, "loss": 0.5143, "step": 12218 }, { "epoch": 0.9245961181945442, "grad_norm": 2.615605354309082, "learning_rate": 2.7811038537945177e-07, "loss": 0.734, "step": 12219 }, { "epoch": 0.9246717869168779, "grad_norm": 2.0705811977386475, "learning_rate": 2.775550979749739e-07, "loss": 0.5934, "step": 12220 }, { "epoch": 0.9247474556392116, "grad_norm": 2.260209321975708, "learning_rate": 2.7700035679820714e-07, "loss": 0.8695, "step": 12221 }, { "epoch": 0.9248231243615451, "grad_norm": 2.3986639976501465, "learning_rate": 2.764461618838906e-07, "loss": 0.7589, "step": 12222 }, { "epoch": 0.9248987930838788, "grad_norm": 2.1753921508789062, "learning_rate": 2.758925132667326e-07, "loss": 0.6476, "step": 12223 }, { "epoch": 0.9249744618062125, "grad_norm": 1.9226336479187012, "learning_rate": 2.7533941098140234e-07, "loss": 0.7568, "step": 12224 }, { "epoch": 0.925050130528546, "grad_norm": 2.058516502380371, "learning_rate": 2.747868550625362e-07, "loss": 0.6215, "step": 12225 }, { "epoch": 0.9251257992508797, "grad_norm": 1.9395172595977783, "learning_rate": 2.742348455447384e-07, "loss": 0.571, "step": 12226 }, { "epoch": 0.9252014679732132, "grad_norm": 2.5205371379852295, "learning_rate": 2.736833824625774e-07, "loss": 0.6528, "step": 12227 }, { "epoch": 0.9252771366955469, "grad_norm": 2.5392866134643555, "learning_rate": 2.7313246585058647e-07, "loss": 0.6829, "step": 12228 }, { "epoch": 0.9253528054178806, "grad_norm": 3.4775822162628174, "learning_rate": 2.7258209574326707e-07, "loss": 0.5941, "step": 12229 }, { "epoch": 0.9254284741402141, "grad_norm": 2.08290433883667, "learning_rate": 2.7203227217508565e-07, "loss": 0.586, "step": 12230 }, { "epoch": 0.9255041428625478, "grad_norm": 2.0628485679626465, "learning_rate": 2.714829951804716e-07, "loss": 0.6772, "step": 12231 }, { "epoch": 0.9255798115848813, "grad_norm": 2.3678271770477295, "learning_rate": 2.709342647938244e-07, "loss": 0.6361, "step": 12232 }, { "epoch": 0.925655480307215, "grad_norm": 2.2154617309570312, "learning_rate": 2.703860810495057e-07, "loss": 0.6324, "step": 12233 }, { "epoch": 0.9257311490295487, "grad_norm": 2.3665215969085693, "learning_rate": 2.698384439818479e-07, "loss": 0.6778, "step": 12234 }, { "epoch": 0.9258068177518822, "grad_norm": 2.475618362426758, "learning_rate": 2.692913536251416e-07, "loss": 0.5842, "step": 12235 }, { "epoch": 0.9258824864742159, "grad_norm": 2.6356213092803955, "learning_rate": 2.6874481001365035e-07, "loss": 0.8052, "step": 12236 }, { "epoch": 0.9259581551965496, "grad_norm": 2.1997127532958984, "learning_rate": 2.681988131815989e-07, "loss": 0.564, "step": 12237 }, { "epoch": 0.9260338239188831, "grad_norm": 2.037262201309204, "learning_rate": 2.676533631631798e-07, "loss": 0.6311, "step": 12238 }, { "epoch": 0.9261094926412168, "grad_norm": 1.9841945171356201, "learning_rate": 2.6710845999255076e-07, "loss": 0.6565, "step": 12239 }, { "epoch": 0.9261851613635503, "grad_norm": 2.244075298309326, "learning_rate": 2.6656410370383544e-07, "loss": 0.614, "step": 12240 }, { "epoch": 0.926260830085884, "grad_norm": 2.1905736923217773, "learning_rate": 2.660202943311246e-07, "loss": 0.8046, "step": 12241 }, { "epoch": 0.9263364988082177, "grad_norm": 2.341736078262329, "learning_rate": 2.6547703190847105e-07, "loss": 0.6139, "step": 12242 }, { "epoch": 0.9264121675305512, "grad_norm": 2.3669471740722656, "learning_rate": 2.649343164698965e-07, "loss": 0.7966, "step": 12243 }, { "epoch": 0.9264878362528849, "grad_norm": 2.272200345993042, "learning_rate": 2.643921480493888e-07, "loss": 0.6013, "step": 12244 }, { "epoch": 0.9265635049752184, "grad_norm": 2.1507675647735596, "learning_rate": 2.6385052668089784e-07, "loss": 0.6805, "step": 12245 }, { "epoch": 0.9266391736975521, "grad_norm": 2.2726826667785645, "learning_rate": 2.6330945239834336e-07, "loss": 0.5546, "step": 12246 }, { "epoch": 0.9267148424198858, "grad_norm": 2.1980509757995605, "learning_rate": 2.6276892523560934e-07, "loss": 0.6105, "step": 12247 }, { "epoch": 0.9267905111422193, "grad_norm": 2.0372631549835205, "learning_rate": 2.6222894522654375e-07, "loss": 0.6964, "step": 12248 }, { "epoch": 0.926866179864553, "grad_norm": 2.292754888534546, "learning_rate": 2.6168951240496443e-07, "loss": 0.761, "step": 12249 }, { "epoch": 0.9269418485868867, "grad_norm": 2.1714630126953125, "learning_rate": 2.611506268046494e-07, "loss": 0.6707, "step": 12250 }, { "epoch": 0.9270175173092202, "grad_norm": 2.125967025756836, "learning_rate": 2.606122884593477e-07, "loss": 0.6586, "step": 12251 }, { "epoch": 0.9270931860315539, "grad_norm": 2.231060266494751, "learning_rate": 2.6007449740277235e-07, "loss": 0.5914, "step": 12252 }, { "epoch": 0.9271688547538874, "grad_norm": 2.295248031616211, "learning_rate": 2.5953725366859836e-07, "loss": 0.6735, "step": 12253 }, { "epoch": 0.9272445234762211, "grad_norm": 3.4666316509246826, "learning_rate": 2.590005572904729e-07, "loss": 0.6238, "step": 12254 }, { "epoch": 0.9273201921985548, "grad_norm": 2.405599355697632, "learning_rate": 2.5846440830200404e-07, "loss": 0.7202, "step": 12255 }, { "epoch": 0.9273958609208883, "grad_norm": 2.2995810508728027, "learning_rate": 2.579288067367679e-07, "loss": 0.6795, "step": 12256 }, { "epoch": 0.927471529643222, "grad_norm": 2.8682172298431396, "learning_rate": 2.5739375262830464e-07, "loss": 0.5819, "step": 12257 }, { "epoch": 0.9275471983655555, "grad_norm": 3.253770589828491, "learning_rate": 2.5685924601012157e-07, "loss": 0.6481, "step": 12258 }, { "epoch": 0.9276228670878892, "grad_norm": 2.7173924446105957, "learning_rate": 2.563252869156908e-07, "loss": 0.4945, "step": 12259 }, { "epoch": 0.9276985358102229, "grad_norm": 2.2100868225097656, "learning_rate": 2.5579187537845164e-07, "loss": 0.619, "step": 12260 }, { "epoch": 0.9277742045325564, "grad_norm": 2.2828118801116943, "learning_rate": 2.552590114318073e-07, "loss": 0.783, "step": 12261 }, { "epoch": 0.9278498732548901, "grad_norm": 2.5905113220214844, "learning_rate": 2.5472669510912916e-07, "loss": 0.6722, "step": 12262 }, { "epoch": 0.9279255419772238, "grad_norm": 2.3828647136688232, "learning_rate": 2.5419492644374855e-07, "loss": 0.6373, "step": 12263 }, { "epoch": 0.9280012106995573, "grad_norm": 2.7237589359283447, "learning_rate": 2.536637054689698e-07, "loss": 0.697, "step": 12264 }, { "epoch": 0.928076879421891, "grad_norm": 2.399646282196045, "learning_rate": 2.531330322180593e-07, "loss": 0.5974, "step": 12265 }, { "epoch": 0.9281525481442245, "grad_norm": 2.0155105590820312, "learning_rate": 2.5260290672424947e-07, "loss": 0.5291, "step": 12266 }, { "epoch": 0.9282282168665582, "grad_norm": 2.4780659675598145, "learning_rate": 2.5207332902073776e-07, "loss": 0.6977, "step": 12267 }, { "epoch": 0.9283038855888919, "grad_norm": 2.1585497856140137, "learning_rate": 2.5154429914068764e-07, "loss": 0.7286, "step": 12268 }, { "epoch": 0.9283795543112254, "grad_norm": 2.1875948905944824, "learning_rate": 2.510158171172296e-07, "loss": 0.6065, "step": 12269 }, { "epoch": 0.9284552230335591, "grad_norm": 2.345557928085327, "learning_rate": 2.5048788298345926e-07, "loss": 0.4536, "step": 12270 }, { "epoch": 0.9285308917558927, "grad_norm": 2.446751356124878, "learning_rate": 2.4996049677243703e-07, "loss": 0.6631, "step": 12271 }, { "epoch": 0.9286065604782263, "grad_norm": 2.7235686779022217, "learning_rate": 2.494336585171896e-07, "loss": 0.6673, "step": 12272 }, { "epoch": 0.92868222920056, "grad_norm": 2.1927380561828613, "learning_rate": 2.489073682507105e-07, "loss": 0.6072, "step": 12273 }, { "epoch": 0.9287578979228935, "grad_norm": 2.9181969165802, "learning_rate": 2.483816260059534e-07, "loss": 0.6484, "step": 12274 }, { "epoch": 0.9288335666452272, "grad_norm": 2.1326992511749268, "learning_rate": 2.4785643181584696e-07, "loss": 0.7141, "step": 12275 }, { "epoch": 0.9289092353675609, "grad_norm": 2.227208375930786, "learning_rate": 2.4733178571327887e-07, "loss": 0.554, "step": 12276 }, { "epoch": 0.9289849040898944, "grad_norm": 2.5068063735961914, "learning_rate": 2.4680768773110383e-07, "loss": 0.7795, "step": 12277 }, { "epoch": 0.9290605728122281, "grad_norm": 2.883193016052246, "learning_rate": 2.462841379021417e-07, "loss": 0.6704, "step": 12278 }, { "epoch": 0.9291362415345616, "grad_norm": 2.217097520828247, "learning_rate": 2.4576113625918005e-07, "loss": 0.6113, "step": 12279 }, { "epoch": 0.9292119102568953, "grad_norm": 3.0743801593780518, "learning_rate": 2.4523868283497186e-07, "loss": 0.7329, "step": 12280 }, { "epoch": 0.929287578979229, "grad_norm": 2.3578438758850098, "learning_rate": 2.447167776622329e-07, "loss": 0.7069, "step": 12281 }, { "epoch": 0.9293632477015625, "grad_norm": 2.4407949447631836, "learning_rate": 2.44195420773647e-07, "loss": 0.6827, "step": 12282 }, { "epoch": 0.9294389164238962, "grad_norm": 2.97994065284729, "learning_rate": 2.4367461220186406e-07, "loss": 0.572, "step": 12283 }, { "epoch": 0.9295145851462298, "grad_norm": 2.102031946182251, "learning_rate": 2.43154351979498e-07, "loss": 0.685, "step": 12284 }, { "epoch": 0.9295902538685634, "grad_norm": 2.1757426261901855, "learning_rate": 2.426346401391287e-07, "loss": 0.6041, "step": 12285 }, { "epoch": 0.9296659225908971, "grad_norm": 2.5137274265289307, "learning_rate": 2.4211547671330423e-07, "loss": 0.6202, "step": 12286 }, { "epoch": 0.9297415913132306, "grad_norm": 2.278620719909668, "learning_rate": 2.415968617345355e-07, "loss": 0.7326, "step": 12287 }, { "epoch": 0.9298172600355643, "grad_norm": 1.830971360206604, "learning_rate": 2.410787952352986e-07, "loss": 0.6772, "step": 12288 }, { "epoch": 0.929892928757898, "grad_norm": 2.2332699298858643, "learning_rate": 2.4056127724803656e-07, "loss": 0.6575, "step": 12289 }, { "epoch": 0.9299685974802315, "grad_norm": 3.241168975830078, "learning_rate": 2.400443078051604e-07, "loss": 0.7853, "step": 12290 }, { "epoch": 0.9300442662025652, "grad_norm": 1.8960295915603638, "learning_rate": 2.3952788693904125e-07, "loss": 0.5753, "step": 12291 }, { "epoch": 0.9301199349248988, "grad_norm": 2.006786584854126, "learning_rate": 2.3901201468202126e-07, "loss": 0.5916, "step": 12292 }, { "epoch": 0.9301956036472324, "grad_norm": 2.6823008060455322, "learning_rate": 2.3849669106640557e-07, "loss": 0.7097, "step": 12293 }, { "epoch": 0.9302712723695661, "grad_norm": 1.6065900325775146, "learning_rate": 2.379819161244654e-07, "loss": 0.5853, "step": 12294 }, { "epoch": 0.9303469410918996, "grad_norm": 3.367147207260132, "learning_rate": 2.3746768988843693e-07, "loss": 0.7193, "step": 12295 }, { "epoch": 0.9304226098142333, "grad_norm": 2.1877822875976562, "learning_rate": 2.3695401239052338e-07, "loss": 0.7386, "step": 12296 }, { "epoch": 0.9304982785365669, "grad_norm": 2.3759443759918213, "learning_rate": 2.3644088366289208e-07, "loss": 0.6824, "step": 12297 }, { "epoch": 0.9305739472589005, "grad_norm": 1.9915658235549927, "learning_rate": 2.3592830373767925e-07, "loss": 0.7018, "step": 12298 }, { "epoch": 0.9306496159812342, "grad_norm": 2.2175698280334473, "learning_rate": 2.3541627264698028e-07, "loss": 0.7714, "step": 12299 }, { "epoch": 0.9307252847035677, "grad_norm": 2.8158113956451416, "learning_rate": 2.349047904228635e-07, "loss": 0.6239, "step": 12300 }, { "epoch": 0.9308009534259014, "grad_norm": 2.3479740619659424, "learning_rate": 2.3439385709735928e-07, "loss": 0.7483, "step": 12301 }, { "epoch": 0.9308766221482351, "grad_norm": 2.05190372467041, "learning_rate": 2.3388347270246202e-07, "loss": 0.5696, "step": 12302 }, { "epoch": 0.9309522908705686, "grad_norm": 2.0475738048553467, "learning_rate": 2.3337363727013515e-07, "loss": 0.5486, "step": 12303 }, { "epoch": 0.9310279595929023, "grad_norm": 2.140483856201172, "learning_rate": 2.3286435083230618e-07, "loss": 0.6311, "step": 12304 }, { "epoch": 0.9311036283152359, "grad_norm": 2.0714380741119385, "learning_rate": 2.3235561342086753e-07, "loss": 0.6138, "step": 12305 }, { "epoch": 0.9311792970375695, "grad_norm": 2.3134987354278564, "learning_rate": 2.3184742506767775e-07, "loss": 0.6722, "step": 12306 }, { "epoch": 0.9312549657599032, "grad_norm": 2.5604090690612793, "learning_rate": 2.313397858045624e-07, "loss": 0.708, "step": 12307 }, { "epoch": 0.9313306344822367, "grad_norm": 3.3896567821502686, "learning_rate": 2.30832695663311e-07, "loss": 0.6125, "step": 12308 }, { "epoch": 0.9314063032045704, "grad_norm": 1.9573801755905151, "learning_rate": 2.303261546756802e-07, "loss": 0.5785, "step": 12309 }, { "epoch": 0.931481971926904, "grad_norm": 1.7080085277557373, "learning_rate": 2.298201628733876e-07, "loss": 0.7792, "step": 12310 }, { "epoch": 0.9315576406492376, "grad_norm": 1.9010158777236938, "learning_rate": 2.2931472028812384e-07, "loss": 0.5984, "step": 12311 }, { "epoch": 0.9316333093715713, "grad_norm": 2.1762309074401855, "learning_rate": 2.2880982695154162e-07, "loss": 0.6983, "step": 12312 }, { "epoch": 0.9317089780939048, "grad_norm": 1.8109301328659058, "learning_rate": 2.2830548289525666e-07, "loss": 0.6687, "step": 12313 }, { "epoch": 0.9317846468162385, "grad_norm": 1.976968765258789, "learning_rate": 2.2780168815085267e-07, "loss": 0.6647, "step": 12314 }, { "epoch": 0.9318603155385722, "grad_norm": 3.7262067794799805, "learning_rate": 2.2729844274987942e-07, "loss": 0.6439, "step": 12315 }, { "epoch": 0.9319359842609057, "grad_norm": 1.9241440296173096, "learning_rate": 2.2679574672385272e-07, "loss": 0.7134, "step": 12316 }, { "epoch": 0.9320116529832394, "grad_norm": 2.4006447792053223, "learning_rate": 2.2629360010425237e-07, "loss": 0.598, "step": 12317 }, { "epoch": 0.932087321705573, "grad_norm": 2.9187188148498535, "learning_rate": 2.2579200292252422e-07, "loss": 0.7099, "step": 12318 }, { "epoch": 0.9321629904279066, "grad_norm": 2.171943426132202, "learning_rate": 2.2529095521008114e-07, "loss": 0.6414, "step": 12319 }, { "epoch": 0.9322386591502403, "grad_norm": 2.2062504291534424, "learning_rate": 2.2479045699829803e-07, "loss": 0.653, "step": 12320 }, { "epoch": 0.9323143278725738, "grad_norm": 2.55442476272583, "learning_rate": 2.2429050831851882e-07, "loss": 0.6587, "step": 12321 }, { "epoch": 0.9323899965949075, "grad_norm": 2.2659802436828613, "learning_rate": 2.2379110920205248e-07, "loss": 0.6878, "step": 12322 }, { "epoch": 0.9324656653172411, "grad_norm": 3.343369245529175, "learning_rate": 2.2329225968017296e-07, "loss": 0.7144, "step": 12323 }, { "epoch": 0.9325413340395747, "grad_norm": 1.9788143634796143, "learning_rate": 2.2279395978411932e-07, "loss": 0.5868, "step": 12324 }, { "epoch": 0.9326170027619084, "grad_norm": 3.297405481338501, "learning_rate": 2.2229620954509554e-07, "loss": 0.6762, "step": 12325 }, { "epoch": 0.932692671484242, "grad_norm": 2.0477468967437744, "learning_rate": 2.2179900899427574e-07, "loss": 0.5926, "step": 12326 }, { "epoch": 0.9327683402065756, "grad_norm": 2.3132870197296143, "learning_rate": 2.21302358162793e-07, "loss": 0.6212, "step": 12327 }, { "epoch": 0.9328440089289093, "grad_norm": 2.395486354827881, "learning_rate": 2.208062570817514e-07, "loss": 0.664, "step": 12328 }, { "epoch": 0.9329196776512428, "grad_norm": 2.0944366455078125, "learning_rate": 2.2031070578221612e-07, "loss": 0.6979, "step": 12329 }, { "epoch": 0.9329953463735765, "grad_norm": 2.699592351913452, "learning_rate": 2.1981570429522134e-07, "loss": 0.6174, "step": 12330 }, { "epoch": 0.9330710150959101, "grad_norm": 2.9009218215942383, "learning_rate": 2.1932125265176628e-07, "loss": 0.7047, "step": 12331 }, { "epoch": 0.9331466838182437, "grad_norm": 2.090672254562378, "learning_rate": 2.1882735088281414e-07, "loss": 0.7085, "step": 12332 }, { "epoch": 0.9332223525405774, "grad_norm": 2.3523049354553223, "learning_rate": 2.1833399901929618e-07, "loss": 0.5658, "step": 12333 }, { "epoch": 0.933298021262911, "grad_norm": 1.9913511276245117, "learning_rate": 2.178411970921057e-07, "loss": 0.6611, "step": 12334 }, { "epoch": 0.9333736899852446, "grad_norm": 2.0843513011932373, "learning_rate": 2.1734894513210303e-07, "loss": 0.64, "step": 12335 }, { "epoch": 0.9334493587075782, "grad_norm": 2.296250343322754, "learning_rate": 2.1685724317011746e-07, "loss": 0.6017, "step": 12336 }, { "epoch": 0.9335250274299118, "grad_norm": 2.035994291305542, "learning_rate": 2.163660912369404e-07, "loss": 0.6316, "step": 12337 }, { "epoch": 0.9336006961522455, "grad_norm": 2.133603096008301, "learning_rate": 2.1587548936332723e-07, "loss": 0.7112, "step": 12338 }, { "epoch": 0.9336763648745791, "grad_norm": 9.666108131408691, "learning_rate": 2.1538543758000239e-07, "loss": 0.6514, "step": 12339 }, { "epoch": 0.9337520335969127, "grad_norm": 2.0413687229156494, "learning_rate": 2.1489593591765434e-07, "loss": 0.7346, "step": 12340 }, { "epoch": 0.9338277023192464, "grad_norm": 7.327420711517334, "learning_rate": 2.144069844069365e-07, "loss": 0.6501, "step": 12341 }, { "epoch": 0.9339033710415799, "grad_norm": 2.2991442680358887, "learning_rate": 2.1391858307847045e-07, "loss": 0.7028, "step": 12342 }, { "epoch": 0.9339790397639136, "grad_norm": 10.1016845703125, "learning_rate": 2.134307319628397e-07, "loss": 0.6521, "step": 12343 }, { "epoch": 0.9340547084862472, "grad_norm": 1.8840327262878418, "learning_rate": 2.1294343109059677e-07, "loss": 0.6156, "step": 12344 }, { "epoch": 0.9341303772085808, "grad_norm": 1.7931628227233887, "learning_rate": 2.124566804922563e-07, "loss": 0.632, "step": 12345 }, { "epoch": 0.9342060459309145, "grad_norm": 2.3598361015319824, "learning_rate": 2.119704801982999e-07, "loss": 0.6864, "step": 12346 }, { "epoch": 0.934281714653248, "grad_norm": 1.6346832513809204, "learning_rate": 2.114848302391772e-07, "loss": 0.5736, "step": 12347 }, { "epoch": 0.9343573833755817, "grad_norm": 2.1589810848236084, "learning_rate": 2.1099973064529987e-07, "loss": 0.657, "step": 12348 }, { "epoch": 0.9344330520979153, "grad_norm": 3.102557897567749, "learning_rate": 2.1051518144704562e-07, "loss": 0.6275, "step": 12349 }, { "epoch": 0.9345087208202489, "grad_norm": 2.09216046333313, "learning_rate": 2.100311826747602e-07, "loss": 0.7042, "step": 12350 }, { "epoch": 0.9345843895425826, "grad_norm": 1.9065994024276733, "learning_rate": 2.095477343587513e-07, "loss": 0.6763, "step": 12351 }, { "epoch": 0.9346600582649162, "grad_norm": 2.5319108963012695, "learning_rate": 2.0906483652929576e-07, "loss": 0.6599, "step": 12352 }, { "epoch": 0.9347357269872498, "grad_norm": 2.6786136627197266, "learning_rate": 2.0858248921663337e-07, "loss": 0.7684, "step": 12353 }, { "epoch": 0.9348113957095835, "grad_norm": 2.0246286392211914, "learning_rate": 2.0810069245097097e-07, "loss": 0.7829, "step": 12354 }, { "epoch": 0.934887064431917, "grad_norm": 2.1122617721557617, "learning_rate": 2.0761944626247942e-07, "loss": 0.7041, "step": 12355 }, { "epoch": 0.9349627331542507, "grad_norm": 2.1690616607666016, "learning_rate": 2.0713875068129563e-07, "loss": 0.6317, "step": 12356 }, { "epoch": 0.9350384018765843, "grad_norm": 2.203425407409668, "learning_rate": 2.066586057375225e-07, "loss": 0.7214, "step": 12357 }, { "epoch": 0.9351140705989179, "grad_norm": 2.0501766204833984, "learning_rate": 2.0617901146122998e-07, "loss": 0.595, "step": 12358 }, { "epoch": 0.9351897393212516, "grad_norm": 3.1031334400177, "learning_rate": 2.0569996788245005e-07, "loss": 0.6128, "step": 12359 }, { "epoch": 0.9352654080435852, "grad_norm": 2.129321336746216, "learning_rate": 2.052214750311817e-07, "loss": 0.6836, "step": 12360 }, { "epoch": 0.9353410767659188, "grad_norm": 2.268303871154785, "learning_rate": 2.04743532937391e-07, "loss": 0.6845, "step": 12361 }, { "epoch": 0.9354167454882524, "grad_norm": 2.151488780975342, "learning_rate": 2.0426614163100698e-07, "loss": 0.6448, "step": 12362 }, { "epoch": 0.935492414210586, "grad_norm": 2.471512794494629, "learning_rate": 2.0378930114192572e-07, "loss": 0.6199, "step": 12363 }, { "epoch": 0.9355680829329197, "grad_norm": 2.2034783363342285, "learning_rate": 2.0331301150000935e-07, "loss": 0.6998, "step": 12364 }, { "epoch": 0.9356437516552533, "grad_norm": 2.128675937652588, "learning_rate": 2.02837272735085e-07, "loss": 0.7106, "step": 12365 }, { "epoch": 0.9357194203775869, "grad_norm": 2.1250526905059814, "learning_rate": 2.0236208487694285e-07, "loss": 0.7074, "step": 12366 }, { "epoch": 0.9357950890999206, "grad_norm": 3.409339189529419, "learning_rate": 2.018874479553421e-07, "loss": 0.6652, "step": 12367 }, { "epoch": 0.9358707578222542, "grad_norm": 2.3269472122192383, "learning_rate": 2.0141336200000592e-07, "loss": 0.5631, "step": 12368 }, { "epoch": 0.9359464265445878, "grad_norm": 2.5745363235473633, "learning_rate": 2.0093982704062463e-07, "loss": 0.6937, "step": 12369 }, { "epoch": 0.9360220952669214, "grad_norm": 2.068528413772583, "learning_rate": 2.0046684310684948e-07, "loss": 0.7959, "step": 12370 }, { "epoch": 0.936097763989255, "grad_norm": 2.4575674533843994, "learning_rate": 1.9999441022830078e-07, "loss": 0.6793, "step": 12371 }, { "epoch": 0.9361734327115887, "grad_norm": 1.9595268964767456, "learning_rate": 1.9952252843456685e-07, "loss": 0.8498, "step": 12372 }, { "epoch": 0.9362491014339223, "grad_norm": 3.2010905742645264, "learning_rate": 1.990511977551951e-07, "loss": 0.6914, "step": 12373 }, { "epoch": 0.9363247701562559, "grad_norm": 2.2969300746917725, "learning_rate": 1.9858041821970386e-07, "loss": 0.6583, "step": 12374 }, { "epoch": 0.9364004388785895, "grad_norm": 2.4109859466552734, "learning_rate": 1.9811018985757357e-07, "loss": 0.7333, "step": 12375 }, { "epoch": 0.9364761076009231, "grad_norm": 1.9935057163238525, "learning_rate": 1.9764051269825168e-07, "loss": 0.6399, "step": 12376 }, { "epoch": 0.9365517763232568, "grad_norm": 2.352203130722046, "learning_rate": 1.9717138677115164e-07, "loss": 0.6009, "step": 12377 }, { "epoch": 0.9366274450455904, "grad_norm": 2.5235226154327393, "learning_rate": 1.96702812105651e-07, "loss": 0.6322, "step": 12378 }, { "epoch": 0.936703113767924, "grad_norm": 2.3327648639678955, "learning_rate": 1.9623478873109424e-07, "loss": 0.6441, "step": 12379 }, { "epoch": 0.9367787824902577, "grad_norm": 2.742035388946533, "learning_rate": 1.9576731667678993e-07, "loss": 0.6182, "step": 12380 }, { "epoch": 0.9368544512125913, "grad_norm": 3.735501766204834, "learning_rate": 1.9530039597201066e-07, "loss": 0.6479, "step": 12381 }, { "epoch": 0.9369301199349249, "grad_norm": 7.853936195373535, "learning_rate": 1.9483402664600002e-07, "loss": 0.7149, "step": 12382 }, { "epoch": 0.9370057886572585, "grad_norm": 2.559588670730591, "learning_rate": 1.9436820872796169e-07, "loss": 0.5942, "step": 12383 }, { "epoch": 0.9370814573795921, "grad_norm": 2.3932056427001953, "learning_rate": 1.939029422470673e-07, "loss": 0.6378, "step": 12384 }, { "epoch": 0.9371571261019258, "grad_norm": 2.621971368789673, "learning_rate": 1.9343822723245251e-07, "loss": 0.7658, "step": 12385 }, { "epoch": 0.9372327948242594, "grad_norm": 2.2142841815948486, "learning_rate": 1.9297406371322012e-07, "loss": 0.67, "step": 12386 }, { "epoch": 0.937308463546593, "grad_norm": 2.741013526916504, "learning_rate": 1.9251045171843684e-07, "loss": 0.8169, "step": 12387 }, { "epoch": 0.9373841322689266, "grad_norm": 2.1864612102508545, "learning_rate": 1.9204739127713644e-07, "loss": 0.6494, "step": 12388 }, { "epoch": 0.9374598009912603, "grad_norm": 2.0697124004364014, "learning_rate": 1.9158488241831672e-07, "loss": 0.8498, "step": 12389 }, { "epoch": 0.9375354697135939, "grad_norm": 2.364565372467041, "learning_rate": 1.9112292517094255e-07, "loss": 0.8094, "step": 12390 }, { "epoch": 0.9376111384359275, "grad_norm": 2.1771390438079834, "learning_rate": 1.9066151956394074e-07, "loss": 0.7019, "step": 12391 }, { "epoch": 0.9376868071582611, "grad_norm": 2.1345317363739014, "learning_rate": 1.902006656262062e-07, "loss": 0.5427, "step": 12392 }, { "epoch": 0.9377624758805948, "grad_norm": 2.93278431892395, "learning_rate": 1.8974036338660283e-07, "loss": 0.6624, "step": 12393 }, { "epoch": 0.9378381446029284, "grad_norm": 2.2009384632110596, "learning_rate": 1.892806128739526e-07, "loss": 0.7585, "step": 12394 }, { "epoch": 0.937913813325262, "grad_norm": 2.6381959915161133, "learning_rate": 1.8882141411704845e-07, "loss": 0.8012, "step": 12395 }, { "epoch": 0.9379894820475956, "grad_norm": 2.5263381004333496, "learning_rate": 1.883627671446454e-07, "loss": 0.6025, "step": 12396 }, { "epoch": 0.9380651507699292, "grad_norm": 2.298753261566162, "learning_rate": 1.8790467198546647e-07, "loss": 0.687, "step": 12397 }, { "epoch": 0.9381408194922629, "grad_norm": 3.0033249855041504, "learning_rate": 1.8744712866819768e-07, "loss": 0.8006, "step": 12398 }, { "epoch": 0.9382164882145965, "grad_norm": 1.8784987926483154, "learning_rate": 1.8699013722149417e-07, "loss": 0.6037, "step": 12399 }, { "epoch": 0.9382921569369301, "grad_norm": 2.2286734580993652, "learning_rate": 1.8653369767397298e-07, "loss": 0.7111, "step": 12400 }, { "epoch": 0.9383678256592637, "grad_norm": 2.3188793659210205, "learning_rate": 1.8607781005421832e-07, "loss": 0.6128, "step": 12401 }, { "epoch": 0.9384434943815974, "grad_norm": 2.3316099643707275, "learning_rate": 1.856224743907773e-07, "loss": 0.6353, "step": 12402 }, { "epoch": 0.938519163103931, "grad_norm": 2.368476629257202, "learning_rate": 1.851676907121671e-07, "loss": 0.6556, "step": 12403 }, { "epoch": 0.9385948318262646, "grad_norm": 2.1538264751434326, "learning_rate": 1.8471345904686699e-07, "loss": 0.6717, "step": 12404 }, { "epoch": 0.9386705005485982, "grad_norm": 2.1779584884643555, "learning_rate": 1.8425977942332118e-07, "loss": 0.7298, "step": 12405 }, { "epoch": 0.9387461692709319, "grad_norm": 11.318231582641602, "learning_rate": 1.8380665186994294e-07, "loss": 0.708, "step": 12406 }, { "epoch": 0.9388218379932655, "grad_norm": 2.0828261375427246, "learning_rate": 1.833540764151056e-07, "loss": 0.6225, "step": 12407 }, { "epoch": 0.9388975067155991, "grad_norm": 10.891009330749512, "learning_rate": 1.8290205308715346e-07, "loss": 0.634, "step": 12408 }, { "epoch": 0.9389731754379327, "grad_norm": 3.8643298149108887, "learning_rate": 1.824505819143929e-07, "loss": 0.7067, "step": 12409 }, { "epoch": 0.9390488441602663, "grad_norm": 3.798006534576416, "learning_rate": 1.819996629250953e-07, "loss": 0.876, "step": 12410 }, { "epoch": 0.9391245128826, "grad_norm": 1.9390292167663574, "learning_rate": 1.8154929614750004e-07, "loss": 0.6763, "step": 12411 }, { "epoch": 0.9392001816049336, "grad_norm": 2.075162172317505, "learning_rate": 1.810994816098106e-07, "loss": 0.6476, "step": 12412 }, { "epoch": 0.9392758503272672, "grad_norm": 2.536733865737915, "learning_rate": 1.8065021934019542e-07, "loss": 0.6086, "step": 12413 }, { "epoch": 0.9393515190496009, "grad_norm": 2.2751870155334473, "learning_rate": 1.8020150936678804e-07, "loss": 0.7458, "step": 12414 }, { "epoch": 0.9394271877719345, "grad_norm": 2.1633825302124023, "learning_rate": 1.7975335171768992e-07, "loss": 0.727, "step": 12415 }, { "epoch": 0.9395028564942681, "grad_norm": 1.8044906854629517, "learning_rate": 1.7930574642096464e-07, "loss": 0.5019, "step": 12416 }, { "epoch": 0.9395785252166017, "grad_norm": 2.0498223304748535, "learning_rate": 1.788586935046428e-07, "loss": 0.6854, "step": 12417 }, { "epoch": 0.9396541939389353, "grad_norm": 1.9652925729751587, "learning_rate": 1.7841219299672096e-07, "loss": 0.7548, "step": 12418 }, { "epoch": 0.939729862661269, "grad_norm": 2.0633480548858643, "learning_rate": 1.7796624492515978e-07, "loss": 0.6056, "step": 12419 }, { "epoch": 0.9398055313836026, "grad_norm": 2.8833742141723633, "learning_rate": 1.775208493178869e-07, "loss": 0.657, "step": 12420 }, { "epoch": 0.9398812001059362, "grad_norm": 3.0172793865203857, "learning_rate": 1.7707600620279307e-07, "loss": 0.7535, "step": 12421 }, { "epoch": 0.9399568688282698, "grad_norm": 2.2233293056488037, "learning_rate": 1.7663171560773694e-07, "loss": 0.7763, "step": 12422 }, { "epoch": 0.9400325375506035, "grad_norm": 1.8882126808166504, "learning_rate": 1.761879775605403e-07, "loss": 0.718, "step": 12423 }, { "epoch": 0.9401082062729371, "grad_norm": 2.3364417552948, "learning_rate": 1.7574479208899286e-07, "loss": 0.6638, "step": 12424 }, { "epoch": 0.9401838749952707, "grad_norm": 2.688260316848755, "learning_rate": 1.7530215922084646e-07, "loss": 0.6975, "step": 12425 }, { "epoch": 0.9402595437176043, "grad_norm": 1.90416419506073, "learning_rate": 1.7486007898382393e-07, "loss": 0.607, "step": 12426 }, { "epoch": 0.940335212439938, "grad_norm": 2.5654749870300293, "learning_rate": 1.7441855140560515e-07, "loss": 0.709, "step": 12427 }, { "epoch": 0.9404108811622716, "grad_norm": 2.002608060836792, "learning_rate": 1.7397757651384194e-07, "loss": 0.5844, "step": 12428 }, { "epoch": 0.9404865498846052, "grad_norm": 1.7451132535934448, "learning_rate": 1.7353715433615125e-07, "loss": 0.6631, "step": 12429 }, { "epoch": 0.9405622186069388, "grad_norm": 2.49381685256958, "learning_rate": 1.73097284900111e-07, "loss": 0.6062, "step": 12430 }, { "epoch": 0.9406378873292724, "grad_norm": 3.437695264816284, "learning_rate": 1.726579682332682e-07, "loss": 0.6267, "step": 12431 }, { "epoch": 0.9407135560516061, "grad_norm": 4.435547828674316, "learning_rate": 1.7221920436313577e-07, "loss": 0.65, "step": 12432 }, { "epoch": 0.9407892247739397, "grad_norm": 2.160989761352539, "learning_rate": 1.7178099331718776e-07, "loss": 0.6809, "step": 12433 }, { "epoch": 0.9408648934962733, "grad_norm": 1.8670154809951782, "learning_rate": 1.7134333512286925e-07, "loss": 0.6438, "step": 12434 }, { "epoch": 0.9409405622186069, "grad_norm": 2.766958713531494, "learning_rate": 1.709062298075853e-07, "loss": 0.6525, "step": 12435 }, { "epoch": 0.9410162309409406, "grad_norm": 2.01920747756958, "learning_rate": 1.70469677398711e-07, "loss": 0.7654, "step": 12436 }, { "epoch": 0.9410918996632742, "grad_norm": 2.5431160926818848, "learning_rate": 1.700336779235835e-07, "loss": 0.6332, "step": 12437 }, { "epoch": 0.9411675683856078, "grad_norm": 2.6801328659057617, "learning_rate": 1.695982314095059e-07, "loss": 0.6775, "step": 12438 }, { "epoch": 0.9412432371079414, "grad_norm": 2.0385563373565674, "learning_rate": 1.6916333788374849e-07, "loss": 0.8871, "step": 12439 }, { "epoch": 0.9413189058302751, "grad_norm": 1.3202354907989502, "learning_rate": 1.687289973735454e-07, "loss": 0.7429, "step": 12440 }, { "epoch": 0.9413945745526087, "grad_norm": 2.1803438663482666, "learning_rate": 1.6829520990609592e-07, "loss": 0.6388, "step": 12441 }, { "epoch": 0.9414702432749423, "grad_norm": 2.1765198707580566, "learning_rate": 1.678619755085663e-07, "loss": 0.6521, "step": 12442 }, { "epoch": 0.9415459119972759, "grad_norm": 2.3797476291656494, "learning_rate": 1.6742929420808584e-07, "loss": 0.7488, "step": 12443 }, { "epoch": 0.9416215807196096, "grad_norm": 2.2045605182647705, "learning_rate": 1.6699716603175086e-07, "loss": 0.7189, "step": 12444 }, { "epoch": 0.9416972494419432, "grad_norm": 2.272555351257324, "learning_rate": 1.6656559100662272e-07, "loss": 0.6715, "step": 12445 }, { "epoch": 0.9417729181642768, "grad_norm": 2.058490514755249, "learning_rate": 1.661345691597288e-07, "loss": 0.6625, "step": 12446 }, { "epoch": 0.9418485868866104, "grad_norm": 4.0172038078308105, "learning_rate": 1.657041005180605e-07, "loss": 0.7327, "step": 12447 }, { "epoch": 0.941924255608944, "grad_norm": 2.1872177124023438, "learning_rate": 1.6527418510857328e-07, "loss": 0.7891, "step": 12448 }, { "epoch": 0.9419999243312777, "grad_norm": 1.810878038406372, "learning_rate": 1.6484482295819258e-07, "loss": 0.5745, "step": 12449 }, { "epoch": 0.9420755930536113, "grad_norm": 2.1320366859436035, "learning_rate": 1.6441601409380591e-07, "loss": 0.7669, "step": 12450 }, { "epoch": 0.9421512617759449, "grad_norm": 2.2419726848602295, "learning_rate": 1.6398775854226578e-07, "loss": 0.6414, "step": 12451 }, { "epoch": 0.9422269304982785, "grad_norm": 1.9634507894515991, "learning_rate": 1.6356005633039074e-07, "loss": 0.7198, "step": 12452 }, { "epoch": 0.9423025992206122, "grad_norm": 2.088675022125244, "learning_rate": 1.6313290748496534e-07, "loss": 0.5175, "step": 12453 }, { "epoch": 0.9423782679429458, "grad_norm": 2.3254096508026123, "learning_rate": 1.6270631203274023e-07, "loss": 0.5991, "step": 12454 }, { "epoch": 0.9424539366652794, "grad_norm": 1.976793646812439, "learning_rate": 1.62280270000429e-07, "loss": 0.6158, "step": 12455 }, { "epoch": 0.942529605387613, "grad_norm": 1.8767503499984741, "learning_rate": 1.6185478141471132e-07, "loss": 0.6511, "step": 12456 }, { "epoch": 0.9426052741099467, "grad_norm": 2.4342007637023926, "learning_rate": 1.614298463022339e-07, "loss": 0.7071, "step": 12457 }, { "epoch": 0.9426809428322803, "grad_norm": 1.5997973680496216, "learning_rate": 1.6100546468960642e-07, "loss": 0.7338, "step": 12458 }, { "epoch": 0.9427566115546139, "grad_norm": 1.8619414567947388, "learning_rate": 1.6058163660340563e-07, "loss": 0.7648, "step": 12459 }, { "epoch": 0.9428322802769475, "grad_norm": 1.8532341718673706, "learning_rate": 1.601583620701733e-07, "loss": 0.7516, "step": 12460 }, { "epoch": 0.9429079489992811, "grad_norm": 2.581843614578247, "learning_rate": 1.5973564111641625e-07, "loss": 0.6256, "step": 12461 }, { "epoch": 0.9429836177216148, "grad_norm": 2.2200398445129395, "learning_rate": 1.5931347376860528e-07, "loss": 0.6617, "step": 12462 }, { "epoch": 0.9430592864439484, "grad_norm": 3.4904468059539795, "learning_rate": 1.5889186005317923e-07, "loss": 0.6086, "step": 12463 }, { "epoch": 0.943134955166282, "grad_norm": 3.9420273303985596, "learning_rate": 1.5847079999654e-07, "loss": 0.6135, "step": 12464 }, { "epoch": 0.9432106238886157, "grad_norm": 1.8716062307357788, "learning_rate": 1.5805029362505652e-07, "loss": 0.6254, "step": 12465 }, { "epoch": 0.9432862926109493, "grad_norm": 2.7966954708099365, "learning_rate": 1.5763034096506167e-07, "loss": 0.601, "step": 12466 }, { "epoch": 0.9433619613332829, "grad_norm": 1.860151767730713, "learning_rate": 1.5721094204285547e-07, "loss": 0.6642, "step": 12467 }, { "epoch": 0.9434376300556165, "grad_norm": 2.0277626514434814, "learning_rate": 1.5679209688470087e-07, "loss": 0.754, "step": 12468 }, { "epoch": 0.9435132987779501, "grad_norm": 1.7337976694107056, "learning_rate": 1.563738055168269e-07, "loss": 0.6579, "step": 12469 }, { "epoch": 0.9435889675002838, "grad_norm": 2.3716025352478027, "learning_rate": 1.559560679654296e-07, "loss": 0.6865, "step": 12470 }, { "epoch": 0.9436646362226174, "grad_norm": 2.448322057723999, "learning_rate": 1.5553888425666806e-07, "loss": 0.693, "step": 12471 }, { "epoch": 0.943740304944951, "grad_norm": 2.229336977005005, "learning_rate": 1.551222544166684e-07, "loss": 0.6458, "step": 12472 }, { "epoch": 0.9438159736672846, "grad_norm": 2.3972370624542236, "learning_rate": 1.5470617847152068e-07, "loss": 0.697, "step": 12473 }, { "epoch": 0.9438916423896182, "grad_norm": 5.422283172607422, "learning_rate": 1.5429065644728113e-07, "loss": 0.6889, "step": 12474 }, { "epoch": 0.9439673111119519, "grad_norm": 2.0572350025177, "learning_rate": 1.538756883699719e-07, "loss": 0.7075, "step": 12475 }, { "epoch": 0.9440429798342855, "grad_norm": 3.023061990737915, "learning_rate": 1.5346127426557822e-07, "loss": 0.7228, "step": 12476 }, { "epoch": 0.9441186485566191, "grad_norm": 2.5621566772460938, "learning_rate": 1.530474141600523e-07, "loss": 0.6039, "step": 12477 }, { "epoch": 0.9441943172789528, "grad_norm": 2.2269248962402344, "learning_rate": 1.5263410807931244e-07, "loss": 0.6546, "step": 12478 }, { "epoch": 0.9442699860012864, "grad_norm": 2.0988047122955322, "learning_rate": 1.5222135604924093e-07, "loss": 0.5909, "step": 12479 }, { "epoch": 0.94434565472362, "grad_norm": 2.0287554264068604, "learning_rate": 1.5180915809568507e-07, "loss": 0.7294, "step": 12480 }, { "epoch": 0.9444213234459536, "grad_norm": 2.013700008392334, "learning_rate": 1.5139751424445726e-07, "loss": 0.7171, "step": 12481 }, { "epoch": 0.9444969921682872, "grad_norm": 2.1935606002807617, "learning_rate": 1.5098642452133883e-07, "loss": 0.6523, "step": 12482 }, { "epoch": 0.9445726608906209, "grad_norm": 1.8394144773483276, "learning_rate": 1.505758889520702e-07, "loss": 0.6384, "step": 12483 }, { "epoch": 0.9446483296129545, "grad_norm": 2.9377388954162598, "learning_rate": 1.5016590756236183e-07, "loss": 0.646, "step": 12484 }, { "epoch": 0.9447239983352881, "grad_norm": 2.116981267929077, "learning_rate": 1.4975648037788914e-07, "loss": 0.6948, "step": 12485 }, { "epoch": 0.9447996670576218, "grad_norm": 2.6826493740081787, "learning_rate": 1.4934760742429066e-07, "loss": 0.5801, "step": 12486 }, { "epoch": 0.9448753357799553, "grad_norm": 2.8828585147857666, "learning_rate": 1.489392887271709e-07, "loss": 0.641, "step": 12487 }, { "epoch": 0.944951004502289, "grad_norm": 1.9045580625534058, "learning_rate": 1.4853152431210138e-07, "loss": 0.5268, "step": 12488 }, { "epoch": 0.9450266732246226, "grad_norm": 6.540375709533691, "learning_rate": 1.481243142046157e-07, "loss": 0.6434, "step": 12489 }, { "epoch": 0.9451023419469562, "grad_norm": 1.9567054510116577, "learning_rate": 1.4771765843021746e-07, "loss": 0.6509, "step": 12490 }, { "epoch": 0.9451780106692899, "grad_norm": 4.807526111602783, "learning_rate": 1.4731155701437028e-07, "loss": 0.6686, "step": 12491 }, { "epoch": 0.9452536793916235, "grad_norm": 2.3727920055389404, "learning_rate": 1.469060099825068e-07, "loss": 0.7862, "step": 12492 }, { "epoch": 0.9453293481139571, "grad_norm": 2.6744470596313477, "learning_rate": 1.4650101736002374e-07, "loss": 0.6864, "step": 12493 }, { "epoch": 0.9454050168362907, "grad_norm": 2.174910306930542, "learning_rate": 1.460965791722808e-07, "loss": 0.4797, "step": 12494 }, { "epoch": 0.9454806855586243, "grad_norm": 2.039815664291382, "learning_rate": 1.4569269544460872e-07, "loss": 0.621, "step": 12495 }, { "epoch": 0.945556354280958, "grad_norm": 2.9486753940582275, "learning_rate": 1.4528936620229826e-07, "loss": 0.7348, "step": 12496 }, { "epoch": 0.9456320230032916, "grad_norm": 2.2600271701812744, "learning_rate": 1.4488659147060723e-07, "loss": 0.6521, "step": 12497 }, { "epoch": 0.9457076917256252, "grad_norm": 2.1503450870513916, "learning_rate": 1.4448437127475844e-07, "loss": 0.6144, "step": 12498 }, { "epoch": 0.9457833604479589, "grad_norm": 2.493232250213623, "learning_rate": 1.4408270563994075e-07, "loss": 0.6895, "step": 12499 }, { "epoch": 0.9458590291702924, "grad_norm": 2.2982404232025146, "learning_rate": 1.4368159459130704e-07, "loss": 0.6948, "step": 12500 }, { "epoch": 0.9459346978926261, "grad_norm": 1.8142317533493042, "learning_rate": 1.432810381539772e-07, "loss": 0.6644, "step": 12501 }, { "epoch": 0.9460103666149597, "grad_norm": 2.070129871368408, "learning_rate": 1.4288103635303517e-07, "loss": 0.5584, "step": 12502 }, { "epoch": 0.9460860353372933, "grad_norm": 2.6108832359313965, "learning_rate": 1.4248158921352894e-07, "loss": 0.6624, "step": 12503 }, { "epoch": 0.946161704059627, "grad_norm": 1.9606751203536987, "learning_rate": 1.4208269676047547e-07, "loss": 0.6436, "step": 12504 }, { "epoch": 0.9462373727819606, "grad_norm": 1.9400218725204468, "learning_rate": 1.416843590188528e-07, "loss": 0.7202, "step": 12505 }, { "epoch": 0.9463130415042942, "grad_norm": 3.4818761348724365, "learning_rate": 1.4128657601360696e-07, "loss": 0.8214, "step": 12506 }, { "epoch": 0.9463887102266278, "grad_norm": 2.120473623275757, "learning_rate": 1.4088934776964902e-07, "loss": 0.651, "step": 12507 }, { "epoch": 0.9464643789489614, "grad_norm": 2.1492528915405273, "learning_rate": 1.404926743118531e-07, "loss": 0.6341, "step": 12508 }, { "epoch": 0.9465400476712951, "grad_norm": 2.199296712875366, "learning_rate": 1.400965556650613e-07, "loss": 0.6321, "step": 12509 }, { "epoch": 0.9466157163936287, "grad_norm": 1.9306163787841797, "learning_rate": 1.3970099185407982e-07, "loss": 0.6515, "step": 12510 }, { "epoch": 0.9466913851159623, "grad_norm": 1.7523746490478516, "learning_rate": 1.393059829036788e-07, "loss": 0.672, "step": 12511 }, { "epoch": 0.946767053838296, "grad_norm": 1.741031527519226, "learning_rate": 1.3891152883859748e-07, "loss": 0.5683, "step": 12512 }, { "epoch": 0.9468427225606295, "grad_norm": 2.261603355407715, "learning_rate": 1.385176296835361e-07, "loss": 0.6403, "step": 12513 }, { "epoch": 0.9469183912829632, "grad_norm": 2.020596504211426, "learning_rate": 1.381242854631619e-07, "loss": 0.7049, "step": 12514 }, { "epoch": 0.9469940600052968, "grad_norm": 1.8669471740722656, "learning_rate": 1.3773149620210723e-07, "loss": 0.6478, "step": 12515 }, { "epoch": 0.9470697287276304, "grad_norm": 2.3542299270629883, "learning_rate": 1.3733926192497136e-07, "loss": 0.7477, "step": 12516 }, { "epoch": 0.9471453974499641, "grad_norm": 2.5550339221954346, "learning_rate": 1.3694758265631568e-07, "loss": 0.6786, "step": 12517 }, { "epoch": 0.9472210661722977, "grad_norm": 2.4507362842559814, "learning_rate": 1.3655645842066956e-07, "loss": 0.6481, "step": 12518 }, { "epoch": 0.9472967348946313, "grad_norm": 1.8866103887557983, "learning_rate": 1.3616588924252538e-07, "loss": 0.6686, "step": 12519 }, { "epoch": 0.947372403616965, "grad_norm": 2.28023624420166, "learning_rate": 1.357758751463416e-07, "loss": 0.7095, "step": 12520 }, { "epoch": 0.9474480723392985, "grad_norm": 2.679999351501465, "learning_rate": 1.3538641615654468e-07, "loss": 0.7683, "step": 12521 }, { "epoch": 0.9475237410616322, "grad_norm": 2.136809825897217, "learning_rate": 1.349975122975211e-07, "loss": 0.771, "step": 12522 }, { "epoch": 0.9475994097839658, "grad_norm": 2.2693591117858887, "learning_rate": 1.346091635936254e-07, "loss": 0.6842, "step": 12523 }, { "epoch": 0.9476750785062994, "grad_norm": 1.6307967901229858, "learning_rate": 1.3422137006917913e-07, "loss": 0.801, "step": 12524 }, { "epoch": 0.9477507472286331, "grad_norm": 2.6975574493408203, "learning_rate": 1.3383413174846582e-07, "loss": 0.6235, "step": 12525 }, { "epoch": 0.9478264159509666, "grad_norm": 2.2791359424591064, "learning_rate": 1.334474486557351e-07, "loss": 0.591, "step": 12526 }, { "epoch": 0.9479020846733003, "grad_norm": 2.0071561336517334, "learning_rate": 1.3306132081520362e-07, "loss": 0.6949, "step": 12527 }, { "epoch": 0.947977753395634, "grad_norm": 3.1164028644561768, "learning_rate": 1.32675748251052e-07, "loss": 0.6275, "step": 12528 }, { "epoch": 0.9480534221179675, "grad_norm": 2.2760443687438965, "learning_rate": 1.3229073098742496e-07, "loss": 0.7345, "step": 12529 }, { "epoch": 0.9481290908403012, "grad_norm": 3.998307228088379, "learning_rate": 1.3190626904843317e-07, "loss": 0.7405, "step": 12530 }, { "epoch": 0.9482047595626348, "grad_norm": 2.044447183609009, "learning_rate": 1.315223624581544e-07, "loss": 0.6031, "step": 12531 }, { "epoch": 0.9482804282849684, "grad_norm": 3.347496509552002, "learning_rate": 1.3113901124063045e-07, "loss": 0.7423, "step": 12532 }, { "epoch": 0.9483560970073021, "grad_norm": 1.8583823442459106, "learning_rate": 1.3075621541986605e-07, "loss": 0.6394, "step": 12533 }, { "epoch": 0.9484317657296356, "grad_norm": 2.4121158123016357, "learning_rate": 1.3037397501983406e-07, "loss": 0.6032, "step": 12534 }, { "epoch": 0.9485074344519693, "grad_norm": 2.290224313735962, "learning_rate": 1.2999229006447134e-07, "loss": 0.6019, "step": 12535 }, { "epoch": 0.9485831031743029, "grad_norm": 2.389235019683838, "learning_rate": 1.2961116057768074e-07, "loss": 0.7305, "step": 12536 }, { "epoch": 0.9486587718966365, "grad_norm": 2.0536439418792725, "learning_rate": 1.292305865833292e-07, "loss": 0.5639, "step": 12537 }, { "epoch": 0.9487344406189702, "grad_norm": 2.742306709289551, "learning_rate": 1.2885056810525063e-07, "loss": 0.7101, "step": 12538 }, { "epoch": 0.9488101093413037, "grad_norm": 1.4020804166793823, "learning_rate": 1.2847110516724202e-07, "loss": 0.8055, "step": 12539 }, { "epoch": 0.9488857780636374, "grad_norm": 1.8928502798080444, "learning_rate": 1.2809219779306735e-07, "loss": 0.4833, "step": 12540 }, { "epoch": 0.948961446785971, "grad_norm": 1.8035271167755127, "learning_rate": 1.2771384600645264e-07, "loss": 0.727, "step": 12541 }, { "epoch": 0.9490371155083046, "grad_norm": 2.05291485786438, "learning_rate": 1.2733604983109493e-07, "loss": 0.6463, "step": 12542 }, { "epoch": 0.9491127842306383, "grad_norm": 2.0966427326202393, "learning_rate": 1.269588092906513e-07, "loss": 0.5246, "step": 12543 }, { "epoch": 0.9491884529529719, "grad_norm": 1.9400415420532227, "learning_rate": 1.2658212440874585e-07, "loss": 0.5948, "step": 12544 }, { "epoch": 0.9492641216753055, "grad_norm": 2.1425304412841797, "learning_rate": 1.262059952089677e-07, "loss": 0.5876, "step": 12545 }, { "epoch": 0.9493397903976392, "grad_norm": 2.972198724746704, "learning_rate": 1.2583042171487103e-07, "loss": 0.6889, "step": 12546 }, { "epoch": 0.9494154591199727, "grad_norm": 1.892687201499939, "learning_rate": 1.25455403949976e-07, "loss": 0.6808, "step": 12547 }, { "epoch": 0.9494911278423064, "grad_norm": 2.389648914337158, "learning_rate": 1.2508094193776786e-07, "loss": 0.7109, "step": 12548 }, { "epoch": 0.94956679656464, "grad_norm": 2.40558123588562, "learning_rate": 1.2470703570169583e-07, "loss": 0.6682, "step": 12549 }, { "epoch": 0.9496424652869736, "grad_norm": 5.251361846923828, "learning_rate": 1.2433368526517619e-07, "loss": 0.7605, "step": 12550 }, { "epoch": 0.9497181340093073, "grad_norm": 1.9533052444458008, "learning_rate": 1.2396089065158722e-07, "loss": 0.619, "step": 12551 }, { "epoch": 0.9497938027316408, "grad_norm": 2.2325778007507324, "learning_rate": 1.2358865188427626e-07, "loss": 0.707, "step": 12552 }, { "epoch": 0.9498694714539745, "grad_norm": 2.874405860900879, "learning_rate": 1.2321696898655465e-07, "loss": 0.6691, "step": 12553 }, { "epoch": 0.9499451401763082, "grad_norm": 6.6497697830200195, "learning_rate": 1.228458419816968e-07, "loss": 0.7334, "step": 12554 }, { "epoch": 0.9500208088986417, "grad_norm": 2.4262197017669678, "learning_rate": 1.2247527089294408e-07, "loss": 0.7098, "step": 12555 }, { "epoch": 0.9500964776209754, "grad_norm": 2.1515469551086426, "learning_rate": 1.2210525574350296e-07, "loss": 0.5938, "step": 12556 }, { "epoch": 0.950172146343309, "grad_norm": 2.0948853492736816, "learning_rate": 1.2173579655654686e-07, "loss": 0.7028, "step": 12557 }, { "epoch": 0.9502478150656426, "grad_norm": 2.3338329792022705, "learning_rate": 1.2136689335521035e-07, "loss": 0.7445, "step": 12558 }, { "epoch": 0.9503234837879763, "grad_norm": 1.8340742588043213, "learning_rate": 1.2099854616259587e-07, "loss": 0.6667, "step": 12559 }, { "epoch": 0.9503991525103098, "grad_norm": 3.241755962371826, "learning_rate": 1.2063075500177e-07, "loss": 0.6357, "step": 12560 }, { "epoch": 0.9504748212326435, "grad_norm": 2.4523792266845703, "learning_rate": 1.2026351989576633e-07, "loss": 0.5422, "step": 12561 }, { "epoch": 0.9505504899549772, "grad_norm": 2.439547538757324, "learning_rate": 1.1989684086758147e-07, "loss": 0.7088, "step": 12562 }, { "epoch": 0.9506261586773107, "grad_norm": 2.1670706272125244, "learning_rate": 1.19530717940178e-07, "loss": 0.7398, "step": 12563 }, { "epoch": 0.9507018273996444, "grad_norm": 3.0719752311706543, "learning_rate": 1.1916515113648463e-07, "loss": 0.7488, "step": 12564 }, { "epoch": 0.9507774961219779, "grad_norm": 4.41519832611084, "learning_rate": 1.1880014047939302e-07, "loss": 0.58, "step": 12565 }, { "epoch": 0.9508531648443116, "grad_norm": 2.505060911178589, "learning_rate": 1.1843568599176091e-07, "loss": 0.6799, "step": 12566 }, { "epoch": 0.9509288335666453, "grad_norm": 2.1886472702026367, "learning_rate": 1.1807178769641402e-07, "loss": 0.6989, "step": 12567 }, { "epoch": 0.9510045022889788, "grad_norm": 2.5705251693725586, "learning_rate": 1.1770844561613913e-07, "loss": 0.6411, "step": 12568 }, { "epoch": 0.9510801710113125, "grad_norm": 2.1216318607330322, "learning_rate": 1.1734565977369005e-07, "loss": 0.6649, "step": 12569 }, { "epoch": 0.9511558397336461, "grad_norm": 2.3023529052734375, "learning_rate": 1.1698343019178559e-07, "loss": 0.6896, "step": 12570 }, { "epoch": 0.9512315084559797, "grad_norm": 2.0217092037200928, "learning_rate": 1.166217568931096e-07, "loss": 0.4953, "step": 12571 }, { "epoch": 0.9513071771783134, "grad_norm": 2.0048747062683105, "learning_rate": 1.1626063990031199e-07, "loss": 0.5621, "step": 12572 }, { "epoch": 0.9513828459006469, "grad_norm": 2.298532009124756, "learning_rate": 1.1590007923600665e-07, "loss": 0.733, "step": 12573 }, { "epoch": 0.9514585146229806, "grad_norm": 2.250257730484009, "learning_rate": 1.1554007492277252e-07, "loss": 0.7155, "step": 12574 }, { "epoch": 0.9515341833453143, "grad_norm": 2.662155866622925, "learning_rate": 1.1518062698315557e-07, "loss": 0.608, "step": 12575 }, { "epoch": 0.9516098520676478, "grad_norm": 2.0141024589538574, "learning_rate": 1.1482173543966479e-07, "loss": 0.5917, "step": 12576 }, { "epoch": 0.9516855207899815, "grad_norm": 7.37333345413208, "learning_rate": 1.144634003147742e-07, "loss": 0.8291, "step": 12577 }, { "epoch": 0.951761189512315, "grad_norm": 2.1229794025421143, "learning_rate": 1.1410562163092486e-07, "loss": 0.5582, "step": 12578 }, { "epoch": 0.9518368582346487, "grad_norm": 2.4636754989624023, "learning_rate": 1.1374839941052284e-07, "loss": 0.8073, "step": 12579 }, { "epoch": 0.9519125269569824, "grad_norm": 2.854630947113037, "learning_rate": 1.1339173367593725e-07, "loss": 0.5689, "step": 12580 }, { "epoch": 0.9519881956793159, "grad_norm": 1.9160290956497192, "learning_rate": 1.1303562444950321e-07, "loss": 0.5861, "step": 12581 }, { "epoch": 0.9520638644016496, "grad_norm": 2.2296550273895264, "learning_rate": 1.1268007175352291e-07, "loss": 0.6746, "step": 12582 }, { "epoch": 0.9521395331239833, "grad_norm": 2.0400097370147705, "learning_rate": 1.123250756102625e-07, "loss": 0.7203, "step": 12583 }, { "epoch": 0.9522152018463168, "grad_norm": 1.9765279293060303, "learning_rate": 1.1197063604195123e-07, "loss": 0.589, "step": 12584 }, { "epoch": 0.9522908705686505, "grad_norm": 2.517303228378296, "learning_rate": 1.1161675307078534e-07, "loss": 0.741, "step": 12585 }, { "epoch": 0.952366539290984, "grad_norm": 2.5748841762542725, "learning_rate": 1.1126342671892908e-07, "loss": 0.6368, "step": 12586 }, { "epoch": 0.9524422080133177, "grad_norm": 1.8756376504898071, "learning_rate": 1.1091065700850378e-07, "loss": 0.6526, "step": 12587 }, { "epoch": 0.9525178767356514, "grad_norm": 3.596529960632324, "learning_rate": 1.1055844396160574e-07, "loss": 0.5991, "step": 12588 }, { "epoch": 0.9525935454579849, "grad_norm": 6.150516033172607, "learning_rate": 1.1020678760029035e-07, "loss": 0.6345, "step": 12589 }, { "epoch": 0.9526692141803186, "grad_norm": 2.0023791790008545, "learning_rate": 1.0985568794657797e-07, "loss": 0.5204, "step": 12590 }, { "epoch": 0.9527448829026521, "grad_norm": 1.9689321517944336, "learning_rate": 1.0950514502245701e-07, "loss": 0.666, "step": 12591 }, { "epoch": 0.9528205516249858, "grad_norm": 2.2433865070343018, "learning_rate": 1.0915515884987892e-07, "loss": 0.6823, "step": 12592 }, { "epoch": 0.9528962203473195, "grad_norm": 2.0992069244384766, "learning_rate": 1.0880572945076217e-07, "loss": 0.7694, "step": 12593 }, { "epoch": 0.952971889069653, "grad_norm": 3.2551393508911133, "learning_rate": 1.0845685684698726e-07, "loss": 0.6114, "step": 12594 }, { "epoch": 0.9530475577919867, "grad_norm": 2.008338212966919, "learning_rate": 1.0810854106040268e-07, "loss": 0.5985, "step": 12595 }, { "epoch": 0.9531232265143204, "grad_norm": 2.3150219917297363, "learning_rate": 1.0776078211282203e-07, "loss": 0.7655, "step": 12596 }, { "epoch": 0.9531988952366539, "grad_norm": 2.6294565200805664, "learning_rate": 1.0741358002602086e-07, "loss": 0.6856, "step": 12597 }, { "epoch": 0.9532745639589876, "grad_norm": 1.913138747215271, "learning_rate": 1.0706693482174479e-07, "loss": 0.597, "step": 12598 }, { "epoch": 0.9533502326813211, "grad_norm": 2.5744075775146484, "learning_rate": 1.0672084652169944e-07, "loss": 0.8647, "step": 12599 }, { "epoch": 0.9534259014036548, "grad_norm": 1.7659814357757568, "learning_rate": 1.0637531514756049e-07, "loss": 0.6648, "step": 12600 }, { "epoch": 0.9535015701259885, "grad_norm": 1.7846674919128418, "learning_rate": 1.0603034072096363e-07, "loss": 0.8073, "step": 12601 }, { "epoch": 0.953577238848322, "grad_norm": 2.20290207862854, "learning_rate": 1.0568592326351257e-07, "loss": 0.7963, "step": 12602 }, { "epoch": 0.9536529075706557, "grad_norm": 1.7884464263916016, "learning_rate": 1.0534206279677904e-07, "loss": 0.6471, "step": 12603 }, { "epoch": 0.9537285762929892, "grad_norm": 2.2755014896392822, "learning_rate": 1.0499875934229286e-07, "loss": 0.6275, "step": 12604 }, { "epoch": 0.9538042450153229, "grad_norm": 1.9066975116729736, "learning_rate": 1.046560129215538e-07, "loss": 0.6213, "step": 12605 }, { "epoch": 0.9538799137376566, "grad_norm": 3.067195177078247, "learning_rate": 1.043138235560267e-07, "loss": 0.6876, "step": 12606 }, { "epoch": 0.9539555824599901, "grad_norm": 2.0356249809265137, "learning_rate": 1.0397219126714042e-07, "loss": 0.6169, "step": 12607 }, { "epoch": 0.9540312511823238, "grad_norm": 2.0770580768585205, "learning_rate": 1.0363111607628884e-07, "loss": 0.7419, "step": 12608 }, { "epoch": 0.9541069199046575, "grad_norm": 3.184155225753784, "learning_rate": 1.0329059800483087e-07, "loss": 0.7008, "step": 12609 }, { "epoch": 0.954182588626991, "grad_norm": 1.850549578666687, "learning_rate": 1.0295063707409147e-07, "loss": 0.6739, "step": 12610 }, { "epoch": 0.9542582573493247, "grad_norm": 2.2812001705169678, "learning_rate": 1.026112333053596e-07, "loss": 0.6861, "step": 12611 }, { "epoch": 0.9543339260716582, "grad_norm": 2.713418960571289, "learning_rate": 1.0227238671988925e-07, "loss": 0.5918, "step": 12612 }, { "epoch": 0.9544095947939919, "grad_norm": 4.540587902069092, "learning_rate": 1.0193409733890147e-07, "loss": 0.6968, "step": 12613 }, { "epoch": 0.9544852635163256, "grad_norm": 6.917119979858398, "learning_rate": 1.0159636518358029e-07, "loss": 0.6452, "step": 12614 }, { "epoch": 0.9545609322386591, "grad_norm": 1.9576506614685059, "learning_rate": 1.012591902750758e-07, "loss": 0.7493, "step": 12615 }, { "epoch": 0.9546366009609928, "grad_norm": 2.1393682956695557, "learning_rate": 1.009225726345021e-07, "loss": 0.7141, "step": 12616 }, { "epoch": 0.9547122696833263, "grad_norm": 2.726297378540039, "learning_rate": 1.0058651228294036e-07, "loss": 0.697, "step": 12617 }, { "epoch": 0.95478793840566, "grad_norm": 2.0172853469848633, "learning_rate": 1.0025100924143571e-07, "loss": 0.6984, "step": 12618 }, { "epoch": 0.9548636071279937, "grad_norm": 2.437422752380371, "learning_rate": 9.991606353099836e-08, "loss": 0.7801, "step": 12619 }, { "epoch": 0.9549392758503272, "grad_norm": 2.639605760574341, "learning_rate": 9.958167517260252e-08, "loss": 0.6264, "step": 12620 }, { "epoch": 0.9550149445726609, "grad_norm": 2.6709601879119873, "learning_rate": 9.924784418719146e-08, "loss": 0.7681, "step": 12621 }, { "epoch": 0.9550906132949946, "grad_norm": 1.9807193279266357, "learning_rate": 9.891457059566745e-08, "loss": 0.6269, "step": 12622 }, { "epoch": 0.9551662820173281, "grad_norm": 2.4436280727386475, "learning_rate": 9.858185441890177e-08, "loss": 0.5812, "step": 12623 }, { "epoch": 0.9552419507396618, "grad_norm": 2.2791218757629395, "learning_rate": 9.824969567773278e-08, "loss": 0.6978, "step": 12624 }, { "epoch": 0.9553176194619953, "grad_norm": 1.9470678567886353, "learning_rate": 9.791809439295885e-08, "loss": 0.8761, "step": 12625 }, { "epoch": 0.955393288184329, "grad_norm": 2.1823413372039795, "learning_rate": 9.758705058534634e-08, "loss": 0.5794, "step": 12626 }, { "epoch": 0.9554689569066627, "grad_norm": 2.474637508392334, "learning_rate": 9.725656427562769e-08, "loss": 0.6105, "step": 12627 }, { "epoch": 0.9555446256289962, "grad_norm": 2.545193910598755, "learning_rate": 9.692663548449732e-08, "loss": 0.6524, "step": 12628 }, { "epoch": 0.9556202943513299, "grad_norm": 1.8600281476974487, "learning_rate": 9.659726423261672e-08, "loss": 0.594, "step": 12629 }, { "epoch": 0.9556959630736634, "grad_norm": 1.867389440536499, "learning_rate": 9.626845054061239e-08, "loss": 0.624, "step": 12630 }, { "epoch": 0.9557716317959971, "grad_norm": 1.7528537511825562, "learning_rate": 9.594019442907686e-08, "loss": 0.5813, "step": 12631 }, { "epoch": 0.9558473005183308, "grad_norm": 4.033431053161621, "learning_rate": 9.561249591856569e-08, "loss": 0.6462, "step": 12632 }, { "epoch": 0.9559229692406643, "grad_norm": 2.031341075897217, "learning_rate": 9.528535502959845e-08, "loss": 0.6628, "step": 12633 }, { "epoch": 0.955998637962998, "grad_norm": 2.2305564880371094, "learning_rate": 9.495877178266477e-08, "loss": 0.6545, "step": 12634 }, { "epoch": 0.9560743066853317, "grad_norm": 1.8292995691299438, "learning_rate": 9.463274619821627e-08, "loss": 0.5837, "step": 12635 }, { "epoch": 0.9561499754076652, "grad_norm": 1.8190289735794067, "learning_rate": 9.430727829666763e-08, "loss": 0.6024, "step": 12636 }, { "epoch": 0.9562256441299989, "grad_norm": 1.8149267435073853, "learning_rate": 9.398236809840155e-08, "loss": 0.7985, "step": 12637 }, { "epoch": 0.9563013128523324, "grad_norm": 2.005405902862549, "learning_rate": 9.365801562376474e-08, "loss": 0.7324, "step": 12638 }, { "epoch": 0.9563769815746661, "grad_norm": 3.069957971572876, "learning_rate": 9.333422089307097e-08, "loss": 0.5228, "step": 12639 }, { "epoch": 0.9564526502969998, "grad_norm": 2.064141035079956, "learning_rate": 9.301098392659502e-08, "loss": 0.7289, "step": 12640 }, { "epoch": 0.9565283190193333, "grad_norm": 1.8948678970336914, "learning_rate": 9.268830474457967e-08, "loss": 0.6597, "step": 12641 }, { "epoch": 0.956603987741667, "grad_norm": 2.4123117923736572, "learning_rate": 9.236618336723379e-08, "loss": 0.7717, "step": 12642 }, { "epoch": 0.9566796564640007, "grad_norm": 2.754319667816162, "learning_rate": 9.204461981472623e-08, "loss": 0.5993, "step": 12643 }, { "epoch": 0.9567553251863342, "grad_norm": 2.069659471511841, "learning_rate": 9.172361410719787e-08, "loss": 0.692, "step": 12644 }, { "epoch": 0.9568309939086679, "grad_norm": 2.8982131481170654, "learning_rate": 9.140316626474865e-08, "loss": 0.6309, "step": 12645 }, { "epoch": 0.9569066626310014, "grad_norm": 2.2474629878997803, "learning_rate": 9.10832763074485e-08, "loss": 0.5228, "step": 12646 }, { "epoch": 0.9569823313533351, "grad_norm": 3.5946133136749268, "learning_rate": 9.076394425532741e-08, "loss": 0.6798, "step": 12647 }, { "epoch": 0.9570580000756688, "grad_norm": 1.9877676963806152, "learning_rate": 9.044517012838438e-08, "loss": 0.6968, "step": 12648 }, { "epoch": 0.9571336687980023, "grad_norm": 2.1319262981414795, "learning_rate": 9.012695394658143e-08, "loss": 0.6383, "step": 12649 }, { "epoch": 0.957209337520336, "grad_norm": 2.4543004035949707, "learning_rate": 8.980929572984764e-08, "loss": 0.7055, "step": 12650 }, { "epoch": 0.9572850062426695, "grad_norm": 2.64933180809021, "learning_rate": 8.949219549807408e-08, "loss": 0.5386, "step": 12651 }, { "epoch": 0.9573606749650032, "grad_norm": 2.116211414337158, "learning_rate": 8.917565327111888e-08, "loss": 0.6279, "step": 12652 }, { "epoch": 0.9574363436873369, "grad_norm": 2.017352342605591, "learning_rate": 8.885966906880616e-08, "loss": 0.5641, "step": 12653 }, { "epoch": 0.9575120124096704, "grad_norm": 2.1703391075134277, "learning_rate": 8.854424291092311e-08, "loss": 0.7587, "step": 12654 }, { "epoch": 0.9575876811320041, "grad_norm": 2.2390897274017334, "learning_rate": 8.822937481722194e-08, "loss": 0.6269, "step": 12655 }, { "epoch": 0.9576633498543378, "grad_norm": 1.785683035850525, "learning_rate": 8.791506480742284e-08, "loss": 0.6123, "step": 12656 }, { "epoch": 0.9577390185766713, "grad_norm": 1.9447267055511475, "learning_rate": 8.76013129012061e-08, "loss": 0.6235, "step": 12657 }, { "epoch": 0.957814687299005, "grad_norm": 1.9245911836624146, "learning_rate": 8.728811911822199e-08, "loss": 0.5552, "step": 12658 }, { "epoch": 0.9578903560213385, "grad_norm": 3.584024667739868, "learning_rate": 8.697548347808281e-08, "loss": 0.8091, "step": 12659 }, { "epoch": 0.9579660247436722, "grad_norm": 2.3792314529418945, "learning_rate": 8.666340600036793e-08, "loss": 0.6981, "step": 12660 }, { "epoch": 0.9580416934660059, "grad_norm": 3.4363317489624023, "learning_rate": 8.635188670461869e-08, "loss": 0.5795, "step": 12661 }, { "epoch": 0.9581173621883394, "grad_norm": 2.3299102783203125, "learning_rate": 8.604092561034549e-08, "loss": 0.6234, "step": 12662 }, { "epoch": 0.9581930309106731, "grad_norm": 2.2737045288085938, "learning_rate": 8.573052273701975e-08, "loss": 0.7343, "step": 12663 }, { "epoch": 0.9582686996330066, "grad_norm": 2.334836483001709, "learning_rate": 8.542067810408194e-08, "loss": 0.7093, "step": 12664 }, { "epoch": 0.9583443683553403, "grad_norm": 2.3729231357574463, "learning_rate": 8.511139173093352e-08, "loss": 0.6742, "step": 12665 }, { "epoch": 0.958420037077674, "grad_norm": 2.322737693786621, "learning_rate": 8.4802663636945e-08, "loss": 0.6121, "step": 12666 }, { "epoch": 0.9584957058000075, "grad_norm": 2.7772295475006104, "learning_rate": 8.449449384144891e-08, "loss": 0.4887, "step": 12667 }, { "epoch": 0.9585713745223412, "grad_norm": 2.32209849357605, "learning_rate": 8.418688236374283e-08, "loss": 0.5629, "step": 12668 }, { "epoch": 0.9586470432446749, "grad_norm": 2.3986520767211914, "learning_rate": 8.387982922309135e-08, "loss": 0.5758, "step": 12669 }, { "epoch": 0.9587227119670084, "grad_norm": 2.683042526245117, "learning_rate": 8.357333443872406e-08, "loss": 0.7964, "step": 12670 }, { "epoch": 0.9587983806893421, "grad_norm": 3.9749975204467773, "learning_rate": 8.326739802983363e-08, "loss": 0.6979, "step": 12671 }, { "epoch": 0.9588740494116756, "grad_norm": 2.608891010284424, "learning_rate": 8.296202001557873e-08, "loss": 0.7334, "step": 12672 }, { "epoch": 0.9589497181340093, "grad_norm": 2.276517152786255, "learning_rate": 8.265720041508407e-08, "loss": 0.6777, "step": 12673 }, { "epoch": 0.959025386856343, "grad_norm": 2.6598875522613525, "learning_rate": 8.235293924743636e-08, "loss": 0.6954, "step": 12674 }, { "epoch": 0.9591010555786765, "grad_norm": 1.9157791137695312, "learning_rate": 8.204923653169139e-08, "loss": 0.5896, "step": 12675 }, { "epoch": 0.9591767243010102, "grad_norm": 2.493894100189209, "learning_rate": 8.174609228686792e-08, "loss": 0.6405, "step": 12676 }, { "epoch": 0.9592523930233438, "grad_norm": 2.365548610687256, "learning_rate": 8.144350653194877e-08, "loss": 0.6652, "step": 12677 }, { "epoch": 0.9593280617456774, "grad_norm": 1.957023024559021, "learning_rate": 8.114147928588377e-08, "loss": 0.6683, "step": 12678 }, { "epoch": 0.9594037304680111, "grad_norm": 2.2314658164978027, "learning_rate": 8.084001056758583e-08, "loss": 0.5744, "step": 12679 }, { "epoch": 0.9594793991903446, "grad_norm": 2.346789836883545, "learning_rate": 8.053910039593481e-08, "loss": 0.608, "step": 12680 }, { "epoch": 0.9595550679126783, "grad_norm": 2.3163628578186035, "learning_rate": 8.023874878977467e-08, "loss": 0.6973, "step": 12681 }, { "epoch": 0.959630736635012, "grad_norm": 2.010727643966675, "learning_rate": 7.993895576791333e-08, "loss": 0.7321, "step": 12682 }, { "epoch": 0.9597064053573455, "grad_norm": 2.24619197845459, "learning_rate": 7.963972134912578e-08, "loss": 0.8042, "step": 12683 }, { "epoch": 0.9597820740796792, "grad_norm": 2.5780961513519287, "learning_rate": 7.934104555215105e-08, "loss": 0.5879, "step": 12684 }, { "epoch": 0.9598577428020127, "grad_norm": 2.02319598197937, "learning_rate": 7.904292839569315e-08, "loss": 0.6226, "step": 12685 }, { "epoch": 0.9599334115243464, "grad_norm": 2.3072006702423096, "learning_rate": 7.874536989842018e-08, "loss": 0.742, "step": 12686 }, { "epoch": 0.9600090802466801, "grad_norm": 2.2716474533081055, "learning_rate": 7.844837007896821e-08, "loss": 0.7209, "step": 12687 }, { "epoch": 0.9600847489690136, "grad_norm": 2.065999984741211, "learning_rate": 7.815192895593437e-08, "loss": 0.6923, "step": 12688 }, { "epoch": 0.9601604176913473, "grad_norm": 2.5855307579040527, "learning_rate": 7.785604654788281e-08, "loss": 0.4902, "step": 12689 }, { "epoch": 0.9602360864136809, "grad_norm": 1.992203950881958, "learning_rate": 7.75607228733447e-08, "loss": 0.6572, "step": 12690 }, { "epoch": 0.9603117551360145, "grad_norm": 1.9567265510559082, "learning_rate": 7.726595795081226e-08, "loss": 0.6281, "step": 12691 }, { "epoch": 0.9603874238583482, "grad_norm": 1.9469960927963257, "learning_rate": 7.697175179874472e-08, "loss": 0.6364, "step": 12692 }, { "epoch": 0.9604630925806817, "grad_norm": 1.9473823308944702, "learning_rate": 7.667810443556733e-08, "loss": 0.5201, "step": 12693 }, { "epoch": 0.9605387613030154, "grad_norm": 2.243472099304199, "learning_rate": 7.638501587966839e-08, "loss": 0.569, "step": 12694 }, { "epoch": 0.9606144300253491, "grad_norm": 1.9308743476867676, "learning_rate": 7.609248614940123e-08, "loss": 0.5012, "step": 12695 }, { "epoch": 0.9606900987476826, "grad_norm": 2.4356298446655273, "learning_rate": 7.580051526308718e-08, "loss": 0.7305, "step": 12696 }, { "epoch": 0.9607657674700163, "grad_norm": 1.883434534072876, "learning_rate": 7.550910323900862e-08, "loss": 0.6199, "step": 12697 }, { "epoch": 0.9608414361923499, "grad_norm": 2.298492908477783, "learning_rate": 7.521825009541594e-08, "loss": 0.5387, "step": 12698 }, { "epoch": 0.9609171049146835, "grad_norm": 1.997653841972351, "learning_rate": 7.49279558505226e-08, "loss": 0.5514, "step": 12699 }, { "epoch": 0.9609927736370172, "grad_norm": 2.485727310180664, "learning_rate": 7.463822052250702e-08, "loss": 0.7501, "step": 12700 }, { "epoch": 0.9610684423593507, "grad_norm": 4.434056758880615, "learning_rate": 7.434904412951471e-08, "loss": 0.6913, "step": 12701 }, { "epoch": 0.9611441110816844, "grad_norm": 2.3401598930358887, "learning_rate": 7.406042668965419e-08, "loss": 0.654, "step": 12702 }, { "epoch": 0.961219779804018, "grad_norm": 1.9578860998153687, "learning_rate": 7.377236822099998e-08, "loss": 0.831, "step": 12703 }, { "epoch": 0.9612954485263516, "grad_norm": 2.5894887447357178, "learning_rate": 7.348486874159166e-08, "loss": 0.6493, "step": 12704 }, { "epoch": 0.9613711172486853, "grad_norm": 2.045994281768799, "learning_rate": 7.319792826943084e-08, "loss": 0.6242, "step": 12705 }, { "epoch": 0.9614467859710188, "grad_norm": 1.8696407079696655, "learning_rate": 7.291154682249013e-08, "loss": 0.7577, "step": 12706 }, { "epoch": 0.9615224546933525, "grad_norm": 2.9939026832580566, "learning_rate": 7.262572441870219e-08, "loss": 0.7651, "step": 12707 }, { "epoch": 0.9615981234156862, "grad_norm": 2.0975334644317627, "learning_rate": 7.234046107596471e-08, "loss": 0.6157, "step": 12708 }, { "epoch": 0.9616737921380197, "grad_norm": 2.63741397857666, "learning_rate": 7.205575681214438e-08, "loss": 0.6313, "step": 12709 }, { "epoch": 0.9617494608603534, "grad_norm": 2.186922788619995, "learning_rate": 7.177161164506795e-08, "loss": 0.622, "step": 12710 }, { "epoch": 0.961825129582687, "grad_norm": 4.426486492156982, "learning_rate": 7.14880255925312e-08, "loss": 0.7372, "step": 12711 }, { "epoch": 0.9619007983050206, "grad_norm": 2.0475969314575195, "learning_rate": 7.12049986722919e-08, "loss": 0.7598, "step": 12712 }, { "epoch": 0.9619764670273543, "grad_norm": 2.091733932495117, "learning_rate": 7.09225309020759e-08, "loss": 0.6758, "step": 12713 }, { "epoch": 0.9620521357496878, "grad_norm": 1.9092284440994263, "learning_rate": 7.064062229957102e-08, "loss": 0.6692, "step": 12714 }, { "epoch": 0.9621278044720215, "grad_norm": 2.5357465744018555, "learning_rate": 7.035927288243016e-08, "loss": 0.6938, "step": 12715 }, { "epoch": 0.9622034731943551, "grad_norm": 2.2587950229644775, "learning_rate": 7.007848266827521e-08, "loss": 0.6561, "step": 12716 }, { "epoch": 0.9622791419166887, "grad_norm": 2.3485636711120605, "learning_rate": 6.979825167468812e-08, "loss": 0.6759, "step": 12717 }, { "epoch": 0.9623548106390224, "grad_norm": 1.9571853876113892, "learning_rate": 6.951857991921783e-08, "loss": 0.6174, "step": 12718 }, { "epoch": 0.962430479361356, "grad_norm": 1.7149250507354736, "learning_rate": 6.923946741937836e-08, "loss": 0.6175, "step": 12719 }, { "epoch": 0.9625061480836896, "grad_norm": 1.5973360538482666, "learning_rate": 6.896091419264971e-08, "loss": 0.4001, "step": 12720 }, { "epoch": 0.9625818168060233, "grad_norm": 2.3491268157958984, "learning_rate": 6.868292025647494e-08, "loss": 0.5713, "step": 12721 }, { "epoch": 0.9626574855283568, "grad_norm": 2.8299319744110107, "learning_rate": 6.84054856282631e-08, "loss": 0.7462, "step": 12722 }, { "epoch": 0.9627331542506905, "grad_norm": 2.6515955924987793, "learning_rate": 6.81286103253883e-08, "loss": 0.7521, "step": 12723 }, { "epoch": 0.9628088229730241, "grad_norm": 2.534411668777466, "learning_rate": 6.785229436518969e-08, "loss": 0.5264, "step": 12724 }, { "epoch": 0.9628844916953577, "grad_norm": 2.334911823272705, "learning_rate": 6.757653776496841e-08, "loss": 0.5722, "step": 12725 }, { "epoch": 0.9629601604176914, "grad_norm": 2.0208029747009277, "learning_rate": 6.730134054199665e-08, "loss": 0.6558, "step": 12726 }, { "epoch": 0.963035829140025, "grad_norm": 2.0118119716644287, "learning_rate": 6.702670271350764e-08, "loss": 0.7271, "step": 12727 }, { "epoch": 0.9631114978623586, "grad_norm": 2.5050179958343506, "learning_rate": 6.675262429669759e-08, "loss": 0.7486, "step": 12728 }, { "epoch": 0.9631871665846922, "grad_norm": 2.230125665664673, "learning_rate": 6.64791053087328e-08, "loss": 0.6211, "step": 12729 }, { "epoch": 0.9632628353070258, "grad_norm": 2.61617112159729, "learning_rate": 6.620614576673956e-08, "loss": 0.6537, "step": 12730 }, { "epoch": 0.9633385040293595, "grad_norm": 2.5779638290405273, "learning_rate": 6.593374568781519e-08, "loss": 0.6734, "step": 12731 }, { "epoch": 0.963414172751693, "grad_norm": 2.4025802612304688, "learning_rate": 6.566190508901404e-08, "loss": 0.7389, "step": 12732 }, { "epoch": 0.9634898414740267, "grad_norm": 2.136965751647949, "learning_rate": 6.539062398736251e-08, "loss": 0.5392, "step": 12733 }, { "epoch": 0.9635655101963604, "grad_norm": 4.406826496124268, "learning_rate": 6.5119902399848e-08, "loss": 0.6547, "step": 12734 }, { "epoch": 0.9636411789186939, "grad_norm": 1.7842594385147095, "learning_rate": 6.484974034342395e-08, "loss": 0.6089, "step": 12735 }, { "epoch": 0.9637168476410276, "grad_norm": 2.642629623413086, "learning_rate": 6.458013783500882e-08, "loss": 0.7135, "step": 12736 }, { "epoch": 0.9637925163633612, "grad_norm": 2.0451202392578125, "learning_rate": 6.431109489148612e-08, "loss": 0.745, "step": 12737 }, { "epoch": 0.9638681850856948, "grad_norm": 2.547163486480713, "learning_rate": 6.404261152970437e-08, "loss": 0.5514, "step": 12738 }, { "epoch": 0.9639438538080285, "grad_norm": 2.487988233566284, "learning_rate": 6.37746877664771e-08, "loss": 0.6643, "step": 12739 }, { "epoch": 0.964019522530362, "grad_norm": 2.42266583442688, "learning_rate": 6.350732361858092e-08, "loss": 0.6916, "step": 12740 }, { "epoch": 0.9640951912526957, "grad_norm": 2.2286386489868164, "learning_rate": 6.324051910276141e-08, "loss": 0.7287, "step": 12741 }, { "epoch": 0.9641708599750293, "grad_norm": 2.0510995388031006, "learning_rate": 6.297427423572521e-08, "loss": 0.6814, "step": 12742 }, { "epoch": 0.9642465286973629, "grad_norm": 2.0005273818969727, "learning_rate": 6.2708589034146e-08, "loss": 0.6475, "step": 12743 }, { "epoch": 0.9643221974196966, "grad_norm": 1.9023462533950806, "learning_rate": 6.244346351466146e-08, "loss": 0.716, "step": 12744 }, { "epoch": 0.9643978661420302, "grad_norm": 2.4381988048553467, "learning_rate": 6.21788976938743e-08, "loss": 0.6293, "step": 12745 }, { "epoch": 0.9644735348643638, "grad_norm": 1.6925064325332642, "learning_rate": 6.191489158835328e-08, "loss": 0.5658, "step": 12746 }, { "epoch": 0.9645492035866975, "grad_norm": 2.0021846294403076, "learning_rate": 6.165144521463117e-08, "loss": 0.5507, "step": 12747 }, { "epoch": 0.964624872309031, "grad_norm": 2.283257484436035, "learning_rate": 6.138855858920577e-08, "loss": 0.8271, "step": 12748 }, { "epoch": 0.9647005410313647, "grad_norm": 2.3239376544952393, "learning_rate": 6.112623172853993e-08, "loss": 0.5572, "step": 12749 }, { "epoch": 0.9647762097536983, "grad_norm": 1.8095026016235352, "learning_rate": 6.086446464906148e-08, "loss": 0.6423, "step": 12750 }, { "epoch": 0.9648518784760319, "grad_norm": 2.217108964920044, "learning_rate": 6.060325736716133e-08, "loss": 0.6364, "step": 12751 }, { "epoch": 0.9649275471983656, "grad_norm": 2.219142436981201, "learning_rate": 6.034260989920037e-08, "loss": 0.7133, "step": 12752 }, { "epoch": 0.9650032159206992, "grad_norm": 2.773773670196533, "learning_rate": 6.008252226149957e-08, "loss": 0.5822, "step": 12753 }, { "epoch": 0.9650788846430328, "grad_norm": 2.4652557373046875, "learning_rate": 5.982299447034589e-08, "loss": 0.6448, "step": 12754 }, { "epoch": 0.9651545533653664, "grad_norm": 1.8013569116592407, "learning_rate": 5.9564026541992333e-08, "loss": 0.4925, "step": 12755 }, { "epoch": 0.9652302220877, "grad_norm": 2.425915241241455, "learning_rate": 5.930561849265592e-08, "loss": 0.6024, "step": 12756 }, { "epoch": 0.9653058908100337, "grad_norm": 2.936739921569824, "learning_rate": 5.9047770338520714e-08, "loss": 0.7074, "step": 12757 }, { "epoch": 0.9653815595323673, "grad_norm": 1.838965892791748, "learning_rate": 5.879048209573079e-08, "loss": 0.7531, "step": 12758 }, { "epoch": 0.9654572282547009, "grad_norm": 2.287705421447754, "learning_rate": 5.853375378040227e-08, "loss": 0.6203, "step": 12759 }, { "epoch": 0.9655328969770346, "grad_norm": 2.7539687156677246, "learning_rate": 5.827758540860928e-08, "loss": 0.5758, "step": 12760 }, { "epoch": 0.9656085656993681, "grad_norm": 1.8179734945297241, "learning_rate": 5.8021976996394e-08, "loss": 0.5622, "step": 12761 }, { "epoch": 0.9656842344217018, "grad_norm": 2.9774842262268066, "learning_rate": 5.776692855976562e-08, "loss": 0.7241, "step": 12762 }, { "epoch": 0.9657599031440354, "grad_norm": 1.650606632232666, "learning_rate": 5.751244011469536e-08, "loss": 0.743, "step": 12763 }, { "epoch": 0.965835571866369, "grad_norm": 2.1129419803619385, "learning_rate": 5.7258511677118485e-08, "loss": 0.6685, "step": 12764 }, { "epoch": 0.9659112405887027, "grad_norm": 3.256843328475952, "learning_rate": 5.7005143262938266e-08, "loss": 0.6937, "step": 12765 }, { "epoch": 0.9659869093110363, "grad_norm": 2.002124309539795, "learning_rate": 5.675233488802101e-08, "loss": 0.7681, "step": 12766 }, { "epoch": 0.9660625780333699, "grad_norm": 2.2396044731140137, "learning_rate": 5.650008656819905e-08, "loss": 0.5746, "step": 12767 }, { "epoch": 0.9661382467557035, "grad_norm": 2.126986503601074, "learning_rate": 5.624839831926776e-08, "loss": 0.6165, "step": 12768 }, { "epoch": 0.9662139154780371, "grad_norm": 1.8271424770355225, "learning_rate": 5.5997270156989525e-08, "loss": 0.6282, "step": 12769 }, { "epoch": 0.9662895842003708, "grad_norm": 2.457106828689575, "learning_rate": 5.574670209709176e-08, "loss": 0.6688, "step": 12770 }, { "epoch": 0.9663652529227044, "grad_norm": 2.185253381729126, "learning_rate": 5.5496694155262925e-08, "loss": 0.677, "step": 12771 }, { "epoch": 0.966440921645038, "grad_norm": 1.8739707469940186, "learning_rate": 5.524724634716149e-08, "loss": 0.6415, "step": 12772 }, { "epoch": 0.9665165903673717, "grad_norm": 2.169405221939087, "learning_rate": 5.499835868840997e-08, "loss": 0.5788, "step": 12773 }, { "epoch": 0.9665922590897053, "grad_norm": 2.6439480781555176, "learning_rate": 5.4750031194590875e-08, "loss": 0.6599, "step": 12774 }, { "epoch": 0.9666679278120389, "grad_norm": 2.3185319900512695, "learning_rate": 5.4502263881258784e-08, "loss": 0.6413, "step": 12775 }, { "epoch": 0.9667435965343725, "grad_norm": 1.7796145677566528, "learning_rate": 5.425505676392728e-08, "loss": 0.601, "step": 12776 }, { "epoch": 0.9668192652567061, "grad_norm": 1.9893208742141724, "learning_rate": 5.4008409858077977e-08, "loss": 0.6446, "step": 12777 }, { "epoch": 0.9668949339790398, "grad_norm": 2.2175159454345703, "learning_rate": 5.376232317915752e-08, "loss": 0.6573, "step": 12778 }, { "epoch": 0.9669706027013734, "grad_norm": 3.3002736568450928, "learning_rate": 5.351679674257559e-08, "loss": 0.6405, "step": 12779 }, { "epoch": 0.967046271423707, "grad_norm": 2.012585401535034, "learning_rate": 5.327183056370888e-08, "loss": 0.6775, "step": 12780 }, { "epoch": 0.9671219401460406, "grad_norm": 2.179703712463379, "learning_rate": 5.302742465789712e-08, "loss": 0.5052, "step": 12781 }, { "epoch": 0.9671976088683742, "grad_norm": 2.5398223400115967, "learning_rate": 5.278357904044606e-08, "loss": 0.7536, "step": 12782 }, { "epoch": 0.9672732775907079, "grad_norm": 2.5192582607269287, "learning_rate": 5.2540293726625497e-08, "loss": 0.7409, "step": 12783 }, { "epoch": 0.9673489463130415, "grad_norm": 2.220503091812134, "learning_rate": 5.229756873167224e-08, "loss": 0.6389, "step": 12784 }, { "epoch": 0.9674246150353751, "grad_norm": 2.697296142578125, "learning_rate": 5.205540407078513e-08, "loss": 0.77, "step": 12785 }, { "epoch": 0.9675002837577088, "grad_norm": 2.3187201023101807, "learning_rate": 5.1813799759130034e-08, "loss": 0.6236, "step": 12786 }, { "epoch": 0.9675759524800424, "grad_norm": 2.0090014934539795, "learning_rate": 5.157275581183585e-08, "loss": 0.6874, "step": 12787 }, { "epoch": 0.967651621202376, "grad_norm": 1.9001106023788452, "learning_rate": 5.13322722439995e-08, "loss": 0.6184, "step": 12788 }, { "epoch": 0.9677272899247096, "grad_norm": 2.549781560897827, "learning_rate": 5.1092349070678944e-08, "loss": 0.6552, "step": 12789 }, { "epoch": 0.9678029586470432, "grad_norm": 2.1971235275268555, "learning_rate": 5.085298630690016e-08, "loss": 0.6244, "step": 12790 }, { "epoch": 0.9678786273693769, "grad_norm": 3.237020492553711, "learning_rate": 5.061418396765316e-08, "loss": 0.7338, "step": 12791 }, { "epoch": 0.9679542960917105, "grad_norm": 2.797713279724121, "learning_rate": 5.0375942067890976e-08, "loss": 0.7075, "step": 12792 }, { "epoch": 0.9680299648140441, "grad_norm": 2.096550941467285, "learning_rate": 5.013826062253368e-08, "loss": 0.5549, "step": 12793 }, { "epoch": 0.9681056335363777, "grad_norm": 1.996402621269226, "learning_rate": 4.9901139646466364e-08, "loss": 0.6456, "step": 12794 }, { "epoch": 0.9681813022587114, "grad_norm": 1.7945371866226196, "learning_rate": 4.966457915453815e-08, "loss": 0.5903, "step": 12795 }, { "epoch": 0.968256970981045, "grad_norm": 2.276614189147949, "learning_rate": 4.9428579161562184e-08, "loss": 0.7016, "step": 12796 }, { "epoch": 0.9683326397033786, "grad_norm": 2.1116700172424316, "learning_rate": 4.919313968231765e-08, "loss": 0.6455, "step": 12797 }, { "epoch": 0.9684083084257122, "grad_norm": 2.1539816856384277, "learning_rate": 4.895826073155074e-08, "loss": 0.7131, "step": 12798 }, { "epoch": 0.9684839771480459, "grad_norm": 2.0993354320526123, "learning_rate": 4.872394232396771e-08, "loss": 0.6818, "step": 12799 }, { "epoch": 0.9685596458703795, "grad_norm": 2.5427918434143066, "learning_rate": 4.8490184474243806e-08, "loss": 0.472, "step": 12800 }, { "epoch": 0.9686353145927131, "grad_norm": 2.007993459701538, "learning_rate": 4.825698719701632e-08, "loss": 0.6107, "step": 12801 }, { "epoch": 0.9687109833150467, "grad_norm": 2.1347367763519287, "learning_rate": 4.802435050689058e-08, "loss": 0.7914, "step": 12802 }, { "epoch": 0.9687866520373803, "grad_norm": 2.216740846633911, "learning_rate": 4.779227441843392e-08, "loss": 0.702, "step": 12803 }, { "epoch": 0.968862320759714, "grad_norm": 2.9680533409118652, "learning_rate": 4.756075894618073e-08, "loss": 0.6582, "step": 12804 }, { "epoch": 0.9689379894820476, "grad_norm": 2.0287156105041504, "learning_rate": 4.7329804104627394e-08, "loss": 0.6694, "step": 12805 }, { "epoch": 0.9690136582043812, "grad_norm": 2.646369695663452, "learning_rate": 4.7099409908239355e-08, "loss": 0.6483, "step": 12806 }, { "epoch": 0.9690893269267148, "grad_norm": 2.547806978225708, "learning_rate": 4.686957637144207e-08, "loss": 0.5989, "step": 12807 }, { "epoch": 0.9691649956490485, "grad_norm": 2.441589832305908, "learning_rate": 4.664030350863102e-08, "loss": 0.75, "step": 12808 }, { "epoch": 0.9692406643713821, "grad_norm": 2.0199062824249268, "learning_rate": 4.641159133416273e-08, "loss": 0.6601, "step": 12809 }, { "epoch": 0.9693163330937157, "grad_norm": 2.6701488494873047, "learning_rate": 4.618343986235973e-08, "loss": 0.7961, "step": 12810 }, { "epoch": 0.9693920018160493, "grad_norm": 2.2760307788848877, "learning_rate": 4.5955849107509603e-08, "loss": 0.5234, "step": 12811 }, { "epoch": 0.969467670538383, "grad_norm": 2.0577778816223145, "learning_rate": 4.572881908386495e-08, "loss": 0.5675, "step": 12812 }, { "epoch": 0.9695433392607166, "grad_norm": 2.3686912059783936, "learning_rate": 4.5502349805643385e-08, "loss": 0.6366, "step": 12813 }, { "epoch": 0.9696190079830502, "grad_norm": 2.451944351196289, "learning_rate": 4.527644128702757e-08, "loss": 0.7198, "step": 12814 }, { "epoch": 0.9696946767053838, "grad_norm": 2.4918670654296875, "learning_rate": 4.505109354216419e-08, "loss": 0.5377, "step": 12815 }, { "epoch": 0.9697703454277175, "grad_norm": 1.964171051979065, "learning_rate": 4.4826306585164955e-08, "loss": 0.6491, "step": 12816 }, { "epoch": 0.9698460141500511, "grad_norm": 2.240342855453491, "learning_rate": 4.4602080430106605e-08, "loss": 0.7502, "step": 12817 }, { "epoch": 0.9699216828723847, "grad_norm": 2.0190277099609375, "learning_rate": 4.437841509103091e-08, "loss": 0.6771, "step": 12818 }, { "epoch": 0.9699973515947183, "grad_norm": 2.1004180908203125, "learning_rate": 4.415531058194566e-08, "loss": 0.675, "step": 12819 }, { "epoch": 0.9700730203170519, "grad_norm": 2.711425304412842, "learning_rate": 4.3932766916821684e-08, "loss": 0.6182, "step": 12820 }, { "epoch": 0.9701486890393856, "grad_norm": 2.490840435028076, "learning_rate": 4.371078410959484e-08, "loss": 0.6801, "step": 12821 }, { "epoch": 0.9702243577617192, "grad_norm": 2.158508777618408, "learning_rate": 4.348936217416599e-08, "loss": 0.6393, "step": 12822 }, { "epoch": 0.9703000264840528, "grad_norm": 2.2075629234313965, "learning_rate": 4.326850112440306e-08, "loss": 0.5681, "step": 12823 }, { "epoch": 0.9703756952063864, "grad_norm": 2.199986457824707, "learning_rate": 4.304820097413698e-08, "loss": 0.7375, "step": 12824 }, { "epoch": 0.9704513639287201, "grad_norm": 2.364950180053711, "learning_rate": 4.2828461737161706e-08, "loss": 0.6709, "step": 12825 }, { "epoch": 0.9705270326510537, "grad_norm": 2.256744861602783, "learning_rate": 4.2609283427239245e-08, "loss": 0.4733, "step": 12826 }, { "epoch": 0.9706027013733873, "grad_norm": 2.7820794582366943, "learning_rate": 4.2390666058095606e-08, "loss": 0.7549, "step": 12827 }, { "epoch": 0.9706783700957209, "grad_norm": 2.726006507873535, "learning_rate": 4.2172609643420846e-08, "loss": 0.8422, "step": 12828 }, { "epoch": 0.9707540388180546, "grad_norm": 2.408484697341919, "learning_rate": 4.1955114196870035e-08, "loss": 0.6995, "step": 12829 }, { "epoch": 0.9708297075403882, "grad_norm": 1.8652710914611816, "learning_rate": 4.1738179732064286e-08, "loss": 0.6074, "step": 12830 }, { "epoch": 0.9709053762627218, "grad_norm": 1.773630976676941, "learning_rate": 4.152180626258772e-08, "loss": 0.7438, "step": 12831 }, { "epoch": 0.9709810449850554, "grad_norm": 1.8957676887512207, "learning_rate": 4.1305993801991514e-08, "loss": 0.6815, "step": 12832 }, { "epoch": 0.971056713707389, "grad_norm": 2.1224136352539062, "learning_rate": 4.109074236378885e-08, "loss": 0.5116, "step": 12833 }, { "epoch": 0.9711323824297227, "grad_norm": 2.1865241527557373, "learning_rate": 4.087605196146094e-08, "loss": 0.6051, "step": 12834 }, { "epoch": 0.9712080511520563, "grad_norm": 2.5317890644073486, "learning_rate": 4.066192260845303e-08, "loss": 0.6102, "step": 12835 }, { "epoch": 0.9712837198743899, "grad_norm": 2.466912269592285, "learning_rate": 4.0448354318172395e-08, "loss": 0.6871, "step": 12836 }, { "epoch": 0.9713593885967235, "grad_norm": 2.1204843521118164, "learning_rate": 4.023534710399435e-08, "loss": 0.5491, "step": 12837 }, { "epoch": 0.9714350573190572, "grad_norm": 2.1326029300689697, "learning_rate": 4.0022900979259206e-08, "loss": 0.549, "step": 12838 }, { "epoch": 0.9715107260413908, "grad_norm": 2.1497695446014404, "learning_rate": 3.981101595726933e-08, "loss": 0.6592, "step": 12839 }, { "epoch": 0.9715863947637244, "grad_norm": 1.952813744544983, "learning_rate": 3.95996920512951e-08, "loss": 0.6218, "step": 12840 }, { "epoch": 0.971662063486058, "grad_norm": 2.6534552574157715, "learning_rate": 3.938892927456994e-08, "loss": 0.6288, "step": 12841 }, { "epoch": 0.9717377322083917, "grad_norm": 1.9842441082000732, "learning_rate": 3.917872764029129e-08, "loss": 0.6466, "step": 12842 }, { "epoch": 0.9718134009307253, "grad_norm": 2.3986659049987793, "learning_rate": 3.8969087161622616e-08, "loss": 0.6197, "step": 12843 }, { "epoch": 0.9718890696530589, "grad_norm": 2.6465704441070557, "learning_rate": 3.8760007851695423e-08, "loss": 0.7109, "step": 12844 }, { "epoch": 0.9719647383753925, "grad_norm": 2.349898338317871, "learning_rate": 3.855148972359923e-08, "loss": 0.6664, "step": 12845 }, { "epoch": 0.9720404070977261, "grad_norm": 2.368164300918579, "learning_rate": 3.83435327903936e-08, "loss": 0.5955, "step": 12846 }, { "epoch": 0.9721160758200598, "grad_norm": 2.0866646766662598, "learning_rate": 3.8136137065102104e-08, "loss": 0.6821, "step": 12847 }, { "epoch": 0.9721917445423934, "grad_norm": 2.3973708152770996, "learning_rate": 3.7929302560711365e-08, "loss": 0.6995, "step": 12848 }, { "epoch": 0.972267413264727, "grad_norm": 2.413179397583008, "learning_rate": 3.772302929017502e-08, "loss": 0.6289, "step": 12849 }, { "epoch": 0.9723430819870607, "grad_norm": 2.0390636920928955, "learning_rate": 3.7517317266409725e-08, "loss": 0.6386, "step": 12850 }, { "epoch": 0.9724187507093943, "grad_norm": 2.3033881187438965, "learning_rate": 3.7312166502298184e-08, "loss": 0.6435, "step": 12851 }, { "epoch": 0.9724944194317279, "grad_norm": 2.501260995864868, "learning_rate": 3.710757701068812e-08, "loss": 0.6013, "step": 12852 }, { "epoch": 0.9725700881540615, "grad_norm": 2.2412755489349365, "learning_rate": 3.6903548804390283e-08, "loss": 0.637, "step": 12853 }, { "epoch": 0.9726457568763951, "grad_norm": 1.7953567504882812, "learning_rate": 3.670008189618246e-08, "loss": 0.6847, "step": 12854 }, { "epoch": 0.9727214255987288, "grad_norm": 2.0152835845947266, "learning_rate": 3.6497176298807445e-08, "loss": 0.6373, "step": 12855 }, { "epoch": 0.9727970943210624, "grad_norm": 2.8471808433532715, "learning_rate": 3.629483202497008e-08, "loss": 0.6311, "step": 12856 }, { "epoch": 0.972872763043396, "grad_norm": 1.9700920581817627, "learning_rate": 3.6093049087342236e-08, "loss": 0.6387, "step": 12857 }, { "epoch": 0.9729484317657296, "grad_norm": 2.660348653793335, "learning_rate": 3.589182749855979e-08, "loss": 0.6581, "step": 12858 }, { "epoch": 0.9730241004880632, "grad_norm": 2.1217596530914307, "learning_rate": 3.5691167271225676e-08, "loss": 0.5784, "step": 12859 }, { "epoch": 0.9730997692103969, "grad_norm": 2.266953945159912, "learning_rate": 3.549106841790484e-08, "loss": 0.7355, "step": 12860 }, { "epoch": 0.9731754379327305, "grad_norm": 2.1540300846099854, "learning_rate": 3.5291530951127247e-08, "loss": 0.6858, "step": 12861 }, { "epoch": 0.9732511066550641, "grad_norm": 2.050753355026245, "learning_rate": 3.5092554883389916e-08, "loss": 0.617, "step": 12862 }, { "epoch": 0.9733267753773978, "grad_norm": 4.436022758483887, "learning_rate": 3.489414022715287e-08, "loss": 0.567, "step": 12863 }, { "epoch": 0.9734024440997314, "grad_norm": 2.4525585174560547, "learning_rate": 3.4696286994841176e-08, "loss": 0.7939, "step": 12864 }, { "epoch": 0.973478112822065, "grad_norm": 1.9763293266296387, "learning_rate": 3.449899519884492e-08, "loss": 0.5779, "step": 12865 }, { "epoch": 0.9735537815443986, "grad_norm": 1.7585806846618652, "learning_rate": 3.430226485152021e-08, "loss": 0.7401, "step": 12866 }, { "epoch": 0.9736294502667322, "grad_norm": 1.8818296194076538, "learning_rate": 3.410609596518621e-08, "loss": 0.6261, "step": 12867 }, { "epoch": 0.9737051189890659, "grad_norm": 2.081613302230835, "learning_rate": 3.3910488552127085e-08, "loss": 0.6814, "step": 12868 }, { "epoch": 0.9737807877113995, "grad_norm": 2.024808168411255, "learning_rate": 3.3715442624594025e-08, "loss": 0.6636, "step": 12869 }, { "epoch": 0.9738564564337331, "grad_norm": 3.159187078475952, "learning_rate": 3.352095819479928e-08, "loss": 0.6785, "step": 12870 }, { "epoch": 0.9739321251560668, "grad_norm": 2.1414241790771484, "learning_rate": 3.332703527492409e-08, "loss": 0.7564, "step": 12871 }, { "epoch": 0.9740077938784004, "grad_norm": 2.4196348190307617, "learning_rate": 3.3133673877111745e-08, "loss": 0.573, "step": 12872 }, { "epoch": 0.974083462600734, "grad_norm": 2.986185312271118, "learning_rate": 3.2940874013470567e-08, "loss": 0.653, "step": 12873 }, { "epoch": 0.9741591313230676, "grad_norm": 2.1942930221557617, "learning_rate": 3.274863569607489e-08, "loss": 0.832, "step": 12874 }, { "epoch": 0.9742348000454012, "grad_norm": 2.0108728408813477, "learning_rate": 3.255695893696309e-08, "loss": 0.5848, "step": 12875 }, { "epoch": 0.9743104687677349, "grad_norm": 1.9388446807861328, "learning_rate": 3.2365843748139554e-08, "loss": 0.7127, "step": 12876 }, { "epoch": 0.9743861374900685, "grad_norm": 4.268650054931641, "learning_rate": 3.2175290141571725e-08, "loss": 0.6322, "step": 12877 }, { "epoch": 0.9744618062124021, "grad_norm": 2.069544553756714, "learning_rate": 3.198529812919204e-08, "loss": 0.6331, "step": 12878 }, { "epoch": 0.9745374749347357, "grad_norm": 2.953239679336548, "learning_rate": 3.1795867722898995e-08, "loss": 0.6101, "step": 12879 }, { "epoch": 0.9746131436570693, "grad_norm": 2.6282474994659424, "learning_rate": 3.16069989345561e-08, "loss": 0.6981, "step": 12880 }, { "epoch": 0.974688812379403, "grad_norm": 2.169370412826538, "learning_rate": 3.141869177598988e-08, "loss": 0.6561, "step": 12881 }, { "epoch": 0.9747644811017366, "grad_norm": 2.562084913253784, "learning_rate": 3.123094625899292e-08, "loss": 0.6634, "step": 12882 }, { "epoch": 0.9748401498240702, "grad_norm": 2.8010025024414062, "learning_rate": 3.1043762395321804e-08, "loss": 0.6248, "step": 12883 }, { "epoch": 0.9749158185464039, "grad_norm": 1.9485735893249512, "learning_rate": 3.085714019670116e-08, "loss": 0.6315, "step": 12884 }, { "epoch": 0.9749914872687375, "grad_norm": 2.397359848022461, "learning_rate": 3.067107967481464e-08, "loss": 0.6059, "step": 12885 }, { "epoch": 0.9750671559910711, "grad_norm": 2.445741891860962, "learning_rate": 3.0485580841315916e-08, "loss": 0.5743, "step": 12886 }, { "epoch": 0.9751428247134047, "grad_norm": 11.2733736038208, "learning_rate": 3.030064370782171e-08, "loss": 0.6714, "step": 12887 }, { "epoch": 0.9752184934357383, "grad_norm": 2.1173009872436523, "learning_rate": 3.011626828591274e-08, "loss": 0.7012, "step": 12888 }, { "epoch": 0.975294162158072, "grad_norm": 2.1452109813690186, "learning_rate": 2.9932454587133784e-08, "loss": 0.7467, "step": 12889 }, { "epoch": 0.9753698308804056, "grad_norm": 2.0479838848114014, "learning_rate": 2.9749202622998628e-08, "loss": 0.6547, "step": 12890 }, { "epoch": 0.9754454996027392, "grad_norm": 2.4938087463378906, "learning_rate": 2.9566512404981096e-08, "loss": 0.6852, "step": 12891 }, { "epoch": 0.9755211683250729, "grad_norm": 1.6364085674285889, "learning_rate": 2.9384383944522032e-08, "loss": 0.6448, "step": 12892 }, { "epoch": 0.9755968370474064, "grad_norm": 2.1803574562072754, "learning_rate": 2.9202817253028314e-08, "loss": 0.6075, "step": 12893 }, { "epoch": 0.9756725057697401, "grad_norm": 1.8982106447219849, "learning_rate": 2.9021812341868847e-08, "loss": 0.6446, "step": 12894 }, { "epoch": 0.9757481744920737, "grad_norm": 2.0787320137023926, "learning_rate": 2.8841369222378566e-08, "loss": 0.7196, "step": 12895 }, { "epoch": 0.9758238432144073, "grad_norm": 2.5845391750335693, "learning_rate": 2.866148790585843e-08, "loss": 0.5858, "step": 12896 }, { "epoch": 0.975899511936741, "grad_norm": 1.972965955734253, "learning_rate": 2.8482168403573427e-08, "loss": 0.6546, "step": 12897 }, { "epoch": 0.9759751806590746, "grad_norm": 2.0222232341766357, "learning_rate": 2.8303410726751576e-08, "loss": 0.645, "step": 12898 }, { "epoch": 0.9760508493814082, "grad_norm": 2.261725425720215, "learning_rate": 2.8125214886588923e-08, "loss": 0.6731, "step": 12899 }, { "epoch": 0.9761265181037418, "grad_norm": 2.3352370262145996, "learning_rate": 2.7947580894242542e-08, "loss": 0.6196, "step": 12900 }, { "epoch": 0.9762021868260754, "grad_norm": 2.054361581802368, "learning_rate": 2.777050876083953e-08, "loss": 0.5426, "step": 12901 }, { "epoch": 0.9762778555484091, "grad_norm": 2.2071480751037598, "learning_rate": 2.759399849746602e-08, "loss": 0.6568, "step": 12902 }, { "epoch": 0.9763535242707427, "grad_norm": 2.048191785812378, "learning_rate": 2.7418050115176176e-08, "loss": 0.7078, "step": 12903 }, { "epoch": 0.9764291929930763, "grad_norm": 2.7316575050354004, "learning_rate": 2.7242663624989172e-08, "loss": 0.6979, "step": 12904 }, { "epoch": 0.97650486171541, "grad_norm": 2.226996660232544, "learning_rate": 2.706783903788823e-08, "loss": 0.5833, "step": 12905 }, { "epoch": 0.9765805304377435, "grad_norm": 2.25785231590271, "learning_rate": 2.6893576364821593e-08, "loss": 0.6875, "step": 12906 }, { "epoch": 0.9766561991600772, "grad_norm": 2.408618927001953, "learning_rate": 2.6719875616701528e-08, "loss": 0.7061, "step": 12907 }, { "epoch": 0.9767318678824108, "grad_norm": 2.1182861328125, "learning_rate": 2.6546736804405337e-08, "loss": 0.6309, "step": 12908 }, { "epoch": 0.9768075366047444, "grad_norm": 2.118018388748169, "learning_rate": 2.6374159938777342e-08, "loss": 0.6792, "step": 12909 }, { "epoch": 0.9768832053270781, "grad_norm": 2.3272552490234375, "learning_rate": 2.6202145030621904e-08, "loss": 0.7025, "step": 12910 }, { "epoch": 0.9769588740494117, "grad_norm": 2.4793472290039062, "learning_rate": 2.6030692090714404e-08, "loss": 0.6057, "step": 12911 }, { "epoch": 0.9770345427717453, "grad_norm": 2.1847920417785645, "learning_rate": 2.585980112978925e-08, "loss": 0.7071, "step": 12912 }, { "epoch": 0.977110211494079, "grad_norm": 1.9853038787841797, "learning_rate": 2.5689472158549888e-08, "loss": 0.6536, "step": 12913 }, { "epoch": 0.9771858802164125, "grad_norm": 2.087451934814453, "learning_rate": 2.5519705187662778e-08, "loss": 0.5842, "step": 12914 }, { "epoch": 0.9772615489387462, "grad_norm": 2.3047263622283936, "learning_rate": 2.535050022775742e-08, "loss": 0.555, "step": 12915 }, { "epoch": 0.9773372176610798, "grad_norm": 1.9747389554977417, "learning_rate": 2.518185728943234e-08, "loss": 0.5205, "step": 12916 }, { "epoch": 0.9774128863834134, "grad_norm": 2.134197235107422, "learning_rate": 2.5013776383247088e-08, "loss": 0.7488, "step": 12917 }, { "epoch": 0.9774885551057471, "grad_norm": 2.0079126358032227, "learning_rate": 2.4846257519727246e-08, "loss": 0.666, "step": 12918 }, { "epoch": 0.9775642238280806, "grad_norm": 2.0842461585998535, "learning_rate": 2.4679300709364416e-08, "loss": 0.6784, "step": 12919 }, { "epoch": 0.9776398925504143, "grad_norm": 2.3097853660583496, "learning_rate": 2.4512905962613242e-08, "loss": 0.7302, "step": 12920 }, { "epoch": 0.977715561272748, "grad_norm": 2.2408077716827393, "learning_rate": 2.4347073289894382e-08, "loss": 0.7462, "step": 12921 }, { "epoch": 0.9777912299950815, "grad_norm": 2.217599630355835, "learning_rate": 2.4181802701592537e-08, "loss": 0.6856, "step": 12922 }, { "epoch": 0.9778668987174152, "grad_norm": 2.31512188911438, "learning_rate": 2.401709420805842e-08, "loss": 0.6898, "step": 12923 }, { "epoch": 0.9779425674397488, "grad_norm": 2.1751387119293213, "learning_rate": 2.3852947819604788e-08, "loss": 0.5326, "step": 12924 }, { "epoch": 0.9780182361620824, "grad_norm": 2.364943265914917, "learning_rate": 2.3689363546511413e-08, "loss": 0.6644, "step": 12925 }, { "epoch": 0.978093904884416, "grad_norm": 1.4398491382598877, "learning_rate": 2.3526341399024097e-08, "loss": 0.7215, "step": 12926 }, { "epoch": 0.9781695736067496, "grad_norm": 2.1224849224090576, "learning_rate": 2.3363881387349684e-08, "loss": 0.6535, "step": 12927 }, { "epoch": 0.9782452423290833, "grad_norm": 2.169689178466797, "learning_rate": 2.3201983521664027e-08, "loss": 0.6147, "step": 12928 }, { "epoch": 0.9783209110514169, "grad_norm": 2.206355094909668, "learning_rate": 2.304064781210402e-08, "loss": 0.7304, "step": 12929 }, { "epoch": 0.9783965797737505, "grad_norm": 2.7798423767089844, "learning_rate": 2.2879874268773583e-08, "loss": 0.6318, "step": 12930 }, { "epoch": 0.9784722484960842, "grad_norm": 2.0683248043060303, "learning_rate": 2.2719662901741656e-08, "loss": 0.6629, "step": 12931 }, { "epoch": 0.9785479172184177, "grad_norm": 3.2282752990722656, "learning_rate": 2.2560013721039217e-08, "loss": 0.6315, "step": 12932 }, { "epoch": 0.9786235859407514, "grad_norm": 2.0770423412323, "learning_rate": 2.240092673666627e-08, "loss": 0.5093, "step": 12933 }, { "epoch": 0.978699254663085, "grad_norm": 2.091789722442627, "learning_rate": 2.2242401958584847e-08, "loss": 0.6643, "step": 12934 }, { "epoch": 0.9787749233854186, "grad_norm": 2.1290385723114014, "learning_rate": 2.2084439396721002e-08, "loss": 0.6081, "step": 12935 }, { "epoch": 0.9788505921077523, "grad_norm": 1.8989332914352417, "learning_rate": 2.1927039060966825e-08, "loss": 0.5779, "step": 12936 }, { "epoch": 0.9789262608300859, "grad_norm": 2.21238112449646, "learning_rate": 2.177020096118143e-08, "loss": 0.6956, "step": 12937 }, { "epoch": 0.9790019295524195, "grad_norm": 2.3626580238342285, "learning_rate": 2.1613925107184962e-08, "loss": 0.7432, "step": 12938 }, { "epoch": 0.9790775982747532, "grad_norm": 2.329190254211426, "learning_rate": 2.1458211508763594e-08, "loss": 0.7325, "step": 12939 }, { "epoch": 0.9791532669970867, "grad_norm": 3.697888135910034, "learning_rate": 2.130306017566952e-08, "loss": 0.6174, "step": 12940 }, { "epoch": 0.9792289357194204, "grad_norm": 1.962836503982544, "learning_rate": 2.1148471117617972e-08, "loss": 0.6935, "step": 12941 }, { "epoch": 0.979304604441754, "grad_norm": 2.527402877807617, "learning_rate": 2.0994444344291207e-08, "loss": 0.7468, "step": 12942 }, { "epoch": 0.9793802731640876, "grad_norm": 3.003408670425415, "learning_rate": 2.084097986533351e-08, "loss": 0.7296, "step": 12943 }, { "epoch": 0.9794559418864213, "grad_norm": 1.8468185663223267, "learning_rate": 2.068807769035519e-08, "loss": 0.6277, "step": 12944 }, { "epoch": 0.9795316106087548, "grad_norm": 2.2292520999908447, "learning_rate": 2.053573782893259e-08, "loss": 0.6755, "step": 12945 }, { "epoch": 0.9796072793310885, "grad_norm": 3.2633378505706787, "learning_rate": 2.0383960290605076e-08, "loss": 0.637, "step": 12946 }, { "epoch": 0.9796829480534222, "grad_norm": 2.093501329421997, "learning_rate": 2.0232745084878046e-08, "loss": 0.6773, "step": 12947 }, { "epoch": 0.9797586167757557, "grad_norm": 2.401658773422241, "learning_rate": 2.0082092221220925e-08, "loss": 0.6135, "step": 12948 }, { "epoch": 0.9798342854980894, "grad_norm": 2.732320547103882, "learning_rate": 1.9932001709066172e-08, "loss": 0.5327, "step": 12949 }, { "epoch": 0.979909954220423, "grad_norm": 2.547961711883545, "learning_rate": 1.978247355781626e-08, "loss": 0.684, "step": 12950 }, { "epoch": 0.9799856229427566, "grad_norm": 2.245232105255127, "learning_rate": 1.9633507776831704e-08, "loss": 0.5505, "step": 12951 }, { "epoch": 0.9800612916650903, "grad_norm": 2.435441732406616, "learning_rate": 1.948510437544404e-08, "loss": 0.5674, "step": 12952 }, { "epoch": 0.9801369603874238, "grad_norm": 2.2396018505096436, "learning_rate": 1.9337263362945833e-08, "loss": 0.6784, "step": 12953 }, { "epoch": 0.9802126291097575, "grad_norm": 1.6761008501052856, "learning_rate": 1.918998474859468e-08, "loss": 0.6931, "step": 12954 }, { "epoch": 0.9802882978320911, "grad_norm": 1.8858741521835327, "learning_rate": 1.90432685416142e-08, "loss": 0.6703, "step": 12955 }, { "epoch": 0.9803639665544247, "grad_norm": 2.178541421890259, "learning_rate": 1.8897114751192046e-08, "loss": 0.669, "step": 12956 }, { "epoch": 0.9804396352767584, "grad_norm": 2.8125827312469482, "learning_rate": 1.8751523386480896e-08, "loss": 0.7143, "step": 12957 }, { "epoch": 0.9805153039990919, "grad_norm": 2.164923667907715, "learning_rate": 1.8606494456599453e-08, "loss": 0.5678, "step": 12958 }, { "epoch": 0.9805909727214256, "grad_norm": 1.9167901277542114, "learning_rate": 1.846202797062746e-08, "loss": 0.5551, "step": 12959 }, { "epoch": 0.9806666414437593, "grad_norm": 4.015124320983887, "learning_rate": 1.8318123937612674e-08, "loss": 0.6921, "step": 12960 }, { "epoch": 0.9807423101660928, "grad_norm": 2.202143907546997, "learning_rate": 1.8174782366567887e-08, "loss": 0.6212, "step": 12961 }, { "epoch": 0.9808179788884265, "grad_norm": 2.2554047107696533, "learning_rate": 1.803200326646992e-08, "loss": 0.9251, "step": 12962 }, { "epoch": 0.9808936476107601, "grad_norm": 2.1025631427764893, "learning_rate": 1.7889786646257622e-08, "loss": 0.6946, "step": 12963 }, { "epoch": 0.9809693163330937, "grad_norm": 2.0117862224578857, "learning_rate": 1.7748132514838868e-08, "loss": 0.7143, "step": 12964 }, { "epoch": 0.9810449850554274, "grad_norm": 2.1950743198394775, "learning_rate": 1.7607040881084558e-08, "loss": 0.5661, "step": 12965 }, { "epoch": 0.9811206537777609, "grad_norm": 2.6558985710144043, "learning_rate": 1.7466511753830626e-08, "loss": 0.5764, "step": 12966 }, { "epoch": 0.9811963225000946, "grad_norm": 1.9270297288894653, "learning_rate": 1.7326545141875038e-08, "loss": 0.6928, "step": 12967 }, { "epoch": 0.9812719912224283, "grad_norm": 1.9736948013305664, "learning_rate": 1.7187141053985776e-08, "loss": 0.6422, "step": 12968 }, { "epoch": 0.9813476599447618, "grad_norm": 2.117433786392212, "learning_rate": 1.7048299498891862e-08, "loss": 0.6823, "step": 12969 }, { "epoch": 0.9814233286670955, "grad_norm": 1.8852287530899048, "learning_rate": 1.6910020485287338e-08, "loss": 0.7003, "step": 12970 }, { "epoch": 0.981498997389429, "grad_norm": 2.4513890743255615, "learning_rate": 1.6772304021832275e-08, "loss": 0.7829, "step": 12971 }, { "epoch": 0.9815746661117627, "grad_norm": 2.3832831382751465, "learning_rate": 1.6635150117150776e-08, "loss": 0.7601, "step": 12972 }, { "epoch": 0.9816503348340964, "grad_norm": 2.286743402481079, "learning_rate": 1.6498558779831973e-08, "loss": 0.7211, "step": 12973 }, { "epoch": 0.9817260035564299, "grad_norm": 3.2696454524993896, "learning_rate": 1.6362530018430022e-08, "loss": 0.7254, "step": 12974 }, { "epoch": 0.9818016722787636, "grad_norm": 2.6552700996398926, "learning_rate": 1.6227063841462108e-08, "loss": 0.7234, "step": 12975 }, { "epoch": 0.9818773410010972, "grad_norm": 2.3581531047821045, "learning_rate": 1.6092160257413446e-08, "loss": 0.7437, "step": 12976 }, { "epoch": 0.9819530097234308, "grad_norm": 3.002993106842041, "learning_rate": 1.5957819274730277e-08, "loss": 0.6852, "step": 12977 }, { "epoch": 0.9820286784457645, "grad_norm": 2.3881561756134033, "learning_rate": 1.5824040901826876e-08, "loss": 0.608, "step": 12978 }, { "epoch": 0.982104347168098, "grad_norm": 2.43519926071167, "learning_rate": 1.5690825147080533e-08, "loss": 0.6577, "step": 12979 }, { "epoch": 0.9821800158904317, "grad_norm": 2.051454544067383, "learning_rate": 1.5558172018833584e-08, "loss": 0.6634, "step": 12980 }, { "epoch": 0.9822556846127654, "grad_norm": 2.9443256855010986, "learning_rate": 1.5426081525392377e-08, "loss": 0.5589, "step": 12981 }, { "epoch": 0.9823313533350989, "grad_norm": 2.4073567390441895, "learning_rate": 1.52945536750303e-08, "loss": 0.6178, "step": 12982 }, { "epoch": 0.9824070220574326, "grad_norm": 2.8144779205322266, "learning_rate": 1.516358847598376e-08, "loss": 0.6876, "step": 12983 }, { "epoch": 0.9824826907797661, "grad_norm": 2.132742166519165, "learning_rate": 1.50331859364522e-08, "loss": 0.6289, "step": 12984 }, { "epoch": 0.9825583595020998, "grad_norm": 2.0860514640808105, "learning_rate": 1.4903346064605085e-08, "loss": 0.6644, "step": 12985 }, { "epoch": 0.9826340282244335, "grad_norm": 2.203529119491577, "learning_rate": 1.4774068868570911e-08, "loss": 0.7307, "step": 12986 }, { "epoch": 0.982709696946767, "grad_norm": 1.9949660301208496, "learning_rate": 1.4645354356446206e-08, "loss": 0.7352, "step": 12987 }, { "epoch": 0.9827853656691007, "grad_norm": 1.9843783378601074, "learning_rate": 1.4517202536291519e-08, "loss": 0.6633, "step": 12988 }, { "epoch": 0.9828610343914344, "grad_norm": 2.39467191696167, "learning_rate": 1.438961341613243e-08, "loss": 0.4948, "step": 12989 }, { "epoch": 0.9829367031137679, "grad_norm": 2.534012794494629, "learning_rate": 1.4262587003959549e-08, "loss": 0.6207, "step": 12990 }, { "epoch": 0.9830123718361016, "grad_norm": 2.261028528213501, "learning_rate": 1.4136123307725512e-08, "loss": 0.6348, "step": 12991 }, { "epoch": 0.9830880405584351, "grad_norm": 2.1703314781188965, "learning_rate": 1.4010222335351985e-08, "loss": 0.5724, "step": 12992 }, { "epoch": 0.9831637092807688, "grad_norm": 2.269578218460083, "learning_rate": 1.3884884094722662e-08, "loss": 0.6435, "step": 12993 }, { "epoch": 0.9832393780031025, "grad_norm": 2.1392128467559814, "learning_rate": 1.376010859368626e-08, "loss": 0.714, "step": 12994 }, { "epoch": 0.983315046725436, "grad_norm": 1.8886982202529907, "learning_rate": 1.3635895840056534e-08, "loss": 0.5763, "step": 12995 }, { "epoch": 0.9833907154477697, "grad_norm": 2.211404323577881, "learning_rate": 1.3512245841613257e-08, "loss": 0.6028, "step": 12996 }, { "epoch": 0.9834663841701032, "grad_norm": 1.8366492986679077, "learning_rate": 1.338915860609824e-08, "loss": 0.6571, "step": 12997 }, { "epoch": 0.9835420528924369, "grad_norm": 3.9259090423583984, "learning_rate": 1.3266634141220312e-08, "loss": 0.7254, "step": 12998 }, { "epoch": 0.9836177216147706, "grad_norm": 2.866379499435425, "learning_rate": 1.314467245465334e-08, "loss": 0.6933, "step": 12999 }, { "epoch": 0.9836933903371041, "grad_norm": 1.9616422653198242, "learning_rate": 1.302327355403321e-08, "loss": 0.6589, "step": 13000 }, { "epoch": 0.9837690590594378, "grad_norm": 2.223374843597412, "learning_rate": 1.2902437446962844e-08, "loss": 0.6306, "step": 13001 }, { "epoch": 0.9838447277817715, "grad_norm": 2.437225818634033, "learning_rate": 1.2782164141010188e-08, "loss": 0.841, "step": 13002 }, { "epoch": 0.983920396504105, "grad_norm": 2.194181203842163, "learning_rate": 1.2662453643706217e-08, "loss": 0.6298, "step": 13003 }, { "epoch": 0.9839960652264387, "grad_norm": 2.2740933895111084, "learning_rate": 1.2543305962548935e-08, "loss": 0.6895, "step": 13004 }, { "epoch": 0.9840717339487722, "grad_norm": 2.391655206680298, "learning_rate": 1.2424721104997371e-08, "loss": 0.7314, "step": 13005 }, { "epoch": 0.9841474026711059, "grad_norm": 2.1336898803710938, "learning_rate": 1.2306699078479588e-08, "loss": 0.609, "step": 13006 }, { "epoch": 0.9842230713934396, "grad_norm": 4.608248710632324, "learning_rate": 1.2189239890386672e-08, "loss": 0.7341, "step": 13007 }, { "epoch": 0.9842987401157731, "grad_norm": 3.4450154304504395, "learning_rate": 1.207234354807374e-08, "loss": 0.6952, "step": 13008 }, { "epoch": 0.9843744088381068, "grad_norm": 2.1393322944641113, "learning_rate": 1.1956010058859934e-08, "loss": 0.6627, "step": 13009 }, { "epoch": 0.9844500775604403, "grad_norm": 2.124804973602295, "learning_rate": 1.1840239430032429e-08, "loss": 0.5678, "step": 13010 }, { "epoch": 0.984525746282774, "grad_norm": 2.0524282455444336, "learning_rate": 1.1725031668840425e-08, "loss": 0.7415, "step": 13011 }, { "epoch": 0.9846014150051077, "grad_norm": 1.9901623725891113, "learning_rate": 1.161038678249815e-08, "loss": 0.541, "step": 13012 }, { "epoch": 0.9846770837274412, "grad_norm": 2.330474376678467, "learning_rate": 1.1496304778185863e-08, "loss": 0.7449, "step": 13013 }, { "epoch": 0.9847527524497749, "grad_norm": 2.8459479808807373, "learning_rate": 1.1382785663046846e-08, "loss": 0.7962, "step": 13014 }, { "epoch": 0.9848284211721086, "grad_norm": 2.1679039001464844, "learning_rate": 1.1269829444191416e-08, "loss": 0.639, "step": 13015 }, { "epoch": 0.9849040898944421, "grad_norm": 1.800337314605713, "learning_rate": 1.1157436128691911e-08, "loss": 0.5612, "step": 13016 }, { "epoch": 0.9849797586167758, "grad_norm": 2.2454311847686768, "learning_rate": 1.1045605723586705e-08, "loss": 0.6121, "step": 13017 }, { "epoch": 0.9850554273391093, "grad_norm": 2.1114368438720703, "learning_rate": 1.0934338235879193e-08, "loss": 0.8751, "step": 13018 }, { "epoch": 0.985131096061443, "grad_norm": 2.5677621364593506, "learning_rate": 1.0823633672538802e-08, "loss": 0.5946, "step": 13019 }, { "epoch": 0.9852067647837767, "grad_norm": 2.1673922538757324, "learning_rate": 1.0713492040495986e-08, "loss": 0.65, "step": 13020 }, { "epoch": 0.9852824335061102, "grad_norm": 2.8875181674957275, "learning_rate": 1.060391334664923e-08, "loss": 0.5534, "step": 13021 }, { "epoch": 0.9853581022284439, "grad_norm": 2.2637150287628174, "learning_rate": 1.0494897597861041e-08, "loss": 0.6161, "step": 13022 }, { "epoch": 0.9854337709507774, "grad_norm": 2.323862314224243, "learning_rate": 1.0386444800957962e-08, "loss": 0.7628, "step": 13023 }, { "epoch": 0.9855094396731111, "grad_norm": 2.155294179916382, "learning_rate": 1.0278554962731557e-08, "loss": 0.657, "step": 13024 }, { "epoch": 0.9855851083954448, "grad_norm": 2.271121025085449, "learning_rate": 1.0171228089938422e-08, "loss": 0.63, "step": 13025 }, { "epoch": 0.9856607771177783, "grad_norm": 2.7450411319732666, "learning_rate": 1.0064464189300181e-08, "loss": 0.6431, "step": 13026 }, { "epoch": 0.985736445840112, "grad_norm": 1.9051148891448975, "learning_rate": 9.958263267501488e-09, "loss": 0.5382, "step": 13027 }, { "epoch": 0.9858121145624457, "grad_norm": 2.6679155826568604, "learning_rate": 9.852625331193021e-09, "loss": 0.5978, "step": 13028 }, { "epoch": 0.9858877832847792, "grad_norm": 2.288546323776245, "learning_rate": 9.747550386991488e-09, "loss": 0.5773, "step": 13029 }, { "epoch": 0.9859634520071129, "grad_norm": 2.641033411026001, "learning_rate": 9.643038441476626e-09, "loss": 0.6219, "step": 13030 }, { "epoch": 0.9860391207294464, "grad_norm": 3.3034932613372803, "learning_rate": 9.539089501193199e-09, "loss": 0.7107, "step": 13031 }, { "epoch": 0.9861147894517801, "grad_norm": 1.9344165325164795, "learning_rate": 9.43570357265e-09, "loss": 0.7148, "step": 13032 }, { "epoch": 0.9861904581741138, "grad_norm": 2.1721315383911133, "learning_rate": 9.332880662321852e-09, "loss": 0.6956, "step": 13033 }, { "epoch": 0.9862661268964473, "grad_norm": 1.8685091733932495, "learning_rate": 9.230620776648602e-09, "loss": 0.583, "step": 13034 }, { "epoch": 0.986341795618781, "grad_norm": 1.820870280265808, "learning_rate": 9.128923922033128e-09, "loss": 0.5824, "step": 13035 }, { "epoch": 0.9864174643411145, "grad_norm": 2.135571002960205, "learning_rate": 9.027790104845335e-09, "loss": 0.7753, "step": 13036 }, { "epoch": 0.9864931330634482, "grad_norm": 3.053931474685669, "learning_rate": 8.927219331417158e-09, "loss": 0.6846, "step": 13037 }, { "epoch": 0.9865688017857819, "grad_norm": 2.2578110694885254, "learning_rate": 8.82721160804656e-09, "loss": 0.6994, "step": 13038 }, { "epoch": 0.9866444705081154, "grad_norm": 2.533595561981201, "learning_rate": 8.727766940997528e-09, "loss": 0.7083, "step": 13039 }, { "epoch": 0.9867201392304491, "grad_norm": 2.277336835861206, "learning_rate": 8.628885336497084e-09, "loss": 0.8314, "step": 13040 }, { "epoch": 0.9867958079527828, "grad_norm": 2.3916232585906982, "learning_rate": 8.530566800738272e-09, "loss": 0.6584, "step": 13041 }, { "epoch": 0.9868714766751163, "grad_norm": 2.5253095626831055, "learning_rate": 8.432811339876168e-09, "loss": 0.6123, "step": 13042 }, { "epoch": 0.98694714539745, "grad_norm": 1.7095435857772827, "learning_rate": 8.335618960033876e-09, "loss": 0.8094, "step": 13043 }, { "epoch": 0.9870228141197835, "grad_norm": 2.151761293411255, "learning_rate": 8.238989667297526e-09, "loss": 0.716, "step": 13044 }, { "epoch": 0.9870984828421172, "grad_norm": 2.2348501682281494, "learning_rate": 8.142923467718277e-09, "loss": 0.7185, "step": 13045 }, { "epoch": 0.9871741515644509, "grad_norm": 2.2857894897460938, "learning_rate": 8.047420367313319e-09, "loss": 0.7555, "step": 13046 }, { "epoch": 0.9872498202867844, "grad_norm": 2.075570821762085, "learning_rate": 7.952480372061866e-09, "loss": 0.6908, "step": 13047 }, { "epoch": 0.9873254890091181, "grad_norm": 2.3449413776397705, "learning_rate": 7.858103487910161e-09, "loss": 0.6586, "step": 13048 }, { "epoch": 0.9874011577314517, "grad_norm": 1.8678113222122192, "learning_rate": 7.764289720767482e-09, "loss": 0.6416, "step": 13049 }, { "epoch": 0.9874768264537853, "grad_norm": 1.8483861684799194, "learning_rate": 7.671039076510123e-09, "loss": 0.6662, "step": 13050 }, { "epoch": 0.987552495176119, "grad_norm": 2.6870992183685303, "learning_rate": 7.578351560976416e-09, "loss": 0.7492, "step": 13051 }, { "epoch": 0.9876281638984525, "grad_norm": 2.3274192810058594, "learning_rate": 7.486227179971717e-09, "loss": 0.7949, "step": 13052 }, { "epoch": 0.9877038326207862, "grad_norm": 2.314833641052246, "learning_rate": 7.394665939264411e-09, "loss": 0.7524, "step": 13053 }, { "epoch": 0.9877795013431199, "grad_norm": 2.826127767562866, "learning_rate": 7.303667844589912e-09, "loss": 0.6682, "step": 13054 }, { "epoch": 0.9878551700654534, "grad_norm": 2.203782081604004, "learning_rate": 7.213232901644662e-09, "loss": 0.5775, "step": 13055 }, { "epoch": 0.9879308387877871, "grad_norm": 2.5531766414642334, "learning_rate": 7.12336111609313e-09, "loss": 0.6546, "step": 13056 }, { "epoch": 0.9880065075101206, "grad_norm": 2.4287493228912354, "learning_rate": 7.034052493562815e-09, "loss": 0.7344, "step": 13057 }, { "epoch": 0.9880821762324543, "grad_norm": 1.6254581212997437, "learning_rate": 6.945307039647242e-09, "loss": 0.8126, "step": 13058 }, { "epoch": 0.988157844954788, "grad_norm": 2.4416098594665527, "learning_rate": 6.857124759903966e-09, "loss": 0.6512, "step": 13059 }, { "epoch": 0.9882335136771215, "grad_norm": 2.1222572326660156, "learning_rate": 6.769505659854569e-09, "loss": 0.7188, "step": 13060 }, { "epoch": 0.9883091823994552, "grad_norm": 2.3706514835357666, "learning_rate": 6.682449744986663e-09, "loss": 0.5678, "step": 13061 }, { "epoch": 0.9883848511217888, "grad_norm": 1.8657547235488892, "learning_rate": 6.5959570207508864e-09, "loss": 0.7439, "step": 13062 }, { "epoch": 0.9884605198441224, "grad_norm": 2.860536813735962, "learning_rate": 6.5100274925649075e-09, "loss": 0.5991, "step": 13063 }, { "epoch": 0.9885361885664561, "grad_norm": 2.22926664352417, "learning_rate": 6.42466116580942e-09, "loss": 0.817, "step": 13064 }, { "epoch": 0.9886118572887896, "grad_norm": 1.8725168704986572, "learning_rate": 6.339858045830149e-09, "loss": 0.6259, "step": 13065 }, { "epoch": 0.9886875260111233, "grad_norm": 2.0670039653778076, "learning_rate": 6.255618137938845e-09, "loss": 0.6289, "step": 13066 }, { "epoch": 0.988763194733457, "grad_norm": 2.009124517440796, "learning_rate": 6.17194144740929e-09, "loss": 0.6825, "step": 13067 }, { "epoch": 0.9888388634557905, "grad_norm": 1.9414106607437134, "learning_rate": 6.088827979483291e-09, "loss": 0.4782, "step": 13068 }, { "epoch": 0.9889145321781242, "grad_norm": 2.9034688472747803, "learning_rate": 6.006277739363686e-09, "loss": 0.6774, "step": 13069 }, { "epoch": 0.9889902009004577, "grad_norm": 2.372115135192871, "learning_rate": 5.924290732221338e-09, "loss": 0.6958, "step": 13070 }, { "epoch": 0.9890658696227914, "grad_norm": 1.480709433555603, "learning_rate": 5.842866963190141e-09, "loss": 0.6686, "step": 13071 }, { "epoch": 0.9891415383451251, "grad_norm": 2.762083053588867, "learning_rate": 5.762006437370015e-09, "loss": 0.6242, "step": 13072 }, { "epoch": 0.9892172070674586, "grad_norm": 2.282045364379883, "learning_rate": 5.681709159822912e-09, "loss": 0.5928, "step": 13073 }, { "epoch": 0.9892928757897923, "grad_norm": 2.124495029449463, "learning_rate": 5.601975135578807e-09, "loss": 0.5653, "step": 13074 }, { "epoch": 0.9893685445121259, "grad_norm": 2.060455560684204, "learning_rate": 5.522804369630707e-09, "loss": 0.6898, "step": 13075 }, { "epoch": 0.9894442132344595, "grad_norm": 2.3240835666656494, "learning_rate": 5.444196866935647e-09, "loss": 0.6417, "step": 13076 }, { "epoch": 0.9895198819567932, "grad_norm": 2.27996826171875, "learning_rate": 5.366152632417687e-09, "loss": 0.5804, "step": 13077 }, { "epoch": 0.9895955506791267, "grad_norm": 2.054598569869995, "learning_rate": 5.288671670962919e-09, "loss": 0.7136, "step": 13078 }, { "epoch": 0.9896712194014604, "grad_norm": 2.2513556480407715, "learning_rate": 5.211753987423462e-09, "loss": 0.8557, "step": 13079 }, { "epoch": 0.9897468881237941, "grad_norm": 2.1142704486846924, "learning_rate": 5.135399586617462e-09, "loss": 0.6724, "step": 13080 }, { "epoch": 0.9898225568461276, "grad_norm": 2.2248411178588867, "learning_rate": 5.059608473325095e-09, "loss": 0.6099, "step": 13081 }, { "epoch": 0.9898982255684613, "grad_norm": 3.098581552505493, "learning_rate": 4.984380652293563e-09, "loss": 0.5557, "step": 13082 }, { "epoch": 0.9899738942907949, "grad_norm": 1.7447338104248047, "learning_rate": 4.909716128234098e-09, "loss": 0.6664, "step": 13083 }, { "epoch": 0.9900495630131285, "grad_norm": 2.116192102432251, "learning_rate": 4.835614905820962e-09, "loss": 0.6416, "step": 13084 }, { "epoch": 0.9901252317354622, "grad_norm": 3.5600974559783936, "learning_rate": 4.762076989695441e-09, "loss": 0.5656, "step": 13085 }, { "epoch": 0.9902009004577957, "grad_norm": 2.4236409664154053, "learning_rate": 4.689102384462851e-09, "loss": 0.6626, "step": 13086 }, { "epoch": 0.9902765691801294, "grad_norm": 1.842241644859314, "learning_rate": 4.616691094693537e-09, "loss": 0.6851, "step": 13087 }, { "epoch": 0.990352237902463, "grad_norm": 2.1137871742248535, "learning_rate": 4.5448431249218715e-09, "loss": 0.5285, "step": 13088 }, { "epoch": 0.9904279066247966, "grad_norm": 2.3001229763031006, "learning_rate": 4.473558479646256e-09, "loss": 0.7189, "step": 13089 }, { "epoch": 0.9905035753471303, "grad_norm": 2.6320502758026123, "learning_rate": 4.402837163331119e-09, "loss": 0.6221, "step": 13090 }, { "epoch": 0.9905792440694638, "grad_norm": 2.4288251399993896, "learning_rate": 4.332679180406918e-09, "loss": 0.5301, "step": 13091 }, { "epoch": 0.9906549127917975, "grad_norm": 2.138550043106079, "learning_rate": 4.2630845352651384e-09, "loss": 0.6783, "step": 13092 }, { "epoch": 0.9907305815141312, "grad_norm": 2.3440093994140625, "learning_rate": 4.1940532322642946e-09, "loss": 0.7674, "step": 13093 }, { "epoch": 0.9908062502364647, "grad_norm": 1.6465092897415161, "learning_rate": 4.125585275728927e-09, "loss": 0.6484, "step": 13094 }, { "epoch": 0.9908819189587984, "grad_norm": 2.4605023860931396, "learning_rate": 4.057680669944608e-09, "loss": 0.7061, "step": 13095 }, { "epoch": 0.990957587681132, "grad_norm": 1.8242436647415161, "learning_rate": 3.990339419164935e-09, "loss": 0.7324, "step": 13096 }, { "epoch": 0.9910332564034656, "grad_norm": 2.511643648147583, "learning_rate": 3.923561527606534e-09, "loss": 0.6348, "step": 13097 }, { "epoch": 0.9911089251257993, "grad_norm": 2.8463854789733887, "learning_rate": 3.857346999452061e-09, "loss": 0.7027, "step": 13098 }, { "epoch": 0.9911845938481328, "grad_norm": 1.9040359258651733, "learning_rate": 3.7916958388481974e-09, "loss": 0.6504, "step": 13099 }, { "epoch": 0.9912602625704665, "grad_norm": 2.7657530307769775, "learning_rate": 3.726608049904656e-09, "loss": 0.8033, "step": 13100 }, { "epoch": 0.9913359312928002, "grad_norm": 2.482766628265381, "learning_rate": 3.662083636698177e-09, "loss": 0.6571, "step": 13101 }, { "epoch": 0.9914116000151337, "grad_norm": 2.55605411529541, "learning_rate": 3.598122603270526e-09, "loss": 0.6026, "step": 13102 }, { "epoch": 0.9914872687374674, "grad_norm": 2.0801234245300293, "learning_rate": 3.534724953625501e-09, "loss": 0.6188, "step": 13103 }, { "epoch": 0.991562937459801, "grad_norm": 3.96653413772583, "learning_rate": 3.4718906917349245e-09, "loss": 0.6812, "step": 13104 }, { "epoch": 0.9916386061821346, "grad_norm": 2.2530484199523926, "learning_rate": 3.4096198215326504e-09, "loss": 0.7594, "step": 13105 }, { "epoch": 0.9917142749044683, "grad_norm": 2.784916639328003, "learning_rate": 3.347912346917559e-09, "loss": 0.6711, "step": 13106 }, { "epoch": 0.9917899436268018, "grad_norm": 2.254490852355957, "learning_rate": 3.286768271756557e-09, "loss": 0.6553, "step": 13107 }, { "epoch": 0.9918656123491355, "grad_norm": 1.7609950304031372, "learning_rate": 3.226187599875585e-09, "loss": 0.6226, "step": 13108 }, { "epoch": 0.9919412810714691, "grad_norm": 2.008504629135132, "learning_rate": 3.166170335070606e-09, "loss": 0.6512, "step": 13109 }, { "epoch": 0.9920169497938027, "grad_norm": 2.3329126834869385, "learning_rate": 3.106716481098615e-09, "loss": 0.7869, "step": 13110 }, { "epoch": 0.9920926185161364, "grad_norm": 2.263775587081909, "learning_rate": 3.0478260416846314e-09, "loss": 0.5358, "step": 13111 }, { "epoch": 0.99216828723847, "grad_norm": 2.2455990314483643, "learning_rate": 2.9894990205147076e-09, "loss": 0.5658, "step": 13112 }, { "epoch": 0.9922439559608036, "grad_norm": 1.8885899782180786, "learning_rate": 2.931735421241921e-09, "loss": 0.744, "step": 13113 }, { "epoch": 0.9923196246831373, "grad_norm": 1.837786316871643, "learning_rate": 2.874535247484378e-09, "loss": 0.5932, "step": 13114 }, { "epoch": 0.9923952934054708, "grad_norm": 2.2272021770477295, "learning_rate": 2.817898502824212e-09, "loss": 0.6078, "step": 13115 }, { "epoch": 0.9924709621278045, "grad_norm": 5.379904747009277, "learning_rate": 2.7618251908065884e-09, "loss": 0.7149, "step": 13116 }, { "epoch": 0.9925466308501381, "grad_norm": 2.127641201019287, "learning_rate": 2.706315314944696e-09, "loss": 0.6478, "step": 13117 }, { "epoch": 0.9926222995724717, "grad_norm": 2.2539281845092773, "learning_rate": 2.6513688787137557e-09, "loss": 0.672, "step": 13118 }, { "epoch": 0.9926979682948054, "grad_norm": 2.355245351791382, "learning_rate": 2.5969858855560138e-09, "loss": 0.5535, "step": 13119 }, { "epoch": 0.9927736370171389, "grad_norm": 2.0226423740386963, "learning_rate": 2.543166338874747e-09, "loss": 0.5628, "step": 13120 }, { "epoch": 0.9928493057394726, "grad_norm": 2.060910701751709, "learning_rate": 2.4899102420422593e-09, "loss": 0.787, "step": 13121 }, { "epoch": 0.9929249744618062, "grad_norm": 2.399125576019287, "learning_rate": 2.4372175983938817e-09, "loss": 0.5982, "step": 13122 }, { "epoch": 0.9930006431841398, "grad_norm": 3.2431602478027344, "learning_rate": 2.385088411227976e-09, "loss": 0.6401, "step": 13123 }, { "epoch": 0.9930763119064735, "grad_norm": 1.9777895212173462, "learning_rate": 2.333522683808931e-09, "loss": 0.5933, "step": 13124 }, { "epoch": 0.993151980628807, "grad_norm": 2.409860372543335, "learning_rate": 2.2825204193681613e-09, "loss": 0.7402, "step": 13125 }, { "epoch": 0.9932276493511407, "grad_norm": 2.1435985565185547, "learning_rate": 2.232081621097115e-09, "loss": 0.7274, "step": 13126 }, { "epoch": 0.9933033180734744, "grad_norm": 2.2561211585998535, "learning_rate": 2.1822062921552644e-09, "loss": 0.5811, "step": 13127 }, { "epoch": 0.9933789867958079, "grad_norm": 2.5629122257232666, "learning_rate": 2.132894435666111e-09, "loss": 0.763, "step": 13128 }, { "epoch": 0.9934546555181416, "grad_norm": 2.6304588317871094, "learning_rate": 2.0841460547181833e-09, "loss": 0.653, "step": 13129 }, { "epoch": 0.9935303242404752, "grad_norm": 2.282959222793579, "learning_rate": 2.035961152364041e-09, "loss": 0.5899, "step": 13130 }, { "epoch": 0.9936059929628088, "grad_norm": 2.0651817321777344, "learning_rate": 1.9883397316202702e-09, "loss": 0.693, "step": 13131 }, { "epoch": 0.9936816616851425, "grad_norm": 2.5978622436523438, "learning_rate": 1.941281795470484e-09, "loss": 0.7713, "step": 13132 }, { "epoch": 0.993757330407476, "grad_norm": 2.876408338546753, "learning_rate": 1.894787346860327e-09, "loss": 0.5898, "step": 13133 }, { "epoch": 0.9938329991298097, "grad_norm": 2.2665717601776123, "learning_rate": 1.848856388702469e-09, "loss": 0.7736, "step": 13134 }, { "epoch": 0.9939086678521433, "grad_norm": 2.229184150695801, "learning_rate": 1.8034889238726093e-09, "loss": 0.5054, "step": 13135 }, { "epoch": 0.9939843365744769, "grad_norm": 2.301201820373535, "learning_rate": 1.7586849552114758e-09, "loss": 0.5685, "step": 13136 }, { "epoch": 0.9940600052968106, "grad_norm": 2.13543701171875, "learning_rate": 1.7144444855258234e-09, "loss": 0.6025, "step": 13137 }, { "epoch": 0.9941356740191442, "grad_norm": 1.9200706481933594, "learning_rate": 1.6707675175854363e-09, "loss": 0.4845, "step": 13138 }, { "epoch": 0.9942113427414778, "grad_norm": 2.272440195083618, "learning_rate": 1.6276540541261265e-09, "loss": 0.6253, "step": 13139 }, { "epoch": 0.9942870114638115, "grad_norm": 2.107754945755005, "learning_rate": 1.5851040978467346e-09, "loss": 0.7687, "step": 13140 }, { "epoch": 0.994362680186145, "grad_norm": 2.178772449493408, "learning_rate": 1.5431176514131285e-09, "loss": 0.5703, "step": 13141 }, { "epoch": 0.9944383489084787, "grad_norm": 2.395909070968628, "learning_rate": 1.5016947174532058e-09, "loss": 0.7377, "step": 13142 }, { "epoch": 0.9945140176308123, "grad_norm": 1.9375349283218384, "learning_rate": 1.4608352985628904e-09, "loss": 0.728, "step": 13143 }, { "epoch": 0.9945896863531459, "grad_norm": 2.407315254211426, "learning_rate": 1.4205393972991366e-09, "loss": 0.7251, "step": 13144 }, { "epoch": 0.9946653550754796, "grad_norm": 2.111492872238159, "learning_rate": 1.3808070161859255e-09, "loss": 0.7817, "step": 13145 }, { "epoch": 0.9947410237978132, "grad_norm": 2.2751195430755615, "learning_rate": 1.341638157712266e-09, "loss": 0.7493, "step": 13146 }, { "epoch": 0.9948166925201468, "grad_norm": 2.5054099559783936, "learning_rate": 1.303032824330197e-09, "loss": 0.6376, "step": 13147 }, { "epoch": 0.9948923612424804, "grad_norm": 2.099950075149536, "learning_rate": 1.264991018457784e-09, "loss": 0.6259, "step": 13148 }, { "epoch": 0.994968029964814, "grad_norm": 2.118708848953247, "learning_rate": 1.2275127424771216e-09, "loss": 0.6925, "step": 13149 }, { "epoch": 0.9950436986871477, "grad_norm": 4.26461935043335, "learning_rate": 1.190597998734333e-09, "loss": 0.6318, "step": 13150 }, { "epoch": 0.9951193674094813, "grad_norm": 1.8085392713546753, "learning_rate": 1.1542467895425679e-09, "loss": 0.5068, "step": 13151 }, { "epoch": 0.9951950361318149, "grad_norm": 2.8070521354675293, "learning_rate": 1.1184591171780056e-09, "loss": 0.6108, "step": 13152 }, { "epoch": 0.9952707048541486, "grad_norm": 2.0022857189178467, "learning_rate": 1.0832349838808542e-09, "loss": 0.5927, "step": 13153 }, { "epoch": 0.9953463735764821, "grad_norm": 2.230762243270874, "learning_rate": 1.0485743918583478e-09, "loss": 0.6105, "step": 13154 }, { "epoch": 0.9954220422988158, "grad_norm": 2.8104093074798584, "learning_rate": 1.0144773432797516e-09, "loss": 0.5951, "step": 13155 }, { "epoch": 0.9954977110211494, "grad_norm": 2.1270148754119873, "learning_rate": 9.809438402803572e-10, "loss": 0.6414, "step": 13156 }, { "epoch": 0.995573379743483, "grad_norm": 2.065595865249634, "learning_rate": 9.479738849614838e-10, "loss": 0.5966, "step": 13157 }, { "epoch": 0.9956490484658167, "grad_norm": 2.2215771675109863, "learning_rate": 9.15567479386481e-10, "loss": 0.6672, "step": 13158 }, { "epoch": 0.9957247171881503, "grad_norm": 2.1374623775482178, "learning_rate": 8.837246255847253e-10, "loss": 0.6385, "step": 13159 }, { "epoch": 0.9958003859104839, "grad_norm": 2.5752291679382324, "learning_rate": 8.524453255516207e-10, "loss": 0.6717, "step": 13160 }, { "epoch": 0.9958760546328175, "grad_norm": 2.127338409423828, "learning_rate": 8.217295812446013e-10, "loss": 0.5726, "step": 13161 }, { "epoch": 0.9959517233551511, "grad_norm": 2.5369412899017334, "learning_rate": 7.915773945881277e-10, "loss": 0.6269, "step": 13162 }, { "epoch": 0.9960273920774848, "grad_norm": 3.5135183334350586, "learning_rate": 7.619887674696902e-10, "loss": 0.5283, "step": 13163 }, { "epoch": 0.9961030607998184, "grad_norm": 2.36592960357666, "learning_rate": 7.329637017428059e-10, "loss": 0.6122, "step": 13164 }, { "epoch": 0.996178729522152, "grad_norm": 2.4969322681427, "learning_rate": 7.045021992250211e-10, "loss": 0.6736, "step": 13165 }, { "epoch": 0.9962543982444857, "grad_norm": 1.717811107635498, "learning_rate": 6.766042616989098e-10, "loss": 0.6547, "step": 13166 }, { "epoch": 0.9963300669668192, "grad_norm": 1.9306954145431519, "learning_rate": 6.49269890911075e-10, "loss": 0.7854, "step": 13167 }, { "epoch": 0.9964057356891529, "grad_norm": 2.652772903442383, "learning_rate": 6.224990885721482e-10, "loss": 0.5947, "step": 13168 }, { "epoch": 0.9964814044114865, "grad_norm": 2.094191074371338, "learning_rate": 5.962918563607867e-10, "loss": 0.7139, "step": 13169 }, { "epoch": 0.9965570731338201, "grad_norm": 2.385401725769043, "learning_rate": 5.706481959176779e-10, "loss": 0.6746, "step": 13170 }, { "epoch": 0.9966327418561538, "grad_norm": 2.2519052028656006, "learning_rate": 5.455681088475383e-10, "loss": 0.6605, "step": 13171 }, { "epoch": 0.9967084105784874, "grad_norm": 2.4455978870391846, "learning_rate": 5.210515967221108e-10, "loss": 0.6044, "step": 13172 }, { "epoch": 0.996784079300821, "grad_norm": 2.6201171875, "learning_rate": 4.970986610761675e-10, "loss": 0.5899, "step": 13173 }, { "epoch": 0.9968597480231546, "grad_norm": 2.2739479541778564, "learning_rate": 4.737093034095086e-10, "loss": 0.6609, "step": 13174 }, { "epoch": 0.9969354167454882, "grad_norm": 2.2343196868896484, "learning_rate": 4.5088352518796173e-10, "loss": 0.6017, "step": 13175 }, { "epoch": 0.9970110854678219, "grad_norm": 2.2220582962036133, "learning_rate": 4.286213278393847e-10, "loss": 0.6353, "step": 13176 }, { "epoch": 0.9970867541901555, "grad_norm": 2.405085802078247, "learning_rate": 4.0692271275866167e-10, "loss": 0.5783, "step": 13177 }, { "epoch": 0.9971624229124891, "grad_norm": 2.3258261680603027, "learning_rate": 3.8578768130470565e-10, "loss": 0.5282, "step": 13178 }, { "epoch": 0.9972380916348228, "grad_norm": 2.120819568634033, "learning_rate": 3.652162348014576e-10, "loss": 0.7857, "step": 13179 }, { "epoch": 0.9973137603571564, "grad_norm": 2.094067335128784, "learning_rate": 3.4520837453688726e-10, "loss": 0.7784, "step": 13180 }, { "epoch": 0.99738942907949, "grad_norm": 1.7406028509140015, "learning_rate": 3.257641017629931e-10, "loss": 0.617, "step": 13181 }, { "epoch": 0.9974650978018236, "grad_norm": 1.7582353353500366, "learning_rate": 3.0688341769880004e-10, "loss": 0.6051, "step": 13182 }, { "epoch": 0.9975407665241572, "grad_norm": 2.7545251846313477, "learning_rate": 2.8856632352636247e-10, "loss": 0.5891, "step": 13183 }, { "epoch": 0.9976164352464909, "grad_norm": 2.582125425338745, "learning_rate": 2.708128203917637e-10, "loss": 0.6018, "step": 13184 }, { "epoch": 0.9976921039688245, "grad_norm": 2.3737707138061523, "learning_rate": 2.536229094081133e-10, "loss": 0.6962, "step": 13185 }, { "epoch": 0.9977677726911581, "grad_norm": 2.5087530612945557, "learning_rate": 2.369965916505512e-10, "loss": 0.7588, "step": 13186 }, { "epoch": 0.9978434414134917, "grad_norm": 2.271505117416382, "learning_rate": 2.2093386816124383e-10, "loss": 0.6266, "step": 13187 }, { "epoch": 0.9979191101358253, "grad_norm": 2.8579909801483154, "learning_rate": 2.054347399463863e-10, "loss": 0.7987, "step": 13188 }, { "epoch": 0.997994778858159, "grad_norm": 1.9885179996490479, "learning_rate": 1.9049920797620245e-10, "loss": 0.6924, "step": 13189 }, { "epoch": 0.9980704475804926, "grad_norm": 2.303568124771118, "learning_rate": 1.7612727318494503e-10, "loss": 0.783, "step": 13190 }, { "epoch": 0.9981461163028262, "grad_norm": 2.4012231826782227, "learning_rate": 1.6231893647389306e-10, "loss": 0.6901, "step": 13191 }, { "epoch": 0.9982217850251599, "grad_norm": 2.4622113704681396, "learning_rate": 1.4907419870835437e-10, "loss": 0.7907, "step": 13192 }, { "epoch": 0.9982974537474935, "grad_norm": 2.3311586380004883, "learning_rate": 1.3639306071566714e-10, "loss": 0.7405, "step": 13193 }, { "epoch": 0.9983731224698271, "grad_norm": 2.215780019760132, "learning_rate": 1.2427552329119517e-10, "loss": 0.6988, "step": 13194 }, { "epoch": 0.9984487911921607, "grad_norm": 2.3962464332580566, "learning_rate": 1.12721587194331e-10, "loss": 0.6859, "step": 13195 }, { "epoch": 0.9985244599144943, "grad_norm": 1.8870036602020264, "learning_rate": 1.0173125314749676e-10, "loss": 0.6691, "step": 13196 }, { "epoch": 0.998600128636828, "grad_norm": 1.81784188747406, "learning_rate": 9.130452184014093e-11, "loss": 0.5999, "step": 13197 }, { "epoch": 0.9986757973591616, "grad_norm": 2.0552120208740234, "learning_rate": 8.14413939237424e-11, "loss": 0.5859, "step": 13198 }, { "epoch": 0.9987514660814952, "grad_norm": 2.1729273796081543, "learning_rate": 7.21418700178056e-11, "loss": 0.5408, "step": 13199 }, { "epoch": 0.9988271348038288, "grad_norm": 1.8984811305999756, "learning_rate": 6.340595070286614e-11, "loss": 0.6977, "step": 13200 }, { "epoch": 0.9989028035261625, "grad_norm": 2.0209014415740967, "learning_rate": 5.5233636526486054e-11, "loss": 0.6062, "step": 13201 }, { "epoch": 0.9989784722484961, "grad_norm": 2.05021333694458, "learning_rate": 4.7624928001255286e-11, "loss": 0.8087, "step": 13202 }, { "epoch": 0.9990541409708297, "grad_norm": 2.2087504863739014, "learning_rate": 4.057982560279339e-11, "loss": 0.7889, "step": 13203 }, { "epoch": 0.9991298096931633, "grad_norm": 2.0803823471069336, "learning_rate": 3.409832977274707e-11, "loss": 0.6568, "step": 13204 }, { "epoch": 0.999205478415497, "grad_norm": 2.142883777618408, "learning_rate": 2.818044091779104e-11, "loss": 0.6263, "step": 13205 }, { "epoch": 0.9992811471378306, "grad_norm": 2.5195977687835693, "learning_rate": 2.2826159406630353e-11, "loss": 0.6548, "step": 13206 }, { "epoch": 0.9993568158601642, "grad_norm": 1.918473720550537, "learning_rate": 1.8035485574996458e-11, "loss": 0.6472, "step": 13207 }, { "epoch": 0.9994324845824978, "grad_norm": 2.169924020767212, "learning_rate": 1.380841972464797e-11, "loss": 0.5984, "step": 13208 }, { "epoch": 0.9995081533048314, "grad_norm": 2.4711294174194336, "learning_rate": 1.0144962118374678e-11, "loss": 0.6986, "step": 13209 }, { "epoch": 0.9995838220271651, "grad_norm": 1.8922516107559204, "learning_rate": 7.045112986991953e-12, "loss": 0.6675, "step": 13210 }, { "epoch": 0.9996594907494987, "grad_norm": 1.9481332302093506, "learning_rate": 4.508872523345531e-12, "loss": 0.6967, "step": 13211 }, { "epoch": 0.9997351594718323, "grad_norm": 2.386504650115967, "learning_rate": 2.536240887307528e-12, "loss": 0.7459, "step": 13212 }, { "epoch": 0.9998108281941659, "grad_norm": 1.9094631671905518, "learning_rate": 1.127218201779634e-12, "loss": 0.6635, "step": 13213 }, { "epoch": 0.9998864969164996, "grad_norm": 2.6249496936798096, "learning_rate": 2.818045546915116e-13, "loss": 0.8093, "step": 13214 }, { "epoch": 0.9999621656388332, "grad_norm": 2.202721118927002, "learning_rate": 0.0, "loss": 0.7656, "step": 13215 }, { "epoch": 0.9999621656388332, "step": 13215, "total_flos": 4.661948874456302e+18, "train_loss": 0.7379108269377905, "train_runtime": 288889.3373, "train_samples_per_second": 2.928, "train_steps_per_second": 0.046 } ], "logging_steps": 1.0, "max_steps": 13215, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.661948874456302e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }