{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999589956672088, "eval_steps": 500, "global_step": 18290, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.467244372155324e-05, "grad_norm": 33030144.0, "learning_rate": 1.8214936247723134e-08, "loss": 2.567, "step": 1 }, { "epoch": 0.00010934488744310648, "grad_norm": 24248320.0, "learning_rate": 3.642987249544627e-08, "loss": 2.58, "step": 2 }, { "epoch": 0.00016401733116465974, "grad_norm": 70144.0, "learning_rate": 5.4644808743169406e-08, "loss": 2.8013, "step": 3 }, { "epoch": 0.00021868977488621297, "grad_norm": 10368.001953125, "learning_rate": 7.285974499089254e-08, "loss": 2.639, "step": 4 }, { "epoch": 0.0002733622186077662, "grad_norm": 6816.00732421875, "learning_rate": 9.107468123861569e-08, "loss": 2.8062, "step": 5 }, { "epoch": 0.0003280346623293195, "grad_norm": 65024.0, "learning_rate": 1.0928961748633881e-07, "loss": 2.9574, "step": 6 }, { "epoch": 0.0003827071060508727, "grad_norm": 3696.0029296875, "learning_rate": 1.2750455373406196e-07, "loss": 2.3723, "step": 7 }, { "epoch": 0.00043737954977242594, "grad_norm": 23168.0, "learning_rate": 1.4571948998178507e-07, "loss": 2.3874, "step": 8 }, { "epoch": 0.0004920519934939792, "grad_norm": 17408.0, "learning_rate": 1.639344262295082e-07, "loss": 2.6225, "step": 9 }, { "epoch": 0.0005467244372155324, "grad_norm": 8320.0087890625, "learning_rate": 1.8214936247723137e-07, "loss": 3.0796, "step": 10 }, { "epoch": 0.0006013968809370857, "grad_norm": 2416.023193359375, "learning_rate": 2.0036429872495446e-07, "loss": 3.2167, "step": 11 }, { "epoch": 0.000656069324658639, "grad_norm": 1176.0054931640625, "learning_rate": 2.1857923497267762e-07, "loss": 2.4428, "step": 12 }, { "epoch": 0.0007107417683801921, "grad_norm": 1288.0125732421875, "learning_rate": 2.3679417122040076e-07, "loss": 2.5596, "step": 13 }, { "epoch": 0.0007654142121017454, "grad_norm": 684.0338134765625, "learning_rate": 2.550091074681239e-07, "loss": 2.5438, "step": 14 }, { "epoch": 0.0008200866558232987, "grad_norm": 12352.0009765625, "learning_rate": 2.73224043715847e-07, "loss": 2.5967, "step": 15 }, { "epoch": 0.0008747590995448519, "grad_norm": 40704.0, "learning_rate": 2.9143897996357015e-07, "loss": 3.1502, "step": 16 }, { "epoch": 0.0009294315432664052, "grad_norm": 9536.0009765625, "learning_rate": 3.096539162112933e-07, "loss": 2.793, "step": 17 }, { "epoch": 0.0009841039869879584, "grad_norm": 740.0360717773438, "learning_rate": 3.278688524590164e-07, "loss": 2.8053, "step": 18 }, { "epoch": 0.0010387764307095116, "grad_norm": 1872.0186767578125, "learning_rate": 3.4608378870673953e-07, "loss": 2.6896, "step": 19 }, { "epoch": 0.0010934488744310648, "grad_norm": 1576.0125732421875, "learning_rate": 3.6429872495446275e-07, "loss": 2.6433, "step": 20 }, { "epoch": 0.0011481213181526182, "grad_norm": 1448.02490234375, "learning_rate": 3.825136612021858e-07, "loss": 2.8423, "step": 21 }, { "epoch": 0.0012027937618741714, "grad_norm": 1648.024169921875, "learning_rate": 4.007285974499089e-07, "loss": 2.7137, "step": 22 }, { "epoch": 0.0012574662055957245, "grad_norm": 5440.00732421875, "learning_rate": 4.1894353369763213e-07, "loss": 2.7169, "step": 23 }, { "epoch": 0.001312138649317278, "grad_norm": 716.022216796875, "learning_rate": 4.3715846994535524e-07, "loss": 2.6772, "step": 24 }, { "epoch": 0.001366811093038831, "grad_norm": 1080.0130615234375, "learning_rate": 4.5537340619307836e-07, "loss": 2.4667, "step": 25 }, { "epoch": 0.0014214835367603843, "grad_norm": 504.0284423828125, "learning_rate": 4.735883424408015e-07, "loss": 2.7326, "step": 26 }, { "epoch": 0.0014761559804819377, "grad_norm": 1496.0078125, "learning_rate": 4.918032786885246e-07, "loss": 2.3757, "step": 27 }, { "epoch": 0.0015308284242034908, "grad_norm": 756.0468139648438, "learning_rate": 5.100182149362478e-07, "loss": 2.7044, "step": 28 }, { "epoch": 0.001585500867925044, "grad_norm": 784.0318603515625, "learning_rate": 5.28233151183971e-07, "loss": 2.5848, "step": 29 }, { "epoch": 0.0016401733116465974, "grad_norm": 728.0208740234375, "learning_rate": 5.46448087431694e-07, "loss": 2.6403, "step": 30 }, { "epoch": 0.0016948457553681506, "grad_norm": 7040.00244140625, "learning_rate": 5.646630236794172e-07, "loss": 2.5012, "step": 31 }, { "epoch": 0.0017495181990897037, "grad_norm": 1720.0067138671875, "learning_rate": 5.828779599271403e-07, "loss": 2.5169, "step": 32 }, { "epoch": 0.0018041906428112571, "grad_norm": 952.0287475585938, "learning_rate": 6.010928961748634e-07, "loss": 2.7338, "step": 33 }, { "epoch": 0.0018588630865328103, "grad_norm": 576.015625, "learning_rate": 6.193078324225866e-07, "loss": 2.3846, "step": 34 }, { "epoch": 0.0019135355302543635, "grad_norm": 660.0448608398438, "learning_rate": 6.375227686703097e-07, "loss": 2.6069, "step": 35 }, { "epoch": 0.001968207973975917, "grad_norm": 2880.00830078125, "learning_rate": 6.557377049180328e-07, "loss": 2.6292, "step": 36 }, { "epoch": 0.00202288041769747, "grad_norm": 956.0139770507812, "learning_rate": 6.73952641165756e-07, "loss": 2.4816, "step": 37 }, { "epoch": 0.0020775528614190232, "grad_norm": 1760.0037841796875, "learning_rate": 6.921675774134791e-07, "loss": 2.0911, "step": 38 }, { "epoch": 0.0021322253051405766, "grad_norm": 740.0234375, "learning_rate": 7.103825136612022e-07, "loss": 2.2564, "step": 39 }, { "epoch": 0.0021868977488621296, "grad_norm": 1128.021240234375, "learning_rate": 7.285974499089255e-07, "loss": 2.5969, "step": 40 }, { "epoch": 0.002241570192583683, "grad_norm": 676.0076293945312, "learning_rate": 7.468123861566486e-07, "loss": 2.5139, "step": 41 }, { "epoch": 0.0022962426363052364, "grad_norm": 780.0183715820312, "learning_rate": 7.650273224043716e-07, "loss": 2.4179, "step": 42 }, { "epoch": 0.0023509150800267893, "grad_norm": 41472.00390625, "learning_rate": 7.832422586520947e-07, "loss": 2.9822, "step": 43 }, { "epoch": 0.0024055875237483427, "grad_norm": 29056.001953125, "learning_rate": 8.014571948998178e-07, "loss": 2.5244, "step": 44 }, { "epoch": 0.002460259967469896, "grad_norm": 7008.0166015625, "learning_rate": 8.196721311475409e-07, "loss": 3.697, "step": 45 }, { "epoch": 0.002514932411191449, "grad_norm": 1504.0179443359375, "learning_rate": 8.378870673952643e-07, "loss": 2.7627, "step": 46 }, { "epoch": 0.0025696048549130024, "grad_norm": 840.0156860351562, "learning_rate": 8.561020036429874e-07, "loss": 2.4137, "step": 47 }, { "epoch": 0.002624277298634556, "grad_norm": 892.017822265625, "learning_rate": 8.743169398907105e-07, "loss": 2.5848, "step": 48 }, { "epoch": 0.002678949742356109, "grad_norm": 1440.0084228515625, "learning_rate": 8.925318761384336e-07, "loss": 2.4742, "step": 49 }, { "epoch": 0.002733622186077662, "grad_norm": 664.0147094726562, "learning_rate": 9.107468123861567e-07, "loss": 2.4924, "step": 50 }, { "epoch": 0.0027882946297992156, "grad_norm": 2192.021484375, "learning_rate": 9.289617486338799e-07, "loss": 2.8679, "step": 51 }, { "epoch": 0.0028429670735207685, "grad_norm": 864.0145874023438, "learning_rate": 9.47176684881603e-07, "loss": 2.3642, "step": 52 }, { "epoch": 0.002897639517242322, "grad_norm": 944.016357421875, "learning_rate": 9.653916211293261e-07, "loss": 2.4912, "step": 53 }, { "epoch": 0.0029523119609638753, "grad_norm": 632.0411376953125, "learning_rate": 9.836065573770493e-07, "loss": 2.5352, "step": 54 }, { "epoch": 0.0030069844046854283, "grad_norm": 544.0252685546875, "learning_rate": 1.0018214936247724e-06, "loss": 2.2216, "step": 55 }, { "epoch": 0.0030616568484069817, "grad_norm": 2336.0068359375, "learning_rate": 1.0200364298724957e-06, "loss": 2.415, "step": 56 }, { "epoch": 0.003116329292128535, "grad_norm": 2040.0159912109375, "learning_rate": 1.0382513661202188e-06, "loss": 2.7107, "step": 57 }, { "epoch": 0.003171001735850088, "grad_norm": 1296.02490234375, "learning_rate": 1.056466302367942e-06, "loss": 2.7423, "step": 58 }, { "epoch": 0.0032256741795716414, "grad_norm": 856.0162353515625, "learning_rate": 1.074681238615665e-06, "loss": 2.4244, "step": 59 }, { "epoch": 0.003280346623293195, "grad_norm": 1800.0068359375, "learning_rate": 1.092896174863388e-06, "loss": 2.4149, "step": 60 }, { "epoch": 0.0033350190670147477, "grad_norm": 1280.0245361328125, "learning_rate": 1.111111111111111e-06, "loss": 2.7142, "step": 61 }, { "epoch": 0.003389691510736301, "grad_norm": 1376.0135498046875, "learning_rate": 1.1293260473588344e-06, "loss": 2.4186, "step": 62 }, { "epoch": 0.0034443639544578545, "grad_norm": 252.00985717773438, "learning_rate": 1.1475409836065575e-06, "loss": 2.3165, "step": 63 }, { "epoch": 0.0034990363981794075, "grad_norm": 700.0087890625, "learning_rate": 1.1657559198542806e-06, "loss": 2.3622, "step": 64 }, { "epoch": 0.003553708841900961, "grad_norm": 7904.00390625, "learning_rate": 1.1839708561020037e-06, "loss": 2.5966, "step": 65 }, { "epoch": 0.0036083812856225143, "grad_norm": 3520.0048828125, "learning_rate": 1.2021857923497268e-06, "loss": 2.3202, "step": 66 }, { "epoch": 0.0036630537293440672, "grad_norm": 1096.0205078125, "learning_rate": 1.2204007285974501e-06, "loss": 2.3971, "step": 67 }, { "epoch": 0.0037177261730656206, "grad_norm": 888.0159912109375, "learning_rate": 1.2386156648451732e-06, "loss": 2.6137, "step": 68 }, { "epoch": 0.003772398616787174, "grad_norm": 1056.0140380859375, "learning_rate": 1.2568306010928963e-06, "loss": 2.7172, "step": 69 }, { "epoch": 0.003827071060508727, "grad_norm": 864.0096435546875, "learning_rate": 1.2750455373406195e-06, "loss": 2.4092, "step": 70 }, { "epoch": 0.0038817435042302804, "grad_norm": 604.0079345703125, "learning_rate": 1.2932604735883426e-06, "loss": 2.3226, "step": 71 }, { "epoch": 0.003936415947951834, "grad_norm": 624.0324096679688, "learning_rate": 1.3114754098360657e-06, "loss": 2.5376, "step": 72 }, { "epoch": 0.003991088391673387, "grad_norm": 2992.004638671875, "learning_rate": 1.3296903460837888e-06, "loss": 2.569, "step": 73 }, { "epoch": 0.00404576083539494, "grad_norm": 1232.01025390625, "learning_rate": 1.347905282331512e-06, "loss": 2.3506, "step": 74 }, { "epoch": 0.0041004332791164935, "grad_norm": 392.0179443359375, "learning_rate": 1.3661202185792352e-06, "loss": 2.5126, "step": 75 }, { "epoch": 0.0041551057228380464, "grad_norm": 482.03662109375, "learning_rate": 1.3843351548269581e-06, "loss": 2.4111, "step": 76 }, { "epoch": 0.004209778166559599, "grad_norm": 486.0335693359375, "learning_rate": 1.4025500910746814e-06, "loss": 2.5652, "step": 77 }, { "epoch": 0.004264450610281153, "grad_norm": 968.0347900390625, "learning_rate": 1.4207650273224043e-06, "loss": 2.6477, "step": 78 }, { "epoch": 0.004319123054002706, "grad_norm": 656.0114135742188, "learning_rate": 1.4389799635701277e-06, "loss": 2.3764, "step": 79 }, { "epoch": 0.004373795497724259, "grad_norm": 2160.0107421875, "learning_rate": 1.457194899817851e-06, "loss": 2.5509, "step": 80 }, { "epoch": 0.004428467941445813, "grad_norm": 2640.0029296875, "learning_rate": 1.4754098360655739e-06, "loss": 2.4232, "step": 81 }, { "epoch": 0.004483140385167366, "grad_norm": 1032.002685546875, "learning_rate": 1.4936247723132972e-06, "loss": 2.0851, "step": 82 }, { "epoch": 0.004537812828888919, "grad_norm": 548.022216796875, "learning_rate": 1.5118397085610201e-06, "loss": 2.5156, "step": 83 }, { "epoch": 0.004592485272610473, "grad_norm": 944.0350952148438, "learning_rate": 1.5300546448087432e-06, "loss": 2.5995, "step": 84 }, { "epoch": 0.004647157716332026, "grad_norm": 218.04299926757812, "learning_rate": 1.5482695810564663e-06, "loss": 2.4298, "step": 85 }, { "epoch": 0.004701830160053579, "grad_norm": 237.01837158203125, "learning_rate": 1.5664845173041894e-06, "loss": 2.3508, "step": 86 }, { "epoch": 0.0047565026037751324, "grad_norm": 296.0274658203125, "learning_rate": 1.5846994535519128e-06, "loss": 2.3718, "step": 87 }, { "epoch": 0.004811175047496685, "grad_norm": 616.03076171875, "learning_rate": 1.6029143897996357e-06, "loss": 2.561, "step": 88 }, { "epoch": 0.004865847491218238, "grad_norm": 9280.00390625, "learning_rate": 1.621129326047359e-06, "loss": 2.5975, "step": 89 }, { "epoch": 0.004920519934939792, "grad_norm": 1112.019775390625, "learning_rate": 1.6393442622950819e-06, "loss": 2.6299, "step": 90 }, { "epoch": 0.004975192378661345, "grad_norm": 494.03070068359375, "learning_rate": 1.6575591985428052e-06, "loss": 2.519, "step": 91 }, { "epoch": 0.005029864822382898, "grad_norm": 516.0339965820312, "learning_rate": 1.6757741347905285e-06, "loss": 2.4542, "step": 92 }, { "epoch": 0.005084537266104452, "grad_norm": 928.0206909179688, "learning_rate": 1.6939890710382514e-06, "loss": 2.5954, "step": 93 }, { "epoch": 0.005139209709826005, "grad_norm": 1624.0107421875, "learning_rate": 1.7122040072859748e-06, "loss": 2.4755, "step": 94 }, { "epoch": 0.005193882153547558, "grad_norm": 4864.00341796875, "learning_rate": 1.7304189435336977e-06, "loss": 2.6306, "step": 95 }, { "epoch": 0.005248554597269112, "grad_norm": 928.0137939453125, "learning_rate": 1.748633879781421e-06, "loss": 2.5861, "step": 96 }, { "epoch": 0.005303227040990665, "grad_norm": 880.0443115234375, "learning_rate": 1.766848816029144e-06, "loss": 2.9363, "step": 97 }, { "epoch": 0.005357899484712218, "grad_norm": 450.0501708984375, "learning_rate": 1.7850637522768672e-06, "loss": 2.7486, "step": 98 }, { "epoch": 0.005412571928433771, "grad_norm": 1568.07177734375, "learning_rate": 1.8032786885245903e-06, "loss": 3.3198, "step": 99 }, { "epoch": 0.005467244372155324, "grad_norm": 1072.01171875, "learning_rate": 1.8214936247723134e-06, "loss": 2.5423, "step": 100 }, { "epoch": 0.005521916815876877, "grad_norm": 556.0254516601562, "learning_rate": 1.8397085610200365e-06, "loss": 2.4559, "step": 101 }, { "epoch": 0.005576589259598431, "grad_norm": 2928.01123046875, "learning_rate": 1.8579234972677599e-06, "loss": 2.9167, "step": 102 }, { "epoch": 0.005631261703319984, "grad_norm": 392.0097961425781, "learning_rate": 1.8761384335154828e-06, "loss": 2.453, "step": 103 }, { "epoch": 0.005685934147041537, "grad_norm": 776.044921875, "learning_rate": 1.894353369763206e-06, "loss": 3.0035, "step": 104 }, { "epoch": 0.005740606590763091, "grad_norm": 1352.014892578125, "learning_rate": 1.912568306010929e-06, "loss": 2.5198, "step": 105 }, { "epoch": 0.005795279034484644, "grad_norm": 652.0107421875, "learning_rate": 1.9307832422586523e-06, "loss": 2.166, "step": 106 }, { "epoch": 0.005849951478206197, "grad_norm": 720.0281982421875, "learning_rate": 1.9489981785063756e-06, "loss": 2.5112, "step": 107 }, { "epoch": 0.005904623921927751, "grad_norm": 952.0064086914062, "learning_rate": 1.9672131147540985e-06, "loss": 2.2098, "step": 108 }, { "epoch": 0.005959296365649304, "grad_norm": 588.0551147460938, "learning_rate": 1.985428051001822e-06, "loss": 2.4876, "step": 109 }, { "epoch": 0.0060139688093708565, "grad_norm": 532.036376953125, "learning_rate": 2.0036429872495447e-06, "loss": 2.6228, "step": 110 }, { "epoch": 0.00606864125309241, "grad_norm": 6144.00146484375, "learning_rate": 2.021857923497268e-06, "loss": 2.296, "step": 111 }, { "epoch": 0.006123313696813963, "grad_norm": 2672.017822265625, "learning_rate": 2.0400728597449914e-06, "loss": 2.7684, "step": 112 }, { "epoch": 0.006177986140535516, "grad_norm": 1704.0052490234375, "learning_rate": 2.0582877959927143e-06, "loss": 2.3471, "step": 113 }, { "epoch": 0.00623265858425707, "grad_norm": 1616.0126953125, "learning_rate": 2.0765027322404376e-06, "loss": 2.2255, "step": 114 }, { "epoch": 0.006287331027978623, "grad_norm": 1016.016845703125, "learning_rate": 2.0947176684881605e-06, "loss": 2.3833, "step": 115 }, { "epoch": 0.006342003471700176, "grad_norm": 304.02947998046875, "learning_rate": 2.112932604735884e-06, "loss": 2.1778, "step": 116 }, { "epoch": 0.00639667591542173, "grad_norm": 199.04249572753906, "learning_rate": 2.1311475409836067e-06, "loss": 2.2277, "step": 117 }, { "epoch": 0.006451348359143283, "grad_norm": 7040.00439453125, "learning_rate": 2.14936247723133e-06, "loss": 2.7477, "step": 118 }, { "epoch": 0.006506020802864836, "grad_norm": 2944.005615234375, "learning_rate": 2.167577413479053e-06, "loss": 2.4396, "step": 119 }, { "epoch": 0.00656069324658639, "grad_norm": 568.011962890625, "learning_rate": 2.185792349726776e-06, "loss": 2.3279, "step": 120 }, { "epoch": 0.0066153656903079425, "grad_norm": 484.055908203125, "learning_rate": 2.204007285974499e-06, "loss": 2.5743, "step": 121 }, { "epoch": 0.0066700381340294955, "grad_norm": 788.01171875, "learning_rate": 2.222222222222222e-06, "loss": 2.3407, "step": 122 }, { "epoch": 0.006724710577751049, "grad_norm": 14080.0078125, "learning_rate": 2.2404371584699454e-06, "loss": 2.6, "step": 123 }, { "epoch": 0.006779383021472602, "grad_norm": 1200.015625, "learning_rate": 2.2586520947176687e-06, "loss": 2.6653, "step": 124 }, { "epoch": 0.006834055465194155, "grad_norm": 185.04591369628906, "learning_rate": 2.2768670309653916e-06, "loss": 2.461, "step": 125 }, { "epoch": 0.006888727908915709, "grad_norm": 223.0357208251953, "learning_rate": 2.295081967213115e-06, "loss": 2.471, "step": 126 }, { "epoch": 0.006943400352637262, "grad_norm": 260.0277404785156, "learning_rate": 2.313296903460838e-06, "loss": 2.2635, "step": 127 }, { "epoch": 0.006998072796358815, "grad_norm": 218.0380401611328, "learning_rate": 2.331511839708561e-06, "loss": 2.5004, "step": 128 }, { "epoch": 0.007052745240080369, "grad_norm": 376.054931640625, "learning_rate": 2.3497267759562845e-06, "loss": 2.3006, "step": 129 }, { "epoch": 0.007107417683801922, "grad_norm": 1048.0228271484375, "learning_rate": 2.3679417122040074e-06, "loss": 2.6466, "step": 130 }, { "epoch": 0.007162090127523475, "grad_norm": 1216.009765625, "learning_rate": 2.3861566484517307e-06, "loss": 2.2483, "step": 131 }, { "epoch": 0.0072167625712450285, "grad_norm": 1320.0145263671875, "learning_rate": 2.4043715846994536e-06, "loss": 2.4053, "step": 132 }, { "epoch": 0.0072714350149665815, "grad_norm": 352.0250549316406, "learning_rate": 2.422586520947177e-06, "loss": 2.514, "step": 133 }, { "epoch": 0.0073261074586881345, "grad_norm": 450.0653991699219, "learning_rate": 2.4408014571949003e-06, "loss": 2.7958, "step": 134 }, { "epoch": 0.007380779902409688, "grad_norm": 1904.0076904296875, "learning_rate": 2.459016393442623e-06, "loss": 2.2655, "step": 135 }, { "epoch": 0.007435452346131241, "grad_norm": 1160.0611572265625, "learning_rate": 2.4772313296903465e-06, "loss": 3.0276, "step": 136 }, { "epoch": 0.007490124789852794, "grad_norm": 636.0202026367188, "learning_rate": 2.4954462659380694e-06, "loss": 2.4003, "step": 137 }, { "epoch": 0.007544797233574348, "grad_norm": 360.04632568359375, "learning_rate": 2.5136612021857927e-06, "loss": 2.2876, "step": 138 }, { "epoch": 0.007599469677295901, "grad_norm": 300.0552673339844, "learning_rate": 2.5318761384335156e-06, "loss": 2.334, "step": 139 }, { "epoch": 0.007654142121017454, "grad_norm": 1012.0115356445312, "learning_rate": 2.550091074681239e-06, "loss": 2.4948, "step": 140 }, { "epoch": 0.007708814564739008, "grad_norm": 1112.014404296875, "learning_rate": 2.5683060109289622e-06, "loss": 2.5071, "step": 141 }, { "epoch": 0.007763487008460561, "grad_norm": 1184.011962890625, "learning_rate": 2.586520947176685e-06, "loss": 2.3151, "step": 142 }, { "epoch": 0.007818159452182115, "grad_norm": 696.0263061523438, "learning_rate": 2.604735883424408e-06, "loss": 2.2747, "step": 143 }, { "epoch": 0.007872831895903667, "grad_norm": 454.086181640625, "learning_rate": 2.6229508196721314e-06, "loss": 2.9102, "step": 144 }, { "epoch": 0.00792750433962522, "grad_norm": 1776.0126953125, "learning_rate": 2.6411657559198543e-06, "loss": 2.2342, "step": 145 }, { "epoch": 0.007982176783346773, "grad_norm": 358.0436096191406, "learning_rate": 2.6593806921675776e-06, "loss": 2.327, "step": 146 }, { "epoch": 0.008036849227068326, "grad_norm": 1008.0253295898438, "learning_rate": 2.677595628415301e-06, "loss": 2.6894, "step": 147 }, { "epoch": 0.00809152167078988, "grad_norm": 784.0208129882812, "learning_rate": 2.695810564663024e-06, "loss": 2.3909, "step": 148 }, { "epoch": 0.008146194114511434, "grad_norm": 3424.020751953125, "learning_rate": 2.7140255009107467e-06, "loss": 3.0384, "step": 149 }, { "epoch": 0.008200866558232987, "grad_norm": 3568.0068359375, "learning_rate": 2.7322404371584705e-06, "loss": 2.3802, "step": 150 }, { "epoch": 0.00825553900195454, "grad_norm": 844.0364990234375, "learning_rate": 2.7504553734061934e-06, "loss": 2.6832, "step": 151 }, { "epoch": 0.008310211445676093, "grad_norm": 370.02362060546875, "learning_rate": 2.7686703096539162e-06, "loss": 2.4024, "step": 152 }, { "epoch": 0.008364883889397646, "grad_norm": 374.0352783203125, "learning_rate": 2.786885245901639e-06, "loss": 2.5944, "step": 153 }, { "epoch": 0.008419556333119199, "grad_norm": 856.0094604492188, "learning_rate": 2.805100182149363e-06, "loss": 2.325, "step": 154 }, { "epoch": 0.008474228776840753, "grad_norm": 3424.008056640625, "learning_rate": 2.823315118397086e-06, "loss": 2.1577, "step": 155 }, { "epoch": 0.008528901220562306, "grad_norm": 724.0205078125, "learning_rate": 2.8415300546448087e-06, "loss": 2.5702, "step": 156 }, { "epoch": 0.00858357366428386, "grad_norm": 604.0199584960938, "learning_rate": 2.8597449908925324e-06, "loss": 2.3888, "step": 157 }, { "epoch": 0.008638246108005412, "grad_norm": 5440.005859375, "learning_rate": 2.8779599271402553e-06, "loss": 2.2454, "step": 158 }, { "epoch": 0.008692918551726965, "grad_norm": 1960.0086669921875, "learning_rate": 2.8961748633879782e-06, "loss": 2.2149, "step": 159 }, { "epoch": 0.008747590995448518, "grad_norm": 366.0301208496094, "learning_rate": 2.914389799635702e-06, "loss": 2.3711, "step": 160 }, { "epoch": 0.008802263439170073, "grad_norm": 402.0194091796875, "learning_rate": 2.932604735883425e-06, "loss": 2.1995, "step": 161 }, { "epoch": 0.008856935882891626, "grad_norm": 660.0184326171875, "learning_rate": 2.9508196721311478e-06, "loss": 2.3254, "step": 162 }, { "epoch": 0.008911608326613179, "grad_norm": 1056.015625, "learning_rate": 2.9690346083788707e-06, "loss": 2.4209, "step": 163 }, { "epoch": 0.008966280770334732, "grad_norm": 272.0218505859375, "learning_rate": 2.9872495446265944e-06, "loss": 2.2373, "step": 164 }, { "epoch": 0.009020953214056285, "grad_norm": 496.0189514160156, "learning_rate": 3.0054644808743173e-06, "loss": 2.1514, "step": 165 }, { "epoch": 0.009075625657777838, "grad_norm": 560.0172729492188, "learning_rate": 3.0236794171220402e-06, "loss": 2.2991, "step": 166 }, { "epoch": 0.009130298101499392, "grad_norm": 844.0252685546875, "learning_rate": 3.0418943533697635e-06, "loss": 2.4975, "step": 167 }, { "epoch": 0.009184970545220945, "grad_norm": 576.02490234375, "learning_rate": 3.0601092896174864e-06, "loss": 2.3996, "step": 168 }, { "epoch": 0.009239642988942498, "grad_norm": 1424.016845703125, "learning_rate": 3.0783242258652098e-06, "loss": 2.5395, "step": 169 }, { "epoch": 0.009294315432664051, "grad_norm": 1568.01220703125, "learning_rate": 3.0965391621129327e-06, "loss": 2.3495, "step": 170 }, { "epoch": 0.009348987876385604, "grad_norm": 952.0209350585938, "learning_rate": 3.114754098360656e-06, "loss": 2.4888, "step": 171 }, { "epoch": 0.009403660320107157, "grad_norm": 536.047119140625, "learning_rate": 3.132969034608379e-06, "loss": 2.4414, "step": 172 }, { "epoch": 0.009458332763828712, "grad_norm": 844.0148315429688, "learning_rate": 3.1511839708561022e-06, "loss": 2.3898, "step": 173 }, { "epoch": 0.009513005207550265, "grad_norm": 624.0457763671875, "learning_rate": 3.1693989071038255e-06, "loss": 2.4887, "step": 174 }, { "epoch": 0.009567677651271818, "grad_norm": 346.0221862792969, "learning_rate": 3.1876138433515484e-06, "loss": 2.251, "step": 175 }, { "epoch": 0.00962235009499337, "grad_norm": 680.0218505859375, "learning_rate": 3.2058287795992713e-06, "loss": 2.5616, "step": 176 }, { "epoch": 0.009677022538714924, "grad_norm": 424.0299987792969, "learning_rate": 3.224043715846995e-06, "loss": 2.2018, "step": 177 }, { "epoch": 0.009731694982436477, "grad_norm": 464.0181579589844, "learning_rate": 3.242258652094718e-06, "loss": 2.3747, "step": 178 }, { "epoch": 0.009786367426158031, "grad_norm": 255.0237579345703, "learning_rate": 3.260473588342441e-06, "loss": 2.3918, "step": 179 }, { "epoch": 0.009841039869879584, "grad_norm": 1728.021484375, "learning_rate": 3.2786885245901638e-06, "loss": 2.7769, "step": 180 }, { "epoch": 0.009895712313601137, "grad_norm": 1888.0050048828125, "learning_rate": 3.2969034608378875e-06, "loss": 2.3059, "step": 181 }, { "epoch": 0.00995038475732269, "grad_norm": 764.0204467773438, "learning_rate": 3.3151183970856104e-06, "loss": 2.3208, "step": 182 }, { "epoch": 0.010005057201044243, "grad_norm": 588.0213623046875, "learning_rate": 3.3333333333333333e-06, "loss": 2.1892, "step": 183 }, { "epoch": 0.010059729644765796, "grad_norm": 572.0220947265625, "learning_rate": 3.351548269581057e-06, "loss": 2.6294, "step": 184 }, { "epoch": 0.010114402088487351, "grad_norm": 12800.02734375, "learning_rate": 3.36976320582878e-06, "loss": 2.6385, "step": 185 }, { "epoch": 0.010169074532208904, "grad_norm": 5184.01220703125, "learning_rate": 3.387978142076503e-06, "loss": 2.5559, "step": 186 }, { "epoch": 0.010223746975930457, "grad_norm": 556.0195922851562, "learning_rate": 3.4061930783242266e-06, "loss": 2.228, "step": 187 }, { "epoch": 0.01027841941965201, "grad_norm": 564.0234375, "learning_rate": 3.4244080145719495e-06, "loss": 2.5122, "step": 188 }, { "epoch": 0.010333091863373563, "grad_norm": 1352.016357421875, "learning_rate": 3.4426229508196724e-06, "loss": 2.4213, "step": 189 }, { "epoch": 0.010387764307095116, "grad_norm": 221.03602600097656, "learning_rate": 3.4608378870673953e-06, "loss": 2.3994, "step": 190 }, { "epoch": 0.01044243675081667, "grad_norm": 668.0219116210938, "learning_rate": 3.4790528233151186e-06, "loss": 2.275, "step": 191 }, { "epoch": 0.010497109194538223, "grad_norm": 1360.011474609375, "learning_rate": 3.497267759562842e-06, "loss": 2.2648, "step": 192 }, { "epoch": 0.010551781638259776, "grad_norm": 1216.0118408203125, "learning_rate": 3.515482695810565e-06, "loss": 2.2953, "step": 193 }, { "epoch": 0.01060645408198133, "grad_norm": 800.0089721679688, "learning_rate": 3.533697632058288e-06, "loss": 2.4434, "step": 194 }, { "epoch": 0.010661126525702882, "grad_norm": 692.0223999023438, "learning_rate": 3.551912568306011e-06, "loss": 2.3895, "step": 195 }, { "epoch": 0.010715798969424435, "grad_norm": 644.0224609375, "learning_rate": 3.5701275045537344e-06, "loss": 2.1764, "step": 196 }, { "epoch": 0.01077047141314599, "grad_norm": 1664.0157470703125, "learning_rate": 3.5883424408014573e-06, "loss": 2.5719, "step": 197 }, { "epoch": 0.010825143856867543, "grad_norm": 1376.0150146484375, "learning_rate": 3.6065573770491806e-06, "loss": 2.3147, "step": 198 }, { "epoch": 0.010879816300589096, "grad_norm": 576.037841796875, "learning_rate": 3.6247723132969035e-06, "loss": 2.2969, "step": 199 }, { "epoch": 0.010934488744310649, "grad_norm": 1088.03271484375, "learning_rate": 3.642987249544627e-06, "loss": 2.5346, "step": 200 }, { "epoch": 0.010989161188032202, "grad_norm": 354.0188293457031, "learning_rate": 3.66120218579235e-06, "loss": 2.1836, "step": 201 }, { "epoch": 0.011043833631753755, "grad_norm": 632.0130615234375, "learning_rate": 3.679417122040073e-06, "loss": 2.1497, "step": 202 }, { "epoch": 0.01109850607547531, "grad_norm": 680.0060424804688, "learning_rate": 3.697632058287796e-06, "loss": 2.3594, "step": 203 }, { "epoch": 0.011153178519196862, "grad_norm": 692.0107421875, "learning_rate": 3.7158469945355197e-06, "loss": 2.3039, "step": 204 }, { "epoch": 0.011207850962918415, "grad_norm": 1328.0159912109375, "learning_rate": 3.7340619307832426e-06, "loss": 2.2576, "step": 205 }, { "epoch": 0.011262523406639968, "grad_norm": 2048.026611328125, "learning_rate": 3.7522768670309655e-06, "loss": 2.5968, "step": 206 }, { "epoch": 0.011317195850361521, "grad_norm": 480.0267028808594, "learning_rate": 3.7704918032786884e-06, "loss": 2.2147, "step": 207 }, { "epoch": 0.011371868294083074, "grad_norm": 482.0324401855469, "learning_rate": 3.788706739526412e-06, "loss": 2.2835, "step": 208 }, { "epoch": 0.011426540737804629, "grad_norm": 2080.008544921875, "learning_rate": 3.806921675774135e-06, "loss": 2.2389, "step": 209 }, { "epoch": 0.011481213181526182, "grad_norm": 560.0274047851562, "learning_rate": 3.825136612021858e-06, "loss": 2.3505, "step": 210 }, { "epoch": 0.011535885625247735, "grad_norm": 880.0169677734375, "learning_rate": 3.843351548269581e-06, "loss": 2.1882, "step": 211 }, { "epoch": 0.011590558068969288, "grad_norm": 1224.0157470703125, "learning_rate": 3.861566484517305e-06, "loss": 2.2281, "step": 212 }, { "epoch": 0.01164523051269084, "grad_norm": 6560.01416015625, "learning_rate": 3.879781420765028e-06, "loss": 2.3911, "step": 213 }, { "epoch": 0.011699902956412394, "grad_norm": 820.00634765625, "learning_rate": 3.897996357012751e-06, "loss": 2.0949, "step": 214 }, { "epoch": 0.011754575400133948, "grad_norm": 406.0335693359375, "learning_rate": 3.916211293260474e-06, "loss": 2.1476, "step": 215 }, { "epoch": 0.011809247843855501, "grad_norm": 444.0197448730469, "learning_rate": 3.934426229508197e-06, "loss": 2.3339, "step": 216 }, { "epoch": 0.011863920287577054, "grad_norm": 548.0263061523438, "learning_rate": 3.9526411657559195e-06, "loss": 2.3772, "step": 217 }, { "epoch": 0.011918592731298607, "grad_norm": 330.0309143066406, "learning_rate": 3.970856102003644e-06, "loss": 2.2261, "step": 218 }, { "epoch": 0.01197326517502016, "grad_norm": 304.0455322265625, "learning_rate": 3.989071038251366e-06, "loss": 2.2513, "step": 219 }, { "epoch": 0.012027937618741713, "grad_norm": 636.0241088867188, "learning_rate": 4.0072859744990895e-06, "loss": 2.345, "step": 220 }, { "epoch": 0.012082610062463268, "grad_norm": 2480.005859375, "learning_rate": 4.025500910746813e-06, "loss": 2.2992, "step": 221 }, { "epoch": 0.01213728250618482, "grad_norm": 1296.008056640625, "learning_rate": 4.043715846994536e-06, "loss": 2.2935, "step": 222 }, { "epoch": 0.012191954949906374, "grad_norm": 368.01824951171875, "learning_rate": 4.061930783242259e-06, "loss": 2.1603, "step": 223 }, { "epoch": 0.012246627393627927, "grad_norm": 186.04083251953125, "learning_rate": 4.080145719489983e-06, "loss": 2.1645, "step": 224 }, { "epoch": 0.01230129983734948, "grad_norm": 210.03292846679688, "learning_rate": 4.098360655737705e-06, "loss": 2.1317, "step": 225 }, { "epoch": 0.012355972281071033, "grad_norm": 382.03326416015625, "learning_rate": 4.1165755919854286e-06, "loss": 1.997, "step": 226 }, { "epoch": 0.012410644724792587, "grad_norm": 820.0191040039062, "learning_rate": 4.134790528233151e-06, "loss": 2.3941, "step": 227 }, { "epoch": 0.01246531716851414, "grad_norm": 320.0335693359375, "learning_rate": 4.153005464480875e-06, "loss": 2.3955, "step": 228 }, { "epoch": 0.012519989612235693, "grad_norm": 330.0426330566406, "learning_rate": 4.171220400728598e-06, "loss": 2.2206, "step": 229 }, { "epoch": 0.012574662055957246, "grad_norm": 370.0517272949219, "learning_rate": 4.189435336976321e-06, "loss": 2.3347, "step": 230 }, { "epoch": 0.012629334499678799, "grad_norm": 1304.0220947265625, "learning_rate": 4.207650273224044e-06, "loss": 2.1396, "step": 231 }, { "epoch": 0.012684006943400352, "grad_norm": 516.036865234375, "learning_rate": 4.225865209471768e-06, "loss": 2.0982, "step": 232 }, { "epoch": 0.012738679387121907, "grad_norm": 458.05462646484375, "learning_rate": 4.24408014571949e-06, "loss": 2.2762, "step": 233 }, { "epoch": 0.01279335183084346, "grad_norm": 416.01470947265625, "learning_rate": 4.2622950819672135e-06, "loss": 2.1859, "step": 234 }, { "epoch": 0.012848024274565013, "grad_norm": 3568.013916015625, "learning_rate": 4.280510018214937e-06, "loss": 2.2171, "step": 235 }, { "epoch": 0.012902696718286566, "grad_norm": 988.0137329101562, "learning_rate": 4.29872495446266e-06, "loss": 1.9652, "step": 236 }, { "epoch": 0.012957369162008119, "grad_norm": 400.0228576660156, "learning_rate": 4.316939890710383e-06, "loss": 2.1103, "step": 237 }, { "epoch": 0.013012041605729672, "grad_norm": 346.0256652832031, "learning_rate": 4.335154826958106e-06, "loss": 2.2698, "step": 238 }, { "epoch": 0.013066714049451226, "grad_norm": 170.02980041503906, "learning_rate": 4.353369763205829e-06, "loss": 2.1198, "step": 239 }, { "epoch": 0.01312138649317278, "grad_norm": 212.06466674804688, "learning_rate": 4.371584699453552e-06, "loss": 2.2806, "step": 240 }, { "epoch": 0.013176058936894332, "grad_norm": 1136.0059814453125, "learning_rate": 4.389799635701276e-06, "loss": 1.9838, "step": 241 }, { "epoch": 0.013230731380615885, "grad_norm": 972.0423583984375, "learning_rate": 4.408014571948998e-06, "loss": 2.4661, "step": 242 }, { "epoch": 0.013285403824337438, "grad_norm": 450.0162353515625, "learning_rate": 4.426229508196722e-06, "loss": 2.1294, "step": 243 }, { "epoch": 0.013340076268058991, "grad_norm": 203.02735900878906, "learning_rate": 4.444444444444444e-06, "loss": 2.0319, "step": 244 }, { "epoch": 0.013394748711780544, "grad_norm": 360.01953125, "learning_rate": 4.462659380692168e-06, "loss": 1.9549, "step": 245 }, { "epoch": 0.013449421155502099, "grad_norm": 780.0652465820312, "learning_rate": 4.480874316939891e-06, "loss": 2.5943, "step": 246 }, { "epoch": 0.013504093599223652, "grad_norm": 672.0272216796875, "learning_rate": 4.499089253187614e-06, "loss": 2.2945, "step": 247 }, { "epoch": 0.013558766042945205, "grad_norm": 996.014404296875, "learning_rate": 4.5173041894353374e-06, "loss": 2.0853, "step": 248 }, { "epoch": 0.013613438486666758, "grad_norm": 708.0068359375, "learning_rate": 4.535519125683061e-06, "loss": 2.043, "step": 249 }, { "epoch": 0.01366811093038831, "grad_norm": 148.01976013183594, "learning_rate": 4.553734061930783e-06, "loss": 2.2464, "step": 250 }, { "epoch": 0.013722783374109863, "grad_norm": 1040.0113525390625, "learning_rate": 4.571948998178507e-06, "loss": 2.0263, "step": 251 }, { "epoch": 0.013777455817831418, "grad_norm": 185.02944946289062, "learning_rate": 4.59016393442623e-06, "loss": 2.084, "step": 252 }, { "epoch": 0.013832128261552971, "grad_norm": 167.01451110839844, "learning_rate": 4.608378870673953e-06, "loss": 2.1263, "step": 253 }, { "epoch": 0.013886800705274524, "grad_norm": 896.0078735351562, "learning_rate": 4.626593806921676e-06, "loss": 2.0309, "step": 254 }, { "epoch": 0.013941473148996077, "grad_norm": 1056.0133056640625, "learning_rate": 4.6448087431694e-06, "loss": 2.0796, "step": 255 }, { "epoch": 0.01399614559271763, "grad_norm": 306.02783203125, "learning_rate": 4.663023679417122e-06, "loss": 2.2594, "step": 256 }, { "epoch": 0.014050818036439183, "grad_norm": 203.05438232421875, "learning_rate": 4.681238615664846e-06, "loss": 2.1341, "step": 257 }, { "epoch": 0.014105490480160738, "grad_norm": 241.0423583984375, "learning_rate": 4.699453551912569e-06, "loss": 2.1354, "step": 258 }, { "epoch": 0.01416016292388229, "grad_norm": 496.0068664550781, "learning_rate": 4.717668488160292e-06, "loss": 2.0617, "step": 259 }, { "epoch": 0.014214835367603844, "grad_norm": 896.0143432617188, "learning_rate": 4.735883424408015e-06, "loss": 2.0853, "step": 260 }, { "epoch": 0.014269507811325396, "grad_norm": 470.0319519042969, "learning_rate": 4.754098360655738e-06, "loss": 2.1333, "step": 261 }, { "epoch": 0.01432418025504695, "grad_norm": 161.0331573486328, "learning_rate": 4.772313296903461e-06, "loss": 2.0929, "step": 262 }, { "epoch": 0.014378852698768502, "grad_norm": 316.0201721191406, "learning_rate": 4.790528233151184e-06, "loss": 2.1778, "step": 263 }, { "epoch": 0.014433525142490057, "grad_norm": 185.02268981933594, "learning_rate": 4.808743169398907e-06, "loss": 2.1202, "step": 264 }, { "epoch": 0.01448819758621161, "grad_norm": 226.06277465820312, "learning_rate": 4.8269581056466305e-06, "loss": 2.1095, "step": 265 }, { "epoch": 0.014542870029933163, "grad_norm": 227.0435791015625, "learning_rate": 4.845173041894354e-06, "loss": 1.9726, "step": 266 }, { "epoch": 0.014597542473654716, "grad_norm": 920.0138549804688, "learning_rate": 4.863387978142076e-06, "loss": 2.0543, "step": 267 }, { "epoch": 0.014652214917376269, "grad_norm": 820.0074462890625, "learning_rate": 4.8816029143898005e-06, "loss": 2.0711, "step": 268 }, { "epoch": 0.014706887361097822, "grad_norm": 368.0155944824219, "learning_rate": 4.899817850637523e-06, "loss": 1.9161, "step": 269 }, { "epoch": 0.014761559804819377, "grad_norm": 78.05059814453125, "learning_rate": 4.918032786885246e-06, "loss": 2.0919, "step": 270 }, { "epoch": 0.01481623224854093, "grad_norm": 190.04483032226562, "learning_rate": 4.936247723132969e-06, "loss": 1.8823, "step": 271 }, { "epoch": 0.014870904692262482, "grad_norm": 198.02261352539062, "learning_rate": 4.954462659380693e-06, "loss": 2.168, "step": 272 }, { "epoch": 0.014925577135984035, "grad_norm": 1336.00830078125, "learning_rate": 4.9726775956284154e-06, "loss": 2.1219, "step": 273 }, { "epoch": 0.014980249579705588, "grad_norm": 384.0129699707031, "learning_rate": 4.990892531876139e-06, "loss": 1.9923, "step": 274 }, { "epoch": 0.015034922023427141, "grad_norm": 452.0115661621094, "learning_rate": 5.009107468123861e-06, "loss": 2.3218, "step": 275 }, { "epoch": 0.015089594467148696, "grad_norm": 184.0247344970703, "learning_rate": 5.027322404371585e-06, "loss": 2.0301, "step": 276 }, { "epoch": 0.015144266910870249, "grad_norm": 209.0110321044922, "learning_rate": 5.045537340619309e-06, "loss": 2.134, "step": 277 }, { "epoch": 0.015198939354591802, "grad_norm": 520.0255126953125, "learning_rate": 5.063752276867031e-06, "loss": 2.2807, "step": 278 }, { "epoch": 0.015253611798313355, "grad_norm": 398.01397705078125, "learning_rate": 5.0819672131147545e-06, "loss": 2.0248, "step": 279 }, { "epoch": 0.015308284242034908, "grad_norm": 358.0055847167969, "learning_rate": 5.100182149362478e-06, "loss": 2.1105, "step": 280 }, { "epoch": 0.01536295668575646, "grad_norm": 109.52635192871094, "learning_rate": 5.1183970856102e-06, "loss": 2.0285, "step": 281 }, { "epoch": 0.015417629129478016, "grad_norm": 163.01759338378906, "learning_rate": 5.1366120218579245e-06, "loss": 2.1447, "step": 282 }, { "epoch": 0.015472301573199568, "grad_norm": 2096.009765625, "learning_rate": 5.154826958105648e-06, "loss": 2.0671, "step": 283 }, { "epoch": 0.015526974016921121, "grad_norm": 1016.0091552734375, "learning_rate": 5.17304189435337e-06, "loss": 1.8779, "step": 284 }, { "epoch": 0.015581646460642674, "grad_norm": 752.0189208984375, "learning_rate": 5.191256830601094e-06, "loss": 2.1418, "step": 285 }, { "epoch": 0.01563631890436423, "grad_norm": 251.02122497558594, "learning_rate": 5.209471766848816e-06, "loss": 2.0588, "step": 286 }, { "epoch": 0.01569099134808578, "grad_norm": 95.5622329711914, "learning_rate": 5.227686703096539e-06, "loss": 1.7774, "step": 287 }, { "epoch": 0.015745663791807335, "grad_norm": 510.00360107421875, "learning_rate": 5.245901639344263e-06, "loss": 2.1761, "step": 288 }, { "epoch": 0.015800336235528886, "grad_norm": 374.0245361328125, "learning_rate": 5.264116575591985e-06, "loss": 2.1182, "step": 289 }, { "epoch": 0.01585500867925044, "grad_norm": 83.5390853881836, "learning_rate": 5.2823315118397085e-06, "loss": 1.7984, "step": 290 }, { "epoch": 0.015909681122971996, "grad_norm": 80.55767059326172, "learning_rate": 5.300546448087433e-06, "loss": 2.1273, "step": 291 }, { "epoch": 0.015964353566693547, "grad_norm": 668.01904296875, "learning_rate": 5.318761384335155e-06, "loss": 1.7876, "step": 292 }, { "epoch": 0.0160190260104151, "grad_norm": 676.0086669921875, "learning_rate": 5.3369763205828785e-06, "loss": 1.9223, "step": 293 }, { "epoch": 0.016073698454136653, "grad_norm": 147.03074645996094, "learning_rate": 5.355191256830602e-06, "loss": 1.8135, "step": 294 }, { "epoch": 0.016128370897858207, "grad_norm": 204.02589416503906, "learning_rate": 5.373406193078324e-06, "loss": 1.7634, "step": 295 }, { "epoch": 0.01618304334157976, "grad_norm": 108.52013397216797, "learning_rate": 5.391621129326048e-06, "loss": 1.9049, "step": 296 }, { "epoch": 0.016237715785301313, "grad_norm": 67.06488800048828, "learning_rate": 5.409836065573772e-06, "loss": 2.1258, "step": 297 }, { "epoch": 0.016292388229022868, "grad_norm": 46.805946350097656, "learning_rate": 5.428051001821493e-06, "loss": 2.16, "step": 298 }, { "epoch": 0.01634706067274442, "grad_norm": 406.0404357910156, "learning_rate": 5.446265938069218e-06, "loss": 1.9288, "step": 299 }, { "epoch": 0.016401733116465974, "grad_norm": 716.011474609375, "learning_rate": 5.464480874316941e-06, "loss": 1.7192, "step": 300 }, { "epoch": 0.016456405560187525, "grad_norm": 204.02354431152344, "learning_rate": 5.482695810564663e-06, "loss": 1.9111, "step": 301 }, { "epoch": 0.01651107800390908, "grad_norm": 238.0298614501953, "learning_rate": 5.500910746812387e-06, "loss": 1.7937, "step": 302 }, { "epoch": 0.016565750447630635, "grad_norm": 62.1051139831543, "learning_rate": 5.519125683060109e-06, "loss": 1.9039, "step": 303 }, { "epoch": 0.016620422891352186, "grad_norm": 71.5407485961914, "learning_rate": 5.5373406193078325e-06, "loss": 1.6975, "step": 304 }, { "epoch": 0.01667509533507374, "grad_norm": 45.79939270019531, "learning_rate": 5.555555555555557e-06, "loss": 1.8919, "step": 305 }, { "epoch": 0.01672976777879529, "grad_norm": 127.59284973144531, "learning_rate": 5.573770491803278e-06, "loss": 2.0893, "step": 306 }, { "epoch": 0.016784440222516846, "grad_norm": 256.009521484375, "learning_rate": 5.5919854280510025e-06, "loss": 1.8215, "step": 307 }, { "epoch": 0.016839112666238398, "grad_norm": 108.06598663330078, "learning_rate": 5.610200364298726e-06, "loss": 1.9255, "step": 308 }, { "epoch": 0.016893785109959952, "grad_norm": 61.29841613769531, "learning_rate": 5.628415300546448e-06, "loss": 1.9547, "step": 309 }, { "epoch": 0.016948457553681507, "grad_norm": 50.28725051879883, "learning_rate": 5.646630236794172e-06, "loss": 1.6713, "step": 310 }, { "epoch": 0.017003129997403058, "grad_norm": 225.0167999267578, "learning_rate": 5.664845173041895e-06, "loss": 2.0983, "step": 311 }, { "epoch": 0.017057802441124613, "grad_norm": 48.54398727416992, "learning_rate": 5.683060109289617e-06, "loss": 1.8821, "step": 312 }, { "epoch": 0.017112474884846164, "grad_norm": 121.0155258178711, "learning_rate": 5.701275045537341e-06, "loss": 1.9121, "step": 313 }, { "epoch": 0.01716714732856772, "grad_norm": 36.10184860229492, "learning_rate": 5.719489981785065e-06, "loss": 1.8144, "step": 314 }, { "epoch": 0.017221819772289274, "grad_norm": 45.80248260498047, "learning_rate": 5.737704918032787e-06, "loss": 1.8537, "step": 315 }, { "epoch": 0.017276492216010825, "grad_norm": 134.01553344726562, "learning_rate": 5.755919854280511e-06, "loss": 1.9252, "step": 316 }, { "epoch": 0.01733116465973238, "grad_norm": 140.0113067626953, "learning_rate": 5.774134790528234e-06, "loss": 1.6928, "step": 317 }, { "epoch": 0.01738583710345393, "grad_norm": 864.0111694335938, "learning_rate": 5.7923497267759565e-06, "loss": 1.8776, "step": 318 }, { "epoch": 0.017440509547175485, "grad_norm": 92.51460266113281, "learning_rate": 5.81056466302368e-06, "loss": 1.7628, "step": 319 }, { "epoch": 0.017495181990897037, "grad_norm": 34.546234130859375, "learning_rate": 5.828779599271404e-06, "loss": 1.825, "step": 320 }, { "epoch": 0.01754985443461859, "grad_norm": 116.5316162109375, "learning_rate": 5.846994535519126e-06, "loss": 1.8788, "step": 321 }, { "epoch": 0.017604526878340146, "grad_norm": 111.51554870605469, "learning_rate": 5.86520947176685e-06, "loss": 1.9495, "step": 322 }, { "epoch": 0.017659199322061697, "grad_norm": 149.012451171875, "learning_rate": 5.883424408014572e-06, "loss": 1.7702, "step": 323 }, { "epoch": 0.017713871765783252, "grad_norm": 70.52687072753906, "learning_rate": 5.9016393442622956e-06, "loss": 1.882, "step": 324 }, { "epoch": 0.017768544209504803, "grad_norm": 50.77800369262695, "learning_rate": 5.919854280510019e-06, "loss": 1.7074, "step": 325 }, { "epoch": 0.017823216653226358, "grad_norm": 42.29200744628906, "learning_rate": 5.938069216757741e-06, "loss": 1.6139, "step": 326 }, { "epoch": 0.017877889096947912, "grad_norm": 112.02457427978516, "learning_rate": 5.956284153005465e-06, "loss": 1.9376, "step": 327 }, { "epoch": 0.017932561540669464, "grad_norm": 45.347537994384766, "learning_rate": 5.974499089253189e-06, "loss": 1.6812, "step": 328 }, { "epoch": 0.01798723398439102, "grad_norm": 77.52938842773438, "learning_rate": 5.9927140255009105e-06, "loss": 2.0225, "step": 329 }, { "epoch": 0.01804190642811257, "grad_norm": 24.05701446533203, "learning_rate": 6.010928961748635e-06, "loss": 1.7002, "step": 330 }, { "epoch": 0.018096578871834124, "grad_norm": 38.790096282958984, "learning_rate": 6.029143897996358e-06, "loss": 2.028, "step": 331 }, { "epoch": 0.018151251315555676, "grad_norm": 87.02254486083984, "learning_rate": 6.0473588342440805e-06, "loss": 2.0143, "step": 332 }, { "epoch": 0.01820592375927723, "grad_norm": 122.01620483398438, "learning_rate": 6.065573770491804e-06, "loss": 1.8387, "step": 333 }, { "epoch": 0.018260596202998785, "grad_norm": 38.045310974121094, "learning_rate": 6.083788706739527e-06, "loss": 1.7412, "step": 334 }, { "epoch": 0.018315268646720336, "grad_norm": 56.02594757080078, "learning_rate": 6.1020036429872496e-06, "loss": 1.7576, "step": 335 }, { "epoch": 0.01836994109044189, "grad_norm": 91.51683807373047, "learning_rate": 6.120218579234973e-06, "loss": 1.9656, "step": 336 }, { "epoch": 0.018424613534163442, "grad_norm": 117.53571319580078, "learning_rate": 6.138433515482697e-06, "loss": 1.8075, "step": 337 }, { "epoch": 0.018479285977884997, "grad_norm": 134.01486206054688, "learning_rate": 6.1566484517304195e-06, "loss": 1.6896, "step": 338 }, { "epoch": 0.01853395842160655, "grad_norm": 22.68126106262207, "learning_rate": 6.174863387978143e-06, "loss": 1.676, "step": 339 }, { "epoch": 0.018588630865328103, "grad_norm": 27.293418884277344, "learning_rate": 6.193078324225865e-06, "loss": 1.8274, "step": 340 }, { "epoch": 0.018643303309049657, "grad_norm": 24.704256057739258, "learning_rate": 6.211293260473589e-06, "loss": 1.996, "step": 341 }, { "epoch": 0.01869797575277121, "grad_norm": 116.52053833007812, "learning_rate": 6.229508196721312e-06, "loss": 1.7453, "step": 342 }, { "epoch": 0.018752648196492763, "grad_norm": 40.551544189453125, "learning_rate": 6.2477231329690345e-06, "loss": 1.7258, "step": 343 }, { "epoch": 0.018807320640214314, "grad_norm": 79.0179672241211, "learning_rate": 6.265938069216758e-06, "loss": 1.8099, "step": 344 }, { "epoch": 0.01886199308393587, "grad_norm": 47.544185638427734, "learning_rate": 6.284153005464482e-06, "loss": 1.6583, "step": 345 }, { "epoch": 0.018916665527657424, "grad_norm": 24.236581802368164, "learning_rate": 6.3023679417122044e-06, "loss": 1.7416, "step": 346 }, { "epoch": 0.018971337971378975, "grad_norm": 32.805118560791016, "learning_rate": 6.320582877959928e-06, "loss": 1.8353, "step": 347 }, { "epoch": 0.01902601041510053, "grad_norm": 41.17081069946289, "learning_rate": 6.338797814207651e-06, "loss": 1.6803, "step": 348 }, { "epoch": 0.01908068285882208, "grad_norm": 25.94037628173828, "learning_rate": 6.3570127504553735e-06, "loss": 1.8075, "step": 349 }, { "epoch": 0.019135355302543636, "grad_norm": 34.12767791748047, "learning_rate": 6.375227686703097e-06, "loss": 1.6926, "step": 350 }, { "epoch": 0.01919002774626519, "grad_norm": 21.142074584960938, "learning_rate": 6.393442622950821e-06, "loss": 1.9076, "step": 351 }, { "epoch": 0.01924470018998674, "grad_norm": 122.5144271850586, "learning_rate": 6.411657559198543e-06, "loss": 1.9157, "step": 352 }, { "epoch": 0.019299372633708296, "grad_norm": 260.0071105957031, "learning_rate": 6.429872495446267e-06, "loss": 1.9144, "step": 353 }, { "epoch": 0.019354045077429848, "grad_norm": 14.359856605529785, "learning_rate": 6.44808743169399e-06, "loss": 2.0093, "step": 354 }, { "epoch": 0.019408717521151402, "grad_norm": 29.173152923583984, "learning_rate": 6.466302367941713e-06, "loss": 1.8828, "step": 355 }, { "epoch": 0.019463389964872953, "grad_norm": 16.204959869384766, "learning_rate": 6.484517304189436e-06, "loss": 1.7119, "step": 356 }, { "epoch": 0.019518062408594508, "grad_norm": 49.53606414794922, "learning_rate": 6.5027322404371584e-06, "loss": 1.952, "step": 357 }, { "epoch": 0.019572734852316063, "grad_norm": 39.298255920410156, "learning_rate": 6.520947176684882e-06, "loss": 1.7873, "step": 358 }, { "epoch": 0.019627407296037614, "grad_norm": 169.00823974609375, "learning_rate": 6.539162112932605e-06, "loss": 1.8037, "step": 359 }, { "epoch": 0.01968207973975917, "grad_norm": 8000.0712890625, "learning_rate": 6.5573770491803276e-06, "loss": 2.3235, "step": 360 }, { "epoch": 0.01973675218348072, "grad_norm": 3120.030517578125, "learning_rate": 6.575591985428052e-06, "loss": 1.9475, "step": 361 }, { "epoch": 0.019791424627202275, "grad_norm": 696.0104370117188, "learning_rate": 6.593806921675775e-06, "loss": 1.9744, "step": 362 }, { "epoch": 0.019846097070923826, "grad_norm": 160.02293395996094, "learning_rate": 6.6120218579234975e-06, "loss": 2.0417, "step": 363 }, { "epoch": 0.01990076951464538, "grad_norm": 26.0753231048584, "learning_rate": 6.630236794171221e-06, "loss": 1.6148, "step": 364 }, { "epoch": 0.019955441958366935, "grad_norm": 10.811483383178711, "learning_rate": 6.648451730418944e-06, "loss": 1.6206, "step": 365 }, { "epoch": 0.020010114402088486, "grad_norm": 203.012451171875, "learning_rate": 6.666666666666667e-06, "loss": 1.6245, "step": 366 }, { "epoch": 0.02006478684581004, "grad_norm": 99.01944732666016, "learning_rate": 6.68488160291439e-06, "loss": 1.4117, "step": 367 }, { "epoch": 0.020119459289531592, "grad_norm": 145.0130157470703, "learning_rate": 6.703096539162114e-06, "loss": 1.6135, "step": 368 }, { "epoch": 0.020174131733253147, "grad_norm": 54.28980255126953, "learning_rate": 6.721311475409837e-06, "loss": 1.795, "step": 369 }, { "epoch": 0.020228804176974702, "grad_norm": 26.047006607055664, "learning_rate": 6.73952641165756e-06, "loss": 1.8534, "step": 370 }, { "epoch": 0.020283476620696253, "grad_norm": 22.56418228149414, "learning_rate": 6.757741347905283e-06, "loss": 1.8132, "step": 371 }, { "epoch": 0.020338149064417808, "grad_norm": 92.51770782470703, "learning_rate": 6.775956284153006e-06, "loss": 1.7323, "step": 372 }, { "epoch": 0.02039282150813936, "grad_norm": 54.52263259887695, "learning_rate": 6.794171220400729e-06, "loss": 1.6177, "step": 373 }, { "epoch": 0.020447493951860914, "grad_norm": 25.711753845214844, "learning_rate": 6.812386156648453e-06, "loss": 2.0052, "step": 374 }, { "epoch": 0.020502166395582465, "grad_norm": 19.979473114013672, "learning_rate": 6.830601092896175e-06, "loss": 1.8377, "step": 375 }, { "epoch": 0.02055683883930402, "grad_norm": 31.692358016967773, "learning_rate": 6.848816029143899e-06, "loss": 1.7176, "step": 376 }, { "epoch": 0.020611511283025574, "grad_norm": 177.00946044921875, "learning_rate": 6.8670309653916215e-06, "loss": 1.9067, "step": 377 }, { "epoch": 0.020666183726747125, "grad_norm": 29.294233322143555, "learning_rate": 6.885245901639345e-06, "loss": 1.6063, "step": 378 }, { "epoch": 0.02072085617046868, "grad_norm": 12.75736141204834, "learning_rate": 6.903460837887068e-06, "loss": 1.4673, "step": 379 }, { "epoch": 0.02077552861419023, "grad_norm": 76.02081298828125, "learning_rate": 6.921675774134791e-06, "loss": 1.7654, "step": 380 }, { "epoch": 0.020830201057911786, "grad_norm": 115.01213836669922, "learning_rate": 6.939890710382514e-06, "loss": 1.8158, "step": 381 }, { "epoch": 0.02088487350163334, "grad_norm": 176.01040649414062, "learning_rate": 6.958105646630237e-06, "loss": 1.6819, "step": 382 }, { "epoch": 0.020939545945354892, "grad_norm": 83.51851654052734, "learning_rate": 6.97632058287796e-06, "loss": 1.5021, "step": 383 }, { "epoch": 0.020994218389076447, "grad_norm": 23.194541931152344, "learning_rate": 6.994535519125684e-06, "loss": 1.7205, "step": 384 }, { "epoch": 0.021048890832797998, "grad_norm": 36.81033706665039, "learning_rate": 7.012750455373407e-06, "loss": 1.8105, "step": 385 }, { "epoch": 0.021103563276519553, "grad_norm": 13.05042839050293, "learning_rate": 7.03096539162113e-06, "loss": 1.7686, "step": 386 }, { "epoch": 0.021158235720241104, "grad_norm": 71.02674102783203, "learning_rate": 7.049180327868853e-06, "loss": 1.778, "step": 387 }, { "epoch": 0.02121290816396266, "grad_norm": 39.53657913208008, "learning_rate": 7.067395264116576e-06, "loss": 1.7872, "step": 388 }, { "epoch": 0.021267580607684213, "grad_norm": 348.0055236816406, "learning_rate": 7.085610200364299e-06, "loss": 1.8245, "step": 389 }, { "epoch": 0.021322253051405764, "grad_norm": 99.51444244384766, "learning_rate": 7.103825136612022e-06, "loss": 1.3539, "step": 390 }, { "epoch": 0.02137692549512732, "grad_norm": 632.0061645507812, "learning_rate": 7.122040072859746e-06, "loss": 2.0225, "step": 391 }, { "epoch": 0.02143159793884887, "grad_norm": 30.299848556518555, "learning_rate": 7.140255009107469e-06, "loss": 1.9784, "step": 392 }, { "epoch": 0.021486270382570425, "grad_norm": 54.27102279663086, "learning_rate": 7.158469945355192e-06, "loss": 1.6287, "step": 393 }, { "epoch": 0.02154094282629198, "grad_norm": 29.668384552001953, "learning_rate": 7.176684881602915e-06, "loss": 1.7526, "step": 394 }, { "epoch": 0.02159561527001353, "grad_norm": 8.091886520385742, "learning_rate": 7.194899817850638e-06, "loss": 1.706, "step": 395 }, { "epoch": 0.021650287713735086, "grad_norm": 155.01077270507812, "learning_rate": 7.213114754098361e-06, "loss": 1.8333, "step": 396 }, { "epoch": 0.021704960157456637, "grad_norm": 2080.0107421875, "learning_rate": 7.231329690346084e-06, "loss": 1.8183, "step": 397 }, { "epoch": 0.02175963260117819, "grad_norm": 45.779212951660156, "learning_rate": 7.249544626593807e-06, "loss": 1.9255, "step": 398 }, { "epoch": 0.021814305044899743, "grad_norm": 53.04137420654297, "learning_rate": 7.267759562841531e-06, "loss": 1.5897, "step": 399 }, { "epoch": 0.021868977488621297, "grad_norm": 14.516731262207031, "learning_rate": 7.285974499089254e-06, "loss": 1.544, "step": 400 }, { "epoch": 0.021923649932342852, "grad_norm": 17.23487663269043, "learning_rate": 7.304189435336977e-06, "loss": 1.5168, "step": 401 }, { "epoch": 0.021978322376064403, "grad_norm": 17.09227180480957, "learning_rate": 7.3224043715847e-06, "loss": 1.9799, "step": 402 }, { "epoch": 0.022032994819785958, "grad_norm": 17.076295852661133, "learning_rate": 7.340619307832423e-06, "loss": 1.8071, "step": 403 }, { "epoch": 0.02208766726350751, "grad_norm": 23.319053649902344, "learning_rate": 7.358834244080146e-06, "loss": 1.7998, "step": 404 }, { "epoch": 0.022142339707229064, "grad_norm": 23.220468521118164, "learning_rate": 7.3770491803278695e-06, "loss": 1.6581, "step": 405 }, { "epoch": 0.02219701215095062, "grad_norm": 10.153203964233398, "learning_rate": 7.395264116575592e-06, "loss": 1.6762, "step": 406 }, { "epoch": 0.02225168459467217, "grad_norm": 21.929227828979492, "learning_rate": 7.413479052823316e-06, "loss": 1.7422, "step": 407 }, { "epoch": 0.022306357038393725, "grad_norm": 32.79121017456055, "learning_rate": 7.4316939890710394e-06, "loss": 1.7833, "step": 408 }, { "epoch": 0.022361029482115276, "grad_norm": 24.33732795715332, "learning_rate": 7.449908925318762e-06, "loss": 1.8367, "step": 409 }, { "epoch": 0.02241570192583683, "grad_norm": 9.545077323913574, "learning_rate": 7.468123861566485e-06, "loss": 1.8558, "step": 410 }, { "epoch": 0.02247037436955838, "grad_norm": 13.312451362609863, "learning_rate": 7.4863387978142085e-06, "loss": 1.7525, "step": 411 }, { "epoch": 0.022525046813279936, "grad_norm": 64.5172348022461, "learning_rate": 7.504553734061931e-06, "loss": 2.1004, "step": 412 }, { "epoch": 0.02257971925700149, "grad_norm": 6.659569263458252, "learning_rate": 7.522768670309654e-06, "loss": 1.7116, "step": 413 }, { "epoch": 0.022634391700723042, "grad_norm": 50.79689025878906, "learning_rate": 7.540983606557377e-06, "loss": 1.6249, "step": 414 }, { "epoch": 0.022689064144444597, "grad_norm": 274.016357421875, "learning_rate": 7.559198542805101e-06, "loss": 1.5976, "step": 415 }, { "epoch": 0.022743736588166148, "grad_norm": 65.52916717529297, "learning_rate": 7.577413479052824e-06, "loss": 1.5598, "step": 416 }, { "epoch": 0.022798409031887703, "grad_norm": 788.017822265625, "learning_rate": 7.595628415300547e-06, "loss": 1.6988, "step": 417 }, { "epoch": 0.022853081475609258, "grad_norm": 160.01016235351562, "learning_rate": 7.61384335154827e-06, "loss": 1.6407, "step": 418 }, { "epoch": 0.02290775391933081, "grad_norm": 57.53101348876953, "learning_rate": 7.632058287795994e-06, "loss": 1.6292, "step": 419 }, { "epoch": 0.022962426363052364, "grad_norm": 10.730374336242676, "learning_rate": 7.650273224043716e-06, "loss": 1.7518, "step": 420 }, { "epoch": 0.023017098806773915, "grad_norm": 29.367996215820312, "learning_rate": 7.66848816029144e-06, "loss": 1.5199, "step": 421 }, { "epoch": 0.02307177125049547, "grad_norm": 79.01619720458984, "learning_rate": 7.686703096539163e-06, "loss": 1.8853, "step": 422 }, { "epoch": 0.02312644369421702, "grad_norm": 12.513344764709473, "learning_rate": 7.704918032786886e-06, "loss": 1.4502, "step": 423 }, { "epoch": 0.023181116137938575, "grad_norm": 14.064740180969238, "learning_rate": 7.72313296903461e-06, "loss": 1.7711, "step": 424 }, { "epoch": 0.02323578858166013, "grad_norm": 16.678773880004883, "learning_rate": 7.741347905282333e-06, "loss": 1.8739, "step": 425 }, { "epoch": 0.02329046102538168, "grad_norm": 59.273536682128906, "learning_rate": 7.759562841530056e-06, "loss": 1.5786, "step": 426 }, { "epoch": 0.023345133469103236, "grad_norm": 96.51262664794922, "learning_rate": 7.77777777777778e-06, "loss": 1.8478, "step": 427 }, { "epoch": 0.023399805912824787, "grad_norm": 20.05228042602539, "learning_rate": 7.795992714025502e-06, "loss": 1.6104, "step": 428 }, { "epoch": 0.023454478356546342, "grad_norm": 19.190584182739258, "learning_rate": 7.814207650273224e-06, "loss": 1.7061, "step": 429 }, { "epoch": 0.023509150800267897, "grad_norm": 34.08525466918945, "learning_rate": 7.832422586520947e-06, "loss": 1.8103, "step": 430 }, { "epoch": 0.023563823243989448, "grad_norm": 170.0145721435547, "learning_rate": 7.85063752276867e-06, "loss": 1.8627, "step": 431 }, { "epoch": 0.023618495687711002, "grad_norm": 338.0140686035156, "learning_rate": 7.868852459016394e-06, "loss": 1.8022, "step": 432 }, { "epoch": 0.023673168131432554, "grad_norm": 37.80322265625, "learning_rate": 7.887067395264117e-06, "loss": 1.8159, "step": 433 }, { "epoch": 0.02372784057515411, "grad_norm": 30.426342010498047, "learning_rate": 7.905282331511839e-06, "loss": 1.7389, "step": 434 }, { "epoch": 0.02378251301887566, "grad_norm": 33.79006576538086, "learning_rate": 7.923497267759564e-06, "loss": 1.9101, "step": 435 }, { "epoch": 0.023837185462597214, "grad_norm": 100.51467895507812, "learning_rate": 7.941712204007287e-06, "loss": 1.5421, "step": 436 }, { "epoch": 0.02389185790631877, "grad_norm": 270.0106201171875, "learning_rate": 7.959927140255009e-06, "loss": 1.6692, "step": 437 }, { "epoch": 0.02394653035004032, "grad_norm": 35.04542541503906, "learning_rate": 7.978142076502732e-06, "loss": 1.7767, "step": 438 }, { "epoch": 0.024001202793761875, "grad_norm": 46.529052734375, "learning_rate": 7.996357012750456e-06, "loss": 1.7007, "step": 439 }, { "epoch": 0.024055875237483426, "grad_norm": 13.990824699401855, "learning_rate": 8.014571948998179e-06, "loss": 1.3247, "step": 440 }, { "epoch": 0.02411054768120498, "grad_norm": 19.045604705810547, "learning_rate": 8.032786885245902e-06, "loss": 1.7138, "step": 441 }, { "epoch": 0.024165220124926536, "grad_norm": 160.01927185058594, "learning_rate": 8.051001821493626e-06, "loss": 1.7007, "step": 442 }, { "epoch": 0.024219892568648087, "grad_norm": 8.53955364227295, "learning_rate": 8.069216757741349e-06, "loss": 1.778, "step": 443 }, { "epoch": 0.02427456501236964, "grad_norm": 20.32419204711914, "learning_rate": 8.087431693989072e-06, "loss": 1.8286, "step": 444 }, { "epoch": 0.024329237456091193, "grad_norm": 15.425686836242676, "learning_rate": 8.105646630236796e-06, "loss": 1.4766, "step": 445 }, { "epoch": 0.024383909899812747, "grad_norm": 29.053117752075195, "learning_rate": 8.123861566484517e-06, "loss": 1.8482, "step": 446 }, { "epoch": 0.0244385823435343, "grad_norm": 43.53908157348633, "learning_rate": 8.14207650273224e-06, "loss": 1.5597, "step": 447 }, { "epoch": 0.024493254787255853, "grad_norm": 18.705358505249023, "learning_rate": 8.160291438979966e-06, "loss": 1.5823, "step": 448 }, { "epoch": 0.024547927230977408, "grad_norm": 7.547692775726318, "learning_rate": 8.178506375227687e-06, "loss": 1.7081, "step": 449 }, { "epoch": 0.02460259967469896, "grad_norm": 14.39748477935791, "learning_rate": 8.19672131147541e-06, "loss": 1.7186, "step": 450 }, { "epoch": 0.024657272118420514, "grad_norm": 144.0106964111328, "learning_rate": 8.214936247723134e-06, "loss": 1.8191, "step": 451 }, { "epoch": 0.024711944562142065, "grad_norm": 64.01708221435547, "learning_rate": 8.233151183970857e-06, "loss": 1.6183, "step": 452 }, { "epoch": 0.02476661700586362, "grad_norm": 101.0206069946289, "learning_rate": 8.25136612021858e-06, "loss": 1.8195, "step": 453 }, { "epoch": 0.024821289449585174, "grad_norm": 17.010730743408203, "learning_rate": 8.269581056466302e-06, "loss": 1.5929, "step": 454 }, { "epoch": 0.024875961893306726, "grad_norm": 16.731687545776367, "learning_rate": 8.287795992714025e-06, "loss": 1.4595, "step": 455 }, { "epoch": 0.02493063433702828, "grad_norm": 9.012626647949219, "learning_rate": 8.30601092896175e-06, "loss": 1.8214, "step": 456 }, { "epoch": 0.02498530678074983, "grad_norm": 6.858699321746826, "learning_rate": 8.324225865209472e-06, "loss": 1.6511, "step": 457 }, { "epoch": 0.025039979224471386, "grad_norm": 19.92947769165039, "learning_rate": 8.342440801457195e-06, "loss": 1.7574, "step": 458 }, { "epoch": 0.025094651668192938, "grad_norm": 9.365639686584473, "learning_rate": 8.360655737704919e-06, "loss": 1.4392, "step": 459 }, { "epoch": 0.025149324111914492, "grad_norm": 5.968003273010254, "learning_rate": 8.378870673952642e-06, "loss": 1.7385, "step": 460 }, { "epoch": 0.025203996555636047, "grad_norm": 26.326936721801758, "learning_rate": 8.397085610200365e-06, "loss": 1.7256, "step": 461 }, { "epoch": 0.025258668999357598, "grad_norm": 23.189672470092773, "learning_rate": 8.415300546448089e-06, "loss": 1.8352, "step": 462 }, { "epoch": 0.025313341443079153, "grad_norm": 79.52198791503906, "learning_rate": 8.43351548269581e-06, "loss": 1.8782, "step": 463 }, { "epoch": 0.025368013886800704, "grad_norm": 5.539321422576904, "learning_rate": 8.451730418943535e-06, "loss": 1.5116, "step": 464 }, { "epoch": 0.02542268633052226, "grad_norm": 21.309106826782227, "learning_rate": 8.469945355191259e-06, "loss": 1.7954, "step": 465 }, { "epoch": 0.025477358774243813, "grad_norm": 7.675769805908203, "learning_rate": 8.48816029143898e-06, "loss": 1.6067, "step": 466 }, { "epoch": 0.025532031217965365, "grad_norm": 9.57551097869873, "learning_rate": 8.506375227686704e-06, "loss": 1.7915, "step": 467 }, { "epoch": 0.02558670366168692, "grad_norm": 7.009328842163086, "learning_rate": 8.524590163934427e-06, "loss": 1.4303, "step": 468 }, { "epoch": 0.02564137610540847, "grad_norm": 2.5177292823791504, "learning_rate": 8.54280510018215e-06, "loss": 1.5005, "step": 469 }, { "epoch": 0.025696048549130025, "grad_norm": 10.166261672973633, "learning_rate": 8.561020036429874e-06, "loss": 1.8151, "step": 470 }, { "epoch": 0.025750720992851577, "grad_norm": 4.73025369644165, "learning_rate": 8.579234972677595e-06, "loss": 1.6859, "step": 471 }, { "epoch": 0.02580539343657313, "grad_norm": 4.606897354125977, "learning_rate": 8.59744990892532e-06, "loss": 1.834, "step": 472 }, { "epoch": 0.025860065880294686, "grad_norm": 6.167375564575195, "learning_rate": 8.615664845173044e-06, "loss": 1.432, "step": 473 }, { "epoch": 0.025914738324016237, "grad_norm": 8.960147857666016, "learning_rate": 8.633879781420765e-06, "loss": 1.6297, "step": 474 }, { "epoch": 0.025969410767737792, "grad_norm": 6.362193584442139, "learning_rate": 8.652094717668488e-06, "loss": 1.5902, "step": 475 }, { "epoch": 0.026024083211459343, "grad_norm": 9.848794937133789, "learning_rate": 8.670309653916212e-06, "loss": 1.5507, "step": 476 }, { "epoch": 0.026078755655180898, "grad_norm": 134.01821899414062, "learning_rate": 8.688524590163935e-06, "loss": 1.861, "step": 477 }, { "epoch": 0.026133428098902452, "grad_norm": 44.777652740478516, "learning_rate": 8.706739526411658e-06, "loss": 1.5343, "step": 478 }, { "epoch": 0.026188100542624004, "grad_norm": 36.54542541503906, "learning_rate": 8.724954462659382e-06, "loss": 1.662, "step": 479 }, { "epoch": 0.02624277298634556, "grad_norm": 10.425684928894043, "learning_rate": 8.743169398907103e-06, "loss": 1.5, "step": 480 }, { "epoch": 0.02629744543006711, "grad_norm": 18.312904357910156, "learning_rate": 8.761384335154828e-06, "loss": 1.5104, "step": 481 }, { "epoch": 0.026352117873788664, "grad_norm": 195.01441955566406, "learning_rate": 8.779599271402552e-06, "loss": 1.625, "step": 482 }, { "epoch": 0.026406790317510215, "grad_norm": 8.86787223815918, "learning_rate": 8.797814207650273e-06, "loss": 1.5515, "step": 483 }, { "epoch": 0.02646146276123177, "grad_norm": 31.053314208984375, "learning_rate": 8.816029143897997e-06, "loss": 2.0787, "step": 484 }, { "epoch": 0.026516135204953325, "grad_norm": 7.857560157775879, "learning_rate": 8.83424408014572e-06, "loss": 1.906, "step": 485 }, { "epoch": 0.026570807648674876, "grad_norm": 4.57285737991333, "learning_rate": 8.852459016393443e-06, "loss": 1.7328, "step": 486 }, { "epoch": 0.02662548009239643, "grad_norm": 13.265275001525879, "learning_rate": 8.870673952641167e-06, "loss": 1.7053, "step": 487 }, { "epoch": 0.026680152536117982, "grad_norm": 6.015551567077637, "learning_rate": 8.888888888888888e-06, "loss": 1.6354, "step": 488 }, { "epoch": 0.026734824979839537, "grad_norm": 4.215312480926514, "learning_rate": 8.907103825136613e-06, "loss": 1.7984, "step": 489 }, { "epoch": 0.026789497423561088, "grad_norm": 16.210596084594727, "learning_rate": 8.925318761384337e-06, "loss": 1.5177, "step": 490 }, { "epoch": 0.026844169867282643, "grad_norm": 6.795618057250977, "learning_rate": 8.943533697632058e-06, "loss": 1.6754, "step": 491 }, { "epoch": 0.026898842311004197, "grad_norm": 11.831779479980469, "learning_rate": 8.961748633879782e-06, "loss": 1.5605, "step": 492 }, { "epoch": 0.02695351475472575, "grad_norm": 10.65335464477539, "learning_rate": 8.979963570127505e-06, "loss": 1.4869, "step": 493 }, { "epoch": 0.027008187198447303, "grad_norm": 6.668663024902344, "learning_rate": 8.998178506375228e-06, "loss": 1.633, "step": 494 }, { "epoch": 0.027062859642168854, "grad_norm": 13.726280212402344, "learning_rate": 9.016393442622952e-06, "loss": 1.6735, "step": 495 }, { "epoch": 0.02711753208589041, "grad_norm": 14.614136695861816, "learning_rate": 9.034608378870675e-06, "loss": 1.4094, "step": 496 }, { "epoch": 0.027172204529611964, "grad_norm": 12.225822448730469, "learning_rate": 9.052823315118398e-06, "loss": 1.7244, "step": 497 }, { "epoch": 0.027226876973333515, "grad_norm": 9.783112525939941, "learning_rate": 9.071038251366122e-06, "loss": 1.664, "step": 498 }, { "epoch": 0.02728154941705507, "grad_norm": 9.489224433898926, "learning_rate": 9.089253187613845e-06, "loss": 1.8194, "step": 499 }, { "epoch": 0.02733622186077662, "grad_norm": 14.986737251281738, "learning_rate": 9.107468123861566e-06, "loss": 1.5659, "step": 500 }, { "epoch": 0.027390894304498176, "grad_norm": 43.03645324707031, "learning_rate": 9.12568306010929e-06, "loss": 1.7144, "step": 501 }, { "epoch": 0.027445566748219727, "grad_norm": 15.087200164794922, "learning_rate": 9.143897996357015e-06, "loss": 1.9755, "step": 502 }, { "epoch": 0.02750023919194128, "grad_norm": 19.301931381225586, "learning_rate": 9.162112932604736e-06, "loss": 1.8966, "step": 503 }, { "epoch": 0.027554911635662836, "grad_norm": 6.955992698669434, "learning_rate": 9.18032786885246e-06, "loss": 1.6905, "step": 504 }, { "epoch": 0.027609584079384387, "grad_norm": 3.884648084640503, "learning_rate": 9.198542805100183e-06, "loss": 1.7818, "step": 505 }, { "epoch": 0.027664256523105942, "grad_norm": 4.608876705169678, "learning_rate": 9.216757741347906e-06, "loss": 1.8417, "step": 506 }, { "epoch": 0.027718928966827493, "grad_norm": 2.424344301223755, "learning_rate": 9.23497267759563e-06, "loss": 1.9972, "step": 507 }, { "epoch": 0.027773601410549048, "grad_norm": 3.5010344982147217, "learning_rate": 9.253187613843351e-06, "loss": 1.7629, "step": 508 }, { "epoch": 0.027828273854270603, "grad_norm": 6.331707954406738, "learning_rate": 9.271402550091075e-06, "loss": 1.6174, "step": 509 }, { "epoch": 0.027882946297992154, "grad_norm": 3.5301785469055176, "learning_rate": 9.2896174863388e-06, "loss": 1.6256, "step": 510 }, { "epoch": 0.02793761874171371, "grad_norm": 3.8192927837371826, "learning_rate": 9.307832422586521e-06, "loss": 1.4797, "step": 511 }, { "epoch": 0.02799229118543526, "grad_norm": 3.105834722518921, "learning_rate": 9.326047358834245e-06, "loss": 1.6919, "step": 512 }, { "epoch": 0.028046963629156815, "grad_norm": 24.90952491760254, "learning_rate": 9.344262295081968e-06, "loss": 1.7588, "step": 513 }, { "epoch": 0.028101636072878366, "grad_norm": 5.4175920486450195, "learning_rate": 9.362477231329691e-06, "loss": 1.5588, "step": 514 }, { "epoch": 0.02815630851659992, "grad_norm": 29.420063018798828, "learning_rate": 9.380692167577415e-06, "loss": 1.9164, "step": 515 }, { "epoch": 0.028210980960321475, "grad_norm": 10.098426818847656, "learning_rate": 9.398907103825138e-06, "loss": 1.4266, "step": 516 }, { "epoch": 0.028265653404043026, "grad_norm": 14.247804641723633, "learning_rate": 9.41712204007286e-06, "loss": 1.6919, "step": 517 }, { "epoch": 0.02832032584776458, "grad_norm": 8.183368682861328, "learning_rate": 9.435336976320585e-06, "loss": 1.5287, "step": 518 }, { "epoch": 0.028374998291486132, "grad_norm": 6.7284159660339355, "learning_rate": 9.453551912568308e-06, "loss": 1.6004, "step": 519 }, { "epoch": 0.028429670735207687, "grad_norm": 10.235706329345703, "learning_rate": 9.47176684881603e-06, "loss": 1.6788, "step": 520 }, { "epoch": 0.02848434317892924, "grad_norm": 17.185543060302734, "learning_rate": 9.489981785063753e-06, "loss": 1.6764, "step": 521 }, { "epoch": 0.028539015622650793, "grad_norm": 28.284420013427734, "learning_rate": 9.508196721311476e-06, "loss": 1.5209, "step": 522 }, { "epoch": 0.028593688066372348, "grad_norm": 4.12260103225708, "learning_rate": 9.5264116575592e-06, "loss": 1.6366, "step": 523 }, { "epoch": 0.0286483605100939, "grad_norm": 3.610032796859741, "learning_rate": 9.544626593806923e-06, "loss": 1.6223, "step": 524 }, { "epoch": 0.028703032953815454, "grad_norm": 4.30162239074707, "learning_rate": 9.562841530054644e-06, "loss": 1.5434, "step": 525 }, { "epoch": 0.028757705397537005, "grad_norm": 13.622430801391602, "learning_rate": 9.581056466302368e-06, "loss": 1.5989, "step": 526 }, { "epoch": 0.02881237784125856, "grad_norm": 6.309061050415039, "learning_rate": 9.599271402550093e-06, "loss": 1.5525, "step": 527 }, { "epoch": 0.028867050284980114, "grad_norm": 4.813770771026611, "learning_rate": 9.617486338797814e-06, "loss": 1.4329, "step": 528 }, { "epoch": 0.028921722728701665, "grad_norm": 8.192217826843262, "learning_rate": 9.635701275045538e-06, "loss": 1.3937, "step": 529 }, { "epoch": 0.02897639517242322, "grad_norm": 7.975921154022217, "learning_rate": 9.653916211293261e-06, "loss": 1.6263, "step": 530 }, { "epoch": 0.02903106761614477, "grad_norm": 12.64666748046875, "learning_rate": 9.672131147540984e-06, "loss": 1.4858, "step": 531 }, { "epoch": 0.029085740059866326, "grad_norm": 188.0128631591797, "learning_rate": 9.690346083788708e-06, "loss": 1.662, "step": 532 }, { "epoch": 0.02914041250358788, "grad_norm": 46.77443313598633, "learning_rate": 9.708561020036431e-06, "loss": 1.5439, "step": 533 }, { "epoch": 0.029195084947309432, "grad_norm": 1904.1121826171875, "learning_rate": 9.726775956284153e-06, "loss": 1.7982, "step": 534 }, { "epoch": 0.029249757391030987, "grad_norm": 171.03305053710938, "learning_rate": 9.744990892531878e-06, "loss": 1.6932, "step": 535 }, { "epoch": 0.029304429834752538, "grad_norm": 233.0238800048828, "learning_rate": 9.763205828779601e-06, "loss": 1.6538, "step": 536 }, { "epoch": 0.029359102278474093, "grad_norm": 14.903043746948242, "learning_rate": 9.781420765027323e-06, "loss": 1.5364, "step": 537 }, { "epoch": 0.029413774722195644, "grad_norm": 32.043087005615234, "learning_rate": 9.799635701275046e-06, "loss": 1.6852, "step": 538 }, { "epoch": 0.0294684471659172, "grad_norm": 120.51970672607422, "learning_rate": 9.81785063752277e-06, "loss": 1.4444, "step": 539 }, { "epoch": 0.029523119609638753, "grad_norm": 138.022216796875, "learning_rate": 9.836065573770493e-06, "loss": 1.3733, "step": 540 }, { "epoch": 0.029577792053360304, "grad_norm": 520.0370483398438, "learning_rate": 9.854280510018216e-06, "loss": 1.6568, "step": 541 }, { "epoch": 0.02963246449708186, "grad_norm": 44.2852897644043, "learning_rate": 9.872495446265938e-06, "loss": 1.6228, "step": 542 }, { "epoch": 0.02968713694080341, "grad_norm": 5.980830192565918, "learning_rate": 9.890710382513663e-06, "loss": 1.716, "step": 543 }, { "epoch": 0.029741809384524965, "grad_norm": 33.036312103271484, "learning_rate": 9.908925318761386e-06, "loss": 1.6408, "step": 544 }, { "epoch": 0.02979648182824652, "grad_norm": 115.51506805419922, "learning_rate": 9.927140255009108e-06, "loss": 1.6632, "step": 545 }, { "epoch": 0.02985115427196807, "grad_norm": 119.51416015625, "learning_rate": 9.945355191256831e-06, "loss": 1.7542, "step": 546 }, { "epoch": 0.029905826715689626, "grad_norm": 51.788551330566406, "learning_rate": 9.963570127504554e-06, "loss": 1.6726, "step": 547 }, { "epoch": 0.029960499159411177, "grad_norm": 6.184961795806885, "learning_rate": 9.981785063752277e-06, "loss": 1.436, "step": 548 }, { "epoch": 0.03001517160313273, "grad_norm": 4.3953022956848145, "learning_rate": 1e-05, "loss": 1.6481, "step": 549 }, { "epoch": 0.030069844046854283, "grad_norm": 5.75661039352417, "learning_rate": 1.0018214936247722e-05, "loss": 1.5588, "step": 550 }, { "epoch": 0.030124516490575837, "grad_norm": 4.860477447509766, "learning_rate": 1.0036429872495447e-05, "loss": 1.7314, "step": 551 }, { "epoch": 0.030179188934297392, "grad_norm": 7.09837007522583, "learning_rate": 1.005464480874317e-05, "loss": 1.5762, "step": 552 }, { "epoch": 0.030233861378018943, "grad_norm": 3.2026150226593018, "learning_rate": 1.0072859744990892e-05, "loss": 1.5582, "step": 553 }, { "epoch": 0.030288533821740498, "grad_norm": 3.1507716178894043, "learning_rate": 1.0091074681238617e-05, "loss": 1.3593, "step": 554 }, { "epoch": 0.03034320626546205, "grad_norm": 25.677522659301758, "learning_rate": 1.0109289617486339e-05, "loss": 1.6191, "step": 555 }, { "epoch": 0.030397878709183604, "grad_norm": 12.266745567321777, "learning_rate": 1.0127504553734062e-05, "loss": 1.4907, "step": 556 }, { "epoch": 0.03045255115290516, "grad_norm": 10.969487190246582, "learning_rate": 1.0145719489981787e-05, "loss": 1.643, "step": 557 }, { "epoch": 0.03050722359662671, "grad_norm": 6.642029762268066, "learning_rate": 1.0163934426229509e-05, "loss": 1.7484, "step": 558 }, { "epoch": 0.030561896040348265, "grad_norm": 8.79339599609375, "learning_rate": 1.0182149362477232e-05, "loss": 1.6713, "step": 559 }, { "epoch": 0.030616568484069816, "grad_norm": 2.608914852142334, "learning_rate": 1.0200364298724956e-05, "loss": 1.7269, "step": 560 }, { "epoch": 0.03067124092779137, "grad_norm": 5.862725734710693, "learning_rate": 1.0218579234972679e-05, "loss": 1.8067, "step": 561 }, { "epoch": 0.03072591337151292, "grad_norm": 4.448827743530273, "learning_rate": 1.02367941712204e-05, "loss": 1.8008, "step": 562 }, { "epoch": 0.030780585815234476, "grad_norm": 2.42978835105896, "learning_rate": 1.0255009107468126e-05, "loss": 1.65, "step": 563 }, { "epoch": 0.03083525825895603, "grad_norm": 6.816845893859863, "learning_rate": 1.0273224043715849e-05, "loss": 1.6722, "step": 564 }, { "epoch": 0.030889930702677582, "grad_norm": 9.44680404663086, "learning_rate": 1.029143897996357e-05, "loss": 1.6882, "step": 565 }, { "epoch": 0.030944603146399137, "grad_norm": 7.742184162139893, "learning_rate": 1.0309653916211296e-05, "loss": 1.5472, "step": 566 }, { "epoch": 0.030999275590120688, "grad_norm": 4.327066421508789, "learning_rate": 1.0327868852459017e-05, "loss": 1.6277, "step": 567 }, { "epoch": 0.031053948033842243, "grad_norm": 5.461192607879639, "learning_rate": 1.034608378870674e-05, "loss": 1.4209, "step": 568 }, { "epoch": 0.031108620477563798, "grad_norm": 3.7360358238220215, "learning_rate": 1.0364298724954462e-05, "loss": 1.6331, "step": 569 }, { "epoch": 0.03116329292128535, "grad_norm": 4.186465740203857, "learning_rate": 1.0382513661202187e-05, "loss": 1.5642, "step": 570 }, { "epoch": 0.031217965365006903, "grad_norm": 7.832154273986816, "learning_rate": 1.0400728597449909e-05, "loss": 1.7462, "step": 571 }, { "epoch": 0.03127263780872846, "grad_norm": 32.79762268066406, "learning_rate": 1.0418943533697632e-05, "loss": 1.7251, "step": 572 }, { "epoch": 0.03132731025245001, "grad_norm": 3.999708890914917, "learning_rate": 1.0437158469945357e-05, "loss": 1.6381, "step": 573 }, { "epoch": 0.03138198269617156, "grad_norm": 12.323874473571777, "learning_rate": 1.0455373406193079e-05, "loss": 2.0842, "step": 574 }, { "epoch": 0.03143665513989312, "grad_norm": 6.249226093292236, "learning_rate": 1.0473588342440802e-05, "loss": 1.4936, "step": 575 }, { "epoch": 0.03149132758361467, "grad_norm": 8.867032051086426, "learning_rate": 1.0491803278688525e-05, "loss": 1.9502, "step": 576 }, { "epoch": 0.03154600002733622, "grad_norm": 7.005560398101807, "learning_rate": 1.0510018214936249e-05, "loss": 1.8715, "step": 577 }, { "epoch": 0.03160067247105777, "grad_norm": 4.4658203125, "learning_rate": 1.052823315118397e-05, "loss": 1.6575, "step": 578 }, { "epoch": 0.03165534491477933, "grad_norm": 13.128610610961914, "learning_rate": 1.0546448087431695e-05, "loss": 1.5245, "step": 579 }, { "epoch": 0.03171001735850088, "grad_norm": 6.73513126373291, "learning_rate": 1.0564663023679417e-05, "loss": 1.8006, "step": 580 }, { "epoch": 0.03176468980222243, "grad_norm": 7.060339450836182, "learning_rate": 1.058287795992714e-05, "loss": 1.6943, "step": 581 }, { "epoch": 0.03181936224594399, "grad_norm": 11.59403133392334, "learning_rate": 1.0601092896174865e-05, "loss": 1.5839, "step": 582 }, { "epoch": 0.03187403468966554, "grad_norm": 4.472285747528076, "learning_rate": 1.0619307832422587e-05, "loss": 1.3259, "step": 583 }, { "epoch": 0.031928707133387094, "grad_norm": 4.010199069976807, "learning_rate": 1.063752276867031e-05, "loss": 1.624, "step": 584 }, { "epoch": 0.031983379577108645, "grad_norm": 9.272984504699707, "learning_rate": 1.0655737704918034e-05, "loss": 1.5848, "step": 585 }, { "epoch": 0.0320380520208302, "grad_norm": 4.2026214599609375, "learning_rate": 1.0673952641165757e-05, "loss": 1.859, "step": 586 }, { "epoch": 0.032092724464551754, "grad_norm": 10.884852409362793, "learning_rate": 1.0692167577413479e-05, "loss": 1.6738, "step": 587 }, { "epoch": 0.032147396908273305, "grad_norm": 14.73721981048584, "learning_rate": 1.0710382513661204e-05, "loss": 1.9183, "step": 588 }, { "epoch": 0.032202069351994864, "grad_norm": 10.187492370605469, "learning_rate": 1.0728597449908927e-05, "loss": 1.2606, "step": 589 }, { "epoch": 0.032256741795716415, "grad_norm": 14.465739250183105, "learning_rate": 1.0746812386156649e-05, "loss": 1.6241, "step": 590 }, { "epoch": 0.032311414239437966, "grad_norm": 7.834969520568848, "learning_rate": 1.0765027322404374e-05, "loss": 1.6039, "step": 591 }, { "epoch": 0.03236608668315952, "grad_norm": 3.3091492652893066, "learning_rate": 1.0783242258652095e-05, "loss": 1.7564, "step": 592 }, { "epoch": 0.032420759126881075, "grad_norm": 3.9838333129882812, "learning_rate": 1.0801457194899819e-05, "loss": 1.7132, "step": 593 }, { "epoch": 0.03247543157060263, "grad_norm": 6.917940139770508, "learning_rate": 1.0819672131147544e-05, "loss": 1.8773, "step": 594 }, { "epoch": 0.03253010401432418, "grad_norm": 9.394488334655762, "learning_rate": 1.0837887067395265e-05, "loss": 1.8009, "step": 595 }, { "epoch": 0.032584776458045736, "grad_norm": 4.738964080810547, "learning_rate": 1.0856102003642987e-05, "loss": 1.4849, "step": 596 }, { "epoch": 0.03263944890176729, "grad_norm": 3.525503396987915, "learning_rate": 1.0874316939890712e-05, "loss": 1.5913, "step": 597 }, { "epoch": 0.03269412134548884, "grad_norm": 5.691910743713379, "learning_rate": 1.0892531876138435e-05, "loss": 1.6261, "step": 598 }, { "epoch": 0.0327487937892104, "grad_norm": 4.7873358726501465, "learning_rate": 1.0910746812386157e-05, "loss": 1.4491, "step": 599 }, { "epoch": 0.03280346623293195, "grad_norm": 4.616060256958008, "learning_rate": 1.0928961748633882e-05, "loss": 1.6575, "step": 600 }, { "epoch": 0.0328581386766535, "grad_norm": 10.433281898498535, "learning_rate": 1.0947176684881603e-05, "loss": 1.6448, "step": 601 }, { "epoch": 0.03291281112037505, "grad_norm": 3.5171444416046143, "learning_rate": 1.0965391621129327e-05, "loss": 1.7057, "step": 602 }, { "epoch": 0.03296748356409661, "grad_norm": 4.219283103942871, "learning_rate": 1.0983606557377052e-05, "loss": 1.7957, "step": 603 }, { "epoch": 0.03302215600781816, "grad_norm": 4.261050701141357, "learning_rate": 1.1001821493624773e-05, "loss": 1.6284, "step": 604 }, { "epoch": 0.03307682845153971, "grad_norm": 9.697900772094727, "learning_rate": 1.1020036429872497e-05, "loss": 1.5927, "step": 605 }, { "epoch": 0.03313150089526127, "grad_norm": 5.829724311828613, "learning_rate": 1.1038251366120218e-05, "loss": 1.5334, "step": 606 }, { "epoch": 0.03318617333898282, "grad_norm": 2.626222848892212, "learning_rate": 1.1056466302367943e-05, "loss": 1.4924, "step": 607 }, { "epoch": 0.03324084578270437, "grad_norm": 3.740718364715576, "learning_rate": 1.1074681238615665e-05, "loss": 1.6957, "step": 608 }, { "epoch": 0.03329551822642592, "grad_norm": 5.807889461517334, "learning_rate": 1.1092896174863388e-05, "loss": 1.7035, "step": 609 }, { "epoch": 0.03335019067014748, "grad_norm": 41.54365921020508, "learning_rate": 1.1111111111111113e-05, "loss": 1.5687, "step": 610 }, { "epoch": 0.03340486311386903, "grad_norm": 113.03126525878906, "learning_rate": 1.1129326047358835e-05, "loss": 1.7354, "step": 611 }, { "epoch": 0.03345953555759058, "grad_norm": 5.581916809082031, "learning_rate": 1.1147540983606557e-05, "loss": 1.5874, "step": 612 }, { "epoch": 0.03351420800131214, "grad_norm": 33.03229522705078, "learning_rate": 1.1165755919854282e-05, "loss": 1.7766, "step": 613 }, { "epoch": 0.03356888044503369, "grad_norm": 12.083734512329102, "learning_rate": 1.1183970856102005e-05, "loss": 1.636, "step": 614 }, { "epoch": 0.033623552888755244, "grad_norm": 6.8849029541015625, "learning_rate": 1.1202185792349727e-05, "loss": 1.6793, "step": 615 }, { "epoch": 0.033678225332476795, "grad_norm": 16.020893096923828, "learning_rate": 1.1220400728597452e-05, "loss": 1.6941, "step": 616 }, { "epoch": 0.03373289777619835, "grad_norm": 8.924467086791992, "learning_rate": 1.1238615664845173e-05, "loss": 1.4666, "step": 617 }, { "epoch": 0.033787570219919905, "grad_norm": 8.862303733825684, "learning_rate": 1.1256830601092897e-05, "loss": 1.5832, "step": 618 }, { "epoch": 0.033842242663641456, "grad_norm": 6.0239410400390625, "learning_rate": 1.1275045537340622e-05, "loss": 1.5813, "step": 619 }, { "epoch": 0.033896915107363014, "grad_norm": 4.85123872756958, "learning_rate": 1.1293260473588343e-05, "loss": 1.4388, "step": 620 }, { "epoch": 0.033951587551084565, "grad_norm": 25.161012649536133, "learning_rate": 1.1311475409836066e-05, "loss": 1.8409, "step": 621 }, { "epoch": 0.034006259994806116, "grad_norm": 25.076623916625977, "learning_rate": 1.132969034608379e-05, "loss": 1.4562, "step": 622 }, { "epoch": 0.03406093243852767, "grad_norm": 6.7082390785217285, "learning_rate": 1.1347905282331513e-05, "loss": 1.1692, "step": 623 }, { "epoch": 0.034115604882249226, "grad_norm": 4.892858982086182, "learning_rate": 1.1366120218579235e-05, "loss": 1.6293, "step": 624 }, { "epoch": 0.03417027732597078, "grad_norm": 11.181694984436035, "learning_rate": 1.138433515482696e-05, "loss": 1.6143, "step": 625 }, { "epoch": 0.03422494976969233, "grad_norm": 4.447103023529053, "learning_rate": 1.1402550091074681e-05, "loss": 1.8192, "step": 626 }, { "epoch": 0.034279622213413886, "grad_norm": 3.1212823390960693, "learning_rate": 1.1420765027322405e-05, "loss": 1.5228, "step": 627 }, { "epoch": 0.03433429465713544, "grad_norm": 11.184993743896484, "learning_rate": 1.143897996357013e-05, "loss": 1.4187, "step": 628 }, { "epoch": 0.03438896710085699, "grad_norm": 7.824861526489258, "learning_rate": 1.1457194899817851e-05, "loss": 1.4879, "step": 629 }, { "epoch": 0.03444363954457855, "grad_norm": 3.905215263366699, "learning_rate": 1.1475409836065575e-05, "loss": 1.7575, "step": 630 }, { "epoch": 0.0344983119883001, "grad_norm": 18.328014373779297, "learning_rate": 1.1493624772313298e-05, "loss": 1.5347, "step": 631 }, { "epoch": 0.03455298443202165, "grad_norm": 103.51849365234375, "learning_rate": 1.1511839708561021e-05, "loss": 1.4426, "step": 632 }, { "epoch": 0.0346076568757432, "grad_norm": 132.04225158691406, "learning_rate": 1.1530054644808743e-05, "loss": 1.3866, "step": 633 }, { "epoch": 0.03466232931946476, "grad_norm": 332.01617431640625, "learning_rate": 1.1548269581056468e-05, "loss": 1.4103, "step": 634 }, { "epoch": 0.03471700176318631, "grad_norm": 139.00979614257812, "learning_rate": 1.1566484517304191e-05, "loss": 1.8443, "step": 635 }, { "epoch": 0.03477167420690786, "grad_norm": 141.0091552734375, "learning_rate": 1.1584699453551913e-05, "loss": 1.6841, "step": 636 }, { "epoch": 0.03482634665062942, "grad_norm": 55.52379608154297, "learning_rate": 1.1602914389799638e-05, "loss": 1.6039, "step": 637 }, { "epoch": 0.03488101909435097, "grad_norm": 81.02608489990234, "learning_rate": 1.162112932604736e-05, "loss": 1.4277, "step": 638 }, { "epoch": 0.03493569153807252, "grad_norm": 8.14922046661377, "learning_rate": 1.1639344262295083e-05, "loss": 1.5092, "step": 639 }, { "epoch": 0.03499036398179407, "grad_norm": 10.029525756835938, "learning_rate": 1.1657559198542808e-05, "loss": 1.5176, "step": 640 }, { "epoch": 0.03504503642551563, "grad_norm": 8.531185150146484, "learning_rate": 1.167577413479053e-05, "loss": 1.3684, "step": 641 }, { "epoch": 0.03509970886923718, "grad_norm": 3.040645122528076, "learning_rate": 1.1693989071038251e-05, "loss": 1.591, "step": 642 }, { "epoch": 0.035154381312958734, "grad_norm": 22.631431579589844, "learning_rate": 1.1712204007285975e-05, "loss": 1.7482, "step": 643 }, { "epoch": 0.03520905375668029, "grad_norm": 87.03707122802734, "learning_rate": 1.17304189435337e-05, "loss": 2.0554, "step": 644 }, { "epoch": 0.03526372620040184, "grad_norm": 7.931356906890869, "learning_rate": 1.1748633879781421e-05, "loss": 1.4348, "step": 645 }, { "epoch": 0.035318398644123394, "grad_norm": 20.706680297851562, "learning_rate": 1.1766848816029144e-05, "loss": 1.4187, "step": 646 }, { "epoch": 0.035373071087844946, "grad_norm": 17.480514526367188, "learning_rate": 1.1785063752276868e-05, "loss": 1.5945, "step": 647 }, { "epoch": 0.035427743531566504, "grad_norm": 5.6766204833984375, "learning_rate": 1.1803278688524591e-05, "loss": 1.5074, "step": 648 }, { "epoch": 0.035482415975288055, "grad_norm": 3.0095486640930176, "learning_rate": 1.1821493624772313e-05, "loss": 1.8405, "step": 649 }, { "epoch": 0.035537088419009606, "grad_norm": 7.316423416137695, "learning_rate": 1.1839708561020038e-05, "loss": 1.4364, "step": 650 }, { "epoch": 0.035591760862731164, "grad_norm": 3.2332794666290283, "learning_rate": 1.1857923497267761e-05, "loss": 1.3659, "step": 651 }, { "epoch": 0.035646433306452716, "grad_norm": 4.563765048980713, "learning_rate": 1.1876138433515483e-05, "loss": 1.6093, "step": 652 }, { "epoch": 0.03570110575017427, "grad_norm": 5.710320472717285, "learning_rate": 1.1894353369763208e-05, "loss": 1.8288, "step": 653 }, { "epoch": 0.035755778193895825, "grad_norm": 6.784440517425537, "learning_rate": 1.191256830601093e-05, "loss": 1.5702, "step": 654 }, { "epoch": 0.035810450637617376, "grad_norm": 4.906096935272217, "learning_rate": 1.1930783242258653e-05, "loss": 1.5365, "step": 655 }, { "epoch": 0.03586512308133893, "grad_norm": 5.759868144989014, "learning_rate": 1.1948998178506378e-05, "loss": 1.5999, "step": 656 }, { "epoch": 0.03591979552506048, "grad_norm": 4.045783519744873, "learning_rate": 1.19672131147541e-05, "loss": 1.7563, "step": 657 }, { "epoch": 0.03597446796878204, "grad_norm": 6.982707977294922, "learning_rate": 1.1985428051001821e-05, "loss": 1.6264, "step": 658 }, { "epoch": 0.03602914041250359, "grad_norm": 19.76385498046875, "learning_rate": 1.2003642987249546e-05, "loss": 1.4455, "step": 659 }, { "epoch": 0.03608381285622514, "grad_norm": 6.78510856628418, "learning_rate": 1.202185792349727e-05, "loss": 1.455, "step": 660 }, { "epoch": 0.0361384852999467, "grad_norm": 20.934667587280273, "learning_rate": 1.2040072859744991e-05, "loss": 1.4928, "step": 661 }, { "epoch": 0.03619315774366825, "grad_norm": 34.781654357910156, "learning_rate": 1.2058287795992716e-05, "loss": 1.8813, "step": 662 }, { "epoch": 0.0362478301873898, "grad_norm": 23.07387924194336, "learning_rate": 1.2076502732240438e-05, "loss": 1.7749, "step": 663 }, { "epoch": 0.03630250263111135, "grad_norm": 22.03473663330078, "learning_rate": 1.2094717668488161e-05, "loss": 1.6548, "step": 664 }, { "epoch": 0.03635717507483291, "grad_norm": 8.98752212524414, "learning_rate": 1.2112932604735886e-05, "loss": 1.2653, "step": 665 }, { "epoch": 0.03641184751855446, "grad_norm": 7.783271789550781, "learning_rate": 1.2131147540983608e-05, "loss": 1.5636, "step": 666 }, { "epoch": 0.03646651996227601, "grad_norm": 3.052783966064453, "learning_rate": 1.2149362477231331e-05, "loss": 1.5876, "step": 667 }, { "epoch": 0.03652119240599757, "grad_norm": 6.556682586669922, "learning_rate": 1.2167577413479054e-05, "loss": 1.6437, "step": 668 }, { "epoch": 0.03657586484971912, "grad_norm": 3.044846296310425, "learning_rate": 1.2185792349726778e-05, "loss": 1.4888, "step": 669 }, { "epoch": 0.03663053729344067, "grad_norm": 4.053534030914307, "learning_rate": 1.2204007285974499e-05, "loss": 1.5845, "step": 670 }, { "epoch": 0.036685209737162223, "grad_norm": 4.884008884429932, "learning_rate": 1.2222222222222224e-05, "loss": 1.7334, "step": 671 }, { "epoch": 0.03673988218088378, "grad_norm": 4.35679292678833, "learning_rate": 1.2240437158469946e-05, "loss": 1.6138, "step": 672 }, { "epoch": 0.03679455462460533, "grad_norm": 4.522188186645508, "learning_rate": 1.2258652094717669e-05, "loss": 1.579, "step": 673 }, { "epoch": 0.036849227068326884, "grad_norm": 2.9177489280700684, "learning_rate": 1.2276867030965394e-05, "loss": 1.5744, "step": 674 }, { "epoch": 0.03690389951204844, "grad_norm": 2.7164745330810547, "learning_rate": 1.2295081967213116e-05, "loss": 1.6004, "step": 675 }, { "epoch": 0.036958571955769993, "grad_norm": 2.510366678237915, "learning_rate": 1.2313296903460839e-05, "loss": 1.6603, "step": 676 }, { "epoch": 0.037013244399491545, "grad_norm": 10.043479919433594, "learning_rate": 1.2331511839708562e-05, "loss": 1.6413, "step": 677 }, { "epoch": 0.0370679168432131, "grad_norm": 15.094648361206055, "learning_rate": 1.2349726775956286e-05, "loss": 1.4164, "step": 678 }, { "epoch": 0.037122589286934654, "grad_norm": 7.889562129974365, "learning_rate": 1.2367941712204007e-05, "loss": 1.5836, "step": 679 }, { "epoch": 0.037177261730656205, "grad_norm": 14.107860565185547, "learning_rate": 1.238615664845173e-05, "loss": 1.5101, "step": 680 }, { "epoch": 0.03723193417437776, "grad_norm": 7.193120002746582, "learning_rate": 1.2404371584699456e-05, "loss": 1.5716, "step": 681 }, { "epoch": 0.037286606618099315, "grad_norm": 4.859918117523193, "learning_rate": 1.2422586520947177e-05, "loss": 1.5052, "step": 682 }, { "epoch": 0.037341279061820866, "grad_norm": 6.438990592956543, "learning_rate": 1.24408014571949e-05, "loss": 1.511, "step": 683 }, { "epoch": 0.03739595150554242, "grad_norm": 21.052907943725586, "learning_rate": 1.2459016393442624e-05, "loss": 1.5133, "step": 684 }, { "epoch": 0.037450623949263975, "grad_norm": 5.711713790893555, "learning_rate": 1.2477231329690347e-05, "loss": 1.5244, "step": 685 }, { "epoch": 0.03750529639298553, "grad_norm": 4.003962516784668, "learning_rate": 1.2495446265938069e-05, "loss": 1.8283, "step": 686 }, { "epoch": 0.03755996883670708, "grad_norm": 10.001753807067871, "learning_rate": 1.2513661202185794e-05, "loss": 1.4794, "step": 687 }, { "epoch": 0.03761464128042863, "grad_norm": 2.862154006958008, "learning_rate": 1.2531876138433516e-05, "loss": 1.7898, "step": 688 }, { "epoch": 0.03766931372415019, "grad_norm": 5.624928951263428, "learning_rate": 1.2550091074681239e-05, "loss": 1.7807, "step": 689 }, { "epoch": 0.03772398616787174, "grad_norm": 5.100170612335205, "learning_rate": 1.2568306010928964e-05, "loss": 1.4598, "step": 690 }, { "epoch": 0.03777865861159329, "grad_norm": 3.9843411445617676, "learning_rate": 1.2586520947176686e-05, "loss": 1.5355, "step": 691 }, { "epoch": 0.03783333105531485, "grad_norm": 5.7859578132629395, "learning_rate": 1.2604735883424409e-05, "loss": 1.8961, "step": 692 }, { "epoch": 0.0378880034990364, "grad_norm": 2.1335670948028564, "learning_rate": 1.2622950819672132e-05, "loss": 1.5885, "step": 693 }, { "epoch": 0.03794267594275795, "grad_norm": 2.2684943675994873, "learning_rate": 1.2641165755919856e-05, "loss": 1.3951, "step": 694 }, { "epoch": 0.0379973483864795, "grad_norm": 2.4361448287963867, "learning_rate": 1.2659380692167577e-05, "loss": 1.7012, "step": 695 }, { "epoch": 0.03805202083020106, "grad_norm": 3.639853000640869, "learning_rate": 1.2677595628415302e-05, "loss": 1.6273, "step": 696 }, { "epoch": 0.03810669327392261, "grad_norm": 2.096975088119507, "learning_rate": 1.2695810564663025e-05, "loss": 1.8317, "step": 697 }, { "epoch": 0.03816136571764416, "grad_norm": 2.2521655559539795, "learning_rate": 1.2714025500910747e-05, "loss": 1.6368, "step": 698 }, { "epoch": 0.03821603816136572, "grad_norm": 2.282578468322754, "learning_rate": 1.2732240437158472e-05, "loss": 1.6325, "step": 699 }, { "epoch": 0.03827071060508727, "grad_norm": 6.260979652404785, "learning_rate": 1.2750455373406194e-05, "loss": 1.6047, "step": 700 }, { "epoch": 0.03832538304880882, "grad_norm": 1.982373595237732, "learning_rate": 1.2768670309653917e-05, "loss": 1.5954, "step": 701 }, { "epoch": 0.03838005549253038, "grad_norm": 3.0477216243743896, "learning_rate": 1.2786885245901642e-05, "loss": 1.6826, "step": 702 }, { "epoch": 0.03843472793625193, "grad_norm": 5.212861061096191, "learning_rate": 1.2805100182149364e-05, "loss": 1.5726, "step": 703 }, { "epoch": 0.03848940037997348, "grad_norm": 1.5471229553222656, "learning_rate": 1.2823315118397085e-05, "loss": 1.5718, "step": 704 }, { "epoch": 0.038544072823695034, "grad_norm": 1.5528336763381958, "learning_rate": 1.284153005464481e-05, "loss": 1.6172, "step": 705 }, { "epoch": 0.03859874526741659, "grad_norm": 2.737852096557617, "learning_rate": 1.2859744990892534e-05, "loss": 1.5684, "step": 706 }, { "epoch": 0.038653417711138144, "grad_norm": 4.023623943328857, "learning_rate": 1.2877959927140255e-05, "loss": 1.4089, "step": 707 }, { "epoch": 0.038708090154859695, "grad_norm": 3.2251198291778564, "learning_rate": 1.289617486338798e-05, "loss": 1.6029, "step": 708 }, { "epoch": 0.03876276259858125, "grad_norm": 4.8897857666015625, "learning_rate": 1.2914389799635702e-05, "loss": 1.7954, "step": 709 }, { "epoch": 0.038817435042302804, "grad_norm": 1.7977705001831055, "learning_rate": 1.2932604735883425e-05, "loss": 1.465, "step": 710 }, { "epoch": 0.038872107486024356, "grad_norm": 5.745240211486816, "learning_rate": 1.295081967213115e-05, "loss": 1.3002, "step": 711 }, { "epoch": 0.03892677992974591, "grad_norm": 2.6106443405151367, "learning_rate": 1.2969034608378872e-05, "loss": 1.5778, "step": 712 }, { "epoch": 0.038981452373467465, "grad_norm": 2.387681007385254, "learning_rate": 1.2987249544626595e-05, "loss": 1.5166, "step": 713 }, { "epoch": 0.039036124817189016, "grad_norm": 3.949260711669922, "learning_rate": 1.3005464480874317e-05, "loss": 1.9056, "step": 714 }, { "epoch": 0.03909079726091057, "grad_norm": 2.0281448364257812, "learning_rate": 1.3023679417122042e-05, "loss": 1.4505, "step": 715 }, { "epoch": 0.039145469704632126, "grad_norm": 2.8135135173797607, "learning_rate": 1.3041894353369764e-05, "loss": 1.6035, "step": 716 }, { "epoch": 0.03920014214835368, "grad_norm": 3.019970655441284, "learning_rate": 1.3060109289617487e-05, "loss": 1.2199, "step": 717 }, { "epoch": 0.03925481459207523, "grad_norm": 2.67421817779541, "learning_rate": 1.307832422586521e-05, "loss": 1.6093, "step": 718 }, { "epoch": 0.03930948703579678, "grad_norm": 3.8656790256500244, "learning_rate": 1.3096539162112933e-05, "loss": 1.7057, "step": 719 }, { "epoch": 0.03936415947951834, "grad_norm": 1.7975623607635498, "learning_rate": 1.3114754098360655e-05, "loss": 1.6026, "step": 720 }, { "epoch": 0.03941883192323989, "grad_norm": 1.6123344898223877, "learning_rate": 1.313296903460838e-05, "loss": 1.4949, "step": 721 }, { "epoch": 0.03947350436696144, "grad_norm": 2.015040874481201, "learning_rate": 1.3151183970856103e-05, "loss": 1.4699, "step": 722 }, { "epoch": 0.039528176810683, "grad_norm": 2.093195676803589, "learning_rate": 1.3169398907103825e-05, "loss": 1.5118, "step": 723 }, { "epoch": 0.03958284925440455, "grad_norm": 2.8497426509857178, "learning_rate": 1.318761384335155e-05, "loss": 1.5373, "step": 724 }, { "epoch": 0.0396375216981261, "grad_norm": 3.374413013458252, "learning_rate": 1.3205828779599272e-05, "loss": 1.7074, "step": 725 }, { "epoch": 0.03969219414184765, "grad_norm": 6.981536865234375, "learning_rate": 1.3224043715846995e-05, "loss": 1.7364, "step": 726 }, { "epoch": 0.03974686658556921, "grad_norm": 1.7565622329711914, "learning_rate": 1.324225865209472e-05, "loss": 1.576, "step": 727 }, { "epoch": 0.03980153902929076, "grad_norm": 2.418100357055664, "learning_rate": 1.3260473588342442e-05, "loss": 1.1582, "step": 728 }, { "epoch": 0.03985621147301231, "grad_norm": 2.173644542694092, "learning_rate": 1.3278688524590165e-05, "loss": 1.5051, "step": 729 }, { "epoch": 0.03991088391673387, "grad_norm": 2.905071973800659, "learning_rate": 1.3296903460837888e-05, "loss": 1.3426, "step": 730 }, { "epoch": 0.03996555636045542, "grad_norm": 2.099263906478882, "learning_rate": 1.3315118397085612e-05, "loss": 1.6466, "step": 731 }, { "epoch": 0.04002022880417697, "grad_norm": 1.694176197052002, "learning_rate": 1.3333333333333333e-05, "loss": 1.6461, "step": 732 }, { "epoch": 0.04007490124789853, "grad_norm": 2.1338982582092285, "learning_rate": 1.3351548269581058e-05, "loss": 1.5175, "step": 733 }, { "epoch": 0.04012957369162008, "grad_norm": 2.0251243114471436, "learning_rate": 1.336976320582878e-05, "loss": 1.5002, "step": 734 }, { "epoch": 0.040184246135341634, "grad_norm": 5.281107425689697, "learning_rate": 1.3387978142076503e-05, "loss": 1.6841, "step": 735 }, { "epoch": 0.040238918579063185, "grad_norm": 4.499202251434326, "learning_rate": 1.3406193078324228e-05, "loss": 1.7833, "step": 736 }, { "epoch": 0.04029359102278474, "grad_norm": 2.2663779258728027, "learning_rate": 1.342440801457195e-05, "loss": 2.0445, "step": 737 }, { "epoch": 0.040348263466506294, "grad_norm": 1.5899535417556763, "learning_rate": 1.3442622950819673e-05, "loss": 1.6016, "step": 738 }, { "epoch": 0.040402935910227845, "grad_norm": 2.0759963989257812, "learning_rate": 1.3460837887067397e-05, "loss": 1.5877, "step": 739 }, { "epoch": 0.040457608353949404, "grad_norm": 2.0714035034179688, "learning_rate": 1.347905282331512e-05, "loss": 1.4544, "step": 740 }, { "epoch": 0.040512280797670955, "grad_norm": 1.6433204412460327, "learning_rate": 1.3497267759562842e-05, "loss": 1.5665, "step": 741 }, { "epoch": 0.040566953241392506, "grad_norm": 1.8273212909698486, "learning_rate": 1.3515482695810567e-05, "loss": 1.6072, "step": 742 }, { "epoch": 0.04062162568511406, "grad_norm": 2.9625658988952637, "learning_rate": 1.353369763205829e-05, "loss": 1.5736, "step": 743 }, { "epoch": 0.040676298128835615, "grad_norm": 1.6371225118637085, "learning_rate": 1.3551912568306011e-05, "loss": 1.3965, "step": 744 }, { "epoch": 0.04073097057255717, "grad_norm": 2.4661638736724854, "learning_rate": 1.3570127504553736e-05, "loss": 1.389, "step": 745 }, { "epoch": 0.04078564301627872, "grad_norm": 1.5258917808532715, "learning_rate": 1.3588342440801458e-05, "loss": 1.7132, "step": 746 }, { "epoch": 0.040840315460000276, "grad_norm": 2.029529094696045, "learning_rate": 1.3606557377049181e-05, "loss": 1.4379, "step": 747 }, { "epoch": 0.04089498790372183, "grad_norm": 3.1478819847106934, "learning_rate": 1.3624772313296906e-05, "loss": 1.6277, "step": 748 }, { "epoch": 0.04094966034744338, "grad_norm": 4.896278381347656, "learning_rate": 1.3642987249544628e-05, "loss": 1.4212, "step": 749 }, { "epoch": 0.04100433279116493, "grad_norm": 6.857458114624023, "learning_rate": 1.366120218579235e-05, "loss": 1.4302, "step": 750 }, { "epoch": 0.04105900523488649, "grad_norm": 2.3921518325805664, "learning_rate": 1.3679417122040073e-05, "loss": 1.3013, "step": 751 }, { "epoch": 0.04111367767860804, "grad_norm": 1.7917914390563965, "learning_rate": 1.3697632058287798e-05, "loss": 1.5334, "step": 752 }, { "epoch": 0.04116835012232959, "grad_norm": 2.7286770343780518, "learning_rate": 1.371584699453552e-05, "loss": 1.804, "step": 753 }, { "epoch": 0.04122302256605115, "grad_norm": 1.7219538688659668, "learning_rate": 1.3734061930783243e-05, "loss": 1.6594, "step": 754 }, { "epoch": 0.0412776950097727, "grad_norm": 1.5708924531936646, "learning_rate": 1.3752276867030966e-05, "loss": 1.8684, "step": 755 }, { "epoch": 0.04133236745349425, "grad_norm": 2.121535062789917, "learning_rate": 1.377049180327869e-05, "loss": 1.5952, "step": 756 }, { "epoch": 0.04138703989721581, "grad_norm": 1.6225206851959229, "learning_rate": 1.3788706739526411e-05, "loss": 1.4534, "step": 757 }, { "epoch": 0.04144171234093736, "grad_norm": 2.5721261501312256, "learning_rate": 1.3806921675774136e-05, "loss": 1.7414, "step": 758 }, { "epoch": 0.04149638478465891, "grad_norm": 3.6133809089660645, "learning_rate": 1.382513661202186e-05, "loss": 1.5975, "step": 759 }, { "epoch": 0.04155105722838046, "grad_norm": 3.664426326751709, "learning_rate": 1.3843351548269581e-05, "loss": 1.4293, "step": 760 }, { "epoch": 0.04160572967210202, "grad_norm": 8.339347839355469, "learning_rate": 1.3861566484517306e-05, "loss": 1.4354, "step": 761 }, { "epoch": 0.04166040211582357, "grad_norm": 6.7353434562683105, "learning_rate": 1.3879781420765028e-05, "loss": 1.1905, "step": 762 }, { "epoch": 0.04171507455954512, "grad_norm": 2.598757028579712, "learning_rate": 1.3897996357012751e-05, "loss": 1.4395, "step": 763 }, { "epoch": 0.04176974700326668, "grad_norm": 10.276640892028809, "learning_rate": 1.3916211293260475e-05, "loss": 1.504, "step": 764 }, { "epoch": 0.04182441944698823, "grad_norm": 6.1214141845703125, "learning_rate": 1.3934426229508198e-05, "loss": 1.2411, "step": 765 }, { "epoch": 0.041879091890709784, "grad_norm": 5.217445373535156, "learning_rate": 1.395264116575592e-05, "loss": 1.3147, "step": 766 }, { "epoch": 0.041933764334431335, "grad_norm": 3.003916025161743, "learning_rate": 1.3970856102003645e-05, "loss": 1.5854, "step": 767 }, { "epoch": 0.04198843677815289, "grad_norm": 4.03087043762207, "learning_rate": 1.3989071038251368e-05, "loss": 1.391, "step": 768 }, { "epoch": 0.042043109221874445, "grad_norm": 2.786803960800171, "learning_rate": 1.400728597449909e-05, "loss": 1.5443, "step": 769 }, { "epoch": 0.042097781665595996, "grad_norm": 11.0779447555542, "learning_rate": 1.4025500910746814e-05, "loss": 1.5606, "step": 770 }, { "epoch": 0.042152454109317554, "grad_norm": 10.564152717590332, "learning_rate": 1.4043715846994536e-05, "loss": 1.6478, "step": 771 }, { "epoch": 0.042207126553039105, "grad_norm": 2.6915221214294434, "learning_rate": 1.406193078324226e-05, "loss": 1.5733, "step": 772 }, { "epoch": 0.042261798996760656, "grad_norm": 6.5152974128723145, "learning_rate": 1.4080145719489984e-05, "loss": 1.4894, "step": 773 }, { "epoch": 0.04231647144048221, "grad_norm": 5.983321666717529, "learning_rate": 1.4098360655737706e-05, "loss": 1.5279, "step": 774 }, { "epoch": 0.042371143884203766, "grad_norm": 3.0311930179595947, "learning_rate": 1.411657559198543e-05, "loss": 1.4821, "step": 775 }, { "epoch": 0.04242581632792532, "grad_norm": 5.081065654754639, "learning_rate": 1.4134790528233153e-05, "loss": 1.4396, "step": 776 }, { "epoch": 0.04248048877164687, "grad_norm": 2.9162402153015137, "learning_rate": 1.4153005464480876e-05, "loss": 1.6176, "step": 777 }, { "epoch": 0.042535161215368426, "grad_norm": 2.272857189178467, "learning_rate": 1.4171220400728598e-05, "loss": 1.6596, "step": 778 }, { "epoch": 0.04258983365908998, "grad_norm": 8.419713020324707, "learning_rate": 1.4189435336976323e-05, "loss": 1.4064, "step": 779 }, { "epoch": 0.04264450610281153, "grad_norm": 2.677154541015625, "learning_rate": 1.4207650273224044e-05, "loss": 1.6531, "step": 780 }, { "epoch": 0.04269917854653309, "grad_norm": 5.763513565063477, "learning_rate": 1.4225865209471768e-05, "loss": 1.5625, "step": 781 }, { "epoch": 0.04275385099025464, "grad_norm": 2.8552024364471436, "learning_rate": 1.4244080145719493e-05, "loss": 1.263, "step": 782 }, { "epoch": 0.04280852343397619, "grad_norm": 3.3872663974761963, "learning_rate": 1.4262295081967214e-05, "loss": 1.3407, "step": 783 }, { "epoch": 0.04286319587769774, "grad_norm": 3.127913475036621, "learning_rate": 1.4280510018214938e-05, "loss": 1.3767, "step": 784 }, { "epoch": 0.0429178683214193, "grad_norm": 3.4496920108795166, "learning_rate": 1.4298724954462661e-05, "loss": 1.3579, "step": 785 }, { "epoch": 0.04297254076514085, "grad_norm": 3.6717374324798584, "learning_rate": 1.4316939890710384e-05, "loss": 1.7665, "step": 786 }, { "epoch": 0.0430272132088624, "grad_norm": 5.803621768951416, "learning_rate": 1.4335154826958106e-05, "loss": 1.6904, "step": 787 }, { "epoch": 0.04308188565258396, "grad_norm": 3.1783804893493652, "learning_rate": 1.435336976320583e-05, "loss": 1.6551, "step": 788 }, { "epoch": 0.04313655809630551, "grad_norm": 3.76396107673645, "learning_rate": 1.4371584699453554e-05, "loss": 1.2165, "step": 789 }, { "epoch": 0.04319123054002706, "grad_norm": 9.302123069763184, "learning_rate": 1.4389799635701276e-05, "loss": 1.4906, "step": 790 }, { "epoch": 0.04324590298374861, "grad_norm": 3.744433641433716, "learning_rate": 1.4408014571949e-05, "loss": 1.6967, "step": 791 }, { "epoch": 0.04330057542747017, "grad_norm": 3.9984867572784424, "learning_rate": 1.4426229508196722e-05, "loss": 1.5063, "step": 792 }, { "epoch": 0.04335524787119172, "grad_norm": 3.3221704959869385, "learning_rate": 1.4444444444444446e-05, "loss": 1.2987, "step": 793 }, { "epoch": 0.043409920314913274, "grad_norm": 3.9973151683807373, "learning_rate": 1.4462659380692167e-05, "loss": 1.3815, "step": 794 }, { "epoch": 0.04346459275863483, "grad_norm": 2.572755813598633, "learning_rate": 1.4480874316939892e-05, "loss": 1.2919, "step": 795 }, { "epoch": 0.04351926520235638, "grad_norm": 2.1283397674560547, "learning_rate": 1.4499089253187614e-05, "loss": 1.6123, "step": 796 }, { "epoch": 0.043573937646077934, "grad_norm": 7.445621967315674, "learning_rate": 1.4517304189435337e-05, "loss": 1.5386, "step": 797 }, { "epoch": 0.043628610089799486, "grad_norm": 2.5797955989837646, "learning_rate": 1.4535519125683062e-05, "loss": 1.7314, "step": 798 }, { "epoch": 0.043683282533521044, "grad_norm": 2.457653522491455, "learning_rate": 1.4553734061930784e-05, "loss": 1.5018, "step": 799 }, { "epoch": 0.043737954977242595, "grad_norm": 2.0535168647766113, "learning_rate": 1.4571948998178507e-05, "loss": 1.4632, "step": 800 }, { "epoch": 0.043792627420964146, "grad_norm": 2.4388575553894043, "learning_rate": 1.459016393442623e-05, "loss": 1.5292, "step": 801 }, { "epoch": 0.043847299864685704, "grad_norm": 2.4725406169891357, "learning_rate": 1.4608378870673954e-05, "loss": 1.2616, "step": 802 }, { "epoch": 0.043901972308407256, "grad_norm": 1.716168761253357, "learning_rate": 1.4626593806921676e-05, "loss": 1.542, "step": 803 }, { "epoch": 0.04395664475212881, "grad_norm": 3.7090916633605957, "learning_rate": 1.46448087431694e-05, "loss": 1.7902, "step": 804 }, { "epoch": 0.044011317195850365, "grad_norm": 1.4924829006195068, "learning_rate": 1.4663023679417124e-05, "loss": 1.5662, "step": 805 }, { "epoch": 0.044065989639571916, "grad_norm": 1.5973999500274658, "learning_rate": 1.4681238615664846e-05, "loss": 1.8222, "step": 806 }, { "epoch": 0.04412066208329347, "grad_norm": 1.808617115020752, "learning_rate": 1.469945355191257e-05, "loss": 1.6345, "step": 807 }, { "epoch": 0.04417533452701502, "grad_norm": 1.9447689056396484, "learning_rate": 1.4717668488160292e-05, "loss": 1.9055, "step": 808 }, { "epoch": 0.04423000697073658, "grad_norm": 2.3446459770202637, "learning_rate": 1.4735883424408016e-05, "loss": 1.6025, "step": 809 }, { "epoch": 0.04428467941445813, "grad_norm": 1.9693615436553955, "learning_rate": 1.4754098360655739e-05, "loss": 1.2323, "step": 810 }, { "epoch": 0.04433935185817968, "grad_norm": 2.602023124694824, "learning_rate": 1.4772313296903462e-05, "loss": 1.3293, "step": 811 }, { "epoch": 0.04439402430190124, "grad_norm": 3.7961273193359375, "learning_rate": 1.4790528233151184e-05, "loss": 1.4638, "step": 812 }, { "epoch": 0.04444869674562279, "grad_norm": 6.500240802764893, "learning_rate": 1.4808743169398909e-05, "loss": 1.7582, "step": 813 }, { "epoch": 0.04450336918934434, "grad_norm": 1.9717845916748047, "learning_rate": 1.4826958105646632e-05, "loss": 1.4128, "step": 814 }, { "epoch": 0.04455804163306589, "grad_norm": 4.814689636230469, "learning_rate": 1.4845173041894354e-05, "loss": 1.6573, "step": 815 }, { "epoch": 0.04461271407678745, "grad_norm": 1.786383032798767, "learning_rate": 1.4863387978142079e-05, "loss": 1.6199, "step": 816 }, { "epoch": 0.044667386520509, "grad_norm": 3.108644723892212, "learning_rate": 1.48816029143898e-05, "loss": 1.5752, "step": 817 }, { "epoch": 0.04472205896423055, "grad_norm": 5.063683986663818, "learning_rate": 1.4899817850637524e-05, "loss": 1.76, "step": 818 }, { "epoch": 0.04477673140795211, "grad_norm": 1.7165228128433228, "learning_rate": 1.4918032786885249e-05, "loss": 1.529, "step": 819 }, { "epoch": 0.04483140385167366, "grad_norm": 3.9811997413635254, "learning_rate": 1.493624772313297e-05, "loss": 1.6195, "step": 820 }, { "epoch": 0.04488607629539521, "grad_norm": 3.599674701690674, "learning_rate": 1.4954462659380694e-05, "loss": 1.5822, "step": 821 }, { "epoch": 0.04494074873911676, "grad_norm": 1.7354850769042969, "learning_rate": 1.4972677595628417e-05, "loss": 1.8033, "step": 822 }, { "epoch": 0.04499542118283832, "grad_norm": 2.0085256099700928, "learning_rate": 1.499089253187614e-05, "loss": 1.6582, "step": 823 }, { "epoch": 0.04505009362655987, "grad_norm": 1.4832592010498047, "learning_rate": 1.5009107468123862e-05, "loss": 1.3401, "step": 824 }, { "epoch": 0.045104766070281424, "grad_norm": 3.5641300678253174, "learning_rate": 1.5027322404371585e-05, "loss": 1.7478, "step": 825 }, { "epoch": 0.04515943851400298, "grad_norm": 5.570059776306152, "learning_rate": 1.5045537340619309e-05, "loss": 1.5388, "step": 826 }, { "epoch": 0.04521411095772453, "grad_norm": 1.7775079011917114, "learning_rate": 1.5063752276867032e-05, "loss": 1.8037, "step": 827 }, { "epoch": 0.045268783401446085, "grad_norm": 2.799924850463867, "learning_rate": 1.5081967213114754e-05, "loss": 1.4236, "step": 828 }, { "epoch": 0.04532345584516764, "grad_norm": 3.31817626953125, "learning_rate": 1.5100182149362479e-05, "loss": 1.4596, "step": 829 }, { "epoch": 0.045378128288889194, "grad_norm": 1.529395580291748, "learning_rate": 1.5118397085610202e-05, "loss": 1.6778, "step": 830 }, { "epoch": 0.045432800732610745, "grad_norm": 1.5050219297409058, "learning_rate": 1.5136612021857924e-05, "loss": 1.4787, "step": 831 }, { "epoch": 0.045487473176332296, "grad_norm": 3.343756675720215, "learning_rate": 1.5154826958105649e-05, "loss": 1.7319, "step": 832 }, { "epoch": 0.045542145620053855, "grad_norm": 2.030867099761963, "learning_rate": 1.517304189435337e-05, "loss": 1.6069, "step": 833 }, { "epoch": 0.045596818063775406, "grad_norm": 1.667038917541504, "learning_rate": 1.5191256830601094e-05, "loss": 1.5095, "step": 834 }, { "epoch": 0.04565149050749696, "grad_norm": 3.142927646636963, "learning_rate": 1.5209471766848819e-05, "loss": 1.5182, "step": 835 }, { "epoch": 0.045706162951218515, "grad_norm": 2.058892250061035, "learning_rate": 1.522768670309654e-05, "loss": 1.5842, "step": 836 }, { "epoch": 0.045760835394940066, "grad_norm": 2.1462595462799072, "learning_rate": 1.5245901639344264e-05, "loss": 1.8373, "step": 837 }, { "epoch": 0.04581550783866162, "grad_norm": 1.9963507652282715, "learning_rate": 1.526411657559199e-05, "loss": 1.7243, "step": 838 }, { "epoch": 0.04587018028238317, "grad_norm": 4.727952480316162, "learning_rate": 1.528233151183971e-05, "loss": 1.5418, "step": 839 }, { "epoch": 0.04592485272610473, "grad_norm": 5.376098155975342, "learning_rate": 1.5300546448087432e-05, "loss": 1.6243, "step": 840 }, { "epoch": 0.04597952516982628, "grad_norm": 4.539033889770508, "learning_rate": 1.5318761384335155e-05, "loss": 1.3653, "step": 841 }, { "epoch": 0.04603419761354783, "grad_norm": 2.2920732498168945, "learning_rate": 1.533697632058288e-05, "loss": 1.5507, "step": 842 }, { "epoch": 0.04608887005726939, "grad_norm": 4.329361915588379, "learning_rate": 1.5355191256830602e-05, "loss": 1.7795, "step": 843 }, { "epoch": 0.04614354250099094, "grad_norm": 1.9213992357254028, "learning_rate": 1.5373406193078325e-05, "loss": 1.529, "step": 844 }, { "epoch": 0.04619821494471249, "grad_norm": 6.432906627655029, "learning_rate": 1.539162112932605e-05, "loss": 1.631, "step": 845 }, { "epoch": 0.04625288738843404, "grad_norm": 3.654005289077759, "learning_rate": 1.5409836065573772e-05, "loss": 1.1951, "step": 846 }, { "epoch": 0.0463075598321556, "grad_norm": 3.0333025455474854, "learning_rate": 1.5428051001821495e-05, "loss": 1.6924, "step": 847 }, { "epoch": 0.04636223227587715, "grad_norm": 7.173074245452881, "learning_rate": 1.544626593806922e-05, "loss": 1.6842, "step": 848 }, { "epoch": 0.0464169047195987, "grad_norm": 3.187944173812866, "learning_rate": 1.5464480874316942e-05, "loss": 1.4207, "step": 849 }, { "epoch": 0.04647157716332026, "grad_norm": 3.134347915649414, "learning_rate": 1.5482695810564665e-05, "loss": 1.4003, "step": 850 }, { "epoch": 0.04652624960704181, "grad_norm": 9.158797264099121, "learning_rate": 1.550091074681239e-05, "loss": 1.6755, "step": 851 }, { "epoch": 0.04658092205076336, "grad_norm": 3.1534347534179688, "learning_rate": 1.551912568306011e-05, "loss": 1.6437, "step": 852 }, { "epoch": 0.046635594494484914, "grad_norm": 7.485493183135986, "learning_rate": 1.5537340619307835e-05, "loss": 1.6673, "step": 853 }, { "epoch": 0.04669026693820647, "grad_norm": 4.552635669708252, "learning_rate": 1.555555555555556e-05, "loss": 1.6082, "step": 854 }, { "epoch": 0.04674493938192802, "grad_norm": 3.2286365032196045, "learning_rate": 1.5573770491803278e-05, "loss": 1.3608, "step": 855 }, { "epoch": 0.046799611825649574, "grad_norm": 2.725600004196167, "learning_rate": 1.5591985428051005e-05, "loss": 1.662, "step": 856 }, { "epoch": 0.04685428426937113, "grad_norm": 6.69663667678833, "learning_rate": 1.5610200364298725e-05, "loss": 1.5679, "step": 857 }, { "epoch": 0.046908956713092684, "grad_norm": 1.7624690532684326, "learning_rate": 1.5628415300546448e-05, "loss": 1.416, "step": 858 }, { "epoch": 0.046963629156814235, "grad_norm": 2.476332187652588, "learning_rate": 1.5646630236794175e-05, "loss": 1.4983, "step": 859 }, { "epoch": 0.04701830160053579, "grad_norm": 3.708991765975952, "learning_rate": 1.5664845173041895e-05, "loss": 1.2732, "step": 860 }, { "epoch": 0.047072974044257344, "grad_norm": 11.681467056274414, "learning_rate": 1.5683060109289618e-05, "loss": 1.6337, "step": 861 }, { "epoch": 0.047127646487978896, "grad_norm": 3.1926848888397217, "learning_rate": 1.570127504553734e-05, "loss": 1.5703, "step": 862 }, { "epoch": 0.04718231893170045, "grad_norm": 3.1303701400756836, "learning_rate": 1.5719489981785065e-05, "loss": 1.6857, "step": 863 }, { "epoch": 0.047236991375422005, "grad_norm": 2.138373851776123, "learning_rate": 1.5737704918032788e-05, "loss": 1.5523, "step": 864 }, { "epoch": 0.047291663819143556, "grad_norm": 2.0010087490081787, "learning_rate": 1.575591985428051e-05, "loss": 1.4741, "step": 865 }, { "epoch": 0.04734633626286511, "grad_norm": 6.404220104217529, "learning_rate": 1.5774134790528235e-05, "loss": 1.4174, "step": 866 }, { "epoch": 0.047401008706586666, "grad_norm": 2.44767427444458, "learning_rate": 1.5792349726775958e-05, "loss": 1.3683, "step": 867 }, { "epoch": 0.04745568115030822, "grad_norm": 1.2939001321792603, "learning_rate": 1.5810564663023678e-05, "loss": 1.4515, "step": 868 }, { "epoch": 0.04751035359402977, "grad_norm": 3.8777928352355957, "learning_rate": 1.5828779599271405e-05, "loss": 1.3731, "step": 869 }, { "epoch": 0.04756502603775132, "grad_norm": 2.1154935359954834, "learning_rate": 1.5846994535519128e-05, "loss": 1.4479, "step": 870 }, { "epoch": 0.04761969848147288, "grad_norm": 1.5473954677581787, "learning_rate": 1.5865209471766848e-05, "loss": 1.4957, "step": 871 }, { "epoch": 0.04767437092519443, "grad_norm": 1.7548542022705078, "learning_rate": 1.5883424408014575e-05, "loss": 1.6404, "step": 872 }, { "epoch": 0.04772904336891598, "grad_norm": 1.888444185256958, "learning_rate": 1.5901639344262295e-05, "loss": 1.5814, "step": 873 }, { "epoch": 0.04778371581263754, "grad_norm": 2.575420379638672, "learning_rate": 1.5919854280510018e-05, "loss": 1.8286, "step": 874 }, { "epoch": 0.04783838825635909, "grad_norm": 2.360790729522705, "learning_rate": 1.5938069216757745e-05, "loss": 1.6037, "step": 875 }, { "epoch": 0.04789306070008064, "grad_norm": 1.4499542713165283, "learning_rate": 1.5956284153005465e-05, "loss": 1.1254, "step": 876 }, { "epoch": 0.04794773314380219, "grad_norm": 5.6409735679626465, "learning_rate": 1.5974499089253188e-05, "loss": 1.8781, "step": 877 }, { "epoch": 0.04800240558752375, "grad_norm": 1.8175671100616455, "learning_rate": 1.599271402550091e-05, "loss": 1.6105, "step": 878 }, { "epoch": 0.0480570780312453, "grad_norm": 3.8995778560638428, "learning_rate": 1.6010928961748635e-05, "loss": 1.2042, "step": 879 }, { "epoch": 0.04811175047496685, "grad_norm": 2.368727922439575, "learning_rate": 1.6029143897996358e-05, "loss": 1.9235, "step": 880 }, { "epoch": 0.04816642291868841, "grad_norm": 1.598502278327942, "learning_rate": 1.604735883424408e-05, "loss": 1.4585, "step": 881 }, { "epoch": 0.04822109536240996, "grad_norm": 2.228663444519043, "learning_rate": 1.6065573770491805e-05, "loss": 1.6017, "step": 882 }, { "epoch": 0.04827576780613151, "grad_norm": 4.30324125289917, "learning_rate": 1.6083788706739528e-05, "loss": 1.5903, "step": 883 }, { "epoch": 0.04833044024985307, "grad_norm": 1.7949811220169067, "learning_rate": 1.610200364298725e-05, "loss": 1.4788, "step": 884 }, { "epoch": 0.04838511269357462, "grad_norm": 2.146536111831665, "learning_rate": 1.6120218579234975e-05, "loss": 1.5623, "step": 885 }, { "epoch": 0.048439785137296174, "grad_norm": 1.6137200593948364, "learning_rate": 1.6138433515482698e-05, "loss": 1.672, "step": 886 }, { "epoch": 0.048494457581017725, "grad_norm": 3.445019006729126, "learning_rate": 1.615664845173042e-05, "loss": 1.581, "step": 887 }, { "epoch": 0.04854913002473928, "grad_norm": 4.117085933685303, "learning_rate": 1.6174863387978145e-05, "loss": 1.5459, "step": 888 }, { "epoch": 0.048603802468460834, "grad_norm": 3.461700439453125, "learning_rate": 1.6193078324225864e-05, "loss": 1.657, "step": 889 }, { "epoch": 0.048658474912182385, "grad_norm": 1.895340919494629, "learning_rate": 1.621129326047359e-05, "loss": 1.4412, "step": 890 }, { "epoch": 0.048713147355903944, "grad_norm": 1.7956609725952148, "learning_rate": 1.6229508196721314e-05, "loss": 1.4053, "step": 891 }, { "epoch": 0.048767819799625495, "grad_norm": 2.080864906311035, "learning_rate": 1.6247723132969034e-05, "loss": 1.5769, "step": 892 }, { "epoch": 0.048822492243347046, "grad_norm": 6.0239176750183105, "learning_rate": 1.626593806921676e-05, "loss": 1.6206, "step": 893 }, { "epoch": 0.0488771646870686, "grad_norm": 4.171196460723877, "learning_rate": 1.628415300546448e-05, "loss": 1.6149, "step": 894 }, { "epoch": 0.048931837130790155, "grad_norm": 7.570348262786865, "learning_rate": 1.6302367941712204e-05, "loss": 1.38, "step": 895 }, { "epoch": 0.04898650957451171, "grad_norm": 2.623032331466675, "learning_rate": 1.632058287795993e-05, "loss": 1.5385, "step": 896 }, { "epoch": 0.04904118201823326, "grad_norm": 3.1774494647979736, "learning_rate": 1.633879781420765e-05, "loss": 1.7647, "step": 897 }, { "epoch": 0.049095854461954816, "grad_norm": 2.3001108169555664, "learning_rate": 1.6357012750455374e-05, "loss": 1.6056, "step": 898 }, { "epoch": 0.04915052690567637, "grad_norm": 1.8896375894546509, "learning_rate": 1.6375227686703098e-05, "loss": 1.4944, "step": 899 }, { "epoch": 0.04920519934939792, "grad_norm": 2.0304481983184814, "learning_rate": 1.639344262295082e-05, "loss": 1.6486, "step": 900 }, { "epoch": 0.04925987179311947, "grad_norm": 3.496107816696167, "learning_rate": 1.6411657559198544e-05, "loss": 1.5009, "step": 901 }, { "epoch": 0.04931454423684103, "grad_norm": 3.934615135192871, "learning_rate": 1.6429872495446268e-05, "loss": 1.663, "step": 902 }, { "epoch": 0.04936921668056258, "grad_norm": 6.3559980392456055, "learning_rate": 1.644808743169399e-05, "loss": 1.3779, "step": 903 }, { "epoch": 0.04942388912428413, "grad_norm": 3.6680500507354736, "learning_rate": 1.6466302367941714e-05, "loss": 1.6105, "step": 904 }, { "epoch": 0.04947856156800569, "grad_norm": 2.504138231277466, "learning_rate": 1.6484517304189434e-05, "loss": 1.6682, "step": 905 }, { "epoch": 0.04953323401172724, "grad_norm": 2.1290829181671143, "learning_rate": 1.650273224043716e-05, "loss": 1.5636, "step": 906 }, { "epoch": 0.04958790645544879, "grad_norm": 1.8531492948532104, "learning_rate": 1.6520947176684884e-05, "loss": 1.8869, "step": 907 }, { "epoch": 0.04964257889917035, "grad_norm": 2.230724334716797, "learning_rate": 1.6539162112932604e-05, "loss": 1.3735, "step": 908 }, { "epoch": 0.0496972513428919, "grad_norm": 1.8534215688705444, "learning_rate": 1.655737704918033e-05, "loss": 1.7069, "step": 909 }, { "epoch": 0.04975192378661345, "grad_norm": 3.3351008892059326, "learning_rate": 1.657559198542805e-05, "loss": 1.5511, "step": 910 }, { "epoch": 0.049806596230335, "grad_norm": 1.8800227642059326, "learning_rate": 1.6593806921675774e-05, "loss": 1.5654, "step": 911 }, { "epoch": 0.04986126867405656, "grad_norm": 6.941686630249023, "learning_rate": 1.66120218579235e-05, "loss": 1.445, "step": 912 }, { "epoch": 0.04991594111777811, "grad_norm": 3.706303119659424, "learning_rate": 1.663023679417122e-05, "loss": 1.0795, "step": 913 }, { "epoch": 0.04997061356149966, "grad_norm": 1.4608372449874878, "learning_rate": 1.6648451730418944e-05, "loss": 1.6127, "step": 914 }, { "epoch": 0.05002528600522122, "grad_norm": 2.818208694458008, "learning_rate": 1.6666666666666667e-05, "loss": 1.5789, "step": 915 }, { "epoch": 0.05007995844894277, "grad_norm": 1.8790833950042725, "learning_rate": 1.668488160291439e-05, "loss": 1.6579, "step": 916 }, { "epoch": 0.050134630892664324, "grad_norm": 2.069784164428711, "learning_rate": 1.6703096539162114e-05, "loss": 1.4288, "step": 917 }, { "epoch": 0.050189303336385875, "grad_norm": 2.9918954372406006, "learning_rate": 1.6721311475409837e-05, "loss": 1.6925, "step": 918 }, { "epoch": 0.05024397578010743, "grad_norm": 10.072663307189941, "learning_rate": 1.673952641165756e-05, "loss": 1.5524, "step": 919 }, { "epoch": 0.050298648223828984, "grad_norm": 2.634843111038208, "learning_rate": 1.6757741347905284e-05, "loss": 1.681, "step": 920 }, { "epoch": 0.050353320667550536, "grad_norm": 10.10677433013916, "learning_rate": 1.6775956284153007e-05, "loss": 1.5766, "step": 921 }, { "epoch": 0.050407993111272094, "grad_norm": 8.688218116760254, "learning_rate": 1.679417122040073e-05, "loss": 1.6295, "step": 922 }, { "epoch": 0.050462665554993645, "grad_norm": 2.4425764083862305, "learning_rate": 1.6812386156648454e-05, "loss": 1.4733, "step": 923 }, { "epoch": 0.050517337998715196, "grad_norm": 3.7150521278381348, "learning_rate": 1.6830601092896177e-05, "loss": 1.715, "step": 924 }, { "epoch": 0.05057201044243675, "grad_norm": 10.078227043151855, "learning_rate": 1.68488160291439e-05, "loss": 1.5097, "step": 925 }, { "epoch": 0.050626682886158306, "grad_norm": 4.0565185546875, "learning_rate": 1.686703096539162e-05, "loss": 1.5259, "step": 926 }, { "epoch": 0.05068135532987986, "grad_norm": 3.0063624382019043, "learning_rate": 1.6885245901639347e-05, "loss": 1.5554, "step": 927 }, { "epoch": 0.05073602777360141, "grad_norm": 7.295692443847656, "learning_rate": 1.690346083788707e-05, "loss": 1.6359, "step": 928 }, { "epoch": 0.050790700217322966, "grad_norm": 3.9223926067352295, "learning_rate": 1.692167577413479e-05, "loss": 1.4597, "step": 929 }, { "epoch": 0.05084537266104452, "grad_norm": 2.686375141143799, "learning_rate": 1.6939890710382517e-05, "loss": 1.7427, "step": 930 }, { "epoch": 0.05090004510476607, "grad_norm": 1.971695065498352, "learning_rate": 1.6958105646630237e-05, "loss": 1.3597, "step": 931 }, { "epoch": 0.05095471754848763, "grad_norm": 1.644282579421997, "learning_rate": 1.697632058287796e-05, "loss": 1.6065, "step": 932 }, { "epoch": 0.05100938999220918, "grad_norm": 1.5265675783157349, "learning_rate": 1.6994535519125684e-05, "loss": 1.7453, "step": 933 }, { "epoch": 0.05106406243593073, "grad_norm": 1.8123087882995605, "learning_rate": 1.7012750455373407e-05, "loss": 1.4306, "step": 934 }, { "epoch": 0.05111873487965228, "grad_norm": 3.455627918243408, "learning_rate": 1.703096539162113e-05, "loss": 1.4107, "step": 935 }, { "epoch": 0.05117340732337384, "grad_norm": 4.1404709815979, "learning_rate": 1.7049180327868854e-05, "loss": 1.6703, "step": 936 }, { "epoch": 0.05122807976709539, "grad_norm": 2.290452003479004, "learning_rate": 1.7067395264116577e-05, "loss": 1.2655, "step": 937 }, { "epoch": 0.05128275221081694, "grad_norm": 2.179626226425171, "learning_rate": 1.70856102003643e-05, "loss": 1.528, "step": 938 }, { "epoch": 0.0513374246545385, "grad_norm": 1.3252441883087158, "learning_rate": 1.7103825136612024e-05, "loss": 1.8522, "step": 939 }, { "epoch": 0.05139209709826005, "grad_norm": 2.855977773666382, "learning_rate": 1.7122040072859747e-05, "loss": 1.2938, "step": 940 }, { "epoch": 0.0514467695419816, "grad_norm": 1.379390001296997, "learning_rate": 1.714025500910747e-05, "loss": 1.5766, "step": 941 }, { "epoch": 0.05150144198570315, "grad_norm": 5.651096820831299, "learning_rate": 1.715846994535519e-05, "loss": 1.6683, "step": 942 }, { "epoch": 0.05155611442942471, "grad_norm": 3.637247085571289, "learning_rate": 1.7176684881602917e-05, "loss": 1.6906, "step": 943 }, { "epoch": 0.05161078687314626, "grad_norm": 2.3121721744537354, "learning_rate": 1.719489981785064e-05, "loss": 1.5616, "step": 944 }, { "epoch": 0.051665459316867814, "grad_norm": 4.0798258781433105, "learning_rate": 1.721311475409836e-05, "loss": 1.6013, "step": 945 }, { "epoch": 0.05172013176058937, "grad_norm": 6.3804755210876465, "learning_rate": 1.7231329690346087e-05, "loss": 1.5191, "step": 946 }, { "epoch": 0.05177480420431092, "grad_norm": 3.940263032913208, "learning_rate": 1.7249544626593807e-05, "loss": 1.2863, "step": 947 }, { "epoch": 0.051829476648032474, "grad_norm": 4.398348331451416, "learning_rate": 1.726775956284153e-05, "loss": 1.582, "step": 948 }, { "epoch": 0.051884149091754025, "grad_norm": 1.8613778352737427, "learning_rate": 1.7285974499089254e-05, "loss": 1.3857, "step": 949 }, { "epoch": 0.051938821535475584, "grad_norm": 3.5695252418518066, "learning_rate": 1.7304189435336977e-05, "loss": 1.3844, "step": 950 }, { "epoch": 0.051993493979197135, "grad_norm": 5.344568729400635, "learning_rate": 1.73224043715847e-05, "loss": 1.6195, "step": 951 }, { "epoch": 0.052048166422918686, "grad_norm": 3.689749002456665, "learning_rate": 1.7340619307832424e-05, "loss": 1.5391, "step": 952 }, { "epoch": 0.052102838866640244, "grad_norm": 2.6063392162323, "learning_rate": 1.7358834244080147e-05, "loss": 1.3251, "step": 953 }, { "epoch": 0.052157511310361795, "grad_norm": 5.555291175842285, "learning_rate": 1.737704918032787e-05, "loss": 1.7463, "step": 954 }, { "epoch": 0.05221218375408335, "grad_norm": 3.231858015060425, "learning_rate": 1.7395264116575594e-05, "loss": 1.3178, "step": 955 }, { "epoch": 0.052266856197804905, "grad_norm": 5.1603593826293945, "learning_rate": 1.7413479052823317e-05, "loss": 1.4977, "step": 956 }, { "epoch": 0.052321528641526456, "grad_norm": 3.642411470413208, "learning_rate": 1.743169398907104e-05, "loss": 1.5201, "step": 957 }, { "epoch": 0.05237620108524801, "grad_norm": 2.8716976642608643, "learning_rate": 1.7449908925318764e-05, "loss": 1.3853, "step": 958 }, { "epoch": 0.05243087352896956, "grad_norm": 2.647634744644165, "learning_rate": 1.7468123861566487e-05, "loss": 1.3722, "step": 959 }, { "epoch": 0.05248554597269112, "grad_norm": 13.008302688598633, "learning_rate": 1.7486338797814207e-05, "loss": 1.5672, "step": 960 }, { "epoch": 0.05254021841641267, "grad_norm": 21.122316360473633, "learning_rate": 1.7504553734061934e-05, "loss": 1.6306, "step": 961 }, { "epoch": 0.05259489086013422, "grad_norm": 5.95751953125, "learning_rate": 1.7522768670309657e-05, "loss": 1.6665, "step": 962 }, { "epoch": 0.05264956330385578, "grad_norm": 1.751538872718811, "learning_rate": 1.7540983606557377e-05, "loss": 1.6251, "step": 963 }, { "epoch": 0.05270423574757733, "grad_norm": 7.346284866333008, "learning_rate": 1.7559198542805104e-05, "loss": 1.4605, "step": 964 }, { "epoch": 0.05275890819129888, "grad_norm": 3.4131035804748535, "learning_rate": 1.7577413479052823e-05, "loss": 1.6506, "step": 965 }, { "epoch": 0.05281358063502043, "grad_norm": 3.648735284805298, "learning_rate": 1.7595628415300547e-05, "loss": 1.428, "step": 966 }, { "epoch": 0.05286825307874199, "grad_norm": 4.90679407119751, "learning_rate": 1.7613843351548273e-05, "loss": 1.5075, "step": 967 }, { "epoch": 0.05292292552246354, "grad_norm": 3.487226963043213, "learning_rate": 1.7632058287795993e-05, "loss": 1.8266, "step": 968 }, { "epoch": 0.05297759796618509, "grad_norm": 13.721217155456543, "learning_rate": 1.7650273224043717e-05, "loss": 1.4885, "step": 969 }, { "epoch": 0.05303227040990665, "grad_norm": 3.632723569869995, "learning_rate": 1.766848816029144e-05, "loss": 1.5197, "step": 970 }, { "epoch": 0.0530869428536282, "grad_norm": 6.269376277923584, "learning_rate": 1.7686703096539163e-05, "loss": 1.6423, "step": 971 }, { "epoch": 0.05314161529734975, "grad_norm": 5.2685866355896, "learning_rate": 1.7704918032786887e-05, "loss": 1.4691, "step": 972 }, { "epoch": 0.0531962877410713, "grad_norm": 2.100022792816162, "learning_rate": 1.772313296903461e-05, "loss": 1.4929, "step": 973 }, { "epoch": 0.05325096018479286, "grad_norm": 1.4794156551361084, "learning_rate": 1.7741347905282333e-05, "loss": 1.8626, "step": 974 }, { "epoch": 0.05330563262851441, "grad_norm": 1.8801671266555786, "learning_rate": 1.7759562841530057e-05, "loss": 1.4282, "step": 975 }, { "epoch": 0.053360305072235964, "grad_norm": 2.1098215579986572, "learning_rate": 1.7777777777777777e-05, "loss": 1.3707, "step": 976 }, { "epoch": 0.05341497751595752, "grad_norm": 2.8537511825561523, "learning_rate": 1.7795992714025503e-05, "loss": 1.7197, "step": 977 }, { "epoch": 0.05346964995967907, "grad_norm": 1.8107129335403442, "learning_rate": 1.7814207650273227e-05, "loss": 1.799, "step": 978 }, { "epoch": 0.053524322403400625, "grad_norm": 1.802124261856079, "learning_rate": 1.7832422586520947e-05, "loss": 1.5618, "step": 979 }, { "epoch": 0.053578994847122176, "grad_norm": 2.193699359893799, "learning_rate": 1.7850637522768673e-05, "loss": 1.7969, "step": 980 }, { "epoch": 0.053633667290843734, "grad_norm": 1.3491523265838623, "learning_rate": 1.7868852459016393e-05, "loss": 1.6397, "step": 981 }, { "epoch": 0.053688339734565285, "grad_norm": 1.9052479267120361, "learning_rate": 1.7887067395264117e-05, "loss": 1.5105, "step": 982 }, { "epoch": 0.053743012178286836, "grad_norm": 1.9738547801971436, "learning_rate": 1.7905282331511843e-05, "loss": 1.2898, "step": 983 }, { "epoch": 0.053797684622008395, "grad_norm": 1.6019853353500366, "learning_rate": 1.7923497267759563e-05, "loss": 1.5262, "step": 984 }, { "epoch": 0.053852357065729946, "grad_norm": 1.4718248844146729, "learning_rate": 1.7941712204007287e-05, "loss": 1.6606, "step": 985 }, { "epoch": 0.0539070295094515, "grad_norm": 3.0115110874176025, "learning_rate": 1.795992714025501e-05, "loss": 1.462, "step": 986 }, { "epoch": 0.053961701953173055, "grad_norm": 1.6953569650650024, "learning_rate": 1.7978142076502733e-05, "loss": 1.448, "step": 987 }, { "epoch": 0.054016374396894606, "grad_norm": 2.7603445053100586, "learning_rate": 1.7996357012750456e-05, "loss": 1.478, "step": 988 }, { "epoch": 0.05407104684061616, "grad_norm": 1.7125484943389893, "learning_rate": 1.801457194899818e-05, "loss": 1.7102, "step": 989 }, { "epoch": 0.05412571928433771, "grad_norm": 1.7711455821990967, "learning_rate": 1.8032786885245903e-05, "loss": 1.7981, "step": 990 }, { "epoch": 0.05418039172805927, "grad_norm": 1.6023921966552734, "learning_rate": 1.8051001821493626e-05, "loss": 1.3865, "step": 991 }, { "epoch": 0.05423506417178082, "grad_norm": 1.4686400890350342, "learning_rate": 1.806921675774135e-05, "loss": 1.4419, "step": 992 }, { "epoch": 0.05428973661550237, "grad_norm": 1.535040259361267, "learning_rate": 1.8087431693989073e-05, "loss": 1.4635, "step": 993 }, { "epoch": 0.05434440905922393, "grad_norm": 3.0991134643554688, "learning_rate": 1.8105646630236796e-05, "loss": 1.3793, "step": 994 }, { "epoch": 0.05439908150294548, "grad_norm": 1.734311580657959, "learning_rate": 1.812386156648452e-05, "loss": 1.5018, "step": 995 }, { "epoch": 0.05445375394666703, "grad_norm": 4.180115699768066, "learning_rate": 1.8142076502732243e-05, "loss": 1.5115, "step": 996 }, { "epoch": 0.05450842639038858, "grad_norm": 2.2569239139556885, "learning_rate": 1.8160291438979963e-05, "loss": 1.7182, "step": 997 }, { "epoch": 0.05456309883411014, "grad_norm": 2.1206791400909424, "learning_rate": 1.817850637522769e-05, "loss": 1.3737, "step": 998 }, { "epoch": 0.05461777127783169, "grad_norm": 2.0928454399108887, "learning_rate": 1.8196721311475413e-05, "loss": 1.5603, "step": 999 }, { "epoch": 0.05467244372155324, "grad_norm": 2.1331875324249268, "learning_rate": 1.8214936247723133e-05, "loss": 1.6915, "step": 1000 }, { "epoch": 0.0547271161652748, "grad_norm": 2.1182360649108887, "learning_rate": 1.823315118397086e-05, "loss": 1.3913, "step": 1001 }, { "epoch": 0.05478178860899635, "grad_norm": 4.402976036071777, "learning_rate": 1.825136612021858e-05, "loss": 1.2436, "step": 1002 }, { "epoch": 0.0548364610527179, "grad_norm": 2.5606698989868164, "learning_rate": 1.8269581056466303e-05, "loss": 1.3675, "step": 1003 }, { "epoch": 0.054891133496439454, "grad_norm": 1.648170828819275, "learning_rate": 1.828779599271403e-05, "loss": 1.1424, "step": 1004 }, { "epoch": 0.05494580594016101, "grad_norm": 2.2595036029815674, "learning_rate": 1.830601092896175e-05, "loss": 1.6033, "step": 1005 }, { "epoch": 0.05500047838388256, "grad_norm": 2.6725800037384033, "learning_rate": 1.8324225865209473e-05, "loss": 1.4997, "step": 1006 }, { "epoch": 0.055055150827604114, "grad_norm": 2.3853471279144287, "learning_rate": 1.8342440801457196e-05, "loss": 1.187, "step": 1007 }, { "epoch": 0.05510982327132567, "grad_norm": 2.312697649002075, "learning_rate": 1.836065573770492e-05, "loss": 1.6389, "step": 1008 }, { "epoch": 0.055164495715047224, "grad_norm": 1.994649887084961, "learning_rate": 1.8378870673952643e-05, "loss": 1.4825, "step": 1009 }, { "epoch": 0.055219168158768775, "grad_norm": 1.7566077709197998, "learning_rate": 1.8397085610200366e-05, "loss": 1.3348, "step": 1010 }, { "epoch": 0.05527384060249033, "grad_norm": 1.648114562034607, "learning_rate": 1.841530054644809e-05, "loss": 1.4343, "step": 1011 }, { "epoch": 0.055328513046211884, "grad_norm": 1.9346117973327637, "learning_rate": 1.8433515482695813e-05, "loss": 1.3903, "step": 1012 }, { "epoch": 0.055383185489933436, "grad_norm": 1.3931632041931152, "learning_rate": 1.8451730418943533e-05, "loss": 1.4651, "step": 1013 }, { "epoch": 0.05543785793365499, "grad_norm": 1.7746509313583374, "learning_rate": 1.846994535519126e-05, "loss": 1.6191, "step": 1014 }, { "epoch": 0.055492530377376545, "grad_norm": 2.4960145950317383, "learning_rate": 1.8488160291438983e-05, "loss": 1.3351, "step": 1015 }, { "epoch": 0.055547202821098096, "grad_norm": 2.483114004135132, "learning_rate": 1.8506375227686703e-05, "loss": 1.3025, "step": 1016 }, { "epoch": 0.05560187526481965, "grad_norm": 1.9836492538452148, "learning_rate": 1.852459016393443e-05, "loss": 1.4424, "step": 1017 }, { "epoch": 0.055656547708541206, "grad_norm": 2.1728503704071045, "learning_rate": 1.854280510018215e-05, "loss": 1.5348, "step": 1018 }, { "epoch": 0.05571122015226276, "grad_norm": 1.7335433959960938, "learning_rate": 1.8561020036429873e-05, "loss": 1.6959, "step": 1019 }, { "epoch": 0.05576589259598431, "grad_norm": 3.323720693588257, "learning_rate": 1.85792349726776e-05, "loss": 1.5194, "step": 1020 }, { "epoch": 0.05582056503970586, "grad_norm": 1.6985464096069336, "learning_rate": 1.859744990892532e-05, "loss": 1.3654, "step": 1021 }, { "epoch": 0.05587523748342742, "grad_norm": 3.6221601963043213, "learning_rate": 1.8615664845173043e-05, "loss": 1.6049, "step": 1022 }, { "epoch": 0.05592990992714897, "grad_norm": 2.118238687515259, "learning_rate": 1.8633879781420766e-05, "loss": 1.405, "step": 1023 }, { "epoch": 0.05598458237087052, "grad_norm": 1.392772912979126, "learning_rate": 1.865209471766849e-05, "loss": 1.6013, "step": 1024 }, { "epoch": 0.05603925481459208, "grad_norm": 1.9341024160385132, "learning_rate": 1.8670309653916213e-05, "loss": 1.4611, "step": 1025 }, { "epoch": 0.05609392725831363, "grad_norm": 1.6137667894363403, "learning_rate": 1.8688524590163936e-05, "loss": 1.6608, "step": 1026 }, { "epoch": 0.05614859970203518, "grad_norm": 3.8758645057678223, "learning_rate": 1.870673952641166e-05, "loss": 1.313, "step": 1027 }, { "epoch": 0.05620327214575673, "grad_norm": 1.9938558340072632, "learning_rate": 1.8724954462659383e-05, "loss": 1.4965, "step": 1028 }, { "epoch": 0.05625794458947829, "grad_norm": 1.643349289894104, "learning_rate": 1.8743169398907106e-05, "loss": 1.7114, "step": 1029 }, { "epoch": 0.05631261703319984, "grad_norm": 3.1866776943206787, "learning_rate": 1.876138433515483e-05, "loss": 1.5527, "step": 1030 }, { "epoch": 0.05636728947692139, "grad_norm": 1.9505897760391235, "learning_rate": 1.8779599271402553e-05, "loss": 1.4288, "step": 1031 }, { "epoch": 0.05642196192064295, "grad_norm": 4.569311618804932, "learning_rate": 1.8797814207650276e-05, "loss": 1.4027, "step": 1032 }, { "epoch": 0.0564766343643645, "grad_norm": 15.42387866973877, "learning_rate": 1.8816029143898e-05, "loss": 1.0865, "step": 1033 }, { "epoch": 0.05653130680808605, "grad_norm": 6.7173357009887695, "learning_rate": 1.883424408014572e-05, "loss": 1.7441, "step": 1034 }, { "epoch": 0.05658597925180761, "grad_norm": 5.930701732635498, "learning_rate": 1.8852459016393446e-05, "loss": 1.6154, "step": 1035 }, { "epoch": 0.05664065169552916, "grad_norm": 3.0842459201812744, "learning_rate": 1.887067395264117e-05, "loss": 1.4357, "step": 1036 }, { "epoch": 0.05669532413925071, "grad_norm": 3.2369511127471924, "learning_rate": 1.888888888888889e-05, "loss": 1.5107, "step": 1037 }, { "epoch": 0.056749996582972265, "grad_norm": 3.3002097606658936, "learning_rate": 1.8907103825136616e-05, "loss": 1.5001, "step": 1038 }, { "epoch": 0.05680466902669382, "grad_norm": 2.126617431640625, "learning_rate": 1.8925318761384336e-05, "loss": 1.6559, "step": 1039 }, { "epoch": 0.056859341470415374, "grad_norm": 2.6268253326416016, "learning_rate": 1.894353369763206e-05, "loss": 1.5865, "step": 1040 }, { "epoch": 0.056914013914136925, "grad_norm": 2.055335760116577, "learning_rate": 1.8961748633879782e-05, "loss": 1.7524, "step": 1041 }, { "epoch": 0.05696868635785848, "grad_norm": 1.658119559288025, "learning_rate": 1.8979963570127506e-05, "loss": 1.5698, "step": 1042 }, { "epoch": 0.057023358801580035, "grad_norm": 2.2727537155151367, "learning_rate": 1.899817850637523e-05, "loss": 1.6137, "step": 1043 }, { "epoch": 0.057078031245301586, "grad_norm": 2.0980093479156494, "learning_rate": 1.9016393442622952e-05, "loss": 1.6682, "step": 1044 }, { "epoch": 0.05713270368902314, "grad_norm": 2.001721143722534, "learning_rate": 1.9034608378870676e-05, "loss": 1.4169, "step": 1045 }, { "epoch": 0.057187376132744695, "grad_norm": 1.5935534238815308, "learning_rate": 1.90528233151184e-05, "loss": 1.8592, "step": 1046 }, { "epoch": 0.057242048576466247, "grad_norm": 2.2897579669952393, "learning_rate": 1.9071038251366122e-05, "loss": 1.3441, "step": 1047 }, { "epoch": 0.0572967210201878, "grad_norm": 1.455405831336975, "learning_rate": 1.9089253187613846e-05, "loss": 1.4009, "step": 1048 }, { "epoch": 0.057351393463909356, "grad_norm": 2.1127169132232666, "learning_rate": 1.910746812386157e-05, "loss": 1.3165, "step": 1049 }, { "epoch": 0.05740606590763091, "grad_norm": 1.8106131553649902, "learning_rate": 1.912568306010929e-05, "loss": 1.5021, "step": 1050 }, { "epoch": 0.05746073835135246, "grad_norm": 1.6566798686981201, "learning_rate": 1.9143897996357016e-05, "loss": 1.4709, "step": 1051 }, { "epoch": 0.05751541079507401, "grad_norm": 2.473273277282715, "learning_rate": 1.9162112932604736e-05, "loss": 1.475, "step": 1052 }, { "epoch": 0.05757008323879557, "grad_norm": 1.532096028327942, "learning_rate": 1.918032786885246e-05, "loss": 1.6681, "step": 1053 }, { "epoch": 0.05762475568251712, "grad_norm": 2.3521230220794678, "learning_rate": 1.9198542805100186e-05, "loss": 1.4466, "step": 1054 }, { "epoch": 0.05767942812623867, "grad_norm": 1.928490161895752, "learning_rate": 1.9216757741347906e-05, "loss": 1.7069, "step": 1055 }, { "epoch": 0.05773410056996023, "grad_norm": 1.7642972469329834, "learning_rate": 1.923497267759563e-05, "loss": 1.3873, "step": 1056 }, { "epoch": 0.05778877301368178, "grad_norm": 1.6640342473983765, "learning_rate": 1.9253187613843352e-05, "loss": 1.4465, "step": 1057 }, { "epoch": 0.05784344545740333, "grad_norm": 1.992295265197754, "learning_rate": 1.9271402550091076e-05, "loss": 1.4095, "step": 1058 }, { "epoch": 0.05789811790112489, "grad_norm": 1.6689677238464355, "learning_rate": 1.92896174863388e-05, "loss": 1.5348, "step": 1059 }, { "epoch": 0.05795279034484644, "grad_norm": 1.7221934795379639, "learning_rate": 1.9307832422586522e-05, "loss": 1.8616, "step": 1060 }, { "epoch": 0.05800746278856799, "grad_norm": 1.6706196069717407, "learning_rate": 1.9326047358834245e-05, "loss": 1.283, "step": 1061 }, { "epoch": 0.05806213523228954, "grad_norm": 1.4273476600646973, "learning_rate": 1.934426229508197e-05, "loss": 1.565, "step": 1062 }, { "epoch": 0.0581168076760111, "grad_norm": 1.8446556329727173, "learning_rate": 1.9362477231329692e-05, "loss": 1.7859, "step": 1063 }, { "epoch": 0.05817148011973265, "grad_norm": 2.028374195098877, "learning_rate": 1.9380692167577415e-05, "loss": 1.4987, "step": 1064 }, { "epoch": 0.0582261525634542, "grad_norm": 1.8138765096664429, "learning_rate": 1.939890710382514e-05, "loss": 1.467, "step": 1065 }, { "epoch": 0.05828082500717576, "grad_norm": 1.6561753749847412, "learning_rate": 1.9417122040072862e-05, "loss": 1.6494, "step": 1066 }, { "epoch": 0.05833549745089731, "grad_norm": 3.5509824752807617, "learning_rate": 1.9435336976320585e-05, "loss": 1.7616, "step": 1067 }, { "epoch": 0.058390169894618864, "grad_norm": 1.8162500858306885, "learning_rate": 1.9453551912568305e-05, "loss": 1.8082, "step": 1068 }, { "epoch": 0.058444842338340415, "grad_norm": 4.98644495010376, "learning_rate": 1.9471766848816032e-05, "loss": 1.5197, "step": 1069 }, { "epoch": 0.05849951478206197, "grad_norm": 1.2290866374969482, "learning_rate": 1.9489981785063755e-05, "loss": 1.4892, "step": 1070 }, { "epoch": 0.058554187225783524, "grad_norm": 1.662894368171692, "learning_rate": 1.9508196721311475e-05, "loss": 1.5019, "step": 1071 }, { "epoch": 0.058608859669505076, "grad_norm": 1.86185622215271, "learning_rate": 1.9526411657559202e-05, "loss": 1.4415, "step": 1072 }, { "epoch": 0.058663532113226634, "grad_norm": 1.6555534601211548, "learning_rate": 1.9544626593806922e-05, "loss": 1.428, "step": 1073 }, { "epoch": 0.058718204556948185, "grad_norm": 3.96815824508667, "learning_rate": 1.9562841530054645e-05, "loss": 1.822, "step": 1074 }, { "epoch": 0.058772877000669736, "grad_norm": 1.8975927829742432, "learning_rate": 1.9581056466302372e-05, "loss": 1.6545, "step": 1075 }, { "epoch": 0.05882754944439129, "grad_norm": 1.5087734460830688, "learning_rate": 1.9599271402550092e-05, "loss": 1.7492, "step": 1076 }, { "epoch": 0.058882221888112846, "grad_norm": 1.6598289012908936, "learning_rate": 1.9617486338797815e-05, "loss": 1.5652, "step": 1077 }, { "epoch": 0.0589368943318344, "grad_norm": 1.5913035869598389, "learning_rate": 1.963570127504554e-05, "loss": 1.4926, "step": 1078 }, { "epoch": 0.05899156677555595, "grad_norm": 1.6461023092269897, "learning_rate": 1.9653916211293262e-05, "loss": 1.3069, "step": 1079 }, { "epoch": 0.059046239219277506, "grad_norm": 1.9975390434265137, "learning_rate": 1.9672131147540985e-05, "loss": 1.4909, "step": 1080 }, { "epoch": 0.05910091166299906, "grad_norm": 2.7855329513549805, "learning_rate": 1.969034608378871e-05, "loss": 1.6862, "step": 1081 }, { "epoch": 0.05915558410672061, "grad_norm": 1.6505134105682373, "learning_rate": 1.9708561020036432e-05, "loss": 1.6515, "step": 1082 }, { "epoch": 0.05921025655044217, "grad_norm": 2.037843704223633, "learning_rate": 1.9726775956284155e-05, "loss": 1.3054, "step": 1083 }, { "epoch": 0.05926492899416372, "grad_norm": 1.4165472984313965, "learning_rate": 1.9744990892531875e-05, "loss": 1.4694, "step": 1084 }, { "epoch": 0.05931960143788527, "grad_norm": 1.518390417098999, "learning_rate": 1.9763205828779602e-05, "loss": 1.5181, "step": 1085 }, { "epoch": 0.05937427388160682, "grad_norm": 2.2136595249176025, "learning_rate": 1.9781420765027325e-05, "loss": 1.5042, "step": 1086 }, { "epoch": 0.05942894632532838, "grad_norm": 1.599900484085083, "learning_rate": 1.9799635701275045e-05, "loss": 1.4806, "step": 1087 }, { "epoch": 0.05948361876904993, "grad_norm": 2.647918939590454, "learning_rate": 1.9817850637522772e-05, "loss": 1.4374, "step": 1088 }, { "epoch": 0.05953829121277148, "grad_norm": 1.7796549797058105, "learning_rate": 1.9836065573770492e-05, "loss": 1.5776, "step": 1089 }, { "epoch": 0.05959296365649304, "grad_norm": 2.595947027206421, "learning_rate": 1.9854280510018215e-05, "loss": 1.4917, "step": 1090 }, { "epoch": 0.05964763610021459, "grad_norm": 1.94907546043396, "learning_rate": 1.9872495446265942e-05, "loss": 1.6054, "step": 1091 }, { "epoch": 0.05970230854393614, "grad_norm": 2.140038013458252, "learning_rate": 1.9890710382513662e-05, "loss": 1.6088, "step": 1092 }, { "epoch": 0.05975698098765769, "grad_norm": 1.6690542697906494, "learning_rate": 1.9908925318761385e-05, "loss": 1.5938, "step": 1093 }, { "epoch": 0.05981165343137925, "grad_norm": 1.9641635417938232, "learning_rate": 1.992714025500911e-05, "loss": 1.8408, "step": 1094 }, { "epoch": 0.0598663258751008, "grad_norm": 3.2274160385131836, "learning_rate": 1.994535519125683e-05, "loss": 1.5887, "step": 1095 }, { "epoch": 0.059920998318822354, "grad_norm": 2.1458654403686523, "learning_rate": 1.9963570127504555e-05, "loss": 1.3847, "step": 1096 }, { "epoch": 0.05997567076254391, "grad_norm": 2.3502004146575928, "learning_rate": 1.998178506375228e-05, "loss": 1.2645, "step": 1097 }, { "epoch": 0.06003034320626546, "grad_norm": 3.8408541679382324, "learning_rate": 2e-05, "loss": 1.6191, "step": 1098 }, { "epoch": 0.060085015649987014, "grad_norm": 2.544405460357666, "learning_rate": 1.9999999833038268e-05, "loss": 1.2483, "step": 1099 }, { "epoch": 0.060139688093708565, "grad_norm": 1.6305997371673584, "learning_rate": 1.9999999332153076e-05, "loss": 1.7423, "step": 1100 }, { "epoch": 0.060194360537430124, "grad_norm": 1.701485276222229, "learning_rate": 1.999999849734444e-05, "loss": 1.4394, "step": 1101 }, { "epoch": 0.060249032981151675, "grad_norm": 1.5286561250686646, "learning_rate": 1.9999997328612388e-05, "loss": 1.4167, "step": 1102 }, { "epoch": 0.060303705424873226, "grad_norm": 1.482667088508606, "learning_rate": 1.9999995825956956e-05, "loss": 1.6109, "step": 1103 }, { "epoch": 0.060358377868594784, "grad_norm": 1.7886543273925781, "learning_rate": 1.9999993989378202e-05, "loss": 1.5942, "step": 1104 }, { "epoch": 0.060413050312316335, "grad_norm": 1.4977387189865112, "learning_rate": 1.9999991818876183e-05, "loss": 1.7228, "step": 1105 }, { "epoch": 0.06046772275603789, "grad_norm": 1.826735019683838, "learning_rate": 1.999998931445097e-05, "loss": 1.594, "step": 1106 }, { "epoch": 0.06052239519975944, "grad_norm": 2.4271090030670166, "learning_rate": 1.9999986476102647e-05, "loss": 1.2731, "step": 1107 }, { "epoch": 0.060577067643480996, "grad_norm": 2.9110143184661865, "learning_rate": 1.9999983303831313e-05, "loss": 1.4833, "step": 1108 }, { "epoch": 0.06063174008720255, "grad_norm": 1.3697479963302612, "learning_rate": 1.999997979763707e-05, "loss": 1.4762, "step": 1109 }, { "epoch": 0.0606864125309241, "grad_norm": 2.5188355445861816, "learning_rate": 1.9999975957520033e-05, "loss": 1.4941, "step": 1110 }, { "epoch": 0.06074108497464566, "grad_norm": 1.910686731338501, "learning_rate": 1.999997178348034e-05, "loss": 1.3679, "step": 1111 }, { "epoch": 0.06079575741836721, "grad_norm": 1.8605735301971436, "learning_rate": 1.9999967275518118e-05, "loss": 1.6269, "step": 1112 }, { "epoch": 0.06085042986208876, "grad_norm": 1.3612018823623657, "learning_rate": 1.999996243363352e-05, "loss": 1.5993, "step": 1113 }, { "epoch": 0.06090510230581032, "grad_norm": 1.8188728094100952, "learning_rate": 1.9999957257826716e-05, "loss": 1.405, "step": 1114 }, { "epoch": 0.06095977474953187, "grad_norm": 2.796227216720581, "learning_rate": 1.9999951748097874e-05, "loss": 1.5471, "step": 1115 }, { "epoch": 0.06101444719325342, "grad_norm": 1.3170112371444702, "learning_rate": 1.9999945904447173e-05, "loss": 1.5086, "step": 1116 }, { "epoch": 0.06106911963697497, "grad_norm": 2.1224875450134277, "learning_rate": 1.9999939726874817e-05, "loss": 1.326, "step": 1117 }, { "epoch": 0.06112379208069653, "grad_norm": 2.043226480484009, "learning_rate": 1.9999933215381005e-05, "loss": 1.3807, "step": 1118 }, { "epoch": 0.06117846452441808, "grad_norm": 1.2954350709915161, "learning_rate": 1.999992636996596e-05, "loss": 1.5335, "step": 1119 }, { "epoch": 0.06123313696813963, "grad_norm": 1.7000670433044434, "learning_rate": 1.9999919190629905e-05, "loss": 1.5428, "step": 1120 }, { "epoch": 0.06128780941186119, "grad_norm": 1.7278794050216675, "learning_rate": 1.9999911677373083e-05, "loss": 1.4532, "step": 1121 }, { "epoch": 0.06134248185558274, "grad_norm": 1.8836030960083008, "learning_rate": 1.9999903830195748e-05, "loss": 1.4397, "step": 1122 }, { "epoch": 0.06139715429930429, "grad_norm": 1.6089240312576294, "learning_rate": 1.9999895649098154e-05, "loss": 1.2321, "step": 1123 }, { "epoch": 0.06145182674302584, "grad_norm": 2.1902897357940674, "learning_rate": 1.999988713408058e-05, "loss": 1.6192, "step": 1124 }, { "epoch": 0.0615064991867474, "grad_norm": 1.605602741241455, "learning_rate": 1.999987828514331e-05, "loss": 1.3271, "step": 1125 }, { "epoch": 0.06156117163046895, "grad_norm": 1.3787533044815063, "learning_rate": 1.9999869102286638e-05, "loss": 1.5246, "step": 1126 }, { "epoch": 0.061615844074190504, "grad_norm": 1.7377169132232666, "learning_rate": 1.9999859585510873e-05, "loss": 1.6907, "step": 1127 }, { "epoch": 0.06167051651791206, "grad_norm": 1.7154555320739746, "learning_rate": 1.9999849734816334e-05, "loss": 1.3553, "step": 1128 }, { "epoch": 0.06172518896163361, "grad_norm": 2.315805196762085, "learning_rate": 1.999983955020334e-05, "loss": 1.575, "step": 1129 }, { "epoch": 0.061779861405355165, "grad_norm": 1.3194266557693481, "learning_rate": 1.999982903167224e-05, "loss": 1.4479, "step": 1130 }, { "epoch": 0.061834533849076716, "grad_norm": 1.88857901096344, "learning_rate": 1.9999818179223383e-05, "loss": 1.4421, "step": 1131 }, { "epoch": 0.061889206292798274, "grad_norm": 1.2752320766448975, "learning_rate": 1.9999806992857138e-05, "loss": 1.7235, "step": 1132 }, { "epoch": 0.061943878736519825, "grad_norm": 2.5380756855010986, "learning_rate": 1.9999795472573865e-05, "loss": 1.4663, "step": 1133 }, { "epoch": 0.061998551180241376, "grad_norm": 2.6463985443115234, "learning_rate": 1.9999783618373958e-05, "loss": 1.6397, "step": 1134 }, { "epoch": 0.062053223623962935, "grad_norm": 1.901836633682251, "learning_rate": 1.999977143025781e-05, "loss": 1.563, "step": 1135 }, { "epoch": 0.062107896067684486, "grad_norm": 1.8455452919006348, "learning_rate": 1.999975890822583e-05, "loss": 1.7, "step": 1136 }, { "epoch": 0.06216256851140604, "grad_norm": 1.6229336261749268, "learning_rate": 1.9999746052278433e-05, "loss": 1.5221, "step": 1137 }, { "epoch": 0.062217240955127595, "grad_norm": 1.6399595737457275, "learning_rate": 1.9999732862416053e-05, "loss": 1.6078, "step": 1138 }, { "epoch": 0.062271913398849146, "grad_norm": 2.150223970413208, "learning_rate": 1.9999719338639127e-05, "loss": 1.6001, "step": 1139 }, { "epoch": 0.0623265858425707, "grad_norm": 1.891929268836975, "learning_rate": 1.9999705480948107e-05, "loss": 1.5447, "step": 1140 }, { "epoch": 0.06238125828629225, "grad_norm": 2.178076982498169, "learning_rate": 1.9999691289343456e-05, "loss": 1.3828, "step": 1141 }, { "epoch": 0.06243593073001381, "grad_norm": 1.791776180267334, "learning_rate": 1.9999676763825647e-05, "loss": 1.4934, "step": 1142 }, { "epoch": 0.06249060317373536, "grad_norm": 1.5957577228546143, "learning_rate": 1.9999661904395165e-05, "loss": 1.7484, "step": 1143 }, { "epoch": 0.06254527561745692, "grad_norm": 1.40025794506073, "learning_rate": 1.999964671105251e-05, "loss": 1.7646, "step": 1144 }, { "epoch": 0.06259994806117847, "grad_norm": 1.454064965248108, "learning_rate": 1.9999631183798183e-05, "loss": 1.3898, "step": 1145 }, { "epoch": 0.06265462050490002, "grad_norm": 2.202091693878174, "learning_rate": 1.9999615322632707e-05, "loss": 1.5218, "step": 1146 }, { "epoch": 0.06270929294862157, "grad_norm": 1.6815342903137207, "learning_rate": 1.9999599127556614e-05, "loss": 1.4585, "step": 1147 }, { "epoch": 0.06276396539234312, "grad_norm": 2.0460386276245117, "learning_rate": 1.9999582598570437e-05, "loss": 1.4345, "step": 1148 }, { "epoch": 0.06281863783606467, "grad_norm": 2.3347270488739014, "learning_rate": 1.9999565735674734e-05, "loss": 1.3878, "step": 1149 }, { "epoch": 0.06287331027978624, "grad_norm": 2.1663222312927246, "learning_rate": 1.9999548538870067e-05, "loss": 1.4742, "step": 1150 }, { "epoch": 0.06292798272350779, "grad_norm": 2.9549031257629395, "learning_rate": 1.9999531008157007e-05, "loss": 1.2419, "step": 1151 }, { "epoch": 0.06298265516722934, "grad_norm": 1.7218244075775146, "learning_rate": 1.9999513143536146e-05, "loss": 1.528, "step": 1152 }, { "epoch": 0.06303732761095089, "grad_norm": 1.6537103652954102, "learning_rate": 1.999949494500807e-05, "loss": 1.3828, "step": 1153 }, { "epoch": 0.06309200005467244, "grad_norm": 1.2024484872817993, "learning_rate": 1.99994764125734e-05, "loss": 1.6678, "step": 1154 }, { "epoch": 0.063146672498394, "grad_norm": 1.322045087814331, "learning_rate": 1.9999457546232747e-05, "loss": 1.488, "step": 1155 }, { "epoch": 0.06320134494211554, "grad_norm": 1.8618019819259644, "learning_rate": 1.999943834598674e-05, "loss": 1.5704, "step": 1156 }, { "epoch": 0.06325601738583711, "grad_norm": 2.6756513118743896, "learning_rate": 1.999941881183602e-05, "loss": 1.7414, "step": 1157 }, { "epoch": 0.06331068982955866, "grad_norm": 3.3559930324554443, "learning_rate": 1.9999398943781245e-05, "loss": 1.3005, "step": 1158 }, { "epoch": 0.06336536227328021, "grad_norm": 2.2469186782836914, "learning_rate": 1.9999378741823076e-05, "loss": 1.7172, "step": 1159 }, { "epoch": 0.06342003471700176, "grad_norm": 1.5746798515319824, "learning_rate": 1.9999358205962186e-05, "loss": 1.6651, "step": 1160 }, { "epoch": 0.06347470716072331, "grad_norm": 1.3602454662322998, "learning_rate": 1.9999337336199257e-05, "loss": 1.3027, "step": 1161 }, { "epoch": 0.06352937960444487, "grad_norm": 1.9112083911895752, "learning_rate": 1.9999316132534995e-05, "loss": 1.6602, "step": 1162 }, { "epoch": 0.06358405204816642, "grad_norm": 1.6582131385803223, "learning_rate": 1.99992945949701e-05, "loss": 1.4295, "step": 1163 }, { "epoch": 0.06363872449188798, "grad_norm": 1.3753182888031006, "learning_rate": 1.9999272723505298e-05, "loss": 1.5813, "step": 1164 }, { "epoch": 0.06369339693560953, "grad_norm": 2.261707067489624, "learning_rate": 1.9999250518141313e-05, "loss": 1.4913, "step": 1165 }, { "epoch": 0.06374806937933108, "grad_norm": 1.5860804319381714, "learning_rate": 1.999922797887889e-05, "loss": 1.3315, "step": 1166 }, { "epoch": 0.06380274182305264, "grad_norm": 1.5462981462478638, "learning_rate": 1.9999205105718782e-05, "loss": 1.6859, "step": 1167 }, { "epoch": 0.06385741426677419, "grad_norm": 1.565550446510315, "learning_rate": 1.999918189866175e-05, "loss": 1.6155, "step": 1168 }, { "epoch": 0.06391208671049574, "grad_norm": 1.537591814994812, "learning_rate": 1.999915835770857e-05, "loss": 1.4741, "step": 1169 }, { "epoch": 0.06396675915421729, "grad_norm": 2.217282772064209, "learning_rate": 1.9999134482860028e-05, "loss": 1.5849, "step": 1170 }, { "epoch": 0.06402143159793885, "grad_norm": 1.9279837608337402, "learning_rate": 1.9999110274116925e-05, "loss": 1.5548, "step": 1171 }, { "epoch": 0.0640761040416604, "grad_norm": 1.7624815702438354, "learning_rate": 1.9999085731480064e-05, "loss": 1.6328, "step": 1172 }, { "epoch": 0.06413077648538196, "grad_norm": 1.5047762393951416, "learning_rate": 1.999906085495027e-05, "loss": 1.5328, "step": 1173 }, { "epoch": 0.06418544892910351, "grad_norm": 1.8195436000823975, "learning_rate": 1.9999035644528368e-05, "loss": 1.5798, "step": 1174 }, { "epoch": 0.06424012137282506, "grad_norm": 2.626966714859009, "learning_rate": 1.9999010100215202e-05, "loss": 1.3163, "step": 1175 }, { "epoch": 0.06429479381654661, "grad_norm": 2.1880452632904053, "learning_rate": 1.9998984222011627e-05, "loss": 1.2345, "step": 1176 }, { "epoch": 0.06434946626026816, "grad_norm": 1.8241431713104248, "learning_rate": 1.9998958009918503e-05, "loss": 1.4723, "step": 1177 }, { "epoch": 0.06440413870398973, "grad_norm": 2.422800302505493, "learning_rate": 1.9998931463936707e-05, "loss": 1.6456, "step": 1178 }, { "epoch": 0.06445881114771128, "grad_norm": 2.1994807720184326, "learning_rate": 1.999890458406713e-05, "loss": 1.6132, "step": 1179 }, { "epoch": 0.06451348359143283, "grad_norm": 2.127333402633667, "learning_rate": 1.9998877370310665e-05, "loss": 1.4369, "step": 1180 }, { "epoch": 0.06456815603515438, "grad_norm": 2.748859167098999, "learning_rate": 1.999884982266822e-05, "loss": 1.5155, "step": 1181 }, { "epoch": 0.06462282847887593, "grad_norm": 3.3327693939208984, "learning_rate": 1.9998821941140716e-05, "loss": 1.7026, "step": 1182 }, { "epoch": 0.06467750092259748, "grad_norm": 1.8221734762191772, "learning_rate": 1.9998793725729088e-05, "loss": 1.716, "step": 1183 }, { "epoch": 0.06473217336631903, "grad_norm": 2.794158697128296, "learning_rate": 1.999876517643427e-05, "loss": 1.581, "step": 1184 }, { "epoch": 0.0647868458100406, "grad_norm": 2.3012852668762207, "learning_rate": 1.999873629325722e-05, "loss": 1.3874, "step": 1185 }, { "epoch": 0.06484151825376215, "grad_norm": 1.7467641830444336, "learning_rate": 1.9998707076198906e-05, "loss": 1.7874, "step": 1186 }, { "epoch": 0.0648961906974837, "grad_norm": 5.129490852355957, "learning_rate": 1.99986775252603e-05, "loss": 1.6177, "step": 1187 }, { "epoch": 0.06495086314120525, "grad_norm": 2.2400012016296387, "learning_rate": 1.9998647640442384e-05, "loss": 1.6307, "step": 1188 }, { "epoch": 0.0650055355849268, "grad_norm": 2.400207996368408, "learning_rate": 1.9998617421746166e-05, "loss": 1.6655, "step": 1189 }, { "epoch": 0.06506020802864836, "grad_norm": 2.025761604309082, "learning_rate": 1.9998586869172647e-05, "loss": 1.4714, "step": 1190 }, { "epoch": 0.06511488047236991, "grad_norm": 1.83359694480896, "learning_rate": 1.999855598272285e-05, "loss": 1.578, "step": 1191 }, { "epoch": 0.06516955291609147, "grad_norm": 2.0609092712402344, "learning_rate": 1.999852476239781e-05, "loss": 1.5225, "step": 1192 }, { "epoch": 0.06522422535981302, "grad_norm": 2.130401134490967, "learning_rate": 1.999849320819856e-05, "loss": 1.2239, "step": 1193 }, { "epoch": 0.06527889780353457, "grad_norm": 1.7508656978607178, "learning_rate": 1.9998461320126163e-05, "loss": 1.9369, "step": 1194 }, { "epoch": 0.06533357024725613, "grad_norm": 2.3991100788116455, "learning_rate": 1.999842909818168e-05, "loss": 1.6777, "step": 1195 }, { "epoch": 0.06538824269097768, "grad_norm": 1.8858013153076172, "learning_rate": 1.9998396542366188e-05, "loss": 1.5815, "step": 1196 }, { "epoch": 0.06544291513469923, "grad_norm": 1.6120836734771729, "learning_rate": 1.9998363652680774e-05, "loss": 1.7943, "step": 1197 }, { "epoch": 0.0654975875784208, "grad_norm": 2.5080790519714355, "learning_rate": 1.9998330429126532e-05, "loss": 1.4061, "step": 1198 }, { "epoch": 0.06555226002214234, "grad_norm": 1.4010672569274902, "learning_rate": 1.9998296871704578e-05, "loss": 1.5272, "step": 1199 }, { "epoch": 0.0656069324658639, "grad_norm": 1.489585280418396, "learning_rate": 1.999826298041603e-05, "loss": 1.5119, "step": 1200 }, { "epoch": 0.06566160490958545, "grad_norm": 1.257415771484375, "learning_rate": 1.999822875526202e-05, "loss": 1.7209, "step": 1201 }, { "epoch": 0.065716277353307, "grad_norm": 1.7461520433425903, "learning_rate": 1.9998194196243688e-05, "loss": 1.8589, "step": 1202 }, { "epoch": 0.06577094979702855, "grad_norm": 2.4263319969177246, "learning_rate": 1.9998159303362193e-05, "loss": 1.5765, "step": 1203 }, { "epoch": 0.0658256222407501, "grad_norm": 1.5171164274215698, "learning_rate": 1.9998124076618694e-05, "loss": 1.594, "step": 1204 }, { "epoch": 0.06588029468447167, "grad_norm": 2.145761489868164, "learning_rate": 1.9998088516014374e-05, "loss": 1.5202, "step": 1205 }, { "epoch": 0.06593496712819322, "grad_norm": 1.5094274282455444, "learning_rate": 1.9998052621550415e-05, "loss": 1.5263, "step": 1206 }, { "epoch": 0.06598963957191477, "grad_norm": 1.5374337434768677, "learning_rate": 1.9998016393228016e-05, "loss": 1.6271, "step": 1207 }, { "epoch": 0.06604431201563632, "grad_norm": 1.5494486093521118, "learning_rate": 1.9997979831048392e-05, "loss": 1.5956, "step": 1208 }, { "epoch": 0.06609898445935787, "grad_norm": 1.86081063747406, "learning_rate": 1.999794293501276e-05, "loss": 1.4813, "step": 1209 }, { "epoch": 0.06615365690307942, "grad_norm": 1.5537439584732056, "learning_rate": 1.9997905705122352e-05, "loss": 1.5474, "step": 1210 }, { "epoch": 0.06620832934680097, "grad_norm": 1.496819257736206, "learning_rate": 1.999786814137841e-05, "loss": 1.5727, "step": 1211 }, { "epoch": 0.06626300179052254, "grad_norm": 1.715080976486206, "learning_rate": 1.9997830243782193e-05, "loss": 1.5914, "step": 1212 }, { "epoch": 0.06631767423424409, "grad_norm": 1.4197118282318115, "learning_rate": 1.9997792012334963e-05, "loss": 1.7277, "step": 1213 }, { "epoch": 0.06637234667796564, "grad_norm": 1.7258533239364624, "learning_rate": 1.9997753447037997e-05, "loss": 1.6531, "step": 1214 }, { "epoch": 0.06642701912168719, "grad_norm": 1.693030595779419, "learning_rate": 1.9997714547892584e-05, "loss": 1.5542, "step": 1215 }, { "epoch": 0.06648169156540874, "grad_norm": 4.218217372894287, "learning_rate": 1.9997675314900017e-05, "loss": 1.5666, "step": 1216 }, { "epoch": 0.0665363640091303, "grad_norm": 2.0682876110076904, "learning_rate": 1.9997635748061615e-05, "loss": 1.4179, "step": 1217 }, { "epoch": 0.06659103645285185, "grad_norm": 1.5528995990753174, "learning_rate": 1.9997595847378695e-05, "loss": 1.4339, "step": 1218 }, { "epoch": 0.06664570889657341, "grad_norm": 1.904218077659607, "learning_rate": 1.999755561285259e-05, "loss": 1.3655, "step": 1219 }, { "epoch": 0.06670038134029496, "grad_norm": 1.3366557359695435, "learning_rate": 1.9997515044484643e-05, "loss": 1.3597, "step": 1220 }, { "epoch": 0.06675505378401651, "grad_norm": 1.4046767950057983, "learning_rate": 1.9997474142276204e-05, "loss": 1.3792, "step": 1221 }, { "epoch": 0.06680972622773806, "grad_norm": 3.083486318588257, "learning_rate": 1.999743290622865e-05, "loss": 1.5075, "step": 1222 }, { "epoch": 0.06686439867145962, "grad_norm": 1.5812162160873413, "learning_rate": 1.9997391336343347e-05, "loss": 1.4453, "step": 1223 }, { "epoch": 0.06691907111518117, "grad_norm": 1.9017951488494873, "learning_rate": 1.999734943262169e-05, "loss": 1.6816, "step": 1224 }, { "epoch": 0.06697374355890272, "grad_norm": 1.5657445192337036, "learning_rate": 1.999730719506508e-05, "loss": 1.3643, "step": 1225 }, { "epoch": 0.06702841600262428, "grad_norm": 1.4696217775344849, "learning_rate": 1.9997264623674917e-05, "loss": 1.4627, "step": 1226 }, { "epoch": 0.06708308844634583, "grad_norm": 1.3621965646743774, "learning_rate": 1.9997221718452627e-05, "loss": 1.4827, "step": 1227 }, { "epoch": 0.06713776089006739, "grad_norm": 1.990960717201233, "learning_rate": 1.999717847939965e-05, "loss": 1.3489, "step": 1228 }, { "epoch": 0.06719243333378894, "grad_norm": 1.8771886825561523, "learning_rate": 1.9997134906517423e-05, "loss": 1.5837, "step": 1229 }, { "epoch": 0.06724710577751049, "grad_norm": 3.247182607650757, "learning_rate": 1.9997090999807406e-05, "loss": 1.4575, "step": 1230 }, { "epoch": 0.06730177822123204, "grad_norm": 2.3995604515075684, "learning_rate": 1.9997046759271055e-05, "loss": 1.5824, "step": 1231 }, { "epoch": 0.06735645066495359, "grad_norm": 2.1788604259490967, "learning_rate": 1.9997002184909858e-05, "loss": 1.5957, "step": 1232 }, { "epoch": 0.06741112310867516, "grad_norm": 1.5897732973098755, "learning_rate": 1.99969572767253e-05, "loss": 1.464, "step": 1233 }, { "epoch": 0.0674657955523967, "grad_norm": 2.645160675048828, "learning_rate": 1.9996912034718875e-05, "loss": 1.4747, "step": 1234 }, { "epoch": 0.06752046799611826, "grad_norm": 1.5394893884658813, "learning_rate": 1.9996866458892102e-05, "loss": 1.5883, "step": 1235 }, { "epoch": 0.06757514043983981, "grad_norm": 1.7981421947479248, "learning_rate": 1.99968205492465e-05, "loss": 1.3415, "step": 1236 }, { "epoch": 0.06762981288356136, "grad_norm": 1.6177647113800049, "learning_rate": 1.99967743057836e-05, "loss": 1.4125, "step": 1237 }, { "epoch": 0.06768448532728291, "grad_norm": 3.1649458408355713, "learning_rate": 1.999672772850495e-05, "loss": 1.6491, "step": 1238 }, { "epoch": 0.06773915777100446, "grad_norm": 1.7819938659667969, "learning_rate": 1.9996680817412103e-05, "loss": 1.7578, "step": 1239 }, { "epoch": 0.06779383021472603, "grad_norm": 1.4632337093353271, "learning_rate": 1.9996633572506623e-05, "loss": 1.5761, "step": 1240 }, { "epoch": 0.06784850265844758, "grad_norm": 2.014451503753662, "learning_rate": 1.9996585993790092e-05, "loss": 1.3064, "step": 1241 }, { "epoch": 0.06790317510216913, "grad_norm": 1.6855137348175049, "learning_rate": 1.9996538081264095e-05, "loss": 1.5936, "step": 1242 }, { "epoch": 0.06795784754589068, "grad_norm": 1.7316328287124634, "learning_rate": 1.9996489834930236e-05, "loss": 1.6092, "step": 1243 }, { "epoch": 0.06801251998961223, "grad_norm": 1.8292865753173828, "learning_rate": 1.9996441254790122e-05, "loss": 1.4487, "step": 1244 }, { "epoch": 0.06806719243333378, "grad_norm": 1.832751989364624, "learning_rate": 1.999639234084538e-05, "loss": 1.5494, "step": 1245 }, { "epoch": 0.06812186487705534, "grad_norm": 1.773266315460205, "learning_rate": 1.999634309309764e-05, "loss": 1.7302, "step": 1246 }, { "epoch": 0.0681765373207769, "grad_norm": 1.7937681674957275, "learning_rate": 1.9996293511548545e-05, "loss": 1.5047, "step": 1247 }, { "epoch": 0.06823120976449845, "grad_norm": 1.7663462162017822, "learning_rate": 1.999624359619975e-05, "loss": 1.5317, "step": 1248 }, { "epoch": 0.06828588220822, "grad_norm": 2.1950933933258057, "learning_rate": 1.999619334705293e-05, "loss": 1.3022, "step": 1249 }, { "epoch": 0.06834055465194155, "grad_norm": 2.0696167945861816, "learning_rate": 1.9996142764109755e-05, "loss": 1.5989, "step": 1250 }, { "epoch": 0.0683952270956631, "grad_norm": 1.576796531677246, "learning_rate": 1.9996091847371918e-05, "loss": 1.7565, "step": 1251 }, { "epoch": 0.06844989953938466, "grad_norm": 2.290696620941162, "learning_rate": 1.9996040596841118e-05, "loss": 1.3936, "step": 1252 }, { "epoch": 0.06850457198310622, "grad_norm": 2.1677699089050293, "learning_rate": 1.9995989012519065e-05, "loss": 1.5468, "step": 1253 }, { "epoch": 0.06855924442682777, "grad_norm": 3.806669235229492, "learning_rate": 1.999593709440748e-05, "loss": 1.624, "step": 1254 }, { "epoch": 0.06861391687054932, "grad_norm": 2.404595136642456, "learning_rate": 1.99958848425081e-05, "loss": 1.3768, "step": 1255 }, { "epoch": 0.06866858931427088, "grad_norm": 2.6625547409057617, "learning_rate": 1.999583225682267e-05, "loss": 1.5165, "step": 1256 }, { "epoch": 0.06872326175799243, "grad_norm": 1.526159644126892, "learning_rate": 1.9995779337352947e-05, "loss": 1.493, "step": 1257 }, { "epoch": 0.06877793420171398, "grad_norm": 1.734506607055664, "learning_rate": 1.9995726084100692e-05, "loss": 1.702, "step": 1258 }, { "epoch": 0.06883260664543553, "grad_norm": 1.7515190839767456, "learning_rate": 1.999567249706769e-05, "loss": 1.4635, "step": 1259 }, { "epoch": 0.0688872790891571, "grad_norm": 1.6622713804244995, "learning_rate": 1.999561857625573e-05, "loss": 1.5995, "step": 1260 }, { "epoch": 0.06894195153287865, "grad_norm": 1.32035493850708, "learning_rate": 1.9995564321666607e-05, "loss": 1.5776, "step": 1261 }, { "epoch": 0.0689966239766002, "grad_norm": 1.6633872985839844, "learning_rate": 1.9995509733302135e-05, "loss": 1.7251, "step": 1262 }, { "epoch": 0.06905129642032175, "grad_norm": 1.7697665691375732, "learning_rate": 1.999545481116414e-05, "loss": 1.5203, "step": 1263 }, { "epoch": 0.0691059688640433, "grad_norm": 1.5931053161621094, "learning_rate": 1.999539955525445e-05, "loss": 1.5511, "step": 1264 }, { "epoch": 0.06916064130776485, "grad_norm": 2.4162847995758057, "learning_rate": 1.999534396557492e-05, "loss": 1.3666, "step": 1265 }, { "epoch": 0.0692153137514864, "grad_norm": 1.5285011529922485, "learning_rate": 1.9995288042127396e-05, "loss": 1.4192, "step": 1266 }, { "epoch": 0.06926998619520797, "grad_norm": 2.1648106575012207, "learning_rate": 1.9995231784913753e-05, "loss": 1.2478, "step": 1267 }, { "epoch": 0.06932465863892952, "grad_norm": 1.7034767866134644, "learning_rate": 1.999517519393586e-05, "loss": 1.4412, "step": 1268 }, { "epoch": 0.06937933108265107, "grad_norm": 1.1360929012298584, "learning_rate": 1.999511826919562e-05, "loss": 1.4643, "step": 1269 }, { "epoch": 0.06943400352637262, "grad_norm": 1.754662275314331, "learning_rate": 1.9995061010694924e-05, "loss": 1.8088, "step": 1270 }, { "epoch": 0.06948867597009417, "grad_norm": 1.3045194149017334, "learning_rate": 1.9995003418435684e-05, "loss": 1.4511, "step": 1271 }, { "epoch": 0.06954334841381572, "grad_norm": 1.8725391626358032, "learning_rate": 1.999494549241983e-05, "loss": 1.4886, "step": 1272 }, { "epoch": 0.06959802085753727, "grad_norm": 1.5354368686676025, "learning_rate": 1.999488723264929e-05, "loss": 1.4867, "step": 1273 }, { "epoch": 0.06965269330125884, "grad_norm": 1.611694097518921, "learning_rate": 1.999482863912601e-05, "loss": 1.5223, "step": 1274 }, { "epoch": 0.06970736574498039, "grad_norm": 1.1090874671936035, "learning_rate": 1.9994769711851953e-05, "loss": 1.6099, "step": 1275 }, { "epoch": 0.06976203818870194, "grad_norm": 1.6562196016311646, "learning_rate": 1.9994710450829076e-05, "loss": 1.5536, "step": 1276 }, { "epoch": 0.06981671063242349, "grad_norm": 1.7162567377090454, "learning_rate": 1.9994650856059364e-05, "loss": 1.3761, "step": 1277 }, { "epoch": 0.06987138307614504, "grad_norm": 1.2904319763183594, "learning_rate": 1.999459092754481e-05, "loss": 1.4462, "step": 1278 }, { "epoch": 0.0699260555198666, "grad_norm": 1.225374698638916, "learning_rate": 1.9994530665287414e-05, "loss": 1.4498, "step": 1279 }, { "epoch": 0.06998072796358815, "grad_norm": 1.812130331993103, "learning_rate": 1.999447006928918e-05, "loss": 1.4612, "step": 1280 }, { "epoch": 0.07003540040730971, "grad_norm": 2.5331106185913086, "learning_rate": 1.999440913955214e-05, "loss": 1.5338, "step": 1281 }, { "epoch": 0.07009007285103126, "grad_norm": 1.6992679834365845, "learning_rate": 1.999434787607833e-05, "loss": 1.7407, "step": 1282 }, { "epoch": 0.07014474529475281, "grad_norm": 1.385827660560608, "learning_rate": 1.999428627886979e-05, "loss": 1.3802, "step": 1283 }, { "epoch": 0.07019941773847437, "grad_norm": 1.9675164222717285, "learning_rate": 1.9994224347928576e-05, "loss": 1.5821, "step": 1284 }, { "epoch": 0.07025409018219592, "grad_norm": 2.1022207736968994, "learning_rate": 1.999416208325676e-05, "loss": 1.6555, "step": 1285 }, { "epoch": 0.07030876262591747, "grad_norm": 1.5889188051223755, "learning_rate": 1.999409948485642e-05, "loss": 1.3933, "step": 1286 }, { "epoch": 0.07036343506963902, "grad_norm": 2.6343581676483154, "learning_rate": 1.999403655272965e-05, "loss": 1.3717, "step": 1287 }, { "epoch": 0.07041810751336058, "grad_norm": 2.453120708465576, "learning_rate": 1.9993973286878544e-05, "loss": 1.2787, "step": 1288 }, { "epoch": 0.07047277995708214, "grad_norm": 1.707032561302185, "learning_rate": 1.999390968730522e-05, "loss": 1.4427, "step": 1289 }, { "epoch": 0.07052745240080369, "grad_norm": 2.362342119216919, "learning_rate": 1.9993845754011797e-05, "loss": 1.7568, "step": 1290 }, { "epoch": 0.07058212484452524, "grad_norm": 1.6803033351898193, "learning_rate": 1.999378148700042e-05, "loss": 1.5074, "step": 1291 }, { "epoch": 0.07063679728824679, "grad_norm": 1.7727299928665161, "learning_rate": 1.999371688627322e-05, "loss": 1.4991, "step": 1292 }, { "epoch": 0.07069146973196834, "grad_norm": 1.473921537399292, "learning_rate": 1.9993651951832364e-05, "loss": 1.5112, "step": 1293 }, { "epoch": 0.07074614217568989, "grad_norm": 1.461706519126892, "learning_rate": 1.999358668368002e-05, "loss": 1.6926, "step": 1294 }, { "epoch": 0.07080081461941146, "grad_norm": 1.7974863052368164, "learning_rate": 1.9993521081818367e-05, "loss": 1.6708, "step": 1295 }, { "epoch": 0.07085548706313301, "grad_norm": 2.0094616413116455, "learning_rate": 1.9993455146249594e-05, "loss": 1.5105, "step": 1296 }, { "epoch": 0.07091015950685456, "grad_norm": 1.2530955076217651, "learning_rate": 1.9993388876975902e-05, "loss": 1.7769, "step": 1297 }, { "epoch": 0.07096483195057611, "grad_norm": 2.3296539783477783, "learning_rate": 1.9993322273999506e-05, "loss": 1.4894, "step": 1298 }, { "epoch": 0.07101950439429766, "grad_norm": 1.8532588481903076, "learning_rate": 1.999325533732263e-05, "loss": 1.6452, "step": 1299 }, { "epoch": 0.07107417683801921, "grad_norm": 2.460427761077881, "learning_rate": 1.999318806694751e-05, "loss": 1.7679, "step": 1300 }, { "epoch": 0.07112884928174078, "grad_norm": 1.2828619480133057, "learning_rate": 1.9993120462876385e-05, "loss": 1.7126, "step": 1301 }, { "epoch": 0.07118352172546233, "grad_norm": 1.5791027545928955, "learning_rate": 1.9993052525111522e-05, "loss": 1.4696, "step": 1302 }, { "epoch": 0.07123819416918388, "grad_norm": 1.44767427444458, "learning_rate": 1.9992984253655186e-05, "loss": 1.5712, "step": 1303 }, { "epoch": 0.07129286661290543, "grad_norm": 3.616734027862549, "learning_rate": 1.9992915648509655e-05, "loss": 1.377, "step": 1304 }, { "epoch": 0.07134753905662698, "grad_norm": 1.937209963798523, "learning_rate": 1.9992846709677222e-05, "loss": 1.4469, "step": 1305 }, { "epoch": 0.07140221150034853, "grad_norm": 1.6785064935684204, "learning_rate": 1.999277743716019e-05, "loss": 1.6507, "step": 1306 }, { "epoch": 0.07145688394407008, "grad_norm": 1.4206198453903198, "learning_rate": 1.9992707830960868e-05, "loss": 1.5272, "step": 1307 }, { "epoch": 0.07151155638779165, "grad_norm": 1.4981204271316528, "learning_rate": 1.9992637891081585e-05, "loss": 1.4017, "step": 1308 }, { "epoch": 0.0715662288315132, "grad_norm": 1.7444922924041748, "learning_rate": 1.999256761752467e-05, "loss": 1.3983, "step": 1309 }, { "epoch": 0.07162090127523475, "grad_norm": 1.449615716934204, "learning_rate": 1.999249701029248e-05, "loss": 1.7261, "step": 1310 }, { "epoch": 0.0716755737189563, "grad_norm": 2.3202834129333496, "learning_rate": 1.999242606938736e-05, "loss": 1.4484, "step": 1311 }, { "epoch": 0.07173024616267785, "grad_norm": 1.9680157899856567, "learning_rate": 1.999235479481169e-05, "loss": 1.2447, "step": 1312 }, { "epoch": 0.0717849186063994, "grad_norm": 1.8790236711502075, "learning_rate": 1.9992283186567848e-05, "loss": 1.6317, "step": 1313 }, { "epoch": 0.07183959105012096, "grad_norm": 1.913967490196228, "learning_rate": 1.9992211244658218e-05, "loss": 1.4692, "step": 1314 }, { "epoch": 0.07189426349384252, "grad_norm": 1.496690034866333, "learning_rate": 1.999213896908521e-05, "loss": 1.3845, "step": 1315 }, { "epoch": 0.07194893593756407, "grad_norm": 1.6363294124603271, "learning_rate": 1.9992066359851236e-05, "loss": 1.3991, "step": 1316 }, { "epoch": 0.07200360838128562, "grad_norm": 1.4245092868804932, "learning_rate": 1.9991993416958713e-05, "loss": 1.4807, "step": 1317 }, { "epoch": 0.07205828082500718, "grad_norm": 1.3856033086776733, "learning_rate": 1.999192014041009e-05, "loss": 1.524, "step": 1318 }, { "epoch": 0.07211295326872873, "grad_norm": 2.497117042541504, "learning_rate": 1.9991846530207798e-05, "loss": 1.4936, "step": 1319 }, { "epoch": 0.07216762571245028, "grad_norm": 1.8027387857437134, "learning_rate": 1.999177258635431e-05, "loss": 1.688, "step": 1320 }, { "epoch": 0.07222229815617183, "grad_norm": 2.7131106853485107, "learning_rate": 1.999169830885209e-05, "loss": 1.5162, "step": 1321 }, { "epoch": 0.0722769705998934, "grad_norm": 3.10786771774292, "learning_rate": 1.9991623697703613e-05, "loss": 1.6457, "step": 1322 }, { "epoch": 0.07233164304361495, "grad_norm": 3.0511820316314697, "learning_rate": 1.999154875291138e-05, "loss": 1.5205, "step": 1323 }, { "epoch": 0.0723863154873365, "grad_norm": 8.219062805175781, "learning_rate": 1.999147347447788e-05, "loss": 1.508, "step": 1324 }, { "epoch": 0.07244098793105805, "grad_norm": 8.490829467773438, "learning_rate": 1.9991397862405645e-05, "loss": 1.6168, "step": 1325 }, { "epoch": 0.0724956603747796, "grad_norm": 10.736400604248047, "learning_rate": 1.9991321916697182e-05, "loss": 1.304, "step": 1326 }, { "epoch": 0.07255033281850115, "grad_norm": 4.114467144012451, "learning_rate": 1.999124563735504e-05, "loss": 1.4126, "step": 1327 }, { "epoch": 0.0726050052622227, "grad_norm": 10.256958961486816, "learning_rate": 1.9991169024381756e-05, "loss": 1.7428, "step": 1328 }, { "epoch": 0.07265967770594427, "grad_norm": 4.760043621063232, "learning_rate": 1.9991092077779895e-05, "loss": 1.4954, "step": 1329 }, { "epoch": 0.07271435014966582, "grad_norm": 2.786720037460327, "learning_rate": 1.9991014797552027e-05, "loss": 1.5567, "step": 1330 }, { "epoch": 0.07276902259338737, "grad_norm": 2.566070556640625, "learning_rate": 1.9990937183700728e-05, "loss": 1.6235, "step": 1331 }, { "epoch": 0.07282369503710892, "grad_norm": 2.8351211547851562, "learning_rate": 1.999085923622859e-05, "loss": 1.5846, "step": 1332 }, { "epoch": 0.07287836748083047, "grad_norm": 1.5520023107528687, "learning_rate": 1.999078095513822e-05, "loss": 1.326, "step": 1333 }, { "epoch": 0.07293303992455202, "grad_norm": 1.9063211679458618, "learning_rate": 1.9990702340432232e-05, "loss": 1.6257, "step": 1334 }, { "epoch": 0.07298771236827357, "grad_norm": 1.625646710395813, "learning_rate": 1.999062339211325e-05, "loss": 1.5679, "step": 1335 }, { "epoch": 0.07304238481199514, "grad_norm": 1.7650436162948608, "learning_rate": 1.9990544110183907e-05, "loss": 1.4711, "step": 1336 }, { "epoch": 0.07309705725571669, "grad_norm": 2.451632499694824, "learning_rate": 1.999046449464685e-05, "loss": 1.4515, "step": 1337 }, { "epoch": 0.07315172969943824, "grad_norm": 1.4184114933013916, "learning_rate": 1.9990384545504743e-05, "loss": 1.3609, "step": 1338 }, { "epoch": 0.0732064021431598, "grad_norm": 1.8404932022094727, "learning_rate": 1.999030426276025e-05, "loss": 1.5497, "step": 1339 }, { "epoch": 0.07326107458688134, "grad_norm": 1.4780349731445312, "learning_rate": 1.999022364641606e-05, "loss": 1.395, "step": 1340 }, { "epoch": 0.0733157470306029, "grad_norm": 1.5440582036972046, "learning_rate": 1.9990142696474855e-05, "loss": 1.2588, "step": 1341 }, { "epoch": 0.07337041947432445, "grad_norm": 1.3378686904907227, "learning_rate": 1.9990061412939346e-05, "loss": 1.2617, "step": 1342 }, { "epoch": 0.07342509191804601, "grad_norm": 1.5411827564239502, "learning_rate": 1.998997979581224e-05, "loss": 1.1693, "step": 1343 }, { "epoch": 0.07347976436176756, "grad_norm": 2.2037034034729004, "learning_rate": 1.9989897845096272e-05, "loss": 1.6157, "step": 1344 }, { "epoch": 0.07353443680548911, "grad_norm": 1.628450632095337, "learning_rate": 1.998981556079417e-05, "loss": 1.491, "step": 1345 }, { "epoch": 0.07358910924921067, "grad_norm": 1.481205940246582, "learning_rate": 1.998973294290868e-05, "loss": 1.4002, "step": 1346 }, { "epoch": 0.07364378169293222, "grad_norm": 1.6178621053695679, "learning_rate": 1.998964999144257e-05, "loss": 1.7176, "step": 1347 }, { "epoch": 0.07369845413665377, "grad_norm": 2.3113863468170166, "learning_rate": 1.9989566706398606e-05, "loss": 1.4841, "step": 1348 }, { "epoch": 0.07375312658037532, "grad_norm": 1.8433367013931274, "learning_rate": 1.9989483087779565e-05, "loss": 1.7756, "step": 1349 }, { "epoch": 0.07380779902409688, "grad_norm": 1.6548599004745483, "learning_rate": 1.9989399135588246e-05, "loss": 1.5146, "step": 1350 }, { "epoch": 0.07386247146781844, "grad_norm": 2.5872066020965576, "learning_rate": 1.998931484982745e-05, "loss": 1.349, "step": 1351 }, { "epoch": 0.07391714391153999, "grad_norm": 2.0185892581939697, "learning_rate": 1.998923023049999e-05, "loss": 1.5186, "step": 1352 }, { "epoch": 0.07397181635526154, "grad_norm": 1.669389009475708, "learning_rate": 1.998914527760869e-05, "loss": 1.2299, "step": 1353 }, { "epoch": 0.07402648879898309, "grad_norm": 1.654268503189087, "learning_rate": 1.998905999115639e-05, "loss": 1.7672, "step": 1354 }, { "epoch": 0.07408116124270464, "grad_norm": 1.4807078838348389, "learning_rate": 1.9988974371145934e-05, "loss": 1.3438, "step": 1355 }, { "epoch": 0.0741358336864262, "grad_norm": 2.5296573638916016, "learning_rate": 1.9988888417580187e-05, "loss": 1.475, "step": 1356 }, { "epoch": 0.07419050613014776, "grad_norm": 3.7919695377349854, "learning_rate": 1.9988802130462017e-05, "loss": 1.7345, "step": 1357 }, { "epoch": 0.07424517857386931, "grad_norm": 1.2177444696426392, "learning_rate": 1.99887155097943e-05, "loss": 1.8177, "step": 1358 }, { "epoch": 0.07429985101759086, "grad_norm": 2.409872055053711, "learning_rate": 1.9988628555579935e-05, "loss": 1.6485, "step": 1359 }, { "epoch": 0.07435452346131241, "grad_norm": 1.2524796724319458, "learning_rate": 1.9988541267821825e-05, "loss": 1.8099, "step": 1360 }, { "epoch": 0.07440919590503396, "grad_norm": 1.776027798652649, "learning_rate": 1.9988453646522883e-05, "loss": 1.5636, "step": 1361 }, { "epoch": 0.07446386834875551, "grad_norm": 2.1976401805877686, "learning_rate": 1.9988365691686035e-05, "loss": 1.7178, "step": 1362 }, { "epoch": 0.07451854079247708, "grad_norm": 1.5729776620864868, "learning_rate": 1.9988277403314216e-05, "loss": 1.573, "step": 1363 }, { "epoch": 0.07457321323619863, "grad_norm": 2.7031311988830566, "learning_rate": 1.9988188781410377e-05, "loss": 1.4396, "step": 1364 }, { "epoch": 0.07462788567992018, "grad_norm": 1.892219066619873, "learning_rate": 1.9988099825977477e-05, "loss": 1.6087, "step": 1365 }, { "epoch": 0.07468255812364173, "grad_norm": 1.5407981872558594, "learning_rate": 1.998801053701849e-05, "loss": 1.4254, "step": 1366 }, { "epoch": 0.07473723056736328, "grad_norm": 1.48847496509552, "learning_rate": 1.998792091453639e-05, "loss": 1.4389, "step": 1367 }, { "epoch": 0.07479190301108483, "grad_norm": 1.3969628810882568, "learning_rate": 1.998783095853417e-05, "loss": 1.6054, "step": 1368 }, { "epoch": 0.07484657545480639, "grad_norm": 1.3891551494598389, "learning_rate": 1.9987740669014843e-05, "loss": 1.7408, "step": 1369 }, { "epoch": 0.07490124789852795, "grad_norm": 1.7329305410385132, "learning_rate": 1.9987650045981412e-05, "loss": 1.293, "step": 1370 }, { "epoch": 0.0749559203422495, "grad_norm": 1.7246800661087036, "learning_rate": 1.9987559089436917e-05, "loss": 1.3104, "step": 1371 }, { "epoch": 0.07501059278597105, "grad_norm": 1.408929467201233, "learning_rate": 1.998746779938438e-05, "loss": 1.5045, "step": 1372 }, { "epoch": 0.0750652652296926, "grad_norm": 1.4923949241638184, "learning_rate": 1.9987376175826864e-05, "loss": 1.6269, "step": 1373 }, { "epoch": 0.07511993767341416, "grad_norm": 1.5807560682296753, "learning_rate": 1.9987284218767415e-05, "loss": 1.5121, "step": 1374 }, { "epoch": 0.0751746101171357, "grad_norm": 1.5410770177841187, "learning_rate": 1.998719192820911e-05, "loss": 1.3621, "step": 1375 }, { "epoch": 0.07522928256085726, "grad_norm": 1.4318801164627075, "learning_rate": 1.9987099304155035e-05, "loss": 1.6232, "step": 1376 }, { "epoch": 0.07528395500457882, "grad_norm": 2.605900287628174, "learning_rate": 1.9987006346608274e-05, "loss": 1.5188, "step": 1377 }, { "epoch": 0.07533862744830037, "grad_norm": 1.037577748298645, "learning_rate": 1.998691305557194e-05, "loss": 1.4272, "step": 1378 }, { "epoch": 0.07539329989202193, "grad_norm": 1.4985905885696411, "learning_rate": 1.9986819431049146e-05, "loss": 1.5811, "step": 1379 }, { "epoch": 0.07544797233574348, "grad_norm": 1.8024965524673462, "learning_rate": 1.9986725473043013e-05, "loss": 1.5863, "step": 1380 }, { "epoch": 0.07550264477946503, "grad_norm": 1.3656904697418213, "learning_rate": 1.998663118155668e-05, "loss": 1.422, "step": 1381 }, { "epoch": 0.07555731722318658, "grad_norm": 1.682215929031372, "learning_rate": 1.9986536556593303e-05, "loss": 1.8549, "step": 1382 }, { "epoch": 0.07561198966690813, "grad_norm": 1.420535683631897, "learning_rate": 1.998644159815603e-05, "loss": 1.3695, "step": 1383 }, { "epoch": 0.0756666621106297, "grad_norm": 1.8872206211090088, "learning_rate": 1.998634630624804e-05, "loss": 1.6343, "step": 1384 }, { "epoch": 0.07572133455435125, "grad_norm": 1.3436874151229858, "learning_rate": 1.9986250680872515e-05, "loss": 1.4154, "step": 1385 }, { "epoch": 0.0757760069980728, "grad_norm": 1.7095215320587158, "learning_rate": 1.9986154722032646e-05, "loss": 1.6424, "step": 1386 }, { "epoch": 0.07583067944179435, "grad_norm": 2.0050108432769775, "learning_rate": 1.998605842973164e-05, "loss": 1.3326, "step": 1387 }, { "epoch": 0.0758853518855159, "grad_norm": 1.7463544607162476, "learning_rate": 1.9985961803972704e-05, "loss": 1.446, "step": 1388 }, { "epoch": 0.07594002432923745, "grad_norm": 1.3763561248779297, "learning_rate": 1.9985864844759073e-05, "loss": 1.5566, "step": 1389 }, { "epoch": 0.075994696772959, "grad_norm": 1.3255140781402588, "learning_rate": 1.9985767552093982e-05, "loss": 1.2478, "step": 1390 }, { "epoch": 0.07604936921668057, "grad_norm": 1.690529704093933, "learning_rate": 1.9985669925980683e-05, "loss": 1.4169, "step": 1391 }, { "epoch": 0.07610404166040212, "grad_norm": 1.2559468746185303, "learning_rate": 1.998557196642243e-05, "loss": 1.5187, "step": 1392 }, { "epoch": 0.07615871410412367, "grad_norm": 2.0146796703338623, "learning_rate": 1.99854736734225e-05, "loss": 1.429, "step": 1393 }, { "epoch": 0.07621338654784522, "grad_norm": 1.825858473777771, "learning_rate": 1.9985375046984167e-05, "loss": 1.6052, "step": 1394 }, { "epoch": 0.07626805899156677, "grad_norm": 1.9746991395950317, "learning_rate": 1.9985276087110733e-05, "loss": 1.5379, "step": 1395 }, { "epoch": 0.07632273143528832, "grad_norm": 1.9421709775924683, "learning_rate": 1.9985176793805503e-05, "loss": 1.3339, "step": 1396 }, { "epoch": 0.07637740387900988, "grad_norm": 1.6386035680770874, "learning_rate": 1.9985077167071784e-05, "loss": 1.4786, "step": 1397 }, { "epoch": 0.07643207632273144, "grad_norm": 1.929524302482605, "learning_rate": 1.9984977206912906e-05, "loss": 1.4553, "step": 1398 }, { "epoch": 0.07648674876645299, "grad_norm": 1.598169207572937, "learning_rate": 1.9984876913332215e-05, "loss": 1.4903, "step": 1399 }, { "epoch": 0.07654142121017454, "grad_norm": 1.3296905755996704, "learning_rate": 1.998477628633305e-05, "loss": 1.3372, "step": 1400 }, { "epoch": 0.0765960936538961, "grad_norm": 1.9978182315826416, "learning_rate": 1.9984675325918776e-05, "loss": 1.4798, "step": 1401 }, { "epoch": 0.07665076609761765, "grad_norm": 2.00881290435791, "learning_rate": 1.998457403209276e-05, "loss": 1.3763, "step": 1402 }, { "epoch": 0.0767054385413392, "grad_norm": 1.8332037925720215, "learning_rate": 1.998447240485839e-05, "loss": 1.4548, "step": 1403 }, { "epoch": 0.07676011098506076, "grad_norm": 6.64378547668457, "learning_rate": 1.998437044421906e-05, "loss": 1.4676, "step": 1404 }, { "epoch": 0.07681478342878231, "grad_norm": 1.9372215270996094, "learning_rate": 1.998426815017817e-05, "loss": 1.7017, "step": 1405 }, { "epoch": 0.07686945587250386, "grad_norm": 1.6271828413009644, "learning_rate": 1.9984165522739135e-05, "loss": 1.413, "step": 1406 }, { "epoch": 0.07692412831622542, "grad_norm": 2.633471965789795, "learning_rate": 1.9984062561905384e-05, "loss": 1.6218, "step": 1407 }, { "epoch": 0.07697880075994697, "grad_norm": 4.0390729904174805, "learning_rate": 1.9983959267680357e-05, "loss": 1.4804, "step": 1408 }, { "epoch": 0.07703347320366852, "grad_norm": 2.538224697113037, "learning_rate": 1.9983855640067505e-05, "loss": 1.3718, "step": 1409 }, { "epoch": 0.07708814564739007, "grad_norm": 1.7134323120117188, "learning_rate": 1.9983751679070284e-05, "loss": 1.5451, "step": 1410 }, { "epoch": 0.07714281809111163, "grad_norm": 1.6478416919708252, "learning_rate": 1.9983647384692164e-05, "loss": 1.6135, "step": 1411 }, { "epoch": 0.07719749053483319, "grad_norm": 1.471253752708435, "learning_rate": 1.9983542756936632e-05, "loss": 1.4638, "step": 1412 }, { "epoch": 0.07725216297855474, "grad_norm": 1.9267027378082275, "learning_rate": 1.998343779580718e-05, "loss": 1.3527, "step": 1413 }, { "epoch": 0.07730683542227629, "grad_norm": 1.4253953695297241, "learning_rate": 1.9983332501307315e-05, "loss": 1.7389, "step": 1414 }, { "epoch": 0.07736150786599784, "grad_norm": 2.2822985649108887, "learning_rate": 1.9983226873440548e-05, "loss": 1.3609, "step": 1415 }, { "epoch": 0.07741618030971939, "grad_norm": 1.8252607583999634, "learning_rate": 1.998312091221041e-05, "loss": 1.4761, "step": 1416 }, { "epoch": 0.07747085275344094, "grad_norm": 1.7507389783859253, "learning_rate": 1.9983014617620443e-05, "loss": 1.4946, "step": 1417 }, { "epoch": 0.0775255251971625, "grad_norm": 1.4540945291519165, "learning_rate": 1.9982907989674187e-05, "loss": 1.4818, "step": 1418 }, { "epoch": 0.07758019764088406, "grad_norm": 1.7392182350158691, "learning_rate": 1.9982801028375208e-05, "loss": 1.2957, "step": 1419 }, { "epoch": 0.07763487008460561, "grad_norm": 5.805693626403809, "learning_rate": 1.998269373372708e-05, "loss": 1.554, "step": 1420 }, { "epoch": 0.07768954252832716, "grad_norm": 1.5729652643203735, "learning_rate": 1.998258610573338e-05, "loss": 1.678, "step": 1421 }, { "epoch": 0.07774421497204871, "grad_norm": 1.7601641416549683, "learning_rate": 1.998247814439771e-05, "loss": 1.4117, "step": 1422 }, { "epoch": 0.07779888741577026, "grad_norm": 1.6161386966705322, "learning_rate": 1.9982369849723665e-05, "loss": 1.2913, "step": 1423 }, { "epoch": 0.07785355985949181, "grad_norm": 1.230092167854309, "learning_rate": 1.998226122171487e-05, "loss": 1.4638, "step": 1424 }, { "epoch": 0.07790823230321338, "grad_norm": 1.9125518798828125, "learning_rate": 1.998215226037495e-05, "loss": 1.6061, "step": 1425 }, { "epoch": 0.07796290474693493, "grad_norm": 2.005253553390503, "learning_rate": 1.9982042965707536e-05, "loss": 1.6641, "step": 1426 }, { "epoch": 0.07801757719065648, "grad_norm": 2.650190591812134, "learning_rate": 1.9981933337716288e-05, "loss": 1.4127, "step": 1427 }, { "epoch": 0.07807224963437803, "grad_norm": 2.0720410346984863, "learning_rate": 1.998182337640486e-05, "loss": 1.6017, "step": 1428 }, { "epoch": 0.07812692207809958, "grad_norm": 1.4466255903244019, "learning_rate": 1.998171308177693e-05, "loss": 1.7243, "step": 1429 }, { "epoch": 0.07818159452182114, "grad_norm": 1.8040194511413574, "learning_rate": 1.9981602453836175e-05, "loss": 1.6563, "step": 1430 }, { "epoch": 0.07823626696554269, "grad_norm": 1.907720923423767, "learning_rate": 1.998149149258629e-05, "loss": 1.4204, "step": 1431 }, { "epoch": 0.07829093940926425, "grad_norm": 1.5513297319412231, "learning_rate": 1.9981380198030984e-05, "loss": 1.4186, "step": 1432 }, { "epoch": 0.0783456118529858, "grad_norm": 1.6341168880462646, "learning_rate": 1.9981268570173968e-05, "loss": 1.1633, "step": 1433 }, { "epoch": 0.07840028429670735, "grad_norm": 1.5265246629714966, "learning_rate": 1.9981156609018977e-05, "loss": 1.4155, "step": 1434 }, { "epoch": 0.0784549567404289, "grad_norm": 1.7495880126953125, "learning_rate": 1.9981044314569745e-05, "loss": 1.2587, "step": 1435 }, { "epoch": 0.07850962918415046, "grad_norm": 1.8103597164154053, "learning_rate": 1.998093168683002e-05, "loss": 1.4918, "step": 1436 }, { "epoch": 0.07856430162787201, "grad_norm": 1.543670415878296, "learning_rate": 1.9980818725803565e-05, "loss": 1.3698, "step": 1437 }, { "epoch": 0.07861897407159356, "grad_norm": 2.0961549282073975, "learning_rate": 1.9980705431494152e-05, "loss": 1.4696, "step": 1438 }, { "epoch": 0.07867364651531512, "grad_norm": 2.398286819458008, "learning_rate": 1.9980591803905565e-05, "loss": 1.4688, "step": 1439 }, { "epoch": 0.07872831895903667, "grad_norm": 2.236544132232666, "learning_rate": 1.9980477843041596e-05, "loss": 1.5612, "step": 1440 }, { "epoch": 0.07878299140275823, "grad_norm": 1.8523964881896973, "learning_rate": 1.9980363548906056e-05, "loss": 1.6397, "step": 1441 }, { "epoch": 0.07883766384647978, "grad_norm": 1.8978562355041504, "learning_rate": 1.9980248921502753e-05, "loss": 1.2926, "step": 1442 }, { "epoch": 0.07889233629020133, "grad_norm": 1.8671815395355225, "learning_rate": 1.9980133960835522e-05, "loss": 1.1897, "step": 1443 }, { "epoch": 0.07894700873392288, "grad_norm": 1.2835829257965088, "learning_rate": 1.9980018666908197e-05, "loss": 1.608, "step": 1444 }, { "epoch": 0.07900168117764443, "grad_norm": 1.3700851202011108, "learning_rate": 1.9979903039724632e-05, "loss": 1.6253, "step": 1445 }, { "epoch": 0.079056353621366, "grad_norm": 1.8579756021499634, "learning_rate": 1.9979787079288683e-05, "loss": 1.3086, "step": 1446 }, { "epoch": 0.07911102606508755, "grad_norm": 1.6280509233474731, "learning_rate": 1.997967078560423e-05, "loss": 1.3291, "step": 1447 }, { "epoch": 0.0791656985088091, "grad_norm": 1.5629361867904663, "learning_rate": 1.9979554158675145e-05, "loss": 1.6032, "step": 1448 }, { "epoch": 0.07922037095253065, "grad_norm": 1.280612587928772, "learning_rate": 1.997943719850533e-05, "loss": 1.5317, "step": 1449 }, { "epoch": 0.0792750433962522, "grad_norm": 1.3023180961608887, "learning_rate": 1.9979319905098695e-05, "loss": 1.6232, "step": 1450 }, { "epoch": 0.07932971583997375, "grad_norm": 1.654685616493225, "learning_rate": 1.9979202278459143e-05, "loss": 1.4586, "step": 1451 }, { "epoch": 0.0793843882836953, "grad_norm": 2.1563854217529297, "learning_rate": 1.997908431859062e-05, "loss": 1.559, "step": 1452 }, { "epoch": 0.07943906072741687, "grad_norm": 1.7232205867767334, "learning_rate": 1.9978966025497047e-05, "loss": 1.4012, "step": 1453 }, { "epoch": 0.07949373317113842, "grad_norm": 2.0666470527648926, "learning_rate": 1.9978847399182384e-05, "loss": 1.4124, "step": 1454 }, { "epoch": 0.07954840561485997, "grad_norm": 1.6757659912109375, "learning_rate": 1.997872843965059e-05, "loss": 1.2239, "step": 1455 }, { "epoch": 0.07960307805858152, "grad_norm": 1.925733208656311, "learning_rate": 1.997860914690564e-05, "loss": 1.4448, "step": 1456 }, { "epoch": 0.07965775050230307, "grad_norm": 1.2891247272491455, "learning_rate": 1.9978489520951512e-05, "loss": 1.5153, "step": 1457 }, { "epoch": 0.07971242294602462, "grad_norm": 1.5377659797668457, "learning_rate": 1.9978369561792207e-05, "loss": 1.6777, "step": 1458 }, { "epoch": 0.07976709538974619, "grad_norm": 1.781521201133728, "learning_rate": 1.9978249269431723e-05, "loss": 1.7117, "step": 1459 }, { "epoch": 0.07982176783346774, "grad_norm": 1.6780054569244385, "learning_rate": 1.9978128643874085e-05, "loss": 1.6598, "step": 1460 }, { "epoch": 0.07987644027718929, "grad_norm": 1.5761616230010986, "learning_rate": 1.9978007685123314e-05, "loss": 1.5437, "step": 1461 }, { "epoch": 0.07993111272091084, "grad_norm": 1.4109890460968018, "learning_rate": 1.9977886393183454e-05, "loss": 1.5493, "step": 1462 }, { "epoch": 0.0799857851646324, "grad_norm": 1.5754703283309937, "learning_rate": 1.9977764768058555e-05, "loss": 1.446, "step": 1463 }, { "epoch": 0.08004045760835395, "grad_norm": 1.4216899871826172, "learning_rate": 1.9977642809752674e-05, "loss": 1.6933, "step": 1464 }, { "epoch": 0.0800951300520755, "grad_norm": 1.77826988697052, "learning_rate": 1.997752051826989e-05, "loss": 1.4376, "step": 1465 }, { "epoch": 0.08014980249579706, "grad_norm": 1.7030558586120605, "learning_rate": 1.9977397893614282e-05, "loss": 1.5961, "step": 1466 }, { "epoch": 0.08020447493951861, "grad_norm": 1.4721531867980957, "learning_rate": 1.997727493578994e-05, "loss": 1.2777, "step": 1467 }, { "epoch": 0.08025914738324016, "grad_norm": 1.5252325534820557, "learning_rate": 1.997715164480098e-05, "loss": 1.4092, "step": 1468 }, { "epoch": 0.08031381982696172, "grad_norm": 1.8038346767425537, "learning_rate": 1.9977028020651516e-05, "loss": 1.437, "step": 1469 }, { "epoch": 0.08036849227068327, "grad_norm": 1.5198851823806763, "learning_rate": 1.9976904063345673e-05, "loss": 1.398, "step": 1470 }, { "epoch": 0.08042316471440482, "grad_norm": 1.9531399011611938, "learning_rate": 1.997677977288759e-05, "loss": 1.4821, "step": 1471 }, { "epoch": 0.08047783715812637, "grad_norm": 1.6604009866714478, "learning_rate": 1.997665514928142e-05, "loss": 1.668, "step": 1472 }, { "epoch": 0.08053250960184793, "grad_norm": 2.00771164894104, "learning_rate": 1.9976530192531327e-05, "loss": 1.4846, "step": 1473 }, { "epoch": 0.08058718204556949, "grad_norm": 1.4158942699432373, "learning_rate": 1.997640490264148e-05, "loss": 1.2931, "step": 1474 }, { "epoch": 0.08064185448929104, "grad_norm": 1.473394513130188, "learning_rate": 1.997627927961606e-05, "loss": 1.3294, "step": 1475 }, { "epoch": 0.08069652693301259, "grad_norm": 1.3651061058044434, "learning_rate": 1.9976153323459262e-05, "loss": 1.5626, "step": 1476 }, { "epoch": 0.08075119937673414, "grad_norm": 2.089020013809204, "learning_rate": 1.99760270341753e-05, "loss": 1.1876, "step": 1477 }, { "epoch": 0.08080587182045569, "grad_norm": 1.419188141822815, "learning_rate": 1.9975900411768384e-05, "loss": 1.2222, "step": 1478 }, { "epoch": 0.08086054426417724, "grad_norm": 1.2078019380569458, "learning_rate": 1.9975773456242742e-05, "loss": 1.3548, "step": 1479 }, { "epoch": 0.08091521670789881, "grad_norm": 1.5013147592544556, "learning_rate": 1.9975646167602617e-05, "loss": 1.5219, "step": 1480 }, { "epoch": 0.08096988915162036, "grad_norm": 1.2686749696731567, "learning_rate": 1.9975518545852258e-05, "loss": 1.2763, "step": 1481 }, { "epoch": 0.08102456159534191, "grad_norm": 1.9870796203613281, "learning_rate": 1.9975390590995925e-05, "loss": 1.766, "step": 1482 }, { "epoch": 0.08107923403906346, "grad_norm": 1.694146752357483, "learning_rate": 1.9975262303037896e-05, "loss": 1.4553, "step": 1483 }, { "epoch": 0.08113390648278501, "grad_norm": 1.787434697151184, "learning_rate": 1.997513368198245e-05, "loss": 1.77, "step": 1484 }, { "epoch": 0.08118857892650656, "grad_norm": 2.0114173889160156, "learning_rate": 1.9975004727833882e-05, "loss": 1.3505, "step": 1485 }, { "epoch": 0.08124325137022811, "grad_norm": 1.4985202550888062, "learning_rate": 1.9974875440596496e-05, "loss": 1.4287, "step": 1486 }, { "epoch": 0.08129792381394968, "grad_norm": 1.4090726375579834, "learning_rate": 1.9974745820274618e-05, "loss": 1.259, "step": 1487 }, { "epoch": 0.08135259625767123, "grad_norm": 1.2159284353256226, "learning_rate": 1.9974615866872567e-05, "loss": 1.7018, "step": 1488 }, { "epoch": 0.08140726870139278, "grad_norm": 1.8953441381454468, "learning_rate": 1.9974485580394686e-05, "loss": 1.5643, "step": 1489 }, { "epoch": 0.08146194114511433, "grad_norm": 1.4673774242401123, "learning_rate": 1.9974354960845326e-05, "loss": 1.5258, "step": 1490 }, { "epoch": 0.08151661358883588, "grad_norm": 1.4768400192260742, "learning_rate": 1.997422400822885e-05, "loss": 1.5133, "step": 1491 }, { "epoch": 0.08157128603255744, "grad_norm": 1.8791747093200684, "learning_rate": 1.9974092722549628e-05, "loss": 1.779, "step": 1492 }, { "epoch": 0.08162595847627899, "grad_norm": 1.4112486839294434, "learning_rate": 1.9973961103812045e-05, "loss": 1.5407, "step": 1493 }, { "epoch": 0.08168063092000055, "grad_norm": 1.3483200073242188, "learning_rate": 1.9973829152020494e-05, "loss": 1.8465, "step": 1494 }, { "epoch": 0.0817353033637221, "grad_norm": 1.4820988178253174, "learning_rate": 1.9973696867179386e-05, "loss": 1.5222, "step": 1495 }, { "epoch": 0.08178997580744365, "grad_norm": 1.544813632965088, "learning_rate": 1.9973564249293136e-05, "loss": 1.6477, "step": 1496 }, { "epoch": 0.0818446482511652, "grad_norm": 1.9631234407424927, "learning_rate": 1.997343129836617e-05, "loss": 1.5386, "step": 1497 }, { "epoch": 0.08189932069488676, "grad_norm": 1.3450689315795898, "learning_rate": 1.997329801440293e-05, "loss": 1.5246, "step": 1498 }, { "epoch": 0.08195399313860831, "grad_norm": 1.687596082687378, "learning_rate": 1.9973164397407868e-05, "loss": 1.4183, "step": 1499 }, { "epoch": 0.08200866558232986, "grad_norm": 1.5008660554885864, "learning_rate": 1.997303044738544e-05, "loss": 1.3504, "step": 1500 }, { "epoch": 0.08206333802605142, "grad_norm": 2.093146324157715, "learning_rate": 1.997289616434013e-05, "loss": 1.4217, "step": 1501 }, { "epoch": 0.08211801046977298, "grad_norm": 1.4715228080749512, "learning_rate": 1.9972761548276407e-05, "loss": 1.726, "step": 1502 }, { "epoch": 0.08217268291349453, "grad_norm": 1.6056803464889526, "learning_rate": 1.9972626599198778e-05, "loss": 1.5225, "step": 1503 }, { "epoch": 0.08222735535721608, "grad_norm": 1.4428459405899048, "learning_rate": 1.9972491317111745e-05, "loss": 1.7938, "step": 1504 }, { "epoch": 0.08228202780093763, "grad_norm": 2.6537821292877197, "learning_rate": 1.9972355702019825e-05, "loss": 1.5586, "step": 1505 }, { "epoch": 0.08233670024465918, "grad_norm": 1.9924110174179077, "learning_rate": 1.997221975392755e-05, "loss": 1.5791, "step": 1506 }, { "epoch": 0.08239137268838075, "grad_norm": 1.3632066249847412, "learning_rate": 1.9972083472839454e-05, "loss": 1.5773, "step": 1507 }, { "epoch": 0.0824460451321023, "grad_norm": 2.0876715183258057, "learning_rate": 1.9971946858760088e-05, "loss": 1.2933, "step": 1508 }, { "epoch": 0.08250071757582385, "grad_norm": 1.7796317338943481, "learning_rate": 1.997180991169402e-05, "loss": 1.4745, "step": 1509 }, { "epoch": 0.0825553900195454, "grad_norm": 1.936639666557312, "learning_rate": 1.997167263164582e-05, "loss": 1.6689, "step": 1510 }, { "epoch": 0.08261006246326695, "grad_norm": 1.6990995407104492, "learning_rate": 1.997153501862007e-05, "loss": 1.5967, "step": 1511 }, { "epoch": 0.0826647349069885, "grad_norm": 1.6919329166412354, "learning_rate": 1.997139707262137e-05, "loss": 1.4812, "step": 1512 }, { "epoch": 0.08271940735071005, "grad_norm": 1.6395832300186157, "learning_rate": 1.997125879365432e-05, "loss": 1.716, "step": 1513 }, { "epoch": 0.08277407979443162, "grad_norm": 1.5662442445755005, "learning_rate": 1.9971120181723544e-05, "loss": 1.4958, "step": 1514 }, { "epoch": 0.08282875223815317, "grad_norm": 1.7120862007141113, "learning_rate": 1.997098123683366e-05, "loss": 1.4152, "step": 1515 }, { "epoch": 0.08288342468187472, "grad_norm": 1.7982224225997925, "learning_rate": 1.997084195898932e-05, "loss": 0.9693, "step": 1516 }, { "epoch": 0.08293809712559627, "grad_norm": 1.3114209175109863, "learning_rate": 1.9970702348195174e-05, "loss": 1.5138, "step": 1517 }, { "epoch": 0.08299276956931782, "grad_norm": 1.5058084726333618, "learning_rate": 1.9970562404455872e-05, "loss": 1.5267, "step": 1518 }, { "epoch": 0.08304744201303937, "grad_norm": 1.4071682691574097, "learning_rate": 1.9970422127776097e-05, "loss": 1.3183, "step": 1519 }, { "epoch": 0.08310211445676093, "grad_norm": 1.3418750762939453, "learning_rate": 1.997028151816053e-05, "loss": 1.2909, "step": 1520 }, { "epoch": 0.08315678690048249, "grad_norm": 1.575228214263916, "learning_rate": 1.997014057561387e-05, "loss": 1.4638, "step": 1521 }, { "epoch": 0.08321145934420404, "grad_norm": 1.5814039707183838, "learning_rate": 1.996999930014082e-05, "loss": 1.407, "step": 1522 }, { "epoch": 0.08326613178792559, "grad_norm": 1.5444762706756592, "learning_rate": 1.9969857691746095e-05, "loss": 1.5124, "step": 1523 }, { "epoch": 0.08332080423164714, "grad_norm": 1.6096080541610718, "learning_rate": 1.9969715750434427e-05, "loss": 1.4798, "step": 1524 }, { "epoch": 0.0833754766753687, "grad_norm": 1.6662013530731201, "learning_rate": 1.9969573476210558e-05, "loss": 1.4393, "step": 1525 }, { "epoch": 0.08343014911909025, "grad_norm": 1.3213207721710205, "learning_rate": 1.9969430869079237e-05, "loss": 1.5255, "step": 1526 }, { "epoch": 0.0834848215628118, "grad_norm": 1.5062114000320435, "learning_rate": 1.996928792904522e-05, "loss": 1.4431, "step": 1527 }, { "epoch": 0.08353949400653336, "grad_norm": 1.6940430402755737, "learning_rate": 1.996914465611329e-05, "loss": 1.3993, "step": 1528 }, { "epoch": 0.08359416645025491, "grad_norm": 1.4416582584381104, "learning_rate": 1.996900105028823e-05, "loss": 1.5564, "step": 1529 }, { "epoch": 0.08364883889397647, "grad_norm": 1.185084581375122, "learning_rate": 1.9968857111574826e-05, "loss": 1.6539, "step": 1530 }, { "epoch": 0.08370351133769802, "grad_norm": 1.632954478263855, "learning_rate": 1.996871283997789e-05, "loss": 1.2977, "step": 1531 }, { "epoch": 0.08375818378141957, "grad_norm": 1.1850625276565552, "learning_rate": 1.996856823550224e-05, "loss": 1.4306, "step": 1532 }, { "epoch": 0.08381285622514112, "grad_norm": 1.504447340965271, "learning_rate": 1.9968423298152707e-05, "loss": 1.3907, "step": 1533 }, { "epoch": 0.08386752866886267, "grad_norm": 1.6489825248718262, "learning_rate": 1.996827802793413e-05, "loss": 1.3258, "step": 1534 }, { "epoch": 0.08392220111258424, "grad_norm": 1.354317545890808, "learning_rate": 1.9968132424851357e-05, "loss": 1.5568, "step": 1535 }, { "epoch": 0.08397687355630579, "grad_norm": 2.1511728763580322, "learning_rate": 1.9967986488909248e-05, "loss": 1.4825, "step": 1536 }, { "epoch": 0.08403154600002734, "grad_norm": 1.471655249595642, "learning_rate": 1.9967840220112684e-05, "loss": 1.6402, "step": 1537 }, { "epoch": 0.08408621844374889, "grad_norm": 1.8672727346420288, "learning_rate": 1.996769361846654e-05, "loss": 1.3613, "step": 1538 }, { "epoch": 0.08414089088747044, "grad_norm": 1.5826618671417236, "learning_rate": 1.996754668397572e-05, "loss": 1.4988, "step": 1539 }, { "epoch": 0.08419556333119199, "grad_norm": 1.8594084978103638, "learning_rate": 1.9967399416645124e-05, "loss": 1.4304, "step": 1540 }, { "epoch": 0.08425023577491354, "grad_norm": 1.6618906259536743, "learning_rate": 1.9967251816479676e-05, "loss": 1.5961, "step": 1541 }, { "epoch": 0.08430490821863511, "grad_norm": 1.503785490989685, "learning_rate": 1.9967103883484297e-05, "loss": 1.5557, "step": 1542 }, { "epoch": 0.08435958066235666, "grad_norm": 1.681881308555603, "learning_rate": 1.9966955617663934e-05, "loss": 1.5301, "step": 1543 }, { "epoch": 0.08441425310607821, "grad_norm": 2.1508655548095703, "learning_rate": 1.9966807019023532e-05, "loss": 1.6099, "step": 1544 }, { "epoch": 0.08446892554979976, "grad_norm": 2.3577733039855957, "learning_rate": 1.9966658087568057e-05, "loss": 1.1491, "step": 1545 }, { "epoch": 0.08452359799352131, "grad_norm": 1.4733428955078125, "learning_rate": 1.9966508823302484e-05, "loss": 1.4551, "step": 1546 }, { "epoch": 0.08457827043724286, "grad_norm": 1.412211537361145, "learning_rate": 1.996635922623179e-05, "loss": 1.5408, "step": 1547 }, { "epoch": 0.08463294288096442, "grad_norm": 1.8616459369659424, "learning_rate": 1.9966209296360975e-05, "loss": 1.6042, "step": 1548 }, { "epoch": 0.08468761532468598, "grad_norm": 1.7939114570617676, "learning_rate": 1.9966059033695048e-05, "loss": 1.3848, "step": 1549 }, { "epoch": 0.08474228776840753, "grad_norm": 1.9412379264831543, "learning_rate": 1.9965908438239024e-05, "loss": 1.555, "step": 1550 }, { "epoch": 0.08479696021212908, "grad_norm": 1.614904522895813, "learning_rate": 1.9965757509997928e-05, "loss": 1.5722, "step": 1551 }, { "epoch": 0.08485163265585063, "grad_norm": 1.8180571794509888, "learning_rate": 1.9965606248976804e-05, "loss": 1.4595, "step": 1552 }, { "epoch": 0.08490630509957219, "grad_norm": 1.686018943786621, "learning_rate": 1.9965454655180704e-05, "loss": 1.4528, "step": 1553 }, { "epoch": 0.08496097754329374, "grad_norm": 1.1450626850128174, "learning_rate": 1.9965302728614687e-05, "loss": 1.5708, "step": 1554 }, { "epoch": 0.0850156499870153, "grad_norm": 1.5229878425598145, "learning_rate": 1.996515046928383e-05, "loss": 1.3151, "step": 1555 }, { "epoch": 0.08507032243073685, "grad_norm": 1.4375224113464355, "learning_rate": 1.996499787719321e-05, "loss": 1.5918, "step": 1556 }, { "epoch": 0.0851249948744584, "grad_norm": 1.8134859800338745, "learning_rate": 1.996484495234793e-05, "loss": 1.4258, "step": 1557 }, { "epoch": 0.08517966731817996, "grad_norm": 1.3870985507965088, "learning_rate": 1.9964691694753097e-05, "loss": 1.7093, "step": 1558 }, { "epoch": 0.0852343397619015, "grad_norm": 1.4158707857131958, "learning_rate": 1.9964538104413818e-05, "loss": 1.3319, "step": 1559 }, { "epoch": 0.08528901220562306, "grad_norm": 1.3897700309753418, "learning_rate": 1.9964384181335237e-05, "loss": 1.8021, "step": 1560 }, { "epoch": 0.08534368464934461, "grad_norm": 1.7761682271957397, "learning_rate": 1.9964229925522483e-05, "loss": 1.4088, "step": 1561 }, { "epoch": 0.08539835709306617, "grad_norm": 1.4701002836227417, "learning_rate": 1.9964075336980707e-05, "loss": 1.5165, "step": 1562 }, { "epoch": 0.08545302953678773, "grad_norm": 1.926718831062317, "learning_rate": 1.996392041571508e-05, "loss": 1.7123, "step": 1563 }, { "epoch": 0.08550770198050928, "grad_norm": 2.1104073524475098, "learning_rate": 1.9963765161730764e-05, "loss": 1.2278, "step": 1564 }, { "epoch": 0.08556237442423083, "grad_norm": 1.303541898727417, "learning_rate": 1.9963609575032952e-05, "loss": 1.2792, "step": 1565 }, { "epoch": 0.08561704686795238, "grad_norm": 1.7023420333862305, "learning_rate": 1.9963453655626833e-05, "loss": 1.2953, "step": 1566 }, { "epoch": 0.08567171931167393, "grad_norm": 1.421217679977417, "learning_rate": 1.996329740351762e-05, "loss": 1.5941, "step": 1567 }, { "epoch": 0.08572639175539548, "grad_norm": 1.7478113174438477, "learning_rate": 1.9963140818710525e-05, "loss": 1.4345, "step": 1568 }, { "epoch": 0.08578106419911705, "grad_norm": 2.0928337574005127, "learning_rate": 1.9962983901210776e-05, "loss": 1.5118, "step": 1569 }, { "epoch": 0.0858357366428386, "grad_norm": 1.6286580562591553, "learning_rate": 1.996282665102362e-05, "loss": 1.4688, "step": 1570 }, { "epoch": 0.08589040908656015, "grad_norm": 1.47043776512146, "learning_rate": 1.9962669068154303e-05, "loss": 1.4655, "step": 1571 }, { "epoch": 0.0859450815302817, "grad_norm": 1.6219879388809204, "learning_rate": 1.9962511152608087e-05, "loss": 1.454, "step": 1572 }, { "epoch": 0.08599975397400325, "grad_norm": 3.3829169273376465, "learning_rate": 1.9962352904390247e-05, "loss": 1.4351, "step": 1573 }, { "epoch": 0.0860544264177248, "grad_norm": 1.4991427659988403, "learning_rate": 1.9962194323506064e-05, "loss": 1.3963, "step": 1574 }, { "epoch": 0.08610909886144635, "grad_norm": 4.317023754119873, "learning_rate": 1.9962035409960835e-05, "loss": 1.4554, "step": 1575 }, { "epoch": 0.08616377130516792, "grad_norm": 1.486322283744812, "learning_rate": 1.996187616375987e-05, "loss": 1.5313, "step": 1576 }, { "epoch": 0.08621844374888947, "grad_norm": 1.241048812866211, "learning_rate": 1.996171658490848e-05, "loss": 1.5793, "step": 1577 }, { "epoch": 0.08627311619261102, "grad_norm": 2.6891825199127197, "learning_rate": 1.9961556673412e-05, "loss": 1.5753, "step": 1578 }, { "epoch": 0.08632778863633257, "grad_norm": 1.7201751470565796, "learning_rate": 1.9961396429275766e-05, "loss": 1.5114, "step": 1579 }, { "epoch": 0.08638246108005412, "grad_norm": 1.5906261205673218, "learning_rate": 1.996123585250513e-05, "loss": 1.4755, "step": 1580 }, { "epoch": 0.08643713352377567, "grad_norm": 1.6306707859039307, "learning_rate": 1.9961074943105457e-05, "loss": 1.3579, "step": 1581 }, { "epoch": 0.08649180596749723, "grad_norm": 1.1577993631362915, "learning_rate": 1.9960913701082112e-05, "loss": 1.5004, "step": 1582 }, { "epoch": 0.08654647841121879, "grad_norm": 1.3511306047439575, "learning_rate": 1.996075212644049e-05, "loss": 1.5183, "step": 1583 }, { "epoch": 0.08660115085494034, "grad_norm": 1.6250905990600586, "learning_rate": 1.9960590219185976e-05, "loss": 1.5482, "step": 1584 }, { "epoch": 0.0866558232986619, "grad_norm": 1.4525744915008545, "learning_rate": 1.9960427979323986e-05, "loss": 1.4034, "step": 1585 }, { "epoch": 0.08671049574238344, "grad_norm": 1.0729451179504395, "learning_rate": 1.996026540685993e-05, "loss": 1.5158, "step": 1586 }, { "epoch": 0.086765168186105, "grad_norm": 1.56699538230896, "learning_rate": 1.996010250179924e-05, "loss": 1.308, "step": 1587 }, { "epoch": 0.08681984062982655, "grad_norm": 1.7796252965927124, "learning_rate": 1.9959939264147355e-05, "loss": 1.5216, "step": 1588 }, { "epoch": 0.0868745130735481, "grad_norm": 1.2508461475372314, "learning_rate": 1.9959775693909726e-05, "loss": 1.5695, "step": 1589 }, { "epoch": 0.08692918551726966, "grad_norm": 1.4053988456726074, "learning_rate": 1.9959611791091816e-05, "loss": 1.3945, "step": 1590 }, { "epoch": 0.08698385796099121, "grad_norm": 1.435362458229065, "learning_rate": 1.9959447555699095e-05, "loss": 1.5618, "step": 1591 }, { "epoch": 0.08703853040471277, "grad_norm": 1.5788533687591553, "learning_rate": 1.9959282987737054e-05, "loss": 1.5495, "step": 1592 }, { "epoch": 0.08709320284843432, "grad_norm": 1.4030156135559082, "learning_rate": 1.9959118087211182e-05, "loss": 1.5825, "step": 1593 }, { "epoch": 0.08714787529215587, "grad_norm": 1.5086177587509155, "learning_rate": 1.9958952854126986e-05, "loss": 1.417, "step": 1594 }, { "epoch": 0.08720254773587742, "grad_norm": 2.5608203411102295, "learning_rate": 1.9958787288489983e-05, "loss": 1.609, "step": 1595 }, { "epoch": 0.08725722017959897, "grad_norm": 1.3839538097381592, "learning_rate": 1.995862139030571e-05, "loss": 1.4461, "step": 1596 }, { "epoch": 0.08731189262332054, "grad_norm": 2.1874918937683105, "learning_rate": 1.9958455159579695e-05, "loss": 1.3385, "step": 1597 }, { "epoch": 0.08736656506704209, "grad_norm": 2.1846072673797607, "learning_rate": 1.9958288596317496e-05, "loss": 1.6367, "step": 1598 }, { "epoch": 0.08742123751076364, "grad_norm": 2.0339345932006836, "learning_rate": 1.9958121700524672e-05, "loss": 1.477, "step": 1599 }, { "epoch": 0.08747590995448519, "grad_norm": 1.5019018650054932, "learning_rate": 1.99579544722068e-05, "loss": 1.6299, "step": 1600 }, { "epoch": 0.08753058239820674, "grad_norm": 2.0323526859283447, "learning_rate": 1.9957786911369456e-05, "loss": 1.7009, "step": 1601 }, { "epoch": 0.08758525484192829, "grad_norm": 1.945971131324768, "learning_rate": 1.9957619018018243e-05, "loss": 1.4901, "step": 1602 }, { "epoch": 0.08763992728564984, "grad_norm": 1.5777701139450073, "learning_rate": 1.995745079215876e-05, "loss": 1.4092, "step": 1603 }, { "epoch": 0.08769459972937141, "grad_norm": 1.5883806943893433, "learning_rate": 1.9957282233796635e-05, "loss": 1.4769, "step": 1604 }, { "epoch": 0.08774927217309296, "grad_norm": 2.3264987468719482, "learning_rate": 1.9957113342937492e-05, "loss": 1.4567, "step": 1605 }, { "epoch": 0.08780394461681451, "grad_norm": 1.6839487552642822, "learning_rate": 1.9956944119586967e-05, "loss": 1.3118, "step": 1606 }, { "epoch": 0.08785861706053606, "grad_norm": 1.5033317804336548, "learning_rate": 1.995677456375071e-05, "loss": 1.3271, "step": 1607 }, { "epoch": 0.08791328950425761, "grad_norm": 1.4284553527832031, "learning_rate": 1.9956604675434388e-05, "loss": 1.4301, "step": 1608 }, { "epoch": 0.08796796194797916, "grad_norm": 1.6740553379058838, "learning_rate": 1.9956434454643675e-05, "loss": 1.5004, "step": 1609 }, { "epoch": 0.08802263439170073, "grad_norm": 1.4819623231887817, "learning_rate": 1.995626390138425e-05, "loss": 1.4328, "step": 1610 }, { "epoch": 0.08807730683542228, "grad_norm": 1.5819188356399536, "learning_rate": 1.995609301566181e-05, "loss": 1.6292, "step": 1611 }, { "epoch": 0.08813197927914383, "grad_norm": 1.488365888595581, "learning_rate": 1.995592179748206e-05, "loss": 1.2694, "step": 1612 }, { "epoch": 0.08818665172286538, "grad_norm": 1.7625479698181152, "learning_rate": 1.995575024685072e-05, "loss": 1.3024, "step": 1613 }, { "epoch": 0.08824132416658693, "grad_norm": 1.282679557800293, "learning_rate": 1.9955578363773518e-05, "loss": 1.3879, "step": 1614 }, { "epoch": 0.08829599661030849, "grad_norm": 1.263314127922058, "learning_rate": 1.9955406148256192e-05, "loss": 1.3639, "step": 1615 }, { "epoch": 0.08835066905403004, "grad_norm": 1.4341076612472534, "learning_rate": 1.9955233600304496e-05, "loss": 1.701, "step": 1616 }, { "epoch": 0.0884053414977516, "grad_norm": 1.7052857875823975, "learning_rate": 1.9955060719924187e-05, "loss": 1.5968, "step": 1617 }, { "epoch": 0.08846001394147315, "grad_norm": 1.4205260276794434, "learning_rate": 1.995488750712104e-05, "loss": 1.3619, "step": 1618 }, { "epoch": 0.0885146863851947, "grad_norm": 1.333526611328125, "learning_rate": 1.995471396190084e-05, "loss": 1.3864, "step": 1619 }, { "epoch": 0.08856935882891626, "grad_norm": 1.6982076168060303, "learning_rate": 1.995454008426938e-05, "loss": 1.3201, "step": 1620 }, { "epoch": 0.08862403127263781, "grad_norm": 1.5059818029403687, "learning_rate": 1.995436587423247e-05, "loss": 1.4187, "step": 1621 }, { "epoch": 0.08867870371635936, "grad_norm": 1.5741255283355713, "learning_rate": 1.9954191331795926e-05, "loss": 1.5647, "step": 1622 }, { "epoch": 0.08873337616008091, "grad_norm": 1.5538687705993652, "learning_rate": 1.995401645696557e-05, "loss": 1.3338, "step": 1623 }, { "epoch": 0.08878804860380247, "grad_norm": 1.3465033769607544, "learning_rate": 1.995384124974725e-05, "loss": 1.3837, "step": 1624 }, { "epoch": 0.08884272104752403, "grad_norm": 1.5727931261062622, "learning_rate": 1.995366571014681e-05, "loss": 1.7751, "step": 1625 }, { "epoch": 0.08889739349124558, "grad_norm": 1.5659375190734863, "learning_rate": 1.995348983817012e-05, "loss": 1.2875, "step": 1626 }, { "epoch": 0.08895206593496713, "grad_norm": 1.4957748651504517, "learning_rate": 1.9953313633823045e-05, "loss": 1.3362, "step": 1627 }, { "epoch": 0.08900673837868868, "grad_norm": 2.0948798656463623, "learning_rate": 1.9953137097111475e-05, "loss": 1.4177, "step": 1628 }, { "epoch": 0.08906141082241023, "grad_norm": 1.7204400300979614, "learning_rate": 1.9952960228041297e-05, "loss": 1.3291, "step": 1629 }, { "epoch": 0.08911608326613178, "grad_norm": 1.5341800451278687, "learning_rate": 1.9952783026618424e-05, "loss": 1.4027, "step": 1630 }, { "epoch": 0.08917075570985335, "grad_norm": 1.6383814811706543, "learning_rate": 1.995260549284877e-05, "loss": 1.296, "step": 1631 }, { "epoch": 0.0892254281535749, "grad_norm": 1.3869118690490723, "learning_rate": 1.9952427626738264e-05, "loss": 1.6521, "step": 1632 }, { "epoch": 0.08928010059729645, "grad_norm": 1.255333662033081, "learning_rate": 1.9952249428292844e-05, "loss": 1.4011, "step": 1633 }, { "epoch": 0.089334773041018, "grad_norm": 1.5315126180648804, "learning_rate": 1.995207089751847e-05, "loss": 1.4066, "step": 1634 }, { "epoch": 0.08938944548473955, "grad_norm": 1.5410124063491821, "learning_rate": 1.9951892034421084e-05, "loss": 1.4274, "step": 1635 }, { "epoch": 0.0894441179284611, "grad_norm": 1.4954732656478882, "learning_rate": 1.9951712839006677e-05, "loss": 1.5315, "step": 1636 }, { "epoch": 0.08949879037218265, "grad_norm": 1.857706069946289, "learning_rate": 1.995153331128122e-05, "loss": 1.2547, "step": 1637 }, { "epoch": 0.08955346281590422, "grad_norm": 1.5264350175857544, "learning_rate": 1.9951353451250723e-05, "loss": 1.5897, "step": 1638 }, { "epoch": 0.08960813525962577, "grad_norm": 1.3540229797363281, "learning_rate": 1.9951173258921176e-05, "loss": 1.532, "step": 1639 }, { "epoch": 0.08966280770334732, "grad_norm": 1.7762031555175781, "learning_rate": 1.9950992734298606e-05, "loss": 1.2095, "step": 1640 }, { "epoch": 0.08971748014706887, "grad_norm": 1.627202033996582, "learning_rate": 1.9950811877389035e-05, "loss": 1.4945, "step": 1641 }, { "epoch": 0.08977215259079042, "grad_norm": 1.7713091373443604, "learning_rate": 1.9950630688198505e-05, "loss": 1.4319, "step": 1642 }, { "epoch": 0.08982682503451198, "grad_norm": 1.5466235876083374, "learning_rate": 1.9950449166733067e-05, "loss": 1.4496, "step": 1643 }, { "epoch": 0.08988149747823353, "grad_norm": 1.4340143203735352, "learning_rate": 1.9950267312998783e-05, "loss": 1.4938, "step": 1644 }, { "epoch": 0.08993616992195509, "grad_norm": 1.5043424367904663, "learning_rate": 1.9950085127001724e-05, "loss": 1.5407, "step": 1645 }, { "epoch": 0.08999084236567664, "grad_norm": 1.3916411399841309, "learning_rate": 1.9949902608747972e-05, "loss": 1.4698, "step": 1646 }, { "epoch": 0.0900455148093982, "grad_norm": 1.4333484172821045, "learning_rate": 1.9949719758243625e-05, "loss": 1.194, "step": 1647 }, { "epoch": 0.09010018725311975, "grad_norm": 2.390580415725708, "learning_rate": 1.994953657549479e-05, "loss": 1.4502, "step": 1648 }, { "epoch": 0.0901548596968413, "grad_norm": 1.5928953886032104, "learning_rate": 1.994935306050758e-05, "loss": 1.544, "step": 1649 }, { "epoch": 0.09020953214056285, "grad_norm": 1.364281415939331, "learning_rate": 1.9949169213288125e-05, "loss": 1.2128, "step": 1650 }, { "epoch": 0.0902642045842844, "grad_norm": 1.8797202110290527, "learning_rate": 1.994898503384256e-05, "loss": 1.6193, "step": 1651 }, { "epoch": 0.09031887702800596, "grad_norm": 1.8357555866241455, "learning_rate": 1.9948800522177043e-05, "loss": 1.4666, "step": 1652 }, { "epoch": 0.09037354947172752, "grad_norm": 1.6444532871246338, "learning_rate": 1.9948615678297728e-05, "loss": 1.575, "step": 1653 }, { "epoch": 0.09042822191544907, "grad_norm": 2.168154001235962, "learning_rate": 1.994843050221079e-05, "loss": 1.245, "step": 1654 }, { "epoch": 0.09048289435917062, "grad_norm": 2.1899962425231934, "learning_rate": 1.9948244993922413e-05, "loss": 1.4492, "step": 1655 }, { "epoch": 0.09053756680289217, "grad_norm": 1.5400794744491577, "learning_rate": 1.9948059153438796e-05, "loss": 1.4946, "step": 1656 }, { "epoch": 0.09059223924661372, "grad_norm": 1.260000228881836, "learning_rate": 1.9947872980766137e-05, "loss": 1.4135, "step": 1657 }, { "epoch": 0.09064691169033529, "grad_norm": 1.4295686483383179, "learning_rate": 1.9947686475910656e-05, "loss": 1.2315, "step": 1658 }, { "epoch": 0.09070158413405684, "grad_norm": 1.6078449487686157, "learning_rate": 1.9947499638878577e-05, "loss": 1.4246, "step": 1659 }, { "epoch": 0.09075625657777839, "grad_norm": 1.646077036857605, "learning_rate": 1.9947312469676148e-05, "loss": 1.3813, "step": 1660 }, { "epoch": 0.09081092902149994, "grad_norm": 1.5195859670639038, "learning_rate": 1.994712496830961e-05, "loss": 1.4807, "step": 1661 }, { "epoch": 0.09086560146522149, "grad_norm": 1.3184924125671387, "learning_rate": 1.994693713478523e-05, "loss": 1.4209, "step": 1662 }, { "epoch": 0.09092027390894304, "grad_norm": 1.2141833305358887, "learning_rate": 1.9946748969109275e-05, "loss": 1.7461, "step": 1663 }, { "epoch": 0.09097494635266459, "grad_norm": 2.2166714668273926, "learning_rate": 1.994656047128803e-05, "loss": 1.4363, "step": 1664 }, { "epoch": 0.09102961879638616, "grad_norm": 2.295611619949341, "learning_rate": 1.9946371641327794e-05, "loss": 1.5408, "step": 1665 }, { "epoch": 0.09108429124010771, "grad_norm": 1.6327710151672363, "learning_rate": 1.994618247923487e-05, "loss": 1.4758, "step": 1666 }, { "epoch": 0.09113896368382926, "grad_norm": 1.516614317893982, "learning_rate": 1.9945992985015573e-05, "loss": 1.5438, "step": 1667 }, { "epoch": 0.09119363612755081, "grad_norm": 2.3388166427612305, "learning_rate": 1.994580315867623e-05, "loss": 1.6249, "step": 1668 }, { "epoch": 0.09124830857127236, "grad_norm": 1.1179083585739136, "learning_rate": 1.994561300022318e-05, "loss": 1.5664, "step": 1669 }, { "epoch": 0.09130298101499391, "grad_norm": 2.114856719970703, "learning_rate": 1.9945422509662774e-05, "loss": 1.4734, "step": 1670 }, { "epoch": 0.09135765345871547, "grad_norm": 1.2546510696411133, "learning_rate": 1.9945231687001374e-05, "loss": 1.4614, "step": 1671 }, { "epoch": 0.09141232590243703, "grad_norm": 1.4875324964523315, "learning_rate": 1.9945040532245352e-05, "loss": 1.5395, "step": 1672 }, { "epoch": 0.09146699834615858, "grad_norm": 1.835026502609253, "learning_rate": 1.9944849045401088e-05, "loss": 1.6127, "step": 1673 }, { "epoch": 0.09152167078988013, "grad_norm": 1.870519757270813, "learning_rate": 1.9944657226474978e-05, "loss": 1.4955, "step": 1674 }, { "epoch": 0.09157634323360168, "grad_norm": 1.2327766418457031, "learning_rate": 1.9944465075473427e-05, "loss": 1.3819, "step": 1675 }, { "epoch": 0.09163101567732324, "grad_norm": 1.5991803407669067, "learning_rate": 1.9944272592402854e-05, "loss": 1.4945, "step": 1676 }, { "epoch": 0.09168568812104479, "grad_norm": 2.0337207317352295, "learning_rate": 1.994407977726968e-05, "loss": 1.5594, "step": 1677 }, { "epoch": 0.09174036056476634, "grad_norm": 1.170514702796936, "learning_rate": 1.994388663008035e-05, "loss": 1.5434, "step": 1678 }, { "epoch": 0.0917950330084879, "grad_norm": 2.9253616333007812, "learning_rate": 1.9943693150841312e-05, "loss": 1.5745, "step": 1679 }, { "epoch": 0.09184970545220945, "grad_norm": 1.445176362991333, "learning_rate": 1.9943499339559026e-05, "loss": 1.4169, "step": 1680 }, { "epoch": 0.091904377895931, "grad_norm": 1.472078800201416, "learning_rate": 1.9943305196239963e-05, "loss": 1.6143, "step": 1681 }, { "epoch": 0.09195905033965256, "grad_norm": 1.874276876449585, "learning_rate": 1.9943110720890608e-05, "loss": 1.4137, "step": 1682 }, { "epoch": 0.09201372278337411, "grad_norm": 1.8416680097579956, "learning_rate": 1.994291591351745e-05, "loss": 1.7437, "step": 1683 }, { "epoch": 0.09206839522709566, "grad_norm": 1.7027053833007812, "learning_rate": 1.9942720774127005e-05, "loss": 1.4558, "step": 1684 }, { "epoch": 0.09212306767081721, "grad_norm": 1.5260987281799316, "learning_rate": 1.9942525302725773e-05, "loss": 1.3184, "step": 1685 }, { "epoch": 0.09217774011453878, "grad_norm": 1.2607494592666626, "learning_rate": 1.9942329499320298e-05, "loss": 1.495, "step": 1686 }, { "epoch": 0.09223241255826033, "grad_norm": 1.3194410800933838, "learning_rate": 1.9942133363917108e-05, "loss": 1.4604, "step": 1687 }, { "epoch": 0.09228708500198188, "grad_norm": 1.4082800149917603, "learning_rate": 1.9941936896522756e-05, "loss": 1.4135, "step": 1688 }, { "epoch": 0.09234175744570343, "grad_norm": 1.529824137687683, "learning_rate": 1.9941740097143802e-05, "loss": 1.3726, "step": 1689 }, { "epoch": 0.09239642988942498, "grad_norm": 1.5300178527832031, "learning_rate": 1.9941542965786814e-05, "loss": 1.6522, "step": 1690 }, { "epoch": 0.09245110233314653, "grad_norm": 1.5218101739883423, "learning_rate": 1.9941345502458377e-05, "loss": 1.5566, "step": 1691 }, { "epoch": 0.09250577477686808, "grad_norm": 1.5422654151916504, "learning_rate": 1.994114770716509e-05, "loss": 1.5672, "step": 1692 }, { "epoch": 0.09256044722058965, "grad_norm": 1.9069157838821411, "learning_rate": 1.994094957991355e-05, "loss": 1.4447, "step": 1693 }, { "epoch": 0.0926151196643112, "grad_norm": 2.1209354400634766, "learning_rate": 1.994075112071038e-05, "loss": 1.3248, "step": 1694 }, { "epoch": 0.09266979210803275, "grad_norm": 1.3013306856155396, "learning_rate": 1.9940552329562202e-05, "loss": 1.4721, "step": 1695 }, { "epoch": 0.0927244645517543, "grad_norm": 1.5683015584945679, "learning_rate": 1.9940353206475653e-05, "loss": 1.5155, "step": 1696 }, { "epoch": 0.09277913699547585, "grad_norm": 1.8606189489364624, "learning_rate": 1.9940153751457386e-05, "loss": 1.4552, "step": 1697 }, { "epoch": 0.0928338094391974, "grad_norm": 1.5670247077941895, "learning_rate": 1.993995396451406e-05, "loss": 1.5439, "step": 1698 }, { "epoch": 0.09288848188291896, "grad_norm": 1.4064074754714966, "learning_rate": 1.9939753845652348e-05, "loss": 1.5579, "step": 1699 }, { "epoch": 0.09294315432664052, "grad_norm": 1.332702875137329, "learning_rate": 1.9939553394878926e-05, "loss": 1.4403, "step": 1700 }, { "epoch": 0.09299782677036207, "grad_norm": 1.8925729990005493, "learning_rate": 1.99393526122005e-05, "loss": 1.2291, "step": 1701 }, { "epoch": 0.09305249921408362, "grad_norm": 1.5949710607528687, "learning_rate": 1.993915149762376e-05, "loss": 1.5383, "step": 1702 }, { "epoch": 0.09310717165780517, "grad_norm": 1.6409337520599365, "learning_rate": 1.993895005115543e-05, "loss": 1.5149, "step": 1703 }, { "epoch": 0.09316184410152673, "grad_norm": 1.6703702211380005, "learning_rate": 1.993874827280224e-05, "loss": 1.3021, "step": 1704 }, { "epoch": 0.09321651654524828, "grad_norm": 1.4744514226913452, "learning_rate": 1.9938546162570916e-05, "loss": 1.5005, "step": 1705 }, { "epoch": 0.09327118898896983, "grad_norm": 1.2823127508163452, "learning_rate": 1.9938343720468216e-05, "loss": 1.7332, "step": 1706 }, { "epoch": 0.09332586143269139, "grad_norm": 1.6448768377304077, "learning_rate": 1.99381409465009e-05, "loss": 1.0352, "step": 1707 }, { "epoch": 0.09338053387641294, "grad_norm": 1.7453819513320923, "learning_rate": 1.9937937840675737e-05, "loss": 1.4635, "step": 1708 }, { "epoch": 0.0934352063201345, "grad_norm": 2.666973114013672, "learning_rate": 1.993773440299951e-05, "loss": 1.4536, "step": 1709 }, { "epoch": 0.09348987876385605, "grad_norm": 1.7528210878372192, "learning_rate": 1.9937530633479013e-05, "loss": 1.3199, "step": 1710 }, { "epoch": 0.0935445512075776, "grad_norm": 1.3800915479660034, "learning_rate": 1.9937326532121047e-05, "loss": 1.4268, "step": 1711 }, { "epoch": 0.09359922365129915, "grad_norm": 1.4592633247375488, "learning_rate": 1.9937122098932428e-05, "loss": 1.5123, "step": 1712 }, { "epoch": 0.09365389609502071, "grad_norm": 1.603437066078186, "learning_rate": 1.9936917333919983e-05, "loss": 1.1556, "step": 1713 }, { "epoch": 0.09370856853874227, "grad_norm": 1.5751267671585083, "learning_rate": 1.9936712237090554e-05, "loss": 1.2848, "step": 1714 }, { "epoch": 0.09376324098246382, "grad_norm": 1.3115602731704712, "learning_rate": 1.9936506808450984e-05, "loss": 1.4732, "step": 1715 }, { "epoch": 0.09381791342618537, "grad_norm": 1.1734814643859863, "learning_rate": 1.9936301048008137e-05, "loss": 1.5342, "step": 1716 }, { "epoch": 0.09387258586990692, "grad_norm": 1.5867278575897217, "learning_rate": 1.993609495576888e-05, "loss": 1.3921, "step": 1717 }, { "epoch": 0.09392725831362847, "grad_norm": 1.6521413326263428, "learning_rate": 1.99358885317401e-05, "loss": 1.5606, "step": 1718 }, { "epoch": 0.09398193075735002, "grad_norm": 2.2001421451568604, "learning_rate": 1.9935681775928683e-05, "loss": 1.8783, "step": 1719 }, { "epoch": 0.09403660320107159, "grad_norm": 1.493700623512268, "learning_rate": 1.9935474688341536e-05, "loss": 1.3139, "step": 1720 }, { "epoch": 0.09409127564479314, "grad_norm": 1.5041532516479492, "learning_rate": 1.9935267268985577e-05, "loss": 1.5916, "step": 1721 }, { "epoch": 0.09414594808851469, "grad_norm": 1.9454349279403687, "learning_rate": 1.993505951786773e-05, "loss": 1.6032, "step": 1722 }, { "epoch": 0.09420062053223624, "grad_norm": 1.6340646743774414, "learning_rate": 1.993485143499493e-05, "loss": 1.7373, "step": 1723 }, { "epoch": 0.09425529297595779, "grad_norm": 1.9457353353500366, "learning_rate": 1.993464302037413e-05, "loss": 1.5572, "step": 1724 }, { "epoch": 0.09430996541967934, "grad_norm": 1.574852466583252, "learning_rate": 1.993443427401229e-05, "loss": 1.6803, "step": 1725 }, { "epoch": 0.0943646378634009, "grad_norm": 1.2695581912994385, "learning_rate": 1.993422519591637e-05, "loss": 1.7121, "step": 1726 }, { "epoch": 0.09441931030712246, "grad_norm": 1.5292879343032837, "learning_rate": 1.9934015786093365e-05, "loss": 1.544, "step": 1727 }, { "epoch": 0.09447398275084401, "grad_norm": 2.712569236755371, "learning_rate": 1.9933806044550262e-05, "loss": 1.3952, "step": 1728 }, { "epoch": 0.09452865519456556, "grad_norm": 1.6308927536010742, "learning_rate": 1.9933595971294064e-05, "loss": 1.3264, "step": 1729 }, { "epoch": 0.09458332763828711, "grad_norm": 1.7557048797607422, "learning_rate": 1.9933385566331785e-05, "loss": 1.2184, "step": 1730 }, { "epoch": 0.09463800008200866, "grad_norm": 1.593674659729004, "learning_rate": 1.9933174829670455e-05, "loss": 1.4413, "step": 1731 }, { "epoch": 0.09469267252573021, "grad_norm": 1.2245628833770752, "learning_rate": 1.9932963761317105e-05, "loss": 1.6795, "step": 1732 }, { "epoch": 0.09474734496945177, "grad_norm": 1.6458286046981812, "learning_rate": 1.993275236127879e-05, "loss": 1.5459, "step": 1733 }, { "epoch": 0.09480201741317333, "grad_norm": 1.4179911613464355, "learning_rate": 1.9932540629562563e-05, "loss": 1.5115, "step": 1734 }, { "epoch": 0.09485668985689488, "grad_norm": 1.5650172233581543, "learning_rate": 1.99323285661755e-05, "loss": 1.3741, "step": 1735 }, { "epoch": 0.09491136230061643, "grad_norm": 1.5530622005462646, "learning_rate": 1.9932116171124676e-05, "loss": 1.5509, "step": 1736 }, { "epoch": 0.09496603474433798, "grad_norm": 1.7433711290359497, "learning_rate": 1.9931903444417187e-05, "loss": 1.5155, "step": 1737 }, { "epoch": 0.09502070718805954, "grad_norm": 1.6684026718139648, "learning_rate": 1.993169038606014e-05, "loss": 1.4881, "step": 1738 }, { "epoch": 0.09507537963178109, "grad_norm": 1.3182157278060913, "learning_rate": 1.9931476996060644e-05, "loss": 1.6398, "step": 1739 }, { "epoch": 0.09513005207550264, "grad_norm": 1.5965338945388794, "learning_rate": 1.9931263274425823e-05, "loss": 1.5307, "step": 1740 }, { "epoch": 0.0951847245192242, "grad_norm": 1.9560225009918213, "learning_rate": 1.993104922116282e-05, "loss": 1.3489, "step": 1741 }, { "epoch": 0.09523939696294575, "grad_norm": 1.3261865377426147, "learning_rate": 1.993083483627878e-05, "loss": 1.5302, "step": 1742 }, { "epoch": 0.0952940694066673, "grad_norm": 1.3669250011444092, "learning_rate": 1.993062011978086e-05, "loss": 1.3273, "step": 1743 }, { "epoch": 0.09534874185038886, "grad_norm": 1.9383946657180786, "learning_rate": 1.9930405071676228e-05, "loss": 1.557, "step": 1744 }, { "epoch": 0.09540341429411041, "grad_norm": 1.4097356796264648, "learning_rate": 1.993018969197207e-05, "loss": 1.5824, "step": 1745 }, { "epoch": 0.09545808673783196, "grad_norm": 1.401871681213379, "learning_rate": 1.992997398067558e-05, "loss": 1.652, "step": 1746 }, { "epoch": 0.09551275918155351, "grad_norm": 1.1693874597549438, "learning_rate": 1.9929757937793953e-05, "loss": 1.5897, "step": 1747 }, { "epoch": 0.09556743162527508, "grad_norm": 1.1689685583114624, "learning_rate": 1.992954156333441e-05, "loss": 1.2714, "step": 1748 }, { "epoch": 0.09562210406899663, "grad_norm": 1.5252403020858765, "learning_rate": 1.9929324857304175e-05, "loss": 1.3496, "step": 1749 }, { "epoch": 0.09567677651271818, "grad_norm": 1.7896329164505005, "learning_rate": 1.992910781971048e-05, "loss": 1.4395, "step": 1750 }, { "epoch": 0.09573144895643973, "grad_norm": 1.3501027822494507, "learning_rate": 1.9928890450560576e-05, "loss": 1.5115, "step": 1751 }, { "epoch": 0.09578612140016128, "grad_norm": 1.6681352853775024, "learning_rate": 1.9928672749861725e-05, "loss": 1.5475, "step": 1752 }, { "epoch": 0.09584079384388283, "grad_norm": 1.6473833322525024, "learning_rate": 1.992845471762119e-05, "loss": 1.2767, "step": 1753 }, { "epoch": 0.09589546628760438, "grad_norm": 1.517398715019226, "learning_rate": 1.992823635384625e-05, "loss": 1.5141, "step": 1754 }, { "epoch": 0.09595013873132595, "grad_norm": 1.4058247804641724, "learning_rate": 1.9928017658544206e-05, "loss": 1.3193, "step": 1755 }, { "epoch": 0.0960048111750475, "grad_norm": 1.4885313510894775, "learning_rate": 1.9927798631722353e-05, "loss": 1.3931, "step": 1756 }, { "epoch": 0.09605948361876905, "grad_norm": 1.8966232538223267, "learning_rate": 1.992757927338801e-05, "loss": 1.5853, "step": 1757 }, { "epoch": 0.0961141560624906, "grad_norm": 1.65766179561615, "learning_rate": 1.9927359583548495e-05, "loss": 1.3069, "step": 1758 }, { "epoch": 0.09616882850621215, "grad_norm": 1.7841012477874756, "learning_rate": 1.9927139562211154e-05, "loss": 1.5355, "step": 1759 }, { "epoch": 0.0962235009499337, "grad_norm": 1.6166903972625732, "learning_rate": 1.9926919209383325e-05, "loss": 1.2986, "step": 1760 }, { "epoch": 0.09627817339365527, "grad_norm": 1.2828015089035034, "learning_rate": 1.9926698525072368e-05, "loss": 1.5284, "step": 1761 }, { "epoch": 0.09633284583737682, "grad_norm": 1.38475501537323, "learning_rate": 1.9926477509285654e-05, "loss": 1.4538, "step": 1762 }, { "epoch": 0.09638751828109837, "grad_norm": 1.780187964439392, "learning_rate": 1.9926256162030564e-05, "loss": 1.3565, "step": 1763 }, { "epoch": 0.09644219072481992, "grad_norm": 1.5863038301467896, "learning_rate": 1.992603448331449e-05, "loss": 1.3199, "step": 1764 }, { "epoch": 0.09649686316854147, "grad_norm": 1.4961845874786377, "learning_rate": 1.9925812473144826e-05, "loss": 1.5003, "step": 1765 }, { "epoch": 0.09655153561226303, "grad_norm": 1.2805190086364746, "learning_rate": 1.9925590131528998e-05, "loss": 1.2253, "step": 1766 }, { "epoch": 0.09660620805598458, "grad_norm": 1.4726791381835938, "learning_rate": 1.9925367458474425e-05, "loss": 1.5545, "step": 1767 }, { "epoch": 0.09666088049970614, "grad_norm": 1.8597477674484253, "learning_rate": 1.992514445398854e-05, "loss": 1.4858, "step": 1768 }, { "epoch": 0.0967155529434277, "grad_norm": 1.569022297859192, "learning_rate": 1.9924921118078792e-05, "loss": 1.6647, "step": 1769 }, { "epoch": 0.09677022538714924, "grad_norm": 1.6198606491088867, "learning_rate": 1.9924697450752636e-05, "loss": 1.3653, "step": 1770 }, { "epoch": 0.0968248978308708, "grad_norm": 1.3879739046096802, "learning_rate": 1.992447345201754e-05, "loss": 1.5356, "step": 1771 }, { "epoch": 0.09687957027459235, "grad_norm": 1.2947200536727905, "learning_rate": 1.9924249121880993e-05, "loss": 1.5234, "step": 1772 }, { "epoch": 0.0969342427183139, "grad_norm": 1.693143606185913, "learning_rate": 1.992402446035048e-05, "loss": 1.0918, "step": 1773 }, { "epoch": 0.09698891516203545, "grad_norm": 2.1220762729644775, "learning_rate": 1.99237994674335e-05, "loss": 1.69, "step": 1774 }, { "epoch": 0.09704358760575701, "grad_norm": 1.4714914560317993, "learning_rate": 1.992357414313757e-05, "loss": 1.7523, "step": 1775 }, { "epoch": 0.09709826004947857, "grad_norm": 1.4982718229293823, "learning_rate": 1.9923348487470213e-05, "loss": 1.3369, "step": 1776 }, { "epoch": 0.09715293249320012, "grad_norm": 2.040687084197998, "learning_rate": 1.9923122500438964e-05, "loss": 1.5144, "step": 1777 }, { "epoch": 0.09720760493692167, "grad_norm": 1.582450032234192, "learning_rate": 1.992289618205137e-05, "loss": 1.8172, "step": 1778 }, { "epoch": 0.09726227738064322, "grad_norm": 1.45365571975708, "learning_rate": 1.9922669532314986e-05, "loss": 1.3067, "step": 1779 }, { "epoch": 0.09731694982436477, "grad_norm": 1.7552604675292969, "learning_rate": 1.9922442551237383e-05, "loss": 1.5179, "step": 1780 }, { "epoch": 0.09737162226808632, "grad_norm": 1.2681670188903809, "learning_rate": 1.9922215238826142e-05, "loss": 1.3719, "step": 1781 }, { "epoch": 0.09742629471180789, "grad_norm": 2.1350998878479004, "learning_rate": 1.9921987595088846e-05, "loss": 1.3543, "step": 1782 }, { "epoch": 0.09748096715552944, "grad_norm": 1.6422009468078613, "learning_rate": 1.9921759620033105e-05, "loss": 1.312, "step": 1783 }, { "epoch": 0.09753563959925099, "grad_norm": 1.4626091718673706, "learning_rate": 1.9921531313666526e-05, "loss": 1.451, "step": 1784 }, { "epoch": 0.09759031204297254, "grad_norm": 1.390844464302063, "learning_rate": 1.9921302675996735e-05, "loss": 1.6402, "step": 1785 }, { "epoch": 0.09764498448669409, "grad_norm": 1.389800786972046, "learning_rate": 1.992107370703137e-05, "loss": 1.6959, "step": 1786 }, { "epoch": 0.09769965693041564, "grad_norm": 1.4385098218917847, "learning_rate": 1.992084440677807e-05, "loss": 1.2667, "step": 1787 }, { "epoch": 0.0977543293741372, "grad_norm": 1.4191471338272095, "learning_rate": 1.9920614775244495e-05, "loss": 1.3743, "step": 1788 }, { "epoch": 0.09780900181785876, "grad_norm": 1.488439679145813, "learning_rate": 1.9920384812438315e-05, "loss": 1.3711, "step": 1789 }, { "epoch": 0.09786367426158031, "grad_norm": 1.3585976362228394, "learning_rate": 1.9920154518367206e-05, "loss": 1.6996, "step": 1790 }, { "epoch": 0.09791834670530186, "grad_norm": 1.3255475759506226, "learning_rate": 1.9919923893038863e-05, "loss": 1.5134, "step": 1791 }, { "epoch": 0.09797301914902341, "grad_norm": 1.3267654180526733, "learning_rate": 1.9919692936460978e-05, "loss": 1.3198, "step": 1792 }, { "epoch": 0.09802769159274496, "grad_norm": 1.4586164951324463, "learning_rate": 1.991946164864127e-05, "loss": 1.3646, "step": 1793 }, { "epoch": 0.09808236403646652, "grad_norm": 1.7039252519607544, "learning_rate": 1.9919230029587463e-05, "loss": 1.4376, "step": 1794 }, { "epoch": 0.09813703648018807, "grad_norm": 1.5228753089904785, "learning_rate": 1.9918998079307286e-05, "loss": 1.5011, "step": 1795 }, { "epoch": 0.09819170892390963, "grad_norm": 1.8350441455841064, "learning_rate": 1.9918765797808492e-05, "loss": 1.1701, "step": 1796 }, { "epoch": 0.09824638136763118, "grad_norm": 1.790132999420166, "learning_rate": 1.991853318509883e-05, "loss": 1.7619, "step": 1797 }, { "epoch": 0.09830105381135273, "grad_norm": 1.6200367212295532, "learning_rate": 1.991830024118607e-05, "loss": 1.2881, "step": 1798 }, { "epoch": 0.09835572625507429, "grad_norm": 1.3124312162399292, "learning_rate": 1.9918066966077992e-05, "loss": 1.4699, "step": 1799 }, { "epoch": 0.09841039869879584, "grad_norm": 2.4666144847869873, "learning_rate": 1.9917833359782382e-05, "loss": 1.4133, "step": 1800 }, { "epoch": 0.09846507114251739, "grad_norm": 1.6868526935577393, "learning_rate": 1.9917599422307047e-05, "loss": 1.545, "step": 1801 }, { "epoch": 0.09851974358623894, "grad_norm": 2.07700777053833, "learning_rate": 1.9917365153659794e-05, "loss": 1.2554, "step": 1802 }, { "epoch": 0.0985744160299605, "grad_norm": 1.369429588317871, "learning_rate": 1.9917130553848445e-05, "loss": 1.5319, "step": 1803 }, { "epoch": 0.09862908847368206, "grad_norm": 1.4568930864334106, "learning_rate": 1.9916895622880835e-05, "loss": 1.4973, "step": 1804 }, { "epoch": 0.0986837609174036, "grad_norm": 1.8687548637390137, "learning_rate": 1.991666036076481e-05, "loss": 1.601, "step": 1805 }, { "epoch": 0.09873843336112516, "grad_norm": 1.3855078220367432, "learning_rate": 1.9916424767508226e-05, "loss": 1.3474, "step": 1806 }, { "epoch": 0.09879310580484671, "grad_norm": 1.2748337984085083, "learning_rate": 1.991618884311895e-05, "loss": 1.5894, "step": 1807 }, { "epoch": 0.09884777824856826, "grad_norm": 1.2513909339904785, "learning_rate": 1.9915952587604857e-05, "loss": 1.3858, "step": 1808 }, { "epoch": 0.09890245069228983, "grad_norm": 1.5335333347320557, "learning_rate": 1.9915716000973844e-05, "loss": 1.6996, "step": 1809 }, { "epoch": 0.09895712313601138, "grad_norm": 1.6265902519226074, "learning_rate": 1.9915479083233803e-05, "loss": 1.3543, "step": 1810 }, { "epoch": 0.09901179557973293, "grad_norm": 2.085642099380493, "learning_rate": 1.991524183439265e-05, "loss": 1.582, "step": 1811 }, { "epoch": 0.09906646802345448, "grad_norm": 1.2570738792419434, "learning_rate": 1.99150042544583e-05, "loss": 1.5146, "step": 1812 }, { "epoch": 0.09912114046717603, "grad_norm": 1.4518916606903076, "learning_rate": 1.9914766343438695e-05, "loss": 1.8585, "step": 1813 }, { "epoch": 0.09917581291089758, "grad_norm": 1.47215735912323, "learning_rate": 1.9914528101341773e-05, "loss": 1.3512, "step": 1814 }, { "epoch": 0.09923048535461913, "grad_norm": 1.4385201930999756, "learning_rate": 1.9914289528175495e-05, "loss": 1.393, "step": 1815 }, { "epoch": 0.0992851577983407, "grad_norm": 1.379751443862915, "learning_rate": 1.9914050623947826e-05, "loss": 1.2352, "step": 1816 }, { "epoch": 0.09933983024206225, "grad_norm": 2.53837513923645, "learning_rate": 1.9913811388666742e-05, "loss": 1.3339, "step": 1817 }, { "epoch": 0.0993945026857838, "grad_norm": 1.4115151166915894, "learning_rate": 1.991357182234023e-05, "loss": 1.3359, "step": 1818 }, { "epoch": 0.09944917512950535, "grad_norm": 1.7813726663589478, "learning_rate": 1.9913331924976295e-05, "loss": 1.4404, "step": 1819 }, { "epoch": 0.0995038475732269, "grad_norm": 1.230549693107605, "learning_rate": 1.9913091696582945e-05, "loss": 1.441, "step": 1820 }, { "epoch": 0.09955852001694845, "grad_norm": 1.4457097053527832, "learning_rate": 1.99128511371682e-05, "loss": 1.5427, "step": 1821 }, { "epoch": 0.09961319246067, "grad_norm": 1.6225022077560425, "learning_rate": 1.9912610246740095e-05, "loss": 1.5632, "step": 1822 }, { "epoch": 0.09966786490439157, "grad_norm": 1.2429258823394775, "learning_rate": 1.991236902530667e-05, "loss": 1.4505, "step": 1823 }, { "epoch": 0.09972253734811312, "grad_norm": 1.6885920763015747, "learning_rate": 1.9912127472875986e-05, "loss": 1.4479, "step": 1824 }, { "epoch": 0.09977720979183467, "grad_norm": 1.6427525281906128, "learning_rate": 1.9911885589456107e-05, "loss": 1.3904, "step": 1825 }, { "epoch": 0.09983188223555622, "grad_norm": 1.3026151657104492, "learning_rate": 1.991164337505511e-05, "loss": 1.4737, "step": 1826 }, { "epoch": 0.09988655467927778, "grad_norm": 1.5707520246505737, "learning_rate": 1.9911400829681075e-05, "loss": 1.344, "step": 1827 }, { "epoch": 0.09994122712299933, "grad_norm": 1.2162247896194458, "learning_rate": 1.9911157953342114e-05, "loss": 1.2521, "step": 1828 }, { "epoch": 0.09999589956672088, "grad_norm": 1.5824880599975586, "learning_rate": 1.9910914746046333e-05, "loss": 1.2179, "step": 1829 }, { "epoch": 0.10005057201044244, "grad_norm": 1.422582983970642, "learning_rate": 1.9910671207801847e-05, "loss": 1.4524, "step": 1830 }, { "epoch": 0.100105244454164, "grad_norm": 1.220430612564087, "learning_rate": 1.9910427338616798e-05, "loss": 1.4832, "step": 1831 }, { "epoch": 0.10015991689788555, "grad_norm": 1.7565767765045166, "learning_rate": 1.9910183138499324e-05, "loss": 1.2841, "step": 1832 }, { "epoch": 0.1002145893416071, "grad_norm": 1.9228129386901855, "learning_rate": 1.990993860745758e-05, "loss": 1.515, "step": 1833 }, { "epoch": 0.10026926178532865, "grad_norm": 1.434735655784607, "learning_rate": 1.990969374549973e-05, "loss": 1.5442, "step": 1834 }, { "epoch": 0.1003239342290502, "grad_norm": 1.603229284286499, "learning_rate": 1.9909448552633952e-05, "loss": 1.4695, "step": 1835 }, { "epoch": 0.10037860667277175, "grad_norm": 1.4455797672271729, "learning_rate": 1.9909203028868432e-05, "loss": 1.6003, "step": 1836 }, { "epoch": 0.10043327911649332, "grad_norm": 1.4059416055679321, "learning_rate": 1.9908957174211375e-05, "loss": 1.6614, "step": 1837 }, { "epoch": 0.10048795156021487, "grad_norm": 1.34452223777771, "learning_rate": 1.9908710988670983e-05, "loss": 1.5081, "step": 1838 }, { "epoch": 0.10054262400393642, "grad_norm": 1.5210928916931152, "learning_rate": 1.990846447225548e-05, "loss": 1.4379, "step": 1839 }, { "epoch": 0.10059729644765797, "grad_norm": 1.6695094108581543, "learning_rate": 1.99082176249731e-05, "loss": 1.501, "step": 1840 }, { "epoch": 0.10065196889137952, "grad_norm": 1.6116315126419067, "learning_rate": 1.9907970446832076e-05, "loss": 1.5605, "step": 1841 }, { "epoch": 0.10070664133510107, "grad_norm": 1.350374460220337, "learning_rate": 1.9907722937840676e-05, "loss": 1.3767, "step": 1842 }, { "epoch": 0.10076131377882262, "grad_norm": 1.3653523921966553, "learning_rate": 1.9907475098007154e-05, "loss": 1.6476, "step": 1843 }, { "epoch": 0.10081598622254419, "grad_norm": 1.465053915977478, "learning_rate": 1.990722692733979e-05, "loss": 1.4378, "step": 1844 }, { "epoch": 0.10087065866626574, "grad_norm": 1.4275823831558228, "learning_rate": 1.9906978425846876e-05, "loss": 1.4469, "step": 1845 }, { "epoch": 0.10092533110998729, "grad_norm": 1.1340959072113037, "learning_rate": 1.9906729593536697e-05, "loss": 1.5307, "step": 1846 }, { "epoch": 0.10098000355370884, "grad_norm": 1.759062647819519, "learning_rate": 1.9906480430417575e-05, "loss": 1.3488, "step": 1847 }, { "epoch": 0.10103467599743039, "grad_norm": 1.7042450904846191, "learning_rate": 1.9906230936497825e-05, "loss": 1.4329, "step": 1848 }, { "epoch": 0.10108934844115194, "grad_norm": 1.4327473640441895, "learning_rate": 1.9905981111785774e-05, "loss": 1.5048, "step": 1849 }, { "epoch": 0.1011440208848735, "grad_norm": 1.8792243003845215, "learning_rate": 1.9905730956289772e-05, "loss": 1.4042, "step": 1850 }, { "epoch": 0.10119869332859506, "grad_norm": 1.819439172744751, "learning_rate": 1.9905480470018172e-05, "loss": 1.3254, "step": 1851 }, { "epoch": 0.10125336577231661, "grad_norm": 1.2951730489730835, "learning_rate": 1.9905229652979332e-05, "loss": 1.678, "step": 1852 }, { "epoch": 0.10130803821603816, "grad_norm": 1.4556912183761597, "learning_rate": 1.990497850518163e-05, "loss": 1.351, "step": 1853 }, { "epoch": 0.10136271065975971, "grad_norm": 1.6272050142288208, "learning_rate": 1.9904727026633453e-05, "loss": 1.5727, "step": 1854 }, { "epoch": 0.10141738310348127, "grad_norm": 1.3413488864898682, "learning_rate": 1.99044752173432e-05, "loss": 1.4423, "step": 1855 }, { "epoch": 0.10147205554720282, "grad_norm": 1.7437105178833008, "learning_rate": 1.9904223077319276e-05, "loss": 1.5833, "step": 1856 }, { "epoch": 0.10152672799092437, "grad_norm": 1.4060497283935547, "learning_rate": 1.99039706065701e-05, "loss": 1.4442, "step": 1857 }, { "epoch": 0.10158140043464593, "grad_norm": 1.4326763153076172, "learning_rate": 1.9903717805104112e-05, "loss": 1.4347, "step": 1858 }, { "epoch": 0.10163607287836748, "grad_norm": 2.0696282386779785, "learning_rate": 1.990346467292974e-05, "loss": 1.5319, "step": 1859 }, { "epoch": 0.10169074532208904, "grad_norm": 1.5258244276046753, "learning_rate": 1.990321121005545e-05, "loss": 1.4674, "step": 1860 }, { "epoch": 0.10174541776581059, "grad_norm": 1.3445353507995605, "learning_rate": 1.9902957416489693e-05, "loss": 1.5219, "step": 1861 }, { "epoch": 0.10180009020953214, "grad_norm": 1.6598886251449585, "learning_rate": 1.9902703292240953e-05, "loss": 1.5394, "step": 1862 }, { "epoch": 0.10185476265325369, "grad_norm": 1.4137279987335205, "learning_rate": 1.9902448837317712e-05, "loss": 1.4491, "step": 1863 }, { "epoch": 0.10190943509697525, "grad_norm": 1.3853482007980347, "learning_rate": 1.9902194051728466e-05, "loss": 1.4587, "step": 1864 }, { "epoch": 0.1019641075406968, "grad_norm": 3.362921714782715, "learning_rate": 1.9901938935481727e-05, "loss": 1.6018, "step": 1865 }, { "epoch": 0.10201877998441836, "grad_norm": 2.372065544128418, "learning_rate": 1.990168348858601e-05, "loss": 1.2527, "step": 1866 }, { "epoch": 0.10207345242813991, "grad_norm": 1.1766599416732788, "learning_rate": 1.9901427711049847e-05, "loss": 1.658, "step": 1867 }, { "epoch": 0.10212812487186146, "grad_norm": 1.3948557376861572, "learning_rate": 1.9901171602881778e-05, "loss": 1.4058, "step": 1868 }, { "epoch": 0.10218279731558301, "grad_norm": 1.411057949066162, "learning_rate": 1.9900915164090352e-05, "loss": 1.2897, "step": 1869 }, { "epoch": 0.10223746975930456, "grad_norm": 1.5309317111968994, "learning_rate": 1.990065839468414e-05, "loss": 1.4731, "step": 1870 }, { "epoch": 0.10229214220302613, "grad_norm": 1.558642864227295, "learning_rate": 1.990040129467171e-05, "loss": 1.4109, "step": 1871 }, { "epoch": 0.10234681464674768, "grad_norm": 1.3029075860977173, "learning_rate": 1.990014386406165e-05, "loss": 1.4997, "step": 1872 }, { "epoch": 0.10240148709046923, "grad_norm": 1.5697524547576904, "learning_rate": 1.9899886102862554e-05, "loss": 1.2507, "step": 1873 }, { "epoch": 0.10245615953419078, "grad_norm": 1.506425380706787, "learning_rate": 1.9899628011083028e-05, "loss": 1.4488, "step": 1874 }, { "epoch": 0.10251083197791233, "grad_norm": 1.3636568784713745, "learning_rate": 1.9899369588731697e-05, "loss": 1.7366, "step": 1875 }, { "epoch": 0.10256550442163388, "grad_norm": 1.2745696306228638, "learning_rate": 1.9899110835817182e-05, "loss": 1.4319, "step": 1876 }, { "epoch": 0.10262017686535543, "grad_norm": 2.0840303897857666, "learning_rate": 1.9898851752348128e-05, "loss": 1.5973, "step": 1877 }, { "epoch": 0.102674849309077, "grad_norm": 1.4632290601730347, "learning_rate": 1.9898592338333187e-05, "loss": 1.5354, "step": 1878 }, { "epoch": 0.10272952175279855, "grad_norm": 1.5487183332443237, "learning_rate": 1.989833259378102e-05, "loss": 1.7288, "step": 1879 }, { "epoch": 0.1027841941965201, "grad_norm": 1.5513215065002441, "learning_rate": 1.98980725187003e-05, "loss": 1.5532, "step": 1880 }, { "epoch": 0.10283886664024165, "grad_norm": 1.5579888820648193, "learning_rate": 1.989781211309971e-05, "loss": 1.4453, "step": 1881 }, { "epoch": 0.1028935390839632, "grad_norm": 1.400835633277893, "learning_rate": 1.9897551376987948e-05, "loss": 1.2796, "step": 1882 }, { "epoch": 0.10294821152768475, "grad_norm": 1.9773024320602417, "learning_rate": 1.9897290310373722e-05, "loss": 1.4655, "step": 1883 }, { "epoch": 0.1030028839714063, "grad_norm": 1.5561288595199585, "learning_rate": 1.989702891326575e-05, "loss": 1.443, "step": 1884 }, { "epoch": 0.10305755641512787, "grad_norm": 1.5312200784683228, "learning_rate": 1.9896767185672755e-05, "loss": 1.2734, "step": 1885 }, { "epoch": 0.10311222885884942, "grad_norm": 1.8050463199615479, "learning_rate": 1.989650512760348e-05, "loss": 1.4907, "step": 1886 }, { "epoch": 0.10316690130257097, "grad_norm": 1.5308105945587158, "learning_rate": 1.9896242739066678e-05, "loss": 1.4376, "step": 1887 }, { "epoch": 0.10322157374629252, "grad_norm": 1.432470679283142, "learning_rate": 1.9895980020071106e-05, "loss": 1.205, "step": 1888 }, { "epoch": 0.10327624619001408, "grad_norm": 1.7186681032180786, "learning_rate": 1.9895716970625544e-05, "loss": 1.3678, "step": 1889 }, { "epoch": 0.10333091863373563, "grad_norm": 1.4545783996582031, "learning_rate": 1.9895453590738766e-05, "loss": 1.5305, "step": 1890 }, { "epoch": 0.10338559107745718, "grad_norm": 1.3433388471603394, "learning_rate": 1.9895189880419576e-05, "loss": 1.641, "step": 1891 }, { "epoch": 0.10344026352117874, "grad_norm": 1.2928956747055054, "learning_rate": 1.9894925839676774e-05, "loss": 1.3548, "step": 1892 }, { "epoch": 0.1034949359649003, "grad_norm": 1.3962546586990356, "learning_rate": 1.989466146851918e-05, "loss": 1.3981, "step": 1893 }, { "epoch": 0.10354960840862185, "grad_norm": 1.2606325149536133, "learning_rate": 1.989439676695562e-05, "loss": 1.669, "step": 1894 }, { "epoch": 0.1036042808523434, "grad_norm": 1.7886894941329956, "learning_rate": 1.9894131734994935e-05, "loss": 1.386, "step": 1895 }, { "epoch": 0.10365895329606495, "grad_norm": 1.2475228309631348, "learning_rate": 1.9893866372645975e-05, "loss": 1.3977, "step": 1896 }, { "epoch": 0.1037136257397865, "grad_norm": 1.9760688543319702, "learning_rate": 1.98936006799176e-05, "loss": 1.295, "step": 1897 }, { "epoch": 0.10376829818350805, "grad_norm": 1.7461737394332886, "learning_rate": 1.989333465681868e-05, "loss": 1.506, "step": 1898 }, { "epoch": 0.10382297062722962, "grad_norm": 1.8128770589828491, "learning_rate": 1.98930683033581e-05, "loss": 1.4213, "step": 1899 }, { "epoch": 0.10387764307095117, "grad_norm": 1.3137749433517456, "learning_rate": 1.9892801619544756e-05, "loss": 1.6015, "step": 1900 }, { "epoch": 0.10393231551467272, "grad_norm": 1.8561447858810425, "learning_rate": 1.9892534605387555e-05, "loss": 1.36, "step": 1901 }, { "epoch": 0.10398698795839427, "grad_norm": 1.4543009996414185, "learning_rate": 1.9892267260895407e-05, "loss": 1.565, "step": 1902 }, { "epoch": 0.10404166040211582, "grad_norm": 1.3734452724456787, "learning_rate": 1.989199958607724e-05, "loss": 1.366, "step": 1903 }, { "epoch": 0.10409633284583737, "grad_norm": 1.2789698839187622, "learning_rate": 1.9891731580942e-05, "loss": 1.8235, "step": 1904 }, { "epoch": 0.10415100528955892, "grad_norm": 1.3147754669189453, "learning_rate": 1.9891463245498625e-05, "loss": 1.5173, "step": 1905 }, { "epoch": 0.10420567773328049, "grad_norm": 2.2261571884155273, "learning_rate": 1.9891194579756082e-05, "loss": 1.4246, "step": 1906 }, { "epoch": 0.10426035017700204, "grad_norm": 1.4449650049209595, "learning_rate": 1.9890925583723345e-05, "loss": 1.5229, "step": 1907 }, { "epoch": 0.10431502262072359, "grad_norm": 1.8502206802368164, "learning_rate": 1.9890656257409388e-05, "loss": 1.4369, "step": 1908 }, { "epoch": 0.10436969506444514, "grad_norm": 1.5295395851135254, "learning_rate": 1.9890386600823214e-05, "loss": 1.5078, "step": 1909 }, { "epoch": 0.1044243675081667, "grad_norm": 1.2699933052062988, "learning_rate": 1.9890116613973822e-05, "loss": 1.5906, "step": 1910 }, { "epoch": 0.10447903995188824, "grad_norm": 1.9504176378250122, "learning_rate": 1.9889846296870228e-05, "loss": 1.0494, "step": 1911 }, { "epoch": 0.10453371239560981, "grad_norm": 1.5496760606765747, "learning_rate": 1.9889575649521457e-05, "loss": 1.4869, "step": 1912 }, { "epoch": 0.10458838483933136, "grad_norm": 1.4622446298599243, "learning_rate": 1.988930467193655e-05, "loss": 1.3349, "step": 1913 }, { "epoch": 0.10464305728305291, "grad_norm": 1.9111266136169434, "learning_rate": 1.9889033364124555e-05, "loss": 1.6314, "step": 1914 }, { "epoch": 0.10469772972677446, "grad_norm": 1.5713474750518799, "learning_rate": 1.988876172609453e-05, "loss": 1.4423, "step": 1915 }, { "epoch": 0.10475240217049601, "grad_norm": 1.375016689300537, "learning_rate": 1.9888489757855548e-05, "loss": 1.39, "step": 1916 }, { "epoch": 0.10480707461421757, "grad_norm": 1.540898323059082, "learning_rate": 1.9888217459416685e-05, "loss": 1.6783, "step": 1917 }, { "epoch": 0.10486174705793912, "grad_norm": 1.5254456996917725, "learning_rate": 1.9887944830787042e-05, "loss": 1.2134, "step": 1918 }, { "epoch": 0.10491641950166068, "grad_norm": 1.2826035022735596, "learning_rate": 1.9887671871975716e-05, "loss": 1.3888, "step": 1919 }, { "epoch": 0.10497109194538223, "grad_norm": 2.2354347705841064, "learning_rate": 1.9887398582991825e-05, "loss": 1.4072, "step": 1920 }, { "epoch": 0.10502576438910378, "grad_norm": 1.2474637031555176, "learning_rate": 1.988712496384449e-05, "loss": 1.6952, "step": 1921 }, { "epoch": 0.10508043683282534, "grad_norm": 1.8984482288360596, "learning_rate": 1.9886851014542855e-05, "loss": 1.3814, "step": 1922 }, { "epoch": 0.10513510927654689, "grad_norm": 1.5700840950012207, "learning_rate": 1.9886576735096064e-05, "loss": 1.2377, "step": 1923 }, { "epoch": 0.10518978172026844, "grad_norm": 1.2046655416488647, "learning_rate": 1.9886302125513276e-05, "loss": 1.5794, "step": 1924 }, { "epoch": 0.10524445416398999, "grad_norm": 1.670613408088684, "learning_rate": 1.988602718580366e-05, "loss": 1.5672, "step": 1925 }, { "epoch": 0.10529912660771155, "grad_norm": 1.4049232006072998, "learning_rate": 1.9885751915976402e-05, "loss": 1.4038, "step": 1926 }, { "epoch": 0.1053537990514331, "grad_norm": 1.5612845420837402, "learning_rate": 1.9885476316040683e-05, "loss": 1.4511, "step": 1927 }, { "epoch": 0.10540847149515466, "grad_norm": 1.2501420974731445, "learning_rate": 1.988520038600572e-05, "loss": 1.6853, "step": 1928 }, { "epoch": 0.10546314393887621, "grad_norm": 1.7847304344177246, "learning_rate": 1.9884924125880713e-05, "loss": 1.5689, "step": 1929 }, { "epoch": 0.10551781638259776, "grad_norm": 1.8141359090805054, "learning_rate": 1.98846475356749e-05, "loss": 1.4724, "step": 1930 }, { "epoch": 0.10557248882631931, "grad_norm": 2.050806999206543, "learning_rate": 1.9884370615397507e-05, "loss": 1.2277, "step": 1931 }, { "epoch": 0.10562716127004086, "grad_norm": 2.093275785446167, "learning_rate": 1.9884093365057786e-05, "loss": 1.6754, "step": 1932 }, { "epoch": 0.10568183371376243, "grad_norm": 1.3557881116867065, "learning_rate": 1.9883815784664992e-05, "loss": 1.7157, "step": 1933 }, { "epoch": 0.10573650615748398, "grad_norm": 1.4710239171981812, "learning_rate": 1.9883537874228402e-05, "loss": 1.3754, "step": 1934 }, { "epoch": 0.10579117860120553, "grad_norm": 1.5383142232894897, "learning_rate": 1.9883259633757282e-05, "loss": 1.3608, "step": 1935 }, { "epoch": 0.10584585104492708, "grad_norm": 1.6843229532241821, "learning_rate": 1.9882981063260934e-05, "loss": 1.4324, "step": 1936 }, { "epoch": 0.10590052348864863, "grad_norm": 1.5105470418930054, "learning_rate": 1.9882702162748657e-05, "loss": 1.3854, "step": 1937 }, { "epoch": 0.10595519593237018, "grad_norm": 1.5314615964889526, "learning_rate": 1.9882422932229765e-05, "loss": 1.5764, "step": 1938 }, { "epoch": 0.10600986837609173, "grad_norm": 1.4554177522659302, "learning_rate": 1.9882143371713583e-05, "loss": 1.716, "step": 1939 }, { "epoch": 0.1060645408198133, "grad_norm": 1.497289776802063, "learning_rate": 1.9881863481209442e-05, "loss": 1.5795, "step": 1940 }, { "epoch": 0.10611921326353485, "grad_norm": 1.3077051639556885, "learning_rate": 1.9881583260726692e-05, "loss": 1.447, "step": 1941 }, { "epoch": 0.1061738857072564, "grad_norm": 2.302999496459961, "learning_rate": 1.988130271027469e-05, "loss": 1.3693, "step": 1942 }, { "epoch": 0.10622855815097795, "grad_norm": 1.5582464933395386, "learning_rate": 1.9881021829862802e-05, "loss": 1.2161, "step": 1943 }, { "epoch": 0.1062832305946995, "grad_norm": 1.739630103111267, "learning_rate": 1.9880740619500406e-05, "loss": 1.481, "step": 1944 }, { "epoch": 0.10633790303842106, "grad_norm": 1.704262375831604, "learning_rate": 1.9880459079196898e-05, "loss": 1.3714, "step": 1945 }, { "epoch": 0.1063925754821426, "grad_norm": 1.619309902191162, "learning_rate": 1.9880177208961676e-05, "loss": 1.1622, "step": 1946 }, { "epoch": 0.10644724792586417, "grad_norm": 1.451807975769043, "learning_rate": 1.9879895008804154e-05, "loss": 1.3824, "step": 1947 }, { "epoch": 0.10650192036958572, "grad_norm": 1.331317663192749, "learning_rate": 1.9879612478733753e-05, "loss": 1.5371, "step": 1948 }, { "epoch": 0.10655659281330727, "grad_norm": 1.4584474563598633, "learning_rate": 1.9879329618759903e-05, "loss": 1.5841, "step": 1949 }, { "epoch": 0.10661126525702883, "grad_norm": 1.619023084640503, "learning_rate": 1.987904642889206e-05, "loss": 1.4475, "step": 1950 }, { "epoch": 0.10666593770075038, "grad_norm": 1.4851019382476807, "learning_rate": 1.9878762909139673e-05, "loss": 1.6493, "step": 1951 }, { "epoch": 0.10672061014447193, "grad_norm": 1.7783244848251343, "learning_rate": 1.9878479059512212e-05, "loss": 1.3049, "step": 1952 }, { "epoch": 0.10677528258819348, "grad_norm": 1.4466408491134644, "learning_rate": 1.9878194880019154e-05, "loss": 1.5726, "step": 1953 }, { "epoch": 0.10682995503191504, "grad_norm": 1.246958613395691, "learning_rate": 1.987791037066999e-05, "loss": 1.5882, "step": 1954 }, { "epoch": 0.1068846274756366, "grad_norm": 1.77590811252594, "learning_rate": 1.9877625531474217e-05, "loss": 1.5048, "step": 1955 }, { "epoch": 0.10693929991935815, "grad_norm": 1.7915997505187988, "learning_rate": 1.9877340362441352e-05, "loss": 1.3498, "step": 1956 }, { "epoch": 0.1069939723630797, "grad_norm": 2.405240774154663, "learning_rate": 1.9877054863580912e-05, "loss": 1.2299, "step": 1957 }, { "epoch": 0.10704864480680125, "grad_norm": 1.9423408508300781, "learning_rate": 1.987676903490243e-05, "loss": 1.5114, "step": 1958 }, { "epoch": 0.1071033172505228, "grad_norm": 1.987260341644287, "learning_rate": 1.987648287641546e-05, "loss": 1.5833, "step": 1959 }, { "epoch": 0.10715798969424435, "grad_norm": 1.3487117290496826, "learning_rate": 1.9876196388129548e-05, "loss": 1.4708, "step": 1960 }, { "epoch": 0.10721266213796592, "grad_norm": 1.5397415161132812, "learning_rate": 1.9875909570054263e-05, "loss": 1.6711, "step": 1961 }, { "epoch": 0.10726733458168747, "grad_norm": 1.2630778551101685, "learning_rate": 1.9875622422199185e-05, "loss": 1.4617, "step": 1962 }, { "epoch": 0.10732200702540902, "grad_norm": 1.4994423389434814, "learning_rate": 1.98753349445739e-05, "loss": 1.4215, "step": 1963 }, { "epoch": 0.10737667946913057, "grad_norm": 1.4304856061935425, "learning_rate": 1.9875047137188005e-05, "loss": 1.5461, "step": 1964 }, { "epoch": 0.10743135191285212, "grad_norm": 3.641613721847534, "learning_rate": 1.9874759000051113e-05, "loss": 1.4025, "step": 1965 }, { "epoch": 0.10748602435657367, "grad_norm": 1.4312851428985596, "learning_rate": 1.987447053317285e-05, "loss": 1.4285, "step": 1966 }, { "epoch": 0.10754069680029524, "grad_norm": 1.7422188520431519, "learning_rate": 1.9874181736562844e-05, "loss": 1.5658, "step": 1967 }, { "epoch": 0.10759536924401679, "grad_norm": 1.3498656749725342, "learning_rate": 1.987389261023074e-05, "loss": 1.5331, "step": 1968 }, { "epoch": 0.10765004168773834, "grad_norm": 1.4938091039657593, "learning_rate": 1.9873603154186187e-05, "loss": 1.1904, "step": 1969 }, { "epoch": 0.10770471413145989, "grad_norm": 1.5354806184768677, "learning_rate": 1.987331336843886e-05, "loss": 1.5738, "step": 1970 }, { "epoch": 0.10775938657518144, "grad_norm": 2.0937960147857666, "learning_rate": 1.9873023252998432e-05, "loss": 1.3913, "step": 1971 }, { "epoch": 0.107814059018903, "grad_norm": 1.3324748277664185, "learning_rate": 1.9872732807874588e-05, "loss": 1.2353, "step": 1972 }, { "epoch": 0.10786873146262455, "grad_norm": 1.4201807975769043, "learning_rate": 1.9872442033077027e-05, "loss": 1.4288, "step": 1973 }, { "epoch": 0.10792340390634611, "grad_norm": 1.478533148765564, "learning_rate": 1.987215092861546e-05, "loss": 1.4427, "step": 1974 }, { "epoch": 0.10797807635006766, "grad_norm": 1.7240314483642578, "learning_rate": 1.9871859494499613e-05, "loss": 1.2962, "step": 1975 }, { "epoch": 0.10803274879378921, "grad_norm": 1.446010947227478, "learning_rate": 1.9871567730739207e-05, "loss": 1.1993, "step": 1976 }, { "epoch": 0.10808742123751076, "grad_norm": 1.619376301765442, "learning_rate": 1.987127563734399e-05, "loss": 1.113, "step": 1977 }, { "epoch": 0.10814209368123232, "grad_norm": 1.2235066890716553, "learning_rate": 1.987098321432372e-05, "loss": 1.4534, "step": 1978 }, { "epoch": 0.10819676612495387, "grad_norm": 1.6482146978378296, "learning_rate": 1.9870690461688154e-05, "loss": 1.6125, "step": 1979 }, { "epoch": 0.10825143856867542, "grad_norm": 1.697493553161621, "learning_rate": 1.9870397379447074e-05, "loss": 1.5141, "step": 1980 }, { "epoch": 0.10830611101239698, "grad_norm": 1.4109141826629639, "learning_rate": 1.9870103967610262e-05, "loss": 1.3578, "step": 1981 }, { "epoch": 0.10836078345611853, "grad_norm": 1.7153414487838745, "learning_rate": 1.9869810226187516e-05, "loss": 1.5848, "step": 1982 }, { "epoch": 0.10841545589984009, "grad_norm": 1.9992631673812866, "learning_rate": 1.9869516155188647e-05, "loss": 1.4695, "step": 1983 }, { "epoch": 0.10847012834356164, "grad_norm": 2.2647924423217773, "learning_rate": 1.986922175462348e-05, "loss": 1.4696, "step": 1984 }, { "epoch": 0.10852480078728319, "grad_norm": 1.7235642671585083, "learning_rate": 1.9868927024501833e-05, "loss": 1.6721, "step": 1985 }, { "epoch": 0.10857947323100474, "grad_norm": 1.293166160583496, "learning_rate": 1.9868631964833556e-05, "loss": 1.4393, "step": 1986 }, { "epoch": 0.10863414567472629, "grad_norm": 1.4009044170379639, "learning_rate": 1.98683365756285e-05, "loss": 1.4656, "step": 1987 }, { "epoch": 0.10868881811844786, "grad_norm": 1.856594204902649, "learning_rate": 1.986804085689653e-05, "loss": 1.5634, "step": 1988 }, { "epoch": 0.1087434905621694, "grad_norm": 1.3073347806930542, "learning_rate": 1.9867744808647518e-05, "loss": 1.6032, "step": 1989 }, { "epoch": 0.10879816300589096, "grad_norm": 1.5785162448883057, "learning_rate": 1.9867448430891353e-05, "loss": 1.5379, "step": 1990 }, { "epoch": 0.10885283544961251, "grad_norm": 1.5475586652755737, "learning_rate": 1.986715172363793e-05, "loss": 1.459, "step": 1991 }, { "epoch": 0.10890750789333406, "grad_norm": 1.6476349830627441, "learning_rate": 1.9866854686897156e-05, "loss": 1.5817, "step": 1992 }, { "epoch": 0.10896218033705561, "grad_norm": 2.149394989013672, "learning_rate": 1.9866557320678952e-05, "loss": 1.4684, "step": 1993 }, { "epoch": 0.10901685278077716, "grad_norm": 1.582430124282837, "learning_rate": 1.9866259624993246e-05, "loss": 1.5777, "step": 1994 }, { "epoch": 0.10907152522449873, "grad_norm": 1.942865014076233, "learning_rate": 1.986596159984998e-05, "loss": 1.6011, "step": 1995 }, { "epoch": 0.10912619766822028, "grad_norm": 1.2672007083892822, "learning_rate": 1.9865663245259105e-05, "loss": 1.4823, "step": 1996 }, { "epoch": 0.10918087011194183, "grad_norm": 1.4320091009140015, "learning_rate": 1.9865364561230583e-05, "loss": 1.5527, "step": 1997 }, { "epoch": 0.10923554255566338, "grad_norm": 1.4268858432769775, "learning_rate": 1.9865065547774386e-05, "loss": 1.2793, "step": 1998 }, { "epoch": 0.10929021499938493, "grad_norm": 1.4789615869522095, "learning_rate": 1.9864766204900506e-05, "loss": 1.3219, "step": 1999 }, { "epoch": 0.10934488744310648, "grad_norm": 1.5894593000411987, "learning_rate": 1.986446653261893e-05, "loss": 1.5824, "step": 2000 }, { "epoch": 0.10939955988682804, "grad_norm": 1.9653007984161377, "learning_rate": 1.986416653093967e-05, "loss": 1.5759, "step": 2001 }, { "epoch": 0.1094542323305496, "grad_norm": 1.4745914936065674, "learning_rate": 1.9863866199872747e-05, "loss": 1.6251, "step": 2002 }, { "epoch": 0.10950890477427115, "grad_norm": 1.3126962184906006, "learning_rate": 1.9863565539428177e-05, "loss": 1.5809, "step": 2003 }, { "epoch": 0.1095635772179927, "grad_norm": 1.5964776277542114, "learning_rate": 1.9863264549616015e-05, "loss": 1.6805, "step": 2004 }, { "epoch": 0.10961824966171425, "grad_norm": 1.4798564910888672, "learning_rate": 1.9862963230446303e-05, "loss": 1.5006, "step": 2005 }, { "epoch": 0.1096729221054358, "grad_norm": 1.398334264755249, "learning_rate": 1.9862661581929103e-05, "loss": 1.5023, "step": 2006 }, { "epoch": 0.10972759454915736, "grad_norm": 1.6044670343399048, "learning_rate": 1.986235960407449e-05, "loss": 1.7329, "step": 2007 }, { "epoch": 0.10978226699287891, "grad_norm": 1.3307002782821655, "learning_rate": 1.9862057296892546e-05, "loss": 1.4892, "step": 2008 }, { "epoch": 0.10983693943660047, "grad_norm": 1.613200306892395, "learning_rate": 1.986175466039337e-05, "loss": 1.3216, "step": 2009 }, { "epoch": 0.10989161188032202, "grad_norm": 1.8282749652862549, "learning_rate": 1.9861451694587063e-05, "loss": 1.5059, "step": 2010 }, { "epoch": 0.10994628432404357, "grad_norm": 2.1420319080352783, "learning_rate": 1.9861148399483743e-05, "loss": 1.5263, "step": 2011 }, { "epoch": 0.11000095676776513, "grad_norm": 1.7307202816009521, "learning_rate": 1.9860844775093536e-05, "loss": 1.6647, "step": 2012 }, { "epoch": 0.11005562921148668, "grad_norm": 1.710175633430481, "learning_rate": 1.9860540821426582e-05, "loss": 1.4395, "step": 2013 }, { "epoch": 0.11011030165520823, "grad_norm": 1.5947517156600952, "learning_rate": 1.9860236538493036e-05, "loss": 1.5887, "step": 2014 }, { "epoch": 0.1101649740989298, "grad_norm": 1.9986093044281006, "learning_rate": 1.985993192630305e-05, "loss": 1.4341, "step": 2015 }, { "epoch": 0.11021964654265134, "grad_norm": 1.2309187650680542, "learning_rate": 1.9859626984866804e-05, "loss": 1.4876, "step": 2016 }, { "epoch": 0.1102743189863729, "grad_norm": 1.1211985349655151, "learning_rate": 1.9859321714194477e-05, "loss": 1.5209, "step": 2017 }, { "epoch": 0.11032899143009445, "grad_norm": 1.755427360534668, "learning_rate": 1.985901611429626e-05, "loss": 1.204, "step": 2018 }, { "epoch": 0.110383663873816, "grad_norm": 1.5495178699493408, "learning_rate": 1.985871018518236e-05, "loss": 1.3284, "step": 2019 }, { "epoch": 0.11043833631753755, "grad_norm": 1.3843024969100952, "learning_rate": 1.985840392686299e-05, "loss": 1.4729, "step": 2020 }, { "epoch": 0.1104930087612591, "grad_norm": 1.533358097076416, "learning_rate": 1.9858097339348386e-05, "loss": 1.4063, "step": 2021 }, { "epoch": 0.11054768120498067, "grad_norm": 1.070459008216858, "learning_rate": 1.9857790422648774e-05, "loss": 1.4192, "step": 2022 }, { "epoch": 0.11060235364870222, "grad_norm": 1.425917148590088, "learning_rate": 1.9857483176774412e-05, "loss": 1.2987, "step": 2023 }, { "epoch": 0.11065702609242377, "grad_norm": 1.7225658893585205, "learning_rate": 1.9857175601735548e-05, "loss": 1.3768, "step": 2024 }, { "epoch": 0.11071169853614532, "grad_norm": 1.4186149835586548, "learning_rate": 1.9856867697542467e-05, "loss": 1.3269, "step": 2025 }, { "epoch": 0.11076637097986687, "grad_norm": 1.3969968557357788, "learning_rate": 1.9856559464205443e-05, "loss": 1.555, "step": 2026 }, { "epoch": 0.11082104342358842, "grad_norm": 1.3761875629425049, "learning_rate": 1.9856250901734767e-05, "loss": 1.6097, "step": 2027 }, { "epoch": 0.11087571586730997, "grad_norm": 1.6765421628952026, "learning_rate": 1.985594201014075e-05, "loss": 1.2177, "step": 2028 }, { "epoch": 0.11093038831103154, "grad_norm": 1.5672963857650757, "learning_rate": 1.9855632789433695e-05, "loss": 1.4051, "step": 2029 }, { "epoch": 0.11098506075475309, "grad_norm": 1.4340218305587769, "learning_rate": 1.9855323239623936e-05, "loss": 1.4163, "step": 2030 }, { "epoch": 0.11103973319847464, "grad_norm": 1.4067195653915405, "learning_rate": 1.9855013360721806e-05, "loss": 1.3516, "step": 2031 }, { "epoch": 0.11109440564219619, "grad_norm": 1.432308316230774, "learning_rate": 1.985470315273766e-05, "loss": 1.4869, "step": 2032 }, { "epoch": 0.11114907808591774, "grad_norm": 1.153895616531372, "learning_rate": 1.9854392615681845e-05, "loss": 1.4078, "step": 2033 }, { "epoch": 0.1112037505296393, "grad_norm": 1.178545594215393, "learning_rate": 1.985408174956474e-05, "loss": 1.5253, "step": 2034 }, { "epoch": 0.11125842297336085, "grad_norm": 1.8499534130096436, "learning_rate": 1.9853770554396722e-05, "loss": 1.7384, "step": 2035 }, { "epoch": 0.11131309541708241, "grad_norm": 1.3001817464828491, "learning_rate": 1.9853459030188183e-05, "loss": 1.5126, "step": 2036 }, { "epoch": 0.11136776786080396, "grad_norm": 1.2391173839569092, "learning_rate": 1.9853147176949523e-05, "loss": 1.6234, "step": 2037 }, { "epoch": 0.11142244030452551, "grad_norm": 1.5646449327468872, "learning_rate": 1.985283499469116e-05, "loss": 1.4309, "step": 2038 }, { "epoch": 0.11147711274824706, "grad_norm": 1.368013620376587, "learning_rate": 1.9852522483423513e-05, "loss": 1.5713, "step": 2039 }, { "epoch": 0.11153178519196862, "grad_norm": 1.480688452720642, "learning_rate": 1.985220964315702e-05, "loss": 1.4601, "step": 2040 }, { "epoch": 0.11158645763569017, "grad_norm": 1.5144182443618774, "learning_rate": 1.985189647390213e-05, "loss": 1.3145, "step": 2041 }, { "epoch": 0.11164113007941172, "grad_norm": 1.677608847618103, "learning_rate": 1.9851582975669302e-05, "loss": 1.1907, "step": 2042 }, { "epoch": 0.11169580252313328, "grad_norm": 1.6517748832702637, "learning_rate": 1.9851269148468998e-05, "loss": 1.3203, "step": 2043 }, { "epoch": 0.11175047496685483, "grad_norm": 1.5426079034805298, "learning_rate": 1.98509549923117e-05, "loss": 1.5313, "step": 2044 }, { "epoch": 0.11180514741057639, "grad_norm": 1.5941518545150757, "learning_rate": 1.9850640507207898e-05, "loss": 1.3163, "step": 2045 }, { "epoch": 0.11185981985429794, "grad_norm": 1.5300878286361694, "learning_rate": 1.9850325693168098e-05, "loss": 1.4814, "step": 2046 }, { "epoch": 0.11191449229801949, "grad_norm": 1.50736403465271, "learning_rate": 1.9850010550202806e-05, "loss": 1.3931, "step": 2047 }, { "epoch": 0.11196916474174104, "grad_norm": 1.592186450958252, "learning_rate": 1.9849695078322545e-05, "loss": 1.5524, "step": 2048 }, { "epoch": 0.11202383718546259, "grad_norm": 1.5905632972717285, "learning_rate": 1.9849379277537856e-05, "loss": 1.6534, "step": 2049 }, { "epoch": 0.11207850962918416, "grad_norm": 1.536801815032959, "learning_rate": 1.9849063147859282e-05, "loss": 1.5096, "step": 2050 }, { "epoch": 0.11213318207290571, "grad_norm": 1.6000120639801025, "learning_rate": 1.9848746689297375e-05, "loss": 1.4416, "step": 2051 }, { "epoch": 0.11218785451662726, "grad_norm": 1.7924838066101074, "learning_rate": 1.9848429901862705e-05, "loss": 1.5293, "step": 2052 }, { "epoch": 0.11224252696034881, "grad_norm": 1.6123721599578857, "learning_rate": 1.984811278556585e-05, "loss": 1.654, "step": 2053 }, { "epoch": 0.11229719940407036, "grad_norm": 1.3714877367019653, "learning_rate": 1.9847795340417405e-05, "loss": 1.3467, "step": 2054 }, { "epoch": 0.11235187184779191, "grad_norm": 1.2463396787643433, "learning_rate": 1.984747756642796e-05, "loss": 1.6122, "step": 2055 }, { "epoch": 0.11240654429151346, "grad_norm": 1.3543620109558105, "learning_rate": 1.9847159463608132e-05, "loss": 1.4216, "step": 2056 }, { "epoch": 0.11246121673523503, "grad_norm": 1.1492624282836914, "learning_rate": 1.9846841031968545e-05, "loss": 1.348, "step": 2057 }, { "epoch": 0.11251588917895658, "grad_norm": 1.665332317352295, "learning_rate": 1.9846522271519827e-05, "loss": 1.5503, "step": 2058 }, { "epoch": 0.11257056162267813, "grad_norm": 1.2970538139343262, "learning_rate": 1.9846203182272625e-05, "loss": 1.336, "step": 2059 }, { "epoch": 0.11262523406639968, "grad_norm": 1.4033019542694092, "learning_rate": 1.9845883764237594e-05, "loss": 1.5967, "step": 2060 }, { "epoch": 0.11267990651012123, "grad_norm": 1.707052230834961, "learning_rate": 1.98455640174254e-05, "loss": 1.3414, "step": 2061 }, { "epoch": 0.11273457895384278, "grad_norm": 1.2937778234481812, "learning_rate": 1.984524394184672e-05, "loss": 1.5274, "step": 2062 }, { "epoch": 0.11278925139756434, "grad_norm": 1.3060331344604492, "learning_rate": 1.9844923537512245e-05, "loss": 1.3127, "step": 2063 }, { "epoch": 0.1128439238412859, "grad_norm": 1.3086538314819336, "learning_rate": 1.9844602804432667e-05, "loss": 1.6407, "step": 2064 }, { "epoch": 0.11289859628500745, "grad_norm": 1.1872137784957886, "learning_rate": 1.98442817426187e-05, "loss": 1.4502, "step": 2065 }, { "epoch": 0.112953268728729, "grad_norm": 1.4546349048614502, "learning_rate": 1.9843960352081068e-05, "loss": 1.5991, "step": 2066 }, { "epoch": 0.11300794117245055, "grad_norm": 2.045081377029419, "learning_rate": 1.9843638632830504e-05, "loss": 1.4633, "step": 2067 }, { "epoch": 0.1130626136161721, "grad_norm": 1.300187349319458, "learning_rate": 1.9843316584877738e-05, "loss": 1.4425, "step": 2068 }, { "epoch": 0.11311728605989366, "grad_norm": 1.4456888437271118, "learning_rate": 1.9842994208233543e-05, "loss": 1.5057, "step": 2069 }, { "epoch": 0.11317195850361522, "grad_norm": 1.4282444715499878, "learning_rate": 1.9842671502908665e-05, "loss": 1.4694, "step": 2070 }, { "epoch": 0.11322663094733677, "grad_norm": 1.8425424098968506, "learning_rate": 1.9842348468913895e-05, "loss": 1.8613, "step": 2071 }, { "epoch": 0.11328130339105832, "grad_norm": 1.6203975677490234, "learning_rate": 1.984202510626001e-05, "loss": 1.3724, "step": 2072 }, { "epoch": 0.11333597583477988, "grad_norm": 1.2732081413269043, "learning_rate": 1.9841701414957815e-05, "loss": 1.4896, "step": 2073 }, { "epoch": 0.11339064827850143, "grad_norm": 1.349081039428711, "learning_rate": 1.9841377395018114e-05, "loss": 1.4004, "step": 2074 }, { "epoch": 0.11344532072222298, "grad_norm": 1.3055599927902222, "learning_rate": 1.9841053046451728e-05, "loss": 1.3851, "step": 2075 }, { "epoch": 0.11349999316594453, "grad_norm": 1.7996587753295898, "learning_rate": 1.984072836926949e-05, "loss": 1.4366, "step": 2076 }, { "epoch": 0.1135546656096661, "grad_norm": 1.2649246454238892, "learning_rate": 1.9840403363482236e-05, "loss": 1.549, "step": 2077 }, { "epoch": 0.11360933805338765, "grad_norm": 1.3633044958114624, "learning_rate": 1.9840078029100826e-05, "loss": 1.3454, "step": 2078 }, { "epoch": 0.1136640104971092, "grad_norm": 1.9087505340576172, "learning_rate": 1.983975236613612e-05, "loss": 1.2839, "step": 2079 }, { "epoch": 0.11371868294083075, "grad_norm": 1.5489253997802734, "learning_rate": 1.983942637459899e-05, "loss": 1.5306, "step": 2080 }, { "epoch": 0.1137733553845523, "grad_norm": 1.797749638557434, "learning_rate": 1.9839100054500324e-05, "loss": 1.3578, "step": 2081 }, { "epoch": 0.11382802782827385, "grad_norm": 1.9476100206375122, "learning_rate": 1.9838773405851022e-05, "loss": 1.4935, "step": 2082 }, { "epoch": 0.1138827002719954, "grad_norm": 1.2701579332351685, "learning_rate": 1.9838446428661988e-05, "loss": 1.4725, "step": 2083 }, { "epoch": 0.11393737271571697, "grad_norm": 1.2707525491714478, "learning_rate": 1.9838119122944143e-05, "loss": 1.4552, "step": 2084 }, { "epoch": 0.11399204515943852, "grad_norm": 1.6886049509048462, "learning_rate": 1.983779148870841e-05, "loss": 1.3742, "step": 2085 }, { "epoch": 0.11404671760316007, "grad_norm": 1.2981221675872803, "learning_rate": 1.9837463525965735e-05, "loss": 1.6217, "step": 2086 }, { "epoch": 0.11410139004688162, "grad_norm": 2.0044212341308594, "learning_rate": 1.983713523472707e-05, "loss": 1.4107, "step": 2087 }, { "epoch": 0.11415606249060317, "grad_norm": 2.0105063915252686, "learning_rate": 1.983680661500338e-05, "loss": 1.3276, "step": 2088 }, { "epoch": 0.11421073493432472, "grad_norm": 1.6909018754959106, "learning_rate": 1.9836477666805626e-05, "loss": 1.443, "step": 2089 }, { "epoch": 0.11426540737804627, "grad_norm": 1.4799944162368774, "learning_rate": 1.9836148390144805e-05, "loss": 1.4815, "step": 2090 }, { "epoch": 0.11432007982176784, "grad_norm": 1.650343656539917, "learning_rate": 1.9835818785031907e-05, "loss": 1.3521, "step": 2091 }, { "epoch": 0.11437475226548939, "grad_norm": 1.3849085569381714, "learning_rate": 1.9835488851477943e-05, "loss": 1.5262, "step": 2092 }, { "epoch": 0.11442942470921094, "grad_norm": 1.2457807064056396, "learning_rate": 1.9835158589493923e-05, "loss": 1.4913, "step": 2093 }, { "epoch": 0.11448409715293249, "grad_norm": 1.3270176649093628, "learning_rate": 1.983482799909088e-05, "loss": 1.4162, "step": 2094 }, { "epoch": 0.11453876959665404, "grad_norm": 1.2900276184082031, "learning_rate": 1.983449708027985e-05, "loss": 1.289, "step": 2095 }, { "epoch": 0.1145934420403756, "grad_norm": 1.3773249387741089, "learning_rate": 1.9834165833071887e-05, "loss": 1.4287, "step": 2096 }, { "epoch": 0.11464811448409715, "grad_norm": 1.5342649221420288, "learning_rate": 1.983383425747805e-05, "loss": 1.2288, "step": 2097 }, { "epoch": 0.11470278692781871, "grad_norm": 1.182763695716858, "learning_rate": 1.9833502353509412e-05, "loss": 1.6444, "step": 2098 }, { "epoch": 0.11475745937154026, "grad_norm": 1.5320522785186768, "learning_rate": 1.983317012117705e-05, "loss": 1.5698, "step": 2099 }, { "epoch": 0.11481213181526181, "grad_norm": 1.645879864692688, "learning_rate": 1.983283756049207e-05, "loss": 1.3307, "step": 2100 }, { "epoch": 0.11486680425898337, "grad_norm": 1.4653280973434448, "learning_rate": 1.983250467146557e-05, "loss": 1.211, "step": 2101 }, { "epoch": 0.11492147670270492, "grad_norm": 1.7566888332366943, "learning_rate": 1.9832171454108665e-05, "loss": 1.5292, "step": 2102 }, { "epoch": 0.11497614914642647, "grad_norm": 1.914618730545044, "learning_rate": 1.9831837908432482e-05, "loss": 1.59, "step": 2103 }, { "epoch": 0.11503082159014802, "grad_norm": 1.2574292421340942, "learning_rate": 1.983150403444816e-05, "loss": 1.585, "step": 2104 }, { "epoch": 0.11508549403386958, "grad_norm": 1.5164170265197754, "learning_rate": 1.983116983216685e-05, "loss": 1.7027, "step": 2105 }, { "epoch": 0.11514016647759114, "grad_norm": 1.6510192155838013, "learning_rate": 1.983083530159971e-05, "loss": 1.4222, "step": 2106 }, { "epoch": 0.11519483892131269, "grad_norm": 1.4249746799468994, "learning_rate": 1.9830500442757907e-05, "loss": 1.6186, "step": 2107 }, { "epoch": 0.11524951136503424, "grad_norm": 0.9344184994697571, "learning_rate": 1.983016525565263e-05, "loss": 1.7281, "step": 2108 }, { "epoch": 0.11530418380875579, "grad_norm": 1.3588041067123413, "learning_rate": 1.9829829740295067e-05, "loss": 1.5366, "step": 2109 }, { "epoch": 0.11535885625247734, "grad_norm": 1.33034348487854, "learning_rate": 1.982949389669642e-05, "loss": 1.3393, "step": 2110 }, { "epoch": 0.11541352869619889, "grad_norm": 1.4409319162368774, "learning_rate": 1.9829157724867908e-05, "loss": 1.5899, "step": 2111 }, { "epoch": 0.11546820113992046, "grad_norm": 1.218353033065796, "learning_rate": 1.9828821224820755e-05, "loss": 1.5732, "step": 2112 }, { "epoch": 0.11552287358364201, "grad_norm": 1.4194999933242798, "learning_rate": 1.9828484396566197e-05, "loss": 1.3725, "step": 2113 }, { "epoch": 0.11557754602736356, "grad_norm": 1.4646589756011963, "learning_rate": 1.9828147240115483e-05, "loss": 1.5134, "step": 2114 }, { "epoch": 0.11563221847108511, "grad_norm": 1.3200520277023315, "learning_rate": 1.982780975547987e-05, "loss": 1.4265, "step": 2115 }, { "epoch": 0.11568689091480666, "grad_norm": 1.2239234447479248, "learning_rate": 1.9827471942670624e-05, "loss": 1.5056, "step": 2116 }, { "epoch": 0.11574156335852821, "grad_norm": 1.5870987176895142, "learning_rate": 1.982713380169903e-05, "loss": 1.3483, "step": 2117 }, { "epoch": 0.11579623580224978, "grad_norm": 1.5987350940704346, "learning_rate": 1.982679533257638e-05, "loss": 1.5949, "step": 2118 }, { "epoch": 0.11585090824597133, "grad_norm": 1.4886589050292969, "learning_rate": 1.9826456535313978e-05, "loss": 1.4375, "step": 2119 }, { "epoch": 0.11590558068969288, "grad_norm": 1.798452615737915, "learning_rate": 1.982611740992313e-05, "loss": 1.5465, "step": 2120 }, { "epoch": 0.11596025313341443, "grad_norm": 1.2709169387817383, "learning_rate": 1.9825777956415163e-05, "loss": 1.4042, "step": 2121 }, { "epoch": 0.11601492557713598, "grad_norm": 1.608818531036377, "learning_rate": 1.9825438174801412e-05, "loss": 1.504, "step": 2122 }, { "epoch": 0.11606959802085753, "grad_norm": 1.7442920207977295, "learning_rate": 1.982509806509323e-05, "loss": 1.512, "step": 2123 }, { "epoch": 0.11612427046457909, "grad_norm": 1.2459287643432617, "learning_rate": 1.982475762730196e-05, "loss": 1.3053, "step": 2124 }, { "epoch": 0.11617894290830065, "grad_norm": 1.6300125122070312, "learning_rate": 1.9824416861438985e-05, "loss": 1.8143, "step": 2125 }, { "epoch": 0.1162336153520222, "grad_norm": 1.1212620735168457, "learning_rate": 1.9824075767515677e-05, "loss": 1.3294, "step": 2126 }, { "epoch": 0.11628828779574375, "grad_norm": 1.8518646955490112, "learning_rate": 1.9823734345543422e-05, "loss": 1.537, "step": 2127 }, { "epoch": 0.1163429602394653, "grad_norm": 1.432404637336731, "learning_rate": 1.982339259553363e-05, "loss": 1.5048, "step": 2128 }, { "epoch": 0.11639763268318686, "grad_norm": 1.8665088415145874, "learning_rate": 1.9823050517497703e-05, "loss": 1.4695, "step": 2129 }, { "epoch": 0.1164523051269084, "grad_norm": 1.7539362907409668, "learning_rate": 1.9822708111447074e-05, "loss": 1.3556, "step": 2130 }, { "epoch": 0.11650697757062996, "grad_norm": 1.4862240552902222, "learning_rate": 1.9822365377393168e-05, "loss": 1.6025, "step": 2131 }, { "epoch": 0.11656165001435152, "grad_norm": 0.888853907585144, "learning_rate": 1.9822022315347433e-05, "loss": 1.6303, "step": 2132 }, { "epoch": 0.11661632245807307, "grad_norm": 1.2490057945251465, "learning_rate": 1.9821678925321326e-05, "loss": 1.3888, "step": 2133 }, { "epoch": 0.11667099490179463, "grad_norm": 1.3347288370132446, "learning_rate": 1.982133520732631e-05, "loss": 1.6011, "step": 2134 }, { "epoch": 0.11672566734551618, "grad_norm": 1.491385817527771, "learning_rate": 1.982099116137387e-05, "loss": 1.7139, "step": 2135 }, { "epoch": 0.11678033978923773, "grad_norm": 1.1638767719268799, "learning_rate": 1.9820646787475483e-05, "loss": 1.4542, "step": 2136 }, { "epoch": 0.11683501223295928, "grad_norm": 1.3116626739501953, "learning_rate": 1.982030208564266e-05, "loss": 1.3347, "step": 2137 }, { "epoch": 0.11688968467668083, "grad_norm": 1.6003384590148926, "learning_rate": 1.9819957055886904e-05, "loss": 1.5577, "step": 2138 }, { "epoch": 0.1169443571204024, "grad_norm": 1.3553732633590698, "learning_rate": 1.981961169821974e-05, "loss": 1.4258, "step": 2139 }, { "epoch": 0.11699902956412395, "grad_norm": 1.3139814138412476, "learning_rate": 1.98192660126527e-05, "loss": 1.3621, "step": 2140 }, { "epoch": 0.1170537020078455, "grad_norm": 1.1755808591842651, "learning_rate": 1.981891999919732e-05, "loss": 1.5768, "step": 2141 }, { "epoch": 0.11710837445156705, "grad_norm": 1.5656397342681885, "learning_rate": 1.9818573657865167e-05, "loss": 1.7123, "step": 2142 }, { "epoch": 0.1171630468952886, "grad_norm": 1.3078978061676025, "learning_rate": 1.9818226988667797e-05, "loss": 1.5018, "step": 2143 }, { "epoch": 0.11721771933901015, "grad_norm": 1.5242273807525635, "learning_rate": 1.981787999161679e-05, "loss": 1.4149, "step": 2144 }, { "epoch": 0.1172723917827317, "grad_norm": 1.7667853832244873, "learning_rate": 1.981753266672373e-05, "loss": 1.3846, "step": 2145 }, { "epoch": 0.11732706422645327, "grad_norm": 1.5230649709701538, "learning_rate": 1.9817185014000218e-05, "loss": 1.3248, "step": 2146 }, { "epoch": 0.11738173667017482, "grad_norm": 1.6975783109664917, "learning_rate": 1.981683703345786e-05, "loss": 1.5188, "step": 2147 }, { "epoch": 0.11743640911389637, "grad_norm": 1.275094985961914, "learning_rate": 1.981648872510828e-05, "loss": 1.6249, "step": 2148 }, { "epoch": 0.11749108155761792, "grad_norm": 1.3324108123779297, "learning_rate": 1.9816140088963106e-05, "loss": 1.6422, "step": 2149 }, { "epoch": 0.11754575400133947, "grad_norm": 1.2793796062469482, "learning_rate": 1.981579112503398e-05, "loss": 1.23, "step": 2150 }, { "epoch": 0.11760042644506102, "grad_norm": 2.3257644176483154, "learning_rate": 1.981544183333255e-05, "loss": 1.3807, "step": 2151 }, { "epoch": 0.11765509888878257, "grad_norm": 1.4421112537384033, "learning_rate": 1.9815092213870487e-05, "loss": 1.2163, "step": 2152 }, { "epoch": 0.11770977133250414, "grad_norm": 1.7218310832977295, "learning_rate": 1.9814742266659467e-05, "loss": 1.3744, "step": 2153 }, { "epoch": 0.11776444377622569, "grad_norm": 1.4866646528244019, "learning_rate": 1.9814391991711172e-05, "loss": 1.7417, "step": 2154 }, { "epoch": 0.11781911621994724, "grad_norm": 1.2991050481796265, "learning_rate": 1.9814041389037292e-05, "loss": 1.5319, "step": 2155 }, { "epoch": 0.1178737886636688, "grad_norm": 1.4563924074172974, "learning_rate": 1.9813690458649546e-05, "loss": 1.4309, "step": 2156 }, { "epoch": 0.11792846110739034, "grad_norm": 1.5788333415985107, "learning_rate": 1.9813339200559644e-05, "loss": 1.6349, "step": 2157 }, { "epoch": 0.1179831335511119, "grad_norm": 1.4580535888671875, "learning_rate": 1.981298761477932e-05, "loss": 1.3976, "step": 2158 }, { "epoch": 0.11803780599483345, "grad_norm": 1.7690123319625854, "learning_rate": 1.9812635701320312e-05, "loss": 1.5315, "step": 2159 }, { "epoch": 0.11809247843855501, "grad_norm": 1.4830806255340576, "learning_rate": 1.9812283460194373e-05, "loss": 1.5064, "step": 2160 }, { "epoch": 0.11814715088227656, "grad_norm": 1.5222176313400269, "learning_rate": 1.9811930891413263e-05, "loss": 1.4413, "step": 2161 }, { "epoch": 0.11820182332599811, "grad_norm": 1.9023516178131104, "learning_rate": 1.9811577994988755e-05, "loss": 1.4371, "step": 2162 }, { "epoch": 0.11825649576971967, "grad_norm": 1.5664156675338745, "learning_rate": 1.9811224770932634e-05, "loss": 1.4023, "step": 2163 }, { "epoch": 0.11831116821344122, "grad_norm": 1.7377263307571411, "learning_rate": 1.98108712192567e-05, "loss": 1.3434, "step": 2164 }, { "epoch": 0.11836584065716277, "grad_norm": 1.3929482698440552, "learning_rate": 1.981051733997275e-05, "loss": 1.297, "step": 2165 }, { "epoch": 0.11842051310088433, "grad_norm": 1.3276461362838745, "learning_rate": 1.9810163133092604e-05, "loss": 1.4821, "step": 2166 }, { "epoch": 0.11847518554460588, "grad_norm": 1.449933648109436, "learning_rate": 1.9809808598628094e-05, "loss": 1.591, "step": 2167 }, { "epoch": 0.11852985798832744, "grad_norm": 1.3040119409561157, "learning_rate": 1.9809453736591054e-05, "loss": 1.5858, "step": 2168 }, { "epoch": 0.11858453043204899, "grad_norm": 1.7794641256332397, "learning_rate": 1.9809098546993333e-05, "loss": 1.6236, "step": 2169 }, { "epoch": 0.11863920287577054, "grad_norm": 1.4966917037963867, "learning_rate": 1.9808743029846795e-05, "loss": 1.4421, "step": 2170 }, { "epoch": 0.11869387531949209, "grad_norm": 1.6224250793457031, "learning_rate": 1.9808387185163313e-05, "loss": 1.4844, "step": 2171 }, { "epoch": 0.11874854776321364, "grad_norm": 1.211916446685791, "learning_rate": 1.980803101295476e-05, "loss": 1.4963, "step": 2172 }, { "epoch": 0.1188032202069352, "grad_norm": 1.595723032951355, "learning_rate": 1.9807674513233044e-05, "loss": 1.3732, "step": 2173 }, { "epoch": 0.11885789265065676, "grad_norm": 1.407813310623169, "learning_rate": 1.9807317686010055e-05, "loss": 1.4608, "step": 2174 }, { "epoch": 0.11891256509437831, "grad_norm": 1.3678655624389648, "learning_rate": 1.9806960531297722e-05, "loss": 1.488, "step": 2175 }, { "epoch": 0.11896723753809986, "grad_norm": 1.6112295389175415, "learning_rate": 1.980660304910796e-05, "loss": 1.6337, "step": 2176 }, { "epoch": 0.11902190998182141, "grad_norm": 1.393189549446106, "learning_rate": 1.980624523945271e-05, "loss": 1.4526, "step": 2177 }, { "epoch": 0.11907658242554296, "grad_norm": 1.4333256483078003, "learning_rate": 1.9805887102343922e-05, "loss": 1.424, "step": 2178 }, { "epoch": 0.11913125486926451, "grad_norm": 1.913366675376892, "learning_rate": 1.980552863779355e-05, "loss": 1.3756, "step": 2179 }, { "epoch": 0.11918592731298608, "grad_norm": 1.4916876554489136, "learning_rate": 1.9805169845813572e-05, "loss": 1.4479, "step": 2180 }, { "epoch": 0.11924059975670763, "grad_norm": 1.4152604341506958, "learning_rate": 1.980481072641596e-05, "loss": 1.2794, "step": 2181 }, { "epoch": 0.11929527220042918, "grad_norm": 1.6366851329803467, "learning_rate": 1.9804451279612714e-05, "loss": 1.4673, "step": 2182 }, { "epoch": 0.11934994464415073, "grad_norm": 1.8808908462524414, "learning_rate": 1.9804091505415833e-05, "loss": 1.2372, "step": 2183 }, { "epoch": 0.11940461708787228, "grad_norm": 2.9796321392059326, "learning_rate": 1.9803731403837326e-05, "loss": 1.6619, "step": 2184 }, { "epoch": 0.11945928953159383, "grad_norm": 1.4379152059555054, "learning_rate": 1.9803370974889225e-05, "loss": 1.4401, "step": 2185 }, { "epoch": 0.11951396197531539, "grad_norm": 1.291965365409851, "learning_rate": 1.9803010218583565e-05, "loss": 1.5308, "step": 2186 }, { "epoch": 0.11956863441903695, "grad_norm": 1.8012175559997559, "learning_rate": 1.980264913493239e-05, "loss": 1.8402, "step": 2187 }, { "epoch": 0.1196233068627585, "grad_norm": 1.2158582210540771, "learning_rate": 1.9802287723947753e-05, "loss": 1.5104, "step": 2188 }, { "epoch": 0.11967797930648005, "grad_norm": 1.4395697116851807, "learning_rate": 1.9801925985641733e-05, "loss": 1.4229, "step": 2189 }, { "epoch": 0.1197326517502016, "grad_norm": 1.3649723529815674, "learning_rate": 1.98015639200264e-05, "loss": 1.4501, "step": 2190 }, { "epoch": 0.11978732419392316, "grad_norm": 1.168919563293457, "learning_rate": 1.9801201527113843e-05, "loss": 1.7646, "step": 2191 }, { "epoch": 0.11984199663764471, "grad_norm": 1.7222669124603271, "learning_rate": 1.9800838806916175e-05, "loss": 1.3668, "step": 2192 }, { "epoch": 0.11989666908136626, "grad_norm": 1.72300124168396, "learning_rate": 1.9800475759445498e-05, "loss": 1.2597, "step": 2193 }, { "epoch": 0.11995134152508782, "grad_norm": 1.5468298196792603, "learning_rate": 1.980011238471394e-05, "loss": 1.5748, "step": 2194 }, { "epoch": 0.12000601396880937, "grad_norm": 1.3610812425613403, "learning_rate": 1.9799748682733632e-05, "loss": 1.4635, "step": 2195 }, { "epoch": 0.12006068641253093, "grad_norm": 1.3628512620925903, "learning_rate": 1.979938465351672e-05, "loss": 1.627, "step": 2196 }, { "epoch": 0.12011535885625248, "grad_norm": 1.432519793510437, "learning_rate": 1.979902029707536e-05, "loss": 1.3496, "step": 2197 }, { "epoch": 0.12017003129997403, "grad_norm": 1.4484983682632446, "learning_rate": 1.9798655613421712e-05, "loss": 1.2682, "step": 2198 }, { "epoch": 0.12022470374369558, "grad_norm": 1.4552372694015503, "learning_rate": 1.9798290602567965e-05, "loss": 1.756, "step": 2199 }, { "epoch": 0.12027937618741713, "grad_norm": 1.2203826904296875, "learning_rate": 1.9797925264526303e-05, "loss": 1.6559, "step": 2200 }, { "epoch": 0.1203340486311387, "grad_norm": 1.4852312803268433, "learning_rate": 1.9797559599308922e-05, "loss": 1.1988, "step": 2201 }, { "epoch": 0.12038872107486025, "grad_norm": 1.4800645112991333, "learning_rate": 1.9797193606928036e-05, "loss": 1.579, "step": 2202 }, { "epoch": 0.1204433935185818, "grad_norm": 1.4577480554580688, "learning_rate": 1.979682728739587e-05, "loss": 1.4596, "step": 2203 }, { "epoch": 0.12049806596230335, "grad_norm": 1.4118735790252686, "learning_rate": 1.9796460640724646e-05, "loss": 1.485, "step": 2204 }, { "epoch": 0.1205527384060249, "grad_norm": 1.238258957862854, "learning_rate": 1.9796093666926617e-05, "loss": 1.4684, "step": 2205 }, { "epoch": 0.12060741084974645, "grad_norm": 1.8290096521377563, "learning_rate": 1.979572636601403e-05, "loss": 1.3551, "step": 2206 }, { "epoch": 0.120662083293468, "grad_norm": 1.182539939880371, "learning_rate": 1.9795358737999155e-05, "loss": 1.3932, "step": 2207 }, { "epoch": 0.12071675573718957, "grad_norm": 1.845467448234558, "learning_rate": 1.979499078289427e-05, "loss": 1.5141, "step": 2208 }, { "epoch": 0.12077142818091112, "grad_norm": 1.647777795791626, "learning_rate": 1.979462250071165e-05, "loss": 1.4597, "step": 2209 }, { "epoch": 0.12082610062463267, "grad_norm": 1.6793267726898193, "learning_rate": 1.9794253891463606e-05, "loss": 1.4873, "step": 2210 }, { "epoch": 0.12088077306835422, "grad_norm": 1.5884578227996826, "learning_rate": 1.9793884955162442e-05, "loss": 1.496, "step": 2211 }, { "epoch": 0.12093544551207577, "grad_norm": 1.602833867073059, "learning_rate": 1.979351569182048e-05, "loss": 1.6215, "step": 2212 }, { "epoch": 0.12099011795579732, "grad_norm": 1.5619785785675049, "learning_rate": 1.9793146101450042e-05, "loss": 1.3348, "step": 2213 }, { "epoch": 0.12104479039951888, "grad_norm": 1.6715421676635742, "learning_rate": 1.9792776184063477e-05, "loss": 1.4472, "step": 2214 }, { "epoch": 0.12109946284324044, "grad_norm": 1.7754942178726196, "learning_rate": 1.9792405939673135e-05, "loss": 1.6459, "step": 2215 }, { "epoch": 0.12115413528696199, "grad_norm": 1.7193288803100586, "learning_rate": 1.979203536829138e-05, "loss": 1.3386, "step": 2216 }, { "epoch": 0.12120880773068354, "grad_norm": 1.5010956525802612, "learning_rate": 1.979166446993059e-05, "loss": 1.3789, "step": 2217 }, { "epoch": 0.1212634801744051, "grad_norm": 1.787362813949585, "learning_rate": 1.979129324460314e-05, "loss": 1.4878, "step": 2218 }, { "epoch": 0.12131815261812665, "grad_norm": 1.6471142768859863, "learning_rate": 1.979092169232144e-05, "loss": 1.2918, "step": 2219 }, { "epoch": 0.1213728250618482, "grad_norm": 1.3162057399749756, "learning_rate": 1.9790549813097884e-05, "loss": 1.4538, "step": 2220 }, { "epoch": 0.12142749750556976, "grad_norm": 1.3870378732681274, "learning_rate": 1.9790177606944897e-05, "loss": 1.7378, "step": 2221 }, { "epoch": 0.12148216994929131, "grad_norm": 1.432078242301941, "learning_rate": 1.978980507387491e-05, "loss": 1.2906, "step": 2222 }, { "epoch": 0.12153684239301286, "grad_norm": 1.5029345750808716, "learning_rate": 1.9789432213900354e-05, "loss": 1.5766, "step": 2223 }, { "epoch": 0.12159151483673442, "grad_norm": 1.5790473222732544, "learning_rate": 1.9789059027033688e-05, "loss": 1.5156, "step": 2224 }, { "epoch": 0.12164618728045597, "grad_norm": 2.1433961391448975, "learning_rate": 1.9788685513287368e-05, "loss": 1.6052, "step": 2225 }, { "epoch": 0.12170085972417752, "grad_norm": 1.56607186794281, "learning_rate": 1.978831167267387e-05, "loss": 1.5554, "step": 2226 }, { "epoch": 0.12175553216789907, "grad_norm": 1.8970680236816406, "learning_rate": 1.9787937505205678e-05, "loss": 1.466, "step": 2227 }, { "epoch": 0.12181020461162063, "grad_norm": 3.0899271965026855, "learning_rate": 1.9787563010895282e-05, "loss": 1.4452, "step": 2228 }, { "epoch": 0.12186487705534219, "grad_norm": 1.7554967403411865, "learning_rate": 1.978718818975519e-05, "loss": 1.4309, "step": 2229 }, { "epoch": 0.12191954949906374, "grad_norm": 1.9401981830596924, "learning_rate": 1.9786813041797915e-05, "loss": 1.3388, "step": 2230 }, { "epoch": 0.12197422194278529, "grad_norm": 1.7728456258773804, "learning_rate": 1.9786437567035993e-05, "loss": 1.4875, "step": 2231 }, { "epoch": 0.12202889438650684, "grad_norm": 1.5097863674163818, "learning_rate": 1.9786061765481954e-05, "loss": 1.2582, "step": 2232 }, { "epoch": 0.12208356683022839, "grad_norm": 1.3089873790740967, "learning_rate": 1.978568563714835e-05, "loss": 1.3873, "step": 2233 }, { "epoch": 0.12213823927394994, "grad_norm": 1.638718605041504, "learning_rate": 1.978530918204774e-05, "loss": 1.3599, "step": 2234 }, { "epoch": 0.1221929117176715, "grad_norm": 2.2763314247131348, "learning_rate": 1.9784932400192688e-05, "loss": 1.7165, "step": 2235 }, { "epoch": 0.12224758416139306, "grad_norm": 1.1445761919021606, "learning_rate": 1.978455529159579e-05, "loss": 1.4762, "step": 2236 }, { "epoch": 0.12230225660511461, "grad_norm": 1.5418862104415894, "learning_rate": 1.9784177856269628e-05, "loss": 1.655, "step": 2237 }, { "epoch": 0.12235692904883616, "grad_norm": 1.4440995454788208, "learning_rate": 1.9783800094226807e-05, "loss": 1.5017, "step": 2238 }, { "epoch": 0.12241160149255771, "grad_norm": 1.3815406560897827, "learning_rate": 1.9783422005479942e-05, "loss": 1.4074, "step": 2239 }, { "epoch": 0.12246627393627926, "grad_norm": 1.3866469860076904, "learning_rate": 1.978304359004166e-05, "loss": 1.3131, "step": 2240 }, { "epoch": 0.12252094638000081, "grad_norm": 1.2270231246948242, "learning_rate": 1.9782664847924596e-05, "loss": 1.4448, "step": 2241 }, { "epoch": 0.12257561882372238, "grad_norm": 1.4905072450637817, "learning_rate": 1.9782285779141397e-05, "loss": 1.3895, "step": 2242 }, { "epoch": 0.12263029126744393, "grad_norm": 1.3094444274902344, "learning_rate": 1.978190638370472e-05, "loss": 1.6482, "step": 2243 }, { "epoch": 0.12268496371116548, "grad_norm": 2.100921392440796, "learning_rate": 1.9781526661627235e-05, "loss": 1.4198, "step": 2244 }, { "epoch": 0.12273963615488703, "grad_norm": 1.393769383430481, "learning_rate": 1.978114661292162e-05, "loss": 1.391, "step": 2245 }, { "epoch": 0.12279430859860858, "grad_norm": 1.658860683441162, "learning_rate": 1.9780766237600574e-05, "loss": 1.4088, "step": 2246 }, { "epoch": 0.12284898104233014, "grad_norm": 1.6792802810668945, "learning_rate": 1.978038553567679e-05, "loss": 1.4926, "step": 2247 }, { "epoch": 0.12290365348605169, "grad_norm": 1.3742914199829102, "learning_rate": 1.9780004507162974e-05, "loss": 1.2073, "step": 2248 }, { "epoch": 0.12295832592977325, "grad_norm": 1.396876335144043, "learning_rate": 1.9779623152071866e-05, "loss": 1.4914, "step": 2249 }, { "epoch": 0.1230129983734948, "grad_norm": 1.7360835075378418, "learning_rate": 1.9779241470416194e-05, "loss": 1.5453, "step": 2250 }, { "epoch": 0.12306767081721635, "grad_norm": 1.5880515575408936, "learning_rate": 1.9778859462208694e-05, "loss": 1.1948, "step": 2251 }, { "epoch": 0.1231223432609379, "grad_norm": 1.7941937446594238, "learning_rate": 1.9778477127462135e-05, "loss": 1.3849, "step": 2252 }, { "epoch": 0.12317701570465946, "grad_norm": 1.5592879056930542, "learning_rate": 1.977809446618928e-05, "loss": 1.3872, "step": 2253 }, { "epoch": 0.12323168814838101, "grad_norm": 3.13191556930542, "learning_rate": 1.97777114784029e-05, "loss": 1.3551, "step": 2254 }, { "epoch": 0.12328636059210256, "grad_norm": 1.2997028827667236, "learning_rate": 1.9777328164115796e-05, "loss": 1.8128, "step": 2255 }, { "epoch": 0.12334103303582412, "grad_norm": 1.2930974960327148, "learning_rate": 1.9776944523340757e-05, "loss": 1.4166, "step": 2256 }, { "epoch": 0.12339570547954568, "grad_norm": 1.6367120742797852, "learning_rate": 1.97765605560906e-05, "loss": 1.5257, "step": 2257 }, { "epoch": 0.12345037792326723, "grad_norm": 1.7891086339950562, "learning_rate": 1.9776176262378145e-05, "loss": 1.2514, "step": 2258 }, { "epoch": 0.12350505036698878, "grad_norm": 1.71345055103302, "learning_rate": 1.9775791642216223e-05, "loss": 1.5112, "step": 2259 }, { "epoch": 0.12355972281071033, "grad_norm": 1.6001391410827637, "learning_rate": 1.9775406695617677e-05, "loss": 1.22, "step": 2260 }, { "epoch": 0.12361439525443188, "grad_norm": 1.4955302476882935, "learning_rate": 1.9775021422595366e-05, "loss": 1.3925, "step": 2261 }, { "epoch": 0.12366906769815343, "grad_norm": 1.326064944267273, "learning_rate": 1.977463582316215e-05, "loss": 1.4958, "step": 2262 }, { "epoch": 0.123723740141875, "grad_norm": 1.4753997325897217, "learning_rate": 1.9774249897330907e-05, "loss": 1.5155, "step": 2263 }, { "epoch": 0.12377841258559655, "grad_norm": 1.4130265712738037, "learning_rate": 1.9773863645114525e-05, "loss": 1.5884, "step": 2264 }, { "epoch": 0.1238330850293181, "grad_norm": 1.6338624954223633, "learning_rate": 1.97734770665259e-05, "loss": 1.3629, "step": 2265 }, { "epoch": 0.12388775747303965, "grad_norm": 2.7238690853118896, "learning_rate": 1.9773090161577943e-05, "loss": 1.1108, "step": 2266 }, { "epoch": 0.1239424299167612, "grad_norm": 1.5052788257598877, "learning_rate": 1.977270293028357e-05, "loss": 1.3047, "step": 2267 }, { "epoch": 0.12399710236048275, "grad_norm": 1.5490061044692993, "learning_rate": 1.9772315372655714e-05, "loss": 1.6479, "step": 2268 }, { "epoch": 0.12405177480420432, "grad_norm": 1.2565793991088867, "learning_rate": 1.9771927488707318e-05, "loss": 1.5323, "step": 2269 }, { "epoch": 0.12410644724792587, "grad_norm": 1.3070268630981445, "learning_rate": 1.977153927845133e-05, "loss": 1.5169, "step": 2270 }, { "epoch": 0.12416111969164742, "grad_norm": 1.374232292175293, "learning_rate": 1.977115074190072e-05, "loss": 1.5048, "step": 2271 }, { "epoch": 0.12421579213536897, "grad_norm": 1.398730754852295, "learning_rate": 1.9770761879068455e-05, "loss": 1.5109, "step": 2272 }, { "epoch": 0.12427046457909052, "grad_norm": 1.523627758026123, "learning_rate": 1.9770372689967523e-05, "loss": 1.332, "step": 2273 }, { "epoch": 0.12432513702281207, "grad_norm": 1.4155205488204956, "learning_rate": 1.9769983174610918e-05, "loss": 1.4517, "step": 2274 }, { "epoch": 0.12437980946653363, "grad_norm": 1.1786879301071167, "learning_rate": 1.9769593333011652e-05, "loss": 1.3036, "step": 2275 }, { "epoch": 0.12443448191025519, "grad_norm": 1.3712432384490967, "learning_rate": 1.976920316518274e-05, "loss": 1.2933, "step": 2276 }, { "epoch": 0.12448915435397674, "grad_norm": 1.700940728187561, "learning_rate": 1.9768812671137207e-05, "loss": 1.5027, "step": 2277 }, { "epoch": 0.12454382679769829, "grad_norm": 1.6109524965286255, "learning_rate": 1.97684218508881e-05, "loss": 1.4543, "step": 2278 }, { "epoch": 0.12459849924141984, "grad_norm": 1.15129816532135, "learning_rate": 1.9768030704448462e-05, "loss": 1.4529, "step": 2279 }, { "epoch": 0.1246531716851414, "grad_norm": 1.0018762350082397, "learning_rate": 1.976763923183136e-05, "loss": 1.6117, "step": 2280 }, { "epoch": 0.12470784412886295, "grad_norm": 1.3784887790679932, "learning_rate": 1.9767247433049858e-05, "loss": 1.5353, "step": 2281 }, { "epoch": 0.1247625165725845, "grad_norm": 2.009087324142456, "learning_rate": 1.976685530811705e-05, "loss": 1.7656, "step": 2282 }, { "epoch": 0.12481718901630606, "grad_norm": 1.3022103309631348, "learning_rate": 1.9766462857046022e-05, "loss": 1.2956, "step": 2283 }, { "epoch": 0.12487186146002761, "grad_norm": 1.717811107635498, "learning_rate": 1.9766070079849882e-05, "loss": 1.4175, "step": 2284 }, { "epoch": 0.12492653390374917, "grad_norm": 1.106884241104126, "learning_rate": 1.9765676976541748e-05, "loss": 1.5849, "step": 2285 }, { "epoch": 0.12498120634747072, "grad_norm": 1.4689738750457764, "learning_rate": 1.976528354713474e-05, "loss": 1.6118, "step": 2286 }, { "epoch": 0.12503587879119227, "grad_norm": 1.396822214126587, "learning_rate": 1.9764889791642e-05, "loss": 1.4304, "step": 2287 }, { "epoch": 0.12509055123491383, "grad_norm": 1.4026092290878296, "learning_rate": 1.9764495710076678e-05, "loss": 1.3023, "step": 2288 }, { "epoch": 0.12514522367863537, "grad_norm": 1.3284319639205933, "learning_rate": 1.976410130245193e-05, "loss": 1.5636, "step": 2289 }, { "epoch": 0.12519989612235694, "grad_norm": 1.349885106086731, "learning_rate": 1.9763706568780928e-05, "loss": 1.4682, "step": 2290 }, { "epoch": 0.12525456856607847, "grad_norm": 1.593701958656311, "learning_rate": 1.976331150907685e-05, "loss": 1.6244, "step": 2291 }, { "epoch": 0.12530924100980004, "grad_norm": 1.3095331192016602, "learning_rate": 1.976291612335289e-05, "loss": 1.3843, "step": 2292 }, { "epoch": 0.1253639134535216, "grad_norm": 1.3794735670089722, "learning_rate": 1.9762520411622255e-05, "loss": 1.4043, "step": 2293 }, { "epoch": 0.12541858589724314, "grad_norm": 1.4465008974075317, "learning_rate": 1.9762124373898155e-05, "loss": 1.5493, "step": 2294 }, { "epoch": 0.1254732583409647, "grad_norm": 1.9529355764389038, "learning_rate": 1.9761728010193812e-05, "loss": 1.5742, "step": 2295 }, { "epoch": 0.12552793078468624, "grad_norm": 1.1369885206222534, "learning_rate": 1.9761331320522466e-05, "loss": 1.6178, "step": 2296 }, { "epoch": 0.1255826032284078, "grad_norm": 1.8013349771499634, "learning_rate": 1.976093430489736e-05, "loss": 1.4132, "step": 2297 }, { "epoch": 0.12563727567212934, "grad_norm": 1.2994824647903442, "learning_rate": 1.976053696333175e-05, "loss": 1.4777, "step": 2298 }, { "epoch": 0.1256919481158509, "grad_norm": 1.144211769104004, "learning_rate": 1.9760139295838912e-05, "loss": 1.3105, "step": 2299 }, { "epoch": 0.12574662055957248, "grad_norm": 1.2537788152694702, "learning_rate": 1.975974130243212e-05, "loss": 1.2876, "step": 2300 }, { "epoch": 0.125801293003294, "grad_norm": 1.807423710823059, "learning_rate": 1.975934298312466e-05, "loss": 1.346, "step": 2301 }, { "epoch": 0.12585596544701558, "grad_norm": 1.5529348850250244, "learning_rate": 1.975894433792984e-05, "loss": 1.5301, "step": 2302 }, { "epoch": 0.12591063789073711, "grad_norm": 1.3483718633651733, "learning_rate": 1.9758545366860968e-05, "loss": 1.456, "step": 2303 }, { "epoch": 0.12596531033445868, "grad_norm": 1.4761509895324707, "learning_rate": 1.9758146069931364e-05, "loss": 1.6476, "step": 2304 }, { "epoch": 0.12601998277818022, "grad_norm": 1.6144390106201172, "learning_rate": 1.975774644715437e-05, "loss": 1.4285, "step": 2305 }, { "epoch": 0.12607465522190178, "grad_norm": 1.3730751276016235, "learning_rate": 1.975734649854332e-05, "loss": 1.3581, "step": 2306 }, { "epoch": 0.12612932766562335, "grad_norm": 1.412712574005127, "learning_rate": 1.975694622411158e-05, "loss": 1.1632, "step": 2307 }, { "epoch": 0.12618400010934488, "grad_norm": 1.4966212511062622, "learning_rate": 1.97565456238725e-05, "loss": 1.6367, "step": 2308 }, { "epoch": 0.12623867255306645, "grad_norm": 1.2563960552215576, "learning_rate": 1.9756144697839477e-05, "loss": 1.3491, "step": 2309 }, { "epoch": 0.126293344996788, "grad_norm": 1.432003140449524, "learning_rate": 1.9755743446025882e-05, "loss": 1.5297, "step": 2310 }, { "epoch": 0.12634801744050955, "grad_norm": 1.4674853086471558, "learning_rate": 1.9755341868445126e-05, "loss": 1.521, "step": 2311 }, { "epoch": 0.1264026898842311, "grad_norm": 1.8180832862854004, "learning_rate": 1.975493996511061e-05, "loss": 1.627, "step": 2312 }, { "epoch": 0.12645736232795265, "grad_norm": 1.703634262084961, "learning_rate": 1.975453773603576e-05, "loss": 1.6458, "step": 2313 }, { "epoch": 0.12651203477167422, "grad_norm": 1.615910291671753, "learning_rate": 1.9754135181234005e-05, "loss": 1.57, "step": 2314 }, { "epoch": 0.12656670721539576, "grad_norm": 1.674420952796936, "learning_rate": 1.9753732300718788e-05, "loss": 1.3917, "step": 2315 }, { "epoch": 0.12662137965911732, "grad_norm": 1.24888277053833, "learning_rate": 1.9753329094503563e-05, "loss": 1.3146, "step": 2316 }, { "epoch": 0.12667605210283886, "grad_norm": 1.4196667671203613, "learning_rate": 1.975292556260179e-05, "loss": 1.6169, "step": 2317 }, { "epoch": 0.12673072454656042, "grad_norm": 1.3783843517303467, "learning_rate": 1.9752521705026947e-05, "loss": 1.3009, "step": 2318 }, { "epoch": 0.12678539699028196, "grad_norm": 1.705186367034912, "learning_rate": 1.975211752179252e-05, "loss": 1.428, "step": 2319 }, { "epoch": 0.12684006943400353, "grad_norm": 1.4398263692855835, "learning_rate": 1.9751713012912002e-05, "loss": 1.5326, "step": 2320 }, { "epoch": 0.1268947418777251, "grad_norm": 1.2989997863769531, "learning_rate": 1.975130817839891e-05, "loss": 1.4899, "step": 2321 }, { "epoch": 0.12694941432144663, "grad_norm": 1.5088589191436768, "learning_rate": 1.9750903018266746e-05, "loss": 1.5865, "step": 2322 }, { "epoch": 0.1270040867651682, "grad_norm": 2.0721638202667236, "learning_rate": 1.9750497532529053e-05, "loss": 1.5057, "step": 2323 }, { "epoch": 0.12705875920888973, "grad_norm": 1.3767777681350708, "learning_rate": 1.975009172119937e-05, "loss": 1.4767, "step": 2324 }, { "epoch": 0.1271134316526113, "grad_norm": 1.6862815618515015, "learning_rate": 1.9749685584291246e-05, "loss": 1.3955, "step": 2325 }, { "epoch": 0.12716810409633283, "grad_norm": 1.2016689777374268, "learning_rate": 1.9749279121818235e-05, "loss": 1.3584, "step": 2326 }, { "epoch": 0.1272227765400544, "grad_norm": 1.6560763120651245, "learning_rate": 1.9748872333793923e-05, "loss": 1.485, "step": 2327 }, { "epoch": 0.12727744898377596, "grad_norm": 1.5283474922180176, "learning_rate": 1.9748465220231887e-05, "loss": 1.5907, "step": 2328 }, { "epoch": 0.1273321214274975, "grad_norm": 1.6298565864562988, "learning_rate": 1.974805778114572e-05, "loss": 1.3525, "step": 2329 }, { "epoch": 0.12738679387121907, "grad_norm": 1.724400520324707, "learning_rate": 1.974765001654903e-05, "loss": 1.3116, "step": 2330 }, { "epoch": 0.1274414663149406, "grad_norm": 1.1723814010620117, "learning_rate": 1.974724192645543e-05, "loss": 1.7058, "step": 2331 }, { "epoch": 0.12749613875866217, "grad_norm": 1.4607276916503906, "learning_rate": 1.9746833510878553e-05, "loss": 1.4924, "step": 2332 }, { "epoch": 0.1275508112023837, "grad_norm": 1.218117356300354, "learning_rate": 1.974642476983203e-05, "loss": 1.4143, "step": 2333 }, { "epoch": 0.12760548364610527, "grad_norm": 1.4902247190475464, "learning_rate": 1.9746015703329516e-05, "loss": 1.6041, "step": 2334 }, { "epoch": 0.12766015608982684, "grad_norm": 1.6782584190368652, "learning_rate": 1.974560631138467e-05, "loss": 1.3866, "step": 2335 }, { "epoch": 0.12771482853354837, "grad_norm": 1.4975218772888184, "learning_rate": 1.9745196594011156e-05, "loss": 1.5322, "step": 2336 }, { "epoch": 0.12776950097726994, "grad_norm": 1.3696743249893188, "learning_rate": 1.9744786551222658e-05, "loss": 1.6478, "step": 2337 }, { "epoch": 0.12782417342099148, "grad_norm": 1.1857635974884033, "learning_rate": 1.9744376183032874e-05, "loss": 1.6223, "step": 2338 }, { "epoch": 0.12787884586471304, "grad_norm": 1.8721191883087158, "learning_rate": 1.9743965489455505e-05, "loss": 1.5817, "step": 2339 }, { "epoch": 0.12793351830843458, "grad_norm": 1.4264785051345825, "learning_rate": 1.974355447050426e-05, "loss": 1.2527, "step": 2340 }, { "epoch": 0.12798819075215614, "grad_norm": 1.377733588218689, "learning_rate": 1.9743143126192868e-05, "loss": 1.4428, "step": 2341 }, { "epoch": 0.1280428631958777, "grad_norm": 1.306553840637207, "learning_rate": 1.9742731456535066e-05, "loss": 1.2546, "step": 2342 }, { "epoch": 0.12809753563959925, "grad_norm": 1.7144829034805298, "learning_rate": 1.9742319461544598e-05, "loss": 1.2573, "step": 2343 }, { "epoch": 0.1281522080833208, "grad_norm": 1.1501599550247192, "learning_rate": 1.974190714123522e-05, "loss": 1.4381, "step": 2344 }, { "epoch": 0.12820688052704235, "grad_norm": 1.5149067640304565, "learning_rate": 1.9741494495620703e-05, "loss": 1.6749, "step": 2345 }, { "epoch": 0.12826155297076391, "grad_norm": 1.581213116645813, "learning_rate": 1.9741081524714828e-05, "loss": 1.4946, "step": 2346 }, { "epoch": 0.12831622541448545, "grad_norm": 1.7268216609954834, "learning_rate": 1.9740668228531378e-05, "loss": 1.5389, "step": 2347 }, { "epoch": 0.12837089785820702, "grad_norm": 1.7286477088928223, "learning_rate": 1.9740254607084165e-05, "loss": 1.4973, "step": 2348 }, { "epoch": 0.12842557030192858, "grad_norm": 1.6480906009674072, "learning_rate": 1.9739840660386987e-05, "loss": 1.2351, "step": 2349 }, { "epoch": 0.12848024274565012, "grad_norm": 1.4575574398040771, "learning_rate": 1.973942638845368e-05, "loss": 1.371, "step": 2350 }, { "epoch": 0.12853491518937168, "grad_norm": 1.5168969631195068, "learning_rate": 1.9739011791298073e-05, "loss": 1.4062, "step": 2351 }, { "epoch": 0.12858958763309322, "grad_norm": 1.683429479598999, "learning_rate": 1.9738596868934007e-05, "loss": 1.4102, "step": 2352 }, { "epoch": 0.1286442600768148, "grad_norm": 1.4579919576644897, "learning_rate": 1.9738181621375335e-05, "loss": 1.4958, "step": 2353 }, { "epoch": 0.12869893252053632, "grad_norm": 1.72576105594635, "learning_rate": 1.973776604863593e-05, "loss": 1.4731, "step": 2354 }, { "epoch": 0.1287536049642579, "grad_norm": 1.5214049816131592, "learning_rate": 1.9737350150729667e-05, "loss": 1.37, "step": 2355 }, { "epoch": 0.12880827740797945, "grad_norm": 1.8747745752334595, "learning_rate": 1.9736933927670434e-05, "loss": 1.6165, "step": 2356 }, { "epoch": 0.128862949851701, "grad_norm": 1.5288032293319702, "learning_rate": 1.9736517379472125e-05, "loss": 1.5601, "step": 2357 }, { "epoch": 0.12891762229542256, "grad_norm": 1.1598364114761353, "learning_rate": 1.9736100506148657e-05, "loss": 1.5358, "step": 2358 }, { "epoch": 0.1289722947391441, "grad_norm": 1.1289441585540771, "learning_rate": 1.9735683307713946e-05, "loss": 1.5381, "step": 2359 }, { "epoch": 0.12902696718286566, "grad_norm": 1.5882948637008667, "learning_rate": 1.973526578418192e-05, "loss": 1.4084, "step": 2360 }, { "epoch": 0.1290816396265872, "grad_norm": 1.4701344966888428, "learning_rate": 1.973484793556653e-05, "loss": 1.4036, "step": 2361 }, { "epoch": 0.12913631207030876, "grad_norm": 1.7050037384033203, "learning_rate": 1.9734429761881723e-05, "loss": 1.7454, "step": 2362 }, { "epoch": 0.12919098451403033, "grad_norm": 1.0551458597183228, "learning_rate": 1.9734011263141462e-05, "loss": 1.5611, "step": 2363 }, { "epoch": 0.12924565695775186, "grad_norm": 1.4182568788528442, "learning_rate": 1.9733592439359722e-05, "loss": 1.3807, "step": 2364 }, { "epoch": 0.12930032940147343, "grad_norm": 1.5911186933517456, "learning_rate": 1.9733173290550494e-05, "loss": 1.5391, "step": 2365 }, { "epoch": 0.12935500184519497, "grad_norm": 1.5190976858139038, "learning_rate": 1.973275381672777e-05, "loss": 1.5782, "step": 2366 }, { "epoch": 0.12940967428891653, "grad_norm": 1.7954399585723877, "learning_rate": 1.9732334017905555e-05, "loss": 1.4344, "step": 2367 }, { "epoch": 0.12946434673263807, "grad_norm": 1.4001871347427368, "learning_rate": 1.973191389409787e-05, "loss": 1.5344, "step": 2368 }, { "epoch": 0.12951901917635963, "grad_norm": 1.3590930700302124, "learning_rate": 1.9731493445318742e-05, "loss": 1.5452, "step": 2369 }, { "epoch": 0.1295736916200812, "grad_norm": 2.3065872192382812, "learning_rate": 1.9731072671582214e-05, "loss": 1.3546, "step": 2370 }, { "epoch": 0.12962836406380274, "grad_norm": 1.6029311418533325, "learning_rate": 1.9730651572902335e-05, "loss": 1.4907, "step": 2371 }, { "epoch": 0.1296830365075243, "grad_norm": 1.778203010559082, "learning_rate": 1.9730230149293167e-05, "loss": 1.5058, "step": 2372 }, { "epoch": 0.12973770895124584, "grad_norm": 1.8893983364105225, "learning_rate": 1.972980840076878e-05, "loss": 1.2937, "step": 2373 }, { "epoch": 0.1297923813949674, "grad_norm": 1.9225428104400635, "learning_rate": 1.9729386327343258e-05, "loss": 1.4304, "step": 2374 }, { "epoch": 0.12984705383868894, "grad_norm": 1.6697345972061157, "learning_rate": 1.97289639290307e-05, "loss": 1.3488, "step": 2375 }, { "epoch": 0.1299017262824105, "grad_norm": 1.3208611011505127, "learning_rate": 1.9728541205845203e-05, "loss": 1.2208, "step": 2376 }, { "epoch": 0.12995639872613207, "grad_norm": 1.505894660949707, "learning_rate": 1.9728118157800887e-05, "loss": 1.4699, "step": 2377 }, { "epoch": 0.1300110711698536, "grad_norm": 1.4730002880096436, "learning_rate": 1.9727694784911877e-05, "loss": 1.2983, "step": 2378 }, { "epoch": 0.13006574361357517, "grad_norm": 1.4880772829055786, "learning_rate": 1.9727271087192312e-05, "loss": 1.5855, "step": 2379 }, { "epoch": 0.1301204160572967, "grad_norm": 1.9293490648269653, "learning_rate": 1.972684706465634e-05, "loss": 1.5455, "step": 2380 }, { "epoch": 0.13017508850101828, "grad_norm": 1.7550417184829712, "learning_rate": 1.972642271731812e-05, "loss": 1.4036, "step": 2381 }, { "epoch": 0.13022976094473981, "grad_norm": 1.7502892017364502, "learning_rate": 1.9725998045191822e-05, "loss": 1.4481, "step": 2382 }, { "epoch": 0.13028443338846138, "grad_norm": 1.2981517314910889, "learning_rate": 1.9725573048291628e-05, "loss": 1.5361, "step": 2383 }, { "epoch": 0.13033910583218294, "grad_norm": 1.254493236541748, "learning_rate": 1.9725147726631724e-05, "loss": 1.5536, "step": 2384 }, { "epoch": 0.13039377827590448, "grad_norm": 1.6160694360733032, "learning_rate": 1.972472208022632e-05, "loss": 1.293, "step": 2385 }, { "epoch": 0.13044845071962605, "grad_norm": 1.2741810083389282, "learning_rate": 1.9724296109089623e-05, "loss": 1.41, "step": 2386 }, { "epoch": 0.13050312316334758, "grad_norm": 1.7856237888336182, "learning_rate": 1.9723869813235863e-05, "loss": 1.6078, "step": 2387 }, { "epoch": 0.13055779560706915, "grad_norm": 1.5903664827346802, "learning_rate": 1.972344319267927e-05, "loss": 1.8034, "step": 2388 }, { "epoch": 0.1306124680507907, "grad_norm": 1.4206444025039673, "learning_rate": 1.9723016247434093e-05, "loss": 1.3302, "step": 2389 }, { "epoch": 0.13066714049451225, "grad_norm": 1.3751611709594727, "learning_rate": 1.972258897751459e-05, "loss": 1.6267, "step": 2390 }, { "epoch": 0.13072181293823382, "grad_norm": 1.3384367227554321, "learning_rate": 1.9722161382935022e-05, "loss": 1.4692, "step": 2391 }, { "epoch": 0.13077648538195535, "grad_norm": 1.5649179220199585, "learning_rate": 1.9721733463709673e-05, "loss": 1.4201, "step": 2392 }, { "epoch": 0.13083115782567692, "grad_norm": 1.3681790828704834, "learning_rate": 1.9721305219852833e-05, "loss": 1.252, "step": 2393 }, { "epoch": 0.13088583026939846, "grad_norm": 1.4994500875473022, "learning_rate": 1.9720876651378796e-05, "loss": 1.492, "step": 2394 }, { "epoch": 0.13094050271312002, "grad_norm": 1.7030138969421387, "learning_rate": 1.9720447758301882e-05, "loss": 1.5634, "step": 2395 }, { "epoch": 0.1309951751568416, "grad_norm": 1.5102274417877197, "learning_rate": 1.9720018540636404e-05, "loss": 1.4322, "step": 2396 }, { "epoch": 0.13104984760056312, "grad_norm": 1.4037408828735352, "learning_rate": 1.97195889983967e-05, "loss": 1.4215, "step": 2397 }, { "epoch": 0.1311045200442847, "grad_norm": 1.6593289375305176, "learning_rate": 1.9719159131597113e-05, "loss": 1.3437, "step": 2398 }, { "epoch": 0.13115919248800623, "grad_norm": 1.3593684434890747, "learning_rate": 1.971872894025199e-05, "loss": 1.2628, "step": 2399 }, { "epoch": 0.1312138649317278, "grad_norm": 1.3496592044830322, "learning_rate": 1.971829842437571e-05, "loss": 1.3025, "step": 2400 }, { "epoch": 0.13126853737544933, "grad_norm": 1.5859932899475098, "learning_rate": 1.9717867583982637e-05, "loss": 1.2435, "step": 2401 }, { "epoch": 0.1313232098191709, "grad_norm": 1.7169972658157349, "learning_rate": 1.971743641908716e-05, "loss": 1.3143, "step": 2402 }, { "epoch": 0.13137788226289246, "grad_norm": 1.3898626565933228, "learning_rate": 1.9717004929703677e-05, "loss": 1.2308, "step": 2403 }, { "epoch": 0.131432554706614, "grad_norm": 1.1493854522705078, "learning_rate": 1.9716573115846602e-05, "loss": 1.3887, "step": 2404 }, { "epoch": 0.13148722715033556, "grad_norm": 1.653768539428711, "learning_rate": 1.971614097753035e-05, "loss": 1.4866, "step": 2405 }, { "epoch": 0.1315418995940571, "grad_norm": 1.3948307037353516, "learning_rate": 1.9715708514769346e-05, "loss": 1.4284, "step": 2406 }, { "epoch": 0.13159657203777866, "grad_norm": 1.621320366859436, "learning_rate": 1.971527572757804e-05, "loss": 1.4536, "step": 2407 }, { "epoch": 0.1316512444815002, "grad_norm": 1.6143444776535034, "learning_rate": 1.9714842615970878e-05, "loss": 1.2661, "step": 2408 }, { "epoch": 0.13170591692522177, "grad_norm": 1.2530344724655151, "learning_rate": 1.9714409179962328e-05, "loss": 1.4154, "step": 2409 }, { "epoch": 0.13176058936894333, "grad_norm": 1.456976294517517, "learning_rate": 1.971397541956686e-05, "loss": 1.4185, "step": 2410 }, { "epoch": 0.13181526181266487, "grad_norm": 1.3966772556304932, "learning_rate": 1.9713541334798957e-05, "loss": 1.5313, "step": 2411 }, { "epoch": 0.13186993425638643, "grad_norm": 1.1621944904327393, "learning_rate": 1.971310692567311e-05, "loss": 1.5486, "step": 2412 }, { "epoch": 0.13192460670010797, "grad_norm": 1.4211359024047852, "learning_rate": 1.9712672192203836e-05, "loss": 1.3064, "step": 2413 }, { "epoch": 0.13197927914382954, "grad_norm": 1.2426303625106812, "learning_rate": 1.971223713440564e-05, "loss": 1.7528, "step": 2414 }, { "epoch": 0.13203395158755107, "grad_norm": 1.7135319709777832, "learning_rate": 1.9711801752293064e-05, "loss": 1.6169, "step": 2415 }, { "epoch": 0.13208862403127264, "grad_norm": 1.2377398014068604, "learning_rate": 1.9711366045880633e-05, "loss": 1.5174, "step": 2416 }, { "epoch": 0.1321432964749942, "grad_norm": 1.6490527391433716, "learning_rate": 1.9710930015182903e-05, "loss": 1.5322, "step": 2417 }, { "epoch": 0.13219796891871574, "grad_norm": 1.3662619590759277, "learning_rate": 1.9710493660214435e-05, "loss": 1.3253, "step": 2418 }, { "epoch": 0.1322526413624373, "grad_norm": 1.6222007274627686, "learning_rate": 1.971005698098979e-05, "loss": 1.2251, "step": 2419 }, { "epoch": 0.13230731380615884, "grad_norm": 1.2593441009521484, "learning_rate": 1.9709619977523564e-05, "loss": 1.4149, "step": 2420 }, { "epoch": 0.1323619862498804, "grad_norm": 1.4478793144226074, "learning_rate": 1.970918264983034e-05, "loss": 1.366, "step": 2421 }, { "epoch": 0.13241665869360195, "grad_norm": 1.6572685241699219, "learning_rate": 1.970874499792472e-05, "loss": 1.5795, "step": 2422 }, { "epoch": 0.1324713311373235, "grad_norm": 1.6472728252410889, "learning_rate": 1.970830702182133e-05, "loss": 1.2635, "step": 2423 }, { "epoch": 0.13252600358104508, "grad_norm": 1.2808973789215088, "learning_rate": 1.970786872153478e-05, "loss": 1.7126, "step": 2424 }, { "epoch": 0.1325806760247666, "grad_norm": 1.9152815341949463, "learning_rate": 1.970743009707972e-05, "loss": 1.4277, "step": 2425 }, { "epoch": 0.13263534846848818, "grad_norm": 1.7890470027923584, "learning_rate": 1.9706991148470783e-05, "loss": 1.1572, "step": 2426 }, { "epoch": 0.13269002091220972, "grad_norm": 1.5856525897979736, "learning_rate": 1.970655187572264e-05, "loss": 1.5092, "step": 2427 }, { "epoch": 0.13274469335593128, "grad_norm": 1.4688454866409302, "learning_rate": 1.970611227884995e-05, "loss": 1.2896, "step": 2428 }, { "epoch": 0.13279936579965282, "grad_norm": 1.1683562994003296, "learning_rate": 1.9705672357867392e-05, "loss": 1.6997, "step": 2429 }, { "epoch": 0.13285403824337438, "grad_norm": 1.652502417564392, "learning_rate": 1.970523211278966e-05, "loss": 1.5733, "step": 2430 }, { "epoch": 0.13290871068709595, "grad_norm": 1.4517933130264282, "learning_rate": 1.9704791543631455e-05, "loss": 1.4418, "step": 2431 }, { "epoch": 0.1329633831308175, "grad_norm": 1.4740105867385864, "learning_rate": 1.970435065040749e-05, "loss": 1.3855, "step": 2432 }, { "epoch": 0.13301805557453905, "grad_norm": 1.3825252056121826, "learning_rate": 1.9703909433132484e-05, "loss": 1.428, "step": 2433 }, { "epoch": 0.1330727280182606, "grad_norm": 1.4170554876327515, "learning_rate": 1.9703467891821165e-05, "loss": 1.3976, "step": 2434 }, { "epoch": 0.13312740046198215, "grad_norm": 1.509232997894287, "learning_rate": 1.9703026026488288e-05, "loss": 1.4605, "step": 2435 }, { "epoch": 0.1331820729057037, "grad_norm": 1.3860409259796143, "learning_rate": 1.9702583837148605e-05, "loss": 1.5701, "step": 2436 }, { "epoch": 0.13323674534942526, "grad_norm": 1.3906461000442505, "learning_rate": 1.9702141323816875e-05, "loss": 1.2749, "step": 2437 }, { "epoch": 0.13329141779314682, "grad_norm": 2.290541887283325, "learning_rate": 1.9701698486507884e-05, "loss": 1.2877, "step": 2438 }, { "epoch": 0.13334609023686836, "grad_norm": 1.5527483224868774, "learning_rate": 1.970125532523641e-05, "loss": 1.3435, "step": 2439 }, { "epoch": 0.13340076268058992, "grad_norm": 1.596862554550171, "learning_rate": 1.970081184001726e-05, "loss": 1.2433, "step": 2440 }, { "epoch": 0.13345543512431146, "grad_norm": 1.3724285364151, "learning_rate": 1.9700368030865235e-05, "loss": 1.4673, "step": 2441 }, { "epoch": 0.13351010756803303, "grad_norm": 1.3944157361984253, "learning_rate": 1.9699923897795165e-05, "loss": 1.3436, "step": 2442 }, { "epoch": 0.13356478001175456, "grad_norm": 1.3598405122756958, "learning_rate": 1.9699479440821868e-05, "loss": 1.4546, "step": 2443 }, { "epoch": 0.13361945245547613, "grad_norm": 1.4267398118972778, "learning_rate": 1.9699034659960197e-05, "loss": 1.6029, "step": 2444 }, { "epoch": 0.1336741248991977, "grad_norm": 1.339544415473938, "learning_rate": 1.9698589555224992e-05, "loss": 1.6048, "step": 2445 }, { "epoch": 0.13372879734291923, "grad_norm": 1.9287546873092651, "learning_rate": 1.969814412663113e-05, "loss": 1.3485, "step": 2446 }, { "epoch": 0.1337834697866408, "grad_norm": 1.7970800399780273, "learning_rate": 1.9697698374193478e-05, "loss": 1.4862, "step": 2447 }, { "epoch": 0.13383814223036233, "grad_norm": 1.2544654607772827, "learning_rate": 1.969725229792692e-05, "loss": 1.2239, "step": 2448 }, { "epoch": 0.1338928146740839, "grad_norm": 1.5078614950180054, "learning_rate": 1.9696805897846353e-05, "loss": 1.466, "step": 2449 }, { "epoch": 0.13394748711780544, "grad_norm": 1.2807525396347046, "learning_rate": 1.969635917396668e-05, "loss": 1.42, "step": 2450 }, { "epoch": 0.134002159561527, "grad_norm": 1.189491868019104, "learning_rate": 1.9695912126302823e-05, "loss": 1.4418, "step": 2451 }, { "epoch": 0.13405683200524857, "grad_norm": 1.4085191488265991, "learning_rate": 1.9695464754869708e-05, "loss": 1.4012, "step": 2452 }, { "epoch": 0.1341115044489701, "grad_norm": 1.4524955749511719, "learning_rate": 1.9695017059682274e-05, "loss": 1.5755, "step": 2453 }, { "epoch": 0.13416617689269167, "grad_norm": 1.376079797744751, "learning_rate": 1.9694569040755466e-05, "loss": 1.4146, "step": 2454 }, { "epoch": 0.1342208493364132, "grad_norm": 1.429933786392212, "learning_rate": 1.9694120698104253e-05, "loss": 1.4115, "step": 2455 }, { "epoch": 0.13427552178013477, "grad_norm": 1.2424880266189575, "learning_rate": 1.9693672031743604e-05, "loss": 1.7209, "step": 2456 }, { "epoch": 0.1343301942238563, "grad_norm": 1.351443886756897, "learning_rate": 1.9693223041688494e-05, "loss": 1.4852, "step": 2457 }, { "epoch": 0.13438486666757787, "grad_norm": 2.3254144191741943, "learning_rate": 1.9692773727953923e-05, "loss": 1.3415, "step": 2458 }, { "epoch": 0.13443953911129944, "grad_norm": 1.7057033777236938, "learning_rate": 1.9692324090554893e-05, "loss": 1.2211, "step": 2459 }, { "epoch": 0.13449421155502098, "grad_norm": 1.4864031076431274, "learning_rate": 1.9691874129506417e-05, "loss": 1.4013, "step": 2460 }, { "epoch": 0.13454888399874254, "grad_norm": 1.555176019668579, "learning_rate": 1.969142384482352e-05, "loss": 1.3686, "step": 2461 }, { "epoch": 0.13460355644246408, "grad_norm": 1.6181426048278809, "learning_rate": 1.969097323652124e-05, "loss": 1.5441, "step": 2462 }, { "epoch": 0.13465822888618564, "grad_norm": 1.490848422050476, "learning_rate": 1.9690522304614624e-05, "loss": 1.11, "step": 2463 }, { "epoch": 0.13471290132990718, "grad_norm": 1.4938596487045288, "learning_rate": 1.969007104911873e-05, "loss": 1.2146, "step": 2464 }, { "epoch": 0.13476757377362875, "grad_norm": 1.4798249006271362, "learning_rate": 1.9689619470048626e-05, "loss": 1.5804, "step": 2465 }, { "epoch": 0.1348222462173503, "grad_norm": 1.5867160558700562, "learning_rate": 1.9689167567419386e-05, "loss": 1.4001, "step": 2466 }, { "epoch": 0.13487691866107185, "grad_norm": 1.4521241188049316, "learning_rate": 1.9688715341246106e-05, "loss": 1.4718, "step": 2467 }, { "epoch": 0.1349315911047934, "grad_norm": 1.1362130641937256, "learning_rate": 1.9688262791543885e-05, "loss": 1.5136, "step": 2468 }, { "epoch": 0.13498626354851495, "grad_norm": 1.5816060304641724, "learning_rate": 1.968780991832784e-05, "loss": 1.3056, "step": 2469 }, { "epoch": 0.13504093599223652, "grad_norm": 1.38986337184906, "learning_rate": 1.9687356721613084e-05, "loss": 1.4863, "step": 2470 }, { "epoch": 0.13509560843595805, "grad_norm": 1.5635695457458496, "learning_rate": 1.9686903201414753e-05, "loss": 1.5438, "step": 2471 }, { "epoch": 0.13515028087967962, "grad_norm": 1.7527631521224976, "learning_rate": 1.9686449357747996e-05, "loss": 1.5342, "step": 2472 }, { "epoch": 0.13520495332340118, "grad_norm": 1.9096965789794922, "learning_rate": 1.9685995190627967e-05, "loss": 1.5853, "step": 2473 }, { "epoch": 0.13525962576712272, "grad_norm": 1.8163875341415405, "learning_rate": 1.9685540700069827e-05, "loss": 1.2881, "step": 2474 }, { "epoch": 0.13531429821084429, "grad_norm": 1.2368589639663696, "learning_rate": 1.9685085886088753e-05, "loss": 1.4101, "step": 2475 }, { "epoch": 0.13536897065456582, "grad_norm": 1.435131549835205, "learning_rate": 1.9684630748699937e-05, "loss": 1.2252, "step": 2476 }, { "epoch": 0.1354236430982874, "grad_norm": 1.8580447435379028, "learning_rate": 1.9684175287918576e-05, "loss": 1.3479, "step": 2477 }, { "epoch": 0.13547831554200893, "grad_norm": 2.023115634918213, "learning_rate": 1.9683719503759877e-05, "loss": 1.4503, "step": 2478 }, { "epoch": 0.1355329879857305, "grad_norm": 1.406362771987915, "learning_rate": 1.968326339623906e-05, "loss": 1.5841, "step": 2479 }, { "epoch": 0.13558766042945206, "grad_norm": 1.464137077331543, "learning_rate": 1.9682806965371355e-05, "loss": 1.534, "step": 2480 }, { "epoch": 0.1356423328731736, "grad_norm": 1.641331434249878, "learning_rate": 1.9682350211172005e-05, "loss": 1.5652, "step": 2481 }, { "epoch": 0.13569700531689516, "grad_norm": 1.5541025400161743, "learning_rate": 1.968189313365626e-05, "loss": 1.4279, "step": 2482 }, { "epoch": 0.1357516777606167, "grad_norm": 2.0972278118133545, "learning_rate": 1.9681435732839384e-05, "loss": 1.5519, "step": 2483 }, { "epoch": 0.13580635020433826, "grad_norm": 1.7048847675323486, "learning_rate": 1.968097800873665e-05, "loss": 1.5394, "step": 2484 }, { "epoch": 0.1358610226480598, "grad_norm": 1.299695372581482, "learning_rate": 1.9680519961363345e-05, "loss": 1.3336, "step": 2485 }, { "epoch": 0.13591569509178136, "grad_norm": 1.5024338960647583, "learning_rate": 1.9680061590734762e-05, "loss": 1.307, "step": 2486 }, { "epoch": 0.13597036753550293, "grad_norm": 1.1968740224838257, "learning_rate": 1.967960289686621e-05, "loss": 1.5203, "step": 2487 }, { "epoch": 0.13602503997922447, "grad_norm": 1.4192010164260864, "learning_rate": 1.9679143879772997e-05, "loss": 1.5983, "step": 2488 }, { "epoch": 0.13607971242294603, "grad_norm": 1.4562832117080688, "learning_rate": 1.967868453947046e-05, "loss": 1.5779, "step": 2489 }, { "epoch": 0.13613438486666757, "grad_norm": 1.636753797531128, "learning_rate": 1.967822487597394e-05, "loss": 1.3627, "step": 2490 }, { "epoch": 0.13618905731038913, "grad_norm": 1.4405572414398193, "learning_rate": 1.9677764889298775e-05, "loss": 1.4849, "step": 2491 }, { "epoch": 0.13624372975411067, "grad_norm": 1.3802292346954346, "learning_rate": 1.9677304579460328e-05, "loss": 1.4366, "step": 2492 }, { "epoch": 0.13629840219783224, "grad_norm": 1.3438664674758911, "learning_rate": 1.9676843946473977e-05, "loss": 1.5014, "step": 2493 }, { "epoch": 0.1363530746415538, "grad_norm": 1.895601749420166, "learning_rate": 1.9676382990355097e-05, "loss": 1.4758, "step": 2494 }, { "epoch": 0.13640774708527534, "grad_norm": 1.2461026906967163, "learning_rate": 1.9675921711119087e-05, "loss": 1.5468, "step": 2495 }, { "epoch": 0.1364624195289969, "grad_norm": 1.2982001304626465, "learning_rate": 1.967546010878134e-05, "loss": 1.5713, "step": 2496 }, { "epoch": 0.13651709197271844, "grad_norm": 1.409278392791748, "learning_rate": 1.9674998183357278e-05, "loss": 1.4643, "step": 2497 }, { "epoch": 0.13657176441644, "grad_norm": 1.4896432161331177, "learning_rate": 1.9674535934862327e-05, "loss": 1.315, "step": 2498 }, { "epoch": 0.13662643686016157, "grad_norm": 1.434421181678772, "learning_rate": 1.9674073363311918e-05, "loss": 1.3839, "step": 2499 }, { "epoch": 0.1366811093038831, "grad_norm": 1.4426922798156738, "learning_rate": 1.9673610468721492e-05, "loss": 1.7889, "step": 2500 }, { "epoch": 0.13673578174760467, "grad_norm": 1.28580641746521, "learning_rate": 1.967314725110652e-05, "loss": 1.6212, "step": 2501 }, { "epoch": 0.1367904541913262, "grad_norm": 1.3467786312103271, "learning_rate": 1.967268371048246e-05, "loss": 1.5498, "step": 2502 }, { "epoch": 0.13684512663504778, "grad_norm": 1.8359768390655518, "learning_rate": 1.9672219846864794e-05, "loss": 1.4701, "step": 2503 }, { "epoch": 0.1368997990787693, "grad_norm": 1.90402352809906, "learning_rate": 1.9671755660269007e-05, "loss": 1.1804, "step": 2504 }, { "epoch": 0.13695447152249088, "grad_norm": 1.600203514099121, "learning_rate": 1.967129115071061e-05, "loss": 1.6775, "step": 2505 }, { "epoch": 0.13700914396621244, "grad_norm": 1.599189281463623, "learning_rate": 1.96708263182051e-05, "loss": 1.4402, "step": 2506 }, { "epoch": 0.13706381640993398, "grad_norm": 1.5009105205535889, "learning_rate": 1.967036116276801e-05, "loss": 1.3451, "step": 2507 }, { "epoch": 0.13711848885365555, "grad_norm": 1.5419089794158936, "learning_rate": 1.966989568441487e-05, "loss": 1.2791, "step": 2508 }, { "epoch": 0.13717316129737708, "grad_norm": 2.0568740367889404, "learning_rate": 1.9669429883161214e-05, "loss": 1.3172, "step": 2509 }, { "epoch": 0.13722783374109865, "grad_norm": 1.900644063949585, "learning_rate": 1.9668963759022612e-05, "loss": 1.3473, "step": 2510 }, { "epoch": 0.13728250618482019, "grad_norm": 1.3349337577819824, "learning_rate": 1.9668497312014617e-05, "loss": 1.4768, "step": 2511 }, { "epoch": 0.13733717862854175, "grad_norm": 1.012161135673523, "learning_rate": 1.966803054215281e-05, "loss": 1.6341, "step": 2512 }, { "epoch": 0.13739185107226332, "grad_norm": 1.418579339981079, "learning_rate": 1.9667563449452782e-05, "loss": 1.3024, "step": 2513 }, { "epoch": 0.13744652351598485, "grad_norm": 1.8714749813079834, "learning_rate": 1.9667096033930116e-05, "loss": 1.4741, "step": 2514 }, { "epoch": 0.13750119595970642, "grad_norm": 1.4544050693511963, "learning_rate": 1.9666628295600433e-05, "loss": 1.4483, "step": 2515 }, { "epoch": 0.13755586840342796, "grad_norm": 1.456754207611084, "learning_rate": 1.966616023447935e-05, "loss": 1.5784, "step": 2516 }, { "epoch": 0.13761054084714952, "grad_norm": 1.4714404344558716, "learning_rate": 1.9665691850582492e-05, "loss": 1.6432, "step": 2517 }, { "epoch": 0.13766521329087106, "grad_norm": 1.993889570236206, "learning_rate": 1.96652231439255e-05, "loss": 1.3521, "step": 2518 }, { "epoch": 0.13771988573459262, "grad_norm": 1.6495710611343384, "learning_rate": 1.966475411452403e-05, "loss": 1.5107, "step": 2519 }, { "epoch": 0.1377745581783142, "grad_norm": 1.531771183013916, "learning_rate": 1.966428476239374e-05, "loss": 1.5131, "step": 2520 }, { "epoch": 0.13782923062203573, "grad_norm": 1.7763798236846924, "learning_rate": 1.9663815087550305e-05, "loss": 1.5072, "step": 2521 }, { "epoch": 0.1378839030657573, "grad_norm": 1.0507419109344482, "learning_rate": 1.9663345090009406e-05, "loss": 1.3767, "step": 2522 }, { "epoch": 0.13793857550947883, "grad_norm": 1.5640578269958496, "learning_rate": 1.9662874769786734e-05, "loss": 1.1813, "step": 2523 }, { "epoch": 0.1379932479532004, "grad_norm": 1.5187586545944214, "learning_rate": 1.9662404126898008e-05, "loss": 1.5149, "step": 2524 }, { "epoch": 0.13804792039692193, "grad_norm": 1.626383662223816, "learning_rate": 1.966193316135893e-05, "loss": 1.2775, "step": 2525 }, { "epoch": 0.1381025928406435, "grad_norm": 1.5545402765274048, "learning_rate": 1.966146187318523e-05, "loss": 1.2701, "step": 2526 }, { "epoch": 0.13815726528436506, "grad_norm": 1.266821026802063, "learning_rate": 1.966099026239265e-05, "loss": 1.5035, "step": 2527 }, { "epoch": 0.1382119377280866, "grad_norm": 1.6525177955627441, "learning_rate": 1.9660518328996933e-05, "loss": 1.4366, "step": 2528 }, { "epoch": 0.13826661017180816, "grad_norm": 1.3195972442626953, "learning_rate": 1.9660046073013838e-05, "loss": 1.4813, "step": 2529 }, { "epoch": 0.1383212826155297, "grad_norm": 1.4764735698699951, "learning_rate": 1.9659573494459142e-05, "loss": 1.558, "step": 2530 }, { "epoch": 0.13837595505925127, "grad_norm": 1.5515550374984741, "learning_rate": 1.9659100593348615e-05, "loss": 1.361, "step": 2531 }, { "epoch": 0.1384306275029728, "grad_norm": 1.6816940307617188, "learning_rate": 1.9658627369698052e-05, "loss": 1.3616, "step": 2532 }, { "epoch": 0.13848529994669437, "grad_norm": 1.868516445159912, "learning_rate": 1.9658153823523262e-05, "loss": 1.5475, "step": 2533 }, { "epoch": 0.13853997239041593, "grad_norm": 1.3899086713790894, "learning_rate": 1.965767995484005e-05, "loss": 1.5707, "step": 2534 }, { "epoch": 0.13859464483413747, "grad_norm": 1.9367003440856934, "learning_rate": 1.9657205763664244e-05, "loss": 1.3027, "step": 2535 }, { "epoch": 0.13864931727785904, "grad_norm": 1.366355299949646, "learning_rate": 1.965673125001167e-05, "loss": 1.5978, "step": 2536 }, { "epoch": 0.13870398972158057, "grad_norm": 1.5384535789489746, "learning_rate": 1.9656256413898184e-05, "loss": 1.3864, "step": 2537 }, { "epoch": 0.13875866216530214, "grad_norm": 1.910643219947815, "learning_rate": 1.9655781255339638e-05, "loss": 1.5055, "step": 2538 }, { "epoch": 0.13881333460902368, "grad_norm": 1.350209355354309, "learning_rate": 1.9655305774351898e-05, "loss": 1.5505, "step": 2539 }, { "epoch": 0.13886800705274524, "grad_norm": 1.4560250043869019, "learning_rate": 1.9654829970950838e-05, "loss": 1.5914, "step": 2540 }, { "epoch": 0.1389226794964668, "grad_norm": 1.2818232774734497, "learning_rate": 1.9654353845152352e-05, "loss": 1.5748, "step": 2541 }, { "epoch": 0.13897735194018834, "grad_norm": 1.4856804609298706, "learning_rate": 1.9653877396972337e-05, "loss": 1.6067, "step": 2542 }, { "epoch": 0.1390320243839099, "grad_norm": 1.4729632139205933, "learning_rate": 1.96534006264267e-05, "loss": 1.5647, "step": 2543 }, { "epoch": 0.13908669682763145, "grad_norm": 1.693721055984497, "learning_rate": 1.9652923533531365e-05, "loss": 1.2943, "step": 2544 }, { "epoch": 0.139141369271353, "grad_norm": 1.124879240989685, "learning_rate": 1.965244611830226e-05, "loss": 1.2632, "step": 2545 }, { "epoch": 0.13919604171507455, "grad_norm": 1.6283485889434814, "learning_rate": 1.965196838075533e-05, "loss": 1.3152, "step": 2546 }, { "epoch": 0.1392507141587961, "grad_norm": 1.3691980838775635, "learning_rate": 1.965149032090653e-05, "loss": 1.4319, "step": 2547 }, { "epoch": 0.13930538660251768, "grad_norm": 1.420490026473999, "learning_rate": 1.9651011938771815e-05, "loss": 1.3121, "step": 2548 }, { "epoch": 0.13936005904623922, "grad_norm": 1.3723042011260986, "learning_rate": 1.9650533234367167e-05, "loss": 1.4496, "step": 2549 }, { "epoch": 0.13941473148996078, "grad_norm": 1.4776685237884521, "learning_rate": 1.965005420770857e-05, "loss": 1.4305, "step": 2550 }, { "epoch": 0.13946940393368232, "grad_norm": 1.4880717992782593, "learning_rate": 1.9649574858812016e-05, "loss": 1.4915, "step": 2551 }, { "epoch": 0.13952407637740388, "grad_norm": 1.4838846921920776, "learning_rate": 1.9649095187693517e-05, "loss": 1.5235, "step": 2552 }, { "epoch": 0.13957874882112542, "grad_norm": 1.391142725944519, "learning_rate": 1.9648615194369082e-05, "loss": 1.3626, "step": 2553 }, { "epoch": 0.13963342126484699, "grad_norm": 1.7742605209350586, "learning_rate": 1.9648134878854747e-05, "loss": 1.1609, "step": 2554 }, { "epoch": 0.13968809370856855, "grad_norm": 2.076310873031616, "learning_rate": 1.9647654241166552e-05, "loss": 1.6693, "step": 2555 }, { "epoch": 0.1397427661522901, "grad_norm": 1.1208854913711548, "learning_rate": 1.964717328132054e-05, "loss": 1.5565, "step": 2556 }, { "epoch": 0.13979743859601165, "grad_norm": 1.572403907775879, "learning_rate": 1.9646691999332773e-05, "loss": 1.502, "step": 2557 }, { "epoch": 0.1398521110397332, "grad_norm": 1.428270697593689, "learning_rate": 1.9646210395219328e-05, "loss": 1.4482, "step": 2558 }, { "epoch": 0.13990678348345476, "grad_norm": 1.2416245937347412, "learning_rate": 1.964572846899628e-05, "loss": 1.5846, "step": 2559 }, { "epoch": 0.1399614559271763, "grad_norm": 1.7328678369522095, "learning_rate": 1.9645246220679722e-05, "loss": 1.4541, "step": 2560 }, { "epoch": 0.14001612837089786, "grad_norm": 3.0775725841522217, "learning_rate": 1.9644763650285758e-05, "loss": 1.3006, "step": 2561 }, { "epoch": 0.14007080081461942, "grad_norm": 1.3423646688461304, "learning_rate": 1.9644280757830508e-05, "loss": 1.6248, "step": 2562 }, { "epoch": 0.14012547325834096, "grad_norm": 1.5818288326263428, "learning_rate": 1.9643797543330092e-05, "loss": 1.599, "step": 2563 }, { "epoch": 0.14018014570206253, "grad_norm": 1.3417870998382568, "learning_rate": 1.9643314006800645e-05, "loss": 1.6507, "step": 2564 }, { "epoch": 0.14023481814578406, "grad_norm": 2.1658575534820557, "learning_rate": 1.9642830148258314e-05, "loss": 1.3631, "step": 2565 }, { "epoch": 0.14028949058950563, "grad_norm": 1.8508176803588867, "learning_rate": 1.9642345967719255e-05, "loss": 1.5296, "step": 2566 }, { "epoch": 0.14034416303322717, "grad_norm": 1.6255558729171753, "learning_rate": 1.964186146519964e-05, "loss": 1.4032, "step": 2567 }, { "epoch": 0.14039883547694873, "grad_norm": 1.3210779428482056, "learning_rate": 1.9641376640715646e-05, "loss": 1.4764, "step": 2568 }, { "epoch": 0.1404535079206703, "grad_norm": 1.2386341094970703, "learning_rate": 1.9640891494283463e-05, "loss": 1.7676, "step": 2569 }, { "epoch": 0.14050818036439183, "grad_norm": 1.4993996620178223, "learning_rate": 1.964040602591929e-05, "loss": 1.2751, "step": 2570 }, { "epoch": 0.1405628528081134, "grad_norm": 1.7058384418487549, "learning_rate": 1.9639920235639334e-05, "loss": 1.6215, "step": 2571 }, { "epoch": 0.14061752525183494, "grad_norm": 1.4490525722503662, "learning_rate": 1.9639434123459823e-05, "loss": 1.6792, "step": 2572 }, { "epoch": 0.1406721976955565, "grad_norm": 1.6329965591430664, "learning_rate": 1.9638947689396986e-05, "loss": 1.6167, "step": 2573 }, { "epoch": 0.14072687013927804, "grad_norm": 1.6994796991348267, "learning_rate": 1.9638460933467068e-05, "loss": 1.3196, "step": 2574 }, { "epoch": 0.1407815425829996, "grad_norm": 2.248826503753662, "learning_rate": 1.963797385568632e-05, "loss": 1.5623, "step": 2575 }, { "epoch": 0.14083621502672117, "grad_norm": 1.2802273035049438, "learning_rate": 1.963748645607101e-05, "loss": 1.4337, "step": 2576 }, { "epoch": 0.1408908874704427, "grad_norm": 1.8097304105758667, "learning_rate": 1.9636998734637414e-05, "loss": 1.4913, "step": 2577 }, { "epoch": 0.14094555991416427, "grad_norm": 1.945694088935852, "learning_rate": 1.9636510691401812e-05, "loss": 1.4054, "step": 2578 }, { "epoch": 0.1410002323578858, "grad_norm": 1.697110652923584, "learning_rate": 1.963602232638051e-05, "loss": 1.3165, "step": 2579 }, { "epoch": 0.14105490480160737, "grad_norm": 1.382455587387085, "learning_rate": 1.963553363958981e-05, "loss": 1.5584, "step": 2580 }, { "epoch": 0.1411095772453289, "grad_norm": 1.461669683456421, "learning_rate": 1.963504463104603e-05, "loss": 1.6513, "step": 2581 }, { "epoch": 0.14116424968905047, "grad_norm": 1.7758632898330688, "learning_rate": 1.96345553007655e-05, "loss": 1.2647, "step": 2582 }, { "epoch": 0.14121892213277204, "grad_norm": 1.1867246627807617, "learning_rate": 1.963406564876456e-05, "loss": 1.5777, "step": 2583 }, { "epoch": 0.14127359457649358, "grad_norm": 1.5158830881118774, "learning_rate": 1.9633575675059563e-05, "loss": 1.3875, "step": 2584 }, { "epoch": 0.14132826702021514, "grad_norm": 1.5172227621078491, "learning_rate": 1.9633085379666868e-05, "loss": 1.1839, "step": 2585 }, { "epoch": 0.14138293946393668, "grad_norm": 1.4862723350524902, "learning_rate": 1.9632594762602847e-05, "loss": 1.508, "step": 2586 }, { "epoch": 0.14143761190765824, "grad_norm": 1.2689878940582275, "learning_rate": 1.9632103823883882e-05, "loss": 1.3444, "step": 2587 }, { "epoch": 0.14149228435137978, "grad_norm": 1.3394643068313599, "learning_rate": 1.963161256352637e-05, "loss": 1.535, "step": 2588 }, { "epoch": 0.14154695679510135, "grad_norm": 1.2809981107711792, "learning_rate": 1.9631120981546713e-05, "loss": 1.2861, "step": 2589 }, { "epoch": 0.1416016292388229, "grad_norm": 1.5546852350234985, "learning_rate": 1.9630629077961327e-05, "loss": 1.4104, "step": 2590 }, { "epoch": 0.14165630168254445, "grad_norm": 1.5522106885910034, "learning_rate": 1.963013685278663e-05, "loss": 1.4953, "step": 2591 }, { "epoch": 0.14171097412626601, "grad_norm": 1.3995188474655151, "learning_rate": 1.962964430603907e-05, "loss": 1.3861, "step": 2592 }, { "epoch": 0.14176564656998755, "grad_norm": 1.176127552986145, "learning_rate": 1.9629151437735095e-05, "loss": 1.3836, "step": 2593 }, { "epoch": 0.14182031901370912, "grad_norm": 1.6217758655548096, "learning_rate": 1.9628658247891154e-05, "loss": 1.484, "step": 2594 }, { "epoch": 0.14187499145743065, "grad_norm": 1.4187021255493164, "learning_rate": 1.9628164736523717e-05, "loss": 1.6344, "step": 2595 }, { "epoch": 0.14192966390115222, "grad_norm": 1.263230323791504, "learning_rate": 1.9627670903649273e-05, "loss": 1.3889, "step": 2596 }, { "epoch": 0.14198433634487378, "grad_norm": 1.439340353012085, "learning_rate": 1.96271767492843e-05, "loss": 1.6, "step": 2597 }, { "epoch": 0.14203900878859532, "grad_norm": 1.308491826057434, "learning_rate": 1.962668227344531e-05, "loss": 1.165, "step": 2598 }, { "epoch": 0.1420936812323169, "grad_norm": 1.2014249563217163, "learning_rate": 1.96261874761488e-05, "loss": 1.3434, "step": 2599 }, { "epoch": 0.14214835367603842, "grad_norm": 1.3925282955169678, "learning_rate": 1.962569235741131e-05, "loss": 1.3069, "step": 2600 }, { "epoch": 0.14220302611976, "grad_norm": 1.4150800704956055, "learning_rate": 1.9625196917249362e-05, "loss": 1.5511, "step": 2601 }, { "epoch": 0.14225769856348155, "grad_norm": 1.3593544960021973, "learning_rate": 1.96247011556795e-05, "loss": 1.446, "step": 2602 }, { "epoch": 0.1423123710072031, "grad_norm": 1.7509582042694092, "learning_rate": 1.9624205072718285e-05, "loss": 1.5267, "step": 2603 }, { "epoch": 0.14236704345092466, "grad_norm": 1.8738083839416504, "learning_rate": 1.9623708668382276e-05, "loss": 1.4922, "step": 2604 }, { "epoch": 0.1424217158946462, "grad_norm": 1.7356810569763184, "learning_rate": 1.9623211942688055e-05, "loss": 1.5389, "step": 2605 }, { "epoch": 0.14247638833836776, "grad_norm": 1.5631537437438965, "learning_rate": 1.9622714895652204e-05, "loss": 1.6681, "step": 2606 }, { "epoch": 0.1425310607820893, "grad_norm": 1.5924668312072754, "learning_rate": 1.962221752729132e-05, "loss": 1.548, "step": 2607 }, { "epoch": 0.14258573322581086, "grad_norm": 2.181004285812378, "learning_rate": 1.962171983762202e-05, "loss": 1.6712, "step": 2608 }, { "epoch": 0.14264040566953243, "grad_norm": 1.7074517011642456, "learning_rate": 1.962122182666091e-05, "loss": 1.3624, "step": 2609 }, { "epoch": 0.14269507811325396, "grad_norm": 1.5250459909439087, "learning_rate": 1.9620723494424627e-05, "loss": 1.1952, "step": 2610 }, { "epoch": 0.14274975055697553, "grad_norm": 2.819366693496704, "learning_rate": 1.9620224840929812e-05, "loss": 1.2032, "step": 2611 }, { "epoch": 0.14280442300069707, "grad_norm": 2.1775643825531006, "learning_rate": 1.9619725866193117e-05, "loss": 1.2634, "step": 2612 }, { "epoch": 0.14285909544441863, "grad_norm": 1.5502768754959106, "learning_rate": 1.96192265702312e-05, "loss": 1.4416, "step": 2613 }, { "epoch": 0.14291376788814017, "grad_norm": 1.5395612716674805, "learning_rate": 1.9618726953060734e-05, "loss": 1.8732, "step": 2614 }, { "epoch": 0.14296844033186173, "grad_norm": 1.4280977249145508, "learning_rate": 1.961822701469841e-05, "loss": 1.468, "step": 2615 }, { "epoch": 0.1430231127755833, "grad_norm": 1.2990994453430176, "learning_rate": 1.961772675516091e-05, "loss": 1.5483, "step": 2616 }, { "epoch": 0.14307778521930484, "grad_norm": 1.669731616973877, "learning_rate": 1.9617226174464945e-05, "loss": 1.5446, "step": 2617 }, { "epoch": 0.1431324576630264, "grad_norm": 1.8880798816680908, "learning_rate": 1.9616725272627234e-05, "loss": 1.6301, "step": 2618 }, { "epoch": 0.14318713010674794, "grad_norm": 1.603151798248291, "learning_rate": 1.9616224049664495e-05, "loss": 1.4017, "step": 2619 }, { "epoch": 0.1432418025504695, "grad_norm": 1.5842560529708862, "learning_rate": 1.9615722505593474e-05, "loss": 1.5318, "step": 2620 }, { "epoch": 0.14329647499419104, "grad_norm": 3.4685018062591553, "learning_rate": 1.9615220640430915e-05, "loss": 1.3998, "step": 2621 }, { "epoch": 0.1433511474379126, "grad_norm": 1.4899107217788696, "learning_rate": 1.9614718454193574e-05, "loss": 1.1959, "step": 2622 }, { "epoch": 0.14340581988163417, "grad_norm": 2.1780176162719727, "learning_rate": 1.9614215946898224e-05, "loss": 1.3712, "step": 2623 }, { "epoch": 0.1434604923253557, "grad_norm": 1.3235328197479248, "learning_rate": 1.9613713118561638e-05, "loss": 1.6041, "step": 2624 }, { "epoch": 0.14351516476907727, "grad_norm": 2.277754306793213, "learning_rate": 1.9613209969200616e-05, "loss": 1.5777, "step": 2625 }, { "epoch": 0.1435698372127988, "grad_norm": 1.542559266090393, "learning_rate": 1.9612706498831956e-05, "loss": 1.3149, "step": 2626 }, { "epoch": 0.14362450965652038, "grad_norm": 1.6937458515167236, "learning_rate": 1.961220270747247e-05, "loss": 1.5399, "step": 2627 }, { "epoch": 0.14367918210024191, "grad_norm": 1.7041488885879517, "learning_rate": 1.9611698595138974e-05, "loss": 1.5951, "step": 2628 }, { "epoch": 0.14373385454396348, "grad_norm": 1.7613577842712402, "learning_rate": 1.961119416184831e-05, "loss": 1.459, "step": 2629 }, { "epoch": 0.14378852698768504, "grad_norm": 1.8363337516784668, "learning_rate": 1.961068940761732e-05, "loss": 1.471, "step": 2630 }, { "epoch": 0.14384319943140658, "grad_norm": 1.3823611736297607, "learning_rate": 1.961018433246286e-05, "loss": 1.2665, "step": 2631 }, { "epoch": 0.14389787187512815, "grad_norm": 1.207023024559021, "learning_rate": 1.9609678936401794e-05, "loss": 1.5101, "step": 2632 }, { "epoch": 0.14395254431884968, "grad_norm": 1.8095887899398804, "learning_rate": 1.9609173219450998e-05, "loss": 1.4618, "step": 2633 }, { "epoch": 0.14400721676257125, "grad_norm": 1.2420976161956787, "learning_rate": 1.9608667181627358e-05, "loss": 1.4226, "step": 2634 }, { "epoch": 0.1440618892062928, "grad_norm": 1.1084246635437012, "learning_rate": 1.9608160822947772e-05, "loss": 1.6704, "step": 2635 }, { "epoch": 0.14411656165001435, "grad_norm": 1.6220532655715942, "learning_rate": 1.9607654143429156e-05, "loss": 1.4322, "step": 2636 }, { "epoch": 0.14417123409373592, "grad_norm": 1.3289467096328735, "learning_rate": 1.9607147143088418e-05, "loss": 1.5682, "step": 2637 }, { "epoch": 0.14422590653745745, "grad_norm": 1.3991436958312988, "learning_rate": 1.9606639821942496e-05, "loss": 1.5272, "step": 2638 }, { "epoch": 0.14428057898117902, "grad_norm": 1.603367567062378, "learning_rate": 1.9606132180008324e-05, "loss": 1.5031, "step": 2639 }, { "epoch": 0.14433525142490056, "grad_norm": 1.1467663049697876, "learning_rate": 1.960562421730286e-05, "loss": 1.4717, "step": 2640 }, { "epoch": 0.14438992386862212, "grad_norm": 1.2220426797866821, "learning_rate": 1.960511593384306e-05, "loss": 1.6488, "step": 2641 }, { "epoch": 0.14444459631234366, "grad_norm": 1.5235321521759033, "learning_rate": 1.9604607329645905e-05, "loss": 1.5032, "step": 2642 }, { "epoch": 0.14449926875606522, "grad_norm": 1.6421459913253784, "learning_rate": 1.960409840472837e-05, "loss": 1.3355, "step": 2643 }, { "epoch": 0.1445539411997868, "grad_norm": 1.4350905418395996, "learning_rate": 1.960358915910745e-05, "loss": 1.4152, "step": 2644 }, { "epoch": 0.14460861364350833, "grad_norm": 1.3406784534454346, "learning_rate": 1.9603079592800157e-05, "loss": 1.5398, "step": 2645 }, { "epoch": 0.1446632860872299, "grad_norm": 1.4730103015899658, "learning_rate": 1.96025697058235e-05, "loss": 1.3873, "step": 2646 }, { "epoch": 0.14471795853095143, "grad_norm": 1.5071189403533936, "learning_rate": 1.9602059498194508e-05, "loss": 1.2011, "step": 2647 }, { "epoch": 0.144772630974673, "grad_norm": 1.4401670694351196, "learning_rate": 1.9601548969930214e-05, "loss": 1.2212, "step": 2648 }, { "epoch": 0.14482730341839453, "grad_norm": 1.2177810668945312, "learning_rate": 1.9601038121047674e-05, "loss": 1.4448, "step": 2649 }, { "epoch": 0.1448819758621161, "grad_norm": 1.4832611083984375, "learning_rate": 1.960052695156394e-05, "loss": 1.425, "step": 2650 }, { "epoch": 0.14493664830583766, "grad_norm": 1.9168627262115479, "learning_rate": 1.9600015461496086e-05, "loss": 1.4725, "step": 2651 }, { "epoch": 0.1449913207495592, "grad_norm": 1.692021369934082, "learning_rate": 1.9599503650861183e-05, "loss": 1.4249, "step": 2652 }, { "epoch": 0.14504599319328076, "grad_norm": 1.6118401288986206, "learning_rate": 1.9598991519676328e-05, "loss": 1.4332, "step": 2653 }, { "epoch": 0.1451006656370023, "grad_norm": 0.9950586557388306, "learning_rate": 1.9598479067958624e-05, "loss": 1.3657, "step": 2654 }, { "epoch": 0.14515533808072387, "grad_norm": 1.4401435852050781, "learning_rate": 1.959796629572518e-05, "loss": 1.4928, "step": 2655 }, { "epoch": 0.1452100105244454, "grad_norm": 1.2951931953430176, "learning_rate": 1.9597453202993118e-05, "loss": 1.5055, "step": 2656 }, { "epoch": 0.14526468296816697, "grad_norm": 1.2339435815811157, "learning_rate": 1.9596939789779573e-05, "loss": 1.5521, "step": 2657 }, { "epoch": 0.14531935541188853, "grad_norm": 1.2052028179168701, "learning_rate": 1.9596426056101688e-05, "loss": 1.7182, "step": 2658 }, { "epoch": 0.14537402785561007, "grad_norm": 1.3100051879882812, "learning_rate": 1.959591200197662e-05, "loss": 1.2976, "step": 2659 }, { "epoch": 0.14542870029933164, "grad_norm": 2.9325480461120605, "learning_rate": 1.959539762742153e-05, "loss": 1.5904, "step": 2660 }, { "epoch": 0.14548337274305317, "grad_norm": 1.1677049398422241, "learning_rate": 1.9594882932453596e-05, "loss": 1.6708, "step": 2661 }, { "epoch": 0.14553804518677474, "grad_norm": 1.875559687614441, "learning_rate": 1.959436791709001e-05, "loss": 1.5981, "step": 2662 }, { "epoch": 0.14559271763049628, "grad_norm": 1.2223438024520874, "learning_rate": 1.9593852581347962e-05, "loss": 1.3515, "step": 2663 }, { "epoch": 0.14564739007421784, "grad_norm": 1.8150825500488281, "learning_rate": 1.9593336925244662e-05, "loss": 1.3613, "step": 2664 }, { "epoch": 0.1457020625179394, "grad_norm": 1.545719861984253, "learning_rate": 1.9592820948797337e-05, "loss": 1.4454, "step": 2665 }, { "epoch": 0.14575673496166094, "grad_norm": 1.7881993055343628, "learning_rate": 1.9592304652023208e-05, "loss": 1.3501, "step": 2666 }, { "epoch": 0.1458114074053825, "grad_norm": 1.5180439949035645, "learning_rate": 1.9591788034939518e-05, "loss": 1.3305, "step": 2667 }, { "epoch": 0.14586607984910405, "grad_norm": 1.663390040397644, "learning_rate": 1.9591271097563512e-05, "loss": 1.4426, "step": 2668 }, { "epoch": 0.1459207522928256, "grad_norm": 1.7090823650360107, "learning_rate": 1.9590753839912463e-05, "loss": 1.5617, "step": 2669 }, { "epoch": 0.14597542473654715, "grad_norm": 1.575528621673584, "learning_rate": 1.9590236262003634e-05, "loss": 1.3786, "step": 2670 }, { "epoch": 0.14603009718026871, "grad_norm": 1.281819462776184, "learning_rate": 1.9589718363854315e-05, "loss": 1.5709, "step": 2671 }, { "epoch": 0.14608476962399028, "grad_norm": 1.764565348625183, "learning_rate": 1.9589200145481797e-05, "loss": 1.5586, "step": 2672 }, { "epoch": 0.14613944206771182, "grad_norm": 2.0200586318969727, "learning_rate": 1.9588681606903385e-05, "loss": 1.5466, "step": 2673 }, { "epoch": 0.14619411451143338, "grad_norm": 1.177758812904358, "learning_rate": 1.958816274813639e-05, "loss": 1.4109, "step": 2674 }, { "epoch": 0.14624878695515492, "grad_norm": 1.3620400428771973, "learning_rate": 1.9587643569198144e-05, "loss": 1.4102, "step": 2675 }, { "epoch": 0.14630345939887648, "grad_norm": 1.4385639429092407, "learning_rate": 1.958712407010598e-05, "loss": 1.4531, "step": 2676 }, { "epoch": 0.14635813184259802, "grad_norm": 1.2191437482833862, "learning_rate": 1.9586604250877248e-05, "loss": 1.6861, "step": 2677 }, { "epoch": 0.1464128042863196, "grad_norm": 1.4001915454864502, "learning_rate": 1.9586084111529304e-05, "loss": 1.349, "step": 2678 }, { "epoch": 0.14646747673004115, "grad_norm": 1.6560754776000977, "learning_rate": 1.958556365207952e-05, "loss": 1.6232, "step": 2679 }, { "epoch": 0.1465221491737627, "grad_norm": 1.4868851900100708, "learning_rate": 1.9585042872545266e-05, "loss": 1.5303, "step": 2680 }, { "epoch": 0.14657682161748425, "grad_norm": 1.4553309679031372, "learning_rate": 1.9584521772943944e-05, "loss": 1.2612, "step": 2681 }, { "epoch": 0.1466314940612058, "grad_norm": 1.7114171981811523, "learning_rate": 1.9584000353292944e-05, "loss": 1.66, "step": 2682 }, { "epoch": 0.14668616650492736, "grad_norm": 1.620546817779541, "learning_rate": 1.9583478613609684e-05, "loss": 1.4752, "step": 2683 }, { "epoch": 0.1467408389486489, "grad_norm": 1.5602649450302124, "learning_rate": 1.958295655391159e-05, "loss": 1.4577, "step": 2684 }, { "epoch": 0.14679551139237046, "grad_norm": 1.3589904308319092, "learning_rate": 1.9582434174216084e-05, "loss": 1.5035, "step": 2685 }, { "epoch": 0.14685018383609202, "grad_norm": 1.4770058393478394, "learning_rate": 1.9581911474540617e-05, "loss": 1.3609, "step": 2686 }, { "epoch": 0.14690485627981356, "grad_norm": 1.282721996307373, "learning_rate": 1.958138845490264e-05, "loss": 1.6109, "step": 2687 }, { "epoch": 0.14695952872353513, "grad_norm": 2.1886119842529297, "learning_rate": 1.958086511531962e-05, "loss": 1.537, "step": 2688 }, { "epoch": 0.14701420116725666, "grad_norm": 1.6992286443710327, "learning_rate": 1.958034145580903e-05, "loss": 1.4122, "step": 2689 }, { "epoch": 0.14706887361097823, "grad_norm": 2.0792040824890137, "learning_rate": 1.957981747638836e-05, "loss": 1.3354, "step": 2690 }, { "epoch": 0.14712354605469977, "grad_norm": 1.3296406269073486, "learning_rate": 1.9579293177075106e-05, "loss": 1.0848, "step": 2691 }, { "epoch": 0.14717821849842133, "grad_norm": 1.4308089017868042, "learning_rate": 1.957876855788677e-05, "loss": 1.4408, "step": 2692 }, { "epoch": 0.1472328909421429, "grad_norm": 1.4248777627944946, "learning_rate": 1.957824361884088e-05, "loss": 1.2637, "step": 2693 }, { "epoch": 0.14728756338586443, "grad_norm": 1.3060141801834106, "learning_rate": 1.9577718359954955e-05, "loss": 1.3245, "step": 2694 }, { "epoch": 0.147342235829586, "grad_norm": 1.8590017557144165, "learning_rate": 1.9577192781246542e-05, "loss": 1.278, "step": 2695 }, { "epoch": 0.14739690827330754, "grad_norm": 1.5089783668518066, "learning_rate": 1.9576666882733186e-05, "loss": 1.3372, "step": 2696 }, { "epoch": 0.1474515807170291, "grad_norm": 1.7234444618225098, "learning_rate": 1.9576140664432454e-05, "loss": 1.3645, "step": 2697 }, { "epoch": 0.14750625316075064, "grad_norm": 1.473320722579956, "learning_rate": 1.957561412636191e-05, "loss": 1.5525, "step": 2698 }, { "epoch": 0.1475609256044722, "grad_norm": 1.4793075323104858, "learning_rate": 1.9575087268539144e-05, "loss": 1.4016, "step": 2699 }, { "epoch": 0.14761559804819377, "grad_norm": 1.5482040643692017, "learning_rate": 1.957456009098174e-05, "loss": 1.3775, "step": 2700 }, { "epoch": 0.1476702704919153, "grad_norm": 1.8536432981491089, "learning_rate": 1.9574032593707314e-05, "loss": 1.5379, "step": 2701 }, { "epoch": 0.14772494293563687, "grad_norm": 1.6101540327072144, "learning_rate": 1.9573504776733467e-05, "loss": 1.6675, "step": 2702 }, { "epoch": 0.1477796153793584, "grad_norm": 1.4261025190353394, "learning_rate": 1.9572976640077836e-05, "loss": 1.3996, "step": 2703 }, { "epoch": 0.14783428782307997, "grad_norm": 1.5292493104934692, "learning_rate": 1.957244818375805e-05, "loss": 1.301, "step": 2704 }, { "epoch": 0.14788896026680154, "grad_norm": 1.7396742105484009, "learning_rate": 1.9571919407791754e-05, "loss": 1.5751, "step": 2705 }, { "epoch": 0.14794363271052308, "grad_norm": 1.180357813835144, "learning_rate": 1.9571390312196608e-05, "loss": 1.5846, "step": 2706 }, { "epoch": 0.14799830515424464, "grad_norm": 1.1251312494277954, "learning_rate": 1.9570860896990283e-05, "loss": 1.5328, "step": 2707 }, { "epoch": 0.14805297759796618, "grad_norm": 1.6292344331741333, "learning_rate": 1.957033116219045e-05, "loss": 1.5934, "step": 2708 }, { "epoch": 0.14810765004168774, "grad_norm": 1.1664451360702515, "learning_rate": 1.956980110781481e-05, "loss": 1.4818, "step": 2709 }, { "epoch": 0.14816232248540928, "grad_norm": 1.6904081106185913, "learning_rate": 1.9569270733881045e-05, "loss": 1.3416, "step": 2710 }, { "epoch": 0.14821699492913085, "grad_norm": 1.6706860065460205, "learning_rate": 1.956874004040688e-05, "loss": 1.2838, "step": 2711 }, { "epoch": 0.1482716673728524, "grad_norm": 1.3999607563018799, "learning_rate": 1.956820902741003e-05, "loss": 1.6425, "step": 2712 }, { "epoch": 0.14832633981657395, "grad_norm": 1.6714134216308594, "learning_rate": 1.9567677694908228e-05, "loss": 1.441, "step": 2713 }, { "epoch": 0.14838101226029551, "grad_norm": 1.2487248182296753, "learning_rate": 1.9567146042919217e-05, "loss": 1.4318, "step": 2714 }, { "epoch": 0.14843568470401705, "grad_norm": 1.6241528987884521, "learning_rate": 1.956661407146075e-05, "loss": 1.3083, "step": 2715 }, { "epoch": 0.14849035714773862, "grad_norm": 1.4826332330703735, "learning_rate": 1.956608178055059e-05, "loss": 1.5957, "step": 2716 }, { "epoch": 0.14854502959146015, "grad_norm": 1.2624613046646118, "learning_rate": 1.956554917020651e-05, "loss": 1.2993, "step": 2717 }, { "epoch": 0.14859970203518172, "grad_norm": 1.3189114332199097, "learning_rate": 1.95650162404463e-05, "loss": 1.2791, "step": 2718 }, { "epoch": 0.14865437447890328, "grad_norm": 1.2706133127212524, "learning_rate": 1.9564482991287753e-05, "loss": 1.5174, "step": 2719 }, { "epoch": 0.14870904692262482, "grad_norm": 1.5627316236495972, "learning_rate": 1.956394942274867e-05, "loss": 1.6065, "step": 2720 }, { "epoch": 0.1487637193663464, "grad_norm": 1.3617135286331177, "learning_rate": 1.9563415534846877e-05, "loss": 1.5531, "step": 2721 }, { "epoch": 0.14881839181006792, "grad_norm": 1.2796398401260376, "learning_rate": 1.9562881327600197e-05, "loss": 1.4778, "step": 2722 }, { "epoch": 0.1488730642537895, "grad_norm": 1.1959648132324219, "learning_rate": 1.956234680102647e-05, "loss": 1.2877, "step": 2723 }, { "epoch": 0.14892773669751103, "grad_norm": 1.4939475059509277, "learning_rate": 1.9561811955143547e-05, "loss": 1.2952, "step": 2724 }, { "epoch": 0.1489824091412326, "grad_norm": 1.234928011894226, "learning_rate": 1.9561276789969282e-05, "loss": 1.3294, "step": 2725 }, { "epoch": 0.14903708158495416, "grad_norm": 1.532353401184082, "learning_rate": 1.956074130552155e-05, "loss": 1.6007, "step": 2726 }, { "epoch": 0.1490917540286757, "grad_norm": 1.1824036836624146, "learning_rate": 1.956020550181823e-05, "loss": 1.4715, "step": 2727 }, { "epoch": 0.14914642647239726, "grad_norm": 1.463747501373291, "learning_rate": 1.9559669378877218e-05, "loss": 1.3461, "step": 2728 }, { "epoch": 0.1492010989161188, "grad_norm": 1.4901632070541382, "learning_rate": 1.955913293671641e-05, "loss": 1.3786, "step": 2729 }, { "epoch": 0.14925577135984036, "grad_norm": 1.8185393810272217, "learning_rate": 1.955859617535372e-05, "loss": 1.4668, "step": 2730 }, { "epoch": 0.1493104438035619, "grad_norm": 1.7121597528457642, "learning_rate": 1.955805909480708e-05, "loss": 1.3968, "step": 2731 }, { "epoch": 0.14936511624728346, "grad_norm": 1.2006827592849731, "learning_rate": 1.955752169509441e-05, "loss": 1.3471, "step": 2732 }, { "epoch": 0.14941978869100503, "grad_norm": 1.5438674688339233, "learning_rate": 1.955698397623367e-05, "loss": 1.4398, "step": 2733 }, { "epoch": 0.14947446113472657, "grad_norm": 1.4795979261398315, "learning_rate": 1.9556445938242805e-05, "loss": 1.364, "step": 2734 }, { "epoch": 0.14952913357844813, "grad_norm": 1.415423035621643, "learning_rate": 1.9555907581139787e-05, "loss": 1.493, "step": 2735 }, { "epoch": 0.14958380602216967, "grad_norm": 1.5461152791976929, "learning_rate": 1.9555368904942593e-05, "loss": 1.3425, "step": 2736 }, { "epoch": 0.14963847846589123, "grad_norm": 1.4316011667251587, "learning_rate": 1.9554829909669205e-05, "loss": 1.2035, "step": 2737 }, { "epoch": 0.14969315090961277, "grad_norm": 1.3485063314437866, "learning_rate": 1.9554290595337625e-05, "loss": 1.3784, "step": 2738 }, { "epoch": 0.14974782335333434, "grad_norm": 1.41782546043396, "learning_rate": 1.9553750961965864e-05, "loss": 1.4804, "step": 2739 }, { "epoch": 0.1498024957970559, "grad_norm": 1.49235200881958, "learning_rate": 1.955321100957194e-05, "loss": 1.4537, "step": 2740 }, { "epoch": 0.14985716824077744, "grad_norm": 1.3216272592544556, "learning_rate": 1.9552670738173884e-05, "loss": 1.5372, "step": 2741 }, { "epoch": 0.149911840684499, "grad_norm": 1.2180699110031128, "learning_rate": 1.9552130147789733e-05, "loss": 1.5522, "step": 2742 }, { "epoch": 0.14996651312822054, "grad_norm": 1.5551724433898926, "learning_rate": 1.9551589238437546e-05, "loss": 1.5878, "step": 2743 }, { "epoch": 0.1500211855719421, "grad_norm": 1.6574417352676392, "learning_rate": 1.9551048010135377e-05, "loss": 1.2589, "step": 2744 }, { "epoch": 0.15007585801566364, "grad_norm": 1.6611623764038086, "learning_rate": 1.9550506462901305e-05, "loss": 1.3634, "step": 2745 }, { "epoch": 0.1501305304593852, "grad_norm": 1.642937183380127, "learning_rate": 1.954996459675341e-05, "loss": 1.2137, "step": 2746 }, { "epoch": 0.15018520290310677, "grad_norm": 1.619188666343689, "learning_rate": 1.954942241170979e-05, "loss": 1.5684, "step": 2747 }, { "epoch": 0.1502398753468283, "grad_norm": 1.5133081674575806, "learning_rate": 1.954887990778854e-05, "loss": 1.3128, "step": 2748 }, { "epoch": 0.15029454779054988, "grad_norm": 1.5395199060440063, "learning_rate": 1.9548337085007788e-05, "loss": 1.4953, "step": 2749 }, { "epoch": 0.1503492202342714, "grad_norm": 1.3586562871932983, "learning_rate": 1.954779394338566e-05, "loss": 1.3569, "step": 2750 }, { "epoch": 0.15040389267799298, "grad_norm": 1.0716300010681152, "learning_rate": 1.954725048294028e-05, "loss": 1.8058, "step": 2751 }, { "epoch": 0.15045856512171452, "grad_norm": 1.520627737045288, "learning_rate": 1.9546706703689802e-05, "loss": 1.5255, "step": 2752 }, { "epoch": 0.15051323756543608, "grad_norm": 1.2232744693756104, "learning_rate": 1.954616260565239e-05, "loss": 1.6133, "step": 2753 }, { "epoch": 0.15056791000915765, "grad_norm": 1.4814786911010742, "learning_rate": 1.9545618188846206e-05, "loss": 1.2737, "step": 2754 }, { "epoch": 0.15062258245287918, "grad_norm": 1.7424176931381226, "learning_rate": 1.954507345328943e-05, "loss": 1.6399, "step": 2755 }, { "epoch": 0.15067725489660075, "grad_norm": 1.4117159843444824, "learning_rate": 1.9544528399000256e-05, "loss": 1.6179, "step": 2756 }, { "epoch": 0.15073192734032229, "grad_norm": 1.3660801649093628, "learning_rate": 1.954398302599688e-05, "loss": 1.3804, "step": 2757 }, { "epoch": 0.15078659978404385, "grad_norm": 1.5066287517547607, "learning_rate": 1.9543437334297515e-05, "loss": 1.5128, "step": 2758 }, { "epoch": 0.1508412722277654, "grad_norm": 2.7862071990966797, "learning_rate": 1.9542891323920386e-05, "loss": 1.3119, "step": 2759 }, { "epoch": 0.15089594467148695, "grad_norm": 1.4945894479751587, "learning_rate": 1.954234499488372e-05, "loss": 1.4415, "step": 2760 }, { "epoch": 0.15095061711520852, "grad_norm": 1.9905850887298584, "learning_rate": 1.9541798347205762e-05, "loss": 1.3847, "step": 2761 }, { "epoch": 0.15100528955893006, "grad_norm": 1.6514402627944946, "learning_rate": 1.9541251380904768e-05, "loss": 1.4237, "step": 2762 }, { "epoch": 0.15105996200265162, "grad_norm": 1.832334280014038, "learning_rate": 1.9540704095999e-05, "loss": 1.5763, "step": 2763 }, { "epoch": 0.15111463444637316, "grad_norm": 1.8215155601501465, "learning_rate": 1.9540156492506734e-05, "loss": 1.4344, "step": 2764 }, { "epoch": 0.15116930689009472, "grad_norm": 1.1015146970748901, "learning_rate": 1.9539608570446255e-05, "loss": 1.4811, "step": 2765 }, { "epoch": 0.15122397933381626, "grad_norm": 1.6712597608566284, "learning_rate": 1.9539060329835864e-05, "loss": 1.3857, "step": 2766 }, { "epoch": 0.15127865177753783, "grad_norm": 1.417328953742981, "learning_rate": 1.9538511770693862e-05, "loss": 1.5763, "step": 2767 }, { "epoch": 0.1513333242212594, "grad_norm": 1.472402572631836, "learning_rate": 1.953796289303857e-05, "loss": 1.3861, "step": 2768 }, { "epoch": 0.15138799666498093, "grad_norm": 1.4898594617843628, "learning_rate": 1.9537413696888317e-05, "loss": 1.3103, "step": 2769 }, { "epoch": 0.1514426691087025, "grad_norm": 2.0883371829986572, "learning_rate": 1.953686418226144e-05, "loss": 1.3622, "step": 2770 }, { "epoch": 0.15149734155242403, "grad_norm": 1.9061977863311768, "learning_rate": 1.9536314349176288e-05, "loss": 1.6872, "step": 2771 }, { "epoch": 0.1515520139961456, "grad_norm": 1.315819501876831, "learning_rate": 1.953576419765122e-05, "loss": 1.4365, "step": 2772 }, { "epoch": 0.15160668643986713, "grad_norm": 1.818593978881836, "learning_rate": 1.953521372770461e-05, "loss": 1.5011, "step": 2773 }, { "epoch": 0.1516613588835887, "grad_norm": 1.6624548435211182, "learning_rate": 1.9534662939354843e-05, "loss": 1.3405, "step": 2774 }, { "epoch": 0.15171603132731026, "grad_norm": 1.5290402173995972, "learning_rate": 1.9534111832620302e-05, "loss": 1.2478, "step": 2775 }, { "epoch": 0.1517707037710318, "grad_norm": 1.5326112508773804, "learning_rate": 1.9533560407519395e-05, "loss": 1.3872, "step": 2776 }, { "epoch": 0.15182537621475337, "grad_norm": 1.7061116695404053, "learning_rate": 1.9533008664070537e-05, "loss": 1.5527, "step": 2777 }, { "epoch": 0.1518800486584749, "grad_norm": 1.6081217527389526, "learning_rate": 1.9532456602292148e-05, "loss": 1.2521, "step": 2778 }, { "epoch": 0.15193472110219647, "grad_norm": 1.2627612352371216, "learning_rate": 1.9531904222202664e-05, "loss": 1.3104, "step": 2779 }, { "epoch": 0.151989393545918, "grad_norm": 1.3402667045593262, "learning_rate": 1.9531351523820533e-05, "loss": 1.4411, "step": 2780 }, { "epoch": 0.15204406598963957, "grad_norm": 1.6468076705932617, "learning_rate": 1.9530798507164207e-05, "loss": 1.2464, "step": 2781 }, { "epoch": 0.15209873843336114, "grad_norm": 1.5917330980300903, "learning_rate": 1.9530245172252154e-05, "loss": 1.6128, "step": 2782 }, { "epoch": 0.15215341087708267, "grad_norm": 1.5673917531967163, "learning_rate": 1.952969151910285e-05, "loss": 1.3141, "step": 2783 }, { "epoch": 0.15220808332080424, "grad_norm": 1.48997163772583, "learning_rate": 1.9529137547734787e-05, "loss": 1.4233, "step": 2784 }, { "epoch": 0.15226275576452578, "grad_norm": 1.7474342584609985, "learning_rate": 1.952858325816646e-05, "loss": 1.3391, "step": 2785 }, { "epoch": 0.15231742820824734, "grad_norm": 1.4729450941085815, "learning_rate": 1.9528028650416376e-05, "loss": 1.1317, "step": 2786 }, { "epoch": 0.15237210065196888, "grad_norm": 1.8339349031448364, "learning_rate": 1.952747372450306e-05, "loss": 1.5345, "step": 2787 }, { "epoch": 0.15242677309569044, "grad_norm": 1.55852472782135, "learning_rate": 1.952691848044504e-05, "loss": 1.3962, "step": 2788 }, { "epoch": 0.152481445539412, "grad_norm": 1.2908871173858643, "learning_rate": 1.9526362918260852e-05, "loss": 1.4553, "step": 2789 }, { "epoch": 0.15253611798313355, "grad_norm": 1.5558362007141113, "learning_rate": 1.9525807037969056e-05, "loss": 1.4592, "step": 2790 }, { "epoch": 0.1525907904268551, "grad_norm": 1.7640416622161865, "learning_rate": 1.9525250839588206e-05, "loss": 1.4617, "step": 2791 }, { "epoch": 0.15264546287057665, "grad_norm": 1.389538288116455, "learning_rate": 1.9524694323136883e-05, "loss": 1.4667, "step": 2792 }, { "epoch": 0.1527001353142982, "grad_norm": 1.1329737901687622, "learning_rate": 1.9524137488633662e-05, "loss": 1.5597, "step": 2793 }, { "epoch": 0.15275480775801975, "grad_norm": 1.6940982341766357, "learning_rate": 1.9523580336097147e-05, "loss": 1.3449, "step": 2794 }, { "epoch": 0.15280948020174132, "grad_norm": 1.5648080110549927, "learning_rate": 1.952302286554593e-05, "loss": 1.3963, "step": 2795 }, { "epoch": 0.15286415264546288, "grad_norm": 1.4943536520004272, "learning_rate": 1.9522465076998638e-05, "loss": 1.6602, "step": 2796 }, { "epoch": 0.15291882508918442, "grad_norm": 1.2581709623336792, "learning_rate": 1.952190697047389e-05, "loss": 1.5161, "step": 2797 }, { "epoch": 0.15297349753290598, "grad_norm": 1.4232177734375, "learning_rate": 1.9521348545990323e-05, "loss": 1.6949, "step": 2798 }, { "epoch": 0.15302816997662752, "grad_norm": 1.8715651035308838, "learning_rate": 1.952078980356659e-05, "loss": 1.2314, "step": 2799 }, { "epoch": 0.15308284242034909, "grad_norm": 1.7886865139007568, "learning_rate": 1.952023074322134e-05, "loss": 1.494, "step": 2800 }, { "epoch": 0.15313751486407062, "grad_norm": 1.754738450050354, "learning_rate": 1.9519671364973245e-05, "loss": 1.4397, "step": 2801 }, { "epoch": 0.1531921873077922, "grad_norm": 1.5123162269592285, "learning_rate": 1.9519111668840987e-05, "loss": 1.512, "step": 2802 }, { "epoch": 0.15324685975151375, "grad_norm": 1.1857787370681763, "learning_rate": 1.951855165484325e-05, "loss": 1.6819, "step": 2803 }, { "epoch": 0.1533015321952353, "grad_norm": 1.2733505964279175, "learning_rate": 1.9517991322998742e-05, "loss": 1.3332, "step": 2804 }, { "epoch": 0.15335620463895686, "grad_norm": 1.4067021608352661, "learning_rate": 1.9517430673326167e-05, "loss": 1.4012, "step": 2805 }, { "epoch": 0.1534108770826784, "grad_norm": 1.8906528949737549, "learning_rate": 1.951686970584425e-05, "loss": 1.4098, "step": 2806 }, { "epoch": 0.15346554952639996, "grad_norm": 1.5510845184326172, "learning_rate": 1.951630842057172e-05, "loss": 1.4394, "step": 2807 }, { "epoch": 0.15352022197012152, "grad_norm": 1.7975984811782837, "learning_rate": 1.951574681752732e-05, "loss": 1.6489, "step": 2808 }, { "epoch": 0.15357489441384306, "grad_norm": 1.2813087701797485, "learning_rate": 1.9515184896729805e-05, "loss": 1.4711, "step": 2809 }, { "epoch": 0.15362956685756463, "grad_norm": 1.489357590675354, "learning_rate": 1.9514622658197937e-05, "loss": 1.3278, "step": 2810 }, { "epoch": 0.15368423930128616, "grad_norm": 1.4211199283599854, "learning_rate": 1.9514060101950492e-05, "loss": 1.2483, "step": 2811 }, { "epoch": 0.15373891174500773, "grad_norm": 1.4237581491470337, "learning_rate": 1.9513497228006257e-05, "loss": 1.5323, "step": 2812 }, { "epoch": 0.15379358418872927, "grad_norm": 1.322521686553955, "learning_rate": 1.9512934036384026e-05, "loss": 1.4277, "step": 2813 }, { "epoch": 0.15384825663245083, "grad_norm": 1.736683964729309, "learning_rate": 1.9512370527102604e-05, "loss": 1.3435, "step": 2814 }, { "epoch": 0.1539029290761724, "grad_norm": 1.2891353368759155, "learning_rate": 1.9511806700180807e-05, "loss": 1.5285, "step": 2815 }, { "epoch": 0.15395760151989393, "grad_norm": 1.3193029165267944, "learning_rate": 1.9511242555637464e-05, "loss": 1.3563, "step": 2816 }, { "epoch": 0.1540122739636155, "grad_norm": 1.446864128112793, "learning_rate": 1.9510678093491413e-05, "loss": 1.5114, "step": 2817 }, { "epoch": 0.15406694640733704, "grad_norm": 2.336364269256592, "learning_rate": 1.9510113313761506e-05, "loss": 1.4866, "step": 2818 }, { "epoch": 0.1541216188510586, "grad_norm": 1.3219932317733765, "learning_rate": 1.9509548216466596e-05, "loss": 1.6881, "step": 2819 }, { "epoch": 0.15417629129478014, "grad_norm": 1.4546051025390625, "learning_rate": 1.9508982801625557e-05, "loss": 1.4628, "step": 2820 }, { "epoch": 0.1542309637385017, "grad_norm": 1.6861443519592285, "learning_rate": 1.950841706925727e-05, "loss": 1.5004, "step": 2821 }, { "epoch": 0.15428563618222327, "grad_norm": 1.5982779264450073, "learning_rate": 1.9507851019380625e-05, "loss": 1.3162, "step": 2822 }, { "epoch": 0.1543403086259448, "grad_norm": 1.0977164506912231, "learning_rate": 1.950728465201452e-05, "loss": 1.5761, "step": 2823 }, { "epoch": 0.15439498106966637, "grad_norm": 1.9054945707321167, "learning_rate": 1.9506717967177876e-05, "loss": 1.4974, "step": 2824 }, { "epoch": 0.1544496535133879, "grad_norm": 1.1653904914855957, "learning_rate": 1.9506150964889606e-05, "loss": 1.4179, "step": 2825 }, { "epoch": 0.15450432595710947, "grad_norm": 1.7590010166168213, "learning_rate": 1.9505583645168654e-05, "loss": 1.5428, "step": 2826 }, { "epoch": 0.154558998400831, "grad_norm": 1.5357468128204346, "learning_rate": 1.9505016008033953e-05, "loss": 1.527, "step": 2827 }, { "epoch": 0.15461367084455258, "grad_norm": 1.614620327949524, "learning_rate": 1.9504448053504466e-05, "loss": 1.1445, "step": 2828 }, { "epoch": 0.15466834328827414, "grad_norm": 2.338002920150757, "learning_rate": 1.9503879781599155e-05, "loss": 1.6295, "step": 2829 }, { "epoch": 0.15472301573199568, "grad_norm": 1.4620236158370972, "learning_rate": 1.9503311192336998e-05, "loss": 1.5334, "step": 2830 }, { "epoch": 0.15477768817571724, "grad_norm": 1.3699772357940674, "learning_rate": 1.9502742285736977e-05, "loss": 1.4055, "step": 2831 }, { "epoch": 0.15483236061943878, "grad_norm": 1.6533054113388062, "learning_rate": 1.9502173061818095e-05, "loss": 1.5912, "step": 2832 }, { "epoch": 0.15488703306316035, "grad_norm": 1.916176676750183, "learning_rate": 1.9501603520599356e-05, "loss": 1.2824, "step": 2833 }, { "epoch": 0.15494170550688188, "grad_norm": 1.6836637258529663, "learning_rate": 1.950103366209978e-05, "loss": 1.4311, "step": 2834 }, { "epoch": 0.15499637795060345, "grad_norm": 1.6855926513671875, "learning_rate": 1.9500463486338393e-05, "loss": 1.2853, "step": 2835 }, { "epoch": 0.155051050394325, "grad_norm": 1.5936856269836426, "learning_rate": 1.949989299333424e-05, "loss": 1.5827, "step": 2836 }, { "epoch": 0.15510572283804655, "grad_norm": 1.5472301244735718, "learning_rate": 1.9499322183106363e-05, "loss": 1.402, "step": 2837 }, { "epoch": 0.15516039528176812, "grad_norm": 1.6912533044815063, "learning_rate": 1.949875105567383e-05, "loss": 1.528, "step": 2838 }, { "epoch": 0.15521506772548965, "grad_norm": 1.5069878101348877, "learning_rate": 1.9498179611055713e-05, "loss": 1.3607, "step": 2839 }, { "epoch": 0.15526974016921122, "grad_norm": 1.446619987487793, "learning_rate": 1.9497607849271086e-05, "loss": 1.4827, "step": 2840 }, { "epoch": 0.15532441261293276, "grad_norm": 1.1680095195770264, "learning_rate": 1.949703577033905e-05, "loss": 1.5261, "step": 2841 }, { "epoch": 0.15537908505665432, "grad_norm": 1.3774322271347046, "learning_rate": 1.94964633742787e-05, "loss": 1.4667, "step": 2842 }, { "epoch": 0.15543375750037589, "grad_norm": 1.4234281778335571, "learning_rate": 1.9495890661109154e-05, "loss": 1.4088, "step": 2843 }, { "epoch": 0.15548842994409742, "grad_norm": 1.3260488510131836, "learning_rate": 1.949531763084954e-05, "loss": 1.5636, "step": 2844 }, { "epoch": 0.155543102387819, "grad_norm": 1.2214844226837158, "learning_rate": 1.9494744283518985e-05, "loss": 1.4373, "step": 2845 }, { "epoch": 0.15559777483154053, "grad_norm": 1.8154369592666626, "learning_rate": 1.949417061913664e-05, "loss": 1.5264, "step": 2846 }, { "epoch": 0.1556524472752621, "grad_norm": 1.7249126434326172, "learning_rate": 1.9493596637721658e-05, "loss": 1.3201, "step": 2847 }, { "epoch": 0.15570711971898363, "grad_norm": 1.2104778289794922, "learning_rate": 1.9493022339293207e-05, "loss": 1.4972, "step": 2848 }, { "epoch": 0.1557617921627052, "grad_norm": 1.51116144657135, "learning_rate": 1.9492447723870466e-05, "loss": 1.7633, "step": 2849 }, { "epoch": 0.15581646460642676, "grad_norm": 1.4705545902252197, "learning_rate": 1.9491872791472623e-05, "loss": 1.5543, "step": 2850 }, { "epoch": 0.1558711370501483, "grad_norm": 1.4406925439834595, "learning_rate": 1.9491297542118866e-05, "loss": 1.6636, "step": 2851 }, { "epoch": 0.15592580949386986, "grad_norm": 1.371819019317627, "learning_rate": 1.949072197582842e-05, "loss": 1.6364, "step": 2852 }, { "epoch": 0.1559804819375914, "grad_norm": 1.5976741313934326, "learning_rate": 1.9490146092620492e-05, "loss": 1.3887, "step": 2853 }, { "epoch": 0.15603515438131296, "grad_norm": 1.1943944692611694, "learning_rate": 1.948956989251432e-05, "loss": 1.4851, "step": 2854 }, { "epoch": 0.1560898268250345, "grad_norm": 1.2808836698532104, "learning_rate": 1.9488993375529137e-05, "loss": 1.5771, "step": 2855 }, { "epoch": 0.15614449926875607, "grad_norm": 1.3392277956008911, "learning_rate": 1.9488416541684202e-05, "loss": 1.5956, "step": 2856 }, { "epoch": 0.15619917171247763, "grad_norm": 1.3536860942840576, "learning_rate": 1.948783939099877e-05, "loss": 1.4417, "step": 2857 }, { "epoch": 0.15625384415619917, "grad_norm": 1.4590880870819092, "learning_rate": 1.948726192349212e-05, "loss": 1.2429, "step": 2858 }, { "epoch": 0.15630851659992073, "grad_norm": 1.8154103755950928, "learning_rate": 1.9486684139183533e-05, "loss": 1.6667, "step": 2859 }, { "epoch": 0.15636318904364227, "grad_norm": 1.489195704460144, "learning_rate": 1.9486106038092298e-05, "loss": 1.4421, "step": 2860 }, { "epoch": 0.15641786148736384, "grad_norm": 1.7500336170196533, "learning_rate": 1.9485527620237723e-05, "loss": 1.6188, "step": 2861 }, { "epoch": 0.15647253393108537, "grad_norm": 1.5014480352401733, "learning_rate": 1.9484948885639122e-05, "loss": 1.3426, "step": 2862 }, { "epoch": 0.15652720637480694, "grad_norm": 1.544818639755249, "learning_rate": 1.9484369834315823e-05, "loss": 1.5306, "step": 2863 }, { "epoch": 0.1565818788185285, "grad_norm": 1.7375695705413818, "learning_rate": 1.948379046628716e-05, "loss": 1.5294, "step": 2864 }, { "epoch": 0.15663655126225004, "grad_norm": 1.7180131673812866, "learning_rate": 1.9483210781572473e-05, "loss": 1.2248, "step": 2865 }, { "epoch": 0.1566912237059716, "grad_norm": 1.5810461044311523, "learning_rate": 1.948263078019113e-05, "loss": 1.5343, "step": 2866 }, { "epoch": 0.15674589614969314, "grad_norm": 1.6403323411941528, "learning_rate": 1.9482050462162495e-05, "loss": 1.6304, "step": 2867 }, { "epoch": 0.1568005685934147, "grad_norm": 1.5760633945465088, "learning_rate": 1.9481469827505943e-05, "loss": 1.6476, "step": 2868 }, { "epoch": 0.15685524103713624, "grad_norm": 1.271227240562439, "learning_rate": 1.948088887624086e-05, "loss": 1.5294, "step": 2869 }, { "epoch": 0.1569099134808578, "grad_norm": 1.5165094137191772, "learning_rate": 1.9480307608386655e-05, "loss": 1.2274, "step": 2870 }, { "epoch": 0.15696458592457938, "grad_norm": 1.437834620475769, "learning_rate": 1.9479726023962732e-05, "loss": 1.8096, "step": 2871 }, { "epoch": 0.1570192583683009, "grad_norm": 1.5247468948364258, "learning_rate": 1.947914412298851e-05, "loss": 1.5088, "step": 2872 }, { "epoch": 0.15707393081202248, "grad_norm": 1.5572141408920288, "learning_rate": 1.9478561905483425e-05, "loss": 1.261, "step": 2873 }, { "epoch": 0.15712860325574401, "grad_norm": 1.3243927955627441, "learning_rate": 1.9477979371466914e-05, "loss": 1.3962, "step": 2874 }, { "epoch": 0.15718327569946558, "grad_norm": 1.4470540285110474, "learning_rate": 1.9477396520958432e-05, "loss": 1.415, "step": 2875 }, { "epoch": 0.15723794814318712, "grad_norm": 1.4380465745925903, "learning_rate": 1.9476813353977442e-05, "loss": 1.8331, "step": 2876 }, { "epoch": 0.15729262058690868, "grad_norm": 1.5361607074737549, "learning_rate": 1.947622987054341e-05, "loss": 1.4013, "step": 2877 }, { "epoch": 0.15734729303063025, "grad_norm": 1.8736636638641357, "learning_rate": 1.9475646070675832e-05, "loss": 1.5951, "step": 2878 }, { "epoch": 0.15740196547435178, "grad_norm": 1.335097312927246, "learning_rate": 1.9475061954394196e-05, "loss": 1.2757, "step": 2879 }, { "epoch": 0.15745663791807335, "grad_norm": 1.454584002494812, "learning_rate": 1.9474477521718006e-05, "loss": 1.3511, "step": 2880 }, { "epoch": 0.1575113103617949, "grad_norm": 1.5663024187088013, "learning_rate": 1.947389277266678e-05, "loss": 1.2271, "step": 2881 }, { "epoch": 0.15756598280551645, "grad_norm": 1.2181572914123535, "learning_rate": 1.947330770726004e-05, "loss": 1.3436, "step": 2882 }, { "epoch": 0.157620655249238, "grad_norm": 1.3038904666900635, "learning_rate": 1.947272232551733e-05, "loss": 1.3192, "step": 2883 }, { "epoch": 0.15767532769295955, "grad_norm": 1.3833932876586914, "learning_rate": 1.947213662745819e-05, "loss": 1.2517, "step": 2884 }, { "epoch": 0.15773000013668112, "grad_norm": 0.9978225231170654, "learning_rate": 1.9471550613102185e-05, "loss": 1.4866, "step": 2885 }, { "epoch": 0.15778467258040266, "grad_norm": 1.7989715337753296, "learning_rate": 1.9470964282468874e-05, "loss": 1.4377, "step": 2886 }, { "epoch": 0.15783934502412422, "grad_norm": 1.61860990524292, "learning_rate": 1.9470377635577843e-05, "loss": 1.4379, "step": 2887 }, { "epoch": 0.15789401746784576, "grad_norm": 1.507188320159912, "learning_rate": 1.9469790672448683e-05, "loss": 1.5771, "step": 2888 }, { "epoch": 0.15794868991156732, "grad_norm": 1.6826632022857666, "learning_rate": 1.946920339310099e-05, "loss": 1.2985, "step": 2889 }, { "epoch": 0.15800336235528886, "grad_norm": 1.708065152168274, "learning_rate": 1.9468615797554374e-05, "loss": 1.3138, "step": 2890 }, { "epoch": 0.15805803479901043, "grad_norm": 2.338033676147461, "learning_rate": 1.9468027885828457e-05, "loss": 1.2976, "step": 2891 }, { "epoch": 0.158112707242732, "grad_norm": 1.2467643022537231, "learning_rate": 1.946743965794287e-05, "loss": 1.4277, "step": 2892 }, { "epoch": 0.15816737968645353, "grad_norm": 1.2479833364486694, "learning_rate": 1.946685111391726e-05, "loss": 1.561, "step": 2893 }, { "epoch": 0.1582220521301751, "grad_norm": 1.4889007806777954, "learning_rate": 1.9466262253771274e-05, "loss": 1.3936, "step": 2894 }, { "epoch": 0.15827672457389663, "grad_norm": 2.0959408283233643, "learning_rate": 1.9465673077524584e-05, "loss": 1.1606, "step": 2895 }, { "epoch": 0.1583313970176182, "grad_norm": 1.6843864917755127, "learning_rate": 1.946508358519685e-05, "loss": 1.5427, "step": 2896 }, { "epoch": 0.15838606946133973, "grad_norm": 1.509731650352478, "learning_rate": 1.946449377680777e-05, "loss": 1.611, "step": 2897 }, { "epoch": 0.1584407419050613, "grad_norm": 1.4019365310668945, "learning_rate": 1.946390365237703e-05, "loss": 1.287, "step": 2898 }, { "epoch": 0.15849541434878286, "grad_norm": 1.356191873550415, "learning_rate": 1.9463313211924343e-05, "loss": 1.4697, "step": 2899 }, { "epoch": 0.1585500867925044, "grad_norm": 1.2820035219192505, "learning_rate": 1.9462722455469422e-05, "loss": 1.4328, "step": 2900 }, { "epoch": 0.15860475923622597, "grad_norm": 1.4753276109695435, "learning_rate": 1.9462131383031988e-05, "loss": 1.4561, "step": 2901 }, { "epoch": 0.1586594316799475, "grad_norm": 1.6815465688705444, "learning_rate": 1.946153999463179e-05, "loss": 1.3454, "step": 2902 }, { "epoch": 0.15871410412366907, "grad_norm": 1.5866831541061401, "learning_rate": 1.9460948290288565e-05, "loss": 1.4836, "step": 2903 }, { "epoch": 0.1587687765673906, "grad_norm": 1.3741992712020874, "learning_rate": 1.9460356270022073e-05, "loss": 1.3683, "step": 2904 }, { "epoch": 0.15882344901111217, "grad_norm": 1.3848462104797363, "learning_rate": 1.945976393385209e-05, "loss": 1.4028, "step": 2905 }, { "epoch": 0.15887812145483374, "grad_norm": 1.8038004636764526, "learning_rate": 1.9459171281798394e-05, "loss": 1.3603, "step": 2906 }, { "epoch": 0.15893279389855527, "grad_norm": 1.5624521970748901, "learning_rate": 1.9458578313880768e-05, "loss": 1.425, "step": 2907 }, { "epoch": 0.15898746634227684, "grad_norm": 1.8646736145019531, "learning_rate": 1.9457985030119016e-05, "loss": 1.6152, "step": 2908 }, { "epoch": 0.15904213878599838, "grad_norm": 1.6980037689208984, "learning_rate": 1.9457391430532952e-05, "loss": 1.3202, "step": 2909 }, { "epoch": 0.15909681122971994, "grad_norm": 1.468959927558899, "learning_rate": 1.9456797515142397e-05, "loss": 1.6007, "step": 2910 }, { "epoch": 0.1591514836734415, "grad_norm": 1.4794411659240723, "learning_rate": 1.945620328396718e-05, "loss": 1.3086, "step": 2911 }, { "epoch": 0.15920615611716304, "grad_norm": 1.6865482330322266, "learning_rate": 1.9455608737027144e-05, "loss": 1.4443, "step": 2912 }, { "epoch": 0.1592608285608846, "grad_norm": 2.0544328689575195, "learning_rate": 1.9455013874342148e-05, "loss": 1.5732, "step": 2913 }, { "epoch": 0.15931550100460615, "grad_norm": 1.5664256811141968, "learning_rate": 1.9454418695932048e-05, "loss": 1.5323, "step": 2914 }, { "epoch": 0.1593701734483277, "grad_norm": 1.8229801654815674, "learning_rate": 1.9453823201816722e-05, "loss": 1.3163, "step": 2915 }, { "epoch": 0.15942484589204925, "grad_norm": 1.4309900999069214, "learning_rate": 1.945322739201606e-05, "loss": 1.3081, "step": 2916 }, { "epoch": 0.15947951833577081, "grad_norm": 1.2034496068954468, "learning_rate": 1.945263126654995e-05, "loss": 1.5227, "step": 2917 }, { "epoch": 0.15953419077949238, "grad_norm": 1.23235023021698, "learning_rate": 1.9452034825438302e-05, "loss": 1.471, "step": 2918 }, { "epoch": 0.15958886322321392, "grad_norm": 1.2509546279907227, "learning_rate": 1.945143806870103e-05, "loss": 1.5757, "step": 2919 }, { "epoch": 0.15964353566693548, "grad_norm": 1.483651041984558, "learning_rate": 1.9450840996358062e-05, "loss": 1.4083, "step": 2920 }, { "epoch": 0.15969820811065702, "grad_norm": 1.2659670114517212, "learning_rate": 1.9450243608429336e-05, "loss": 1.447, "step": 2921 }, { "epoch": 0.15975288055437858, "grad_norm": 1.799532175064087, "learning_rate": 1.9449645904934802e-05, "loss": 1.4213, "step": 2922 }, { "epoch": 0.15980755299810012, "grad_norm": 1.7016371488571167, "learning_rate": 1.9449047885894414e-05, "loss": 1.3696, "step": 2923 }, { "epoch": 0.1598622254418217, "grad_norm": 1.4432960748672485, "learning_rate": 1.9448449551328147e-05, "loss": 1.5897, "step": 2924 }, { "epoch": 0.15991689788554325, "grad_norm": 1.525590419769287, "learning_rate": 1.9447850901255975e-05, "loss": 1.3844, "step": 2925 }, { "epoch": 0.1599715703292648, "grad_norm": 1.4287968873977661, "learning_rate": 1.9447251935697895e-05, "loss": 1.3376, "step": 2926 }, { "epoch": 0.16002624277298635, "grad_norm": 2.2925314903259277, "learning_rate": 1.94466526546739e-05, "loss": 1.3815, "step": 2927 }, { "epoch": 0.1600809152167079, "grad_norm": 1.526984691619873, "learning_rate": 1.944605305820401e-05, "loss": 1.3836, "step": 2928 }, { "epoch": 0.16013558766042946, "grad_norm": 1.4641262292861938, "learning_rate": 1.944545314630824e-05, "loss": 1.3255, "step": 2929 }, { "epoch": 0.160190260104151, "grad_norm": 1.4541213512420654, "learning_rate": 1.9444852919006627e-05, "loss": 1.2494, "step": 2930 }, { "epoch": 0.16024493254787256, "grad_norm": 1.5146368741989136, "learning_rate": 1.944425237631921e-05, "loss": 1.5007, "step": 2931 }, { "epoch": 0.16029960499159412, "grad_norm": 1.5966720581054688, "learning_rate": 1.9443651518266044e-05, "loss": 1.4913, "step": 2932 }, { "epoch": 0.16035427743531566, "grad_norm": 1.539137601852417, "learning_rate": 1.9443050344867195e-05, "loss": 1.3306, "step": 2933 }, { "epoch": 0.16040894987903723, "grad_norm": 1.2014672756195068, "learning_rate": 1.9442448856142736e-05, "loss": 1.6187, "step": 2934 }, { "epoch": 0.16046362232275876, "grad_norm": 1.5387166738510132, "learning_rate": 1.9441847052112753e-05, "loss": 1.4123, "step": 2935 }, { "epoch": 0.16051829476648033, "grad_norm": 1.145957112312317, "learning_rate": 1.9441244932797337e-05, "loss": 1.4286, "step": 2936 }, { "epoch": 0.16057296721020187, "grad_norm": 1.4997791051864624, "learning_rate": 1.9440642498216604e-05, "loss": 1.3561, "step": 2937 }, { "epoch": 0.16062763965392343, "grad_norm": 2.2304582595825195, "learning_rate": 1.944003974839066e-05, "loss": 1.2105, "step": 2938 }, { "epoch": 0.160682312097645, "grad_norm": 1.6386065483093262, "learning_rate": 1.943943668333964e-05, "loss": 1.5487, "step": 2939 }, { "epoch": 0.16073698454136653, "grad_norm": 2.038482904434204, "learning_rate": 1.9438833303083677e-05, "loss": 1.4502, "step": 2940 }, { "epoch": 0.1607916569850881, "grad_norm": 1.5919677019119263, "learning_rate": 1.9438229607642923e-05, "loss": 1.1369, "step": 2941 }, { "epoch": 0.16084632942880964, "grad_norm": 2.326920509338379, "learning_rate": 1.9437625597037532e-05, "loss": 1.4054, "step": 2942 }, { "epoch": 0.1609010018725312, "grad_norm": 1.4357197284698486, "learning_rate": 1.943702127128768e-05, "loss": 1.5694, "step": 2943 }, { "epoch": 0.16095567431625274, "grad_norm": 1.2477177381515503, "learning_rate": 1.943641663041354e-05, "loss": 1.5902, "step": 2944 }, { "epoch": 0.1610103467599743, "grad_norm": 1.4444810152053833, "learning_rate": 1.9435811674435308e-05, "loss": 1.2124, "step": 2945 }, { "epoch": 0.16106501920369587, "grad_norm": 5.123129367828369, "learning_rate": 1.943520640337318e-05, "loss": 1.4084, "step": 2946 }, { "epoch": 0.1611196916474174, "grad_norm": 1.3405673503875732, "learning_rate": 1.9434600817247368e-05, "loss": 1.3605, "step": 2947 }, { "epoch": 0.16117436409113897, "grad_norm": 1.2000652551651, "learning_rate": 1.94339949160781e-05, "loss": 1.4704, "step": 2948 }, { "epoch": 0.1612290365348605, "grad_norm": 1.4953628778457642, "learning_rate": 1.94333886998856e-05, "loss": 1.5885, "step": 2949 }, { "epoch": 0.16128370897858207, "grad_norm": 1.4152045249938965, "learning_rate": 1.943278216869012e-05, "loss": 1.4359, "step": 2950 }, { "epoch": 0.1613383814223036, "grad_norm": 1.8113133907318115, "learning_rate": 1.943217532251191e-05, "loss": 1.4045, "step": 2951 }, { "epoch": 0.16139305386602518, "grad_norm": 1.418498158454895, "learning_rate": 1.9431568161371226e-05, "loss": 1.7242, "step": 2952 }, { "epoch": 0.16144772630974674, "grad_norm": 1.8204282522201538, "learning_rate": 1.9430960685288355e-05, "loss": 1.591, "step": 2953 }, { "epoch": 0.16150239875346828, "grad_norm": 1.34381103515625, "learning_rate": 1.943035289428357e-05, "loss": 1.2663, "step": 2954 }, { "epoch": 0.16155707119718984, "grad_norm": 1.2979538440704346, "learning_rate": 1.9429744788377178e-05, "loss": 1.4299, "step": 2955 }, { "epoch": 0.16161174364091138, "grad_norm": 1.5079708099365234, "learning_rate": 1.942913636758948e-05, "loss": 1.1742, "step": 2956 }, { "epoch": 0.16166641608463295, "grad_norm": 1.3866466283798218, "learning_rate": 1.942852763194079e-05, "loss": 1.3819, "step": 2957 }, { "epoch": 0.16172108852835448, "grad_norm": 1.381188988685608, "learning_rate": 1.942791858145144e-05, "loss": 1.7347, "step": 2958 }, { "epoch": 0.16177576097207605, "grad_norm": 1.6904770135879517, "learning_rate": 1.9427309216141762e-05, "loss": 1.5444, "step": 2959 }, { "epoch": 0.16183043341579761, "grad_norm": 1.6667919158935547, "learning_rate": 1.942669953603211e-05, "loss": 1.5129, "step": 2960 }, { "epoch": 0.16188510585951915, "grad_norm": 1.2422899007797241, "learning_rate": 1.9426089541142838e-05, "loss": 1.4581, "step": 2961 }, { "epoch": 0.16193977830324072, "grad_norm": 1.2546935081481934, "learning_rate": 1.942547923149432e-05, "loss": 1.6058, "step": 2962 }, { "epoch": 0.16199445074696225, "grad_norm": 1.805785894393921, "learning_rate": 1.942486860710693e-05, "loss": 1.2696, "step": 2963 }, { "epoch": 0.16204912319068382, "grad_norm": 1.3449187278747559, "learning_rate": 1.9424257668001064e-05, "loss": 1.4238, "step": 2964 }, { "epoch": 0.16210379563440536, "grad_norm": 1.3056186437606812, "learning_rate": 1.9423646414197116e-05, "loss": 1.6358, "step": 2965 }, { "epoch": 0.16215846807812692, "grad_norm": 1.294124722480774, "learning_rate": 1.9423034845715506e-05, "loss": 1.3897, "step": 2966 }, { "epoch": 0.1622131405218485, "grad_norm": 1.6230037212371826, "learning_rate": 1.9422422962576646e-05, "loss": 1.4732, "step": 2967 }, { "epoch": 0.16226781296557002, "grad_norm": 1.217187762260437, "learning_rate": 1.9421810764800978e-05, "loss": 1.5837, "step": 2968 }, { "epoch": 0.1623224854092916, "grad_norm": 1.5384924411773682, "learning_rate": 1.9421198252408934e-05, "loss": 1.6737, "step": 2969 }, { "epoch": 0.16237715785301313, "grad_norm": 2.1958043575286865, "learning_rate": 1.9420585425420974e-05, "loss": 1.469, "step": 2970 }, { "epoch": 0.1624318302967347, "grad_norm": 1.7251676321029663, "learning_rate": 1.9419972283857563e-05, "loss": 1.3967, "step": 2971 }, { "epoch": 0.16248650274045623, "grad_norm": 1.4529598951339722, "learning_rate": 1.941935882773917e-05, "loss": 1.3688, "step": 2972 }, { "epoch": 0.1625411751841778, "grad_norm": 1.5086255073547363, "learning_rate": 1.9418745057086284e-05, "loss": 1.4194, "step": 2973 }, { "epoch": 0.16259584762789936, "grad_norm": 1.2189534902572632, "learning_rate": 1.94181309719194e-05, "loss": 1.4932, "step": 2974 }, { "epoch": 0.1626505200716209, "grad_norm": 2.6286697387695312, "learning_rate": 1.9417516572259022e-05, "loss": 1.1157, "step": 2975 }, { "epoch": 0.16270519251534246, "grad_norm": 1.4358294010162354, "learning_rate": 1.9416901858125663e-05, "loss": 1.3478, "step": 2976 }, { "epoch": 0.162759864959064, "grad_norm": 1.380251169204712, "learning_rate": 1.9416286829539858e-05, "loss": 1.4071, "step": 2977 }, { "epoch": 0.16281453740278556, "grad_norm": 1.6199259757995605, "learning_rate": 1.9415671486522137e-05, "loss": 1.2715, "step": 2978 }, { "epoch": 0.1628692098465071, "grad_norm": 1.459662914276123, "learning_rate": 1.9415055829093054e-05, "loss": 1.5188, "step": 2979 }, { "epoch": 0.16292388229022867, "grad_norm": 1.825700283050537, "learning_rate": 1.941443985727316e-05, "loss": 1.2537, "step": 2980 }, { "epoch": 0.16297855473395023, "grad_norm": 1.6604676246643066, "learning_rate": 1.941382357108303e-05, "loss": 1.4312, "step": 2981 }, { "epoch": 0.16303322717767177, "grad_norm": 1.1114816665649414, "learning_rate": 1.9413206970543238e-05, "loss": 1.4408, "step": 2982 }, { "epoch": 0.16308789962139333, "grad_norm": 1.377711534500122, "learning_rate": 1.9412590055674378e-05, "loss": 1.4068, "step": 2983 }, { "epoch": 0.16314257206511487, "grad_norm": 1.6486696004867554, "learning_rate": 1.941197282649705e-05, "loss": 1.4289, "step": 2984 }, { "epoch": 0.16319724450883644, "grad_norm": 1.3127490282058716, "learning_rate": 1.9411355283031864e-05, "loss": 1.5195, "step": 2985 }, { "epoch": 0.16325191695255797, "grad_norm": 1.6221866607666016, "learning_rate": 1.941073742529944e-05, "loss": 1.3453, "step": 2986 }, { "epoch": 0.16330658939627954, "grad_norm": 2.4587414264678955, "learning_rate": 1.9410119253320406e-05, "loss": 1.1685, "step": 2987 }, { "epoch": 0.1633612618400011, "grad_norm": 1.2189918756484985, "learning_rate": 1.9409500767115414e-05, "loss": 1.7341, "step": 2988 }, { "epoch": 0.16341593428372264, "grad_norm": 1.4831984043121338, "learning_rate": 1.9408881966705107e-05, "loss": 1.3449, "step": 2989 }, { "epoch": 0.1634706067274442, "grad_norm": 1.5000447034835815, "learning_rate": 1.940826285211016e-05, "loss": 1.2548, "step": 2990 }, { "epoch": 0.16352527917116574, "grad_norm": 1.3822158575057983, "learning_rate": 1.940764342335123e-05, "loss": 1.4461, "step": 2991 }, { "epoch": 0.1635799516148873, "grad_norm": 1.6216528415679932, "learning_rate": 1.9407023680449012e-05, "loss": 1.5, "step": 2992 }, { "epoch": 0.16363462405860885, "grad_norm": 1.4271882772445679, "learning_rate": 1.9406403623424204e-05, "loss": 1.5444, "step": 2993 }, { "epoch": 0.1636892965023304, "grad_norm": 1.174431324005127, "learning_rate": 1.9405783252297505e-05, "loss": 1.5757, "step": 2994 }, { "epoch": 0.16374396894605198, "grad_norm": 1.3107324838638306, "learning_rate": 1.9405162567089627e-05, "loss": 1.3159, "step": 2995 }, { "epoch": 0.1637986413897735, "grad_norm": 1.3700960874557495, "learning_rate": 1.9404541567821305e-05, "loss": 1.3296, "step": 2996 }, { "epoch": 0.16385331383349508, "grad_norm": 1.2764148712158203, "learning_rate": 1.9403920254513272e-05, "loss": 1.4662, "step": 2997 }, { "epoch": 0.16390798627721662, "grad_norm": 1.471265196800232, "learning_rate": 1.9403298627186277e-05, "loss": 1.5772, "step": 2998 }, { "epoch": 0.16396265872093818, "grad_norm": 1.3176989555358887, "learning_rate": 1.940267668586107e-05, "loss": 1.4951, "step": 2999 }, { "epoch": 0.16401733116465972, "grad_norm": 1.660650610923767, "learning_rate": 1.9402054430558427e-05, "loss": 1.5116, "step": 3000 }, { "epoch": 0.16407200360838128, "grad_norm": 1.2806901931762695, "learning_rate": 1.9401431861299122e-05, "loss": 1.6683, "step": 3001 }, { "epoch": 0.16412667605210285, "grad_norm": 1.457066535949707, "learning_rate": 1.9400808978103948e-05, "loss": 1.2105, "step": 3002 }, { "epoch": 0.1641813484958244, "grad_norm": 1.3915966749191284, "learning_rate": 1.94001857809937e-05, "loss": 1.3758, "step": 3003 }, { "epoch": 0.16423602093954595, "grad_norm": 1.863292932510376, "learning_rate": 1.9399562269989193e-05, "loss": 1.4432, "step": 3004 }, { "epoch": 0.1642906933832675, "grad_norm": 1.6918946504592896, "learning_rate": 1.9398938445111245e-05, "loss": 1.6491, "step": 3005 }, { "epoch": 0.16434536582698905, "grad_norm": 2.551898956298828, "learning_rate": 1.939831430638069e-05, "loss": 1.2661, "step": 3006 }, { "epoch": 0.16440003827071062, "grad_norm": 1.6666566133499146, "learning_rate": 1.939768985381836e-05, "loss": 1.309, "step": 3007 }, { "epoch": 0.16445471071443216, "grad_norm": 1.345335841178894, "learning_rate": 1.939706508744512e-05, "loss": 1.2919, "step": 3008 }, { "epoch": 0.16450938315815372, "grad_norm": 1.4462456703186035, "learning_rate": 1.939644000728182e-05, "loss": 1.3351, "step": 3009 }, { "epoch": 0.16456405560187526, "grad_norm": 1.2315950393676758, "learning_rate": 1.939581461334934e-05, "loss": 1.3971, "step": 3010 }, { "epoch": 0.16461872804559682, "grad_norm": 1.8552303314208984, "learning_rate": 1.9395188905668563e-05, "loss": 1.408, "step": 3011 }, { "epoch": 0.16467340048931836, "grad_norm": 1.3722786903381348, "learning_rate": 1.9394562884260382e-05, "loss": 1.9665, "step": 3012 }, { "epoch": 0.16472807293303993, "grad_norm": 1.4350258111953735, "learning_rate": 1.9393936549145703e-05, "loss": 1.7187, "step": 3013 }, { "epoch": 0.1647827453767615, "grad_norm": 1.518476963043213, "learning_rate": 1.9393309900345436e-05, "loss": 1.51, "step": 3014 }, { "epoch": 0.16483741782048303, "grad_norm": 1.3719452619552612, "learning_rate": 1.939268293788051e-05, "loss": 1.3515, "step": 3015 }, { "epoch": 0.1648920902642046, "grad_norm": 1.9142919778823853, "learning_rate": 1.939205566177186e-05, "loss": 1.2953, "step": 3016 }, { "epoch": 0.16494676270792613, "grad_norm": 1.6123096942901611, "learning_rate": 1.9391428072040432e-05, "loss": 1.6812, "step": 3017 }, { "epoch": 0.1650014351516477, "grad_norm": 1.4180972576141357, "learning_rate": 1.9390800168707185e-05, "loss": 1.4214, "step": 3018 }, { "epoch": 0.16505610759536923, "grad_norm": 1.3063549995422363, "learning_rate": 1.939017195179308e-05, "loss": 1.4385, "step": 3019 }, { "epoch": 0.1651107800390908, "grad_norm": 1.3676209449768066, "learning_rate": 1.9389543421319106e-05, "loss": 1.4229, "step": 3020 }, { "epoch": 0.16516545248281236, "grad_norm": 1.2797821760177612, "learning_rate": 1.938891457730624e-05, "loss": 1.5757, "step": 3021 }, { "epoch": 0.1652201249265339, "grad_norm": 1.5786362886428833, "learning_rate": 1.9388285419775482e-05, "loss": 1.3432, "step": 3022 }, { "epoch": 0.16527479737025547, "grad_norm": 1.454355239868164, "learning_rate": 1.938765594874785e-05, "loss": 1.6132, "step": 3023 }, { "epoch": 0.165329469813977, "grad_norm": 1.4133530855178833, "learning_rate": 1.9387026164244347e-05, "loss": 1.5225, "step": 3024 }, { "epoch": 0.16538414225769857, "grad_norm": 1.7577955722808838, "learning_rate": 1.9386396066286024e-05, "loss": 1.5137, "step": 3025 }, { "epoch": 0.1654388147014201, "grad_norm": 1.846957802772522, "learning_rate": 1.9385765654893905e-05, "loss": 1.4443, "step": 3026 }, { "epoch": 0.16549348714514167, "grad_norm": 1.9580938816070557, "learning_rate": 1.9385134930089046e-05, "loss": 1.4862, "step": 3027 }, { "epoch": 0.16554815958886324, "grad_norm": 1.4376322031021118, "learning_rate": 1.938450389189251e-05, "loss": 1.4912, "step": 3028 }, { "epoch": 0.16560283203258477, "grad_norm": 1.471858263015747, "learning_rate": 1.9383872540325366e-05, "loss": 1.2017, "step": 3029 }, { "epoch": 0.16565750447630634, "grad_norm": 1.3017655611038208, "learning_rate": 1.93832408754087e-05, "loss": 1.4614, "step": 3030 }, { "epoch": 0.16571217692002788, "grad_norm": 1.6311501264572144, "learning_rate": 1.93826088971636e-05, "loss": 1.5165, "step": 3031 }, { "epoch": 0.16576684936374944, "grad_norm": 1.215707540512085, "learning_rate": 1.9381976605611176e-05, "loss": 1.5107, "step": 3032 }, { "epoch": 0.16582152180747098, "grad_norm": 1.2142505645751953, "learning_rate": 1.9381344000772535e-05, "loss": 1.4625, "step": 3033 }, { "epoch": 0.16587619425119254, "grad_norm": 1.7434804439544678, "learning_rate": 1.9380711082668805e-05, "loss": 1.2671, "step": 3034 }, { "epoch": 0.1659308666949141, "grad_norm": 1.9142482280731201, "learning_rate": 1.9380077851321117e-05, "loss": 1.7139, "step": 3035 }, { "epoch": 0.16598553913863565, "grad_norm": 1.4438796043395996, "learning_rate": 1.937944430675062e-05, "loss": 1.4042, "step": 3036 }, { "epoch": 0.1660402115823572, "grad_norm": 2.3461830615997314, "learning_rate": 1.937881044897847e-05, "loss": 1.5846, "step": 3037 }, { "epoch": 0.16609488402607875, "grad_norm": 1.74479341506958, "learning_rate": 1.937817627802583e-05, "loss": 1.512, "step": 3038 }, { "epoch": 0.1661495564698003, "grad_norm": 1.3007160425186157, "learning_rate": 1.9377541793913876e-05, "loss": 1.4068, "step": 3039 }, { "epoch": 0.16620422891352185, "grad_norm": 2.049813747406006, "learning_rate": 1.9376906996663795e-05, "loss": 1.5914, "step": 3040 }, { "epoch": 0.16625890135724342, "grad_norm": 1.4449503421783447, "learning_rate": 1.937627188629679e-05, "loss": 1.5829, "step": 3041 }, { "epoch": 0.16631357380096498, "grad_norm": 1.4940015077590942, "learning_rate": 1.9375636462834062e-05, "loss": 1.3373, "step": 3042 }, { "epoch": 0.16636824624468652, "grad_norm": 1.9097896814346313, "learning_rate": 1.9375000726296834e-05, "loss": 1.369, "step": 3043 }, { "epoch": 0.16642291868840808, "grad_norm": 1.4493377208709717, "learning_rate": 1.937436467670633e-05, "loss": 1.3613, "step": 3044 }, { "epoch": 0.16647759113212962, "grad_norm": 1.752784252166748, "learning_rate": 1.937372831408379e-05, "loss": 1.2061, "step": 3045 }, { "epoch": 0.16653226357585119, "grad_norm": 1.7064861059188843, "learning_rate": 1.9373091638450472e-05, "loss": 1.5081, "step": 3046 }, { "epoch": 0.16658693601957272, "grad_norm": 1.1904600858688354, "learning_rate": 1.9372454649827626e-05, "loss": 1.5643, "step": 3047 }, { "epoch": 0.1666416084632943, "grad_norm": 1.6086022853851318, "learning_rate": 1.9371817348236525e-05, "loss": 1.032, "step": 3048 }, { "epoch": 0.16669628090701585, "grad_norm": 1.2517242431640625, "learning_rate": 1.937117973369845e-05, "loss": 1.4466, "step": 3049 }, { "epoch": 0.1667509533507374, "grad_norm": 1.3545794486999512, "learning_rate": 1.93705418062347e-05, "loss": 1.5806, "step": 3050 }, { "epoch": 0.16680562579445896, "grad_norm": 1.400344729423523, "learning_rate": 1.9369903565866565e-05, "loss": 1.4503, "step": 3051 }, { "epoch": 0.1668602982381805, "grad_norm": 1.3979259729385376, "learning_rate": 1.9369265012615362e-05, "loss": 1.5162, "step": 3052 }, { "epoch": 0.16691497068190206, "grad_norm": 1.40626859664917, "learning_rate": 1.9368626146502416e-05, "loss": 1.4577, "step": 3053 }, { "epoch": 0.1669696431256236, "grad_norm": 1.091601014137268, "learning_rate": 1.936798696754906e-05, "loss": 1.4241, "step": 3054 }, { "epoch": 0.16702431556934516, "grad_norm": 1.4996941089630127, "learning_rate": 1.9367347475776633e-05, "loss": 1.3833, "step": 3055 }, { "epoch": 0.16707898801306673, "grad_norm": 1.6761102676391602, "learning_rate": 1.9366707671206496e-05, "loss": 1.3888, "step": 3056 }, { "epoch": 0.16713366045678826, "grad_norm": 1.465157151222229, "learning_rate": 1.936606755386001e-05, "loss": 1.5073, "step": 3057 }, { "epoch": 0.16718833290050983, "grad_norm": 1.2596359252929688, "learning_rate": 1.936542712375855e-05, "loss": 1.5447, "step": 3058 }, { "epoch": 0.16724300534423137, "grad_norm": 1.2412736415863037, "learning_rate": 1.9364786380923503e-05, "loss": 1.6221, "step": 3059 }, { "epoch": 0.16729767778795293, "grad_norm": 1.3543622493743896, "learning_rate": 1.936414532537626e-05, "loss": 1.561, "step": 3060 }, { "epoch": 0.16735235023167447, "grad_norm": 1.4928627014160156, "learning_rate": 1.9363503957138235e-05, "loss": 1.6416, "step": 3061 }, { "epoch": 0.16740702267539603, "grad_norm": 1.4729804992675781, "learning_rate": 1.9362862276230837e-05, "loss": 1.3801, "step": 3062 }, { "epoch": 0.1674616951191176, "grad_norm": 1.4510483741760254, "learning_rate": 1.9362220282675498e-05, "loss": 1.5147, "step": 3063 }, { "epoch": 0.16751636756283914, "grad_norm": 1.3074692487716675, "learning_rate": 1.9361577976493654e-05, "loss": 1.4952, "step": 3064 }, { "epoch": 0.1675710400065607, "grad_norm": 1.315611481666565, "learning_rate": 1.9360935357706756e-05, "loss": 1.412, "step": 3065 }, { "epoch": 0.16762571245028224, "grad_norm": 1.3208868503570557, "learning_rate": 1.9360292426336263e-05, "loss": 1.448, "step": 3066 }, { "epoch": 0.1676803848940038, "grad_norm": 1.3743969202041626, "learning_rate": 1.9359649182403633e-05, "loss": 1.5377, "step": 3067 }, { "epoch": 0.16773505733772534, "grad_norm": 1.5027670860290527, "learning_rate": 1.935900562593036e-05, "loss": 1.5254, "step": 3068 }, { "epoch": 0.1677897297814469, "grad_norm": 1.295687198638916, "learning_rate": 1.9358361756937926e-05, "loss": 1.4569, "step": 3069 }, { "epoch": 0.16784440222516847, "grad_norm": 1.6443965435028076, "learning_rate": 1.935771757544783e-05, "loss": 1.4519, "step": 3070 }, { "epoch": 0.16789907466889, "grad_norm": 2.104419469833374, "learning_rate": 1.935707308148159e-05, "loss": 1.3893, "step": 3071 }, { "epoch": 0.16795374711261157, "grad_norm": 2.0692660808563232, "learning_rate": 1.9356428275060722e-05, "loss": 1.3217, "step": 3072 }, { "epoch": 0.1680084195563331, "grad_norm": 1.707148790359497, "learning_rate": 1.9355783156206755e-05, "loss": 1.455, "step": 3073 }, { "epoch": 0.16806309200005468, "grad_norm": 1.5509693622589111, "learning_rate": 1.9355137724941237e-05, "loss": 1.5653, "step": 3074 }, { "epoch": 0.1681177644437762, "grad_norm": 1.781499981880188, "learning_rate": 1.935449198128572e-05, "loss": 1.4372, "step": 3075 }, { "epoch": 0.16817243688749778, "grad_norm": 1.4791916608810425, "learning_rate": 1.935384592526176e-05, "loss": 1.3611, "step": 3076 }, { "epoch": 0.16822710933121934, "grad_norm": 1.4929090738296509, "learning_rate": 1.935319955689094e-05, "loss": 1.5981, "step": 3077 }, { "epoch": 0.16828178177494088, "grad_norm": 1.875557541847229, "learning_rate": 1.9352552876194835e-05, "loss": 1.5021, "step": 3078 }, { "epoch": 0.16833645421866245, "grad_norm": 1.3013252019882202, "learning_rate": 1.9351905883195044e-05, "loss": 1.6525, "step": 3079 }, { "epoch": 0.16839112666238398, "grad_norm": 1.4774296283721924, "learning_rate": 1.935125857791317e-05, "loss": 1.5345, "step": 3080 }, { "epoch": 0.16844579910610555, "grad_norm": 1.566210150718689, "learning_rate": 1.935061096037083e-05, "loss": 1.3326, "step": 3081 }, { "epoch": 0.16850047154982709, "grad_norm": 1.8134933710098267, "learning_rate": 1.9349963030589648e-05, "loss": 1.3924, "step": 3082 }, { "epoch": 0.16855514399354865, "grad_norm": 1.1137460470199585, "learning_rate": 1.9349314788591258e-05, "loss": 1.4929, "step": 3083 }, { "epoch": 0.16860981643727022, "grad_norm": 1.3139737844467163, "learning_rate": 1.934866623439731e-05, "loss": 1.5511, "step": 3084 }, { "epoch": 0.16866448888099175, "grad_norm": 1.6814171075820923, "learning_rate": 1.9348017368029458e-05, "loss": 1.238, "step": 3085 }, { "epoch": 0.16871916132471332, "grad_norm": 1.8666608333587646, "learning_rate": 1.934736818950937e-05, "loss": 1.4177, "step": 3086 }, { "epoch": 0.16877383376843486, "grad_norm": 1.3559948205947876, "learning_rate": 1.9346718698858728e-05, "loss": 1.7685, "step": 3087 }, { "epoch": 0.16882850621215642, "grad_norm": 1.7018365859985352, "learning_rate": 1.934606889609921e-05, "loss": 1.3303, "step": 3088 }, { "epoch": 0.16888317865587796, "grad_norm": 1.405526041984558, "learning_rate": 1.9345418781252527e-05, "loss": 1.5183, "step": 3089 }, { "epoch": 0.16893785109959952, "grad_norm": 1.3009462356567383, "learning_rate": 1.9344768354340378e-05, "loss": 1.3461, "step": 3090 }, { "epoch": 0.1689925235433211, "grad_norm": 1.4990109205245972, "learning_rate": 1.9344117615384483e-05, "loss": 1.3585, "step": 3091 }, { "epoch": 0.16904719598704263, "grad_norm": 1.2758985757827759, "learning_rate": 1.9343466564406576e-05, "loss": 1.5286, "step": 3092 }, { "epoch": 0.1691018684307642, "grad_norm": 1.8231374025344849, "learning_rate": 1.9342815201428394e-05, "loss": 1.3259, "step": 3093 }, { "epoch": 0.16915654087448573, "grad_norm": 2.071901798248291, "learning_rate": 1.934216352647169e-05, "loss": 1.4468, "step": 3094 }, { "epoch": 0.1692112133182073, "grad_norm": 1.5407509803771973, "learning_rate": 1.9341511539558227e-05, "loss": 1.5009, "step": 3095 }, { "epoch": 0.16926588576192883, "grad_norm": 2.5939013957977295, "learning_rate": 1.934085924070977e-05, "loss": 1.4915, "step": 3096 }, { "epoch": 0.1693205582056504, "grad_norm": 1.2802404165267944, "learning_rate": 1.9340206629948104e-05, "loss": 1.4337, "step": 3097 }, { "epoch": 0.16937523064937196, "grad_norm": 1.6414778232574463, "learning_rate": 1.933955370729502e-05, "loss": 1.3936, "step": 3098 }, { "epoch": 0.1694299030930935, "grad_norm": 1.781400442123413, "learning_rate": 1.9338900472772323e-05, "loss": 1.3035, "step": 3099 }, { "epoch": 0.16948457553681506, "grad_norm": 1.4336572885513306, "learning_rate": 1.9338246926401828e-05, "loss": 1.5425, "step": 3100 }, { "epoch": 0.1695392479805366, "grad_norm": 1.5668832063674927, "learning_rate": 1.9337593068205353e-05, "loss": 1.4752, "step": 3101 }, { "epoch": 0.16959392042425817, "grad_norm": 1.668103575706482, "learning_rate": 1.933693889820473e-05, "loss": 1.6992, "step": 3102 }, { "epoch": 0.1696485928679797, "grad_norm": 1.158622145652771, "learning_rate": 1.933628441642181e-05, "loss": 1.54, "step": 3103 }, { "epoch": 0.16970326531170127, "grad_norm": 1.5747498273849487, "learning_rate": 1.9335629622878445e-05, "loss": 1.4912, "step": 3104 }, { "epoch": 0.16975793775542283, "grad_norm": 1.6311131715774536, "learning_rate": 1.9334974517596504e-05, "loss": 1.663, "step": 3105 }, { "epoch": 0.16981261019914437, "grad_norm": 1.562940239906311, "learning_rate": 1.9334319100597855e-05, "loss": 1.4542, "step": 3106 }, { "epoch": 0.16986728264286594, "grad_norm": 1.199357509613037, "learning_rate": 1.9333663371904388e-05, "loss": 1.5571, "step": 3107 }, { "epoch": 0.16992195508658747, "grad_norm": 1.3650332689285278, "learning_rate": 1.9333007331537998e-05, "loss": 1.3623, "step": 3108 }, { "epoch": 0.16997662753030904, "grad_norm": 1.4442349672317505, "learning_rate": 1.9332350979520594e-05, "loss": 1.4884, "step": 3109 }, { "epoch": 0.1700312999740306, "grad_norm": 1.373132348060608, "learning_rate": 1.933169431587409e-05, "loss": 1.5841, "step": 3110 }, { "epoch": 0.17008597241775214, "grad_norm": 1.4150294065475464, "learning_rate": 1.9331037340620418e-05, "loss": 1.3906, "step": 3111 }, { "epoch": 0.1701406448614737, "grad_norm": 1.7884818315505981, "learning_rate": 1.9330380053781512e-05, "loss": 1.6551, "step": 3112 }, { "epoch": 0.17019531730519524, "grad_norm": 2.0153820514678955, "learning_rate": 1.932972245537932e-05, "loss": 1.6159, "step": 3113 }, { "epoch": 0.1702499897489168, "grad_norm": 1.6897478103637695, "learning_rate": 1.9329064545435803e-05, "loss": 1.4769, "step": 3114 }, { "epoch": 0.17030466219263835, "grad_norm": 1.195862889289856, "learning_rate": 1.932840632397293e-05, "loss": 1.474, "step": 3115 }, { "epoch": 0.1703593346363599, "grad_norm": 1.4651423692703247, "learning_rate": 1.932774779101268e-05, "loss": 1.6136, "step": 3116 }, { "epoch": 0.17041400708008148, "grad_norm": 1.652071475982666, "learning_rate": 1.9327088946577042e-05, "loss": 1.5861, "step": 3117 }, { "epoch": 0.170468679523803, "grad_norm": 1.4680150747299194, "learning_rate": 1.9326429790688022e-05, "loss": 1.3366, "step": 3118 }, { "epoch": 0.17052335196752458, "grad_norm": 1.5458639860153198, "learning_rate": 1.932577032336762e-05, "loss": 1.2025, "step": 3119 }, { "epoch": 0.17057802441124612, "grad_norm": 1.2586864233016968, "learning_rate": 1.9325110544637868e-05, "loss": 1.3135, "step": 3120 }, { "epoch": 0.17063269685496768, "grad_norm": 1.367658019065857, "learning_rate": 1.932445045452079e-05, "loss": 1.4483, "step": 3121 }, { "epoch": 0.17068736929868922, "grad_norm": 1.341949462890625, "learning_rate": 1.9323790053038434e-05, "loss": 1.351, "step": 3122 }, { "epoch": 0.17074204174241078, "grad_norm": 1.9821809530258179, "learning_rate": 1.9323129340212844e-05, "loss": 1.2959, "step": 3123 }, { "epoch": 0.17079671418613235, "grad_norm": 1.5372717380523682, "learning_rate": 1.932246831606609e-05, "loss": 1.4405, "step": 3124 }, { "epoch": 0.17085138662985389, "grad_norm": 1.6916441917419434, "learning_rate": 1.9321806980620246e-05, "loss": 1.4797, "step": 3125 }, { "epoch": 0.17090605907357545, "grad_norm": 1.602028489112854, "learning_rate": 1.932114533389739e-05, "loss": 1.1764, "step": 3126 }, { "epoch": 0.170960731517297, "grad_norm": 1.689921498298645, "learning_rate": 1.932048337591962e-05, "loss": 1.3074, "step": 3127 }, { "epoch": 0.17101540396101855, "grad_norm": 1.840590476989746, "learning_rate": 1.931982110670904e-05, "loss": 1.512, "step": 3128 }, { "epoch": 0.1710700764047401, "grad_norm": 1.4035892486572266, "learning_rate": 1.931915852628776e-05, "loss": 1.3988, "step": 3129 }, { "epoch": 0.17112474884846166, "grad_norm": 1.7173727750778198, "learning_rate": 1.931849563467791e-05, "loss": 1.6065, "step": 3130 }, { "epoch": 0.17117942129218322, "grad_norm": 1.5026602745056152, "learning_rate": 1.9317832431901623e-05, "loss": 1.4081, "step": 3131 }, { "epoch": 0.17123409373590476, "grad_norm": 1.4211344718933105, "learning_rate": 1.9317168917981048e-05, "loss": 1.7165, "step": 3132 }, { "epoch": 0.17128876617962632, "grad_norm": 1.5540705919265747, "learning_rate": 1.931650509293834e-05, "loss": 1.4018, "step": 3133 }, { "epoch": 0.17134343862334786, "grad_norm": 1.437910556793213, "learning_rate": 1.9315840956795663e-05, "loss": 1.4649, "step": 3134 }, { "epoch": 0.17139811106706943, "grad_norm": 1.4215792417526245, "learning_rate": 1.9315176509575196e-05, "loss": 1.3768, "step": 3135 }, { "epoch": 0.17145278351079096, "grad_norm": 1.283174753189087, "learning_rate": 1.9314511751299128e-05, "loss": 1.4014, "step": 3136 }, { "epoch": 0.17150745595451253, "grad_norm": 1.444510579109192, "learning_rate": 1.9313846681989655e-05, "loss": 1.7285, "step": 3137 }, { "epoch": 0.1715621283982341, "grad_norm": 0.945478618144989, "learning_rate": 1.931318130166899e-05, "loss": 1.5413, "step": 3138 }, { "epoch": 0.17161680084195563, "grad_norm": 1.3820987939834595, "learning_rate": 1.931251561035934e-05, "loss": 1.5283, "step": 3139 }, { "epoch": 0.1716714732856772, "grad_norm": 1.2741066217422485, "learning_rate": 1.9311849608082945e-05, "loss": 1.3765, "step": 3140 }, { "epoch": 0.17172614572939873, "grad_norm": 1.9900118112564087, "learning_rate": 1.931118329486204e-05, "loss": 1.4097, "step": 3141 }, { "epoch": 0.1717808181731203, "grad_norm": 1.5137975215911865, "learning_rate": 1.9310516670718877e-05, "loss": 1.4155, "step": 3142 }, { "epoch": 0.17183549061684184, "grad_norm": 1.3237608671188354, "learning_rate": 1.930984973567571e-05, "loss": 1.4852, "step": 3143 }, { "epoch": 0.1718901630605634, "grad_norm": 1.462138295173645, "learning_rate": 1.9309182489754818e-05, "loss": 1.4135, "step": 3144 }, { "epoch": 0.17194483550428497, "grad_norm": 1.3201730251312256, "learning_rate": 1.930851493297848e-05, "loss": 1.5091, "step": 3145 }, { "epoch": 0.1719995079480065, "grad_norm": 2.210689067840576, "learning_rate": 1.9307847065368982e-05, "loss": 1.2154, "step": 3146 }, { "epoch": 0.17205418039172807, "grad_norm": 1.4923934936523438, "learning_rate": 1.9307178886948626e-05, "loss": 1.1631, "step": 3147 }, { "epoch": 0.1721088528354496, "grad_norm": 1.2528613805770874, "learning_rate": 1.9306510397739733e-05, "loss": 1.4504, "step": 3148 }, { "epoch": 0.17216352527917117, "grad_norm": 1.5700141191482544, "learning_rate": 1.9305841597764615e-05, "loss": 1.3437, "step": 3149 }, { "epoch": 0.1722181977228927, "grad_norm": 1.2504992485046387, "learning_rate": 1.930517248704561e-05, "loss": 1.3273, "step": 3150 }, { "epoch": 0.17227287016661427, "grad_norm": 1.4225314855575562, "learning_rate": 1.9304503065605066e-05, "loss": 1.569, "step": 3151 }, { "epoch": 0.17232754261033584, "grad_norm": 1.541516900062561, "learning_rate": 1.930383333346532e-05, "loss": 1.4261, "step": 3152 }, { "epoch": 0.17238221505405737, "grad_norm": 1.4538953304290771, "learning_rate": 1.930316329064876e-05, "loss": 1.3265, "step": 3153 }, { "epoch": 0.17243688749777894, "grad_norm": 1.5210895538330078, "learning_rate": 1.9302492937177736e-05, "loss": 1.4325, "step": 3154 }, { "epoch": 0.17249155994150048, "grad_norm": 2.0095739364624023, "learning_rate": 1.930182227307465e-05, "loss": 1.4641, "step": 3155 }, { "epoch": 0.17254623238522204, "grad_norm": 2.0630552768707275, "learning_rate": 1.9301151298361887e-05, "loss": 1.6214, "step": 3156 }, { "epoch": 0.17260090482894358, "grad_norm": 1.4769401550292969, "learning_rate": 1.9300480013061863e-05, "loss": 1.4495, "step": 3157 }, { "epoch": 0.17265557727266514, "grad_norm": 2.0903069972991943, "learning_rate": 1.929980841719698e-05, "loss": 1.3768, "step": 3158 }, { "epoch": 0.1727102497163867, "grad_norm": 1.6474182605743408, "learning_rate": 1.9299136510789675e-05, "loss": 1.4589, "step": 3159 }, { "epoch": 0.17276492216010825, "grad_norm": 1.5111244916915894, "learning_rate": 1.9298464293862377e-05, "loss": 1.1607, "step": 3160 }, { "epoch": 0.1728195946038298, "grad_norm": 1.8957055807113647, "learning_rate": 1.9297791766437538e-05, "loss": 1.3542, "step": 3161 }, { "epoch": 0.17287426704755135, "grad_norm": 1.2982065677642822, "learning_rate": 1.929711892853762e-05, "loss": 1.4805, "step": 3162 }, { "epoch": 0.17292893949127291, "grad_norm": 1.5447497367858887, "learning_rate": 1.9296445780185077e-05, "loss": 1.4056, "step": 3163 }, { "epoch": 0.17298361193499445, "grad_norm": 1.541083812713623, "learning_rate": 1.92957723214024e-05, "loss": 1.3148, "step": 3164 }, { "epoch": 0.17303828437871602, "grad_norm": 1.495003581047058, "learning_rate": 1.9295098552212067e-05, "loss": 1.592, "step": 3165 }, { "epoch": 0.17309295682243758, "grad_norm": 1.721265196800232, "learning_rate": 1.9294424472636587e-05, "loss": 1.519, "step": 3166 }, { "epoch": 0.17314762926615912, "grad_norm": 1.3177733421325684, "learning_rate": 1.9293750082698457e-05, "loss": 1.6678, "step": 3167 }, { "epoch": 0.17320230170988068, "grad_norm": 1.4708083868026733, "learning_rate": 1.929307538242021e-05, "loss": 1.2742, "step": 3168 }, { "epoch": 0.17325697415360222, "grad_norm": 1.47951340675354, "learning_rate": 1.9292400371824365e-05, "loss": 1.4458, "step": 3169 }, { "epoch": 0.1733116465973238, "grad_norm": 1.6677340269088745, "learning_rate": 1.929172505093347e-05, "loss": 1.4267, "step": 3170 }, { "epoch": 0.17336631904104532, "grad_norm": 1.6696587800979614, "learning_rate": 1.9291049419770067e-05, "loss": 1.2672, "step": 3171 }, { "epoch": 0.1734209914847669, "grad_norm": 1.7699213027954102, "learning_rate": 1.9290373478356724e-05, "loss": 1.5817, "step": 3172 }, { "epoch": 0.17347566392848845, "grad_norm": 1.559754490852356, "learning_rate": 1.928969722671601e-05, "loss": 1.5144, "step": 3173 }, { "epoch": 0.17353033637221, "grad_norm": 1.498030185699463, "learning_rate": 1.9289020664870505e-05, "loss": 1.3074, "step": 3174 }, { "epoch": 0.17358500881593156, "grad_norm": 1.5961552858352661, "learning_rate": 1.928834379284281e-05, "loss": 1.7105, "step": 3175 }, { "epoch": 0.1736396812596531, "grad_norm": 1.328994870185852, "learning_rate": 1.928766661065551e-05, "loss": 1.4273, "step": 3176 }, { "epoch": 0.17369435370337466, "grad_norm": 1.6137752532958984, "learning_rate": 1.928698911833123e-05, "loss": 1.3328, "step": 3177 }, { "epoch": 0.1737490261470962, "grad_norm": 1.4719239473342896, "learning_rate": 1.9286311315892595e-05, "loss": 1.4278, "step": 3178 }, { "epoch": 0.17380369859081776, "grad_norm": 1.3735195398330688, "learning_rate": 1.9285633203362234e-05, "loss": 1.5924, "step": 3179 }, { "epoch": 0.17385837103453933, "grad_norm": 1.3632441759109497, "learning_rate": 1.9284954780762787e-05, "loss": 1.4174, "step": 3180 }, { "epoch": 0.17391304347826086, "grad_norm": 1.2753674983978271, "learning_rate": 1.9284276048116915e-05, "loss": 1.4694, "step": 3181 }, { "epoch": 0.17396771592198243, "grad_norm": 1.5819717645645142, "learning_rate": 1.9283597005447277e-05, "loss": 1.6474, "step": 3182 }, { "epoch": 0.17402238836570397, "grad_norm": 1.2759345769882202, "learning_rate": 1.9282917652776554e-05, "loss": 1.4661, "step": 3183 }, { "epoch": 0.17407706080942553, "grad_norm": 1.610661506652832, "learning_rate": 1.9282237990127425e-05, "loss": 1.4018, "step": 3184 }, { "epoch": 0.17413173325314707, "grad_norm": 1.1496644020080566, "learning_rate": 1.9281558017522588e-05, "loss": 1.416, "step": 3185 }, { "epoch": 0.17418640569686863, "grad_norm": 1.5093488693237305, "learning_rate": 1.928087773498475e-05, "loss": 1.3351, "step": 3186 }, { "epoch": 0.1742410781405902, "grad_norm": 1.5648506879806519, "learning_rate": 1.9280197142536625e-05, "loss": 1.2458, "step": 3187 }, { "epoch": 0.17429575058431174, "grad_norm": 1.456801176071167, "learning_rate": 1.927951624020094e-05, "loss": 1.4453, "step": 3188 }, { "epoch": 0.1743504230280333, "grad_norm": 1.5647553205490112, "learning_rate": 1.9278835028000434e-05, "loss": 1.4419, "step": 3189 }, { "epoch": 0.17440509547175484, "grad_norm": 1.3229070901870728, "learning_rate": 1.9278153505957854e-05, "loss": 1.4338, "step": 3190 }, { "epoch": 0.1744597679154764, "grad_norm": 1.3539915084838867, "learning_rate": 1.9277471674095952e-05, "loss": 1.405, "step": 3191 }, { "epoch": 0.17451444035919794, "grad_norm": 1.24132239818573, "learning_rate": 1.9276789532437506e-05, "loss": 1.2797, "step": 3192 }, { "epoch": 0.1745691128029195, "grad_norm": 1.4148123264312744, "learning_rate": 1.9276107081005287e-05, "loss": 1.525, "step": 3193 }, { "epoch": 0.17462378524664107, "grad_norm": 1.5408523082733154, "learning_rate": 1.9275424319822088e-05, "loss": 1.3864, "step": 3194 }, { "epoch": 0.1746784576903626, "grad_norm": 1.3449515104293823, "learning_rate": 1.92747412489107e-05, "loss": 1.3326, "step": 3195 }, { "epoch": 0.17473313013408417, "grad_norm": 1.6390230655670166, "learning_rate": 1.927405786829394e-05, "loss": 1.3355, "step": 3196 }, { "epoch": 0.1747878025778057, "grad_norm": 1.4666671752929688, "learning_rate": 1.927337417799463e-05, "loss": 1.3727, "step": 3197 }, { "epoch": 0.17484247502152728, "grad_norm": 1.6979738473892212, "learning_rate": 1.927269017803559e-05, "loss": 1.2942, "step": 3198 }, { "epoch": 0.17489714746524881, "grad_norm": 1.4777473211288452, "learning_rate": 1.9272005868439667e-05, "loss": 1.2851, "step": 3199 }, { "epoch": 0.17495181990897038, "grad_norm": 1.2726755142211914, "learning_rate": 1.927132124922971e-05, "loss": 1.5381, "step": 3200 }, { "epoch": 0.17500649235269194, "grad_norm": 1.8815404176712036, "learning_rate": 1.9270636320428583e-05, "loss": 1.3549, "step": 3201 }, { "epoch": 0.17506116479641348, "grad_norm": 1.608542561531067, "learning_rate": 1.9269951082059152e-05, "loss": 1.3737, "step": 3202 }, { "epoch": 0.17511583724013505, "grad_norm": 1.382571816444397, "learning_rate": 1.9269265534144306e-05, "loss": 1.4094, "step": 3203 }, { "epoch": 0.17517050968385658, "grad_norm": 1.4755204916000366, "learning_rate": 1.926857967670693e-05, "loss": 1.5069, "step": 3204 }, { "epoch": 0.17522518212757815, "grad_norm": 1.2678354978561401, "learning_rate": 1.9267893509769927e-05, "loss": 1.6374, "step": 3205 }, { "epoch": 0.1752798545712997, "grad_norm": 1.5258554220199585, "learning_rate": 1.9267207033356215e-05, "loss": 1.4316, "step": 3206 }, { "epoch": 0.17533452701502125, "grad_norm": 1.4616429805755615, "learning_rate": 1.9266520247488716e-05, "loss": 1.4193, "step": 3207 }, { "epoch": 0.17538919945874282, "grad_norm": 1.4749833345413208, "learning_rate": 1.9265833152190358e-05, "loss": 1.5267, "step": 3208 }, { "epoch": 0.17544387190246435, "grad_norm": 1.88829505443573, "learning_rate": 1.926514574748409e-05, "loss": 1.4257, "step": 3209 }, { "epoch": 0.17549854434618592, "grad_norm": 1.6384061574935913, "learning_rate": 1.926445803339286e-05, "loss": 1.3445, "step": 3210 }, { "epoch": 0.17555321678990746, "grad_norm": 1.6705843210220337, "learning_rate": 1.926377000993964e-05, "loss": 1.5314, "step": 3211 }, { "epoch": 0.17560788923362902, "grad_norm": 1.354264497756958, "learning_rate": 1.92630816771474e-05, "loss": 1.4098, "step": 3212 }, { "epoch": 0.1756625616773506, "grad_norm": 1.0406694412231445, "learning_rate": 1.9262393035039132e-05, "loss": 1.4929, "step": 3213 }, { "epoch": 0.17571723412107212, "grad_norm": 1.9816030263900757, "learning_rate": 1.926170408363782e-05, "loss": 1.5349, "step": 3214 }, { "epoch": 0.1757719065647937, "grad_norm": 1.395647644996643, "learning_rate": 1.9261014822966474e-05, "loss": 1.3625, "step": 3215 }, { "epoch": 0.17582657900851523, "grad_norm": 1.4653589725494385, "learning_rate": 1.9260325253048116e-05, "loss": 1.5083, "step": 3216 }, { "epoch": 0.1758812514522368, "grad_norm": 1.265745997428894, "learning_rate": 1.9259635373905764e-05, "loss": 1.5694, "step": 3217 }, { "epoch": 0.17593592389595833, "grad_norm": 1.4635357856750488, "learning_rate": 1.925894518556246e-05, "loss": 1.4281, "step": 3218 }, { "epoch": 0.1759905963396799, "grad_norm": 1.2817387580871582, "learning_rate": 1.925825468804125e-05, "loss": 1.828, "step": 3219 }, { "epoch": 0.17604526878340146, "grad_norm": 1.742004156112671, "learning_rate": 1.925756388136519e-05, "loss": 1.7142, "step": 3220 }, { "epoch": 0.176099941227123, "grad_norm": 1.6317769289016724, "learning_rate": 1.9256872765557348e-05, "loss": 1.3578, "step": 3221 }, { "epoch": 0.17615461367084456, "grad_norm": 1.7528492212295532, "learning_rate": 1.9256181340640803e-05, "loss": 1.5329, "step": 3222 }, { "epoch": 0.1762092861145661, "grad_norm": 1.119004487991333, "learning_rate": 1.9255489606638643e-05, "loss": 1.6062, "step": 3223 }, { "epoch": 0.17626395855828766, "grad_norm": 1.451314091682434, "learning_rate": 1.9254797563573967e-05, "loss": 1.4698, "step": 3224 }, { "epoch": 0.1763186310020092, "grad_norm": 1.4310604333877563, "learning_rate": 1.9254105211469883e-05, "loss": 1.3585, "step": 3225 }, { "epoch": 0.17637330344573077, "grad_norm": 1.510898232460022, "learning_rate": 1.9253412550349507e-05, "loss": 1.4211, "step": 3226 }, { "epoch": 0.17642797588945233, "grad_norm": 1.4422861337661743, "learning_rate": 1.9252719580235978e-05, "loss": 1.1578, "step": 3227 }, { "epoch": 0.17648264833317387, "grad_norm": 1.2964863777160645, "learning_rate": 1.9252026301152423e-05, "loss": 1.4487, "step": 3228 }, { "epoch": 0.17653732077689543, "grad_norm": 1.4789834022521973, "learning_rate": 1.9251332713122006e-05, "loss": 1.3778, "step": 3229 }, { "epoch": 0.17659199322061697, "grad_norm": 2.0793673992156982, "learning_rate": 1.925063881616788e-05, "loss": 1.6525, "step": 3230 }, { "epoch": 0.17664666566433854, "grad_norm": 2.425964832305908, "learning_rate": 1.9249944610313212e-05, "loss": 1.2194, "step": 3231 }, { "epoch": 0.17670133810806007, "grad_norm": 1.471175193786621, "learning_rate": 1.9249250095581192e-05, "loss": 1.4831, "step": 3232 }, { "epoch": 0.17675601055178164, "grad_norm": 1.7405437231063843, "learning_rate": 1.9248555271995006e-05, "loss": 1.5572, "step": 3233 }, { "epoch": 0.1768106829955032, "grad_norm": 2.2658777236938477, "learning_rate": 1.9247860139577856e-05, "loss": 1.211, "step": 3234 }, { "epoch": 0.17686535543922474, "grad_norm": 1.5543853044509888, "learning_rate": 1.9247164698352955e-05, "loss": 1.6997, "step": 3235 }, { "epoch": 0.1769200278829463, "grad_norm": 1.8951077461242676, "learning_rate": 1.9246468948343528e-05, "loss": 1.457, "step": 3236 }, { "epoch": 0.17697470032666784, "grad_norm": 1.310366153717041, "learning_rate": 1.9245772889572803e-05, "loss": 1.535, "step": 3237 }, { "epoch": 0.1770293727703894, "grad_norm": 1.3464628458023071, "learning_rate": 1.9245076522064025e-05, "loss": 1.599, "step": 3238 }, { "epoch": 0.17708404521411095, "grad_norm": 1.551705002784729, "learning_rate": 1.924437984584045e-05, "loss": 1.2798, "step": 3239 }, { "epoch": 0.1771387176578325, "grad_norm": 1.613365888595581, "learning_rate": 1.924368286092534e-05, "loss": 1.466, "step": 3240 }, { "epoch": 0.17719339010155408, "grad_norm": 1.3242554664611816, "learning_rate": 1.9242985567341964e-05, "loss": 1.5854, "step": 3241 }, { "epoch": 0.17724806254527561, "grad_norm": 1.5413703918457031, "learning_rate": 1.9242287965113614e-05, "loss": 1.2697, "step": 3242 }, { "epoch": 0.17730273498899718, "grad_norm": 1.9413594007492065, "learning_rate": 1.924159005426358e-05, "loss": 1.43, "step": 3243 }, { "epoch": 0.17735740743271872, "grad_norm": 1.8103567361831665, "learning_rate": 1.924089183481517e-05, "loss": 1.3671, "step": 3244 }, { "epoch": 0.17741207987644028, "grad_norm": 1.3488564491271973, "learning_rate": 1.9240193306791695e-05, "loss": 1.44, "step": 3245 }, { "epoch": 0.17746675232016182, "grad_norm": 1.2303919792175293, "learning_rate": 1.923949447021648e-05, "loss": 1.484, "step": 3246 }, { "epoch": 0.17752142476388338, "grad_norm": 1.2917901277542114, "learning_rate": 1.9238795325112867e-05, "loss": 1.4291, "step": 3247 }, { "epoch": 0.17757609720760495, "grad_norm": 2.6274166107177734, "learning_rate": 1.9238095871504198e-05, "loss": 1.3438, "step": 3248 }, { "epoch": 0.1776307696513265, "grad_norm": 1.4622920751571655, "learning_rate": 1.9237396109413833e-05, "loss": 1.537, "step": 3249 }, { "epoch": 0.17768544209504805, "grad_norm": 1.237523078918457, "learning_rate": 1.9236696038865132e-05, "loss": 1.562, "step": 3250 }, { "epoch": 0.1777401145387696, "grad_norm": 1.5102958679199219, "learning_rate": 1.9235995659881478e-05, "loss": 1.3577, "step": 3251 }, { "epoch": 0.17779478698249115, "grad_norm": 1.3230719566345215, "learning_rate": 1.9235294972486254e-05, "loss": 1.2272, "step": 3252 }, { "epoch": 0.1778494594262127, "grad_norm": 1.2005116939544678, "learning_rate": 1.923459397670286e-05, "loss": 1.5606, "step": 3253 }, { "epoch": 0.17790413186993426, "grad_norm": 1.4219788312911987, "learning_rate": 1.92338926725547e-05, "loss": 1.4758, "step": 3254 }, { "epoch": 0.17795880431365582, "grad_norm": 1.5023164749145508, "learning_rate": 1.9233191060065203e-05, "loss": 1.2882, "step": 3255 }, { "epoch": 0.17801347675737736, "grad_norm": 1.6123223304748535, "learning_rate": 1.9232489139257782e-05, "loss": 1.5191, "step": 3256 }, { "epoch": 0.17806814920109892, "grad_norm": 1.6428111791610718, "learning_rate": 1.9231786910155892e-05, "loss": 1.2428, "step": 3257 }, { "epoch": 0.17812282164482046, "grad_norm": 1.336371898651123, "learning_rate": 1.923108437278297e-05, "loss": 1.4313, "step": 3258 }, { "epoch": 0.17817749408854203, "grad_norm": 1.6746714115142822, "learning_rate": 1.9230381527162477e-05, "loss": 1.2899, "step": 3259 }, { "epoch": 0.17823216653226356, "grad_norm": 1.3816581964492798, "learning_rate": 1.922967837331789e-05, "loss": 1.4223, "step": 3260 }, { "epoch": 0.17828683897598513, "grad_norm": 1.2071282863616943, "learning_rate": 1.9228974911272682e-05, "loss": 1.4894, "step": 3261 }, { "epoch": 0.1783415114197067, "grad_norm": 1.424338698387146, "learning_rate": 1.9228271141050346e-05, "loss": 1.3937, "step": 3262 }, { "epoch": 0.17839618386342823, "grad_norm": 1.4579589366912842, "learning_rate": 1.922756706267438e-05, "loss": 1.4015, "step": 3263 }, { "epoch": 0.1784508563071498, "grad_norm": 1.1780095100402832, "learning_rate": 1.92268626761683e-05, "loss": 1.5051, "step": 3264 }, { "epoch": 0.17850552875087133, "grad_norm": 1.477925419807434, "learning_rate": 1.9226157981555618e-05, "loss": 1.6148, "step": 3265 }, { "epoch": 0.1785602011945929, "grad_norm": 1.336164951324463, "learning_rate": 1.9225452978859876e-05, "loss": 1.3991, "step": 3266 }, { "epoch": 0.17861487363831444, "grad_norm": 1.8047471046447754, "learning_rate": 1.9224747668104607e-05, "loss": 1.4416, "step": 3267 }, { "epoch": 0.178669546082036, "grad_norm": 1.6534570455551147, "learning_rate": 1.922404204931337e-05, "loss": 1.614, "step": 3268 }, { "epoch": 0.17872421852575757, "grad_norm": 1.3620579242706299, "learning_rate": 1.9223336122509724e-05, "loss": 1.2376, "step": 3269 }, { "epoch": 0.1787788909694791, "grad_norm": 2.183300018310547, "learning_rate": 1.9222629887717238e-05, "loss": 1.6133, "step": 3270 }, { "epoch": 0.17883356341320067, "grad_norm": 1.4370880126953125, "learning_rate": 1.92219233449595e-05, "loss": 1.5076, "step": 3271 }, { "epoch": 0.1788882358569222, "grad_norm": 1.6673448085784912, "learning_rate": 1.9221216494260105e-05, "loss": 1.2884, "step": 3272 }, { "epoch": 0.17894290830064377, "grad_norm": 1.474085807800293, "learning_rate": 1.922050933564265e-05, "loss": 1.536, "step": 3273 }, { "epoch": 0.1789975807443653, "grad_norm": 1.3695271015167236, "learning_rate": 1.921980186913075e-05, "loss": 1.4753, "step": 3274 }, { "epoch": 0.17905225318808687, "grad_norm": 1.307868480682373, "learning_rate": 1.9219094094748035e-05, "loss": 1.2353, "step": 3275 }, { "epoch": 0.17910692563180844, "grad_norm": 1.404598593711853, "learning_rate": 1.9218386012518134e-05, "loss": 1.6202, "step": 3276 }, { "epoch": 0.17916159807552998, "grad_norm": 1.2406935691833496, "learning_rate": 1.921767762246469e-05, "loss": 1.3997, "step": 3277 }, { "epoch": 0.17921627051925154, "grad_norm": 1.491146445274353, "learning_rate": 1.921696892461136e-05, "loss": 1.5108, "step": 3278 }, { "epoch": 0.17927094296297308, "grad_norm": 1.5828661918640137, "learning_rate": 1.9216259918981812e-05, "loss": 1.4798, "step": 3279 }, { "epoch": 0.17932561540669464, "grad_norm": 1.5521180629730225, "learning_rate": 1.9215550605599717e-05, "loss": 1.73, "step": 3280 }, { "epoch": 0.17938028785041618, "grad_norm": 1.8862048387527466, "learning_rate": 1.9214840984488763e-05, "loss": 1.2961, "step": 3281 }, { "epoch": 0.17943496029413775, "grad_norm": 1.6828832626342773, "learning_rate": 1.9214131055672648e-05, "loss": 1.2089, "step": 3282 }, { "epoch": 0.1794896327378593, "grad_norm": 1.2741966247558594, "learning_rate": 1.921342081917507e-05, "loss": 1.4844, "step": 3283 }, { "epoch": 0.17954430518158085, "grad_norm": 1.3686009645462036, "learning_rate": 1.9212710275019754e-05, "loss": 1.5826, "step": 3284 }, { "epoch": 0.17959897762530241, "grad_norm": 1.2745622396469116, "learning_rate": 1.921199942323042e-05, "loss": 1.5273, "step": 3285 }, { "epoch": 0.17965365006902395, "grad_norm": 1.6127724647521973, "learning_rate": 1.9211288263830814e-05, "loss": 1.4783, "step": 3286 }, { "epoch": 0.17970832251274552, "grad_norm": 2.0670206546783447, "learning_rate": 1.9210576796844676e-05, "loss": 1.6427, "step": 3287 }, { "epoch": 0.17976299495646705, "grad_norm": 1.5034749507904053, "learning_rate": 1.9209865022295765e-05, "loss": 1.3248, "step": 3288 }, { "epoch": 0.17981766740018862, "grad_norm": 1.588435173034668, "learning_rate": 1.9209152940207846e-05, "loss": 1.3738, "step": 3289 }, { "epoch": 0.17987233984391018, "grad_norm": 1.810770869255066, "learning_rate": 1.9208440550604704e-05, "loss": 1.5511, "step": 3290 }, { "epoch": 0.17992701228763172, "grad_norm": 1.6244287490844727, "learning_rate": 1.9207727853510126e-05, "loss": 1.5341, "step": 3291 }, { "epoch": 0.1799816847313533, "grad_norm": 1.2439044713974, "learning_rate": 1.9207014848947905e-05, "loss": 1.414, "step": 3292 }, { "epoch": 0.18003635717507482, "grad_norm": 1.489519715309143, "learning_rate": 1.9206301536941855e-05, "loss": 1.4262, "step": 3293 }, { "epoch": 0.1800910296187964, "grad_norm": 1.713160514831543, "learning_rate": 1.920558791751579e-05, "loss": 1.3619, "step": 3294 }, { "epoch": 0.18014570206251793, "grad_norm": 2.0893001556396484, "learning_rate": 1.9204873990693546e-05, "loss": 1.25, "step": 3295 }, { "epoch": 0.1802003745062395, "grad_norm": 1.2638994455337524, "learning_rate": 1.920415975649896e-05, "loss": 1.5143, "step": 3296 }, { "epoch": 0.18025504694996106, "grad_norm": 1.6351981163024902, "learning_rate": 1.9203445214955877e-05, "loss": 1.2879, "step": 3297 }, { "epoch": 0.1803097193936826, "grad_norm": 1.6193493604660034, "learning_rate": 1.9202730366088165e-05, "loss": 1.2193, "step": 3298 }, { "epoch": 0.18036439183740416, "grad_norm": 1.5750741958618164, "learning_rate": 1.9202015209919692e-05, "loss": 1.6023, "step": 3299 }, { "epoch": 0.1804190642811257, "grad_norm": 1.4111576080322266, "learning_rate": 1.9201299746474337e-05, "loss": 1.423, "step": 3300 }, { "epoch": 0.18047373672484726, "grad_norm": 1.4375439882278442, "learning_rate": 1.9200583975775996e-05, "loss": 1.3424, "step": 3301 }, { "epoch": 0.1805284091685688, "grad_norm": 3.0823121070861816, "learning_rate": 1.919986789784856e-05, "loss": 1.3848, "step": 3302 }, { "epoch": 0.18058308161229036, "grad_norm": 1.3655601739883423, "learning_rate": 1.9199151512715948e-05, "loss": 1.4721, "step": 3303 }, { "epoch": 0.18063775405601193, "grad_norm": 2.0154221057891846, "learning_rate": 1.919843482040208e-05, "loss": 1.5292, "step": 3304 }, { "epoch": 0.18069242649973347, "grad_norm": 2.131226062774658, "learning_rate": 1.919771782093089e-05, "loss": 1.2831, "step": 3305 }, { "epoch": 0.18074709894345503, "grad_norm": 1.7872081995010376, "learning_rate": 1.919700051432632e-05, "loss": 1.3398, "step": 3306 }, { "epoch": 0.18080177138717657, "grad_norm": 1.4559319019317627, "learning_rate": 1.919628290061232e-05, "loss": 1.5014, "step": 3307 }, { "epoch": 0.18085644383089813, "grad_norm": 1.5747606754302979, "learning_rate": 1.9195564979812854e-05, "loss": 1.5323, "step": 3308 }, { "epoch": 0.18091111627461967, "grad_norm": 1.3273825645446777, "learning_rate": 1.9194846751951897e-05, "loss": 1.5841, "step": 3309 }, { "epoch": 0.18096578871834124, "grad_norm": 1.872076153755188, "learning_rate": 1.919412821705343e-05, "loss": 1.1509, "step": 3310 }, { "epoch": 0.1810204611620628, "grad_norm": 3.3995320796966553, "learning_rate": 1.9193409375141446e-05, "loss": 1.2872, "step": 3311 }, { "epoch": 0.18107513360578434, "grad_norm": 1.6156847476959229, "learning_rate": 1.919269022623995e-05, "loss": 1.4378, "step": 3312 }, { "epoch": 0.1811298060495059, "grad_norm": 1.388153314590454, "learning_rate": 1.9191970770372955e-05, "loss": 1.2823, "step": 3313 }, { "epoch": 0.18118447849322744, "grad_norm": 1.291616678237915, "learning_rate": 1.919125100756449e-05, "loss": 1.2405, "step": 3314 }, { "epoch": 0.181239150936949, "grad_norm": 1.476436734199524, "learning_rate": 1.919053093783858e-05, "loss": 1.5425, "step": 3315 }, { "epoch": 0.18129382338067057, "grad_norm": 1.3272895812988281, "learning_rate": 1.9189810561219282e-05, "loss": 1.4847, "step": 3316 }, { "epoch": 0.1813484958243921, "grad_norm": 1.8435523509979248, "learning_rate": 1.918908987773064e-05, "loss": 1.1662, "step": 3317 }, { "epoch": 0.18140316826811367, "grad_norm": 1.2557417154312134, "learning_rate": 1.918836888739673e-05, "loss": 1.6189, "step": 3318 }, { "epoch": 0.1814578407118352, "grad_norm": 1.3172765970230103, "learning_rate": 1.9187647590241615e-05, "loss": 1.5357, "step": 3319 }, { "epoch": 0.18151251315555678, "grad_norm": 2.282494306564331, "learning_rate": 1.918692598628939e-05, "loss": 1.17, "step": 3320 }, { "epoch": 0.1815671855992783, "grad_norm": 1.5557984113693237, "learning_rate": 1.918620407556415e-05, "loss": 1.6055, "step": 3321 }, { "epoch": 0.18162185804299988, "grad_norm": 1.1988648176193237, "learning_rate": 1.918548185809e-05, "loss": 1.5737, "step": 3322 }, { "epoch": 0.18167653048672144, "grad_norm": 1.535181999206543, "learning_rate": 1.918475933389105e-05, "loss": 1.4378, "step": 3323 }, { "epoch": 0.18173120293044298, "grad_norm": 1.4342960119247437, "learning_rate": 1.918403650299144e-05, "loss": 1.3514, "step": 3324 }, { "epoch": 0.18178587537416455, "grad_norm": 1.995713472366333, "learning_rate": 1.91833133654153e-05, "loss": 1.3224, "step": 3325 }, { "epoch": 0.18184054781788608, "grad_norm": 1.6034923791885376, "learning_rate": 1.9182589921186777e-05, "loss": 1.3601, "step": 3326 }, { "epoch": 0.18189522026160765, "grad_norm": 1.349507451057434, "learning_rate": 1.9181866170330025e-05, "loss": 1.3999, "step": 3327 }, { "epoch": 0.18194989270532919, "grad_norm": 1.6918001174926758, "learning_rate": 1.918114211286922e-05, "loss": 1.2353, "step": 3328 }, { "epoch": 0.18200456514905075, "grad_norm": 1.376750111579895, "learning_rate": 1.9180417748828534e-05, "loss": 1.5653, "step": 3329 }, { "epoch": 0.18205923759277232, "grad_norm": 1.6320205926895142, "learning_rate": 1.917969307823216e-05, "loss": 1.206, "step": 3330 }, { "epoch": 0.18211391003649385, "grad_norm": 1.5671718120574951, "learning_rate": 1.9178968101104285e-05, "loss": 1.512, "step": 3331 }, { "epoch": 0.18216858248021542, "grad_norm": 1.437673568725586, "learning_rate": 1.9178242817469133e-05, "loss": 1.6022, "step": 3332 }, { "epoch": 0.18222325492393696, "grad_norm": 1.439050555229187, "learning_rate": 1.9177517227350914e-05, "loss": 1.5373, "step": 3333 }, { "epoch": 0.18227792736765852, "grad_norm": 1.4718924760818481, "learning_rate": 1.9176791330773858e-05, "loss": 1.2502, "step": 3334 }, { "epoch": 0.18233259981138006, "grad_norm": 1.2603029012680054, "learning_rate": 1.9176065127762206e-05, "loss": 1.7183, "step": 3335 }, { "epoch": 0.18238727225510162, "grad_norm": 1.762696385383606, "learning_rate": 1.9175338618340208e-05, "loss": 1.4437, "step": 3336 }, { "epoch": 0.1824419446988232, "grad_norm": 1.5989503860473633, "learning_rate": 1.9174611802532124e-05, "loss": 1.3415, "step": 3337 }, { "epoch": 0.18249661714254473, "grad_norm": 1.2481266260147095, "learning_rate": 1.917388468036222e-05, "loss": 1.7313, "step": 3338 }, { "epoch": 0.1825512895862663, "grad_norm": 1.7269465923309326, "learning_rate": 1.917315725185478e-05, "loss": 1.2722, "step": 3339 }, { "epoch": 0.18260596202998783, "grad_norm": 1.3646869659423828, "learning_rate": 1.9172429517034094e-05, "loss": 1.437, "step": 3340 }, { "epoch": 0.1826606344737094, "grad_norm": 1.5008593797683716, "learning_rate": 1.9171701475924466e-05, "loss": 1.4343, "step": 3341 }, { "epoch": 0.18271530691743093, "grad_norm": 1.497603178024292, "learning_rate": 1.91709731285502e-05, "loss": 1.5477, "step": 3342 }, { "epoch": 0.1827699793611525, "grad_norm": 1.6084418296813965, "learning_rate": 1.917024447493562e-05, "loss": 1.1692, "step": 3343 }, { "epoch": 0.18282465180487406, "grad_norm": 2.084118604660034, "learning_rate": 1.9169515515105063e-05, "loss": 1.5645, "step": 3344 }, { "epoch": 0.1828793242485956, "grad_norm": 1.3936898708343506, "learning_rate": 1.9168786249082862e-05, "loss": 1.366, "step": 3345 }, { "epoch": 0.18293399669231716, "grad_norm": 1.5438787937164307, "learning_rate": 1.9168056676893373e-05, "loss": 1.6794, "step": 3346 }, { "epoch": 0.1829886691360387, "grad_norm": 1.3633484840393066, "learning_rate": 1.916732679856096e-05, "loss": 1.6943, "step": 3347 }, { "epoch": 0.18304334157976027, "grad_norm": 1.555654525756836, "learning_rate": 1.9166596614109993e-05, "loss": 1.4402, "step": 3348 }, { "epoch": 0.1830980140234818, "grad_norm": 2.0286779403686523, "learning_rate": 1.9165866123564854e-05, "loss": 1.5633, "step": 3349 }, { "epoch": 0.18315268646720337, "grad_norm": 1.2671599388122559, "learning_rate": 1.9165135326949937e-05, "loss": 1.3269, "step": 3350 }, { "epoch": 0.18320735891092493, "grad_norm": 1.6947603225708008, "learning_rate": 1.9164404224289645e-05, "loss": 1.3878, "step": 3351 }, { "epoch": 0.18326203135464647, "grad_norm": 1.4794855117797852, "learning_rate": 1.9163672815608392e-05, "loss": 1.6391, "step": 3352 }, { "epoch": 0.18331670379836804, "grad_norm": 1.9218971729278564, "learning_rate": 1.9162941100930597e-05, "loss": 1.7166, "step": 3353 }, { "epoch": 0.18337137624208957, "grad_norm": 1.4208929538726807, "learning_rate": 1.9162209080280704e-05, "loss": 1.5291, "step": 3354 }, { "epoch": 0.18342604868581114, "grad_norm": 1.5883320569992065, "learning_rate": 1.9161476753683144e-05, "loss": 1.3121, "step": 3355 }, { "epoch": 0.18348072112953268, "grad_norm": 1.4986684322357178, "learning_rate": 1.916074412116238e-05, "loss": 1.147, "step": 3356 }, { "epoch": 0.18353539357325424, "grad_norm": 1.643059492111206, "learning_rate": 1.916001118274287e-05, "loss": 1.5559, "step": 3357 }, { "epoch": 0.1835900660169758, "grad_norm": 1.3504316806793213, "learning_rate": 1.9159277938449095e-05, "loss": 1.4423, "step": 3358 }, { "epoch": 0.18364473846069734, "grad_norm": 1.806321382522583, "learning_rate": 1.9158544388305534e-05, "loss": 1.482, "step": 3359 }, { "epoch": 0.1836994109044189, "grad_norm": 1.4675003290176392, "learning_rate": 1.9157810532336686e-05, "loss": 1.4266, "step": 3360 }, { "epoch": 0.18375408334814045, "grad_norm": 1.2883799076080322, "learning_rate": 1.9157076370567056e-05, "loss": 1.7327, "step": 3361 }, { "epoch": 0.183808755791862, "grad_norm": 1.7647857666015625, "learning_rate": 1.9156341903021158e-05, "loss": 1.3364, "step": 3362 }, { "epoch": 0.18386342823558355, "grad_norm": 1.5032397508621216, "learning_rate": 1.915560712972352e-05, "loss": 1.3381, "step": 3363 }, { "epoch": 0.1839181006793051, "grad_norm": 1.3642157316207886, "learning_rate": 1.915487205069867e-05, "loss": 1.454, "step": 3364 }, { "epoch": 0.18397277312302668, "grad_norm": 1.5259160995483398, "learning_rate": 1.9154136665971163e-05, "loss": 1.7189, "step": 3365 }, { "epoch": 0.18402744556674822, "grad_norm": 1.7070008516311646, "learning_rate": 1.915340097556555e-05, "loss": 1.3246, "step": 3366 }, { "epoch": 0.18408211801046978, "grad_norm": 1.576074481010437, "learning_rate": 1.9152664979506405e-05, "loss": 1.608, "step": 3367 }, { "epoch": 0.18413679045419132, "grad_norm": 1.2888609170913696, "learning_rate": 1.915192867781829e-05, "loss": 1.4288, "step": 3368 }, { "epoch": 0.18419146289791288, "grad_norm": 1.4644806385040283, "learning_rate": 1.915119207052581e-05, "loss": 1.3698, "step": 3369 }, { "epoch": 0.18424613534163442, "grad_norm": 1.5194557905197144, "learning_rate": 1.9150455157653546e-05, "loss": 1.3924, "step": 3370 }, { "epoch": 0.18430080778535599, "grad_norm": 1.4616299867630005, "learning_rate": 1.9149717939226114e-05, "loss": 1.4914, "step": 3371 }, { "epoch": 0.18435548022907755, "grad_norm": 2.357161045074463, "learning_rate": 1.9148980415268132e-05, "loss": 1.4123, "step": 3372 }, { "epoch": 0.1844101526727991, "grad_norm": 1.3480979204177856, "learning_rate": 1.914824258580422e-05, "loss": 1.2985, "step": 3373 }, { "epoch": 0.18446482511652065, "grad_norm": 1.4232310056686401, "learning_rate": 1.9147504450859026e-05, "loss": 1.4842, "step": 3374 }, { "epoch": 0.1845194975602422, "grad_norm": 1.3145438432693481, "learning_rate": 1.9146766010457192e-05, "loss": 1.4168, "step": 3375 }, { "epoch": 0.18457417000396376, "grad_norm": 1.4277671575546265, "learning_rate": 1.9146027264623376e-05, "loss": 1.5101, "step": 3376 }, { "epoch": 0.1846288424476853, "grad_norm": 1.4839645624160767, "learning_rate": 1.9145288213382248e-05, "loss": 1.3527, "step": 3377 }, { "epoch": 0.18468351489140686, "grad_norm": 1.3047511577606201, "learning_rate": 1.914454885675849e-05, "loss": 1.6425, "step": 3378 }, { "epoch": 0.18473818733512842, "grad_norm": 1.2289388179779053, "learning_rate": 1.914380919477678e-05, "loss": 1.4745, "step": 3379 }, { "epoch": 0.18479285977884996, "grad_norm": 1.09296715259552, "learning_rate": 1.914306922746183e-05, "loss": 1.7492, "step": 3380 }, { "epoch": 0.18484753222257153, "grad_norm": 1.6014394760131836, "learning_rate": 1.9142328954838342e-05, "loss": 1.5036, "step": 3381 }, { "epoch": 0.18490220466629306, "grad_norm": 1.6617790460586548, "learning_rate": 1.914158837693104e-05, "loss": 1.3173, "step": 3382 }, { "epoch": 0.18495687711001463, "grad_norm": 1.559147834777832, "learning_rate": 1.9140847493764646e-05, "loss": 1.5217, "step": 3383 }, { "epoch": 0.18501154955373617, "grad_norm": 1.1959089040756226, "learning_rate": 1.9140106305363905e-05, "loss": 1.575, "step": 3384 }, { "epoch": 0.18506622199745773, "grad_norm": 1.2286244630813599, "learning_rate": 1.913936481175357e-05, "loss": 1.6518, "step": 3385 }, { "epoch": 0.1851208944411793, "grad_norm": 1.4423370361328125, "learning_rate": 1.9138623012958394e-05, "loss": 1.2183, "step": 3386 }, { "epoch": 0.18517556688490083, "grad_norm": 1.417704463005066, "learning_rate": 1.9137880909003155e-05, "loss": 1.1843, "step": 3387 }, { "epoch": 0.1852302393286224, "grad_norm": 1.3446335792541504, "learning_rate": 1.9137138499912626e-05, "loss": 1.5431, "step": 3388 }, { "epoch": 0.18528491177234394, "grad_norm": 1.6541006565093994, "learning_rate": 1.9136395785711602e-05, "loss": 1.4196, "step": 3389 }, { "epoch": 0.1853395842160655, "grad_norm": 1.7673988342285156, "learning_rate": 1.9135652766424887e-05, "loss": 1.4375, "step": 3390 }, { "epoch": 0.18539425665978704, "grad_norm": 1.4271591901779175, "learning_rate": 1.9134909442077286e-05, "loss": 1.4957, "step": 3391 }, { "epoch": 0.1854489291035086, "grad_norm": 1.3504106998443604, "learning_rate": 1.9134165812693623e-05, "loss": 1.2934, "step": 3392 }, { "epoch": 0.18550360154723017, "grad_norm": 1.2865861654281616, "learning_rate": 1.913342187829873e-05, "loss": 1.3615, "step": 3393 }, { "epoch": 0.1855582739909517, "grad_norm": 1.1389464139938354, "learning_rate": 1.913267763891745e-05, "loss": 1.4745, "step": 3394 }, { "epoch": 0.18561294643467327, "grad_norm": 1.1568500995635986, "learning_rate": 1.913193309457463e-05, "loss": 1.5001, "step": 3395 }, { "epoch": 0.1856676188783948, "grad_norm": 1.6700729131698608, "learning_rate": 1.9131188245295135e-05, "loss": 1.6831, "step": 3396 }, { "epoch": 0.18572229132211637, "grad_norm": 1.3848786354064941, "learning_rate": 1.9130443091103838e-05, "loss": 1.526, "step": 3397 }, { "epoch": 0.1857769637658379, "grad_norm": 2.045795440673828, "learning_rate": 1.9129697632025623e-05, "loss": 1.3234, "step": 3398 }, { "epoch": 0.18583163620955948, "grad_norm": 1.4402766227722168, "learning_rate": 1.912895186808538e-05, "loss": 1.4109, "step": 3399 }, { "epoch": 0.18588630865328104, "grad_norm": 1.3582193851470947, "learning_rate": 1.9128205799308012e-05, "loss": 1.4029, "step": 3400 }, { "epoch": 0.18594098109700258, "grad_norm": 1.5219321250915527, "learning_rate": 1.912745942571843e-05, "loss": 1.4753, "step": 3401 }, { "epoch": 0.18599565354072414, "grad_norm": 1.2649143934249878, "learning_rate": 1.9126712747341564e-05, "loss": 1.4348, "step": 3402 }, { "epoch": 0.18605032598444568, "grad_norm": 1.6727886199951172, "learning_rate": 1.912596576420234e-05, "loss": 1.5655, "step": 3403 }, { "epoch": 0.18610499842816725, "grad_norm": 1.7994436025619507, "learning_rate": 1.9125218476325705e-05, "loss": 1.5104, "step": 3404 }, { "epoch": 0.18615967087188878, "grad_norm": 1.3716973066329956, "learning_rate": 1.9124470883736614e-05, "loss": 1.7156, "step": 3405 }, { "epoch": 0.18621434331561035, "grad_norm": 1.7070354223251343, "learning_rate": 1.9123722986460023e-05, "loss": 1.4633, "step": 3406 }, { "epoch": 0.1862690157593319, "grad_norm": 1.3531365394592285, "learning_rate": 1.9122974784520918e-05, "loss": 1.4561, "step": 3407 }, { "epoch": 0.18632368820305345, "grad_norm": 1.6173063516616821, "learning_rate": 1.9122226277944276e-05, "loss": 1.469, "step": 3408 }, { "epoch": 0.18637836064677502, "grad_norm": 1.6497673988342285, "learning_rate": 1.912147746675509e-05, "loss": 1.4147, "step": 3409 }, { "epoch": 0.18643303309049655, "grad_norm": 1.7361290454864502, "learning_rate": 1.912072835097837e-05, "loss": 1.3398, "step": 3410 }, { "epoch": 0.18648770553421812, "grad_norm": 1.657985806465149, "learning_rate": 1.9119978930639127e-05, "loss": 1.3594, "step": 3411 }, { "epoch": 0.18654237797793966, "grad_norm": 1.5704365968704224, "learning_rate": 1.9119229205762386e-05, "loss": 1.3208, "step": 3412 }, { "epoch": 0.18659705042166122, "grad_norm": 1.6545884609222412, "learning_rate": 1.9118479176373184e-05, "loss": 1.4224, "step": 3413 }, { "epoch": 0.18665172286538279, "grad_norm": 1.2712544202804565, "learning_rate": 1.9117728842496562e-05, "loss": 1.4373, "step": 3414 }, { "epoch": 0.18670639530910432, "grad_norm": 1.5168650150299072, "learning_rate": 1.9116978204157583e-05, "loss": 1.7399, "step": 3415 }, { "epoch": 0.1867610677528259, "grad_norm": 1.3615915775299072, "learning_rate": 1.9116227261381305e-05, "loss": 1.4668, "step": 3416 }, { "epoch": 0.18681574019654743, "grad_norm": 1.392422080039978, "learning_rate": 1.9115476014192807e-05, "loss": 1.3051, "step": 3417 }, { "epoch": 0.186870412640269, "grad_norm": 1.4685430526733398, "learning_rate": 1.911472446261718e-05, "loss": 1.409, "step": 3418 }, { "epoch": 0.18692508508399056, "grad_norm": 1.5714861154556274, "learning_rate": 1.911397260667951e-05, "loss": 1.533, "step": 3419 }, { "epoch": 0.1869797575277121, "grad_norm": 1.448624849319458, "learning_rate": 1.9113220446404906e-05, "loss": 1.3513, "step": 3420 }, { "epoch": 0.18703442997143366, "grad_norm": 1.413225769996643, "learning_rate": 1.911246798181849e-05, "loss": 1.4336, "step": 3421 }, { "epoch": 0.1870891024151552, "grad_norm": 1.2507494688034058, "learning_rate": 1.9111715212945384e-05, "loss": 1.708, "step": 3422 }, { "epoch": 0.18714377485887676, "grad_norm": 1.4009809494018555, "learning_rate": 1.9110962139810726e-05, "loss": 1.5119, "step": 3423 }, { "epoch": 0.1871984473025983, "grad_norm": 1.3976269960403442, "learning_rate": 1.9110208762439662e-05, "loss": 1.5595, "step": 3424 }, { "epoch": 0.18725311974631986, "grad_norm": 1.31458580493927, "learning_rate": 1.910945508085735e-05, "loss": 1.6242, "step": 3425 }, { "epoch": 0.18730779219004143, "grad_norm": 1.586242437362671, "learning_rate": 1.910870109508896e-05, "loss": 1.3386, "step": 3426 }, { "epoch": 0.18736246463376297, "grad_norm": 1.409494400024414, "learning_rate": 1.910794680515966e-05, "loss": 1.5651, "step": 3427 }, { "epoch": 0.18741713707748453, "grad_norm": 1.523115634918213, "learning_rate": 1.9107192211094648e-05, "loss": 1.543, "step": 3428 }, { "epoch": 0.18747180952120607, "grad_norm": 1.200339674949646, "learning_rate": 1.9106437312919116e-05, "loss": 1.6239, "step": 3429 }, { "epoch": 0.18752648196492763, "grad_norm": 1.129023790359497, "learning_rate": 1.9105682110658275e-05, "loss": 1.4802, "step": 3430 }, { "epoch": 0.18758115440864917, "grad_norm": 1.4460471868515015, "learning_rate": 1.910492660433734e-05, "loss": 1.4166, "step": 3431 }, { "epoch": 0.18763582685237074, "grad_norm": 1.2031828165054321, "learning_rate": 1.910417079398154e-05, "loss": 1.7016, "step": 3432 }, { "epoch": 0.1876904992960923, "grad_norm": 1.701961636543274, "learning_rate": 1.9103414679616112e-05, "loss": 1.3704, "step": 3433 }, { "epoch": 0.18774517173981384, "grad_norm": 1.142910122871399, "learning_rate": 1.9102658261266307e-05, "loss": 1.5017, "step": 3434 }, { "epoch": 0.1877998441835354, "grad_norm": 1.3583590984344482, "learning_rate": 1.9101901538957385e-05, "loss": 1.4924, "step": 3435 }, { "epoch": 0.18785451662725694, "grad_norm": 2.0630998611450195, "learning_rate": 1.910114451271461e-05, "loss": 1.4695, "step": 3436 }, { "epoch": 0.1879091890709785, "grad_norm": 1.4129400253295898, "learning_rate": 1.9100387182563263e-05, "loss": 1.606, "step": 3437 }, { "epoch": 0.18796386151470004, "grad_norm": 1.4726288318634033, "learning_rate": 1.909962954852863e-05, "loss": 1.0975, "step": 3438 }, { "epoch": 0.1880185339584216, "grad_norm": 1.7592051029205322, "learning_rate": 1.909887161063602e-05, "loss": 1.5597, "step": 3439 }, { "epoch": 0.18807320640214317, "grad_norm": 1.4619410037994385, "learning_rate": 1.9098113368910734e-05, "loss": 1.5761, "step": 3440 }, { "epoch": 0.1881278788458647, "grad_norm": 1.5845531225204468, "learning_rate": 1.9097354823378094e-05, "loss": 1.5611, "step": 3441 }, { "epoch": 0.18818255128958628, "grad_norm": 1.8514974117279053, "learning_rate": 1.9096595974063426e-05, "loss": 1.4124, "step": 3442 }, { "epoch": 0.1882372237333078, "grad_norm": 1.4404314756393433, "learning_rate": 1.9095836820992074e-05, "loss": 1.4058, "step": 3443 }, { "epoch": 0.18829189617702938, "grad_norm": 1.3760418891906738, "learning_rate": 1.9095077364189388e-05, "loss": 1.5208, "step": 3444 }, { "epoch": 0.18834656862075091, "grad_norm": 1.3957587480545044, "learning_rate": 1.9094317603680725e-05, "loss": 1.3409, "step": 3445 }, { "epoch": 0.18840124106447248, "grad_norm": 1.2795077562332153, "learning_rate": 1.9093557539491458e-05, "loss": 1.2396, "step": 3446 }, { "epoch": 0.18845591350819405, "grad_norm": 1.4418087005615234, "learning_rate": 1.909279717164696e-05, "loss": 1.5855, "step": 3447 }, { "epoch": 0.18851058595191558, "grad_norm": 2.204897403717041, "learning_rate": 1.9092036500172635e-05, "loss": 1.2833, "step": 3448 }, { "epoch": 0.18856525839563715, "grad_norm": 1.4160730838775635, "learning_rate": 1.909127552509387e-05, "loss": 1.3782, "step": 3449 }, { "epoch": 0.18861993083935868, "grad_norm": 1.639096975326538, "learning_rate": 1.9090514246436085e-05, "loss": 1.4927, "step": 3450 }, { "epoch": 0.18867460328308025, "grad_norm": 1.6862136125564575, "learning_rate": 1.9089752664224697e-05, "loss": 1.4979, "step": 3451 }, { "epoch": 0.1887292757268018, "grad_norm": 1.460257649421692, "learning_rate": 1.908899077848514e-05, "loss": 1.4101, "step": 3452 }, { "epoch": 0.18878394817052335, "grad_norm": 1.5424948930740356, "learning_rate": 1.9088228589242855e-05, "loss": 1.2601, "step": 3453 }, { "epoch": 0.18883862061424492, "grad_norm": 1.473220705986023, "learning_rate": 1.9087466096523287e-05, "loss": 1.3695, "step": 3454 }, { "epoch": 0.18889329305796645, "grad_norm": 1.3888064622879028, "learning_rate": 1.9086703300351903e-05, "loss": 1.4179, "step": 3455 }, { "epoch": 0.18894796550168802, "grad_norm": 1.751262903213501, "learning_rate": 1.908594020075417e-05, "loss": 1.6668, "step": 3456 }, { "epoch": 0.18900263794540956, "grad_norm": 1.5728110074996948, "learning_rate": 1.9085176797755575e-05, "loss": 1.1762, "step": 3457 }, { "epoch": 0.18905731038913112, "grad_norm": 1.4497390985488892, "learning_rate": 1.908441309138161e-05, "loss": 1.4224, "step": 3458 }, { "epoch": 0.18911198283285266, "grad_norm": 1.438968300819397, "learning_rate": 1.9083649081657773e-05, "loss": 1.4333, "step": 3459 }, { "epoch": 0.18916665527657422, "grad_norm": 1.5116620063781738, "learning_rate": 1.9082884768609578e-05, "loss": 1.3641, "step": 3460 }, { "epoch": 0.1892213277202958, "grad_norm": 1.235630989074707, "learning_rate": 1.9082120152262544e-05, "loss": 1.2246, "step": 3461 }, { "epoch": 0.18927600016401733, "grad_norm": 1.50130295753479, "learning_rate": 1.908135523264221e-05, "loss": 1.3647, "step": 3462 }, { "epoch": 0.1893306726077389, "grad_norm": 1.4758293628692627, "learning_rate": 1.9080590009774114e-05, "loss": 1.5035, "step": 3463 }, { "epoch": 0.18938534505146043, "grad_norm": 1.3764792680740356, "learning_rate": 1.9079824483683808e-05, "loss": 1.2981, "step": 3464 }, { "epoch": 0.189440017495182, "grad_norm": 1.6291382312774658, "learning_rate": 1.907905865439686e-05, "loss": 1.4655, "step": 3465 }, { "epoch": 0.18949468993890353, "grad_norm": 1.5617924928665161, "learning_rate": 1.9078292521938834e-05, "loss": 1.4988, "step": 3466 }, { "epoch": 0.1895493623826251, "grad_norm": 1.5546749830245972, "learning_rate": 1.907752608633532e-05, "loss": 1.4291, "step": 3467 }, { "epoch": 0.18960403482634666, "grad_norm": 1.5183892250061035, "learning_rate": 1.9076759347611908e-05, "loss": 1.4084, "step": 3468 }, { "epoch": 0.1896587072700682, "grad_norm": 1.3796213865280151, "learning_rate": 1.90759923057942e-05, "loss": 1.5212, "step": 3469 }, { "epoch": 0.18971337971378976, "grad_norm": 1.3711947202682495, "learning_rate": 1.9075224960907815e-05, "loss": 1.4823, "step": 3470 }, { "epoch": 0.1897680521575113, "grad_norm": 1.1578952074050903, "learning_rate": 1.9074457312978373e-05, "loss": 1.4931, "step": 3471 }, { "epoch": 0.18982272460123287, "grad_norm": 1.456217885017395, "learning_rate": 1.9073689362031504e-05, "loss": 1.2658, "step": 3472 }, { "epoch": 0.1898773970449544, "grad_norm": 1.3297349214553833, "learning_rate": 1.907292110809286e-05, "loss": 1.3332, "step": 3473 }, { "epoch": 0.18993206948867597, "grad_norm": 1.5923540592193604, "learning_rate": 1.9072152551188085e-05, "loss": 1.2778, "step": 3474 }, { "epoch": 0.18998674193239753, "grad_norm": 1.8086700439453125, "learning_rate": 1.907138369134285e-05, "loss": 1.4527, "step": 3475 }, { "epoch": 0.19004141437611907, "grad_norm": 1.874176025390625, "learning_rate": 1.907061452858283e-05, "loss": 1.344, "step": 3476 }, { "epoch": 0.19009608681984064, "grad_norm": 1.2970778942108154, "learning_rate": 1.90698450629337e-05, "loss": 1.6439, "step": 3477 }, { "epoch": 0.19015075926356217, "grad_norm": 1.6305211782455444, "learning_rate": 1.9069075294421163e-05, "loss": 1.2608, "step": 3478 }, { "epoch": 0.19020543170728374, "grad_norm": 1.3322595357894897, "learning_rate": 1.9068305223070923e-05, "loss": 1.3587, "step": 3479 }, { "epoch": 0.19026010415100528, "grad_norm": 2.031468152999878, "learning_rate": 1.9067534848908688e-05, "loss": 1.2046, "step": 3480 }, { "epoch": 0.19031477659472684, "grad_norm": 1.6843562126159668, "learning_rate": 1.9066764171960187e-05, "loss": 1.4134, "step": 3481 }, { "epoch": 0.1903694490384484, "grad_norm": 1.4679683446884155, "learning_rate": 1.9065993192251157e-05, "loss": 1.5224, "step": 3482 }, { "epoch": 0.19042412148216994, "grad_norm": 1.5888160467147827, "learning_rate": 1.9065221909807337e-05, "loss": 1.4391, "step": 3483 }, { "epoch": 0.1904787939258915, "grad_norm": 1.2336759567260742, "learning_rate": 1.906445032465449e-05, "loss": 1.7041, "step": 3484 }, { "epoch": 0.19053346636961305, "grad_norm": 1.3987784385681152, "learning_rate": 1.9063678436818372e-05, "loss": 1.4673, "step": 3485 }, { "epoch": 0.1905881388133346, "grad_norm": 1.5360193252563477, "learning_rate": 1.9062906246324767e-05, "loss": 1.4025, "step": 3486 }, { "epoch": 0.19064281125705615, "grad_norm": 1.3209540843963623, "learning_rate": 1.9062133753199452e-05, "loss": 1.3712, "step": 3487 }, { "epoch": 0.19069748370077771, "grad_norm": 1.4606382846832275, "learning_rate": 1.906136095746823e-05, "loss": 1.392, "step": 3488 }, { "epoch": 0.19075215614449928, "grad_norm": 1.3567070960998535, "learning_rate": 1.90605878591569e-05, "loss": 1.4603, "step": 3489 }, { "epoch": 0.19080682858822082, "grad_norm": 1.3107552528381348, "learning_rate": 1.9059814458291277e-05, "loss": 1.3474, "step": 3490 }, { "epoch": 0.19086150103194238, "grad_norm": 1.237260103225708, "learning_rate": 1.9059040754897194e-05, "loss": 1.7122, "step": 3491 }, { "epoch": 0.19091617347566392, "grad_norm": 1.5877602100372314, "learning_rate": 1.905826674900048e-05, "loss": 1.6519, "step": 3492 }, { "epoch": 0.19097084591938548, "grad_norm": 1.4746099710464478, "learning_rate": 1.9057492440626983e-05, "loss": 1.2991, "step": 3493 }, { "epoch": 0.19102551836310702, "grad_norm": 1.4165269136428833, "learning_rate": 1.9056717829802562e-05, "loss": 1.3291, "step": 3494 }, { "epoch": 0.1910801908068286, "grad_norm": 1.2510476112365723, "learning_rate": 1.9055942916553078e-05, "loss": 1.4346, "step": 3495 }, { "epoch": 0.19113486325055015, "grad_norm": 1.4094207286834717, "learning_rate": 1.9055167700904412e-05, "loss": 1.2842, "step": 3496 }, { "epoch": 0.1911895356942717, "grad_norm": 1.236383080482483, "learning_rate": 1.9054392182882446e-05, "loss": 1.663, "step": 3497 }, { "epoch": 0.19124420813799325, "grad_norm": 1.6153544187545776, "learning_rate": 1.905361636251308e-05, "loss": 1.4864, "step": 3498 }, { "epoch": 0.1912988805817148, "grad_norm": 1.3860095739364624, "learning_rate": 1.9052840239822218e-05, "loss": 1.2622, "step": 3499 }, { "epoch": 0.19135355302543636, "grad_norm": 1.1541591882705688, "learning_rate": 1.9052063814835774e-05, "loss": 1.5016, "step": 3500 }, { "epoch": 0.1914082254691579, "grad_norm": 1.7839523553848267, "learning_rate": 1.905128708757968e-05, "loss": 1.421, "step": 3501 }, { "epoch": 0.19146289791287946, "grad_norm": 1.4300416707992554, "learning_rate": 1.905051005807987e-05, "loss": 1.3958, "step": 3502 }, { "epoch": 0.19151757035660102, "grad_norm": 1.6532130241394043, "learning_rate": 1.9049732726362295e-05, "loss": 1.3988, "step": 3503 }, { "epoch": 0.19157224280032256, "grad_norm": 1.2954274415969849, "learning_rate": 1.9048955092452905e-05, "loss": 1.4647, "step": 3504 }, { "epoch": 0.19162691524404413, "grad_norm": 1.4941835403442383, "learning_rate": 1.9048177156377673e-05, "loss": 1.4677, "step": 3505 }, { "epoch": 0.19168158768776566, "grad_norm": 1.7709826231002808, "learning_rate": 1.904739891816257e-05, "loss": 1.2561, "step": 3506 }, { "epoch": 0.19173626013148723, "grad_norm": 1.6178646087646484, "learning_rate": 1.9046620377833588e-05, "loss": 1.2287, "step": 3507 }, { "epoch": 0.19179093257520877, "grad_norm": 1.3711456060409546, "learning_rate": 1.9045841535416724e-05, "loss": 1.6253, "step": 3508 }, { "epoch": 0.19184560501893033, "grad_norm": 1.5640515089035034, "learning_rate": 1.9045062390937985e-05, "loss": 1.1276, "step": 3509 }, { "epoch": 0.1919002774626519, "grad_norm": 1.2777724266052246, "learning_rate": 1.9044282944423387e-05, "loss": 1.3505, "step": 3510 }, { "epoch": 0.19195494990637343, "grad_norm": 1.7599387168884277, "learning_rate": 1.9043503195898957e-05, "loss": 1.2705, "step": 3511 }, { "epoch": 0.192009622350095, "grad_norm": 1.1618375778198242, "learning_rate": 1.9042723145390738e-05, "loss": 1.3928, "step": 3512 }, { "epoch": 0.19206429479381654, "grad_norm": 1.2789753675460815, "learning_rate": 1.9041942792924767e-05, "loss": 1.3416, "step": 3513 }, { "epoch": 0.1921189672375381, "grad_norm": 1.3414247035980225, "learning_rate": 1.9041162138527115e-05, "loss": 1.4565, "step": 3514 }, { "epoch": 0.19217363968125964, "grad_norm": 1.3503068685531616, "learning_rate": 1.904038118222384e-05, "loss": 1.5928, "step": 3515 }, { "epoch": 0.1922283121249812, "grad_norm": 1.154059886932373, "learning_rate": 1.9039599924041026e-05, "loss": 1.3738, "step": 3516 }, { "epoch": 0.19228298456870277, "grad_norm": 1.2842415571212769, "learning_rate": 1.903881836400476e-05, "loss": 1.5672, "step": 3517 }, { "epoch": 0.1923376570124243, "grad_norm": 1.5993976593017578, "learning_rate": 1.9038036502141133e-05, "loss": 1.2484, "step": 3518 }, { "epoch": 0.19239232945614587, "grad_norm": 1.3954213857650757, "learning_rate": 1.9037254338476262e-05, "loss": 1.3851, "step": 3519 }, { "epoch": 0.1924470018998674, "grad_norm": 1.6530699729919434, "learning_rate": 1.903647187303626e-05, "loss": 1.2829, "step": 3520 }, { "epoch": 0.19250167434358897, "grad_norm": 1.5049116611480713, "learning_rate": 1.9035689105847258e-05, "loss": 1.4823, "step": 3521 }, { "epoch": 0.19255634678731054, "grad_norm": 1.8590201139450073, "learning_rate": 1.9034906036935393e-05, "loss": 1.4584, "step": 3522 }, { "epoch": 0.19261101923103208, "grad_norm": 1.461620807647705, "learning_rate": 1.9034122666326818e-05, "loss": 1.163, "step": 3523 }, { "epoch": 0.19266569167475364, "grad_norm": 1.4524924755096436, "learning_rate": 1.903333899404769e-05, "loss": 1.7013, "step": 3524 }, { "epoch": 0.19272036411847518, "grad_norm": 1.8037631511688232, "learning_rate": 1.903255502012417e-05, "loss": 1.4221, "step": 3525 }, { "epoch": 0.19277503656219674, "grad_norm": 1.196021556854248, "learning_rate": 1.9031770744582443e-05, "loss": 1.4767, "step": 3526 }, { "epoch": 0.19282970900591828, "grad_norm": 2.1045072078704834, "learning_rate": 1.9030986167448696e-05, "loss": 1.443, "step": 3527 }, { "epoch": 0.19288438144963985, "grad_norm": 1.3939933776855469, "learning_rate": 1.9030201288749133e-05, "loss": 1.6747, "step": 3528 }, { "epoch": 0.1929390538933614, "grad_norm": 1.7488393783569336, "learning_rate": 1.9029416108509953e-05, "loss": 1.4181, "step": 3529 }, { "epoch": 0.19299372633708295, "grad_norm": 1.6247669458389282, "learning_rate": 1.902863062675739e-05, "loss": 1.4033, "step": 3530 }, { "epoch": 0.19304839878080451, "grad_norm": 1.548336386680603, "learning_rate": 1.9027844843517657e-05, "loss": 1.4127, "step": 3531 }, { "epoch": 0.19310307122452605, "grad_norm": 1.697486400604248, "learning_rate": 1.9027058758817e-05, "loss": 1.5071, "step": 3532 }, { "epoch": 0.19315774366824762, "grad_norm": 1.5478111505508423, "learning_rate": 1.9026272372681674e-05, "loss": 1.3489, "step": 3533 }, { "epoch": 0.19321241611196915, "grad_norm": 1.745615005493164, "learning_rate": 1.902548568513793e-05, "loss": 1.246, "step": 3534 }, { "epoch": 0.19326708855569072, "grad_norm": 1.5120642185211182, "learning_rate": 1.9024698696212035e-05, "loss": 1.3853, "step": 3535 }, { "epoch": 0.19332176099941228, "grad_norm": 1.426313877105713, "learning_rate": 1.902391140593028e-05, "loss": 1.4397, "step": 3536 }, { "epoch": 0.19337643344313382, "grad_norm": 1.8839517831802368, "learning_rate": 1.9023123814318943e-05, "loss": 1.3838, "step": 3537 }, { "epoch": 0.1934311058868554, "grad_norm": 1.7655154466629028, "learning_rate": 1.9022335921404332e-05, "loss": 1.2923, "step": 3538 }, { "epoch": 0.19348577833057692, "grad_norm": 1.5052545070648193, "learning_rate": 1.9021547727212753e-05, "loss": 1.661, "step": 3539 }, { "epoch": 0.1935404507742985, "grad_norm": 1.5813435316085815, "learning_rate": 1.9020759231770526e-05, "loss": 1.4584, "step": 3540 }, { "epoch": 0.19359512321802003, "grad_norm": 1.1654460430145264, "learning_rate": 1.9019970435103978e-05, "loss": 1.5621, "step": 3541 }, { "epoch": 0.1936497956617416, "grad_norm": 1.6133919954299927, "learning_rate": 1.9019181337239453e-05, "loss": 1.1915, "step": 3542 }, { "epoch": 0.19370446810546316, "grad_norm": 1.726674199104309, "learning_rate": 1.9018391938203294e-05, "loss": 1.3603, "step": 3543 }, { "epoch": 0.1937591405491847, "grad_norm": 1.2422715425491333, "learning_rate": 1.9017602238021873e-05, "loss": 1.1791, "step": 3544 }, { "epoch": 0.19381381299290626, "grad_norm": 1.687167763710022, "learning_rate": 1.9016812236721548e-05, "loss": 1.4236, "step": 3545 }, { "epoch": 0.1938684854366278, "grad_norm": 1.8659069538116455, "learning_rate": 1.9016021934328708e-05, "loss": 1.1816, "step": 3546 }, { "epoch": 0.19392315788034936, "grad_norm": 1.4493554830551147, "learning_rate": 1.9015231330869736e-05, "loss": 1.2336, "step": 3547 }, { "epoch": 0.1939778303240709, "grad_norm": 1.6924666166305542, "learning_rate": 1.9014440426371034e-05, "loss": 1.3901, "step": 3548 }, { "epoch": 0.19403250276779246, "grad_norm": 1.5975761413574219, "learning_rate": 1.9013649220859017e-05, "loss": 1.4915, "step": 3549 }, { "epoch": 0.19408717521151403, "grad_norm": 1.608150839805603, "learning_rate": 1.9012857714360094e-05, "loss": 1.5536, "step": 3550 }, { "epoch": 0.19414184765523557, "grad_norm": 1.3373693227767944, "learning_rate": 1.901206590690071e-05, "loss": 1.4198, "step": 3551 }, { "epoch": 0.19419652009895713, "grad_norm": 1.5678069591522217, "learning_rate": 1.9011273798507295e-05, "loss": 1.5498, "step": 3552 }, { "epoch": 0.19425119254267867, "grad_norm": 1.4582668542861938, "learning_rate": 1.90104813892063e-05, "loss": 1.6297, "step": 3553 }, { "epoch": 0.19430586498640023, "grad_norm": 1.9935369491577148, "learning_rate": 1.900968867902419e-05, "loss": 1.4858, "step": 3554 }, { "epoch": 0.19436053743012177, "grad_norm": 1.5702918767929077, "learning_rate": 1.9008895667987434e-05, "loss": 1.5229, "step": 3555 }, { "epoch": 0.19441520987384334, "grad_norm": 1.7273075580596924, "learning_rate": 1.9008102356122515e-05, "loss": 1.5955, "step": 3556 }, { "epoch": 0.1944698823175649, "grad_norm": 1.4738045930862427, "learning_rate": 1.9007308743455914e-05, "loss": 1.2999, "step": 3557 }, { "epoch": 0.19452455476128644, "grad_norm": 1.724118947982788, "learning_rate": 1.900651483001414e-05, "loss": 1.2627, "step": 3558 }, { "epoch": 0.194579227205008, "grad_norm": 1.2406411170959473, "learning_rate": 1.9005720615823698e-05, "loss": 1.3975, "step": 3559 }, { "epoch": 0.19463389964872954, "grad_norm": 1.1369684934616089, "learning_rate": 1.9004926100911117e-05, "loss": 1.4591, "step": 3560 }, { "epoch": 0.1946885720924511, "grad_norm": 2.447732925415039, "learning_rate": 1.9004131285302924e-05, "loss": 1.4811, "step": 3561 }, { "epoch": 0.19474324453617264, "grad_norm": 1.3416129350662231, "learning_rate": 1.9003336169025655e-05, "loss": 1.3733, "step": 3562 }, { "epoch": 0.1947979169798942, "grad_norm": 1.5356684923171997, "learning_rate": 1.9002540752105862e-05, "loss": 1.5138, "step": 3563 }, { "epoch": 0.19485258942361577, "grad_norm": 1.3914995193481445, "learning_rate": 1.9001745034570113e-05, "loss": 1.5619, "step": 3564 }, { "epoch": 0.1949072618673373, "grad_norm": 1.9595906734466553, "learning_rate": 1.9000949016444972e-05, "loss": 1.3326, "step": 3565 }, { "epoch": 0.19496193431105888, "grad_norm": 1.7005250453948975, "learning_rate": 1.900015269775702e-05, "loss": 1.3786, "step": 3566 }, { "epoch": 0.1950166067547804, "grad_norm": 1.535401463508606, "learning_rate": 1.8999356078532852e-05, "loss": 1.4122, "step": 3567 }, { "epoch": 0.19507127919850198, "grad_norm": 1.4529832601547241, "learning_rate": 1.899855915879907e-05, "loss": 1.293, "step": 3568 }, { "epoch": 0.19512595164222352, "grad_norm": 1.573042631149292, "learning_rate": 1.8997761938582277e-05, "loss": 1.5817, "step": 3569 }, { "epoch": 0.19518062408594508, "grad_norm": 1.6344484090805054, "learning_rate": 1.89969644179091e-05, "loss": 1.5192, "step": 3570 }, { "epoch": 0.19523529652966665, "grad_norm": 1.365675687789917, "learning_rate": 1.899616659680617e-05, "loss": 1.5458, "step": 3571 }, { "epoch": 0.19528996897338818, "grad_norm": 1.5162514448165894, "learning_rate": 1.8995368475300128e-05, "loss": 1.3332, "step": 3572 }, { "epoch": 0.19534464141710975, "grad_norm": 2.06677508354187, "learning_rate": 1.8994570053417622e-05, "loss": 1.2928, "step": 3573 }, { "epoch": 0.1953993138608313, "grad_norm": 1.1660741567611694, "learning_rate": 1.8993771331185317e-05, "loss": 1.5464, "step": 3574 }, { "epoch": 0.19545398630455285, "grad_norm": 1.279827356338501, "learning_rate": 1.899297230862988e-05, "loss": 1.4204, "step": 3575 }, { "epoch": 0.1955086587482744, "grad_norm": 1.4575167894363403, "learning_rate": 1.8992172985778002e-05, "loss": 1.4256, "step": 3576 }, { "epoch": 0.19556333119199595, "grad_norm": 1.7247871160507202, "learning_rate": 1.899137336265636e-05, "loss": 1.4055, "step": 3577 }, { "epoch": 0.19561800363571752, "grad_norm": 1.2830740213394165, "learning_rate": 1.8990573439291666e-05, "loss": 1.575, "step": 3578 }, { "epoch": 0.19567267607943906, "grad_norm": 1.8127894401550293, "learning_rate": 1.8989773215710627e-05, "loss": 1.4584, "step": 3579 }, { "epoch": 0.19572734852316062, "grad_norm": 1.3836473226547241, "learning_rate": 1.8988972691939965e-05, "loss": 1.5651, "step": 3580 }, { "epoch": 0.19578202096688216, "grad_norm": 1.3603134155273438, "learning_rate": 1.898817186800641e-05, "loss": 1.5332, "step": 3581 }, { "epoch": 0.19583669341060372, "grad_norm": 1.279068112373352, "learning_rate": 1.8987370743936707e-05, "loss": 1.4204, "step": 3582 }, { "epoch": 0.19589136585432526, "grad_norm": 1.5787177085876465, "learning_rate": 1.8986569319757605e-05, "loss": 1.4874, "step": 3583 }, { "epoch": 0.19594603829804683, "grad_norm": 1.216514229774475, "learning_rate": 1.8985767595495868e-05, "loss": 1.5369, "step": 3584 }, { "epoch": 0.1960007107417684, "grad_norm": 1.4148777723312378, "learning_rate": 1.898496557117826e-05, "loss": 1.5947, "step": 3585 }, { "epoch": 0.19605538318548993, "grad_norm": 1.1819933652877808, "learning_rate": 1.898416324683157e-05, "loss": 1.5593, "step": 3586 }, { "epoch": 0.1961100556292115, "grad_norm": 1.8224732875823975, "learning_rate": 1.898336062248259e-05, "loss": 1.4669, "step": 3587 }, { "epoch": 0.19616472807293303, "grad_norm": 1.643369436264038, "learning_rate": 1.8982557698158114e-05, "loss": 1.2962, "step": 3588 }, { "epoch": 0.1962194005166546, "grad_norm": 1.6131287813186646, "learning_rate": 1.8981754473884962e-05, "loss": 1.4241, "step": 3589 }, { "epoch": 0.19627407296037613, "grad_norm": 1.0162503719329834, "learning_rate": 1.8980950949689952e-05, "loss": 1.5035, "step": 3590 }, { "epoch": 0.1963287454040977, "grad_norm": 1.3955408334732056, "learning_rate": 1.8980147125599912e-05, "loss": 1.5517, "step": 3591 }, { "epoch": 0.19638341784781926, "grad_norm": 1.6319166421890259, "learning_rate": 1.897934300164169e-05, "loss": 1.4157, "step": 3592 }, { "epoch": 0.1964380902915408, "grad_norm": 1.5124062299728394, "learning_rate": 1.897853857784213e-05, "loss": 1.4636, "step": 3593 }, { "epoch": 0.19649276273526237, "grad_norm": 1.9498225450515747, "learning_rate": 1.8977733854228102e-05, "loss": 1.5532, "step": 3594 }, { "epoch": 0.1965474351789839, "grad_norm": 1.085471510887146, "learning_rate": 1.8976928830826474e-05, "loss": 1.6332, "step": 3595 }, { "epoch": 0.19660210762270547, "grad_norm": 1.5449944734573364, "learning_rate": 1.8976123507664127e-05, "loss": 1.6004, "step": 3596 }, { "epoch": 0.196656780066427, "grad_norm": 1.44468092918396, "learning_rate": 1.897531788476795e-05, "loss": 1.3287, "step": 3597 }, { "epoch": 0.19671145251014857, "grad_norm": 1.1073545217514038, "learning_rate": 1.8974511962164853e-05, "loss": 1.4568, "step": 3598 }, { "epoch": 0.19676612495387014, "grad_norm": 1.2640622854232788, "learning_rate": 1.8973705739881736e-05, "loss": 1.6236, "step": 3599 }, { "epoch": 0.19682079739759167, "grad_norm": 1.5666803121566772, "learning_rate": 1.897289921794553e-05, "loss": 1.5291, "step": 3600 }, { "epoch": 0.19687546984131324, "grad_norm": 1.6166154146194458, "learning_rate": 1.8972092396383165e-05, "loss": 1.591, "step": 3601 }, { "epoch": 0.19693014228503478, "grad_norm": 1.165730357170105, "learning_rate": 1.897128527522158e-05, "loss": 1.3106, "step": 3602 }, { "epoch": 0.19698481472875634, "grad_norm": 1.1791702508926392, "learning_rate": 1.8970477854487726e-05, "loss": 1.472, "step": 3603 }, { "epoch": 0.19703948717247788, "grad_norm": 1.451081395149231, "learning_rate": 1.896967013420857e-05, "loss": 1.4462, "step": 3604 }, { "epoch": 0.19709415961619944, "grad_norm": 1.2576658725738525, "learning_rate": 1.8968862114411078e-05, "loss": 1.4962, "step": 3605 }, { "epoch": 0.197148832059921, "grad_norm": 1.4439971446990967, "learning_rate": 1.8968053795122232e-05, "loss": 1.7494, "step": 3606 }, { "epoch": 0.19720350450364255, "grad_norm": 1.2454676628112793, "learning_rate": 1.8967245176369028e-05, "loss": 1.4721, "step": 3607 }, { "epoch": 0.1972581769473641, "grad_norm": 1.2383203506469727, "learning_rate": 1.8966436258178465e-05, "loss": 1.4864, "step": 3608 }, { "epoch": 0.19731284939108565, "grad_norm": 1.6814484596252441, "learning_rate": 1.8965627040577558e-05, "loss": 1.2617, "step": 3609 }, { "epoch": 0.1973675218348072, "grad_norm": 1.5992095470428467, "learning_rate": 1.896481752359332e-05, "loss": 1.3493, "step": 3610 }, { "epoch": 0.19742219427852875, "grad_norm": 1.4973344802856445, "learning_rate": 1.896400770725279e-05, "loss": 1.4276, "step": 3611 }, { "epoch": 0.19747686672225032, "grad_norm": 1.4460231065750122, "learning_rate": 1.896319759158301e-05, "loss": 1.6728, "step": 3612 }, { "epoch": 0.19753153916597188, "grad_norm": 1.2865216732025146, "learning_rate": 1.896238717661103e-05, "loss": 1.4538, "step": 3613 }, { "epoch": 0.19758621160969342, "grad_norm": 1.7066247463226318, "learning_rate": 1.8961576462363908e-05, "loss": 1.1836, "step": 3614 }, { "epoch": 0.19764088405341498, "grad_norm": 1.5897430181503296, "learning_rate": 1.896076544886872e-05, "loss": 1.2105, "step": 3615 }, { "epoch": 0.19769555649713652, "grad_norm": 1.1952685117721558, "learning_rate": 1.8959954136152546e-05, "loss": 1.5938, "step": 3616 }, { "epoch": 0.19775022894085809, "grad_norm": 1.6015760898590088, "learning_rate": 1.8959142524242482e-05, "loss": 1.111, "step": 3617 }, { "epoch": 0.19780490138457965, "grad_norm": 1.4797332286834717, "learning_rate": 1.8958330613165622e-05, "loss": 1.2206, "step": 3618 }, { "epoch": 0.1978595738283012, "grad_norm": 1.2955517768859863, "learning_rate": 1.8957518402949082e-05, "loss": 1.6233, "step": 3619 }, { "epoch": 0.19791424627202275, "grad_norm": 1.7566242218017578, "learning_rate": 1.8956705893619984e-05, "loss": 1.5976, "step": 3620 }, { "epoch": 0.1979689187157443, "grad_norm": 1.6478511095046997, "learning_rate": 1.895589308520546e-05, "loss": 1.3271, "step": 3621 }, { "epoch": 0.19802359115946586, "grad_norm": 1.420523762702942, "learning_rate": 1.895507997773265e-05, "loss": 1.2185, "step": 3622 }, { "epoch": 0.1980782636031874, "grad_norm": 1.4044442176818848, "learning_rate": 1.8954266571228702e-05, "loss": 1.703, "step": 3623 }, { "epoch": 0.19813293604690896, "grad_norm": 1.3137481212615967, "learning_rate": 1.8953452865720784e-05, "loss": 1.4067, "step": 3624 }, { "epoch": 0.19818760849063052, "grad_norm": 1.9960193634033203, "learning_rate": 1.8952638861236066e-05, "loss": 1.6268, "step": 3625 }, { "epoch": 0.19824228093435206, "grad_norm": 1.6042652130126953, "learning_rate": 1.8951824557801726e-05, "loss": 1.2808, "step": 3626 }, { "epoch": 0.19829695337807363, "grad_norm": 1.8145582675933838, "learning_rate": 1.895100995544496e-05, "loss": 1.1054, "step": 3627 }, { "epoch": 0.19835162582179516, "grad_norm": 1.2705672979354858, "learning_rate": 1.8950195054192965e-05, "loss": 1.5827, "step": 3628 }, { "epoch": 0.19840629826551673, "grad_norm": 1.3097318410873413, "learning_rate": 1.8949379854072954e-05, "loss": 1.2882, "step": 3629 }, { "epoch": 0.19846097070923827, "grad_norm": 1.3375831842422485, "learning_rate": 1.8948564355112154e-05, "loss": 1.7397, "step": 3630 }, { "epoch": 0.19851564315295983, "grad_norm": 1.6289849281311035, "learning_rate": 1.8947748557337792e-05, "loss": 1.32, "step": 3631 }, { "epoch": 0.1985703155966814, "grad_norm": 1.4239858388900757, "learning_rate": 1.8946932460777105e-05, "loss": 1.4885, "step": 3632 }, { "epoch": 0.19862498804040293, "grad_norm": 1.3403804302215576, "learning_rate": 1.894611606545735e-05, "loss": 1.6135, "step": 3633 }, { "epoch": 0.1986796604841245, "grad_norm": 2.4180774688720703, "learning_rate": 1.8945299371405784e-05, "loss": 1.395, "step": 3634 }, { "epoch": 0.19873433292784604, "grad_norm": 1.9239970445632935, "learning_rate": 1.8944482378649686e-05, "loss": 1.4009, "step": 3635 }, { "epoch": 0.1987890053715676, "grad_norm": 1.604104995727539, "learning_rate": 1.8943665087216327e-05, "loss": 1.4228, "step": 3636 }, { "epoch": 0.19884367781528914, "grad_norm": 1.2311241626739502, "learning_rate": 1.8942847497133008e-05, "loss": 1.6161, "step": 3637 }, { "epoch": 0.1988983502590107, "grad_norm": 1.1339466571807861, "learning_rate": 1.8942029608427027e-05, "loss": 1.4139, "step": 3638 }, { "epoch": 0.19895302270273227, "grad_norm": 1.4545639753341675, "learning_rate": 1.894121142112569e-05, "loss": 1.594, "step": 3639 }, { "epoch": 0.1990076951464538, "grad_norm": 1.553495168685913, "learning_rate": 1.8940392935256325e-05, "loss": 1.2729, "step": 3640 }, { "epoch": 0.19906236759017537, "grad_norm": 1.56368887424469, "learning_rate": 1.8939574150846264e-05, "loss": 1.307, "step": 3641 }, { "epoch": 0.1991170400338969, "grad_norm": 1.4030667543411255, "learning_rate": 1.893875506792284e-05, "loss": 1.43, "step": 3642 }, { "epoch": 0.19917171247761847, "grad_norm": 1.3857313394546509, "learning_rate": 1.893793568651341e-05, "loss": 1.4584, "step": 3643 }, { "epoch": 0.19922638492134, "grad_norm": 1.8635960817337036, "learning_rate": 1.8937116006645332e-05, "loss": 1.3862, "step": 3644 }, { "epoch": 0.19928105736506158, "grad_norm": 1.3111708164215088, "learning_rate": 1.893629602834598e-05, "loss": 1.4951, "step": 3645 }, { "epoch": 0.19933572980878314, "grad_norm": 1.583883285522461, "learning_rate": 1.8935475751642736e-05, "loss": 1.4458, "step": 3646 }, { "epoch": 0.19939040225250468, "grad_norm": 1.2436323165893555, "learning_rate": 1.8934655176562988e-05, "loss": 1.6105, "step": 3647 }, { "epoch": 0.19944507469622624, "grad_norm": 1.4434911012649536, "learning_rate": 1.8933834303134136e-05, "loss": 1.379, "step": 3648 }, { "epoch": 0.19949974713994778, "grad_norm": 1.8330777883529663, "learning_rate": 1.8933013131383594e-05, "loss": 1.272, "step": 3649 }, { "epoch": 0.19955441958366935, "grad_norm": 1.5622930526733398, "learning_rate": 1.8932191661338785e-05, "loss": 1.3911, "step": 3650 }, { "epoch": 0.19960909202739088, "grad_norm": 1.326177954673767, "learning_rate": 1.893136989302713e-05, "loss": 1.2752, "step": 3651 }, { "epoch": 0.19966376447111245, "grad_norm": 1.7779123783111572, "learning_rate": 1.893054782647608e-05, "loss": 1.5856, "step": 3652 }, { "epoch": 0.199718436914834, "grad_norm": 1.6006968021392822, "learning_rate": 1.8929725461713083e-05, "loss": 1.4145, "step": 3653 }, { "epoch": 0.19977310935855555, "grad_norm": 1.6676241159439087, "learning_rate": 1.8928902798765594e-05, "loss": 1.5313, "step": 3654 }, { "epoch": 0.19982778180227712, "grad_norm": 2.2306976318359375, "learning_rate": 1.8928079837661092e-05, "loss": 1.4173, "step": 3655 }, { "epoch": 0.19988245424599865, "grad_norm": 1.65680992603302, "learning_rate": 1.8927256578427054e-05, "loss": 1.36, "step": 3656 }, { "epoch": 0.19993712668972022, "grad_norm": 1.4402161836624146, "learning_rate": 1.8926433021090967e-05, "loss": 1.5038, "step": 3657 }, { "epoch": 0.19999179913344176, "grad_norm": 1.6246148347854614, "learning_rate": 1.8925609165680338e-05, "loss": 1.5583, "step": 3658 }, { "epoch": 0.20004647157716332, "grad_norm": 1.5718811750411987, "learning_rate": 1.8924785012222676e-05, "loss": 1.4592, "step": 3659 }, { "epoch": 0.20010114402088489, "grad_norm": 1.5442537069320679, "learning_rate": 1.8923960560745495e-05, "loss": 1.4565, "step": 3660 }, { "epoch": 0.20015581646460642, "grad_norm": 1.4112074375152588, "learning_rate": 1.8923135811276333e-05, "loss": 1.6372, "step": 3661 }, { "epoch": 0.200210488908328, "grad_norm": 1.4120440483093262, "learning_rate": 1.8922310763842725e-05, "loss": 1.4295, "step": 3662 }, { "epoch": 0.20026516135204953, "grad_norm": 1.5614105463027954, "learning_rate": 1.8921485418472227e-05, "loss": 1.3072, "step": 3663 }, { "epoch": 0.2003198337957711, "grad_norm": 1.4184463024139404, "learning_rate": 1.8920659775192394e-05, "loss": 1.3963, "step": 3664 }, { "epoch": 0.20037450623949263, "grad_norm": 1.5909239053726196, "learning_rate": 1.89198338340308e-05, "loss": 1.3698, "step": 3665 }, { "epoch": 0.2004291786832142, "grad_norm": 1.1285879611968994, "learning_rate": 1.891900759501502e-05, "loss": 1.5695, "step": 3666 }, { "epoch": 0.20048385112693576, "grad_norm": 1.4025468826293945, "learning_rate": 1.891818105817265e-05, "loss": 1.3638, "step": 3667 }, { "epoch": 0.2005385235706573, "grad_norm": 1.5102771520614624, "learning_rate": 1.8917354223531287e-05, "loss": 1.3424, "step": 3668 }, { "epoch": 0.20059319601437886, "grad_norm": 1.3112741708755493, "learning_rate": 1.891652709111854e-05, "loss": 1.5003, "step": 3669 }, { "epoch": 0.2006478684581004, "grad_norm": 1.362791895866394, "learning_rate": 1.891569966096203e-05, "loss": 1.4495, "step": 3670 }, { "epoch": 0.20070254090182196, "grad_norm": 1.0829306840896606, "learning_rate": 1.891487193308939e-05, "loss": 1.5301, "step": 3671 }, { "epoch": 0.2007572133455435, "grad_norm": 1.3548589944839478, "learning_rate": 1.8914043907528254e-05, "loss": 1.3235, "step": 3672 }, { "epoch": 0.20081188578926507, "grad_norm": 1.6331127882003784, "learning_rate": 1.8913215584306276e-05, "loss": 1.3315, "step": 3673 }, { "epoch": 0.20086655823298663, "grad_norm": 1.3591171503067017, "learning_rate": 1.891238696345111e-05, "loss": 1.4314, "step": 3674 }, { "epoch": 0.20092123067670817, "grad_norm": 1.3038084506988525, "learning_rate": 1.8911558044990435e-05, "loss": 1.5545, "step": 3675 }, { "epoch": 0.20097590312042973, "grad_norm": 1.392406940460205, "learning_rate": 1.8910728828951926e-05, "loss": 1.2136, "step": 3676 }, { "epoch": 0.20103057556415127, "grad_norm": 1.4099425077438354, "learning_rate": 1.8909899315363265e-05, "loss": 1.3642, "step": 3677 }, { "epoch": 0.20108524800787284, "grad_norm": 1.45512056350708, "learning_rate": 1.8909069504252165e-05, "loss": 1.6153, "step": 3678 }, { "epoch": 0.20113992045159437, "grad_norm": 1.4020369052886963, "learning_rate": 1.8908239395646325e-05, "loss": 1.6504, "step": 3679 }, { "epoch": 0.20119459289531594, "grad_norm": 1.3489582538604736, "learning_rate": 1.8907408989573467e-05, "loss": 1.3471, "step": 3680 }, { "epoch": 0.2012492653390375, "grad_norm": 1.3362295627593994, "learning_rate": 1.8906578286061325e-05, "loss": 1.3158, "step": 3681 }, { "epoch": 0.20130393778275904, "grad_norm": 1.5160341262817383, "learning_rate": 1.890574728513763e-05, "loss": 1.6552, "step": 3682 }, { "epoch": 0.2013586102264806, "grad_norm": 1.2860592603683472, "learning_rate": 1.8904915986830135e-05, "loss": 1.3063, "step": 3683 }, { "epoch": 0.20141328267020214, "grad_norm": 1.143584132194519, "learning_rate": 1.89040843911666e-05, "loss": 1.599, "step": 3684 }, { "epoch": 0.2014679551139237, "grad_norm": 1.3867604732513428, "learning_rate": 1.8903252498174796e-05, "loss": 1.5512, "step": 3685 }, { "epoch": 0.20152262755764525, "grad_norm": 1.2057185173034668, "learning_rate": 1.8902420307882495e-05, "loss": 1.3755, "step": 3686 }, { "epoch": 0.2015773000013668, "grad_norm": 1.4388843774795532, "learning_rate": 1.8901587820317494e-05, "loss": 1.6606, "step": 3687 }, { "epoch": 0.20163197244508838, "grad_norm": 1.4434138536453247, "learning_rate": 1.890075503550758e-05, "loss": 1.6346, "step": 3688 }, { "epoch": 0.2016866448888099, "grad_norm": 1.4637845754623413, "learning_rate": 1.8899921953480576e-05, "loss": 1.496, "step": 3689 }, { "epoch": 0.20174131733253148, "grad_norm": 1.2925022840499878, "learning_rate": 1.8899088574264293e-05, "loss": 1.6416, "step": 3690 }, { "epoch": 0.20179598977625302, "grad_norm": 1.1401957273483276, "learning_rate": 1.8898254897886558e-05, "loss": 1.5936, "step": 3691 }, { "epoch": 0.20185066221997458, "grad_norm": 1.4538167715072632, "learning_rate": 1.889742092437521e-05, "loss": 1.0465, "step": 3692 }, { "epoch": 0.20190533466369612, "grad_norm": 1.3105919361114502, "learning_rate": 1.8896586653758104e-05, "loss": 1.7184, "step": 3693 }, { "epoch": 0.20196000710741768, "grad_norm": 1.5169260501861572, "learning_rate": 1.889575208606309e-05, "loss": 1.461, "step": 3694 }, { "epoch": 0.20201467955113925, "grad_norm": 1.382385492324829, "learning_rate": 1.8894917221318038e-05, "loss": 1.3105, "step": 3695 }, { "epoch": 0.20206935199486079, "grad_norm": 1.9903322458267212, "learning_rate": 1.8894082059550828e-05, "loss": 1.38, "step": 3696 }, { "epoch": 0.20212402443858235, "grad_norm": 1.8864059448242188, "learning_rate": 1.889324660078935e-05, "loss": 1.289, "step": 3697 }, { "epoch": 0.2021786968823039, "grad_norm": 1.3311916589736938, "learning_rate": 1.8892410845061498e-05, "loss": 1.5069, "step": 3698 }, { "epoch": 0.20223336932602545, "grad_norm": 1.5354993343353271, "learning_rate": 1.889157479239518e-05, "loss": 1.2956, "step": 3699 }, { "epoch": 0.202288041769747, "grad_norm": 2.116053581237793, "learning_rate": 1.8890738442818317e-05, "loss": 1.3536, "step": 3700 }, { "epoch": 0.20234271421346856, "grad_norm": 1.133012056350708, "learning_rate": 1.8889901796358835e-05, "loss": 1.5158, "step": 3701 }, { "epoch": 0.20239738665719012, "grad_norm": 1.9086166620254517, "learning_rate": 1.888906485304467e-05, "loss": 1.4299, "step": 3702 }, { "epoch": 0.20245205910091166, "grad_norm": 1.6972253322601318, "learning_rate": 1.8888227612903768e-05, "loss": 1.5018, "step": 3703 }, { "epoch": 0.20250673154463322, "grad_norm": 1.776668667793274, "learning_rate": 1.888739007596409e-05, "loss": 1.3993, "step": 3704 }, { "epoch": 0.20256140398835476, "grad_norm": 1.7328027486801147, "learning_rate": 1.8886552242253607e-05, "loss": 1.2833, "step": 3705 }, { "epoch": 0.20261607643207633, "grad_norm": 1.5191830396652222, "learning_rate": 1.8885714111800288e-05, "loss": 1.4729, "step": 3706 }, { "epoch": 0.20267074887579786, "grad_norm": 1.9203743934631348, "learning_rate": 1.8884875684632124e-05, "loss": 1.4242, "step": 3707 }, { "epoch": 0.20272542131951943, "grad_norm": 1.3720148801803589, "learning_rate": 1.8884036960777115e-05, "loss": 1.5004, "step": 3708 }, { "epoch": 0.202780093763241, "grad_norm": 1.4357995986938477, "learning_rate": 1.888319794026326e-05, "loss": 1.1733, "step": 3709 }, { "epoch": 0.20283476620696253, "grad_norm": 1.4776078462600708, "learning_rate": 1.8882358623118584e-05, "loss": 1.7136, "step": 3710 }, { "epoch": 0.2028894386506841, "grad_norm": 1.4208924770355225, "learning_rate": 1.888151900937111e-05, "loss": 1.6863, "step": 3711 }, { "epoch": 0.20294411109440563, "grad_norm": 1.2624766826629639, "learning_rate": 1.8880679099048875e-05, "loss": 1.496, "step": 3712 }, { "epoch": 0.2029987835381272, "grad_norm": 1.2097829580307007, "learning_rate": 1.8879838892179924e-05, "loss": 1.4574, "step": 3713 }, { "epoch": 0.20305345598184874, "grad_norm": 1.5843030214309692, "learning_rate": 1.8878998388792315e-05, "loss": 1.545, "step": 3714 }, { "epoch": 0.2031081284255703, "grad_norm": 1.556699275970459, "learning_rate": 1.8878157588914118e-05, "loss": 1.5263, "step": 3715 }, { "epoch": 0.20316280086929187, "grad_norm": 1.6751394271850586, "learning_rate": 1.88773164925734e-05, "loss": 1.3171, "step": 3716 }, { "epoch": 0.2032174733130134, "grad_norm": 1.9434833526611328, "learning_rate": 1.8876475099798258e-05, "loss": 1.5369, "step": 3717 }, { "epoch": 0.20327214575673497, "grad_norm": 1.3659417629241943, "learning_rate": 1.887563341061678e-05, "loss": 1.4424, "step": 3718 }, { "epoch": 0.2033268182004565, "grad_norm": 1.2926150560379028, "learning_rate": 1.887479142505708e-05, "loss": 1.3064, "step": 3719 }, { "epoch": 0.20338149064417807, "grad_norm": 1.5679548978805542, "learning_rate": 1.8873949143147267e-05, "loss": 1.2424, "step": 3720 }, { "epoch": 0.20343616308789964, "grad_norm": 1.567453384399414, "learning_rate": 1.887310656491547e-05, "loss": 1.5612, "step": 3721 }, { "epoch": 0.20349083553162117, "grad_norm": 1.3919178247451782, "learning_rate": 1.8872263690389817e-05, "loss": 1.4712, "step": 3722 }, { "epoch": 0.20354550797534274, "grad_norm": 1.771597981452942, "learning_rate": 1.887142051959847e-05, "loss": 1.6155, "step": 3723 }, { "epoch": 0.20360018041906427, "grad_norm": 1.4725514650344849, "learning_rate": 1.8870577052569564e-05, "loss": 1.6122, "step": 3724 }, { "epoch": 0.20365485286278584, "grad_norm": 1.5934618711471558, "learning_rate": 1.886973328933128e-05, "loss": 1.5385, "step": 3725 }, { "epoch": 0.20370952530650738, "grad_norm": 1.4322035312652588, "learning_rate": 1.8868889229911787e-05, "loss": 1.4629, "step": 3726 }, { "epoch": 0.20376419775022894, "grad_norm": 1.3967394828796387, "learning_rate": 1.8868044874339274e-05, "loss": 1.3848, "step": 3727 }, { "epoch": 0.2038188701939505, "grad_norm": 1.5080569982528687, "learning_rate": 1.8867200222641927e-05, "loss": 1.4006, "step": 3728 }, { "epoch": 0.20387354263767204, "grad_norm": 1.741076111793518, "learning_rate": 1.8866355274847964e-05, "loss": 1.4622, "step": 3729 }, { "epoch": 0.2039282150813936, "grad_norm": 1.387157678604126, "learning_rate": 1.8865510030985588e-05, "loss": 1.5059, "step": 3730 }, { "epoch": 0.20398288752511515, "grad_norm": 1.5452383756637573, "learning_rate": 1.8864664491083032e-05, "loss": 1.4832, "step": 3731 }, { "epoch": 0.2040375599688367, "grad_norm": 1.61245858669281, "learning_rate": 1.8863818655168522e-05, "loss": 1.2599, "step": 3732 }, { "epoch": 0.20409223241255825, "grad_norm": 1.238616704940796, "learning_rate": 1.886297252327031e-05, "loss": 1.464, "step": 3733 }, { "epoch": 0.20414690485627981, "grad_norm": 1.6095294952392578, "learning_rate": 1.886212609541665e-05, "loss": 1.3784, "step": 3734 }, { "epoch": 0.20420157730000138, "grad_norm": 1.6736280918121338, "learning_rate": 1.8861279371635805e-05, "loss": 1.6548, "step": 3735 }, { "epoch": 0.20425624974372292, "grad_norm": 1.261812448501587, "learning_rate": 1.8860432351956044e-05, "loss": 1.4866, "step": 3736 }, { "epoch": 0.20431092218744448, "grad_norm": 1.5172240734100342, "learning_rate": 1.8859585036405653e-05, "loss": 1.3473, "step": 3737 }, { "epoch": 0.20436559463116602, "grad_norm": 1.640701413154602, "learning_rate": 1.8858737425012934e-05, "loss": 1.2873, "step": 3738 }, { "epoch": 0.20442026707488758, "grad_norm": 1.214613914489746, "learning_rate": 1.8857889517806183e-05, "loss": 1.3327, "step": 3739 }, { "epoch": 0.20447493951860912, "grad_norm": 1.7507762908935547, "learning_rate": 1.8857041314813715e-05, "loss": 1.4762, "step": 3740 }, { "epoch": 0.2045296119623307, "grad_norm": 1.6707402467727661, "learning_rate": 1.8856192816063853e-05, "loss": 1.3994, "step": 3741 }, { "epoch": 0.20458428440605225, "grad_norm": 1.763269066810608, "learning_rate": 1.8855344021584933e-05, "loss": 1.4916, "step": 3742 }, { "epoch": 0.2046389568497738, "grad_norm": 1.4088279008865356, "learning_rate": 1.8854494931405293e-05, "loss": 1.3293, "step": 3743 }, { "epoch": 0.20469362929349535, "grad_norm": 1.5317124128341675, "learning_rate": 1.885364554555329e-05, "loss": 1.2755, "step": 3744 }, { "epoch": 0.2047483017372169, "grad_norm": 1.1558929681777954, "learning_rate": 1.885279586405729e-05, "loss": 1.5854, "step": 3745 }, { "epoch": 0.20480297418093846, "grad_norm": 1.4945060014724731, "learning_rate": 1.885194588694566e-05, "loss": 1.5055, "step": 3746 }, { "epoch": 0.20485764662466, "grad_norm": 1.355221152305603, "learning_rate": 1.8851095614246785e-05, "loss": 1.6467, "step": 3747 }, { "epoch": 0.20491231906838156, "grad_norm": 1.5902299880981445, "learning_rate": 1.885024504598906e-05, "loss": 1.3638, "step": 3748 }, { "epoch": 0.20496699151210312, "grad_norm": 1.2270896434783936, "learning_rate": 1.8849394182200883e-05, "loss": 1.3701, "step": 3749 }, { "epoch": 0.20502166395582466, "grad_norm": 1.5795990228652954, "learning_rate": 1.8848543022910668e-05, "loss": 1.8114, "step": 3750 }, { "epoch": 0.20507633639954623, "grad_norm": 1.3031023740768433, "learning_rate": 1.884769156814684e-05, "loss": 1.498, "step": 3751 }, { "epoch": 0.20513100884326776, "grad_norm": 1.6464353799819946, "learning_rate": 1.8846839817937827e-05, "loss": 1.629, "step": 3752 }, { "epoch": 0.20518568128698933, "grad_norm": 1.4183138608932495, "learning_rate": 1.884598777231207e-05, "loss": 1.3021, "step": 3753 }, { "epoch": 0.20524035373071087, "grad_norm": 1.57685387134552, "learning_rate": 1.8845135431298026e-05, "loss": 1.0663, "step": 3754 }, { "epoch": 0.20529502617443243, "grad_norm": 1.3614519834518433, "learning_rate": 1.8844282794924157e-05, "loss": 1.5669, "step": 3755 }, { "epoch": 0.205349698618154, "grad_norm": 1.244217038154602, "learning_rate": 1.884342986321893e-05, "loss": 1.4577, "step": 3756 }, { "epoch": 0.20540437106187553, "grad_norm": 1.4881993532180786, "learning_rate": 1.8842576636210827e-05, "loss": 1.1713, "step": 3757 }, { "epoch": 0.2054590435055971, "grad_norm": 1.3583952188491821, "learning_rate": 1.884172311392834e-05, "loss": 1.491, "step": 3758 }, { "epoch": 0.20551371594931864, "grad_norm": 1.5688873529434204, "learning_rate": 1.8840869296399972e-05, "loss": 1.3929, "step": 3759 }, { "epoch": 0.2055683883930402, "grad_norm": 1.4790537357330322, "learning_rate": 1.8840015183654233e-05, "loss": 1.3078, "step": 3760 }, { "epoch": 0.20562306083676174, "grad_norm": 1.4620023965835571, "learning_rate": 1.883916077571964e-05, "loss": 1.3539, "step": 3761 }, { "epoch": 0.2056777332804833, "grad_norm": 1.2374320030212402, "learning_rate": 1.883830607262473e-05, "loss": 1.6212, "step": 3762 }, { "epoch": 0.20573240572420487, "grad_norm": 2.020686388015747, "learning_rate": 1.8837451074398038e-05, "loss": 1.5581, "step": 3763 }, { "epoch": 0.2057870781679264, "grad_norm": 1.5253254175186157, "learning_rate": 1.883659578106812e-05, "loss": 1.4318, "step": 3764 }, { "epoch": 0.20584175061164797, "grad_norm": 1.5234344005584717, "learning_rate": 1.883574019266353e-05, "loss": 1.3826, "step": 3765 }, { "epoch": 0.2058964230553695, "grad_norm": 1.5553386211395264, "learning_rate": 1.8834884309212845e-05, "loss": 1.5938, "step": 3766 }, { "epoch": 0.20595109549909107, "grad_norm": 1.4876223802566528, "learning_rate": 1.8834028130744637e-05, "loss": 1.286, "step": 3767 }, { "epoch": 0.2060057679428126, "grad_norm": 1.5116546154022217, "learning_rate": 1.8833171657287503e-05, "loss": 1.5038, "step": 3768 }, { "epoch": 0.20606044038653418, "grad_norm": 1.2750612497329712, "learning_rate": 1.8832314888870037e-05, "loss": 1.6341, "step": 3769 }, { "epoch": 0.20611511283025574, "grad_norm": 1.1075389385223389, "learning_rate": 1.8831457825520855e-05, "loss": 1.4863, "step": 3770 }, { "epoch": 0.20616978527397728, "grad_norm": 1.4706852436065674, "learning_rate": 1.883060046726857e-05, "loss": 1.4862, "step": 3771 }, { "epoch": 0.20622445771769884, "grad_norm": 1.3544806241989136, "learning_rate": 1.8829742814141813e-05, "loss": 1.5398, "step": 3772 }, { "epoch": 0.20627913016142038, "grad_norm": 1.2320876121520996, "learning_rate": 1.882888486616923e-05, "loss": 1.2724, "step": 3773 }, { "epoch": 0.20633380260514195, "grad_norm": 1.7689915895462036, "learning_rate": 1.8828026623379455e-05, "loss": 1.4321, "step": 3774 }, { "epoch": 0.20638847504886348, "grad_norm": 1.1436734199523926, "learning_rate": 1.882716808580116e-05, "loss": 1.5233, "step": 3775 }, { "epoch": 0.20644314749258505, "grad_norm": 1.3776155710220337, "learning_rate": 1.882630925346301e-05, "loss": 1.3223, "step": 3776 }, { "epoch": 0.20649781993630661, "grad_norm": 1.1962478160858154, "learning_rate": 1.8825450126393678e-05, "loss": 1.2551, "step": 3777 }, { "epoch": 0.20655249238002815, "grad_norm": 1.4029011726379395, "learning_rate": 1.882459070462186e-05, "loss": 1.3806, "step": 3778 }, { "epoch": 0.20660716482374972, "grad_norm": 1.347475528717041, "learning_rate": 1.882373098817625e-05, "loss": 1.4824, "step": 3779 }, { "epoch": 0.20666183726747125, "grad_norm": 1.4318102598190308, "learning_rate": 1.8822870977085556e-05, "loss": 1.3945, "step": 3780 }, { "epoch": 0.20671650971119282, "grad_norm": 1.2801916599273682, "learning_rate": 1.8822010671378498e-05, "loss": 1.7301, "step": 3781 }, { "epoch": 0.20677118215491436, "grad_norm": 1.2934818267822266, "learning_rate": 1.8821150071083803e-05, "loss": 1.4253, "step": 3782 }, { "epoch": 0.20682585459863592, "grad_norm": 1.5137791633605957, "learning_rate": 1.8820289176230206e-05, "loss": 1.2214, "step": 3783 }, { "epoch": 0.2068805270423575, "grad_norm": 1.189167857170105, "learning_rate": 1.8819427986846457e-05, "loss": 1.4783, "step": 3784 }, { "epoch": 0.20693519948607902, "grad_norm": 1.3550710678100586, "learning_rate": 1.881856650296131e-05, "loss": 1.5394, "step": 3785 }, { "epoch": 0.2069898719298006, "grad_norm": 1.4994820356369019, "learning_rate": 1.8817704724603536e-05, "loss": 1.5543, "step": 3786 }, { "epoch": 0.20704454437352213, "grad_norm": 1.8297039270401, "learning_rate": 1.8816842651801906e-05, "loss": 1.5586, "step": 3787 }, { "epoch": 0.2070992168172437, "grad_norm": 1.4839671850204468, "learning_rate": 1.8815980284585218e-05, "loss": 1.4339, "step": 3788 }, { "epoch": 0.20715388926096523, "grad_norm": 1.7994784116744995, "learning_rate": 1.8815117622982255e-05, "loss": 1.5847, "step": 3789 }, { "epoch": 0.2072085617046868, "grad_norm": 1.536091685295105, "learning_rate": 1.8814254667021832e-05, "loss": 1.4036, "step": 3790 }, { "epoch": 0.20726323414840836, "grad_norm": 1.3711597919464111, "learning_rate": 1.881339141673276e-05, "loss": 1.4469, "step": 3791 }, { "epoch": 0.2073179065921299, "grad_norm": 1.4790282249450684, "learning_rate": 1.881252787214387e-05, "loss": 1.7101, "step": 3792 }, { "epoch": 0.20737257903585146, "grad_norm": 1.4607598781585693, "learning_rate": 1.8811664033283993e-05, "loss": 1.2443, "step": 3793 }, { "epoch": 0.207427251479573, "grad_norm": 1.3869001865386963, "learning_rate": 1.8810799900181978e-05, "loss": 1.6799, "step": 3794 }, { "epoch": 0.20748192392329456, "grad_norm": 1.7781181335449219, "learning_rate": 1.880993547286668e-05, "loss": 1.4095, "step": 3795 }, { "epoch": 0.2075365963670161, "grad_norm": 1.4886969327926636, "learning_rate": 1.880907075136696e-05, "loss": 1.4242, "step": 3796 }, { "epoch": 0.20759126881073767, "grad_norm": 1.6769442558288574, "learning_rate": 1.8808205735711697e-05, "loss": 1.684, "step": 3797 }, { "epoch": 0.20764594125445923, "grad_norm": 1.3415828943252563, "learning_rate": 1.880734042592978e-05, "loss": 1.5201, "step": 3798 }, { "epoch": 0.20770061369818077, "grad_norm": 1.3880995512008667, "learning_rate": 1.8806474822050096e-05, "loss": 1.5874, "step": 3799 }, { "epoch": 0.20775528614190233, "grad_norm": 1.2708359956741333, "learning_rate": 1.880560892410155e-05, "loss": 1.7193, "step": 3800 }, { "epoch": 0.20780995858562387, "grad_norm": 1.5277693271636963, "learning_rate": 1.880474273211306e-05, "loss": 1.5832, "step": 3801 }, { "epoch": 0.20786463102934544, "grad_norm": 1.4441884756088257, "learning_rate": 1.8803876246113553e-05, "loss": 1.324, "step": 3802 }, { "epoch": 0.20791930347306697, "grad_norm": 1.4024910926818848, "learning_rate": 1.880300946613196e-05, "loss": 1.4871, "step": 3803 }, { "epoch": 0.20797397591678854, "grad_norm": 1.4811432361602783, "learning_rate": 1.880214239219722e-05, "loss": 1.3678, "step": 3804 }, { "epoch": 0.2080286483605101, "grad_norm": 1.4921282529830933, "learning_rate": 1.880127502433829e-05, "loss": 1.3196, "step": 3805 }, { "epoch": 0.20808332080423164, "grad_norm": 2.815016269683838, "learning_rate": 1.8800407362584135e-05, "loss": 1.2755, "step": 3806 }, { "epoch": 0.2081379932479532, "grad_norm": 1.6173089742660522, "learning_rate": 1.879953940696373e-05, "loss": 1.5413, "step": 3807 }, { "epoch": 0.20819266569167474, "grad_norm": 1.2599778175354004, "learning_rate": 1.8798671157506052e-05, "loss": 1.4124, "step": 3808 }, { "epoch": 0.2082473381353963, "grad_norm": 1.9192537069320679, "learning_rate": 1.87978026142401e-05, "loss": 1.6324, "step": 3809 }, { "epoch": 0.20830201057911785, "grad_norm": 1.5402743816375732, "learning_rate": 1.8796933777194874e-05, "loss": 1.4119, "step": 3810 }, { "epoch": 0.2083566830228394, "grad_norm": 1.2678744792938232, "learning_rate": 1.8796064646399386e-05, "loss": 1.5061, "step": 3811 }, { "epoch": 0.20841135546656098, "grad_norm": 1.5771434307098389, "learning_rate": 1.8795195221882658e-05, "loss": 1.5002, "step": 3812 }, { "epoch": 0.20846602791028251, "grad_norm": 1.5991036891937256, "learning_rate": 1.879432550367372e-05, "loss": 1.7535, "step": 3813 }, { "epoch": 0.20852070035400408, "grad_norm": 1.4439018964767456, "learning_rate": 1.8793455491801623e-05, "loss": 1.4327, "step": 3814 }, { "epoch": 0.20857537279772562, "grad_norm": 1.5050259828567505, "learning_rate": 1.879258518629541e-05, "loss": 1.4169, "step": 3815 }, { "epoch": 0.20863004524144718, "grad_norm": 1.3452351093292236, "learning_rate": 1.8791714587184144e-05, "loss": 1.3498, "step": 3816 }, { "epoch": 0.20868471768516872, "grad_norm": 1.4853101968765259, "learning_rate": 1.87908436944969e-05, "loss": 1.3672, "step": 3817 }, { "epoch": 0.20873939012889028, "grad_norm": 1.534986138343811, "learning_rate": 1.8789972508262755e-05, "loss": 1.4566, "step": 3818 }, { "epoch": 0.20879406257261185, "grad_norm": 1.1869664192199707, "learning_rate": 1.8789101028510803e-05, "loss": 1.6395, "step": 3819 }, { "epoch": 0.2088487350163334, "grad_norm": 1.6017175912857056, "learning_rate": 1.878822925527014e-05, "loss": 1.3194, "step": 3820 }, { "epoch": 0.20890340746005495, "grad_norm": 1.3727085590362549, "learning_rate": 1.878735718856988e-05, "loss": 1.5045, "step": 3821 }, { "epoch": 0.2089580799037765, "grad_norm": 1.737278699874878, "learning_rate": 1.8786484828439148e-05, "loss": 1.3968, "step": 3822 }, { "epoch": 0.20901275234749805, "grad_norm": 1.6694315671920776, "learning_rate": 1.8785612174907067e-05, "loss": 1.2865, "step": 3823 }, { "epoch": 0.20906742479121962, "grad_norm": 1.6718374490737915, "learning_rate": 1.878473922800278e-05, "loss": 1.2954, "step": 3824 }, { "epoch": 0.20912209723494116, "grad_norm": 1.880860686302185, "learning_rate": 1.8783865987755432e-05, "loss": 1.4041, "step": 3825 }, { "epoch": 0.20917676967866272, "grad_norm": 1.6909750699996948, "learning_rate": 1.8782992454194192e-05, "loss": 1.2822, "step": 3826 }, { "epoch": 0.20923144212238426, "grad_norm": 1.630216121673584, "learning_rate": 1.8782118627348224e-05, "loss": 1.6791, "step": 3827 }, { "epoch": 0.20928611456610582, "grad_norm": 1.3603143692016602, "learning_rate": 1.8781244507246706e-05, "loss": 1.3634, "step": 3828 }, { "epoch": 0.20934078700982736, "grad_norm": 2.008476495742798, "learning_rate": 1.8780370093918825e-05, "loss": 1.514, "step": 3829 }, { "epoch": 0.20939545945354893, "grad_norm": 1.1619892120361328, "learning_rate": 1.8779495387393786e-05, "loss": 1.7564, "step": 3830 }, { "epoch": 0.2094501318972705, "grad_norm": 1.1335164308547974, "learning_rate": 1.877862038770079e-05, "loss": 1.6101, "step": 3831 }, { "epoch": 0.20950480434099203, "grad_norm": 1.369429349899292, "learning_rate": 1.8777745094869067e-05, "loss": 1.5851, "step": 3832 }, { "epoch": 0.2095594767847136, "grad_norm": 1.3962947130203247, "learning_rate": 1.8776869508927832e-05, "loss": 1.3489, "step": 3833 }, { "epoch": 0.20961414922843513, "grad_norm": 1.4532161951065063, "learning_rate": 1.8775993629906333e-05, "loss": 1.4831, "step": 3834 }, { "epoch": 0.2096688216721567, "grad_norm": 1.4927610158920288, "learning_rate": 1.877511745783381e-05, "loss": 1.7294, "step": 3835 }, { "epoch": 0.20972349411587823, "grad_norm": 1.796289086341858, "learning_rate": 1.8774240992739524e-05, "loss": 1.4985, "step": 3836 }, { "epoch": 0.2097781665595998, "grad_norm": 1.4963005781173706, "learning_rate": 1.877336423465274e-05, "loss": 1.5924, "step": 3837 }, { "epoch": 0.20983283900332136, "grad_norm": 1.7089240550994873, "learning_rate": 1.8772487183602738e-05, "loss": 1.4488, "step": 3838 }, { "epoch": 0.2098875114470429, "grad_norm": 1.7065093517303467, "learning_rate": 1.8771609839618806e-05, "loss": 1.4146, "step": 3839 }, { "epoch": 0.20994218389076447, "grad_norm": 1.614384651184082, "learning_rate": 1.877073220273024e-05, "loss": 1.3447, "step": 3840 }, { "epoch": 0.209996856334486, "grad_norm": 1.661206841468811, "learning_rate": 1.8769854272966337e-05, "loss": 1.1161, "step": 3841 }, { "epoch": 0.21005152877820757, "grad_norm": 1.3490042686462402, "learning_rate": 1.8768976050356428e-05, "loss": 1.405, "step": 3842 }, { "epoch": 0.2101062012219291, "grad_norm": 1.4504408836364746, "learning_rate": 1.8768097534929827e-05, "loss": 1.3957, "step": 3843 }, { "epoch": 0.21016087366565067, "grad_norm": 1.7446486949920654, "learning_rate": 1.8767218726715876e-05, "loss": 1.2771, "step": 3844 }, { "epoch": 0.21021554610937224, "grad_norm": 1.42527437210083, "learning_rate": 1.876633962574392e-05, "loss": 1.3803, "step": 3845 }, { "epoch": 0.21027021855309377, "grad_norm": 1.6900708675384521, "learning_rate": 1.876546023204331e-05, "loss": 1.5829, "step": 3846 }, { "epoch": 0.21032489099681534, "grad_norm": 1.6577845811843872, "learning_rate": 1.8764580545643417e-05, "loss": 1.6131, "step": 3847 }, { "epoch": 0.21037956344053688, "grad_norm": 1.7679030895233154, "learning_rate": 1.876370056657361e-05, "loss": 1.4805, "step": 3848 }, { "epoch": 0.21043423588425844, "grad_norm": 1.7787814140319824, "learning_rate": 1.876282029486328e-05, "loss": 1.4518, "step": 3849 }, { "epoch": 0.21048890832797998, "grad_norm": 1.427578330039978, "learning_rate": 1.8761939730541815e-05, "loss": 1.2638, "step": 3850 }, { "epoch": 0.21054358077170154, "grad_norm": 1.8796441555023193, "learning_rate": 1.876105887363862e-05, "loss": 1.5091, "step": 3851 }, { "epoch": 0.2105982532154231, "grad_norm": 2.058152675628662, "learning_rate": 1.8760177724183115e-05, "loss": 1.3269, "step": 3852 }, { "epoch": 0.21065292565914465, "grad_norm": 1.4800890684127808, "learning_rate": 1.8759296282204718e-05, "loss": 1.4669, "step": 3853 }, { "epoch": 0.2107075981028662, "grad_norm": 1.0389255285263062, "learning_rate": 1.8758414547732864e-05, "loss": 1.5115, "step": 3854 }, { "epoch": 0.21076227054658775, "grad_norm": 1.699278712272644, "learning_rate": 1.8757532520796993e-05, "loss": 1.4916, "step": 3855 }, { "epoch": 0.21081694299030931, "grad_norm": 1.3553625345230103, "learning_rate": 1.8756650201426565e-05, "loss": 1.4817, "step": 3856 }, { "epoch": 0.21087161543403085, "grad_norm": 1.496211290359497, "learning_rate": 1.8755767589651036e-05, "loss": 1.2607, "step": 3857 }, { "epoch": 0.21092628787775242, "grad_norm": 1.9067001342773438, "learning_rate": 1.8754884685499887e-05, "loss": 1.4436, "step": 3858 }, { "epoch": 0.21098096032147398, "grad_norm": 1.5551385879516602, "learning_rate": 1.8754001489002586e-05, "loss": 1.1296, "step": 3859 }, { "epoch": 0.21103563276519552, "grad_norm": 1.3536707162857056, "learning_rate": 1.875311800018864e-05, "loss": 1.5963, "step": 3860 }, { "epoch": 0.21109030520891708, "grad_norm": 1.5317569971084595, "learning_rate": 1.8752234219087538e-05, "loss": 1.6695, "step": 3861 }, { "epoch": 0.21114497765263862, "grad_norm": 1.2480674982070923, "learning_rate": 1.87513501457288e-05, "loss": 1.5615, "step": 3862 }, { "epoch": 0.2111996500963602, "grad_norm": 1.9782164096832275, "learning_rate": 1.8750465780141946e-05, "loss": 1.471, "step": 3863 }, { "epoch": 0.21125432254008172, "grad_norm": 1.519607663154602, "learning_rate": 1.8749581122356507e-05, "loss": 1.329, "step": 3864 }, { "epoch": 0.2113089949838033, "grad_norm": 1.6237661838531494, "learning_rate": 1.874869617240202e-05, "loss": 1.5081, "step": 3865 }, { "epoch": 0.21136366742752485, "grad_norm": 2.675809383392334, "learning_rate": 1.874781093030804e-05, "loss": 1.7825, "step": 3866 }, { "epoch": 0.2114183398712464, "grad_norm": 1.2254886627197266, "learning_rate": 1.8746925396104126e-05, "loss": 1.553, "step": 3867 }, { "epoch": 0.21147301231496796, "grad_norm": 1.654492735862732, "learning_rate": 1.874603956981985e-05, "loss": 1.3124, "step": 3868 }, { "epoch": 0.2115276847586895, "grad_norm": 1.5431544780731201, "learning_rate": 1.8745153451484786e-05, "loss": 1.2419, "step": 3869 }, { "epoch": 0.21158235720241106, "grad_norm": 1.803159236907959, "learning_rate": 1.8744267041128528e-05, "loss": 1.4263, "step": 3870 }, { "epoch": 0.2116370296461326, "grad_norm": 1.5820207595825195, "learning_rate": 1.8743380338780676e-05, "loss": 1.3848, "step": 3871 }, { "epoch": 0.21169170208985416, "grad_norm": 2.6771302223205566, "learning_rate": 1.8742493344470834e-05, "loss": 1.2604, "step": 3872 }, { "epoch": 0.21174637453357573, "grad_norm": 1.5485759973526, "learning_rate": 1.8741606058228626e-05, "loss": 1.299, "step": 3873 }, { "epoch": 0.21180104697729726, "grad_norm": 1.1631001234054565, "learning_rate": 1.874071848008368e-05, "loss": 1.5004, "step": 3874 }, { "epoch": 0.21185571942101883, "grad_norm": 1.6442376375198364, "learning_rate": 1.873983061006563e-05, "loss": 1.2699, "step": 3875 }, { "epoch": 0.21191039186474037, "grad_norm": 1.4123083353042603, "learning_rate": 1.873894244820413e-05, "loss": 1.5706, "step": 3876 }, { "epoch": 0.21196506430846193, "grad_norm": 2.0753414630889893, "learning_rate": 1.8738053994528835e-05, "loss": 1.2937, "step": 3877 }, { "epoch": 0.21201973675218347, "grad_norm": 1.3475399017333984, "learning_rate": 1.873716524906941e-05, "loss": 1.2849, "step": 3878 }, { "epoch": 0.21207440919590503, "grad_norm": 1.3887102603912354, "learning_rate": 1.873627621185554e-05, "loss": 1.4825, "step": 3879 }, { "epoch": 0.2121290816396266, "grad_norm": 1.2989463806152344, "learning_rate": 1.8735386882916904e-05, "loss": 1.1578, "step": 3880 }, { "epoch": 0.21218375408334814, "grad_norm": 1.3477164506912231, "learning_rate": 1.8734497262283203e-05, "loss": 1.4807, "step": 3881 }, { "epoch": 0.2122384265270697, "grad_norm": 1.6637665033340454, "learning_rate": 1.873360734998414e-05, "loss": 1.3958, "step": 3882 }, { "epoch": 0.21229309897079124, "grad_norm": 1.4028288125991821, "learning_rate": 1.8732717146049437e-05, "loss": 1.5698, "step": 3883 }, { "epoch": 0.2123477714145128, "grad_norm": 1.3348075151443481, "learning_rate": 1.8731826650508812e-05, "loss": 1.5598, "step": 3884 }, { "epoch": 0.21240244385823434, "grad_norm": 1.225008249282837, "learning_rate": 1.873093586339201e-05, "loss": 1.4004, "step": 3885 }, { "epoch": 0.2124571163019559, "grad_norm": 1.472497582435608, "learning_rate": 1.8730044784728767e-05, "loss": 1.4581, "step": 3886 }, { "epoch": 0.21251178874567747, "grad_norm": 1.6501260995864868, "learning_rate": 1.8729153414548843e-05, "loss": 1.4774, "step": 3887 }, { "epoch": 0.212566461189399, "grad_norm": 1.3763550519943237, "learning_rate": 1.8728261752882008e-05, "loss": 1.4338, "step": 3888 }, { "epoch": 0.21262113363312057, "grad_norm": 1.3870854377746582, "learning_rate": 1.8727369799758027e-05, "loss": 1.501, "step": 3889 }, { "epoch": 0.2126758060768421, "grad_norm": 1.5083149671554565, "learning_rate": 1.872647755520669e-05, "loss": 1.3853, "step": 3890 }, { "epoch": 0.21273047852056368, "grad_norm": 1.2219513654708862, "learning_rate": 1.8725585019257794e-05, "loss": 1.8493, "step": 3891 }, { "epoch": 0.2127851509642852, "grad_norm": 1.508490800857544, "learning_rate": 1.8724692191941134e-05, "loss": 1.4224, "step": 3892 }, { "epoch": 0.21283982340800678, "grad_norm": 1.4422398805618286, "learning_rate": 1.872379907328653e-05, "loss": 1.5866, "step": 3893 }, { "epoch": 0.21289449585172834, "grad_norm": 1.9292789697647095, "learning_rate": 1.8722905663323804e-05, "loss": 1.3668, "step": 3894 }, { "epoch": 0.21294916829544988, "grad_norm": 1.268516182899475, "learning_rate": 1.872201196208279e-05, "loss": 1.6924, "step": 3895 }, { "epoch": 0.21300384073917145, "grad_norm": 1.3289215564727783, "learning_rate": 1.872111796959333e-05, "loss": 1.3867, "step": 3896 }, { "epoch": 0.21305851318289298, "grad_norm": 1.2726136445999146, "learning_rate": 1.8720223685885275e-05, "loss": 1.6388, "step": 3897 }, { "epoch": 0.21311318562661455, "grad_norm": 1.8675611019134521, "learning_rate": 1.8719329110988487e-05, "loss": 1.2151, "step": 3898 }, { "epoch": 0.21316785807033609, "grad_norm": 1.450545310974121, "learning_rate": 1.871843424493284e-05, "loss": 1.2383, "step": 3899 }, { "epoch": 0.21322253051405765, "grad_norm": 1.5895240306854248, "learning_rate": 1.8717539087748217e-05, "loss": 1.4609, "step": 3900 }, { "epoch": 0.21327720295777922, "grad_norm": 1.5299140214920044, "learning_rate": 1.871664363946451e-05, "loss": 1.51, "step": 3901 }, { "epoch": 0.21333187540150075, "grad_norm": 1.461996078491211, "learning_rate": 1.8715747900111613e-05, "loss": 1.5254, "step": 3902 }, { "epoch": 0.21338654784522232, "grad_norm": 1.2560043334960938, "learning_rate": 1.8714851869719443e-05, "loss": 1.4358, "step": 3903 }, { "epoch": 0.21344122028894386, "grad_norm": 1.5627689361572266, "learning_rate": 1.871395554831792e-05, "loss": 1.2477, "step": 3904 }, { "epoch": 0.21349589273266542, "grad_norm": 1.6109325885772705, "learning_rate": 1.871305893593697e-05, "loss": 1.4612, "step": 3905 }, { "epoch": 0.21355056517638696, "grad_norm": 1.5729912519454956, "learning_rate": 1.8712162032606536e-05, "loss": 1.4142, "step": 3906 }, { "epoch": 0.21360523762010852, "grad_norm": 1.8608589172363281, "learning_rate": 1.871126483835657e-05, "loss": 1.2556, "step": 3907 }, { "epoch": 0.2136599100638301, "grad_norm": 1.4336923360824585, "learning_rate": 1.8710367353217034e-05, "loss": 1.5968, "step": 3908 }, { "epoch": 0.21371458250755163, "grad_norm": 1.540022373199463, "learning_rate": 1.8709469577217886e-05, "loss": 1.5405, "step": 3909 }, { "epoch": 0.2137692549512732, "grad_norm": 1.3641107082366943, "learning_rate": 1.8708571510389114e-05, "loss": 1.3736, "step": 3910 }, { "epoch": 0.21382392739499473, "grad_norm": 1.516208529472351, "learning_rate": 1.8707673152760705e-05, "loss": 1.4638, "step": 3911 }, { "epoch": 0.2138785998387163, "grad_norm": 1.872280240058899, "learning_rate": 1.8706774504362655e-05, "loss": 1.5164, "step": 3912 }, { "epoch": 0.21393327228243783, "grad_norm": 1.8649379014968872, "learning_rate": 1.8705875565224975e-05, "loss": 1.4592, "step": 3913 }, { "epoch": 0.2139879447261594, "grad_norm": 1.3472758531570435, "learning_rate": 1.8704976335377677e-05, "loss": 1.2875, "step": 3914 }, { "epoch": 0.21404261716988096, "grad_norm": 1.523036003112793, "learning_rate": 1.8704076814850795e-05, "loss": 1.3416, "step": 3915 }, { "epoch": 0.2140972896136025, "grad_norm": 1.4109041690826416, "learning_rate": 1.8703177003674362e-05, "loss": 1.3462, "step": 3916 }, { "epoch": 0.21415196205732406, "grad_norm": 1.2483106851577759, "learning_rate": 1.870227690187843e-05, "loss": 1.4846, "step": 3917 }, { "epoch": 0.2142066345010456, "grad_norm": 1.6570472717285156, "learning_rate": 1.8701376509493046e-05, "loss": 1.3255, "step": 3918 }, { "epoch": 0.21426130694476717, "grad_norm": 1.1713680028915405, "learning_rate": 1.8700475826548285e-05, "loss": 1.3703, "step": 3919 }, { "epoch": 0.2143159793884887, "grad_norm": 1.2029238939285278, "learning_rate": 1.8699574853074222e-05, "loss": 1.3762, "step": 3920 }, { "epoch": 0.21437065183221027, "grad_norm": 1.6445196866989136, "learning_rate": 1.8698673589100936e-05, "loss": 1.6759, "step": 3921 }, { "epoch": 0.21442532427593183, "grad_norm": 1.2273280620574951, "learning_rate": 1.8697772034658527e-05, "loss": 1.6317, "step": 3922 }, { "epoch": 0.21447999671965337, "grad_norm": 1.3719342947006226, "learning_rate": 1.8696870189777107e-05, "loss": 1.4699, "step": 3923 }, { "epoch": 0.21453466916337494, "grad_norm": 1.4748409986495972, "learning_rate": 1.8695968054486774e-05, "loss": 1.6674, "step": 3924 }, { "epoch": 0.21458934160709647, "grad_norm": 1.266859769821167, "learning_rate": 1.8695065628817667e-05, "loss": 1.5605, "step": 3925 }, { "epoch": 0.21464401405081804, "grad_norm": 1.5798380374908447, "learning_rate": 1.8694162912799917e-05, "loss": 1.4382, "step": 3926 }, { "epoch": 0.2146986864945396, "grad_norm": 1.4360848665237427, "learning_rate": 1.8693259906463663e-05, "loss": 1.4676, "step": 3927 }, { "epoch": 0.21475335893826114, "grad_norm": 1.3499799966812134, "learning_rate": 1.869235660983906e-05, "loss": 1.4616, "step": 3928 }, { "epoch": 0.2148080313819827, "grad_norm": 1.578836441040039, "learning_rate": 1.8691453022956274e-05, "loss": 1.2975, "step": 3929 }, { "epoch": 0.21486270382570424, "grad_norm": 1.399401068687439, "learning_rate": 1.8690549145845474e-05, "loss": 1.2884, "step": 3930 }, { "epoch": 0.2149173762694258, "grad_norm": 1.3698638677597046, "learning_rate": 1.8689644978536847e-05, "loss": 1.6137, "step": 3931 }, { "epoch": 0.21497204871314735, "grad_norm": 1.6347465515136719, "learning_rate": 1.8688740521060587e-05, "loss": 1.2522, "step": 3932 }, { "epoch": 0.2150267211568689, "grad_norm": 1.5675082206726074, "learning_rate": 1.868783577344689e-05, "loss": 1.2578, "step": 3933 }, { "epoch": 0.21508139360059048, "grad_norm": 1.5545570850372314, "learning_rate": 1.8686930735725965e-05, "loss": 1.3237, "step": 3934 }, { "epoch": 0.215136066044312, "grad_norm": 1.2649370431900024, "learning_rate": 1.868602540792804e-05, "loss": 1.3062, "step": 3935 }, { "epoch": 0.21519073848803358, "grad_norm": 1.563340425491333, "learning_rate": 1.8685119790083348e-05, "loss": 1.637, "step": 3936 }, { "epoch": 0.21524541093175512, "grad_norm": 1.3032349348068237, "learning_rate": 1.8684213882222123e-05, "loss": 1.4841, "step": 3937 }, { "epoch": 0.21530008337547668, "grad_norm": 1.626275897026062, "learning_rate": 1.8683307684374622e-05, "loss": 1.4447, "step": 3938 }, { "epoch": 0.21535475581919822, "grad_norm": 1.6444780826568604, "learning_rate": 1.8682401196571097e-05, "loss": 1.4834, "step": 3939 }, { "epoch": 0.21540942826291978, "grad_norm": 1.790311336517334, "learning_rate": 1.8681494418841825e-05, "loss": 1.3075, "step": 3940 }, { "epoch": 0.21546410070664135, "grad_norm": 1.5151134729385376, "learning_rate": 1.8680587351217082e-05, "loss": 1.4639, "step": 3941 }, { "epoch": 0.21551877315036289, "grad_norm": 1.6693280935287476, "learning_rate": 1.8679679993727157e-05, "loss": 1.1617, "step": 3942 }, { "epoch": 0.21557344559408445, "grad_norm": 1.6796585321426392, "learning_rate": 1.867877234640235e-05, "loss": 1.5608, "step": 3943 }, { "epoch": 0.215628118037806, "grad_norm": 1.3354688882827759, "learning_rate": 1.867786440927297e-05, "loss": 1.349, "step": 3944 }, { "epoch": 0.21568279048152755, "grad_norm": 1.3487813472747803, "learning_rate": 1.867695618236933e-05, "loss": 1.4053, "step": 3945 }, { "epoch": 0.2157374629252491, "grad_norm": 1.9008277654647827, "learning_rate": 1.8676047665721764e-05, "loss": 1.5239, "step": 3946 }, { "epoch": 0.21579213536897066, "grad_norm": 1.6989448070526123, "learning_rate": 1.867513885936061e-05, "loss": 1.5068, "step": 3947 }, { "epoch": 0.21584680781269222, "grad_norm": 1.6127259731292725, "learning_rate": 1.867422976331621e-05, "loss": 1.5386, "step": 3948 }, { "epoch": 0.21590148025641376, "grad_norm": 1.782577395439148, "learning_rate": 1.8673320377618927e-05, "loss": 1.3919, "step": 3949 }, { "epoch": 0.21595615270013532, "grad_norm": 1.2668238878250122, "learning_rate": 1.8672410702299118e-05, "loss": 1.6003, "step": 3950 }, { "epoch": 0.21601082514385686, "grad_norm": 1.3442057371139526, "learning_rate": 1.867150073738717e-05, "loss": 1.5087, "step": 3951 }, { "epoch": 0.21606549758757843, "grad_norm": 1.2593082189559937, "learning_rate": 1.8670590482913463e-05, "loss": 1.4537, "step": 3952 }, { "epoch": 0.21612017003129996, "grad_norm": 1.395640254020691, "learning_rate": 1.8669679938908393e-05, "loss": 1.4546, "step": 3953 }, { "epoch": 0.21617484247502153, "grad_norm": 1.4500776529312134, "learning_rate": 1.8668769105402366e-05, "loss": 1.6549, "step": 3954 }, { "epoch": 0.2162295149187431, "grad_norm": 1.7812178134918213, "learning_rate": 1.8667857982425797e-05, "loss": 1.211, "step": 3955 }, { "epoch": 0.21628418736246463, "grad_norm": 1.4446276426315308, "learning_rate": 1.866694657000911e-05, "loss": 1.4204, "step": 3956 }, { "epoch": 0.2163388598061862, "grad_norm": 1.4691271781921387, "learning_rate": 1.866603486818274e-05, "loss": 1.4612, "step": 3957 }, { "epoch": 0.21639353224990773, "grad_norm": 1.8737324476242065, "learning_rate": 1.866512287697713e-05, "loss": 1.2609, "step": 3958 }, { "epoch": 0.2164482046936293, "grad_norm": 1.5928107500076294, "learning_rate": 1.8664210596422733e-05, "loss": 1.2213, "step": 3959 }, { "epoch": 0.21650287713735084, "grad_norm": 1.379937767982483, "learning_rate": 1.8663298026550013e-05, "loss": 1.444, "step": 3960 }, { "epoch": 0.2165575495810724, "grad_norm": 1.5002251863479614, "learning_rate": 1.8662385167389443e-05, "loss": 1.3919, "step": 3961 }, { "epoch": 0.21661222202479397, "grad_norm": 1.3567665815353394, "learning_rate": 1.8661472018971506e-05, "loss": 1.308, "step": 3962 }, { "epoch": 0.2166668944685155, "grad_norm": 1.3716787099838257, "learning_rate": 1.8660558581326695e-05, "loss": 1.5095, "step": 3963 }, { "epoch": 0.21672156691223707, "grad_norm": 1.1755619049072266, "learning_rate": 1.8659644854485506e-05, "loss": 1.2116, "step": 3964 }, { "epoch": 0.2167762393559586, "grad_norm": 1.3509405851364136, "learning_rate": 1.8658730838478457e-05, "loss": 1.402, "step": 3965 }, { "epoch": 0.21683091179968017, "grad_norm": 1.494396686553955, "learning_rate": 1.8657816533336067e-05, "loss": 1.5845, "step": 3966 }, { "epoch": 0.2168855842434017, "grad_norm": 1.5599819421768188, "learning_rate": 1.8656901939088868e-05, "loss": 1.5222, "step": 3967 }, { "epoch": 0.21694025668712327, "grad_norm": 2.2314693927764893, "learning_rate": 1.8655987055767396e-05, "loss": 1.4358, "step": 3968 }, { "epoch": 0.21699492913084484, "grad_norm": 1.6443748474121094, "learning_rate": 1.865507188340221e-05, "loss": 1.4493, "step": 3969 }, { "epoch": 0.21704960157456638, "grad_norm": 1.5959833860397339, "learning_rate": 1.865415642202386e-05, "loss": 1.3763, "step": 3970 }, { "epoch": 0.21710427401828794, "grad_norm": 1.5689257383346558, "learning_rate": 1.8653240671662916e-05, "loss": 1.4599, "step": 3971 }, { "epoch": 0.21715894646200948, "grad_norm": 1.4050202369689941, "learning_rate": 1.8652324632349967e-05, "loss": 1.4037, "step": 3972 }, { "epoch": 0.21721361890573104, "grad_norm": 1.5204205513000488, "learning_rate": 1.8651408304115593e-05, "loss": 1.4693, "step": 3973 }, { "epoch": 0.21726829134945258, "grad_norm": 1.5086500644683838, "learning_rate": 1.8650491686990394e-05, "loss": 1.3017, "step": 3974 }, { "epoch": 0.21732296379317415, "grad_norm": 1.3455864191055298, "learning_rate": 1.864957478100498e-05, "loss": 1.4231, "step": 3975 }, { "epoch": 0.2173776362368957, "grad_norm": 1.7443143129348755, "learning_rate": 1.8648657586189963e-05, "loss": 1.5363, "step": 3976 }, { "epoch": 0.21743230868061725, "grad_norm": 1.6874475479125977, "learning_rate": 1.864774010257598e-05, "loss": 1.3048, "step": 3977 }, { "epoch": 0.2174869811243388, "grad_norm": 1.3138066530227661, "learning_rate": 1.864682233019366e-05, "loss": 1.4995, "step": 3978 }, { "epoch": 0.21754165356806035, "grad_norm": 1.467049241065979, "learning_rate": 1.864590426907365e-05, "loss": 1.4419, "step": 3979 }, { "epoch": 0.21759632601178192, "grad_norm": 1.674760103225708, "learning_rate": 1.8644985919246613e-05, "loss": 1.9902, "step": 3980 }, { "epoch": 0.21765099845550345, "grad_norm": 1.1664013862609863, "learning_rate": 1.8644067280743202e-05, "loss": 1.3229, "step": 3981 }, { "epoch": 0.21770567089922502, "grad_norm": 1.5050363540649414, "learning_rate": 1.864314835359411e-05, "loss": 1.5228, "step": 3982 }, { "epoch": 0.21776034334294658, "grad_norm": 1.5326693058013916, "learning_rate": 1.864222913783001e-05, "loss": 1.4569, "step": 3983 }, { "epoch": 0.21781501578666812, "grad_norm": 1.5833582878112793, "learning_rate": 1.8641309633481594e-05, "loss": 1.7884, "step": 3984 }, { "epoch": 0.21786968823038969, "grad_norm": 1.0528496503829956, "learning_rate": 1.8640389840579578e-05, "loss": 1.4427, "step": 3985 }, { "epoch": 0.21792436067411122, "grad_norm": 1.4104061126708984, "learning_rate": 1.8639469759154667e-05, "loss": 1.828, "step": 3986 }, { "epoch": 0.2179790331178328, "grad_norm": 1.544675350189209, "learning_rate": 1.8638549389237587e-05, "loss": 1.4993, "step": 3987 }, { "epoch": 0.21803370556155433, "grad_norm": 1.6189390420913696, "learning_rate": 1.863762873085907e-05, "loss": 1.8114, "step": 3988 }, { "epoch": 0.2180883780052759, "grad_norm": 1.5785592794418335, "learning_rate": 1.8636707784049867e-05, "loss": 1.3999, "step": 3989 }, { "epoch": 0.21814305044899746, "grad_norm": 1.5822025537490845, "learning_rate": 1.863578654884072e-05, "loss": 1.4618, "step": 3990 }, { "epoch": 0.218197722892719, "grad_norm": 1.5803086757659912, "learning_rate": 1.86348650252624e-05, "loss": 1.3514, "step": 3991 }, { "epoch": 0.21825239533644056, "grad_norm": 1.4568864107131958, "learning_rate": 1.863394321334567e-05, "loss": 1.4743, "step": 3992 }, { "epoch": 0.2183070677801621, "grad_norm": 1.4847913980484009, "learning_rate": 1.8633021113121318e-05, "loss": 1.3631, "step": 3993 }, { "epoch": 0.21836174022388366, "grad_norm": 2.277005910873413, "learning_rate": 1.8632098724620134e-05, "loss": 1.541, "step": 3994 }, { "epoch": 0.2184164126676052, "grad_norm": 1.4682950973510742, "learning_rate": 1.8631176047872913e-05, "loss": 1.4396, "step": 3995 }, { "epoch": 0.21847108511132676, "grad_norm": 1.650509238243103, "learning_rate": 1.8630253082910473e-05, "loss": 1.1895, "step": 3996 }, { "epoch": 0.21852575755504833, "grad_norm": 1.655499815940857, "learning_rate": 1.862932982976363e-05, "loss": 1.4392, "step": 3997 }, { "epoch": 0.21858042999876987, "grad_norm": 1.7130882740020752, "learning_rate": 1.862840628846322e-05, "loss": 1.3058, "step": 3998 }, { "epoch": 0.21863510244249143, "grad_norm": 1.6762266159057617, "learning_rate": 1.8627482459040068e-05, "loss": 1.458, "step": 3999 }, { "epoch": 0.21868977488621297, "grad_norm": 1.5305569171905518, "learning_rate": 1.8626558341525037e-05, "loss": 1.4263, "step": 4000 }, { "epoch": 0.21874444732993453, "grad_norm": 1.48497474193573, "learning_rate": 1.862563393594898e-05, "loss": 1.4841, "step": 4001 }, { "epoch": 0.21879911977365607, "grad_norm": 1.2765401601791382, "learning_rate": 1.8624709242342766e-05, "loss": 1.5001, "step": 4002 }, { "epoch": 0.21885379221737764, "grad_norm": 1.571272373199463, "learning_rate": 1.8623784260737272e-05, "loss": 1.4583, "step": 4003 }, { "epoch": 0.2189084646610992, "grad_norm": 1.4263635873794556, "learning_rate": 1.8622858991163385e-05, "loss": 1.6514, "step": 4004 }, { "epoch": 0.21896313710482074, "grad_norm": 1.3749841451644897, "learning_rate": 1.8621933433652e-05, "loss": 1.6164, "step": 4005 }, { "epoch": 0.2190178095485423, "grad_norm": 1.5768215656280518, "learning_rate": 1.8621007588234023e-05, "loss": 1.4406, "step": 4006 }, { "epoch": 0.21907248199226384, "grad_norm": 1.2330063581466675, "learning_rate": 1.862008145494038e-05, "loss": 1.3238, "step": 4007 }, { "epoch": 0.2191271544359854, "grad_norm": 1.381330966949463, "learning_rate": 1.8619155033801985e-05, "loss": 1.2173, "step": 4008 }, { "epoch": 0.21918182687970694, "grad_norm": 1.5775933265686035, "learning_rate": 1.8618228324849777e-05, "loss": 1.5042, "step": 4009 }, { "epoch": 0.2192364993234285, "grad_norm": 1.3227602243423462, "learning_rate": 1.8617301328114704e-05, "loss": 1.6853, "step": 4010 }, { "epoch": 0.21929117176715007, "grad_norm": 1.7883949279785156, "learning_rate": 1.861637404362772e-05, "loss": 1.1323, "step": 4011 }, { "epoch": 0.2193458442108716, "grad_norm": 1.39847993850708, "learning_rate": 1.8615446471419786e-05, "loss": 1.5004, "step": 4012 }, { "epoch": 0.21940051665459318, "grad_norm": 1.228210687637329, "learning_rate": 1.8614518611521876e-05, "loss": 1.473, "step": 4013 }, { "epoch": 0.2194551890983147, "grad_norm": 2.074437379837036, "learning_rate": 1.8613590463964977e-05, "loss": 1.3888, "step": 4014 }, { "epoch": 0.21950986154203628, "grad_norm": 1.4673653841018677, "learning_rate": 1.861266202878008e-05, "loss": 1.3286, "step": 4015 }, { "epoch": 0.21956453398575781, "grad_norm": 1.8469462394714355, "learning_rate": 1.8611733305998188e-05, "loss": 1.3567, "step": 4016 }, { "epoch": 0.21961920642947938, "grad_norm": 1.2280939817428589, "learning_rate": 1.861080429565031e-05, "loss": 1.4826, "step": 4017 }, { "epoch": 0.21967387887320095, "grad_norm": 1.3213307857513428, "learning_rate": 1.8609874997767473e-05, "loss": 1.5301, "step": 4018 }, { "epoch": 0.21972855131692248, "grad_norm": 1.3890585899353027, "learning_rate": 1.8608945412380705e-05, "loss": 1.4741, "step": 4019 }, { "epoch": 0.21978322376064405, "grad_norm": 1.7183270454406738, "learning_rate": 1.860801553952105e-05, "loss": 1.3535, "step": 4020 }, { "epoch": 0.21983789620436558, "grad_norm": 2.0221025943756104, "learning_rate": 1.8607085379219557e-05, "loss": 1.4464, "step": 4021 }, { "epoch": 0.21989256864808715, "grad_norm": 1.101698637008667, "learning_rate": 1.8606154931507285e-05, "loss": 1.5829, "step": 4022 }, { "epoch": 0.2199472410918087, "grad_norm": 1.642586350440979, "learning_rate": 1.8605224196415304e-05, "loss": 1.4921, "step": 4023 }, { "epoch": 0.22000191353553025, "grad_norm": 1.5694695711135864, "learning_rate": 1.8604293173974694e-05, "loss": 1.4791, "step": 4024 }, { "epoch": 0.22005658597925182, "grad_norm": 1.816205382347107, "learning_rate": 1.8603361864216544e-05, "loss": 1.3535, "step": 4025 }, { "epoch": 0.22011125842297335, "grad_norm": 1.2002280950546265, "learning_rate": 1.8602430267171954e-05, "loss": 1.4342, "step": 4026 }, { "epoch": 0.22016593086669492, "grad_norm": 1.1796636581420898, "learning_rate": 1.860149838287203e-05, "loss": 1.3676, "step": 4027 }, { "epoch": 0.22022060331041646, "grad_norm": 1.662825584411621, "learning_rate": 1.8600566211347897e-05, "loss": 1.3915, "step": 4028 }, { "epoch": 0.22027527575413802, "grad_norm": 1.7269248962402344, "learning_rate": 1.859963375263067e-05, "loss": 1.3954, "step": 4029 }, { "epoch": 0.2203299481978596, "grad_norm": 1.1979793310165405, "learning_rate": 1.8598701006751494e-05, "loss": 1.5678, "step": 4030 }, { "epoch": 0.22038462064158112, "grad_norm": 1.7392127513885498, "learning_rate": 1.8597767973741514e-05, "loss": 1.5398, "step": 4031 }, { "epoch": 0.2204392930853027, "grad_norm": 1.715641975402832, "learning_rate": 1.8596834653631887e-05, "loss": 1.2396, "step": 4032 }, { "epoch": 0.22049396552902423, "grad_norm": 1.4672346115112305, "learning_rate": 1.8595901046453776e-05, "loss": 1.3615, "step": 4033 }, { "epoch": 0.2205486379727458, "grad_norm": 1.511107325553894, "learning_rate": 1.859496715223836e-05, "loss": 1.4547, "step": 4034 }, { "epoch": 0.22060331041646733, "grad_norm": 1.7819955348968506, "learning_rate": 1.8594032971016818e-05, "loss": 1.4287, "step": 4035 }, { "epoch": 0.2206579828601889, "grad_norm": 1.9667562246322632, "learning_rate": 1.859309850282035e-05, "loss": 1.4673, "step": 4036 }, { "epoch": 0.22071265530391046, "grad_norm": 1.7587720155715942, "learning_rate": 1.8592163747680164e-05, "loss": 1.6913, "step": 4037 }, { "epoch": 0.220767327747632, "grad_norm": 1.2789888381958008, "learning_rate": 1.8591228705627464e-05, "loss": 1.505, "step": 4038 }, { "epoch": 0.22082200019135356, "grad_norm": 2.046172857284546, "learning_rate": 1.8590293376693476e-05, "loss": 1.3803, "step": 4039 }, { "epoch": 0.2208766726350751, "grad_norm": 1.7211757898330688, "learning_rate": 1.858935776090944e-05, "loss": 1.3653, "step": 4040 }, { "epoch": 0.22093134507879666, "grad_norm": 1.375563383102417, "learning_rate": 1.8588421858306587e-05, "loss": 1.5797, "step": 4041 }, { "epoch": 0.2209860175225182, "grad_norm": 1.7318329811096191, "learning_rate": 1.8587485668916176e-05, "loss": 1.4489, "step": 4042 }, { "epoch": 0.22104068996623977, "grad_norm": 1.4722927808761597, "learning_rate": 1.858654919276947e-05, "loss": 1.3947, "step": 4043 }, { "epoch": 0.22109536240996133, "grad_norm": 1.1935362815856934, "learning_rate": 1.8585612429897735e-05, "loss": 1.5445, "step": 4044 }, { "epoch": 0.22115003485368287, "grad_norm": 1.2660088539123535, "learning_rate": 1.8584675380332254e-05, "loss": 1.3299, "step": 4045 }, { "epoch": 0.22120470729740443, "grad_norm": 1.4568325281143188, "learning_rate": 1.858373804410432e-05, "loss": 1.3887, "step": 4046 }, { "epoch": 0.22125937974112597, "grad_norm": 1.8553135395050049, "learning_rate": 1.8582800421245227e-05, "loss": 1.3725, "step": 4047 }, { "epoch": 0.22131405218484754, "grad_norm": 1.9431936740875244, "learning_rate": 1.858186251178629e-05, "loss": 1.1247, "step": 4048 }, { "epoch": 0.22136872462856907, "grad_norm": 1.5337398052215576, "learning_rate": 1.8580924315758825e-05, "loss": 1.3587, "step": 4049 }, { "epoch": 0.22142339707229064, "grad_norm": 1.357643961906433, "learning_rate": 1.8579985833194164e-05, "loss": 1.4121, "step": 4050 }, { "epoch": 0.2214780695160122, "grad_norm": 1.481144666671753, "learning_rate": 1.8579047064123638e-05, "loss": 1.2287, "step": 4051 }, { "epoch": 0.22153274195973374, "grad_norm": 1.5336380004882812, "learning_rate": 1.8578108008578603e-05, "loss": 1.252, "step": 4052 }, { "epoch": 0.2215874144034553, "grad_norm": 1.99599289894104, "learning_rate": 1.857716866659041e-05, "loss": 1.5096, "step": 4053 }, { "epoch": 0.22164208684717684, "grad_norm": 1.2527939081192017, "learning_rate": 1.857622903819043e-05, "loss": 1.3691, "step": 4054 }, { "epoch": 0.2216967592908984, "grad_norm": 1.2206131219863892, "learning_rate": 1.8575289123410035e-05, "loss": 1.354, "step": 4055 }, { "epoch": 0.22175143173461995, "grad_norm": 1.81217622756958, "learning_rate": 1.8574348922280617e-05, "loss": 1.2316, "step": 4056 }, { "epoch": 0.2218061041783415, "grad_norm": 1.288155198097229, "learning_rate": 1.8573408434833565e-05, "loss": 1.2979, "step": 4057 }, { "epoch": 0.22186077662206308, "grad_norm": 1.5225738286972046, "learning_rate": 1.857246766110029e-05, "loss": 1.3625, "step": 4058 }, { "epoch": 0.22191544906578461, "grad_norm": 1.3216702938079834, "learning_rate": 1.8571526601112202e-05, "loss": 1.5073, "step": 4059 }, { "epoch": 0.22197012150950618, "grad_norm": 1.3840328454971313, "learning_rate": 1.857058525490073e-05, "loss": 1.403, "step": 4060 }, { "epoch": 0.22202479395322772, "grad_norm": 1.3601806163787842, "learning_rate": 1.85696436224973e-05, "loss": 1.3509, "step": 4061 }, { "epoch": 0.22207946639694928, "grad_norm": 1.564803957939148, "learning_rate": 1.856870170393336e-05, "loss": 1.3901, "step": 4062 }, { "epoch": 0.22213413884067082, "grad_norm": 1.5312864780426025, "learning_rate": 1.856775949924037e-05, "loss": 1.3741, "step": 4063 }, { "epoch": 0.22218881128439238, "grad_norm": 1.3600959777832031, "learning_rate": 1.856681700844978e-05, "loss": 1.4946, "step": 4064 }, { "epoch": 0.22224348372811395, "grad_norm": 1.36784827709198, "learning_rate": 1.8565874231593065e-05, "loss": 1.4544, "step": 4065 }, { "epoch": 0.2222981561718355, "grad_norm": 1.2364864349365234, "learning_rate": 1.8564931168701713e-05, "loss": 1.5103, "step": 4066 }, { "epoch": 0.22235282861555705, "grad_norm": 1.5241693258285522, "learning_rate": 1.8563987819807213e-05, "loss": 1.4732, "step": 4067 }, { "epoch": 0.2224075010592786, "grad_norm": 1.567435383796692, "learning_rate": 1.856304418494106e-05, "loss": 1.4432, "step": 4068 }, { "epoch": 0.22246217350300015, "grad_norm": 1.5554819107055664, "learning_rate": 1.856210026413477e-05, "loss": 1.4824, "step": 4069 }, { "epoch": 0.2225168459467217, "grad_norm": 1.5341233015060425, "learning_rate": 1.856115605741986e-05, "loss": 1.4878, "step": 4070 }, { "epoch": 0.22257151839044326, "grad_norm": 1.5522505044937134, "learning_rate": 1.856021156482786e-05, "loss": 1.5065, "step": 4071 }, { "epoch": 0.22262619083416482, "grad_norm": 1.8234809637069702, "learning_rate": 1.8559266786390305e-05, "loss": 1.2079, "step": 4072 }, { "epoch": 0.22268086327788636, "grad_norm": 1.5301284790039062, "learning_rate": 1.855832172213875e-05, "loss": 1.4474, "step": 4073 }, { "epoch": 0.22273553572160792, "grad_norm": 1.2120524644851685, "learning_rate": 1.8557376372104752e-05, "loss": 1.5105, "step": 4074 }, { "epoch": 0.22279020816532946, "grad_norm": 1.8455097675323486, "learning_rate": 1.8556430736319876e-05, "loss": 1.3894, "step": 4075 }, { "epoch": 0.22284488060905103, "grad_norm": 1.7904990911483765, "learning_rate": 1.8555484814815695e-05, "loss": 1.3482, "step": 4076 }, { "epoch": 0.22289955305277256, "grad_norm": 1.487613320350647, "learning_rate": 1.8554538607623805e-05, "loss": 1.5081, "step": 4077 }, { "epoch": 0.22295422549649413, "grad_norm": 2.1815335750579834, "learning_rate": 1.8553592114775796e-05, "loss": 1.4583, "step": 4078 }, { "epoch": 0.2230088979402157, "grad_norm": 1.4365404844284058, "learning_rate": 1.8552645336303274e-05, "loss": 1.4851, "step": 4079 }, { "epoch": 0.22306357038393723, "grad_norm": 1.3237600326538086, "learning_rate": 1.855169827223785e-05, "loss": 1.4681, "step": 4080 }, { "epoch": 0.2231182428276588, "grad_norm": 1.4313080310821533, "learning_rate": 1.855075092261116e-05, "loss": 1.4211, "step": 4081 }, { "epoch": 0.22317291527138033, "grad_norm": 1.527137279510498, "learning_rate": 1.8549803287454832e-05, "loss": 1.4574, "step": 4082 }, { "epoch": 0.2232275877151019, "grad_norm": 1.4839978218078613, "learning_rate": 1.8548855366800508e-05, "loss": 1.3595, "step": 4083 }, { "epoch": 0.22328226015882344, "grad_norm": 1.192995309829712, "learning_rate": 1.854790716067984e-05, "loss": 1.4557, "step": 4084 }, { "epoch": 0.223336932602545, "grad_norm": 1.2872858047485352, "learning_rate": 1.8546958669124494e-05, "loss": 1.6743, "step": 4085 }, { "epoch": 0.22339160504626657, "grad_norm": 1.1004741191864014, "learning_rate": 1.8546009892166145e-05, "loss": 1.6083, "step": 4086 }, { "epoch": 0.2234462774899881, "grad_norm": 1.1784976720809937, "learning_rate": 1.854506082983647e-05, "loss": 1.5353, "step": 4087 }, { "epoch": 0.22350094993370967, "grad_norm": 1.8309357166290283, "learning_rate": 1.8544111482167164e-05, "loss": 1.2033, "step": 4088 }, { "epoch": 0.2235556223774312, "grad_norm": 1.4533095359802246, "learning_rate": 1.8543161849189926e-05, "loss": 1.3678, "step": 4089 }, { "epoch": 0.22361029482115277, "grad_norm": 1.535571813583374, "learning_rate": 1.8542211930936464e-05, "loss": 1.4492, "step": 4090 }, { "epoch": 0.2236649672648743, "grad_norm": 1.5856975317001343, "learning_rate": 1.8541261727438503e-05, "loss": 1.3794, "step": 4091 }, { "epoch": 0.22371963970859587, "grad_norm": 1.2056976556777954, "learning_rate": 1.854031123872777e-05, "loss": 1.5994, "step": 4092 }, { "epoch": 0.22377431215231744, "grad_norm": 1.4632823467254639, "learning_rate": 1.8539360464836003e-05, "loss": 1.4911, "step": 4093 }, { "epoch": 0.22382898459603898, "grad_norm": 1.588132619857788, "learning_rate": 1.8538409405794952e-05, "loss": 1.2642, "step": 4094 }, { "epoch": 0.22388365703976054, "grad_norm": 1.4614795446395874, "learning_rate": 1.8537458061636377e-05, "loss": 1.4441, "step": 4095 }, { "epoch": 0.22393832948348208, "grad_norm": 1.5167101621627808, "learning_rate": 1.853650643239204e-05, "loss": 1.3774, "step": 4096 }, { "epoch": 0.22399300192720364, "grad_norm": 1.4758514165878296, "learning_rate": 1.8535554518093723e-05, "loss": 1.3967, "step": 4097 }, { "epoch": 0.22404767437092518, "grad_norm": 1.3229823112487793, "learning_rate": 1.8534602318773215e-05, "loss": 1.5686, "step": 4098 }, { "epoch": 0.22410234681464675, "grad_norm": 1.452122449874878, "learning_rate": 1.8533649834462303e-05, "loss": 1.2831, "step": 4099 }, { "epoch": 0.2241570192583683, "grad_norm": 1.6315631866455078, "learning_rate": 1.85326970651928e-05, "loss": 1.3192, "step": 4100 }, { "epoch": 0.22421169170208985, "grad_norm": 1.6553071737289429, "learning_rate": 1.8531744010996525e-05, "loss": 1.255, "step": 4101 }, { "epoch": 0.22426636414581141, "grad_norm": 1.2540022134780884, "learning_rate": 1.853079067190529e-05, "loss": 1.615, "step": 4102 }, { "epoch": 0.22432103658953295, "grad_norm": 1.3301714658737183, "learning_rate": 1.852983704795094e-05, "loss": 1.427, "step": 4103 }, { "epoch": 0.22437570903325452, "grad_norm": 1.3230133056640625, "learning_rate": 1.8528883139165313e-05, "loss": 1.5387, "step": 4104 }, { "epoch": 0.22443038147697605, "grad_norm": 1.3904852867126465, "learning_rate": 1.8527928945580266e-05, "loss": 1.4308, "step": 4105 }, { "epoch": 0.22448505392069762, "grad_norm": 1.665385127067566, "learning_rate": 1.852697446722766e-05, "loss": 1.4144, "step": 4106 }, { "epoch": 0.22453972636441918, "grad_norm": 1.3386390209197998, "learning_rate": 1.8526019704139364e-05, "loss": 1.523, "step": 4107 }, { "epoch": 0.22459439880814072, "grad_norm": 1.5517661571502686, "learning_rate": 1.8525064656347265e-05, "loss": 1.4813, "step": 4108 }, { "epoch": 0.2246490712518623, "grad_norm": 1.2601025104522705, "learning_rate": 1.8524109323883253e-05, "loss": 1.6485, "step": 4109 }, { "epoch": 0.22470374369558382, "grad_norm": 1.8134865760803223, "learning_rate": 1.852315370677923e-05, "loss": 1.1828, "step": 4110 }, { "epoch": 0.2247584161393054, "grad_norm": 1.504151701927185, "learning_rate": 1.8522197805067105e-05, "loss": 1.2821, "step": 4111 }, { "epoch": 0.22481308858302693, "grad_norm": 1.2746797800064087, "learning_rate": 1.852124161877879e-05, "loss": 1.2867, "step": 4112 }, { "epoch": 0.2248677610267485, "grad_norm": 1.6295030117034912, "learning_rate": 1.852028514794623e-05, "loss": 1.289, "step": 4113 }, { "epoch": 0.22492243347047006, "grad_norm": 1.4876594543457031, "learning_rate": 1.851932839260135e-05, "loss": 1.6273, "step": 4114 }, { "epoch": 0.2249771059141916, "grad_norm": 1.225803017616272, "learning_rate": 1.8518371352776107e-05, "loss": 1.5405, "step": 4115 }, { "epoch": 0.22503177835791316, "grad_norm": 1.1691499948501587, "learning_rate": 1.8517414028502454e-05, "loss": 1.6848, "step": 4116 }, { "epoch": 0.2250864508016347, "grad_norm": 1.330513596534729, "learning_rate": 1.8516456419812362e-05, "loss": 1.3769, "step": 4117 }, { "epoch": 0.22514112324535626, "grad_norm": 1.5114868879318237, "learning_rate": 1.8515498526737806e-05, "loss": 1.4989, "step": 4118 }, { "epoch": 0.2251957956890778, "grad_norm": 1.7981669902801514, "learning_rate": 1.8514540349310766e-05, "loss": 1.1456, "step": 4119 }, { "epoch": 0.22525046813279936, "grad_norm": 1.6302186250686646, "learning_rate": 1.851358188756325e-05, "loss": 1.4829, "step": 4120 }, { "epoch": 0.22530514057652093, "grad_norm": 1.367021083831787, "learning_rate": 1.8512623141527255e-05, "loss": 1.3364, "step": 4121 }, { "epoch": 0.22535981302024247, "grad_norm": 1.5356749296188354, "learning_rate": 1.85116641112348e-05, "loss": 1.6089, "step": 4122 }, { "epoch": 0.22541448546396403, "grad_norm": 1.7986990213394165, "learning_rate": 1.8510704796717902e-05, "loss": 1.5931, "step": 4123 }, { "epoch": 0.22546915790768557, "grad_norm": 1.4869717359542847, "learning_rate": 1.85097451980086e-05, "loss": 1.3424, "step": 4124 }, { "epoch": 0.22552383035140713, "grad_norm": 1.427499771118164, "learning_rate": 1.850878531513894e-05, "loss": 1.5132, "step": 4125 }, { "epoch": 0.22557850279512867, "grad_norm": 1.4562861919403076, "learning_rate": 1.8507825148140974e-05, "loss": 1.6554, "step": 4126 }, { "epoch": 0.22563317523885024, "grad_norm": 2.022099733352661, "learning_rate": 1.850686469704676e-05, "loss": 1.5426, "step": 4127 }, { "epoch": 0.2256878476825718, "grad_norm": 1.2900190353393555, "learning_rate": 1.850590396188837e-05, "loss": 1.398, "step": 4128 }, { "epoch": 0.22574252012629334, "grad_norm": 1.1399550437927246, "learning_rate": 1.850494294269789e-05, "loss": 1.5039, "step": 4129 }, { "epoch": 0.2257971925700149, "grad_norm": 1.655551791191101, "learning_rate": 1.8503981639507404e-05, "loss": 1.2532, "step": 4130 }, { "epoch": 0.22585186501373644, "grad_norm": 1.9016293287277222, "learning_rate": 1.8503020052349018e-05, "loss": 1.4054, "step": 4131 }, { "epoch": 0.225906537457458, "grad_norm": 1.7892783880233765, "learning_rate": 1.850205818125484e-05, "loss": 1.5706, "step": 4132 }, { "epoch": 0.22596120990117957, "grad_norm": 1.2796581983566284, "learning_rate": 1.8501096026256985e-05, "loss": 1.4962, "step": 4133 }, { "epoch": 0.2260158823449011, "grad_norm": 1.7490345239639282, "learning_rate": 1.850013358738759e-05, "loss": 1.4068, "step": 4134 }, { "epoch": 0.22607055478862267, "grad_norm": 1.1910665035247803, "learning_rate": 1.8499170864678787e-05, "loss": 1.6403, "step": 4135 }, { "epoch": 0.2261252272323442, "grad_norm": 1.739457607269287, "learning_rate": 1.8498207858162724e-05, "loss": 1.4123, "step": 4136 }, { "epoch": 0.22617989967606578, "grad_norm": 1.8025925159454346, "learning_rate": 1.849724456787156e-05, "loss": 1.4018, "step": 4137 }, { "epoch": 0.2262345721197873, "grad_norm": 1.880008578300476, "learning_rate": 1.849628099383746e-05, "loss": 1.4539, "step": 4138 }, { "epoch": 0.22628924456350888, "grad_norm": 1.552351951599121, "learning_rate": 1.84953171360926e-05, "loss": 1.3157, "step": 4139 }, { "epoch": 0.22634391700723044, "grad_norm": 1.4789998531341553, "learning_rate": 1.8494352994669166e-05, "loss": 1.5755, "step": 4140 }, { "epoch": 0.22639858945095198, "grad_norm": 1.1935449838638306, "learning_rate": 1.8493388569599352e-05, "loss": 1.717, "step": 4141 }, { "epoch": 0.22645326189467355, "grad_norm": 1.2183189392089844, "learning_rate": 1.8492423860915366e-05, "loss": 1.4168, "step": 4142 }, { "epoch": 0.22650793433839508, "grad_norm": 1.1690887212753296, "learning_rate": 1.8491458868649417e-05, "loss": 1.5609, "step": 4143 }, { "epoch": 0.22656260678211665, "grad_norm": 1.2232590913772583, "learning_rate": 1.849049359283373e-05, "loss": 1.5157, "step": 4144 }, { "epoch": 0.2266172792258382, "grad_norm": 1.580024003982544, "learning_rate": 1.8489528033500542e-05, "loss": 1.4334, "step": 4145 }, { "epoch": 0.22667195166955975, "grad_norm": 1.0053499937057495, "learning_rate": 1.848856219068209e-05, "loss": 1.3638, "step": 4146 }, { "epoch": 0.22672662411328132, "grad_norm": 2.1029179096221924, "learning_rate": 1.8487596064410623e-05, "loss": 1.5552, "step": 4147 }, { "epoch": 0.22678129655700285, "grad_norm": 1.1500402688980103, "learning_rate": 1.8486629654718412e-05, "loss": 1.4073, "step": 4148 }, { "epoch": 0.22683596900072442, "grad_norm": 1.3833410739898682, "learning_rate": 1.848566296163772e-05, "loss": 1.5457, "step": 4149 }, { "epoch": 0.22689064144444596, "grad_norm": 2.1338205337524414, "learning_rate": 1.8484695985200832e-05, "loss": 1.6315, "step": 4150 }, { "epoch": 0.22694531388816752, "grad_norm": 1.2438551187515259, "learning_rate": 1.8483728725440033e-05, "loss": 1.5064, "step": 4151 }, { "epoch": 0.22699998633188906, "grad_norm": 1.6090055704116821, "learning_rate": 1.8482761182387622e-05, "loss": 1.5302, "step": 4152 }, { "epoch": 0.22705465877561062, "grad_norm": 1.7053691148757935, "learning_rate": 1.848179335607591e-05, "loss": 1.3417, "step": 4153 }, { "epoch": 0.2271093312193322, "grad_norm": 2.0096542835235596, "learning_rate": 1.848082524653722e-05, "loss": 1.5301, "step": 4154 }, { "epoch": 0.22716400366305373, "grad_norm": 1.6232486963272095, "learning_rate": 1.847985685380387e-05, "loss": 1.3345, "step": 4155 }, { "epoch": 0.2272186761067753, "grad_norm": 1.7714613676071167, "learning_rate": 1.8478888177908202e-05, "loss": 1.6011, "step": 4156 }, { "epoch": 0.22727334855049683, "grad_norm": 1.8348219394683838, "learning_rate": 1.847791921888256e-05, "loss": 1.532, "step": 4157 }, { "epoch": 0.2273280209942184, "grad_norm": 1.2514532804489136, "learning_rate": 1.8476949976759302e-05, "loss": 1.5888, "step": 4158 }, { "epoch": 0.22738269343793993, "grad_norm": 1.4077942371368408, "learning_rate": 1.8475980451570797e-05, "loss": 1.4665, "step": 4159 }, { "epoch": 0.2274373658816615, "grad_norm": 1.891277551651001, "learning_rate": 1.847501064334941e-05, "loss": 1.0279, "step": 4160 }, { "epoch": 0.22749203832538306, "grad_norm": 2.253114938735962, "learning_rate": 1.8474040552127533e-05, "loss": 1.599, "step": 4161 }, { "epoch": 0.2275467107691046, "grad_norm": 1.4645841121673584, "learning_rate": 1.8473070177937552e-05, "loss": 1.5277, "step": 4162 }, { "epoch": 0.22760138321282616, "grad_norm": 1.3856781721115112, "learning_rate": 1.847209952081188e-05, "loss": 1.5493, "step": 4163 }, { "epoch": 0.2276560556565477, "grad_norm": 1.6554713249206543, "learning_rate": 1.8471128580782923e-05, "loss": 1.5228, "step": 4164 }, { "epoch": 0.22771072810026927, "grad_norm": 1.5830774307250977, "learning_rate": 1.8470157357883106e-05, "loss": 1.5845, "step": 4165 }, { "epoch": 0.2277654005439908, "grad_norm": 1.294276237487793, "learning_rate": 1.8469185852144854e-05, "loss": 1.5182, "step": 4166 }, { "epoch": 0.22782007298771237, "grad_norm": 2.012740135192871, "learning_rate": 1.846821406360062e-05, "loss": 1.4512, "step": 4167 }, { "epoch": 0.22787474543143393, "grad_norm": 1.5018610954284668, "learning_rate": 1.8467241992282842e-05, "loss": 1.5365, "step": 4168 }, { "epoch": 0.22792941787515547, "grad_norm": 1.7300446033477783, "learning_rate": 1.846626963822399e-05, "loss": 1.5004, "step": 4169 }, { "epoch": 0.22798409031887704, "grad_norm": 1.8736783266067505, "learning_rate": 1.846529700145652e-05, "loss": 1.4965, "step": 4170 }, { "epoch": 0.22803876276259857, "grad_norm": 1.3280019760131836, "learning_rate": 1.8464324082012926e-05, "loss": 1.4567, "step": 4171 }, { "epoch": 0.22809343520632014, "grad_norm": 1.5612268447875977, "learning_rate": 1.8463350879925686e-05, "loss": 1.4275, "step": 4172 }, { "epoch": 0.22814810765004168, "grad_norm": 1.3539454936981201, "learning_rate": 1.84623773952273e-05, "loss": 1.3093, "step": 4173 }, { "epoch": 0.22820278009376324, "grad_norm": 1.4296330213546753, "learning_rate": 1.8461403627950275e-05, "loss": 1.5031, "step": 4174 }, { "epoch": 0.2282574525374848, "grad_norm": 1.754865050315857, "learning_rate": 1.8460429578127132e-05, "loss": 1.4055, "step": 4175 }, { "epoch": 0.22831212498120634, "grad_norm": 1.4469226598739624, "learning_rate": 1.8459455245790386e-05, "loss": 1.3359, "step": 4176 }, { "epoch": 0.2283667974249279, "grad_norm": 1.687091588973999, "learning_rate": 1.845848063097258e-05, "loss": 1.3068, "step": 4177 }, { "epoch": 0.22842146986864945, "grad_norm": 1.5320794582366943, "learning_rate": 1.845750573370626e-05, "loss": 1.2918, "step": 4178 }, { "epoch": 0.228476142312371, "grad_norm": 1.4155480861663818, "learning_rate": 1.8456530554023973e-05, "loss": 1.2663, "step": 4179 }, { "epoch": 0.22853081475609255, "grad_norm": 1.4903085231781006, "learning_rate": 1.845555509195829e-05, "loss": 1.4299, "step": 4180 }, { "epoch": 0.2285854871998141, "grad_norm": 1.21424400806427, "learning_rate": 1.8454579347541783e-05, "loss": 1.5958, "step": 4181 }, { "epoch": 0.22864015964353568, "grad_norm": 1.3117461204528809, "learning_rate": 1.845360332080703e-05, "loss": 1.4455, "step": 4182 }, { "epoch": 0.22869483208725722, "grad_norm": 1.842161774635315, "learning_rate": 1.8452627011786623e-05, "loss": 1.5979, "step": 4183 }, { "epoch": 0.22874950453097878, "grad_norm": 1.4540245532989502, "learning_rate": 1.8451650420513167e-05, "loss": 1.5896, "step": 4184 }, { "epoch": 0.22880417697470032, "grad_norm": 1.164311408996582, "learning_rate": 1.8450673547019273e-05, "loss": 1.832, "step": 4185 }, { "epoch": 0.22885884941842188, "grad_norm": 1.4012963771820068, "learning_rate": 1.8449696391337556e-05, "loss": 1.2869, "step": 4186 }, { "epoch": 0.22891352186214342, "grad_norm": 1.9976413249969482, "learning_rate": 1.8448718953500653e-05, "loss": 1.4525, "step": 4187 }, { "epoch": 0.22896819430586499, "grad_norm": 1.1907130479812622, "learning_rate": 1.8447741233541195e-05, "loss": 1.4552, "step": 4188 }, { "epoch": 0.22902286674958655, "grad_norm": 1.908334493637085, "learning_rate": 1.8446763231491834e-05, "loss": 1.4023, "step": 4189 }, { "epoch": 0.2290775391933081, "grad_norm": 1.4182491302490234, "learning_rate": 1.844578494738523e-05, "loss": 1.313, "step": 4190 }, { "epoch": 0.22913221163702965, "grad_norm": 1.683608889579773, "learning_rate": 1.8444806381254046e-05, "loss": 1.4026, "step": 4191 }, { "epoch": 0.2291868840807512, "grad_norm": 1.729478120803833, "learning_rate": 1.844382753313096e-05, "loss": 1.4868, "step": 4192 }, { "epoch": 0.22924155652447276, "grad_norm": 1.8957159519195557, "learning_rate": 1.8442848403048658e-05, "loss": 1.7633, "step": 4193 }, { "epoch": 0.2292962289681943, "grad_norm": 1.5612224340438843, "learning_rate": 1.844186899103984e-05, "loss": 1.3786, "step": 4194 }, { "epoch": 0.22935090141191586, "grad_norm": 1.3546026945114136, "learning_rate": 1.8440889297137204e-05, "loss": 1.3678, "step": 4195 }, { "epoch": 0.22940557385563742, "grad_norm": 1.7505149841308594, "learning_rate": 1.843990932137347e-05, "loss": 1.3541, "step": 4196 }, { "epoch": 0.22946024629935896, "grad_norm": 1.4354363679885864, "learning_rate": 1.8438929063781354e-05, "loss": 1.4354, "step": 4197 }, { "epoch": 0.22951491874308053, "grad_norm": 1.3649545907974243, "learning_rate": 1.8437948524393598e-05, "loss": 1.4267, "step": 4198 }, { "epoch": 0.22956959118680206, "grad_norm": 1.5048214197158813, "learning_rate": 1.8436967703242938e-05, "loss": 1.4072, "step": 4199 }, { "epoch": 0.22962426363052363, "grad_norm": 1.244472622871399, "learning_rate": 1.843598660036213e-05, "loss": 1.5327, "step": 4200 }, { "epoch": 0.22967893607424517, "grad_norm": 1.2644909620285034, "learning_rate": 1.8435005215783933e-05, "loss": 1.5165, "step": 4201 }, { "epoch": 0.22973360851796673, "grad_norm": 1.3428114652633667, "learning_rate": 1.843402354954112e-05, "loss": 1.5361, "step": 4202 }, { "epoch": 0.2297882809616883, "grad_norm": 1.5834848880767822, "learning_rate": 1.8433041601666466e-05, "loss": 1.4724, "step": 4203 }, { "epoch": 0.22984295340540983, "grad_norm": 1.4462144374847412, "learning_rate": 1.843205937219277e-05, "loss": 1.3956, "step": 4204 }, { "epoch": 0.2298976258491314, "grad_norm": 1.276798129081726, "learning_rate": 1.843107686115282e-05, "loss": 1.5622, "step": 4205 }, { "epoch": 0.22995229829285294, "grad_norm": 1.5573595762252808, "learning_rate": 1.843009406857943e-05, "loss": 1.1806, "step": 4206 }, { "epoch": 0.2300069707365745, "grad_norm": 1.339268684387207, "learning_rate": 1.842911099450542e-05, "loss": 1.7647, "step": 4207 }, { "epoch": 0.23006164318029604, "grad_norm": 1.3434890508651733, "learning_rate": 1.8428127638963615e-05, "loss": 1.4179, "step": 4208 }, { "epoch": 0.2301163156240176, "grad_norm": 1.2959458827972412, "learning_rate": 1.8427144001986847e-05, "loss": 1.5874, "step": 4209 }, { "epoch": 0.23017098806773917, "grad_norm": 1.4784642457962036, "learning_rate": 1.8426160083607966e-05, "loss": 1.4535, "step": 4210 }, { "epoch": 0.2302256605114607, "grad_norm": 1.4696402549743652, "learning_rate": 1.842517588385983e-05, "loss": 1.4929, "step": 4211 }, { "epoch": 0.23028033295518227, "grad_norm": 1.9298806190490723, "learning_rate": 1.8424191402775297e-05, "loss": 1.4535, "step": 4212 }, { "epoch": 0.2303350053989038, "grad_norm": 1.359339952468872, "learning_rate": 1.842320664038725e-05, "loss": 1.4191, "step": 4213 }, { "epoch": 0.23038967784262537, "grad_norm": 1.5545034408569336, "learning_rate": 1.8422221596728564e-05, "loss": 1.3918, "step": 4214 }, { "epoch": 0.2304443502863469, "grad_norm": 1.311326026916504, "learning_rate": 1.842123627183214e-05, "loss": 1.4249, "step": 4215 }, { "epoch": 0.23049902273006848, "grad_norm": 1.5419234037399292, "learning_rate": 1.842025066573087e-05, "loss": 1.3978, "step": 4216 }, { "epoch": 0.23055369517379004, "grad_norm": 1.4588807821273804, "learning_rate": 1.8419264778457675e-05, "loss": 1.3141, "step": 4217 }, { "epoch": 0.23060836761751158, "grad_norm": 1.6243082284927368, "learning_rate": 1.841827861004547e-05, "loss": 1.3318, "step": 4218 }, { "epoch": 0.23066304006123314, "grad_norm": 1.6977025270462036, "learning_rate": 1.8417292160527193e-05, "loss": 1.1896, "step": 4219 }, { "epoch": 0.23071771250495468, "grad_norm": 1.3770872354507446, "learning_rate": 1.8416305429935776e-05, "loss": 1.3911, "step": 4220 }, { "epoch": 0.23077238494867625, "grad_norm": 1.345740556716919, "learning_rate": 1.8415318418304167e-05, "loss": 1.4559, "step": 4221 }, { "epoch": 0.23082705739239778, "grad_norm": 1.2463321685791016, "learning_rate": 1.8414331125665336e-05, "loss": 1.4257, "step": 4222 }, { "epoch": 0.23088172983611935, "grad_norm": 1.1920199394226074, "learning_rate": 1.8413343552052242e-05, "loss": 1.6267, "step": 4223 }, { "epoch": 0.2309364022798409, "grad_norm": 1.4041098356246948, "learning_rate": 1.8412355697497863e-05, "loss": 1.3667, "step": 4224 }, { "epoch": 0.23099107472356245, "grad_norm": 1.6160411834716797, "learning_rate": 1.8411367562035188e-05, "loss": 1.3857, "step": 4225 }, { "epoch": 0.23104574716728402, "grad_norm": 2.4308414459228516, "learning_rate": 1.841037914569721e-05, "loss": 1.3849, "step": 4226 }, { "epoch": 0.23110041961100555, "grad_norm": 1.3702338933944702, "learning_rate": 1.840939044851694e-05, "loss": 1.5393, "step": 4227 }, { "epoch": 0.23115509205472712, "grad_norm": 1.7114006280899048, "learning_rate": 1.8408401470527386e-05, "loss": 1.2075, "step": 4228 }, { "epoch": 0.23120976449844868, "grad_norm": 1.506381869316101, "learning_rate": 1.840741221176158e-05, "loss": 1.4836, "step": 4229 }, { "epoch": 0.23126443694217022, "grad_norm": 1.7390220165252686, "learning_rate": 1.8406422672252548e-05, "loss": 1.6955, "step": 4230 }, { "epoch": 0.23131910938589179, "grad_norm": 1.3710428476333618, "learning_rate": 1.8405432852033338e-05, "loss": 1.2516, "step": 4231 }, { "epoch": 0.23137378182961332, "grad_norm": 1.4207481145858765, "learning_rate": 1.8404442751137e-05, "loss": 1.2799, "step": 4232 }, { "epoch": 0.2314284542733349, "grad_norm": 1.440263032913208, "learning_rate": 1.84034523695966e-05, "loss": 1.3345, "step": 4233 }, { "epoch": 0.23148312671705643, "grad_norm": 1.7269104719161987, "learning_rate": 1.8402461707445206e-05, "loss": 1.3634, "step": 4234 }, { "epoch": 0.231537799160778, "grad_norm": 1.369903564453125, "learning_rate": 1.8401470764715898e-05, "loss": 1.7, "step": 4235 }, { "epoch": 0.23159247160449956, "grad_norm": 1.104788064956665, "learning_rate": 1.8400479541441763e-05, "loss": 1.4708, "step": 4236 }, { "epoch": 0.2316471440482211, "grad_norm": 1.5373749732971191, "learning_rate": 1.8399488037655906e-05, "loss": 1.5696, "step": 4237 }, { "epoch": 0.23170181649194266, "grad_norm": 1.4310388565063477, "learning_rate": 1.8398496253391433e-05, "loss": 1.4679, "step": 4238 }, { "epoch": 0.2317564889356642, "grad_norm": 1.5203810930252075, "learning_rate": 1.8397504188681463e-05, "loss": 1.269, "step": 4239 }, { "epoch": 0.23181116137938576, "grad_norm": 1.2975891828536987, "learning_rate": 1.8396511843559125e-05, "loss": 1.5464, "step": 4240 }, { "epoch": 0.2318658338231073, "grad_norm": 1.3333107233047485, "learning_rate": 1.839551921805755e-05, "loss": 1.7035, "step": 4241 }, { "epoch": 0.23192050626682886, "grad_norm": 1.6897159814834595, "learning_rate": 1.839452631220989e-05, "loss": 1.4178, "step": 4242 }, { "epoch": 0.23197517871055043, "grad_norm": 1.5578479766845703, "learning_rate": 1.8393533126049293e-05, "loss": 1.4355, "step": 4243 }, { "epoch": 0.23202985115427197, "grad_norm": 1.445204496383667, "learning_rate": 1.8392539659608934e-05, "loss": 1.3044, "step": 4244 }, { "epoch": 0.23208452359799353, "grad_norm": 1.627119779586792, "learning_rate": 1.839154591292198e-05, "loss": 1.2882, "step": 4245 }, { "epoch": 0.23213919604171507, "grad_norm": 1.4036579132080078, "learning_rate": 1.839055188602162e-05, "loss": 1.5088, "step": 4246 }, { "epoch": 0.23219386848543663, "grad_norm": 1.4489881992340088, "learning_rate": 1.838955757894104e-05, "loss": 1.6023, "step": 4247 }, { "epoch": 0.23224854092915817, "grad_norm": 1.4552644491195679, "learning_rate": 1.8388562991713447e-05, "loss": 1.4393, "step": 4248 }, { "epoch": 0.23230321337287974, "grad_norm": 1.6861051321029663, "learning_rate": 1.838756812437205e-05, "loss": 1.1583, "step": 4249 }, { "epoch": 0.2323578858166013, "grad_norm": 1.3424217700958252, "learning_rate": 1.8386572976950072e-05, "loss": 1.2874, "step": 4250 }, { "epoch": 0.23241255826032284, "grad_norm": 1.7193589210510254, "learning_rate": 1.838557754948074e-05, "loss": 1.3978, "step": 4251 }, { "epoch": 0.2324672307040444, "grad_norm": 1.8598064184188843, "learning_rate": 1.83845818419973e-05, "loss": 1.3323, "step": 4252 }, { "epoch": 0.23252190314776594, "grad_norm": 1.2732982635498047, "learning_rate": 1.8383585854533e-05, "loss": 1.2859, "step": 4253 }, { "epoch": 0.2325765755914875, "grad_norm": 1.5481221675872803, "learning_rate": 1.838258958712109e-05, "loss": 1.3242, "step": 4254 }, { "epoch": 0.23263124803520904, "grad_norm": 1.3483240604400635, "learning_rate": 1.8381593039794846e-05, "loss": 1.2547, "step": 4255 }, { "epoch": 0.2326859204789306, "grad_norm": 1.5085301399230957, "learning_rate": 1.8380596212587544e-05, "loss": 1.4442, "step": 4256 }, { "epoch": 0.23274059292265217, "grad_norm": 1.3101168870925903, "learning_rate": 1.8379599105532465e-05, "loss": 1.6413, "step": 4257 }, { "epoch": 0.2327952653663737, "grad_norm": 1.453710675239563, "learning_rate": 1.837860171866291e-05, "loss": 1.452, "step": 4258 }, { "epoch": 0.23284993781009528, "grad_norm": 1.353029727935791, "learning_rate": 1.8377604052012183e-05, "loss": 1.5659, "step": 4259 }, { "epoch": 0.2329046102538168, "grad_norm": 1.3838469982147217, "learning_rate": 1.8376606105613593e-05, "loss": 1.4328, "step": 4260 }, { "epoch": 0.23295928269753838, "grad_norm": 1.87196946144104, "learning_rate": 1.8375607879500476e-05, "loss": 1.4513, "step": 4261 }, { "epoch": 0.23301395514125992, "grad_norm": 1.8263298273086548, "learning_rate": 1.8374609373706156e-05, "loss": 1.5289, "step": 4262 }, { "epoch": 0.23306862758498148, "grad_norm": 1.4162299633026123, "learning_rate": 1.8373610588263976e-05, "loss": 1.4511, "step": 4263 }, { "epoch": 0.23312330002870305, "grad_norm": 2.064401626586914, "learning_rate": 1.837261152320729e-05, "loss": 1.4157, "step": 4264 }, { "epoch": 0.23317797247242458, "grad_norm": 1.89578378200531, "learning_rate": 1.837161217856946e-05, "loss": 1.3844, "step": 4265 }, { "epoch": 0.23323264491614615, "grad_norm": 1.4629063606262207, "learning_rate": 1.837061255438385e-05, "loss": 1.4986, "step": 4266 }, { "epoch": 0.23328731735986769, "grad_norm": 1.4935927391052246, "learning_rate": 1.8369612650683846e-05, "loss": 1.3199, "step": 4267 }, { "epoch": 0.23334198980358925, "grad_norm": 1.3057249784469604, "learning_rate": 1.836861246750284e-05, "loss": 1.3796, "step": 4268 }, { "epoch": 0.2333966622473108, "grad_norm": 1.7541307210922241, "learning_rate": 1.8367612004874224e-05, "loss": 1.3043, "step": 4269 }, { "epoch": 0.23345133469103235, "grad_norm": 1.9405608177185059, "learning_rate": 1.8366611262831408e-05, "loss": 1.2933, "step": 4270 }, { "epoch": 0.23350600713475392, "grad_norm": 1.8602914810180664, "learning_rate": 1.836561024140781e-05, "loss": 1.3111, "step": 4271 }, { "epoch": 0.23356067957847546, "grad_norm": 1.2994424104690552, "learning_rate": 1.8364608940636853e-05, "loss": 1.4526, "step": 4272 }, { "epoch": 0.23361535202219702, "grad_norm": 1.6461302042007446, "learning_rate": 1.8363607360551975e-05, "loss": 1.5965, "step": 4273 }, { "epoch": 0.23367002446591856, "grad_norm": 1.8301281929016113, "learning_rate": 1.836260550118662e-05, "loss": 1.3713, "step": 4274 }, { "epoch": 0.23372469690964012, "grad_norm": 1.3285857439041138, "learning_rate": 1.8361603362574247e-05, "loss": 1.562, "step": 4275 }, { "epoch": 0.23377936935336166, "grad_norm": 1.697189450263977, "learning_rate": 1.8360600944748316e-05, "loss": 1.4485, "step": 4276 }, { "epoch": 0.23383404179708323, "grad_norm": 1.3607724905014038, "learning_rate": 1.8359598247742305e-05, "loss": 1.6946, "step": 4277 }, { "epoch": 0.2338887142408048, "grad_norm": 1.6889551877975464, "learning_rate": 1.8358595271589683e-05, "loss": 1.5189, "step": 4278 }, { "epoch": 0.23394338668452633, "grad_norm": 1.4980144500732422, "learning_rate": 1.8357592016323958e-05, "loss": 1.1856, "step": 4279 }, { "epoch": 0.2339980591282479, "grad_norm": 1.3644160032272339, "learning_rate": 1.835658848197862e-05, "loss": 1.5542, "step": 4280 }, { "epoch": 0.23405273157196943, "grad_norm": 1.7231199741363525, "learning_rate": 1.8355584668587185e-05, "loss": 1.2378, "step": 4281 }, { "epoch": 0.234107404015691, "grad_norm": 1.3602858781814575, "learning_rate": 1.835458057618317e-05, "loss": 1.4666, "step": 4282 }, { "epoch": 0.23416207645941253, "grad_norm": 1.2954819202423096, "learning_rate": 1.8353576204800106e-05, "loss": 1.4299, "step": 4283 }, { "epoch": 0.2342167489031341, "grad_norm": 1.401768445968628, "learning_rate": 1.835257155447153e-05, "loss": 1.4344, "step": 4284 }, { "epoch": 0.23427142134685566, "grad_norm": 1.5337064266204834, "learning_rate": 1.835156662523099e-05, "loss": 1.264, "step": 4285 }, { "epoch": 0.2343260937905772, "grad_norm": 1.1594113111495972, "learning_rate": 1.8350561417112044e-05, "loss": 1.5397, "step": 4286 }, { "epoch": 0.23438076623429877, "grad_norm": 1.404596209526062, "learning_rate": 1.8349555930148253e-05, "loss": 1.2424, "step": 4287 }, { "epoch": 0.2344354386780203, "grad_norm": 1.4598153829574585, "learning_rate": 1.83485501643732e-05, "loss": 1.1735, "step": 4288 }, { "epoch": 0.23449011112174187, "grad_norm": 1.4280222654342651, "learning_rate": 1.8347544119820465e-05, "loss": 1.839, "step": 4289 }, { "epoch": 0.2345447835654634, "grad_norm": 1.8702081441879272, "learning_rate": 1.8346537796523643e-05, "loss": 1.3044, "step": 4290 }, { "epoch": 0.23459945600918497, "grad_norm": 1.5380955934524536, "learning_rate": 1.8345531194516343e-05, "loss": 1.2663, "step": 4291 }, { "epoch": 0.23465412845290654, "grad_norm": 1.2825374603271484, "learning_rate": 1.8344524313832167e-05, "loss": 1.4721, "step": 4292 }, { "epoch": 0.23470880089662807, "grad_norm": 1.2142449617385864, "learning_rate": 1.8343517154504747e-05, "loss": 1.4928, "step": 4293 }, { "epoch": 0.23476347334034964, "grad_norm": 1.4497056007385254, "learning_rate": 1.8342509716567712e-05, "loss": 1.4424, "step": 4294 }, { "epoch": 0.23481814578407117, "grad_norm": 1.522405743598938, "learning_rate": 1.8341502000054697e-05, "loss": 1.4537, "step": 4295 }, { "epoch": 0.23487281822779274, "grad_norm": 1.5490893125534058, "learning_rate": 1.834049400499936e-05, "loss": 1.5052, "step": 4296 }, { "epoch": 0.23492749067151428, "grad_norm": 1.178498387336731, "learning_rate": 1.833948573143535e-05, "loss": 1.5265, "step": 4297 }, { "epoch": 0.23498216311523584, "grad_norm": 1.4589773416519165, "learning_rate": 1.833847717939635e-05, "loss": 1.3726, "step": 4298 }, { "epoch": 0.2350368355589574, "grad_norm": 1.4429621696472168, "learning_rate": 1.8337468348916026e-05, "loss": 1.17, "step": 4299 }, { "epoch": 0.23509150800267894, "grad_norm": 1.4456790685653687, "learning_rate": 1.833645924002807e-05, "loss": 1.3291, "step": 4300 }, { "epoch": 0.2351461804464005, "grad_norm": 1.4546817541122437, "learning_rate": 1.8335449852766176e-05, "loss": 1.6331, "step": 4301 }, { "epoch": 0.23520085289012205, "grad_norm": 1.4673599004745483, "learning_rate": 1.8334440187164054e-05, "loss": 1.5404, "step": 4302 }, { "epoch": 0.2352555253338436, "grad_norm": 1.6681488752365112, "learning_rate": 1.8333430243255416e-05, "loss": 1.5314, "step": 4303 }, { "epoch": 0.23531019777756515, "grad_norm": 1.8053053617477417, "learning_rate": 1.8332420021073992e-05, "loss": 1.5747, "step": 4304 }, { "epoch": 0.23536487022128671, "grad_norm": 1.5301976203918457, "learning_rate": 1.8331409520653504e-05, "loss": 1.4474, "step": 4305 }, { "epoch": 0.23541954266500828, "grad_norm": 1.2601746320724487, "learning_rate": 1.8330398742027704e-05, "loss": 1.6499, "step": 4306 }, { "epoch": 0.23547421510872982, "grad_norm": 1.5201352834701538, "learning_rate": 1.832938768523034e-05, "loss": 1.4274, "step": 4307 }, { "epoch": 0.23552888755245138, "grad_norm": 1.3438152074813843, "learning_rate": 1.832837635029518e-05, "loss": 1.3674, "step": 4308 }, { "epoch": 0.23558355999617292, "grad_norm": 1.4210540056228638, "learning_rate": 1.832736473725599e-05, "loss": 1.434, "step": 4309 }, { "epoch": 0.23563823243989448, "grad_norm": 1.651275634765625, "learning_rate": 1.8326352846146548e-05, "loss": 1.2467, "step": 4310 }, { "epoch": 0.23569290488361602, "grad_norm": 1.4639291763305664, "learning_rate": 1.8325340677000646e-05, "loss": 1.2615, "step": 4311 }, { "epoch": 0.2357475773273376, "grad_norm": 1.6493568420410156, "learning_rate": 1.8324328229852087e-05, "loss": 1.6924, "step": 4312 }, { "epoch": 0.23580224977105915, "grad_norm": 1.613438367843628, "learning_rate": 1.832331550473467e-05, "loss": 1.3395, "step": 4313 }, { "epoch": 0.2358569222147807, "grad_norm": 1.532822608947754, "learning_rate": 1.832230250168222e-05, "loss": 1.1691, "step": 4314 }, { "epoch": 0.23591159465850225, "grad_norm": 1.4794443845748901, "learning_rate": 1.832128922072856e-05, "loss": 1.4283, "step": 4315 }, { "epoch": 0.2359662671022238, "grad_norm": 1.566258192062378, "learning_rate": 1.8320275661907527e-05, "loss": 1.5495, "step": 4316 }, { "epoch": 0.23602093954594536, "grad_norm": 1.3395863771438599, "learning_rate": 1.831926182525296e-05, "loss": 1.2172, "step": 4317 }, { "epoch": 0.2360756119896669, "grad_norm": 1.86275053024292, "learning_rate": 1.8318247710798728e-05, "loss": 1.3677, "step": 4318 }, { "epoch": 0.23613028443338846, "grad_norm": 1.4190541505813599, "learning_rate": 1.8317233318578678e-05, "loss": 1.5008, "step": 4319 }, { "epoch": 0.23618495687711002, "grad_norm": 1.7852988243103027, "learning_rate": 1.8316218648626693e-05, "loss": 1.4179, "step": 4320 }, { "epoch": 0.23623962932083156, "grad_norm": 1.9787344932556152, "learning_rate": 1.8315203700976653e-05, "loss": 1.1816, "step": 4321 }, { "epoch": 0.23629430176455313, "grad_norm": 1.288305640220642, "learning_rate": 1.831418847566245e-05, "loss": 1.4564, "step": 4322 }, { "epoch": 0.23634897420827466, "grad_norm": 1.4699195623397827, "learning_rate": 1.8313172972717982e-05, "loss": 1.3174, "step": 4323 }, { "epoch": 0.23640364665199623, "grad_norm": 1.4579013586044312, "learning_rate": 1.8312157192177166e-05, "loss": 1.2188, "step": 4324 }, { "epoch": 0.23645831909571777, "grad_norm": 1.3407506942749023, "learning_rate": 1.831114113407391e-05, "loss": 1.4255, "step": 4325 }, { "epoch": 0.23651299153943933, "grad_norm": 1.9508707523345947, "learning_rate": 1.8310124798442152e-05, "loss": 1.3442, "step": 4326 }, { "epoch": 0.2365676639831609, "grad_norm": 1.409476637840271, "learning_rate": 1.8309108185315826e-05, "loss": 1.3395, "step": 4327 }, { "epoch": 0.23662233642688243, "grad_norm": 1.6951184272766113, "learning_rate": 1.830809129472888e-05, "loss": 1.1242, "step": 4328 }, { "epoch": 0.236677008870604, "grad_norm": 1.5118900537490845, "learning_rate": 1.8307074126715267e-05, "loss": 1.5269, "step": 4329 }, { "epoch": 0.23673168131432554, "grad_norm": 1.4188053607940674, "learning_rate": 1.830605668130896e-05, "loss": 1.4288, "step": 4330 }, { "epoch": 0.2367863537580471, "grad_norm": 1.2040066719055176, "learning_rate": 1.830503895854393e-05, "loss": 1.4042, "step": 4331 }, { "epoch": 0.23684102620176867, "grad_norm": 1.6612223386764526, "learning_rate": 1.8304020958454156e-05, "loss": 1.377, "step": 4332 }, { "epoch": 0.2368956986454902, "grad_norm": 1.443603277206421, "learning_rate": 1.830300268107364e-05, "loss": 1.3184, "step": 4333 }, { "epoch": 0.23695037108921177, "grad_norm": 1.6009763479232788, "learning_rate": 1.830198412643638e-05, "loss": 1.4106, "step": 4334 }, { "epoch": 0.2370050435329333, "grad_norm": 1.4710577726364136, "learning_rate": 1.830096529457639e-05, "loss": 1.4187, "step": 4335 }, { "epoch": 0.23705971597665487, "grad_norm": 1.3573182821273804, "learning_rate": 1.829994618552769e-05, "loss": 1.3275, "step": 4336 }, { "epoch": 0.2371143884203764, "grad_norm": 1.8875682353973389, "learning_rate": 1.8298926799324307e-05, "loss": 1.4331, "step": 4337 }, { "epoch": 0.23716906086409797, "grad_norm": 1.464097499847412, "learning_rate": 1.8297907136000287e-05, "loss": 1.4123, "step": 4338 }, { "epoch": 0.23722373330781954, "grad_norm": 1.511817455291748, "learning_rate": 1.8296887195589678e-05, "loss": 1.5098, "step": 4339 }, { "epoch": 0.23727840575154108, "grad_norm": 1.3717598915100098, "learning_rate": 1.829586697812653e-05, "loss": 1.4636, "step": 4340 }, { "epoch": 0.23733307819526264, "grad_norm": 1.213494896888733, "learning_rate": 1.8294846483644922e-05, "loss": 1.4716, "step": 4341 }, { "epoch": 0.23738775063898418, "grad_norm": 1.2746371030807495, "learning_rate": 1.829382571217892e-05, "loss": 1.283, "step": 4342 }, { "epoch": 0.23744242308270574, "grad_norm": 1.4255441427230835, "learning_rate": 1.8292804663762624e-05, "loss": 1.5695, "step": 4343 }, { "epoch": 0.23749709552642728, "grad_norm": 1.7450538873672485, "learning_rate": 1.8291783338430113e-05, "loss": 1.5804, "step": 4344 }, { "epoch": 0.23755176797014885, "grad_norm": 1.5166826248168945, "learning_rate": 1.8290761736215503e-05, "loss": 1.4218, "step": 4345 }, { "epoch": 0.2376064404138704, "grad_norm": 1.4272980690002441, "learning_rate": 1.8289739857152903e-05, "loss": 1.5401, "step": 4346 }, { "epoch": 0.23766111285759195, "grad_norm": 1.2382707595825195, "learning_rate": 1.8288717701276436e-05, "loss": 1.733, "step": 4347 }, { "epoch": 0.23771578530131351, "grad_norm": 1.2439240217208862, "learning_rate": 1.8287695268620237e-05, "loss": 1.5464, "step": 4348 }, { "epoch": 0.23777045774503505, "grad_norm": 1.3131442070007324, "learning_rate": 1.8286672559218442e-05, "loss": 1.5028, "step": 4349 }, { "epoch": 0.23782513018875662, "grad_norm": 1.6510844230651855, "learning_rate": 1.828564957310521e-05, "loss": 1.4208, "step": 4350 }, { "epoch": 0.23787980263247815, "grad_norm": 2.2358193397521973, "learning_rate": 1.828462631031469e-05, "loss": 1.3269, "step": 4351 }, { "epoch": 0.23793447507619972, "grad_norm": 1.815441370010376, "learning_rate": 1.8283602770881058e-05, "loss": 1.5405, "step": 4352 }, { "epoch": 0.23798914751992128, "grad_norm": 1.6733287572860718, "learning_rate": 1.8282578954838493e-05, "loss": 1.5607, "step": 4353 }, { "epoch": 0.23804381996364282, "grad_norm": 1.5210593938827515, "learning_rate": 1.8281554862221182e-05, "loss": 1.2869, "step": 4354 }, { "epoch": 0.2380984924073644, "grad_norm": 1.2229911088943481, "learning_rate": 1.8280530493063318e-05, "loss": 1.5492, "step": 4355 }, { "epoch": 0.23815316485108592, "grad_norm": 1.2795048952102661, "learning_rate": 1.8279505847399113e-05, "loss": 1.7337, "step": 4356 }, { "epoch": 0.2382078372948075, "grad_norm": 1.3811830282211304, "learning_rate": 1.827848092526278e-05, "loss": 1.6574, "step": 4357 }, { "epoch": 0.23826250973852903, "grad_norm": 1.5505019426345825, "learning_rate": 1.8277455726688538e-05, "loss": 1.2828, "step": 4358 }, { "epoch": 0.2383171821822506, "grad_norm": 0.960382878780365, "learning_rate": 1.8276430251710628e-05, "loss": 1.514, "step": 4359 }, { "epoch": 0.23837185462597216, "grad_norm": 1.628628134727478, "learning_rate": 1.8275404500363293e-05, "loss": 1.2715, "step": 4360 }, { "epoch": 0.2384265270696937, "grad_norm": 1.4339725971221924, "learning_rate": 1.8274378472680782e-05, "loss": 1.5005, "step": 4361 }, { "epoch": 0.23848119951341526, "grad_norm": 1.8613665103912354, "learning_rate": 1.8273352168697354e-05, "loss": 1.514, "step": 4362 }, { "epoch": 0.2385358719571368, "grad_norm": 1.6399646997451782, "learning_rate": 1.8272325588447286e-05, "loss": 1.4269, "step": 4363 }, { "epoch": 0.23859054440085836, "grad_norm": 1.5819991827011108, "learning_rate": 1.8271298731964853e-05, "loss": 1.2669, "step": 4364 }, { "epoch": 0.2386452168445799, "grad_norm": 1.6651204824447632, "learning_rate": 1.827027159928435e-05, "loss": 1.5531, "step": 4365 }, { "epoch": 0.23869988928830146, "grad_norm": 1.4375728368759155, "learning_rate": 1.826924419044007e-05, "loss": 1.3751, "step": 4366 }, { "epoch": 0.23875456173202303, "grad_norm": 1.3063342571258545, "learning_rate": 1.8268216505466318e-05, "loss": 1.5415, "step": 4367 }, { "epoch": 0.23880923417574457, "grad_norm": 1.578347086906433, "learning_rate": 1.8267188544397417e-05, "loss": 1.6102, "step": 4368 }, { "epoch": 0.23886390661946613, "grad_norm": 1.48663330078125, "learning_rate": 1.8266160307267692e-05, "loss": 1.4677, "step": 4369 }, { "epoch": 0.23891857906318767, "grad_norm": 1.3231619596481323, "learning_rate": 1.8265131794111478e-05, "loss": 1.4982, "step": 4370 }, { "epoch": 0.23897325150690923, "grad_norm": 1.4206581115722656, "learning_rate": 1.826410300496312e-05, "loss": 1.4552, "step": 4371 }, { "epoch": 0.23902792395063077, "grad_norm": 1.9121074676513672, "learning_rate": 1.8263073939856965e-05, "loss": 1.5793, "step": 4372 }, { "epoch": 0.23908259639435234, "grad_norm": 1.657187819480896, "learning_rate": 1.8262044598827387e-05, "loss": 1.4622, "step": 4373 }, { "epoch": 0.2391372688380739, "grad_norm": 1.2263706922531128, "learning_rate": 1.826101498190875e-05, "loss": 1.494, "step": 4374 }, { "epoch": 0.23919194128179544, "grad_norm": 1.1760764122009277, "learning_rate": 1.8259985089135436e-05, "loss": 1.7202, "step": 4375 }, { "epoch": 0.239246613725517, "grad_norm": 1.7175670862197876, "learning_rate": 1.8258954920541838e-05, "loss": 1.4256, "step": 4376 }, { "epoch": 0.23930128616923854, "grad_norm": 1.7918281555175781, "learning_rate": 1.8257924476162355e-05, "loss": 1.4136, "step": 4377 }, { "epoch": 0.2393559586129601, "grad_norm": 1.31653892993927, "learning_rate": 1.82568937560314e-05, "loss": 1.4991, "step": 4378 }, { "epoch": 0.23941063105668164, "grad_norm": 1.1228971481323242, "learning_rate": 1.825586276018338e-05, "loss": 1.3605, "step": 4379 }, { "epoch": 0.2394653035004032, "grad_norm": 1.4024592638015747, "learning_rate": 1.8254831488652733e-05, "loss": 1.4451, "step": 4380 }, { "epoch": 0.23951997594412477, "grad_norm": 1.7503485679626465, "learning_rate": 1.8253799941473894e-05, "loss": 1.6659, "step": 4381 }, { "epoch": 0.2395746483878463, "grad_norm": 1.5667779445648193, "learning_rate": 1.8252768118681305e-05, "loss": 1.5337, "step": 4382 }, { "epoch": 0.23962932083156788, "grad_norm": 1.2993842363357544, "learning_rate": 1.825173602030942e-05, "loss": 1.4607, "step": 4383 }, { "epoch": 0.23968399327528941, "grad_norm": 1.5534952878952026, "learning_rate": 1.8250703646392712e-05, "loss": 1.1455, "step": 4384 }, { "epoch": 0.23973866571901098, "grad_norm": 1.7335959672927856, "learning_rate": 1.8249670996965647e-05, "loss": 1.1526, "step": 4385 }, { "epoch": 0.23979333816273252, "grad_norm": 1.284725546836853, "learning_rate": 1.8248638072062704e-05, "loss": 1.3554, "step": 4386 }, { "epoch": 0.23984801060645408, "grad_norm": 1.252813458442688, "learning_rate": 1.824760487171838e-05, "loss": 1.4911, "step": 4387 }, { "epoch": 0.23990268305017565, "grad_norm": 1.3771377801895142, "learning_rate": 1.824657139596718e-05, "loss": 1.5505, "step": 4388 }, { "epoch": 0.23995735549389718, "grad_norm": 1.3033348321914673, "learning_rate": 1.824553764484361e-05, "loss": 1.6083, "step": 4389 }, { "epoch": 0.24001202793761875, "grad_norm": 1.5569126605987549, "learning_rate": 1.8244503618382186e-05, "loss": 1.5826, "step": 4390 }, { "epoch": 0.2400667003813403, "grad_norm": 1.4962178468704224, "learning_rate": 1.824346931661744e-05, "loss": 1.4627, "step": 4391 }, { "epoch": 0.24012137282506185, "grad_norm": 2.0814976692199707, "learning_rate": 1.8242434739583914e-05, "loss": 1.3468, "step": 4392 }, { "epoch": 0.2401760452687834, "grad_norm": 1.4484044313430786, "learning_rate": 1.8241399887316145e-05, "loss": 1.511, "step": 4393 }, { "epoch": 0.24023071771250495, "grad_norm": 3.5660572052001953, "learning_rate": 1.82403647598487e-05, "loss": 1.4553, "step": 4394 }, { "epoch": 0.24028539015622652, "grad_norm": 1.4613205194473267, "learning_rate": 1.8239329357216135e-05, "loss": 1.5436, "step": 4395 }, { "epoch": 0.24034006259994806, "grad_norm": 1.6221731901168823, "learning_rate": 1.823829367945303e-05, "loss": 1.5224, "step": 4396 }, { "epoch": 0.24039473504366962, "grad_norm": 1.2806484699249268, "learning_rate": 1.8237257726593967e-05, "loss": 1.4353, "step": 4397 }, { "epoch": 0.24044940748739116, "grad_norm": 1.5337494611740112, "learning_rate": 1.8236221498673542e-05, "loss": 1.3255, "step": 4398 }, { "epoch": 0.24050407993111272, "grad_norm": 1.5891615152359009, "learning_rate": 1.823518499572635e-05, "loss": 1.6143, "step": 4399 }, { "epoch": 0.24055875237483426, "grad_norm": 1.3891161680221558, "learning_rate": 1.8234148217787007e-05, "loss": 1.4003, "step": 4400 }, { "epoch": 0.24061342481855583, "grad_norm": 1.3961390256881714, "learning_rate": 1.8233111164890135e-05, "loss": 1.4368, "step": 4401 }, { "epoch": 0.2406680972622774, "grad_norm": 1.4723377227783203, "learning_rate": 1.823207383707036e-05, "loss": 1.4653, "step": 4402 }, { "epoch": 0.24072276970599893, "grad_norm": 1.9873782396316528, "learning_rate": 1.823103623436232e-05, "loss": 1.4386, "step": 4403 }, { "epoch": 0.2407774421497205, "grad_norm": 1.7262704372406006, "learning_rate": 1.822999835680067e-05, "loss": 1.2898, "step": 4404 }, { "epoch": 0.24083211459344203, "grad_norm": 1.219927191734314, "learning_rate": 1.8228960204420063e-05, "loss": 1.5722, "step": 4405 }, { "epoch": 0.2408867870371636, "grad_norm": 1.407591700553894, "learning_rate": 1.8227921777255165e-05, "loss": 1.5027, "step": 4406 }, { "epoch": 0.24094145948088513, "grad_norm": 1.7759522199630737, "learning_rate": 1.822688307534065e-05, "loss": 1.4294, "step": 4407 }, { "epoch": 0.2409961319246067, "grad_norm": 1.749114751815796, "learning_rate": 1.82258440987112e-05, "loss": 1.3712, "step": 4408 }, { "epoch": 0.24105080436832826, "grad_norm": 1.5911543369293213, "learning_rate": 1.8224804847401518e-05, "loss": 1.2333, "step": 4409 }, { "epoch": 0.2411054768120498, "grad_norm": 1.3795427083969116, "learning_rate": 1.82237653214463e-05, "loss": 1.5655, "step": 4410 }, { "epoch": 0.24116014925577137, "grad_norm": 1.7835040092468262, "learning_rate": 1.822272552088026e-05, "loss": 1.3119, "step": 4411 }, { "epoch": 0.2412148216994929, "grad_norm": 1.8367869853973389, "learning_rate": 1.8221685445738123e-05, "loss": 1.2798, "step": 4412 }, { "epoch": 0.24126949414321447, "grad_norm": 1.7708547115325928, "learning_rate": 1.8220645096054613e-05, "loss": 1.3769, "step": 4413 }, { "epoch": 0.241324166586936, "grad_norm": 1.3820418119430542, "learning_rate": 1.8219604471864472e-05, "loss": 1.4923, "step": 4414 }, { "epoch": 0.24137883903065757, "grad_norm": 1.5433355569839478, "learning_rate": 1.8218563573202453e-05, "loss": 1.2327, "step": 4415 }, { "epoch": 0.24143351147437914, "grad_norm": 1.3131296634674072, "learning_rate": 1.821752240010331e-05, "loss": 1.4724, "step": 4416 }, { "epoch": 0.24148818391810067, "grad_norm": 2.609865188598633, "learning_rate": 1.821648095260181e-05, "loss": 1.3057, "step": 4417 }, { "epoch": 0.24154285636182224, "grad_norm": 1.4548983573913574, "learning_rate": 1.821543923073273e-05, "loss": 1.2589, "step": 4418 }, { "epoch": 0.24159752880554378, "grad_norm": 1.622302532196045, "learning_rate": 1.8214397234530855e-05, "loss": 1.4937, "step": 4419 }, { "epoch": 0.24165220124926534, "grad_norm": 1.1278131008148193, "learning_rate": 1.8213354964030984e-05, "loss": 1.6592, "step": 4420 }, { "epoch": 0.24170687369298688, "grad_norm": 1.2782602310180664, "learning_rate": 1.8212312419267917e-05, "loss": 1.5672, "step": 4421 }, { "epoch": 0.24176154613670844, "grad_norm": 1.238458275794983, "learning_rate": 1.8211269600276466e-05, "loss": 1.4031, "step": 4422 }, { "epoch": 0.24181621858043, "grad_norm": 1.5106315612792969, "learning_rate": 1.8210226507091454e-05, "loss": 1.4933, "step": 4423 }, { "epoch": 0.24187089102415155, "grad_norm": 1.8727208375930786, "learning_rate": 1.8209183139747716e-05, "loss": 1.3016, "step": 4424 }, { "epoch": 0.2419255634678731, "grad_norm": 1.3887834548950195, "learning_rate": 1.8208139498280087e-05, "loss": 1.3882, "step": 4425 }, { "epoch": 0.24198023591159465, "grad_norm": 1.4638066291809082, "learning_rate": 1.8207095582723418e-05, "loss": 1.3918, "step": 4426 }, { "epoch": 0.24203490835531621, "grad_norm": 1.2735580205917358, "learning_rate": 1.820605139311257e-05, "loss": 1.7082, "step": 4427 }, { "epoch": 0.24208958079903775, "grad_norm": 1.2765555381774902, "learning_rate": 1.8205006929482412e-05, "loss": 1.4092, "step": 4428 }, { "epoch": 0.24214425324275932, "grad_norm": 1.3928546905517578, "learning_rate": 1.8203962191867816e-05, "loss": 1.5886, "step": 4429 }, { "epoch": 0.24219892568648088, "grad_norm": 1.7290788888931274, "learning_rate": 1.8202917180303673e-05, "loss": 1.3322, "step": 4430 }, { "epoch": 0.24225359813020242, "grad_norm": 1.2556710243225098, "learning_rate": 1.8201871894824877e-05, "loss": 1.1737, "step": 4431 }, { "epoch": 0.24230827057392398, "grad_norm": 1.6034541130065918, "learning_rate": 1.820082633546633e-05, "loss": 1.3075, "step": 4432 }, { "epoch": 0.24236294301764552, "grad_norm": 0.9926468133926392, "learning_rate": 1.8199780502262948e-05, "loss": 1.7275, "step": 4433 }, { "epoch": 0.2424176154613671, "grad_norm": 1.661773920059204, "learning_rate": 1.8198734395249658e-05, "loss": 1.3928, "step": 4434 }, { "epoch": 0.24247228790508865, "grad_norm": 1.6017310619354248, "learning_rate": 1.8197688014461384e-05, "loss": 1.424, "step": 4435 }, { "epoch": 0.2425269603488102, "grad_norm": 1.248465657234192, "learning_rate": 1.8196641359933068e-05, "loss": 1.4324, "step": 4436 }, { "epoch": 0.24258163279253175, "grad_norm": 1.4996352195739746, "learning_rate": 1.819559443169967e-05, "loss": 1.3799, "step": 4437 }, { "epoch": 0.2426363052362533, "grad_norm": 1.5066146850585938, "learning_rate": 1.8194547229796137e-05, "loss": 1.452, "step": 4438 }, { "epoch": 0.24269097767997486, "grad_norm": 1.8179618120193481, "learning_rate": 1.8193499754257444e-05, "loss": 1.4243, "step": 4439 }, { "epoch": 0.2427456501236964, "grad_norm": 1.7265269756317139, "learning_rate": 1.8192452005118567e-05, "loss": 1.3688, "step": 4440 }, { "epoch": 0.24280032256741796, "grad_norm": 1.3155629634857178, "learning_rate": 1.8191403982414496e-05, "loss": 1.2387, "step": 4441 }, { "epoch": 0.24285499501113952, "grad_norm": 1.3139821290969849, "learning_rate": 1.819035568618022e-05, "loss": 1.42, "step": 4442 }, { "epoch": 0.24290966745486106, "grad_norm": 1.4352165460586548, "learning_rate": 1.8189307116450755e-05, "loss": 1.6536, "step": 4443 }, { "epoch": 0.24296433989858263, "grad_norm": 1.3805919885635376, "learning_rate": 1.8188258273261104e-05, "loss": 1.4197, "step": 4444 }, { "epoch": 0.24301901234230416, "grad_norm": 1.582065463066101, "learning_rate": 1.8187209156646294e-05, "loss": 1.3999, "step": 4445 }, { "epoch": 0.24307368478602573, "grad_norm": 1.3280854225158691, "learning_rate": 1.8186159766641362e-05, "loss": 1.4689, "step": 4446 }, { "epoch": 0.24312835722974727, "grad_norm": 1.4217755794525146, "learning_rate": 1.8185110103281343e-05, "loss": 1.3596, "step": 4447 }, { "epoch": 0.24318302967346883, "grad_norm": 1.5105993747711182, "learning_rate": 1.818406016660129e-05, "loss": 1.3191, "step": 4448 }, { "epoch": 0.2432377021171904, "grad_norm": 1.4302160739898682, "learning_rate": 1.8183009956636266e-05, "loss": 1.5099, "step": 4449 }, { "epoch": 0.24329237456091193, "grad_norm": 1.8169269561767578, "learning_rate": 1.8181959473421335e-05, "loss": 1.1495, "step": 4450 }, { "epoch": 0.2433470470046335, "grad_norm": 1.4323703050613403, "learning_rate": 1.818090871699158e-05, "loss": 1.2252, "step": 4451 }, { "epoch": 0.24340171944835504, "grad_norm": 1.4707245826721191, "learning_rate": 1.8179857687382084e-05, "loss": 1.5654, "step": 4452 }, { "epoch": 0.2434563918920766, "grad_norm": 1.4499056339263916, "learning_rate": 1.8178806384627947e-05, "loss": 1.3994, "step": 4453 }, { "epoch": 0.24351106433579814, "grad_norm": 1.5295994281768799, "learning_rate": 1.8177754808764275e-05, "loss": 1.5026, "step": 4454 }, { "epoch": 0.2435657367795197, "grad_norm": 1.349311351776123, "learning_rate": 1.8176702959826172e-05, "loss": 1.5976, "step": 4455 }, { "epoch": 0.24362040922324127, "grad_norm": 1.3783810138702393, "learning_rate": 1.817565083784878e-05, "loss": 1.3094, "step": 4456 }, { "epoch": 0.2436750816669628, "grad_norm": 1.716383695602417, "learning_rate": 1.8174598442867214e-05, "loss": 1.5014, "step": 4457 }, { "epoch": 0.24372975411068437, "grad_norm": 2.072890043258667, "learning_rate": 1.8173545774916628e-05, "loss": 1.3955, "step": 4458 }, { "epoch": 0.2437844265544059, "grad_norm": 1.4105658531188965, "learning_rate": 1.8172492834032165e-05, "loss": 1.5585, "step": 4459 }, { "epoch": 0.24383909899812747, "grad_norm": 1.7124316692352295, "learning_rate": 1.8171439620248993e-05, "loss": 1.3578, "step": 4460 }, { "epoch": 0.243893771441849, "grad_norm": 1.3617902994155884, "learning_rate": 1.8170386133602273e-05, "loss": 1.5709, "step": 4461 }, { "epoch": 0.24394844388557058, "grad_norm": 1.4046192169189453, "learning_rate": 1.8169332374127192e-05, "loss": 1.3091, "step": 4462 }, { "epoch": 0.24400311632929214, "grad_norm": 1.3702067136764526, "learning_rate": 1.816827834185893e-05, "loss": 1.4865, "step": 4463 }, { "epoch": 0.24405778877301368, "grad_norm": 1.3341110944747925, "learning_rate": 1.816722403683269e-05, "loss": 1.548, "step": 4464 }, { "epoch": 0.24411246121673524, "grad_norm": 2.5597410202026367, "learning_rate": 1.8166169459083673e-05, "loss": 1.4524, "step": 4465 }, { "epoch": 0.24416713366045678, "grad_norm": 1.5534906387329102, "learning_rate": 1.8165114608647093e-05, "loss": 1.4576, "step": 4466 }, { "epoch": 0.24422180610417835, "grad_norm": 1.09830904006958, "learning_rate": 1.8164059485558177e-05, "loss": 1.5781, "step": 4467 }, { "epoch": 0.24427647854789988, "grad_norm": 1.7137248516082764, "learning_rate": 1.8163004089852157e-05, "loss": 1.3451, "step": 4468 }, { "epoch": 0.24433115099162145, "grad_norm": 1.2867721319198608, "learning_rate": 1.8161948421564277e-05, "loss": 1.3366, "step": 4469 }, { "epoch": 0.244385823435343, "grad_norm": 1.7021749019622803, "learning_rate": 1.8160892480729787e-05, "loss": 1.5768, "step": 4470 }, { "epoch": 0.24444049587906455, "grad_norm": 2.2518603801727295, "learning_rate": 1.8159836267383944e-05, "loss": 1.5467, "step": 4471 }, { "epoch": 0.24449516832278612, "grad_norm": 1.4843249320983887, "learning_rate": 1.8158779781562022e-05, "loss": 1.3946, "step": 4472 }, { "epoch": 0.24454984076650765, "grad_norm": 1.63230562210083, "learning_rate": 1.8157723023299295e-05, "loss": 1.246, "step": 4473 }, { "epoch": 0.24460451321022922, "grad_norm": 1.517637014389038, "learning_rate": 1.815666599263106e-05, "loss": 1.3802, "step": 4474 }, { "epoch": 0.24465918565395076, "grad_norm": 1.7173938751220703, "learning_rate": 1.8155608689592604e-05, "loss": 1.4289, "step": 4475 }, { "epoch": 0.24471385809767232, "grad_norm": 1.6378408670425415, "learning_rate": 1.8154551114219235e-05, "loss": 1.6694, "step": 4476 }, { "epoch": 0.24476853054139389, "grad_norm": 1.178784728050232, "learning_rate": 1.815349326654627e-05, "loss": 1.5766, "step": 4477 }, { "epoch": 0.24482320298511542, "grad_norm": 1.4113175868988037, "learning_rate": 1.815243514660903e-05, "loss": 1.3553, "step": 4478 }, { "epoch": 0.244877875428837, "grad_norm": 1.5773059129714966, "learning_rate": 1.8151376754442856e-05, "loss": 1.4617, "step": 4479 }, { "epoch": 0.24493254787255853, "grad_norm": 1.3867225646972656, "learning_rate": 1.815031809008308e-05, "loss": 1.6789, "step": 4480 }, { "epoch": 0.2449872203162801, "grad_norm": 1.7186381816864014, "learning_rate": 1.8149259153565058e-05, "loss": 1.4509, "step": 4481 }, { "epoch": 0.24504189276000163, "grad_norm": 1.5460779666900635, "learning_rate": 1.814819994492415e-05, "loss": 1.3465, "step": 4482 }, { "epoch": 0.2450965652037232, "grad_norm": 1.3481128215789795, "learning_rate": 1.8147140464195726e-05, "loss": 1.2401, "step": 4483 }, { "epoch": 0.24515123764744476, "grad_norm": 1.1278249025344849, "learning_rate": 1.814608071141516e-05, "loss": 1.4337, "step": 4484 }, { "epoch": 0.2452059100911663, "grad_norm": 1.4565229415893555, "learning_rate": 1.8145020686617848e-05, "loss": 1.572, "step": 4485 }, { "epoch": 0.24526058253488786, "grad_norm": 1.781811237335205, "learning_rate": 1.8143960389839184e-05, "loss": 1.319, "step": 4486 }, { "epoch": 0.2453152549786094, "grad_norm": 1.4001421928405762, "learning_rate": 1.814289982111457e-05, "loss": 1.2609, "step": 4487 }, { "epoch": 0.24536992742233096, "grad_norm": 2.0830624103546143, "learning_rate": 1.8141838980479424e-05, "loss": 1.373, "step": 4488 }, { "epoch": 0.2454245998660525, "grad_norm": 1.9884668588638306, "learning_rate": 1.8140777867969167e-05, "loss": 1.4134, "step": 4489 }, { "epoch": 0.24547927230977407, "grad_norm": 1.4264129400253296, "learning_rate": 1.8139716483619234e-05, "loss": 1.5263, "step": 4490 }, { "epoch": 0.24553394475349563, "grad_norm": 1.3079171180725098, "learning_rate": 1.813865482746507e-05, "loss": 1.3569, "step": 4491 }, { "epoch": 0.24558861719721717, "grad_norm": 2.069701910018921, "learning_rate": 1.813759289954212e-05, "loss": 1.2341, "step": 4492 }, { "epoch": 0.24564328964093873, "grad_norm": 2.9517531394958496, "learning_rate": 1.8136530699885852e-05, "loss": 1.1328, "step": 4493 }, { "epoch": 0.24569796208466027, "grad_norm": 1.5957159996032715, "learning_rate": 1.8135468228531727e-05, "loss": 1.1129, "step": 4494 }, { "epoch": 0.24575263452838184, "grad_norm": 1.6868394613265991, "learning_rate": 1.813440548551523e-05, "loss": 1.1503, "step": 4495 }, { "epoch": 0.24580730697210337, "grad_norm": 1.510594367980957, "learning_rate": 1.8133342470871846e-05, "loss": 1.5777, "step": 4496 }, { "epoch": 0.24586197941582494, "grad_norm": 1.49777352809906, "learning_rate": 1.8132279184637064e-05, "loss": 1.3939, "step": 4497 }, { "epoch": 0.2459166518595465, "grad_norm": 1.2166814804077148, "learning_rate": 1.8131215626846405e-05, "loss": 1.5233, "step": 4498 }, { "epoch": 0.24597132430326804, "grad_norm": 1.4802706241607666, "learning_rate": 1.8130151797535375e-05, "loss": 1.3596, "step": 4499 }, { "epoch": 0.2460259967469896, "grad_norm": 1.5393376350402832, "learning_rate": 1.8129087696739497e-05, "loss": 1.3201, "step": 4500 }, { "epoch": 0.24608066919071114, "grad_norm": 1.9418416023254395, "learning_rate": 1.8128023324494303e-05, "loss": 1.3104, "step": 4501 }, { "epoch": 0.2461353416344327, "grad_norm": 2.038952112197876, "learning_rate": 1.812695868083534e-05, "loss": 1.4185, "step": 4502 }, { "epoch": 0.24619001407815425, "grad_norm": 1.280462384223938, "learning_rate": 1.8125893765798157e-05, "loss": 1.4785, "step": 4503 }, { "epoch": 0.2462446865218758, "grad_norm": 1.5364116430282593, "learning_rate": 1.812482857941831e-05, "loss": 1.5816, "step": 4504 }, { "epoch": 0.24629935896559738, "grad_norm": 1.3638484477996826, "learning_rate": 1.8123763121731374e-05, "loss": 1.5467, "step": 4505 }, { "epoch": 0.2463540314093189, "grad_norm": 1.8223953247070312, "learning_rate": 1.8122697392772925e-05, "loss": 1.505, "step": 4506 }, { "epoch": 0.24640870385304048, "grad_norm": 1.3121668100357056, "learning_rate": 1.8121631392578545e-05, "loss": 1.646, "step": 4507 }, { "epoch": 0.24646337629676202, "grad_norm": 1.8098939657211304, "learning_rate": 1.812056512118384e-05, "loss": 1.4824, "step": 4508 }, { "epoch": 0.24651804874048358, "grad_norm": 1.3783297538757324, "learning_rate": 1.8119498578624408e-05, "loss": 1.4447, "step": 4509 }, { "epoch": 0.24657272118420512, "grad_norm": 2.0511538982391357, "learning_rate": 1.8118431764935865e-05, "loss": 1.3821, "step": 4510 }, { "epoch": 0.24662739362792668, "grad_norm": 1.3897777795791626, "learning_rate": 1.8117364680153838e-05, "loss": 1.3909, "step": 4511 }, { "epoch": 0.24668206607164825, "grad_norm": 1.7103062868118286, "learning_rate": 1.8116297324313953e-05, "loss": 1.4719, "step": 4512 }, { "epoch": 0.24673673851536979, "grad_norm": 1.6844356060028076, "learning_rate": 1.8115229697451853e-05, "loss": 1.4781, "step": 4513 }, { "epoch": 0.24679141095909135, "grad_norm": 1.3048417568206787, "learning_rate": 1.8114161799603195e-05, "loss": 1.2329, "step": 4514 }, { "epoch": 0.2468460834028129, "grad_norm": 1.4742627143859863, "learning_rate": 1.8113093630803632e-05, "loss": 1.5741, "step": 4515 }, { "epoch": 0.24690075584653445, "grad_norm": 1.3190910816192627, "learning_rate": 1.8112025191088834e-05, "loss": 1.4404, "step": 4516 }, { "epoch": 0.246955428290256, "grad_norm": 1.6325273513793945, "learning_rate": 1.811095648049448e-05, "loss": 1.5569, "step": 4517 }, { "epoch": 0.24701010073397756, "grad_norm": 1.4320132732391357, "learning_rate": 1.8109887499056256e-05, "loss": 1.4667, "step": 4518 }, { "epoch": 0.24706477317769912, "grad_norm": 1.7204376459121704, "learning_rate": 1.8108818246809857e-05, "loss": 1.4004, "step": 4519 }, { "epoch": 0.24711944562142066, "grad_norm": 1.482323408126831, "learning_rate": 1.8107748723790993e-05, "loss": 1.4555, "step": 4520 }, { "epoch": 0.24717411806514222, "grad_norm": 1.8228751420974731, "learning_rate": 1.810667893003537e-05, "loss": 1.3725, "step": 4521 }, { "epoch": 0.24722879050886376, "grad_norm": 1.2078453302383423, "learning_rate": 1.8105608865578713e-05, "loss": 1.7236, "step": 4522 }, { "epoch": 0.24728346295258533, "grad_norm": 1.3165316581726074, "learning_rate": 1.8104538530456758e-05, "loss": 1.4311, "step": 4523 }, { "epoch": 0.24733813539630686, "grad_norm": 1.4897788763046265, "learning_rate": 1.8103467924705245e-05, "loss": 1.4973, "step": 4524 }, { "epoch": 0.24739280784002843, "grad_norm": 1.4655734300613403, "learning_rate": 1.810239704835992e-05, "loss": 1.3162, "step": 4525 }, { "epoch": 0.24744748028375, "grad_norm": 1.6061854362487793, "learning_rate": 1.8101325901456546e-05, "loss": 1.3079, "step": 4526 }, { "epoch": 0.24750215272747153, "grad_norm": 1.6040376424789429, "learning_rate": 1.810025448403089e-05, "loss": 1.77, "step": 4527 }, { "epoch": 0.2475568251711931, "grad_norm": 1.4290975332260132, "learning_rate": 1.8099182796118727e-05, "loss": 1.457, "step": 4528 }, { "epoch": 0.24761149761491463, "grad_norm": 1.5532253980636597, "learning_rate": 1.8098110837755845e-05, "loss": 1.5681, "step": 4529 }, { "epoch": 0.2476661700586362, "grad_norm": 1.233656406402588, "learning_rate": 1.809703860897804e-05, "loss": 1.657, "step": 4530 }, { "epoch": 0.24772084250235774, "grad_norm": 1.5809595584869385, "learning_rate": 1.809596610982112e-05, "loss": 1.5163, "step": 4531 }, { "epoch": 0.2477755149460793, "grad_norm": 1.3385004997253418, "learning_rate": 1.809489334032089e-05, "loss": 1.6114, "step": 4532 }, { "epoch": 0.24783018738980087, "grad_norm": 1.1838709115982056, "learning_rate": 1.8093820300513176e-05, "loss": 1.6125, "step": 4533 }, { "epoch": 0.2478848598335224, "grad_norm": 1.1787042617797852, "learning_rate": 1.809274699043381e-05, "loss": 1.4319, "step": 4534 }, { "epoch": 0.24793953227724397, "grad_norm": 1.4574044942855835, "learning_rate": 1.8091673410118633e-05, "loss": 1.5879, "step": 4535 }, { "epoch": 0.2479942047209655, "grad_norm": 1.6643470525741577, "learning_rate": 1.809059955960349e-05, "loss": 1.2526, "step": 4536 }, { "epoch": 0.24804887716468707, "grad_norm": 1.2376725673675537, "learning_rate": 1.8089525438924242e-05, "loss": 1.4444, "step": 4537 }, { "epoch": 0.24810354960840864, "grad_norm": 1.4256484508514404, "learning_rate": 1.808845104811676e-05, "loss": 1.2991, "step": 4538 }, { "epoch": 0.24815822205213017, "grad_norm": 1.3766546249389648, "learning_rate": 1.8087376387216917e-05, "loss": 1.2269, "step": 4539 }, { "epoch": 0.24821289449585174, "grad_norm": 2.2383012771606445, "learning_rate": 1.80863014562606e-05, "loss": 1.3291, "step": 4540 }, { "epoch": 0.24826756693957328, "grad_norm": 1.5921870470046997, "learning_rate": 1.80852262552837e-05, "loss": 1.5576, "step": 4541 }, { "epoch": 0.24832223938329484, "grad_norm": 1.5644361972808838, "learning_rate": 1.8084150784322123e-05, "loss": 1.5506, "step": 4542 }, { "epoch": 0.24837691182701638, "grad_norm": 1.1792516708374023, "learning_rate": 1.808307504341178e-05, "loss": 1.4865, "step": 4543 }, { "epoch": 0.24843158427073794, "grad_norm": 1.5978249311447144, "learning_rate": 1.8081999032588594e-05, "loss": 1.1726, "step": 4544 }, { "epoch": 0.2484862567144595, "grad_norm": 2.538531541824341, "learning_rate": 1.80809227518885e-05, "loss": 1.5579, "step": 4545 }, { "epoch": 0.24854092915818105, "grad_norm": 1.5576978921890259, "learning_rate": 1.8079846201347428e-05, "loss": 1.3986, "step": 4546 }, { "epoch": 0.2485956016019026, "grad_norm": 2.1582210063934326, "learning_rate": 1.8078769381001334e-05, "loss": 1.3372, "step": 4547 }, { "epoch": 0.24865027404562415, "grad_norm": 1.305220603942871, "learning_rate": 1.807769229088617e-05, "loss": 1.4166, "step": 4548 }, { "epoch": 0.2487049464893457, "grad_norm": 1.5799189805984497, "learning_rate": 1.8076614931037908e-05, "loss": 1.4977, "step": 4549 }, { "epoch": 0.24875961893306725, "grad_norm": 1.7178235054016113, "learning_rate": 1.8075537301492522e-05, "loss": 1.2243, "step": 4550 }, { "epoch": 0.24881429137678882, "grad_norm": 1.6420481204986572, "learning_rate": 1.807445940228599e-05, "loss": 1.5376, "step": 4551 }, { "epoch": 0.24886896382051038, "grad_norm": 1.4522451162338257, "learning_rate": 1.807338123345432e-05, "loss": 1.6038, "step": 4552 }, { "epoch": 0.24892363626423192, "grad_norm": 1.370431661605835, "learning_rate": 1.80723027950335e-05, "loss": 1.5426, "step": 4553 }, { "epoch": 0.24897830870795348, "grad_norm": 1.801801085472107, "learning_rate": 1.8071224087059547e-05, "loss": 1.3677, "step": 4554 }, { "epoch": 0.24903298115167502, "grad_norm": 1.3977479934692383, "learning_rate": 1.8070145109568484e-05, "loss": 1.4418, "step": 4555 }, { "epoch": 0.24908765359539659, "grad_norm": 1.4817068576812744, "learning_rate": 1.8069065862596338e-05, "loss": 1.2205, "step": 4556 }, { "epoch": 0.24914232603911812, "grad_norm": 1.348572850227356, "learning_rate": 1.806798634617915e-05, "loss": 1.6338, "step": 4557 }, { "epoch": 0.2491969984828397, "grad_norm": 1.4740008115768433, "learning_rate": 1.806690656035296e-05, "loss": 1.5485, "step": 4558 }, { "epoch": 0.24925167092656125, "grad_norm": 1.271515130996704, "learning_rate": 1.8065826505153835e-05, "loss": 1.5168, "step": 4559 }, { "epoch": 0.2493063433702828, "grad_norm": 1.3561687469482422, "learning_rate": 1.806474618061784e-05, "loss": 1.836, "step": 4560 }, { "epoch": 0.24936101581400436, "grad_norm": 1.4236514568328857, "learning_rate": 1.806366558678104e-05, "loss": 1.6247, "step": 4561 }, { "epoch": 0.2494156882577259, "grad_norm": 1.326379418373108, "learning_rate": 1.8062584723679524e-05, "loss": 1.3763, "step": 4562 }, { "epoch": 0.24947036070144746, "grad_norm": 1.6623762845993042, "learning_rate": 1.8061503591349386e-05, "loss": 1.5457, "step": 4563 }, { "epoch": 0.249525033145169, "grad_norm": 1.5333802700042725, "learning_rate": 1.8060422189826725e-05, "loss": 1.2421, "step": 4564 }, { "epoch": 0.24957970558889056, "grad_norm": 1.6899778842926025, "learning_rate": 1.8059340519147653e-05, "loss": 1.4751, "step": 4565 }, { "epoch": 0.24963437803261213, "grad_norm": 1.5837337970733643, "learning_rate": 1.805825857934829e-05, "loss": 1.3624, "step": 4566 }, { "epoch": 0.24968905047633366, "grad_norm": 1.3540685176849365, "learning_rate": 1.8057176370464765e-05, "loss": 1.3242, "step": 4567 }, { "epoch": 0.24974372292005523, "grad_norm": 1.8088198900222778, "learning_rate": 1.805609389253321e-05, "loss": 1.4705, "step": 4568 }, { "epoch": 0.24979839536377677, "grad_norm": 1.6662253141403198, "learning_rate": 1.805501114558978e-05, "loss": 1.2101, "step": 4569 }, { "epoch": 0.24985306780749833, "grad_norm": 1.4208934307098389, "learning_rate": 1.8053928129670624e-05, "loss": 1.4211, "step": 4570 }, { "epoch": 0.24990774025121987, "grad_norm": 1.7299093008041382, "learning_rate": 1.805284484481191e-05, "loss": 1.253, "step": 4571 }, { "epoch": 0.24996241269494143, "grad_norm": 1.1317040920257568, "learning_rate": 1.805176129104981e-05, "loss": 1.5574, "step": 4572 }, { "epoch": 0.250017085138663, "grad_norm": 1.529906153678894, "learning_rate": 1.8050677468420503e-05, "loss": 1.3631, "step": 4573 }, { "epoch": 0.25007175758238454, "grad_norm": 1.7994894981384277, "learning_rate": 1.8049593376960187e-05, "loss": 1.3735, "step": 4574 }, { "epoch": 0.2501264300261061, "grad_norm": 1.8093860149383545, "learning_rate": 1.804850901670506e-05, "loss": 1.3418, "step": 4575 }, { "epoch": 0.25018110246982767, "grad_norm": 1.7380870580673218, "learning_rate": 1.804742438769133e-05, "loss": 1.3392, "step": 4576 }, { "epoch": 0.2502357749135492, "grad_norm": 1.5009957551956177, "learning_rate": 1.8046339489955214e-05, "loss": 1.7401, "step": 4577 }, { "epoch": 0.25029044735727074, "grad_norm": 1.5432627201080322, "learning_rate": 1.804525432353294e-05, "loss": 1.4907, "step": 4578 }, { "epoch": 0.2503451198009923, "grad_norm": 1.3421525955200195, "learning_rate": 1.8044168888460748e-05, "loss": 1.5349, "step": 4579 }, { "epoch": 0.25039979224471387, "grad_norm": 1.3772718906402588, "learning_rate": 1.804308318477488e-05, "loss": 1.403, "step": 4580 }, { "epoch": 0.2504544646884354, "grad_norm": 1.5589196681976318, "learning_rate": 1.8041997212511594e-05, "loss": 1.5116, "step": 4581 }, { "epoch": 0.25050913713215694, "grad_norm": 1.3632798194885254, "learning_rate": 1.8040910971707143e-05, "loss": 1.5542, "step": 4582 }, { "epoch": 0.25056380957587854, "grad_norm": 1.2968522310256958, "learning_rate": 1.8039824462397812e-05, "loss": 1.4344, "step": 4583 }, { "epoch": 0.2506184820196001, "grad_norm": 1.4785895347595215, "learning_rate": 1.8038737684619874e-05, "loss": 1.4136, "step": 4584 }, { "epoch": 0.2506731544633216, "grad_norm": 1.416801929473877, "learning_rate": 1.8037650638409622e-05, "loss": 1.6612, "step": 4585 }, { "epoch": 0.2507278269070432, "grad_norm": 1.4247089624404907, "learning_rate": 1.803656332380335e-05, "loss": 1.5034, "step": 4586 }, { "epoch": 0.25078249935076474, "grad_norm": 1.2401609420776367, "learning_rate": 1.8035475740837376e-05, "loss": 1.6818, "step": 4587 }, { "epoch": 0.2508371717944863, "grad_norm": 1.7056949138641357, "learning_rate": 1.803438788954801e-05, "loss": 1.2415, "step": 4588 }, { "epoch": 0.2508918442382078, "grad_norm": 1.3460500240325928, "learning_rate": 1.8033299769971577e-05, "loss": 1.4976, "step": 4589 }, { "epoch": 0.2509465166819294, "grad_norm": 1.2645306587219238, "learning_rate": 1.8032211382144416e-05, "loss": 1.4068, "step": 4590 }, { "epoch": 0.25100118912565095, "grad_norm": 1.4170209169387817, "learning_rate": 1.8031122726102868e-05, "loss": 1.332, "step": 4591 }, { "epoch": 0.2510558615693725, "grad_norm": 1.5182164907455444, "learning_rate": 1.8030033801883285e-05, "loss": 1.3941, "step": 4592 }, { "epoch": 0.2511105340130941, "grad_norm": 2.154022216796875, "learning_rate": 1.802894460952203e-05, "loss": 1.1004, "step": 4593 }, { "epoch": 0.2511652064568156, "grad_norm": 1.421256422996521, "learning_rate": 1.802785514905548e-05, "loss": 1.4199, "step": 4594 }, { "epoch": 0.25121987890053715, "grad_norm": 1.4137681722640991, "learning_rate": 1.8026765420520002e-05, "loss": 1.2047, "step": 4595 }, { "epoch": 0.2512745513442587, "grad_norm": 1.1913782358169556, "learning_rate": 1.8025675423951995e-05, "loss": 1.3575, "step": 4596 }, { "epoch": 0.2513292237879803, "grad_norm": 1.2952404022216797, "learning_rate": 1.802458515938785e-05, "loss": 1.7031, "step": 4597 }, { "epoch": 0.2513838962317018, "grad_norm": 1.3953282833099365, "learning_rate": 1.8023494626863976e-05, "loss": 1.3292, "step": 4598 }, { "epoch": 0.25143856867542336, "grad_norm": 1.4667292833328247, "learning_rate": 1.8022403826416792e-05, "loss": 1.5909, "step": 4599 }, { "epoch": 0.25149324111914495, "grad_norm": 1.4391146898269653, "learning_rate": 1.8021312758082717e-05, "loss": 1.6169, "step": 4600 }, { "epoch": 0.2515479135628665, "grad_norm": 1.5325216054916382, "learning_rate": 1.8020221421898185e-05, "loss": 1.4288, "step": 4601 }, { "epoch": 0.251602586006588, "grad_norm": 1.35574209690094, "learning_rate": 1.8019129817899643e-05, "loss": 1.4134, "step": 4602 }, { "epoch": 0.25165725845030956, "grad_norm": 1.2784757614135742, "learning_rate": 1.8018037946123534e-05, "loss": 1.3821, "step": 4603 }, { "epoch": 0.25171193089403116, "grad_norm": 1.3773025274276733, "learning_rate": 1.8016945806606328e-05, "loss": 1.3638, "step": 4604 }, { "epoch": 0.2517666033377527, "grad_norm": 1.4514094591140747, "learning_rate": 1.8015853399384488e-05, "loss": 1.3811, "step": 4605 }, { "epoch": 0.25182127578147423, "grad_norm": 1.2724555730819702, "learning_rate": 1.8014760724494493e-05, "loss": 1.4375, "step": 4606 }, { "epoch": 0.2518759482251958, "grad_norm": 1.525388240814209, "learning_rate": 1.801366778197283e-05, "loss": 1.3616, "step": 4607 }, { "epoch": 0.25193062066891736, "grad_norm": 2.261533737182617, "learning_rate": 1.8012574571855995e-05, "loss": 1.4597, "step": 4608 }, { "epoch": 0.2519852931126389, "grad_norm": 1.400667428970337, "learning_rate": 1.8011481094180492e-05, "loss": 1.3903, "step": 4609 }, { "epoch": 0.25203996555636043, "grad_norm": 1.861167073249817, "learning_rate": 1.8010387348982837e-05, "loss": 1.3152, "step": 4610 }, { "epoch": 0.252094638000082, "grad_norm": 1.3583097457885742, "learning_rate": 1.800929333629955e-05, "loss": 1.2705, "step": 4611 }, { "epoch": 0.25214931044380356, "grad_norm": 1.3438043594360352, "learning_rate": 1.8008199056167167e-05, "loss": 1.3313, "step": 4612 }, { "epoch": 0.2522039828875251, "grad_norm": 1.2595908641815186, "learning_rate": 1.800710450862222e-05, "loss": 1.5474, "step": 4613 }, { "epoch": 0.2522586553312467, "grad_norm": 2.069756269454956, "learning_rate": 1.800600969370127e-05, "loss": 1.64, "step": 4614 }, { "epoch": 0.25231332777496823, "grad_norm": 1.563575267791748, "learning_rate": 1.8004914611440866e-05, "loss": 1.3826, "step": 4615 }, { "epoch": 0.25236800021868977, "grad_norm": 1.4433412551879883, "learning_rate": 1.8003819261877584e-05, "loss": 1.3834, "step": 4616 }, { "epoch": 0.2524226726624113, "grad_norm": 1.6497267484664917, "learning_rate": 1.800272364504799e-05, "loss": 1.5311, "step": 4617 }, { "epoch": 0.2524773451061329, "grad_norm": 1.6001627445220947, "learning_rate": 1.800162776098868e-05, "loss": 1.2922, "step": 4618 }, { "epoch": 0.25253201754985444, "grad_norm": 1.5392491817474365, "learning_rate": 1.8000531609736236e-05, "loss": 1.5319, "step": 4619 }, { "epoch": 0.252586689993576, "grad_norm": 1.965997338294983, "learning_rate": 1.7999435191327272e-05, "loss": 1.3552, "step": 4620 }, { "epoch": 0.25264136243729757, "grad_norm": 1.4947688579559326, "learning_rate": 1.7998338505798393e-05, "loss": 1.605, "step": 4621 }, { "epoch": 0.2526960348810191, "grad_norm": 1.276595115661621, "learning_rate": 1.7997241553186223e-05, "loss": 1.6468, "step": 4622 }, { "epoch": 0.25275070732474064, "grad_norm": 1.5769423246383667, "learning_rate": 1.7996144333527394e-05, "loss": 1.1334, "step": 4623 }, { "epoch": 0.2528053797684622, "grad_norm": 1.5786006450653076, "learning_rate": 1.7995046846858542e-05, "loss": 1.3742, "step": 4624 }, { "epoch": 0.2528600522121838, "grad_norm": 1.3173062801361084, "learning_rate": 1.7993949093216313e-05, "loss": 1.4089, "step": 4625 }, { "epoch": 0.2529147246559053, "grad_norm": 1.5100507736206055, "learning_rate": 1.7992851072637366e-05, "loss": 1.3705, "step": 4626 }, { "epoch": 0.25296939709962685, "grad_norm": 1.4866195917129517, "learning_rate": 1.7991752785158364e-05, "loss": 1.5509, "step": 4627 }, { "epoch": 0.25302406954334844, "grad_norm": 1.6538723707199097, "learning_rate": 1.7990654230815985e-05, "loss": 1.6665, "step": 4628 }, { "epoch": 0.25307874198707, "grad_norm": 1.3984668254852295, "learning_rate": 1.798955540964691e-05, "loss": 1.6947, "step": 4629 }, { "epoch": 0.2531334144307915, "grad_norm": 1.0561898946762085, "learning_rate": 1.798845632168783e-05, "loss": 1.4962, "step": 4630 }, { "epoch": 0.25318808687451305, "grad_norm": 1.7647563219070435, "learning_rate": 1.7987356966975455e-05, "loss": 1.3371, "step": 4631 }, { "epoch": 0.25324275931823464, "grad_norm": 1.720517873764038, "learning_rate": 1.7986257345546484e-05, "loss": 1.4431, "step": 4632 }, { "epoch": 0.2532974317619562, "grad_norm": 1.524576187133789, "learning_rate": 1.798515745743764e-05, "loss": 1.4692, "step": 4633 }, { "epoch": 0.2533521042056777, "grad_norm": 1.6170281171798706, "learning_rate": 1.7984057302685647e-05, "loss": 1.2151, "step": 4634 }, { "epoch": 0.2534067766493993, "grad_norm": 1.2413809299468994, "learning_rate": 1.7982956881327248e-05, "loss": 1.5093, "step": 4635 }, { "epoch": 0.25346144909312085, "grad_norm": 1.2952957153320312, "learning_rate": 1.798185619339919e-05, "loss": 1.6509, "step": 4636 }, { "epoch": 0.2535161215368424, "grad_norm": 1.4879121780395508, "learning_rate": 1.7980755238938216e-05, "loss": 1.1738, "step": 4637 }, { "epoch": 0.2535707939805639, "grad_norm": 1.4140782356262207, "learning_rate": 1.79796540179811e-05, "loss": 1.3709, "step": 4638 }, { "epoch": 0.2536254664242855, "grad_norm": 1.4647222757339478, "learning_rate": 1.7978552530564616e-05, "loss": 1.3224, "step": 4639 }, { "epoch": 0.25368013886800705, "grad_norm": 1.7470957040786743, "learning_rate": 1.797745077672554e-05, "loss": 1.2182, "step": 4640 }, { "epoch": 0.2537348113117286, "grad_norm": 1.5587363243103027, "learning_rate": 1.797634875650066e-05, "loss": 1.4049, "step": 4641 }, { "epoch": 0.2537894837554502, "grad_norm": 1.7810750007629395, "learning_rate": 1.7975246469926774e-05, "loss": 1.3482, "step": 4642 }, { "epoch": 0.2538441561991717, "grad_norm": 1.7387166023254395, "learning_rate": 1.79741439170407e-05, "loss": 1.6049, "step": 4643 }, { "epoch": 0.25389882864289326, "grad_norm": 1.3475861549377441, "learning_rate": 1.7973041097879246e-05, "loss": 1.5839, "step": 4644 }, { "epoch": 0.2539535010866148, "grad_norm": 1.4278833866119385, "learning_rate": 1.7971938012479242e-05, "loss": 1.4854, "step": 4645 }, { "epoch": 0.2540081735303364, "grad_norm": 1.1193764209747314, "learning_rate": 1.7970834660877522e-05, "loss": 1.4747, "step": 4646 }, { "epoch": 0.2540628459740579, "grad_norm": 1.2946363687515259, "learning_rate": 1.7969731043110928e-05, "loss": 1.3864, "step": 4647 }, { "epoch": 0.25411751841777946, "grad_norm": 1.3538236618041992, "learning_rate": 1.796862715921631e-05, "loss": 1.527, "step": 4648 }, { "epoch": 0.25417219086150106, "grad_norm": 1.2973952293395996, "learning_rate": 1.7967523009230535e-05, "loss": 1.5516, "step": 4649 }, { "epoch": 0.2542268633052226, "grad_norm": 1.7392240762710571, "learning_rate": 1.796641859319047e-05, "loss": 1.499, "step": 4650 }, { "epoch": 0.25428153574894413, "grad_norm": 1.7470964193344116, "learning_rate": 1.796531391113299e-05, "loss": 1.267, "step": 4651 }, { "epoch": 0.25433620819266567, "grad_norm": 1.1052467823028564, "learning_rate": 1.7964208963094993e-05, "loss": 1.4994, "step": 4652 }, { "epoch": 0.25439088063638726, "grad_norm": 1.3008419275283813, "learning_rate": 1.796310374911337e-05, "loss": 1.3687, "step": 4653 }, { "epoch": 0.2544455530801088, "grad_norm": 1.505622148513794, "learning_rate": 1.7961998269225024e-05, "loss": 1.6142, "step": 4654 }, { "epoch": 0.25450022552383034, "grad_norm": 1.490229606628418, "learning_rate": 1.7960892523466874e-05, "loss": 1.2243, "step": 4655 }, { "epoch": 0.25455489796755193, "grad_norm": 1.3829097747802734, "learning_rate": 1.795978651187584e-05, "loss": 1.3781, "step": 4656 }, { "epoch": 0.25460957041127347, "grad_norm": 1.3082422018051147, "learning_rate": 1.7958680234488857e-05, "loss": 1.5479, "step": 4657 }, { "epoch": 0.254664242854995, "grad_norm": 1.313814640045166, "learning_rate": 1.7957573691342866e-05, "loss": 1.301, "step": 4658 }, { "epoch": 0.25471891529871654, "grad_norm": 1.4956154823303223, "learning_rate": 1.7956466882474815e-05, "loss": 1.4649, "step": 4659 }, { "epoch": 0.25477358774243813, "grad_norm": 1.953248143196106, "learning_rate": 1.7955359807921667e-05, "loss": 1.4563, "step": 4660 }, { "epoch": 0.25482826018615967, "grad_norm": 1.5773755311965942, "learning_rate": 1.7954252467720386e-05, "loss": 1.4332, "step": 4661 }, { "epoch": 0.2548829326298812, "grad_norm": 1.7673676013946533, "learning_rate": 1.795314486190795e-05, "loss": 1.649, "step": 4662 }, { "epoch": 0.2549376050736028, "grad_norm": 1.4910377264022827, "learning_rate": 1.7952036990521344e-05, "loss": 1.3892, "step": 4663 }, { "epoch": 0.25499227751732434, "grad_norm": 1.6858289241790771, "learning_rate": 1.7950928853597562e-05, "loss": 1.5738, "step": 4664 }, { "epoch": 0.2550469499610459, "grad_norm": 1.6187984943389893, "learning_rate": 1.7949820451173607e-05, "loss": 1.4258, "step": 4665 }, { "epoch": 0.2551016224047674, "grad_norm": 1.513776183128357, "learning_rate": 1.7948711783286498e-05, "loss": 1.4115, "step": 4666 }, { "epoch": 0.255156294848489, "grad_norm": 1.2679985761642456, "learning_rate": 1.7947602849973245e-05, "loss": 1.3987, "step": 4667 }, { "epoch": 0.25521096729221054, "grad_norm": 1.708836555480957, "learning_rate": 1.7946493651270883e-05, "loss": 1.2444, "step": 4668 }, { "epoch": 0.2552656397359321, "grad_norm": 1.447554588317871, "learning_rate": 1.7945384187216456e-05, "loss": 1.625, "step": 4669 }, { "epoch": 0.2553203121796537, "grad_norm": 1.2271174192428589, "learning_rate": 1.7944274457847003e-05, "loss": 1.4739, "step": 4670 }, { "epoch": 0.2553749846233752, "grad_norm": 1.3766578435897827, "learning_rate": 1.7943164463199584e-05, "loss": 1.2908, "step": 4671 }, { "epoch": 0.25542965706709675, "grad_norm": 1.527355432510376, "learning_rate": 1.7942054203311265e-05, "loss": 1.6077, "step": 4672 }, { "epoch": 0.2554843295108183, "grad_norm": 2.0016255378723145, "learning_rate": 1.794094367821912e-05, "loss": 1.3464, "step": 4673 }, { "epoch": 0.2555390019545399, "grad_norm": 1.5113146305084229, "learning_rate": 1.793983288796023e-05, "loss": 1.4724, "step": 4674 }, { "epoch": 0.2555936743982614, "grad_norm": 1.6457682847976685, "learning_rate": 1.7938721832571688e-05, "loss": 1.3027, "step": 4675 }, { "epoch": 0.25564834684198295, "grad_norm": 1.6789849996566772, "learning_rate": 1.7937610512090597e-05, "loss": 1.5969, "step": 4676 }, { "epoch": 0.25570301928570455, "grad_norm": 1.1839547157287598, "learning_rate": 1.7936498926554065e-05, "loss": 1.3099, "step": 4677 }, { "epoch": 0.2557576917294261, "grad_norm": 1.729171872138977, "learning_rate": 1.793538707599921e-05, "loss": 1.2897, "step": 4678 }, { "epoch": 0.2558123641731476, "grad_norm": 1.434293508529663, "learning_rate": 1.7934274960463155e-05, "loss": 1.5998, "step": 4679 }, { "epoch": 0.25586703661686916, "grad_norm": 1.3174502849578857, "learning_rate": 1.7933162579983045e-05, "loss": 1.4538, "step": 4680 }, { "epoch": 0.25592170906059075, "grad_norm": 1.2215430736541748, "learning_rate": 1.7932049934596023e-05, "loss": 1.4821, "step": 4681 }, { "epoch": 0.2559763815043123, "grad_norm": 1.7897875308990479, "learning_rate": 1.793093702433924e-05, "loss": 1.4007, "step": 4682 }, { "epoch": 0.2560310539480338, "grad_norm": 1.5221196413040161, "learning_rate": 1.7929823849249858e-05, "loss": 1.4435, "step": 4683 }, { "epoch": 0.2560857263917554, "grad_norm": 1.24849271774292, "learning_rate": 1.7928710409365044e-05, "loss": 1.336, "step": 4684 }, { "epoch": 0.25614039883547696, "grad_norm": 1.5543969869613647, "learning_rate": 1.792759670472199e-05, "loss": 1.3583, "step": 4685 }, { "epoch": 0.2561950712791985, "grad_norm": 1.721354603767395, "learning_rate": 1.792648273535788e-05, "loss": 1.3853, "step": 4686 }, { "epoch": 0.25624974372292003, "grad_norm": 1.556980013847351, "learning_rate": 1.792536850130991e-05, "loss": 1.5398, "step": 4687 }, { "epoch": 0.2563044161666416, "grad_norm": 1.3191626071929932, "learning_rate": 1.792425400261529e-05, "loss": 1.2919, "step": 4688 }, { "epoch": 0.25635908861036316, "grad_norm": 1.3674863576889038, "learning_rate": 1.792313923931123e-05, "loss": 1.7484, "step": 4689 }, { "epoch": 0.2564137610540847, "grad_norm": 1.3984718322753906, "learning_rate": 1.792202421143496e-05, "loss": 1.3881, "step": 4690 }, { "epoch": 0.2564684334978063, "grad_norm": 1.0457497835159302, "learning_rate": 1.7920908919023712e-05, "loss": 1.7731, "step": 4691 }, { "epoch": 0.25652310594152783, "grad_norm": 1.3063348531723022, "learning_rate": 1.791979336211473e-05, "loss": 1.3438, "step": 4692 }, { "epoch": 0.25657777838524937, "grad_norm": 2.1284756660461426, "learning_rate": 1.7918677540745263e-05, "loss": 1.4442, "step": 4693 }, { "epoch": 0.2566324508289709, "grad_norm": 1.5015919208526611, "learning_rate": 1.791756145495257e-05, "loss": 1.4199, "step": 4694 }, { "epoch": 0.2566871232726925, "grad_norm": 2.2139203548431396, "learning_rate": 1.7916445104773923e-05, "loss": 1.4903, "step": 4695 }, { "epoch": 0.25674179571641403, "grad_norm": 1.4838632345199585, "learning_rate": 1.7915328490246594e-05, "loss": 1.4296, "step": 4696 }, { "epoch": 0.25679646816013557, "grad_norm": 1.6725199222564697, "learning_rate": 1.7914211611407875e-05, "loss": 1.6858, "step": 4697 }, { "epoch": 0.25685114060385716, "grad_norm": 1.868280291557312, "learning_rate": 1.7913094468295057e-05, "loss": 1.5009, "step": 4698 }, { "epoch": 0.2569058130475787, "grad_norm": 1.1897343397140503, "learning_rate": 1.7911977060945448e-05, "loss": 1.688, "step": 4699 }, { "epoch": 0.25696048549130024, "grad_norm": 1.15679132938385, "learning_rate": 1.7910859389396356e-05, "loss": 1.5076, "step": 4700 }, { "epoch": 0.2570151579350218, "grad_norm": 1.4106613397598267, "learning_rate": 1.790974145368511e-05, "loss": 1.4327, "step": 4701 }, { "epoch": 0.25706983037874337, "grad_norm": 1.37136971950531, "learning_rate": 1.7908623253849035e-05, "loss": 1.5686, "step": 4702 }, { "epoch": 0.2571245028224649, "grad_norm": 1.7799052000045776, "learning_rate": 1.7907504789925473e-05, "loss": 1.4308, "step": 4703 }, { "epoch": 0.25717917526618644, "grad_norm": 1.2728780508041382, "learning_rate": 1.7906386061951766e-05, "loss": 1.7382, "step": 4704 }, { "epoch": 0.25723384770990804, "grad_norm": 1.6323037147521973, "learning_rate": 1.7905267069965276e-05, "loss": 1.4155, "step": 4705 }, { "epoch": 0.2572885201536296, "grad_norm": 1.5071388483047485, "learning_rate": 1.790414781400337e-05, "loss": 1.3202, "step": 4706 }, { "epoch": 0.2573431925973511, "grad_norm": 1.447706937789917, "learning_rate": 1.790302829410342e-05, "loss": 1.2379, "step": 4707 }, { "epoch": 0.25739786504107265, "grad_norm": 1.3969334363937378, "learning_rate": 1.7901908510302813e-05, "loss": 1.4855, "step": 4708 }, { "epoch": 0.25745253748479424, "grad_norm": 1.1146934032440186, "learning_rate": 1.7900788462638937e-05, "loss": 1.4625, "step": 4709 }, { "epoch": 0.2575072099285158, "grad_norm": 2.1927342414855957, "learning_rate": 1.789966815114919e-05, "loss": 1.2684, "step": 4710 }, { "epoch": 0.2575618823722373, "grad_norm": 1.5042273998260498, "learning_rate": 1.7898547575870992e-05, "loss": 1.4857, "step": 4711 }, { "epoch": 0.2576165548159589, "grad_norm": 1.8621000051498413, "learning_rate": 1.7897426736841754e-05, "loss": 1.2936, "step": 4712 }, { "epoch": 0.25767122725968045, "grad_norm": 1.511860728263855, "learning_rate": 1.7896305634098904e-05, "loss": 1.4776, "step": 4713 }, { "epoch": 0.257725899703402, "grad_norm": 1.4471806287765503, "learning_rate": 1.7895184267679885e-05, "loss": 1.366, "step": 4714 }, { "epoch": 0.2577805721471235, "grad_norm": 1.1469630002975464, "learning_rate": 1.789406263762213e-05, "loss": 1.5909, "step": 4715 }, { "epoch": 0.2578352445908451, "grad_norm": 1.4686106443405151, "learning_rate": 1.78929407439631e-05, "loss": 1.8455, "step": 4716 }, { "epoch": 0.25788991703456665, "grad_norm": 1.272927165031433, "learning_rate": 1.789181858674026e-05, "loss": 1.5518, "step": 4717 }, { "epoch": 0.2579445894782882, "grad_norm": 1.2876033782958984, "learning_rate": 1.789069616599108e-05, "loss": 1.2913, "step": 4718 }, { "epoch": 0.2579992619220098, "grad_norm": 1.5108002424240112, "learning_rate": 1.7889573481753036e-05, "loss": 1.4791, "step": 4719 }, { "epoch": 0.2580539343657313, "grad_norm": 1.745845913887024, "learning_rate": 1.788845053406362e-05, "loss": 1.3901, "step": 4720 }, { "epoch": 0.25810860680945286, "grad_norm": 1.5218982696533203, "learning_rate": 1.7887327322960332e-05, "loss": 1.5506, "step": 4721 }, { "epoch": 0.2581632792531744, "grad_norm": 1.4399347305297852, "learning_rate": 1.7886203848480675e-05, "loss": 1.513, "step": 4722 }, { "epoch": 0.258217951696896, "grad_norm": 1.3974605798721313, "learning_rate": 1.7885080110662166e-05, "loss": 1.6259, "step": 4723 }, { "epoch": 0.2582726241406175, "grad_norm": 1.622773289680481, "learning_rate": 1.788395610954233e-05, "loss": 1.4417, "step": 4724 }, { "epoch": 0.25832729658433906, "grad_norm": 1.366370439529419, "learning_rate": 1.7882831845158696e-05, "loss": 1.5822, "step": 4725 }, { "epoch": 0.25838196902806065, "grad_norm": 1.3756319284439087, "learning_rate": 1.7881707317548814e-05, "loss": 1.2929, "step": 4726 }, { "epoch": 0.2584366414717822, "grad_norm": 1.604016661643982, "learning_rate": 1.7880582526750227e-05, "loss": 1.476, "step": 4727 }, { "epoch": 0.25849131391550373, "grad_norm": 1.2608880996704102, "learning_rate": 1.7879457472800496e-05, "loss": 1.6297, "step": 4728 }, { "epoch": 0.25854598635922527, "grad_norm": 1.5338106155395508, "learning_rate": 1.787833215573719e-05, "loss": 1.4977, "step": 4729 }, { "epoch": 0.25860065880294686, "grad_norm": 1.5955592393875122, "learning_rate": 1.787720657559789e-05, "loss": 1.5511, "step": 4730 }, { "epoch": 0.2586553312466684, "grad_norm": 1.6438088417053223, "learning_rate": 1.7876080732420176e-05, "loss": 1.4285, "step": 4731 }, { "epoch": 0.25871000369038993, "grad_norm": 1.658703088760376, "learning_rate": 1.7874954626241644e-05, "loss": 1.2493, "step": 4732 }, { "epoch": 0.2587646761341115, "grad_norm": 1.33057701587677, "learning_rate": 1.78738282570999e-05, "loss": 1.2798, "step": 4733 }, { "epoch": 0.25881934857783306, "grad_norm": 1.5305644273757935, "learning_rate": 1.787270162503255e-05, "loss": 1.2671, "step": 4734 }, { "epoch": 0.2588740210215546, "grad_norm": 1.6110973358154297, "learning_rate": 1.7871574730077222e-05, "loss": 1.5173, "step": 4735 }, { "epoch": 0.25892869346527614, "grad_norm": 1.2692303657531738, "learning_rate": 1.7870447572271542e-05, "loss": 1.5633, "step": 4736 }, { "epoch": 0.25898336590899773, "grad_norm": 1.4529223442077637, "learning_rate": 1.7869320151653148e-05, "loss": 1.4264, "step": 4737 }, { "epoch": 0.25903803835271927, "grad_norm": 1.6390410661697388, "learning_rate": 1.7868192468259686e-05, "loss": 1.5529, "step": 4738 }, { "epoch": 0.2590927107964408, "grad_norm": 1.4424772262573242, "learning_rate": 1.7867064522128817e-05, "loss": 1.5303, "step": 4739 }, { "epoch": 0.2591473832401624, "grad_norm": 1.406445860862732, "learning_rate": 1.7865936313298205e-05, "loss": 1.3644, "step": 4740 }, { "epoch": 0.25920205568388394, "grad_norm": 1.4253729581832886, "learning_rate": 1.786480784180552e-05, "loss": 1.4782, "step": 4741 }, { "epoch": 0.2592567281276055, "grad_norm": 1.7542665004730225, "learning_rate": 1.7863679107688444e-05, "loss": 1.3114, "step": 4742 }, { "epoch": 0.259311400571327, "grad_norm": 1.4786744117736816, "learning_rate": 1.7862550110984674e-05, "loss": 1.3899, "step": 4743 }, { "epoch": 0.2593660730150486, "grad_norm": 1.353837490081787, "learning_rate": 1.7861420851731903e-05, "loss": 1.2908, "step": 4744 }, { "epoch": 0.25942074545877014, "grad_norm": 1.1469672918319702, "learning_rate": 1.7860291329967842e-05, "loss": 1.4906, "step": 4745 }, { "epoch": 0.2594754179024917, "grad_norm": 1.7806313037872314, "learning_rate": 1.7859161545730206e-05, "loss": 1.3455, "step": 4746 }, { "epoch": 0.25953009034621327, "grad_norm": 1.6012941598892212, "learning_rate": 1.785803149905673e-05, "loss": 1.3355, "step": 4747 }, { "epoch": 0.2595847627899348, "grad_norm": 1.3295094966888428, "learning_rate": 1.7856901189985137e-05, "loss": 1.4039, "step": 4748 }, { "epoch": 0.25963943523365635, "grad_norm": 1.568803310394287, "learning_rate": 1.785577061855318e-05, "loss": 1.4448, "step": 4749 }, { "epoch": 0.2596941076773779, "grad_norm": 1.3341511487960815, "learning_rate": 1.7854639784798608e-05, "loss": 1.4174, "step": 4750 }, { "epoch": 0.2597487801210995, "grad_norm": 1.7393230199813843, "learning_rate": 1.785350868875918e-05, "loss": 1.3622, "step": 4751 }, { "epoch": 0.259803452564821, "grad_norm": 1.8050103187561035, "learning_rate": 1.7852377330472668e-05, "loss": 1.5225, "step": 4752 }, { "epoch": 0.25985812500854255, "grad_norm": 1.3181565999984741, "learning_rate": 1.7851245709976853e-05, "loss": 1.5297, "step": 4753 }, { "epoch": 0.25991279745226414, "grad_norm": 1.8359386920928955, "learning_rate": 1.785011382730952e-05, "loss": 1.3166, "step": 4754 }, { "epoch": 0.2599674698959857, "grad_norm": 1.5146281719207764, "learning_rate": 1.7848981682508465e-05, "loss": 1.6117, "step": 4755 }, { "epoch": 0.2600221423397072, "grad_norm": 1.2134591341018677, "learning_rate": 1.784784927561149e-05, "loss": 1.5127, "step": 4756 }, { "epoch": 0.26007681478342876, "grad_norm": 1.8399932384490967, "learning_rate": 1.7846716606656415e-05, "loss": 1.1575, "step": 4757 }, { "epoch": 0.26013148722715035, "grad_norm": 1.3472155332565308, "learning_rate": 1.784558367568106e-05, "loss": 1.8209, "step": 4758 }, { "epoch": 0.2601861596708719, "grad_norm": 1.5605567693710327, "learning_rate": 1.7844450482723258e-05, "loss": 1.3926, "step": 4759 }, { "epoch": 0.2602408321145934, "grad_norm": 1.6123135089874268, "learning_rate": 1.784331702782084e-05, "loss": 1.5099, "step": 4760 }, { "epoch": 0.260295504558315, "grad_norm": 1.720755934715271, "learning_rate": 1.7842183311011667e-05, "loss": 1.4712, "step": 4761 }, { "epoch": 0.26035017700203655, "grad_norm": 1.7723191976547241, "learning_rate": 1.7841049332333592e-05, "loss": 1.2875, "step": 4762 }, { "epoch": 0.2604048494457581, "grad_norm": 1.475468635559082, "learning_rate": 1.7839915091824476e-05, "loss": 1.5511, "step": 4763 }, { "epoch": 0.26045952188947963, "grad_norm": 1.6521270275115967, "learning_rate": 1.7838780589522203e-05, "loss": 1.4852, "step": 4764 }, { "epoch": 0.2605141943332012, "grad_norm": 1.5018755197525024, "learning_rate": 1.7837645825464646e-05, "loss": 1.4471, "step": 4765 }, { "epoch": 0.26056886677692276, "grad_norm": 1.4417965412139893, "learning_rate": 1.783651079968971e-05, "loss": 1.5692, "step": 4766 }, { "epoch": 0.2606235392206443, "grad_norm": 1.372140645980835, "learning_rate": 1.783537551223528e-05, "loss": 1.5257, "step": 4767 }, { "epoch": 0.2606782116643659, "grad_norm": 1.4483773708343506, "learning_rate": 1.7834239963139283e-05, "loss": 1.2428, "step": 4768 }, { "epoch": 0.2607328841080874, "grad_norm": 1.5936166048049927, "learning_rate": 1.783310415243963e-05, "loss": 1.5087, "step": 4769 }, { "epoch": 0.26078755655180896, "grad_norm": 1.9939810037612915, "learning_rate": 1.7831968080174247e-05, "loss": 1.3308, "step": 4770 }, { "epoch": 0.2608422289955305, "grad_norm": 1.3424746990203857, "learning_rate": 1.783083174638107e-05, "loss": 1.4472, "step": 4771 }, { "epoch": 0.2608969014392521, "grad_norm": 1.3248447179794312, "learning_rate": 1.7829695151098046e-05, "loss": 1.5676, "step": 4772 }, { "epoch": 0.26095157388297363, "grad_norm": 1.3976049423217773, "learning_rate": 1.782855829436313e-05, "loss": 1.4007, "step": 4773 }, { "epoch": 0.26100624632669517, "grad_norm": 1.5194740295410156, "learning_rate": 1.782742117621428e-05, "loss": 1.3753, "step": 4774 }, { "epoch": 0.26106091877041676, "grad_norm": 1.7428163290023804, "learning_rate": 1.782628379668947e-05, "loss": 1.5219, "step": 4775 }, { "epoch": 0.2611155912141383, "grad_norm": 1.5737932920455933, "learning_rate": 1.7825146155826682e-05, "loss": 1.3643, "step": 4776 }, { "epoch": 0.26117026365785984, "grad_norm": 1.4926769733428955, "learning_rate": 1.7824008253663897e-05, "loss": 0.9906, "step": 4777 }, { "epoch": 0.2612249361015814, "grad_norm": 1.5097147226333618, "learning_rate": 1.782287009023912e-05, "loss": 1.4564, "step": 4778 }, { "epoch": 0.26127960854530297, "grad_norm": 1.7790247201919556, "learning_rate": 1.782173166559035e-05, "loss": 1.5614, "step": 4779 }, { "epoch": 0.2613342809890245, "grad_norm": 1.404495358467102, "learning_rate": 1.7820592979755605e-05, "loss": 1.3192, "step": 4780 }, { "epoch": 0.26138895343274604, "grad_norm": 2.365861177444458, "learning_rate": 1.7819454032772913e-05, "loss": 1.489, "step": 4781 }, { "epoch": 0.26144362587646763, "grad_norm": 1.7717748880386353, "learning_rate": 1.78183148246803e-05, "loss": 1.762, "step": 4782 }, { "epoch": 0.26149829832018917, "grad_norm": 1.5505291223526, "learning_rate": 1.781717535551581e-05, "loss": 1.2203, "step": 4783 }, { "epoch": 0.2615529707639107, "grad_norm": 2.353236198425293, "learning_rate": 1.781603562531749e-05, "loss": 1.1122, "step": 4784 }, { "epoch": 0.26160764320763225, "grad_norm": 1.3540862798690796, "learning_rate": 1.7814895634123397e-05, "loss": 1.3571, "step": 4785 }, { "epoch": 0.26166231565135384, "grad_norm": 1.3466888666152954, "learning_rate": 1.7813755381971604e-05, "loss": 1.6278, "step": 4786 }, { "epoch": 0.2617169880950754, "grad_norm": 1.3868978023529053, "learning_rate": 1.7812614868900185e-05, "loss": 1.332, "step": 4787 }, { "epoch": 0.2617716605387969, "grad_norm": 1.5518462657928467, "learning_rate": 1.7811474094947222e-05, "loss": 1.6614, "step": 4788 }, { "epoch": 0.2618263329825185, "grad_norm": 1.4961826801300049, "learning_rate": 1.7810333060150803e-05, "loss": 1.428, "step": 4789 }, { "epoch": 0.26188100542624004, "grad_norm": 1.943381905555725, "learning_rate": 1.7809191764549042e-05, "loss": 1.1709, "step": 4790 }, { "epoch": 0.2619356778699616, "grad_norm": 1.5122489929199219, "learning_rate": 1.7808050208180037e-05, "loss": 1.4616, "step": 4791 }, { "epoch": 0.2619903503136832, "grad_norm": 1.5555213689804077, "learning_rate": 1.780690839108192e-05, "loss": 1.3996, "step": 4792 }, { "epoch": 0.2620450227574047, "grad_norm": 1.5178548097610474, "learning_rate": 1.780576631329281e-05, "loss": 1.7288, "step": 4793 }, { "epoch": 0.26209969520112625, "grad_norm": 1.5357476472854614, "learning_rate": 1.7804623974850844e-05, "loss": 1.5328, "step": 4794 }, { "epoch": 0.2621543676448478, "grad_norm": 1.8400671482086182, "learning_rate": 1.7803481375794174e-05, "loss": 1.2326, "step": 4795 }, { "epoch": 0.2622090400885694, "grad_norm": 1.3606356382369995, "learning_rate": 1.7802338516160947e-05, "loss": 1.598, "step": 4796 }, { "epoch": 0.2622637125322909, "grad_norm": 1.4135503768920898, "learning_rate": 1.7801195395989327e-05, "loss": 1.3736, "step": 4797 }, { "epoch": 0.26231838497601245, "grad_norm": 1.5777801275253296, "learning_rate": 1.7800052015317488e-05, "loss": 1.4879, "step": 4798 }, { "epoch": 0.26237305741973405, "grad_norm": 1.4662885665893555, "learning_rate": 1.7798908374183606e-05, "loss": 1.251, "step": 4799 }, { "epoch": 0.2624277298634556, "grad_norm": 1.2141985893249512, "learning_rate": 1.7797764472625874e-05, "loss": 1.6387, "step": 4800 }, { "epoch": 0.2624824023071771, "grad_norm": 1.6406952142715454, "learning_rate": 1.779662031068249e-05, "loss": 1.5272, "step": 4801 }, { "epoch": 0.26253707475089866, "grad_norm": 1.513818621635437, "learning_rate": 1.7795475888391656e-05, "loss": 1.4348, "step": 4802 }, { "epoch": 0.26259174719462025, "grad_norm": 1.544175386428833, "learning_rate": 1.7794331205791593e-05, "loss": 1.6994, "step": 4803 }, { "epoch": 0.2626464196383418, "grad_norm": 1.245303988456726, "learning_rate": 1.7793186262920517e-05, "loss": 1.3692, "step": 4804 }, { "epoch": 0.2627010920820633, "grad_norm": 2.3304948806762695, "learning_rate": 1.7792041059816668e-05, "loss": 1.5446, "step": 4805 }, { "epoch": 0.2627557645257849, "grad_norm": 1.7514978647232056, "learning_rate": 1.779089559651828e-05, "loss": 1.371, "step": 4806 }, { "epoch": 0.26281043696950646, "grad_norm": 1.431752324104309, "learning_rate": 1.778974987306361e-05, "loss": 1.4392, "step": 4807 }, { "epoch": 0.262865109413228, "grad_norm": 1.375279426574707, "learning_rate": 1.7788603889490907e-05, "loss": 1.4492, "step": 4808 }, { "epoch": 0.26291978185694953, "grad_norm": 1.51736319065094, "learning_rate": 1.778745764583845e-05, "loss": 1.0802, "step": 4809 }, { "epoch": 0.2629744543006711, "grad_norm": 2.479989767074585, "learning_rate": 1.7786311142144505e-05, "loss": 1.5172, "step": 4810 }, { "epoch": 0.26302912674439266, "grad_norm": 1.991146206855774, "learning_rate": 1.778516437844736e-05, "loss": 1.491, "step": 4811 }, { "epoch": 0.2630837991881142, "grad_norm": 1.5282096862792969, "learning_rate": 1.7784017354785307e-05, "loss": 1.2742, "step": 4812 }, { "epoch": 0.2631384716318358, "grad_norm": 1.4296045303344727, "learning_rate": 1.778287007119665e-05, "loss": 1.1747, "step": 4813 }, { "epoch": 0.26319314407555733, "grad_norm": 1.5745962858200073, "learning_rate": 1.77817225277197e-05, "loss": 1.4293, "step": 4814 }, { "epoch": 0.26324781651927887, "grad_norm": 1.4454532861709595, "learning_rate": 1.778057472439277e-05, "loss": 1.3059, "step": 4815 }, { "epoch": 0.2633024889630004, "grad_norm": 1.631398320198059, "learning_rate": 1.7779426661254196e-05, "loss": 1.3583, "step": 4816 }, { "epoch": 0.263357161406722, "grad_norm": 1.7783080339431763, "learning_rate": 1.777827833834231e-05, "loss": 1.6803, "step": 4817 }, { "epoch": 0.26341183385044353, "grad_norm": 1.745039463043213, "learning_rate": 1.7777129755695456e-05, "loss": 1.2471, "step": 4818 }, { "epoch": 0.26346650629416507, "grad_norm": 1.215528964996338, "learning_rate": 1.7775980913351994e-05, "loss": 1.5282, "step": 4819 }, { "epoch": 0.26352117873788666, "grad_norm": 1.5232702493667603, "learning_rate": 1.7774831811350278e-05, "loss": 1.3755, "step": 4820 }, { "epoch": 0.2635758511816082, "grad_norm": 1.7663395404815674, "learning_rate": 1.7773682449728684e-05, "loss": 1.2576, "step": 4821 }, { "epoch": 0.26363052362532974, "grad_norm": 1.1320291757583618, "learning_rate": 1.7772532828525593e-05, "loss": 1.4977, "step": 4822 }, { "epoch": 0.2636851960690513, "grad_norm": 1.4213978052139282, "learning_rate": 1.7771382947779393e-05, "loss": 1.4898, "step": 4823 }, { "epoch": 0.26373986851277287, "grad_norm": 1.675612211227417, "learning_rate": 1.7770232807528478e-05, "loss": 1.589, "step": 4824 }, { "epoch": 0.2637945409564944, "grad_norm": 1.5192664861679077, "learning_rate": 1.776908240781126e-05, "loss": 1.2134, "step": 4825 }, { "epoch": 0.26384921340021594, "grad_norm": 1.6956233978271484, "learning_rate": 1.7767931748666145e-05, "loss": 1.266, "step": 4826 }, { "epoch": 0.26390388584393754, "grad_norm": 1.337899923324585, "learning_rate": 1.7766780830131563e-05, "loss": 1.6142, "step": 4827 }, { "epoch": 0.2639585582876591, "grad_norm": 1.7776951789855957, "learning_rate": 1.7765629652245945e-05, "loss": 1.3983, "step": 4828 }, { "epoch": 0.2640132307313806, "grad_norm": 1.3666610717773438, "learning_rate": 1.7764478215047725e-05, "loss": 1.4586, "step": 4829 }, { "epoch": 0.26406790317510215, "grad_norm": 1.7045965194702148, "learning_rate": 1.7763326518575364e-05, "loss": 1.4049, "step": 4830 }, { "epoch": 0.26412257561882374, "grad_norm": 1.2960176467895508, "learning_rate": 1.776217456286731e-05, "loss": 1.3601, "step": 4831 }, { "epoch": 0.2641772480625453, "grad_norm": 1.4301174879074097, "learning_rate": 1.7761022347962034e-05, "loss": 1.5531, "step": 4832 }, { "epoch": 0.2642319205062668, "grad_norm": 1.2840015888214111, "learning_rate": 1.7759869873898008e-05, "loss": 1.5943, "step": 4833 }, { "epoch": 0.2642865929499884, "grad_norm": 1.406336784362793, "learning_rate": 1.775871714071372e-05, "loss": 1.391, "step": 4834 }, { "epoch": 0.26434126539370995, "grad_norm": 1.4666861295700073, "learning_rate": 1.7757564148447663e-05, "loss": 1.5605, "step": 4835 }, { "epoch": 0.2643959378374315, "grad_norm": 1.723723292350769, "learning_rate": 1.7756410897138326e-05, "loss": 1.4465, "step": 4836 }, { "epoch": 0.264450610281153, "grad_norm": 1.319774866104126, "learning_rate": 1.7755257386824238e-05, "loss": 1.2904, "step": 4837 }, { "epoch": 0.2645052827248746, "grad_norm": 1.09552800655365, "learning_rate": 1.7754103617543903e-05, "loss": 1.3498, "step": 4838 }, { "epoch": 0.26455995516859615, "grad_norm": 1.595521330833435, "learning_rate": 1.7752949589335853e-05, "loss": 1.437, "step": 4839 }, { "epoch": 0.2646146276123177, "grad_norm": 1.3700783252716064, "learning_rate": 1.7751795302238623e-05, "loss": 1.371, "step": 4840 }, { "epoch": 0.2646693000560393, "grad_norm": 1.6703859567642212, "learning_rate": 1.775064075629076e-05, "loss": 1.273, "step": 4841 }, { "epoch": 0.2647239724997608, "grad_norm": 1.5288039445877075, "learning_rate": 1.7749485951530815e-05, "loss": 1.3543, "step": 4842 }, { "epoch": 0.26477864494348236, "grad_norm": 1.5764752626419067, "learning_rate": 1.7748330887997344e-05, "loss": 1.5956, "step": 4843 }, { "epoch": 0.2648333173872039, "grad_norm": 1.2984107732772827, "learning_rate": 1.7747175565728928e-05, "loss": 1.4664, "step": 4844 }, { "epoch": 0.2648879898309255, "grad_norm": 1.519096851348877, "learning_rate": 1.7746019984764138e-05, "loss": 1.1728, "step": 4845 }, { "epoch": 0.264942662274647, "grad_norm": 1.2559075355529785, "learning_rate": 1.7744864145141564e-05, "loss": 1.5107, "step": 4846 }, { "epoch": 0.26499733471836856, "grad_norm": 1.4435279369354248, "learning_rate": 1.7743708046899804e-05, "loss": 1.3282, "step": 4847 }, { "epoch": 0.26505200716209015, "grad_norm": 1.4047778844833374, "learning_rate": 1.774255169007746e-05, "loss": 1.5396, "step": 4848 }, { "epoch": 0.2651066796058117, "grad_norm": 1.8554402589797974, "learning_rate": 1.7741395074713146e-05, "loss": 1.5085, "step": 4849 }, { "epoch": 0.2651613520495332, "grad_norm": 1.6568788290023804, "learning_rate": 1.7740238200845485e-05, "loss": 1.2477, "step": 4850 }, { "epoch": 0.26521602449325477, "grad_norm": 1.6700959205627441, "learning_rate": 1.773908106851311e-05, "loss": 1.6203, "step": 4851 }, { "epoch": 0.26527069693697636, "grad_norm": 1.3452244997024536, "learning_rate": 1.7737923677754657e-05, "loss": 1.7732, "step": 4852 }, { "epoch": 0.2653253693806979, "grad_norm": 1.3845970630645752, "learning_rate": 1.7736766028608768e-05, "loss": 1.5005, "step": 4853 }, { "epoch": 0.26538004182441943, "grad_norm": 1.3344777822494507, "learning_rate": 1.7735608121114112e-05, "loss": 1.5754, "step": 4854 }, { "epoch": 0.265434714268141, "grad_norm": 1.6433460712432861, "learning_rate": 1.7734449955309353e-05, "loss": 1.7111, "step": 4855 }, { "epoch": 0.26548938671186256, "grad_norm": 1.440935730934143, "learning_rate": 1.7733291531233156e-05, "loss": 1.392, "step": 4856 }, { "epoch": 0.2655440591555841, "grad_norm": 1.3704228401184082, "learning_rate": 1.7732132848924206e-05, "loss": 1.5482, "step": 4857 }, { "epoch": 0.26559873159930564, "grad_norm": 1.0732691287994385, "learning_rate": 1.77309739084212e-05, "loss": 1.5118, "step": 4858 }, { "epoch": 0.26565340404302723, "grad_norm": 1.5596388578414917, "learning_rate": 1.772981470976283e-05, "loss": 1.391, "step": 4859 }, { "epoch": 0.26570807648674877, "grad_norm": 1.6859889030456543, "learning_rate": 1.7728655252987808e-05, "loss": 1.4435, "step": 4860 }, { "epoch": 0.2657627489304703, "grad_norm": 1.8869062662124634, "learning_rate": 1.7727495538134857e-05, "loss": 1.6553, "step": 4861 }, { "epoch": 0.2658174213741919, "grad_norm": 1.4976181983947754, "learning_rate": 1.7726335565242693e-05, "loss": 1.2643, "step": 4862 }, { "epoch": 0.26587209381791344, "grad_norm": 1.4781982898712158, "learning_rate": 1.7725175334350057e-05, "loss": 1.3071, "step": 4863 }, { "epoch": 0.265926766261635, "grad_norm": 1.272583246231079, "learning_rate": 1.7724014845495684e-05, "loss": 1.674, "step": 4864 }, { "epoch": 0.2659814387053565, "grad_norm": 2.0739216804504395, "learning_rate": 1.7722854098718333e-05, "loss": 1.3504, "step": 4865 }, { "epoch": 0.2660361111490781, "grad_norm": 1.4938768148422241, "learning_rate": 1.7721693094056762e-05, "loss": 1.1293, "step": 4866 }, { "epoch": 0.26609078359279964, "grad_norm": 1.1363166570663452, "learning_rate": 1.772053183154974e-05, "loss": 1.707, "step": 4867 }, { "epoch": 0.2661454560365212, "grad_norm": 1.3557071685791016, "learning_rate": 1.7719370311236042e-05, "loss": 1.294, "step": 4868 }, { "epoch": 0.26620012848024277, "grad_norm": 1.4136649370193481, "learning_rate": 1.7718208533154454e-05, "loss": 1.3165, "step": 4869 }, { "epoch": 0.2662548009239643, "grad_norm": 1.8025085926055908, "learning_rate": 1.7717046497343773e-05, "loss": 1.206, "step": 4870 }, { "epoch": 0.26630947336768584, "grad_norm": 1.345123052597046, "learning_rate": 1.77158842038428e-05, "loss": 1.2298, "step": 4871 }, { "epoch": 0.2663641458114074, "grad_norm": 1.7396483421325684, "learning_rate": 1.7714721652690347e-05, "loss": 1.2886, "step": 4872 }, { "epoch": 0.266418818255129, "grad_norm": 2.85117506980896, "learning_rate": 1.7713558843925235e-05, "loss": 1.1882, "step": 4873 }, { "epoch": 0.2664734906988505, "grad_norm": 1.2095752954483032, "learning_rate": 1.7712395777586294e-05, "loss": 1.4117, "step": 4874 }, { "epoch": 0.26652816314257205, "grad_norm": 1.919175624847412, "learning_rate": 1.7711232453712363e-05, "loss": 1.2568, "step": 4875 }, { "epoch": 0.26658283558629364, "grad_norm": 1.5352392196655273, "learning_rate": 1.771006887234228e-05, "loss": 1.3045, "step": 4876 }, { "epoch": 0.2666375080300152, "grad_norm": 1.7387962341308594, "learning_rate": 1.7708905033514908e-05, "loss": 1.5121, "step": 4877 }, { "epoch": 0.2666921804737367, "grad_norm": 1.809617042541504, "learning_rate": 1.7707740937269108e-05, "loss": 1.1721, "step": 4878 }, { "epoch": 0.26674685291745825, "grad_norm": 1.3300517797470093, "learning_rate": 1.7706576583643748e-05, "loss": 1.3455, "step": 4879 }, { "epoch": 0.26680152536117985, "grad_norm": 1.7842744588851929, "learning_rate": 1.7705411972677713e-05, "loss": 1.3233, "step": 4880 }, { "epoch": 0.2668561978049014, "grad_norm": 1.5359196662902832, "learning_rate": 1.7704247104409893e-05, "loss": 1.2969, "step": 4881 }, { "epoch": 0.2669108702486229, "grad_norm": 1.304840326309204, "learning_rate": 1.7703081978879183e-05, "loss": 1.3255, "step": 4882 }, { "epoch": 0.2669655426923445, "grad_norm": 1.772865891456604, "learning_rate": 1.770191659612449e-05, "loss": 1.231, "step": 4883 }, { "epoch": 0.26702021513606605, "grad_norm": 1.262390375137329, "learning_rate": 1.7700750956184728e-05, "loss": 1.5607, "step": 4884 }, { "epoch": 0.2670748875797876, "grad_norm": 1.5255728960037231, "learning_rate": 1.769958505909882e-05, "loss": 1.4215, "step": 4885 }, { "epoch": 0.2671295600235091, "grad_norm": 1.4258999824523926, "learning_rate": 1.76984189049057e-05, "loss": 1.3171, "step": 4886 }, { "epoch": 0.2671842324672307, "grad_norm": 1.6348185539245605, "learning_rate": 1.769725249364431e-05, "loss": 1.3341, "step": 4887 }, { "epoch": 0.26723890491095226, "grad_norm": 1.4293872117996216, "learning_rate": 1.7696085825353593e-05, "loss": 1.5486, "step": 4888 }, { "epoch": 0.2672935773546738, "grad_norm": 1.5589540004730225, "learning_rate": 1.7694918900072515e-05, "loss": 1.5037, "step": 4889 }, { "epoch": 0.2673482497983954, "grad_norm": 1.3885488510131836, "learning_rate": 1.769375171784004e-05, "loss": 1.494, "step": 4890 }, { "epoch": 0.2674029222421169, "grad_norm": 1.4815633296966553, "learning_rate": 1.7692584278695134e-05, "loss": 1.442, "step": 4891 }, { "epoch": 0.26745759468583846, "grad_norm": 1.5518940687179565, "learning_rate": 1.7691416582676792e-05, "loss": 1.5762, "step": 4892 }, { "epoch": 0.26751226712956, "grad_norm": 1.414746880531311, "learning_rate": 1.7690248629824003e-05, "loss": 1.455, "step": 4893 }, { "epoch": 0.2675669395732816, "grad_norm": 1.3632968664169312, "learning_rate": 1.7689080420175764e-05, "loss": 1.2678, "step": 4894 }, { "epoch": 0.26762161201700313, "grad_norm": 1.335568904876709, "learning_rate": 1.7687911953771086e-05, "loss": 1.4326, "step": 4895 }, { "epoch": 0.26767628446072467, "grad_norm": 1.6831392049789429, "learning_rate": 1.768674323064899e-05, "loss": 1.5257, "step": 4896 }, { "epoch": 0.26773095690444626, "grad_norm": 1.9737403392791748, "learning_rate": 1.76855742508485e-05, "loss": 1.5465, "step": 4897 }, { "epoch": 0.2677856293481678, "grad_norm": 1.8263274431228638, "learning_rate": 1.768440501440865e-05, "loss": 1.5227, "step": 4898 }, { "epoch": 0.26784030179188933, "grad_norm": 1.4867280721664429, "learning_rate": 1.7683235521368484e-05, "loss": 1.5081, "step": 4899 }, { "epoch": 0.26789497423561087, "grad_norm": 1.6111347675323486, "learning_rate": 1.7682065771767055e-05, "loss": 1.3475, "step": 4900 }, { "epoch": 0.26794964667933246, "grad_norm": 1.4320690631866455, "learning_rate": 1.7680895765643423e-05, "loss": 1.4813, "step": 4901 }, { "epoch": 0.268004319123054, "grad_norm": 1.4706928730010986, "learning_rate": 1.767972550303666e-05, "loss": 1.4132, "step": 4902 }, { "epoch": 0.26805899156677554, "grad_norm": 1.5825941562652588, "learning_rate": 1.767855498398584e-05, "loss": 1.1309, "step": 4903 }, { "epoch": 0.26811366401049713, "grad_norm": 1.1091203689575195, "learning_rate": 1.767738420853005e-05, "loss": 1.5835, "step": 4904 }, { "epoch": 0.26816833645421867, "grad_norm": 1.5464118719100952, "learning_rate": 1.7676213176708387e-05, "loss": 1.2273, "step": 4905 }, { "epoch": 0.2682230088979402, "grad_norm": 1.472939372062683, "learning_rate": 1.7675041888559952e-05, "loss": 1.3612, "step": 4906 }, { "epoch": 0.26827768134166174, "grad_norm": 1.356412410736084, "learning_rate": 1.767387034412386e-05, "loss": 1.3891, "step": 4907 }, { "epoch": 0.26833235378538334, "grad_norm": 1.3866515159606934, "learning_rate": 1.7672698543439228e-05, "loss": 1.338, "step": 4908 }, { "epoch": 0.2683870262291049, "grad_norm": 1.5951156616210938, "learning_rate": 1.7671526486545188e-05, "loss": 1.5986, "step": 4909 }, { "epoch": 0.2684416986728264, "grad_norm": 1.1878211498260498, "learning_rate": 1.7670354173480876e-05, "loss": 1.6748, "step": 4910 }, { "epoch": 0.268496371116548, "grad_norm": 1.7206997871398926, "learning_rate": 1.766918160428544e-05, "loss": 1.5328, "step": 4911 }, { "epoch": 0.26855104356026954, "grad_norm": 1.5691081285476685, "learning_rate": 1.7668008778998034e-05, "loss": 1.5707, "step": 4912 }, { "epoch": 0.2686057160039911, "grad_norm": 1.3178988695144653, "learning_rate": 1.7666835697657824e-05, "loss": 1.6386, "step": 4913 }, { "epoch": 0.2686603884477126, "grad_norm": 1.9852588176727295, "learning_rate": 1.7665662360303972e-05, "loss": 1.3335, "step": 4914 }, { "epoch": 0.2687150608914342, "grad_norm": 1.7425487041473389, "learning_rate": 1.7664488766975673e-05, "loss": 1.4609, "step": 4915 }, { "epoch": 0.26876973333515575, "grad_norm": 1.7783005237579346, "learning_rate": 1.7663314917712103e-05, "loss": 1.204, "step": 4916 }, { "epoch": 0.2688244057788773, "grad_norm": 2.1767239570617676, "learning_rate": 1.766214081255247e-05, "loss": 1.5827, "step": 4917 }, { "epoch": 0.2688790782225989, "grad_norm": 2.3053483963012695, "learning_rate": 1.7660966451535974e-05, "loss": 1.3592, "step": 4918 }, { "epoch": 0.2689337506663204, "grad_norm": 1.55513596534729, "learning_rate": 1.765979183470183e-05, "loss": 1.4726, "step": 4919 }, { "epoch": 0.26898842311004195, "grad_norm": 1.918203353881836, "learning_rate": 1.7658616962089262e-05, "loss": 1.6618, "step": 4920 }, { "epoch": 0.2690430955537635, "grad_norm": 1.551023244857788, "learning_rate": 1.7657441833737505e-05, "loss": 1.356, "step": 4921 }, { "epoch": 0.2690977679974851, "grad_norm": 1.5033167600631714, "learning_rate": 1.7656266449685796e-05, "loss": 1.2436, "step": 4922 }, { "epoch": 0.2691524404412066, "grad_norm": 1.2884728908538818, "learning_rate": 1.765509080997338e-05, "loss": 1.387, "step": 4923 }, { "epoch": 0.26920711288492816, "grad_norm": 2.486550807952881, "learning_rate": 1.7653914914639524e-05, "loss": 1.52, "step": 4924 }, { "epoch": 0.26926178532864975, "grad_norm": 1.4567091464996338, "learning_rate": 1.7652738763723484e-05, "loss": 1.4374, "step": 4925 }, { "epoch": 0.2693164577723713, "grad_norm": 1.671915054321289, "learning_rate": 1.7651562357264543e-05, "loss": 1.4723, "step": 4926 }, { "epoch": 0.2693711302160928, "grad_norm": 1.528016448020935, "learning_rate": 1.765038569530198e-05, "loss": 1.5009, "step": 4927 }, { "epoch": 0.26942580265981436, "grad_norm": 1.7757444381713867, "learning_rate": 1.764920877787508e-05, "loss": 1.5319, "step": 4928 }, { "epoch": 0.26948047510353595, "grad_norm": 1.4623239040374756, "learning_rate": 1.764803160502316e-05, "loss": 1.4601, "step": 4929 }, { "epoch": 0.2695351475472575, "grad_norm": 1.3167285919189453, "learning_rate": 1.764685417678551e-05, "loss": 1.2515, "step": 4930 }, { "epoch": 0.26958981999097903, "grad_norm": 1.1040380001068115, "learning_rate": 1.7645676493201455e-05, "loss": 1.6779, "step": 4931 }, { "epoch": 0.2696444924347006, "grad_norm": 1.4328070878982544, "learning_rate": 1.7644498554310322e-05, "loss": 1.3207, "step": 4932 }, { "epoch": 0.26969916487842216, "grad_norm": 1.5192644596099854, "learning_rate": 1.764332036015145e-05, "loss": 1.4718, "step": 4933 }, { "epoch": 0.2697538373221437, "grad_norm": 1.564017415046692, "learning_rate": 1.7642141910764164e-05, "loss": 1.1476, "step": 4934 }, { "epoch": 0.26980850976586523, "grad_norm": 1.4567586183547974, "learning_rate": 1.7640963206187835e-05, "loss": 1.4527, "step": 4935 }, { "epoch": 0.2698631822095868, "grad_norm": 1.6464571952819824, "learning_rate": 1.7639784246461813e-05, "loss": 1.4857, "step": 4936 }, { "epoch": 0.26991785465330836, "grad_norm": 1.8078258037567139, "learning_rate": 1.7638605031625467e-05, "loss": 1.3557, "step": 4937 }, { "epoch": 0.2699725270970299, "grad_norm": 1.5283628702163696, "learning_rate": 1.7637425561718176e-05, "loss": 1.3189, "step": 4938 }, { "epoch": 0.2700271995407515, "grad_norm": 1.4672714471817017, "learning_rate": 1.763624583677932e-05, "loss": 1.5694, "step": 4939 }, { "epoch": 0.27008187198447303, "grad_norm": 1.759588599205017, "learning_rate": 1.76350658568483e-05, "loss": 1.314, "step": 4940 }, { "epoch": 0.27013654442819457, "grad_norm": 1.4039714336395264, "learning_rate": 1.7633885621964516e-05, "loss": 1.2288, "step": 4941 }, { "epoch": 0.2701912168719161, "grad_norm": 1.3297122716903687, "learning_rate": 1.7632705132167377e-05, "loss": 1.4551, "step": 4942 }, { "epoch": 0.2702458893156377, "grad_norm": 1.171952247619629, "learning_rate": 1.76315243874963e-05, "loss": 1.3042, "step": 4943 }, { "epoch": 0.27030056175935924, "grad_norm": 1.7469933032989502, "learning_rate": 1.7630343387990713e-05, "loss": 1.5099, "step": 4944 }, { "epoch": 0.2703552342030808, "grad_norm": 1.2750104665756226, "learning_rate": 1.7629162133690063e-05, "loss": 1.5489, "step": 4945 }, { "epoch": 0.27040990664680237, "grad_norm": 1.207187294960022, "learning_rate": 1.762798062463378e-05, "loss": 1.514, "step": 4946 }, { "epoch": 0.2704645790905239, "grad_norm": 1.3879555463790894, "learning_rate": 1.762679886086133e-05, "loss": 1.4011, "step": 4947 }, { "epoch": 0.27051925153424544, "grad_norm": 1.6135398149490356, "learning_rate": 1.7625616842412166e-05, "loss": 1.5827, "step": 4948 }, { "epoch": 0.270573923977967, "grad_norm": 1.3551523685455322, "learning_rate": 1.7624434569325762e-05, "loss": 1.398, "step": 4949 }, { "epoch": 0.27062859642168857, "grad_norm": 1.9636449813842773, "learning_rate": 1.7623252041641596e-05, "loss": 1.6691, "step": 4950 }, { "epoch": 0.2706832688654101, "grad_norm": 1.332996129989624, "learning_rate": 1.7622069259399158e-05, "loss": 1.5493, "step": 4951 }, { "epoch": 0.27073794130913165, "grad_norm": 1.6023917198181152, "learning_rate": 1.762088622263794e-05, "loss": 1.5173, "step": 4952 }, { "epoch": 0.27079261375285324, "grad_norm": 1.2521692514419556, "learning_rate": 1.7619702931397448e-05, "loss": 1.3139, "step": 4953 }, { "epoch": 0.2708472861965748, "grad_norm": 1.3119210004806519, "learning_rate": 1.7618519385717194e-05, "loss": 1.4859, "step": 4954 }, { "epoch": 0.2709019586402963, "grad_norm": 1.426761507987976, "learning_rate": 1.76173355856367e-05, "loss": 1.3694, "step": 4955 }, { "epoch": 0.27095663108401785, "grad_norm": 1.4127161502838135, "learning_rate": 1.76161515311955e-05, "loss": 1.2839, "step": 4956 }, { "epoch": 0.27101130352773944, "grad_norm": 2.3807644844055176, "learning_rate": 1.7614967222433125e-05, "loss": 1.2477, "step": 4957 }, { "epoch": 0.271065975971461, "grad_norm": 1.6977458000183105, "learning_rate": 1.761378265938913e-05, "loss": 1.1997, "step": 4958 }, { "epoch": 0.2711206484151825, "grad_norm": 1.5921976566314697, "learning_rate": 1.761259784210306e-05, "loss": 1.5265, "step": 4959 }, { "epoch": 0.2711753208589041, "grad_norm": 1.2335926294326782, "learning_rate": 1.7611412770614487e-05, "loss": 1.5416, "step": 4960 }, { "epoch": 0.27122999330262565, "grad_norm": 1.8672195672988892, "learning_rate": 1.761022744496298e-05, "loss": 1.5124, "step": 4961 }, { "epoch": 0.2712846657463472, "grad_norm": 1.3101022243499756, "learning_rate": 1.7609041865188122e-05, "loss": 1.2, "step": 4962 }, { "epoch": 0.2713393381900687, "grad_norm": 1.6110732555389404, "learning_rate": 1.7607856031329497e-05, "loss": 1.4601, "step": 4963 }, { "epoch": 0.2713940106337903, "grad_norm": 1.3930702209472656, "learning_rate": 1.760666994342671e-05, "loss": 1.4754, "step": 4964 }, { "epoch": 0.27144868307751185, "grad_norm": 1.699445128440857, "learning_rate": 1.7605483601519366e-05, "loss": 1.4032, "step": 4965 }, { "epoch": 0.2715033555212334, "grad_norm": 1.8506444692611694, "learning_rate": 1.7604297005647076e-05, "loss": 1.4251, "step": 4966 }, { "epoch": 0.271558027964955, "grad_norm": 1.4346524477005005, "learning_rate": 1.7603110155849463e-05, "loss": 1.3961, "step": 4967 }, { "epoch": 0.2716127004086765, "grad_norm": 1.657562255859375, "learning_rate": 1.7601923052166162e-05, "loss": 1.4082, "step": 4968 }, { "epoch": 0.27166737285239806, "grad_norm": 1.3068304061889648, "learning_rate": 1.7600735694636814e-05, "loss": 1.6154, "step": 4969 }, { "epoch": 0.2717220452961196, "grad_norm": 1.9301601648330688, "learning_rate": 1.759954808330106e-05, "loss": 1.4628, "step": 4970 }, { "epoch": 0.2717767177398412, "grad_norm": 1.4735784530639648, "learning_rate": 1.759836021819857e-05, "loss": 1.5681, "step": 4971 }, { "epoch": 0.2718313901835627, "grad_norm": 1.456135630607605, "learning_rate": 1.7597172099368998e-05, "loss": 1.246, "step": 4972 }, { "epoch": 0.27188606262728426, "grad_norm": 1.6424654722213745, "learning_rate": 1.7595983726852022e-05, "loss": 1.2248, "step": 4973 }, { "epoch": 0.27194073507100586, "grad_norm": 1.4727566242218018, "learning_rate": 1.7594795100687324e-05, "loss": 1.3829, "step": 4974 }, { "epoch": 0.2719954075147274, "grad_norm": 1.5948652029037476, "learning_rate": 1.75936062209146e-05, "loss": 1.568, "step": 4975 }, { "epoch": 0.27205007995844893, "grad_norm": 1.6440058946609497, "learning_rate": 1.759241708757354e-05, "loss": 1.3627, "step": 4976 }, { "epoch": 0.27210475240217047, "grad_norm": 1.3630974292755127, "learning_rate": 1.7591227700703858e-05, "loss": 1.4378, "step": 4977 }, { "epoch": 0.27215942484589206, "grad_norm": 1.894091010093689, "learning_rate": 1.7590038060345277e-05, "loss": 1.5336, "step": 4978 }, { "epoch": 0.2722140972896136, "grad_norm": 1.406640887260437, "learning_rate": 1.7588848166537507e-05, "loss": 1.4084, "step": 4979 }, { "epoch": 0.27226876973333514, "grad_norm": 1.4632987976074219, "learning_rate": 1.758765801932029e-05, "loss": 1.4519, "step": 4980 }, { "epoch": 0.27232344217705673, "grad_norm": 2.116274356842041, "learning_rate": 1.7586467618733368e-05, "loss": 1.4879, "step": 4981 }, { "epoch": 0.27237811462077827, "grad_norm": 1.9136592149734497, "learning_rate": 1.758527696481649e-05, "loss": 1.4458, "step": 4982 }, { "epoch": 0.2724327870644998, "grad_norm": 1.4578038454055786, "learning_rate": 1.7584086057609413e-05, "loss": 1.5329, "step": 4983 }, { "epoch": 0.27248745950822134, "grad_norm": 1.2614167928695679, "learning_rate": 1.7582894897151908e-05, "loss": 1.7671, "step": 4984 }, { "epoch": 0.27254213195194293, "grad_norm": 1.4943108558654785, "learning_rate": 1.758170348348375e-05, "loss": 1.5715, "step": 4985 }, { "epoch": 0.27259680439566447, "grad_norm": 1.4294036626815796, "learning_rate": 1.7580511816644718e-05, "loss": 1.6488, "step": 4986 }, { "epoch": 0.272651476839386, "grad_norm": 1.2907284498214722, "learning_rate": 1.7579319896674612e-05, "loss": 1.5536, "step": 4987 }, { "epoch": 0.2727061492831076, "grad_norm": 1.5603901147842407, "learning_rate": 1.7578127723613224e-05, "loss": 1.178, "step": 4988 }, { "epoch": 0.27276082172682914, "grad_norm": 1.7525216341018677, "learning_rate": 1.7576935297500374e-05, "loss": 1.174, "step": 4989 }, { "epoch": 0.2728154941705507, "grad_norm": 1.3746180534362793, "learning_rate": 1.757574261837587e-05, "loss": 1.4028, "step": 4990 }, { "epoch": 0.2728701666142722, "grad_norm": 1.4484531879425049, "learning_rate": 1.7574549686279545e-05, "loss": 1.2463, "step": 4991 }, { "epoch": 0.2729248390579938, "grad_norm": 1.4099116325378418, "learning_rate": 1.7573356501251235e-05, "loss": 1.5059, "step": 4992 }, { "epoch": 0.27297951150171534, "grad_norm": 1.8263530731201172, "learning_rate": 1.7572163063330773e-05, "loss": 1.4725, "step": 4993 }, { "epoch": 0.2730341839454369, "grad_norm": 1.2141705751419067, "learning_rate": 1.7570969372558023e-05, "loss": 1.5304, "step": 4994 }, { "epoch": 0.2730888563891585, "grad_norm": 1.533989667892456, "learning_rate": 1.756977542897284e-05, "loss": 1.4878, "step": 4995 }, { "epoch": 0.27314352883288, "grad_norm": 1.3128925561904907, "learning_rate": 1.756858123261509e-05, "loss": 1.1856, "step": 4996 }, { "epoch": 0.27319820127660155, "grad_norm": 1.2581030130386353, "learning_rate": 1.7567386783524655e-05, "loss": 1.5095, "step": 4997 }, { "epoch": 0.27325287372032314, "grad_norm": 1.4263269901275635, "learning_rate": 1.7566192081741416e-05, "loss": 1.5168, "step": 4998 }, { "epoch": 0.2733075461640447, "grad_norm": 1.3214389085769653, "learning_rate": 1.7564997127305268e-05, "loss": 1.3939, "step": 4999 }, { "epoch": 0.2733622186077662, "grad_norm": 1.7175710201263428, "learning_rate": 1.7563801920256115e-05, "loss": 1.197, "step": 5000 }, { "epoch": 0.27341689105148775, "grad_norm": 1.8216233253479004, "learning_rate": 1.7562606460633867e-05, "loss": 1.3556, "step": 5001 }, { "epoch": 0.27347156349520935, "grad_norm": 1.6116775274276733, "learning_rate": 1.7561410748478443e-05, "loss": 1.5211, "step": 5002 }, { "epoch": 0.2735262359389309, "grad_norm": 1.6491804122924805, "learning_rate": 1.756021478382977e-05, "loss": 1.5309, "step": 5003 }, { "epoch": 0.2735809083826524, "grad_norm": 1.4940252304077148, "learning_rate": 1.7559018566727788e-05, "loss": 1.251, "step": 5004 }, { "epoch": 0.273635580826374, "grad_norm": 1.5629616975784302, "learning_rate": 1.7557822097212433e-05, "loss": 1.6642, "step": 5005 }, { "epoch": 0.27369025327009555, "grad_norm": 1.5489624738693237, "learning_rate": 1.755662537532367e-05, "loss": 1.3745, "step": 5006 }, { "epoch": 0.2737449257138171, "grad_norm": 1.8549931049346924, "learning_rate": 1.7555428401101445e-05, "loss": 1.5186, "step": 5007 }, { "epoch": 0.2737995981575386, "grad_norm": 1.4068785905838013, "learning_rate": 1.7554231174585742e-05, "loss": 1.3802, "step": 5008 }, { "epoch": 0.2738542706012602, "grad_norm": 1.4932836294174194, "learning_rate": 1.755303369581653e-05, "loss": 1.4718, "step": 5009 }, { "epoch": 0.27390894304498176, "grad_norm": 1.4905953407287598, "learning_rate": 1.7551835964833803e-05, "loss": 1.3513, "step": 5010 }, { "epoch": 0.2739636154887033, "grad_norm": 1.4248664379119873, "learning_rate": 1.755063798167755e-05, "loss": 1.3437, "step": 5011 }, { "epoch": 0.2740182879324249, "grad_norm": 1.5828442573547363, "learning_rate": 1.7549439746387776e-05, "loss": 1.3477, "step": 5012 }, { "epoch": 0.2740729603761464, "grad_norm": 1.2672665119171143, "learning_rate": 1.7548241259004496e-05, "loss": 1.4177, "step": 5013 }, { "epoch": 0.27412763281986796, "grad_norm": 1.3540939092636108, "learning_rate": 1.7547042519567728e-05, "loss": 1.5236, "step": 5014 }, { "epoch": 0.2741823052635895, "grad_norm": 1.582667589187622, "learning_rate": 1.75458435281175e-05, "loss": 1.3902, "step": 5015 }, { "epoch": 0.2742369777073111, "grad_norm": 1.1157463788986206, "learning_rate": 1.7544644284693847e-05, "loss": 1.3112, "step": 5016 }, { "epoch": 0.27429165015103263, "grad_norm": 1.7181696891784668, "learning_rate": 1.754344478933682e-05, "loss": 1.745, "step": 5017 }, { "epoch": 0.27434632259475417, "grad_norm": 1.3849804401397705, "learning_rate": 1.754224504208647e-05, "loss": 1.5046, "step": 5018 }, { "epoch": 0.27440099503847576, "grad_norm": 1.5611021518707275, "learning_rate": 1.754104504298286e-05, "loss": 1.4666, "step": 5019 }, { "epoch": 0.2744556674821973, "grad_norm": 1.6044939756393433, "learning_rate": 1.753984479206606e-05, "loss": 1.3564, "step": 5020 }, { "epoch": 0.27451033992591883, "grad_norm": 1.2452678680419922, "learning_rate": 1.7538644289376147e-05, "loss": 1.3855, "step": 5021 }, { "epoch": 0.27456501236964037, "grad_norm": 1.2720526456832886, "learning_rate": 1.7537443534953213e-05, "loss": 1.467, "step": 5022 }, { "epoch": 0.27461968481336196, "grad_norm": 1.511444091796875, "learning_rate": 1.753624252883735e-05, "loss": 1.3462, "step": 5023 }, { "epoch": 0.2746743572570835, "grad_norm": 1.6427950859069824, "learning_rate": 1.753504127106867e-05, "loss": 1.3293, "step": 5024 }, { "epoch": 0.27472902970080504, "grad_norm": 1.4970898628234863, "learning_rate": 1.7533839761687278e-05, "loss": 1.4735, "step": 5025 }, { "epoch": 0.27478370214452663, "grad_norm": 1.409195065498352, "learning_rate": 1.7532638000733296e-05, "loss": 1.3541, "step": 5026 }, { "epoch": 0.27483837458824817, "grad_norm": 2.536038398742676, "learning_rate": 1.7531435988246857e-05, "loss": 1.0408, "step": 5027 }, { "epoch": 0.2748930470319697, "grad_norm": 2.023815155029297, "learning_rate": 1.75302337242681e-05, "loss": 1.152, "step": 5028 }, { "epoch": 0.27494771947569124, "grad_norm": 1.5396796464920044, "learning_rate": 1.7529031208837165e-05, "loss": 1.5641, "step": 5029 }, { "epoch": 0.27500239191941284, "grad_norm": 1.667946696281433, "learning_rate": 1.752782844199421e-05, "loss": 1.0919, "step": 5030 }, { "epoch": 0.2750570643631344, "grad_norm": 1.4602162837982178, "learning_rate": 1.75266254237794e-05, "loss": 1.309, "step": 5031 }, { "epoch": 0.2751117368068559, "grad_norm": 1.2699148654937744, "learning_rate": 1.7525422154232906e-05, "loss": 1.4658, "step": 5032 }, { "epoch": 0.2751664092505775, "grad_norm": 1.3893980979919434, "learning_rate": 1.7524218633394904e-05, "loss": 1.727, "step": 5033 }, { "epoch": 0.27522108169429904, "grad_norm": 1.4716377258300781, "learning_rate": 1.7523014861305588e-05, "loss": 1.4562, "step": 5034 }, { "epoch": 0.2752757541380206, "grad_norm": 1.2013696432113647, "learning_rate": 1.7521810838005154e-05, "loss": 1.5505, "step": 5035 }, { "epoch": 0.2753304265817421, "grad_norm": 1.4184387922286987, "learning_rate": 1.75206065635338e-05, "loss": 1.3774, "step": 5036 }, { "epoch": 0.2753850990254637, "grad_norm": 1.6598355770111084, "learning_rate": 1.751940203793175e-05, "loss": 1.2542, "step": 5037 }, { "epoch": 0.27543977146918525, "grad_norm": 1.4817473888397217, "learning_rate": 1.751819726123922e-05, "loss": 1.5075, "step": 5038 }, { "epoch": 0.2754944439129068, "grad_norm": 1.6555449962615967, "learning_rate": 1.7516992233496443e-05, "loss": 1.4454, "step": 5039 }, { "epoch": 0.2755491163566284, "grad_norm": 3.6615893840789795, "learning_rate": 1.7515786954743657e-05, "loss": 1.5324, "step": 5040 }, { "epoch": 0.2756037888003499, "grad_norm": 1.3737199306488037, "learning_rate": 1.7514581425021107e-05, "loss": 1.313, "step": 5041 }, { "epoch": 0.27565846124407145, "grad_norm": 1.638536810874939, "learning_rate": 1.7513375644369048e-05, "loss": 1.6003, "step": 5042 }, { "epoch": 0.275713133687793, "grad_norm": 1.821294903755188, "learning_rate": 1.7512169612827748e-05, "loss": 1.312, "step": 5043 }, { "epoch": 0.2757678061315146, "grad_norm": 2.370626449584961, "learning_rate": 1.7510963330437474e-05, "loss": 1.4003, "step": 5044 }, { "epoch": 0.2758224785752361, "grad_norm": 1.352689266204834, "learning_rate": 1.7509756797238512e-05, "loss": 1.5493, "step": 5045 }, { "epoch": 0.27587715101895766, "grad_norm": 1.4973481893539429, "learning_rate": 1.7508550013271146e-05, "loss": 1.5765, "step": 5046 }, { "epoch": 0.27593182346267925, "grad_norm": 1.2690856456756592, "learning_rate": 1.7507342978575676e-05, "loss": 1.4165, "step": 5047 }, { "epoch": 0.2759864959064008, "grad_norm": 1.5001630783081055, "learning_rate": 1.750613569319241e-05, "loss": 1.4099, "step": 5048 }, { "epoch": 0.2760411683501223, "grad_norm": 1.7417542934417725, "learning_rate": 1.7504928157161657e-05, "loss": 1.6898, "step": 5049 }, { "epoch": 0.27609584079384386, "grad_norm": 1.6958824396133423, "learning_rate": 1.7503720370523742e-05, "loss": 1.4363, "step": 5050 }, { "epoch": 0.27615051323756545, "grad_norm": 1.5742642879486084, "learning_rate": 1.7502512333318998e-05, "loss": 1.223, "step": 5051 }, { "epoch": 0.276205185681287, "grad_norm": 1.2307446002960205, "learning_rate": 1.7501304045587756e-05, "loss": 1.5712, "step": 5052 }, { "epoch": 0.27625985812500853, "grad_norm": 1.285581350326538, "learning_rate": 1.7500095507370376e-05, "loss": 1.4935, "step": 5053 }, { "epoch": 0.2763145305687301, "grad_norm": 1.2939780950546265, "learning_rate": 1.7498886718707203e-05, "loss": 1.3208, "step": 5054 }, { "epoch": 0.27636920301245166, "grad_norm": 1.5946605205535889, "learning_rate": 1.749767767963861e-05, "loss": 1.509, "step": 5055 }, { "epoch": 0.2764238754561732, "grad_norm": 1.4588724374771118, "learning_rate": 1.7496468390204965e-05, "loss": 1.524, "step": 5056 }, { "epoch": 0.27647854789989473, "grad_norm": 1.1305636167526245, "learning_rate": 1.7495258850446646e-05, "loss": 1.4185, "step": 5057 }, { "epoch": 0.2765332203436163, "grad_norm": 1.254809856414795, "learning_rate": 1.7494049060404047e-05, "loss": 1.4202, "step": 5058 }, { "epoch": 0.27658789278733786, "grad_norm": 1.580524206161499, "learning_rate": 1.7492839020117567e-05, "loss": 1.3726, "step": 5059 }, { "epoch": 0.2766425652310594, "grad_norm": 1.4348491430282593, "learning_rate": 1.749162872962761e-05, "loss": 1.5629, "step": 5060 }, { "epoch": 0.276697237674781, "grad_norm": 1.4403420686721802, "learning_rate": 1.7490418188974586e-05, "loss": 1.366, "step": 5061 }, { "epoch": 0.27675191011850253, "grad_norm": 1.3946142196655273, "learning_rate": 1.7489207398198924e-05, "loss": 1.5014, "step": 5062 }, { "epoch": 0.27680658256222407, "grad_norm": 1.3884738683700562, "learning_rate": 1.7487996357341054e-05, "loss": 1.5585, "step": 5063 }, { "epoch": 0.2768612550059456, "grad_norm": 1.4925448894500732, "learning_rate": 1.7486785066441412e-05, "loss": 1.3309, "step": 5064 }, { "epoch": 0.2769159274496672, "grad_norm": 1.9364774227142334, "learning_rate": 1.748557352554045e-05, "loss": 1.5037, "step": 5065 }, { "epoch": 0.27697059989338874, "grad_norm": 1.2976378202438354, "learning_rate": 1.7484361734678623e-05, "loss": 1.4688, "step": 5066 }, { "epoch": 0.2770252723371103, "grad_norm": 1.7562052011489868, "learning_rate": 1.7483149693896396e-05, "loss": 1.5734, "step": 5067 }, { "epoch": 0.27707994478083187, "grad_norm": 1.3403263092041016, "learning_rate": 1.7481937403234236e-05, "loss": 1.6715, "step": 5068 }, { "epoch": 0.2771346172245534, "grad_norm": 1.3003346920013428, "learning_rate": 1.7480724862732634e-05, "loss": 1.6277, "step": 5069 }, { "epoch": 0.27718928966827494, "grad_norm": 1.91537344455719, "learning_rate": 1.747951207243207e-05, "loss": 1.1522, "step": 5070 }, { "epoch": 0.2772439621119965, "grad_norm": 1.616081953048706, "learning_rate": 1.7478299032373053e-05, "loss": 1.2891, "step": 5071 }, { "epoch": 0.27729863455571807, "grad_norm": 1.7296055555343628, "learning_rate": 1.747708574259608e-05, "loss": 1.4154, "step": 5072 }, { "epoch": 0.2773533069994396, "grad_norm": 1.6241238117218018, "learning_rate": 1.747587220314167e-05, "loss": 1.2849, "step": 5073 }, { "epoch": 0.27740797944316115, "grad_norm": 2.171408176422119, "learning_rate": 1.7474658414050344e-05, "loss": 1.3057, "step": 5074 }, { "epoch": 0.27746265188688274, "grad_norm": 1.465773344039917, "learning_rate": 1.747344437536263e-05, "loss": 1.3736, "step": 5075 }, { "epoch": 0.2775173243306043, "grad_norm": 1.4030648469924927, "learning_rate": 1.7472230087119074e-05, "loss": 1.3464, "step": 5076 }, { "epoch": 0.2775719967743258, "grad_norm": 1.232485055923462, "learning_rate": 1.747101554936022e-05, "loss": 1.6661, "step": 5077 }, { "epoch": 0.27762666921804735, "grad_norm": 1.3515115976333618, "learning_rate": 1.746980076212663e-05, "loss": 1.6766, "step": 5078 }, { "epoch": 0.27768134166176894, "grad_norm": 1.4238622188568115, "learning_rate": 1.746858572545886e-05, "loss": 1.5421, "step": 5079 }, { "epoch": 0.2777360141054905, "grad_norm": 1.4243710041046143, "learning_rate": 1.7467370439397487e-05, "loss": 1.3505, "step": 5080 }, { "epoch": 0.277790686549212, "grad_norm": 1.434484839439392, "learning_rate": 1.7466154903983092e-05, "loss": 1.6898, "step": 5081 }, { "epoch": 0.2778453589929336, "grad_norm": 1.3879624605178833, "learning_rate": 1.746493911925627e-05, "loss": 1.4693, "step": 5082 }, { "epoch": 0.27790003143665515, "grad_norm": 1.944150686264038, "learning_rate": 1.7463723085257606e-05, "loss": 1.4723, "step": 5083 }, { "epoch": 0.2779547038803767, "grad_norm": 2.0656306743621826, "learning_rate": 1.746250680202772e-05, "loss": 1.296, "step": 5084 }, { "epoch": 0.2780093763240982, "grad_norm": 3.090953826904297, "learning_rate": 1.7461290269607217e-05, "loss": 1.2247, "step": 5085 }, { "epoch": 0.2780640487678198, "grad_norm": 1.4793643951416016, "learning_rate": 1.7460073488036723e-05, "loss": 1.3883, "step": 5086 }, { "epoch": 0.27811872121154135, "grad_norm": 1.6169825792312622, "learning_rate": 1.7458856457356873e-05, "loss": 1.4407, "step": 5087 }, { "epoch": 0.2781733936552629, "grad_norm": 1.3667123317718506, "learning_rate": 1.74576391776083e-05, "loss": 1.2533, "step": 5088 }, { "epoch": 0.2782280660989845, "grad_norm": 1.4241361618041992, "learning_rate": 1.7456421648831658e-05, "loss": 1.2677, "step": 5089 }, { "epoch": 0.278282738542706, "grad_norm": 1.395361304283142, "learning_rate": 1.74552038710676e-05, "loss": 1.6931, "step": 5090 }, { "epoch": 0.27833741098642756, "grad_norm": 1.4884551763534546, "learning_rate": 1.7453985844356786e-05, "loss": 1.4612, "step": 5091 }, { "epoch": 0.2783920834301491, "grad_norm": 1.2776036262512207, "learning_rate": 1.74527675687399e-05, "loss": 1.4048, "step": 5092 }, { "epoch": 0.2784467558738707, "grad_norm": 1.2052174806594849, "learning_rate": 1.7451549044257608e-05, "loss": 1.8205, "step": 5093 }, { "epoch": 0.2785014283175922, "grad_norm": 1.3140789270401, "learning_rate": 1.7450330270950614e-05, "loss": 1.33, "step": 5094 }, { "epoch": 0.27855610076131376, "grad_norm": 1.840505599975586, "learning_rate": 1.744911124885961e-05, "loss": 1.4689, "step": 5095 }, { "epoch": 0.27861077320503536, "grad_norm": 1.2402433156967163, "learning_rate": 1.74478919780253e-05, "loss": 1.3562, "step": 5096 }, { "epoch": 0.2786654456487569, "grad_norm": 1.4440301656723022, "learning_rate": 1.7446672458488398e-05, "loss": 1.4559, "step": 5097 }, { "epoch": 0.27872011809247843, "grad_norm": 1.9332703351974487, "learning_rate": 1.7445452690289632e-05, "loss": 1.4672, "step": 5098 }, { "epoch": 0.27877479053619997, "grad_norm": 1.8397574424743652, "learning_rate": 1.7444232673469726e-05, "loss": 1.5679, "step": 5099 }, { "epoch": 0.27882946297992156, "grad_norm": 1.2143856287002563, "learning_rate": 1.7443012408069427e-05, "loss": 1.3483, "step": 5100 }, { "epoch": 0.2788841354236431, "grad_norm": 1.2255820035934448, "learning_rate": 1.744179189412947e-05, "loss": 1.4777, "step": 5101 }, { "epoch": 0.27893880786736464, "grad_norm": 1.8382353782653809, "learning_rate": 1.7440571131690626e-05, "loss": 1.3505, "step": 5102 }, { "epoch": 0.27899348031108623, "grad_norm": 1.4923852682113647, "learning_rate": 1.7439350120793652e-05, "loss": 1.4052, "step": 5103 }, { "epoch": 0.27904815275480777, "grad_norm": 1.498110294342041, "learning_rate": 1.7438128861479316e-05, "loss": 1.4963, "step": 5104 }, { "epoch": 0.2791028251985293, "grad_norm": 1.594634771347046, "learning_rate": 1.7436907353788404e-05, "loss": 1.3043, "step": 5105 }, { "epoch": 0.27915749764225084, "grad_norm": 1.2574981451034546, "learning_rate": 1.743568559776171e-05, "loss": 1.5514, "step": 5106 }, { "epoch": 0.27921217008597243, "grad_norm": 1.085053563117981, "learning_rate": 1.743446359344002e-05, "loss": 1.3671, "step": 5107 }, { "epoch": 0.27926684252969397, "grad_norm": 1.5081779956817627, "learning_rate": 1.7433241340864147e-05, "loss": 1.5003, "step": 5108 }, { "epoch": 0.2793215149734155, "grad_norm": 1.5526076555252075, "learning_rate": 1.7432018840074905e-05, "loss": 1.256, "step": 5109 }, { "epoch": 0.2793761874171371, "grad_norm": 1.429464340209961, "learning_rate": 1.743079609111311e-05, "loss": 1.561, "step": 5110 }, { "epoch": 0.27943085986085864, "grad_norm": 2.0199837684631348, "learning_rate": 1.74295730940196e-05, "loss": 1.3752, "step": 5111 }, { "epoch": 0.2794855323045802, "grad_norm": 1.215151309967041, "learning_rate": 1.7428349848835208e-05, "loss": 1.5845, "step": 5112 }, { "epoch": 0.2795402047483017, "grad_norm": 2.0679168701171875, "learning_rate": 1.742712635560078e-05, "loss": 1.162, "step": 5113 }, { "epoch": 0.2795948771920233, "grad_norm": 1.6704418659210205, "learning_rate": 1.7425902614357182e-05, "loss": 1.4612, "step": 5114 }, { "epoch": 0.27964954963574484, "grad_norm": 1.4366761445999146, "learning_rate": 1.7424678625145266e-05, "loss": 1.2631, "step": 5115 }, { "epoch": 0.2797042220794664, "grad_norm": 1.5792278051376343, "learning_rate": 1.742345438800591e-05, "loss": 1.2451, "step": 5116 }, { "epoch": 0.279758894523188, "grad_norm": 1.392989993095398, "learning_rate": 1.7422229902979992e-05, "loss": 1.3843, "step": 5117 }, { "epoch": 0.2798135669669095, "grad_norm": 1.4671728610992432, "learning_rate": 1.7421005170108402e-05, "loss": 1.3405, "step": 5118 }, { "epoch": 0.27986823941063105, "grad_norm": 1.2378809452056885, "learning_rate": 1.741978018943203e-05, "loss": 1.5407, "step": 5119 }, { "epoch": 0.2799229118543526, "grad_norm": 1.1053752899169922, "learning_rate": 1.741855496099179e-05, "loss": 1.7804, "step": 5120 }, { "epoch": 0.2799775842980742, "grad_norm": 1.7208917140960693, "learning_rate": 1.7417329484828594e-05, "loss": 1.3362, "step": 5121 }, { "epoch": 0.2800322567417957, "grad_norm": 1.4088205099105835, "learning_rate": 1.7416103760983357e-05, "loss": 1.3471, "step": 5122 }, { "epoch": 0.28008692918551725, "grad_norm": 1.7337838411331177, "learning_rate": 1.7414877789497017e-05, "loss": 1.4063, "step": 5123 }, { "epoch": 0.28014160162923885, "grad_norm": 1.3620549440383911, "learning_rate": 1.7413651570410504e-05, "loss": 1.8575, "step": 5124 }, { "epoch": 0.2801962740729604, "grad_norm": 1.2876297235488892, "learning_rate": 1.7412425103764773e-05, "loss": 1.7117, "step": 5125 }, { "epoch": 0.2802509465166819, "grad_norm": 1.6011697053909302, "learning_rate": 1.741119838960077e-05, "loss": 1.3619, "step": 5126 }, { "epoch": 0.28030561896040346, "grad_norm": 1.58529531955719, "learning_rate": 1.7409971427959465e-05, "loss": 1.4108, "step": 5127 }, { "epoch": 0.28036029140412505, "grad_norm": 1.917777419090271, "learning_rate": 1.7408744218881823e-05, "loss": 1.2028, "step": 5128 }, { "epoch": 0.2804149638478466, "grad_norm": 1.9151448011398315, "learning_rate": 1.7407516762408826e-05, "loss": 1.4283, "step": 5129 }, { "epoch": 0.2804696362915681, "grad_norm": 1.4877471923828125, "learning_rate": 1.7406289058581466e-05, "loss": 1.4094, "step": 5130 }, { "epoch": 0.2805243087352897, "grad_norm": 1.605910062789917, "learning_rate": 1.740506110744073e-05, "loss": 1.5338, "step": 5131 }, { "epoch": 0.28057898117901126, "grad_norm": 1.7228487730026245, "learning_rate": 1.7403832909027633e-05, "loss": 1.5709, "step": 5132 }, { "epoch": 0.2806336536227328, "grad_norm": 1.4266769886016846, "learning_rate": 1.7402604463383176e-05, "loss": 1.5349, "step": 5133 }, { "epoch": 0.28068832606645433, "grad_norm": 1.657078504562378, "learning_rate": 1.7401375770548387e-05, "loss": 1.2856, "step": 5134 }, { "epoch": 0.2807429985101759, "grad_norm": 1.3017748594284058, "learning_rate": 1.7400146830564295e-05, "loss": 1.3893, "step": 5135 }, { "epoch": 0.28079767095389746, "grad_norm": 1.2150017023086548, "learning_rate": 1.7398917643471933e-05, "loss": 1.4903, "step": 5136 }, { "epoch": 0.280852343397619, "grad_norm": 1.5556902885437012, "learning_rate": 1.739768820931235e-05, "loss": 1.3666, "step": 5137 }, { "epoch": 0.2809070158413406, "grad_norm": 1.292521595954895, "learning_rate": 1.7396458528126595e-05, "loss": 1.4333, "step": 5138 }, { "epoch": 0.28096168828506213, "grad_norm": 1.4202125072479248, "learning_rate": 1.739522859995574e-05, "loss": 1.2572, "step": 5139 }, { "epoch": 0.28101636072878367, "grad_norm": 1.5347808599472046, "learning_rate": 1.7393998424840845e-05, "loss": 1.404, "step": 5140 }, { "epoch": 0.2810710331725052, "grad_norm": 1.3793634176254272, "learning_rate": 1.739276800282299e-05, "loss": 1.347, "step": 5141 }, { "epoch": 0.2811257056162268, "grad_norm": 1.758022665977478, "learning_rate": 1.7391537333943267e-05, "loss": 1.6684, "step": 5142 }, { "epoch": 0.28118037805994833, "grad_norm": 1.4725732803344727, "learning_rate": 1.7390306418242767e-05, "loss": 1.4121, "step": 5143 }, { "epoch": 0.28123505050366987, "grad_norm": 1.3347285985946655, "learning_rate": 1.7389075255762592e-05, "loss": 1.6769, "step": 5144 }, { "epoch": 0.28128972294739146, "grad_norm": 1.4930094480514526, "learning_rate": 1.7387843846543858e-05, "loss": 1.5489, "step": 5145 }, { "epoch": 0.281344395391113, "grad_norm": 1.2866716384887695, "learning_rate": 1.7386612190627682e-05, "loss": 1.6077, "step": 5146 }, { "epoch": 0.28139906783483454, "grad_norm": 1.367680311203003, "learning_rate": 1.7385380288055187e-05, "loss": 1.4113, "step": 5147 }, { "epoch": 0.2814537402785561, "grad_norm": 1.586698293685913, "learning_rate": 1.7384148138867518e-05, "loss": 1.4841, "step": 5148 }, { "epoch": 0.28150841272227767, "grad_norm": 1.3629831075668335, "learning_rate": 1.7382915743105813e-05, "loss": 1.4405, "step": 5149 }, { "epoch": 0.2815630851659992, "grad_norm": 1.5368340015411377, "learning_rate": 1.738168310081123e-05, "loss": 1.2156, "step": 5150 }, { "epoch": 0.28161775760972074, "grad_norm": 1.5566694736480713, "learning_rate": 1.7380450212024924e-05, "loss": 1.3571, "step": 5151 }, { "epoch": 0.28167243005344234, "grad_norm": 1.5468534231185913, "learning_rate": 1.7379217076788068e-05, "loss": 1.5429, "step": 5152 }, { "epoch": 0.2817271024971639, "grad_norm": 1.2998663187026978, "learning_rate": 1.7377983695141836e-05, "loss": 1.4614, "step": 5153 }, { "epoch": 0.2817817749408854, "grad_norm": 1.5450669527053833, "learning_rate": 1.7376750067127415e-05, "loss": 1.4306, "step": 5154 }, { "epoch": 0.28183644738460695, "grad_norm": 1.4473049640655518, "learning_rate": 1.7375516192786e-05, "loss": 1.3175, "step": 5155 }, { "epoch": 0.28189111982832854, "grad_norm": 1.5310940742492676, "learning_rate": 1.7374282072158796e-05, "loss": 1.5021, "step": 5156 }, { "epoch": 0.2819457922720501, "grad_norm": 1.5370073318481445, "learning_rate": 1.7373047705287004e-05, "loss": 1.6225, "step": 5157 }, { "epoch": 0.2820004647157716, "grad_norm": 2.458655834197998, "learning_rate": 1.737181309221185e-05, "loss": 1.4095, "step": 5158 }, { "epoch": 0.2820551371594932, "grad_norm": 1.2549419403076172, "learning_rate": 1.7370578232974558e-05, "loss": 1.3706, "step": 5159 }, { "epoch": 0.28210980960321475, "grad_norm": 1.9198631048202515, "learning_rate": 1.7369343127616367e-05, "loss": 1.1812, "step": 5160 }, { "epoch": 0.2821644820469363, "grad_norm": 1.226333737373352, "learning_rate": 1.736810777617851e-05, "loss": 1.5083, "step": 5161 }, { "epoch": 0.2822191544906578, "grad_norm": 1.3789740800857544, "learning_rate": 1.736687217870225e-05, "loss": 1.4183, "step": 5162 }, { "epoch": 0.2822738269343794, "grad_norm": 1.415299892425537, "learning_rate": 1.7365636335228834e-05, "loss": 1.4914, "step": 5163 }, { "epoch": 0.28232849937810095, "grad_norm": 2.137476921081543, "learning_rate": 1.736440024579954e-05, "loss": 1.4079, "step": 5164 }, { "epoch": 0.2823831718218225, "grad_norm": 1.9414845705032349, "learning_rate": 1.7363163910455646e-05, "loss": 1.2496, "step": 5165 }, { "epoch": 0.2824378442655441, "grad_norm": 1.0730496644973755, "learning_rate": 1.7361927329238425e-05, "loss": 1.5506, "step": 5166 }, { "epoch": 0.2824925167092656, "grad_norm": 1.3901649713516235, "learning_rate": 1.7360690502189176e-05, "loss": 1.5615, "step": 5167 }, { "epoch": 0.28254718915298715, "grad_norm": 2.033538579940796, "learning_rate": 1.73594534293492e-05, "loss": 1.5148, "step": 5168 }, { "epoch": 0.2826018615967087, "grad_norm": 1.362980842590332, "learning_rate": 1.7358216110759803e-05, "loss": 1.3927, "step": 5169 }, { "epoch": 0.2826565340404303, "grad_norm": 1.4266101121902466, "learning_rate": 1.7356978546462306e-05, "loss": 1.3956, "step": 5170 }, { "epoch": 0.2827112064841518, "grad_norm": 1.984647274017334, "learning_rate": 1.735574073649803e-05, "loss": 1.5989, "step": 5171 }, { "epoch": 0.28276587892787336, "grad_norm": 1.5012749433517456, "learning_rate": 1.735450268090831e-05, "loss": 1.4629, "step": 5172 }, { "epoch": 0.28282055137159495, "grad_norm": 1.3551708459854126, "learning_rate": 1.7353264379734486e-05, "loss": 1.1802, "step": 5173 }, { "epoch": 0.2828752238153165, "grad_norm": 1.199647068977356, "learning_rate": 1.735202583301791e-05, "loss": 1.3209, "step": 5174 }, { "epoch": 0.282929896259038, "grad_norm": 1.5406348705291748, "learning_rate": 1.7350787040799945e-05, "loss": 1.2966, "step": 5175 }, { "epoch": 0.28298456870275956, "grad_norm": 1.3378818035125732, "learning_rate": 1.7349548003121945e-05, "loss": 1.5517, "step": 5176 }, { "epoch": 0.28303924114648116, "grad_norm": 1.3353853225708008, "learning_rate": 1.7348308720025293e-05, "loss": 1.2777, "step": 5177 }, { "epoch": 0.2830939135902027, "grad_norm": 1.3787020444869995, "learning_rate": 1.734706919155137e-05, "loss": 1.4703, "step": 5178 }, { "epoch": 0.28314858603392423, "grad_norm": 1.4115983247756958, "learning_rate": 1.7345829417741564e-05, "loss": 1.3826, "step": 5179 }, { "epoch": 0.2832032584776458, "grad_norm": 1.509161114692688, "learning_rate": 1.734458939863728e-05, "loss": 1.4039, "step": 5180 }, { "epoch": 0.28325793092136736, "grad_norm": 1.3191277980804443, "learning_rate": 1.7343349134279917e-05, "loss": 1.4701, "step": 5181 }, { "epoch": 0.2833126033650889, "grad_norm": 1.1421926021575928, "learning_rate": 1.7342108624710898e-05, "loss": 1.5486, "step": 5182 }, { "epoch": 0.28336727580881044, "grad_norm": 2.640254259109497, "learning_rate": 1.734086786997164e-05, "loss": 1.0395, "step": 5183 }, { "epoch": 0.28342194825253203, "grad_norm": 1.9541611671447754, "learning_rate": 1.733962687010358e-05, "loss": 1.3865, "step": 5184 }, { "epoch": 0.28347662069625357, "grad_norm": 1.8352073431015015, "learning_rate": 1.7338385625148156e-05, "loss": 1.5083, "step": 5185 }, { "epoch": 0.2835312931399751, "grad_norm": 1.5558562278747559, "learning_rate": 1.7337144135146818e-05, "loss": 1.41, "step": 5186 }, { "epoch": 0.2835859655836967, "grad_norm": 1.2924697399139404, "learning_rate": 1.7335902400141017e-05, "loss": 1.5233, "step": 5187 }, { "epoch": 0.28364063802741823, "grad_norm": 1.6678063869476318, "learning_rate": 1.7334660420172224e-05, "loss": 1.3323, "step": 5188 }, { "epoch": 0.28369531047113977, "grad_norm": 1.4805923700332642, "learning_rate": 1.7333418195281906e-05, "loss": 1.442, "step": 5189 }, { "epoch": 0.2837499829148613, "grad_norm": 1.63032066822052, "learning_rate": 1.7332175725511544e-05, "loss": 1.343, "step": 5190 }, { "epoch": 0.2838046553585829, "grad_norm": 1.8598357439041138, "learning_rate": 1.733093301090263e-05, "loss": 1.5122, "step": 5191 }, { "epoch": 0.28385932780230444, "grad_norm": 1.5866738557815552, "learning_rate": 1.7329690051496663e-05, "loss": 1.1924, "step": 5192 }, { "epoch": 0.283914000246026, "grad_norm": 1.2708925008773804, "learning_rate": 1.7328446847335142e-05, "loss": 1.6185, "step": 5193 }, { "epoch": 0.28396867268974757, "grad_norm": 1.1805051565170288, "learning_rate": 1.7327203398459586e-05, "loss": 1.5481, "step": 5194 }, { "epoch": 0.2840233451334691, "grad_norm": 1.4593608379364014, "learning_rate": 1.7325959704911516e-05, "loss": 1.4044, "step": 5195 }, { "epoch": 0.28407801757719064, "grad_norm": 1.4950802326202393, "learning_rate": 1.732471576673246e-05, "loss": 1.5789, "step": 5196 }, { "epoch": 0.28413269002091224, "grad_norm": 1.4658313989639282, "learning_rate": 1.7323471583963953e-05, "loss": 1.3949, "step": 5197 }, { "epoch": 0.2841873624646338, "grad_norm": 1.2983332872390747, "learning_rate": 1.7322227156647548e-05, "loss": 1.4605, "step": 5198 }, { "epoch": 0.2842420349083553, "grad_norm": 1.5513466596603394, "learning_rate": 1.7320982484824796e-05, "loss": 1.425, "step": 5199 }, { "epoch": 0.28429670735207685, "grad_norm": 1.7580690383911133, "learning_rate": 1.731973756853726e-05, "loss": 1.306, "step": 5200 }, { "epoch": 0.28435137979579844, "grad_norm": 2.834414482116699, "learning_rate": 1.7318492407826508e-05, "loss": 1.2896, "step": 5201 }, { "epoch": 0.28440605223952, "grad_norm": 1.597334384918213, "learning_rate": 1.731724700273412e-05, "loss": 1.1637, "step": 5202 }, { "epoch": 0.2844607246832415, "grad_norm": 1.4305940866470337, "learning_rate": 1.731600135330169e-05, "loss": 1.4769, "step": 5203 }, { "epoch": 0.2845153971269631, "grad_norm": 1.5401639938354492, "learning_rate": 1.7314755459570803e-05, "loss": 1.4125, "step": 5204 }, { "epoch": 0.28457006957068465, "grad_norm": 1.4313945770263672, "learning_rate": 1.7313509321583066e-05, "loss": 1.6093, "step": 5205 }, { "epoch": 0.2846247420144062, "grad_norm": 1.6301205158233643, "learning_rate": 1.7312262939380094e-05, "loss": 1.4794, "step": 5206 }, { "epoch": 0.2846794144581277, "grad_norm": 1.4269871711730957, "learning_rate": 1.73110163130035e-05, "loss": 1.4999, "step": 5207 }, { "epoch": 0.2847340869018493, "grad_norm": 1.6894763708114624, "learning_rate": 1.7309769442494918e-05, "loss": 1.4594, "step": 5208 }, { "epoch": 0.28478875934557085, "grad_norm": 1.6662592887878418, "learning_rate": 1.7308522327895984e-05, "loss": 1.428, "step": 5209 }, { "epoch": 0.2848434317892924, "grad_norm": 1.5695302486419678, "learning_rate": 1.7307274969248334e-05, "loss": 1.3758, "step": 5210 }, { "epoch": 0.284898104233014, "grad_norm": 1.270321011543274, "learning_rate": 1.7306027366593627e-05, "loss": 1.4938, "step": 5211 }, { "epoch": 0.2849527766767355, "grad_norm": 1.3231741189956665, "learning_rate": 1.7304779519973526e-05, "loss": 1.4947, "step": 5212 }, { "epoch": 0.28500744912045706, "grad_norm": 1.2736867666244507, "learning_rate": 1.730353142942969e-05, "loss": 1.5332, "step": 5213 }, { "epoch": 0.2850621215641786, "grad_norm": 1.1512012481689453, "learning_rate": 1.7302283095003807e-05, "loss": 1.5064, "step": 5214 }, { "epoch": 0.2851167940079002, "grad_norm": 1.4558957815170288, "learning_rate": 1.730103451673755e-05, "loss": 1.282, "step": 5215 }, { "epoch": 0.2851714664516217, "grad_norm": 1.3665330410003662, "learning_rate": 1.7299785694672624e-05, "loss": 1.5141, "step": 5216 }, { "epoch": 0.28522613889534326, "grad_norm": 1.7398457527160645, "learning_rate": 1.729853662885072e-05, "loss": 1.3712, "step": 5217 }, { "epoch": 0.28528081133906485, "grad_norm": 2.6594631671905518, "learning_rate": 1.7297287319313554e-05, "loss": 1.2648, "step": 5218 }, { "epoch": 0.2853354837827864, "grad_norm": 1.3957984447479248, "learning_rate": 1.7296037766102842e-05, "loss": 1.1832, "step": 5219 }, { "epoch": 0.28539015622650793, "grad_norm": 1.855777621269226, "learning_rate": 1.7294787969260303e-05, "loss": 1.2837, "step": 5220 }, { "epoch": 0.28544482867022947, "grad_norm": 1.4848520755767822, "learning_rate": 1.729353792882768e-05, "loss": 1.1804, "step": 5221 }, { "epoch": 0.28549950111395106, "grad_norm": 1.8242095708847046, "learning_rate": 1.729228764484671e-05, "loss": 1.5735, "step": 5222 }, { "epoch": 0.2855541735576726, "grad_norm": 1.8076019287109375, "learning_rate": 1.7291037117359144e-05, "loss": 1.3965, "step": 5223 }, { "epoch": 0.28560884600139413, "grad_norm": 1.8575948476791382, "learning_rate": 1.728978634640674e-05, "loss": 1.2645, "step": 5224 }, { "epoch": 0.2856635184451157, "grad_norm": 1.5324474573135376, "learning_rate": 1.7288535332031262e-05, "loss": 1.5366, "step": 5225 }, { "epoch": 0.28571819088883726, "grad_norm": 1.546401858329773, "learning_rate": 1.7287284074274485e-05, "loss": 1.4214, "step": 5226 }, { "epoch": 0.2857728633325588, "grad_norm": 1.5915743112564087, "learning_rate": 1.7286032573178198e-05, "loss": 1.5121, "step": 5227 }, { "epoch": 0.28582753577628034, "grad_norm": 1.7238047122955322, "learning_rate": 1.728478082878418e-05, "loss": 1.2566, "step": 5228 }, { "epoch": 0.28588220822000193, "grad_norm": 2.238553524017334, "learning_rate": 1.7283528841134242e-05, "loss": 1.3805, "step": 5229 }, { "epoch": 0.28593688066372347, "grad_norm": 1.2122715711593628, "learning_rate": 1.7282276610270183e-05, "loss": 1.4335, "step": 5230 }, { "epoch": 0.285991553107445, "grad_norm": 1.7130951881408691, "learning_rate": 1.7281024136233816e-05, "loss": 1.3152, "step": 5231 }, { "epoch": 0.2860462255511666, "grad_norm": 1.2537190914154053, "learning_rate": 1.727977141906697e-05, "loss": 1.5834, "step": 5232 }, { "epoch": 0.28610089799488814, "grad_norm": 1.4410675764083862, "learning_rate": 1.7278518458811472e-05, "loss": 1.3701, "step": 5233 }, { "epoch": 0.2861555704386097, "grad_norm": 1.7057380676269531, "learning_rate": 1.7277265255509165e-05, "loss": 1.5158, "step": 5234 }, { "epoch": 0.2862102428823312, "grad_norm": 1.4014791250228882, "learning_rate": 1.7276011809201896e-05, "loss": 1.3555, "step": 5235 }, { "epoch": 0.2862649153260528, "grad_norm": 1.4616317749023438, "learning_rate": 1.7274758119931515e-05, "loss": 1.0512, "step": 5236 }, { "epoch": 0.28631958776977434, "grad_norm": 1.5746948719024658, "learning_rate": 1.7273504187739893e-05, "loss": 1.4025, "step": 5237 }, { "epoch": 0.2863742602134959, "grad_norm": 1.1514892578125, "learning_rate": 1.7272250012668896e-05, "loss": 1.4692, "step": 5238 }, { "epoch": 0.28642893265721747, "grad_norm": 1.6253854036331177, "learning_rate": 1.7270995594760407e-05, "loss": 1.3849, "step": 5239 }, { "epoch": 0.286483605100939, "grad_norm": 1.8920305967330933, "learning_rate": 1.7269740934056317e-05, "loss": 1.2663, "step": 5240 }, { "epoch": 0.28653827754466055, "grad_norm": 1.3049437999725342, "learning_rate": 1.726848603059851e-05, "loss": 1.7026, "step": 5241 }, { "epoch": 0.2865929499883821, "grad_norm": 1.3435680866241455, "learning_rate": 1.7267230884428905e-05, "loss": 1.6887, "step": 5242 }, { "epoch": 0.2866476224321037, "grad_norm": 1.3550852537155151, "learning_rate": 1.7265975495589408e-05, "loss": 1.8047, "step": 5243 }, { "epoch": 0.2867022948758252, "grad_norm": 1.3759222030639648, "learning_rate": 1.7264719864121935e-05, "loss": 1.2452, "step": 5244 }, { "epoch": 0.28675696731954675, "grad_norm": 1.3547426462173462, "learning_rate": 1.726346399006842e-05, "loss": 1.5972, "step": 5245 }, { "epoch": 0.28681163976326834, "grad_norm": 1.835184097290039, "learning_rate": 1.72622078734708e-05, "loss": 1.341, "step": 5246 }, { "epoch": 0.2868663122069899, "grad_norm": 1.5021547079086304, "learning_rate": 1.7260951514371015e-05, "loss": 1.582, "step": 5247 }, { "epoch": 0.2869209846507114, "grad_norm": 1.8201825618743896, "learning_rate": 1.7259694912811022e-05, "loss": 1.4157, "step": 5248 }, { "epoch": 0.28697565709443296, "grad_norm": 2.8211748600006104, "learning_rate": 1.725843806883278e-05, "loss": 1.1803, "step": 5249 }, { "epoch": 0.28703032953815455, "grad_norm": 1.3520835638046265, "learning_rate": 1.7257180982478256e-05, "loss": 1.5928, "step": 5250 }, { "epoch": 0.2870850019818761, "grad_norm": 1.582287073135376, "learning_rate": 1.7255923653789436e-05, "loss": 1.4895, "step": 5251 }, { "epoch": 0.2871396744255976, "grad_norm": 1.650228500366211, "learning_rate": 1.7254666082808292e-05, "loss": 1.5255, "step": 5252 }, { "epoch": 0.2871943468693192, "grad_norm": 1.2289350032806396, "learning_rate": 1.725340826957683e-05, "loss": 1.3854, "step": 5253 }, { "epoch": 0.28724901931304075, "grad_norm": 1.7220510244369507, "learning_rate": 1.725215021413704e-05, "loss": 1.4873, "step": 5254 }, { "epoch": 0.2873036917567623, "grad_norm": 1.7278263568878174, "learning_rate": 1.725089191653094e-05, "loss": 1.2462, "step": 5255 }, { "epoch": 0.28735836420048383, "grad_norm": 1.4235658645629883, "learning_rate": 1.7249633376800542e-05, "loss": 1.4063, "step": 5256 }, { "epoch": 0.2874130366442054, "grad_norm": 1.0425395965576172, "learning_rate": 1.7248374594987873e-05, "loss": 1.6605, "step": 5257 }, { "epoch": 0.28746770908792696, "grad_norm": 1.9400484561920166, "learning_rate": 1.724711557113497e-05, "loss": 1.2929, "step": 5258 }, { "epoch": 0.2875223815316485, "grad_norm": 1.582944393157959, "learning_rate": 1.724585630528387e-05, "loss": 1.4341, "step": 5259 }, { "epoch": 0.2875770539753701, "grad_norm": 1.198030710220337, "learning_rate": 1.7244596797476627e-05, "loss": 1.5997, "step": 5260 }, { "epoch": 0.2876317264190916, "grad_norm": 1.354891300201416, "learning_rate": 1.7243337047755297e-05, "loss": 1.3487, "step": 5261 }, { "epoch": 0.28768639886281316, "grad_norm": 1.690737247467041, "learning_rate": 1.7242077056161943e-05, "loss": 1.4994, "step": 5262 }, { "epoch": 0.2877410713065347, "grad_norm": 1.4106988906860352, "learning_rate": 1.7240816822738646e-05, "loss": 1.3607, "step": 5263 }, { "epoch": 0.2877957437502563, "grad_norm": 1.5219204425811768, "learning_rate": 1.723955634752748e-05, "loss": 1.2249, "step": 5264 }, { "epoch": 0.28785041619397783, "grad_norm": 1.381805658340454, "learning_rate": 1.7238295630570544e-05, "loss": 1.6882, "step": 5265 }, { "epoch": 0.28790508863769937, "grad_norm": 1.598186731338501, "learning_rate": 1.723703467190993e-05, "loss": 1.6189, "step": 5266 }, { "epoch": 0.28795976108142096, "grad_norm": 1.498657464981079, "learning_rate": 1.7235773471587743e-05, "loss": 1.7036, "step": 5267 }, { "epoch": 0.2880144335251425, "grad_norm": 1.6943720579147339, "learning_rate": 1.7234512029646104e-05, "loss": 1.4138, "step": 5268 }, { "epoch": 0.28806910596886404, "grad_norm": 1.2429097890853882, "learning_rate": 1.7233250346127132e-05, "loss": 1.4249, "step": 5269 }, { "epoch": 0.2881237784125856, "grad_norm": 1.3993864059448242, "learning_rate": 1.7231988421072957e-05, "loss": 1.4551, "step": 5270 }, { "epoch": 0.28817845085630717, "grad_norm": 1.502365231513977, "learning_rate": 1.723072625452572e-05, "loss": 1.22, "step": 5271 }, { "epoch": 0.2882331233000287, "grad_norm": 1.26738440990448, "learning_rate": 1.722946384652756e-05, "loss": 1.4205, "step": 5272 }, { "epoch": 0.28828779574375024, "grad_norm": 1.1126081943511963, "learning_rate": 1.7228201197120642e-05, "loss": 1.8922, "step": 5273 }, { "epoch": 0.28834246818747183, "grad_norm": 1.763580322265625, "learning_rate": 1.7226938306347124e-05, "loss": 1.3917, "step": 5274 }, { "epoch": 0.28839714063119337, "grad_norm": 1.3647319078445435, "learning_rate": 1.722567517424918e-05, "loss": 1.2238, "step": 5275 }, { "epoch": 0.2884518130749149, "grad_norm": 1.6056536436080933, "learning_rate": 1.722441180086898e-05, "loss": 1.445, "step": 5276 }, { "epoch": 0.28850648551863645, "grad_norm": 1.3904588222503662, "learning_rate": 1.722314818624872e-05, "loss": 1.3717, "step": 5277 }, { "epoch": 0.28856115796235804, "grad_norm": 1.47500741481781, "learning_rate": 1.7221884330430593e-05, "loss": 1.4987, "step": 5278 }, { "epoch": 0.2886158304060796, "grad_norm": 1.6900339126586914, "learning_rate": 1.7220620233456806e-05, "loss": 1.7863, "step": 5279 }, { "epoch": 0.2886705028498011, "grad_norm": 1.5200482606887817, "learning_rate": 1.721935589536956e-05, "loss": 1.4536, "step": 5280 }, { "epoch": 0.2887251752935227, "grad_norm": 1.115403413772583, "learning_rate": 1.7218091316211083e-05, "loss": 1.5705, "step": 5281 }, { "epoch": 0.28877984773724424, "grad_norm": 1.4236048460006714, "learning_rate": 1.7216826496023594e-05, "loss": 1.5236, "step": 5282 }, { "epoch": 0.2888345201809658, "grad_norm": 1.3052794933319092, "learning_rate": 1.721556143484934e-05, "loss": 1.4589, "step": 5283 }, { "epoch": 0.2888891926246873, "grad_norm": 1.6636697053909302, "learning_rate": 1.7214296132730555e-05, "loss": 1.3261, "step": 5284 }, { "epoch": 0.2889438650684089, "grad_norm": 1.423445701599121, "learning_rate": 1.7213030589709493e-05, "loss": 1.5543, "step": 5285 }, { "epoch": 0.28899853751213045, "grad_norm": 1.1685341596603394, "learning_rate": 1.7211764805828415e-05, "loss": 1.4403, "step": 5286 }, { "epoch": 0.289053209955852, "grad_norm": 1.3917421102523804, "learning_rate": 1.7210498781129585e-05, "loss": 1.358, "step": 5287 }, { "epoch": 0.2891078823995736, "grad_norm": 1.3395256996154785, "learning_rate": 1.7209232515655283e-05, "loss": 1.5309, "step": 5288 }, { "epoch": 0.2891625548432951, "grad_norm": 1.294106125831604, "learning_rate": 1.720796600944779e-05, "loss": 1.5059, "step": 5289 }, { "epoch": 0.28921722728701665, "grad_norm": 2.2647690773010254, "learning_rate": 1.7206699262549395e-05, "loss": 1.2728, "step": 5290 }, { "epoch": 0.2892718997307382, "grad_norm": 1.4253753423690796, "learning_rate": 1.7205432275002403e-05, "loss": 1.4171, "step": 5291 }, { "epoch": 0.2893265721744598, "grad_norm": 1.9454154968261719, "learning_rate": 1.720416504684912e-05, "loss": 1.6317, "step": 5292 }, { "epoch": 0.2893812446181813, "grad_norm": 1.511766791343689, "learning_rate": 1.7202897578131858e-05, "loss": 1.3235, "step": 5293 }, { "epoch": 0.28943591706190286, "grad_norm": 1.5903713703155518, "learning_rate": 1.7201629868892947e-05, "loss": 1.3842, "step": 5294 }, { "epoch": 0.28949058950562445, "grad_norm": 1.868545651435852, "learning_rate": 1.7200361919174715e-05, "loss": 1.331, "step": 5295 }, { "epoch": 0.289545261949346, "grad_norm": 1.4945054054260254, "learning_rate": 1.7199093729019503e-05, "loss": 1.5799, "step": 5296 }, { "epoch": 0.2895999343930675, "grad_norm": 1.6246753931045532, "learning_rate": 1.7197825298469655e-05, "loss": 1.5637, "step": 5297 }, { "epoch": 0.28965460683678906, "grad_norm": 1.6722869873046875, "learning_rate": 1.719655662756753e-05, "loss": 1.3951, "step": 5298 }, { "epoch": 0.28970927928051066, "grad_norm": 1.2826353311538696, "learning_rate": 1.7195287716355495e-05, "loss": 1.2779, "step": 5299 }, { "epoch": 0.2897639517242322, "grad_norm": 1.7715625762939453, "learning_rate": 1.719401856487592e-05, "loss": 1.4584, "step": 5300 }, { "epoch": 0.28981862416795373, "grad_norm": 1.367239236831665, "learning_rate": 1.7192749173171183e-05, "loss": 1.3519, "step": 5301 }, { "epoch": 0.2898732966116753, "grad_norm": 1.442449927330017, "learning_rate": 1.7191479541283668e-05, "loss": 1.7201, "step": 5302 }, { "epoch": 0.28992796905539686, "grad_norm": 2.3344948291778564, "learning_rate": 1.719020966925578e-05, "loss": 1.3804, "step": 5303 }, { "epoch": 0.2899826414991184, "grad_norm": 1.2573168277740479, "learning_rate": 1.7188939557129918e-05, "loss": 1.3275, "step": 5304 }, { "epoch": 0.29003731394283994, "grad_norm": 1.6413437128067017, "learning_rate": 1.7187669204948495e-05, "loss": 1.4508, "step": 5305 }, { "epoch": 0.29009198638656153, "grad_norm": 1.291262149810791, "learning_rate": 1.718639861275393e-05, "loss": 1.7002, "step": 5306 }, { "epoch": 0.29014665883028307, "grad_norm": 1.4573673009872437, "learning_rate": 1.7185127780588654e-05, "loss": 1.5966, "step": 5307 }, { "epoch": 0.2902013312740046, "grad_norm": 1.642520546913147, "learning_rate": 1.7183856708495098e-05, "loss": 1.4158, "step": 5308 }, { "epoch": 0.2902560037177262, "grad_norm": 1.480785608291626, "learning_rate": 1.7182585396515712e-05, "loss": 1.4965, "step": 5309 }, { "epoch": 0.29031067616144773, "grad_norm": 1.4887468814849854, "learning_rate": 1.7181313844692944e-05, "loss": 1.4925, "step": 5310 }, { "epoch": 0.29036534860516927, "grad_norm": 1.2998460531234741, "learning_rate": 1.7180042053069253e-05, "loss": 1.3248, "step": 5311 }, { "epoch": 0.2904200210488908, "grad_norm": 1.5770972967147827, "learning_rate": 1.7178770021687113e-05, "loss": 1.4027, "step": 5312 }, { "epoch": 0.2904746934926124, "grad_norm": 1.549009084701538, "learning_rate": 1.7177497750588994e-05, "loss": 1.4447, "step": 5313 }, { "epoch": 0.29052936593633394, "grad_norm": 1.3445504903793335, "learning_rate": 1.717622523981738e-05, "loss": 1.3729, "step": 5314 }, { "epoch": 0.2905840383800555, "grad_norm": 1.4093035459518433, "learning_rate": 1.7174952489414772e-05, "loss": 1.3866, "step": 5315 }, { "epoch": 0.29063871082377707, "grad_norm": 1.222676157951355, "learning_rate": 1.717367949942366e-05, "loss": 1.3836, "step": 5316 }, { "epoch": 0.2906933832674986, "grad_norm": 1.3325557708740234, "learning_rate": 1.7172406269886555e-05, "loss": 1.5265, "step": 5317 }, { "epoch": 0.29074805571122014, "grad_norm": 1.7201759815216064, "learning_rate": 1.717113280084598e-05, "loss": 1.3813, "step": 5318 }, { "epoch": 0.2908027281549417, "grad_norm": 1.6185086965560913, "learning_rate": 1.7169859092344448e-05, "loss": 1.5292, "step": 5319 }, { "epoch": 0.2908574005986633, "grad_norm": 1.2932344675064087, "learning_rate": 1.71685851444245e-05, "loss": 1.6114, "step": 5320 }, { "epoch": 0.2909120730423848, "grad_norm": 2.0937082767486572, "learning_rate": 1.716731095712867e-05, "loss": 1.2868, "step": 5321 }, { "epoch": 0.29096674548610635, "grad_norm": 1.7129963636398315, "learning_rate": 1.7166036530499503e-05, "loss": 1.6068, "step": 5322 }, { "epoch": 0.29102141792982794, "grad_norm": 1.5976848602294922, "learning_rate": 1.716476186457957e-05, "loss": 1.1553, "step": 5323 }, { "epoch": 0.2910760903735495, "grad_norm": 1.5588291883468628, "learning_rate": 1.7163486959411418e-05, "loss": 1.3541, "step": 5324 }, { "epoch": 0.291130762817271, "grad_norm": 1.4468222856521606, "learning_rate": 1.7162211815037633e-05, "loss": 1.2433, "step": 5325 }, { "epoch": 0.29118543526099255, "grad_norm": 1.5876268148422241, "learning_rate": 1.7160936431500785e-05, "loss": 1.5851, "step": 5326 }, { "epoch": 0.29124010770471415, "grad_norm": 1.5352760553359985, "learning_rate": 1.715966080884347e-05, "loss": 1.3541, "step": 5327 }, { "epoch": 0.2912947801484357, "grad_norm": 1.072920799255371, "learning_rate": 1.715838494710827e-05, "loss": 1.6569, "step": 5328 }, { "epoch": 0.2913494525921572, "grad_norm": 1.2994896173477173, "learning_rate": 1.7157108846337812e-05, "loss": 1.4777, "step": 5329 }, { "epoch": 0.2914041250358788, "grad_norm": 1.0598454475402832, "learning_rate": 1.7155832506574688e-05, "loss": 1.5236, "step": 5330 }, { "epoch": 0.29145879747960035, "grad_norm": 1.2142488956451416, "learning_rate": 1.7154555927861524e-05, "loss": 1.3487, "step": 5331 }, { "epoch": 0.2915134699233219, "grad_norm": 1.400402307510376, "learning_rate": 1.715327911024095e-05, "loss": 1.272, "step": 5332 }, { "epoch": 0.2915681423670434, "grad_norm": 1.8771860599517822, "learning_rate": 1.7152002053755604e-05, "loss": 1.5685, "step": 5333 }, { "epoch": 0.291622814810765, "grad_norm": 1.4962718486785889, "learning_rate": 1.715072475844812e-05, "loss": 1.4618, "step": 5334 }, { "epoch": 0.29167748725448656, "grad_norm": 1.3604938983917236, "learning_rate": 1.7149447224361163e-05, "loss": 1.7686, "step": 5335 }, { "epoch": 0.2917321596982081, "grad_norm": 1.5076301097869873, "learning_rate": 1.7148169451537385e-05, "loss": 1.6254, "step": 5336 }, { "epoch": 0.2917868321419297, "grad_norm": 1.5100502967834473, "learning_rate": 1.7146891440019456e-05, "loss": 1.4849, "step": 5337 }, { "epoch": 0.2918415045856512, "grad_norm": 1.7787134647369385, "learning_rate": 1.714561318985005e-05, "loss": 1.5612, "step": 5338 }, { "epoch": 0.29189617702937276, "grad_norm": 1.692586064338684, "learning_rate": 1.714433470107185e-05, "loss": 1.5634, "step": 5339 }, { "epoch": 0.2919508494730943, "grad_norm": 1.2752203941345215, "learning_rate": 1.714305597372755e-05, "loss": 1.5837, "step": 5340 }, { "epoch": 0.2920055219168159, "grad_norm": 1.4694030284881592, "learning_rate": 1.7141777007859852e-05, "loss": 1.3642, "step": 5341 }, { "epoch": 0.29206019436053743, "grad_norm": 1.593335509300232, "learning_rate": 1.7140497803511457e-05, "loss": 1.3629, "step": 5342 }, { "epoch": 0.29211486680425897, "grad_norm": 1.556828260421753, "learning_rate": 1.713921836072509e-05, "loss": 1.4209, "step": 5343 }, { "epoch": 0.29216953924798056, "grad_norm": 1.4931904077529907, "learning_rate": 1.7137938679543466e-05, "loss": 1.5398, "step": 5344 }, { "epoch": 0.2922242116917021, "grad_norm": 1.6229677200317383, "learning_rate": 1.713665876000932e-05, "loss": 1.4291, "step": 5345 }, { "epoch": 0.29227888413542363, "grad_norm": 2.051001787185669, "learning_rate": 1.713537860216539e-05, "loss": 1.3324, "step": 5346 }, { "epoch": 0.29233355657914517, "grad_norm": 1.9870537519454956, "learning_rate": 1.713409820605443e-05, "loss": 1.2361, "step": 5347 }, { "epoch": 0.29238822902286676, "grad_norm": 1.5822638273239136, "learning_rate": 1.7132817571719185e-05, "loss": 1.1388, "step": 5348 }, { "epoch": 0.2924429014665883, "grad_norm": 1.2427586317062378, "learning_rate": 1.7131536699202427e-05, "loss": 1.5264, "step": 5349 }, { "epoch": 0.29249757391030984, "grad_norm": 1.6624637842178345, "learning_rate": 1.713025558854692e-05, "loss": 1.4504, "step": 5350 }, { "epoch": 0.29255224635403143, "grad_norm": 1.2969249486923218, "learning_rate": 1.7128974239795448e-05, "loss": 1.3583, "step": 5351 }, { "epoch": 0.29260691879775297, "grad_norm": 1.506223201751709, "learning_rate": 1.71276926529908e-05, "loss": 1.0022, "step": 5352 }, { "epoch": 0.2926615912414745, "grad_norm": 1.3915228843688965, "learning_rate": 1.7126410828175768e-05, "loss": 1.4446, "step": 5353 }, { "epoch": 0.29271626368519604, "grad_norm": 1.5002025365829468, "learning_rate": 1.7125128765393157e-05, "loss": 1.3253, "step": 5354 }, { "epoch": 0.29277093612891764, "grad_norm": 1.496495246887207, "learning_rate": 1.7123846464685774e-05, "loss": 1.4151, "step": 5355 }, { "epoch": 0.2928256085726392, "grad_norm": 1.42983877658844, "learning_rate": 1.712256392609644e-05, "loss": 1.9004, "step": 5356 }, { "epoch": 0.2928802810163607, "grad_norm": 1.6212226152420044, "learning_rate": 1.7121281149667987e-05, "loss": 1.2335, "step": 5357 }, { "epoch": 0.2929349534600823, "grad_norm": 1.9434689283370972, "learning_rate": 1.7119998135443245e-05, "loss": 1.3609, "step": 5358 }, { "epoch": 0.29298962590380384, "grad_norm": 1.2530041933059692, "learning_rate": 1.7118714883465055e-05, "loss": 1.6233, "step": 5359 }, { "epoch": 0.2930442983475254, "grad_norm": 1.832501769065857, "learning_rate": 1.711743139377627e-05, "loss": 1.7003, "step": 5360 }, { "epoch": 0.2930989707912469, "grad_norm": 1.4567631483078003, "learning_rate": 1.7116147666419755e-05, "loss": 1.2991, "step": 5361 }, { "epoch": 0.2931536432349685, "grad_norm": 1.4638681411743164, "learning_rate": 1.7114863701438365e-05, "loss": 1.344, "step": 5362 }, { "epoch": 0.29320831567869005, "grad_norm": 1.4054673910140991, "learning_rate": 1.711357949887498e-05, "loss": 1.3636, "step": 5363 }, { "epoch": 0.2932629881224116, "grad_norm": 1.5655596256256104, "learning_rate": 1.7112295058772487e-05, "loss": 1.3965, "step": 5364 }, { "epoch": 0.2933176605661332, "grad_norm": 1.702621340751648, "learning_rate": 1.711101038117377e-05, "loss": 1.2204, "step": 5365 }, { "epoch": 0.2933723330098547, "grad_norm": 1.6295663118362427, "learning_rate": 1.7109725466121734e-05, "loss": 1.6124, "step": 5366 }, { "epoch": 0.29342700545357625, "grad_norm": 1.2698463201522827, "learning_rate": 1.7108440313659275e-05, "loss": 1.5809, "step": 5367 }, { "epoch": 0.2934816778972978, "grad_norm": 1.3503094911575317, "learning_rate": 1.7107154923829317e-05, "loss": 1.5596, "step": 5368 }, { "epoch": 0.2935363503410194, "grad_norm": 1.8193447589874268, "learning_rate": 1.710586929667478e-05, "loss": 1.4959, "step": 5369 }, { "epoch": 0.2935910227847409, "grad_norm": 2.2188897132873535, "learning_rate": 1.7104583432238588e-05, "loss": 1.1476, "step": 5370 }, { "epoch": 0.29364569522846246, "grad_norm": 1.4692585468292236, "learning_rate": 1.710329733056369e-05, "loss": 1.4266, "step": 5371 }, { "epoch": 0.29370036767218405, "grad_norm": 1.2591347694396973, "learning_rate": 1.710201099169302e-05, "loss": 1.607, "step": 5372 }, { "epoch": 0.2937550401159056, "grad_norm": 1.3401418924331665, "learning_rate": 1.710072441566954e-05, "loss": 1.6951, "step": 5373 }, { "epoch": 0.2938097125596271, "grad_norm": 1.4773560762405396, "learning_rate": 1.7099437602536208e-05, "loss": 1.5396, "step": 5374 }, { "epoch": 0.29386438500334866, "grad_norm": 1.243357539176941, "learning_rate": 1.7098150552335997e-05, "loss": 1.612, "step": 5375 }, { "epoch": 0.29391905744707025, "grad_norm": 1.5144938230514526, "learning_rate": 1.709686326511188e-05, "loss": 1.628, "step": 5376 }, { "epoch": 0.2939737298907918, "grad_norm": 1.2831356525421143, "learning_rate": 1.709557574090685e-05, "loss": 1.5873, "step": 5377 }, { "epoch": 0.29402840233451333, "grad_norm": 1.7112760543823242, "learning_rate": 1.7094287979763892e-05, "loss": 1.3077, "step": 5378 }, { "epoch": 0.2940830747782349, "grad_norm": 1.5226833820343018, "learning_rate": 1.7092999981726013e-05, "loss": 1.3993, "step": 5379 }, { "epoch": 0.29413774722195646, "grad_norm": 1.7475699186325073, "learning_rate": 1.7091711746836218e-05, "loss": 1.3181, "step": 5380 }, { "epoch": 0.294192419665678, "grad_norm": 1.692934513092041, "learning_rate": 1.709042327513753e-05, "loss": 1.4103, "step": 5381 }, { "epoch": 0.29424709210939953, "grad_norm": 1.5869685411453247, "learning_rate": 1.708913456667297e-05, "loss": 1.4405, "step": 5382 }, { "epoch": 0.2943017645531211, "grad_norm": 1.4399250745773315, "learning_rate": 1.708784562148557e-05, "loss": 1.5013, "step": 5383 }, { "epoch": 0.29435643699684266, "grad_norm": 1.4012808799743652, "learning_rate": 1.7086556439618373e-05, "loss": 1.3545, "step": 5384 }, { "epoch": 0.2944111094405642, "grad_norm": 1.6941394805908203, "learning_rate": 1.7085267021114424e-05, "loss": 1.5402, "step": 5385 }, { "epoch": 0.2944657818842858, "grad_norm": 1.8286402225494385, "learning_rate": 1.7083977366016785e-05, "loss": 1.5815, "step": 5386 }, { "epoch": 0.29452045432800733, "grad_norm": 1.7110778093338013, "learning_rate": 1.7082687474368523e-05, "loss": 1.2929, "step": 5387 }, { "epoch": 0.29457512677172887, "grad_norm": 1.6224175691604614, "learning_rate": 1.7081397346212703e-05, "loss": 1.3601, "step": 5388 }, { "epoch": 0.2946297992154504, "grad_norm": 1.3282577991485596, "learning_rate": 1.7080106981592407e-05, "loss": 1.3226, "step": 5389 }, { "epoch": 0.294684471659172, "grad_norm": 1.695278525352478, "learning_rate": 1.7078816380550728e-05, "loss": 1.3774, "step": 5390 }, { "epoch": 0.29473914410289354, "grad_norm": 1.2579846382141113, "learning_rate": 1.707752554313076e-05, "loss": 1.5158, "step": 5391 }, { "epoch": 0.2947938165466151, "grad_norm": 1.737620234489441, "learning_rate": 1.7076234469375604e-05, "loss": 1.3503, "step": 5392 }, { "epoch": 0.29484848899033667, "grad_norm": 1.2666656970977783, "learning_rate": 1.707494315932837e-05, "loss": 1.5862, "step": 5393 }, { "epoch": 0.2949031614340582, "grad_norm": 1.4537231922149658, "learning_rate": 1.7073651613032186e-05, "loss": 1.4188, "step": 5394 }, { "epoch": 0.29495783387777974, "grad_norm": 1.4100505113601685, "learning_rate": 1.7072359830530178e-05, "loss": 1.5939, "step": 5395 }, { "epoch": 0.2950125063215013, "grad_norm": 1.2996212244033813, "learning_rate": 1.7071067811865477e-05, "loss": 1.5202, "step": 5396 }, { "epoch": 0.29506717876522287, "grad_norm": 1.061834454536438, "learning_rate": 1.706977555708123e-05, "loss": 1.5991, "step": 5397 }, { "epoch": 0.2951218512089444, "grad_norm": 1.6103013753890991, "learning_rate": 1.7068483066220586e-05, "loss": 1.4421, "step": 5398 }, { "epoch": 0.29517652365266595, "grad_norm": 1.7568880319595337, "learning_rate": 1.7067190339326705e-05, "loss": 1.4904, "step": 5399 }, { "epoch": 0.29523119609638754, "grad_norm": 1.408503770828247, "learning_rate": 1.7065897376442757e-05, "loss": 1.4173, "step": 5400 }, { "epoch": 0.2952858685401091, "grad_norm": 1.5998773574829102, "learning_rate": 1.7064604177611913e-05, "loss": 1.5454, "step": 5401 }, { "epoch": 0.2953405409838306, "grad_norm": 1.3513096570968628, "learning_rate": 1.7063310742877362e-05, "loss": 1.2807, "step": 5402 }, { "epoch": 0.2953952134275522, "grad_norm": 1.5744495391845703, "learning_rate": 1.7062017072282285e-05, "loss": 1.372, "step": 5403 }, { "epoch": 0.29544988587127374, "grad_norm": 1.6084641218185425, "learning_rate": 1.7060723165869892e-05, "loss": 1.3842, "step": 5404 }, { "epoch": 0.2955045583149953, "grad_norm": 1.3534733057022095, "learning_rate": 1.7059429023683384e-05, "loss": 1.3723, "step": 5405 }, { "epoch": 0.2955592307587168, "grad_norm": 1.3889343738555908, "learning_rate": 1.705813464576597e-05, "loss": 1.5691, "step": 5406 }, { "epoch": 0.2956139032024384, "grad_norm": 1.4223873615264893, "learning_rate": 1.705684003216088e-05, "loss": 1.4184, "step": 5407 }, { "epoch": 0.29566857564615995, "grad_norm": 1.3055129051208496, "learning_rate": 1.7055545182911343e-05, "loss": 1.4062, "step": 5408 }, { "epoch": 0.2957232480898815, "grad_norm": 1.4500948190689087, "learning_rate": 1.7054250098060598e-05, "loss": 1.3447, "step": 5409 }, { "epoch": 0.2957779205336031, "grad_norm": 1.4818040132522583, "learning_rate": 1.705295477765188e-05, "loss": 1.4034, "step": 5410 }, { "epoch": 0.2958325929773246, "grad_norm": 1.888024926185608, "learning_rate": 1.705165922172846e-05, "loss": 1.2769, "step": 5411 }, { "epoch": 0.29588726542104615, "grad_norm": 1.8051533699035645, "learning_rate": 1.705036343033359e-05, "loss": 1.3415, "step": 5412 }, { "epoch": 0.2959419378647677, "grad_norm": 1.4010095596313477, "learning_rate": 1.7049067403510544e-05, "loss": 1.5346, "step": 5413 }, { "epoch": 0.2959966103084893, "grad_norm": 1.3128875494003296, "learning_rate": 1.704777114130259e-05, "loss": 1.4408, "step": 5414 }, { "epoch": 0.2960512827522108, "grad_norm": 1.5564281940460205, "learning_rate": 1.7046474643753018e-05, "loss": 1.5792, "step": 5415 }, { "epoch": 0.29610595519593236, "grad_norm": 1.447950005531311, "learning_rate": 1.7045177910905128e-05, "loss": 1.4404, "step": 5416 }, { "epoch": 0.29616062763965395, "grad_norm": 1.721466302871704, "learning_rate": 1.7043880942802212e-05, "loss": 1.3975, "step": 5417 }, { "epoch": 0.2962153000833755, "grad_norm": 1.5005086660385132, "learning_rate": 1.7042583739487585e-05, "loss": 1.5082, "step": 5418 }, { "epoch": 0.296269972527097, "grad_norm": 1.6863157749176025, "learning_rate": 1.7041286301004563e-05, "loss": 1.5253, "step": 5419 }, { "epoch": 0.29632464497081856, "grad_norm": 1.5289283990859985, "learning_rate": 1.7039988627396464e-05, "loss": 1.482, "step": 5420 }, { "epoch": 0.29637931741454016, "grad_norm": 1.2318412065505981, "learning_rate": 1.703869071870663e-05, "loss": 1.4654, "step": 5421 }, { "epoch": 0.2964339898582617, "grad_norm": 1.7362005710601807, "learning_rate": 1.703739257497839e-05, "loss": 1.3833, "step": 5422 }, { "epoch": 0.29648866230198323, "grad_norm": 2.154494524002075, "learning_rate": 1.7036094196255103e-05, "loss": 1.3571, "step": 5423 }, { "epoch": 0.2965433347457048, "grad_norm": 1.3672358989715576, "learning_rate": 1.7034795582580118e-05, "loss": 1.4295, "step": 5424 }, { "epoch": 0.29659800718942636, "grad_norm": 1.5080121755599976, "learning_rate": 1.7033496733996798e-05, "loss": 1.2567, "step": 5425 }, { "epoch": 0.2966526796331479, "grad_norm": 1.4218522310256958, "learning_rate": 1.703219765054852e-05, "loss": 1.6489, "step": 5426 }, { "epoch": 0.29670735207686944, "grad_norm": 1.703151822090149, "learning_rate": 1.7030898332278663e-05, "loss": 1.3083, "step": 5427 }, { "epoch": 0.29676202452059103, "grad_norm": 1.39940345287323, "learning_rate": 1.702959877923061e-05, "loss": 1.5088, "step": 5428 }, { "epoch": 0.29681669696431257, "grad_norm": 1.7035452127456665, "learning_rate": 1.702829899144776e-05, "loss": 1.2389, "step": 5429 }, { "epoch": 0.2968713694080341, "grad_norm": 1.3939688205718994, "learning_rate": 1.702699896897351e-05, "loss": 1.5231, "step": 5430 }, { "epoch": 0.2969260418517557, "grad_norm": 1.7615875005722046, "learning_rate": 1.7025698711851283e-05, "loss": 1.3278, "step": 5431 }, { "epoch": 0.29698071429547723, "grad_norm": 1.7618573904037476, "learning_rate": 1.7024398220124483e-05, "loss": 1.2659, "step": 5432 }, { "epoch": 0.29703538673919877, "grad_norm": 1.7582767009735107, "learning_rate": 1.7023097493836544e-05, "loss": 1.5495, "step": 5433 }, { "epoch": 0.2970900591829203, "grad_norm": 1.876306176185608, "learning_rate": 1.70217965330309e-05, "loss": 1.2656, "step": 5434 }, { "epoch": 0.2971447316266419, "grad_norm": 1.3369956016540527, "learning_rate": 1.7020495337750997e-05, "loss": 1.4888, "step": 5435 }, { "epoch": 0.29719940407036344, "grad_norm": 1.0352922677993774, "learning_rate": 1.7019193908040274e-05, "loss": 1.726, "step": 5436 }, { "epoch": 0.297254076514085, "grad_norm": 1.7680745124816895, "learning_rate": 1.7017892243942195e-05, "loss": 1.6408, "step": 5437 }, { "epoch": 0.29730874895780657, "grad_norm": 1.4449074268341064, "learning_rate": 1.701659034550023e-05, "loss": 1.2731, "step": 5438 }, { "epoch": 0.2973634214015281, "grad_norm": 1.397861361503601, "learning_rate": 1.7015288212757848e-05, "loss": 1.2585, "step": 5439 }, { "epoch": 0.29741809384524964, "grad_norm": 1.445455551147461, "learning_rate": 1.701398584575853e-05, "loss": 1.5617, "step": 5440 }, { "epoch": 0.2974727662889712, "grad_norm": 1.4874202013015747, "learning_rate": 1.7012683244545768e-05, "loss": 1.3747, "step": 5441 }, { "epoch": 0.2975274387326928, "grad_norm": 1.659664273262024, "learning_rate": 1.7011380409163053e-05, "loss": 1.4375, "step": 5442 }, { "epoch": 0.2975821111764143, "grad_norm": 1.828471302986145, "learning_rate": 1.7010077339653895e-05, "loss": 1.3816, "step": 5443 }, { "epoch": 0.29763678362013585, "grad_norm": 1.5879466533660889, "learning_rate": 1.7008774036061802e-05, "loss": 1.6154, "step": 5444 }, { "epoch": 0.29769145606385744, "grad_norm": 1.4857673645019531, "learning_rate": 1.70074704984303e-05, "loss": 1.4264, "step": 5445 }, { "epoch": 0.297746128507579, "grad_norm": 1.5574331283569336, "learning_rate": 1.7006166726802916e-05, "loss": 1.5015, "step": 5446 }, { "epoch": 0.2978008009513005, "grad_norm": 1.208070993423462, "learning_rate": 1.7004862721223184e-05, "loss": 1.56, "step": 5447 }, { "epoch": 0.29785547339502205, "grad_norm": 1.4145327806472778, "learning_rate": 1.7003558481734647e-05, "loss": 1.2605, "step": 5448 }, { "epoch": 0.29791014583874365, "grad_norm": 1.8494106531143188, "learning_rate": 1.7002254008380858e-05, "loss": 1.5781, "step": 5449 }, { "epoch": 0.2979648182824652, "grad_norm": 1.7372312545776367, "learning_rate": 1.7000949301205376e-05, "loss": 1.3914, "step": 5450 }, { "epoch": 0.2980194907261867, "grad_norm": 1.5342214107513428, "learning_rate": 1.6999644360251772e-05, "loss": 1.4355, "step": 5451 }, { "epoch": 0.2980741631699083, "grad_norm": 1.3238214254379272, "learning_rate": 1.6998339185563614e-05, "loss": 1.2718, "step": 5452 }, { "epoch": 0.29812883561362985, "grad_norm": 1.403716802597046, "learning_rate": 1.699703377718449e-05, "loss": 1.2402, "step": 5453 }, { "epoch": 0.2981835080573514, "grad_norm": 1.8201982975006104, "learning_rate": 1.699572813515799e-05, "loss": 1.157, "step": 5454 }, { "epoch": 0.2982381805010729, "grad_norm": 2.375767946243286, "learning_rate": 1.6994422259527708e-05, "loss": 1.2067, "step": 5455 }, { "epoch": 0.2982928529447945, "grad_norm": 1.497251033782959, "learning_rate": 1.6993116150337258e-05, "loss": 1.1375, "step": 5456 }, { "epoch": 0.29834752538851605, "grad_norm": 1.437088131904602, "learning_rate": 1.6991809807630245e-05, "loss": 1.5486, "step": 5457 }, { "epoch": 0.2984021978322376, "grad_norm": 1.9044415950775146, "learning_rate": 1.69905032314503e-05, "loss": 1.2691, "step": 5458 }, { "epoch": 0.2984568702759592, "grad_norm": 1.4372165203094482, "learning_rate": 1.6989196421841045e-05, "loss": 1.4557, "step": 5459 }, { "epoch": 0.2985115427196807, "grad_norm": 1.6232709884643555, "learning_rate": 1.698788937884612e-05, "loss": 1.2573, "step": 5460 }, { "epoch": 0.29856621516340226, "grad_norm": 1.638960361480713, "learning_rate": 1.6986582102509176e-05, "loss": 0.9704, "step": 5461 }, { "epoch": 0.2986208876071238, "grad_norm": 1.531902551651001, "learning_rate": 1.698527459287386e-05, "loss": 1.1719, "step": 5462 }, { "epoch": 0.2986755600508454, "grad_norm": 1.544211983680725, "learning_rate": 1.6983966849983833e-05, "loss": 1.6722, "step": 5463 }, { "epoch": 0.2987302324945669, "grad_norm": 1.8260185718536377, "learning_rate": 1.6982658873882764e-05, "loss": 1.519, "step": 5464 }, { "epoch": 0.29878490493828846, "grad_norm": 1.3250112533569336, "learning_rate": 1.6981350664614332e-05, "loss": 1.3709, "step": 5465 }, { "epoch": 0.29883957738201006, "grad_norm": 1.240242600440979, "learning_rate": 1.6980042222222216e-05, "loss": 1.88, "step": 5466 }, { "epoch": 0.2988942498257316, "grad_norm": 1.8128607273101807, "learning_rate": 1.6978733546750112e-05, "loss": 1.4816, "step": 5467 }, { "epoch": 0.29894892226945313, "grad_norm": 1.1568443775177002, "learning_rate": 1.697742463824172e-05, "loss": 1.6608, "step": 5468 }, { "epoch": 0.29900359471317467, "grad_norm": 1.6580195426940918, "learning_rate": 1.6976115496740747e-05, "loss": 1.7453, "step": 5469 }, { "epoch": 0.29905826715689626, "grad_norm": 1.4141178131103516, "learning_rate": 1.6974806122290902e-05, "loss": 1.5663, "step": 5470 }, { "epoch": 0.2991129396006178, "grad_norm": 1.796768307685852, "learning_rate": 1.6973496514935918e-05, "loss": 1.5408, "step": 5471 }, { "epoch": 0.29916761204433934, "grad_norm": 1.2623239755630493, "learning_rate": 1.6972186674719522e-05, "loss": 1.4068, "step": 5472 }, { "epoch": 0.29922228448806093, "grad_norm": 1.5307625532150269, "learning_rate": 1.697087660168545e-05, "loss": 1.3615, "step": 5473 }, { "epoch": 0.29927695693178247, "grad_norm": 1.229988694190979, "learning_rate": 1.6969566295877453e-05, "loss": 1.4339, "step": 5474 }, { "epoch": 0.299331629375504, "grad_norm": 1.4747607707977295, "learning_rate": 1.6968255757339282e-05, "loss": 1.6241, "step": 5475 }, { "epoch": 0.29938630181922554, "grad_norm": 1.1187100410461426, "learning_rate": 1.69669449861147e-05, "loss": 1.4355, "step": 5476 }, { "epoch": 0.29944097426294713, "grad_norm": 1.45856773853302, "learning_rate": 1.6965633982247472e-05, "loss": 1.4214, "step": 5477 }, { "epoch": 0.2994956467066687, "grad_norm": 1.5439512729644775, "learning_rate": 1.6964322745781386e-05, "loss": 1.4077, "step": 5478 }, { "epoch": 0.2995503191503902, "grad_norm": 1.6821544170379639, "learning_rate": 1.696301127676022e-05, "loss": 1.321, "step": 5479 }, { "epoch": 0.2996049915941118, "grad_norm": 1.6748775243759155, "learning_rate": 1.6961699575227767e-05, "loss": 1.2863, "step": 5480 }, { "epoch": 0.29965966403783334, "grad_norm": 1.9082908630371094, "learning_rate": 1.696038764122783e-05, "loss": 1.1159, "step": 5481 }, { "epoch": 0.2997143364815549, "grad_norm": 1.6777217388153076, "learning_rate": 1.6959075474804217e-05, "loss": 1.0637, "step": 5482 }, { "epoch": 0.2997690089252764, "grad_norm": 1.2384474277496338, "learning_rate": 1.695776307600074e-05, "loss": 1.5374, "step": 5483 }, { "epoch": 0.299823681368998, "grad_norm": 1.4374077320098877, "learning_rate": 1.6956450444861232e-05, "loss": 1.3254, "step": 5484 }, { "epoch": 0.29987835381271954, "grad_norm": 1.5009113550186157, "learning_rate": 1.6955137581429518e-05, "loss": 1.5384, "step": 5485 }, { "epoch": 0.2999330262564411, "grad_norm": 1.4265304803848267, "learning_rate": 1.695382448574944e-05, "loss": 1.4444, "step": 5486 }, { "epoch": 0.2999876987001627, "grad_norm": 1.710893988609314, "learning_rate": 1.695251115786484e-05, "loss": 1.3393, "step": 5487 }, { "epoch": 0.3000423711438842, "grad_norm": 1.4301384687423706, "learning_rate": 1.695119759781958e-05, "loss": 1.3569, "step": 5488 }, { "epoch": 0.30009704358760575, "grad_norm": 1.6327600479125977, "learning_rate": 1.6949883805657524e-05, "loss": 1.3591, "step": 5489 }, { "epoch": 0.3001517160313273, "grad_norm": 1.403163194656372, "learning_rate": 1.694856978142254e-05, "loss": 1.122, "step": 5490 }, { "epoch": 0.3002063884750489, "grad_norm": 1.8522577285766602, "learning_rate": 1.6947255525158503e-05, "loss": 1.2397, "step": 5491 }, { "epoch": 0.3002610609187704, "grad_norm": 1.6698349714279175, "learning_rate": 1.69459410369093e-05, "loss": 1.402, "step": 5492 }, { "epoch": 0.30031573336249195, "grad_norm": 1.6892564296722412, "learning_rate": 1.694462631671883e-05, "loss": 1.0993, "step": 5493 }, { "epoch": 0.30037040580621355, "grad_norm": 1.4595788717269897, "learning_rate": 1.694331136463099e-05, "loss": 1.6339, "step": 5494 }, { "epoch": 0.3004250782499351, "grad_norm": 1.425127387046814, "learning_rate": 1.6941996180689692e-05, "loss": 1.1802, "step": 5495 }, { "epoch": 0.3004797506936566, "grad_norm": 1.4353708028793335, "learning_rate": 1.694068076493885e-05, "loss": 1.4964, "step": 5496 }, { "epoch": 0.30053442313737816, "grad_norm": 1.8445980548858643, "learning_rate": 1.6939365117422392e-05, "loss": 1.4094, "step": 5497 }, { "epoch": 0.30058909558109975, "grad_norm": 1.450025200843811, "learning_rate": 1.6938049238184245e-05, "loss": 1.3564, "step": 5498 }, { "epoch": 0.3006437680248213, "grad_norm": 1.3317636251449585, "learning_rate": 1.6936733127268357e-05, "loss": 1.4941, "step": 5499 }, { "epoch": 0.3006984404685428, "grad_norm": 1.653112530708313, "learning_rate": 1.693541678471867e-05, "loss": 1.488, "step": 5500 }, { "epoch": 0.3007531129122644, "grad_norm": 1.5704524517059326, "learning_rate": 1.6934100210579144e-05, "loss": 1.4781, "step": 5501 }, { "epoch": 0.30080778535598596, "grad_norm": 1.4342254400253296, "learning_rate": 1.693278340489374e-05, "loss": 1.4918, "step": 5502 }, { "epoch": 0.3008624577997075, "grad_norm": 2.027066230773926, "learning_rate": 1.693146636770643e-05, "loss": 1.1514, "step": 5503 }, { "epoch": 0.30091713024342903, "grad_norm": 1.516089916229248, "learning_rate": 1.6930149099061194e-05, "loss": 1.3862, "step": 5504 }, { "epoch": 0.3009718026871506, "grad_norm": 1.6688017845153809, "learning_rate": 1.6928831599002013e-05, "loss": 1.4607, "step": 5505 }, { "epoch": 0.30102647513087216, "grad_norm": 1.6193230152130127, "learning_rate": 1.692751386757289e-05, "loss": 1.3793, "step": 5506 }, { "epoch": 0.3010811475745937, "grad_norm": 1.1917173862457275, "learning_rate": 1.6926195904817823e-05, "loss": 1.4549, "step": 5507 }, { "epoch": 0.3011358200183153, "grad_norm": 1.7427841424942017, "learning_rate": 1.6924877710780818e-05, "loss": 1.6675, "step": 5508 }, { "epoch": 0.30119049246203683, "grad_norm": 1.673424243927002, "learning_rate": 1.69235592855059e-05, "loss": 1.603, "step": 5509 }, { "epoch": 0.30124516490575837, "grad_norm": 1.2354711294174194, "learning_rate": 1.6922240629037094e-05, "loss": 1.6128, "step": 5510 }, { "epoch": 0.3012998373494799, "grad_norm": 1.6115717887878418, "learning_rate": 1.6920921741418425e-05, "loss": 1.3719, "step": 5511 }, { "epoch": 0.3013545097932015, "grad_norm": 1.1729618310928345, "learning_rate": 1.691960262269394e-05, "loss": 1.5864, "step": 5512 }, { "epoch": 0.30140918223692303, "grad_norm": 1.1859681606292725, "learning_rate": 1.6918283272907684e-05, "loss": 1.589, "step": 5513 }, { "epoch": 0.30146385468064457, "grad_norm": 1.1699802875518799, "learning_rate": 1.6916963692103716e-05, "loss": 1.4352, "step": 5514 }, { "epoch": 0.30151852712436616, "grad_norm": 1.5677125453948975, "learning_rate": 1.69156438803261e-05, "loss": 1.6206, "step": 5515 }, { "epoch": 0.3015731995680877, "grad_norm": 1.398308277130127, "learning_rate": 1.6914323837618906e-05, "loss": 1.3221, "step": 5516 }, { "epoch": 0.30162787201180924, "grad_norm": 1.2503557205200195, "learning_rate": 1.691300356402622e-05, "loss": 1.3739, "step": 5517 }, { "epoch": 0.3016825444555308, "grad_norm": 1.6617971658706665, "learning_rate": 1.6911683059592115e-05, "loss": 1.2661, "step": 5518 }, { "epoch": 0.30173721689925237, "grad_norm": 1.621833324432373, "learning_rate": 1.6910362324360695e-05, "loss": 1.5913, "step": 5519 }, { "epoch": 0.3017918893429739, "grad_norm": 1.3933185338974, "learning_rate": 1.6909041358376065e-05, "loss": 1.5049, "step": 5520 }, { "epoch": 0.30184656178669544, "grad_norm": 1.2750887870788574, "learning_rate": 1.6907720161682332e-05, "loss": 1.3185, "step": 5521 }, { "epoch": 0.30190123423041704, "grad_norm": 1.4162606000900269, "learning_rate": 1.690639873432361e-05, "loss": 1.3894, "step": 5522 }, { "epoch": 0.3019559066741386, "grad_norm": 1.7503557205200195, "learning_rate": 1.6905077076344024e-05, "loss": 1.4764, "step": 5523 }, { "epoch": 0.3020105791178601, "grad_norm": 1.8522497415542603, "learning_rate": 1.6903755187787722e-05, "loss": 1.4606, "step": 5524 }, { "epoch": 0.30206525156158165, "grad_norm": 1.6933144330978394, "learning_rate": 1.6902433068698827e-05, "loss": 1.2378, "step": 5525 }, { "epoch": 0.30211992400530324, "grad_norm": 1.6221048831939697, "learning_rate": 1.6901110719121493e-05, "loss": 1.1879, "step": 5526 }, { "epoch": 0.3021745964490248, "grad_norm": 1.1471976041793823, "learning_rate": 1.689978813909988e-05, "loss": 1.6001, "step": 5527 }, { "epoch": 0.3022292688927463, "grad_norm": 1.6891567707061768, "learning_rate": 1.6898465328678154e-05, "loss": 1.0197, "step": 5528 }, { "epoch": 0.3022839413364679, "grad_norm": 1.5647368431091309, "learning_rate": 1.6897142287900477e-05, "loss": 1.4281, "step": 5529 }, { "epoch": 0.30233861378018945, "grad_norm": 1.473351240158081, "learning_rate": 1.6895819016811043e-05, "loss": 1.5423, "step": 5530 }, { "epoch": 0.302393286223911, "grad_norm": 1.437127947807312, "learning_rate": 1.6894495515454025e-05, "loss": 1.393, "step": 5531 }, { "epoch": 0.3024479586676325, "grad_norm": 1.6966239213943481, "learning_rate": 1.6893171783873624e-05, "loss": 1.4123, "step": 5532 }, { "epoch": 0.3025026311113541, "grad_norm": 1.7893829345703125, "learning_rate": 1.689184782211404e-05, "loss": 1.4173, "step": 5533 }, { "epoch": 0.30255730355507565, "grad_norm": 1.4233810901641846, "learning_rate": 1.6890523630219486e-05, "loss": 1.6392, "step": 5534 }, { "epoch": 0.3026119759987972, "grad_norm": 1.4302756786346436, "learning_rate": 1.6889199208234178e-05, "loss": 1.6174, "step": 5535 }, { "epoch": 0.3026666484425188, "grad_norm": 1.427146077156067, "learning_rate": 1.6887874556202342e-05, "loss": 1.5501, "step": 5536 }, { "epoch": 0.3027213208862403, "grad_norm": 1.6107474565505981, "learning_rate": 1.6886549674168213e-05, "loss": 1.5246, "step": 5537 }, { "epoch": 0.30277599332996186, "grad_norm": 1.5675801038742065, "learning_rate": 1.6885224562176033e-05, "loss": 1.1901, "step": 5538 }, { "epoch": 0.3028306657736834, "grad_norm": 1.1235707998275757, "learning_rate": 1.6883899220270047e-05, "loss": 1.5272, "step": 5539 }, { "epoch": 0.302885338217405, "grad_norm": 1.3440923690795898, "learning_rate": 1.688257364849451e-05, "loss": 1.5403, "step": 5540 }, { "epoch": 0.3029400106611265, "grad_norm": 1.4090454578399658, "learning_rate": 1.688124784689369e-05, "loss": 1.2719, "step": 5541 }, { "epoch": 0.30299468310484806, "grad_norm": 1.6750298738479614, "learning_rate": 1.6879921815511858e-05, "loss": 1.1326, "step": 5542 }, { "epoch": 0.30304935554856965, "grad_norm": 1.3342182636260986, "learning_rate": 1.687859555439329e-05, "loss": 1.3992, "step": 5543 }, { "epoch": 0.3031040279922912, "grad_norm": 1.5001873970031738, "learning_rate": 1.6877269063582274e-05, "loss": 1.3211, "step": 5544 }, { "epoch": 0.30315870043601273, "grad_norm": 1.844316005706787, "learning_rate": 1.687594234312311e-05, "loss": 1.6694, "step": 5545 }, { "epoch": 0.30321337287973427, "grad_norm": 1.8642762899398804, "learning_rate": 1.6874615393060093e-05, "loss": 1.5285, "step": 5546 }, { "epoch": 0.30326804532345586, "grad_norm": 1.8133306503295898, "learning_rate": 1.687328821343754e-05, "loss": 1.375, "step": 5547 }, { "epoch": 0.3033227177671774, "grad_norm": 1.4604170322418213, "learning_rate": 1.687196080429976e-05, "loss": 1.5278, "step": 5548 }, { "epoch": 0.30337739021089893, "grad_norm": 2.00691556930542, "learning_rate": 1.6870633165691087e-05, "loss": 1.3112, "step": 5549 }, { "epoch": 0.3034320626546205, "grad_norm": 1.4401733875274658, "learning_rate": 1.686930529765585e-05, "loss": 1.5273, "step": 5550 }, { "epoch": 0.30348673509834206, "grad_norm": 1.6530433893203735, "learning_rate": 1.6867977200238388e-05, "loss": 1.5221, "step": 5551 }, { "epoch": 0.3035414075420636, "grad_norm": 1.547279715538025, "learning_rate": 1.6866648873483052e-05, "loss": 1.4701, "step": 5552 }, { "epoch": 0.30359607998578514, "grad_norm": 1.0673744678497314, "learning_rate": 1.6865320317434197e-05, "loss": 1.5735, "step": 5553 }, { "epoch": 0.30365075242950673, "grad_norm": 1.4895939826965332, "learning_rate": 1.6863991532136186e-05, "loss": 1.3941, "step": 5554 }, { "epoch": 0.30370542487322827, "grad_norm": 1.1676054000854492, "learning_rate": 1.6862662517633394e-05, "loss": 1.5069, "step": 5555 }, { "epoch": 0.3037600973169498, "grad_norm": 1.4327045679092407, "learning_rate": 1.6861333273970192e-05, "loss": 1.2584, "step": 5556 }, { "epoch": 0.3038147697606714, "grad_norm": 1.4869823455810547, "learning_rate": 1.6860003801190975e-05, "loss": 1.6082, "step": 5557 }, { "epoch": 0.30386944220439294, "grad_norm": 1.4601308107376099, "learning_rate": 1.685867409934013e-05, "loss": 1.4413, "step": 5558 }, { "epoch": 0.3039241146481145, "grad_norm": 1.5423532724380493, "learning_rate": 1.6857344168462065e-05, "loss": 1.3927, "step": 5559 }, { "epoch": 0.303978787091836, "grad_norm": 1.2221839427947998, "learning_rate": 1.6856014008601187e-05, "loss": 1.4601, "step": 5560 }, { "epoch": 0.3040334595355576, "grad_norm": 1.4659563302993774, "learning_rate": 1.6854683619801915e-05, "loss": 1.5201, "step": 5561 }, { "epoch": 0.30408813197927914, "grad_norm": 1.3646318912506104, "learning_rate": 1.6853353002108667e-05, "loss": 1.4466, "step": 5562 }, { "epoch": 0.3041428044230007, "grad_norm": 2.8812787532806396, "learning_rate": 1.6852022155565882e-05, "loss": 1.321, "step": 5563 }, { "epoch": 0.30419747686672227, "grad_norm": 1.7294518947601318, "learning_rate": 1.6850691080218e-05, "loss": 1.2855, "step": 5564 }, { "epoch": 0.3042521493104438, "grad_norm": 1.5699713230133057, "learning_rate": 1.684935977610947e-05, "loss": 1.4802, "step": 5565 }, { "epoch": 0.30430682175416535, "grad_norm": 1.151077389717102, "learning_rate": 1.684802824328474e-05, "loss": 1.7639, "step": 5566 }, { "epoch": 0.3043614941978869, "grad_norm": 1.1881332397460938, "learning_rate": 1.6846696481788276e-05, "loss": 1.4487, "step": 5567 }, { "epoch": 0.3044161666416085, "grad_norm": 1.1712088584899902, "learning_rate": 1.6845364491664555e-05, "loss": 1.6493, "step": 5568 }, { "epoch": 0.30447083908533, "grad_norm": 1.4836539030075073, "learning_rate": 1.684403227295805e-05, "loss": 1.7606, "step": 5569 }, { "epoch": 0.30452551152905155, "grad_norm": 1.4077634811401367, "learning_rate": 1.6842699825713244e-05, "loss": 1.5079, "step": 5570 }, { "epoch": 0.30458018397277314, "grad_norm": 1.54362154006958, "learning_rate": 1.6841367149974638e-05, "loss": 1.5391, "step": 5571 }, { "epoch": 0.3046348564164947, "grad_norm": 1.5238454341888428, "learning_rate": 1.6840034245786726e-05, "loss": 1.4336, "step": 5572 }, { "epoch": 0.3046895288602162, "grad_norm": 1.6083985567092896, "learning_rate": 1.6838701113194022e-05, "loss": 1.3886, "step": 5573 }, { "epoch": 0.30474420130393776, "grad_norm": 1.4511759281158447, "learning_rate": 1.6837367752241035e-05, "loss": 1.423, "step": 5574 }, { "epoch": 0.30479887374765935, "grad_norm": 1.3313353061676025, "learning_rate": 1.68360341629723e-05, "loss": 1.5939, "step": 5575 }, { "epoch": 0.3048535461913809, "grad_norm": 1.5940024852752686, "learning_rate": 1.683470034543234e-05, "loss": 1.4073, "step": 5576 }, { "epoch": 0.3049082186351024, "grad_norm": 1.6136702299118042, "learning_rate": 1.68333662996657e-05, "loss": 1.496, "step": 5577 }, { "epoch": 0.304962891078824, "grad_norm": 1.3258634805679321, "learning_rate": 1.683203202571692e-05, "loss": 1.4904, "step": 5578 }, { "epoch": 0.30501756352254555, "grad_norm": 1.4824451208114624, "learning_rate": 1.6830697523630564e-05, "loss": 1.4911, "step": 5579 }, { "epoch": 0.3050722359662671, "grad_norm": 1.8639659881591797, "learning_rate": 1.6829362793451186e-05, "loss": 1.4437, "step": 5580 }, { "epoch": 0.30512690840998863, "grad_norm": 1.5464262962341309, "learning_rate": 1.6828027835223363e-05, "loss": 1.1712, "step": 5581 }, { "epoch": 0.3051815808537102, "grad_norm": 1.3899471759796143, "learning_rate": 1.682669264899166e-05, "loss": 1.3622, "step": 5582 }, { "epoch": 0.30523625329743176, "grad_norm": 1.4099737405776978, "learning_rate": 1.6825357234800676e-05, "loss": 1.4491, "step": 5583 }, { "epoch": 0.3052909257411533, "grad_norm": 1.1928497552871704, "learning_rate": 1.6824021592694995e-05, "loss": 1.2718, "step": 5584 }, { "epoch": 0.3053455981848749, "grad_norm": 1.4936270713806152, "learning_rate": 1.6822685722719224e-05, "loss": 1.5226, "step": 5585 }, { "epoch": 0.3054002706285964, "grad_norm": 1.6583858728408813, "learning_rate": 1.6821349624917962e-05, "loss": 1.564, "step": 5586 }, { "epoch": 0.30545494307231796, "grad_norm": 1.3155592679977417, "learning_rate": 1.6820013299335833e-05, "loss": 1.5087, "step": 5587 }, { "epoch": 0.3055096155160395, "grad_norm": 1.9527047872543335, "learning_rate": 1.6818676746017455e-05, "loss": 1.4412, "step": 5588 }, { "epoch": 0.3055642879597611, "grad_norm": 1.3883601427078247, "learning_rate": 1.681733996500746e-05, "loss": 1.6932, "step": 5589 }, { "epoch": 0.30561896040348263, "grad_norm": 1.1956512928009033, "learning_rate": 1.6816002956350486e-05, "loss": 1.4237, "step": 5590 }, { "epoch": 0.30567363284720417, "grad_norm": 1.7958002090454102, "learning_rate": 1.681466572009118e-05, "loss": 1.5237, "step": 5591 }, { "epoch": 0.30572830529092576, "grad_norm": 2.1182992458343506, "learning_rate": 1.681332825627419e-05, "loss": 1.4665, "step": 5592 }, { "epoch": 0.3057829777346473, "grad_norm": 1.3412836790084839, "learning_rate": 1.6811990564944186e-05, "loss": 1.2799, "step": 5593 }, { "epoch": 0.30583765017836884, "grad_norm": 1.6005598306655884, "learning_rate": 1.681065264614583e-05, "loss": 1.383, "step": 5594 }, { "epoch": 0.3058923226220904, "grad_norm": 1.3031666278839111, "learning_rate": 1.6809314499923802e-05, "loss": 1.6483, "step": 5595 }, { "epoch": 0.30594699506581197, "grad_norm": 1.595867395401001, "learning_rate": 1.6807976126322784e-05, "loss": 1.2908, "step": 5596 }, { "epoch": 0.3060016675095335, "grad_norm": 1.7076243162155151, "learning_rate": 1.680663752538747e-05, "loss": 1.2592, "step": 5597 }, { "epoch": 0.30605633995325504, "grad_norm": 1.6450176239013672, "learning_rate": 1.680529869716255e-05, "loss": 1.266, "step": 5598 }, { "epoch": 0.30611101239697663, "grad_norm": 1.4185090065002441, "learning_rate": 1.680395964169274e-05, "loss": 1.442, "step": 5599 }, { "epoch": 0.30616568484069817, "grad_norm": 1.1848258972167969, "learning_rate": 1.6802620359022757e-05, "loss": 1.4024, "step": 5600 }, { "epoch": 0.3062203572844197, "grad_norm": 1.2720109224319458, "learning_rate": 1.6801280849197316e-05, "loss": 1.2852, "step": 5601 }, { "epoch": 0.30627502972814125, "grad_norm": 1.7208271026611328, "learning_rate": 1.6799941112261143e-05, "loss": 1.4431, "step": 5602 }, { "epoch": 0.30632970217186284, "grad_norm": 1.3674230575561523, "learning_rate": 1.679860114825898e-05, "loss": 1.5858, "step": 5603 }, { "epoch": 0.3063843746155844, "grad_norm": 1.3684368133544922, "learning_rate": 1.6797260957235576e-05, "loss": 1.4707, "step": 5604 }, { "epoch": 0.3064390470593059, "grad_norm": 1.2058472633361816, "learning_rate": 1.6795920539235676e-05, "loss": 1.411, "step": 5605 }, { "epoch": 0.3064937195030275, "grad_norm": 1.0407148599624634, "learning_rate": 1.6794579894304043e-05, "loss": 1.4716, "step": 5606 }, { "epoch": 0.30654839194674904, "grad_norm": 1.421509027481079, "learning_rate": 1.679323902248544e-05, "loss": 1.3632, "step": 5607 }, { "epoch": 0.3066030643904706, "grad_norm": 1.7126903533935547, "learning_rate": 1.679189792382465e-05, "loss": 1.3455, "step": 5608 }, { "epoch": 0.3066577368341922, "grad_norm": 1.252241611480713, "learning_rate": 1.6790556598366447e-05, "loss": 1.5656, "step": 5609 }, { "epoch": 0.3067124092779137, "grad_norm": 1.3466922044754028, "learning_rate": 1.678921504615562e-05, "loss": 1.4141, "step": 5610 }, { "epoch": 0.30676708172163525, "grad_norm": 1.5326348543167114, "learning_rate": 1.678787326723698e-05, "loss": 1.2743, "step": 5611 }, { "epoch": 0.3068217541653568, "grad_norm": 1.64082670211792, "learning_rate": 1.6786531261655322e-05, "loss": 1.4139, "step": 5612 }, { "epoch": 0.3068764266090784, "grad_norm": 1.4974111318588257, "learning_rate": 1.6785189029455455e-05, "loss": 1.148, "step": 5613 }, { "epoch": 0.3069310990527999, "grad_norm": 1.0191665887832642, "learning_rate": 1.6783846570682207e-05, "loss": 1.3769, "step": 5614 }, { "epoch": 0.30698577149652145, "grad_norm": 1.8707795143127441, "learning_rate": 1.6782503885380404e-05, "loss": 1.1733, "step": 5615 }, { "epoch": 0.30704044394024305, "grad_norm": 1.4961918592453003, "learning_rate": 1.6781160973594884e-05, "loss": 1.4962, "step": 5616 }, { "epoch": 0.3070951163839646, "grad_norm": 1.4365705251693726, "learning_rate": 1.677981783537048e-05, "loss": 1.5474, "step": 5617 }, { "epoch": 0.3071497888276861, "grad_norm": 1.196275234222412, "learning_rate": 1.6778474470752053e-05, "loss": 1.3421, "step": 5618 }, { "epoch": 0.30720446127140766, "grad_norm": 1.6134735345840454, "learning_rate": 1.6777130879784456e-05, "loss": 1.3881, "step": 5619 }, { "epoch": 0.30725913371512925, "grad_norm": 1.829271674156189, "learning_rate": 1.6775787062512557e-05, "loss": 1.6024, "step": 5620 }, { "epoch": 0.3073138061588508, "grad_norm": 1.7236251831054688, "learning_rate": 1.6774443018981227e-05, "loss": 1.3496, "step": 5621 }, { "epoch": 0.3073684786025723, "grad_norm": 1.737693190574646, "learning_rate": 1.6773098749235348e-05, "loss": 1.2641, "step": 5622 }, { "epoch": 0.3074231510462939, "grad_norm": 1.6157439947128296, "learning_rate": 1.677175425331981e-05, "loss": 1.5346, "step": 5623 }, { "epoch": 0.30747782349001546, "grad_norm": 1.4728773832321167, "learning_rate": 1.6770409531279504e-05, "loss": 1.0374, "step": 5624 }, { "epoch": 0.307532495933737, "grad_norm": 1.5518525838851929, "learning_rate": 1.6769064583159338e-05, "loss": 1.7177, "step": 5625 }, { "epoch": 0.30758716837745853, "grad_norm": 1.3183029890060425, "learning_rate": 1.676771940900422e-05, "loss": 1.5207, "step": 5626 }, { "epoch": 0.3076418408211801, "grad_norm": 2.3950107097625732, "learning_rate": 1.676637400885907e-05, "loss": 1.491, "step": 5627 }, { "epoch": 0.30769651326490166, "grad_norm": 1.516573429107666, "learning_rate": 1.6765028382768815e-05, "loss": 1.4491, "step": 5628 }, { "epoch": 0.3077511857086232, "grad_norm": 1.4211783409118652, "learning_rate": 1.6763682530778388e-05, "loss": 1.3876, "step": 5629 }, { "epoch": 0.3078058581523448, "grad_norm": 1.4100914001464844, "learning_rate": 1.6762336452932734e-05, "loss": 1.3366, "step": 5630 }, { "epoch": 0.30786053059606633, "grad_norm": 1.69888436794281, "learning_rate": 1.676099014927679e-05, "loss": 1.4856, "step": 5631 }, { "epoch": 0.30791520303978787, "grad_norm": 1.6722280979156494, "learning_rate": 1.6759643619855525e-05, "loss": 1.5998, "step": 5632 }, { "epoch": 0.3079698754835094, "grad_norm": 1.5390011072158813, "learning_rate": 1.6758296864713897e-05, "loss": 1.4253, "step": 5633 }, { "epoch": 0.308024547927231, "grad_norm": 1.7200229167938232, "learning_rate": 1.6756949883896874e-05, "loss": 1.8844, "step": 5634 }, { "epoch": 0.30807922037095253, "grad_norm": 1.4855655431747437, "learning_rate": 1.6755602677449445e-05, "loss": 1.3705, "step": 5635 }, { "epoch": 0.30813389281467407, "grad_norm": 1.3509122133255005, "learning_rate": 1.6754255245416585e-05, "loss": 1.2701, "step": 5636 }, { "epoch": 0.30818856525839566, "grad_norm": 1.3924044370651245, "learning_rate": 1.6752907587843294e-05, "loss": 1.2869, "step": 5637 }, { "epoch": 0.3082432377021172, "grad_norm": 1.4959112405776978, "learning_rate": 1.6751559704774572e-05, "loss": 1.5013, "step": 5638 }, { "epoch": 0.30829791014583874, "grad_norm": 1.5725724697113037, "learning_rate": 1.6750211596255427e-05, "loss": 1.4849, "step": 5639 }, { "epoch": 0.3083525825895603, "grad_norm": 1.482086181640625, "learning_rate": 1.674886326233088e-05, "loss": 1.5715, "step": 5640 }, { "epoch": 0.30840725503328187, "grad_norm": 1.3427425622940063, "learning_rate": 1.6747514703045952e-05, "loss": 1.5268, "step": 5641 }, { "epoch": 0.3084619274770034, "grad_norm": 1.3458850383758545, "learning_rate": 1.6746165918445675e-05, "loss": 1.4695, "step": 5642 }, { "epoch": 0.30851659992072494, "grad_norm": 1.5112069845199585, "learning_rate": 1.6744816908575085e-05, "loss": 1.4469, "step": 5643 }, { "epoch": 0.30857127236444654, "grad_norm": 2.65610671043396, "learning_rate": 1.6743467673479233e-05, "loss": 1.2687, "step": 5644 }, { "epoch": 0.3086259448081681, "grad_norm": 1.6155614852905273, "learning_rate": 1.6742118213203173e-05, "loss": 1.5199, "step": 5645 }, { "epoch": 0.3086806172518896, "grad_norm": 1.2494831085205078, "learning_rate": 1.6740768527791962e-05, "loss": 1.4074, "step": 5646 }, { "epoch": 0.30873528969561115, "grad_norm": 1.4876288175582886, "learning_rate": 1.673941861729067e-05, "loss": 1.4877, "step": 5647 }, { "epoch": 0.30878996213933274, "grad_norm": 1.281490445137024, "learning_rate": 1.673806848174438e-05, "loss": 1.4443, "step": 5648 }, { "epoch": 0.3088446345830543, "grad_norm": 1.4652272462844849, "learning_rate": 1.673671812119817e-05, "loss": 1.4283, "step": 5649 }, { "epoch": 0.3088993070267758, "grad_norm": 1.7562272548675537, "learning_rate": 1.6735367535697136e-05, "loss": 1.3154, "step": 5650 }, { "epoch": 0.3089539794704974, "grad_norm": 1.3466839790344238, "learning_rate": 1.6734016725286374e-05, "loss": 1.3509, "step": 5651 }, { "epoch": 0.30900865191421895, "grad_norm": 1.4231340885162354, "learning_rate": 1.673266569001099e-05, "loss": 1.3412, "step": 5652 }, { "epoch": 0.3090633243579405, "grad_norm": 1.4382306337356567, "learning_rate": 1.6731314429916104e-05, "loss": 1.2305, "step": 5653 }, { "epoch": 0.309117996801662, "grad_norm": 1.7499215602874756, "learning_rate": 1.672996294504683e-05, "loss": 1.3279, "step": 5654 }, { "epoch": 0.3091726692453836, "grad_norm": 1.369307279586792, "learning_rate": 1.67286112354483e-05, "loss": 1.513, "step": 5655 }, { "epoch": 0.30922734168910515, "grad_norm": 1.2160217761993408, "learning_rate": 1.6727259301165654e-05, "loss": 1.4288, "step": 5656 }, { "epoch": 0.3092820141328267, "grad_norm": 1.3956682682037354, "learning_rate": 1.6725907142244033e-05, "loss": 1.4859, "step": 5657 }, { "epoch": 0.3093366865765483, "grad_norm": 1.7020727396011353, "learning_rate": 1.6724554758728587e-05, "loss": 1.2881, "step": 5658 }, { "epoch": 0.3093913590202698, "grad_norm": 1.669309139251709, "learning_rate": 1.6723202150664483e-05, "loss": 1.3193, "step": 5659 }, { "epoch": 0.30944603146399136, "grad_norm": 1.5451393127441406, "learning_rate": 1.672184931809688e-05, "loss": 1.4011, "step": 5660 }, { "epoch": 0.3095007039077129, "grad_norm": 1.2235060930252075, "learning_rate": 1.6720496261070956e-05, "loss": 1.3692, "step": 5661 }, { "epoch": 0.3095553763514345, "grad_norm": 1.4971108436584473, "learning_rate": 1.671914297963189e-05, "loss": 1.3651, "step": 5662 }, { "epoch": 0.309610048795156, "grad_norm": 1.7552903890609741, "learning_rate": 1.6717789473824875e-05, "loss": 1.4688, "step": 5663 }, { "epoch": 0.30966472123887756, "grad_norm": 1.6898561716079712, "learning_rate": 1.6716435743695104e-05, "loss": 1.3977, "step": 5664 }, { "epoch": 0.30971939368259915, "grad_norm": 1.6398701667785645, "learning_rate": 1.6715081789287784e-05, "loss": 1.5124, "step": 5665 }, { "epoch": 0.3097740661263207, "grad_norm": 2.08432674407959, "learning_rate": 1.6713727610648125e-05, "loss": 1.4757, "step": 5666 }, { "epoch": 0.30982873857004223, "grad_norm": 1.3274847269058228, "learning_rate": 1.671237320782135e-05, "loss": 1.5918, "step": 5667 }, { "epoch": 0.30988341101376377, "grad_norm": 1.4706522226333618, "learning_rate": 1.6711018580852677e-05, "loss": 1.2155, "step": 5668 }, { "epoch": 0.30993808345748536, "grad_norm": 1.6292295455932617, "learning_rate": 1.670966372978735e-05, "loss": 1.1525, "step": 5669 }, { "epoch": 0.3099927559012069, "grad_norm": 1.545606017112732, "learning_rate": 1.6708308654670605e-05, "loss": 1.4595, "step": 5670 }, { "epoch": 0.31004742834492843, "grad_norm": 1.683512568473816, "learning_rate": 1.6706953355547693e-05, "loss": 1.3904, "step": 5671 }, { "epoch": 0.31010210078865, "grad_norm": 1.8691209554672241, "learning_rate": 1.670559783246387e-05, "loss": 1.2466, "step": 5672 }, { "epoch": 0.31015677323237156, "grad_norm": 1.8908976316452026, "learning_rate": 1.6704242085464398e-05, "loss": 1.2836, "step": 5673 }, { "epoch": 0.3102114456760931, "grad_norm": 1.4626706838607788, "learning_rate": 1.6702886114594553e-05, "loss": 1.4765, "step": 5674 }, { "epoch": 0.31026611811981464, "grad_norm": 1.8968688249588013, "learning_rate": 1.670152991989961e-05, "loss": 1.5174, "step": 5675 }, { "epoch": 0.31032079056353623, "grad_norm": 1.349476933479309, "learning_rate": 1.670017350142486e-05, "loss": 1.4277, "step": 5676 }, { "epoch": 0.31037546300725777, "grad_norm": 1.414817214012146, "learning_rate": 1.669881685921559e-05, "loss": 1.48, "step": 5677 }, { "epoch": 0.3104301354509793, "grad_norm": 1.4782260656356812, "learning_rate": 1.669745999331711e-05, "loss": 1.301, "step": 5678 }, { "epoch": 0.3104848078947009, "grad_norm": 1.5628199577331543, "learning_rate": 1.6696102903774725e-05, "loss": 1.3634, "step": 5679 }, { "epoch": 0.31053948033842244, "grad_norm": 1.5964711904525757, "learning_rate": 1.6694745590633744e-05, "loss": 1.3719, "step": 5680 }, { "epoch": 0.310594152782144, "grad_norm": 1.4909934997558594, "learning_rate": 1.6693388053939508e-05, "loss": 1.3931, "step": 5681 }, { "epoch": 0.3106488252258655, "grad_norm": 1.4146348237991333, "learning_rate": 1.6692030293737332e-05, "loss": 1.5102, "step": 5682 }, { "epoch": 0.3107034976695871, "grad_norm": 2.3350589275360107, "learning_rate": 1.669067231007256e-05, "loss": 1.3917, "step": 5683 }, { "epoch": 0.31075817011330864, "grad_norm": 1.8471479415893555, "learning_rate": 1.6689314102990544e-05, "loss": 1.1459, "step": 5684 }, { "epoch": 0.3108128425570302, "grad_norm": 1.6463245153427124, "learning_rate": 1.6687955672536635e-05, "loss": 1.3714, "step": 5685 }, { "epoch": 0.31086751500075177, "grad_norm": 1.510076880455017, "learning_rate": 1.6686597018756188e-05, "loss": 1.754, "step": 5686 }, { "epoch": 0.3109221874444733, "grad_norm": 1.3499681949615479, "learning_rate": 1.6685238141694576e-05, "loss": 1.4515, "step": 5687 }, { "epoch": 0.31097685988819485, "grad_norm": 1.3845851421356201, "learning_rate": 1.6683879041397174e-05, "loss": 1.6461, "step": 5688 }, { "epoch": 0.3110315323319164, "grad_norm": 1.7191531658172607, "learning_rate": 1.668251971790937e-05, "loss": 1.7289, "step": 5689 }, { "epoch": 0.311086204775638, "grad_norm": 2.3479654788970947, "learning_rate": 1.668116017127655e-05, "loss": 1.1557, "step": 5690 }, { "epoch": 0.3111408772193595, "grad_norm": 1.6243946552276611, "learning_rate": 1.6679800401544116e-05, "loss": 1.5621, "step": 5691 }, { "epoch": 0.31119554966308105, "grad_norm": 1.6245700120925903, "learning_rate": 1.667844040875747e-05, "loss": 1.3178, "step": 5692 }, { "epoch": 0.31125022210680264, "grad_norm": 2.027578592300415, "learning_rate": 1.667708019296203e-05, "loss": 1.3382, "step": 5693 }, { "epoch": 0.3113048945505242, "grad_norm": 1.6605778932571411, "learning_rate": 1.6675719754203207e-05, "loss": 1.4743, "step": 5694 }, { "epoch": 0.3113595669942457, "grad_norm": 1.41554594039917, "learning_rate": 1.6674359092526442e-05, "loss": 1.4278, "step": 5695 }, { "epoch": 0.31141423943796726, "grad_norm": 1.2213003635406494, "learning_rate": 1.6672998207977165e-05, "loss": 1.465, "step": 5696 }, { "epoch": 0.31146891188168885, "grad_norm": 1.5361188650131226, "learning_rate": 1.667163710060082e-05, "loss": 1.7315, "step": 5697 }, { "epoch": 0.3115235843254104, "grad_norm": 1.4536117315292358, "learning_rate": 1.667027577044285e-05, "loss": 1.4409, "step": 5698 }, { "epoch": 0.3115782567691319, "grad_norm": 1.6534937620162964, "learning_rate": 1.6668914217548727e-05, "loss": 1.5996, "step": 5699 }, { "epoch": 0.3116329292128535, "grad_norm": 1.994755506515503, "learning_rate": 1.6667552441963904e-05, "loss": 1.2972, "step": 5700 }, { "epoch": 0.31168760165657505, "grad_norm": 1.6013376712799072, "learning_rate": 1.666619044373386e-05, "loss": 1.5078, "step": 5701 }, { "epoch": 0.3117422741002966, "grad_norm": 1.4461015462875366, "learning_rate": 1.666482822290408e-05, "loss": 1.6019, "step": 5702 }, { "epoch": 0.3117969465440181, "grad_norm": 1.5894290208816528, "learning_rate": 1.6663465779520042e-05, "loss": 1.4371, "step": 5703 }, { "epoch": 0.3118516189877397, "grad_norm": 1.6368821859359741, "learning_rate": 1.6662103113627246e-05, "loss": 1.366, "step": 5704 }, { "epoch": 0.31190629143146126, "grad_norm": 1.3602100610733032, "learning_rate": 1.666074022527119e-05, "loss": 1.4791, "step": 5705 }, { "epoch": 0.3119609638751828, "grad_norm": 1.335303544998169, "learning_rate": 1.6659377114497393e-05, "loss": 1.398, "step": 5706 }, { "epoch": 0.3120156363189044, "grad_norm": 1.5102362632751465, "learning_rate": 1.6658013781351367e-05, "loss": 1.3275, "step": 5707 }, { "epoch": 0.3120703087626259, "grad_norm": 1.435250163078308, "learning_rate": 1.6656650225878634e-05, "loss": 1.327, "step": 5708 }, { "epoch": 0.31212498120634746, "grad_norm": 1.9813263416290283, "learning_rate": 1.6655286448124734e-05, "loss": 1.2599, "step": 5709 }, { "epoch": 0.312179653650069, "grad_norm": 1.386608362197876, "learning_rate": 1.6653922448135202e-05, "loss": 1.1324, "step": 5710 }, { "epoch": 0.3122343260937906, "grad_norm": 1.328877329826355, "learning_rate": 1.6652558225955582e-05, "loss": 1.6546, "step": 5711 }, { "epoch": 0.31228899853751213, "grad_norm": 1.1733251810073853, "learning_rate": 1.665119378163143e-05, "loss": 1.548, "step": 5712 }, { "epoch": 0.31234367098123367, "grad_norm": 1.9000804424285889, "learning_rate": 1.6649829115208316e-05, "loss": 1.1638, "step": 5713 }, { "epoch": 0.31239834342495526, "grad_norm": 1.3295682668685913, "learning_rate": 1.66484642267318e-05, "loss": 1.3497, "step": 5714 }, { "epoch": 0.3124530158686768, "grad_norm": 1.8996303081512451, "learning_rate": 1.6647099116247465e-05, "loss": 1.2275, "step": 5715 }, { "epoch": 0.31250768831239834, "grad_norm": 1.662840723991394, "learning_rate": 1.6645733783800893e-05, "loss": 1.322, "step": 5716 }, { "epoch": 0.3125623607561199, "grad_norm": 1.2406059503555298, "learning_rate": 1.6644368229437673e-05, "loss": 1.5034, "step": 5717 }, { "epoch": 0.31261703319984147, "grad_norm": 1.4519799947738647, "learning_rate": 1.6643002453203405e-05, "loss": 1.5007, "step": 5718 }, { "epoch": 0.312671705643563, "grad_norm": 1.742306113243103, "learning_rate": 1.66416364551437e-05, "loss": 1.3931, "step": 5719 }, { "epoch": 0.31272637808728454, "grad_norm": 1.3183962106704712, "learning_rate": 1.664027023530417e-05, "loss": 1.4098, "step": 5720 }, { "epoch": 0.31278105053100613, "grad_norm": 1.3052401542663574, "learning_rate": 1.6638903793730434e-05, "loss": 1.4721, "step": 5721 }, { "epoch": 0.31283572297472767, "grad_norm": 1.5657352209091187, "learning_rate": 1.6637537130468115e-05, "loss": 1.2855, "step": 5722 }, { "epoch": 0.3128903954184492, "grad_norm": 1.429341197013855, "learning_rate": 1.6636170245562864e-05, "loss": 1.6718, "step": 5723 }, { "epoch": 0.31294506786217074, "grad_norm": 1.623735785484314, "learning_rate": 1.6634803139060313e-05, "loss": 1.4824, "step": 5724 }, { "epoch": 0.31299974030589234, "grad_norm": 1.4070801734924316, "learning_rate": 1.6633435811006117e-05, "loss": 1.387, "step": 5725 }, { "epoch": 0.3130544127496139, "grad_norm": 1.4692152738571167, "learning_rate": 1.663206826144593e-05, "loss": 1.4256, "step": 5726 }, { "epoch": 0.3131090851933354, "grad_norm": 1.34259033203125, "learning_rate": 1.6630700490425425e-05, "loss": 1.5677, "step": 5727 }, { "epoch": 0.313163757637057, "grad_norm": 1.4253944158554077, "learning_rate": 1.6629332497990268e-05, "loss": 1.6407, "step": 5728 }, { "epoch": 0.31321843008077854, "grad_norm": 1.3865768909454346, "learning_rate": 1.6627964284186146e-05, "loss": 1.3763, "step": 5729 }, { "epoch": 0.3132731025245001, "grad_norm": 1.4994378089904785, "learning_rate": 1.6626595849058742e-05, "loss": 1.4908, "step": 5730 }, { "epoch": 0.3133277749682216, "grad_norm": 1.3960411548614502, "learning_rate": 1.6625227192653756e-05, "loss": 1.3563, "step": 5731 }, { "epoch": 0.3133824474119432, "grad_norm": 1.3762881755828857, "learning_rate": 1.662385831501688e-05, "loss": 1.3738, "step": 5732 }, { "epoch": 0.31343711985566475, "grad_norm": 1.534857153892517, "learning_rate": 1.6622489216193835e-05, "loss": 1.3983, "step": 5733 }, { "epoch": 0.3134917922993863, "grad_norm": 1.4131766557693481, "learning_rate": 1.6621119896230336e-05, "loss": 1.2306, "step": 5734 }, { "epoch": 0.3135464647431079, "grad_norm": 1.2815266847610474, "learning_rate": 1.661975035517211e-05, "loss": 1.2077, "step": 5735 }, { "epoch": 0.3136011371868294, "grad_norm": 1.610687255859375, "learning_rate": 1.6618380593064882e-05, "loss": 1.3968, "step": 5736 }, { "epoch": 0.31365580963055095, "grad_norm": 1.6396836042404175, "learning_rate": 1.6617010609954396e-05, "loss": 1.4347, "step": 5737 }, { "epoch": 0.3137104820742725, "grad_norm": 1.5811104774475098, "learning_rate": 1.6615640405886398e-05, "loss": 1.3216, "step": 5738 }, { "epoch": 0.3137651545179941, "grad_norm": 1.7144749164581299, "learning_rate": 1.661426998090664e-05, "loss": 1.6159, "step": 5739 }, { "epoch": 0.3138198269617156, "grad_norm": 1.100196361541748, "learning_rate": 1.661289933506089e-05, "loss": 1.506, "step": 5740 }, { "epoch": 0.31387449940543716, "grad_norm": 1.8065849542617798, "learning_rate": 1.6611528468394913e-05, "loss": 1.5631, "step": 5741 }, { "epoch": 0.31392917184915875, "grad_norm": 1.6413161754608154, "learning_rate": 1.661015738095449e-05, "loss": 1.441, "step": 5742 }, { "epoch": 0.3139838442928803, "grad_norm": 1.4845237731933594, "learning_rate": 1.6608786072785393e-05, "loss": 1.4489, "step": 5743 }, { "epoch": 0.3140385167366018, "grad_norm": 1.6661640405654907, "learning_rate": 1.660741454393343e-05, "loss": 1.5874, "step": 5744 }, { "epoch": 0.31409318918032336, "grad_norm": 1.29182767868042, "learning_rate": 1.6606042794444383e-05, "loss": 1.4996, "step": 5745 }, { "epoch": 0.31414786162404496, "grad_norm": 1.7319939136505127, "learning_rate": 1.6604670824364067e-05, "loss": 1.4177, "step": 5746 }, { "epoch": 0.3142025340677665, "grad_norm": 1.5248496532440186, "learning_rate": 1.6603298633738293e-05, "loss": 1.4203, "step": 5747 }, { "epoch": 0.31425720651148803, "grad_norm": 1.3604289293289185, "learning_rate": 1.660192622261289e-05, "loss": 1.3061, "step": 5748 }, { "epoch": 0.3143118789552096, "grad_norm": 1.2153469324111938, "learning_rate": 1.660055359103367e-05, "loss": 1.3817, "step": 5749 }, { "epoch": 0.31436655139893116, "grad_norm": 1.405740737915039, "learning_rate": 1.6599180739046483e-05, "loss": 1.4682, "step": 5750 }, { "epoch": 0.3144212238426527, "grad_norm": 1.778218150138855, "learning_rate": 1.6597807666697157e-05, "loss": 1.4697, "step": 5751 }, { "epoch": 0.31447589628637423, "grad_norm": 1.5934733152389526, "learning_rate": 1.659643437403156e-05, "loss": 1.3894, "step": 5752 }, { "epoch": 0.3145305687300958, "grad_norm": 1.6832586526870728, "learning_rate": 1.6595060861095534e-05, "loss": 1.3345, "step": 5753 }, { "epoch": 0.31458524117381736, "grad_norm": 1.311733603477478, "learning_rate": 1.6593687127934953e-05, "loss": 1.5573, "step": 5754 }, { "epoch": 0.3146399136175389, "grad_norm": 1.5431857109069824, "learning_rate": 1.6592313174595685e-05, "loss": 1.3997, "step": 5755 }, { "epoch": 0.3146945860612605, "grad_norm": 1.3656197786331177, "learning_rate": 1.6590939001123614e-05, "loss": 1.4119, "step": 5756 }, { "epoch": 0.31474925850498203, "grad_norm": 2.1477203369140625, "learning_rate": 1.658956460756462e-05, "loss": 1.4168, "step": 5757 }, { "epoch": 0.31480393094870357, "grad_norm": 1.5613467693328857, "learning_rate": 1.6588189993964603e-05, "loss": 1.4022, "step": 5758 }, { "epoch": 0.3148586033924251, "grad_norm": 1.705854892730713, "learning_rate": 1.658681516036946e-05, "loss": 1.414, "step": 5759 }, { "epoch": 0.3149132758361467, "grad_norm": 1.4096065759658813, "learning_rate": 1.6585440106825107e-05, "loss": 1.4698, "step": 5760 }, { "epoch": 0.31496794827986824, "grad_norm": 1.2997217178344727, "learning_rate": 1.658406483337745e-05, "loss": 1.3472, "step": 5761 }, { "epoch": 0.3150226207235898, "grad_norm": 1.5453684329986572, "learning_rate": 1.6582689340072418e-05, "loss": 1.47, "step": 5762 }, { "epoch": 0.31507729316731137, "grad_norm": 1.2797547578811646, "learning_rate": 1.6581313626955948e-05, "loss": 1.5844, "step": 5763 }, { "epoch": 0.3151319656110329, "grad_norm": 1.5568922758102417, "learning_rate": 1.6579937694073967e-05, "loss": 1.2308, "step": 5764 }, { "epoch": 0.31518663805475444, "grad_norm": 1.382057547569275, "learning_rate": 1.657856154147243e-05, "loss": 1.4724, "step": 5765 }, { "epoch": 0.315241310498476, "grad_norm": 1.5595169067382812, "learning_rate": 1.6577185169197284e-05, "loss": 1.4415, "step": 5766 }, { "epoch": 0.3152959829421976, "grad_norm": 1.355209469795227, "learning_rate": 1.6575808577294492e-05, "loss": 1.546, "step": 5767 }, { "epoch": 0.3153506553859191, "grad_norm": 1.6202726364135742, "learning_rate": 1.6574431765810023e-05, "loss": 1.4072, "step": 5768 }, { "epoch": 0.31540532782964065, "grad_norm": 1.5530725717544556, "learning_rate": 1.6573054734789846e-05, "loss": 1.5639, "step": 5769 }, { "epoch": 0.31546000027336224, "grad_norm": 1.820172905921936, "learning_rate": 1.657167748427995e-05, "loss": 1.2431, "step": 5770 }, { "epoch": 0.3155146727170838, "grad_norm": 1.6945066452026367, "learning_rate": 1.657030001432632e-05, "loss": 1.4305, "step": 5771 }, { "epoch": 0.3155693451608053, "grad_norm": 1.2756253480911255, "learning_rate": 1.6568922324974958e-05, "loss": 1.4506, "step": 5772 }, { "epoch": 0.31562401760452685, "grad_norm": 1.4521477222442627, "learning_rate": 1.6567544416271862e-05, "loss": 1.5081, "step": 5773 }, { "epoch": 0.31567869004824844, "grad_norm": 1.6184128522872925, "learning_rate": 1.6566166288263046e-05, "loss": 1.3694, "step": 5774 }, { "epoch": 0.31573336249197, "grad_norm": 1.7350022792816162, "learning_rate": 1.656478794099453e-05, "loss": 1.4791, "step": 5775 }, { "epoch": 0.3157880349356915, "grad_norm": 2.3010284900665283, "learning_rate": 1.6563409374512344e-05, "loss": 1.2546, "step": 5776 }, { "epoch": 0.3158427073794131, "grad_norm": 1.9687585830688477, "learning_rate": 1.6562030588862513e-05, "loss": 1.4127, "step": 5777 }, { "epoch": 0.31589737982313465, "grad_norm": 1.7931232452392578, "learning_rate": 1.6560651584091083e-05, "loss": 1.5406, "step": 5778 }, { "epoch": 0.3159520522668562, "grad_norm": 1.3835809230804443, "learning_rate": 1.6559272360244104e-05, "loss": 1.3725, "step": 5779 }, { "epoch": 0.3160067247105777, "grad_norm": 1.1777247190475464, "learning_rate": 1.655789291736763e-05, "loss": 1.3052, "step": 5780 }, { "epoch": 0.3160613971542993, "grad_norm": 1.6780232191085815, "learning_rate": 1.6556513255507714e-05, "loss": 1.7352, "step": 5781 }, { "epoch": 0.31611606959802085, "grad_norm": 1.4314615726470947, "learning_rate": 1.6555133374710442e-05, "loss": 1.2749, "step": 5782 }, { "epoch": 0.3161707420417424, "grad_norm": 1.8021752834320068, "learning_rate": 1.655375327502189e-05, "loss": 1.447, "step": 5783 }, { "epoch": 0.316225414485464, "grad_norm": 1.3754043579101562, "learning_rate": 1.6552372956488128e-05, "loss": 1.5214, "step": 5784 }, { "epoch": 0.3162800869291855, "grad_norm": 1.7332338094711304, "learning_rate": 1.655099241915526e-05, "loss": 1.233, "step": 5785 }, { "epoch": 0.31633475937290706, "grad_norm": 1.6368005275726318, "learning_rate": 1.6549611663069383e-05, "loss": 1.6976, "step": 5786 }, { "epoch": 0.3163894318166286, "grad_norm": 1.4346168041229248, "learning_rate": 1.6548230688276605e-05, "loss": 1.6813, "step": 5787 }, { "epoch": 0.3164441042603502, "grad_norm": 1.1874067783355713, "learning_rate": 1.6546849494823037e-05, "loss": 1.5773, "step": 5788 }, { "epoch": 0.3164987767040717, "grad_norm": 1.38876211643219, "learning_rate": 1.6545468082754802e-05, "loss": 1.3586, "step": 5789 }, { "epoch": 0.31655344914779326, "grad_norm": 1.4153461456298828, "learning_rate": 1.654408645211803e-05, "loss": 1.5764, "step": 5790 }, { "epoch": 0.31660812159151486, "grad_norm": 1.6021279096603394, "learning_rate": 1.654270460295885e-05, "loss": 1.5143, "step": 5791 }, { "epoch": 0.3166627940352364, "grad_norm": 1.5256754159927368, "learning_rate": 1.6541322535323417e-05, "loss": 1.2484, "step": 5792 }, { "epoch": 0.31671746647895793, "grad_norm": 1.5029085874557495, "learning_rate": 1.653994024925787e-05, "loss": 1.4715, "step": 5793 }, { "epoch": 0.31677213892267947, "grad_norm": 1.7258434295654297, "learning_rate": 1.6538557744808373e-05, "loss": 1.4911, "step": 5794 }, { "epoch": 0.31682681136640106, "grad_norm": 2.0459632873535156, "learning_rate": 1.653717502202109e-05, "loss": 1.3434, "step": 5795 }, { "epoch": 0.3168814838101226, "grad_norm": 2.2078583240509033, "learning_rate": 1.6535792080942194e-05, "loss": 1.3841, "step": 5796 }, { "epoch": 0.31693615625384414, "grad_norm": 1.6437677145004272, "learning_rate": 1.653440892161786e-05, "loss": 1.2726, "step": 5797 }, { "epoch": 0.31699082869756573, "grad_norm": 1.8322862386703491, "learning_rate": 1.6533025544094284e-05, "loss": 1.4922, "step": 5798 }, { "epoch": 0.31704550114128727, "grad_norm": 1.9770710468292236, "learning_rate": 1.653164194841765e-05, "loss": 1.3679, "step": 5799 }, { "epoch": 0.3171001735850088, "grad_norm": 1.7677826881408691, "learning_rate": 1.6530258134634168e-05, "loss": 1.263, "step": 5800 }, { "epoch": 0.31715484602873034, "grad_norm": 1.5710179805755615, "learning_rate": 1.652887410279004e-05, "loss": 1.343, "step": 5801 }, { "epoch": 0.31720951847245193, "grad_norm": 1.7284356355667114, "learning_rate": 1.652748985293149e-05, "loss": 1.4494, "step": 5802 }, { "epoch": 0.31726419091617347, "grad_norm": 1.6911075115203857, "learning_rate": 1.652610538510473e-05, "loss": 1.4497, "step": 5803 }, { "epoch": 0.317318863359895, "grad_norm": 1.3411579132080078, "learning_rate": 1.6524720699356e-05, "loss": 1.612, "step": 5804 }, { "epoch": 0.3173735358036166, "grad_norm": 2.4572668075561523, "learning_rate": 1.652333579573154e-05, "loss": 1.3664, "step": 5805 }, { "epoch": 0.31742820824733814, "grad_norm": 1.2271827459335327, "learning_rate": 1.6521950674277585e-05, "loss": 1.5697, "step": 5806 }, { "epoch": 0.3174828806910597, "grad_norm": 1.6047488451004028, "learning_rate": 1.6520565335040392e-05, "loss": 1.2054, "step": 5807 }, { "epoch": 0.3175375531347812, "grad_norm": 2.2263317108154297, "learning_rate": 1.6519179778066226e-05, "loss": 1.2701, "step": 5808 }, { "epoch": 0.3175922255785028, "grad_norm": 1.7097634077072144, "learning_rate": 1.6517794003401345e-05, "loss": 1.4268, "step": 5809 }, { "epoch": 0.31764689802222434, "grad_norm": 1.3810195922851562, "learning_rate": 1.651640801109203e-05, "loss": 1.4778, "step": 5810 }, { "epoch": 0.3177015704659459, "grad_norm": 1.5918253660202026, "learning_rate": 1.651502180118456e-05, "loss": 1.6199, "step": 5811 }, { "epoch": 0.3177562429096675, "grad_norm": 1.676749587059021, "learning_rate": 1.6513635373725224e-05, "loss": 1.332, "step": 5812 }, { "epoch": 0.317810915353389, "grad_norm": 1.29435133934021, "learning_rate": 1.6512248728760316e-05, "loss": 1.6732, "step": 5813 }, { "epoch": 0.31786558779711055, "grad_norm": 1.1378203630447388, "learning_rate": 1.6510861866336145e-05, "loss": 1.5581, "step": 5814 }, { "epoch": 0.31792026024083214, "grad_norm": 1.3222296237945557, "learning_rate": 1.6509474786499017e-05, "loss": 1.3848, "step": 5815 }, { "epoch": 0.3179749326845537, "grad_norm": 1.5719023942947388, "learning_rate": 1.650808748929525e-05, "loss": 1.6295, "step": 5816 }, { "epoch": 0.3180296051282752, "grad_norm": 1.3296337127685547, "learning_rate": 1.6506699974771174e-05, "loss": 1.46, "step": 5817 }, { "epoch": 0.31808427757199675, "grad_norm": 1.1851091384887695, "learning_rate": 1.650531224297311e-05, "loss": 1.3686, "step": 5818 }, { "epoch": 0.31813895001571835, "grad_norm": 1.722719669342041, "learning_rate": 1.6503924293947408e-05, "loss": 1.6403, "step": 5819 }, { "epoch": 0.3181936224594399, "grad_norm": 1.5860657691955566, "learning_rate": 1.6502536127740414e-05, "loss": 1.4436, "step": 5820 }, { "epoch": 0.3182482949031614, "grad_norm": 1.5214406251907349, "learning_rate": 1.650114774439848e-05, "loss": 1.421, "step": 5821 }, { "epoch": 0.318302967346883, "grad_norm": 1.4333994388580322, "learning_rate": 1.6499759143967966e-05, "loss": 1.4443, "step": 5822 }, { "epoch": 0.31835763979060455, "grad_norm": 1.4405502080917358, "learning_rate": 1.6498370326495242e-05, "loss": 1.38, "step": 5823 }, { "epoch": 0.3184123122343261, "grad_norm": 1.5577377080917358, "learning_rate": 1.6496981292026687e-05, "loss": 1.3775, "step": 5824 }, { "epoch": 0.3184669846780476, "grad_norm": 1.9324959516525269, "learning_rate": 1.6495592040608677e-05, "loss": 1.2597, "step": 5825 }, { "epoch": 0.3185216571217692, "grad_norm": 1.1297305822372437, "learning_rate": 1.6494202572287607e-05, "loss": 1.5955, "step": 5826 }, { "epoch": 0.31857632956549076, "grad_norm": 1.5802034139633179, "learning_rate": 1.6492812887109876e-05, "loss": 1.1434, "step": 5827 }, { "epoch": 0.3186310020092123, "grad_norm": 1.172353982925415, "learning_rate": 1.6491422985121882e-05, "loss": 1.6075, "step": 5828 }, { "epoch": 0.3186856744529339, "grad_norm": 1.682131052017212, "learning_rate": 1.6490032866370046e-05, "loss": 1.7247, "step": 5829 }, { "epoch": 0.3187403468966554, "grad_norm": 1.6953145265579224, "learning_rate": 1.648864253090078e-05, "loss": 1.4101, "step": 5830 }, { "epoch": 0.31879501934037696, "grad_norm": 1.9144186973571777, "learning_rate": 1.648725197876052e-05, "loss": 1.4094, "step": 5831 }, { "epoch": 0.3188496917840985, "grad_norm": 1.1540672779083252, "learning_rate": 1.648586120999569e-05, "loss": 1.5062, "step": 5832 }, { "epoch": 0.3189043642278201, "grad_norm": 1.5053967237472534, "learning_rate": 1.6484470224652734e-05, "loss": 1.1974, "step": 5833 }, { "epoch": 0.31895903667154163, "grad_norm": 1.3980753421783447, "learning_rate": 1.6483079022778102e-05, "loss": 1.4855, "step": 5834 }, { "epoch": 0.31901370911526317, "grad_norm": 1.5062296390533447, "learning_rate": 1.648168760441825e-05, "loss": 1.2378, "step": 5835 }, { "epoch": 0.31906838155898476, "grad_norm": 1.4333754777908325, "learning_rate": 1.6480295969619636e-05, "loss": 1.5261, "step": 5836 }, { "epoch": 0.3191230540027063, "grad_norm": 1.6362134218215942, "learning_rate": 1.6478904118428735e-05, "loss": 1.4449, "step": 5837 }, { "epoch": 0.31917772644642783, "grad_norm": 1.3437395095825195, "learning_rate": 1.647751205089202e-05, "loss": 1.4388, "step": 5838 }, { "epoch": 0.31923239889014937, "grad_norm": 1.8139468431472778, "learning_rate": 1.647611976705598e-05, "loss": 1.3822, "step": 5839 }, { "epoch": 0.31928707133387096, "grad_norm": 1.807451605796814, "learning_rate": 1.64747272669671e-05, "loss": 1.1498, "step": 5840 }, { "epoch": 0.3193417437775925, "grad_norm": 1.3210197687149048, "learning_rate": 1.6473334550671887e-05, "loss": 1.7045, "step": 5841 }, { "epoch": 0.31939641622131404, "grad_norm": 1.4879069328308105, "learning_rate": 1.6471941618216845e-05, "loss": 1.271, "step": 5842 }, { "epoch": 0.31945108866503563, "grad_norm": 1.4280387163162231, "learning_rate": 1.6470548469648486e-05, "loss": 1.4678, "step": 5843 }, { "epoch": 0.31950576110875717, "grad_norm": 1.4033610820770264, "learning_rate": 1.6469155105013324e-05, "loss": 1.3232, "step": 5844 }, { "epoch": 0.3195604335524787, "grad_norm": 1.4773781299591064, "learning_rate": 1.6467761524357896e-05, "loss": 1.5755, "step": 5845 }, { "epoch": 0.31961510599620024, "grad_norm": 1.126787781715393, "learning_rate": 1.6466367727728735e-05, "loss": 1.5404, "step": 5846 }, { "epoch": 0.31966977843992184, "grad_norm": 1.1991413831710815, "learning_rate": 1.646497371517238e-05, "loss": 1.7969, "step": 5847 }, { "epoch": 0.3197244508836434, "grad_norm": 1.4618052244186401, "learning_rate": 1.6463579486735383e-05, "loss": 1.4032, "step": 5848 }, { "epoch": 0.3197791233273649, "grad_norm": 1.661153793334961, "learning_rate": 1.6462185042464298e-05, "loss": 1.3041, "step": 5849 }, { "epoch": 0.3198337957710865, "grad_norm": 1.6926958560943604, "learning_rate": 1.6460790382405688e-05, "loss": 1.372, "step": 5850 }, { "epoch": 0.31988846821480804, "grad_norm": 1.3575165271759033, "learning_rate": 1.6459395506606133e-05, "loss": 1.3498, "step": 5851 }, { "epoch": 0.3199431406585296, "grad_norm": 1.3043959140777588, "learning_rate": 1.64580004151122e-05, "loss": 1.182, "step": 5852 }, { "epoch": 0.3199978131022511, "grad_norm": 1.63236665725708, "learning_rate": 1.645660510797048e-05, "loss": 1.2997, "step": 5853 }, { "epoch": 0.3200524855459727, "grad_norm": 1.259765386581421, "learning_rate": 1.6455209585227568e-05, "loss": 1.5267, "step": 5854 }, { "epoch": 0.32010715798969425, "grad_norm": 1.6823451519012451, "learning_rate": 1.6453813846930057e-05, "loss": 1.6572, "step": 5855 }, { "epoch": 0.3201618304334158, "grad_norm": 1.6513792276382446, "learning_rate": 1.645241789312456e-05, "loss": 1.5671, "step": 5856 }, { "epoch": 0.3202165028771374, "grad_norm": 1.5688591003417969, "learning_rate": 1.6451021723857683e-05, "loss": 1.1953, "step": 5857 }, { "epoch": 0.3202711753208589, "grad_norm": 1.5644848346710205, "learning_rate": 1.6449625339176056e-05, "loss": 1.3641, "step": 5858 }, { "epoch": 0.32032584776458045, "grad_norm": 1.1437745094299316, "learning_rate": 1.6448228739126302e-05, "loss": 1.4875, "step": 5859 }, { "epoch": 0.320380520208302, "grad_norm": 1.5087382793426514, "learning_rate": 1.6446831923755065e-05, "loss": 1.3538, "step": 5860 }, { "epoch": 0.3204351926520236, "grad_norm": 1.4670265913009644, "learning_rate": 1.6445434893108978e-05, "loss": 1.3842, "step": 5861 }, { "epoch": 0.3204898650957451, "grad_norm": 1.6168042421340942, "learning_rate": 1.6444037647234695e-05, "loss": 1.3439, "step": 5862 }, { "epoch": 0.32054453753946666, "grad_norm": 1.4012848138809204, "learning_rate": 1.6442640186178875e-05, "loss": 1.323, "step": 5863 }, { "epoch": 0.32059920998318825, "grad_norm": 1.4373623132705688, "learning_rate": 1.644124250998818e-05, "loss": 1.485, "step": 5864 }, { "epoch": 0.3206538824269098, "grad_norm": 2.051023006439209, "learning_rate": 1.6439844618709285e-05, "loss": 1.4634, "step": 5865 }, { "epoch": 0.3207085548706313, "grad_norm": 2.0633950233459473, "learning_rate": 1.6438446512388862e-05, "loss": 1.4477, "step": 5866 }, { "epoch": 0.32076322731435286, "grad_norm": 1.6524958610534668, "learning_rate": 1.6437048191073608e-05, "loss": 1.3379, "step": 5867 }, { "epoch": 0.32081789975807445, "grad_norm": 1.6720951795578003, "learning_rate": 1.6435649654810204e-05, "loss": 1.2289, "step": 5868 }, { "epoch": 0.320872572201796, "grad_norm": 1.4803857803344727, "learning_rate": 1.6434250903645356e-05, "loss": 1.2121, "step": 5869 }, { "epoch": 0.32092724464551753, "grad_norm": 1.5894763469696045, "learning_rate": 1.6432851937625776e-05, "loss": 1.4788, "step": 5870 }, { "epoch": 0.3209819170892391, "grad_norm": 1.7021669149398804, "learning_rate": 1.6431452756798174e-05, "loss": 1.2534, "step": 5871 }, { "epoch": 0.32103658953296066, "grad_norm": 1.3827075958251953, "learning_rate": 1.6430053361209274e-05, "loss": 1.4914, "step": 5872 }, { "epoch": 0.3210912619766822, "grad_norm": 1.5606526136398315, "learning_rate": 1.64286537509058e-05, "loss": 1.5158, "step": 5873 }, { "epoch": 0.32114593442040373, "grad_norm": 1.4616498947143555, "learning_rate": 1.6427253925934496e-05, "loss": 1.1581, "step": 5874 }, { "epoch": 0.3212006068641253, "grad_norm": 1.858760952949524, "learning_rate": 1.6425853886342096e-05, "loss": 1.1859, "step": 5875 }, { "epoch": 0.32125527930784686, "grad_norm": 1.352031946182251, "learning_rate": 1.642445363217536e-05, "loss": 1.5384, "step": 5876 }, { "epoch": 0.3213099517515684, "grad_norm": 2.026956796646118, "learning_rate": 1.6423053163481042e-05, "loss": 1.3694, "step": 5877 }, { "epoch": 0.32136462419529, "grad_norm": 1.32326340675354, "learning_rate": 1.6421652480305904e-05, "loss": 1.2152, "step": 5878 }, { "epoch": 0.32141929663901153, "grad_norm": 1.5053191184997559, "learning_rate": 1.642025158269672e-05, "loss": 1.6856, "step": 5879 }, { "epoch": 0.32147396908273307, "grad_norm": 1.5345001220703125, "learning_rate": 1.6418850470700274e-05, "loss": 1.5157, "step": 5880 }, { "epoch": 0.3215286415264546, "grad_norm": 1.1636592149734497, "learning_rate": 1.6417449144363346e-05, "loss": 1.5439, "step": 5881 }, { "epoch": 0.3215833139701762, "grad_norm": 1.4983289241790771, "learning_rate": 1.6416047603732734e-05, "loss": 1.3157, "step": 5882 }, { "epoch": 0.32163798641389774, "grad_norm": 1.6615787744522095, "learning_rate": 1.6414645848855234e-05, "loss": 1.4705, "step": 5883 }, { "epoch": 0.3216926588576193, "grad_norm": 1.354387879371643, "learning_rate": 1.6413243879777657e-05, "loss": 1.4634, "step": 5884 }, { "epoch": 0.32174733130134087, "grad_norm": 1.7033412456512451, "learning_rate": 1.641184169654682e-05, "loss": 1.4898, "step": 5885 }, { "epoch": 0.3218020037450624, "grad_norm": 1.310022234916687, "learning_rate": 1.641043929920954e-05, "loss": 1.481, "step": 5886 }, { "epoch": 0.32185667618878394, "grad_norm": 1.4505821466445923, "learning_rate": 1.6409036687812654e-05, "loss": 1.4264, "step": 5887 }, { "epoch": 0.3219113486325055, "grad_norm": 1.3084840774536133, "learning_rate": 1.640763386240299e-05, "loss": 1.506, "step": 5888 }, { "epoch": 0.32196602107622707, "grad_norm": 1.328668236732483, "learning_rate": 1.6406230823027398e-05, "loss": 1.401, "step": 5889 }, { "epoch": 0.3220206935199486, "grad_norm": 1.3026140928268433, "learning_rate": 1.640482756973272e-05, "loss": 1.3553, "step": 5890 }, { "epoch": 0.32207536596367015, "grad_norm": 1.5481023788452148, "learning_rate": 1.6403424102565826e-05, "loss": 1.3761, "step": 5891 }, { "epoch": 0.32213003840739174, "grad_norm": 1.544403076171875, "learning_rate": 1.640202042157357e-05, "loss": 1.502, "step": 5892 }, { "epoch": 0.3221847108511133, "grad_norm": 1.2956780195236206, "learning_rate": 1.6400616526802835e-05, "loss": 1.565, "step": 5893 }, { "epoch": 0.3222393832948348, "grad_norm": 1.417154312133789, "learning_rate": 1.6399212418300496e-05, "loss": 1.4231, "step": 5894 }, { "epoch": 0.32229405573855635, "grad_norm": 1.481022834777832, "learning_rate": 1.639780809611343e-05, "loss": 1.5051, "step": 5895 }, { "epoch": 0.32234872818227794, "grad_norm": 1.77730131149292, "learning_rate": 1.6396403560288546e-05, "loss": 1.1761, "step": 5896 }, { "epoch": 0.3224034006259995, "grad_norm": 2.1723697185516357, "learning_rate": 1.6394998810872734e-05, "loss": 1.4201, "step": 5897 }, { "epoch": 0.322458073069721, "grad_norm": 1.7854326963424683, "learning_rate": 1.6393593847912905e-05, "loss": 1.4817, "step": 5898 }, { "epoch": 0.3225127455134426, "grad_norm": 1.586259126663208, "learning_rate": 1.6392188671455976e-05, "loss": 1.3474, "step": 5899 }, { "epoch": 0.32256741795716415, "grad_norm": 1.2718297243118286, "learning_rate": 1.6390783281548865e-05, "loss": 1.4388, "step": 5900 }, { "epoch": 0.3226220904008857, "grad_norm": 1.5065274238586426, "learning_rate": 1.6389377678238508e-05, "loss": 1.3958, "step": 5901 }, { "epoch": 0.3226767628446072, "grad_norm": 1.3601194620132446, "learning_rate": 1.6387971861571834e-05, "loss": 1.3971, "step": 5902 }, { "epoch": 0.3227314352883288, "grad_norm": 1.663293480873108, "learning_rate": 1.638656583159579e-05, "loss": 1.5645, "step": 5903 }, { "epoch": 0.32278610773205035, "grad_norm": 1.789211630821228, "learning_rate": 1.6385159588357327e-05, "loss": 1.5128, "step": 5904 }, { "epoch": 0.3228407801757719, "grad_norm": 1.205823540687561, "learning_rate": 1.6383753131903405e-05, "loss": 1.5356, "step": 5905 }, { "epoch": 0.3228954526194935, "grad_norm": 1.2646797895431519, "learning_rate": 1.638234646228098e-05, "loss": 1.4658, "step": 5906 }, { "epoch": 0.322950125063215, "grad_norm": 1.6581324338912964, "learning_rate": 1.6380939579537033e-05, "loss": 1.4361, "step": 5907 }, { "epoch": 0.32300479750693656, "grad_norm": 1.4146661758422852, "learning_rate": 1.6379532483718543e-05, "loss": 1.4564, "step": 5908 }, { "epoch": 0.3230594699506581, "grad_norm": 1.485207200050354, "learning_rate": 1.6378125174872486e-05, "loss": 1.3567, "step": 5909 }, { "epoch": 0.3231141423943797, "grad_norm": 1.4225410223007202, "learning_rate": 1.637671765304587e-05, "loss": 1.326, "step": 5910 }, { "epoch": 0.3231688148381012, "grad_norm": 1.0675766468048096, "learning_rate": 1.637530991828568e-05, "loss": 1.561, "step": 5911 }, { "epoch": 0.32322348728182276, "grad_norm": 1.2890690565109253, "learning_rate": 1.6373901970638943e-05, "loss": 1.3963, "step": 5912 }, { "epoch": 0.32327815972554436, "grad_norm": 1.4552456140518188, "learning_rate": 1.6372493810152655e-05, "loss": 1.5332, "step": 5913 }, { "epoch": 0.3233328321692659, "grad_norm": 1.5368695259094238, "learning_rate": 1.6371085436873847e-05, "loss": 1.5418, "step": 5914 }, { "epoch": 0.32338750461298743, "grad_norm": 1.3461211919784546, "learning_rate": 1.6369676850849547e-05, "loss": 1.4025, "step": 5915 }, { "epoch": 0.32344217705670897, "grad_norm": 1.6360985040664673, "learning_rate": 1.6368268052126787e-05, "loss": 1.5367, "step": 5916 }, { "epoch": 0.32349684950043056, "grad_norm": 1.6003190279006958, "learning_rate": 1.6366859040752614e-05, "loss": 1.5028, "step": 5917 }, { "epoch": 0.3235515219441521, "grad_norm": 1.9352890253067017, "learning_rate": 1.6365449816774076e-05, "loss": 1.368, "step": 5918 }, { "epoch": 0.32360619438787364, "grad_norm": 1.5378546714782715, "learning_rate": 1.6364040380238234e-05, "loss": 1.4932, "step": 5919 }, { "epoch": 0.32366086683159523, "grad_norm": 1.7617014646530151, "learning_rate": 1.6362630731192152e-05, "loss": 1.1844, "step": 5920 }, { "epoch": 0.32371553927531677, "grad_norm": 1.4160653352737427, "learning_rate": 1.6361220869682896e-05, "loss": 1.4846, "step": 5921 }, { "epoch": 0.3237702117190383, "grad_norm": 1.512423038482666, "learning_rate": 1.635981079575755e-05, "loss": 1.3429, "step": 5922 }, { "epoch": 0.32382488416275984, "grad_norm": 1.276005506515503, "learning_rate": 1.6358400509463198e-05, "loss": 1.4289, "step": 5923 }, { "epoch": 0.32387955660648143, "grad_norm": 1.5583266019821167, "learning_rate": 1.635699001084693e-05, "loss": 1.6377, "step": 5924 }, { "epoch": 0.32393422905020297, "grad_norm": 2.291757583618164, "learning_rate": 1.635557929995585e-05, "loss": 1.1702, "step": 5925 }, { "epoch": 0.3239889014939245, "grad_norm": 1.6814101934432983, "learning_rate": 1.6354168376837063e-05, "loss": 1.4879, "step": 5926 }, { "epoch": 0.3240435739376461, "grad_norm": 1.3860095739364624, "learning_rate": 1.6352757241537682e-05, "loss": 1.4699, "step": 5927 }, { "epoch": 0.32409824638136764, "grad_norm": 1.4192568063735962, "learning_rate": 1.635134589410483e-05, "loss": 1.2216, "step": 5928 }, { "epoch": 0.3241529188250892, "grad_norm": 1.1505334377288818, "learning_rate": 1.634993433458564e-05, "loss": 1.4268, "step": 5929 }, { "epoch": 0.3242075912688107, "grad_norm": 1.8486557006835938, "learning_rate": 1.6348522563027236e-05, "loss": 1.6202, "step": 5930 }, { "epoch": 0.3242622637125323, "grad_norm": 1.5774468183517456, "learning_rate": 1.634711057947677e-05, "loss": 1.6436, "step": 5931 }, { "epoch": 0.32431693615625384, "grad_norm": 1.530867099761963, "learning_rate": 1.6345698383981387e-05, "loss": 1.4228, "step": 5932 }, { "epoch": 0.3243716085999754, "grad_norm": 1.4006026983261108, "learning_rate": 1.634428597658824e-05, "loss": 1.5638, "step": 5933 }, { "epoch": 0.324426281043697, "grad_norm": 1.3608108758926392, "learning_rate": 1.6342873357344503e-05, "loss": 1.4949, "step": 5934 }, { "epoch": 0.3244809534874185, "grad_norm": 1.396440029144287, "learning_rate": 1.6341460526297335e-05, "loss": 1.524, "step": 5935 }, { "epoch": 0.32453562593114005, "grad_norm": 1.5619009733200073, "learning_rate": 1.6340047483493923e-05, "loss": 1.3225, "step": 5936 }, { "epoch": 0.3245902983748616, "grad_norm": 1.5811525583267212, "learning_rate": 1.633863422898145e-05, "loss": 1.5516, "step": 5937 }, { "epoch": 0.3246449708185832, "grad_norm": 1.784597396850586, "learning_rate": 1.63372207628071e-05, "loss": 1.3975, "step": 5938 }, { "epoch": 0.3246996432623047, "grad_norm": 1.55703866481781, "learning_rate": 1.6335807085018082e-05, "loss": 1.1095, "step": 5939 }, { "epoch": 0.32475431570602625, "grad_norm": 1.1177879571914673, "learning_rate": 1.6334393195661597e-05, "loss": 1.4996, "step": 5940 }, { "epoch": 0.32480898814974785, "grad_norm": 1.649773120880127, "learning_rate": 1.6332979094784857e-05, "loss": 1.4146, "step": 5941 }, { "epoch": 0.3248636605934694, "grad_norm": 1.8206534385681152, "learning_rate": 1.6331564782435087e-05, "loss": 1.367, "step": 5942 }, { "epoch": 0.3249183330371909, "grad_norm": 1.2173539400100708, "learning_rate": 1.6330150258659513e-05, "loss": 1.5676, "step": 5943 }, { "epoch": 0.32497300548091246, "grad_norm": 1.3358583450317383, "learning_rate": 1.6328735523505366e-05, "loss": 1.6307, "step": 5944 }, { "epoch": 0.32502767792463405, "grad_norm": 1.501266360282898, "learning_rate": 1.6327320577019887e-05, "loss": 1.3558, "step": 5945 }, { "epoch": 0.3250823503683556, "grad_norm": 1.5871636867523193, "learning_rate": 1.6325905419250327e-05, "loss": 1.2654, "step": 5946 }, { "epoch": 0.3251370228120771, "grad_norm": 1.3921445608139038, "learning_rate": 1.6324490050243943e-05, "loss": 1.4506, "step": 5947 }, { "epoch": 0.3251916952557987, "grad_norm": 1.235223650932312, "learning_rate": 1.6323074470047993e-05, "loss": 1.416, "step": 5948 }, { "epoch": 0.32524636769952026, "grad_norm": 1.2933839559555054, "learning_rate": 1.6321658678709752e-05, "loss": 1.6109, "step": 5949 }, { "epoch": 0.3253010401432418, "grad_norm": 1.7859677076339722, "learning_rate": 1.632024267627649e-05, "loss": 1.2343, "step": 5950 }, { "epoch": 0.32535571258696333, "grad_norm": 1.6123169660568237, "learning_rate": 1.6318826462795497e-05, "loss": 1.4436, "step": 5951 }, { "epoch": 0.3254103850306849, "grad_norm": 1.841007947921753, "learning_rate": 1.631741003831406e-05, "loss": 1.4956, "step": 5952 }, { "epoch": 0.32546505747440646, "grad_norm": 1.416930913925171, "learning_rate": 1.6315993402879477e-05, "loss": 1.604, "step": 5953 }, { "epoch": 0.325519729918128, "grad_norm": 1.512703537940979, "learning_rate": 1.6314576556539053e-05, "loss": 1.3026, "step": 5954 }, { "epoch": 0.3255744023618496, "grad_norm": 1.483385682106018, "learning_rate": 1.63131594993401e-05, "loss": 1.336, "step": 5955 }, { "epoch": 0.32562907480557113, "grad_norm": 1.5088026523590088, "learning_rate": 1.6311742231329936e-05, "loss": 1.6872, "step": 5956 }, { "epoch": 0.32568374724929267, "grad_norm": 1.8095284700393677, "learning_rate": 1.6310324752555893e-05, "loss": 1.2769, "step": 5957 }, { "epoch": 0.3257384196930142, "grad_norm": 2.439584970474243, "learning_rate": 1.6308907063065294e-05, "loss": 1.2501, "step": 5958 }, { "epoch": 0.3257930921367358, "grad_norm": 1.8640882968902588, "learning_rate": 1.6307489162905485e-05, "loss": 1.2869, "step": 5959 }, { "epoch": 0.32584776458045733, "grad_norm": 1.3605865240097046, "learning_rate": 1.630607105212381e-05, "loss": 1.4678, "step": 5960 }, { "epoch": 0.32590243702417887, "grad_norm": 2.7510337829589844, "learning_rate": 1.6304652730767628e-05, "loss": 1.1954, "step": 5961 }, { "epoch": 0.32595710946790046, "grad_norm": 1.2862156629562378, "learning_rate": 1.6303234198884298e-05, "loss": 1.2756, "step": 5962 }, { "epoch": 0.326011781911622, "grad_norm": 1.5933035612106323, "learning_rate": 1.6301815456521185e-05, "loss": 1.4552, "step": 5963 }, { "epoch": 0.32606645435534354, "grad_norm": 1.8999103307724, "learning_rate": 1.6300396503725665e-05, "loss": 1.6071, "step": 5964 }, { "epoch": 0.3261211267990651, "grad_norm": 1.507246494293213, "learning_rate": 1.6298977340545126e-05, "loss": 1.3001, "step": 5965 }, { "epoch": 0.32617579924278667, "grad_norm": 1.5147114992141724, "learning_rate": 1.629755796702695e-05, "loss": 1.4524, "step": 5966 }, { "epoch": 0.3262304716865082, "grad_norm": 1.2568622827529907, "learning_rate": 1.6296138383218534e-05, "loss": 1.4504, "step": 5967 }, { "epoch": 0.32628514413022974, "grad_norm": 1.3134002685546875, "learning_rate": 1.6294718589167283e-05, "loss": 1.5139, "step": 5968 }, { "epoch": 0.32633981657395134, "grad_norm": 2.9971213340759277, "learning_rate": 1.629329858492061e-05, "loss": 0.9917, "step": 5969 }, { "epoch": 0.3263944890176729, "grad_norm": 1.5137073993682861, "learning_rate": 1.6291878370525925e-05, "loss": 1.4772, "step": 5970 }, { "epoch": 0.3264491614613944, "grad_norm": 1.7033908367156982, "learning_rate": 1.6290457946030663e-05, "loss": 1.2316, "step": 5971 }, { "epoch": 0.32650383390511595, "grad_norm": 1.7542243003845215, "learning_rate": 1.6289037311482245e-05, "loss": 1.6231, "step": 5972 }, { "epoch": 0.32655850634883754, "grad_norm": 1.2808353900909424, "learning_rate": 1.6287616466928112e-05, "loss": 1.5359, "step": 5973 }, { "epoch": 0.3266131787925591, "grad_norm": 2.022029399871826, "learning_rate": 1.6286195412415714e-05, "loss": 1.2033, "step": 5974 }, { "epoch": 0.3266678512362806, "grad_norm": 1.210101842880249, "learning_rate": 1.62847741479925e-05, "loss": 1.955, "step": 5975 }, { "epoch": 0.3267225236800022, "grad_norm": 1.4853882789611816, "learning_rate": 1.628335267370593e-05, "loss": 1.4923, "step": 5976 }, { "epoch": 0.32677719612372375, "grad_norm": 1.803653597831726, "learning_rate": 1.6281930989603466e-05, "loss": 1.5604, "step": 5977 }, { "epoch": 0.3268318685674453, "grad_norm": 1.5880638360977173, "learning_rate": 1.628050909573259e-05, "loss": 1.4806, "step": 5978 }, { "epoch": 0.3268865410111668, "grad_norm": 1.61172354221344, "learning_rate": 1.6279086992140777e-05, "loss": 1.3108, "step": 5979 }, { "epoch": 0.3269412134548884, "grad_norm": 1.5432343482971191, "learning_rate": 1.6277664678875514e-05, "loss": 1.5843, "step": 5980 }, { "epoch": 0.32699588589860995, "grad_norm": 1.7739185094833374, "learning_rate": 1.6276242155984295e-05, "loss": 1.3508, "step": 5981 }, { "epoch": 0.3270505583423315, "grad_norm": 1.253482460975647, "learning_rate": 1.6274819423514624e-05, "loss": 1.4203, "step": 5982 }, { "epoch": 0.3271052307860531, "grad_norm": 1.502397894859314, "learning_rate": 1.6273396481514007e-05, "loss": 1.5477, "step": 5983 }, { "epoch": 0.3271599032297746, "grad_norm": 1.6046779155731201, "learning_rate": 1.627197333002996e-05, "loss": 1.3238, "step": 5984 }, { "epoch": 0.32721457567349616, "grad_norm": 1.4806978702545166, "learning_rate": 1.6270549969110012e-05, "loss": 1.48, "step": 5985 }, { "epoch": 0.3272692481172177, "grad_norm": 2.0102410316467285, "learning_rate": 1.626912639880168e-05, "loss": 1.4166, "step": 5986 }, { "epoch": 0.3273239205609393, "grad_norm": 2.0798521041870117, "learning_rate": 1.6267702619152508e-05, "loss": 1.34, "step": 5987 }, { "epoch": 0.3273785930046608, "grad_norm": 1.384189486503601, "learning_rate": 1.6266278630210036e-05, "loss": 1.2631, "step": 5988 }, { "epoch": 0.32743326544838236, "grad_norm": 1.1529252529144287, "learning_rate": 1.626485443202182e-05, "loss": 1.5379, "step": 5989 }, { "epoch": 0.32748793789210395, "grad_norm": 1.366735816001892, "learning_rate": 1.626343002463541e-05, "loss": 1.5729, "step": 5990 }, { "epoch": 0.3275426103358255, "grad_norm": 1.4880315065383911, "learning_rate": 1.6262005408098378e-05, "loss": 1.5604, "step": 5991 }, { "epoch": 0.327597282779547, "grad_norm": 1.5727338790893555, "learning_rate": 1.626058058245829e-05, "loss": 1.2837, "step": 5992 }, { "epoch": 0.32765195522326857, "grad_norm": 1.3518636226654053, "learning_rate": 1.625915554776272e-05, "loss": 1.5764, "step": 5993 }, { "epoch": 0.32770662766699016, "grad_norm": 1.4454360008239746, "learning_rate": 1.6257730304059265e-05, "loss": 1.1912, "step": 5994 }, { "epoch": 0.3277613001107117, "grad_norm": 1.2510696649551392, "learning_rate": 1.625630485139551e-05, "loss": 1.6618, "step": 5995 }, { "epoch": 0.32781597255443323, "grad_norm": 1.4215384721755981, "learning_rate": 1.6254879189819055e-05, "loss": 1.5704, "step": 5996 }, { "epoch": 0.3278706449981548, "grad_norm": 2.077924966812134, "learning_rate": 1.6253453319377504e-05, "loss": 1.111, "step": 5997 }, { "epoch": 0.32792531744187636, "grad_norm": 1.5544759035110474, "learning_rate": 1.6252027240118472e-05, "loss": 1.5981, "step": 5998 }, { "epoch": 0.3279799898855979, "grad_norm": 1.438264012336731, "learning_rate": 1.6250600952089586e-05, "loss": 1.4444, "step": 5999 }, { "epoch": 0.32803466232931944, "grad_norm": 1.508399248123169, "learning_rate": 1.624917445533846e-05, "loss": 1.3755, "step": 6000 }, { "epoch": 0.32808933477304103, "grad_norm": 1.3447858095169067, "learning_rate": 1.6247747749912738e-05, "loss": 1.6003, "step": 6001 }, { "epoch": 0.32814400721676257, "grad_norm": 1.12517249584198, "learning_rate": 1.6246320835860053e-05, "loss": 1.3847, "step": 6002 }, { "epoch": 0.3281986796604841, "grad_norm": 1.3513363599777222, "learning_rate": 1.6244893713228066e-05, "loss": 1.6146, "step": 6003 }, { "epoch": 0.3282533521042057, "grad_norm": 1.64858078956604, "learning_rate": 1.6243466382064418e-05, "loss": 1.3732, "step": 6004 }, { "epoch": 0.32830802454792724, "grad_norm": 2.1589066982269287, "learning_rate": 1.624203884241678e-05, "loss": 1.1338, "step": 6005 }, { "epoch": 0.3283626969916488, "grad_norm": 1.5408525466918945, "learning_rate": 1.6240611094332814e-05, "loss": 1.4545, "step": 6006 }, { "epoch": 0.3284173694353703, "grad_norm": 1.8170640468597412, "learning_rate": 1.62391831378602e-05, "loss": 1.3015, "step": 6007 }, { "epoch": 0.3284720418790919, "grad_norm": 1.2818549871444702, "learning_rate": 1.6237754973046625e-05, "loss": 1.481, "step": 6008 }, { "epoch": 0.32852671432281344, "grad_norm": 1.5035731792449951, "learning_rate": 1.623632659993977e-05, "loss": 1.3503, "step": 6009 }, { "epoch": 0.328581386766535, "grad_norm": 1.495105504989624, "learning_rate": 1.6234898018587336e-05, "loss": 1.4323, "step": 6010 }, { "epoch": 0.32863605921025657, "grad_norm": 1.676527500152588, "learning_rate": 1.6233469229037026e-05, "loss": 1.5028, "step": 6011 }, { "epoch": 0.3286907316539781, "grad_norm": 1.3923050165176392, "learning_rate": 1.6232040231336556e-05, "loss": 1.4917, "step": 6012 }, { "epoch": 0.32874540409769964, "grad_norm": 1.3903017044067383, "learning_rate": 1.6230611025533632e-05, "loss": 1.42, "step": 6013 }, { "epoch": 0.32880007654142124, "grad_norm": 1.5974185466766357, "learning_rate": 1.622918161167599e-05, "loss": 1.1857, "step": 6014 }, { "epoch": 0.3288547489851428, "grad_norm": 1.6230907440185547, "learning_rate": 1.6227751989811355e-05, "loss": 1.3561, "step": 6015 }, { "epoch": 0.3289094214288643, "grad_norm": 2.793478488922119, "learning_rate": 1.622632215998747e-05, "loss": 1.3374, "step": 6016 }, { "epoch": 0.32896409387258585, "grad_norm": 1.4426946640014648, "learning_rate": 1.622489212225207e-05, "loss": 1.4344, "step": 6017 }, { "epoch": 0.32901876631630744, "grad_norm": 1.3294355869293213, "learning_rate": 1.6223461876652922e-05, "loss": 1.3285, "step": 6018 }, { "epoch": 0.329073438760029, "grad_norm": 1.717307448387146, "learning_rate": 1.6222031423237776e-05, "loss": 1.3924, "step": 6019 }, { "epoch": 0.3291281112037505, "grad_norm": 2.2813000679016113, "learning_rate": 1.6220600762054403e-05, "loss": 1.4474, "step": 6020 }, { "epoch": 0.3291827836474721, "grad_norm": 1.5321975946426392, "learning_rate": 1.6219169893150568e-05, "loss": 1.3291, "step": 6021 }, { "epoch": 0.32923745609119365, "grad_norm": 1.5926822423934937, "learning_rate": 1.621773881657406e-05, "loss": 1.4369, "step": 6022 }, { "epoch": 0.3292921285349152, "grad_norm": 1.6345109939575195, "learning_rate": 1.621630753237266e-05, "loss": 1.5787, "step": 6023 }, { "epoch": 0.3293468009786367, "grad_norm": 1.4037076234817505, "learning_rate": 1.6214876040594166e-05, "loss": 1.405, "step": 6024 }, { "epoch": 0.3294014734223583, "grad_norm": 2.151491165161133, "learning_rate": 1.6213444341286376e-05, "loss": 1.4726, "step": 6025 }, { "epoch": 0.32945614586607985, "grad_norm": 1.8681083917617798, "learning_rate": 1.6212012434497103e-05, "loss": 1.5657, "step": 6026 }, { "epoch": 0.3295108183098014, "grad_norm": 1.1435221433639526, "learning_rate": 1.6210580320274157e-05, "loss": 1.4553, "step": 6027 }, { "epoch": 0.329565490753523, "grad_norm": 1.7834123373031616, "learning_rate": 1.620914799866536e-05, "loss": 1.5601, "step": 6028 }, { "epoch": 0.3296201631972445, "grad_norm": 3.2595958709716797, "learning_rate": 1.6207715469718538e-05, "loss": 1.1784, "step": 6029 }, { "epoch": 0.32967483564096606, "grad_norm": 1.545054316520691, "learning_rate": 1.620628273348153e-05, "loss": 1.6089, "step": 6030 }, { "epoch": 0.3297295080846876, "grad_norm": 1.2947102785110474, "learning_rate": 1.620484979000218e-05, "loss": 1.6687, "step": 6031 }, { "epoch": 0.3297841805284092, "grad_norm": 1.3119337558746338, "learning_rate": 1.6203416639328334e-05, "loss": 1.614, "step": 6032 }, { "epoch": 0.3298388529721307, "grad_norm": 1.3891445398330688, "learning_rate": 1.620198328150785e-05, "loss": 1.2556, "step": 6033 }, { "epoch": 0.32989352541585226, "grad_norm": 1.5401527881622314, "learning_rate": 1.6200549716588595e-05, "loss": 1.1565, "step": 6034 }, { "epoch": 0.32994819785957386, "grad_norm": 1.8759334087371826, "learning_rate": 1.619911594461843e-05, "loss": 1.403, "step": 6035 }, { "epoch": 0.3300028703032954, "grad_norm": 1.2697985172271729, "learning_rate": 1.619768196564524e-05, "loss": 1.5657, "step": 6036 }, { "epoch": 0.33005754274701693, "grad_norm": 1.46409010887146, "learning_rate": 1.6196247779716902e-05, "loss": 1.5341, "step": 6037 }, { "epoch": 0.33011221519073847, "grad_norm": 1.8650394678115845, "learning_rate": 1.6194813386881314e-05, "loss": 1.451, "step": 6038 }, { "epoch": 0.33016688763446006, "grad_norm": 1.7298544645309448, "learning_rate": 1.619337878718637e-05, "loss": 1.6384, "step": 6039 }, { "epoch": 0.3302215600781816, "grad_norm": 1.3610042333602905, "learning_rate": 1.6191943980679975e-05, "loss": 1.5423, "step": 6040 }, { "epoch": 0.33027623252190313, "grad_norm": 1.4518924951553345, "learning_rate": 1.6190508967410043e-05, "loss": 1.341, "step": 6041 }, { "epoch": 0.3303309049656247, "grad_norm": 1.1020805835723877, "learning_rate": 1.6189073747424485e-05, "loss": 1.4565, "step": 6042 }, { "epoch": 0.33038557740934626, "grad_norm": 1.4947878122329712, "learning_rate": 1.6187638320771233e-05, "loss": 1.4366, "step": 6043 }, { "epoch": 0.3304402498530678, "grad_norm": 1.1987923383712769, "learning_rate": 1.6186202687498218e-05, "loss": 1.3827, "step": 6044 }, { "epoch": 0.33049492229678934, "grad_norm": 1.5003310441970825, "learning_rate": 1.618476684765338e-05, "loss": 1.4867, "step": 6045 }, { "epoch": 0.33054959474051093, "grad_norm": 1.746124029159546, "learning_rate": 1.6183330801284664e-05, "loss": 1.2906, "step": 6046 }, { "epoch": 0.33060426718423247, "grad_norm": 1.3861732482910156, "learning_rate": 1.6181894548440022e-05, "loss": 1.384, "step": 6047 }, { "epoch": 0.330658939627954, "grad_norm": 1.5305391550064087, "learning_rate": 1.6180458089167413e-05, "loss": 1.3062, "step": 6048 }, { "epoch": 0.3307136120716756, "grad_norm": 1.428922176361084, "learning_rate": 1.617902142351481e-05, "loss": 1.3468, "step": 6049 }, { "epoch": 0.33076828451539714, "grad_norm": 1.6041415929794312, "learning_rate": 1.6177584551530178e-05, "loss": 1.5147, "step": 6050 }, { "epoch": 0.3308229569591187, "grad_norm": 1.3435784578323364, "learning_rate": 1.6176147473261503e-05, "loss": 1.4454, "step": 6051 }, { "epoch": 0.3308776294028402, "grad_norm": 1.393373966217041, "learning_rate": 1.6174710188756773e-05, "loss": 1.3687, "step": 6052 }, { "epoch": 0.3309323018465618, "grad_norm": 1.5605255365371704, "learning_rate": 1.617327269806398e-05, "loss": 1.3569, "step": 6053 }, { "epoch": 0.33098697429028334, "grad_norm": 1.7284740209579468, "learning_rate": 1.617183500123112e-05, "loss": 1.3113, "step": 6054 }, { "epoch": 0.3310416467340049, "grad_norm": 1.5284603834152222, "learning_rate": 1.6170397098306212e-05, "loss": 1.5974, "step": 6055 }, { "epoch": 0.3310963191777265, "grad_norm": 1.3029547929763794, "learning_rate": 1.6168958989337266e-05, "loss": 1.556, "step": 6056 }, { "epoch": 0.331150991621448, "grad_norm": 1.340201735496521, "learning_rate": 1.61675206743723e-05, "loss": 1.6439, "step": 6057 }, { "epoch": 0.33120566406516955, "grad_norm": 1.3366438150405884, "learning_rate": 1.616608215345935e-05, "loss": 1.4159, "step": 6058 }, { "epoch": 0.3312603365088911, "grad_norm": 1.1430505514144897, "learning_rate": 1.6164643426646445e-05, "loss": 1.5324, "step": 6059 }, { "epoch": 0.3313150089526127, "grad_norm": 1.6050509214401245, "learning_rate": 1.616320449398163e-05, "loss": 1.2328, "step": 6060 }, { "epoch": 0.3313696813963342, "grad_norm": 1.3890494108200073, "learning_rate": 1.6161765355512958e-05, "loss": 1.2946, "step": 6061 }, { "epoch": 0.33142435384005575, "grad_norm": 1.609432578086853, "learning_rate": 1.616032601128848e-05, "loss": 1.5045, "step": 6062 }, { "epoch": 0.33147902628377734, "grad_norm": 1.4390687942504883, "learning_rate": 1.615888646135626e-05, "loss": 1.3623, "step": 6063 }, { "epoch": 0.3315336987274989, "grad_norm": 1.4125200510025024, "learning_rate": 1.6157446705764367e-05, "loss": 1.295, "step": 6064 }, { "epoch": 0.3315883711712204, "grad_norm": 1.8550539016723633, "learning_rate": 1.6156006744560882e-05, "loss": 1.3414, "step": 6065 }, { "epoch": 0.33164304361494196, "grad_norm": 1.329485535621643, "learning_rate": 1.6154566577793886e-05, "loss": 1.3902, "step": 6066 }, { "epoch": 0.33169771605866355, "grad_norm": 1.441409945487976, "learning_rate": 1.6153126205511468e-05, "loss": 1.6454, "step": 6067 }, { "epoch": 0.3317523885023851, "grad_norm": 1.8854106664657593, "learning_rate": 1.615168562776173e-05, "loss": 1.5917, "step": 6068 }, { "epoch": 0.3318070609461066, "grad_norm": 1.3521517515182495, "learning_rate": 1.6150244844592774e-05, "loss": 1.4672, "step": 6069 }, { "epoch": 0.3318617333898282, "grad_norm": 1.2120388746261597, "learning_rate": 1.6148803856052708e-05, "loss": 1.6466, "step": 6070 }, { "epoch": 0.33191640583354975, "grad_norm": 1.2654740810394287, "learning_rate": 1.6147362662189653e-05, "loss": 1.4793, "step": 6071 }, { "epoch": 0.3319710782772713, "grad_norm": 1.3605287075042725, "learning_rate": 1.6145921263051735e-05, "loss": 1.2843, "step": 6072 }, { "epoch": 0.33202575072099283, "grad_norm": 1.7438956499099731, "learning_rate": 1.614447965868708e-05, "loss": 1.4262, "step": 6073 }, { "epoch": 0.3320804231647144, "grad_norm": 2.283388376235962, "learning_rate": 1.6143037849143834e-05, "loss": 1.5859, "step": 6074 }, { "epoch": 0.33213509560843596, "grad_norm": 1.4857807159423828, "learning_rate": 1.6141595834470142e-05, "loss": 1.4014, "step": 6075 }, { "epoch": 0.3321897680521575, "grad_norm": 1.684153437614441, "learning_rate": 1.6140153614714148e-05, "loss": 1.5248, "step": 6076 }, { "epoch": 0.3322444404958791, "grad_norm": 1.4272931814193726, "learning_rate": 1.613871118992402e-05, "loss": 1.3131, "step": 6077 }, { "epoch": 0.3322991129396006, "grad_norm": 1.4616144895553589, "learning_rate": 1.613726856014792e-05, "loss": 1.2825, "step": 6078 }, { "epoch": 0.33235378538332216, "grad_norm": 1.6834461688995361, "learning_rate": 1.613582572543402e-05, "loss": 1.794, "step": 6079 }, { "epoch": 0.3324084578270437, "grad_norm": 1.7193541526794434, "learning_rate": 1.6134382685830502e-05, "loss": 1.2916, "step": 6080 }, { "epoch": 0.3324631302707653, "grad_norm": 1.4562586545944214, "learning_rate": 1.613293944138555e-05, "loss": 1.4971, "step": 6081 }, { "epoch": 0.33251780271448683, "grad_norm": 1.481183409690857, "learning_rate": 1.6131495992147363e-05, "loss": 1.6664, "step": 6082 }, { "epoch": 0.33257247515820837, "grad_norm": 1.135606288909912, "learning_rate": 1.6130052338164133e-05, "loss": 1.5056, "step": 6083 }, { "epoch": 0.33262714760192996, "grad_norm": 1.6535825729370117, "learning_rate": 1.612860847948407e-05, "loss": 1.6284, "step": 6084 }, { "epoch": 0.3326818200456515, "grad_norm": 1.5952954292297363, "learning_rate": 1.6127164416155387e-05, "loss": 1.2931, "step": 6085 }, { "epoch": 0.33273649248937304, "grad_norm": 1.4842443466186523, "learning_rate": 1.612572014822631e-05, "loss": 1.4292, "step": 6086 }, { "epoch": 0.3327911649330946, "grad_norm": 1.4833290576934814, "learning_rate": 1.6124275675745063e-05, "loss": 1.5766, "step": 6087 }, { "epoch": 0.33284583737681617, "grad_norm": 1.1972841024398804, "learning_rate": 1.612283099875988e-05, "loss": 1.6794, "step": 6088 }, { "epoch": 0.3329005098205377, "grad_norm": 1.4461100101470947, "learning_rate": 1.6121386117319e-05, "loss": 1.398, "step": 6089 }, { "epoch": 0.33295518226425924, "grad_norm": 1.183948040008545, "learning_rate": 1.6119941031470676e-05, "loss": 1.4773, "step": 6090 }, { "epoch": 0.33300985470798083, "grad_norm": 1.8549349308013916, "learning_rate": 1.611849574126316e-05, "loss": 1.2584, "step": 6091 }, { "epoch": 0.33306452715170237, "grad_norm": 1.4485846757888794, "learning_rate": 1.6117050246744708e-05, "loss": 1.3537, "step": 6092 }, { "epoch": 0.3331191995954239, "grad_norm": 1.5988950729370117, "learning_rate": 1.6115604547963597e-05, "loss": 1.4189, "step": 6093 }, { "epoch": 0.33317387203914545, "grad_norm": 1.4933688640594482, "learning_rate": 1.6114158644968102e-05, "loss": 1.3426, "step": 6094 }, { "epoch": 0.33322854448286704, "grad_norm": 1.5533668994903564, "learning_rate": 1.6112712537806502e-05, "loss": 1.4028, "step": 6095 }, { "epoch": 0.3332832169265886, "grad_norm": 1.3217633962631226, "learning_rate": 1.6111266226527086e-05, "loss": 1.4756, "step": 6096 }, { "epoch": 0.3333378893703101, "grad_norm": 1.7613136768341064, "learning_rate": 1.610981971117815e-05, "loss": 1.0868, "step": 6097 }, { "epoch": 0.3333925618140317, "grad_norm": 1.2689845561981201, "learning_rate": 1.6108372991807998e-05, "loss": 1.8022, "step": 6098 }, { "epoch": 0.33344723425775324, "grad_norm": 1.1368188858032227, "learning_rate": 1.6106926068464936e-05, "loss": 1.2782, "step": 6099 }, { "epoch": 0.3335019067014748, "grad_norm": 1.8267260789871216, "learning_rate": 1.610547894119728e-05, "loss": 1.3566, "step": 6100 }, { "epoch": 0.3335565791451963, "grad_norm": 1.5970219373703003, "learning_rate": 1.610403161005336e-05, "loss": 1.5483, "step": 6101 }, { "epoch": 0.3336112515889179, "grad_norm": 1.7394249439239502, "learning_rate": 1.61025840750815e-05, "loss": 1.3409, "step": 6102 }, { "epoch": 0.33366592403263945, "grad_norm": 1.6299498081207275, "learning_rate": 1.6101136336330037e-05, "loss": 1.5577, "step": 6103 }, { "epoch": 0.333720596476361, "grad_norm": 1.7336699962615967, "learning_rate": 1.6099688393847313e-05, "loss": 1.3892, "step": 6104 }, { "epoch": 0.3337752689200826, "grad_norm": 1.3294401168823242, "learning_rate": 1.6098240247681684e-05, "loss": 1.4687, "step": 6105 }, { "epoch": 0.3338299413638041, "grad_norm": 1.2118126153945923, "learning_rate": 1.60967918978815e-05, "loss": 1.3698, "step": 6106 }, { "epoch": 0.33388461380752565, "grad_norm": 1.01011323928833, "learning_rate": 1.609534334449513e-05, "loss": 1.637, "step": 6107 }, { "epoch": 0.3339392862512472, "grad_norm": 1.2092032432556152, "learning_rate": 1.6093894587570942e-05, "loss": 1.4987, "step": 6108 }, { "epoch": 0.3339939586949688, "grad_norm": 1.7037149667739868, "learning_rate": 1.6092445627157314e-05, "loss": 1.5346, "step": 6109 }, { "epoch": 0.3340486311386903, "grad_norm": 1.6542234420776367, "learning_rate": 1.609099646330263e-05, "loss": 1.0616, "step": 6110 }, { "epoch": 0.33410330358241186, "grad_norm": 1.5384461879730225, "learning_rate": 1.608954709605528e-05, "loss": 1.2787, "step": 6111 }, { "epoch": 0.33415797602613345, "grad_norm": 1.507163405418396, "learning_rate": 1.6088097525463663e-05, "loss": 1.5926, "step": 6112 }, { "epoch": 0.334212648469855, "grad_norm": 1.2214860916137695, "learning_rate": 1.6086647751576184e-05, "loss": 1.5184, "step": 6113 }, { "epoch": 0.3342673209135765, "grad_norm": 1.385817527770996, "learning_rate": 1.6085197774441253e-05, "loss": 1.6705, "step": 6114 }, { "epoch": 0.33432199335729806, "grad_norm": 1.5202990770339966, "learning_rate": 1.608374759410729e-05, "loss": 1.3804, "step": 6115 }, { "epoch": 0.33437666580101966, "grad_norm": 1.3520362377166748, "learning_rate": 1.608229721062272e-05, "loss": 1.398, "step": 6116 }, { "epoch": 0.3344313382447412, "grad_norm": 1.3772399425506592, "learning_rate": 1.6080846624035972e-05, "loss": 1.2944, "step": 6117 }, { "epoch": 0.33448601068846273, "grad_norm": 1.467930793762207, "learning_rate": 1.6079395834395487e-05, "loss": 1.4338, "step": 6118 }, { "epoch": 0.3345406831321843, "grad_norm": 1.562410831451416, "learning_rate": 1.6077944841749706e-05, "loss": 1.4842, "step": 6119 }, { "epoch": 0.33459535557590586, "grad_norm": 1.4382274150848389, "learning_rate": 1.6076493646147088e-05, "loss": 1.4636, "step": 6120 }, { "epoch": 0.3346500280196274, "grad_norm": 1.6101715564727783, "learning_rate": 1.607504224763609e-05, "loss": 1.4122, "step": 6121 }, { "epoch": 0.33470470046334894, "grad_norm": 1.743561863899231, "learning_rate": 1.607359064626517e-05, "loss": 1.373, "step": 6122 }, { "epoch": 0.33475937290707053, "grad_norm": 1.6830966472625732, "learning_rate": 1.607213884208281e-05, "loss": 1.3076, "step": 6123 }, { "epoch": 0.33481404535079207, "grad_norm": 1.568368911743164, "learning_rate": 1.6070686835137484e-05, "loss": 1.5617, "step": 6124 }, { "epoch": 0.3348687177945136, "grad_norm": 1.3189170360565186, "learning_rate": 1.606923462547768e-05, "loss": 1.5193, "step": 6125 }, { "epoch": 0.3349233902382352, "grad_norm": 1.502820372581482, "learning_rate": 1.606778221315189e-05, "loss": 1.3112, "step": 6126 }, { "epoch": 0.33497806268195673, "grad_norm": 1.6215529441833496, "learning_rate": 1.6066329598208615e-05, "loss": 1.1902, "step": 6127 }, { "epoch": 0.33503273512567827, "grad_norm": 1.8112953901290894, "learning_rate": 1.6064876780696356e-05, "loss": 1.42, "step": 6128 }, { "epoch": 0.3350874075693998, "grad_norm": 1.4008703231811523, "learning_rate": 1.6063423760663633e-05, "loss": 1.3156, "step": 6129 }, { "epoch": 0.3351420800131214, "grad_norm": 1.4391319751739502, "learning_rate": 1.606197053815896e-05, "loss": 1.7448, "step": 6130 }, { "epoch": 0.33519675245684294, "grad_norm": 1.338532567024231, "learning_rate": 1.6060517113230866e-05, "loss": 1.6433, "step": 6131 }, { "epoch": 0.3352514249005645, "grad_norm": 3.519597291946411, "learning_rate": 1.6059063485927886e-05, "loss": 1.2385, "step": 6132 }, { "epoch": 0.33530609734428607, "grad_norm": 1.5546132326126099, "learning_rate": 1.6057609656298558e-05, "loss": 1.3779, "step": 6133 }, { "epoch": 0.3353607697880076, "grad_norm": 1.5228668451309204, "learning_rate": 1.605615562439143e-05, "loss": 1.2452, "step": 6134 }, { "epoch": 0.33541544223172914, "grad_norm": 1.2465507984161377, "learning_rate": 1.605470139025505e-05, "loss": 1.3911, "step": 6135 }, { "epoch": 0.3354701146754507, "grad_norm": 1.572574496269226, "learning_rate": 1.6053246953937985e-05, "loss": 1.3986, "step": 6136 }, { "epoch": 0.3355247871191723, "grad_norm": 1.5730255842208862, "learning_rate": 1.6051792315488798e-05, "loss": 1.658, "step": 6137 }, { "epoch": 0.3355794595628938, "grad_norm": 1.3087974786758423, "learning_rate": 1.605033747495607e-05, "loss": 1.518, "step": 6138 }, { "epoch": 0.33563413200661535, "grad_norm": 1.285878300666809, "learning_rate": 1.604888243238837e-05, "loss": 1.2895, "step": 6139 }, { "epoch": 0.33568880445033694, "grad_norm": 1.3565808534622192, "learning_rate": 1.6047427187834295e-05, "loss": 1.5182, "step": 6140 }, { "epoch": 0.3357434768940585, "grad_norm": 1.4429919719696045, "learning_rate": 1.6045971741342435e-05, "loss": 1.6681, "step": 6141 }, { "epoch": 0.33579814933778, "grad_norm": 1.2675493955612183, "learning_rate": 1.604451609296139e-05, "loss": 1.6003, "step": 6142 }, { "epoch": 0.33585282178150155, "grad_norm": 1.3894562721252441, "learning_rate": 1.604306024273977e-05, "loss": 1.3149, "step": 6143 }, { "epoch": 0.33590749422522315, "grad_norm": 1.7132242918014526, "learning_rate": 1.604160419072619e-05, "loss": 1.1936, "step": 6144 }, { "epoch": 0.3359621666689447, "grad_norm": 1.4195473194122314, "learning_rate": 1.6040147936969263e-05, "loss": 1.3984, "step": 6145 }, { "epoch": 0.3360168391126662, "grad_norm": 1.3464876413345337, "learning_rate": 1.603869148151763e-05, "loss": 1.4982, "step": 6146 }, { "epoch": 0.3360715115563878, "grad_norm": 1.470285177230835, "learning_rate": 1.6037234824419915e-05, "loss": 1.6154, "step": 6147 }, { "epoch": 0.33612618400010935, "grad_norm": 1.3451099395751953, "learning_rate": 1.603577796572476e-05, "loss": 1.4169, "step": 6148 }, { "epoch": 0.3361808564438309, "grad_norm": 1.496902346611023, "learning_rate": 1.6034320905480817e-05, "loss": 1.6199, "step": 6149 }, { "epoch": 0.3362355288875524, "grad_norm": 1.5122003555297852, "learning_rate": 1.603286364373674e-05, "loss": 1.2964, "step": 6150 }, { "epoch": 0.336290201331274, "grad_norm": 1.675186276435852, "learning_rate": 1.603140618054119e-05, "loss": 1.4104, "step": 6151 }, { "epoch": 0.33634487377499556, "grad_norm": 1.6576011180877686, "learning_rate": 1.602994851594283e-05, "loss": 1.4866, "step": 6152 }, { "epoch": 0.3363995462187171, "grad_norm": 1.491165041923523, "learning_rate": 1.6028490649990346e-05, "loss": 1.4466, "step": 6153 }, { "epoch": 0.3364542186624387, "grad_norm": 1.392867922782898, "learning_rate": 1.6027032582732408e-05, "loss": 1.7159, "step": 6154 }, { "epoch": 0.3365088911061602, "grad_norm": 1.990692377090454, "learning_rate": 1.602557431421771e-05, "loss": 1.4173, "step": 6155 }, { "epoch": 0.33656356354988176, "grad_norm": 1.4093644618988037, "learning_rate": 1.6024115844494948e-05, "loss": 1.3019, "step": 6156 }, { "epoch": 0.3366182359936033, "grad_norm": 1.3769521713256836, "learning_rate": 1.602265717361282e-05, "loss": 1.343, "step": 6157 }, { "epoch": 0.3366729084373249, "grad_norm": 1.366668462753296, "learning_rate": 1.6021198301620036e-05, "loss": 1.6652, "step": 6158 }, { "epoch": 0.33672758088104643, "grad_norm": 1.4690529108047485, "learning_rate": 1.6019739228565314e-05, "loss": 1.4005, "step": 6159 }, { "epoch": 0.33678225332476797, "grad_norm": 1.7071805000305176, "learning_rate": 1.6018279954497374e-05, "loss": 1.4931, "step": 6160 }, { "epoch": 0.33683692576848956, "grad_norm": 1.1020493507385254, "learning_rate": 1.601682047946494e-05, "loss": 1.6378, "step": 6161 }, { "epoch": 0.3368915982122111, "grad_norm": 1.4069185256958008, "learning_rate": 1.6015360803516755e-05, "loss": 1.4868, "step": 6162 }, { "epoch": 0.33694627065593263, "grad_norm": 1.7445639371871948, "learning_rate": 1.6013900926701555e-05, "loss": 1.3697, "step": 6163 }, { "epoch": 0.33700094309965417, "grad_norm": 1.5473510026931763, "learning_rate": 1.6012440849068092e-05, "loss": 1.3832, "step": 6164 }, { "epoch": 0.33705561554337576, "grad_norm": 1.2338733673095703, "learning_rate": 1.601098057066512e-05, "loss": 1.4408, "step": 6165 }, { "epoch": 0.3371102879870973, "grad_norm": 1.4864544868469238, "learning_rate": 1.6009520091541403e-05, "loss": 1.2527, "step": 6166 }, { "epoch": 0.33716496043081884, "grad_norm": 1.1884114742279053, "learning_rate": 1.6008059411745705e-05, "loss": 1.258, "step": 6167 }, { "epoch": 0.33721963287454043, "grad_norm": 2.2702476978302, "learning_rate": 1.6006598531326808e-05, "loss": 1.3263, "step": 6168 }, { "epoch": 0.33727430531826197, "grad_norm": 1.5343153476715088, "learning_rate": 1.6005137450333487e-05, "loss": 1.6666, "step": 6169 }, { "epoch": 0.3373289777619835, "grad_norm": 1.0941400527954102, "learning_rate": 1.600367616881454e-05, "loss": 1.4847, "step": 6170 }, { "epoch": 0.33738365020570504, "grad_norm": 1.4288010597229004, "learning_rate": 1.6002214686818755e-05, "loss": 1.3318, "step": 6171 }, { "epoch": 0.33743832264942664, "grad_norm": 1.6582211256027222, "learning_rate": 1.6000753004394938e-05, "loss": 1.4215, "step": 6172 }, { "epoch": 0.3374929950931482, "grad_norm": 1.8099932670593262, "learning_rate": 1.5999291121591894e-05, "loss": 1.4033, "step": 6173 }, { "epoch": 0.3375476675368697, "grad_norm": 1.6265281438827515, "learning_rate": 1.5997829038458447e-05, "loss": 1.4951, "step": 6174 }, { "epoch": 0.3376023399805913, "grad_norm": 1.229717493057251, "learning_rate": 1.5996366755043413e-05, "loss": 1.5754, "step": 6175 }, { "epoch": 0.33765701242431284, "grad_norm": 1.4556852579116821, "learning_rate": 1.599490427139562e-05, "loss": 1.3995, "step": 6176 }, { "epoch": 0.3377116848680344, "grad_norm": 1.4653162956237793, "learning_rate": 1.5993441587563906e-05, "loss": 1.3522, "step": 6177 }, { "epoch": 0.3377663573117559, "grad_norm": 1.213779091835022, "learning_rate": 1.599197870359711e-05, "loss": 1.5684, "step": 6178 }, { "epoch": 0.3378210297554775, "grad_norm": 1.48911452293396, "learning_rate": 1.5990515619544092e-05, "loss": 1.5015, "step": 6179 }, { "epoch": 0.33787570219919905, "grad_norm": 1.128709077835083, "learning_rate": 1.5989052335453695e-05, "loss": 1.573, "step": 6180 }, { "epoch": 0.3379303746429206, "grad_norm": 1.7917487621307373, "learning_rate": 1.598758885137479e-05, "loss": 1.4345, "step": 6181 }, { "epoch": 0.3379850470866422, "grad_norm": 1.6802986860275269, "learning_rate": 1.598612516735624e-05, "loss": 1.4036, "step": 6182 }, { "epoch": 0.3380397195303637, "grad_norm": 1.9066331386566162, "learning_rate": 1.5984661283446924e-05, "loss": 1.3155, "step": 6183 }, { "epoch": 0.33809439197408525, "grad_norm": 1.3039093017578125, "learning_rate": 1.5983197199695727e-05, "loss": 1.5519, "step": 6184 }, { "epoch": 0.3381490644178068, "grad_norm": 1.6026942729949951, "learning_rate": 1.5981732916151534e-05, "loss": 1.528, "step": 6185 }, { "epoch": 0.3382037368615284, "grad_norm": 1.8271565437316895, "learning_rate": 1.598026843286324e-05, "loss": 1.465, "step": 6186 }, { "epoch": 0.3382584093052499, "grad_norm": 1.0877196788787842, "learning_rate": 1.5978803749879754e-05, "loss": 1.4144, "step": 6187 }, { "epoch": 0.33831308174897146, "grad_norm": 1.5045307874679565, "learning_rate": 1.5977338867249978e-05, "loss": 1.4125, "step": 6188 }, { "epoch": 0.33836775419269305, "grad_norm": 1.3515045642852783, "learning_rate": 1.5975873785022834e-05, "loss": 1.4368, "step": 6189 }, { "epoch": 0.3384224266364146, "grad_norm": 1.7119603157043457, "learning_rate": 1.5974408503247237e-05, "loss": 1.4411, "step": 6190 }, { "epoch": 0.3384770990801361, "grad_norm": 1.5358012914657593, "learning_rate": 1.5972943021972125e-05, "loss": 1.5351, "step": 6191 }, { "epoch": 0.33853177152385766, "grad_norm": 1.4990839958190918, "learning_rate": 1.5971477341246425e-05, "loss": 1.2068, "step": 6192 }, { "epoch": 0.33858644396757925, "grad_norm": 1.689782977104187, "learning_rate": 1.597001146111909e-05, "loss": 1.4469, "step": 6193 }, { "epoch": 0.3386411164113008, "grad_norm": 1.6158487796783447, "learning_rate": 1.596854538163906e-05, "loss": 1.6165, "step": 6194 }, { "epoch": 0.33869578885502233, "grad_norm": 1.4254902601242065, "learning_rate": 1.5967079102855293e-05, "loss": 1.3988, "step": 6195 }, { "epoch": 0.3387504612987439, "grad_norm": 1.6531708240509033, "learning_rate": 1.5965612624816755e-05, "loss": 1.3324, "step": 6196 }, { "epoch": 0.33880513374246546, "grad_norm": 2.279665946960449, "learning_rate": 1.5964145947572412e-05, "loss": 1.5552, "step": 6197 }, { "epoch": 0.338859806186187, "grad_norm": 1.447980284690857, "learning_rate": 1.596267907117124e-05, "loss": 1.2448, "step": 6198 }, { "epoch": 0.33891447862990853, "grad_norm": 1.3203734159469604, "learning_rate": 1.596121199566222e-05, "loss": 1.2422, "step": 6199 }, { "epoch": 0.3389691510736301, "grad_norm": 1.6694142818450928, "learning_rate": 1.5959744721094343e-05, "loss": 1.3627, "step": 6200 }, { "epoch": 0.33902382351735166, "grad_norm": 1.465228796005249, "learning_rate": 1.595827724751661e-05, "loss": 1.4303, "step": 6201 }, { "epoch": 0.3390784959610732, "grad_norm": 1.612260341644287, "learning_rate": 1.5956809574978014e-05, "loss": 1.4308, "step": 6202 }, { "epoch": 0.3391331684047948, "grad_norm": 1.4780973196029663, "learning_rate": 1.595534170352757e-05, "loss": 1.5402, "step": 6203 }, { "epoch": 0.33918784084851633, "grad_norm": 1.7897155284881592, "learning_rate": 1.595387363321429e-05, "loss": 1.3162, "step": 6204 }, { "epoch": 0.33924251329223787, "grad_norm": 1.2582626342773438, "learning_rate": 1.59524053640872e-05, "loss": 1.5224, "step": 6205 }, { "epoch": 0.3392971857359594, "grad_norm": 1.6207587718963623, "learning_rate": 1.5950936896195328e-05, "loss": 1.3927, "step": 6206 }, { "epoch": 0.339351858179681, "grad_norm": 1.2760292291641235, "learning_rate": 1.5949468229587704e-05, "loss": 1.5473, "step": 6207 }, { "epoch": 0.33940653062340254, "grad_norm": 1.781623363494873, "learning_rate": 1.5947999364313378e-05, "loss": 1.4137, "step": 6208 }, { "epoch": 0.3394612030671241, "grad_norm": 1.35957932472229, "learning_rate": 1.5946530300421396e-05, "loss": 1.3535, "step": 6209 }, { "epoch": 0.33951587551084567, "grad_norm": 1.3438193798065186, "learning_rate": 1.5945061037960812e-05, "loss": 1.3995, "step": 6210 }, { "epoch": 0.3395705479545672, "grad_norm": 1.553338885307312, "learning_rate": 1.594359157698069e-05, "loss": 1.4726, "step": 6211 }, { "epoch": 0.33962522039828874, "grad_norm": 1.9748361110687256, "learning_rate": 1.59421219175301e-05, "loss": 1.3699, "step": 6212 }, { "epoch": 0.3396798928420103, "grad_norm": 1.9491114616394043, "learning_rate": 1.5940652059658116e-05, "loss": 1.1281, "step": 6213 }, { "epoch": 0.33973456528573187, "grad_norm": 1.4362870454788208, "learning_rate": 1.5939182003413816e-05, "loss": 1.4113, "step": 6214 }, { "epoch": 0.3397892377294534, "grad_norm": 1.4210104942321777, "learning_rate": 1.5937711748846292e-05, "loss": 1.2647, "step": 6215 }, { "epoch": 0.33984391017317495, "grad_norm": 1.605415940284729, "learning_rate": 1.5936241296004646e-05, "loss": 1.4578, "step": 6216 }, { "epoch": 0.33989858261689654, "grad_norm": 1.1929662227630615, "learning_rate": 1.5934770644937967e-05, "loss": 1.4809, "step": 6217 }, { "epoch": 0.3399532550606181, "grad_norm": 1.4079327583312988, "learning_rate": 1.593329979569537e-05, "loss": 1.5004, "step": 6218 }, { "epoch": 0.3400079275043396, "grad_norm": 1.669852614402771, "learning_rate": 1.5931828748325974e-05, "loss": 1.7273, "step": 6219 }, { "epoch": 0.3400625999480612, "grad_norm": 1.4272769689559937, "learning_rate": 1.5930357502878892e-05, "loss": 1.4835, "step": 6220 }, { "epoch": 0.34011727239178274, "grad_norm": 1.5236772298812866, "learning_rate": 1.592888605940326e-05, "loss": 1.3041, "step": 6221 }, { "epoch": 0.3401719448355043, "grad_norm": 1.4822317361831665, "learning_rate": 1.5927414417948205e-05, "loss": 1.3897, "step": 6222 }, { "epoch": 0.3402266172792258, "grad_norm": 1.4622483253479004, "learning_rate": 1.592594257856288e-05, "loss": 1.6196, "step": 6223 }, { "epoch": 0.3402812897229474, "grad_norm": 1.3353890180587769, "learning_rate": 1.5924470541296423e-05, "loss": 1.3949, "step": 6224 }, { "epoch": 0.34033596216666895, "grad_norm": 1.5116338729858398, "learning_rate": 1.5922998306197993e-05, "loss": 1.3612, "step": 6225 }, { "epoch": 0.3403906346103905, "grad_norm": 1.4515563249588013, "learning_rate": 1.5921525873316754e-05, "loss": 1.3203, "step": 6226 }, { "epoch": 0.3404453070541121, "grad_norm": 1.4142162799835205, "learning_rate": 1.5920053242701867e-05, "loss": 1.4892, "step": 6227 }, { "epoch": 0.3404999794978336, "grad_norm": 1.7525932788848877, "learning_rate": 1.591858041440251e-05, "loss": 1.2356, "step": 6228 }, { "epoch": 0.34055465194155515, "grad_norm": 1.3563235998153687, "learning_rate": 1.5917107388467866e-05, "loss": 1.6062, "step": 6229 }, { "epoch": 0.3406093243852767, "grad_norm": 1.3450641632080078, "learning_rate": 1.591563416494712e-05, "loss": 1.4397, "step": 6230 }, { "epoch": 0.3406639968289983, "grad_norm": 1.5868972539901733, "learning_rate": 1.591416074388947e-05, "loss": 1.5065, "step": 6231 }, { "epoch": 0.3407186692727198, "grad_norm": 1.450095534324646, "learning_rate": 1.5912687125344114e-05, "loss": 1.341, "step": 6232 }, { "epoch": 0.34077334171644136, "grad_norm": 1.5877795219421387, "learning_rate": 1.591121330936026e-05, "loss": 1.5634, "step": 6233 }, { "epoch": 0.34082801416016295, "grad_norm": 2.187891960144043, "learning_rate": 1.5909739295987123e-05, "loss": 1.1621, "step": 6234 }, { "epoch": 0.3408826866038845, "grad_norm": 1.7775819301605225, "learning_rate": 1.5908265085273923e-05, "loss": 1.277, "step": 6235 }, { "epoch": 0.340937359047606, "grad_norm": 1.7384945154190063, "learning_rate": 1.5906790677269887e-05, "loss": 1.3903, "step": 6236 }, { "epoch": 0.34099203149132756, "grad_norm": 1.6378660202026367, "learning_rate": 1.590531607202425e-05, "loss": 1.5848, "step": 6237 }, { "epoch": 0.34104670393504916, "grad_norm": 1.30210542678833, "learning_rate": 1.5903841269586254e-05, "loss": 1.6041, "step": 6238 }, { "epoch": 0.3411013763787707, "grad_norm": 1.4987425804138184, "learning_rate": 1.590236627000514e-05, "loss": 1.8063, "step": 6239 }, { "epoch": 0.34115604882249223, "grad_norm": 1.4648723602294922, "learning_rate": 1.590089107333017e-05, "loss": 1.6032, "step": 6240 }, { "epoch": 0.3412107212662138, "grad_norm": 1.9589678049087524, "learning_rate": 1.5899415679610597e-05, "loss": 1.7503, "step": 6241 }, { "epoch": 0.34126539370993536, "grad_norm": 1.8459833860397339, "learning_rate": 1.5897940088895693e-05, "loss": 1.3958, "step": 6242 }, { "epoch": 0.3413200661536569, "grad_norm": 1.2228678464889526, "learning_rate": 1.589646430123473e-05, "loss": 1.2589, "step": 6243 }, { "epoch": 0.34137473859737844, "grad_norm": 1.621414065361023, "learning_rate": 1.5894988316676986e-05, "loss": 1.3519, "step": 6244 }, { "epoch": 0.34142941104110003, "grad_norm": 1.6018338203430176, "learning_rate": 1.589351213527175e-05, "loss": 1.4077, "step": 6245 }, { "epoch": 0.34148408348482157, "grad_norm": 2.030662775039673, "learning_rate": 1.5892035757068313e-05, "loss": 1.1358, "step": 6246 }, { "epoch": 0.3415387559285431, "grad_norm": 1.5036060810089111, "learning_rate": 1.5890559182115978e-05, "loss": 1.3303, "step": 6247 }, { "epoch": 0.3415934283722647, "grad_norm": 1.6786699295043945, "learning_rate": 1.5889082410464046e-05, "loss": 1.5303, "step": 6248 }, { "epoch": 0.34164810081598623, "grad_norm": 1.3996779918670654, "learning_rate": 1.5887605442161834e-05, "loss": 1.4779, "step": 6249 }, { "epoch": 0.34170277325970777, "grad_norm": 1.1685818433761597, "learning_rate": 1.5886128277258665e-05, "loss": 1.2962, "step": 6250 }, { "epoch": 0.3417574457034293, "grad_norm": 1.2884231805801392, "learning_rate": 1.5884650915803858e-05, "loss": 1.5061, "step": 6251 }, { "epoch": 0.3418121181471509, "grad_norm": 1.444249153137207, "learning_rate": 1.5883173357846745e-05, "loss": 1.5509, "step": 6252 }, { "epoch": 0.34186679059087244, "grad_norm": 1.716431736946106, "learning_rate": 1.5881695603436674e-05, "loss": 1.2815, "step": 6253 }, { "epoch": 0.341921463034594, "grad_norm": 1.3790043592453003, "learning_rate": 1.588021765262298e-05, "loss": 1.5755, "step": 6254 }, { "epoch": 0.34197613547831557, "grad_norm": 1.5074915885925293, "learning_rate": 1.5878739505455023e-05, "loss": 1.6048, "step": 6255 }, { "epoch": 0.3420308079220371, "grad_norm": 1.665313720703125, "learning_rate": 1.5877261161982157e-05, "loss": 1.4365, "step": 6256 }, { "epoch": 0.34208548036575864, "grad_norm": 1.3456476926803589, "learning_rate": 1.587578262225375e-05, "loss": 1.4029, "step": 6257 }, { "epoch": 0.3421401528094802, "grad_norm": 1.277795672416687, "learning_rate": 1.5874303886319175e-05, "loss": 1.6151, "step": 6258 }, { "epoch": 0.3421948252532018, "grad_norm": 2.29915452003479, "learning_rate": 1.5872824954227807e-05, "loss": 1.3476, "step": 6259 }, { "epoch": 0.3422494976969233, "grad_norm": 1.212266206741333, "learning_rate": 1.5871345826029032e-05, "loss": 1.7051, "step": 6260 }, { "epoch": 0.34230417014064485, "grad_norm": 1.4731849431991577, "learning_rate": 1.5869866501772247e-05, "loss": 1.28, "step": 6261 }, { "epoch": 0.34235884258436644, "grad_norm": 1.550316333770752, "learning_rate": 1.586838698150684e-05, "loss": 1.5259, "step": 6262 }, { "epoch": 0.342413515028088, "grad_norm": 1.3214735984802246, "learning_rate": 1.586690726528222e-05, "loss": 1.4319, "step": 6263 }, { "epoch": 0.3424681874718095, "grad_norm": 1.4012022018432617, "learning_rate": 1.5865427353147805e-05, "loss": 1.5204, "step": 6264 }, { "epoch": 0.34252285991553105, "grad_norm": 1.4753719568252563, "learning_rate": 1.5863947245153006e-05, "loss": 1.4827, "step": 6265 }, { "epoch": 0.34257753235925265, "grad_norm": 2.5215606689453125, "learning_rate": 1.5862466941347247e-05, "loss": 1.3865, "step": 6266 }, { "epoch": 0.3426322048029742, "grad_norm": 1.382779836654663, "learning_rate": 1.586098644177996e-05, "loss": 1.5032, "step": 6267 }, { "epoch": 0.3426868772466957, "grad_norm": 1.2390084266662598, "learning_rate": 1.5859505746500582e-05, "loss": 1.4343, "step": 6268 }, { "epoch": 0.3427415496904173, "grad_norm": 1.8583095073699951, "learning_rate": 1.585802485555856e-05, "loss": 1.4844, "step": 6269 }, { "epoch": 0.34279622213413885, "grad_norm": 1.2766762971878052, "learning_rate": 1.5856543769003338e-05, "loss": 1.5783, "step": 6270 }, { "epoch": 0.3428508945778604, "grad_norm": 1.6661607027053833, "learning_rate": 1.5855062486884377e-05, "loss": 1.5103, "step": 6271 }, { "epoch": 0.3429055670215819, "grad_norm": 1.5436694622039795, "learning_rate": 1.585358100925114e-05, "loss": 1.3809, "step": 6272 }, { "epoch": 0.3429602394653035, "grad_norm": 1.4535213708877563, "learning_rate": 1.58520993361531e-05, "loss": 1.5673, "step": 6273 }, { "epoch": 0.34301491190902506, "grad_norm": 1.3371148109436035, "learning_rate": 1.5850617467639728e-05, "loss": 1.5228, "step": 6274 }, { "epoch": 0.3430695843527466, "grad_norm": 1.506317377090454, "learning_rate": 1.5849135403760514e-05, "loss": 1.5417, "step": 6275 }, { "epoch": 0.3431242567964682, "grad_norm": 1.3964487314224243, "learning_rate": 1.5847653144564938e-05, "loss": 1.2744, "step": 6276 }, { "epoch": 0.3431789292401897, "grad_norm": 1.4608525037765503, "learning_rate": 1.5846170690102505e-05, "loss": 1.3401, "step": 6277 }, { "epoch": 0.34323360168391126, "grad_norm": 1.205901861190796, "learning_rate": 1.5844688040422714e-05, "loss": 1.6196, "step": 6278 }, { "epoch": 0.3432882741276328, "grad_norm": 1.2788922786712646, "learning_rate": 1.5843205195575074e-05, "loss": 1.387, "step": 6279 }, { "epoch": 0.3433429465713544, "grad_norm": 1.4816462993621826, "learning_rate": 1.58417221556091e-05, "loss": 1.3663, "step": 6280 }, { "epoch": 0.34339761901507593, "grad_norm": 1.6659505367279053, "learning_rate": 1.5840238920574315e-05, "loss": 1.4942, "step": 6281 }, { "epoch": 0.34345229145879747, "grad_norm": 2.0695199966430664, "learning_rate": 1.583875549052025e-05, "loss": 1.3652, "step": 6282 }, { "epoch": 0.34350696390251906, "grad_norm": 1.6396214962005615, "learning_rate": 1.5837271865496435e-05, "loss": 1.2976, "step": 6283 }, { "epoch": 0.3435616363462406, "grad_norm": 3.634568214416504, "learning_rate": 1.5835788045552418e-05, "loss": 1.1448, "step": 6284 }, { "epoch": 0.34361630878996213, "grad_norm": 1.2876381874084473, "learning_rate": 1.5834304030737744e-05, "loss": 1.4426, "step": 6285 }, { "epoch": 0.34367098123368367, "grad_norm": 1.76248037815094, "learning_rate": 1.583281982110197e-05, "loss": 1.4166, "step": 6286 }, { "epoch": 0.34372565367740526, "grad_norm": 1.209139108657837, "learning_rate": 1.5831335416694648e-05, "loss": 1.5472, "step": 6287 }, { "epoch": 0.3437803261211268, "grad_norm": 1.3445674180984497, "learning_rate": 1.5829850817565358e-05, "loss": 1.4111, "step": 6288 }, { "epoch": 0.34383499856484834, "grad_norm": 1.4923810958862305, "learning_rate": 1.5828366023763665e-05, "loss": 1.1121, "step": 6289 }, { "epoch": 0.34388967100856993, "grad_norm": 1.3173854351043701, "learning_rate": 1.5826881035339157e-05, "loss": 1.7026, "step": 6290 }, { "epoch": 0.34394434345229147, "grad_norm": 1.5204323530197144, "learning_rate": 1.582539585234142e-05, "loss": 1.5005, "step": 6291 }, { "epoch": 0.343999015896013, "grad_norm": 1.7303346395492554, "learning_rate": 1.582391047482004e-05, "loss": 1.4849, "step": 6292 }, { "epoch": 0.34405368833973454, "grad_norm": 1.2262557744979858, "learning_rate": 1.5822424902824627e-05, "loss": 1.2871, "step": 6293 }, { "epoch": 0.34410836078345614, "grad_norm": 1.6220866441726685, "learning_rate": 1.5820939136404783e-05, "loss": 1.391, "step": 6294 }, { "epoch": 0.3441630332271777, "grad_norm": 1.4925460815429688, "learning_rate": 1.581945317561012e-05, "loss": 1.5478, "step": 6295 }, { "epoch": 0.3442177056708992, "grad_norm": 1.1768012046813965, "learning_rate": 1.5817967020490262e-05, "loss": 1.7515, "step": 6296 }, { "epoch": 0.3442723781146208, "grad_norm": 1.6544407606124878, "learning_rate": 1.5816480671094835e-05, "loss": 1.2523, "step": 6297 }, { "epoch": 0.34432705055834234, "grad_norm": 1.4907017946243286, "learning_rate": 1.5814994127473465e-05, "loss": 1.3986, "step": 6298 }, { "epoch": 0.3443817230020639, "grad_norm": 1.5649970769882202, "learning_rate": 1.5813507389675796e-05, "loss": 1.346, "step": 6299 }, { "epoch": 0.3444363954457854, "grad_norm": 1.5214509963989258, "learning_rate": 1.581202045775148e-05, "loss": 1.422, "step": 6300 }, { "epoch": 0.344491067889507, "grad_norm": 1.4387425184249878, "learning_rate": 1.5810533331750155e-05, "loss": 1.3395, "step": 6301 }, { "epoch": 0.34454574033322855, "grad_norm": 1.3903071880340576, "learning_rate": 1.580904601172149e-05, "loss": 1.3526, "step": 6302 }, { "epoch": 0.3446004127769501, "grad_norm": 1.4052706956863403, "learning_rate": 1.580755849771515e-05, "loss": 1.1781, "step": 6303 }, { "epoch": 0.3446550852206717, "grad_norm": 1.8713743686676025, "learning_rate": 1.58060707897808e-05, "loss": 1.397, "step": 6304 }, { "epoch": 0.3447097576643932, "grad_norm": 1.5373852252960205, "learning_rate": 1.580458288796812e-05, "loss": 1.1038, "step": 6305 }, { "epoch": 0.34476443010811475, "grad_norm": 1.4202383756637573, "learning_rate": 1.58030947923268e-05, "loss": 1.3191, "step": 6306 }, { "epoch": 0.3448191025518363, "grad_norm": 1.45395827293396, "learning_rate": 1.580160650290653e-05, "loss": 1.2285, "step": 6307 }, { "epoch": 0.3448737749955579, "grad_norm": 1.5552400350570679, "learning_rate": 1.5800118019757e-05, "loss": 1.5263, "step": 6308 }, { "epoch": 0.3449284474392794, "grad_norm": 1.4760279655456543, "learning_rate": 1.5798629342927923e-05, "loss": 1.3952, "step": 6309 }, { "epoch": 0.34498311988300095, "grad_norm": 1.380524754524231, "learning_rate": 1.5797140472469002e-05, "loss": 1.3646, "step": 6310 }, { "epoch": 0.34503779232672255, "grad_norm": 1.83951735496521, "learning_rate": 1.579565140842996e-05, "loss": 1.3624, "step": 6311 }, { "epoch": 0.3450924647704441, "grad_norm": 1.9347249269485474, "learning_rate": 1.5794162150860513e-05, "loss": 1.5175, "step": 6312 }, { "epoch": 0.3451471372141656, "grad_norm": 1.457533359527588, "learning_rate": 1.57926726998104e-05, "loss": 1.3265, "step": 6313 }, { "epoch": 0.34520180965788716, "grad_norm": 2.731018304824829, "learning_rate": 1.5791183055329353e-05, "loss": 1.396, "step": 6314 }, { "epoch": 0.34525648210160875, "grad_norm": 1.643674612045288, "learning_rate": 1.578969321746711e-05, "loss": 1.3656, "step": 6315 }, { "epoch": 0.3453111545453303, "grad_norm": 1.538230061531067, "learning_rate": 1.578820318627343e-05, "loss": 1.1432, "step": 6316 }, { "epoch": 0.3453658269890518, "grad_norm": 1.1987316608428955, "learning_rate": 1.578671296179806e-05, "loss": 1.3395, "step": 6317 }, { "epoch": 0.3454204994327734, "grad_norm": 1.36481773853302, "learning_rate": 1.5785222544090766e-05, "loss": 1.2999, "step": 6318 }, { "epoch": 0.34547517187649496, "grad_norm": 1.5656273365020752, "learning_rate": 1.5783731933201315e-05, "loss": 1.4189, "step": 6319 }, { "epoch": 0.3455298443202165, "grad_norm": 1.519411563873291, "learning_rate": 1.5782241129179482e-05, "loss": 1.5438, "step": 6320 }, { "epoch": 0.34558451676393803, "grad_norm": 1.610081672668457, "learning_rate": 1.5780750132075052e-05, "loss": 1.4168, "step": 6321 }, { "epoch": 0.3456391892076596, "grad_norm": 1.5275764465332031, "learning_rate": 1.5779258941937803e-05, "loss": 1.3076, "step": 6322 }, { "epoch": 0.34569386165138116, "grad_norm": 1.3825511932373047, "learning_rate": 1.5777767558817545e-05, "loss": 1.5438, "step": 6323 }, { "epoch": 0.3457485340951027, "grad_norm": 1.4767423868179321, "learning_rate": 1.577627598276407e-05, "loss": 1.4836, "step": 6324 }, { "epoch": 0.3458032065388243, "grad_norm": 1.332975149154663, "learning_rate": 1.577478421382718e-05, "loss": 1.4252, "step": 6325 }, { "epoch": 0.34585787898254583, "grad_norm": 1.2565010786056519, "learning_rate": 1.5773292252056695e-05, "loss": 1.6034, "step": 6326 }, { "epoch": 0.34591255142626737, "grad_norm": 1.989363431930542, "learning_rate": 1.5771800097502437e-05, "loss": 1.2883, "step": 6327 }, { "epoch": 0.3459672238699889, "grad_norm": 1.4637960195541382, "learning_rate": 1.5770307750214228e-05, "loss": 1.4034, "step": 6328 }, { "epoch": 0.3460218963137105, "grad_norm": 1.5321431159973145, "learning_rate": 1.5768815210241907e-05, "loss": 1.4904, "step": 6329 }, { "epoch": 0.34607656875743203, "grad_norm": 1.5304688215255737, "learning_rate": 1.5767322477635303e-05, "loss": 1.5587, "step": 6330 }, { "epoch": 0.34613124120115357, "grad_norm": 1.5729016065597534, "learning_rate": 1.5765829552444273e-05, "loss": 1.6123, "step": 6331 }, { "epoch": 0.34618591364487517, "grad_norm": 1.4724169969558716, "learning_rate": 1.576433643471866e-05, "loss": 1.4381, "step": 6332 }, { "epoch": 0.3462405860885967, "grad_norm": 1.3520046472549438, "learning_rate": 1.5762843124508333e-05, "loss": 1.2757, "step": 6333 }, { "epoch": 0.34629525853231824, "grad_norm": 2.003373146057129, "learning_rate": 1.5761349621863145e-05, "loss": 1.2265, "step": 6334 }, { "epoch": 0.3463499309760398, "grad_norm": 1.5290822982788086, "learning_rate": 1.5759855926832973e-05, "loss": 1.4185, "step": 6335 }, { "epoch": 0.34640460341976137, "grad_norm": 1.5558617115020752, "learning_rate": 1.57583620394677e-05, "loss": 1.195, "step": 6336 }, { "epoch": 0.3464592758634829, "grad_norm": 1.5339233875274658, "learning_rate": 1.5756867959817205e-05, "loss": 1.2636, "step": 6337 }, { "epoch": 0.34651394830720444, "grad_norm": 1.2911275625228882, "learning_rate": 1.5755373687931382e-05, "loss": 1.4165, "step": 6338 }, { "epoch": 0.34656862075092604, "grad_norm": 1.9818965196609497, "learning_rate": 1.5753879223860123e-05, "loss": 1.5226, "step": 6339 }, { "epoch": 0.3466232931946476, "grad_norm": 1.3933531045913696, "learning_rate": 1.5752384567653334e-05, "loss": 1.5375, "step": 6340 }, { "epoch": 0.3466779656383691, "grad_norm": 1.6141270399093628, "learning_rate": 1.5750889719360927e-05, "loss": 1.289, "step": 6341 }, { "epoch": 0.34673263808209065, "grad_norm": 1.3701519966125488, "learning_rate": 1.5749394679032818e-05, "loss": 1.5374, "step": 6342 }, { "epoch": 0.34678731052581224, "grad_norm": 1.6211625337600708, "learning_rate": 1.574789944671893e-05, "loss": 1.2946, "step": 6343 }, { "epoch": 0.3468419829695338, "grad_norm": 1.713455319404602, "learning_rate": 1.5746404022469192e-05, "loss": 1.6516, "step": 6344 }, { "epoch": 0.3468966554132553, "grad_norm": 1.6576050519943237, "learning_rate": 1.5744908406333537e-05, "loss": 1.3904, "step": 6345 }, { "epoch": 0.3469513278569769, "grad_norm": 1.351009726524353, "learning_rate": 1.574341259836191e-05, "loss": 1.4549, "step": 6346 }, { "epoch": 0.34700600030069845, "grad_norm": 1.76277494430542, "learning_rate": 1.574191659860426e-05, "loss": 1.4666, "step": 6347 }, { "epoch": 0.34706067274442, "grad_norm": 1.6526124477386475, "learning_rate": 1.574042040711054e-05, "loss": 1.3655, "step": 6348 }, { "epoch": 0.3471153451881415, "grad_norm": 1.4211188554763794, "learning_rate": 1.5738924023930712e-05, "loss": 1.3588, "step": 6349 }, { "epoch": 0.3471700176318631, "grad_norm": 1.433441400527954, "learning_rate": 1.5737427449114744e-05, "loss": 1.4522, "step": 6350 }, { "epoch": 0.34722469007558465, "grad_norm": 2.5208230018615723, "learning_rate": 1.5735930682712613e-05, "loss": 1.3765, "step": 6351 }, { "epoch": 0.3472793625193062, "grad_norm": 1.4471262693405151, "learning_rate": 1.5734433724774295e-05, "loss": 1.6169, "step": 6352 }, { "epoch": 0.3473340349630278, "grad_norm": 1.8017559051513672, "learning_rate": 1.5732936575349777e-05, "loss": 1.0756, "step": 6353 }, { "epoch": 0.3473887074067493, "grad_norm": 1.2934709787368774, "learning_rate": 1.5731439234489054e-05, "loss": 1.5624, "step": 6354 }, { "epoch": 0.34744337985047086, "grad_norm": 1.367171049118042, "learning_rate": 1.572994170224213e-05, "loss": 1.6367, "step": 6355 }, { "epoch": 0.3474980522941924, "grad_norm": 1.516540765762329, "learning_rate": 1.5728443978659002e-05, "loss": 1.215, "step": 6356 }, { "epoch": 0.347552724737914, "grad_norm": 1.5751399993896484, "learning_rate": 1.572694606378969e-05, "loss": 1.0823, "step": 6357 }, { "epoch": 0.3476073971816355, "grad_norm": 1.6406116485595703, "learning_rate": 1.572544795768421e-05, "loss": 1.5659, "step": 6358 }, { "epoch": 0.34766206962535706, "grad_norm": 1.2985543012619019, "learning_rate": 1.572394966039259e-05, "loss": 1.4893, "step": 6359 }, { "epoch": 0.34771674206907865, "grad_norm": 1.7614519596099854, "learning_rate": 1.5722451171964853e-05, "loss": 1.4527, "step": 6360 }, { "epoch": 0.3477714145128002, "grad_norm": 1.3033692836761475, "learning_rate": 1.5720952492451047e-05, "loss": 1.4604, "step": 6361 }, { "epoch": 0.34782608695652173, "grad_norm": 1.5740519762039185, "learning_rate": 1.571945362190121e-05, "loss": 1.3498, "step": 6362 }, { "epoch": 0.34788075940024327, "grad_norm": 1.6066190004348755, "learning_rate": 1.5717954560365402e-05, "loss": 1.3746, "step": 6363 }, { "epoch": 0.34793543184396486, "grad_norm": 1.911557912826538, "learning_rate": 1.5716455307893665e-05, "loss": 1.2223, "step": 6364 }, { "epoch": 0.3479901042876864, "grad_norm": 1.6137750148773193, "learning_rate": 1.5714955864536078e-05, "loss": 1.3054, "step": 6365 }, { "epoch": 0.34804477673140793, "grad_norm": 1.4575884342193604, "learning_rate": 1.57134562303427e-05, "loss": 1.5398, "step": 6366 }, { "epoch": 0.3480994491751295, "grad_norm": 1.5015619993209839, "learning_rate": 1.5711956405363613e-05, "loss": 1.6854, "step": 6367 }, { "epoch": 0.34815412161885106, "grad_norm": 1.4753996133804321, "learning_rate": 1.57104563896489e-05, "loss": 1.4756, "step": 6368 }, { "epoch": 0.3482087940625726, "grad_norm": 1.308268427848816, "learning_rate": 1.5708956183248644e-05, "loss": 1.4496, "step": 6369 }, { "epoch": 0.34826346650629414, "grad_norm": 1.5610688924789429, "learning_rate": 1.5707455786212948e-05, "loss": 1.4213, "step": 6370 }, { "epoch": 0.34831813895001573, "grad_norm": 1.4455305337905884, "learning_rate": 1.5705955198591908e-05, "loss": 1.3818, "step": 6371 }, { "epoch": 0.34837281139373727, "grad_norm": 1.328055739402771, "learning_rate": 1.5704454420435635e-05, "loss": 1.6604, "step": 6372 }, { "epoch": 0.3484274838374588, "grad_norm": 1.4083259105682373, "learning_rate": 1.5702953451794245e-05, "loss": 1.6331, "step": 6373 }, { "epoch": 0.3484821562811804, "grad_norm": 1.1829718351364136, "learning_rate": 1.5701452292717853e-05, "loss": 1.6689, "step": 6374 }, { "epoch": 0.34853682872490194, "grad_norm": 1.4163538217544556, "learning_rate": 1.5699950943256593e-05, "loss": 1.3944, "step": 6375 }, { "epoch": 0.3485915011686235, "grad_norm": 1.7774624824523926, "learning_rate": 1.5698449403460593e-05, "loss": 1.2683, "step": 6376 }, { "epoch": 0.348646173612345, "grad_norm": 1.4517725706100464, "learning_rate": 1.569694767338e-05, "loss": 1.2893, "step": 6377 }, { "epoch": 0.3487008460560666, "grad_norm": 1.4475854635238647, "learning_rate": 1.5695445753064954e-05, "loss": 1.6603, "step": 6378 }, { "epoch": 0.34875551849978814, "grad_norm": 2.1658239364624023, "learning_rate": 1.5693943642565604e-05, "loss": 1.3234, "step": 6379 }, { "epoch": 0.3488101909435097, "grad_norm": 1.8073698282241821, "learning_rate": 1.569244134193212e-05, "loss": 1.3256, "step": 6380 }, { "epoch": 0.34886486338723127, "grad_norm": 1.4728398323059082, "learning_rate": 1.5690938851214664e-05, "loss": 1.2493, "step": 6381 }, { "epoch": 0.3489195358309528, "grad_norm": 1.7231179475784302, "learning_rate": 1.5689436170463403e-05, "loss": 1.4776, "step": 6382 }, { "epoch": 0.34897420827467435, "grad_norm": 1.228975534439087, "learning_rate": 1.5687933299728517e-05, "loss": 1.8815, "step": 6383 }, { "epoch": 0.3490288807183959, "grad_norm": 1.4199893474578857, "learning_rate": 1.5686430239060194e-05, "loss": 1.4168, "step": 6384 }, { "epoch": 0.3490835531621175, "grad_norm": 1.0744986534118652, "learning_rate": 1.568492698850862e-05, "loss": 1.6084, "step": 6385 }, { "epoch": 0.349138225605839, "grad_norm": 1.2480461597442627, "learning_rate": 1.568342354812399e-05, "loss": 1.3903, "step": 6386 }, { "epoch": 0.34919289804956055, "grad_norm": 1.674863338470459, "learning_rate": 1.5681919917956515e-05, "loss": 1.1892, "step": 6387 }, { "epoch": 0.34924757049328214, "grad_norm": 1.630265235900879, "learning_rate": 1.56804160980564e-05, "loss": 1.4607, "step": 6388 }, { "epoch": 0.3493022429370037, "grad_norm": 1.3741967678070068, "learning_rate": 1.567891208847386e-05, "loss": 1.4615, "step": 6389 }, { "epoch": 0.3493569153807252, "grad_norm": 1.6969387531280518, "learning_rate": 1.567740788925912e-05, "loss": 1.2744, "step": 6390 }, { "epoch": 0.34941158782444676, "grad_norm": 1.7724483013153076, "learning_rate": 1.5675903500462412e-05, "loss": 1.327, "step": 6391 }, { "epoch": 0.34946626026816835, "grad_norm": 1.661858081817627, "learning_rate": 1.5674398922133963e-05, "loss": 1.434, "step": 6392 }, { "epoch": 0.3495209327118899, "grad_norm": 1.0397000312805176, "learning_rate": 1.567289415432402e-05, "loss": 1.4469, "step": 6393 }, { "epoch": 0.3495756051556114, "grad_norm": 1.631797194480896, "learning_rate": 1.5671389197082832e-05, "loss": 1.3495, "step": 6394 }, { "epoch": 0.349630277599333, "grad_norm": 1.3782159090042114, "learning_rate": 1.5669884050460646e-05, "loss": 1.3793, "step": 6395 }, { "epoch": 0.34968495004305455, "grad_norm": 1.4192779064178467, "learning_rate": 1.566837871450773e-05, "loss": 1.4296, "step": 6396 }, { "epoch": 0.3497396224867761, "grad_norm": 1.3923382759094238, "learning_rate": 1.5666873189274344e-05, "loss": 1.7266, "step": 6397 }, { "epoch": 0.34979429493049763, "grad_norm": 1.7417240142822266, "learning_rate": 1.566536747481077e-05, "loss": 1.3547, "step": 6398 }, { "epoch": 0.3498489673742192, "grad_norm": 1.4198724031448364, "learning_rate": 1.5663861571167277e-05, "loss": 1.2991, "step": 6399 }, { "epoch": 0.34990363981794076, "grad_norm": 1.5244778394699097, "learning_rate": 1.5662355478394157e-05, "loss": 1.5552, "step": 6400 }, { "epoch": 0.3499583122616623, "grad_norm": 1.5778170824050903, "learning_rate": 1.56608491965417e-05, "loss": 1.4435, "step": 6401 }, { "epoch": 0.3500129847053839, "grad_norm": 1.5716984272003174, "learning_rate": 1.5659342725660206e-05, "loss": 1.6044, "step": 6402 }, { "epoch": 0.3500676571491054, "grad_norm": 2.127135992050171, "learning_rate": 1.5657836065799975e-05, "loss": 1.5012, "step": 6403 }, { "epoch": 0.35012232959282696, "grad_norm": 1.7413941621780396, "learning_rate": 1.5656329217011322e-05, "loss": 1.242, "step": 6404 }, { "epoch": 0.3501770020365485, "grad_norm": 1.4770509004592896, "learning_rate": 1.5654822179344568e-05, "loss": 1.5472, "step": 6405 }, { "epoch": 0.3502316744802701, "grad_norm": 1.359008550643921, "learning_rate": 1.565331495285003e-05, "loss": 1.377, "step": 6406 }, { "epoch": 0.35028634692399163, "grad_norm": 1.3648250102996826, "learning_rate": 1.565180753757804e-05, "loss": 1.4567, "step": 6407 }, { "epoch": 0.35034101936771317, "grad_norm": 1.3736085891723633, "learning_rate": 1.565029993357893e-05, "loss": 1.3774, "step": 6408 }, { "epoch": 0.35039569181143476, "grad_norm": 1.4956672191619873, "learning_rate": 1.564879214090305e-05, "loss": 1.2536, "step": 6409 }, { "epoch": 0.3504503642551563, "grad_norm": 1.574499487876892, "learning_rate": 1.5647284159600744e-05, "loss": 1.3132, "step": 6410 }, { "epoch": 0.35050503669887784, "grad_norm": 1.8839976787567139, "learning_rate": 1.5645775989722366e-05, "loss": 1.2454, "step": 6411 }, { "epoch": 0.3505597091425994, "grad_norm": 1.4687734842300415, "learning_rate": 1.5644267631318286e-05, "loss": 1.9276, "step": 6412 }, { "epoch": 0.35061438158632097, "grad_norm": 1.3273913860321045, "learning_rate": 1.5642759084438858e-05, "loss": 1.317, "step": 6413 }, { "epoch": 0.3506690540300425, "grad_norm": 1.53519606590271, "learning_rate": 1.5641250349134467e-05, "loss": 1.4668, "step": 6414 }, { "epoch": 0.35072372647376404, "grad_norm": 1.2792484760284424, "learning_rate": 1.563974142545549e-05, "loss": 1.4479, "step": 6415 }, { "epoch": 0.35077839891748563, "grad_norm": 1.9131017923355103, "learning_rate": 1.563823231345231e-05, "loss": 1.1505, "step": 6416 }, { "epoch": 0.35083307136120717, "grad_norm": 1.5261937379837036, "learning_rate": 1.5636723013175325e-05, "loss": 1.5539, "step": 6417 }, { "epoch": 0.3508877438049287, "grad_norm": 1.2158191204071045, "learning_rate": 1.563521352467493e-05, "loss": 1.4197, "step": 6418 }, { "epoch": 0.35094241624865025, "grad_norm": 1.6005339622497559, "learning_rate": 1.563370384800153e-05, "loss": 1.1939, "step": 6419 }, { "epoch": 0.35099708869237184, "grad_norm": 1.4541654586791992, "learning_rate": 1.5632193983205542e-05, "loss": 1.5158, "step": 6420 }, { "epoch": 0.3510517611360934, "grad_norm": 1.222203016281128, "learning_rate": 1.563068393033738e-05, "loss": 1.4465, "step": 6421 }, { "epoch": 0.3511064335798149, "grad_norm": 1.7328814268112183, "learning_rate": 1.5629173689447467e-05, "loss": 1.4797, "step": 6422 }, { "epoch": 0.3511611060235365, "grad_norm": 1.9484165906906128, "learning_rate": 1.5627663260586235e-05, "loss": 1.4527, "step": 6423 }, { "epoch": 0.35121577846725804, "grad_norm": 1.356245517730713, "learning_rate": 1.5626152643804124e-05, "loss": 1.6543, "step": 6424 }, { "epoch": 0.3512704509109796, "grad_norm": 1.260462760925293, "learning_rate": 1.562464183915157e-05, "loss": 1.2303, "step": 6425 }, { "epoch": 0.3513251233547012, "grad_norm": 1.5867146253585815, "learning_rate": 1.562313084667903e-05, "loss": 1.3001, "step": 6426 }, { "epoch": 0.3513797957984227, "grad_norm": 1.6777565479278564, "learning_rate": 1.5621619666436956e-05, "loss": 1.4452, "step": 6427 }, { "epoch": 0.35143446824214425, "grad_norm": 1.8381221294403076, "learning_rate": 1.5620108298475808e-05, "loss": 1.3269, "step": 6428 }, { "epoch": 0.3514891406858658, "grad_norm": 1.4256258010864258, "learning_rate": 1.5618596742846057e-05, "loss": 1.7326, "step": 6429 }, { "epoch": 0.3515438131295874, "grad_norm": 1.2335315942764282, "learning_rate": 1.5617084999598177e-05, "loss": 1.4413, "step": 6430 }, { "epoch": 0.3515984855733089, "grad_norm": 1.5560916662216187, "learning_rate": 1.5615573068782643e-05, "loss": 1.3524, "step": 6431 }, { "epoch": 0.35165315801703045, "grad_norm": 1.4423609972000122, "learning_rate": 1.5614060950449948e-05, "loss": 1.6674, "step": 6432 }, { "epoch": 0.35170783046075205, "grad_norm": 1.1854180097579956, "learning_rate": 1.561254864465059e-05, "loss": 1.5839, "step": 6433 }, { "epoch": 0.3517625029044736, "grad_norm": 1.6827514171600342, "learning_rate": 1.561103615143506e-05, "loss": 1.2388, "step": 6434 }, { "epoch": 0.3518171753481951, "grad_norm": 1.3778259754180908, "learning_rate": 1.5609523470853862e-05, "loss": 1.4563, "step": 6435 }, { "epoch": 0.35187184779191666, "grad_norm": 1.6342684030532837, "learning_rate": 1.5608010602957518e-05, "loss": 1.5907, "step": 6436 }, { "epoch": 0.35192652023563825, "grad_norm": 1.6193559169769287, "learning_rate": 1.5606497547796538e-05, "loss": 1.3449, "step": 6437 }, { "epoch": 0.3519811926793598, "grad_norm": 1.9555729627609253, "learning_rate": 1.5604984305421446e-05, "loss": 1.6066, "step": 6438 }, { "epoch": 0.3520358651230813, "grad_norm": 1.5854564905166626, "learning_rate": 1.560347087588278e-05, "loss": 1.2095, "step": 6439 }, { "epoch": 0.3520905375668029, "grad_norm": 1.3317337036132812, "learning_rate": 1.5601957259231072e-05, "loss": 1.6576, "step": 6440 }, { "epoch": 0.35214521001052446, "grad_norm": 1.1178562641143799, "learning_rate": 1.560044345551686e-05, "loss": 1.4567, "step": 6441 }, { "epoch": 0.352199882454246, "grad_norm": 1.5902491807937622, "learning_rate": 1.5598929464790705e-05, "loss": 1.1679, "step": 6442 }, { "epoch": 0.35225455489796753, "grad_norm": 1.3588860034942627, "learning_rate": 1.5597415287103158e-05, "loss": 1.2917, "step": 6443 }, { "epoch": 0.3523092273416891, "grad_norm": 1.5472707748413086, "learning_rate": 1.5595900922504776e-05, "loss": 1.284, "step": 6444 }, { "epoch": 0.35236389978541066, "grad_norm": 1.638206124305725, "learning_rate": 1.559438637104613e-05, "loss": 1.2777, "step": 6445 }, { "epoch": 0.3524185722291322, "grad_norm": 1.406789779663086, "learning_rate": 1.5592871632777798e-05, "loss": 1.3369, "step": 6446 }, { "epoch": 0.3524732446728538, "grad_norm": 1.4393737316131592, "learning_rate": 1.559135670775036e-05, "loss": 1.3533, "step": 6447 }, { "epoch": 0.35252791711657533, "grad_norm": 1.4623103141784668, "learning_rate": 1.5589841596014398e-05, "loss": 1.4176, "step": 6448 }, { "epoch": 0.35258258956029687, "grad_norm": 1.3907006978988647, "learning_rate": 1.558832629762051e-05, "loss": 1.5242, "step": 6449 }, { "epoch": 0.3526372620040184, "grad_norm": 1.3615416288375854, "learning_rate": 1.558681081261929e-05, "loss": 1.5383, "step": 6450 }, { "epoch": 0.35269193444774, "grad_norm": 1.6483019590377808, "learning_rate": 1.558529514106135e-05, "loss": 1.4471, "step": 6451 }, { "epoch": 0.35274660689146153, "grad_norm": 1.425347924232483, "learning_rate": 1.5583779282997296e-05, "loss": 1.3506, "step": 6452 }, { "epoch": 0.35280127933518307, "grad_norm": 1.3645702600479126, "learning_rate": 1.5582263238477753e-05, "loss": 1.2996, "step": 6453 }, { "epoch": 0.35285595177890466, "grad_norm": 1.3599863052368164, "learning_rate": 1.5580747007553342e-05, "loss": 1.4054, "step": 6454 }, { "epoch": 0.3529106242226262, "grad_norm": 2.8170793056488037, "learning_rate": 1.557923059027469e-05, "loss": 1.3236, "step": 6455 }, { "epoch": 0.35296529666634774, "grad_norm": 1.4387520551681519, "learning_rate": 1.5577713986692435e-05, "loss": 1.4178, "step": 6456 }, { "epoch": 0.3530199691100693, "grad_norm": 1.8194286823272705, "learning_rate": 1.5576197196857227e-05, "loss": 1.3625, "step": 6457 }, { "epoch": 0.35307464155379087, "grad_norm": 1.706595540046692, "learning_rate": 1.55746802208197e-05, "loss": 1.2446, "step": 6458 }, { "epoch": 0.3531293139975124, "grad_norm": 1.0755506753921509, "learning_rate": 1.557316305863053e-05, "loss": 1.6037, "step": 6459 }, { "epoch": 0.35318398644123394, "grad_norm": 1.6116961240768433, "learning_rate": 1.557164571034036e-05, "loss": 1.6312, "step": 6460 }, { "epoch": 0.35323865888495554, "grad_norm": 1.3724883794784546, "learning_rate": 1.557012817599987e-05, "loss": 1.3852, "step": 6461 }, { "epoch": 0.3532933313286771, "grad_norm": 1.4190990924835205, "learning_rate": 1.5568610455659727e-05, "loss": 1.4258, "step": 6462 }, { "epoch": 0.3533480037723986, "grad_norm": 1.631698489189148, "learning_rate": 1.5567092549370615e-05, "loss": 1.7256, "step": 6463 }, { "epoch": 0.35340267621612015, "grad_norm": 1.4870550632476807, "learning_rate": 1.5565574457183215e-05, "loss": 1.4605, "step": 6464 }, { "epoch": 0.35345734865984174, "grad_norm": 1.4877930879592896, "learning_rate": 1.556405617914823e-05, "loss": 1.2928, "step": 6465 }, { "epoch": 0.3535120211035633, "grad_norm": 1.6302673816680908, "learning_rate": 1.556253771531635e-05, "loss": 1.5487, "step": 6466 }, { "epoch": 0.3535666935472848, "grad_norm": 1.5554624795913696, "learning_rate": 1.5561019065738282e-05, "loss": 1.4307, "step": 6467 }, { "epoch": 0.3536213659910064, "grad_norm": 1.137425184249878, "learning_rate": 1.5559500230464738e-05, "loss": 1.5589, "step": 6468 }, { "epoch": 0.35367603843472795, "grad_norm": 1.7125099897384644, "learning_rate": 1.555798120954644e-05, "loss": 1.3735, "step": 6469 }, { "epoch": 0.3537307108784495, "grad_norm": 1.8000473976135254, "learning_rate": 1.5556462003034104e-05, "loss": 1.3399, "step": 6470 }, { "epoch": 0.353785383322171, "grad_norm": 1.3917361497879028, "learning_rate": 1.5554942610978462e-05, "loss": 1.5125, "step": 6471 }, { "epoch": 0.3538400557658926, "grad_norm": 1.3157744407653809, "learning_rate": 1.555342303343025e-05, "loss": 1.413, "step": 6472 }, { "epoch": 0.35389472820961415, "grad_norm": 1.433708906173706, "learning_rate": 1.5551903270440213e-05, "loss": 1.3703, "step": 6473 }, { "epoch": 0.3539494006533357, "grad_norm": 1.2440009117126465, "learning_rate": 1.55503833220591e-05, "loss": 1.5148, "step": 6474 }, { "epoch": 0.3540040730970573, "grad_norm": 1.5954639911651611, "learning_rate": 1.554886318833766e-05, "loss": 1.6357, "step": 6475 }, { "epoch": 0.3540587455407788, "grad_norm": 1.2750771045684814, "learning_rate": 1.554734286932666e-05, "loss": 1.5628, "step": 6476 }, { "epoch": 0.35411341798450036, "grad_norm": 1.5202432870864868, "learning_rate": 1.5545822365076865e-05, "loss": 1.269, "step": 6477 }, { "epoch": 0.3541680904282219, "grad_norm": 1.6524529457092285, "learning_rate": 1.5544301675639045e-05, "loss": 1.3054, "step": 6478 }, { "epoch": 0.3542227628719435, "grad_norm": 2.0779106616973877, "learning_rate": 1.5542780801063983e-05, "loss": 1.2795, "step": 6479 }, { "epoch": 0.354277435315665, "grad_norm": 1.470114827156067, "learning_rate": 1.554125974140246e-05, "loss": 1.4153, "step": 6480 }, { "epoch": 0.35433210775938656, "grad_norm": 1.737776517868042, "learning_rate": 1.5539738496705277e-05, "loss": 1.4404, "step": 6481 }, { "epoch": 0.35438678020310815, "grad_norm": 1.3146601915359497, "learning_rate": 1.5538217067023223e-05, "loss": 1.3969, "step": 6482 }, { "epoch": 0.3544414526468297, "grad_norm": 1.572918176651001, "learning_rate": 1.5536695452407107e-05, "loss": 1.4199, "step": 6483 }, { "epoch": 0.35449612509055123, "grad_norm": 1.7730449438095093, "learning_rate": 1.5535173652907737e-05, "loss": 1.3435, "step": 6484 }, { "epoch": 0.35455079753427277, "grad_norm": 1.1929229497909546, "learning_rate": 1.553365166857593e-05, "loss": 1.6028, "step": 6485 }, { "epoch": 0.35460546997799436, "grad_norm": 1.2796207666397095, "learning_rate": 1.5532129499462507e-05, "loss": 1.2518, "step": 6486 }, { "epoch": 0.3546601424217159, "grad_norm": 1.5896893739700317, "learning_rate": 1.55306071456183e-05, "loss": 1.3973, "step": 6487 }, { "epoch": 0.35471481486543743, "grad_norm": 1.3802986145019531, "learning_rate": 1.5529084607094144e-05, "loss": 1.4908, "step": 6488 }, { "epoch": 0.354769487309159, "grad_norm": 1.916144609451294, "learning_rate": 1.5527561883940877e-05, "loss": 1.5856, "step": 6489 }, { "epoch": 0.35482415975288056, "grad_norm": 1.9153400659561157, "learning_rate": 1.5526038976209345e-05, "loss": 1.3579, "step": 6490 }, { "epoch": 0.3548788321966021, "grad_norm": 1.7124837636947632, "learning_rate": 1.5524515883950405e-05, "loss": 1.5423, "step": 6491 }, { "epoch": 0.35493350464032364, "grad_norm": 1.3712594509124756, "learning_rate": 1.5522992607214923e-05, "loss": 1.6309, "step": 6492 }, { "epoch": 0.35498817708404523, "grad_norm": 1.392159104347229, "learning_rate": 1.552146914605375e-05, "loss": 1.5878, "step": 6493 }, { "epoch": 0.35504284952776677, "grad_norm": 1.652449369430542, "learning_rate": 1.551994550051777e-05, "loss": 1.1542, "step": 6494 }, { "epoch": 0.3550975219714883, "grad_norm": 1.5777612924575806, "learning_rate": 1.5518421670657856e-05, "loss": 1.4867, "step": 6495 }, { "epoch": 0.3551521944152099, "grad_norm": 1.3933219909667969, "learning_rate": 1.5516897656524892e-05, "loss": 1.3605, "step": 6496 }, { "epoch": 0.35520686685893144, "grad_norm": 1.7962894439697266, "learning_rate": 1.5515373458169767e-05, "loss": 1.286, "step": 6497 }, { "epoch": 0.355261539302653, "grad_norm": 1.519457459449768, "learning_rate": 1.5513849075643384e-05, "loss": 1.4344, "step": 6498 }, { "epoch": 0.3553162117463745, "grad_norm": 1.5599069595336914, "learning_rate": 1.5512324508996643e-05, "loss": 1.3237, "step": 6499 }, { "epoch": 0.3553708841900961, "grad_norm": 1.3686326742172241, "learning_rate": 1.5510799758280447e-05, "loss": 1.366, "step": 6500 }, { "epoch": 0.35542555663381764, "grad_norm": 1.733872652053833, "learning_rate": 1.5509274823545716e-05, "loss": 1.4268, "step": 6501 }, { "epoch": 0.3554802290775392, "grad_norm": 1.6977437734603882, "learning_rate": 1.550774970484337e-05, "loss": 1.3179, "step": 6502 }, { "epoch": 0.35553490152126077, "grad_norm": 1.9184715747833252, "learning_rate": 1.5506224402224342e-05, "loss": 1.4199, "step": 6503 }, { "epoch": 0.3555895739649823, "grad_norm": 1.920063853263855, "learning_rate": 1.550469891573956e-05, "loss": 1.53, "step": 6504 }, { "epoch": 0.35564424640870385, "grad_norm": 1.492619514465332, "learning_rate": 1.550317324543996e-05, "loss": 1.3226, "step": 6505 }, { "epoch": 0.3556989188524254, "grad_norm": 1.4799567461013794, "learning_rate": 1.5501647391376492e-05, "loss": 1.5575, "step": 6506 }, { "epoch": 0.355753591296147, "grad_norm": 1.4590458869934082, "learning_rate": 1.550012135360011e-05, "loss": 1.4868, "step": 6507 }, { "epoch": 0.3558082637398685, "grad_norm": 1.6961169242858887, "learning_rate": 1.549859513216177e-05, "loss": 1.2618, "step": 6508 }, { "epoch": 0.35586293618359005, "grad_norm": 1.4589353799819946, "learning_rate": 1.5497068727112435e-05, "loss": 1.6251, "step": 6509 }, { "epoch": 0.35591760862731164, "grad_norm": 1.5811817646026611, "learning_rate": 1.5495542138503073e-05, "loss": 1.3384, "step": 6510 }, { "epoch": 0.3559722810710332, "grad_norm": 1.4228668212890625, "learning_rate": 1.5494015366384662e-05, "loss": 1.4233, "step": 6511 }, { "epoch": 0.3560269535147547, "grad_norm": 1.5727609395980835, "learning_rate": 1.5492488410808193e-05, "loss": 1.3642, "step": 6512 }, { "epoch": 0.35608162595847626, "grad_norm": 1.475812554359436, "learning_rate": 1.5490961271824644e-05, "loss": 1.3769, "step": 6513 }, { "epoch": 0.35613629840219785, "grad_norm": 1.6070561408996582, "learning_rate": 1.5489433949485012e-05, "loss": 1.1138, "step": 6514 }, { "epoch": 0.3561909708459194, "grad_norm": 1.3531986474990845, "learning_rate": 1.54879064438403e-05, "loss": 1.3078, "step": 6515 }, { "epoch": 0.3562456432896409, "grad_norm": 1.3917295932769775, "learning_rate": 1.5486378754941514e-05, "loss": 1.4714, "step": 6516 }, { "epoch": 0.3563003157333625, "grad_norm": 1.4998779296875, "learning_rate": 1.5484850882839667e-05, "loss": 1.6167, "step": 6517 }, { "epoch": 0.35635498817708405, "grad_norm": 1.2954069375991821, "learning_rate": 1.5483322827585777e-05, "loss": 1.3808, "step": 6518 }, { "epoch": 0.3564096606208056, "grad_norm": 1.6045812368392944, "learning_rate": 1.5481794589230875e-05, "loss": 1.4643, "step": 6519 }, { "epoch": 0.35646433306452713, "grad_norm": 1.2908437252044678, "learning_rate": 1.5480266167825987e-05, "loss": 1.4584, "step": 6520 }, { "epoch": 0.3565190055082487, "grad_norm": 1.5972154140472412, "learning_rate": 1.5478737563422148e-05, "loss": 1.1812, "step": 6521 }, { "epoch": 0.35657367795197026, "grad_norm": 1.3068310022354126, "learning_rate": 1.547720877607041e-05, "loss": 1.6188, "step": 6522 }, { "epoch": 0.3566283503956918, "grad_norm": 1.594078779220581, "learning_rate": 1.5475679805821814e-05, "loss": 1.4434, "step": 6523 }, { "epoch": 0.3566830228394134, "grad_norm": 1.2897872924804688, "learning_rate": 1.5474150652727423e-05, "loss": 1.5151, "step": 6524 }, { "epoch": 0.3567376952831349, "grad_norm": 1.5157222747802734, "learning_rate": 1.5472621316838297e-05, "loss": 1.4477, "step": 6525 }, { "epoch": 0.35679236772685646, "grad_norm": 1.8999577760696411, "learning_rate": 1.54710917982055e-05, "loss": 1.4098, "step": 6526 }, { "epoch": 0.356847040170578, "grad_norm": 1.2267179489135742, "learning_rate": 1.5469562096880113e-05, "loss": 1.3754, "step": 6527 }, { "epoch": 0.3569017126142996, "grad_norm": 1.2620455026626587, "learning_rate": 1.546803221291321e-05, "loss": 1.5247, "step": 6528 }, { "epoch": 0.35695638505802113, "grad_norm": 1.4096002578735352, "learning_rate": 1.5466502146355883e-05, "loss": 1.275, "step": 6529 }, { "epoch": 0.35701105750174267, "grad_norm": 1.309022068977356, "learning_rate": 1.546497189725922e-05, "loss": 1.509, "step": 6530 }, { "epoch": 0.35706572994546426, "grad_norm": 1.3444303274154663, "learning_rate": 1.546344146567432e-05, "loss": 1.4262, "step": 6531 }, { "epoch": 0.3571204023891858, "grad_norm": 1.2597806453704834, "learning_rate": 1.546191085165229e-05, "loss": 1.6466, "step": 6532 }, { "epoch": 0.35717507483290734, "grad_norm": 1.4899011850357056, "learning_rate": 1.546038005524424e-05, "loss": 1.1948, "step": 6533 }, { "epoch": 0.3572297472766289, "grad_norm": 1.78203547000885, "learning_rate": 1.545884907650129e-05, "loss": 1.569, "step": 6534 }, { "epoch": 0.35728441972035047, "grad_norm": 1.739242434501648, "learning_rate": 1.5457317915474556e-05, "loss": 1.4169, "step": 6535 }, { "epoch": 0.357339092164072, "grad_norm": 1.5251846313476562, "learning_rate": 1.545578657221517e-05, "loss": 1.3208, "step": 6536 }, { "epoch": 0.35739376460779354, "grad_norm": 1.4919918775558472, "learning_rate": 1.5454255046774273e-05, "loss": 1.2559, "step": 6537 }, { "epoch": 0.35744843705151513, "grad_norm": 1.6736043691635132, "learning_rate": 1.5452723339203e-05, "loss": 1.5316, "step": 6538 }, { "epoch": 0.35750310949523667, "grad_norm": 1.277778148651123, "learning_rate": 1.54511914495525e-05, "loss": 1.5534, "step": 6539 }, { "epoch": 0.3575577819389582, "grad_norm": 1.3349652290344238, "learning_rate": 1.544965937787392e-05, "loss": 1.4721, "step": 6540 }, { "epoch": 0.35761245438267975, "grad_norm": 1.8296988010406494, "learning_rate": 1.544812712421843e-05, "loss": 1.3601, "step": 6541 }, { "epoch": 0.35766712682640134, "grad_norm": 1.8864117860794067, "learning_rate": 1.544659468863719e-05, "loss": 1.5696, "step": 6542 }, { "epoch": 0.3577217992701229, "grad_norm": 1.9180954694747925, "learning_rate": 1.544506207118137e-05, "loss": 1.144, "step": 6543 }, { "epoch": 0.3577764717138444, "grad_norm": 2.012328624725342, "learning_rate": 1.5443529271902155e-05, "loss": 1.1547, "step": 6544 }, { "epoch": 0.357831144157566, "grad_norm": 1.629960060119629, "learning_rate": 1.544199629085072e-05, "loss": 1.232, "step": 6545 }, { "epoch": 0.35788581660128754, "grad_norm": 1.6143404245376587, "learning_rate": 1.5440463128078264e-05, "loss": 1.6245, "step": 6546 }, { "epoch": 0.3579404890450091, "grad_norm": 1.4226435422897339, "learning_rate": 1.5438929783635968e-05, "loss": 1.4345, "step": 6547 }, { "epoch": 0.3579951614887306, "grad_norm": 1.362935185432434, "learning_rate": 1.5437396257575053e-05, "loss": 1.315, "step": 6548 }, { "epoch": 0.3580498339324522, "grad_norm": 1.410164713859558, "learning_rate": 1.5435862549946714e-05, "loss": 1.4533, "step": 6549 }, { "epoch": 0.35810450637617375, "grad_norm": 1.2900569438934326, "learning_rate": 1.543432866080217e-05, "loss": 1.3148, "step": 6550 }, { "epoch": 0.3581591788198953, "grad_norm": 1.5240013599395752, "learning_rate": 1.543279459019264e-05, "loss": 1.4444, "step": 6551 }, { "epoch": 0.3582138512636169, "grad_norm": 1.280018925666809, "learning_rate": 1.5431260338169345e-05, "loss": 1.38, "step": 6552 }, { "epoch": 0.3582685237073384, "grad_norm": 1.714881181716919, "learning_rate": 1.542972590478353e-05, "loss": 1.5527, "step": 6553 }, { "epoch": 0.35832319615105995, "grad_norm": 1.3051685094833374, "learning_rate": 1.5428191290086424e-05, "loss": 1.6065, "step": 6554 }, { "epoch": 0.3583778685947815, "grad_norm": 1.987695574760437, "learning_rate": 1.542665649412927e-05, "loss": 1.5092, "step": 6555 }, { "epoch": 0.3584325410385031, "grad_norm": 1.5528522729873657, "learning_rate": 1.5425121516963323e-05, "loss": 1.5124, "step": 6556 }, { "epoch": 0.3584872134822246, "grad_norm": 1.4281929731369019, "learning_rate": 1.542358635863984e-05, "loss": 1.4482, "step": 6557 }, { "epoch": 0.35854188592594616, "grad_norm": 1.8543598651885986, "learning_rate": 1.5422051019210082e-05, "loss": 1.2772, "step": 6558 }, { "epoch": 0.35859655836966775, "grad_norm": 1.647856593132019, "learning_rate": 1.5420515498725315e-05, "loss": 1.1662, "step": 6559 }, { "epoch": 0.3586512308133893, "grad_norm": 1.264974594116211, "learning_rate": 1.5418979797236814e-05, "loss": 1.3753, "step": 6560 }, { "epoch": 0.3587059032571108, "grad_norm": 2.269669532775879, "learning_rate": 1.5417443914795864e-05, "loss": 1.3255, "step": 6561 }, { "epoch": 0.35876057570083236, "grad_norm": 1.647790551185608, "learning_rate": 1.541590785145375e-05, "loss": 1.3906, "step": 6562 }, { "epoch": 0.35881524814455396, "grad_norm": 1.3909708261489868, "learning_rate": 1.5414371607261762e-05, "loss": 1.4562, "step": 6563 }, { "epoch": 0.3588699205882755, "grad_norm": 1.318238615989685, "learning_rate": 1.5412835182271202e-05, "loss": 1.1741, "step": 6564 }, { "epoch": 0.35892459303199703, "grad_norm": 1.7771153450012207, "learning_rate": 1.5411298576533376e-05, "loss": 1.4561, "step": 6565 }, { "epoch": 0.3589792654757186, "grad_norm": 2.1006476879119873, "learning_rate": 1.5409761790099586e-05, "loss": 1.598, "step": 6566 }, { "epoch": 0.35903393791944016, "grad_norm": 1.154797077178955, "learning_rate": 1.540822482302116e-05, "loss": 1.6159, "step": 6567 }, { "epoch": 0.3590886103631617, "grad_norm": 1.0882116556167603, "learning_rate": 1.5406687675349415e-05, "loss": 1.6694, "step": 6568 }, { "epoch": 0.35914328280688324, "grad_norm": 1.5565555095672607, "learning_rate": 1.5405150347135684e-05, "loss": 1.1567, "step": 6569 }, { "epoch": 0.35919795525060483, "grad_norm": 1.6022003889083862, "learning_rate": 1.54036128384313e-05, "loss": 1.4463, "step": 6570 }, { "epoch": 0.35925262769432637, "grad_norm": 1.3179625272750854, "learning_rate": 1.54020751492876e-05, "loss": 1.6449, "step": 6571 }, { "epoch": 0.3593073001380479, "grad_norm": 1.7110410928726196, "learning_rate": 1.5400537279755935e-05, "loss": 1.4493, "step": 6572 }, { "epoch": 0.3593619725817695, "grad_norm": 1.3058178424835205, "learning_rate": 1.5398999229887656e-05, "loss": 1.4861, "step": 6573 }, { "epoch": 0.35941664502549103, "grad_norm": 1.5514723062515259, "learning_rate": 1.5397460999734126e-05, "loss": 1.5038, "step": 6574 }, { "epoch": 0.35947131746921257, "grad_norm": 1.3543354272842407, "learning_rate": 1.5395922589346707e-05, "loss": 1.4662, "step": 6575 }, { "epoch": 0.3595259899129341, "grad_norm": 1.4045054912567139, "learning_rate": 1.5394383998776768e-05, "loss": 1.4788, "step": 6576 }, { "epoch": 0.3595806623566557, "grad_norm": 1.2996704578399658, "learning_rate": 1.5392845228075692e-05, "loss": 1.469, "step": 6577 }, { "epoch": 0.35963533480037724, "grad_norm": 1.2494450807571411, "learning_rate": 1.5391306277294862e-05, "loss": 1.582, "step": 6578 }, { "epoch": 0.3596900072440988, "grad_norm": 1.5114672183990479, "learning_rate": 1.538976714648566e-05, "loss": 1.2962, "step": 6579 }, { "epoch": 0.35974467968782037, "grad_norm": 1.4701083898544312, "learning_rate": 1.5388227835699487e-05, "loss": 1.8203, "step": 6580 }, { "epoch": 0.3597993521315419, "grad_norm": 1.4771798849105835, "learning_rate": 1.538668834498774e-05, "loss": 1.5807, "step": 6581 }, { "epoch": 0.35985402457526344, "grad_norm": 1.9567084312438965, "learning_rate": 1.5385148674401833e-05, "loss": 1.3658, "step": 6582 }, { "epoch": 0.359908697018985, "grad_norm": 1.2961443662643433, "learning_rate": 1.5383608823993175e-05, "loss": 1.541, "step": 6583 }, { "epoch": 0.3599633694627066, "grad_norm": 2.022843599319458, "learning_rate": 1.538206879381318e-05, "loss": 1.3418, "step": 6584 }, { "epoch": 0.3600180419064281, "grad_norm": 1.2401394844055176, "learning_rate": 1.5380528583913285e-05, "loss": 1.5917, "step": 6585 }, { "epoch": 0.36007271435014965, "grad_norm": 1.3038301467895508, "learning_rate": 1.5378988194344913e-05, "loss": 1.4302, "step": 6586 }, { "epoch": 0.36012738679387124, "grad_norm": 1.4209976196289062, "learning_rate": 1.5377447625159502e-05, "loss": 1.1961, "step": 6587 }, { "epoch": 0.3601820592375928, "grad_norm": 1.2517611980438232, "learning_rate": 1.5375906876408496e-05, "loss": 1.6242, "step": 6588 }, { "epoch": 0.3602367316813143, "grad_norm": 1.0517656803131104, "learning_rate": 1.5374365948143345e-05, "loss": 1.5755, "step": 6589 }, { "epoch": 0.36029140412503585, "grad_norm": 1.418839454650879, "learning_rate": 1.5372824840415504e-05, "loss": 1.56, "step": 6590 }, { "epoch": 0.36034607656875745, "grad_norm": 1.3680216073989868, "learning_rate": 1.5371283553276432e-05, "loss": 1.4893, "step": 6591 }, { "epoch": 0.360400749012479, "grad_norm": 1.558039903640747, "learning_rate": 1.5369742086777598e-05, "loss": 1.3788, "step": 6592 }, { "epoch": 0.3604554214562005, "grad_norm": 1.2220168113708496, "learning_rate": 1.5368200440970478e-05, "loss": 1.6514, "step": 6593 }, { "epoch": 0.3605100938999221, "grad_norm": 1.7053064107894897, "learning_rate": 1.5366658615906548e-05, "loss": 1.326, "step": 6594 }, { "epoch": 0.36056476634364365, "grad_norm": 1.2985423803329468, "learning_rate": 1.536511661163729e-05, "loss": 1.5334, "step": 6595 }, { "epoch": 0.3606194387873652, "grad_norm": 1.353350043296814, "learning_rate": 1.53635744282142e-05, "loss": 1.3794, "step": 6596 }, { "epoch": 0.3606741112310867, "grad_norm": 1.24734628200531, "learning_rate": 1.5362032065688778e-05, "loss": 1.3849, "step": 6597 }, { "epoch": 0.3607287836748083, "grad_norm": 1.6225498914718628, "learning_rate": 1.536048952411252e-05, "loss": 1.6473, "step": 6598 }, { "epoch": 0.36078345611852985, "grad_norm": 1.5546972751617432, "learning_rate": 1.5358946803536937e-05, "loss": 1.4355, "step": 6599 }, { "epoch": 0.3608381285622514, "grad_norm": 1.694828987121582, "learning_rate": 1.5357403904013546e-05, "loss": 1.5433, "step": 6600 }, { "epoch": 0.360892801005973, "grad_norm": 1.729853630065918, "learning_rate": 1.5355860825593866e-05, "loss": 1.3726, "step": 6601 }, { "epoch": 0.3609474734496945, "grad_norm": 1.4010326862335205, "learning_rate": 1.5354317568329427e-05, "loss": 1.3791, "step": 6602 }, { "epoch": 0.36100214589341606, "grad_norm": 1.7727813720703125, "learning_rate": 1.5352774132271756e-05, "loss": 1.2098, "step": 6603 }, { "epoch": 0.3610568183371376, "grad_norm": 1.4417363405227661, "learning_rate": 1.5351230517472402e-05, "loss": 1.2717, "step": 6604 }, { "epoch": 0.3611114907808592, "grad_norm": 1.5716700553894043, "learning_rate": 1.53496867239829e-05, "loss": 1.4481, "step": 6605 }, { "epoch": 0.3611661632245807, "grad_norm": 1.5399540662765503, "learning_rate": 1.5348142751854807e-05, "loss": 1.3517, "step": 6606 }, { "epoch": 0.36122083566830226, "grad_norm": 1.6653976440429688, "learning_rate": 1.5346598601139677e-05, "loss": 1.52, "step": 6607 }, { "epoch": 0.36127550811202386, "grad_norm": 1.4526234865188599, "learning_rate": 1.5345054271889073e-05, "loss": 1.5766, "step": 6608 }, { "epoch": 0.3613301805557454, "grad_norm": 1.4982781410217285, "learning_rate": 1.5343509764154566e-05, "loss": 1.2703, "step": 6609 }, { "epoch": 0.36138485299946693, "grad_norm": 1.516716480255127, "learning_rate": 1.5341965077987727e-05, "loss": 1.4719, "step": 6610 }, { "epoch": 0.36143952544318847, "grad_norm": 1.479128360748291, "learning_rate": 1.534042021344014e-05, "loss": 1.3828, "step": 6611 }, { "epoch": 0.36149419788691006, "grad_norm": 1.3864126205444336, "learning_rate": 1.533887517056339e-05, "loss": 1.5286, "step": 6612 }, { "epoch": 0.3615488703306316, "grad_norm": 1.3243941068649292, "learning_rate": 1.533732994940907e-05, "loss": 1.3607, "step": 6613 }, { "epoch": 0.36160354277435314, "grad_norm": 1.4997636079788208, "learning_rate": 1.533578455002878e-05, "loss": 1.3827, "step": 6614 }, { "epoch": 0.36165821521807473, "grad_norm": 1.606566071510315, "learning_rate": 1.533423897247412e-05, "loss": 1.2661, "step": 6615 }, { "epoch": 0.36171288766179627, "grad_norm": 1.5196681022644043, "learning_rate": 1.5332693216796704e-05, "loss": 1.5441, "step": 6616 }, { "epoch": 0.3617675601055178, "grad_norm": 1.4498381614685059, "learning_rate": 1.533114728304815e-05, "loss": 1.3666, "step": 6617 }, { "epoch": 0.36182223254923934, "grad_norm": 1.4361788034439087, "learning_rate": 1.5329601171280076e-05, "loss": 1.1943, "step": 6618 }, { "epoch": 0.36187690499296093, "grad_norm": 1.97951078414917, "learning_rate": 1.5328054881544113e-05, "loss": 1.2368, "step": 6619 }, { "epoch": 0.3619315774366825, "grad_norm": 1.5347918272018433, "learning_rate": 1.5326508413891894e-05, "loss": 1.5401, "step": 6620 }, { "epoch": 0.361986249880404, "grad_norm": 1.401241660118103, "learning_rate": 1.5324961768375065e-05, "loss": 1.3808, "step": 6621 }, { "epoch": 0.3620409223241256, "grad_norm": 1.568260669708252, "learning_rate": 1.5323414945045262e-05, "loss": 1.237, "step": 6622 }, { "epoch": 0.36209559476784714, "grad_norm": 1.46729576587677, "learning_rate": 1.5321867943954143e-05, "loss": 1.4277, "step": 6623 }, { "epoch": 0.3621502672115687, "grad_norm": 1.3442617654800415, "learning_rate": 1.5320320765153367e-05, "loss": 1.3221, "step": 6624 }, { "epoch": 0.36220493965529027, "grad_norm": 1.5099623203277588, "learning_rate": 1.5318773408694596e-05, "loss": 1.6163, "step": 6625 }, { "epoch": 0.3622596120990118, "grad_norm": 1.390013575553894, "learning_rate": 1.53172258746295e-05, "loss": 1.2801, "step": 6626 }, { "epoch": 0.36231428454273334, "grad_norm": 1.8295915126800537, "learning_rate": 1.5315678163009752e-05, "loss": 1.6175, "step": 6627 }, { "epoch": 0.3623689569864549, "grad_norm": 1.6334264278411865, "learning_rate": 1.531413027388704e-05, "loss": 1.5959, "step": 6628 }, { "epoch": 0.3624236294301765, "grad_norm": 1.4660919904708862, "learning_rate": 1.5312582207313046e-05, "loss": 1.3736, "step": 6629 }, { "epoch": 0.362478301873898, "grad_norm": 1.3574172258377075, "learning_rate": 1.5311033963339465e-05, "loss": 1.5107, "step": 6630 }, { "epoch": 0.36253297431761955, "grad_norm": 1.1539443731307983, "learning_rate": 1.5309485542018e-05, "loss": 1.4049, "step": 6631 }, { "epoch": 0.36258764676134114, "grad_norm": 1.6838351488113403, "learning_rate": 1.5307936943400355e-05, "loss": 1.3773, "step": 6632 }, { "epoch": 0.3626423192050627, "grad_norm": 1.467801570892334, "learning_rate": 1.5306388167538235e-05, "loss": 1.341, "step": 6633 }, { "epoch": 0.3626969916487842, "grad_norm": 1.3781085014343262, "learning_rate": 1.530483921448336e-05, "loss": 1.4505, "step": 6634 }, { "epoch": 0.36275166409250575, "grad_norm": 1.3877649307250977, "learning_rate": 1.5303290084287465e-05, "loss": 1.3433, "step": 6635 }, { "epoch": 0.36280633653622735, "grad_norm": 1.9235341548919678, "learning_rate": 1.5301740777002265e-05, "loss": 1.2716, "step": 6636 }, { "epoch": 0.3628610089799489, "grad_norm": 1.2318772077560425, "learning_rate": 1.5300191292679497e-05, "loss": 1.6457, "step": 6637 }, { "epoch": 0.3629156814236704, "grad_norm": 1.557208776473999, "learning_rate": 1.5298641631370907e-05, "loss": 1.6572, "step": 6638 }, { "epoch": 0.362970353867392, "grad_norm": 1.5580683946609497, "learning_rate": 1.529709179312824e-05, "loss": 1.3266, "step": 6639 }, { "epoch": 0.36302502631111355, "grad_norm": 2.034794330596924, "learning_rate": 1.5295541778003243e-05, "loss": 1.3446, "step": 6640 }, { "epoch": 0.3630796987548351, "grad_norm": 2.059068202972412, "learning_rate": 1.5293991586047685e-05, "loss": 1.391, "step": 6641 }, { "epoch": 0.3631343711985566, "grad_norm": 1.3686490058898926, "learning_rate": 1.5292441217313324e-05, "loss": 1.3825, "step": 6642 }, { "epoch": 0.3631890436422782, "grad_norm": 1.4405879974365234, "learning_rate": 1.529089067185193e-05, "loss": 1.5689, "step": 6643 }, { "epoch": 0.36324371608599976, "grad_norm": 1.5374603271484375, "learning_rate": 1.5289339949715285e-05, "loss": 1.1301, "step": 6644 }, { "epoch": 0.3632983885297213, "grad_norm": 1.4826594591140747, "learning_rate": 1.5287789050955164e-05, "loss": 1.4421, "step": 6645 }, { "epoch": 0.3633530609734429, "grad_norm": 1.1568777561187744, "learning_rate": 1.528623797562336e-05, "loss": 1.3023, "step": 6646 }, { "epoch": 0.3634077334171644, "grad_norm": 1.5088826417922974, "learning_rate": 1.5284686723771664e-05, "loss": 1.4742, "step": 6647 }, { "epoch": 0.36346240586088596, "grad_norm": 1.8742055892944336, "learning_rate": 1.5283135295451877e-05, "loss": 1.5399, "step": 6648 }, { "epoch": 0.3635170783046075, "grad_norm": 2.3193163871765137, "learning_rate": 1.5281583690715805e-05, "loss": 1.6827, "step": 6649 }, { "epoch": 0.3635717507483291, "grad_norm": 1.395696759223938, "learning_rate": 1.5280031909615263e-05, "loss": 1.5283, "step": 6650 }, { "epoch": 0.36362642319205063, "grad_norm": 1.8932489156723022, "learning_rate": 1.5278479952202064e-05, "loss": 1.4512, "step": 6651 }, { "epoch": 0.36368109563577217, "grad_norm": 1.4741944074630737, "learning_rate": 1.5276927818528032e-05, "loss": 1.4549, "step": 6652 }, { "epoch": 0.36373576807949376, "grad_norm": 1.4254831075668335, "learning_rate": 1.5275375508644997e-05, "loss": 1.5444, "step": 6653 }, { "epoch": 0.3637904405232153, "grad_norm": 1.7549712657928467, "learning_rate": 1.5273823022604798e-05, "loss": 1.1367, "step": 6654 }, { "epoch": 0.36384511296693683, "grad_norm": 1.4774730205535889, "learning_rate": 1.5272270360459266e-05, "loss": 1.3247, "step": 6655 }, { "epoch": 0.36389978541065837, "grad_norm": 1.6275748014450073, "learning_rate": 1.5270717522260264e-05, "loss": 1.4774, "step": 6656 }, { "epoch": 0.36395445785437996, "grad_norm": 1.5979753732681274, "learning_rate": 1.526916450805963e-05, "loss": 1.3764, "step": 6657 }, { "epoch": 0.3640091302981015, "grad_norm": 1.4133683443069458, "learning_rate": 1.526761131790923e-05, "loss": 1.8411, "step": 6658 }, { "epoch": 0.36406380274182304, "grad_norm": 1.153214931488037, "learning_rate": 1.5266057951860927e-05, "loss": 1.2639, "step": 6659 }, { "epoch": 0.36411847518554463, "grad_norm": 1.3616819381713867, "learning_rate": 1.5264504409966593e-05, "loss": 1.5076, "step": 6660 }, { "epoch": 0.36417314762926617, "grad_norm": 1.3059632778167725, "learning_rate": 1.52629506922781e-05, "loss": 1.3726, "step": 6661 }, { "epoch": 0.3642278200729877, "grad_norm": 1.4500728845596313, "learning_rate": 1.5261396798847335e-05, "loss": 1.3791, "step": 6662 }, { "epoch": 0.36428249251670924, "grad_norm": 1.5144984722137451, "learning_rate": 1.5259842729726186e-05, "loss": 1.3021, "step": 6663 }, { "epoch": 0.36433716496043084, "grad_norm": 1.656890630722046, "learning_rate": 1.5258288484966545e-05, "loss": 1.4781, "step": 6664 }, { "epoch": 0.3643918374041524, "grad_norm": 1.5404719114303589, "learning_rate": 1.5256734064620313e-05, "loss": 1.6986, "step": 6665 }, { "epoch": 0.3644465098478739, "grad_norm": 1.2600423097610474, "learning_rate": 1.5255179468739393e-05, "loss": 1.2958, "step": 6666 }, { "epoch": 0.3645011822915955, "grad_norm": 1.3655931949615479, "learning_rate": 1.5253624697375702e-05, "loss": 1.2889, "step": 6667 }, { "epoch": 0.36455585473531704, "grad_norm": 1.489124059677124, "learning_rate": 1.525206975058115e-05, "loss": 1.5608, "step": 6668 }, { "epoch": 0.3646105271790386, "grad_norm": 1.6346708536148071, "learning_rate": 1.5250514628407671e-05, "loss": 1.4613, "step": 6669 }, { "epoch": 0.3646651996227601, "grad_norm": 1.7120028734207153, "learning_rate": 1.5248959330907186e-05, "loss": 1.2212, "step": 6670 }, { "epoch": 0.3647198720664817, "grad_norm": 1.4970142841339111, "learning_rate": 1.5247403858131629e-05, "loss": 1.3695, "step": 6671 }, { "epoch": 0.36477454451020325, "grad_norm": 1.447906732559204, "learning_rate": 1.5245848210132943e-05, "loss": 1.3574, "step": 6672 }, { "epoch": 0.3648292169539248, "grad_norm": 1.4240869283676147, "learning_rate": 1.5244292386963077e-05, "loss": 1.3902, "step": 6673 }, { "epoch": 0.3648838893976464, "grad_norm": 1.290507435798645, "learning_rate": 1.5242736388673984e-05, "loss": 1.4908, "step": 6674 }, { "epoch": 0.3649385618413679, "grad_norm": 1.692221760749817, "learning_rate": 1.524118021531762e-05, "loss": 1.3308, "step": 6675 }, { "epoch": 0.36499323428508945, "grad_norm": 1.5597440004348755, "learning_rate": 1.523962386694595e-05, "loss": 1.4767, "step": 6676 }, { "epoch": 0.365047906728811, "grad_norm": 1.6248044967651367, "learning_rate": 1.5238067343610943e-05, "loss": 1.1754, "step": 6677 }, { "epoch": 0.3651025791725326, "grad_norm": 1.2757775783538818, "learning_rate": 1.5236510645364575e-05, "loss": 1.9485, "step": 6678 }, { "epoch": 0.3651572516162541, "grad_norm": 1.356554627418518, "learning_rate": 1.5234953772258827e-05, "loss": 1.506, "step": 6679 }, { "epoch": 0.36521192405997566, "grad_norm": 1.7234492301940918, "learning_rate": 1.5233396724345691e-05, "loss": 1.6895, "step": 6680 }, { "epoch": 0.36526659650369725, "grad_norm": 1.3452423810958862, "learning_rate": 1.523183950167716e-05, "loss": 1.4877, "step": 6681 }, { "epoch": 0.3653212689474188, "grad_norm": 1.5653408765792847, "learning_rate": 1.5230282104305227e-05, "loss": 1.2562, "step": 6682 }, { "epoch": 0.3653759413911403, "grad_norm": 1.155920386314392, "learning_rate": 1.5228724532281904e-05, "loss": 1.6564, "step": 6683 }, { "epoch": 0.36543061383486186, "grad_norm": 2.076282501220703, "learning_rate": 1.52271667856592e-05, "loss": 1.1553, "step": 6684 }, { "epoch": 0.36548528627858345, "grad_norm": 1.2808599472045898, "learning_rate": 1.5225608864489128e-05, "loss": 1.5073, "step": 6685 }, { "epoch": 0.365539958722305, "grad_norm": 1.7421804666519165, "learning_rate": 1.5224050768823716e-05, "loss": 1.4016, "step": 6686 }, { "epoch": 0.36559463116602653, "grad_norm": 1.348976969718933, "learning_rate": 1.5222492498714986e-05, "loss": 1.3524, "step": 6687 }, { "epoch": 0.3656493036097481, "grad_norm": 1.8344718217849731, "learning_rate": 1.5220934054214982e-05, "loss": 1.3944, "step": 6688 }, { "epoch": 0.36570397605346966, "grad_norm": 1.4043291807174683, "learning_rate": 1.5219375435375736e-05, "loss": 1.2136, "step": 6689 }, { "epoch": 0.3657586484971912, "grad_norm": 1.2784074544906616, "learning_rate": 1.5217816642249297e-05, "loss": 1.3647, "step": 6690 }, { "epoch": 0.36581332094091273, "grad_norm": 1.5890437364578247, "learning_rate": 1.5216257674887718e-05, "loss": 1.4007, "step": 6691 }, { "epoch": 0.3658679933846343, "grad_norm": 1.5830732583999634, "learning_rate": 1.5214698533343053e-05, "loss": 1.2934, "step": 6692 }, { "epoch": 0.36592266582835586, "grad_norm": 1.182170033454895, "learning_rate": 1.5213139217667366e-05, "loss": 1.613, "step": 6693 }, { "epoch": 0.3659773382720774, "grad_norm": 1.5814069509506226, "learning_rate": 1.5211579727912728e-05, "loss": 1.3553, "step": 6694 }, { "epoch": 0.366032010715799, "grad_norm": 1.7244435548782349, "learning_rate": 1.5210020064131217e-05, "loss": 1.5615, "step": 6695 }, { "epoch": 0.36608668315952053, "grad_norm": 1.5682579278945923, "learning_rate": 1.5208460226374907e-05, "loss": 1.5913, "step": 6696 }, { "epoch": 0.36614135560324207, "grad_norm": 1.1597217321395874, "learning_rate": 1.520690021469589e-05, "loss": 1.5954, "step": 6697 }, { "epoch": 0.3661960280469636, "grad_norm": 1.5499703884124756, "learning_rate": 1.5205340029146256e-05, "loss": 1.441, "step": 6698 }, { "epoch": 0.3662507004906852, "grad_norm": 1.4410141706466675, "learning_rate": 1.5203779669778102e-05, "loss": 1.3103, "step": 6699 }, { "epoch": 0.36630537293440674, "grad_norm": 1.7343734502792358, "learning_rate": 1.5202219136643535e-05, "loss": 1.3627, "step": 6700 }, { "epoch": 0.3663600453781283, "grad_norm": 1.1304713487625122, "learning_rate": 1.5200658429794662e-05, "loss": 1.5876, "step": 6701 }, { "epoch": 0.36641471782184987, "grad_norm": 1.1460323333740234, "learning_rate": 1.5199097549283604e-05, "loss": 1.4741, "step": 6702 }, { "epoch": 0.3664693902655714, "grad_norm": 1.3362032175064087, "learning_rate": 1.5197536495162478e-05, "loss": 1.5544, "step": 6703 }, { "epoch": 0.36652406270929294, "grad_norm": 1.61513090133667, "learning_rate": 1.5195975267483408e-05, "loss": 1.4431, "step": 6704 }, { "epoch": 0.3665787351530145, "grad_norm": 1.374038577079773, "learning_rate": 1.5194413866298536e-05, "loss": 1.4116, "step": 6705 }, { "epoch": 0.36663340759673607, "grad_norm": 1.6284139156341553, "learning_rate": 1.5192852291659992e-05, "loss": 1.1366, "step": 6706 }, { "epoch": 0.3666880800404576, "grad_norm": 1.6282914876937866, "learning_rate": 1.5191290543619925e-05, "loss": 1.302, "step": 6707 }, { "epoch": 0.36674275248417915, "grad_norm": 1.2458654642105103, "learning_rate": 1.5189728622230489e-05, "loss": 1.3581, "step": 6708 }, { "epoch": 0.36679742492790074, "grad_norm": 1.7892340421676636, "learning_rate": 1.5188166527543832e-05, "loss": 1.2344, "step": 6709 }, { "epoch": 0.3668520973716223, "grad_norm": 1.5867983102798462, "learning_rate": 1.5186604259612123e-05, "loss": 1.393, "step": 6710 }, { "epoch": 0.3669067698153438, "grad_norm": 1.4030439853668213, "learning_rate": 1.5185041818487525e-05, "loss": 1.4021, "step": 6711 }, { "epoch": 0.36696144225906535, "grad_norm": 1.8647931814193726, "learning_rate": 1.5183479204222216e-05, "loss": 1.2006, "step": 6712 }, { "epoch": 0.36701611470278694, "grad_norm": 1.6937940120697021, "learning_rate": 1.518191641686837e-05, "loss": 1.5475, "step": 6713 }, { "epoch": 0.3670707871465085, "grad_norm": 1.3722684383392334, "learning_rate": 1.5180353456478174e-05, "loss": 1.5115, "step": 6714 }, { "epoch": 0.36712545959023, "grad_norm": 1.8279200792312622, "learning_rate": 1.5178790323103825e-05, "loss": 1.3629, "step": 6715 }, { "epoch": 0.3671801320339516, "grad_norm": 1.652754545211792, "learning_rate": 1.5177227016797514e-05, "loss": 1.4822, "step": 6716 }, { "epoch": 0.36723480447767315, "grad_norm": 1.3224796056747437, "learning_rate": 1.517566353761144e-05, "loss": 1.4641, "step": 6717 }, { "epoch": 0.3672894769213947, "grad_norm": 1.955363154411316, "learning_rate": 1.5174099885597817e-05, "loss": 1.3933, "step": 6718 }, { "epoch": 0.3673441493651162, "grad_norm": 1.6171231269836426, "learning_rate": 1.5172536060808857e-05, "loss": 1.4217, "step": 6719 }, { "epoch": 0.3673988218088378, "grad_norm": 1.4984421730041504, "learning_rate": 1.5170972063296783e-05, "loss": 1.4096, "step": 6720 }, { "epoch": 0.36745349425255935, "grad_norm": 1.600043535232544, "learning_rate": 1.5169407893113816e-05, "loss": 1.2392, "step": 6721 }, { "epoch": 0.3675081666962809, "grad_norm": 1.5627321004867554, "learning_rate": 1.516784355031219e-05, "loss": 1.5144, "step": 6722 }, { "epoch": 0.3675628391400025, "grad_norm": 1.4748194217681885, "learning_rate": 1.5166279034944141e-05, "loss": 1.4054, "step": 6723 }, { "epoch": 0.367617511583724, "grad_norm": 1.3954511880874634, "learning_rate": 1.5164714347061908e-05, "loss": 1.3539, "step": 6724 }, { "epoch": 0.36767218402744556, "grad_norm": 1.5840014219284058, "learning_rate": 1.5163149486717747e-05, "loss": 1.6404, "step": 6725 }, { "epoch": 0.3677268564711671, "grad_norm": 1.1239665746688843, "learning_rate": 1.5161584453963908e-05, "loss": 1.5164, "step": 6726 }, { "epoch": 0.3677815289148887, "grad_norm": 1.5644583702087402, "learning_rate": 1.5160019248852655e-05, "loss": 1.3479, "step": 6727 }, { "epoch": 0.3678362013586102, "grad_norm": 1.685797095298767, "learning_rate": 1.515845387143625e-05, "loss": 1.1991, "step": 6728 }, { "epoch": 0.36789087380233176, "grad_norm": 1.384603500366211, "learning_rate": 1.515688832176696e-05, "loss": 1.5312, "step": 6729 }, { "epoch": 0.36794554624605336, "grad_norm": 1.7523223161697388, "learning_rate": 1.5155322599897076e-05, "loss": 1.2465, "step": 6730 }, { "epoch": 0.3680002186897749, "grad_norm": 1.6741985082626343, "learning_rate": 1.5153756705878867e-05, "loss": 1.3284, "step": 6731 }, { "epoch": 0.36805489113349643, "grad_norm": 1.2086163759231567, "learning_rate": 1.515219063976463e-05, "loss": 1.4519, "step": 6732 }, { "epoch": 0.36810956357721797, "grad_norm": 1.3376855850219727, "learning_rate": 1.5150624401606658e-05, "loss": 1.4922, "step": 6733 }, { "epoch": 0.36816423602093956, "grad_norm": 1.7447700500488281, "learning_rate": 1.514905799145725e-05, "loss": 1.441, "step": 6734 }, { "epoch": 0.3682189084646611, "grad_norm": 1.4256864786148071, "learning_rate": 1.5147491409368713e-05, "loss": 1.3629, "step": 6735 }, { "epoch": 0.36827358090838264, "grad_norm": 1.3863362073898315, "learning_rate": 1.514592465539336e-05, "loss": 1.8387, "step": 6736 }, { "epoch": 0.36832825335210423, "grad_norm": 1.4661744832992554, "learning_rate": 1.514435772958351e-05, "loss": 1.1032, "step": 6737 }, { "epoch": 0.36838292579582577, "grad_norm": 1.5914852619171143, "learning_rate": 1.514279063199148e-05, "loss": 1.4881, "step": 6738 }, { "epoch": 0.3684375982395473, "grad_norm": 2.070807933807373, "learning_rate": 1.5141223362669602e-05, "loss": 1.2665, "step": 6739 }, { "epoch": 0.36849227068326884, "grad_norm": 1.6095128059387207, "learning_rate": 1.5139655921670213e-05, "loss": 1.2955, "step": 6740 }, { "epoch": 0.36854694312699043, "grad_norm": 1.681757926940918, "learning_rate": 1.5138088309045653e-05, "loss": 1.4395, "step": 6741 }, { "epoch": 0.36860161557071197, "grad_norm": 1.558353304862976, "learning_rate": 1.5136520524848266e-05, "loss": 1.3739, "step": 6742 }, { "epoch": 0.3686562880144335, "grad_norm": 1.4289997816085815, "learning_rate": 1.5134952569130406e-05, "loss": 1.603, "step": 6743 }, { "epoch": 0.3687109604581551, "grad_norm": 1.3679968118667603, "learning_rate": 1.5133384441944432e-05, "loss": 1.4295, "step": 6744 }, { "epoch": 0.36876563290187664, "grad_norm": 1.417109489440918, "learning_rate": 1.5131816143342701e-05, "loss": 1.3728, "step": 6745 }, { "epoch": 0.3688203053455982, "grad_norm": 1.4300422668457031, "learning_rate": 1.513024767337759e-05, "loss": 1.3975, "step": 6746 }, { "epoch": 0.3688749777893197, "grad_norm": 1.5169174671173096, "learning_rate": 1.5128679032101472e-05, "loss": 1.4713, "step": 6747 }, { "epoch": 0.3689296502330413, "grad_norm": 1.4736337661743164, "learning_rate": 1.5127110219566725e-05, "loss": 1.5631, "step": 6748 }, { "epoch": 0.36898432267676284, "grad_norm": 1.277603030204773, "learning_rate": 1.5125541235825738e-05, "loss": 1.4162, "step": 6749 }, { "epoch": 0.3690389951204844, "grad_norm": 1.5751463174819946, "learning_rate": 1.51239720809309e-05, "loss": 1.3783, "step": 6750 }, { "epoch": 0.369093667564206, "grad_norm": 1.4878261089324951, "learning_rate": 1.512240275493461e-05, "loss": 1.3941, "step": 6751 }, { "epoch": 0.3691483400079275, "grad_norm": 1.7886673212051392, "learning_rate": 1.5120833257889272e-05, "loss": 1.1286, "step": 6752 }, { "epoch": 0.36920301245164905, "grad_norm": 1.5382921695709229, "learning_rate": 1.5119263589847295e-05, "loss": 1.3409, "step": 6753 }, { "epoch": 0.3692576848953706, "grad_norm": 1.4695830345153809, "learning_rate": 1.5117693750861096e-05, "loss": 1.3872, "step": 6754 }, { "epoch": 0.3693123573390922, "grad_norm": 1.3432683944702148, "learning_rate": 1.5116123740983093e-05, "loss": 1.5347, "step": 6755 }, { "epoch": 0.3693670297828137, "grad_norm": 1.8605260848999023, "learning_rate": 1.5114553560265712e-05, "loss": 1.1572, "step": 6756 }, { "epoch": 0.36942170222653525, "grad_norm": 1.091282606124878, "learning_rate": 1.5112983208761384e-05, "loss": 1.3655, "step": 6757 }, { "epoch": 0.36947637467025685, "grad_norm": 1.3043736219406128, "learning_rate": 1.511141268652255e-05, "loss": 1.4976, "step": 6758 }, { "epoch": 0.3695310471139784, "grad_norm": 1.6628732681274414, "learning_rate": 1.5109841993601654e-05, "loss": 0.9807, "step": 6759 }, { "epoch": 0.3695857195576999, "grad_norm": 1.832659363746643, "learning_rate": 1.5108271130051141e-05, "loss": 1.5014, "step": 6760 }, { "epoch": 0.36964039200142146, "grad_norm": 1.3674205541610718, "learning_rate": 1.5106700095923471e-05, "loss": 1.4016, "step": 6761 }, { "epoch": 0.36969506444514305, "grad_norm": 1.3140562772750854, "learning_rate": 1.5105128891271102e-05, "loss": 1.514, "step": 6762 }, { "epoch": 0.3697497368888646, "grad_norm": 1.7336187362670898, "learning_rate": 1.5103557516146494e-05, "loss": 1.2297, "step": 6763 }, { "epoch": 0.3698044093325861, "grad_norm": 1.5217036008834839, "learning_rate": 1.510198597060213e-05, "loss": 1.6148, "step": 6764 }, { "epoch": 0.3698590817763077, "grad_norm": 1.6151577234268188, "learning_rate": 1.5100414254690478e-05, "loss": 1.3849, "step": 6765 }, { "epoch": 0.36991375422002926, "grad_norm": 1.648004174232483, "learning_rate": 1.5098842368464031e-05, "loss": 1.4264, "step": 6766 }, { "epoch": 0.3699684266637508, "grad_norm": 1.5600361824035645, "learning_rate": 1.5097270311975267e-05, "loss": 1.532, "step": 6767 }, { "epoch": 0.37002309910747233, "grad_norm": 1.8083826303482056, "learning_rate": 1.5095698085276692e-05, "loss": 1.5828, "step": 6768 }, { "epoch": 0.3700777715511939, "grad_norm": 1.5413789749145508, "learning_rate": 1.5094125688420795e-05, "loss": 1.4317, "step": 6769 }, { "epoch": 0.37013244399491546, "grad_norm": 1.2549426555633545, "learning_rate": 1.509255312146009e-05, "loss": 1.4624, "step": 6770 }, { "epoch": 0.370187116438637, "grad_norm": 1.8554681539535522, "learning_rate": 1.5090980384447083e-05, "loss": 1.4305, "step": 6771 }, { "epoch": 0.3702417888823586, "grad_norm": 1.4344713687896729, "learning_rate": 1.5089407477434299e-05, "loss": 1.3971, "step": 6772 }, { "epoch": 0.37029646132608013, "grad_norm": 1.3435168266296387, "learning_rate": 1.5087834400474255e-05, "loss": 1.5626, "step": 6773 }, { "epoch": 0.37035113376980167, "grad_norm": 1.5488799810409546, "learning_rate": 1.508626115361948e-05, "loss": 1.3021, "step": 6774 }, { "epoch": 0.3704058062135232, "grad_norm": 1.5458253622055054, "learning_rate": 1.5084687736922514e-05, "loss": 1.3094, "step": 6775 }, { "epoch": 0.3704604786572448, "grad_norm": 1.6152862310409546, "learning_rate": 1.5083114150435889e-05, "loss": 1.534, "step": 6776 }, { "epoch": 0.37051515110096633, "grad_norm": 4.301610469818115, "learning_rate": 1.5081540394212155e-05, "loss": 1.2233, "step": 6777 }, { "epoch": 0.37056982354468787, "grad_norm": 1.301921010017395, "learning_rate": 1.5079966468303866e-05, "loss": 1.3959, "step": 6778 }, { "epoch": 0.37062449598840946, "grad_norm": 2.0018739700317383, "learning_rate": 1.5078392372763573e-05, "loss": 1.4953, "step": 6779 }, { "epoch": 0.370679168432131, "grad_norm": 1.344045877456665, "learning_rate": 1.5076818107643844e-05, "loss": 1.4798, "step": 6780 }, { "epoch": 0.37073384087585254, "grad_norm": 1.3646509647369385, "learning_rate": 1.5075243672997242e-05, "loss": 1.46, "step": 6781 }, { "epoch": 0.3707885133195741, "grad_norm": 1.368131399154663, "learning_rate": 1.5073669068876348e-05, "loss": 1.4125, "step": 6782 }, { "epoch": 0.37084318576329567, "grad_norm": 1.750922679901123, "learning_rate": 1.5072094295333734e-05, "loss": 1.3458, "step": 6783 }, { "epoch": 0.3708978582070172, "grad_norm": 1.2933019399642944, "learning_rate": 1.5070519352421993e-05, "loss": 1.5623, "step": 6784 }, { "epoch": 0.37095253065073874, "grad_norm": 1.0833717584609985, "learning_rate": 1.5068944240193713e-05, "loss": 1.5864, "step": 6785 }, { "epoch": 0.37100720309446034, "grad_norm": 1.7249772548675537, "learning_rate": 1.5067368958701487e-05, "loss": 1.1897, "step": 6786 }, { "epoch": 0.3710618755381819, "grad_norm": 1.8797345161437988, "learning_rate": 1.5065793507997923e-05, "loss": 0.9888, "step": 6787 }, { "epoch": 0.3711165479819034, "grad_norm": 1.7346326112747192, "learning_rate": 1.5064217888135626e-05, "loss": 1.3542, "step": 6788 }, { "epoch": 0.37117122042562495, "grad_norm": 1.3676015138626099, "learning_rate": 1.5062642099167208e-05, "loss": 1.54, "step": 6789 }, { "epoch": 0.37122589286934654, "grad_norm": 1.5097393989562988, "learning_rate": 1.5061066141145294e-05, "loss": 1.3165, "step": 6790 }, { "epoch": 0.3712805653130681, "grad_norm": 1.378829002380371, "learning_rate": 1.5059490014122502e-05, "loss": 1.5523, "step": 6791 }, { "epoch": 0.3713352377567896, "grad_norm": 1.242213249206543, "learning_rate": 1.5057913718151468e-05, "loss": 1.3037, "step": 6792 }, { "epoch": 0.3713899102005112, "grad_norm": 1.4637576341629028, "learning_rate": 1.5056337253284825e-05, "loss": 1.36, "step": 6793 }, { "epoch": 0.37144458264423275, "grad_norm": 1.2951672077178955, "learning_rate": 1.5054760619575217e-05, "loss": 1.6683, "step": 6794 }, { "epoch": 0.3714992550879543, "grad_norm": 1.1939719915390015, "learning_rate": 1.505318381707529e-05, "loss": 1.5102, "step": 6795 }, { "epoch": 0.3715539275316758, "grad_norm": 2.3866662979125977, "learning_rate": 1.5051606845837699e-05, "loss": 1.3813, "step": 6796 }, { "epoch": 0.3716085999753974, "grad_norm": 1.3167071342468262, "learning_rate": 1.5050029705915101e-05, "loss": 1.3855, "step": 6797 }, { "epoch": 0.37166327241911895, "grad_norm": 1.6897377967834473, "learning_rate": 1.5048452397360158e-05, "loss": 1.1439, "step": 6798 }, { "epoch": 0.3717179448628405, "grad_norm": 1.384634256362915, "learning_rate": 1.5046874920225544e-05, "loss": 1.5028, "step": 6799 }, { "epoch": 0.3717726173065621, "grad_norm": 1.3065993785858154, "learning_rate": 1.5045297274563937e-05, "loss": 1.3662, "step": 6800 }, { "epoch": 0.3718272897502836, "grad_norm": 1.701450228691101, "learning_rate": 1.5043719460428013e-05, "loss": 1.4339, "step": 6801 }, { "epoch": 0.37188196219400516, "grad_norm": 1.268039345741272, "learning_rate": 1.504214147787046e-05, "loss": 1.4911, "step": 6802 }, { "epoch": 0.3719366346377267, "grad_norm": 1.576856255531311, "learning_rate": 1.5040563326943974e-05, "loss": 1.3483, "step": 6803 }, { "epoch": 0.3719913070814483, "grad_norm": 1.355661392211914, "learning_rate": 1.5038985007701246e-05, "loss": 1.4651, "step": 6804 }, { "epoch": 0.3720459795251698, "grad_norm": 1.5877830982208252, "learning_rate": 1.5037406520194985e-05, "loss": 1.4757, "step": 6805 }, { "epoch": 0.37210065196889136, "grad_norm": 1.4776544570922852, "learning_rate": 1.50358278644779e-05, "loss": 1.2355, "step": 6806 }, { "epoch": 0.37215532441261295, "grad_norm": 1.3377584218978882, "learning_rate": 1.5034249040602709e-05, "loss": 1.4849, "step": 6807 }, { "epoch": 0.3722099968563345, "grad_norm": 1.28628408908844, "learning_rate": 1.5032670048622126e-05, "loss": 1.6077, "step": 6808 }, { "epoch": 0.37226466930005603, "grad_norm": 2.123210906982422, "learning_rate": 1.5031090888588882e-05, "loss": 1.4059, "step": 6809 }, { "epoch": 0.37231934174377757, "grad_norm": 1.4537460803985596, "learning_rate": 1.5029511560555707e-05, "loss": 1.4846, "step": 6810 }, { "epoch": 0.37237401418749916, "grad_norm": 1.3889315128326416, "learning_rate": 1.5027932064575339e-05, "loss": 1.6337, "step": 6811 }, { "epoch": 0.3724286866312207, "grad_norm": 1.7050843238830566, "learning_rate": 1.5026352400700517e-05, "loss": 1.3274, "step": 6812 }, { "epoch": 0.37248335907494223, "grad_norm": 2.0512266159057617, "learning_rate": 1.5024772568983998e-05, "loss": 1.5503, "step": 6813 }, { "epoch": 0.3725380315186638, "grad_norm": 1.3163816928863525, "learning_rate": 1.5023192569478533e-05, "loss": 1.1846, "step": 6814 }, { "epoch": 0.37259270396238536, "grad_norm": 1.4218482971191406, "learning_rate": 1.5021612402236878e-05, "loss": 1.7599, "step": 6815 }, { "epoch": 0.3726473764061069, "grad_norm": 1.957276463508606, "learning_rate": 1.5020032067311804e-05, "loss": 1.2995, "step": 6816 }, { "epoch": 0.37270204884982844, "grad_norm": 1.4865522384643555, "learning_rate": 1.5018451564756078e-05, "loss": 1.3204, "step": 6817 }, { "epoch": 0.37275672129355003, "grad_norm": 2.0278964042663574, "learning_rate": 1.5016870894622475e-05, "loss": 1.5341, "step": 6818 }, { "epoch": 0.37281139373727157, "grad_norm": 1.319962501525879, "learning_rate": 1.5015290056963787e-05, "loss": 1.4536, "step": 6819 }, { "epoch": 0.3728660661809931, "grad_norm": 1.564177393913269, "learning_rate": 1.5013709051832792e-05, "loss": 1.2977, "step": 6820 }, { "epoch": 0.3729207386247147, "grad_norm": 1.3183001279830933, "learning_rate": 1.5012127879282284e-05, "loss": 1.5103, "step": 6821 }, { "epoch": 0.37297541106843624, "grad_norm": 1.6717175245285034, "learning_rate": 1.5010546539365067e-05, "loss": 1.347, "step": 6822 }, { "epoch": 0.3730300835121578, "grad_norm": 1.8350566625595093, "learning_rate": 1.5008965032133942e-05, "loss": 1.4548, "step": 6823 }, { "epoch": 0.3730847559558793, "grad_norm": 1.3187404870986938, "learning_rate": 1.5007383357641723e-05, "loss": 1.461, "step": 6824 }, { "epoch": 0.3731394283996009, "grad_norm": 1.6794956922531128, "learning_rate": 1.5005801515941221e-05, "loss": 1.3302, "step": 6825 }, { "epoch": 0.37319410084332244, "grad_norm": 1.7211220264434814, "learning_rate": 1.5004219507085264e-05, "loss": 1.3305, "step": 6826 }, { "epoch": 0.373248773287044, "grad_norm": 1.4302256107330322, "learning_rate": 1.5002637331126672e-05, "loss": 1.52, "step": 6827 }, { "epoch": 0.37330344573076557, "grad_norm": 1.2990105152130127, "learning_rate": 1.500105498811828e-05, "loss": 1.2153, "step": 6828 }, { "epoch": 0.3733581181744871, "grad_norm": 1.5283482074737549, "learning_rate": 1.4999472478112927e-05, "loss": 1.2758, "step": 6829 }, { "epoch": 0.37341279061820865, "grad_norm": 1.451947569847107, "learning_rate": 1.4997889801163456e-05, "loss": 1.5506, "step": 6830 }, { "epoch": 0.37346746306193024, "grad_norm": 1.8044990301132202, "learning_rate": 1.499630695732272e-05, "loss": 1.301, "step": 6831 }, { "epoch": 0.3735221355056518, "grad_norm": 1.6563130617141724, "learning_rate": 1.4994723946643568e-05, "loss": 1.4331, "step": 6832 }, { "epoch": 0.3735768079493733, "grad_norm": 1.4435184001922607, "learning_rate": 1.4993140769178862e-05, "loss": 1.4622, "step": 6833 }, { "epoch": 0.37363148039309485, "grad_norm": 1.3074569702148438, "learning_rate": 1.4991557424981471e-05, "loss": 1.5098, "step": 6834 }, { "epoch": 0.37368615283681644, "grad_norm": 1.2685683965682983, "learning_rate": 1.4989973914104262e-05, "loss": 1.2428, "step": 6835 }, { "epoch": 0.373740825280538, "grad_norm": 1.4253454208374023, "learning_rate": 1.4988390236600117e-05, "loss": 1.4282, "step": 6836 }, { "epoch": 0.3737954977242595, "grad_norm": 1.5921403169631958, "learning_rate": 1.4986806392521913e-05, "loss": 1.6188, "step": 6837 }, { "epoch": 0.3738501701679811, "grad_norm": 1.7350038290023804, "learning_rate": 1.4985222381922543e-05, "loss": 1.4069, "step": 6838 }, { "epoch": 0.37390484261170265, "grad_norm": 1.4798243045806885, "learning_rate": 1.4983638204854902e-05, "loss": 1.6263, "step": 6839 }, { "epoch": 0.3739595150554242, "grad_norm": 1.631090521812439, "learning_rate": 1.4982053861371885e-05, "loss": 1.4382, "step": 6840 }, { "epoch": 0.3740141874991457, "grad_norm": 1.2646886110305786, "learning_rate": 1.4980469351526402e-05, "loss": 1.5418, "step": 6841 }, { "epoch": 0.3740688599428673, "grad_norm": 1.5032434463500977, "learning_rate": 1.4978884675371354e-05, "loss": 1.4738, "step": 6842 }, { "epoch": 0.37412353238658885, "grad_norm": 1.2211076021194458, "learning_rate": 1.4977299832959666e-05, "loss": 1.3956, "step": 6843 }, { "epoch": 0.3741782048303104, "grad_norm": 1.578380823135376, "learning_rate": 1.4975714824344258e-05, "loss": 1.3481, "step": 6844 }, { "epoch": 0.374232877274032, "grad_norm": 1.4269553422927856, "learning_rate": 1.4974129649578058e-05, "loss": 1.4573, "step": 6845 }, { "epoch": 0.3742875497177535, "grad_norm": 1.625053882598877, "learning_rate": 1.4972544308713995e-05, "loss": 1.4408, "step": 6846 }, { "epoch": 0.37434222216147506, "grad_norm": 1.3735271692276, "learning_rate": 1.497095880180501e-05, "loss": 1.1757, "step": 6847 }, { "epoch": 0.3743968946051966, "grad_norm": 1.5668630599975586, "learning_rate": 1.4969373128904043e-05, "loss": 1.7438, "step": 6848 }, { "epoch": 0.3744515670489182, "grad_norm": 1.8180893659591675, "learning_rate": 1.4967787290064048e-05, "loss": 1.5385, "step": 6849 }, { "epoch": 0.3745062394926397, "grad_norm": 1.3873398303985596, "learning_rate": 1.4966201285337978e-05, "loss": 1.454, "step": 6850 }, { "epoch": 0.37456091193636126, "grad_norm": 1.3544360399246216, "learning_rate": 1.4964615114778794e-05, "loss": 1.4688, "step": 6851 }, { "epoch": 0.37461558438008286, "grad_norm": 1.3092453479766846, "learning_rate": 1.496302877843946e-05, "loss": 1.4316, "step": 6852 }, { "epoch": 0.3746702568238044, "grad_norm": 1.4210280179977417, "learning_rate": 1.4961442276372951e-05, "loss": 1.3072, "step": 6853 }, { "epoch": 0.37472492926752593, "grad_norm": 1.502630352973938, "learning_rate": 1.495985560863224e-05, "loss": 1.5302, "step": 6854 }, { "epoch": 0.37477960171124747, "grad_norm": 2.407557964324951, "learning_rate": 1.4958268775270315e-05, "loss": 1.3738, "step": 6855 }, { "epoch": 0.37483427415496906, "grad_norm": 1.2253583669662476, "learning_rate": 1.4956681776340157e-05, "loss": 1.5121, "step": 6856 }, { "epoch": 0.3748889465986906, "grad_norm": 1.429381012916565, "learning_rate": 1.4955094611894763e-05, "loss": 1.2601, "step": 6857 }, { "epoch": 0.37494361904241214, "grad_norm": 1.4702062606811523, "learning_rate": 1.4953507281987137e-05, "loss": 1.5047, "step": 6858 }, { "epoch": 0.37499829148613373, "grad_norm": 1.8559107780456543, "learning_rate": 1.4951919786670274e-05, "loss": 1.514, "step": 6859 }, { "epoch": 0.37505296392985527, "grad_norm": 1.6767598390579224, "learning_rate": 1.4950332125997192e-05, "loss": 1.5264, "step": 6860 }, { "epoch": 0.3751076363735768, "grad_norm": 1.380214810371399, "learning_rate": 1.4948744300020903e-05, "loss": 1.3808, "step": 6861 }, { "epoch": 0.37516230881729834, "grad_norm": 1.32334566116333, "learning_rate": 1.494715630879443e-05, "loss": 1.47, "step": 6862 }, { "epoch": 0.37521698126101993, "grad_norm": 1.9763487577438354, "learning_rate": 1.4945568152370797e-05, "loss": 1.2982, "step": 6863 }, { "epoch": 0.37527165370474147, "grad_norm": 1.3685534000396729, "learning_rate": 1.494397983080304e-05, "loss": 1.7467, "step": 6864 }, { "epoch": 0.375326326148463, "grad_norm": 1.581796407699585, "learning_rate": 1.4942391344144196e-05, "loss": 1.4067, "step": 6865 }, { "epoch": 0.3753809985921846, "grad_norm": 1.2832911014556885, "learning_rate": 1.4940802692447306e-05, "loss": 1.4468, "step": 6866 }, { "epoch": 0.37543567103590614, "grad_norm": 1.3111655712127686, "learning_rate": 1.493921387576542e-05, "loss": 1.4961, "step": 6867 }, { "epoch": 0.3754903434796277, "grad_norm": 1.4707704782485962, "learning_rate": 1.4937624894151592e-05, "loss": 1.2729, "step": 6868 }, { "epoch": 0.3755450159233492, "grad_norm": 1.785990595817566, "learning_rate": 1.4936035747658884e-05, "loss": 1.5471, "step": 6869 }, { "epoch": 0.3755996883670708, "grad_norm": 1.3566588163375854, "learning_rate": 1.4934446436340357e-05, "loss": 1.295, "step": 6870 }, { "epoch": 0.37565436081079234, "grad_norm": 1.8296931982040405, "learning_rate": 1.4932856960249087e-05, "loss": 1.3016, "step": 6871 }, { "epoch": 0.3757090332545139, "grad_norm": 1.2735509872436523, "learning_rate": 1.4931267319438148e-05, "loss": 1.4766, "step": 6872 }, { "epoch": 0.3757637056982355, "grad_norm": 1.662115216255188, "learning_rate": 1.4929677513960621e-05, "loss": 1.2345, "step": 6873 }, { "epoch": 0.375818378141957, "grad_norm": 1.6426255702972412, "learning_rate": 1.4928087543869594e-05, "loss": 1.4233, "step": 6874 }, { "epoch": 0.37587305058567855, "grad_norm": 1.7474122047424316, "learning_rate": 1.4926497409218156e-05, "loss": 1.4146, "step": 6875 }, { "epoch": 0.3759277230294001, "grad_norm": 1.90827476978302, "learning_rate": 1.4924907110059415e-05, "loss": 1.2921, "step": 6876 }, { "epoch": 0.3759823954731217, "grad_norm": 1.509315013885498, "learning_rate": 1.4923316646446466e-05, "loss": 1.4442, "step": 6877 }, { "epoch": 0.3760370679168432, "grad_norm": 1.7068153619766235, "learning_rate": 1.492172601843242e-05, "loss": 1.5387, "step": 6878 }, { "epoch": 0.37609174036056475, "grad_norm": 1.4848840236663818, "learning_rate": 1.4920135226070395e-05, "loss": 1.3896, "step": 6879 }, { "epoch": 0.37614641280428635, "grad_norm": 1.6690163612365723, "learning_rate": 1.4918544269413511e-05, "loss": 1.3748, "step": 6880 }, { "epoch": 0.3762010852480079, "grad_norm": 1.3425828218460083, "learning_rate": 1.491695314851489e-05, "loss": 1.4741, "step": 6881 }, { "epoch": 0.3762557576917294, "grad_norm": 1.275730013847351, "learning_rate": 1.4915361863427662e-05, "loss": 1.4982, "step": 6882 }, { "epoch": 0.37631043013545096, "grad_norm": 1.5891246795654297, "learning_rate": 1.4913770414204973e-05, "loss": 1.2897, "step": 6883 }, { "epoch": 0.37636510257917255, "grad_norm": 1.5602360963821411, "learning_rate": 1.4912178800899954e-05, "loss": 1.5768, "step": 6884 }, { "epoch": 0.3764197750228941, "grad_norm": 1.4463812112808228, "learning_rate": 1.4910587023565763e-05, "loss": 1.4187, "step": 6885 }, { "epoch": 0.3764744474666156, "grad_norm": 1.3109936714172363, "learning_rate": 1.4908995082255546e-05, "loss": 1.4623, "step": 6886 }, { "epoch": 0.3765291199103372, "grad_norm": 1.5092480182647705, "learning_rate": 1.4907402977022465e-05, "loss": 1.4844, "step": 6887 }, { "epoch": 0.37658379235405876, "grad_norm": 1.3619784116744995, "learning_rate": 1.4905810707919681e-05, "loss": 1.3118, "step": 6888 }, { "epoch": 0.3766384647977803, "grad_norm": 1.3805145025253296, "learning_rate": 1.4904218275000366e-05, "loss": 1.4404, "step": 6889 }, { "epoch": 0.37669313724150183, "grad_norm": 1.4831669330596924, "learning_rate": 1.4902625678317696e-05, "loss": 1.5261, "step": 6890 }, { "epoch": 0.3767478096852234, "grad_norm": 1.3243473768234253, "learning_rate": 1.490103291792485e-05, "loss": 1.4009, "step": 6891 }, { "epoch": 0.37680248212894496, "grad_norm": 1.462903618812561, "learning_rate": 1.4899439993875016e-05, "loss": 1.2087, "step": 6892 }, { "epoch": 0.3768571545726665, "grad_norm": 2.0213804244995117, "learning_rate": 1.4897846906221381e-05, "loss": 1.5693, "step": 6893 }, { "epoch": 0.3769118270163881, "grad_norm": 1.6323529481887817, "learning_rate": 1.4896253655017146e-05, "loss": 1.3551, "step": 6894 }, { "epoch": 0.3769664994601096, "grad_norm": 1.8282184600830078, "learning_rate": 1.4894660240315508e-05, "loss": 1.497, "step": 6895 }, { "epoch": 0.37702117190383116, "grad_norm": 1.516114354133606, "learning_rate": 1.4893066662169684e-05, "loss": 1.4372, "step": 6896 }, { "epoch": 0.3770758443475527, "grad_norm": 1.59792160987854, "learning_rate": 1.489147292063288e-05, "loss": 1.5762, "step": 6897 }, { "epoch": 0.3771305167912743, "grad_norm": 1.738349199295044, "learning_rate": 1.488987901575832e-05, "loss": 1.1859, "step": 6898 }, { "epoch": 0.37718518923499583, "grad_norm": 1.4509952068328857, "learning_rate": 1.4888284947599222e-05, "loss": 1.551, "step": 6899 }, { "epoch": 0.37723986167871737, "grad_norm": 1.4664976596832275, "learning_rate": 1.4886690716208816e-05, "loss": 1.3288, "step": 6900 }, { "epoch": 0.37729453412243896, "grad_norm": 1.6945375204086304, "learning_rate": 1.4885096321640346e-05, "loss": 1.4026, "step": 6901 }, { "epoch": 0.3773492065661605, "grad_norm": 1.4130682945251465, "learning_rate": 1.4883501763947043e-05, "loss": 1.4314, "step": 6902 }, { "epoch": 0.37740387900988204, "grad_norm": 1.4222321510314941, "learning_rate": 1.4881907043182158e-05, "loss": 1.7873, "step": 6903 }, { "epoch": 0.3774585514536036, "grad_norm": 1.4410176277160645, "learning_rate": 1.488031215939894e-05, "loss": 1.5248, "step": 6904 }, { "epoch": 0.37751322389732517, "grad_norm": 1.7547523975372314, "learning_rate": 1.4878717112650649e-05, "loss": 1.4303, "step": 6905 }, { "epoch": 0.3775678963410467, "grad_norm": 1.5255753993988037, "learning_rate": 1.4877121902990543e-05, "loss": 1.3025, "step": 6906 }, { "epoch": 0.37762256878476824, "grad_norm": 1.5933502912521362, "learning_rate": 1.4875526530471893e-05, "loss": 1.2524, "step": 6907 }, { "epoch": 0.37767724122848984, "grad_norm": 1.5351957082748413, "learning_rate": 1.4873930995147971e-05, "loss": 1.2311, "step": 6908 }, { "epoch": 0.3777319136722114, "grad_norm": 1.201635479927063, "learning_rate": 1.4872335297072057e-05, "loss": 1.4282, "step": 6909 }, { "epoch": 0.3777865861159329, "grad_norm": 1.5219231843948364, "learning_rate": 1.4870739436297435e-05, "loss": 1.376, "step": 6910 }, { "epoch": 0.37784125855965445, "grad_norm": 1.7391058206558228, "learning_rate": 1.4869143412877393e-05, "loss": 1.482, "step": 6911 }, { "epoch": 0.37789593100337604, "grad_norm": 1.2787954807281494, "learning_rate": 1.4867547226865227e-05, "loss": 1.569, "step": 6912 }, { "epoch": 0.3779506034470976, "grad_norm": 1.755133032798767, "learning_rate": 1.4865950878314234e-05, "loss": 1.476, "step": 6913 }, { "epoch": 0.3780052758908191, "grad_norm": 1.7397152185440063, "learning_rate": 1.4864354367277725e-05, "loss": 1.4521, "step": 6914 }, { "epoch": 0.3780599483345407, "grad_norm": 1.9190804958343506, "learning_rate": 1.4862757693809009e-05, "loss": 1.4714, "step": 6915 }, { "epoch": 0.37811462077826224, "grad_norm": 1.602965235710144, "learning_rate": 1.48611608579614e-05, "loss": 1.3793, "step": 6916 }, { "epoch": 0.3781692932219838, "grad_norm": 1.477704405784607, "learning_rate": 1.4859563859788228e-05, "loss": 1.4505, "step": 6917 }, { "epoch": 0.3782239656657053, "grad_norm": 1.2903867959976196, "learning_rate": 1.4857966699342817e-05, "loss": 1.4852, "step": 6918 }, { "epoch": 0.3782786381094269, "grad_norm": 1.1977801322937012, "learning_rate": 1.4856369376678492e-05, "loss": 1.5744, "step": 6919 }, { "epoch": 0.37833331055314845, "grad_norm": 1.6100987195968628, "learning_rate": 1.4854771891848598e-05, "loss": 1.5253, "step": 6920 }, { "epoch": 0.37838798299687, "grad_norm": 1.2742115259170532, "learning_rate": 1.485317424490648e-05, "loss": 1.4809, "step": 6921 }, { "epoch": 0.3784426554405916, "grad_norm": 1.3764162063598633, "learning_rate": 1.4851576435905489e-05, "loss": 1.6764, "step": 6922 }, { "epoch": 0.3784973278843131, "grad_norm": 1.7345741987228394, "learning_rate": 1.4849978464898971e-05, "loss": 1.2935, "step": 6923 }, { "epoch": 0.37855200032803465, "grad_norm": 1.7870608568191528, "learning_rate": 1.4848380331940295e-05, "loss": 1.666, "step": 6924 }, { "epoch": 0.3786066727717562, "grad_norm": 1.2987736463546753, "learning_rate": 1.4846782037082824e-05, "loss": 1.7124, "step": 6925 }, { "epoch": 0.3786613452154778, "grad_norm": 1.317282795906067, "learning_rate": 1.4845183580379923e-05, "loss": 1.4537, "step": 6926 }, { "epoch": 0.3787160176591993, "grad_norm": 1.8856167793273926, "learning_rate": 1.4843584961884973e-05, "loss": 1.0534, "step": 6927 }, { "epoch": 0.37877069010292086, "grad_norm": 1.603245735168457, "learning_rate": 1.4841986181651355e-05, "loss": 1.1944, "step": 6928 }, { "epoch": 0.37882536254664245, "grad_norm": 1.2743040323257446, "learning_rate": 1.484038723973246e-05, "loss": 1.4935, "step": 6929 }, { "epoch": 0.378880034990364, "grad_norm": 1.5629158020019531, "learning_rate": 1.4838788136181676e-05, "loss": 1.3035, "step": 6930 }, { "epoch": 0.3789347074340855, "grad_norm": 1.4373693466186523, "learning_rate": 1.4837188871052399e-05, "loss": 1.5844, "step": 6931 }, { "epoch": 0.37898937987780706, "grad_norm": 1.5046643018722534, "learning_rate": 1.4835589444398037e-05, "loss": 1.1163, "step": 6932 }, { "epoch": 0.37904405232152866, "grad_norm": 1.255746841430664, "learning_rate": 1.4833989856271995e-05, "loss": 1.4883, "step": 6933 }, { "epoch": 0.3790987247652502, "grad_norm": 1.912887692451477, "learning_rate": 1.4832390106727688e-05, "loss": 1.342, "step": 6934 }, { "epoch": 0.37915339720897173, "grad_norm": 1.846318244934082, "learning_rate": 1.4830790195818537e-05, "loss": 1.5328, "step": 6935 }, { "epoch": 0.3792080696526933, "grad_norm": 1.4004138708114624, "learning_rate": 1.4829190123597965e-05, "loss": 1.3556, "step": 6936 }, { "epoch": 0.37926274209641486, "grad_norm": 1.308897852897644, "learning_rate": 1.4827589890119404e-05, "loss": 1.4191, "step": 6937 }, { "epoch": 0.3793174145401364, "grad_norm": 1.2688584327697754, "learning_rate": 1.4825989495436286e-05, "loss": 1.4294, "step": 6938 }, { "epoch": 0.37937208698385794, "grad_norm": 1.7538148164749146, "learning_rate": 1.4824388939602056e-05, "loss": 1.3025, "step": 6939 }, { "epoch": 0.37942675942757953, "grad_norm": 1.3828380107879639, "learning_rate": 1.482278822267016e-05, "loss": 1.5614, "step": 6940 }, { "epoch": 0.37948143187130107, "grad_norm": 1.565063238143921, "learning_rate": 1.4821187344694043e-05, "loss": 1.5242, "step": 6941 }, { "epoch": 0.3795361043150226, "grad_norm": 2.410468339920044, "learning_rate": 1.4819586305727169e-05, "loss": 1.6073, "step": 6942 }, { "epoch": 0.3795907767587442, "grad_norm": 1.233491063117981, "learning_rate": 1.4817985105823003e-05, "loss": 1.7226, "step": 6943 }, { "epoch": 0.37964544920246573, "grad_norm": 1.4823050498962402, "learning_rate": 1.4816383745035006e-05, "loss": 1.3802, "step": 6944 }, { "epoch": 0.37970012164618727, "grad_norm": 1.6974034309387207, "learning_rate": 1.4814782223416653e-05, "loss": 1.0562, "step": 6945 }, { "epoch": 0.3797547940899088, "grad_norm": 1.3442387580871582, "learning_rate": 1.4813180541021425e-05, "loss": 1.4864, "step": 6946 }, { "epoch": 0.3798094665336304, "grad_norm": 1.150368332862854, "learning_rate": 1.4811578697902802e-05, "loss": 1.5256, "step": 6947 }, { "epoch": 0.37986413897735194, "grad_norm": 1.8391051292419434, "learning_rate": 1.4809976694114276e-05, "loss": 1.3568, "step": 6948 }, { "epoch": 0.3799188114210735, "grad_norm": 1.4226586818695068, "learning_rate": 1.4808374529709344e-05, "loss": 1.5605, "step": 6949 }, { "epoch": 0.37997348386479507, "grad_norm": 1.2111799716949463, "learning_rate": 1.4806772204741503e-05, "loss": 1.4175, "step": 6950 }, { "epoch": 0.3800281563085166, "grad_norm": 1.6492512226104736, "learning_rate": 1.4805169719264255e-05, "loss": 1.3557, "step": 6951 }, { "epoch": 0.38008282875223814, "grad_norm": 1.1637623310089111, "learning_rate": 1.4803567073331115e-05, "loss": 1.4539, "step": 6952 }, { "epoch": 0.3801375011959597, "grad_norm": 1.6401290893554688, "learning_rate": 1.4801964266995601e-05, "loss": 1.4516, "step": 6953 }, { "epoch": 0.3801921736396813, "grad_norm": 1.3789337873458862, "learning_rate": 1.480036130031123e-05, "loss": 1.3973, "step": 6954 }, { "epoch": 0.3802468460834028, "grad_norm": 1.2848509550094604, "learning_rate": 1.4798758173331528e-05, "loss": 1.3738, "step": 6955 }, { "epoch": 0.38030151852712435, "grad_norm": 1.420736312866211, "learning_rate": 1.4797154886110037e-05, "loss": 1.6177, "step": 6956 }, { "epoch": 0.38035619097084594, "grad_norm": 1.4986472129821777, "learning_rate": 1.4795551438700283e-05, "loss": 1.5422, "step": 6957 }, { "epoch": 0.3804108634145675, "grad_norm": 1.2151336669921875, "learning_rate": 1.4793947831155815e-05, "loss": 1.6101, "step": 6958 }, { "epoch": 0.380465535858289, "grad_norm": 1.9038573503494263, "learning_rate": 1.4792344063530177e-05, "loss": 1.4392, "step": 6959 }, { "epoch": 0.38052020830201055, "grad_norm": 1.7400602102279663, "learning_rate": 1.4790740135876929e-05, "loss": 1.3068, "step": 6960 }, { "epoch": 0.38057488074573215, "grad_norm": 1.5331166982650757, "learning_rate": 1.4789136048249621e-05, "loss": 1.6912, "step": 6961 }, { "epoch": 0.3806295531894537, "grad_norm": 1.5081557035446167, "learning_rate": 1.4787531800701826e-05, "loss": 1.2702, "step": 6962 }, { "epoch": 0.3806842256331752, "grad_norm": 1.1130917072296143, "learning_rate": 1.478592739328711e-05, "loss": 1.6535, "step": 6963 }, { "epoch": 0.3807388980768968, "grad_norm": 1.151685118675232, "learning_rate": 1.4784322826059048e-05, "loss": 1.6053, "step": 6964 }, { "epoch": 0.38079357052061835, "grad_norm": 1.5961358547210693, "learning_rate": 1.4782718099071219e-05, "loss": 1.3225, "step": 6965 }, { "epoch": 0.3808482429643399, "grad_norm": 1.5404725074768066, "learning_rate": 1.4781113212377207e-05, "loss": 1.3934, "step": 6966 }, { "epoch": 0.3809029154080614, "grad_norm": 1.6217056512832642, "learning_rate": 1.4779508166030609e-05, "loss": 1.4259, "step": 6967 }, { "epoch": 0.380957587851783, "grad_norm": 1.556835412979126, "learning_rate": 1.4777902960085017e-05, "loss": 1.5155, "step": 6968 }, { "epoch": 0.38101226029550456, "grad_norm": 2.0222973823547363, "learning_rate": 1.4776297594594033e-05, "loss": 1.4087, "step": 6969 }, { "epoch": 0.3810669327392261, "grad_norm": 1.4442055225372314, "learning_rate": 1.4774692069611267e-05, "loss": 1.5014, "step": 6970 }, { "epoch": 0.3811216051829477, "grad_norm": 1.5270031690597534, "learning_rate": 1.4773086385190328e-05, "loss": 1.1752, "step": 6971 }, { "epoch": 0.3811762776266692, "grad_norm": 1.6630313396453857, "learning_rate": 1.4771480541384831e-05, "loss": 1.2942, "step": 6972 }, { "epoch": 0.38123095007039076, "grad_norm": 1.5053389072418213, "learning_rate": 1.4769874538248404e-05, "loss": 1.4642, "step": 6973 }, { "epoch": 0.3812856225141123, "grad_norm": 1.4408389329910278, "learning_rate": 1.4768268375834673e-05, "loss": 1.4301, "step": 6974 }, { "epoch": 0.3813402949578339, "grad_norm": 1.4516648054122925, "learning_rate": 1.476666205419727e-05, "loss": 1.4259, "step": 6975 }, { "epoch": 0.38139496740155543, "grad_norm": 1.4620388746261597, "learning_rate": 1.476505557338984e-05, "loss": 1.5154, "step": 6976 }, { "epoch": 0.38144963984527697, "grad_norm": 1.7041213512420654, "learning_rate": 1.4763448933466018e-05, "loss": 1.4342, "step": 6977 }, { "epoch": 0.38150431228899856, "grad_norm": 1.2629578113555908, "learning_rate": 1.4761842134479463e-05, "loss": 1.5275, "step": 6978 }, { "epoch": 0.3815589847327201, "grad_norm": 1.4763975143432617, "learning_rate": 1.4760235176483821e-05, "loss": 1.4786, "step": 6979 }, { "epoch": 0.38161365717644163, "grad_norm": 1.559616208076477, "learning_rate": 1.475862805953276e-05, "loss": 1.6523, "step": 6980 }, { "epoch": 0.38166832962016317, "grad_norm": 1.54331636428833, "learning_rate": 1.475702078367994e-05, "loss": 1.717, "step": 6981 }, { "epoch": 0.38172300206388476, "grad_norm": 1.599038004875183, "learning_rate": 1.4755413348979034e-05, "loss": 1.1129, "step": 6982 }, { "epoch": 0.3817776745076063, "grad_norm": 1.490925669670105, "learning_rate": 1.4753805755483717e-05, "loss": 1.4333, "step": 6983 }, { "epoch": 0.38183234695132784, "grad_norm": 1.383884072303772, "learning_rate": 1.4752198003247669e-05, "loss": 1.3768, "step": 6984 }, { "epoch": 0.38188701939504943, "grad_norm": 1.2559796571731567, "learning_rate": 1.4750590092324579e-05, "loss": 1.537, "step": 6985 }, { "epoch": 0.38194169183877097, "grad_norm": 1.6892789602279663, "learning_rate": 1.4748982022768139e-05, "loss": 1.2564, "step": 6986 }, { "epoch": 0.3819963642824925, "grad_norm": 1.4565718173980713, "learning_rate": 1.4747373794632043e-05, "loss": 1.3015, "step": 6987 }, { "epoch": 0.38205103672621404, "grad_norm": 1.6970053911209106, "learning_rate": 1.474576540797e-05, "loss": 1.4513, "step": 6988 }, { "epoch": 0.38210570916993564, "grad_norm": 1.1059081554412842, "learning_rate": 1.4744156862835712e-05, "loss": 1.7575, "step": 6989 }, { "epoch": 0.3821603816136572, "grad_norm": 1.2496553659439087, "learning_rate": 1.4742548159282892e-05, "loss": 1.487, "step": 6990 }, { "epoch": 0.3822150540573787, "grad_norm": 1.6889461278915405, "learning_rate": 1.4740939297365261e-05, "loss": 1.766, "step": 6991 }, { "epoch": 0.3822697265011003, "grad_norm": 1.6929646730422974, "learning_rate": 1.4739330277136546e-05, "loss": 1.4087, "step": 6992 }, { "epoch": 0.38232439894482184, "grad_norm": 1.3834930658340454, "learning_rate": 1.4737721098650468e-05, "loss": 1.1389, "step": 6993 }, { "epoch": 0.3823790713885434, "grad_norm": 1.5489641427993774, "learning_rate": 1.4736111761960766e-05, "loss": 1.4303, "step": 6994 }, { "epoch": 0.3824337438322649, "grad_norm": 1.8082008361816406, "learning_rate": 1.4734502267121177e-05, "loss": 1.423, "step": 6995 }, { "epoch": 0.3824884162759865, "grad_norm": 1.377840280532837, "learning_rate": 1.473289261418545e-05, "loss": 1.4074, "step": 6996 }, { "epoch": 0.38254308871970805, "grad_norm": 1.4587019681930542, "learning_rate": 1.473128280320733e-05, "loss": 1.4368, "step": 6997 }, { "epoch": 0.3825977611634296, "grad_norm": 1.9123421907424927, "learning_rate": 1.4729672834240575e-05, "loss": 1.1007, "step": 6998 }, { "epoch": 0.3826524336071512, "grad_norm": 1.6484402418136597, "learning_rate": 1.4728062707338949e-05, "loss": 1.3585, "step": 6999 }, { "epoch": 0.3827071060508727, "grad_norm": 1.9690696001052856, "learning_rate": 1.4726452422556212e-05, "loss": 1.3571, "step": 7000 }, { "epoch": 0.38276177849459425, "grad_norm": 1.4387526512145996, "learning_rate": 1.4724841979946139e-05, "loss": 1.2395, "step": 7001 }, { "epoch": 0.3828164509383158, "grad_norm": 1.4908658266067505, "learning_rate": 1.4723231379562504e-05, "loss": 1.4946, "step": 7002 }, { "epoch": 0.3828711233820374, "grad_norm": 1.477999210357666, "learning_rate": 1.472162062145909e-05, "loss": 1.408, "step": 7003 }, { "epoch": 0.3829257958257589, "grad_norm": 1.720225214958191, "learning_rate": 1.4720009705689682e-05, "loss": 1.4415, "step": 7004 }, { "epoch": 0.38298046826948046, "grad_norm": 1.5251449346542358, "learning_rate": 1.4718398632308075e-05, "loss": 1.5478, "step": 7005 }, { "epoch": 0.38303514071320205, "grad_norm": 1.2123388051986694, "learning_rate": 1.4716787401368067e-05, "loss": 1.5027, "step": 7006 }, { "epoch": 0.3830898131569236, "grad_norm": 1.5114853382110596, "learning_rate": 1.4715176012923458e-05, "loss": 1.3635, "step": 7007 }, { "epoch": 0.3831444856006451, "grad_norm": 1.902620792388916, "learning_rate": 1.471356446702806e-05, "loss": 1.3346, "step": 7008 }, { "epoch": 0.38319915804436666, "grad_norm": 1.8479610681533813, "learning_rate": 1.4711952763735683e-05, "loss": 1.2179, "step": 7009 }, { "epoch": 0.38325383048808825, "grad_norm": 1.2482352256774902, "learning_rate": 1.4710340903100145e-05, "loss": 1.5903, "step": 7010 }, { "epoch": 0.3833085029318098, "grad_norm": 1.4387037754058838, "learning_rate": 1.470872888517527e-05, "loss": 1.5934, "step": 7011 }, { "epoch": 0.38336317537553133, "grad_norm": 1.5278890132904053, "learning_rate": 1.4707116710014887e-05, "loss": 1.6882, "step": 7012 }, { "epoch": 0.3834178478192529, "grad_norm": 1.2074068784713745, "learning_rate": 1.4705504377672834e-05, "loss": 1.4515, "step": 7013 }, { "epoch": 0.38347252026297446, "grad_norm": 1.4592180252075195, "learning_rate": 1.4703891888202948e-05, "loss": 1.4036, "step": 7014 }, { "epoch": 0.383527192706696, "grad_norm": 2.001567840576172, "learning_rate": 1.4702279241659075e-05, "loss": 1.2724, "step": 7015 }, { "epoch": 0.38358186515041753, "grad_norm": 1.4115698337554932, "learning_rate": 1.4700666438095064e-05, "loss": 1.2391, "step": 7016 }, { "epoch": 0.3836365375941391, "grad_norm": 1.5410726070404053, "learning_rate": 1.4699053477564768e-05, "loss": 1.6301, "step": 7017 }, { "epoch": 0.38369121003786066, "grad_norm": 1.7049261331558228, "learning_rate": 1.4697440360122048e-05, "loss": 1.3777, "step": 7018 }, { "epoch": 0.3837458824815822, "grad_norm": 1.5354143381118774, "learning_rate": 1.4695827085820775e-05, "loss": 1.5461, "step": 7019 }, { "epoch": 0.3838005549253038, "grad_norm": 1.649013876914978, "learning_rate": 1.4694213654714816e-05, "loss": 1.3179, "step": 7020 }, { "epoch": 0.38385522736902533, "grad_norm": 1.403260350227356, "learning_rate": 1.4692600066858048e-05, "loss": 1.311, "step": 7021 }, { "epoch": 0.38390989981274687, "grad_norm": 1.4008238315582275, "learning_rate": 1.469098632230435e-05, "loss": 1.4699, "step": 7022 }, { "epoch": 0.3839645722564684, "grad_norm": 1.374402403831482, "learning_rate": 1.4689372421107612e-05, "loss": 1.5971, "step": 7023 }, { "epoch": 0.38401924470019, "grad_norm": 1.8999525308609009, "learning_rate": 1.4687758363321725e-05, "loss": 1.1305, "step": 7024 }, { "epoch": 0.38407391714391154, "grad_norm": 1.4144772291183472, "learning_rate": 1.4686144149000585e-05, "loss": 1.5833, "step": 7025 }, { "epoch": 0.3841285895876331, "grad_norm": 1.7290245294570923, "learning_rate": 1.4684529778198097e-05, "loss": 1.4087, "step": 7026 }, { "epoch": 0.38418326203135467, "grad_norm": 1.7425222396850586, "learning_rate": 1.4682915250968169e-05, "loss": 1.2458, "step": 7027 }, { "epoch": 0.3842379344750762, "grad_norm": 1.6337246894836426, "learning_rate": 1.468130056736471e-05, "loss": 1.3195, "step": 7028 }, { "epoch": 0.38429260691879774, "grad_norm": 1.2080398797988892, "learning_rate": 1.467968572744164e-05, "loss": 1.3815, "step": 7029 }, { "epoch": 0.3843472793625193, "grad_norm": 1.351986050605774, "learning_rate": 1.4678070731252883e-05, "loss": 1.307, "step": 7030 }, { "epoch": 0.38440195180624087, "grad_norm": 1.6175721883773804, "learning_rate": 1.4676455578852365e-05, "loss": 1.4154, "step": 7031 }, { "epoch": 0.3844566242499624, "grad_norm": 1.817962646484375, "learning_rate": 1.4674840270294022e-05, "loss": 1.5072, "step": 7032 }, { "epoch": 0.38451129669368395, "grad_norm": 1.602249264717102, "learning_rate": 1.4673224805631792e-05, "loss": 1.4936, "step": 7033 }, { "epoch": 0.38456596913740554, "grad_norm": 1.7110998630523682, "learning_rate": 1.4671609184919622e-05, "loss": 1.4152, "step": 7034 }, { "epoch": 0.3846206415811271, "grad_norm": 1.4349024295806885, "learning_rate": 1.4669993408211458e-05, "loss": 1.2498, "step": 7035 }, { "epoch": 0.3846753140248486, "grad_norm": 1.9441996812820435, "learning_rate": 1.4668377475561255e-05, "loss": 1.1862, "step": 7036 }, { "epoch": 0.3847299864685702, "grad_norm": 1.098013997077942, "learning_rate": 1.4666761387022974e-05, "loss": 1.7064, "step": 7037 }, { "epoch": 0.38478465891229174, "grad_norm": 1.544616937637329, "learning_rate": 1.4665145142650578e-05, "loss": 1.49, "step": 7038 }, { "epoch": 0.3848393313560133, "grad_norm": 1.7517985105514526, "learning_rate": 1.466352874249804e-05, "loss": 1.2843, "step": 7039 }, { "epoch": 0.3848940037997348, "grad_norm": 1.1183700561523438, "learning_rate": 1.4661912186619336e-05, "loss": 1.5771, "step": 7040 }, { "epoch": 0.3849486762434564, "grad_norm": 1.478485107421875, "learning_rate": 1.4660295475068443e-05, "loss": 1.3726, "step": 7041 }, { "epoch": 0.38500334868717795, "grad_norm": 1.6378421783447266, "learning_rate": 1.4658678607899348e-05, "loss": 1.3776, "step": 7042 }, { "epoch": 0.3850580211308995, "grad_norm": 1.9373505115509033, "learning_rate": 1.465706158516604e-05, "loss": 1.3128, "step": 7043 }, { "epoch": 0.3851126935746211, "grad_norm": 1.3090425729751587, "learning_rate": 1.4655444406922521e-05, "loss": 1.4212, "step": 7044 }, { "epoch": 0.3851673660183426, "grad_norm": 1.3769898414611816, "learning_rate": 1.4653827073222785e-05, "loss": 1.5905, "step": 7045 }, { "epoch": 0.38522203846206415, "grad_norm": 1.7511379718780518, "learning_rate": 1.4652209584120847e-05, "loss": 1.1907, "step": 7046 }, { "epoch": 0.3852767109057857, "grad_norm": 1.835775375366211, "learning_rate": 1.4650591939670713e-05, "loss": 1.4913, "step": 7047 }, { "epoch": 0.3853313833495073, "grad_norm": 1.473755955696106, "learning_rate": 1.4648974139926403e-05, "loss": 1.5938, "step": 7048 }, { "epoch": 0.3853860557932288, "grad_norm": 1.5363173484802246, "learning_rate": 1.4647356184941932e-05, "loss": 1.4338, "step": 7049 }, { "epoch": 0.38544072823695036, "grad_norm": 1.644144892692566, "learning_rate": 1.4645738074771334e-05, "loss": 1.4471, "step": 7050 }, { "epoch": 0.38549540068067195, "grad_norm": 1.6402273178100586, "learning_rate": 1.4644119809468645e-05, "loss": 1.4787, "step": 7051 }, { "epoch": 0.3855500731243935, "grad_norm": 1.1476978063583374, "learning_rate": 1.4642501389087891e-05, "loss": 1.3805, "step": 7052 }, { "epoch": 0.385604745568115, "grad_norm": 1.2824922800064087, "learning_rate": 1.4640882813683125e-05, "loss": 1.3529, "step": 7053 }, { "epoch": 0.38565941801183656, "grad_norm": 1.4816651344299316, "learning_rate": 1.4639264083308393e-05, "loss": 1.4905, "step": 7054 }, { "epoch": 0.38571409045555816, "grad_norm": 1.7024691104888916, "learning_rate": 1.4637645198017745e-05, "loss": 1.2617, "step": 7055 }, { "epoch": 0.3857687628992797, "grad_norm": 1.639458179473877, "learning_rate": 1.4636026157865242e-05, "loss": 1.3955, "step": 7056 }, { "epoch": 0.38582343534300123, "grad_norm": 1.4308854341506958, "learning_rate": 1.4634406962904945e-05, "loss": 1.324, "step": 7057 }, { "epoch": 0.3858781077867228, "grad_norm": 1.6781479120254517, "learning_rate": 1.4632787613190928e-05, "loss": 1.3679, "step": 7058 }, { "epoch": 0.38593278023044436, "grad_norm": 1.5493881702423096, "learning_rate": 1.463116810877726e-05, "loss": 1.4179, "step": 7059 }, { "epoch": 0.3859874526741659, "grad_norm": 1.5266143083572388, "learning_rate": 1.462954844971802e-05, "loss": 1.4822, "step": 7060 }, { "epoch": 0.38604212511788744, "grad_norm": 1.83003830909729, "learning_rate": 1.4627928636067295e-05, "loss": 1.229, "step": 7061 }, { "epoch": 0.38609679756160903, "grad_norm": 1.6475597620010376, "learning_rate": 1.4626308667879175e-05, "loss": 1.3326, "step": 7062 }, { "epoch": 0.38615147000533057, "grad_norm": 1.2984222173690796, "learning_rate": 1.4624688545207749e-05, "loss": 1.5068, "step": 7063 }, { "epoch": 0.3862061424490521, "grad_norm": 2.0594024658203125, "learning_rate": 1.4623068268107119e-05, "loss": 1.6589, "step": 7064 }, { "epoch": 0.3862608148927737, "grad_norm": 1.412103295326233, "learning_rate": 1.4621447836631395e-05, "loss": 1.3689, "step": 7065 }, { "epoch": 0.38631548733649523, "grad_norm": 1.536595106124878, "learning_rate": 1.4619827250834681e-05, "loss": 1.1374, "step": 7066 }, { "epoch": 0.38637015978021677, "grad_norm": 1.5544451475143433, "learning_rate": 1.4618206510771097e-05, "loss": 1.2884, "step": 7067 }, { "epoch": 0.3864248322239383, "grad_norm": 1.3927894830703735, "learning_rate": 1.4616585616494759e-05, "loss": 1.2387, "step": 7068 }, { "epoch": 0.3864795046676599, "grad_norm": 1.3402496576309204, "learning_rate": 1.4614964568059795e-05, "loss": 1.5243, "step": 7069 }, { "epoch": 0.38653417711138144, "grad_norm": 1.5228257179260254, "learning_rate": 1.4613343365520333e-05, "loss": 1.1601, "step": 7070 }, { "epoch": 0.386588849555103, "grad_norm": 1.3701176643371582, "learning_rate": 1.4611722008930512e-05, "loss": 1.3966, "step": 7071 }, { "epoch": 0.38664352199882457, "grad_norm": 1.434477686882019, "learning_rate": 1.4610100498344471e-05, "loss": 1.3625, "step": 7072 }, { "epoch": 0.3866981944425461, "grad_norm": 1.3282755613327026, "learning_rate": 1.4608478833816356e-05, "loss": 1.4923, "step": 7073 }, { "epoch": 0.38675286688626764, "grad_norm": 1.7400023937225342, "learning_rate": 1.4606857015400317e-05, "loss": 1.4542, "step": 7074 }, { "epoch": 0.3868075393299892, "grad_norm": 1.4609184265136719, "learning_rate": 1.4605235043150514e-05, "loss": 1.7852, "step": 7075 }, { "epoch": 0.3868622117737108, "grad_norm": 1.2542262077331543, "learning_rate": 1.4603612917121107e-05, "loss": 1.3627, "step": 7076 }, { "epoch": 0.3869168842174323, "grad_norm": 1.2960498332977295, "learning_rate": 1.460199063736626e-05, "loss": 1.3682, "step": 7077 }, { "epoch": 0.38697155666115385, "grad_norm": 1.3270831108093262, "learning_rate": 1.4600368203940147e-05, "loss": 1.3332, "step": 7078 }, { "epoch": 0.38702622910487544, "grad_norm": 1.3325037956237793, "learning_rate": 1.4598745616896946e-05, "loss": 1.4517, "step": 7079 }, { "epoch": 0.387080901548597, "grad_norm": 1.2185032367706299, "learning_rate": 1.4597122876290839e-05, "loss": 1.5421, "step": 7080 }, { "epoch": 0.3871355739923185, "grad_norm": 1.250750184059143, "learning_rate": 1.4595499982176007e-05, "loss": 1.374, "step": 7081 }, { "epoch": 0.38719024643604005, "grad_norm": 1.4453123807907104, "learning_rate": 1.459387693460665e-05, "loss": 1.5418, "step": 7082 }, { "epoch": 0.38724491887976165, "grad_norm": 1.5576446056365967, "learning_rate": 1.4592253733636961e-05, "loss": 1.5469, "step": 7083 }, { "epoch": 0.3872995913234832, "grad_norm": 1.5909631252288818, "learning_rate": 1.4590630379321145e-05, "loss": 1.4429, "step": 7084 }, { "epoch": 0.3873542637672047, "grad_norm": 1.561903476715088, "learning_rate": 1.4589006871713407e-05, "loss": 1.3051, "step": 7085 }, { "epoch": 0.3874089362109263, "grad_norm": 1.480119228363037, "learning_rate": 1.4587383210867963e-05, "loss": 1.4736, "step": 7086 }, { "epoch": 0.38746360865464785, "grad_norm": 1.602303385734558, "learning_rate": 1.458575939683903e-05, "loss": 1.2866, "step": 7087 }, { "epoch": 0.3875182810983694, "grad_norm": 1.683087944984436, "learning_rate": 1.4584135429680826e-05, "loss": 1.4359, "step": 7088 }, { "epoch": 0.3875729535420909, "grad_norm": 1.6629360914230347, "learning_rate": 1.4582511309447585e-05, "loss": 1.4663, "step": 7089 }, { "epoch": 0.3876276259858125, "grad_norm": 1.4453859329223633, "learning_rate": 1.4580887036193539e-05, "loss": 1.3063, "step": 7090 }, { "epoch": 0.38768229842953406, "grad_norm": 1.582611322402954, "learning_rate": 1.4579262609972922e-05, "loss": 1.4811, "step": 7091 }, { "epoch": 0.3877369708732556, "grad_norm": 1.4744651317596436, "learning_rate": 1.4577638030839985e-05, "loss": 1.1257, "step": 7092 }, { "epoch": 0.3877916433169772, "grad_norm": 1.311450719833374, "learning_rate": 1.4576013298848971e-05, "loss": 1.3239, "step": 7093 }, { "epoch": 0.3878463157606987, "grad_norm": 1.9186785221099854, "learning_rate": 1.4574388414054134e-05, "loss": 1.2438, "step": 7094 }, { "epoch": 0.38790098820442026, "grad_norm": 1.431541085243225, "learning_rate": 1.4572763376509732e-05, "loss": 1.5526, "step": 7095 }, { "epoch": 0.3879556606481418, "grad_norm": 1.5429840087890625, "learning_rate": 1.4571138186270037e-05, "loss": 1.261, "step": 7096 }, { "epoch": 0.3880103330918634, "grad_norm": 1.3353934288024902, "learning_rate": 1.4569512843389306e-05, "loss": 1.4719, "step": 7097 }, { "epoch": 0.38806500553558493, "grad_norm": 1.643576979637146, "learning_rate": 1.4567887347921818e-05, "loss": 1.2582, "step": 7098 }, { "epoch": 0.38811967797930647, "grad_norm": 1.3848867416381836, "learning_rate": 1.4566261699921857e-05, "loss": 1.5381, "step": 7099 }, { "epoch": 0.38817435042302806, "grad_norm": 1.6623506546020508, "learning_rate": 1.4564635899443702e-05, "loss": 1.3701, "step": 7100 }, { "epoch": 0.3882290228667496, "grad_norm": 1.6573834419250488, "learning_rate": 1.456300994654164e-05, "loss": 1.4167, "step": 7101 }, { "epoch": 0.38828369531047113, "grad_norm": 1.5060263872146606, "learning_rate": 1.4561383841269967e-05, "loss": 1.516, "step": 7102 }, { "epoch": 0.38833836775419267, "grad_norm": 1.6007335186004639, "learning_rate": 1.4559757583682989e-05, "loss": 1.467, "step": 7103 }, { "epoch": 0.38839304019791426, "grad_norm": 1.2903188467025757, "learning_rate": 1.4558131173835002e-05, "loss": 1.6232, "step": 7104 }, { "epoch": 0.3884477126416358, "grad_norm": 1.2866467237472534, "learning_rate": 1.455650461178032e-05, "loss": 1.4856, "step": 7105 }, { "epoch": 0.38850238508535734, "grad_norm": 2.0301718711853027, "learning_rate": 1.4554877897573259e-05, "loss": 1.2088, "step": 7106 }, { "epoch": 0.38855705752907893, "grad_norm": 1.3366457223892212, "learning_rate": 1.4553251031268134e-05, "loss": 1.4023, "step": 7107 }, { "epoch": 0.38861172997280047, "grad_norm": 1.7512214183807373, "learning_rate": 1.4551624012919274e-05, "loss": 1.4284, "step": 7108 }, { "epoch": 0.388666402416522, "grad_norm": 1.9382193088531494, "learning_rate": 1.4549996842581005e-05, "loss": 1.4571, "step": 7109 }, { "epoch": 0.38872107486024354, "grad_norm": 1.5681654214859009, "learning_rate": 1.4548369520307669e-05, "loss": 1.7236, "step": 7110 }, { "epoch": 0.38877574730396514, "grad_norm": 1.347062587738037, "learning_rate": 1.4546742046153596e-05, "loss": 1.4986, "step": 7111 }, { "epoch": 0.3888304197476867, "grad_norm": 1.117882490158081, "learning_rate": 1.454511442017314e-05, "loss": 1.5748, "step": 7112 }, { "epoch": 0.3888850921914082, "grad_norm": 1.617596983909607, "learning_rate": 1.4543486642420647e-05, "loss": 1.5914, "step": 7113 }, { "epoch": 0.3889397646351298, "grad_norm": 1.459242343902588, "learning_rate": 1.4541858712950477e-05, "loss": 1.2393, "step": 7114 }, { "epoch": 0.38899443707885134, "grad_norm": 1.2703009843826294, "learning_rate": 1.4540230631816984e-05, "loss": 1.198, "step": 7115 }, { "epoch": 0.3890491095225729, "grad_norm": 1.7866207361221313, "learning_rate": 1.4538602399074532e-05, "loss": 1.4794, "step": 7116 }, { "epoch": 0.3891037819662944, "grad_norm": 1.745490312576294, "learning_rate": 1.4536974014777503e-05, "loss": 1.4083, "step": 7117 }, { "epoch": 0.389158454410016, "grad_norm": 1.1500906944274902, "learning_rate": 1.453534547898026e-05, "loss": 1.435, "step": 7118 }, { "epoch": 0.38921312685373755, "grad_norm": 1.7120230197906494, "learning_rate": 1.4533716791737193e-05, "loss": 1.3918, "step": 7119 }, { "epoch": 0.3892677992974591, "grad_norm": 2.063520669937134, "learning_rate": 1.453208795310268e-05, "loss": 1.1903, "step": 7120 }, { "epoch": 0.3893224717411807, "grad_norm": 1.450769066810608, "learning_rate": 1.453045896313112e-05, "loss": 1.2259, "step": 7121 }, { "epoch": 0.3893771441849022, "grad_norm": 1.2532440423965454, "learning_rate": 1.45288298218769e-05, "loss": 1.4245, "step": 7122 }, { "epoch": 0.38943181662862375, "grad_norm": 1.3421660661697388, "learning_rate": 1.4527200529394425e-05, "loss": 1.4454, "step": 7123 }, { "epoch": 0.3894864890723453, "grad_norm": 1.3610262870788574, "learning_rate": 1.4525571085738104e-05, "loss": 1.2737, "step": 7124 }, { "epoch": 0.3895411615160669, "grad_norm": 1.6400121450424194, "learning_rate": 1.4523941490962342e-05, "loss": 1.4249, "step": 7125 }, { "epoch": 0.3895958339597884, "grad_norm": 1.5342433452606201, "learning_rate": 1.452231174512156e-05, "loss": 1.5308, "step": 7126 }, { "epoch": 0.38965050640350996, "grad_norm": 1.6571372747421265, "learning_rate": 1.4520681848270176e-05, "loss": 1.4846, "step": 7127 }, { "epoch": 0.38970517884723155, "grad_norm": 1.5560314655303955, "learning_rate": 1.4519051800462617e-05, "loss": 1.5849, "step": 7128 }, { "epoch": 0.3897598512909531, "grad_norm": 1.5599147081375122, "learning_rate": 1.4517421601753312e-05, "loss": 1.3306, "step": 7129 }, { "epoch": 0.3898145237346746, "grad_norm": 1.6038554906845093, "learning_rate": 1.45157912521967e-05, "loss": 1.3, "step": 7130 }, { "epoch": 0.38986919617839616, "grad_norm": 1.7748311758041382, "learning_rate": 1.4514160751847226e-05, "loss": 1.499, "step": 7131 }, { "epoch": 0.38992386862211775, "grad_norm": 1.4241167306900024, "learning_rate": 1.451253010075933e-05, "loss": 1.5998, "step": 7132 }, { "epoch": 0.3899785410658393, "grad_norm": 1.4972634315490723, "learning_rate": 1.4510899298987463e-05, "loss": 1.4863, "step": 7133 }, { "epoch": 0.3900332135095608, "grad_norm": 1.8526533842086792, "learning_rate": 1.4509268346586081e-05, "loss": 1.5521, "step": 7134 }, { "epoch": 0.3900878859532824, "grad_norm": 1.4252476692199707, "learning_rate": 1.4507637243609651e-05, "loss": 1.5277, "step": 7135 }, { "epoch": 0.39014255839700396, "grad_norm": 1.4069335460662842, "learning_rate": 1.4506005990112635e-05, "loss": 1.4492, "step": 7136 }, { "epoch": 0.3901972308407255, "grad_norm": 1.4244297742843628, "learning_rate": 1.4504374586149503e-05, "loss": 1.6288, "step": 7137 }, { "epoch": 0.39025190328444703, "grad_norm": 1.4347201585769653, "learning_rate": 1.4502743031774737e-05, "loss": 1.5426, "step": 7138 }, { "epoch": 0.3903065757281686, "grad_norm": 2.268920421600342, "learning_rate": 1.4501111327042817e-05, "loss": 1.4539, "step": 7139 }, { "epoch": 0.39036124817189016, "grad_norm": 1.737056851387024, "learning_rate": 1.4499479472008222e-05, "loss": 1.2909, "step": 7140 }, { "epoch": 0.3904159206156117, "grad_norm": 1.5299968719482422, "learning_rate": 1.4497847466725453e-05, "loss": 1.3965, "step": 7141 }, { "epoch": 0.3904705930593333, "grad_norm": 1.5113334655761719, "learning_rate": 1.4496215311249002e-05, "loss": 1.2578, "step": 7142 }, { "epoch": 0.39052526550305483, "grad_norm": 1.3641111850738525, "learning_rate": 1.4494583005633369e-05, "loss": 1.4312, "step": 7143 }, { "epoch": 0.39057993794677637, "grad_norm": 1.6365160942077637, "learning_rate": 1.4492950549933063e-05, "loss": 1.2641, "step": 7144 }, { "epoch": 0.3906346103904979, "grad_norm": 1.1630258560180664, "learning_rate": 1.4491317944202598e-05, "loss": 1.6887, "step": 7145 }, { "epoch": 0.3906892828342195, "grad_norm": 1.4165220260620117, "learning_rate": 1.4489685188496488e-05, "loss": 1.5333, "step": 7146 }, { "epoch": 0.39074395527794104, "grad_norm": 1.6581774950027466, "learning_rate": 1.448805228286925e-05, "loss": 1.287, "step": 7147 }, { "epoch": 0.3907986277216626, "grad_norm": 1.2602193355560303, "learning_rate": 1.4486419227375415e-05, "loss": 1.5982, "step": 7148 }, { "epoch": 0.39085330016538417, "grad_norm": 1.472606897354126, "learning_rate": 1.4484786022069517e-05, "loss": 1.3596, "step": 7149 }, { "epoch": 0.3909079726091057, "grad_norm": 1.5021356344223022, "learning_rate": 1.4483152667006088e-05, "loss": 1.5802, "step": 7150 }, { "epoch": 0.39096264505282724, "grad_norm": 2.100522518157959, "learning_rate": 1.4481519162239675e-05, "loss": 1.4516, "step": 7151 }, { "epoch": 0.3910173174965488, "grad_norm": 1.3329191207885742, "learning_rate": 1.4479885507824818e-05, "loss": 1.5458, "step": 7152 }, { "epoch": 0.39107198994027037, "grad_norm": 1.2672134637832642, "learning_rate": 1.447825170381607e-05, "loss": 1.7411, "step": 7153 }, { "epoch": 0.3911266623839919, "grad_norm": 1.4710332155227661, "learning_rate": 1.4476617750267991e-05, "loss": 1.2364, "step": 7154 }, { "epoch": 0.39118133482771345, "grad_norm": 1.3788866996765137, "learning_rate": 1.447498364723514e-05, "loss": 1.4162, "step": 7155 }, { "epoch": 0.39123600727143504, "grad_norm": 1.3083665370941162, "learning_rate": 1.4473349394772085e-05, "loss": 1.5633, "step": 7156 }, { "epoch": 0.3912906797151566, "grad_norm": 1.656556487083435, "learning_rate": 1.4471714992933397e-05, "loss": 1.6353, "step": 7157 }, { "epoch": 0.3913453521588781, "grad_norm": 1.4187761545181274, "learning_rate": 1.4470080441773651e-05, "loss": 1.5139, "step": 7158 }, { "epoch": 0.39140002460259965, "grad_norm": 1.5856164693832397, "learning_rate": 1.4468445741347432e-05, "loss": 1.4233, "step": 7159 }, { "epoch": 0.39145469704632124, "grad_norm": 1.5193908214569092, "learning_rate": 1.446681089170932e-05, "loss": 1.3934, "step": 7160 }, { "epoch": 0.3915093694900428, "grad_norm": 1.3380786180496216, "learning_rate": 1.4465175892913915e-05, "loss": 1.4244, "step": 7161 }, { "epoch": 0.3915640419337643, "grad_norm": 1.7380751371383667, "learning_rate": 1.4463540745015805e-05, "loss": 1.6046, "step": 7162 }, { "epoch": 0.3916187143774859, "grad_norm": 3.338465929031372, "learning_rate": 1.4461905448069597e-05, "loss": 1.4408, "step": 7163 }, { "epoch": 0.39167338682120745, "grad_norm": 1.50246000289917, "learning_rate": 1.4460270002129897e-05, "loss": 1.5696, "step": 7164 }, { "epoch": 0.391728059264929, "grad_norm": 1.1698991060256958, "learning_rate": 1.4458634407251315e-05, "loss": 1.2892, "step": 7165 }, { "epoch": 0.3917827317086505, "grad_norm": 1.731502652168274, "learning_rate": 1.4456998663488468e-05, "loss": 1.523, "step": 7166 }, { "epoch": 0.3918374041523721, "grad_norm": 1.386374831199646, "learning_rate": 1.4455362770895976e-05, "loss": 1.4695, "step": 7167 }, { "epoch": 0.39189207659609365, "grad_norm": 1.7415493726730347, "learning_rate": 1.4453726729528466e-05, "loss": 1.4602, "step": 7168 }, { "epoch": 0.3919467490398152, "grad_norm": 1.5063096284866333, "learning_rate": 1.4452090539440569e-05, "loss": 1.4808, "step": 7169 }, { "epoch": 0.3920014214835368, "grad_norm": 1.414181113243103, "learning_rate": 1.4450454200686922e-05, "loss": 1.2166, "step": 7170 }, { "epoch": 0.3920560939272583, "grad_norm": 1.5821038484573364, "learning_rate": 1.4448817713322169e-05, "loss": 1.45, "step": 7171 }, { "epoch": 0.39211076637097986, "grad_norm": 2.1899337768554688, "learning_rate": 1.4447181077400948e-05, "loss": 1.3132, "step": 7172 }, { "epoch": 0.3921654388147014, "grad_norm": 1.417356014251709, "learning_rate": 1.444554429297792e-05, "loss": 1.5643, "step": 7173 }, { "epoch": 0.392220111258423, "grad_norm": 1.6935081481933594, "learning_rate": 1.4443907360107734e-05, "loss": 1.3699, "step": 7174 }, { "epoch": 0.3922747837021445, "grad_norm": 1.5796736478805542, "learning_rate": 1.4442270278845052e-05, "loss": 1.5486, "step": 7175 }, { "epoch": 0.39232945614586606, "grad_norm": 1.3128029108047485, "learning_rate": 1.4440633049244541e-05, "loss": 1.3935, "step": 7176 }, { "epoch": 0.39238412858958766, "grad_norm": 1.3618589639663696, "learning_rate": 1.4438995671360875e-05, "loss": 1.4204, "step": 7177 }, { "epoch": 0.3924388010333092, "grad_norm": 1.4972034692764282, "learning_rate": 1.4437358145248727e-05, "loss": 1.3548, "step": 7178 }, { "epoch": 0.39249347347703073, "grad_norm": 1.5183120965957642, "learning_rate": 1.4435720470962778e-05, "loss": 1.5074, "step": 7179 }, { "epoch": 0.39254814592075227, "grad_norm": 1.3379759788513184, "learning_rate": 1.4434082648557712e-05, "loss": 1.3121, "step": 7180 }, { "epoch": 0.39260281836447386, "grad_norm": 1.1412409543991089, "learning_rate": 1.4432444678088222e-05, "loss": 1.5192, "step": 7181 }, { "epoch": 0.3926574908081954, "grad_norm": 1.4276502132415771, "learning_rate": 1.4430806559609e-05, "loss": 1.6281, "step": 7182 }, { "epoch": 0.39271216325191693, "grad_norm": 1.641216516494751, "learning_rate": 1.4429168293174756e-05, "loss": 1.5338, "step": 7183 }, { "epoch": 0.3927668356956385, "grad_norm": 1.7876802682876587, "learning_rate": 1.4427529878840184e-05, "loss": 1.3095, "step": 7184 }, { "epoch": 0.39282150813936006, "grad_norm": 1.358993411064148, "learning_rate": 1.4425891316660005e-05, "loss": 1.4583, "step": 7185 }, { "epoch": 0.3928761805830816, "grad_norm": 1.5616862773895264, "learning_rate": 1.4424252606688924e-05, "loss": 1.5939, "step": 7186 }, { "epoch": 0.39293085302680314, "grad_norm": 2.026440143585205, "learning_rate": 1.442261374898167e-05, "loss": 1.2061, "step": 7187 }, { "epoch": 0.39298552547052473, "grad_norm": 1.9477779865264893, "learning_rate": 1.4420974743592964e-05, "loss": 1.3879, "step": 7188 }, { "epoch": 0.39304019791424627, "grad_norm": 1.4549216032028198, "learning_rate": 1.4419335590577537e-05, "loss": 1.713, "step": 7189 }, { "epoch": 0.3930948703579678, "grad_norm": 1.6803961992263794, "learning_rate": 1.4417696289990127e-05, "loss": 1.2936, "step": 7190 }, { "epoch": 0.3931495428016894, "grad_norm": 1.7309823036193848, "learning_rate": 1.4416056841885469e-05, "loss": 1.4864, "step": 7191 }, { "epoch": 0.39320421524541094, "grad_norm": 1.3620638847351074, "learning_rate": 1.4414417246318308e-05, "loss": 1.4769, "step": 7192 }, { "epoch": 0.3932588876891325, "grad_norm": 1.5558685064315796, "learning_rate": 1.44127775033434e-05, "loss": 1.5946, "step": 7193 }, { "epoch": 0.393313560132854, "grad_norm": 1.2016615867614746, "learning_rate": 1.4411137613015496e-05, "loss": 1.6062, "step": 7194 }, { "epoch": 0.3933682325765756, "grad_norm": 1.7449626922607422, "learning_rate": 1.4409497575389352e-05, "loss": 1.2813, "step": 7195 }, { "epoch": 0.39342290502029714, "grad_norm": 1.1600341796875, "learning_rate": 1.440785739051974e-05, "loss": 1.556, "step": 7196 }, { "epoch": 0.3934775774640187, "grad_norm": 1.5114588737487793, "learning_rate": 1.4406217058461427e-05, "loss": 1.6071, "step": 7197 }, { "epoch": 0.3935322499077403, "grad_norm": 1.5570030212402344, "learning_rate": 1.4404576579269187e-05, "loss": 1.2271, "step": 7198 }, { "epoch": 0.3935869223514618, "grad_norm": 1.3373968601226807, "learning_rate": 1.4402935952997799e-05, "loss": 1.5214, "step": 7199 }, { "epoch": 0.39364159479518335, "grad_norm": 1.418100357055664, "learning_rate": 1.4401295179702046e-05, "loss": 1.519, "step": 7200 }, { "epoch": 0.3936962672389049, "grad_norm": 1.4316061735153198, "learning_rate": 1.4399654259436721e-05, "loss": 1.6328, "step": 7201 }, { "epoch": 0.3937509396826265, "grad_norm": 1.370835781097412, "learning_rate": 1.4398013192256615e-05, "loss": 1.5448, "step": 7202 }, { "epoch": 0.393805612126348, "grad_norm": 1.388464331626892, "learning_rate": 1.4396371978216528e-05, "loss": 1.3482, "step": 7203 }, { "epoch": 0.39386028457006955, "grad_norm": 1.203192949295044, "learning_rate": 1.4394730617371266e-05, "loss": 1.5855, "step": 7204 }, { "epoch": 0.39391495701379114, "grad_norm": 1.2626616954803467, "learning_rate": 1.4393089109775635e-05, "loss": 1.506, "step": 7205 }, { "epoch": 0.3939696294575127, "grad_norm": 1.3814144134521484, "learning_rate": 1.4391447455484448e-05, "loss": 1.5613, "step": 7206 }, { "epoch": 0.3940243019012342, "grad_norm": 1.1177022457122803, "learning_rate": 1.438980565455253e-05, "loss": 1.6335, "step": 7207 }, { "epoch": 0.39407897434495576, "grad_norm": 1.3937227725982666, "learning_rate": 1.4388163707034697e-05, "loss": 1.5699, "step": 7208 }, { "epoch": 0.39413364678867735, "grad_norm": 1.8049805164337158, "learning_rate": 1.438652161298578e-05, "loss": 1.5071, "step": 7209 }, { "epoch": 0.3941883192323989, "grad_norm": 1.4120172262191772, "learning_rate": 1.4384879372460617e-05, "loss": 1.481, "step": 7210 }, { "epoch": 0.3942429916761204, "grad_norm": 1.407534122467041, "learning_rate": 1.4383236985514037e-05, "loss": 1.4101, "step": 7211 }, { "epoch": 0.394297664119842, "grad_norm": 3.453523874282837, "learning_rate": 1.4381594452200894e-05, "loss": 1.5918, "step": 7212 }, { "epoch": 0.39435233656356355, "grad_norm": 1.5437120199203491, "learning_rate": 1.4379951772576024e-05, "loss": 1.3405, "step": 7213 }, { "epoch": 0.3944070090072851, "grad_norm": 1.5589998960494995, "learning_rate": 1.4378308946694291e-05, "loss": 1.1355, "step": 7214 }, { "epoch": 0.39446168145100663, "grad_norm": 1.3090566396713257, "learning_rate": 1.437666597461055e-05, "loss": 1.8005, "step": 7215 }, { "epoch": 0.3945163538947282, "grad_norm": 1.4348270893096924, "learning_rate": 1.4375022856379657e-05, "loss": 1.4118, "step": 7216 }, { "epoch": 0.39457102633844976, "grad_norm": 1.5828195810317993, "learning_rate": 1.4373379592056487e-05, "loss": 1.4906, "step": 7217 }, { "epoch": 0.3946256987821713, "grad_norm": 1.5659352540969849, "learning_rate": 1.4371736181695908e-05, "loss": 1.272, "step": 7218 }, { "epoch": 0.3946803712258929, "grad_norm": 1.3983832597732544, "learning_rate": 1.4370092625352803e-05, "loss": 1.7642, "step": 7219 }, { "epoch": 0.3947350436696144, "grad_norm": 1.5283573865890503, "learning_rate": 1.4368448923082048e-05, "loss": 1.2704, "step": 7220 }, { "epoch": 0.39478971611333596, "grad_norm": 1.3798331022262573, "learning_rate": 1.4366805074938533e-05, "loss": 1.3942, "step": 7221 }, { "epoch": 0.3948443885570575, "grad_norm": 1.6122936010360718, "learning_rate": 1.436516108097715e-05, "loss": 1.4227, "step": 7222 }, { "epoch": 0.3948990610007791, "grad_norm": 1.483542561531067, "learning_rate": 1.4363516941252795e-05, "loss": 1.4356, "step": 7223 }, { "epoch": 0.39495373344450063, "grad_norm": 1.6358317136764526, "learning_rate": 1.4361872655820371e-05, "loss": 1.3604, "step": 7224 }, { "epoch": 0.39500840588822217, "grad_norm": 1.7651431560516357, "learning_rate": 1.436022822473478e-05, "loss": 1.5502, "step": 7225 }, { "epoch": 0.39506307833194376, "grad_norm": 1.6906516551971436, "learning_rate": 1.435858364805094e-05, "loss": 1.2573, "step": 7226 }, { "epoch": 0.3951177507756653, "grad_norm": 1.393196940422058, "learning_rate": 1.4356938925823764e-05, "loss": 1.4824, "step": 7227 }, { "epoch": 0.39517242321938684, "grad_norm": 1.4786781072616577, "learning_rate": 1.4355294058108173e-05, "loss": 1.3944, "step": 7228 }, { "epoch": 0.3952270956631084, "grad_norm": 1.2977122068405151, "learning_rate": 1.4353649044959094e-05, "loss": 1.3871, "step": 7229 }, { "epoch": 0.39528176810682997, "grad_norm": 1.230331540107727, "learning_rate": 1.4352003886431459e-05, "loss": 1.7429, "step": 7230 }, { "epoch": 0.3953364405505515, "grad_norm": 1.5848784446716309, "learning_rate": 1.4350358582580197e-05, "loss": 1.4662, "step": 7231 }, { "epoch": 0.39539111299427304, "grad_norm": 1.385634422302246, "learning_rate": 1.4348713133460257e-05, "loss": 1.3946, "step": 7232 }, { "epoch": 0.39544578543799463, "grad_norm": 1.4962635040283203, "learning_rate": 1.4347067539126581e-05, "loss": 1.5384, "step": 7233 }, { "epoch": 0.39550045788171617, "grad_norm": 1.5663495063781738, "learning_rate": 1.4345421799634118e-05, "loss": 1.3884, "step": 7234 }, { "epoch": 0.3955551303254377, "grad_norm": 1.4184982776641846, "learning_rate": 1.4343775915037822e-05, "loss": 1.29, "step": 7235 }, { "epoch": 0.3956098027691593, "grad_norm": 1.422788381576538, "learning_rate": 1.434212988539266e-05, "loss": 1.5073, "step": 7236 }, { "epoch": 0.39566447521288084, "grad_norm": 1.5972321033477783, "learning_rate": 1.434048371075359e-05, "loss": 1.4895, "step": 7237 }, { "epoch": 0.3957191476566024, "grad_norm": 1.7144825458526611, "learning_rate": 1.4338837391175582e-05, "loss": 1.513, "step": 7238 }, { "epoch": 0.3957738201003239, "grad_norm": 1.7479606866836548, "learning_rate": 1.4337190926713613e-05, "loss": 1.4329, "step": 7239 }, { "epoch": 0.3958284925440455, "grad_norm": 1.2581639289855957, "learning_rate": 1.4335544317422663e-05, "loss": 1.614, "step": 7240 }, { "epoch": 0.39588316498776704, "grad_norm": 1.3884762525558472, "learning_rate": 1.4333897563357712e-05, "loss": 1.5113, "step": 7241 }, { "epoch": 0.3959378374314886, "grad_norm": 1.321347951889038, "learning_rate": 1.4332250664573754e-05, "loss": 1.4705, "step": 7242 }, { "epoch": 0.3959925098752102, "grad_norm": 1.6524074077606201, "learning_rate": 1.433060362112578e-05, "loss": 1.2935, "step": 7243 }, { "epoch": 0.3960471823189317, "grad_norm": 1.3959076404571533, "learning_rate": 1.4328956433068789e-05, "loss": 1.2942, "step": 7244 }, { "epoch": 0.39610185476265325, "grad_norm": 1.3834559917449951, "learning_rate": 1.4327309100457783e-05, "loss": 1.4764, "step": 7245 }, { "epoch": 0.3961565272063748, "grad_norm": 1.9797592163085938, "learning_rate": 1.4325661623347772e-05, "loss": 1.2217, "step": 7246 }, { "epoch": 0.3962111996500964, "grad_norm": 2.177243947982788, "learning_rate": 1.432401400179377e-05, "loss": 1.5204, "step": 7247 }, { "epoch": 0.3962658720938179, "grad_norm": 1.7718638181686401, "learning_rate": 1.4322366235850794e-05, "loss": 1.531, "step": 7248 }, { "epoch": 0.39632054453753945, "grad_norm": 1.25528883934021, "learning_rate": 1.4320718325573865e-05, "loss": 1.3702, "step": 7249 }, { "epoch": 0.39637521698126105, "grad_norm": 1.6635053157806396, "learning_rate": 1.4319070271018016e-05, "loss": 1.4185, "step": 7250 }, { "epoch": 0.3964298894249826, "grad_norm": 2.4201064109802246, "learning_rate": 1.4317422072238271e-05, "loss": 1.3472, "step": 7251 }, { "epoch": 0.3964845618687041, "grad_norm": 1.4942198991775513, "learning_rate": 1.4315773729289673e-05, "loss": 1.3832, "step": 7252 }, { "epoch": 0.39653923431242566, "grad_norm": 1.7942954301834106, "learning_rate": 1.4314125242227263e-05, "loss": 1.473, "step": 7253 }, { "epoch": 0.39659390675614725, "grad_norm": 1.306586742401123, "learning_rate": 1.431247661110609e-05, "loss": 1.1871, "step": 7254 }, { "epoch": 0.3966485791998688, "grad_norm": 1.2936556339263916, "learning_rate": 1.4310827835981203e-05, "loss": 1.4906, "step": 7255 }, { "epoch": 0.3967032516435903, "grad_norm": 1.7759424448013306, "learning_rate": 1.4309178916907658e-05, "loss": 1.3153, "step": 7256 }, { "epoch": 0.3967579240873119, "grad_norm": 1.4208647012710571, "learning_rate": 1.4307529853940519e-05, "loss": 1.471, "step": 7257 }, { "epoch": 0.39681259653103346, "grad_norm": 1.3671284914016724, "learning_rate": 1.4305880647134847e-05, "loss": 1.3209, "step": 7258 }, { "epoch": 0.396867268974755, "grad_norm": 1.3055585622787476, "learning_rate": 1.4304231296545714e-05, "loss": 1.7239, "step": 7259 }, { "epoch": 0.39692194141847653, "grad_norm": 1.5462487936019897, "learning_rate": 1.4302581802228202e-05, "loss": 1.3486, "step": 7260 }, { "epoch": 0.3969766138621981, "grad_norm": 1.7330257892608643, "learning_rate": 1.4300932164237386e-05, "loss": 1.4715, "step": 7261 }, { "epoch": 0.39703128630591966, "grad_norm": 1.334163784980774, "learning_rate": 1.4299282382628355e-05, "loss": 1.3781, "step": 7262 }, { "epoch": 0.3970859587496412, "grad_norm": 1.4467167854309082, "learning_rate": 1.4297632457456194e-05, "loss": 1.4856, "step": 7263 }, { "epoch": 0.3971406311933628, "grad_norm": 1.252593755722046, "learning_rate": 1.4295982388776003e-05, "loss": 1.3087, "step": 7264 }, { "epoch": 0.39719530363708433, "grad_norm": 1.7430837154388428, "learning_rate": 1.4294332176642875e-05, "loss": 1.3666, "step": 7265 }, { "epoch": 0.39724997608080587, "grad_norm": 1.5348061323165894, "learning_rate": 1.429268182111192e-05, "loss": 1.6119, "step": 7266 }, { "epoch": 0.3973046485245274, "grad_norm": 1.7216459512710571, "learning_rate": 1.4291031322238247e-05, "loss": 1.577, "step": 7267 }, { "epoch": 0.397359320968249, "grad_norm": 1.670975685119629, "learning_rate": 1.428938068007697e-05, "loss": 1.1669, "step": 7268 }, { "epoch": 0.39741399341197053, "grad_norm": 1.4816269874572754, "learning_rate": 1.4287729894683207e-05, "loss": 1.4328, "step": 7269 }, { "epoch": 0.39746866585569207, "grad_norm": 1.5647854804992676, "learning_rate": 1.4286078966112078e-05, "loss": 1.5721, "step": 7270 }, { "epoch": 0.39752333829941366, "grad_norm": 1.3030169010162354, "learning_rate": 1.4284427894418717e-05, "loss": 1.4056, "step": 7271 }, { "epoch": 0.3975780107431352, "grad_norm": 1.6572576761245728, "learning_rate": 1.4282776679658255e-05, "loss": 1.3774, "step": 7272 }, { "epoch": 0.39763268318685674, "grad_norm": 1.6670664548873901, "learning_rate": 1.4281125321885826e-05, "loss": 1.5978, "step": 7273 }, { "epoch": 0.3976873556305783, "grad_norm": 1.6853814125061035, "learning_rate": 1.427947382115658e-05, "loss": 1.5967, "step": 7274 }, { "epoch": 0.39774202807429987, "grad_norm": 1.9783787727355957, "learning_rate": 1.4277822177525664e-05, "loss": 1.2986, "step": 7275 }, { "epoch": 0.3977967005180214, "grad_norm": 1.4967321157455444, "learning_rate": 1.4276170391048224e-05, "loss": 1.5013, "step": 7276 }, { "epoch": 0.39785137296174294, "grad_norm": 1.2371279001235962, "learning_rate": 1.4274518461779421e-05, "loss": 1.6292, "step": 7277 }, { "epoch": 0.39790604540546454, "grad_norm": 1.1684571504592896, "learning_rate": 1.4272866389774415e-05, "loss": 1.3652, "step": 7278 }, { "epoch": 0.3979607178491861, "grad_norm": 1.4130890369415283, "learning_rate": 1.4271214175088374e-05, "loss": 1.3862, "step": 7279 }, { "epoch": 0.3980153902929076, "grad_norm": 1.5384275913238525, "learning_rate": 1.426956181777647e-05, "loss": 1.6667, "step": 7280 }, { "epoch": 0.39807006273662915, "grad_norm": 1.881072759628296, "learning_rate": 1.4267909317893875e-05, "loss": 1.4163, "step": 7281 }, { "epoch": 0.39812473518035074, "grad_norm": 1.6114003658294678, "learning_rate": 1.4266256675495777e-05, "loss": 1.331, "step": 7282 }, { "epoch": 0.3981794076240723, "grad_norm": 1.5083918571472168, "learning_rate": 1.4264603890637357e-05, "loss": 1.3968, "step": 7283 }, { "epoch": 0.3982340800677938, "grad_norm": 1.4404560327529907, "learning_rate": 1.4262950963373802e-05, "loss": 1.3139, "step": 7284 }, { "epoch": 0.3982887525115154, "grad_norm": 1.262699007987976, "learning_rate": 1.4261297893760315e-05, "loss": 1.4308, "step": 7285 }, { "epoch": 0.39834342495523695, "grad_norm": 1.35447359085083, "learning_rate": 1.425964468185209e-05, "loss": 1.4261, "step": 7286 }, { "epoch": 0.3983980973989585, "grad_norm": 1.2634968757629395, "learning_rate": 1.4257991327704332e-05, "loss": 1.425, "step": 7287 }, { "epoch": 0.39845276984268, "grad_norm": 1.74425208568573, "learning_rate": 1.4256337831372256e-05, "loss": 1.4509, "step": 7288 }, { "epoch": 0.3985074422864016, "grad_norm": 1.4395782947540283, "learning_rate": 1.425468419291107e-05, "loss": 1.5651, "step": 7289 }, { "epoch": 0.39856211473012315, "grad_norm": 1.3959991931915283, "learning_rate": 1.4253030412375994e-05, "loss": 1.4475, "step": 7290 }, { "epoch": 0.3986167871738447, "grad_norm": 1.4426288604736328, "learning_rate": 1.4251376489822253e-05, "loss": 1.5338, "step": 7291 }, { "epoch": 0.3986714596175663, "grad_norm": 1.2805094718933105, "learning_rate": 1.4249722425305077e-05, "loss": 1.3021, "step": 7292 }, { "epoch": 0.3987261320612878, "grad_norm": 1.7739055156707764, "learning_rate": 1.4248068218879691e-05, "loss": 1.284, "step": 7293 }, { "epoch": 0.39878080450500936, "grad_norm": 1.3715360164642334, "learning_rate": 1.4246413870601343e-05, "loss": 1.4959, "step": 7294 }, { "epoch": 0.3988354769487309, "grad_norm": 1.4518022537231445, "learning_rate": 1.4244759380525273e-05, "loss": 1.2819, "step": 7295 }, { "epoch": 0.3988901493924525, "grad_norm": 1.574214220046997, "learning_rate": 1.4243104748706724e-05, "loss": 1.3323, "step": 7296 }, { "epoch": 0.398944821836174, "grad_norm": 1.0743191242218018, "learning_rate": 1.4241449975200951e-05, "loss": 1.2888, "step": 7297 }, { "epoch": 0.39899949427989556, "grad_norm": 2.0439324378967285, "learning_rate": 1.4239795060063211e-05, "loss": 1.5371, "step": 7298 }, { "epoch": 0.39905416672361715, "grad_norm": 2.316951036453247, "learning_rate": 1.4238140003348766e-05, "loss": 1.4129, "step": 7299 }, { "epoch": 0.3991088391673387, "grad_norm": 1.4234248399734497, "learning_rate": 1.4236484805112878e-05, "loss": 1.3974, "step": 7300 }, { "epoch": 0.39916351161106023, "grad_norm": 1.334225058555603, "learning_rate": 1.4234829465410824e-05, "loss": 1.3052, "step": 7301 }, { "epoch": 0.39921818405478177, "grad_norm": 1.6719728708267212, "learning_rate": 1.4233173984297876e-05, "loss": 1.1574, "step": 7302 }, { "epoch": 0.39927285649850336, "grad_norm": 1.3982008695602417, "learning_rate": 1.4231518361829317e-05, "loss": 1.5642, "step": 7303 }, { "epoch": 0.3993275289422249, "grad_norm": 1.6216206550598145, "learning_rate": 1.4229862598060426e-05, "loss": 1.2638, "step": 7304 }, { "epoch": 0.39938220138594643, "grad_norm": 1.293664813041687, "learning_rate": 1.42282066930465e-05, "loss": 1.4784, "step": 7305 }, { "epoch": 0.399436873829668, "grad_norm": 1.4697314500808716, "learning_rate": 1.4226550646842831e-05, "loss": 1.3712, "step": 7306 }, { "epoch": 0.39949154627338956, "grad_norm": 1.162454605102539, "learning_rate": 1.4224894459504717e-05, "loss": 1.5871, "step": 7307 }, { "epoch": 0.3995462187171111, "grad_norm": 1.3958286046981812, "learning_rate": 1.4223238131087465e-05, "loss": 1.2449, "step": 7308 }, { "epoch": 0.39960089116083264, "grad_norm": 1.9351625442504883, "learning_rate": 1.4221581661646377e-05, "loss": 1.3854, "step": 7309 }, { "epoch": 0.39965556360455423, "grad_norm": 1.7859282493591309, "learning_rate": 1.4219925051236777e-05, "loss": 1.4129, "step": 7310 }, { "epoch": 0.39971023604827577, "grad_norm": 1.3997137546539307, "learning_rate": 1.4218268299913973e-05, "loss": 1.6819, "step": 7311 }, { "epoch": 0.3997649084919973, "grad_norm": 2.1268246173858643, "learning_rate": 1.4216611407733292e-05, "loss": 1.4533, "step": 7312 }, { "epoch": 0.3998195809357189, "grad_norm": 2.0184133052825928, "learning_rate": 1.4214954374750062e-05, "loss": 1.5945, "step": 7313 }, { "epoch": 0.39987425337944044, "grad_norm": 1.457910180091858, "learning_rate": 1.4213297201019618e-05, "loss": 1.5177, "step": 7314 }, { "epoch": 0.399928925823162, "grad_norm": 1.320469617843628, "learning_rate": 1.421163988659729e-05, "loss": 1.4652, "step": 7315 }, { "epoch": 0.3999835982668835, "grad_norm": 1.4666110277175903, "learning_rate": 1.4209982431538425e-05, "loss": 1.5363, "step": 7316 }, { "epoch": 0.4000382707106051, "grad_norm": 1.478973388671875, "learning_rate": 1.4208324835898367e-05, "loss": 1.3284, "step": 7317 }, { "epoch": 0.40009294315432664, "grad_norm": 1.7322169542312622, "learning_rate": 1.4206667099732467e-05, "loss": 1.4277, "step": 7318 }, { "epoch": 0.4001476155980482, "grad_norm": 1.8279017210006714, "learning_rate": 1.420500922309608e-05, "loss": 1.2969, "step": 7319 }, { "epoch": 0.40020228804176977, "grad_norm": 1.4112601280212402, "learning_rate": 1.420335120604457e-05, "loss": 1.3927, "step": 7320 }, { "epoch": 0.4002569604854913, "grad_norm": 1.306929349899292, "learning_rate": 1.4201693048633302e-05, "loss": 1.606, "step": 7321 }, { "epoch": 0.40031163292921285, "grad_norm": 1.3618063926696777, "learning_rate": 1.420003475091764e-05, "loss": 1.5386, "step": 7322 }, { "epoch": 0.4003663053729344, "grad_norm": 1.3601635694503784, "learning_rate": 1.4198376312952962e-05, "loss": 1.5174, "step": 7323 }, { "epoch": 0.400420977816656, "grad_norm": 1.6534490585327148, "learning_rate": 1.4196717734794647e-05, "loss": 1.4131, "step": 7324 }, { "epoch": 0.4004756502603775, "grad_norm": 1.6316022872924805, "learning_rate": 1.4195059016498081e-05, "loss": 1.4046, "step": 7325 }, { "epoch": 0.40053032270409905, "grad_norm": 1.534224033355713, "learning_rate": 1.419340015811865e-05, "loss": 1.442, "step": 7326 }, { "epoch": 0.40058499514782064, "grad_norm": 1.5852220058441162, "learning_rate": 1.4191741159711746e-05, "loss": 1.6559, "step": 7327 }, { "epoch": 0.4006396675915422, "grad_norm": 1.3346962928771973, "learning_rate": 1.419008202133277e-05, "loss": 1.3351, "step": 7328 }, { "epoch": 0.4006943400352637, "grad_norm": 1.1483207941055298, "learning_rate": 1.418842274303712e-05, "loss": 1.4567, "step": 7329 }, { "epoch": 0.40074901247898526, "grad_norm": 1.4310733079910278, "learning_rate": 1.4186763324880208e-05, "loss": 1.433, "step": 7330 }, { "epoch": 0.40080368492270685, "grad_norm": 1.7284488677978516, "learning_rate": 1.4185103766917445e-05, "loss": 1.3712, "step": 7331 }, { "epoch": 0.4008583573664284, "grad_norm": 1.5931891202926636, "learning_rate": 1.4183444069204246e-05, "loss": 1.4837, "step": 7332 }, { "epoch": 0.4009130298101499, "grad_norm": 1.414831519126892, "learning_rate": 1.4181784231796034e-05, "loss": 1.608, "step": 7333 }, { "epoch": 0.4009677022538715, "grad_norm": 1.2932208776474, "learning_rate": 1.4180124254748233e-05, "loss": 1.435, "step": 7334 }, { "epoch": 0.40102237469759305, "grad_norm": 1.3621073961257935, "learning_rate": 1.4178464138116272e-05, "loss": 1.6348, "step": 7335 }, { "epoch": 0.4010770471413146, "grad_norm": 1.3230265378952026, "learning_rate": 1.4176803881955592e-05, "loss": 1.5279, "step": 7336 }, { "epoch": 0.40113171958503613, "grad_norm": 1.6193724870681763, "learning_rate": 1.4175143486321626e-05, "loss": 1.5685, "step": 7337 }, { "epoch": 0.4011863920287577, "grad_norm": 1.7535820007324219, "learning_rate": 1.4173482951269823e-05, "loss": 1.5257, "step": 7338 }, { "epoch": 0.40124106447247926, "grad_norm": 1.4053606986999512, "learning_rate": 1.417182227685563e-05, "loss": 1.2716, "step": 7339 }, { "epoch": 0.4012957369162008, "grad_norm": 1.5206128358840942, "learning_rate": 1.4170161463134502e-05, "loss": 1.5117, "step": 7340 }, { "epoch": 0.4013504093599224, "grad_norm": 1.4161323308944702, "learning_rate": 1.41685005101619e-05, "loss": 1.2282, "step": 7341 }, { "epoch": 0.4014050818036439, "grad_norm": 1.4881013631820679, "learning_rate": 1.4166839417993281e-05, "loss": 1.2927, "step": 7342 }, { "epoch": 0.40145975424736546, "grad_norm": 1.4848331212997437, "learning_rate": 1.4165178186684116e-05, "loss": 1.4631, "step": 7343 }, { "epoch": 0.401514426691087, "grad_norm": 1.3491902351379395, "learning_rate": 1.4163516816289878e-05, "loss": 1.6278, "step": 7344 }, { "epoch": 0.4015690991348086, "grad_norm": 1.7662038803100586, "learning_rate": 1.4161855306866043e-05, "loss": 1.1078, "step": 7345 }, { "epoch": 0.40162377157853013, "grad_norm": 1.3106775283813477, "learning_rate": 1.4160193658468093e-05, "loss": 1.5623, "step": 7346 }, { "epoch": 0.40167844402225167, "grad_norm": 1.5335338115692139, "learning_rate": 1.4158531871151517e-05, "loss": 1.5253, "step": 7347 }, { "epoch": 0.40173311646597326, "grad_norm": 1.1972209215164185, "learning_rate": 1.4156869944971804e-05, "loss": 1.4361, "step": 7348 }, { "epoch": 0.4017877889096948, "grad_norm": 1.4879529476165771, "learning_rate": 1.4155207879984447e-05, "loss": 1.5721, "step": 7349 }, { "epoch": 0.40184246135341634, "grad_norm": 1.3433187007904053, "learning_rate": 1.415354567624495e-05, "loss": 1.5376, "step": 7350 }, { "epoch": 0.4018971337971379, "grad_norm": 1.4905283451080322, "learning_rate": 1.4151883333808811e-05, "loss": 1.3948, "step": 7351 }, { "epoch": 0.40195180624085947, "grad_norm": 1.6066724061965942, "learning_rate": 1.4150220852731551e-05, "loss": 1.3231, "step": 7352 }, { "epoch": 0.402006478684581, "grad_norm": 1.2832682132720947, "learning_rate": 1.4148558233068677e-05, "loss": 1.4543, "step": 7353 }, { "epoch": 0.40206115112830254, "grad_norm": 1.3543518781661987, "learning_rate": 1.4146895474875706e-05, "loss": 1.3263, "step": 7354 }, { "epoch": 0.40211582357202413, "grad_norm": 1.259977102279663, "learning_rate": 1.4145232578208165e-05, "loss": 1.3583, "step": 7355 }, { "epoch": 0.40217049601574567, "grad_norm": 1.524775743484497, "learning_rate": 1.414356954312158e-05, "loss": 1.263, "step": 7356 }, { "epoch": 0.4022251684594672, "grad_norm": 1.5689424276351929, "learning_rate": 1.4141906369671488e-05, "loss": 1.2555, "step": 7357 }, { "epoch": 0.40227984090318875, "grad_norm": 1.7431483268737793, "learning_rate": 1.4140243057913418e-05, "loss": 1.2976, "step": 7358 }, { "epoch": 0.40233451334691034, "grad_norm": 1.2307226657867432, "learning_rate": 1.4138579607902922e-05, "loss": 1.5288, "step": 7359 }, { "epoch": 0.4023891857906319, "grad_norm": 2.3490149974823, "learning_rate": 1.4136916019695541e-05, "loss": 1.1532, "step": 7360 }, { "epoch": 0.4024438582343534, "grad_norm": 1.6932471990585327, "learning_rate": 1.4135252293346824e-05, "loss": 1.593, "step": 7361 }, { "epoch": 0.402498530678075, "grad_norm": 1.218105673789978, "learning_rate": 1.4133588428912333e-05, "loss": 1.5959, "step": 7362 }, { "epoch": 0.40255320312179654, "grad_norm": 1.7960920333862305, "learning_rate": 1.4131924426447621e-05, "loss": 1.1537, "step": 7363 }, { "epoch": 0.4026078755655181, "grad_norm": 1.594980001449585, "learning_rate": 1.4130260286008257e-05, "loss": 1.4103, "step": 7364 }, { "epoch": 0.4026625480092396, "grad_norm": 1.7377345561981201, "learning_rate": 1.4128596007649808e-05, "loss": 1.5479, "step": 7365 }, { "epoch": 0.4027172204529612, "grad_norm": 1.3150168657302856, "learning_rate": 1.4126931591427855e-05, "loss": 1.445, "step": 7366 }, { "epoch": 0.40277189289668275, "grad_norm": 1.971764326095581, "learning_rate": 1.4125267037397972e-05, "loss": 1.1074, "step": 7367 }, { "epoch": 0.4028265653404043, "grad_norm": 1.745542049407959, "learning_rate": 1.412360234561574e-05, "loss": 1.5493, "step": 7368 }, { "epoch": 0.4028812377841259, "grad_norm": 1.2005068063735962, "learning_rate": 1.4121937516136747e-05, "loss": 1.6366, "step": 7369 }, { "epoch": 0.4029359102278474, "grad_norm": 1.6301913261413574, "learning_rate": 1.4120272549016591e-05, "loss": 1.8601, "step": 7370 }, { "epoch": 0.40299058267156895, "grad_norm": 1.2730090618133545, "learning_rate": 1.4118607444310866e-05, "loss": 1.492, "step": 7371 }, { "epoch": 0.4030452551152905, "grad_norm": 1.2523391246795654, "learning_rate": 1.4116942202075175e-05, "loss": 1.4731, "step": 7372 }, { "epoch": 0.4030999275590121, "grad_norm": 1.6218668222427368, "learning_rate": 1.4115276822365123e-05, "loss": 1.4232, "step": 7373 }, { "epoch": 0.4031546000027336, "grad_norm": 1.3258854150772095, "learning_rate": 1.4113611305236317e-05, "loss": 1.3288, "step": 7374 }, { "epoch": 0.40320927244645516, "grad_norm": 1.6407090425491333, "learning_rate": 1.4111945650744379e-05, "loss": 1.353, "step": 7375 }, { "epoch": 0.40326394489017675, "grad_norm": 1.3017276525497437, "learning_rate": 1.4110279858944928e-05, "loss": 1.51, "step": 7376 }, { "epoch": 0.4033186173338983, "grad_norm": 1.4324886798858643, "learning_rate": 1.4108613929893586e-05, "loss": 1.319, "step": 7377 }, { "epoch": 0.4033732897776198, "grad_norm": 1.3672505617141724, "learning_rate": 1.4106947863645983e-05, "loss": 1.2849, "step": 7378 }, { "epoch": 0.40342796222134136, "grad_norm": 1.4960945844650269, "learning_rate": 1.4105281660257757e-05, "loss": 1.4924, "step": 7379 }, { "epoch": 0.40348263466506296, "grad_norm": 1.2488218545913696, "learning_rate": 1.410361531978454e-05, "loss": 1.6239, "step": 7380 }, { "epoch": 0.4035373071087845, "grad_norm": 1.190600872039795, "learning_rate": 1.4101948842281978e-05, "loss": 1.4557, "step": 7381 }, { "epoch": 0.40359197955250603, "grad_norm": 1.249596118927002, "learning_rate": 1.410028222780572e-05, "loss": 1.4522, "step": 7382 }, { "epoch": 0.4036466519962276, "grad_norm": 1.4897786378860474, "learning_rate": 1.4098615476411416e-05, "loss": 1.289, "step": 7383 }, { "epoch": 0.40370132443994916, "grad_norm": 1.4088259935379028, "learning_rate": 1.4096948588154723e-05, "loss": 1.3784, "step": 7384 }, { "epoch": 0.4037559968836707, "grad_norm": 1.5666673183441162, "learning_rate": 1.4095281563091303e-05, "loss": 1.4104, "step": 7385 }, { "epoch": 0.40381066932739224, "grad_norm": 1.4129787683486938, "learning_rate": 1.4093614401276826e-05, "loss": 1.456, "step": 7386 }, { "epoch": 0.40386534177111383, "grad_norm": 1.515453577041626, "learning_rate": 1.4091947102766953e-05, "loss": 1.6416, "step": 7387 }, { "epoch": 0.40392001421483537, "grad_norm": 1.2555140256881714, "learning_rate": 1.4090279667617366e-05, "loss": 1.376, "step": 7388 }, { "epoch": 0.4039746866585569, "grad_norm": 1.3804603815078735, "learning_rate": 1.408861209588374e-05, "loss": 1.2609, "step": 7389 }, { "epoch": 0.4040293591022785, "grad_norm": 1.5761182308197021, "learning_rate": 1.4086944387621766e-05, "loss": 1.1131, "step": 7390 }, { "epoch": 0.40408403154600003, "grad_norm": 1.3454090356826782, "learning_rate": 1.4085276542887128e-05, "loss": 1.4733, "step": 7391 }, { "epoch": 0.40413870398972157, "grad_norm": 1.3449442386627197, "learning_rate": 1.4083608561735517e-05, "loss": 1.5894, "step": 7392 }, { "epoch": 0.4041933764334431, "grad_norm": 1.3234668970108032, "learning_rate": 1.4081940444222637e-05, "loss": 1.4758, "step": 7393 }, { "epoch": 0.4042480488771647, "grad_norm": 1.3846653699874878, "learning_rate": 1.4080272190404185e-05, "loss": 1.4338, "step": 7394 }, { "epoch": 0.40430272132088624, "grad_norm": 1.1005051136016846, "learning_rate": 1.4078603800335871e-05, "loss": 1.5127, "step": 7395 }, { "epoch": 0.4043573937646078, "grad_norm": 1.4242299795150757, "learning_rate": 1.4076935274073402e-05, "loss": 1.449, "step": 7396 }, { "epoch": 0.40441206620832937, "grad_norm": 1.2870134115219116, "learning_rate": 1.4075266611672502e-05, "loss": 1.4158, "step": 7397 }, { "epoch": 0.4044667386520509, "grad_norm": 1.9078646898269653, "learning_rate": 1.4073597813188884e-05, "loss": 1.3624, "step": 7398 }, { "epoch": 0.40452141109577244, "grad_norm": 1.467490792274475, "learning_rate": 1.4071928878678278e-05, "loss": 1.1924, "step": 7399 }, { "epoch": 0.404576083539494, "grad_norm": 1.454422116279602, "learning_rate": 1.4070259808196411e-05, "loss": 1.4967, "step": 7400 }, { "epoch": 0.4046307559832156, "grad_norm": 1.2550126314163208, "learning_rate": 1.4068590601799018e-05, "loss": 1.6268, "step": 7401 }, { "epoch": 0.4046854284269371, "grad_norm": 1.545095682144165, "learning_rate": 1.4066921259541837e-05, "loss": 1.502, "step": 7402 }, { "epoch": 0.40474010087065865, "grad_norm": 1.4282807111740112, "learning_rate": 1.4065251781480612e-05, "loss": 1.4901, "step": 7403 }, { "epoch": 0.40479477331438024, "grad_norm": 2.122511386871338, "learning_rate": 1.4063582167671091e-05, "loss": 1.4056, "step": 7404 }, { "epoch": 0.4048494457581018, "grad_norm": 1.3622015714645386, "learning_rate": 1.4061912418169024e-05, "loss": 1.5732, "step": 7405 }, { "epoch": 0.4049041182018233, "grad_norm": 1.735202670097351, "learning_rate": 1.4060242533030173e-05, "loss": 1.473, "step": 7406 }, { "epoch": 0.40495879064554485, "grad_norm": 1.5610610246658325, "learning_rate": 1.4058572512310293e-05, "loss": 1.3713, "step": 7407 }, { "epoch": 0.40501346308926645, "grad_norm": 1.3530049324035645, "learning_rate": 1.4056902356065154e-05, "loss": 1.4986, "step": 7408 }, { "epoch": 0.405068135532988, "grad_norm": 2.3031537532806396, "learning_rate": 1.4055232064350526e-05, "loss": 1.3499, "step": 7409 }, { "epoch": 0.4051228079767095, "grad_norm": 1.441666841506958, "learning_rate": 1.4053561637222182e-05, "loss": 1.3891, "step": 7410 }, { "epoch": 0.4051774804204311, "grad_norm": 1.528557538986206, "learning_rate": 1.4051891074735906e-05, "loss": 1.327, "step": 7411 }, { "epoch": 0.40523215286415265, "grad_norm": 1.3484206199645996, "learning_rate": 1.405022037694748e-05, "loss": 1.4736, "step": 7412 }, { "epoch": 0.4052868253078742, "grad_norm": 1.2912715673446655, "learning_rate": 1.4048549543912687e-05, "loss": 1.4062, "step": 7413 }, { "epoch": 0.4053414977515957, "grad_norm": 1.4324185848236084, "learning_rate": 1.4046878575687326e-05, "loss": 1.5536, "step": 7414 }, { "epoch": 0.4053961701953173, "grad_norm": 1.6588082313537598, "learning_rate": 1.4045207472327194e-05, "loss": 1.4144, "step": 7415 }, { "epoch": 0.40545084263903886, "grad_norm": 1.5071855783462524, "learning_rate": 1.4043536233888091e-05, "loss": 1.4811, "step": 7416 }, { "epoch": 0.4055055150827604, "grad_norm": 1.5664381980895996, "learning_rate": 1.4041864860425822e-05, "loss": 1.3781, "step": 7417 }, { "epoch": 0.405560187526482, "grad_norm": 1.5995630025863647, "learning_rate": 1.4040193351996206e-05, "loss": 1.4501, "step": 7418 }, { "epoch": 0.4056148599702035, "grad_norm": 1.828160047531128, "learning_rate": 1.4038521708655054e-05, "loss": 1.2736, "step": 7419 }, { "epoch": 0.40566953241392506, "grad_norm": 1.3857008218765259, "learning_rate": 1.4036849930458181e-05, "loss": 1.7092, "step": 7420 }, { "epoch": 0.4057242048576466, "grad_norm": 1.8853821754455566, "learning_rate": 1.4035178017461419e-05, "loss": 1.2644, "step": 7421 }, { "epoch": 0.4057788773013682, "grad_norm": 1.790565013885498, "learning_rate": 1.4033505969720592e-05, "loss": 1.5543, "step": 7422 }, { "epoch": 0.40583354974508973, "grad_norm": 1.4298232793807983, "learning_rate": 1.4031833787291536e-05, "loss": 1.427, "step": 7423 }, { "epoch": 0.40588822218881127, "grad_norm": 1.789391279220581, "learning_rate": 1.4030161470230088e-05, "loss": 1.4331, "step": 7424 }, { "epoch": 0.40594289463253286, "grad_norm": 1.428863286972046, "learning_rate": 1.4028489018592095e-05, "loss": 1.4989, "step": 7425 }, { "epoch": 0.4059975670762544, "grad_norm": 1.5912574529647827, "learning_rate": 1.40268164324334e-05, "loss": 1.6996, "step": 7426 }, { "epoch": 0.40605223951997593, "grad_norm": 1.6036276817321777, "learning_rate": 1.4025143711809853e-05, "loss": 1.3532, "step": 7427 }, { "epoch": 0.40610691196369747, "grad_norm": 1.6578474044799805, "learning_rate": 1.4023470856777313e-05, "loss": 1.4281, "step": 7428 }, { "epoch": 0.40616158440741906, "grad_norm": 1.7788336277008057, "learning_rate": 1.402179786739164e-05, "loss": 1.2561, "step": 7429 }, { "epoch": 0.4062162568511406, "grad_norm": 1.5506354570388794, "learning_rate": 1.4020124743708696e-05, "loss": 1.5037, "step": 7430 }, { "epoch": 0.40627092929486214, "grad_norm": 1.2232353687286377, "learning_rate": 1.4018451485784357e-05, "loss": 1.3162, "step": 7431 }, { "epoch": 0.40632560173858373, "grad_norm": 1.4603068828582764, "learning_rate": 1.4016778093674493e-05, "loss": 1.3856, "step": 7432 }, { "epoch": 0.40638027418230527, "grad_norm": 1.6137399673461914, "learning_rate": 1.4015104567434981e-05, "loss": 1.04, "step": 7433 }, { "epoch": 0.4064349466260268, "grad_norm": 1.453905701637268, "learning_rate": 1.4013430907121706e-05, "loss": 1.5165, "step": 7434 }, { "epoch": 0.40648961906974834, "grad_norm": 1.4225223064422607, "learning_rate": 1.4011757112790556e-05, "loss": 1.4676, "step": 7435 }, { "epoch": 0.40654429151346994, "grad_norm": 1.5698294639587402, "learning_rate": 1.401008318449742e-05, "loss": 1.2671, "step": 7436 }, { "epoch": 0.4065989639571915, "grad_norm": 1.372837781906128, "learning_rate": 1.4008409122298199e-05, "loss": 1.3987, "step": 7437 }, { "epoch": 0.406653636400913, "grad_norm": 1.7568053007125854, "learning_rate": 1.400673492624879e-05, "loss": 1.4482, "step": 7438 }, { "epoch": 0.4067083088446346, "grad_norm": 1.3343487977981567, "learning_rate": 1.4005060596405102e-05, "loss": 1.242, "step": 7439 }, { "epoch": 0.40676298128835614, "grad_norm": 1.3291499614715576, "learning_rate": 1.400338613282304e-05, "loss": 1.6934, "step": 7440 }, { "epoch": 0.4068176537320777, "grad_norm": 1.5797985792160034, "learning_rate": 1.4001711535558523e-05, "loss": 1.2664, "step": 7441 }, { "epoch": 0.40687232617579927, "grad_norm": 1.7934151887893677, "learning_rate": 1.4000036804667464e-05, "loss": 1.2813, "step": 7442 }, { "epoch": 0.4069269986195208, "grad_norm": 1.452642798423767, "learning_rate": 1.3998361940205794e-05, "loss": 1.7133, "step": 7443 }, { "epoch": 0.40698167106324235, "grad_norm": 1.5211091041564941, "learning_rate": 1.3996686942229435e-05, "loss": 1.3398, "step": 7444 }, { "epoch": 0.4070363435069639, "grad_norm": 1.4960647821426392, "learning_rate": 1.3995011810794319e-05, "loss": 1.1752, "step": 7445 }, { "epoch": 0.4070910159506855, "grad_norm": 1.42557692527771, "learning_rate": 1.3993336545956386e-05, "loss": 1.3355, "step": 7446 }, { "epoch": 0.407145688394407, "grad_norm": 1.2364296913146973, "learning_rate": 1.3991661147771574e-05, "loss": 1.3351, "step": 7447 }, { "epoch": 0.40720036083812855, "grad_norm": 1.5547764301300049, "learning_rate": 1.3989985616295826e-05, "loss": 1.4988, "step": 7448 }, { "epoch": 0.40725503328185014, "grad_norm": 1.41245436668396, "learning_rate": 1.3988309951585101e-05, "loss": 1.4189, "step": 7449 }, { "epoch": 0.4073097057255717, "grad_norm": 1.2830177545547485, "learning_rate": 1.3986634153695343e-05, "loss": 1.305, "step": 7450 }, { "epoch": 0.4073643781692932, "grad_norm": 1.215154767036438, "learning_rate": 1.3984958222682522e-05, "loss": 1.5026, "step": 7451 }, { "epoch": 0.40741905061301475, "grad_norm": 1.3685446977615356, "learning_rate": 1.3983282158602589e-05, "loss": 1.4173, "step": 7452 }, { "epoch": 0.40747372305673635, "grad_norm": 1.407259225845337, "learning_rate": 1.3981605961511522e-05, "loss": 1.3348, "step": 7453 }, { "epoch": 0.4075283955004579, "grad_norm": 2.1029224395751953, "learning_rate": 1.3979929631465286e-05, "loss": 1.3796, "step": 7454 }, { "epoch": 0.4075830679441794, "grad_norm": 1.3697253465652466, "learning_rate": 1.3978253168519859e-05, "loss": 1.5352, "step": 7455 }, { "epoch": 0.407637740387901, "grad_norm": 1.631426453590393, "learning_rate": 1.3976576572731228e-05, "loss": 1.4906, "step": 7456 }, { "epoch": 0.40769241283162255, "grad_norm": 1.3712788820266724, "learning_rate": 1.3974899844155373e-05, "loss": 1.3201, "step": 7457 }, { "epoch": 0.4077470852753441, "grad_norm": 1.4892371892929077, "learning_rate": 1.3973222982848282e-05, "loss": 1.6265, "step": 7458 }, { "epoch": 0.4078017577190656, "grad_norm": 1.4908934831619263, "learning_rate": 1.3971545988865953e-05, "loss": 1.4077, "step": 7459 }, { "epoch": 0.4078564301627872, "grad_norm": 1.7587170600891113, "learning_rate": 1.3969868862264386e-05, "loss": 1.1799, "step": 7460 }, { "epoch": 0.40791110260650876, "grad_norm": 1.3512288331985474, "learning_rate": 1.396819160309958e-05, "loss": 1.293, "step": 7461 }, { "epoch": 0.4079657750502303, "grad_norm": 1.5102254152297974, "learning_rate": 1.3966514211427544e-05, "loss": 1.5309, "step": 7462 }, { "epoch": 0.4080204474939519, "grad_norm": 1.309865951538086, "learning_rate": 1.3964836687304293e-05, "loss": 1.6938, "step": 7463 }, { "epoch": 0.4080751199376734, "grad_norm": 1.3241468667984009, "learning_rate": 1.3963159030785843e-05, "loss": 1.5528, "step": 7464 }, { "epoch": 0.40812979238139496, "grad_norm": 1.8504680395126343, "learning_rate": 1.3961481241928207e-05, "loss": 1.3706, "step": 7465 }, { "epoch": 0.4081844648251165, "grad_norm": 1.5684517621994019, "learning_rate": 1.395980332078742e-05, "loss": 1.5585, "step": 7466 }, { "epoch": 0.4082391372688381, "grad_norm": 1.798779010772705, "learning_rate": 1.3958125267419509e-05, "loss": 1.121, "step": 7467 }, { "epoch": 0.40829380971255963, "grad_norm": 1.5235897302627563, "learning_rate": 1.3956447081880506e-05, "loss": 1.7161, "step": 7468 }, { "epoch": 0.40834848215628117, "grad_norm": 1.6134599447250366, "learning_rate": 1.3954768764226449e-05, "loss": 1.3151, "step": 7469 }, { "epoch": 0.40840315460000276, "grad_norm": 1.5809508562088013, "learning_rate": 1.3953090314513387e-05, "loss": 1.3426, "step": 7470 }, { "epoch": 0.4084578270437243, "grad_norm": 1.666266679763794, "learning_rate": 1.3951411732797363e-05, "loss": 1.1995, "step": 7471 }, { "epoch": 0.40851249948744583, "grad_norm": 2.06972599029541, "learning_rate": 1.3949733019134427e-05, "loss": 1.3815, "step": 7472 }, { "epoch": 0.40856717193116737, "grad_norm": 1.574842929840088, "learning_rate": 1.3948054173580636e-05, "loss": 1.4474, "step": 7473 }, { "epoch": 0.40862184437488897, "grad_norm": 2.0573740005493164, "learning_rate": 1.3946375196192052e-05, "loss": 1.4444, "step": 7474 }, { "epoch": 0.4086765168186105, "grad_norm": 1.5302553176879883, "learning_rate": 1.394469608702474e-05, "loss": 1.3622, "step": 7475 }, { "epoch": 0.40873118926233204, "grad_norm": 1.4455331563949585, "learning_rate": 1.394301684613477e-05, "loss": 1.4486, "step": 7476 }, { "epoch": 0.40878586170605363, "grad_norm": 1.6177518367767334, "learning_rate": 1.3941337473578216e-05, "loss": 1.226, "step": 7477 }, { "epoch": 0.40884053414977517, "grad_norm": 1.4673240184783936, "learning_rate": 1.3939657969411155e-05, "loss": 1.4646, "step": 7478 }, { "epoch": 0.4088952065934967, "grad_norm": 1.3072177171707153, "learning_rate": 1.3937978333689667e-05, "loss": 1.6768, "step": 7479 }, { "epoch": 0.40894987903721824, "grad_norm": 2.253169536590576, "learning_rate": 1.3936298566469843e-05, "loss": 1.2607, "step": 7480 }, { "epoch": 0.40900455148093984, "grad_norm": 1.819033145904541, "learning_rate": 1.3934618667807773e-05, "loss": 1.4229, "step": 7481 }, { "epoch": 0.4090592239246614, "grad_norm": 1.2667021751403809, "learning_rate": 1.3932938637759555e-05, "loss": 1.5531, "step": 7482 }, { "epoch": 0.4091138963683829, "grad_norm": 1.8228785991668701, "learning_rate": 1.3931258476381284e-05, "loss": 1.5108, "step": 7483 }, { "epoch": 0.4091685688121045, "grad_norm": 1.5343319177627563, "learning_rate": 1.392957818372907e-05, "loss": 1.4749, "step": 7484 }, { "epoch": 0.40922324125582604, "grad_norm": 1.6091707944869995, "learning_rate": 1.3927897759859018e-05, "loss": 1.473, "step": 7485 }, { "epoch": 0.4092779136995476, "grad_norm": 1.353952169418335, "learning_rate": 1.3926217204827241e-05, "loss": 1.4065, "step": 7486 }, { "epoch": 0.4093325861432691, "grad_norm": 1.6755218505859375, "learning_rate": 1.392453651868986e-05, "loss": 1.5362, "step": 7487 }, { "epoch": 0.4093872585869907, "grad_norm": 1.2147291898727417, "learning_rate": 1.3922855701502997e-05, "loss": 1.5712, "step": 7488 }, { "epoch": 0.40944193103071225, "grad_norm": 1.8890749216079712, "learning_rate": 1.3921174753322775e-05, "loss": 1.337, "step": 7489 }, { "epoch": 0.4094966034744338, "grad_norm": 1.452134132385254, "learning_rate": 1.3919493674205326e-05, "loss": 1.433, "step": 7490 }, { "epoch": 0.4095512759181554, "grad_norm": 1.581758737564087, "learning_rate": 1.391781246420679e-05, "loss": 1.2763, "step": 7491 }, { "epoch": 0.4096059483618769, "grad_norm": 1.8228795528411865, "learning_rate": 1.3916131123383298e-05, "loss": 1.526, "step": 7492 }, { "epoch": 0.40966062080559845, "grad_norm": 1.4709372520446777, "learning_rate": 1.3914449651790998e-05, "loss": 1.5779, "step": 7493 }, { "epoch": 0.40971529324932, "grad_norm": 1.8585152626037598, "learning_rate": 1.3912768049486039e-05, "loss": 1.4155, "step": 7494 }, { "epoch": 0.4097699656930416, "grad_norm": 1.2868919372558594, "learning_rate": 1.3911086316524576e-05, "loss": 1.4574, "step": 7495 }, { "epoch": 0.4098246381367631, "grad_norm": 1.3110352754592896, "learning_rate": 1.390940445296276e-05, "loss": 1.3155, "step": 7496 }, { "epoch": 0.40987931058048466, "grad_norm": 2.183100938796997, "learning_rate": 1.3907722458856758e-05, "loss": 1.3642, "step": 7497 }, { "epoch": 0.40993398302420625, "grad_norm": 1.837126612663269, "learning_rate": 1.3906040334262733e-05, "loss": 1.2138, "step": 7498 }, { "epoch": 0.4099886554679278, "grad_norm": 1.9117395877838135, "learning_rate": 1.3904358079236854e-05, "loss": 1.5349, "step": 7499 }, { "epoch": 0.4100433279116493, "grad_norm": 1.2592991590499878, "learning_rate": 1.3902675693835299e-05, "loss": 1.4153, "step": 7500 }, { "epoch": 0.41009800035537086, "grad_norm": 1.1365493535995483, "learning_rate": 1.3900993178114241e-05, "loss": 1.4575, "step": 7501 }, { "epoch": 0.41015267279909245, "grad_norm": 1.735968828201294, "learning_rate": 1.3899310532129872e-05, "loss": 1.6065, "step": 7502 }, { "epoch": 0.410207345242814, "grad_norm": 1.4902466535568237, "learning_rate": 1.3897627755938372e-05, "loss": 1.435, "step": 7503 }, { "epoch": 0.41026201768653553, "grad_norm": 1.5400806665420532, "learning_rate": 1.3895944849595934e-05, "loss": 1.5694, "step": 7504 }, { "epoch": 0.4103166901302571, "grad_norm": 1.583594560623169, "learning_rate": 1.3894261813158758e-05, "loss": 1.2728, "step": 7505 }, { "epoch": 0.41037136257397866, "grad_norm": 1.3645563125610352, "learning_rate": 1.389257864668304e-05, "loss": 1.3359, "step": 7506 }, { "epoch": 0.4104260350177002, "grad_norm": 1.3360008001327515, "learning_rate": 1.3890895350224984e-05, "loss": 1.4198, "step": 7507 }, { "epoch": 0.41048070746142173, "grad_norm": 1.5742568969726562, "learning_rate": 1.3889211923840805e-05, "loss": 1.5429, "step": 7508 }, { "epoch": 0.4105353799051433, "grad_norm": 1.166139841079712, "learning_rate": 1.3887528367586714e-05, "loss": 1.4315, "step": 7509 }, { "epoch": 0.41059005234886486, "grad_norm": 1.29972243309021, "learning_rate": 1.388584468151893e-05, "loss": 1.3722, "step": 7510 }, { "epoch": 0.4106447247925864, "grad_norm": 1.2300305366516113, "learning_rate": 1.388416086569367e-05, "loss": 1.3187, "step": 7511 }, { "epoch": 0.410699397236308, "grad_norm": 1.236289381980896, "learning_rate": 1.3882476920167167e-05, "loss": 1.6116, "step": 7512 }, { "epoch": 0.41075406968002953, "grad_norm": 1.4917436838150024, "learning_rate": 1.3880792844995646e-05, "loss": 1.245, "step": 7513 }, { "epoch": 0.41080874212375107, "grad_norm": 1.4340962171554565, "learning_rate": 1.3879108640235346e-05, "loss": 1.2809, "step": 7514 }, { "epoch": 0.4108634145674726, "grad_norm": 1.560807466506958, "learning_rate": 1.3877424305942506e-05, "loss": 1.4024, "step": 7515 }, { "epoch": 0.4109180870111942, "grad_norm": 1.641801118850708, "learning_rate": 1.3875739842173372e-05, "loss": 1.5003, "step": 7516 }, { "epoch": 0.41097275945491574, "grad_norm": 1.8503952026367188, "learning_rate": 1.3874055248984191e-05, "loss": 1.3023, "step": 7517 }, { "epoch": 0.4110274318986373, "grad_norm": 2.3919641971588135, "learning_rate": 1.387237052643121e-05, "loss": 1.5991, "step": 7518 }, { "epoch": 0.41108210434235887, "grad_norm": 1.6504470109939575, "learning_rate": 1.3870685674570695e-05, "loss": 1.3913, "step": 7519 }, { "epoch": 0.4111367767860804, "grad_norm": 1.2346628904342651, "learning_rate": 1.3869000693458898e-05, "loss": 1.3868, "step": 7520 }, { "epoch": 0.41119144922980194, "grad_norm": 2.107778549194336, "learning_rate": 1.386731558315209e-05, "loss": 1.1235, "step": 7521 }, { "epoch": 0.4112461216735235, "grad_norm": 1.5538398027420044, "learning_rate": 1.3865630343706543e-05, "loss": 1.2554, "step": 7522 }, { "epoch": 0.41130079411724507, "grad_norm": 1.4363285303115845, "learning_rate": 1.3863944975178525e-05, "loss": 1.4721, "step": 7523 }, { "epoch": 0.4113554665609666, "grad_norm": 1.6020451784133911, "learning_rate": 1.3862259477624317e-05, "loss": 1.4476, "step": 7524 }, { "epoch": 0.41141013900468815, "grad_norm": 1.1873583793640137, "learning_rate": 1.3860573851100203e-05, "loss": 1.5894, "step": 7525 }, { "epoch": 0.41146481144840974, "grad_norm": 1.2142863273620605, "learning_rate": 1.385888809566247e-05, "loss": 1.4541, "step": 7526 }, { "epoch": 0.4115194838921313, "grad_norm": 1.8136811256408691, "learning_rate": 1.3857202211367406e-05, "loss": 1.3024, "step": 7527 }, { "epoch": 0.4115741563358528, "grad_norm": 1.535170555114746, "learning_rate": 1.3855516198271307e-05, "loss": 1.3919, "step": 7528 }, { "epoch": 0.41162882877957435, "grad_norm": 1.9544166326522827, "learning_rate": 1.385383005643048e-05, "loss": 1.3152, "step": 7529 }, { "epoch": 0.41168350122329594, "grad_norm": 1.3805955648422241, "learning_rate": 1.3852143785901224e-05, "loss": 1.3113, "step": 7530 }, { "epoch": 0.4117381736670175, "grad_norm": 1.6837239265441895, "learning_rate": 1.3850457386739846e-05, "loss": 1.3249, "step": 7531 }, { "epoch": 0.411792846110739, "grad_norm": 2.1170341968536377, "learning_rate": 1.3848770859002658e-05, "loss": 1.1981, "step": 7532 }, { "epoch": 0.4118475185544606, "grad_norm": 1.9931081533432007, "learning_rate": 1.3847084202745982e-05, "loss": 1.4206, "step": 7533 }, { "epoch": 0.41190219099818215, "grad_norm": 1.962001919746399, "learning_rate": 1.3845397418026136e-05, "loss": 1.2652, "step": 7534 }, { "epoch": 0.4119568634419037, "grad_norm": 2.1907920837402344, "learning_rate": 1.3843710504899448e-05, "loss": 0.9947, "step": 7535 }, { "epoch": 0.4120115358856252, "grad_norm": 1.891071081161499, "learning_rate": 1.3842023463422247e-05, "loss": 1.0942, "step": 7536 }, { "epoch": 0.4120662083293468, "grad_norm": 1.552461862564087, "learning_rate": 1.3840336293650867e-05, "loss": 1.402, "step": 7537 }, { "epoch": 0.41212088077306835, "grad_norm": 1.8508663177490234, "learning_rate": 1.3838648995641645e-05, "loss": 1.3447, "step": 7538 }, { "epoch": 0.4121755532167899, "grad_norm": 1.6127127408981323, "learning_rate": 1.3836961569450924e-05, "loss": 1.4116, "step": 7539 }, { "epoch": 0.4122302256605115, "grad_norm": 1.9590500593185425, "learning_rate": 1.3835274015135056e-05, "loss": 1.2888, "step": 7540 }, { "epoch": 0.412284898104233, "grad_norm": 1.8161031007766724, "learning_rate": 1.3833586332750386e-05, "loss": 1.3986, "step": 7541 }, { "epoch": 0.41233957054795456, "grad_norm": 1.3363711833953857, "learning_rate": 1.3831898522353275e-05, "loss": 1.3846, "step": 7542 }, { "epoch": 0.4123942429916761, "grad_norm": 1.4672437906265259, "learning_rate": 1.3830210584000078e-05, "loss": 1.4238, "step": 7543 }, { "epoch": 0.4124489154353977, "grad_norm": 1.6018383502960205, "learning_rate": 1.3828522517747164e-05, "loss": 1.3135, "step": 7544 }, { "epoch": 0.4125035878791192, "grad_norm": 1.76198410987854, "learning_rate": 1.3826834323650899e-05, "loss": 1.5879, "step": 7545 }, { "epoch": 0.41255826032284076, "grad_norm": 1.5391801595687866, "learning_rate": 1.3825146001767656e-05, "loss": 1.5378, "step": 7546 }, { "epoch": 0.41261293276656236, "grad_norm": 1.3804723024368286, "learning_rate": 1.3823457552153812e-05, "loss": 1.8005, "step": 7547 }, { "epoch": 0.4126676052102839, "grad_norm": 1.5631084442138672, "learning_rate": 1.3821768974865746e-05, "loss": 1.2758, "step": 7548 }, { "epoch": 0.41272227765400543, "grad_norm": 1.558612585067749, "learning_rate": 1.3820080269959848e-05, "loss": 1.456, "step": 7549 }, { "epoch": 0.41277695009772697, "grad_norm": 1.3872990608215332, "learning_rate": 1.3818391437492504e-05, "loss": 1.4491, "step": 7550 }, { "epoch": 0.41283162254144856, "grad_norm": 1.449080228805542, "learning_rate": 1.3816702477520113e-05, "loss": 1.4529, "step": 7551 }, { "epoch": 0.4128862949851701, "grad_norm": 1.7326314449310303, "learning_rate": 1.3815013390099068e-05, "loss": 1.4844, "step": 7552 }, { "epoch": 0.41294096742889164, "grad_norm": 1.4813778400421143, "learning_rate": 1.3813324175285772e-05, "loss": 1.3651, "step": 7553 }, { "epoch": 0.41299563987261323, "grad_norm": 1.4557209014892578, "learning_rate": 1.3811634833136638e-05, "loss": 1.4827, "step": 7554 }, { "epoch": 0.41305031231633477, "grad_norm": 1.641704797744751, "learning_rate": 1.3809945363708071e-05, "loss": 1.2815, "step": 7555 }, { "epoch": 0.4131049847600563, "grad_norm": 1.2956072092056274, "learning_rate": 1.3808255767056484e-05, "loss": 1.2769, "step": 7556 }, { "epoch": 0.41315965720377784, "grad_norm": 1.6421749591827393, "learning_rate": 1.3806566043238302e-05, "loss": 1.304, "step": 7557 }, { "epoch": 0.41321432964749943, "grad_norm": 1.5067511796951294, "learning_rate": 1.3804876192309952e-05, "loss": 1.3364, "step": 7558 }, { "epoch": 0.41326900209122097, "grad_norm": 1.4962224960327148, "learning_rate": 1.3803186214327852e-05, "loss": 1.3685, "step": 7559 }, { "epoch": 0.4133236745349425, "grad_norm": 1.7362509965896606, "learning_rate": 1.3801496109348442e-05, "loss": 1.4472, "step": 7560 }, { "epoch": 0.4133783469786641, "grad_norm": 1.483543038368225, "learning_rate": 1.3799805877428159e-05, "loss": 1.3746, "step": 7561 }, { "epoch": 0.41343301942238564, "grad_norm": 1.2738169431686401, "learning_rate": 1.379811551862344e-05, "loss": 1.5636, "step": 7562 }, { "epoch": 0.4134876918661072, "grad_norm": 1.4718356132507324, "learning_rate": 1.379642503299073e-05, "loss": 1.7999, "step": 7563 }, { "epoch": 0.4135423643098287, "grad_norm": 1.505937099456787, "learning_rate": 1.379473442058648e-05, "loss": 1.4645, "step": 7564 }, { "epoch": 0.4135970367535503, "grad_norm": 1.6798925399780273, "learning_rate": 1.3793043681467141e-05, "loss": 1.2752, "step": 7565 }, { "epoch": 0.41365170919727184, "grad_norm": 1.4036613702774048, "learning_rate": 1.3791352815689174e-05, "loss": 1.3361, "step": 7566 }, { "epoch": 0.4137063816409934, "grad_norm": 1.2788554430007935, "learning_rate": 1.3789661823309041e-05, "loss": 1.7351, "step": 7567 }, { "epoch": 0.413761054084715, "grad_norm": 1.550612449645996, "learning_rate": 1.3787970704383207e-05, "loss": 1.3893, "step": 7568 }, { "epoch": 0.4138157265284365, "grad_norm": 1.671056866645813, "learning_rate": 1.3786279458968143e-05, "loss": 1.4586, "step": 7569 }, { "epoch": 0.41387039897215805, "grad_norm": 1.5360822677612305, "learning_rate": 1.3784588087120323e-05, "loss": 1.4605, "step": 7570 }, { "epoch": 0.4139250714158796, "grad_norm": 1.47188401222229, "learning_rate": 1.3782896588896222e-05, "loss": 1.4787, "step": 7571 }, { "epoch": 0.4139797438596012, "grad_norm": 1.798034906387329, "learning_rate": 1.3781204964352332e-05, "loss": 1.4871, "step": 7572 }, { "epoch": 0.4140344163033227, "grad_norm": 1.4265934228897095, "learning_rate": 1.3779513213545132e-05, "loss": 1.2756, "step": 7573 }, { "epoch": 0.41408908874704425, "grad_norm": 1.6558396816253662, "learning_rate": 1.3777821336531121e-05, "loss": 1.498, "step": 7574 }, { "epoch": 0.41414376119076585, "grad_norm": 1.2595374584197998, "learning_rate": 1.3776129333366787e-05, "loss": 1.5097, "step": 7575 }, { "epoch": 0.4141984336344874, "grad_norm": 1.3062925338745117, "learning_rate": 1.3774437204108634e-05, "loss": 1.4771, "step": 7576 }, { "epoch": 0.4142531060782089, "grad_norm": 1.5199896097183228, "learning_rate": 1.3772744948813166e-05, "loss": 1.3739, "step": 7577 }, { "epoch": 0.41430777852193046, "grad_norm": 1.3766165971755981, "learning_rate": 1.377105256753689e-05, "loss": 1.3082, "step": 7578 }, { "epoch": 0.41436245096565205, "grad_norm": 1.6067609786987305, "learning_rate": 1.3769360060336323e-05, "loss": 1.4058, "step": 7579 }, { "epoch": 0.4144171234093736, "grad_norm": 1.6489143371582031, "learning_rate": 1.3767667427267976e-05, "loss": 1.2621, "step": 7580 }, { "epoch": 0.4144717958530951, "grad_norm": 1.4217582941055298, "learning_rate": 1.3765974668388373e-05, "loss": 1.311, "step": 7581 }, { "epoch": 0.4145264682968167, "grad_norm": 1.59813392162323, "learning_rate": 1.376428178375404e-05, "loss": 1.4841, "step": 7582 }, { "epoch": 0.41458114074053826, "grad_norm": 1.8776293992996216, "learning_rate": 1.3762588773421506e-05, "loss": 1.1882, "step": 7583 }, { "epoch": 0.4146358131842598, "grad_norm": 1.4964169263839722, "learning_rate": 1.37608956374473e-05, "loss": 1.329, "step": 7584 }, { "epoch": 0.41469048562798133, "grad_norm": 1.3148912191390991, "learning_rate": 1.3759202375887963e-05, "loss": 1.416, "step": 7585 }, { "epoch": 0.4147451580717029, "grad_norm": 1.2757155895233154, "learning_rate": 1.3757508988800042e-05, "loss": 1.6689, "step": 7586 }, { "epoch": 0.41479983051542446, "grad_norm": 1.8218164443969727, "learning_rate": 1.3755815476240076e-05, "loss": 1.3394, "step": 7587 }, { "epoch": 0.414854502959146, "grad_norm": 1.4456086158752441, "learning_rate": 1.3754121838264618e-05, "loss": 1.4581, "step": 7588 }, { "epoch": 0.4149091754028676, "grad_norm": 3.7632880210876465, "learning_rate": 1.3752428074930224e-05, "loss": 1.4991, "step": 7589 }, { "epoch": 0.41496384784658913, "grad_norm": 1.737320899963379, "learning_rate": 1.3750734186293448e-05, "loss": 1.3186, "step": 7590 }, { "epoch": 0.41501852029031067, "grad_norm": 1.6016472578048706, "learning_rate": 1.3749040172410857e-05, "loss": 1.4491, "step": 7591 }, { "epoch": 0.4150731927340322, "grad_norm": 1.5437936782836914, "learning_rate": 1.3747346033339017e-05, "loss": 1.4379, "step": 7592 }, { "epoch": 0.4151278651777538, "grad_norm": 1.7420814037322998, "learning_rate": 1.3745651769134502e-05, "loss": 1.2025, "step": 7593 }, { "epoch": 0.41518253762147533, "grad_norm": 1.2362357378005981, "learning_rate": 1.3743957379853885e-05, "loss": 1.4177, "step": 7594 }, { "epoch": 0.41523721006519687, "grad_norm": 1.2657145261764526, "learning_rate": 1.3742262865553744e-05, "loss": 1.5047, "step": 7595 }, { "epoch": 0.41529188250891846, "grad_norm": 1.5015976428985596, "learning_rate": 1.3740568226290665e-05, "loss": 1.4216, "step": 7596 }, { "epoch": 0.41534655495264, "grad_norm": 1.2111495733261108, "learning_rate": 1.3738873462121235e-05, "loss": 1.6405, "step": 7597 }, { "epoch": 0.41540122739636154, "grad_norm": 1.5142931938171387, "learning_rate": 1.3737178573102044e-05, "loss": 1.6333, "step": 7598 }, { "epoch": 0.4154558998400831, "grad_norm": 1.441725254058838, "learning_rate": 1.3735483559289693e-05, "loss": 1.839, "step": 7599 }, { "epoch": 0.41551057228380467, "grad_norm": 1.096408724784851, "learning_rate": 1.3733788420740783e-05, "loss": 1.5413, "step": 7600 }, { "epoch": 0.4155652447275262, "grad_norm": 1.7909400463104248, "learning_rate": 1.3732093157511914e-05, "loss": 1.3286, "step": 7601 }, { "epoch": 0.41561991717124774, "grad_norm": 1.7164602279663086, "learning_rate": 1.3730397769659696e-05, "loss": 1.4401, "step": 7602 }, { "epoch": 0.41567458961496934, "grad_norm": 1.7781192064285278, "learning_rate": 1.3728702257240745e-05, "loss": 1.3095, "step": 7603 }, { "epoch": 0.4157292620586909, "grad_norm": 1.7077693939208984, "learning_rate": 1.3727006620311674e-05, "loss": 1.3268, "step": 7604 }, { "epoch": 0.4157839345024124, "grad_norm": 1.5271435976028442, "learning_rate": 1.3725310858929106e-05, "loss": 1.2942, "step": 7605 }, { "epoch": 0.41583860694613395, "grad_norm": 1.2541146278381348, "learning_rate": 1.3723614973149667e-05, "loss": 1.3919, "step": 7606 }, { "epoch": 0.41589327938985554, "grad_norm": 1.1079785823822021, "learning_rate": 1.3721918963029987e-05, "loss": 1.7393, "step": 7607 }, { "epoch": 0.4159479518335771, "grad_norm": 1.4165276288986206, "learning_rate": 1.3720222828626699e-05, "loss": 1.365, "step": 7608 }, { "epoch": 0.4160026242772986, "grad_norm": 1.4502933025360107, "learning_rate": 1.3718526569996441e-05, "loss": 1.5065, "step": 7609 }, { "epoch": 0.4160572967210202, "grad_norm": 1.1933057308197021, "learning_rate": 1.3716830187195856e-05, "loss": 1.7224, "step": 7610 }, { "epoch": 0.41611196916474175, "grad_norm": 1.6320991516113281, "learning_rate": 1.3715133680281586e-05, "loss": 1.4661, "step": 7611 }, { "epoch": 0.4161666416084633, "grad_norm": 1.4811985492706299, "learning_rate": 1.3713437049310287e-05, "loss": 1.3382, "step": 7612 }, { "epoch": 0.4162213140521848, "grad_norm": 1.5715107917785645, "learning_rate": 1.3711740294338612e-05, "loss": 1.3877, "step": 7613 }, { "epoch": 0.4162759864959064, "grad_norm": 1.5302625894546509, "learning_rate": 1.3710043415423218e-05, "loss": 1.39, "step": 7614 }, { "epoch": 0.41633065893962795, "grad_norm": 1.3203294277191162, "learning_rate": 1.3708346412620768e-05, "loss": 1.3421, "step": 7615 }, { "epoch": 0.4163853313833495, "grad_norm": 1.30901038646698, "learning_rate": 1.3706649285987928e-05, "loss": 1.3686, "step": 7616 }, { "epoch": 0.4164400038270711, "grad_norm": 1.4492281675338745, "learning_rate": 1.3704952035581371e-05, "loss": 1.2444, "step": 7617 }, { "epoch": 0.4164946762707926, "grad_norm": 1.1809805631637573, "learning_rate": 1.3703254661457775e-05, "loss": 1.4253, "step": 7618 }, { "epoch": 0.41654934871451416, "grad_norm": 1.7081019878387451, "learning_rate": 1.3701557163673811e-05, "loss": 1.5561, "step": 7619 }, { "epoch": 0.4166040211582357, "grad_norm": 1.4440582990646362, "learning_rate": 1.3699859542286168e-05, "loss": 1.174, "step": 7620 }, { "epoch": 0.4166586936019573, "grad_norm": 1.49919855594635, "learning_rate": 1.3698161797351536e-05, "loss": 1.5978, "step": 7621 }, { "epoch": 0.4167133660456788, "grad_norm": 1.4772160053253174, "learning_rate": 1.3696463928926602e-05, "loss": 1.2981, "step": 7622 }, { "epoch": 0.41676803848940036, "grad_norm": 1.4847935438156128, "learning_rate": 1.3694765937068063e-05, "loss": 1.1651, "step": 7623 }, { "epoch": 0.41682271093312195, "grad_norm": 1.4994670152664185, "learning_rate": 1.3693067821832622e-05, "loss": 1.43, "step": 7624 }, { "epoch": 0.4168773833768435, "grad_norm": 1.822784662246704, "learning_rate": 1.3691369583276977e-05, "loss": 1.537, "step": 7625 }, { "epoch": 0.41693205582056503, "grad_norm": 1.5321866273880005, "learning_rate": 1.368967122145784e-05, "loss": 1.4067, "step": 7626 }, { "epoch": 0.41698672826428657, "grad_norm": 1.45036780834198, "learning_rate": 1.3687972736431925e-05, "loss": 1.1614, "step": 7627 }, { "epoch": 0.41704140070800816, "grad_norm": 1.5449522733688354, "learning_rate": 1.3686274128255945e-05, "loss": 1.2238, "step": 7628 }, { "epoch": 0.4170960731517297, "grad_norm": 1.3675459623336792, "learning_rate": 1.3684575396986622e-05, "loss": 1.413, "step": 7629 }, { "epoch": 0.41715074559545123, "grad_norm": 1.4120678901672363, "learning_rate": 1.3682876542680677e-05, "loss": 1.5793, "step": 7630 }, { "epoch": 0.4172054180391728, "grad_norm": 1.4362610578536987, "learning_rate": 1.3681177565394845e-05, "loss": 1.2905, "step": 7631 }, { "epoch": 0.41726009048289436, "grad_norm": 1.4769014120101929, "learning_rate": 1.3679478465185856e-05, "loss": 1.539, "step": 7632 }, { "epoch": 0.4173147629266159, "grad_norm": 1.513222575187683, "learning_rate": 1.3677779242110447e-05, "loss": 1.5198, "step": 7633 }, { "epoch": 0.41736943537033744, "grad_norm": 1.6815999746322632, "learning_rate": 1.3676079896225358e-05, "loss": 1.4097, "step": 7634 }, { "epoch": 0.41742410781405903, "grad_norm": 1.5458310842514038, "learning_rate": 1.3674380427587337e-05, "loss": 1.6498, "step": 7635 }, { "epoch": 0.41747878025778057, "grad_norm": 1.777653455734253, "learning_rate": 1.3672680836253129e-05, "loss": 1.387, "step": 7636 }, { "epoch": 0.4175334527015021, "grad_norm": 1.5484373569488525, "learning_rate": 1.367098112227949e-05, "loss": 1.2212, "step": 7637 }, { "epoch": 0.4175881251452237, "grad_norm": 1.5776314735412598, "learning_rate": 1.366928128572318e-05, "loss": 1.5921, "step": 7638 }, { "epoch": 0.41764279758894524, "grad_norm": 1.2704918384552002, "learning_rate": 1.3667581326640954e-05, "loss": 1.3737, "step": 7639 }, { "epoch": 0.4176974700326668, "grad_norm": 1.822857141494751, "learning_rate": 1.3665881245089585e-05, "loss": 1.3374, "step": 7640 }, { "epoch": 0.4177521424763883, "grad_norm": 1.2023401260375977, "learning_rate": 1.3664181041125835e-05, "loss": 1.5108, "step": 7641 }, { "epoch": 0.4178068149201099, "grad_norm": 1.8572674989700317, "learning_rate": 1.3662480714806483e-05, "loss": 1.2054, "step": 7642 }, { "epoch": 0.41786148736383144, "grad_norm": 1.2937268018722534, "learning_rate": 1.3660780266188306e-05, "loss": 1.4645, "step": 7643 }, { "epoch": 0.417916159807553, "grad_norm": 1.7284375429153442, "learning_rate": 1.3659079695328086e-05, "loss": 1.205, "step": 7644 }, { "epoch": 0.41797083225127457, "grad_norm": 1.7535896301269531, "learning_rate": 1.365737900228261e-05, "loss": 1.4497, "step": 7645 }, { "epoch": 0.4180255046949961, "grad_norm": 1.3311386108398438, "learning_rate": 1.3655678187108663e-05, "loss": 1.5001, "step": 7646 }, { "epoch": 0.41808017713871765, "grad_norm": 1.3340781927108765, "learning_rate": 1.3653977249863046e-05, "loss": 1.4148, "step": 7647 }, { "epoch": 0.41813484958243924, "grad_norm": 1.3881351947784424, "learning_rate": 1.3652276190602551e-05, "loss": 1.2746, "step": 7648 }, { "epoch": 0.4181895220261608, "grad_norm": 1.6059495210647583, "learning_rate": 1.3650575009383988e-05, "loss": 1.5247, "step": 7649 }, { "epoch": 0.4182441944698823, "grad_norm": 1.4500948190689087, "learning_rate": 1.3648873706264159e-05, "loss": 1.4869, "step": 7650 }, { "epoch": 0.41829886691360385, "grad_norm": 1.8383653163909912, "learning_rate": 1.364717228129987e-05, "loss": 1.3054, "step": 7651 }, { "epoch": 0.41835353935732544, "grad_norm": 1.4617918729782104, "learning_rate": 1.3645470734547946e-05, "loss": 1.3123, "step": 7652 }, { "epoch": 0.418408211801047, "grad_norm": 1.3072245121002197, "learning_rate": 1.3643769066065199e-05, "loss": 1.5125, "step": 7653 }, { "epoch": 0.4184628842447685, "grad_norm": 1.368186593055725, "learning_rate": 1.3642067275908449e-05, "loss": 1.4659, "step": 7654 }, { "epoch": 0.4185175566884901, "grad_norm": 1.6659971475601196, "learning_rate": 1.3640365364134524e-05, "loss": 1.4144, "step": 7655 }, { "epoch": 0.41857222913221165, "grad_norm": 1.417669653892517, "learning_rate": 1.3638663330800262e-05, "loss": 1.3994, "step": 7656 }, { "epoch": 0.4186269015759332, "grad_norm": 1.657250165939331, "learning_rate": 1.363696117596249e-05, "loss": 1.3274, "step": 7657 }, { "epoch": 0.4186815740196547, "grad_norm": 1.4043234586715698, "learning_rate": 1.3635258899678052e-05, "loss": 1.1884, "step": 7658 }, { "epoch": 0.4187362464633763, "grad_norm": 1.5126984119415283, "learning_rate": 1.3633556502003789e-05, "loss": 1.4249, "step": 7659 }, { "epoch": 0.41879091890709785, "grad_norm": 1.4079748392105103, "learning_rate": 1.363185398299655e-05, "loss": 1.5377, "step": 7660 }, { "epoch": 0.4188455913508194, "grad_norm": 1.3955614566802979, "learning_rate": 1.363015134271318e-05, "loss": 1.5043, "step": 7661 }, { "epoch": 0.418900263794541, "grad_norm": 1.6860889196395874, "learning_rate": 1.3628448581210538e-05, "loss": 1.1316, "step": 7662 }, { "epoch": 0.4189549362382625, "grad_norm": 1.3300225734710693, "learning_rate": 1.3626745698545487e-05, "loss": 1.4482, "step": 7663 }, { "epoch": 0.41900960868198406, "grad_norm": 1.6255966424942017, "learning_rate": 1.3625042694774886e-05, "loss": 1.2567, "step": 7664 }, { "epoch": 0.4190642811257056, "grad_norm": 1.4259507656097412, "learning_rate": 1.3623339569955603e-05, "loss": 1.48, "step": 7665 }, { "epoch": 0.4191189535694272, "grad_norm": 1.4132095575332642, "learning_rate": 1.362163632414451e-05, "loss": 1.3485, "step": 7666 }, { "epoch": 0.4191736260131487, "grad_norm": 1.431712031364441, "learning_rate": 1.3619932957398478e-05, "loss": 1.5363, "step": 7667 }, { "epoch": 0.41922829845687026, "grad_norm": 2.008392572402954, "learning_rate": 1.3618229469774392e-05, "loss": 1.1985, "step": 7668 }, { "epoch": 0.41928297090059186, "grad_norm": 1.4281039237976074, "learning_rate": 1.3616525861329133e-05, "loss": 1.6421, "step": 7669 }, { "epoch": 0.4193376433443134, "grad_norm": 1.6667121648788452, "learning_rate": 1.361482213211959e-05, "loss": 1.5525, "step": 7670 }, { "epoch": 0.41939231578803493, "grad_norm": 1.368321180343628, "learning_rate": 1.3613118282202653e-05, "loss": 1.3715, "step": 7671 }, { "epoch": 0.41944698823175647, "grad_norm": 1.3323286771774292, "learning_rate": 1.3611414311635219e-05, "loss": 1.6004, "step": 7672 }, { "epoch": 0.41950166067547806, "grad_norm": 1.4201425313949585, "learning_rate": 1.3609710220474187e-05, "loss": 1.5561, "step": 7673 }, { "epoch": 0.4195563331191996, "grad_norm": 1.7283575534820557, "learning_rate": 1.360800600877646e-05, "loss": 1.4502, "step": 7674 }, { "epoch": 0.41961100556292114, "grad_norm": 1.5128850936889648, "learning_rate": 1.3606301676598942e-05, "loss": 1.4539, "step": 7675 }, { "epoch": 0.41966567800664273, "grad_norm": 1.866719126701355, "learning_rate": 1.3604597223998553e-05, "loss": 1.245, "step": 7676 }, { "epoch": 0.41972035045036427, "grad_norm": 1.6776998043060303, "learning_rate": 1.3602892651032205e-05, "loss": 1.53, "step": 7677 }, { "epoch": 0.4197750228940858, "grad_norm": 1.644587755203247, "learning_rate": 1.3601187957756814e-05, "loss": 1.5149, "step": 7678 }, { "epoch": 0.41982969533780734, "grad_norm": 1.4334371089935303, "learning_rate": 1.3599483144229309e-05, "loss": 1.5899, "step": 7679 }, { "epoch": 0.41988436778152893, "grad_norm": 1.6688127517700195, "learning_rate": 1.3597778210506615e-05, "loss": 1.4434, "step": 7680 }, { "epoch": 0.41993904022525047, "grad_norm": 1.478109359741211, "learning_rate": 1.3596073156645662e-05, "loss": 1.6997, "step": 7681 }, { "epoch": 0.419993712668972, "grad_norm": 1.4452784061431885, "learning_rate": 1.359436798270339e-05, "loss": 1.4491, "step": 7682 }, { "epoch": 0.4200483851126936, "grad_norm": 1.357507586479187, "learning_rate": 1.3592662688736734e-05, "loss": 1.4921, "step": 7683 }, { "epoch": 0.42010305755641514, "grad_norm": 1.264782190322876, "learning_rate": 1.3590957274802641e-05, "loss": 1.8069, "step": 7684 }, { "epoch": 0.4201577300001367, "grad_norm": 1.7310168743133545, "learning_rate": 1.358925174095806e-05, "loss": 1.4146, "step": 7685 }, { "epoch": 0.4202124024438582, "grad_norm": 1.6067487001419067, "learning_rate": 1.3587546087259939e-05, "loss": 1.3872, "step": 7686 }, { "epoch": 0.4202670748875798, "grad_norm": 1.3758046627044678, "learning_rate": 1.358584031376524e-05, "loss": 1.2388, "step": 7687 }, { "epoch": 0.42032174733130134, "grad_norm": 1.447699785232544, "learning_rate": 1.3584134420530915e-05, "loss": 1.5064, "step": 7688 }, { "epoch": 0.4203764197750229, "grad_norm": 1.3508236408233643, "learning_rate": 1.358242840761393e-05, "loss": 1.4375, "step": 7689 }, { "epoch": 0.4204310922187445, "grad_norm": 1.594961166381836, "learning_rate": 1.3580722275071255e-05, "loss": 1.4292, "step": 7690 }, { "epoch": 0.420485764662466, "grad_norm": 1.5574116706848145, "learning_rate": 1.3579016022959862e-05, "loss": 1.4412, "step": 7691 }, { "epoch": 0.42054043710618755, "grad_norm": 1.629448652267456, "learning_rate": 1.3577309651336728e-05, "loss": 1.4734, "step": 7692 }, { "epoch": 0.4205951095499091, "grad_norm": 1.8265341520309448, "learning_rate": 1.3575603160258824e-05, "loss": 1.2076, "step": 7693 }, { "epoch": 0.4206497819936307, "grad_norm": 1.4617342948913574, "learning_rate": 1.3573896549783146e-05, "loss": 1.3843, "step": 7694 }, { "epoch": 0.4207044544373522, "grad_norm": 1.5693198442459106, "learning_rate": 1.3572189819966672e-05, "loss": 1.4493, "step": 7695 }, { "epoch": 0.42075912688107375, "grad_norm": 1.5224909782409668, "learning_rate": 1.3570482970866397e-05, "loss": 1.4859, "step": 7696 }, { "epoch": 0.42081379932479535, "grad_norm": 1.3879228830337524, "learning_rate": 1.3568776002539319e-05, "loss": 1.5036, "step": 7697 }, { "epoch": 0.4208684717685169, "grad_norm": 1.3883659839630127, "learning_rate": 1.3567068915042436e-05, "loss": 1.5265, "step": 7698 }, { "epoch": 0.4209231442122384, "grad_norm": 1.4640673398971558, "learning_rate": 1.3565361708432754e-05, "loss": 1.4676, "step": 7699 }, { "epoch": 0.42097781665595996, "grad_norm": 1.6306763887405396, "learning_rate": 1.3563654382767273e-05, "loss": 1.3515, "step": 7700 }, { "epoch": 0.42103248909968155, "grad_norm": 1.2702397108078003, "learning_rate": 1.3561946938103015e-05, "loss": 1.3324, "step": 7701 }, { "epoch": 0.4210871615434031, "grad_norm": 1.550175428390503, "learning_rate": 1.3560239374496986e-05, "loss": 1.1887, "step": 7702 }, { "epoch": 0.4211418339871246, "grad_norm": 1.281103253364563, "learning_rate": 1.355853169200621e-05, "loss": 1.4473, "step": 7703 }, { "epoch": 0.4211965064308462, "grad_norm": 1.3060859441757202, "learning_rate": 1.3556823890687714e-05, "loss": 1.3905, "step": 7704 }, { "epoch": 0.42125117887456776, "grad_norm": 2.3311755657196045, "learning_rate": 1.355511597059852e-05, "loss": 1.3395, "step": 7705 }, { "epoch": 0.4213058513182893, "grad_norm": 1.5299655199050903, "learning_rate": 1.3553407931795662e-05, "loss": 1.4789, "step": 7706 }, { "epoch": 0.42136052376201083, "grad_norm": 1.5746783018112183, "learning_rate": 1.3551699774336173e-05, "loss": 1.2972, "step": 7707 }, { "epoch": 0.4214151962057324, "grad_norm": 1.7800824642181396, "learning_rate": 1.3549991498277095e-05, "loss": 1.3522, "step": 7708 }, { "epoch": 0.42146986864945396, "grad_norm": 1.5151759386062622, "learning_rate": 1.354828310367547e-05, "loss": 1.3555, "step": 7709 }, { "epoch": 0.4215245410931755, "grad_norm": 1.33584463596344, "learning_rate": 1.3546574590588346e-05, "loss": 1.0194, "step": 7710 }, { "epoch": 0.4215792135368971, "grad_norm": 1.5834648609161377, "learning_rate": 1.3544865959072777e-05, "loss": 1.2723, "step": 7711 }, { "epoch": 0.42163388598061863, "grad_norm": 1.1610404253005981, "learning_rate": 1.3543157209185813e-05, "loss": 1.8688, "step": 7712 }, { "epoch": 0.42168855842434017, "grad_norm": 1.5182788372039795, "learning_rate": 1.3541448340984516e-05, "loss": 1.5718, "step": 7713 }, { "epoch": 0.4217432308680617, "grad_norm": 1.4115846157073975, "learning_rate": 1.3539739354525947e-05, "loss": 1.5693, "step": 7714 }, { "epoch": 0.4217979033117833, "grad_norm": 1.3303823471069336, "learning_rate": 1.3538030249867178e-05, "loss": 1.5944, "step": 7715 }, { "epoch": 0.42185257575550483, "grad_norm": 2.137765645980835, "learning_rate": 1.3536321027065273e-05, "loss": 1.1303, "step": 7716 }, { "epoch": 0.42190724819922637, "grad_norm": 1.7380542755126953, "learning_rate": 1.3534611686177312e-05, "loss": 1.3343, "step": 7717 }, { "epoch": 0.42196192064294796, "grad_norm": 2.0676040649414062, "learning_rate": 1.3532902227260374e-05, "loss": 1.5116, "step": 7718 }, { "epoch": 0.4220165930866695, "grad_norm": 1.3817150592803955, "learning_rate": 1.3531192650371541e-05, "loss": 1.4236, "step": 7719 }, { "epoch": 0.42207126553039104, "grad_norm": 1.626686930656433, "learning_rate": 1.3529482955567896e-05, "loss": 1.545, "step": 7720 }, { "epoch": 0.4221259379741126, "grad_norm": 1.6142845153808594, "learning_rate": 1.3527773142906532e-05, "loss": 1.3108, "step": 7721 }, { "epoch": 0.42218061041783417, "grad_norm": 1.1160613298416138, "learning_rate": 1.3526063212444552e-05, "loss": 1.8431, "step": 7722 }, { "epoch": 0.4222352828615557, "grad_norm": 1.4942734241485596, "learning_rate": 1.352435316423904e-05, "loss": 1.4263, "step": 7723 }, { "epoch": 0.42228995530527724, "grad_norm": 1.5102577209472656, "learning_rate": 1.352264299834711e-05, "loss": 1.4046, "step": 7724 }, { "epoch": 0.42234462774899884, "grad_norm": 1.2965996265411377, "learning_rate": 1.3520932714825863e-05, "loss": 1.3547, "step": 7725 }, { "epoch": 0.4223993001927204, "grad_norm": 1.4010757207870483, "learning_rate": 1.3519222313732407e-05, "loss": 1.5492, "step": 7726 }, { "epoch": 0.4224539726364419, "grad_norm": 1.776675820350647, "learning_rate": 1.3517511795123864e-05, "loss": 1.3787, "step": 7727 }, { "epoch": 0.42250864508016345, "grad_norm": 1.314020037651062, "learning_rate": 1.3515801159057344e-05, "loss": 1.1431, "step": 7728 }, { "epoch": 0.42256331752388504, "grad_norm": 1.3273757696151733, "learning_rate": 1.3514090405589978e-05, "loss": 1.4591, "step": 7729 }, { "epoch": 0.4226179899676066, "grad_norm": 2.034345865249634, "learning_rate": 1.3512379534778883e-05, "loss": 1.3212, "step": 7730 }, { "epoch": 0.4226726624113281, "grad_norm": 1.4985895156860352, "learning_rate": 1.3510668546681198e-05, "loss": 1.2275, "step": 7731 }, { "epoch": 0.4227273348550497, "grad_norm": 1.3101450204849243, "learning_rate": 1.3508957441354049e-05, "loss": 1.5496, "step": 7732 }, { "epoch": 0.42278200729877125, "grad_norm": 1.1479936838150024, "learning_rate": 1.3507246218854576e-05, "loss": 1.326, "step": 7733 }, { "epoch": 0.4228366797424928, "grad_norm": 1.1703662872314453, "learning_rate": 1.3505534879239923e-05, "loss": 1.4769, "step": 7734 }, { "epoch": 0.4228913521862143, "grad_norm": 1.6361372470855713, "learning_rate": 1.3503823422567235e-05, "loss": 1.6072, "step": 7735 }, { "epoch": 0.4229460246299359, "grad_norm": 1.4385420083999634, "learning_rate": 1.3502111848893663e-05, "loss": 1.4478, "step": 7736 }, { "epoch": 0.42300069707365745, "grad_norm": 1.222287893295288, "learning_rate": 1.3500400158276352e-05, "loss": 1.4833, "step": 7737 }, { "epoch": 0.423055369517379, "grad_norm": 1.6381298303604126, "learning_rate": 1.3498688350772473e-05, "loss": 1.4288, "step": 7738 }, { "epoch": 0.4231100419611006, "grad_norm": 1.2730079889297485, "learning_rate": 1.3496976426439177e-05, "loss": 1.3708, "step": 7739 }, { "epoch": 0.4231647144048221, "grad_norm": 1.7234081029891968, "learning_rate": 1.349526438533363e-05, "loss": 1.6039, "step": 7740 }, { "epoch": 0.42321938684854365, "grad_norm": 1.2536176443099976, "learning_rate": 1.3493552227513007e-05, "loss": 1.4905, "step": 7741 }, { "epoch": 0.4232740592922652, "grad_norm": 1.6112196445465088, "learning_rate": 1.3491839953034474e-05, "loss": 1.411, "step": 7742 }, { "epoch": 0.4233287317359868, "grad_norm": 1.291067361831665, "learning_rate": 1.3490127561955214e-05, "loss": 1.4007, "step": 7743 }, { "epoch": 0.4233834041797083, "grad_norm": 1.611459493637085, "learning_rate": 1.3488415054332404e-05, "loss": 1.2752, "step": 7744 }, { "epoch": 0.42343807662342986, "grad_norm": 1.275644063949585, "learning_rate": 1.348670243022323e-05, "loss": 1.5334, "step": 7745 }, { "epoch": 0.42349274906715145, "grad_norm": 1.6819020509719849, "learning_rate": 1.348498968968488e-05, "loss": 1.2287, "step": 7746 }, { "epoch": 0.423547421510873, "grad_norm": 1.370845079421997, "learning_rate": 1.3483276832774543e-05, "loss": 1.488, "step": 7747 }, { "epoch": 0.4236020939545945, "grad_norm": 1.4219022989273071, "learning_rate": 1.348156385954942e-05, "loss": 1.5246, "step": 7748 }, { "epoch": 0.42365676639831606, "grad_norm": 1.8372248411178589, "learning_rate": 1.3479850770066712e-05, "loss": 1.4097, "step": 7749 }, { "epoch": 0.42371143884203766, "grad_norm": 1.3853139877319336, "learning_rate": 1.3478137564383621e-05, "loss": 1.3726, "step": 7750 }, { "epoch": 0.4237661112857592, "grad_norm": 1.5155013799667358, "learning_rate": 1.3476424242557355e-05, "loss": 1.3053, "step": 7751 }, { "epoch": 0.42382078372948073, "grad_norm": 1.4229745864868164, "learning_rate": 1.3474710804645125e-05, "loss": 1.2697, "step": 7752 }, { "epoch": 0.4238754561732023, "grad_norm": 1.7629916667938232, "learning_rate": 1.3472997250704149e-05, "loss": 1.356, "step": 7753 }, { "epoch": 0.42393012861692386, "grad_norm": 1.5678424835205078, "learning_rate": 1.3471283580791643e-05, "loss": 1.3858, "step": 7754 }, { "epoch": 0.4239848010606454, "grad_norm": 1.4297916889190674, "learning_rate": 1.3469569794964832e-05, "loss": 1.3253, "step": 7755 }, { "epoch": 0.42403947350436694, "grad_norm": 1.5981426239013672, "learning_rate": 1.3467855893280945e-05, "loss": 1.2258, "step": 7756 }, { "epoch": 0.42409414594808853, "grad_norm": 1.6026004552841187, "learning_rate": 1.3466141875797214e-05, "loss": 1.493, "step": 7757 }, { "epoch": 0.42414881839181007, "grad_norm": 1.5317076444625854, "learning_rate": 1.346442774257087e-05, "loss": 1.5073, "step": 7758 }, { "epoch": 0.4242034908355316, "grad_norm": 1.3118442296981812, "learning_rate": 1.3462713493659156e-05, "loss": 1.3704, "step": 7759 }, { "epoch": 0.4242581632792532, "grad_norm": 1.450036644935608, "learning_rate": 1.3460999129119315e-05, "loss": 1.3902, "step": 7760 }, { "epoch": 0.42431283572297473, "grad_norm": 1.736395001411438, "learning_rate": 1.3459284649008585e-05, "loss": 1.336, "step": 7761 }, { "epoch": 0.4243675081666963, "grad_norm": 2.4062533378601074, "learning_rate": 1.3457570053384225e-05, "loss": 1.5938, "step": 7762 }, { "epoch": 0.4244221806104178, "grad_norm": 1.2662335634231567, "learning_rate": 1.3455855342303491e-05, "loss": 1.5121, "step": 7763 }, { "epoch": 0.4244768530541394, "grad_norm": 1.3531687259674072, "learning_rate": 1.3454140515823637e-05, "loss": 1.6593, "step": 7764 }, { "epoch": 0.42453152549786094, "grad_norm": 1.5879757404327393, "learning_rate": 1.3452425574001926e-05, "loss": 1.3505, "step": 7765 }, { "epoch": 0.4245861979415825, "grad_norm": 1.589587688446045, "learning_rate": 1.3450710516895619e-05, "loss": 1.4576, "step": 7766 }, { "epoch": 0.42464087038530407, "grad_norm": 2.3362793922424316, "learning_rate": 1.3448995344561997e-05, "loss": 1.2241, "step": 7767 }, { "epoch": 0.4246955428290256, "grad_norm": 1.40545654296875, "learning_rate": 1.3447280057058322e-05, "loss": 1.5812, "step": 7768 }, { "epoch": 0.42475021527274714, "grad_norm": 1.7691556215286255, "learning_rate": 1.3445564654441879e-05, "loss": 1.3223, "step": 7769 }, { "epoch": 0.4248048877164687, "grad_norm": 1.6177083253860474, "learning_rate": 1.3443849136769946e-05, "loss": 1.4822, "step": 7770 }, { "epoch": 0.4248595601601903, "grad_norm": 1.4197214841842651, "learning_rate": 1.3442133504099812e-05, "loss": 1.3098, "step": 7771 }, { "epoch": 0.4249142326039118, "grad_norm": 1.4560283422470093, "learning_rate": 1.344041775648876e-05, "loss": 1.3737, "step": 7772 }, { "epoch": 0.42496890504763335, "grad_norm": 1.5907230377197266, "learning_rate": 1.3438701893994087e-05, "loss": 1.2203, "step": 7773 }, { "epoch": 0.42502357749135494, "grad_norm": 1.3703694343566895, "learning_rate": 1.3436985916673088e-05, "loss": 1.5772, "step": 7774 }, { "epoch": 0.4250782499350765, "grad_norm": 1.5480679273605347, "learning_rate": 1.3435269824583064e-05, "loss": 1.6148, "step": 7775 }, { "epoch": 0.425132922378798, "grad_norm": 1.5594701766967773, "learning_rate": 1.3433553617781318e-05, "loss": 1.4927, "step": 7776 }, { "epoch": 0.42518759482251955, "grad_norm": 1.7355955839157104, "learning_rate": 1.3431837296325163e-05, "loss": 1.3765, "step": 7777 }, { "epoch": 0.42524226726624115, "grad_norm": 1.5511934757232666, "learning_rate": 1.3430120860271906e-05, "loss": 1.483, "step": 7778 }, { "epoch": 0.4252969397099627, "grad_norm": 1.5778805017471313, "learning_rate": 1.3428404309678863e-05, "loss": 1.3181, "step": 7779 }, { "epoch": 0.4253516121536842, "grad_norm": 1.759199619293213, "learning_rate": 1.3426687644603358e-05, "loss": 1.1585, "step": 7780 }, { "epoch": 0.4254062845974058, "grad_norm": 1.3843457698822021, "learning_rate": 1.3424970865102709e-05, "loss": 1.6583, "step": 7781 }, { "epoch": 0.42546095704112735, "grad_norm": 1.5564141273498535, "learning_rate": 1.3423253971234248e-05, "loss": 1.4745, "step": 7782 }, { "epoch": 0.4255156294848489, "grad_norm": 1.4006861448287964, "learning_rate": 1.3421536963055304e-05, "loss": 1.4542, "step": 7783 }, { "epoch": 0.4255703019285704, "grad_norm": 1.9171769618988037, "learning_rate": 1.341981984062321e-05, "loss": 1.3744, "step": 7784 }, { "epoch": 0.425624974372292, "grad_norm": 1.5226041078567505, "learning_rate": 1.3418102603995307e-05, "loss": 1.4088, "step": 7785 }, { "epoch": 0.42567964681601356, "grad_norm": 1.481689691543579, "learning_rate": 1.341638525322894e-05, "loss": 1.2133, "step": 7786 }, { "epoch": 0.4257343192597351, "grad_norm": 1.628363847732544, "learning_rate": 1.3414667788381449e-05, "loss": 1.2964, "step": 7787 }, { "epoch": 0.4257889917034567, "grad_norm": 1.4447909593582153, "learning_rate": 1.341295020951019e-05, "loss": 1.3543, "step": 7788 }, { "epoch": 0.4258436641471782, "grad_norm": 0.9841188192367554, "learning_rate": 1.3411232516672512e-05, "loss": 1.8008, "step": 7789 }, { "epoch": 0.42589833659089976, "grad_norm": 1.5852402448654175, "learning_rate": 1.3409514709925777e-05, "loss": 1.2218, "step": 7790 }, { "epoch": 0.4259530090346213, "grad_norm": 1.2280235290527344, "learning_rate": 1.3407796789327345e-05, "loss": 1.4208, "step": 7791 }, { "epoch": 0.4260076814783429, "grad_norm": 1.5342365503311157, "learning_rate": 1.3406078754934584e-05, "loss": 1.4678, "step": 7792 }, { "epoch": 0.42606235392206443, "grad_norm": 1.3451008796691895, "learning_rate": 1.3404360606804858e-05, "loss": 1.3994, "step": 7793 }, { "epoch": 0.42611702636578597, "grad_norm": 1.6684423685073853, "learning_rate": 1.3402642344995543e-05, "loss": 1.6016, "step": 7794 }, { "epoch": 0.42617169880950756, "grad_norm": 1.4425398111343384, "learning_rate": 1.3400923969564017e-05, "loss": 1.4568, "step": 7795 }, { "epoch": 0.4262263712532291, "grad_norm": 1.3464019298553467, "learning_rate": 1.3399205480567659e-05, "loss": 1.6205, "step": 7796 }, { "epoch": 0.42628104369695063, "grad_norm": 1.8041738271713257, "learning_rate": 1.3397486878063852e-05, "loss": 1.2145, "step": 7797 }, { "epoch": 0.42633571614067217, "grad_norm": 1.3469318151474, "learning_rate": 1.3395768162109986e-05, "loss": 1.4563, "step": 7798 }, { "epoch": 0.42639038858439376, "grad_norm": 1.4577913284301758, "learning_rate": 1.3394049332763454e-05, "loss": 1.2425, "step": 7799 }, { "epoch": 0.4264450610281153, "grad_norm": 1.6754183769226074, "learning_rate": 1.339233039008165e-05, "loss": 1.4563, "step": 7800 }, { "epoch": 0.42649973347183684, "grad_norm": 1.37284255027771, "learning_rate": 1.339061133412197e-05, "loss": 1.3228, "step": 7801 }, { "epoch": 0.42655440591555843, "grad_norm": 1.6442501544952393, "learning_rate": 1.3388892164941828e-05, "loss": 1.4638, "step": 7802 }, { "epoch": 0.42660907835927997, "grad_norm": 2.197408437728882, "learning_rate": 1.3387172882598622e-05, "loss": 1.2687, "step": 7803 }, { "epoch": 0.4266637508030015, "grad_norm": 1.320185661315918, "learning_rate": 1.3385453487149765e-05, "loss": 1.4526, "step": 7804 }, { "epoch": 0.42671842324672304, "grad_norm": 1.608462929725647, "learning_rate": 1.3383733978652669e-05, "loss": 1.1377, "step": 7805 }, { "epoch": 0.42677309569044464, "grad_norm": 1.6781835556030273, "learning_rate": 1.3382014357164756e-05, "loss": 1.4117, "step": 7806 }, { "epoch": 0.4268277681341662, "grad_norm": 1.3649944067001343, "learning_rate": 1.338029462274345e-05, "loss": 1.2707, "step": 7807 }, { "epoch": 0.4268824405778877, "grad_norm": 1.3262161016464233, "learning_rate": 1.3378574775446171e-05, "loss": 1.52, "step": 7808 }, { "epoch": 0.4269371130216093, "grad_norm": 1.3505830764770508, "learning_rate": 1.3376854815330357e-05, "loss": 1.4634, "step": 7809 }, { "epoch": 0.42699178546533084, "grad_norm": 1.4489946365356445, "learning_rate": 1.3375134742453435e-05, "loss": 1.4908, "step": 7810 }, { "epoch": 0.4270464579090524, "grad_norm": 1.43833327293396, "learning_rate": 1.3373414556872844e-05, "loss": 1.2238, "step": 7811 }, { "epoch": 0.4271011303527739, "grad_norm": 1.3955169916152954, "learning_rate": 1.3371694258646021e-05, "loss": 1.5404, "step": 7812 }, { "epoch": 0.4271558027964955, "grad_norm": 1.568109154701233, "learning_rate": 1.336997384783042e-05, "loss": 1.4061, "step": 7813 }, { "epoch": 0.42721047524021705, "grad_norm": 1.5673267841339111, "learning_rate": 1.336825332448348e-05, "loss": 1.5232, "step": 7814 }, { "epoch": 0.4272651476839386, "grad_norm": 1.3060964345932007, "learning_rate": 1.336653268866266e-05, "loss": 1.4839, "step": 7815 }, { "epoch": 0.4273198201276602, "grad_norm": 1.311104416847229, "learning_rate": 1.3364811940425417e-05, "loss": 1.5124, "step": 7816 }, { "epoch": 0.4273744925713817, "grad_norm": 1.4943643808364868, "learning_rate": 1.3363091079829202e-05, "loss": 1.4522, "step": 7817 }, { "epoch": 0.42742916501510325, "grad_norm": 1.5356773138046265, "learning_rate": 1.3361370106931486e-05, "loss": 1.605, "step": 7818 }, { "epoch": 0.4274838374588248, "grad_norm": 1.3848177194595337, "learning_rate": 1.3359649021789734e-05, "loss": 1.396, "step": 7819 }, { "epoch": 0.4275385099025464, "grad_norm": 1.8039703369140625, "learning_rate": 1.3357927824461418e-05, "loss": 1.4084, "step": 7820 }, { "epoch": 0.4275931823462679, "grad_norm": 1.4238548278808594, "learning_rate": 1.3356206515004013e-05, "loss": 1.2861, "step": 7821 }, { "epoch": 0.42764785478998946, "grad_norm": 1.288102149963379, "learning_rate": 1.3354485093474998e-05, "loss": 1.5052, "step": 7822 }, { "epoch": 0.42770252723371105, "grad_norm": 1.474239706993103, "learning_rate": 1.3352763559931852e-05, "loss": 1.478, "step": 7823 }, { "epoch": 0.4277571996774326, "grad_norm": 1.6164382696151733, "learning_rate": 1.3351041914432064e-05, "loss": 1.3879, "step": 7824 }, { "epoch": 0.4278118721211541, "grad_norm": 1.5548219680786133, "learning_rate": 1.3349320157033121e-05, "loss": 1.3626, "step": 7825 }, { "epoch": 0.42786654456487566, "grad_norm": 1.8132404088974, "learning_rate": 1.334759828779252e-05, "loss": 1.2773, "step": 7826 }, { "epoch": 0.42792121700859725, "grad_norm": 1.5961960554122925, "learning_rate": 1.3345876306767757e-05, "loss": 1.6581, "step": 7827 }, { "epoch": 0.4279758894523188, "grad_norm": 1.5323021411895752, "learning_rate": 1.3344154214016331e-05, "loss": 1.5109, "step": 7828 }, { "epoch": 0.42803056189604033, "grad_norm": 1.9351205825805664, "learning_rate": 1.3342432009595754e-05, "loss": 1.3175, "step": 7829 }, { "epoch": 0.4280852343397619, "grad_norm": 1.6412261724472046, "learning_rate": 1.3340709693563525e-05, "loss": 1.2331, "step": 7830 }, { "epoch": 0.42813990678348346, "grad_norm": 1.3964848518371582, "learning_rate": 1.333898726597716e-05, "loss": 1.3398, "step": 7831 }, { "epoch": 0.428194579227205, "grad_norm": 1.5876047611236572, "learning_rate": 1.3337264726894175e-05, "loss": 1.468, "step": 7832 }, { "epoch": 0.42824925167092653, "grad_norm": 1.3098558187484741, "learning_rate": 1.3335542076372088e-05, "loss": 1.2839, "step": 7833 }, { "epoch": 0.4283039241146481, "grad_norm": 1.0846974849700928, "learning_rate": 1.3333819314468428e-05, "loss": 1.3977, "step": 7834 }, { "epoch": 0.42835859655836966, "grad_norm": 1.9353865385055542, "learning_rate": 1.3332096441240716e-05, "loss": 1.1348, "step": 7835 }, { "epoch": 0.4284132690020912, "grad_norm": 1.7391769886016846, "learning_rate": 1.3330373456746486e-05, "loss": 1.4682, "step": 7836 }, { "epoch": 0.4284679414458128, "grad_norm": 1.5309278964996338, "learning_rate": 1.3328650361043269e-05, "loss": 1.3175, "step": 7837 }, { "epoch": 0.42852261388953433, "grad_norm": 1.9880666732788086, "learning_rate": 1.3326927154188607e-05, "loss": 1.2434, "step": 7838 }, { "epoch": 0.42857728633325587, "grad_norm": 2.046271562576294, "learning_rate": 1.3325203836240039e-05, "loss": 1.4009, "step": 7839 }, { "epoch": 0.4286319587769774, "grad_norm": 2.0807533264160156, "learning_rate": 1.3323480407255112e-05, "loss": 1.334, "step": 7840 }, { "epoch": 0.428686631220699, "grad_norm": 1.6979875564575195, "learning_rate": 1.3321756867291378e-05, "loss": 1.3661, "step": 7841 }, { "epoch": 0.42874130366442054, "grad_norm": 1.3627296686172485, "learning_rate": 1.3320033216406388e-05, "loss": 1.4896, "step": 7842 }, { "epoch": 0.4287959761081421, "grad_norm": 1.53213632106781, "learning_rate": 1.3318309454657695e-05, "loss": 1.2489, "step": 7843 }, { "epoch": 0.42885064855186367, "grad_norm": 1.9897431135177612, "learning_rate": 1.3316585582102865e-05, "loss": 1.4704, "step": 7844 }, { "epoch": 0.4289053209955852, "grad_norm": 1.8113194704055786, "learning_rate": 1.3314861598799458e-05, "loss": 1.4902, "step": 7845 }, { "epoch": 0.42895999343930674, "grad_norm": 2.0057966709136963, "learning_rate": 1.3313137504805042e-05, "loss": 1.1551, "step": 7846 }, { "epoch": 0.42901466588302833, "grad_norm": 1.593790054321289, "learning_rate": 1.3311413300177192e-05, "loss": 1.3672, "step": 7847 }, { "epoch": 0.42906933832674987, "grad_norm": 1.3288949728012085, "learning_rate": 1.3309688984973484e-05, "loss": 1.7013, "step": 7848 }, { "epoch": 0.4291240107704714, "grad_norm": 1.5792884826660156, "learning_rate": 1.3307964559251494e-05, "loss": 1.466, "step": 7849 }, { "epoch": 0.42917868321419295, "grad_norm": 1.5407768487930298, "learning_rate": 1.3306240023068801e-05, "loss": 1.4212, "step": 7850 }, { "epoch": 0.42923335565791454, "grad_norm": 1.7455856800079346, "learning_rate": 1.3304515376482998e-05, "loss": 1.3548, "step": 7851 }, { "epoch": 0.4292880281016361, "grad_norm": 1.5281031131744385, "learning_rate": 1.3302790619551673e-05, "loss": 1.0648, "step": 7852 }, { "epoch": 0.4293427005453576, "grad_norm": 1.5833386182785034, "learning_rate": 1.3301065752332415e-05, "loss": 1.3751, "step": 7853 }, { "epoch": 0.4293973729890792, "grad_norm": 1.4517686367034912, "learning_rate": 1.3299340774882833e-05, "loss": 1.3123, "step": 7854 }, { "epoch": 0.42945204543280074, "grad_norm": 1.6152527332305908, "learning_rate": 1.3297615687260515e-05, "loss": 1.264, "step": 7855 }, { "epoch": 0.4295067178765223, "grad_norm": 1.5354177951812744, "learning_rate": 1.3295890489523071e-05, "loss": 1.3734, "step": 7856 }, { "epoch": 0.4295613903202438, "grad_norm": 1.4508652687072754, "learning_rate": 1.329416518172811e-05, "loss": 1.456, "step": 7857 }, { "epoch": 0.4296160627639654, "grad_norm": 2.0222177505493164, "learning_rate": 1.3292439763933245e-05, "loss": 1.6193, "step": 7858 }, { "epoch": 0.42967073520768695, "grad_norm": 1.100037693977356, "learning_rate": 1.3290714236196087e-05, "loss": 1.4586, "step": 7859 }, { "epoch": 0.4297254076514085, "grad_norm": 1.5163027048110962, "learning_rate": 1.328898859857426e-05, "loss": 1.3613, "step": 7860 }, { "epoch": 0.4297800800951301, "grad_norm": 1.9826362133026123, "learning_rate": 1.3287262851125387e-05, "loss": 1.4618, "step": 7861 }, { "epoch": 0.4298347525388516, "grad_norm": 1.5177081823349, "learning_rate": 1.3285536993907095e-05, "loss": 1.274, "step": 7862 }, { "epoch": 0.42988942498257315, "grad_norm": 1.526849389076233, "learning_rate": 1.328381102697701e-05, "loss": 1.1423, "step": 7863 }, { "epoch": 0.4299440974262947, "grad_norm": 1.5235265493392944, "learning_rate": 1.328208495039277e-05, "loss": 1.3007, "step": 7864 }, { "epoch": 0.4299987698700163, "grad_norm": 1.6929268836975098, "learning_rate": 1.3280358764212013e-05, "loss": 1.4146, "step": 7865 }, { "epoch": 0.4300534423137378, "grad_norm": 1.7619571685791016, "learning_rate": 1.327863246849238e-05, "loss": 1.3862, "step": 7866 }, { "epoch": 0.43010811475745936, "grad_norm": 1.3640906810760498, "learning_rate": 1.3276906063291511e-05, "loss": 1.3948, "step": 7867 }, { "epoch": 0.43016278720118095, "grad_norm": 1.5195008516311646, "learning_rate": 1.3275179548667062e-05, "loss": 1.4036, "step": 7868 }, { "epoch": 0.4302174596449025, "grad_norm": 1.4366915225982666, "learning_rate": 1.3273452924676684e-05, "loss": 1.4574, "step": 7869 }, { "epoch": 0.430272132088624, "grad_norm": 1.413638949394226, "learning_rate": 1.327172619137803e-05, "loss": 1.4835, "step": 7870 }, { "epoch": 0.43032680453234556, "grad_norm": 1.6494112014770508, "learning_rate": 1.326999934882876e-05, "loss": 1.5821, "step": 7871 }, { "epoch": 0.43038147697606716, "grad_norm": 1.2678427696228027, "learning_rate": 1.3268272397086542e-05, "loss": 1.702, "step": 7872 }, { "epoch": 0.4304361494197887, "grad_norm": 1.9020239114761353, "learning_rate": 1.3266545336209034e-05, "loss": 1.2109, "step": 7873 }, { "epoch": 0.43049082186351023, "grad_norm": 1.7513471841812134, "learning_rate": 1.3264818166253917e-05, "loss": 1.4624, "step": 7874 }, { "epoch": 0.4305454943072318, "grad_norm": 1.3944380283355713, "learning_rate": 1.3263090887278855e-05, "loss": 1.2637, "step": 7875 }, { "epoch": 0.43060016675095336, "grad_norm": 1.1960657835006714, "learning_rate": 1.3261363499341537e-05, "loss": 1.4613, "step": 7876 }, { "epoch": 0.4306548391946749, "grad_norm": 1.2732489109039307, "learning_rate": 1.3259636002499634e-05, "loss": 1.3583, "step": 7877 }, { "epoch": 0.43070951163839644, "grad_norm": 1.579054594039917, "learning_rate": 1.3257908396810838e-05, "loss": 1.4061, "step": 7878 }, { "epoch": 0.43076418408211803, "grad_norm": 1.7187252044677734, "learning_rate": 1.3256180682332836e-05, "loss": 1.4629, "step": 7879 }, { "epoch": 0.43081885652583957, "grad_norm": 1.511799931526184, "learning_rate": 1.325445285912332e-05, "loss": 1.3494, "step": 7880 }, { "epoch": 0.4308735289695611, "grad_norm": 1.3776694536209106, "learning_rate": 1.3252724927239986e-05, "loss": 1.3963, "step": 7881 }, { "epoch": 0.4309282014132827, "grad_norm": 1.5529086589813232, "learning_rate": 1.3250996886740532e-05, "loss": 1.3672, "step": 7882 }, { "epoch": 0.43098287385700423, "grad_norm": 1.4835981130599976, "learning_rate": 1.3249268737682669e-05, "loss": 1.1993, "step": 7883 }, { "epoch": 0.43103754630072577, "grad_norm": 1.2549108266830444, "learning_rate": 1.3247540480124093e-05, "loss": 1.488, "step": 7884 }, { "epoch": 0.4310922187444473, "grad_norm": 1.1466310024261475, "learning_rate": 1.324581211412252e-05, "loss": 1.4771, "step": 7885 }, { "epoch": 0.4311468911881689, "grad_norm": 1.5502467155456543, "learning_rate": 1.3244083639735665e-05, "loss": 1.5636, "step": 7886 }, { "epoch": 0.43120156363189044, "grad_norm": 1.3300777673721313, "learning_rate": 1.3242355057021246e-05, "loss": 1.5401, "step": 7887 }, { "epoch": 0.431256236075612, "grad_norm": 1.6854712963104248, "learning_rate": 1.3240626366036982e-05, "loss": 1.4784, "step": 7888 }, { "epoch": 0.43131090851933357, "grad_norm": 1.713178277015686, "learning_rate": 1.32388975668406e-05, "loss": 1.2226, "step": 7889 }, { "epoch": 0.4313655809630551, "grad_norm": 1.3454171419143677, "learning_rate": 1.3237168659489827e-05, "loss": 1.4157, "step": 7890 }, { "epoch": 0.43142025340677664, "grad_norm": 1.4456677436828613, "learning_rate": 1.3235439644042396e-05, "loss": 1.4435, "step": 7891 }, { "epoch": 0.4314749258504982, "grad_norm": 1.6946431398391724, "learning_rate": 1.3233710520556042e-05, "loss": 1.3248, "step": 7892 }, { "epoch": 0.4315295982942198, "grad_norm": 1.4614923000335693, "learning_rate": 1.3231981289088509e-05, "loss": 1.5601, "step": 7893 }, { "epoch": 0.4315842707379413, "grad_norm": 1.8837486505508423, "learning_rate": 1.3230251949697537e-05, "loss": 1.4935, "step": 7894 }, { "epoch": 0.43163894318166285, "grad_norm": 1.4355040788650513, "learning_rate": 1.3228522502440868e-05, "loss": 1.2475, "step": 7895 }, { "epoch": 0.43169361562538444, "grad_norm": 1.214036464691162, "learning_rate": 1.322679294737626e-05, "loss": 1.2631, "step": 7896 }, { "epoch": 0.431748288069106, "grad_norm": 1.2257623672485352, "learning_rate": 1.3225063284561461e-05, "loss": 1.4219, "step": 7897 }, { "epoch": 0.4318029605128275, "grad_norm": 1.757858395576477, "learning_rate": 1.3223333514054232e-05, "loss": 1.3023, "step": 7898 }, { "epoch": 0.43185763295654905, "grad_norm": 1.940649390220642, "learning_rate": 1.3221603635912335e-05, "loss": 1.0757, "step": 7899 }, { "epoch": 0.43191230540027065, "grad_norm": 1.4190399646759033, "learning_rate": 1.321987365019353e-05, "loss": 1.4871, "step": 7900 }, { "epoch": 0.4319669778439922, "grad_norm": 1.8692749738693237, "learning_rate": 1.3218143556955592e-05, "loss": 1.7072, "step": 7901 }, { "epoch": 0.4320216502877137, "grad_norm": 1.4383487701416016, "learning_rate": 1.3216413356256286e-05, "loss": 1.4943, "step": 7902 }, { "epoch": 0.4320763227314353, "grad_norm": 2.292198419570923, "learning_rate": 1.3214683048153392e-05, "loss": 1.4202, "step": 7903 }, { "epoch": 0.43213099517515685, "grad_norm": 2.7202608585357666, "learning_rate": 1.3212952632704688e-05, "loss": 1.3547, "step": 7904 }, { "epoch": 0.4321856676188784, "grad_norm": 1.2003720998764038, "learning_rate": 1.3211222109967953e-05, "loss": 1.5729, "step": 7905 }, { "epoch": 0.4322403400625999, "grad_norm": 1.4514598846435547, "learning_rate": 1.3209491480000979e-05, "loss": 1.4939, "step": 7906 }, { "epoch": 0.4322950125063215, "grad_norm": 1.5262378454208374, "learning_rate": 1.3207760742861555e-05, "loss": 1.4304, "step": 7907 }, { "epoch": 0.43234968495004306, "grad_norm": 1.6689316034317017, "learning_rate": 1.3206029898607468e-05, "loss": 1.5265, "step": 7908 }, { "epoch": 0.4324043573937646, "grad_norm": 1.4211676120758057, "learning_rate": 1.3204298947296521e-05, "loss": 1.5066, "step": 7909 }, { "epoch": 0.4324590298374862, "grad_norm": 1.560344934463501, "learning_rate": 1.3202567888986512e-05, "loss": 1.512, "step": 7910 }, { "epoch": 0.4325137022812077, "grad_norm": 1.373421549797058, "learning_rate": 1.320083672373525e-05, "loss": 1.456, "step": 7911 }, { "epoch": 0.43256837472492926, "grad_norm": 1.0069864988327026, "learning_rate": 1.3199105451600536e-05, "loss": 1.7376, "step": 7912 }, { "epoch": 0.4326230471686508, "grad_norm": 1.461133599281311, "learning_rate": 1.3197374072640186e-05, "loss": 1.5801, "step": 7913 }, { "epoch": 0.4326777196123724, "grad_norm": 1.6209429502487183, "learning_rate": 1.3195642586912012e-05, "loss": 1.4931, "step": 7914 }, { "epoch": 0.43273239205609393, "grad_norm": 1.476252555847168, "learning_rate": 1.3193910994473831e-05, "loss": 1.5518, "step": 7915 }, { "epoch": 0.43278706449981547, "grad_norm": 1.4757014513015747, "learning_rate": 1.319217929538347e-05, "loss": 1.728, "step": 7916 }, { "epoch": 0.43284173694353706, "grad_norm": 1.283328652381897, "learning_rate": 1.3190447489698748e-05, "loss": 1.3349, "step": 7917 }, { "epoch": 0.4328964093872586, "grad_norm": 1.606824517250061, "learning_rate": 1.31887155774775e-05, "loss": 1.4963, "step": 7918 }, { "epoch": 0.43295108183098013, "grad_norm": 1.3771781921386719, "learning_rate": 1.3186983558777557e-05, "loss": 1.5509, "step": 7919 }, { "epoch": 0.43300575427470167, "grad_norm": 1.5627398490905762, "learning_rate": 1.3185251433656756e-05, "loss": 1.5339, "step": 7920 }, { "epoch": 0.43306042671842326, "grad_norm": 1.4913851022720337, "learning_rate": 1.3183519202172935e-05, "loss": 1.5267, "step": 7921 }, { "epoch": 0.4331150991621448, "grad_norm": 1.418771743774414, "learning_rate": 1.3181786864383934e-05, "loss": 1.3811, "step": 7922 }, { "epoch": 0.43316977160586634, "grad_norm": 1.6254533529281616, "learning_rate": 1.3180054420347603e-05, "loss": 1.3456, "step": 7923 }, { "epoch": 0.43322444404958793, "grad_norm": 1.8766765594482422, "learning_rate": 1.3178321870121793e-05, "loss": 1.4387, "step": 7924 }, { "epoch": 0.43327911649330947, "grad_norm": 1.7247260808944702, "learning_rate": 1.3176589213764362e-05, "loss": 1.4484, "step": 7925 }, { "epoch": 0.433333788937031, "grad_norm": 1.662521243095398, "learning_rate": 1.317485645133316e-05, "loss": 1.5123, "step": 7926 }, { "epoch": 0.43338846138075254, "grad_norm": 1.5344946384429932, "learning_rate": 1.3173123582886052e-05, "loss": 1.3425, "step": 7927 }, { "epoch": 0.43344313382447414, "grad_norm": 2.138082504272461, "learning_rate": 1.31713906084809e-05, "loss": 1.3722, "step": 7928 }, { "epoch": 0.4334978062681957, "grad_norm": 1.695934534072876, "learning_rate": 1.3169657528175574e-05, "loss": 1.2684, "step": 7929 }, { "epoch": 0.4335524787119172, "grad_norm": 1.4119396209716797, "learning_rate": 1.3167924342027947e-05, "loss": 1.4573, "step": 7930 }, { "epoch": 0.4336071511556388, "grad_norm": 1.4524401426315308, "learning_rate": 1.3166191050095888e-05, "loss": 1.5946, "step": 7931 }, { "epoch": 0.43366182359936034, "grad_norm": 1.2904894351959229, "learning_rate": 1.3164457652437285e-05, "loss": 1.4641, "step": 7932 }, { "epoch": 0.4337164960430819, "grad_norm": 1.3798552751541138, "learning_rate": 1.3162724149110016e-05, "loss": 1.5232, "step": 7933 }, { "epoch": 0.4337711684868034, "grad_norm": 1.597743272781372, "learning_rate": 1.3160990540171963e-05, "loss": 1.4766, "step": 7934 }, { "epoch": 0.433825840930525, "grad_norm": 1.507830023765564, "learning_rate": 1.315925682568102e-05, "loss": 1.3998, "step": 7935 }, { "epoch": 0.43388051337424655, "grad_norm": 1.3010252714157104, "learning_rate": 1.3157523005695077e-05, "loss": 1.4759, "step": 7936 }, { "epoch": 0.4339351858179681, "grad_norm": 1.8701435327529907, "learning_rate": 1.315578908027203e-05, "loss": 1.2551, "step": 7937 }, { "epoch": 0.4339898582616897, "grad_norm": 1.221548318862915, "learning_rate": 1.3154055049469782e-05, "loss": 1.56, "step": 7938 }, { "epoch": 0.4340445307054112, "grad_norm": 1.4227172136306763, "learning_rate": 1.3152320913346234e-05, "loss": 1.5522, "step": 7939 }, { "epoch": 0.43409920314913275, "grad_norm": 1.385764479637146, "learning_rate": 1.3150586671959298e-05, "loss": 1.5769, "step": 7940 }, { "epoch": 0.4341538755928543, "grad_norm": 1.4157036542892456, "learning_rate": 1.3148852325366874e-05, "loss": 1.2801, "step": 7941 }, { "epoch": 0.4342085480365759, "grad_norm": 1.659869909286499, "learning_rate": 1.3147117873626886e-05, "loss": 1.3364, "step": 7942 }, { "epoch": 0.4342632204802974, "grad_norm": 1.3477590084075928, "learning_rate": 1.3145383316797244e-05, "loss": 1.2874, "step": 7943 }, { "epoch": 0.43431789292401896, "grad_norm": 1.3124985694885254, "learning_rate": 1.3143648654935875e-05, "loss": 1.4448, "step": 7944 }, { "epoch": 0.43437256536774055, "grad_norm": 2.1117475032806396, "learning_rate": 1.3141913888100699e-05, "loss": 1.2476, "step": 7945 }, { "epoch": 0.4344272378114621, "grad_norm": 1.6304364204406738, "learning_rate": 1.3140179016349648e-05, "loss": 1.3206, "step": 7946 }, { "epoch": 0.4344819102551836, "grad_norm": 1.473453164100647, "learning_rate": 1.3138444039740648e-05, "loss": 1.4267, "step": 7947 }, { "epoch": 0.43453658269890516, "grad_norm": 1.0277721881866455, "learning_rate": 1.3136708958331636e-05, "loss": 1.5161, "step": 7948 }, { "epoch": 0.43459125514262675, "grad_norm": 1.375534176826477, "learning_rate": 1.3134973772180554e-05, "loss": 1.4754, "step": 7949 }, { "epoch": 0.4346459275863483, "grad_norm": 2.0324928760528564, "learning_rate": 1.3133238481345341e-05, "loss": 1.4676, "step": 7950 }, { "epoch": 0.43470060003006983, "grad_norm": 1.6464978456497192, "learning_rate": 1.313150308588394e-05, "loss": 1.2258, "step": 7951 }, { "epoch": 0.4347552724737914, "grad_norm": 1.812315821647644, "learning_rate": 1.3129767585854304e-05, "loss": 1.4969, "step": 7952 }, { "epoch": 0.43480994491751296, "grad_norm": 1.4759207963943481, "learning_rate": 1.3128031981314388e-05, "loss": 1.6662, "step": 7953 }, { "epoch": 0.4348646173612345, "grad_norm": 1.4311467409133911, "learning_rate": 1.312629627232214e-05, "loss": 1.379, "step": 7954 }, { "epoch": 0.43491928980495603, "grad_norm": 2.140381336212158, "learning_rate": 1.3124560458935522e-05, "loss": 1.1208, "step": 7955 }, { "epoch": 0.4349739622486776, "grad_norm": 0.9924528002738953, "learning_rate": 1.3122824541212503e-05, "loss": 1.5358, "step": 7956 }, { "epoch": 0.43502863469239916, "grad_norm": 1.7180957794189453, "learning_rate": 1.3121088519211043e-05, "loss": 1.2507, "step": 7957 }, { "epoch": 0.4350833071361207, "grad_norm": 1.0883604288101196, "learning_rate": 1.311935239298911e-05, "loss": 1.4355, "step": 7958 }, { "epoch": 0.4351379795798423, "grad_norm": 1.7856035232543945, "learning_rate": 1.3117616162604684e-05, "loss": 1.3835, "step": 7959 }, { "epoch": 0.43519265202356383, "grad_norm": 1.7872084379196167, "learning_rate": 1.311587982811574e-05, "loss": 1.1574, "step": 7960 }, { "epoch": 0.43524732446728537, "grad_norm": 1.3062067031860352, "learning_rate": 1.3114143389580254e-05, "loss": 1.5593, "step": 7961 }, { "epoch": 0.4353019969110069, "grad_norm": 1.355840802192688, "learning_rate": 1.3112406847056213e-05, "loss": 1.3827, "step": 7962 }, { "epoch": 0.4353566693547285, "grad_norm": 1.4719347953796387, "learning_rate": 1.3110670200601604e-05, "loss": 1.4407, "step": 7963 }, { "epoch": 0.43541134179845004, "grad_norm": 1.616029977798462, "learning_rate": 1.310893345027442e-05, "loss": 1.207, "step": 7964 }, { "epoch": 0.4354660142421716, "grad_norm": 1.547745943069458, "learning_rate": 1.310719659613265e-05, "loss": 1.4671, "step": 7965 }, { "epoch": 0.43552068668589317, "grad_norm": 1.2317076921463013, "learning_rate": 1.3105459638234294e-05, "loss": 1.4499, "step": 7966 }, { "epoch": 0.4355753591296147, "grad_norm": 1.4829614162445068, "learning_rate": 1.3103722576637357e-05, "loss": 1.5624, "step": 7967 }, { "epoch": 0.43563003157333624, "grad_norm": 1.9314405918121338, "learning_rate": 1.3101985411399838e-05, "loss": 1.5408, "step": 7968 }, { "epoch": 0.4356847040170578, "grad_norm": 1.5270367860794067, "learning_rate": 1.3100248142579743e-05, "loss": 1.5119, "step": 7969 }, { "epoch": 0.43573937646077937, "grad_norm": 1.5931687355041504, "learning_rate": 1.3098510770235093e-05, "loss": 1.2941, "step": 7970 }, { "epoch": 0.4357940489045009, "grad_norm": 1.9888882637023926, "learning_rate": 1.3096773294423896e-05, "loss": 1.4116, "step": 7971 }, { "epoch": 0.43584872134822245, "grad_norm": 1.534515142440796, "learning_rate": 1.3095035715204171e-05, "loss": 1.4594, "step": 7972 }, { "epoch": 0.43590339379194404, "grad_norm": 1.6830006837844849, "learning_rate": 1.3093298032633943e-05, "loss": 1.4397, "step": 7973 }, { "epoch": 0.4359580662356656, "grad_norm": 1.1557999849319458, "learning_rate": 1.3091560246771234e-05, "loss": 1.5221, "step": 7974 }, { "epoch": 0.4360127386793871, "grad_norm": 1.4198713302612305, "learning_rate": 1.3089822357674073e-05, "loss": 1.3023, "step": 7975 }, { "epoch": 0.43606741112310865, "grad_norm": 1.639073133468628, "learning_rate": 1.3088084365400493e-05, "loss": 1.2719, "step": 7976 }, { "epoch": 0.43612208356683024, "grad_norm": 1.5806151628494263, "learning_rate": 1.308634627000853e-05, "loss": 1.395, "step": 7977 }, { "epoch": 0.4361767560105518, "grad_norm": 1.5735894441604614, "learning_rate": 1.3084608071556222e-05, "loss": 1.6546, "step": 7978 }, { "epoch": 0.4362314284542733, "grad_norm": 1.6355172395706177, "learning_rate": 1.3082869770101613e-05, "loss": 1.4232, "step": 7979 }, { "epoch": 0.4362861008979949, "grad_norm": 1.6305851936340332, "learning_rate": 1.3081131365702749e-05, "loss": 1.2582, "step": 7980 }, { "epoch": 0.43634077334171645, "grad_norm": 1.5807453393936157, "learning_rate": 1.3079392858417679e-05, "loss": 1.442, "step": 7981 }, { "epoch": 0.436395445785438, "grad_norm": 1.3143105506896973, "learning_rate": 1.3077654248304452e-05, "loss": 1.469, "step": 7982 }, { "epoch": 0.4364501182291595, "grad_norm": 1.5229548215866089, "learning_rate": 1.307591553542113e-05, "loss": 1.6145, "step": 7983 }, { "epoch": 0.4365047906728811, "grad_norm": 1.3631714582443237, "learning_rate": 1.307417671982577e-05, "loss": 1.4919, "step": 7984 }, { "epoch": 0.43655946311660265, "grad_norm": 1.5664427280426025, "learning_rate": 1.3072437801576438e-05, "loss": 1.3121, "step": 7985 }, { "epoch": 0.4366141355603242, "grad_norm": 1.3333439826965332, "learning_rate": 1.3070698780731194e-05, "loss": 1.453, "step": 7986 }, { "epoch": 0.4366688080040458, "grad_norm": 1.569043517112732, "learning_rate": 1.3068959657348112e-05, "loss": 1.5267, "step": 7987 }, { "epoch": 0.4367234804477673, "grad_norm": 1.8144209384918213, "learning_rate": 1.306722043148527e-05, "loss": 1.3957, "step": 7988 }, { "epoch": 0.43677815289148886, "grad_norm": 1.5027384757995605, "learning_rate": 1.3065481103200736e-05, "loss": 1.5321, "step": 7989 }, { "epoch": 0.4368328253352104, "grad_norm": 2.1702420711517334, "learning_rate": 1.3063741672552597e-05, "loss": 1.3334, "step": 7990 }, { "epoch": 0.436887497778932, "grad_norm": 2.183192491531372, "learning_rate": 1.3062002139598934e-05, "loss": 1.4315, "step": 7991 }, { "epoch": 0.4369421702226535, "grad_norm": 1.2680268287658691, "learning_rate": 1.3060262504397836e-05, "loss": 1.3849, "step": 7992 }, { "epoch": 0.43699684266637506, "grad_norm": 1.7475250959396362, "learning_rate": 1.305852276700739e-05, "loss": 1.3061, "step": 7993 }, { "epoch": 0.43705151511009666, "grad_norm": 1.5099501609802246, "learning_rate": 1.305678292748569e-05, "loss": 1.2428, "step": 7994 }, { "epoch": 0.4371061875538182, "grad_norm": 1.5852965116500854, "learning_rate": 1.3055042985890837e-05, "loss": 1.4999, "step": 7995 }, { "epoch": 0.43716085999753973, "grad_norm": 2.047792673110962, "learning_rate": 1.305330294228093e-05, "loss": 1.2653, "step": 7996 }, { "epoch": 0.43721553244126127, "grad_norm": 1.760535717010498, "learning_rate": 1.305156279671407e-05, "loss": 1.571, "step": 7997 }, { "epoch": 0.43727020488498286, "grad_norm": 1.4199703931808472, "learning_rate": 1.3049822549248372e-05, "loss": 1.3642, "step": 7998 }, { "epoch": 0.4373248773287044, "grad_norm": 1.3598227500915527, "learning_rate": 1.3048082199941941e-05, "loss": 1.589, "step": 7999 }, { "epoch": 0.43737954977242594, "grad_norm": 1.7441439628601074, "learning_rate": 1.304634174885289e-05, "loss": 1.1596, "step": 8000 }, { "epoch": 0.43743422221614753, "grad_norm": 1.7089788913726807, "learning_rate": 1.3044601196039341e-05, "loss": 1.4178, "step": 8001 }, { "epoch": 0.43748889465986907, "grad_norm": 1.6574549674987793, "learning_rate": 1.3042860541559416e-05, "loss": 1.4795, "step": 8002 }, { "epoch": 0.4375435671035906, "grad_norm": 1.2343525886535645, "learning_rate": 1.3041119785471236e-05, "loss": 1.4954, "step": 8003 }, { "epoch": 0.43759823954731214, "grad_norm": 2.0861735343933105, "learning_rate": 1.303937892783293e-05, "loss": 1.5851, "step": 8004 }, { "epoch": 0.43765291199103373, "grad_norm": 1.429660439491272, "learning_rate": 1.3037637968702632e-05, "loss": 1.2884, "step": 8005 }, { "epoch": 0.43770758443475527, "grad_norm": 1.2403844594955444, "learning_rate": 1.303589690813847e-05, "loss": 1.6056, "step": 8006 }, { "epoch": 0.4377622568784768, "grad_norm": 1.7069438695907593, "learning_rate": 1.3034155746198588e-05, "loss": 1.2567, "step": 8007 }, { "epoch": 0.4378169293221984, "grad_norm": 1.2726085186004639, "learning_rate": 1.3032414482941125e-05, "loss": 1.4464, "step": 8008 }, { "epoch": 0.43787160176591994, "grad_norm": 1.2139376401901245, "learning_rate": 1.3030673118424227e-05, "loss": 1.5542, "step": 8009 }, { "epoch": 0.4379262742096415, "grad_norm": 1.5441303253173828, "learning_rate": 1.302893165270604e-05, "loss": 1.3786, "step": 8010 }, { "epoch": 0.437980946653363, "grad_norm": 1.656376838684082, "learning_rate": 1.3027190085844721e-05, "loss": 1.5114, "step": 8011 }, { "epoch": 0.4380356190970846, "grad_norm": 2.0241384506225586, "learning_rate": 1.3025448417898421e-05, "loss": 1.3931, "step": 8012 }, { "epoch": 0.43809029154080614, "grad_norm": 1.4794845581054688, "learning_rate": 1.3023706648925299e-05, "loss": 1.6914, "step": 8013 }, { "epoch": 0.4381449639845277, "grad_norm": 1.4019198417663574, "learning_rate": 1.3021964778983513e-05, "loss": 1.6987, "step": 8014 }, { "epoch": 0.4381996364282493, "grad_norm": 1.3927706480026245, "learning_rate": 1.3020222808131236e-05, "loss": 1.4642, "step": 8015 }, { "epoch": 0.4382543088719708, "grad_norm": 1.8710249662399292, "learning_rate": 1.301848073642663e-05, "loss": 1.442, "step": 8016 }, { "epoch": 0.43830898131569235, "grad_norm": 1.6052125692367554, "learning_rate": 1.301673856392787e-05, "loss": 1.3117, "step": 8017 }, { "epoch": 0.4383636537594139, "grad_norm": 1.4175325632095337, "learning_rate": 1.301499629069313e-05, "loss": 1.433, "step": 8018 }, { "epoch": 0.4384183262031355, "grad_norm": 1.521179437637329, "learning_rate": 1.301325391678059e-05, "loss": 1.4492, "step": 8019 }, { "epoch": 0.438472998646857, "grad_norm": 1.6926823854446411, "learning_rate": 1.301151144224843e-05, "loss": 1.0931, "step": 8020 }, { "epoch": 0.43852767109057855, "grad_norm": 1.674727439880371, "learning_rate": 1.3009768867154834e-05, "loss": 1.3163, "step": 8021 }, { "epoch": 0.43858234353430015, "grad_norm": 1.2164151668548584, "learning_rate": 1.3008026191557996e-05, "loss": 1.3079, "step": 8022 }, { "epoch": 0.4386370159780217, "grad_norm": 1.361409068107605, "learning_rate": 1.3006283415516103e-05, "loss": 1.3233, "step": 8023 }, { "epoch": 0.4386916884217432, "grad_norm": 1.2447913885116577, "learning_rate": 1.3004540539087357e-05, "loss": 1.4653, "step": 8024 }, { "epoch": 0.43874636086546476, "grad_norm": 1.5204616785049438, "learning_rate": 1.3002797562329944e-05, "loss": 1.4442, "step": 8025 }, { "epoch": 0.43880103330918635, "grad_norm": 1.8245962858200073, "learning_rate": 1.300105448530208e-05, "loss": 1.2735, "step": 8026 }, { "epoch": 0.4388557057529079, "grad_norm": 1.438572645187378, "learning_rate": 1.2999311308061964e-05, "loss": 1.537, "step": 8027 }, { "epoch": 0.4389103781966294, "grad_norm": 1.666999101638794, "learning_rate": 1.2997568030667802e-05, "loss": 1.3758, "step": 8028 }, { "epoch": 0.438965050640351, "grad_norm": 1.2617578506469727, "learning_rate": 1.2995824653177813e-05, "loss": 1.6335, "step": 8029 }, { "epoch": 0.43901972308407256, "grad_norm": 1.6514959335327148, "learning_rate": 1.2994081175650206e-05, "loss": 1.4945, "step": 8030 }, { "epoch": 0.4390743955277941, "grad_norm": 2.048206090927124, "learning_rate": 1.2992337598143206e-05, "loss": 1.4725, "step": 8031 }, { "epoch": 0.43912906797151563, "grad_norm": 1.6156541109085083, "learning_rate": 1.2990593920715032e-05, "loss": 1.468, "step": 8032 }, { "epoch": 0.4391837404152372, "grad_norm": 1.5832140445709229, "learning_rate": 1.2988850143423908e-05, "loss": 1.33, "step": 8033 }, { "epoch": 0.43923841285895876, "grad_norm": 1.2931389808654785, "learning_rate": 1.298710626632806e-05, "loss": 1.5913, "step": 8034 }, { "epoch": 0.4392930853026803, "grad_norm": 1.5849729776382446, "learning_rate": 1.2985362289485728e-05, "loss": 1.3054, "step": 8035 }, { "epoch": 0.4393477577464019, "grad_norm": 1.4459220170974731, "learning_rate": 1.2983618212955145e-05, "loss": 1.3497, "step": 8036 }, { "epoch": 0.4394024301901234, "grad_norm": 1.5521773099899292, "learning_rate": 1.2981874036794548e-05, "loss": 1.4198, "step": 8037 }, { "epoch": 0.43945710263384496, "grad_norm": 1.283128261566162, "learning_rate": 1.2980129761062178e-05, "loss": 1.4089, "step": 8038 }, { "epoch": 0.4395117750775665, "grad_norm": 1.6098445653915405, "learning_rate": 1.2978385385816284e-05, "loss": 1.3789, "step": 8039 }, { "epoch": 0.4395664475212881, "grad_norm": 1.6977301836013794, "learning_rate": 1.2976640911115113e-05, "loss": 1.5506, "step": 8040 }, { "epoch": 0.43962111996500963, "grad_norm": 1.5496814250946045, "learning_rate": 1.2974896337016914e-05, "loss": 1.492, "step": 8041 }, { "epoch": 0.43967579240873117, "grad_norm": 1.6710718870162964, "learning_rate": 1.2973151663579948e-05, "loss": 1.4706, "step": 8042 }, { "epoch": 0.43973046485245276, "grad_norm": 1.6223911046981812, "learning_rate": 1.2971406890862473e-05, "loss": 1.2777, "step": 8043 }, { "epoch": 0.4397851372961743, "grad_norm": 1.8088809251785278, "learning_rate": 1.2969662018922748e-05, "loss": 1.3182, "step": 8044 }, { "epoch": 0.43983980973989584, "grad_norm": 1.4629257917404175, "learning_rate": 1.2967917047819038e-05, "loss": 1.5224, "step": 8045 }, { "epoch": 0.4398944821836174, "grad_norm": 1.6179862022399902, "learning_rate": 1.2966171977609614e-05, "loss": 1.4855, "step": 8046 }, { "epoch": 0.43994915462733897, "grad_norm": 1.9378739595413208, "learning_rate": 1.2964426808352747e-05, "loss": 1.5514, "step": 8047 }, { "epoch": 0.4400038270710605, "grad_norm": 1.237722396850586, "learning_rate": 1.2962681540106713e-05, "loss": 1.6837, "step": 8048 }, { "epoch": 0.44005849951478204, "grad_norm": 1.284472942352295, "learning_rate": 1.296093617292979e-05, "loss": 1.3453, "step": 8049 }, { "epoch": 0.44011317195850364, "grad_norm": 1.6795446872711182, "learning_rate": 1.295919070688026e-05, "loss": 1.1301, "step": 8050 }, { "epoch": 0.4401678444022252, "grad_norm": 1.6764540672302246, "learning_rate": 1.2957445142016412e-05, "loss": 1.5414, "step": 8051 }, { "epoch": 0.4402225168459467, "grad_norm": 1.619103193283081, "learning_rate": 1.2955699478396527e-05, "loss": 1.1838, "step": 8052 }, { "epoch": 0.4402771892896683, "grad_norm": 1.321914792060852, "learning_rate": 1.29539537160789e-05, "loss": 1.4747, "step": 8053 }, { "epoch": 0.44033186173338984, "grad_norm": 1.3304698467254639, "learning_rate": 1.295220785512183e-05, "loss": 1.2181, "step": 8054 }, { "epoch": 0.4403865341771114, "grad_norm": 1.2432695627212524, "learning_rate": 1.2950461895583608e-05, "loss": 1.5087, "step": 8055 }, { "epoch": 0.4404412066208329, "grad_norm": 1.7304970026016235, "learning_rate": 1.2948715837522542e-05, "loss": 1.4036, "step": 8056 }, { "epoch": 0.4404958790645545, "grad_norm": 1.3029147386550903, "learning_rate": 1.2946969680996939e-05, "loss": 1.3324, "step": 8057 }, { "epoch": 0.44055055150827604, "grad_norm": 2.039837121963501, "learning_rate": 1.2945223426065096e-05, "loss": 1.3717, "step": 8058 }, { "epoch": 0.4406052239519976, "grad_norm": 1.4364407062530518, "learning_rate": 1.2943477072785336e-05, "loss": 1.4102, "step": 8059 }, { "epoch": 0.4406598963957192, "grad_norm": 1.6319738626480103, "learning_rate": 1.2941730621215966e-05, "loss": 1.2963, "step": 8060 }, { "epoch": 0.4407145688394407, "grad_norm": 1.7752571105957031, "learning_rate": 1.293998407141531e-05, "loss": 1.5252, "step": 8061 }, { "epoch": 0.44076924128316225, "grad_norm": 1.4179809093475342, "learning_rate": 1.2938237423441686e-05, "loss": 1.4062, "step": 8062 }, { "epoch": 0.4408239137268838, "grad_norm": 1.951672077178955, "learning_rate": 1.2936490677353422e-05, "loss": 1.5336, "step": 8063 }, { "epoch": 0.4408785861706054, "grad_norm": 1.6987366676330566, "learning_rate": 1.2934743833208842e-05, "loss": 1.5601, "step": 8064 }, { "epoch": 0.4409332586143269, "grad_norm": 1.4891901016235352, "learning_rate": 1.2932996891066279e-05, "loss": 1.5041, "step": 8065 }, { "epoch": 0.44098793105804845, "grad_norm": 1.5410467386245728, "learning_rate": 1.2931249850984066e-05, "loss": 1.276, "step": 8066 }, { "epoch": 0.44104260350177005, "grad_norm": 1.7936818599700928, "learning_rate": 1.292950271302054e-05, "loss": 1.5668, "step": 8067 }, { "epoch": 0.4410972759454916, "grad_norm": 1.6849002838134766, "learning_rate": 1.292775547723405e-05, "loss": 1.2815, "step": 8068 }, { "epoch": 0.4411519483892131, "grad_norm": 1.6702102422714233, "learning_rate": 1.292600814368293e-05, "loss": 1.5353, "step": 8069 }, { "epoch": 0.44120662083293466, "grad_norm": 1.7961419820785522, "learning_rate": 1.2924260712425536e-05, "loss": 1.5581, "step": 8070 }, { "epoch": 0.44126129327665625, "grad_norm": 1.5824393033981323, "learning_rate": 1.2922513183520212e-05, "loss": 1.4321, "step": 8071 }, { "epoch": 0.4413159657203778, "grad_norm": 1.3362174034118652, "learning_rate": 1.2920765557025316e-05, "loss": 1.5319, "step": 8072 }, { "epoch": 0.4413706381640993, "grad_norm": 1.5422250032424927, "learning_rate": 1.2919017832999203e-05, "loss": 1.1102, "step": 8073 }, { "epoch": 0.4414253106078209, "grad_norm": 1.6979447603225708, "learning_rate": 1.2917270011500233e-05, "loss": 1.1637, "step": 8074 }, { "epoch": 0.44147998305154246, "grad_norm": 1.593833565711975, "learning_rate": 1.2915522092586777e-05, "loss": 1.7139, "step": 8075 }, { "epoch": 0.441534655495264, "grad_norm": 1.5793615579605103, "learning_rate": 1.2913774076317193e-05, "loss": 1.4113, "step": 8076 }, { "epoch": 0.44158932793898553, "grad_norm": 2.314288854598999, "learning_rate": 1.2912025962749856e-05, "loss": 0.9255, "step": 8077 }, { "epoch": 0.4416440003827071, "grad_norm": 1.274243950843811, "learning_rate": 1.2910277751943141e-05, "loss": 1.298, "step": 8078 }, { "epoch": 0.44169867282642866, "grad_norm": 1.4698200225830078, "learning_rate": 1.290852944395542e-05, "loss": 1.3909, "step": 8079 }, { "epoch": 0.4417533452701502, "grad_norm": 1.7711883783340454, "learning_rate": 1.2906781038845076e-05, "loss": 1.0558, "step": 8080 }, { "epoch": 0.4418080177138718, "grad_norm": 1.4738898277282715, "learning_rate": 1.2905032536670492e-05, "loss": 1.595, "step": 8081 }, { "epoch": 0.44186269015759333, "grad_norm": 1.2920875549316406, "learning_rate": 1.2903283937490056e-05, "loss": 1.5392, "step": 8082 }, { "epoch": 0.44191736260131487, "grad_norm": 1.4763727188110352, "learning_rate": 1.2901535241362158e-05, "loss": 1.2962, "step": 8083 }, { "epoch": 0.4419720350450364, "grad_norm": 1.602399468421936, "learning_rate": 1.2899786448345186e-05, "loss": 1.537, "step": 8084 }, { "epoch": 0.442026707488758, "grad_norm": 1.6330798864364624, "learning_rate": 1.2898037558497542e-05, "loss": 1.4136, "step": 8085 }, { "epoch": 0.44208137993247953, "grad_norm": 1.8599040508270264, "learning_rate": 1.2896288571877623e-05, "loss": 1.5652, "step": 8086 }, { "epoch": 0.44213605237620107, "grad_norm": 1.622414469718933, "learning_rate": 1.2894539488543832e-05, "loss": 1.4372, "step": 8087 }, { "epoch": 0.44219072481992266, "grad_norm": 1.6085765361785889, "learning_rate": 1.2892790308554574e-05, "loss": 1.5902, "step": 8088 }, { "epoch": 0.4422453972636442, "grad_norm": 1.385465145111084, "learning_rate": 1.2891041031968261e-05, "loss": 1.415, "step": 8089 }, { "epoch": 0.44230006970736574, "grad_norm": 1.4616578817367554, "learning_rate": 1.2889291658843306e-05, "loss": 1.5672, "step": 8090 }, { "epoch": 0.4423547421510873, "grad_norm": 1.5771653652191162, "learning_rate": 1.288754218923812e-05, "loss": 1.2469, "step": 8091 }, { "epoch": 0.44240941459480887, "grad_norm": 1.9085063934326172, "learning_rate": 1.2885792623211124e-05, "loss": 1.5265, "step": 8092 }, { "epoch": 0.4424640870385304, "grad_norm": 1.6847286224365234, "learning_rate": 1.2884042960820742e-05, "loss": 1.2926, "step": 8093 }, { "epoch": 0.44251875948225194, "grad_norm": 1.3348711729049683, "learning_rate": 1.2882293202125395e-05, "loss": 1.2047, "step": 8094 }, { "epoch": 0.44257343192597354, "grad_norm": 1.3073780536651611, "learning_rate": 1.2880543347183519e-05, "loss": 1.4545, "step": 8095 }, { "epoch": 0.4426281043696951, "grad_norm": 1.7151159048080444, "learning_rate": 1.287879339605354e-05, "loss": 1.5923, "step": 8096 }, { "epoch": 0.4426827768134166, "grad_norm": 1.6454739570617676, "learning_rate": 1.2877043348793893e-05, "loss": 1.2765, "step": 8097 }, { "epoch": 0.44273744925713815, "grad_norm": 1.2933087348937988, "learning_rate": 1.2875293205463018e-05, "loss": 1.6383, "step": 8098 }, { "epoch": 0.44279212170085974, "grad_norm": 1.6183019876480103, "learning_rate": 1.2873542966119355e-05, "loss": 1.4648, "step": 8099 }, { "epoch": 0.4428467941445813, "grad_norm": 1.2152273654937744, "learning_rate": 1.2871792630821349e-05, "loss": 1.2046, "step": 8100 }, { "epoch": 0.4429014665883028, "grad_norm": 1.5068029165267944, "learning_rate": 1.2870042199627448e-05, "loss": 1.6005, "step": 8101 }, { "epoch": 0.4429561390320244, "grad_norm": 2.2329812049865723, "learning_rate": 1.2868291672596104e-05, "loss": 1.5237, "step": 8102 }, { "epoch": 0.44301081147574595, "grad_norm": 1.7597063779830933, "learning_rate": 1.2866541049785773e-05, "loss": 1.3507, "step": 8103 }, { "epoch": 0.4430654839194675, "grad_norm": 1.8091295957565308, "learning_rate": 1.2864790331254906e-05, "loss": 1.3165, "step": 8104 }, { "epoch": 0.443120156363189, "grad_norm": 1.3619604110717773, "learning_rate": 1.2863039517061968e-05, "loss": 1.276, "step": 8105 }, { "epoch": 0.4431748288069106, "grad_norm": 1.7686209678649902, "learning_rate": 1.2861288607265425e-05, "loss": 1.3349, "step": 8106 }, { "epoch": 0.44322950125063215, "grad_norm": 2.1248066425323486, "learning_rate": 1.2859537601923737e-05, "loss": 1.4308, "step": 8107 }, { "epoch": 0.4432841736943537, "grad_norm": 1.4429097175598145, "learning_rate": 1.285778650109538e-05, "loss": 1.2445, "step": 8108 }, { "epoch": 0.4433388461380753, "grad_norm": 1.5925313234329224, "learning_rate": 1.2856035304838827e-05, "loss": 1.3715, "step": 8109 }, { "epoch": 0.4433935185817968, "grad_norm": 1.2614619731903076, "learning_rate": 1.2854284013212555e-05, "loss": 1.4995, "step": 8110 }, { "epoch": 0.44344819102551836, "grad_norm": 1.766526222229004, "learning_rate": 1.2852532626275038e-05, "loss": 1.5336, "step": 8111 }, { "epoch": 0.4435028634692399, "grad_norm": 1.056537389755249, "learning_rate": 1.2850781144084763e-05, "loss": 1.5731, "step": 8112 }, { "epoch": 0.4435575359129615, "grad_norm": 1.431517243385315, "learning_rate": 1.284902956670022e-05, "loss": 1.2543, "step": 8113 }, { "epoch": 0.443612208356683, "grad_norm": 1.4366172552108765, "learning_rate": 1.284727789417989e-05, "loss": 1.498, "step": 8114 }, { "epoch": 0.44366688080040456, "grad_norm": 1.7262049913406372, "learning_rate": 1.2845526126582273e-05, "loss": 1.3681, "step": 8115 }, { "epoch": 0.44372155324412615, "grad_norm": 1.755946397781372, "learning_rate": 1.2843774263965857e-05, "loss": 1.1862, "step": 8116 }, { "epoch": 0.4437762256878477, "grad_norm": 1.347845435142517, "learning_rate": 1.2842022306389153e-05, "loss": 1.6595, "step": 8117 }, { "epoch": 0.44383089813156923, "grad_norm": 1.6522873640060425, "learning_rate": 1.2840270253910648e-05, "loss": 1.4676, "step": 8118 }, { "epoch": 0.44388557057529077, "grad_norm": 1.867841362953186, "learning_rate": 1.2838518106588856e-05, "loss": 1.3736, "step": 8119 }, { "epoch": 0.44394024301901236, "grad_norm": 1.5711597204208374, "learning_rate": 1.2836765864482286e-05, "loss": 1.5111, "step": 8120 }, { "epoch": 0.4439949154627339, "grad_norm": 1.2153418064117432, "learning_rate": 1.2835013527649443e-05, "loss": 1.3429, "step": 8121 }, { "epoch": 0.44404958790645543, "grad_norm": 1.5353946685791016, "learning_rate": 1.283326109614885e-05, "loss": 1.6656, "step": 8122 }, { "epoch": 0.444104260350177, "grad_norm": 1.7294031381607056, "learning_rate": 1.2831508570039017e-05, "loss": 1.3126, "step": 8123 }, { "epoch": 0.44415893279389856, "grad_norm": 1.8735580444335938, "learning_rate": 1.282975594937847e-05, "loss": 1.5413, "step": 8124 }, { "epoch": 0.4442136052376201, "grad_norm": 1.874268889427185, "learning_rate": 1.2828003234225733e-05, "loss": 1.4004, "step": 8125 }, { "epoch": 0.44426827768134164, "grad_norm": 1.7512766122817993, "learning_rate": 1.2826250424639329e-05, "loss": 1.4807, "step": 8126 }, { "epoch": 0.44432295012506323, "grad_norm": 1.1509332656860352, "learning_rate": 1.2824497520677794e-05, "loss": 1.3669, "step": 8127 }, { "epoch": 0.44437762256878477, "grad_norm": 1.6016439199447632, "learning_rate": 1.2822744522399658e-05, "loss": 1.3884, "step": 8128 }, { "epoch": 0.4444322950125063, "grad_norm": 1.3837898969650269, "learning_rate": 1.282099142986346e-05, "loss": 1.3856, "step": 8129 }, { "epoch": 0.4444869674562279, "grad_norm": 1.4431365728378296, "learning_rate": 1.2819238243127736e-05, "loss": 1.4649, "step": 8130 }, { "epoch": 0.44454163989994944, "grad_norm": 1.7799152135849, "learning_rate": 1.2817484962251033e-05, "loss": 1.3985, "step": 8131 }, { "epoch": 0.444596312343671, "grad_norm": 1.72176194190979, "learning_rate": 1.2815731587291893e-05, "loss": 1.4102, "step": 8132 }, { "epoch": 0.4446509847873925, "grad_norm": 1.5067596435546875, "learning_rate": 1.2813978118308872e-05, "loss": 1.3169, "step": 8133 }, { "epoch": 0.4447056572311141, "grad_norm": 1.4802905321121216, "learning_rate": 1.2812224555360518e-05, "loss": 1.2951, "step": 8134 }, { "epoch": 0.44476032967483564, "grad_norm": 1.4588502645492554, "learning_rate": 1.2810470898505384e-05, "loss": 1.4167, "step": 8135 }, { "epoch": 0.4448150021185572, "grad_norm": 1.365122675895691, "learning_rate": 1.2808717147802035e-05, "loss": 1.4273, "step": 8136 }, { "epoch": 0.44486967456227877, "grad_norm": 1.33549165725708, "learning_rate": 1.2806963303309025e-05, "loss": 1.6616, "step": 8137 }, { "epoch": 0.4449243470060003, "grad_norm": 1.2655587196350098, "learning_rate": 1.2805209365084928e-05, "loss": 1.5938, "step": 8138 }, { "epoch": 0.44497901944972185, "grad_norm": 1.3209364414215088, "learning_rate": 1.2803455333188306e-05, "loss": 1.4045, "step": 8139 }, { "epoch": 0.4450336918934434, "grad_norm": 1.1840323209762573, "learning_rate": 1.2801701207677731e-05, "loss": 1.4886, "step": 8140 }, { "epoch": 0.445088364337165, "grad_norm": 1.5818711519241333, "learning_rate": 1.279994698861178e-05, "loss": 1.5139, "step": 8141 }, { "epoch": 0.4451430367808865, "grad_norm": 1.2446471452713013, "learning_rate": 1.279819267604903e-05, "loss": 1.5018, "step": 8142 }, { "epoch": 0.44519770922460805, "grad_norm": 1.8299585580825806, "learning_rate": 1.2796438270048057e-05, "loss": 1.3849, "step": 8143 }, { "epoch": 0.44525238166832964, "grad_norm": 1.715919852256775, "learning_rate": 1.2794683770667448e-05, "loss": 1.4719, "step": 8144 }, { "epoch": 0.4453070541120512, "grad_norm": 1.5894051790237427, "learning_rate": 1.2792929177965793e-05, "loss": 1.6284, "step": 8145 }, { "epoch": 0.4453617265557727, "grad_norm": 1.346168875694275, "learning_rate": 1.2791174492001677e-05, "loss": 1.4177, "step": 8146 }, { "epoch": 0.44541639899949426, "grad_norm": 1.4722404479980469, "learning_rate": 1.2789419712833698e-05, "loss": 1.2373, "step": 8147 }, { "epoch": 0.44547107144321585, "grad_norm": 1.3630555868148804, "learning_rate": 1.2787664840520446e-05, "loss": 1.2874, "step": 8148 }, { "epoch": 0.4455257438869374, "grad_norm": 1.4276931285858154, "learning_rate": 1.2785909875120523e-05, "loss": 1.448, "step": 8149 }, { "epoch": 0.4455804163306589, "grad_norm": 1.41645085811615, "learning_rate": 1.2784154816692533e-05, "loss": 1.5379, "step": 8150 }, { "epoch": 0.4456350887743805, "grad_norm": 1.6590783596038818, "learning_rate": 1.278239966529508e-05, "loss": 1.4214, "step": 8151 }, { "epoch": 0.44568976121810205, "grad_norm": 1.5040361881256104, "learning_rate": 1.2780644420986774e-05, "loss": 1.4453, "step": 8152 }, { "epoch": 0.4457444336618236, "grad_norm": 1.2594020366668701, "learning_rate": 1.2778889083826225e-05, "loss": 1.3878, "step": 8153 }, { "epoch": 0.44579910610554513, "grad_norm": 1.8331036567687988, "learning_rate": 1.277713365387205e-05, "loss": 1.5961, "step": 8154 }, { "epoch": 0.4458537785492667, "grad_norm": 1.2629365921020508, "learning_rate": 1.2775378131182867e-05, "loss": 1.4032, "step": 8155 }, { "epoch": 0.44590845099298826, "grad_norm": 1.4782001972198486, "learning_rate": 1.2773622515817292e-05, "loss": 1.5045, "step": 8156 }, { "epoch": 0.4459631234367098, "grad_norm": 1.7277644872665405, "learning_rate": 1.2771866807833952e-05, "loss": 1.2961, "step": 8157 }, { "epoch": 0.4460177958804314, "grad_norm": 1.6775174140930176, "learning_rate": 1.2770111007291476e-05, "loss": 1.2011, "step": 8158 }, { "epoch": 0.4460724683241529, "grad_norm": 1.5269508361816406, "learning_rate": 1.2768355114248493e-05, "loss": 1.3951, "step": 8159 }, { "epoch": 0.44612714076787446, "grad_norm": 1.446560025215149, "learning_rate": 1.276659912876364e-05, "loss": 1.4919, "step": 8160 }, { "epoch": 0.446181813211596, "grad_norm": 1.5652241706848145, "learning_rate": 1.2764843050895548e-05, "loss": 1.5309, "step": 8161 }, { "epoch": 0.4462364856553176, "grad_norm": 1.5111768245697021, "learning_rate": 1.2763086880702859e-05, "loss": 1.6509, "step": 8162 }, { "epoch": 0.44629115809903913, "grad_norm": 1.496193766593933, "learning_rate": 1.2761330618244215e-05, "loss": 1.368, "step": 8163 }, { "epoch": 0.44634583054276067, "grad_norm": 1.3547091484069824, "learning_rate": 1.275957426357826e-05, "loss": 1.3817, "step": 8164 }, { "epoch": 0.44640050298648226, "grad_norm": 1.6340296268463135, "learning_rate": 1.2757817816763645e-05, "loss": 1.4617, "step": 8165 }, { "epoch": 0.4464551754302038, "grad_norm": 1.620073676109314, "learning_rate": 1.2756061277859024e-05, "loss": 1.3768, "step": 8166 }, { "epoch": 0.44650984787392534, "grad_norm": 1.6793102025985718, "learning_rate": 1.275430464692305e-05, "loss": 1.3545, "step": 8167 }, { "epoch": 0.4465645203176469, "grad_norm": 1.155965805053711, "learning_rate": 1.2752547924014378e-05, "loss": 1.546, "step": 8168 }, { "epoch": 0.44661919276136847, "grad_norm": 1.9757009744644165, "learning_rate": 1.2750791109191677e-05, "loss": 1.6037, "step": 8169 }, { "epoch": 0.44667386520509, "grad_norm": 1.6698212623596191, "learning_rate": 1.27490342025136e-05, "loss": 1.3983, "step": 8170 }, { "epoch": 0.44672853764881154, "grad_norm": 1.7124236822128296, "learning_rate": 1.2747277204038818e-05, "loss": 1.2239, "step": 8171 }, { "epoch": 0.44678321009253313, "grad_norm": 1.405550241470337, "learning_rate": 1.2745520113826009e-05, "loss": 1.2643, "step": 8172 }, { "epoch": 0.44683788253625467, "grad_norm": 1.4640984535217285, "learning_rate": 1.274376293193384e-05, "loss": 1.4851, "step": 8173 }, { "epoch": 0.4468925549799762, "grad_norm": 1.6788666248321533, "learning_rate": 1.2742005658420988e-05, "loss": 1.3869, "step": 8174 }, { "epoch": 0.44694722742369775, "grad_norm": 1.5118874311447144, "learning_rate": 1.2740248293346134e-05, "loss": 1.7089, "step": 8175 }, { "epoch": 0.44700189986741934, "grad_norm": 1.533774495124817, "learning_rate": 1.2738490836767958e-05, "loss": 1.5347, "step": 8176 }, { "epoch": 0.4470565723111409, "grad_norm": 1.6355006694793701, "learning_rate": 1.2736733288745144e-05, "loss": 1.6365, "step": 8177 }, { "epoch": 0.4471112447548624, "grad_norm": 1.683141827583313, "learning_rate": 1.2734975649336385e-05, "loss": 1.3222, "step": 8178 }, { "epoch": 0.447165917198584, "grad_norm": 1.3522275686264038, "learning_rate": 1.2733217918600374e-05, "loss": 1.5989, "step": 8179 }, { "epoch": 0.44722058964230554, "grad_norm": 1.4806946516036987, "learning_rate": 1.2731460096595802e-05, "loss": 1.3209, "step": 8180 }, { "epoch": 0.4472752620860271, "grad_norm": 1.375083088874817, "learning_rate": 1.272970218338137e-05, "loss": 1.59, "step": 8181 }, { "epoch": 0.4473299345297486, "grad_norm": 1.8986952304840088, "learning_rate": 1.2727944179015773e-05, "loss": 1.1383, "step": 8182 }, { "epoch": 0.4473846069734702, "grad_norm": 1.905146837234497, "learning_rate": 1.2726186083557719e-05, "loss": 1.2677, "step": 8183 }, { "epoch": 0.44743927941719175, "grad_norm": 1.687307596206665, "learning_rate": 1.2724427897065915e-05, "loss": 1.2912, "step": 8184 }, { "epoch": 0.4474939518609133, "grad_norm": 1.4794118404388428, "learning_rate": 1.2722669619599068e-05, "loss": 1.6512, "step": 8185 }, { "epoch": 0.4475486243046349, "grad_norm": 1.7225141525268555, "learning_rate": 1.2720911251215897e-05, "loss": 1.2864, "step": 8186 }, { "epoch": 0.4476032967483564, "grad_norm": 1.351758360862732, "learning_rate": 1.2719152791975113e-05, "loss": 1.3567, "step": 8187 }, { "epoch": 0.44765796919207795, "grad_norm": 1.4555569887161255, "learning_rate": 1.2717394241935437e-05, "loss": 1.1784, "step": 8188 }, { "epoch": 0.4477126416357995, "grad_norm": 1.1703919172286987, "learning_rate": 1.271563560115559e-05, "loss": 1.4363, "step": 8189 }, { "epoch": 0.4477673140795211, "grad_norm": 1.4772610664367676, "learning_rate": 1.2713876869694299e-05, "loss": 1.348, "step": 8190 }, { "epoch": 0.4478219865232426, "grad_norm": 1.3771981000900269, "learning_rate": 1.2712118047610291e-05, "loss": 1.2465, "step": 8191 }, { "epoch": 0.44787665896696416, "grad_norm": 1.8912807703018188, "learning_rate": 1.2710359134962295e-05, "loss": 1.4769, "step": 8192 }, { "epoch": 0.44793133141068575, "grad_norm": 1.482688307762146, "learning_rate": 1.270860013180905e-05, "loss": 1.2757, "step": 8193 }, { "epoch": 0.4479860038544073, "grad_norm": 1.5850863456726074, "learning_rate": 1.2706841038209293e-05, "loss": 1.3529, "step": 8194 }, { "epoch": 0.4480406762981288, "grad_norm": 1.6502540111541748, "learning_rate": 1.2705081854221758e-05, "loss": 1.332, "step": 8195 }, { "epoch": 0.44809534874185036, "grad_norm": 1.4876171350479126, "learning_rate": 1.2703322579905191e-05, "loss": 1.3816, "step": 8196 }, { "epoch": 0.44815002118557196, "grad_norm": 1.5103631019592285, "learning_rate": 1.2701563215318343e-05, "loss": 1.3398, "step": 8197 }, { "epoch": 0.4482046936292935, "grad_norm": 1.112539529800415, "learning_rate": 1.2699803760519955e-05, "loss": 1.6529, "step": 8198 }, { "epoch": 0.44825936607301503, "grad_norm": 1.5937914848327637, "learning_rate": 1.2698044215568787e-05, "loss": 1.4039, "step": 8199 }, { "epoch": 0.4483140385167366, "grad_norm": 1.3678674697875977, "learning_rate": 1.2696284580523592e-05, "loss": 1.6166, "step": 8200 }, { "epoch": 0.44836871096045816, "grad_norm": 2.0089211463928223, "learning_rate": 1.2694524855443131e-05, "loss": 1.4856, "step": 8201 }, { "epoch": 0.4484233834041797, "grad_norm": 1.7224587202072144, "learning_rate": 1.2692765040386157e-05, "loss": 1.4281, "step": 8202 }, { "epoch": 0.44847805584790124, "grad_norm": 1.812721610069275, "learning_rate": 1.269100513541144e-05, "loss": 1.3231, "step": 8203 }, { "epoch": 0.44853272829162283, "grad_norm": 1.5255577564239502, "learning_rate": 1.268924514057775e-05, "loss": 1.5704, "step": 8204 }, { "epoch": 0.44858740073534437, "grad_norm": 1.5670700073242188, "learning_rate": 1.2687485055943852e-05, "loss": 1.4075, "step": 8205 }, { "epoch": 0.4486420731790659, "grad_norm": 1.5689994096755981, "learning_rate": 1.2685724881568522e-05, "loss": 1.2814, "step": 8206 }, { "epoch": 0.4486967456227875, "grad_norm": 1.4504964351654053, "learning_rate": 1.2683964617510536e-05, "loss": 1.3012, "step": 8207 }, { "epoch": 0.44875141806650903, "grad_norm": 1.409509301185608, "learning_rate": 1.2682204263828675e-05, "loss": 1.5606, "step": 8208 }, { "epoch": 0.44880609051023057, "grad_norm": 1.1773737668991089, "learning_rate": 1.2680443820581717e-05, "loss": 1.3974, "step": 8209 }, { "epoch": 0.4488607629539521, "grad_norm": 1.1763046979904175, "learning_rate": 1.2678683287828451e-05, "loss": 1.3532, "step": 8210 }, { "epoch": 0.4489154353976737, "grad_norm": 1.8808763027191162, "learning_rate": 1.2676922665627664e-05, "loss": 1.4881, "step": 8211 }, { "epoch": 0.44897010784139524, "grad_norm": 1.523558497428894, "learning_rate": 1.2675161954038147e-05, "loss": 1.2038, "step": 8212 }, { "epoch": 0.4490247802851168, "grad_norm": 2.180427312850952, "learning_rate": 1.2673401153118699e-05, "loss": 1.3589, "step": 8213 }, { "epoch": 0.44907945272883837, "grad_norm": 1.8116745948791504, "learning_rate": 1.2671640262928109e-05, "loss": 1.4574, "step": 8214 }, { "epoch": 0.4491341251725599, "grad_norm": 1.639218807220459, "learning_rate": 1.2669879283525182e-05, "loss": 1.4916, "step": 8215 }, { "epoch": 0.44918879761628144, "grad_norm": 1.6050888299942017, "learning_rate": 1.2668118214968721e-05, "loss": 1.1698, "step": 8216 }, { "epoch": 0.449243470060003, "grad_norm": 2.834124803543091, "learning_rate": 1.266635705731753e-05, "loss": 1.3502, "step": 8217 }, { "epoch": 0.4492981425037246, "grad_norm": 1.3017581701278687, "learning_rate": 1.2664595810630424e-05, "loss": 1.455, "step": 8218 }, { "epoch": 0.4493528149474461, "grad_norm": 1.324672818183899, "learning_rate": 1.2662834474966208e-05, "loss": 1.7597, "step": 8219 }, { "epoch": 0.44940748739116765, "grad_norm": 1.504941463470459, "learning_rate": 1.2661073050383701e-05, "loss": 1.3901, "step": 8220 }, { "epoch": 0.44946215983488924, "grad_norm": 1.4197423458099365, "learning_rate": 1.2659311536941721e-05, "loss": 1.4262, "step": 8221 }, { "epoch": 0.4495168322786108, "grad_norm": 1.5701764822006226, "learning_rate": 1.265754993469909e-05, "loss": 1.5075, "step": 8222 }, { "epoch": 0.4495715047223323, "grad_norm": 1.6425635814666748, "learning_rate": 1.2655788243714629e-05, "loss": 1.3709, "step": 8223 }, { "epoch": 0.44962617716605385, "grad_norm": 1.7471739053726196, "learning_rate": 1.2654026464047165e-05, "loss": 1.5073, "step": 8224 }, { "epoch": 0.44968084960977545, "grad_norm": 1.4826691150665283, "learning_rate": 1.2652264595755532e-05, "loss": 1.4291, "step": 8225 }, { "epoch": 0.449735522053497, "grad_norm": 1.7250162363052368, "learning_rate": 1.265050263889856e-05, "loss": 1.3866, "step": 8226 }, { "epoch": 0.4497901944972185, "grad_norm": 2.021803379058838, "learning_rate": 1.2648740593535084e-05, "loss": 1.4513, "step": 8227 }, { "epoch": 0.4498448669409401, "grad_norm": 1.4889103174209595, "learning_rate": 1.2646978459723945e-05, "loss": 1.1616, "step": 8228 }, { "epoch": 0.44989953938466165, "grad_norm": 1.439231276512146, "learning_rate": 1.2645216237523986e-05, "loss": 1.7341, "step": 8229 }, { "epoch": 0.4499542118283832, "grad_norm": 1.5424675941467285, "learning_rate": 1.2643453926994045e-05, "loss": 1.2903, "step": 8230 }, { "epoch": 0.4500088842721047, "grad_norm": 1.5817739963531494, "learning_rate": 1.2641691528192976e-05, "loss": 1.2793, "step": 8231 }, { "epoch": 0.4500635567158263, "grad_norm": 1.359634280204773, "learning_rate": 1.2639929041179628e-05, "loss": 1.5039, "step": 8232 }, { "epoch": 0.45011822915954786, "grad_norm": 1.675331473350525, "learning_rate": 1.2638166466012858e-05, "loss": 1.4264, "step": 8233 }, { "epoch": 0.4501729016032694, "grad_norm": 1.652420163154602, "learning_rate": 1.2636403802751516e-05, "loss": 1.3246, "step": 8234 }, { "epoch": 0.450227574046991, "grad_norm": 1.5798457860946655, "learning_rate": 1.2634641051454461e-05, "loss": 1.5683, "step": 8235 }, { "epoch": 0.4502822464907125, "grad_norm": 2.234515905380249, "learning_rate": 1.2632878212180566e-05, "loss": 1.4078, "step": 8236 }, { "epoch": 0.45033691893443406, "grad_norm": 1.5039222240447998, "learning_rate": 1.2631115284988685e-05, "loss": 1.3875, "step": 8237 }, { "epoch": 0.4503915913781556, "grad_norm": 1.625691294670105, "learning_rate": 1.262935226993769e-05, "loss": 1.3222, "step": 8238 }, { "epoch": 0.4504462638218772, "grad_norm": 1.2488471269607544, "learning_rate": 1.2627589167086455e-05, "loss": 1.2079, "step": 8239 }, { "epoch": 0.45050093626559873, "grad_norm": 1.8075683116912842, "learning_rate": 1.2625825976493853e-05, "loss": 1.365, "step": 8240 }, { "epoch": 0.45055560870932027, "grad_norm": 1.6265969276428223, "learning_rate": 1.2624062698218755e-05, "loss": 1.4488, "step": 8241 }, { "epoch": 0.45061028115304186, "grad_norm": 1.3884773254394531, "learning_rate": 1.262229933232005e-05, "loss": 1.5175, "step": 8242 }, { "epoch": 0.4506649535967634, "grad_norm": 1.8411874771118164, "learning_rate": 1.2620535878856617e-05, "loss": 1.2503, "step": 8243 }, { "epoch": 0.45071962604048493, "grad_norm": 1.576514720916748, "learning_rate": 1.261877233788734e-05, "loss": 1.2809, "step": 8244 }, { "epoch": 0.45077429848420647, "grad_norm": 1.5890593528747559, "learning_rate": 1.261700870947111e-05, "loss": 1.3238, "step": 8245 }, { "epoch": 0.45082897092792806, "grad_norm": 1.3749711513519287, "learning_rate": 1.261524499366682e-05, "loss": 1.1421, "step": 8246 }, { "epoch": 0.4508836433716496, "grad_norm": 1.4224886894226074, "learning_rate": 1.2613481190533362e-05, "loss": 1.2747, "step": 8247 }, { "epoch": 0.45093831581537114, "grad_norm": 1.8661094903945923, "learning_rate": 1.2611717300129631e-05, "loss": 1.3457, "step": 8248 }, { "epoch": 0.45099298825909273, "grad_norm": 1.285811424255371, "learning_rate": 1.2609953322514531e-05, "loss": 1.4511, "step": 8249 }, { "epoch": 0.45104766070281427, "grad_norm": 1.2440094947814941, "learning_rate": 1.260818925774697e-05, "loss": 1.3306, "step": 8250 }, { "epoch": 0.4511023331465358, "grad_norm": 1.55301833152771, "learning_rate": 1.2606425105885844e-05, "loss": 1.2835, "step": 8251 }, { "epoch": 0.45115700559025734, "grad_norm": 1.6191391944885254, "learning_rate": 1.2604660866990072e-05, "loss": 1.4086, "step": 8252 }, { "epoch": 0.45121167803397894, "grad_norm": 1.713870644569397, "learning_rate": 1.2602896541118562e-05, "loss": 1.2456, "step": 8253 }, { "epoch": 0.4512663504777005, "grad_norm": 1.481843113899231, "learning_rate": 1.2601132128330224e-05, "loss": 1.2812, "step": 8254 }, { "epoch": 0.451321022921422, "grad_norm": 2.5754544734954834, "learning_rate": 1.2599367628683982e-05, "loss": 1.5485, "step": 8255 }, { "epoch": 0.4513756953651436, "grad_norm": 1.3649797439575195, "learning_rate": 1.2597603042238756e-05, "loss": 1.3609, "step": 8256 }, { "epoch": 0.45143036780886514, "grad_norm": 1.7339948415756226, "learning_rate": 1.2595838369053471e-05, "loss": 1.4999, "step": 8257 }, { "epoch": 0.4514850402525867, "grad_norm": 1.6229051351547241, "learning_rate": 1.2594073609187047e-05, "loss": 1.2305, "step": 8258 }, { "epoch": 0.45153971269630827, "grad_norm": 1.6860415935516357, "learning_rate": 1.2592308762698422e-05, "loss": 1.9221, "step": 8259 }, { "epoch": 0.4515943851400298, "grad_norm": 1.7030614614486694, "learning_rate": 1.2590543829646524e-05, "loss": 1.4115, "step": 8260 }, { "epoch": 0.45164905758375135, "grad_norm": 1.2514585256576538, "learning_rate": 1.2588778810090288e-05, "loss": 1.3489, "step": 8261 }, { "epoch": 0.4517037300274729, "grad_norm": 1.6312824487686157, "learning_rate": 1.258701370408865e-05, "loss": 1.4224, "step": 8262 }, { "epoch": 0.4517584024711945, "grad_norm": 1.7996381521224976, "learning_rate": 1.2585248511700556e-05, "loss": 1.6769, "step": 8263 }, { "epoch": 0.451813074914916, "grad_norm": 1.3725719451904297, "learning_rate": 1.258348323298495e-05, "loss": 1.4026, "step": 8264 }, { "epoch": 0.45186774735863755, "grad_norm": 1.2609704732894897, "learning_rate": 1.2581717868000775e-05, "loss": 1.3147, "step": 8265 }, { "epoch": 0.45192241980235914, "grad_norm": 1.4648182392120361, "learning_rate": 1.257995241680698e-05, "loss": 1.2622, "step": 8266 }, { "epoch": 0.4519770922460807, "grad_norm": 1.6485921144485474, "learning_rate": 1.2578186879462525e-05, "loss": 1.1128, "step": 8267 }, { "epoch": 0.4520317646898022, "grad_norm": 1.3231990337371826, "learning_rate": 1.2576421256026355e-05, "loss": 1.5104, "step": 8268 }, { "epoch": 0.45208643713352376, "grad_norm": 1.8646669387817383, "learning_rate": 1.2574655546557432e-05, "loss": 1.3341, "step": 8269 }, { "epoch": 0.45214110957724535, "grad_norm": 1.713239073753357, "learning_rate": 1.257288975111472e-05, "loss": 1.4438, "step": 8270 }, { "epoch": 0.4521957820209669, "grad_norm": 1.3840391635894775, "learning_rate": 1.2571123869757186e-05, "loss": 1.7692, "step": 8271 }, { "epoch": 0.4522504544646884, "grad_norm": 1.5735880136489868, "learning_rate": 1.2569357902543793e-05, "loss": 1.6482, "step": 8272 }, { "epoch": 0.45230512690841, "grad_norm": 1.8636045455932617, "learning_rate": 1.2567591849533507e-05, "loss": 1.329, "step": 8273 }, { "epoch": 0.45235979935213155, "grad_norm": 1.530547022819519, "learning_rate": 1.2565825710785305e-05, "loss": 1.2058, "step": 8274 }, { "epoch": 0.4524144717958531, "grad_norm": 1.3076521158218384, "learning_rate": 1.256405948635816e-05, "loss": 1.5024, "step": 8275 }, { "epoch": 0.4524691442395746, "grad_norm": 1.7178937196731567, "learning_rate": 1.2562293176311054e-05, "loss": 1.3736, "step": 8276 }, { "epoch": 0.4525238166832962, "grad_norm": 1.4037572145462036, "learning_rate": 1.2560526780702963e-05, "loss": 1.2853, "step": 8277 }, { "epoch": 0.45257848912701776, "grad_norm": 1.3036282062530518, "learning_rate": 1.255876029959288e-05, "loss": 1.4775, "step": 8278 }, { "epoch": 0.4526331615707393, "grad_norm": 1.567874550819397, "learning_rate": 1.2556993733039785e-05, "loss": 1.7051, "step": 8279 }, { "epoch": 0.4526878340144609, "grad_norm": 1.4331612586975098, "learning_rate": 1.2555227081102663e-05, "loss": 1.4838, "step": 8280 }, { "epoch": 0.4527425064581824, "grad_norm": 1.2218283414840698, "learning_rate": 1.255346034384052e-05, "loss": 1.8123, "step": 8281 }, { "epoch": 0.45279717890190396, "grad_norm": 1.826436996459961, "learning_rate": 1.2551693521312341e-05, "loss": 1.312, "step": 8282 }, { "epoch": 0.4528518513456255, "grad_norm": 1.4901907444000244, "learning_rate": 1.2549926613577126e-05, "loss": 1.5602, "step": 8283 }, { "epoch": 0.4529065237893471, "grad_norm": 1.3542168140411377, "learning_rate": 1.2548159620693881e-05, "loss": 1.3569, "step": 8284 }, { "epoch": 0.45296119623306863, "grad_norm": 1.7091467380523682, "learning_rate": 1.2546392542721606e-05, "loss": 1.402, "step": 8285 }, { "epoch": 0.45301586867679017, "grad_norm": 1.3999801874160767, "learning_rate": 1.2544625379719305e-05, "loss": 1.4681, "step": 8286 }, { "epoch": 0.45307054112051176, "grad_norm": 1.5979292392730713, "learning_rate": 1.2542858131745997e-05, "loss": 1.5931, "step": 8287 }, { "epoch": 0.4531252135642333, "grad_norm": 1.2369749546051025, "learning_rate": 1.2541090798860686e-05, "loss": 1.3485, "step": 8288 }, { "epoch": 0.45317988600795484, "grad_norm": 1.3767027854919434, "learning_rate": 1.253932338112239e-05, "loss": 1.1892, "step": 8289 }, { "epoch": 0.4532345584516764, "grad_norm": 1.7623051404953003, "learning_rate": 1.2537555878590126e-05, "loss": 1.4799, "step": 8290 }, { "epoch": 0.45328923089539797, "grad_norm": 1.7859666347503662, "learning_rate": 1.2535788291322921e-05, "loss": 1.3177, "step": 8291 }, { "epoch": 0.4533439033391195, "grad_norm": 1.3843927383422852, "learning_rate": 1.2534020619379794e-05, "loss": 1.4346, "step": 8292 }, { "epoch": 0.45339857578284104, "grad_norm": 1.5592644214630127, "learning_rate": 1.2532252862819772e-05, "loss": 1.3473, "step": 8293 }, { "epoch": 0.45345324822656263, "grad_norm": 1.8342688083648682, "learning_rate": 1.253048502170188e-05, "loss": 1.4509, "step": 8294 }, { "epoch": 0.45350792067028417, "grad_norm": 1.3863539695739746, "learning_rate": 1.2528717096085162e-05, "loss": 1.4356, "step": 8295 }, { "epoch": 0.4535625931140057, "grad_norm": 1.47028648853302, "learning_rate": 1.2526949086028641e-05, "loss": 1.3839, "step": 8296 }, { "epoch": 0.45361726555772725, "grad_norm": 1.6276257038116455, "learning_rate": 1.2525180991591363e-05, "loss": 1.4455, "step": 8297 }, { "epoch": 0.45367193800144884, "grad_norm": 2.7509427070617676, "learning_rate": 1.2523412812832368e-05, "loss": 1.3498, "step": 8298 }, { "epoch": 0.4537266104451704, "grad_norm": 1.3039535284042358, "learning_rate": 1.2521644549810695e-05, "loss": 1.1615, "step": 8299 }, { "epoch": 0.4537812828888919, "grad_norm": 1.5806516408920288, "learning_rate": 1.2519876202585393e-05, "loss": 1.4322, "step": 8300 }, { "epoch": 0.4538359553326135, "grad_norm": 1.476752519607544, "learning_rate": 1.2518107771215511e-05, "loss": 1.4691, "step": 8301 }, { "epoch": 0.45389062777633504, "grad_norm": 1.7851080894470215, "learning_rate": 1.2516339255760103e-05, "loss": 1.5497, "step": 8302 }, { "epoch": 0.4539453002200566, "grad_norm": 2.1312434673309326, "learning_rate": 1.2514570656278222e-05, "loss": 1.3551, "step": 8303 }, { "epoch": 0.4539999726637781, "grad_norm": 1.6936976909637451, "learning_rate": 1.2512801972828927e-05, "loss": 1.4003, "step": 8304 }, { "epoch": 0.4540546451074997, "grad_norm": 1.2353522777557373, "learning_rate": 1.2511033205471277e-05, "loss": 1.4207, "step": 8305 }, { "epoch": 0.45410931755122125, "grad_norm": 1.593531847000122, "learning_rate": 1.250926435426434e-05, "loss": 1.8716, "step": 8306 }, { "epoch": 0.4541639899949428, "grad_norm": 1.646995186805725, "learning_rate": 1.2507495419267173e-05, "loss": 1.5851, "step": 8307 }, { "epoch": 0.4542186624386644, "grad_norm": 1.8681131601333618, "learning_rate": 1.2505726400538849e-05, "loss": 1.4573, "step": 8308 }, { "epoch": 0.4542733348823859, "grad_norm": 2.1147429943084717, "learning_rate": 1.2503957298138443e-05, "loss": 1.2571, "step": 8309 }, { "epoch": 0.45432800732610745, "grad_norm": 1.6494158506393433, "learning_rate": 1.2502188112125027e-05, "loss": 1.621, "step": 8310 }, { "epoch": 0.454382679769829, "grad_norm": 1.3285871744155884, "learning_rate": 1.2500418842557678e-05, "loss": 1.5893, "step": 8311 }, { "epoch": 0.4544373522135506, "grad_norm": 1.5515680313110352, "learning_rate": 1.2498649489495476e-05, "loss": 1.5993, "step": 8312 }, { "epoch": 0.4544920246572721, "grad_norm": 1.8479063510894775, "learning_rate": 1.2496880052997507e-05, "loss": 1.3067, "step": 8313 }, { "epoch": 0.45454669710099366, "grad_norm": 1.2940537929534912, "learning_rate": 1.249511053312285e-05, "loss": 1.2253, "step": 8314 }, { "epoch": 0.45460136954471525, "grad_norm": 1.7683874368667603, "learning_rate": 1.24933409299306e-05, "loss": 1.2776, "step": 8315 }, { "epoch": 0.4546560419884368, "grad_norm": 1.441920518875122, "learning_rate": 1.2491571243479846e-05, "loss": 1.3174, "step": 8316 }, { "epoch": 0.4547107144321583, "grad_norm": 1.7641230821609497, "learning_rate": 1.248980147382968e-05, "loss": 1.3309, "step": 8317 }, { "epoch": 0.45476538687587986, "grad_norm": 1.5652551651000977, "learning_rate": 1.24880316210392e-05, "loss": 1.5424, "step": 8318 }, { "epoch": 0.45482005931960146, "grad_norm": 1.648282527923584, "learning_rate": 1.2486261685167507e-05, "loss": 1.3396, "step": 8319 }, { "epoch": 0.454874731763323, "grad_norm": 2.5389528274536133, "learning_rate": 1.2484491666273701e-05, "loss": 1.2541, "step": 8320 }, { "epoch": 0.45492940420704453, "grad_norm": 1.5338218212127686, "learning_rate": 1.2482721564416887e-05, "loss": 1.3171, "step": 8321 }, { "epoch": 0.4549840766507661, "grad_norm": 1.6857942342758179, "learning_rate": 1.2480951379656175e-05, "loss": 1.2635, "step": 8322 }, { "epoch": 0.45503874909448766, "grad_norm": 1.7030476331710815, "learning_rate": 1.2479181112050677e-05, "loss": 1.375, "step": 8323 }, { "epoch": 0.4550934215382092, "grad_norm": 1.2067598104476929, "learning_rate": 1.2477410761659503e-05, "loss": 1.4816, "step": 8324 }, { "epoch": 0.45514809398193073, "grad_norm": 2.068605422973633, "learning_rate": 1.247564032854177e-05, "loss": 1.185, "step": 8325 }, { "epoch": 0.4552027664256523, "grad_norm": 1.8470818996429443, "learning_rate": 1.2473869812756598e-05, "loss": 1.3336, "step": 8326 }, { "epoch": 0.45525743886937386, "grad_norm": 1.8098477125167847, "learning_rate": 1.2472099214363105e-05, "loss": 1.4865, "step": 8327 }, { "epoch": 0.4553121113130954, "grad_norm": 1.486994743347168, "learning_rate": 1.247032853342042e-05, "loss": 1.2069, "step": 8328 }, { "epoch": 0.455366783756817, "grad_norm": 1.9912793636322021, "learning_rate": 1.2468557769987667e-05, "loss": 1.0283, "step": 8329 }, { "epoch": 0.45542145620053853, "grad_norm": 1.4836335182189941, "learning_rate": 1.246678692412398e-05, "loss": 1.5247, "step": 8330 }, { "epoch": 0.45547612864426007, "grad_norm": 1.4757169485092163, "learning_rate": 1.2465015995888489e-05, "loss": 1.374, "step": 8331 }, { "epoch": 0.4555308010879816, "grad_norm": 1.5596967935562134, "learning_rate": 1.2463244985340329e-05, "loss": 1.3559, "step": 8332 }, { "epoch": 0.4555854735317032, "grad_norm": 1.5589772462844849, "learning_rate": 1.2461473892538637e-05, "loss": 1.2394, "step": 8333 }, { "epoch": 0.45564014597542474, "grad_norm": 1.449580430984497, "learning_rate": 1.245970271754256e-05, "loss": 1.4391, "step": 8334 }, { "epoch": 0.4556948184191463, "grad_norm": 1.3444546461105347, "learning_rate": 1.2457931460411233e-05, "loss": 1.4232, "step": 8335 }, { "epoch": 0.45574949086286787, "grad_norm": 1.597023367881775, "learning_rate": 1.2456160121203808e-05, "loss": 1.5701, "step": 8336 }, { "epoch": 0.4558041633065894, "grad_norm": 1.6757572889328003, "learning_rate": 1.2454388699979435e-05, "loss": 1.2086, "step": 8337 }, { "epoch": 0.45585883575031094, "grad_norm": 1.3036500215530396, "learning_rate": 1.2452617196797261e-05, "loss": 1.4799, "step": 8338 }, { "epoch": 0.4559135081940325, "grad_norm": 1.5823293924331665, "learning_rate": 1.2450845611716441e-05, "loss": 1.4106, "step": 8339 }, { "epoch": 0.4559681806377541, "grad_norm": 1.4186168909072876, "learning_rate": 1.2449073944796142e-05, "loss": 1.3718, "step": 8340 }, { "epoch": 0.4560228530814756, "grad_norm": 1.5105098485946655, "learning_rate": 1.2447302196095512e-05, "loss": 1.2246, "step": 8341 }, { "epoch": 0.45607752552519715, "grad_norm": 1.6503534317016602, "learning_rate": 1.2445530365673722e-05, "loss": 1.3414, "step": 8342 }, { "epoch": 0.45613219796891874, "grad_norm": 1.2207764387130737, "learning_rate": 1.2443758453589934e-05, "loss": 1.4943, "step": 8343 }, { "epoch": 0.4561868704126403, "grad_norm": 1.8373656272888184, "learning_rate": 1.2441986459903315e-05, "loss": 1.3776, "step": 8344 }, { "epoch": 0.4562415428563618, "grad_norm": 1.2859946489334106, "learning_rate": 1.244021438467304e-05, "loss": 1.2411, "step": 8345 }, { "epoch": 0.45629621530008335, "grad_norm": 1.251986026763916, "learning_rate": 1.2438442227958277e-05, "loss": 1.2953, "step": 8346 }, { "epoch": 0.45635088774380494, "grad_norm": 1.4736993312835693, "learning_rate": 1.2436669989818209e-05, "loss": 1.3129, "step": 8347 }, { "epoch": 0.4564055601875265, "grad_norm": 1.2429816722869873, "learning_rate": 1.2434897670312012e-05, "loss": 1.5996, "step": 8348 }, { "epoch": 0.456460232631248, "grad_norm": 1.2840092182159424, "learning_rate": 1.2433125269498865e-05, "loss": 1.2564, "step": 8349 }, { "epoch": 0.4565149050749696, "grad_norm": 1.1243367195129395, "learning_rate": 1.243135278743796e-05, "loss": 1.4084, "step": 8350 }, { "epoch": 0.45656957751869115, "grad_norm": 1.2943267822265625, "learning_rate": 1.242958022418848e-05, "loss": 1.5022, "step": 8351 }, { "epoch": 0.4566242499624127, "grad_norm": 1.5180977582931519, "learning_rate": 1.2427807579809611e-05, "loss": 1.5064, "step": 8352 }, { "epoch": 0.4566789224061342, "grad_norm": 1.6757357120513916, "learning_rate": 1.2426034854360554e-05, "loss": 1.5324, "step": 8353 }, { "epoch": 0.4567335948498558, "grad_norm": 1.3749581575393677, "learning_rate": 1.24242620479005e-05, "loss": 1.2476, "step": 8354 }, { "epoch": 0.45678826729357735, "grad_norm": 1.9830231666564941, "learning_rate": 1.2422489160488644e-05, "loss": 1.1766, "step": 8355 }, { "epoch": 0.4568429397372989, "grad_norm": 1.8282872438430786, "learning_rate": 1.2420716192184195e-05, "loss": 1.5064, "step": 8356 }, { "epoch": 0.4568976121810205, "grad_norm": 1.5725511312484741, "learning_rate": 1.2418943143046346e-05, "loss": 1.3515, "step": 8357 }, { "epoch": 0.456952284624742, "grad_norm": 1.7282419204711914, "learning_rate": 1.2417170013134315e-05, "loss": 1.4484, "step": 8358 }, { "epoch": 0.45700695706846356, "grad_norm": 1.2151738405227661, "learning_rate": 1.2415396802507302e-05, "loss": 1.3679, "step": 8359 }, { "epoch": 0.4570616295121851, "grad_norm": 1.7479190826416016, "learning_rate": 1.2413623511224522e-05, "loss": 1.353, "step": 8360 }, { "epoch": 0.4571163019559067, "grad_norm": 1.304974913597107, "learning_rate": 1.2411850139345192e-05, "loss": 1.7151, "step": 8361 }, { "epoch": 0.4571709743996282, "grad_norm": 2.2192490100860596, "learning_rate": 1.2410076686928522e-05, "loss": 1.4608, "step": 8362 }, { "epoch": 0.45722564684334976, "grad_norm": 1.3780807256698608, "learning_rate": 1.240830315403374e-05, "loss": 1.5369, "step": 8363 }, { "epoch": 0.45728031928707136, "grad_norm": 1.2810956239700317, "learning_rate": 1.2406529540720063e-05, "loss": 1.7357, "step": 8364 }, { "epoch": 0.4573349917307929, "grad_norm": 1.6208422183990479, "learning_rate": 1.2404755847046717e-05, "loss": 1.3146, "step": 8365 }, { "epoch": 0.45738966417451443, "grad_norm": 1.2637323141098022, "learning_rate": 1.2402982073072931e-05, "loss": 1.6213, "step": 8366 }, { "epoch": 0.45744433661823597, "grad_norm": 1.505691409111023, "learning_rate": 1.2401208218857932e-05, "loss": 1.5285, "step": 8367 }, { "epoch": 0.45749900906195756, "grad_norm": 1.6826056241989136, "learning_rate": 1.239943428446096e-05, "loss": 1.5951, "step": 8368 }, { "epoch": 0.4575536815056791, "grad_norm": 2.82729172706604, "learning_rate": 1.2397660269941246e-05, "loss": 1.176, "step": 8369 }, { "epoch": 0.45760835394940064, "grad_norm": 1.3240785598754883, "learning_rate": 1.2395886175358027e-05, "loss": 1.4978, "step": 8370 }, { "epoch": 0.45766302639312223, "grad_norm": 1.7720149755477905, "learning_rate": 1.239411200077055e-05, "loss": 1.3849, "step": 8371 }, { "epoch": 0.45771769883684377, "grad_norm": 1.3682233095169067, "learning_rate": 1.2392337746238052e-05, "loss": 1.4394, "step": 8372 }, { "epoch": 0.4577723712805653, "grad_norm": 1.5318282842636108, "learning_rate": 1.2390563411819786e-05, "loss": 1.2713, "step": 8373 }, { "epoch": 0.45782704372428684, "grad_norm": 1.7073348760604858, "learning_rate": 1.2388788997574994e-05, "loss": 1.6893, "step": 8374 }, { "epoch": 0.45788171616800843, "grad_norm": 1.6141637563705444, "learning_rate": 1.2387014503562935e-05, "loss": 1.6226, "step": 8375 }, { "epoch": 0.45793638861172997, "grad_norm": 2.0537941455841064, "learning_rate": 1.238523992984286e-05, "loss": 1.3581, "step": 8376 }, { "epoch": 0.4579910610554515, "grad_norm": 1.990234613418579, "learning_rate": 1.2383465276474024e-05, "loss": 1.3997, "step": 8377 }, { "epoch": 0.4580457334991731, "grad_norm": 1.4018441438674927, "learning_rate": 1.2381690543515692e-05, "loss": 1.4701, "step": 8378 }, { "epoch": 0.45810040594289464, "grad_norm": 1.4213474988937378, "learning_rate": 1.2379915731027126e-05, "loss": 1.3933, "step": 8379 }, { "epoch": 0.4581550783866162, "grad_norm": 1.2719111442565918, "learning_rate": 1.2378140839067585e-05, "loss": 1.5792, "step": 8380 }, { "epoch": 0.4582097508303377, "grad_norm": 1.4528422355651855, "learning_rate": 1.2376365867696341e-05, "loss": 1.5615, "step": 8381 }, { "epoch": 0.4582644232740593, "grad_norm": 1.4090690612792969, "learning_rate": 1.2374590816972667e-05, "loss": 1.2805, "step": 8382 }, { "epoch": 0.45831909571778084, "grad_norm": 1.709394931793213, "learning_rate": 1.2372815686955835e-05, "loss": 1.4058, "step": 8383 }, { "epoch": 0.4583737681615024, "grad_norm": 1.4211146831512451, "learning_rate": 1.2371040477705113e-05, "loss": 1.4016, "step": 8384 }, { "epoch": 0.458428440605224, "grad_norm": 1.5165609121322632, "learning_rate": 1.236926518927979e-05, "loss": 1.4603, "step": 8385 }, { "epoch": 0.4584831130489455, "grad_norm": 1.4622083902359009, "learning_rate": 1.2367489821739142e-05, "loss": 1.468, "step": 8386 }, { "epoch": 0.45853778549266705, "grad_norm": 1.39779794216156, "learning_rate": 1.2365714375142452e-05, "loss": 1.4582, "step": 8387 }, { "epoch": 0.4585924579363886, "grad_norm": 1.5813961029052734, "learning_rate": 1.236393884954901e-05, "loss": 1.4855, "step": 8388 }, { "epoch": 0.4586471303801102, "grad_norm": 1.467574954032898, "learning_rate": 1.2362163245018104e-05, "loss": 1.5989, "step": 8389 }, { "epoch": 0.4587018028238317, "grad_norm": 1.5478856563568115, "learning_rate": 1.2360387561609021e-05, "loss": 1.5675, "step": 8390 }, { "epoch": 0.45875647526755325, "grad_norm": 1.4810469150543213, "learning_rate": 1.2358611799381058e-05, "loss": 1.4064, "step": 8391 }, { "epoch": 0.45881114771127485, "grad_norm": 1.3244820833206177, "learning_rate": 1.2356835958393513e-05, "loss": 1.2918, "step": 8392 }, { "epoch": 0.4588658201549964, "grad_norm": 2.360818386077881, "learning_rate": 1.2355060038705686e-05, "loss": 1.3982, "step": 8393 }, { "epoch": 0.4589204925987179, "grad_norm": 1.6908419132232666, "learning_rate": 1.2353284040376878e-05, "loss": 1.3741, "step": 8394 }, { "epoch": 0.45897516504243946, "grad_norm": 1.4811445474624634, "learning_rate": 1.2351507963466394e-05, "loss": 1.5701, "step": 8395 }, { "epoch": 0.45902983748616105, "grad_norm": 1.297670602798462, "learning_rate": 1.2349731808033542e-05, "loss": 1.4981, "step": 8396 }, { "epoch": 0.4590845099298826, "grad_norm": 1.416335940361023, "learning_rate": 1.2347955574137629e-05, "loss": 1.2485, "step": 8397 }, { "epoch": 0.4591391823736041, "grad_norm": 1.425537109375, "learning_rate": 1.234617926183797e-05, "loss": 1.5556, "step": 8398 }, { "epoch": 0.4591938548173257, "grad_norm": 2.4852640628814697, "learning_rate": 1.2344402871193876e-05, "loss": 1.2413, "step": 8399 }, { "epoch": 0.45924852726104726, "grad_norm": 1.4352951049804688, "learning_rate": 1.2342626402264677e-05, "loss": 1.6543, "step": 8400 }, { "epoch": 0.4593031997047688, "grad_norm": 1.7849191427230835, "learning_rate": 1.234084985510968e-05, "loss": 1.3879, "step": 8401 }, { "epoch": 0.45935787214849033, "grad_norm": 1.9194221496582031, "learning_rate": 1.2339073229788214e-05, "loss": 1.3665, "step": 8402 }, { "epoch": 0.4594125445922119, "grad_norm": 1.591257929801941, "learning_rate": 1.2337296526359608e-05, "loss": 1.451, "step": 8403 }, { "epoch": 0.45946721703593346, "grad_norm": 1.410502552986145, "learning_rate": 1.2335519744883182e-05, "loss": 1.4758, "step": 8404 }, { "epoch": 0.459521889479655, "grad_norm": 1.3669512271881104, "learning_rate": 1.233374288541827e-05, "loss": 1.4607, "step": 8405 }, { "epoch": 0.4595765619233766, "grad_norm": 1.3386040925979614, "learning_rate": 1.2331965948024209e-05, "loss": 1.6161, "step": 8406 }, { "epoch": 0.45963123436709813, "grad_norm": 1.3852357864379883, "learning_rate": 1.2330188932760333e-05, "loss": 1.2479, "step": 8407 }, { "epoch": 0.45968590681081967, "grad_norm": 1.697757363319397, "learning_rate": 1.2328411839685984e-05, "loss": 1.4395, "step": 8408 }, { "epoch": 0.4597405792545412, "grad_norm": 1.2922077178955078, "learning_rate": 1.2326634668860493e-05, "loss": 1.2865, "step": 8409 }, { "epoch": 0.4597952516982628, "grad_norm": 1.3752665519714355, "learning_rate": 1.2324857420343217e-05, "loss": 1.2544, "step": 8410 }, { "epoch": 0.45984992414198433, "grad_norm": 1.3156213760375977, "learning_rate": 1.2323080094193492e-05, "loss": 1.4825, "step": 8411 }, { "epoch": 0.45990459658570587, "grad_norm": 1.2948368787765503, "learning_rate": 1.2321302690470671e-05, "loss": 1.5034, "step": 8412 }, { "epoch": 0.45995926902942746, "grad_norm": 1.8889894485473633, "learning_rate": 1.2319525209234109e-05, "loss": 1.4241, "step": 8413 }, { "epoch": 0.460013941473149, "grad_norm": 1.3560482263565063, "learning_rate": 1.2317747650543158e-05, "loss": 1.4769, "step": 8414 }, { "epoch": 0.46006861391687054, "grad_norm": 1.2564517259597778, "learning_rate": 1.2315970014457172e-05, "loss": 1.5483, "step": 8415 }, { "epoch": 0.4601232863605921, "grad_norm": 1.5273149013519287, "learning_rate": 1.2314192301035512e-05, "loss": 1.3668, "step": 8416 }, { "epoch": 0.46017795880431367, "grad_norm": 2.0659825801849365, "learning_rate": 1.2312414510337543e-05, "loss": 1.2948, "step": 8417 }, { "epoch": 0.4602326312480352, "grad_norm": 1.7201833724975586, "learning_rate": 1.2310636642422624e-05, "loss": 1.7141, "step": 8418 }, { "epoch": 0.46028730369175674, "grad_norm": 1.8481310606002808, "learning_rate": 1.2308858697350128e-05, "loss": 1.4505, "step": 8419 }, { "epoch": 0.46034197613547834, "grad_norm": 1.4662010669708252, "learning_rate": 1.230708067517942e-05, "loss": 1.3195, "step": 8420 }, { "epoch": 0.4603966485791999, "grad_norm": 1.6985173225402832, "learning_rate": 1.2305302575969878e-05, "loss": 1.3599, "step": 8421 }, { "epoch": 0.4604513210229214, "grad_norm": 1.2462007999420166, "learning_rate": 1.2303524399780873e-05, "loss": 1.5417, "step": 8422 }, { "epoch": 0.46050599346664295, "grad_norm": 1.8103742599487305, "learning_rate": 1.230174614667178e-05, "loss": 1.3786, "step": 8423 }, { "epoch": 0.46056066591036454, "grad_norm": 1.72665536403656, "learning_rate": 1.2299967816701984e-05, "loss": 1.3815, "step": 8424 }, { "epoch": 0.4606153383540861, "grad_norm": 1.6942225694656372, "learning_rate": 1.2298189409930863e-05, "loss": 1.3101, "step": 8425 }, { "epoch": 0.4606700107978076, "grad_norm": 1.374701976776123, "learning_rate": 1.2296410926417806e-05, "loss": 1.359, "step": 8426 }, { "epoch": 0.4607246832415292, "grad_norm": 1.7069143056869507, "learning_rate": 1.2294632366222201e-05, "loss": 1.2748, "step": 8427 }, { "epoch": 0.46077935568525075, "grad_norm": 1.5745755434036255, "learning_rate": 1.2292853729403437e-05, "loss": 1.3303, "step": 8428 }, { "epoch": 0.4608340281289723, "grad_norm": 1.6231409311294556, "learning_rate": 1.2291075016020906e-05, "loss": 1.7463, "step": 8429 }, { "epoch": 0.4608887005726938, "grad_norm": 1.3475853204727173, "learning_rate": 1.2289296226134002e-05, "loss": 1.4253, "step": 8430 }, { "epoch": 0.4609433730164154, "grad_norm": 1.7958141565322876, "learning_rate": 1.2287517359802129e-05, "loss": 1.3628, "step": 8431 }, { "epoch": 0.46099804546013695, "grad_norm": 1.969929814338684, "learning_rate": 1.2285738417084679e-05, "loss": 1.5076, "step": 8432 }, { "epoch": 0.4610527179038585, "grad_norm": 1.1621143817901611, "learning_rate": 1.228395939804106e-05, "loss": 1.5689, "step": 8433 }, { "epoch": 0.4611073903475801, "grad_norm": 1.4047256708145142, "learning_rate": 1.2282180302730683e-05, "loss": 1.5029, "step": 8434 }, { "epoch": 0.4611620627913016, "grad_norm": 1.5748846530914307, "learning_rate": 1.2280401131212945e-05, "loss": 1.2468, "step": 8435 }, { "epoch": 0.46121673523502316, "grad_norm": 1.4643760919570923, "learning_rate": 1.2278621883547264e-05, "loss": 1.4672, "step": 8436 }, { "epoch": 0.4612714076787447, "grad_norm": 1.531885027885437, "learning_rate": 1.2276842559793049e-05, "loss": 1.4922, "step": 8437 }, { "epoch": 0.4613260801224663, "grad_norm": 1.1350082159042358, "learning_rate": 1.2275063160009722e-05, "loss": 1.5297, "step": 8438 }, { "epoch": 0.4613807525661878, "grad_norm": 1.204853892326355, "learning_rate": 1.2273283684256695e-05, "loss": 1.4785, "step": 8439 }, { "epoch": 0.46143542500990936, "grad_norm": 1.8102797269821167, "learning_rate": 1.2271504132593388e-05, "loss": 1.3591, "step": 8440 }, { "epoch": 0.46149009745363095, "grad_norm": 1.6586754322052002, "learning_rate": 1.2269724505079234e-05, "loss": 1.5125, "step": 8441 }, { "epoch": 0.4615447698973525, "grad_norm": 1.3619942665100098, "learning_rate": 1.226794480177365e-05, "loss": 1.4151, "step": 8442 }, { "epoch": 0.46159944234107403, "grad_norm": 1.2025114297866821, "learning_rate": 1.2266165022736067e-05, "loss": 1.4588, "step": 8443 }, { "epoch": 0.46165411478479557, "grad_norm": 1.3162602186203003, "learning_rate": 1.2264385168025917e-05, "loss": 1.5638, "step": 8444 }, { "epoch": 0.46170878722851716, "grad_norm": 1.4200690984725952, "learning_rate": 1.2262605237702631e-05, "loss": 1.4366, "step": 8445 }, { "epoch": 0.4617634596722387, "grad_norm": 1.7487893104553223, "learning_rate": 1.2260825231825648e-05, "loss": 1.508, "step": 8446 }, { "epoch": 0.46181813211596023, "grad_norm": 1.194015383720398, "learning_rate": 1.2259045150454407e-05, "loss": 1.5475, "step": 8447 }, { "epoch": 0.4618728045596818, "grad_norm": 1.2983906269073486, "learning_rate": 1.2257264993648345e-05, "loss": 1.5245, "step": 8448 }, { "epoch": 0.46192747700340336, "grad_norm": 1.8573744297027588, "learning_rate": 1.225548476146691e-05, "loss": 1.1445, "step": 8449 }, { "epoch": 0.4619821494471249, "grad_norm": 1.6409493684768677, "learning_rate": 1.2253704453969544e-05, "loss": 1.4016, "step": 8450 }, { "epoch": 0.46203682189084644, "grad_norm": 1.6181247234344482, "learning_rate": 1.2251924071215697e-05, "loss": 1.4021, "step": 8451 }, { "epoch": 0.46209149433456803, "grad_norm": 1.4340219497680664, "learning_rate": 1.2250143613264824e-05, "loss": 1.3953, "step": 8452 }, { "epoch": 0.46214616677828957, "grad_norm": 2.0364081859588623, "learning_rate": 1.2248363080176373e-05, "loss": 1.4124, "step": 8453 }, { "epoch": 0.4622008392220111, "grad_norm": 1.6473952531814575, "learning_rate": 1.2246582472009804e-05, "loss": 1.2678, "step": 8454 }, { "epoch": 0.4622555116657327, "grad_norm": 1.4436686038970947, "learning_rate": 1.2244801788824577e-05, "loss": 1.11, "step": 8455 }, { "epoch": 0.46231018410945424, "grad_norm": 1.6599639654159546, "learning_rate": 1.2243021030680149e-05, "loss": 1.298, "step": 8456 }, { "epoch": 0.4623648565531758, "grad_norm": 1.412703275680542, "learning_rate": 1.2241240197635982e-05, "loss": 1.3922, "step": 8457 }, { "epoch": 0.46241952899689737, "grad_norm": 1.58863365650177, "learning_rate": 1.2239459289751548e-05, "loss": 1.3961, "step": 8458 }, { "epoch": 0.4624742014406189, "grad_norm": 1.5630015134811401, "learning_rate": 1.2237678307086314e-05, "loss": 1.1998, "step": 8459 }, { "epoch": 0.46252887388434044, "grad_norm": 1.565580129623413, "learning_rate": 1.2235897249699749e-05, "loss": 1.5293, "step": 8460 }, { "epoch": 0.462583546328062, "grad_norm": 1.320203185081482, "learning_rate": 1.223411611765133e-05, "loss": 1.565, "step": 8461 }, { "epoch": 0.46263821877178357, "grad_norm": 1.704870581626892, "learning_rate": 1.223233491100053e-05, "loss": 1.2217, "step": 8462 }, { "epoch": 0.4626928912155051, "grad_norm": 1.7595773935317993, "learning_rate": 1.2230553629806829e-05, "loss": 1.3648, "step": 8463 }, { "epoch": 0.46274756365922665, "grad_norm": 1.7475416660308838, "learning_rate": 1.2228772274129708e-05, "loss": 1.3154, "step": 8464 }, { "epoch": 0.46280223610294824, "grad_norm": 1.5539116859436035, "learning_rate": 1.2226990844028653e-05, "loss": 1.5686, "step": 8465 }, { "epoch": 0.4628569085466698, "grad_norm": 1.4271929264068604, "learning_rate": 1.2225209339563144e-05, "loss": 1.4149, "step": 8466 }, { "epoch": 0.4629115809903913, "grad_norm": 1.1305772066116333, "learning_rate": 1.2223427760792678e-05, "loss": 1.3722, "step": 8467 }, { "epoch": 0.46296625343411285, "grad_norm": 1.423306941986084, "learning_rate": 1.2221646107776739e-05, "loss": 1.5003, "step": 8468 }, { "epoch": 0.46302092587783444, "grad_norm": 1.4006578922271729, "learning_rate": 1.2219864380574822e-05, "loss": 1.5654, "step": 8469 }, { "epoch": 0.463075598321556, "grad_norm": 1.9190276861190796, "learning_rate": 1.2218082579246429e-05, "loss": 1.3417, "step": 8470 }, { "epoch": 0.4631302707652775, "grad_norm": 1.5939050912857056, "learning_rate": 1.2216300703851047e-05, "loss": 1.6241, "step": 8471 }, { "epoch": 0.4631849432089991, "grad_norm": 2.855695962905884, "learning_rate": 1.2214518754448188e-05, "loss": 1.1779, "step": 8472 }, { "epoch": 0.46323961565272065, "grad_norm": 1.4844988584518433, "learning_rate": 1.2212736731097352e-05, "loss": 1.4987, "step": 8473 }, { "epoch": 0.4632942880964422, "grad_norm": 2.155968427658081, "learning_rate": 1.2210954633858042e-05, "loss": 1.3532, "step": 8474 }, { "epoch": 0.4633489605401637, "grad_norm": 1.8963350057601929, "learning_rate": 1.220917246278977e-05, "loss": 1.2537, "step": 8475 }, { "epoch": 0.4634036329838853, "grad_norm": 1.5949727296829224, "learning_rate": 1.2207390217952044e-05, "loss": 1.5816, "step": 8476 }, { "epoch": 0.46345830542760685, "grad_norm": 1.6354371309280396, "learning_rate": 1.220560789940438e-05, "loss": 1.3464, "step": 8477 }, { "epoch": 0.4635129778713284, "grad_norm": 1.423333764076233, "learning_rate": 1.2203825507206293e-05, "loss": 1.4533, "step": 8478 }, { "epoch": 0.46356765031505, "grad_norm": 1.5208429098129272, "learning_rate": 1.2202043041417298e-05, "loss": 1.2841, "step": 8479 }, { "epoch": 0.4636223227587715, "grad_norm": 1.3071335554122925, "learning_rate": 1.2200260502096923e-05, "loss": 1.4514, "step": 8480 }, { "epoch": 0.46367699520249306, "grad_norm": 1.5597538948059082, "learning_rate": 1.2198477889304684e-05, "loss": 1.8283, "step": 8481 }, { "epoch": 0.4637316676462146, "grad_norm": 1.3387959003448486, "learning_rate": 1.2196695203100111e-05, "loss": 1.4853, "step": 8482 }, { "epoch": 0.4637863400899362, "grad_norm": 1.5547302961349487, "learning_rate": 1.2194912443542728e-05, "loss": 1.1575, "step": 8483 }, { "epoch": 0.4638410125336577, "grad_norm": 1.7243945598602295, "learning_rate": 1.2193129610692069e-05, "loss": 1.3343, "step": 8484 }, { "epoch": 0.46389568497737926, "grad_norm": 2.0464186668395996, "learning_rate": 1.2191346704607668e-05, "loss": 1.1904, "step": 8485 }, { "epoch": 0.46395035742110086, "grad_norm": 1.3368756771087646, "learning_rate": 1.2189563725349056e-05, "loss": 1.2967, "step": 8486 }, { "epoch": 0.4640050298648224, "grad_norm": 1.9816851615905762, "learning_rate": 1.2187780672975775e-05, "loss": 1.1296, "step": 8487 }, { "epoch": 0.46405970230854393, "grad_norm": 1.41233491897583, "learning_rate": 1.2185997547547364e-05, "loss": 1.4339, "step": 8488 }, { "epoch": 0.46411437475226547, "grad_norm": 1.4428123235702515, "learning_rate": 1.2184214349123361e-05, "loss": 1.5596, "step": 8489 }, { "epoch": 0.46416904719598706, "grad_norm": 1.6090795993804932, "learning_rate": 1.2182431077763317e-05, "loss": 1.4442, "step": 8490 }, { "epoch": 0.4642237196397086, "grad_norm": 1.7004354000091553, "learning_rate": 1.218064773352678e-05, "loss": 1.4836, "step": 8491 }, { "epoch": 0.46427839208343014, "grad_norm": 1.5296238660812378, "learning_rate": 1.2178864316473298e-05, "loss": 1.4011, "step": 8492 }, { "epoch": 0.46433306452715173, "grad_norm": 1.5498608350753784, "learning_rate": 1.2177080826662424e-05, "loss": 1.4447, "step": 8493 }, { "epoch": 0.46438773697087327, "grad_norm": 1.7835605144500732, "learning_rate": 1.2175297264153713e-05, "loss": 1.4077, "step": 8494 }, { "epoch": 0.4644424094145948, "grad_norm": 1.635483980178833, "learning_rate": 1.217351362900672e-05, "loss": 1.4739, "step": 8495 }, { "epoch": 0.46449708185831634, "grad_norm": 1.5642989873886108, "learning_rate": 1.2171729921281006e-05, "loss": 1.3092, "step": 8496 }, { "epoch": 0.46455175430203793, "grad_norm": 1.367942452430725, "learning_rate": 1.2169946141036133e-05, "loss": 1.4381, "step": 8497 }, { "epoch": 0.46460642674575947, "grad_norm": 1.2991070747375488, "learning_rate": 1.2168162288331671e-05, "loss": 1.4735, "step": 8498 }, { "epoch": 0.464661099189481, "grad_norm": 1.3248398303985596, "learning_rate": 1.2166378363227178e-05, "loss": 1.5472, "step": 8499 }, { "epoch": 0.4647157716332026, "grad_norm": 1.727513074874878, "learning_rate": 1.216459436578223e-05, "loss": 1.3872, "step": 8500 }, { "epoch": 0.46477044407692414, "grad_norm": 1.6166249513626099, "learning_rate": 1.2162810296056398e-05, "loss": 1.4475, "step": 8501 }, { "epoch": 0.4648251165206457, "grad_norm": 1.243120551109314, "learning_rate": 1.2161026154109253e-05, "loss": 1.235, "step": 8502 }, { "epoch": 0.4648797889643672, "grad_norm": 1.4237394332885742, "learning_rate": 1.2159241940000372e-05, "loss": 1.2637, "step": 8503 }, { "epoch": 0.4649344614080888, "grad_norm": 1.4751662015914917, "learning_rate": 1.2157457653789337e-05, "loss": 1.3859, "step": 8504 }, { "epoch": 0.46498913385181034, "grad_norm": 1.552599549293518, "learning_rate": 1.215567329553573e-05, "loss": 1.3098, "step": 8505 }, { "epoch": 0.4650438062955319, "grad_norm": 1.1844172477722168, "learning_rate": 1.2153888865299134e-05, "loss": 1.3703, "step": 8506 }, { "epoch": 0.4650984787392535, "grad_norm": 1.6205278635025024, "learning_rate": 1.2152104363139133e-05, "loss": 1.3708, "step": 8507 }, { "epoch": 0.465153151182975, "grad_norm": 1.2339421510696411, "learning_rate": 1.215031978911532e-05, "loss": 1.5845, "step": 8508 }, { "epoch": 0.46520782362669655, "grad_norm": 1.7389024496078491, "learning_rate": 1.214853514328728e-05, "loss": 1.5795, "step": 8509 }, { "epoch": 0.4652624960704181, "grad_norm": 1.3377069234848022, "learning_rate": 1.2146750425714609e-05, "loss": 1.5432, "step": 8510 }, { "epoch": 0.4653171685141397, "grad_norm": 3.078287363052368, "learning_rate": 1.2144965636456903e-05, "loss": 1.367, "step": 8511 }, { "epoch": 0.4653718409578612, "grad_norm": 1.4321577548980713, "learning_rate": 1.2143180775573764e-05, "loss": 1.4609, "step": 8512 }, { "epoch": 0.46542651340158275, "grad_norm": 1.4346777200698853, "learning_rate": 1.214139584312479e-05, "loss": 1.4614, "step": 8513 }, { "epoch": 0.46548118584530435, "grad_norm": 1.4787635803222656, "learning_rate": 1.2139610839169582e-05, "loss": 1.3038, "step": 8514 }, { "epoch": 0.4655358582890259, "grad_norm": 1.7601476907730103, "learning_rate": 1.213782576376775e-05, "loss": 1.3073, "step": 8515 }, { "epoch": 0.4655905307327474, "grad_norm": 1.4966636896133423, "learning_rate": 1.2136040616978897e-05, "loss": 1.2566, "step": 8516 }, { "epoch": 0.46564520317646896, "grad_norm": 1.610171914100647, "learning_rate": 1.2134255398862633e-05, "loss": 1.5308, "step": 8517 }, { "epoch": 0.46569987562019055, "grad_norm": 2.092235803604126, "learning_rate": 1.2132470109478577e-05, "loss": 1.2535, "step": 8518 }, { "epoch": 0.4657545480639121, "grad_norm": 1.6602635383605957, "learning_rate": 1.213068474888634e-05, "loss": 1.5504, "step": 8519 }, { "epoch": 0.4658092205076336, "grad_norm": 1.567139744758606, "learning_rate": 1.212889931714554e-05, "loss": 1.3095, "step": 8520 }, { "epoch": 0.4658638929513552, "grad_norm": 1.4861719608306885, "learning_rate": 1.2127113814315791e-05, "loss": 1.4347, "step": 8521 }, { "epoch": 0.46591856539507676, "grad_norm": 1.5889686346054077, "learning_rate": 1.2125328240456727e-05, "loss": 1.3988, "step": 8522 }, { "epoch": 0.4659732378387983, "grad_norm": 1.8958590030670166, "learning_rate": 1.2123542595627961e-05, "loss": 1.4102, "step": 8523 }, { "epoch": 0.46602791028251983, "grad_norm": 1.6456577777862549, "learning_rate": 1.2121756879889126e-05, "loss": 1.5269, "step": 8524 }, { "epoch": 0.4660825827262414, "grad_norm": 1.1884404420852661, "learning_rate": 1.2119971093299852e-05, "loss": 1.5764, "step": 8525 }, { "epoch": 0.46613725516996296, "grad_norm": 1.3820819854736328, "learning_rate": 1.2118185235919766e-05, "loss": 1.3892, "step": 8526 }, { "epoch": 0.4661919276136845, "grad_norm": 1.7077386379241943, "learning_rate": 1.2116399307808506e-05, "loss": 1.3096, "step": 8527 }, { "epoch": 0.4662466000574061, "grad_norm": 1.701342225074768, "learning_rate": 1.2114613309025705e-05, "loss": 1.3685, "step": 8528 }, { "epoch": 0.46630127250112763, "grad_norm": 1.7718780040740967, "learning_rate": 1.2112827239631004e-05, "loss": 1.2942, "step": 8529 }, { "epoch": 0.46635594494484917, "grad_norm": 1.447364091873169, "learning_rate": 1.2111041099684045e-05, "loss": 1.3359, "step": 8530 }, { "epoch": 0.4664106173885707, "grad_norm": 1.6901087760925293, "learning_rate": 1.2109254889244469e-05, "loss": 1.2786, "step": 8531 }, { "epoch": 0.4664652898322923, "grad_norm": 1.6139775514602661, "learning_rate": 1.2107468608371924e-05, "loss": 1.1669, "step": 8532 }, { "epoch": 0.46651996227601383, "grad_norm": 1.7755467891693115, "learning_rate": 1.2105682257126057e-05, "loss": 1.3869, "step": 8533 }, { "epoch": 0.46657463471973537, "grad_norm": 1.3768141269683838, "learning_rate": 1.2103895835566516e-05, "loss": 1.3237, "step": 8534 }, { "epoch": 0.46662930716345696, "grad_norm": 1.1879669427871704, "learning_rate": 1.2102109343752955e-05, "loss": 1.2908, "step": 8535 }, { "epoch": 0.4666839796071785, "grad_norm": 1.548256516456604, "learning_rate": 1.2100322781745034e-05, "loss": 1.305, "step": 8536 }, { "epoch": 0.46673865205090004, "grad_norm": 1.423718810081482, "learning_rate": 1.2098536149602405e-05, "loss": 1.5882, "step": 8537 }, { "epoch": 0.4667933244946216, "grad_norm": 1.952497124671936, "learning_rate": 1.209674944738473e-05, "loss": 1.1671, "step": 8538 }, { "epoch": 0.46684799693834317, "grad_norm": 1.5330787897109985, "learning_rate": 1.209496267515167e-05, "loss": 1.3147, "step": 8539 }, { "epoch": 0.4669026693820647, "grad_norm": 1.5791884660720825, "learning_rate": 1.2093175832962891e-05, "loss": 1.348, "step": 8540 }, { "epoch": 0.46695734182578624, "grad_norm": 1.5901039838790894, "learning_rate": 1.2091388920878059e-05, "loss": 1.3813, "step": 8541 }, { "epoch": 0.46701201426950784, "grad_norm": 1.5405398607254028, "learning_rate": 1.2089601938956843e-05, "loss": 1.3956, "step": 8542 }, { "epoch": 0.4670666867132294, "grad_norm": 1.17475163936615, "learning_rate": 1.2087814887258916e-05, "loss": 1.4342, "step": 8543 }, { "epoch": 0.4671213591569509, "grad_norm": 1.3799811601638794, "learning_rate": 1.2086027765843948e-05, "loss": 1.5676, "step": 8544 }, { "epoch": 0.46717603160067245, "grad_norm": 1.5399965047836304, "learning_rate": 1.2084240574771621e-05, "loss": 1.2938, "step": 8545 }, { "epoch": 0.46723070404439404, "grad_norm": 1.3501863479614258, "learning_rate": 1.2082453314101607e-05, "loss": 1.5456, "step": 8546 }, { "epoch": 0.4672853764881156, "grad_norm": 1.6950491666793823, "learning_rate": 1.2080665983893595e-05, "loss": 1.3438, "step": 8547 }, { "epoch": 0.4673400489318371, "grad_norm": 1.5494894981384277, "learning_rate": 1.2078878584207259e-05, "loss": 1.3272, "step": 8548 }, { "epoch": 0.4673947213755587, "grad_norm": 1.5384808778762817, "learning_rate": 1.2077091115102291e-05, "loss": 1.3481, "step": 8549 }, { "epoch": 0.46744939381928025, "grad_norm": 1.3975971937179565, "learning_rate": 1.2075303576638378e-05, "loss": 1.4036, "step": 8550 }, { "epoch": 0.4675040662630018, "grad_norm": 1.4363631010055542, "learning_rate": 1.2073515968875204e-05, "loss": 1.3652, "step": 8551 }, { "epoch": 0.4675587387067233, "grad_norm": 1.7527724504470825, "learning_rate": 1.2071728291872471e-05, "loss": 1.2562, "step": 8552 }, { "epoch": 0.4676134111504449, "grad_norm": 1.7437165975570679, "learning_rate": 1.2069940545689867e-05, "loss": 1.4142, "step": 8553 }, { "epoch": 0.46766808359416645, "grad_norm": 2.0768933296203613, "learning_rate": 1.2068152730387091e-05, "loss": 1.34, "step": 8554 }, { "epoch": 0.467722756037888, "grad_norm": 1.2996283769607544, "learning_rate": 1.2066364846023841e-05, "loss": 1.5029, "step": 8555 }, { "epoch": 0.4677774284816096, "grad_norm": 1.3425415754318237, "learning_rate": 1.2064576892659821e-05, "loss": 1.6008, "step": 8556 }, { "epoch": 0.4678321009253311, "grad_norm": 1.5254433155059814, "learning_rate": 1.2062788870354734e-05, "loss": 1.4117, "step": 8557 }, { "epoch": 0.46788677336905266, "grad_norm": 1.3588941097259521, "learning_rate": 1.2061000779168288e-05, "loss": 1.4193, "step": 8558 }, { "epoch": 0.4679414458127742, "grad_norm": 1.3861348628997803, "learning_rate": 1.2059212619160186e-05, "loss": 1.4303, "step": 8559 }, { "epoch": 0.4679961182564958, "grad_norm": 1.3970264196395874, "learning_rate": 1.2057424390390141e-05, "loss": 1.2574, "step": 8560 }, { "epoch": 0.4680507907002173, "grad_norm": 1.3528636693954468, "learning_rate": 1.2055636092917875e-05, "loss": 1.4831, "step": 8561 }, { "epoch": 0.46810546314393886, "grad_norm": 1.1957981586456299, "learning_rate": 1.205384772680309e-05, "loss": 1.342, "step": 8562 }, { "epoch": 0.46816013558766045, "grad_norm": 1.5700604915618896, "learning_rate": 1.2052059292105511e-05, "loss": 1.4515, "step": 8563 }, { "epoch": 0.468214808031382, "grad_norm": 1.441763162612915, "learning_rate": 1.2050270788884859e-05, "loss": 1.086, "step": 8564 }, { "epoch": 0.46826948047510353, "grad_norm": 1.5703552961349487, "learning_rate": 1.2048482217200854e-05, "loss": 1.5884, "step": 8565 }, { "epoch": 0.46832415291882507, "grad_norm": 1.819270372390747, "learning_rate": 1.204669357711322e-05, "loss": 1.2934, "step": 8566 }, { "epoch": 0.46837882536254666, "grad_norm": 1.502984881401062, "learning_rate": 1.2044904868681684e-05, "loss": 1.2647, "step": 8567 }, { "epoch": 0.4684334978062682, "grad_norm": 1.615566611289978, "learning_rate": 1.204311609196598e-05, "loss": 1.3226, "step": 8568 }, { "epoch": 0.46848817024998973, "grad_norm": 1.8391495943069458, "learning_rate": 1.2041327247025829e-05, "loss": 1.5606, "step": 8569 }, { "epoch": 0.4685428426937113, "grad_norm": 1.2373132705688477, "learning_rate": 1.2039538333920972e-05, "loss": 1.6423, "step": 8570 }, { "epoch": 0.46859751513743286, "grad_norm": 1.5824085474014282, "learning_rate": 1.2037749352711147e-05, "loss": 1.408, "step": 8571 }, { "epoch": 0.4686521875811544, "grad_norm": 1.4109611511230469, "learning_rate": 1.2035960303456091e-05, "loss": 1.7055, "step": 8572 }, { "epoch": 0.46870686002487594, "grad_norm": 1.367093563079834, "learning_rate": 1.203417118621554e-05, "loss": 1.5673, "step": 8573 }, { "epoch": 0.46876153246859753, "grad_norm": 1.4660370349884033, "learning_rate": 1.203238200104924e-05, "loss": 1.3372, "step": 8574 }, { "epoch": 0.46881620491231907, "grad_norm": 1.427748203277588, "learning_rate": 1.2030592748016936e-05, "loss": 1.4662, "step": 8575 }, { "epoch": 0.4688708773560406, "grad_norm": 1.576256275177002, "learning_rate": 1.2028803427178376e-05, "loss": 1.3943, "step": 8576 }, { "epoch": 0.4689255497997622, "grad_norm": 1.1932991743087769, "learning_rate": 1.2027014038593308e-05, "loss": 1.4202, "step": 8577 }, { "epoch": 0.46898022224348374, "grad_norm": 1.6239213943481445, "learning_rate": 1.2025224582321486e-05, "loss": 1.6433, "step": 8578 }, { "epoch": 0.4690348946872053, "grad_norm": 1.6375218629837036, "learning_rate": 1.202343505842266e-05, "loss": 1.4474, "step": 8579 }, { "epoch": 0.4690895671309268, "grad_norm": 1.6267049312591553, "learning_rate": 1.202164546695659e-05, "loss": 1.3485, "step": 8580 }, { "epoch": 0.4691442395746484, "grad_norm": 1.5269769430160522, "learning_rate": 1.2019855807983036e-05, "loss": 1.6017, "step": 8581 }, { "epoch": 0.46919891201836994, "grad_norm": 1.5938907861709595, "learning_rate": 1.2018066081561756e-05, "loss": 1.5382, "step": 8582 }, { "epoch": 0.4692535844620915, "grad_norm": 1.7741619348526, "learning_rate": 1.2016276287752513e-05, "loss": 1.359, "step": 8583 }, { "epoch": 0.46930825690581307, "grad_norm": 1.4472697973251343, "learning_rate": 1.2014486426615076e-05, "loss": 1.4493, "step": 8584 }, { "epoch": 0.4693629293495346, "grad_norm": 1.449466347694397, "learning_rate": 1.201269649820921e-05, "loss": 1.3884, "step": 8585 }, { "epoch": 0.46941760179325615, "grad_norm": 1.5765026807785034, "learning_rate": 1.2010906502594682e-05, "loss": 1.5052, "step": 8586 }, { "epoch": 0.4694722742369777, "grad_norm": 1.1351344585418701, "learning_rate": 1.2009116439831267e-05, "loss": 1.3851, "step": 8587 }, { "epoch": 0.4695269466806993, "grad_norm": 1.4899767637252808, "learning_rate": 1.200732630997874e-05, "loss": 1.7189, "step": 8588 }, { "epoch": 0.4695816191244208, "grad_norm": 1.117985486984253, "learning_rate": 1.2005536113096878e-05, "loss": 1.5804, "step": 8589 }, { "epoch": 0.46963629156814235, "grad_norm": 1.4170384407043457, "learning_rate": 1.2003745849245458e-05, "loss": 1.3597, "step": 8590 }, { "epoch": 0.46969096401186394, "grad_norm": 1.5364952087402344, "learning_rate": 1.2001955518484266e-05, "loss": 1.2275, "step": 8591 }, { "epoch": 0.4697456364555855, "grad_norm": 1.4846583604812622, "learning_rate": 1.2000165120873079e-05, "loss": 1.4172, "step": 8592 }, { "epoch": 0.469800308899307, "grad_norm": 1.4193981885910034, "learning_rate": 1.1998374656471685e-05, "loss": 1.3084, "step": 8593 }, { "epoch": 0.46985498134302855, "grad_norm": 1.4332712888717651, "learning_rate": 1.1996584125339869e-05, "loss": 1.5273, "step": 8594 }, { "epoch": 0.46990965378675015, "grad_norm": 1.3500901460647583, "learning_rate": 1.1994793527537427e-05, "loss": 1.4063, "step": 8595 }, { "epoch": 0.4699643262304717, "grad_norm": 1.5568556785583496, "learning_rate": 1.199300286312415e-05, "loss": 1.4018, "step": 8596 }, { "epoch": 0.4700189986741932, "grad_norm": 1.1708177328109741, "learning_rate": 1.199121213215983e-05, "loss": 1.5722, "step": 8597 }, { "epoch": 0.4700736711179148, "grad_norm": 1.2127015590667725, "learning_rate": 1.1989421334704261e-05, "loss": 1.3701, "step": 8598 }, { "epoch": 0.47012834356163635, "grad_norm": 1.4139453172683716, "learning_rate": 1.1987630470817249e-05, "loss": 1.421, "step": 8599 }, { "epoch": 0.4701830160053579, "grad_norm": 1.277880311012268, "learning_rate": 1.1985839540558588e-05, "loss": 1.3218, "step": 8600 }, { "epoch": 0.4702376884490794, "grad_norm": 1.142728328704834, "learning_rate": 1.1984048543988086e-05, "loss": 1.728, "step": 8601 }, { "epoch": 0.470292360892801, "grad_norm": 1.1760709285736084, "learning_rate": 1.1982257481165547e-05, "loss": 1.376, "step": 8602 }, { "epoch": 0.47034703333652256, "grad_norm": 1.5817031860351562, "learning_rate": 1.1980466352150782e-05, "loss": 1.4174, "step": 8603 }, { "epoch": 0.4704017057802441, "grad_norm": 1.454235315322876, "learning_rate": 1.1978675157003597e-05, "loss": 1.5849, "step": 8604 }, { "epoch": 0.4704563782239657, "grad_norm": 1.5087591409683228, "learning_rate": 1.1976883895783804e-05, "loss": 1.2314, "step": 8605 }, { "epoch": 0.4705110506676872, "grad_norm": 1.581491231918335, "learning_rate": 1.1975092568551221e-05, "loss": 1.1689, "step": 8606 }, { "epoch": 0.47056572311140876, "grad_norm": 1.6096690893173218, "learning_rate": 1.197330117536566e-05, "loss": 1.2686, "step": 8607 }, { "epoch": 0.4706203955551303, "grad_norm": 1.5054665803909302, "learning_rate": 1.1971509716286942e-05, "loss": 1.4059, "step": 8608 }, { "epoch": 0.4706750679988519, "grad_norm": 1.4649845361709595, "learning_rate": 1.1969718191374888e-05, "loss": 1.6571, "step": 8609 }, { "epoch": 0.47072974044257343, "grad_norm": 1.326258897781372, "learning_rate": 1.1967926600689324e-05, "loss": 1.5139, "step": 8610 }, { "epoch": 0.47078441288629497, "grad_norm": 1.2568812370300293, "learning_rate": 1.1966134944290073e-05, "loss": 1.4704, "step": 8611 }, { "epoch": 0.47083908533001656, "grad_norm": 1.351928949356079, "learning_rate": 1.196434322223696e-05, "loss": 1.5055, "step": 8612 }, { "epoch": 0.4708937577737381, "grad_norm": 1.3826193809509277, "learning_rate": 1.1962551434589822e-05, "loss": 1.7255, "step": 8613 }, { "epoch": 0.47094843021745963, "grad_norm": 1.5427236557006836, "learning_rate": 1.196075958140848e-05, "loss": 1.6139, "step": 8614 }, { "epoch": 0.47100310266118117, "grad_norm": 1.534825325012207, "learning_rate": 1.1958967662752778e-05, "loss": 1.3246, "step": 8615 }, { "epoch": 0.47105777510490277, "grad_norm": 1.8064159154891968, "learning_rate": 1.1957175678682548e-05, "loss": 1.3563, "step": 8616 }, { "epoch": 0.4711124475486243, "grad_norm": 1.9555021524429321, "learning_rate": 1.1955383629257633e-05, "loss": 1.3745, "step": 8617 }, { "epoch": 0.47116711999234584, "grad_norm": 1.2354319095611572, "learning_rate": 1.1953591514537866e-05, "loss": 1.4148, "step": 8618 }, { "epoch": 0.47122179243606743, "grad_norm": 1.391704797744751, "learning_rate": 1.1951799334583093e-05, "loss": 1.3361, "step": 8619 }, { "epoch": 0.47127646487978897, "grad_norm": 1.8046085834503174, "learning_rate": 1.1950007089453166e-05, "loss": 1.4365, "step": 8620 }, { "epoch": 0.4713311373235105, "grad_norm": 1.7842923402786255, "learning_rate": 1.1948214779207921e-05, "loss": 1.0995, "step": 8621 }, { "epoch": 0.47138580976723204, "grad_norm": 1.544914722442627, "learning_rate": 1.1946422403907214e-05, "loss": 1.3509, "step": 8622 }, { "epoch": 0.47144048221095364, "grad_norm": 1.538064956665039, "learning_rate": 1.1944629963610897e-05, "loss": 1.4261, "step": 8623 }, { "epoch": 0.4714951546546752, "grad_norm": 1.9533798694610596, "learning_rate": 1.1942837458378821e-05, "loss": 1.2525, "step": 8624 }, { "epoch": 0.4715498270983967, "grad_norm": 1.8465595245361328, "learning_rate": 1.1941044888270845e-05, "loss": 1.345, "step": 8625 }, { "epoch": 0.4716044995421183, "grad_norm": 1.5813604593276978, "learning_rate": 1.1939252253346822e-05, "loss": 1.3951, "step": 8626 }, { "epoch": 0.47165917198583984, "grad_norm": 1.5360907316207886, "learning_rate": 1.193745955366662e-05, "loss": 1.3917, "step": 8627 }, { "epoch": 0.4717138444295614, "grad_norm": 1.7289807796478271, "learning_rate": 1.1935666789290095e-05, "loss": 1.504, "step": 8628 }, { "epoch": 0.4717685168732829, "grad_norm": 1.4811464548110962, "learning_rate": 1.1933873960277114e-05, "loss": 1.5901, "step": 8629 }, { "epoch": 0.4718231893170045, "grad_norm": 2.0668373107910156, "learning_rate": 1.1932081066687544e-05, "loss": 1.3789, "step": 8630 }, { "epoch": 0.47187786176072605, "grad_norm": 1.112316608428955, "learning_rate": 1.1930288108581252e-05, "loss": 1.4946, "step": 8631 }, { "epoch": 0.4719325342044476, "grad_norm": 1.6147147417068481, "learning_rate": 1.192849508601811e-05, "loss": 1.4615, "step": 8632 }, { "epoch": 0.4719872066481692, "grad_norm": 1.5110946893692017, "learning_rate": 1.1926701999057992e-05, "loss": 1.6753, "step": 8633 }, { "epoch": 0.4720418790918907, "grad_norm": 1.4655095338821411, "learning_rate": 1.1924908847760774e-05, "loss": 1.5043, "step": 8634 }, { "epoch": 0.47209655153561225, "grad_norm": 1.7798691987991333, "learning_rate": 1.1923115632186332e-05, "loss": 1.366, "step": 8635 }, { "epoch": 0.4721512239793338, "grad_norm": 1.5364375114440918, "learning_rate": 1.1921322352394548e-05, "loss": 1.578, "step": 8636 }, { "epoch": 0.4722058964230554, "grad_norm": 1.7838618755340576, "learning_rate": 1.1919529008445302e-05, "loss": 1.4119, "step": 8637 }, { "epoch": 0.4722605688667769, "grad_norm": 1.3892450332641602, "learning_rate": 1.1917735600398476e-05, "loss": 1.3617, "step": 8638 }, { "epoch": 0.47231524131049846, "grad_norm": 1.575421690940857, "learning_rate": 1.191594212831396e-05, "loss": 1.2421, "step": 8639 }, { "epoch": 0.47236991375422005, "grad_norm": 1.4006623029708862, "learning_rate": 1.1914148592251638e-05, "loss": 1.3952, "step": 8640 }, { "epoch": 0.4724245861979416, "grad_norm": 2.0290424823760986, "learning_rate": 1.1912354992271404e-05, "loss": 1.3091, "step": 8641 }, { "epoch": 0.4724792586416631, "grad_norm": 1.4881235361099243, "learning_rate": 1.191056132843315e-05, "loss": 1.4713, "step": 8642 }, { "epoch": 0.47253393108538466, "grad_norm": 1.5044690370559692, "learning_rate": 1.1908767600796771e-05, "loss": 1.4219, "step": 8643 }, { "epoch": 0.47258860352910625, "grad_norm": 1.2765003442764282, "learning_rate": 1.1906973809422163e-05, "loss": 1.5821, "step": 8644 }, { "epoch": 0.4726432759728278, "grad_norm": 1.5523040294647217, "learning_rate": 1.1905179954369222e-05, "loss": 1.4798, "step": 8645 }, { "epoch": 0.47269794841654933, "grad_norm": 1.549308180809021, "learning_rate": 1.1903386035697853e-05, "loss": 1.487, "step": 8646 }, { "epoch": 0.4727526208602709, "grad_norm": 1.6920559406280518, "learning_rate": 1.1901592053467956e-05, "loss": 1.3454, "step": 8647 }, { "epoch": 0.47280729330399246, "grad_norm": 1.2225875854492188, "learning_rate": 1.189979800773944e-05, "loss": 1.3298, "step": 8648 }, { "epoch": 0.472861965747714, "grad_norm": 1.7164489030838013, "learning_rate": 1.1898003898572212e-05, "loss": 1.4717, "step": 8649 }, { "epoch": 0.47291663819143553, "grad_norm": 1.861608862876892, "learning_rate": 1.1896209726026177e-05, "loss": 1.4751, "step": 8650 }, { "epoch": 0.4729713106351571, "grad_norm": 1.4352961778640747, "learning_rate": 1.1894415490161253e-05, "loss": 1.4559, "step": 8651 }, { "epoch": 0.47302598307887866, "grad_norm": 1.6673616170883179, "learning_rate": 1.1892621191037347e-05, "loss": 1.0873, "step": 8652 }, { "epoch": 0.4730806555226002, "grad_norm": 1.439552664756775, "learning_rate": 1.1890826828714378e-05, "loss": 1.3859, "step": 8653 }, { "epoch": 0.4731353279663218, "grad_norm": 1.701836347579956, "learning_rate": 1.1889032403252266e-05, "loss": 1.3193, "step": 8654 }, { "epoch": 0.47319000041004333, "grad_norm": 1.4774872064590454, "learning_rate": 1.188723791471093e-05, "loss": 1.6328, "step": 8655 }, { "epoch": 0.47324467285376487, "grad_norm": 1.4576950073242188, "learning_rate": 1.1885443363150291e-05, "loss": 1.751, "step": 8656 }, { "epoch": 0.4732993452974864, "grad_norm": 1.6222596168518066, "learning_rate": 1.1883648748630274e-05, "loss": 1.451, "step": 8657 }, { "epoch": 0.473354017741208, "grad_norm": 1.545484185218811, "learning_rate": 1.1881854071210805e-05, "loss": 1.4061, "step": 8658 }, { "epoch": 0.47340869018492954, "grad_norm": 1.3877315521240234, "learning_rate": 1.1880059330951811e-05, "loss": 1.4355, "step": 8659 }, { "epoch": 0.4734633626286511, "grad_norm": 1.7942280769348145, "learning_rate": 1.1878264527913226e-05, "loss": 1.3822, "step": 8660 }, { "epoch": 0.47351803507237267, "grad_norm": 1.585318922996521, "learning_rate": 1.187646966215498e-05, "loss": 1.37, "step": 8661 }, { "epoch": 0.4735727075160942, "grad_norm": 1.8174000978469849, "learning_rate": 1.1874674733737009e-05, "loss": 1.3718, "step": 8662 }, { "epoch": 0.47362737995981574, "grad_norm": 1.3529276847839355, "learning_rate": 1.187287974271925e-05, "loss": 1.5635, "step": 8663 }, { "epoch": 0.47368205240353733, "grad_norm": 1.3198577165603638, "learning_rate": 1.187108468916164e-05, "loss": 1.3329, "step": 8664 }, { "epoch": 0.47373672484725887, "grad_norm": 1.2674665451049805, "learning_rate": 1.1869289573124125e-05, "loss": 1.6229, "step": 8665 }, { "epoch": 0.4737913972909804, "grad_norm": 1.4048861265182495, "learning_rate": 1.1867494394666641e-05, "loss": 1.4926, "step": 8666 }, { "epoch": 0.47384606973470195, "grad_norm": 1.7598626613616943, "learning_rate": 1.1865699153849137e-05, "loss": 1.4602, "step": 8667 }, { "epoch": 0.47390074217842354, "grad_norm": 1.4413429498672485, "learning_rate": 1.1863903850731564e-05, "loss": 1.2555, "step": 8668 }, { "epoch": 0.4739554146221451, "grad_norm": 1.3814074993133545, "learning_rate": 1.1862108485373866e-05, "loss": 1.3573, "step": 8669 }, { "epoch": 0.4740100870658666, "grad_norm": 1.472266674041748, "learning_rate": 1.1860313057835995e-05, "loss": 1.504, "step": 8670 }, { "epoch": 0.4740647595095882, "grad_norm": 1.39662504196167, "learning_rate": 1.1858517568177905e-05, "loss": 1.2583, "step": 8671 }, { "epoch": 0.47411943195330974, "grad_norm": 1.5660802125930786, "learning_rate": 1.1856722016459554e-05, "loss": 1.564, "step": 8672 }, { "epoch": 0.4741741043970313, "grad_norm": 1.9534893035888672, "learning_rate": 1.1854926402740897e-05, "loss": 1.2152, "step": 8673 }, { "epoch": 0.4742287768407528, "grad_norm": 1.0645266771316528, "learning_rate": 1.1853130727081895e-05, "loss": 1.5543, "step": 8674 }, { "epoch": 0.4742834492844744, "grad_norm": 1.6939741373062134, "learning_rate": 1.185133498954251e-05, "loss": 1.4603, "step": 8675 }, { "epoch": 0.47433812172819595, "grad_norm": 1.7299710512161255, "learning_rate": 1.1849539190182706e-05, "loss": 1.3112, "step": 8676 }, { "epoch": 0.4743927941719175, "grad_norm": 1.7938990592956543, "learning_rate": 1.1847743329062447e-05, "loss": 1.3185, "step": 8677 }, { "epoch": 0.4744474666156391, "grad_norm": 1.6286860704421997, "learning_rate": 1.18459474062417e-05, "loss": 1.4477, "step": 8678 }, { "epoch": 0.4745021390593606, "grad_norm": 1.4274855852127075, "learning_rate": 1.1844151421780442e-05, "loss": 1.3923, "step": 8679 }, { "epoch": 0.47455681150308215, "grad_norm": 1.603124737739563, "learning_rate": 1.1842355375738639e-05, "loss": 1.3807, "step": 8680 }, { "epoch": 0.4746114839468037, "grad_norm": 1.7741214036941528, "learning_rate": 1.1840559268176263e-05, "loss": 1.1998, "step": 8681 }, { "epoch": 0.4746661563905253, "grad_norm": 1.6641788482666016, "learning_rate": 1.18387630991533e-05, "loss": 1.3961, "step": 8682 }, { "epoch": 0.4747208288342468, "grad_norm": 1.556717872619629, "learning_rate": 1.1836966868729722e-05, "loss": 1.4455, "step": 8683 }, { "epoch": 0.47477550127796836, "grad_norm": 1.5642852783203125, "learning_rate": 1.1835170576965507e-05, "loss": 1.5136, "step": 8684 }, { "epoch": 0.47483017372168995, "grad_norm": 1.8476561307907104, "learning_rate": 1.1833374223920639e-05, "loss": 1.5065, "step": 8685 }, { "epoch": 0.4748848461654115, "grad_norm": 1.5066941976547241, "learning_rate": 1.1831577809655105e-05, "loss": 1.4318, "step": 8686 }, { "epoch": 0.474939518609133, "grad_norm": 1.5245856046676636, "learning_rate": 1.1829781334228889e-05, "loss": 1.2957, "step": 8687 }, { "epoch": 0.47499419105285456, "grad_norm": 1.24684739112854, "learning_rate": 1.1827984797701983e-05, "loss": 1.2476, "step": 8688 }, { "epoch": 0.47504886349657616, "grad_norm": 1.6612200736999512, "learning_rate": 1.1826188200134374e-05, "loss": 1.3495, "step": 8689 }, { "epoch": 0.4751035359402977, "grad_norm": 1.5875848531723022, "learning_rate": 1.1824391541586055e-05, "loss": 1.2635, "step": 8690 }, { "epoch": 0.47515820838401923, "grad_norm": 1.3556143045425415, "learning_rate": 1.1822594822117022e-05, "loss": 1.4528, "step": 8691 }, { "epoch": 0.4752128808277408, "grad_norm": 1.2979873418807983, "learning_rate": 1.182079804178727e-05, "loss": 1.435, "step": 8692 }, { "epoch": 0.47526755327146236, "grad_norm": 1.6548782587051392, "learning_rate": 1.18190012006568e-05, "loss": 1.3526, "step": 8693 }, { "epoch": 0.4753222257151839, "grad_norm": 1.5706536769866943, "learning_rate": 1.1817204298785612e-05, "loss": 1.4904, "step": 8694 }, { "epoch": 0.47537689815890544, "grad_norm": 1.3309777975082397, "learning_rate": 1.1815407336233709e-05, "loss": 1.3955, "step": 8695 }, { "epoch": 0.47543157060262703, "grad_norm": 1.3518246412277222, "learning_rate": 1.1813610313061091e-05, "loss": 1.4178, "step": 8696 }, { "epoch": 0.47548624304634857, "grad_norm": 1.3623400926589966, "learning_rate": 1.1811813229327774e-05, "loss": 1.6044, "step": 8697 }, { "epoch": 0.4755409154900701, "grad_norm": 1.530931830406189, "learning_rate": 1.1810016085093756e-05, "loss": 1.4709, "step": 8698 }, { "epoch": 0.4755955879337917, "grad_norm": 1.2657932043075562, "learning_rate": 1.1808218880419055e-05, "loss": 1.5192, "step": 8699 }, { "epoch": 0.47565026037751323, "grad_norm": 1.733345627784729, "learning_rate": 1.1806421615363685e-05, "loss": 1.2335, "step": 8700 }, { "epoch": 0.47570493282123477, "grad_norm": 1.714619755744934, "learning_rate": 1.1804624289987658e-05, "loss": 1.4648, "step": 8701 }, { "epoch": 0.4757596052649563, "grad_norm": 1.5353788137435913, "learning_rate": 1.1802826904350992e-05, "loss": 1.4069, "step": 8702 }, { "epoch": 0.4758142777086779, "grad_norm": 1.4908020496368408, "learning_rate": 1.1801029458513702e-05, "loss": 1.3523, "step": 8703 }, { "epoch": 0.47586895015239944, "grad_norm": 1.596498727798462, "learning_rate": 1.1799231952535817e-05, "loss": 1.555, "step": 8704 }, { "epoch": 0.475923622596121, "grad_norm": 1.361932396888733, "learning_rate": 1.179743438647735e-05, "loss": 1.3022, "step": 8705 }, { "epoch": 0.47597829503984257, "grad_norm": 1.630024790763855, "learning_rate": 1.1795636760398332e-05, "loss": 1.6833, "step": 8706 }, { "epoch": 0.4760329674835641, "grad_norm": 1.368604063987732, "learning_rate": 1.179383907435879e-05, "loss": 1.3871, "step": 8707 }, { "epoch": 0.47608763992728564, "grad_norm": 1.6895461082458496, "learning_rate": 1.1792041328418755e-05, "loss": 1.1383, "step": 8708 }, { "epoch": 0.4761423123710072, "grad_norm": 2.22171950340271, "learning_rate": 1.1790243522638252e-05, "loss": 1.3051, "step": 8709 }, { "epoch": 0.4761969848147288, "grad_norm": 1.5223350524902344, "learning_rate": 1.1788445657077315e-05, "loss": 1.4693, "step": 8710 }, { "epoch": 0.4762516572584503, "grad_norm": 1.644076943397522, "learning_rate": 1.1786647731795984e-05, "loss": 1.1437, "step": 8711 }, { "epoch": 0.47630632970217185, "grad_norm": 1.2483570575714111, "learning_rate": 1.178484974685429e-05, "loss": 1.4882, "step": 8712 }, { "epoch": 0.47636100214589344, "grad_norm": 1.2796311378479004, "learning_rate": 1.1783051702312274e-05, "loss": 1.4594, "step": 8713 }, { "epoch": 0.476415674589615, "grad_norm": 1.5353031158447266, "learning_rate": 1.1781253598229982e-05, "loss": 1.462, "step": 8714 }, { "epoch": 0.4764703470333365, "grad_norm": 1.745017170906067, "learning_rate": 1.177945543466745e-05, "loss": 1.4049, "step": 8715 }, { "epoch": 0.47652501947705805, "grad_norm": 1.5669713020324707, "learning_rate": 1.1777657211684726e-05, "loss": 1.2242, "step": 8716 }, { "epoch": 0.47657969192077965, "grad_norm": 1.607805848121643, "learning_rate": 1.1775858929341855e-05, "loss": 1.4993, "step": 8717 }, { "epoch": 0.4766343643645012, "grad_norm": 1.677101731300354, "learning_rate": 1.1774060587698888e-05, "loss": 1.651, "step": 8718 }, { "epoch": 0.4766890368082227, "grad_norm": 1.5827258825302124, "learning_rate": 1.1772262186815875e-05, "loss": 1.5411, "step": 8719 }, { "epoch": 0.4767437092519443, "grad_norm": 1.4706052541732788, "learning_rate": 1.1770463726752868e-05, "loss": 1.3323, "step": 8720 }, { "epoch": 0.47679838169566585, "grad_norm": 1.5512416362762451, "learning_rate": 1.1768665207569922e-05, "loss": 1.523, "step": 8721 }, { "epoch": 0.4768530541393874, "grad_norm": 1.394088864326477, "learning_rate": 1.1766866629327099e-05, "loss": 1.569, "step": 8722 }, { "epoch": 0.4769077265831089, "grad_norm": 1.4201185703277588, "learning_rate": 1.176506799208445e-05, "loss": 1.6553, "step": 8723 }, { "epoch": 0.4769623990268305, "grad_norm": 1.7041376829147339, "learning_rate": 1.1763269295902036e-05, "loss": 1.3522, "step": 8724 }, { "epoch": 0.47701707147055206, "grad_norm": 1.6148028373718262, "learning_rate": 1.1761470540839927e-05, "loss": 1.4357, "step": 8725 }, { "epoch": 0.4770717439142736, "grad_norm": 1.4001400470733643, "learning_rate": 1.1759671726958181e-05, "loss": 1.3207, "step": 8726 }, { "epoch": 0.4771264163579952, "grad_norm": 1.433712363243103, "learning_rate": 1.175787285431687e-05, "loss": 1.7717, "step": 8727 }, { "epoch": 0.4771810888017167, "grad_norm": 1.6897951364517212, "learning_rate": 1.1756073922976056e-05, "loss": 1.598, "step": 8728 }, { "epoch": 0.47723576124543826, "grad_norm": 1.574042558670044, "learning_rate": 1.1754274932995813e-05, "loss": 1.389, "step": 8729 }, { "epoch": 0.4772904336891598, "grad_norm": 2.3329379558563232, "learning_rate": 1.1752475884436214e-05, "loss": 1.3159, "step": 8730 }, { "epoch": 0.4773451061328814, "grad_norm": 1.5753530263900757, "learning_rate": 1.1750676777357333e-05, "loss": 1.3639, "step": 8731 }, { "epoch": 0.47739977857660293, "grad_norm": 1.376442551612854, "learning_rate": 1.1748877611819247e-05, "loss": 1.407, "step": 8732 }, { "epoch": 0.47745445102032447, "grad_norm": 1.4316534996032715, "learning_rate": 1.1747078387882031e-05, "loss": 1.5335, "step": 8733 }, { "epoch": 0.47750912346404606, "grad_norm": 1.3670775890350342, "learning_rate": 1.1745279105605774e-05, "loss": 1.577, "step": 8734 }, { "epoch": 0.4775637959077676, "grad_norm": 1.5151503086090088, "learning_rate": 1.1743479765050549e-05, "loss": 1.5244, "step": 8735 }, { "epoch": 0.47761846835148913, "grad_norm": 1.3212239742279053, "learning_rate": 1.1741680366276442e-05, "loss": 1.3434, "step": 8736 }, { "epoch": 0.47767314079521067, "grad_norm": 1.446953296661377, "learning_rate": 1.173988090934354e-05, "loss": 1.4991, "step": 8737 }, { "epoch": 0.47772781323893226, "grad_norm": 1.5825133323669434, "learning_rate": 1.1738081394311933e-05, "loss": 1.3516, "step": 8738 }, { "epoch": 0.4777824856826538, "grad_norm": 1.6114916801452637, "learning_rate": 1.173628182124171e-05, "loss": 1.371, "step": 8739 }, { "epoch": 0.47783715812637534, "grad_norm": 2.2256855964660645, "learning_rate": 1.1734482190192964e-05, "loss": 1.4489, "step": 8740 }, { "epoch": 0.47789183057009693, "grad_norm": 1.467585802078247, "learning_rate": 1.1732682501225785e-05, "loss": 1.5264, "step": 8741 }, { "epoch": 0.47794650301381847, "grad_norm": 1.7528414726257324, "learning_rate": 1.1730882754400274e-05, "loss": 1.272, "step": 8742 }, { "epoch": 0.47800117545754, "grad_norm": 1.4294668436050415, "learning_rate": 1.1729082949776524e-05, "loss": 1.2814, "step": 8743 }, { "epoch": 0.47805584790126154, "grad_norm": 1.4958364963531494, "learning_rate": 1.1727283087414636e-05, "loss": 1.4659, "step": 8744 }, { "epoch": 0.47811052034498314, "grad_norm": 1.5371637344360352, "learning_rate": 1.1725483167374713e-05, "loss": 1.339, "step": 8745 }, { "epoch": 0.4781651927887047, "grad_norm": 1.8317451477050781, "learning_rate": 1.1723683189716862e-05, "loss": 1.2986, "step": 8746 }, { "epoch": 0.4782198652324262, "grad_norm": 1.8174656629562378, "learning_rate": 1.1721883154501184e-05, "loss": 1.3282, "step": 8747 }, { "epoch": 0.4782745376761478, "grad_norm": 1.4200068712234497, "learning_rate": 1.1720083061787782e-05, "loss": 1.2979, "step": 8748 }, { "epoch": 0.47832921011986934, "grad_norm": 2.2068734169006348, "learning_rate": 1.1718282911636774e-05, "loss": 1.3405, "step": 8749 }, { "epoch": 0.4783838825635909, "grad_norm": 1.5804524421691895, "learning_rate": 1.1716482704108265e-05, "loss": 1.4716, "step": 8750 }, { "epoch": 0.4784385550073124, "grad_norm": 1.441644549369812, "learning_rate": 1.1714682439262373e-05, "loss": 1.6507, "step": 8751 }, { "epoch": 0.478493227451034, "grad_norm": 1.2014250755310059, "learning_rate": 1.1712882117159207e-05, "loss": 1.8876, "step": 8752 }, { "epoch": 0.47854789989475555, "grad_norm": 1.3176796436309814, "learning_rate": 1.1711081737858891e-05, "loss": 1.5885, "step": 8753 }, { "epoch": 0.4786025723384771, "grad_norm": 1.1719141006469727, "learning_rate": 1.1709281301421543e-05, "loss": 1.4483, "step": 8754 }, { "epoch": 0.4786572447821987, "grad_norm": 1.3360792398452759, "learning_rate": 1.1707480807907277e-05, "loss": 1.6641, "step": 8755 }, { "epoch": 0.4787119172259202, "grad_norm": 1.3427150249481201, "learning_rate": 1.1705680257376224e-05, "loss": 1.325, "step": 8756 }, { "epoch": 0.47876658966964175, "grad_norm": 1.479235291481018, "learning_rate": 1.1703879649888501e-05, "loss": 1.409, "step": 8757 }, { "epoch": 0.4788212621133633, "grad_norm": 1.747376561164856, "learning_rate": 1.170207898550424e-05, "loss": 1.5349, "step": 8758 }, { "epoch": 0.4788759345570849, "grad_norm": 1.7065426111221313, "learning_rate": 1.1700278264283567e-05, "loss": 1.2343, "step": 8759 }, { "epoch": 0.4789306070008064, "grad_norm": 1.8975093364715576, "learning_rate": 1.1698477486286615e-05, "loss": 1.4055, "step": 8760 }, { "epoch": 0.47898527944452796, "grad_norm": 1.193968415260315, "learning_rate": 1.169667665157351e-05, "loss": 1.5405, "step": 8761 }, { "epoch": 0.47903995188824955, "grad_norm": 1.468158483505249, "learning_rate": 1.1694875760204391e-05, "loss": 1.389, "step": 8762 }, { "epoch": 0.4790946243319711, "grad_norm": 1.2227442264556885, "learning_rate": 1.1693074812239397e-05, "loss": 1.6158, "step": 8763 }, { "epoch": 0.4791492967756926, "grad_norm": 1.8892911672592163, "learning_rate": 1.1691273807738659e-05, "loss": 1.3001, "step": 8764 }, { "epoch": 0.47920396921941416, "grad_norm": 1.5275108814239502, "learning_rate": 1.1689472746762317e-05, "loss": 1.4897, "step": 8765 }, { "epoch": 0.47925864166313575, "grad_norm": 2.0831387042999268, "learning_rate": 1.1687671629370522e-05, "loss": 1.1141, "step": 8766 }, { "epoch": 0.4793133141068573, "grad_norm": 1.7348793745040894, "learning_rate": 1.1685870455623409e-05, "loss": 1.2659, "step": 8767 }, { "epoch": 0.47936798655057883, "grad_norm": 1.703239917755127, "learning_rate": 1.1684069225581126e-05, "loss": 1.36, "step": 8768 }, { "epoch": 0.4794226589943004, "grad_norm": 1.5561280250549316, "learning_rate": 1.1682267939303815e-05, "loss": 1.5128, "step": 8769 }, { "epoch": 0.47947733143802196, "grad_norm": 1.6280452013015747, "learning_rate": 1.1680466596851635e-05, "loss": 1.5684, "step": 8770 }, { "epoch": 0.4795320038817435, "grad_norm": 1.1354769468307495, "learning_rate": 1.167866519828473e-05, "loss": 1.363, "step": 8771 }, { "epoch": 0.47958667632546503, "grad_norm": 1.6706559658050537, "learning_rate": 1.1676863743663254e-05, "loss": 1.3546, "step": 8772 }, { "epoch": 0.4796413487691866, "grad_norm": 1.5106117725372314, "learning_rate": 1.1675062233047365e-05, "loss": 1.4141, "step": 8773 }, { "epoch": 0.47969602121290816, "grad_norm": 1.316218376159668, "learning_rate": 1.1673260666497218e-05, "loss": 1.393, "step": 8774 }, { "epoch": 0.4797506936566297, "grad_norm": 1.3947412967681885, "learning_rate": 1.1671459044072969e-05, "loss": 1.3526, "step": 8775 }, { "epoch": 0.4798053661003513, "grad_norm": 1.8920924663543701, "learning_rate": 1.1669657365834779e-05, "loss": 1.3221, "step": 8776 }, { "epoch": 0.47986003854407283, "grad_norm": 2.0899171829223633, "learning_rate": 1.1667855631842815e-05, "loss": 1.4905, "step": 8777 }, { "epoch": 0.47991471098779437, "grad_norm": 1.7812778949737549, "learning_rate": 1.1666053842157234e-05, "loss": 1.3405, "step": 8778 }, { "epoch": 0.4799693834315159, "grad_norm": 1.3478838205337524, "learning_rate": 1.1664251996838209e-05, "loss": 1.5144, "step": 8779 }, { "epoch": 0.4800240558752375, "grad_norm": 1.4700404405593872, "learning_rate": 1.1662450095945903e-05, "loss": 1.4452, "step": 8780 }, { "epoch": 0.48007872831895904, "grad_norm": 1.384905457496643, "learning_rate": 1.1660648139540487e-05, "loss": 1.5513, "step": 8781 }, { "epoch": 0.4801334007626806, "grad_norm": 1.4780458211898804, "learning_rate": 1.1658846127682133e-05, "loss": 1.36, "step": 8782 }, { "epoch": 0.48018807320640217, "grad_norm": 1.474164366722107, "learning_rate": 1.1657044060431012e-05, "loss": 1.5248, "step": 8783 }, { "epoch": 0.4802427456501237, "grad_norm": 1.2260032892227173, "learning_rate": 1.1655241937847305e-05, "loss": 1.3616, "step": 8784 }, { "epoch": 0.48029741809384524, "grad_norm": 1.5549633502960205, "learning_rate": 1.1653439759991185e-05, "loss": 1.6414, "step": 8785 }, { "epoch": 0.4803520905375668, "grad_norm": 1.2543258666992188, "learning_rate": 1.1651637526922832e-05, "loss": 1.3496, "step": 8786 }, { "epoch": 0.48040676298128837, "grad_norm": 1.5859196186065674, "learning_rate": 1.1649835238702425e-05, "loss": 1.4887, "step": 8787 }, { "epoch": 0.4804614354250099, "grad_norm": 1.4409611225128174, "learning_rate": 1.1648032895390148e-05, "loss": 1.4337, "step": 8788 }, { "epoch": 0.48051610786873145, "grad_norm": 1.747122883796692, "learning_rate": 1.1646230497046185e-05, "loss": 1.1685, "step": 8789 }, { "epoch": 0.48057078031245304, "grad_norm": 1.4124596118927002, "learning_rate": 1.1644428043730722e-05, "loss": 1.6193, "step": 8790 }, { "epoch": 0.4806254527561746, "grad_norm": 1.7816059589385986, "learning_rate": 1.164262553550395e-05, "loss": 1.5463, "step": 8791 }, { "epoch": 0.4806801251998961, "grad_norm": 1.5298821926116943, "learning_rate": 1.1640822972426055e-05, "loss": 1.3735, "step": 8792 }, { "epoch": 0.48073479764361765, "grad_norm": 1.3459242582321167, "learning_rate": 1.1639020354557234e-05, "loss": 1.41, "step": 8793 }, { "epoch": 0.48078947008733924, "grad_norm": 1.7425740957260132, "learning_rate": 1.1637217681957673e-05, "loss": 1.4754, "step": 8794 }, { "epoch": 0.4808441425310608, "grad_norm": 1.8508577346801758, "learning_rate": 1.1635414954687574e-05, "loss": 1.3201, "step": 8795 }, { "epoch": 0.4808988149747823, "grad_norm": 1.7131811380386353, "learning_rate": 1.1633612172807131e-05, "loss": 1.3805, "step": 8796 }, { "epoch": 0.4809534874185039, "grad_norm": 1.5537221431732178, "learning_rate": 1.1631809336376544e-05, "loss": 1.1931, "step": 8797 }, { "epoch": 0.48100815986222545, "grad_norm": 1.6650338172912598, "learning_rate": 1.1630006445456015e-05, "loss": 1.4312, "step": 8798 }, { "epoch": 0.481062832305947, "grad_norm": 1.3650894165039062, "learning_rate": 1.1628203500105748e-05, "loss": 1.4312, "step": 8799 }, { "epoch": 0.4811175047496685, "grad_norm": 1.6555067300796509, "learning_rate": 1.1626400500385941e-05, "loss": 1.4056, "step": 8800 }, { "epoch": 0.4811721771933901, "grad_norm": 1.4288430213928223, "learning_rate": 1.1624597446356807e-05, "loss": 1.3598, "step": 8801 }, { "epoch": 0.48122684963711165, "grad_norm": 1.5167030096054077, "learning_rate": 1.1622794338078554e-05, "loss": 1.4469, "step": 8802 }, { "epoch": 0.4812815220808332, "grad_norm": 1.552294135093689, "learning_rate": 1.1620991175611385e-05, "loss": 1.3355, "step": 8803 }, { "epoch": 0.4813361945245548, "grad_norm": 1.7866307497024536, "learning_rate": 1.1619187959015519e-05, "loss": 1.4988, "step": 8804 }, { "epoch": 0.4813908669682763, "grad_norm": 1.3335716724395752, "learning_rate": 1.161738468835117e-05, "loss": 1.3743, "step": 8805 }, { "epoch": 0.48144553941199786, "grad_norm": 1.3998595476150513, "learning_rate": 1.161558136367855e-05, "loss": 1.2808, "step": 8806 }, { "epoch": 0.4815002118557194, "grad_norm": 1.612751841545105, "learning_rate": 1.1613777985057877e-05, "loss": 1.4757, "step": 8807 }, { "epoch": 0.481554884299441, "grad_norm": 1.6449377536773682, "learning_rate": 1.1611974552549367e-05, "loss": 1.3669, "step": 8808 }, { "epoch": 0.4816095567431625, "grad_norm": 1.5803687572479248, "learning_rate": 1.161017106621325e-05, "loss": 1.2983, "step": 8809 }, { "epoch": 0.48166422918688406, "grad_norm": 1.928958773612976, "learning_rate": 1.1608367526109738e-05, "loss": 1.4481, "step": 8810 }, { "epoch": 0.48171890163060566, "grad_norm": 1.4159773588180542, "learning_rate": 1.160656393229906e-05, "loss": 1.4453, "step": 8811 }, { "epoch": 0.4817735740743272, "grad_norm": 1.2809611558914185, "learning_rate": 1.1604760284841446e-05, "loss": 1.3458, "step": 8812 }, { "epoch": 0.48182824651804873, "grad_norm": 1.3440485000610352, "learning_rate": 1.1602956583797118e-05, "loss": 1.5197, "step": 8813 }, { "epoch": 0.48188291896177027, "grad_norm": 1.9468761682510376, "learning_rate": 1.1601152829226308e-05, "loss": 1.2766, "step": 8814 }, { "epoch": 0.48193759140549186, "grad_norm": 1.4509313106536865, "learning_rate": 1.1599349021189247e-05, "loss": 1.4918, "step": 8815 }, { "epoch": 0.4819922638492134, "grad_norm": 1.2215708494186401, "learning_rate": 1.1597545159746174e-05, "loss": 1.5225, "step": 8816 }, { "epoch": 0.48204693629293494, "grad_norm": 1.6851966381072998, "learning_rate": 1.1595741244957312e-05, "loss": 1.3333, "step": 8817 }, { "epoch": 0.48210160873665653, "grad_norm": 1.7125940322875977, "learning_rate": 1.1593937276882911e-05, "loss": 1.5104, "step": 8818 }, { "epoch": 0.48215628118037807, "grad_norm": 1.5158523321151733, "learning_rate": 1.1592133255583204e-05, "loss": 1.5405, "step": 8819 }, { "epoch": 0.4822109536240996, "grad_norm": 1.439762830734253, "learning_rate": 1.159032918111843e-05, "loss": 1.5132, "step": 8820 }, { "epoch": 0.48226562606782114, "grad_norm": 1.438928484916687, "learning_rate": 1.1588525053548831e-05, "loss": 1.3658, "step": 8821 }, { "epoch": 0.48232029851154273, "grad_norm": 1.4470922946929932, "learning_rate": 1.1586720872934654e-05, "loss": 1.3936, "step": 8822 }, { "epoch": 0.48237497095526427, "grad_norm": 1.6062562465667725, "learning_rate": 1.1584916639336146e-05, "loss": 1.7986, "step": 8823 }, { "epoch": 0.4824296433989858, "grad_norm": 1.6296074390411377, "learning_rate": 1.1583112352813548e-05, "loss": 1.3579, "step": 8824 }, { "epoch": 0.4824843158427074, "grad_norm": 1.473326563835144, "learning_rate": 1.158130801342712e-05, "loss": 1.6175, "step": 8825 }, { "epoch": 0.48253898828642894, "grad_norm": 1.453503966331482, "learning_rate": 1.1579503621237102e-05, "loss": 1.3988, "step": 8826 }, { "epoch": 0.4825936607301505, "grad_norm": 1.2918617725372314, "learning_rate": 1.1577699176303752e-05, "loss": 1.4893, "step": 8827 }, { "epoch": 0.482648333173872, "grad_norm": 1.3360795974731445, "learning_rate": 1.1575894678687325e-05, "loss": 1.3043, "step": 8828 }, { "epoch": 0.4827030056175936, "grad_norm": 1.8501949310302734, "learning_rate": 1.1574090128448075e-05, "loss": 1.4587, "step": 8829 }, { "epoch": 0.48275767806131514, "grad_norm": 1.8393373489379883, "learning_rate": 1.1572285525646265e-05, "loss": 1.6177, "step": 8830 }, { "epoch": 0.4828123505050367, "grad_norm": 1.4037340879440308, "learning_rate": 1.157048087034215e-05, "loss": 1.2655, "step": 8831 }, { "epoch": 0.4828670229487583, "grad_norm": 2.3554983139038086, "learning_rate": 1.1568676162595995e-05, "loss": 1.5677, "step": 8832 }, { "epoch": 0.4829216953924798, "grad_norm": 1.379670262336731, "learning_rate": 1.156687140246806e-05, "loss": 1.243, "step": 8833 }, { "epoch": 0.48297636783620135, "grad_norm": 1.3068143129348755, "learning_rate": 1.1565066590018615e-05, "loss": 1.2276, "step": 8834 }, { "epoch": 0.4830310402799229, "grad_norm": 1.535454273223877, "learning_rate": 1.156326172530792e-05, "loss": 1.4463, "step": 8835 }, { "epoch": 0.4830857127236445, "grad_norm": 1.3703153133392334, "learning_rate": 1.1561456808396248e-05, "loss": 1.2023, "step": 8836 }, { "epoch": 0.483140385167366, "grad_norm": 1.469473958015442, "learning_rate": 1.155965183934387e-05, "loss": 1.3385, "step": 8837 }, { "epoch": 0.48319505761108755, "grad_norm": 1.2553423643112183, "learning_rate": 1.1557846818211061e-05, "loss": 1.5193, "step": 8838 }, { "epoch": 0.48324973005480915, "grad_norm": 1.5379799604415894, "learning_rate": 1.1556041745058086e-05, "loss": 1.4976, "step": 8839 }, { "epoch": 0.4833044024985307, "grad_norm": 1.1442300081253052, "learning_rate": 1.1554236619945229e-05, "loss": 1.5158, "step": 8840 }, { "epoch": 0.4833590749422522, "grad_norm": 1.4132431745529175, "learning_rate": 1.1552431442932764e-05, "loss": 1.3586, "step": 8841 }, { "epoch": 0.48341374738597376, "grad_norm": 1.559852123260498, "learning_rate": 1.1550626214080967e-05, "loss": 1.3931, "step": 8842 }, { "epoch": 0.48346841982969535, "grad_norm": 1.4889291524887085, "learning_rate": 1.1548820933450125e-05, "loss": 1.5849, "step": 8843 }, { "epoch": 0.4835230922734169, "grad_norm": 1.828728199005127, "learning_rate": 1.1547015601100518e-05, "loss": 1.3369, "step": 8844 }, { "epoch": 0.4835777647171384, "grad_norm": 1.5257774591445923, "learning_rate": 1.1545210217092428e-05, "loss": 1.5829, "step": 8845 }, { "epoch": 0.48363243716086, "grad_norm": 1.3293712139129639, "learning_rate": 1.1543404781486142e-05, "loss": 1.4988, "step": 8846 }, { "epoch": 0.48368710960458156, "grad_norm": 1.158132553100586, "learning_rate": 1.1541599294341952e-05, "loss": 1.3511, "step": 8847 }, { "epoch": 0.4837417820483031, "grad_norm": 1.8666049242019653, "learning_rate": 1.1539793755720141e-05, "loss": 1.543, "step": 8848 }, { "epoch": 0.48379645449202463, "grad_norm": 1.507961392402649, "learning_rate": 1.1537988165681004e-05, "loss": 1.3505, "step": 8849 }, { "epoch": 0.4838511269357462, "grad_norm": 1.737060308456421, "learning_rate": 1.1536182524284833e-05, "loss": 1.2938, "step": 8850 }, { "epoch": 0.48390579937946776, "grad_norm": 1.593797206878662, "learning_rate": 1.1534376831591925e-05, "loss": 1.3256, "step": 8851 }, { "epoch": 0.4839604718231893, "grad_norm": 1.6732829809188843, "learning_rate": 1.1532571087662575e-05, "loss": 1.2371, "step": 8852 }, { "epoch": 0.4840151442669109, "grad_norm": 1.8720875978469849, "learning_rate": 1.1530765292557076e-05, "loss": 1.393, "step": 8853 }, { "epoch": 0.48406981671063243, "grad_norm": 1.3316253423690796, "learning_rate": 1.1528959446335735e-05, "loss": 1.5655, "step": 8854 }, { "epoch": 0.48412448915435397, "grad_norm": 1.4134804010391235, "learning_rate": 1.1527153549058847e-05, "loss": 1.4765, "step": 8855 }, { "epoch": 0.4841791615980755, "grad_norm": 1.229213833808899, "learning_rate": 1.152534760078672e-05, "loss": 1.4106, "step": 8856 }, { "epoch": 0.4842338340417971, "grad_norm": 1.4488816261291504, "learning_rate": 1.1523541601579658e-05, "loss": 1.4401, "step": 8857 }, { "epoch": 0.48428850648551863, "grad_norm": 1.705153226852417, "learning_rate": 1.1521735551497967e-05, "loss": 1.4643, "step": 8858 }, { "epoch": 0.48434317892924017, "grad_norm": 1.8040872812271118, "learning_rate": 1.1519929450601954e-05, "loss": 1.3899, "step": 8859 }, { "epoch": 0.48439785137296176, "grad_norm": 1.1961418390274048, "learning_rate": 1.1518123298951929e-05, "loss": 1.5561, "step": 8860 }, { "epoch": 0.4844525238166833, "grad_norm": 1.3910006284713745, "learning_rate": 1.1516317096608207e-05, "loss": 1.444, "step": 8861 }, { "epoch": 0.48450719626040484, "grad_norm": 1.3606443405151367, "learning_rate": 1.1514510843631097e-05, "loss": 1.4731, "step": 8862 }, { "epoch": 0.4845618687041264, "grad_norm": 1.8101565837860107, "learning_rate": 1.1512704540080916e-05, "loss": 1.317, "step": 8863 }, { "epoch": 0.48461654114784797, "grad_norm": 1.4632270336151123, "learning_rate": 1.1510898186017984e-05, "loss": 1.4715, "step": 8864 }, { "epoch": 0.4846712135915695, "grad_norm": 1.4425907135009766, "learning_rate": 1.1509091781502614e-05, "loss": 1.3663, "step": 8865 }, { "epoch": 0.48472588603529104, "grad_norm": 1.4574737548828125, "learning_rate": 1.1507285326595128e-05, "loss": 1.373, "step": 8866 }, { "epoch": 0.48478055847901264, "grad_norm": 1.6578543186187744, "learning_rate": 1.1505478821355847e-05, "loss": 1.6188, "step": 8867 }, { "epoch": 0.4848352309227342, "grad_norm": 1.7565768957138062, "learning_rate": 1.1503672265845098e-05, "loss": 1.4191, "step": 8868 }, { "epoch": 0.4848899033664557, "grad_norm": 1.9845813512802124, "learning_rate": 1.1501865660123201e-05, "loss": 1.301, "step": 8869 }, { "epoch": 0.4849445758101773, "grad_norm": 1.8714227676391602, "learning_rate": 1.1500059004250487e-05, "loss": 1.3852, "step": 8870 }, { "epoch": 0.48499924825389884, "grad_norm": 1.24365234375, "learning_rate": 1.1498252298287282e-05, "loss": 1.4409, "step": 8871 }, { "epoch": 0.4850539206976204, "grad_norm": 1.8675552606582642, "learning_rate": 1.1496445542293919e-05, "loss": 1.3068, "step": 8872 }, { "epoch": 0.4851085931413419, "grad_norm": 1.544710397720337, "learning_rate": 1.1494638736330726e-05, "loss": 1.2835, "step": 8873 }, { "epoch": 0.4851632655850635, "grad_norm": 1.6150137186050415, "learning_rate": 1.1492831880458037e-05, "loss": 1.3472, "step": 8874 }, { "epoch": 0.48521793802878505, "grad_norm": 1.8333423137664795, "learning_rate": 1.1491024974736191e-05, "loss": 1.4717, "step": 8875 }, { "epoch": 0.4852726104725066, "grad_norm": 1.4338146448135376, "learning_rate": 1.1489218019225521e-05, "loss": 1.241, "step": 8876 }, { "epoch": 0.4853272829162282, "grad_norm": 1.4457122087478638, "learning_rate": 1.1487411013986367e-05, "loss": 1.1678, "step": 8877 }, { "epoch": 0.4853819553599497, "grad_norm": 1.5370100736618042, "learning_rate": 1.1485603959079067e-05, "loss": 1.5152, "step": 8878 }, { "epoch": 0.48543662780367125, "grad_norm": 1.5608989000320435, "learning_rate": 1.1483796854563969e-05, "loss": 1.4033, "step": 8879 }, { "epoch": 0.4854913002473928, "grad_norm": 1.505638837814331, "learning_rate": 1.148198970050141e-05, "loss": 1.3632, "step": 8880 }, { "epoch": 0.4855459726911144, "grad_norm": 1.4434913396835327, "learning_rate": 1.1480182496951735e-05, "loss": 1.5752, "step": 8881 }, { "epoch": 0.4856006451348359, "grad_norm": 1.5278488397598267, "learning_rate": 1.1478375243975298e-05, "loss": 1.3423, "step": 8882 }, { "epoch": 0.48565531757855745, "grad_norm": 1.3759102821350098, "learning_rate": 1.1476567941632437e-05, "loss": 1.4152, "step": 8883 }, { "epoch": 0.48570999002227905, "grad_norm": 1.252164363861084, "learning_rate": 1.1474760589983513e-05, "loss": 1.5076, "step": 8884 }, { "epoch": 0.4857646624660006, "grad_norm": 1.5259681940078735, "learning_rate": 1.1472953189088867e-05, "loss": 1.4732, "step": 8885 }, { "epoch": 0.4858193349097221, "grad_norm": 1.794079303741455, "learning_rate": 1.1471145739008863e-05, "loss": 1.4488, "step": 8886 }, { "epoch": 0.48587400735344366, "grad_norm": 1.1802901029586792, "learning_rate": 1.1469338239803846e-05, "loss": 1.5259, "step": 8887 }, { "epoch": 0.48592867979716525, "grad_norm": 1.7933703660964966, "learning_rate": 1.1467530691534178e-05, "loss": 1.4853, "step": 8888 }, { "epoch": 0.4859833522408868, "grad_norm": 1.1972103118896484, "learning_rate": 1.1465723094260219e-05, "loss": 1.4279, "step": 8889 }, { "epoch": 0.4860380246846083, "grad_norm": 1.7058316469192505, "learning_rate": 1.1463915448042326e-05, "loss": 1.4784, "step": 8890 }, { "epoch": 0.4860926971283299, "grad_norm": 1.3573158979415894, "learning_rate": 1.1462107752940859e-05, "loss": 1.2121, "step": 8891 }, { "epoch": 0.48614736957205146, "grad_norm": 1.3992449045181274, "learning_rate": 1.1460300009016182e-05, "loss": 1.5022, "step": 8892 }, { "epoch": 0.486202042015773, "grad_norm": 1.6110786199569702, "learning_rate": 1.1458492216328668e-05, "loss": 1.291, "step": 8893 }, { "epoch": 0.48625671445949453, "grad_norm": 1.4700818061828613, "learning_rate": 1.145668437493867e-05, "loss": 1.2701, "step": 8894 }, { "epoch": 0.4863113869032161, "grad_norm": 1.3782931566238403, "learning_rate": 1.1454876484906562e-05, "loss": 1.4947, "step": 8895 }, { "epoch": 0.48636605934693766, "grad_norm": 1.7854129076004028, "learning_rate": 1.1453068546292718e-05, "loss": 1.126, "step": 8896 }, { "epoch": 0.4864207317906592, "grad_norm": 1.4587424993515015, "learning_rate": 1.1451260559157505e-05, "loss": 1.5125, "step": 8897 }, { "epoch": 0.4864754042343808, "grad_norm": 1.9261643886566162, "learning_rate": 1.1449452523561295e-05, "loss": 1.4255, "step": 8898 }, { "epoch": 0.48653007667810233, "grad_norm": 1.5255701541900635, "learning_rate": 1.1447644439564462e-05, "loss": 1.6323, "step": 8899 }, { "epoch": 0.48658474912182387, "grad_norm": 1.3661471605300903, "learning_rate": 1.144583630722739e-05, "loss": 1.6587, "step": 8900 }, { "epoch": 0.4866394215655454, "grad_norm": 1.3206429481506348, "learning_rate": 1.1444028126610445e-05, "loss": 1.3116, "step": 8901 }, { "epoch": 0.486694094009267, "grad_norm": 2.0105416774749756, "learning_rate": 1.1442219897774014e-05, "loss": 1.5222, "step": 8902 }, { "epoch": 0.48674876645298853, "grad_norm": 2.050224781036377, "learning_rate": 1.1440411620778478e-05, "loss": 1.4254, "step": 8903 }, { "epoch": 0.4868034388967101, "grad_norm": 1.3918483257293701, "learning_rate": 1.1438603295684219e-05, "loss": 1.3456, "step": 8904 }, { "epoch": 0.48685811134043167, "grad_norm": 1.776885986328125, "learning_rate": 1.1436794922551617e-05, "loss": 1.3922, "step": 8905 }, { "epoch": 0.4869127837841532, "grad_norm": 1.5878130197525024, "learning_rate": 1.1434986501441062e-05, "loss": 1.506, "step": 8906 }, { "epoch": 0.48696745622787474, "grad_norm": 1.5196475982666016, "learning_rate": 1.1433178032412941e-05, "loss": 1.4034, "step": 8907 }, { "epoch": 0.4870221286715963, "grad_norm": 1.4518100023269653, "learning_rate": 1.1431369515527642e-05, "loss": 1.2044, "step": 8908 }, { "epoch": 0.48707680111531787, "grad_norm": 1.979851484298706, "learning_rate": 1.1429560950845555e-05, "loss": 1.4078, "step": 8909 }, { "epoch": 0.4871314735590394, "grad_norm": 1.949257493019104, "learning_rate": 1.1427752338427075e-05, "loss": 1.2943, "step": 8910 }, { "epoch": 0.48718614600276094, "grad_norm": 1.568165898323059, "learning_rate": 1.1425943678332595e-05, "loss": 1.2835, "step": 8911 }, { "epoch": 0.48724081844648254, "grad_norm": 1.8336970806121826, "learning_rate": 1.1424134970622507e-05, "loss": 1.1629, "step": 8912 }, { "epoch": 0.4872954908902041, "grad_norm": 1.4205341339111328, "learning_rate": 1.1422326215357209e-05, "loss": 1.4716, "step": 8913 }, { "epoch": 0.4873501633339256, "grad_norm": 1.90969717502594, "learning_rate": 1.1420517412597106e-05, "loss": 1.1289, "step": 8914 }, { "epoch": 0.48740483577764715, "grad_norm": 1.6185232400894165, "learning_rate": 1.1418708562402589e-05, "loss": 1.4671, "step": 8915 }, { "epoch": 0.48745950822136874, "grad_norm": 1.2744663953781128, "learning_rate": 1.1416899664834066e-05, "loss": 1.6598, "step": 8916 }, { "epoch": 0.4875141806650903, "grad_norm": 1.3707634210586548, "learning_rate": 1.141509071995194e-05, "loss": 1.5352, "step": 8917 }, { "epoch": 0.4875688531088118, "grad_norm": 1.5348221063613892, "learning_rate": 1.1413281727816612e-05, "loss": 1.2679, "step": 8918 }, { "epoch": 0.4876235255525334, "grad_norm": 1.6640204191207886, "learning_rate": 1.1411472688488489e-05, "loss": 1.3152, "step": 8919 }, { "epoch": 0.48767819799625495, "grad_norm": 1.8180683851242065, "learning_rate": 1.1409663602027984e-05, "loss": 1.2797, "step": 8920 }, { "epoch": 0.4877328704399765, "grad_norm": 1.239664912223816, "learning_rate": 1.1407854468495502e-05, "loss": 1.5368, "step": 8921 }, { "epoch": 0.487787542883698, "grad_norm": 1.3449698686599731, "learning_rate": 1.1406045287951458e-05, "loss": 1.4484, "step": 8922 }, { "epoch": 0.4878422153274196, "grad_norm": 1.524965763092041, "learning_rate": 1.140423606045626e-05, "loss": 1.4091, "step": 8923 }, { "epoch": 0.48789688777114115, "grad_norm": 1.7783403396606445, "learning_rate": 1.1402426786070326e-05, "loss": 1.3638, "step": 8924 }, { "epoch": 0.4879515602148627, "grad_norm": 1.5937474966049194, "learning_rate": 1.1400617464854069e-05, "loss": 1.2276, "step": 8925 }, { "epoch": 0.4880062326585843, "grad_norm": 1.3333781957626343, "learning_rate": 1.1398808096867908e-05, "loss": 1.1953, "step": 8926 }, { "epoch": 0.4880609051023058, "grad_norm": 1.4410265684127808, "learning_rate": 1.1396998682172264e-05, "loss": 1.4128, "step": 8927 }, { "epoch": 0.48811557754602736, "grad_norm": 1.1648179292678833, "learning_rate": 1.1395189220827556e-05, "loss": 1.5199, "step": 8928 }, { "epoch": 0.4881702499897489, "grad_norm": 1.7513419389724731, "learning_rate": 1.1393379712894205e-05, "loss": 1.4356, "step": 8929 }, { "epoch": 0.4882249224334705, "grad_norm": 1.3653157949447632, "learning_rate": 1.1391570158432636e-05, "loss": 1.516, "step": 8930 }, { "epoch": 0.488279594877192, "grad_norm": 1.4766244888305664, "learning_rate": 1.1389760557503275e-05, "loss": 1.4286, "step": 8931 }, { "epoch": 0.48833426732091356, "grad_norm": 1.5388985872268677, "learning_rate": 1.1387950910166545e-05, "loss": 1.3856, "step": 8932 }, { "epoch": 0.48838893976463515, "grad_norm": 1.8913787603378296, "learning_rate": 1.138614121648288e-05, "loss": 1.4397, "step": 8933 }, { "epoch": 0.4884436122083567, "grad_norm": 1.2344890832901, "learning_rate": 1.1384331476512706e-05, "loss": 1.7313, "step": 8934 }, { "epoch": 0.48849828465207823, "grad_norm": 1.6272186040878296, "learning_rate": 1.1382521690316455e-05, "loss": 1.2646, "step": 8935 }, { "epoch": 0.48855295709579977, "grad_norm": 1.3568193912506104, "learning_rate": 1.1380711857954562e-05, "loss": 1.4898, "step": 8936 }, { "epoch": 0.48860762953952136, "grad_norm": 1.1211986541748047, "learning_rate": 1.137890197948746e-05, "loss": 1.591, "step": 8937 }, { "epoch": 0.4886623019832429, "grad_norm": 1.5598065853118896, "learning_rate": 1.1377092054975586e-05, "loss": 1.2905, "step": 8938 }, { "epoch": 0.48871697442696443, "grad_norm": 1.6434842348098755, "learning_rate": 1.1375282084479373e-05, "loss": 1.4502, "step": 8939 }, { "epoch": 0.488771646870686, "grad_norm": 1.7066419124603271, "learning_rate": 1.1373472068059266e-05, "loss": 1.3596, "step": 8940 }, { "epoch": 0.48882631931440756, "grad_norm": 2.3666961193084717, "learning_rate": 1.1371662005775705e-05, "loss": 1.5368, "step": 8941 }, { "epoch": 0.4888809917581291, "grad_norm": 1.7492284774780273, "learning_rate": 1.1369851897689128e-05, "loss": 1.1811, "step": 8942 }, { "epoch": 0.48893566420185064, "grad_norm": 1.7015050649642944, "learning_rate": 1.1368041743859985e-05, "loss": 1.4451, "step": 8943 }, { "epoch": 0.48899033664557223, "grad_norm": 1.3437987565994263, "learning_rate": 1.1366231544348716e-05, "loss": 1.3194, "step": 8944 }, { "epoch": 0.48904500908929377, "grad_norm": 1.4579887390136719, "learning_rate": 1.1364421299215773e-05, "loss": 1.4099, "step": 8945 }, { "epoch": 0.4890996815330153, "grad_norm": 1.4875651597976685, "learning_rate": 1.1362611008521597e-05, "loss": 1.568, "step": 8946 }, { "epoch": 0.4891543539767369, "grad_norm": 1.4797601699829102, "learning_rate": 1.1360800672326642e-05, "loss": 1.574, "step": 8947 }, { "epoch": 0.48920902642045844, "grad_norm": 1.7536314725875854, "learning_rate": 1.1358990290691364e-05, "loss": 1.376, "step": 8948 }, { "epoch": 0.48926369886418, "grad_norm": 1.3073221445083618, "learning_rate": 1.1357179863676207e-05, "loss": 1.3143, "step": 8949 }, { "epoch": 0.4893183713079015, "grad_norm": 1.9730000495910645, "learning_rate": 1.135536939134163e-05, "loss": 1.4538, "step": 8950 }, { "epoch": 0.4893730437516231, "grad_norm": 1.6042910814285278, "learning_rate": 1.135355887374809e-05, "loss": 1.3342, "step": 8951 }, { "epoch": 0.48942771619534464, "grad_norm": 1.4496374130249023, "learning_rate": 1.135174831095604e-05, "loss": 1.5377, "step": 8952 }, { "epoch": 0.4894823886390662, "grad_norm": 2.2347052097320557, "learning_rate": 1.1349937703025944e-05, "loss": 1.2763, "step": 8953 }, { "epoch": 0.48953706108278777, "grad_norm": 1.4843546152114868, "learning_rate": 1.134812705001826e-05, "loss": 1.377, "step": 8954 }, { "epoch": 0.4895917335265093, "grad_norm": 1.530427098274231, "learning_rate": 1.1346316351993454e-05, "loss": 1.5516, "step": 8955 }, { "epoch": 0.48964640597023085, "grad_norm": 1.4232473373413086, "learning_rate": 1.1344505609011983e-05, "loss": 1.3691, "step": 8956 }, { "epoch": 0.4897010784139524, "grad_norm": 1.466610074043274, "learning_rate": 1.1342694821134313e-05, "loss": 1.221, "step": 8957 }, { "epoch": 0.489755750857674, "grad_norm": 1.6698038578033447, "learning_rate": 1.1340883988420912e-05, "loss": 1.4282, "step": 8958 }, { "epoch": 0.4898104233013955, "grad_norm": 1.3248378038406372, "learning_rate": 1.133907311093225e-05, "loss": 1.7295, "step": 8959 }, { "epoch": 0.48986509574511705, "grad_norm": 1.6409947872161865, "learning_rate": 1.1337262188728795e-05, "loss": 1.6601, "step": 8960 }, { "epoch": 0.48991976818883864, "grad_norm": 1.6363000869750977, "learning_rate": 1.1335451221871015e-05, "loss": 1.4461, "step": 8961 }, { "epoch": 0.4899744406325602, "grad_norm": 1.8288363218307495, "learning_rate": 1.1333640210419388e-05, "loss": 1.509, "step": 8962 }, { "epoch": 0.4900291130762817, "grad_norm": 1.5952742099761963, "learning_rate": 1.1331829154434386e-05, "loss": 1.5311, "step": 8963 }, { "epoch": 0.49008378552000326, "grad_norm": 1.339078426361084, "learning_rate": 1.133001805397648e-05, "loss": 1.5622, "step": 8964 }, { "epoch": 0.49013845796372485, "grad_norm": 1.483960509300232, "learning_rate": 1.132820690910615e-05, "loss": 1.4119, "step": 8965 }, { "epoch": 0.4901931304074464, "grad_norm": 1.3789081573486328, "learning_rate": 1.1326395719883876e-05, "loss": 1.6707, "step": 8966 }, { "epoch": 0.4902478028511679, "grad_norm": 1.3669031858444214, "learning_rate": 1.1324584486370136e-05, "loss": 1.6575, "step": 8967 }, { "epoch": 0.4903024752948895, "grad_norm": 1.4056005477905273, "learning_rate": 1.1322773208625413e-05, "loss": 1.5183, "step": 8968 }, { "epoch": 0.49035714773861105, "grad_norm": 1.1205755472183228, "learning_rate": 1.1320961886710189e-05, "loss": 1.4848, "step": 8969 }, { "epoch": 0.4904118201823326, "grad_norm": 1.5234280824661255, "learning_rate": 1.1319150520684946e-05, "loss": 1.4815, "step": 8970 }, { "epoch": 0.49046649262605413, "grad_norm": 1.656439185142517, "learning_rate": 1.1317339110610171e-05, "loss": 1.3862, "step": 8971 }, { "epoch": 0.4905211650697757, "grad_norm": 1.577574610710144, "learning_rate": 1.131552765654635e-05, "loss": 1.4125, "step": 8972 }, { "epoch": 0.49057583751349726, "grad_norm": 1.4618744850158691, "learning_rate": 1.1313716158553978e-05, "loss": 1.5288, "step": 8973 }, { "epoch": 0.4906305099572188, "grad_norm": 2.0807840824127197, "learning_rate": 1.1311904616693539e-05, "loss": 1.2979, "step": 8974 }, { "epoch": 0.4906851824009404, "grad_norm": 1.586108684539795, "learning_rate": 1.1310093031025527e-05, "loss": 1.4213, "step": 8975 }, { "epoch": 0.4907398548446619, "grad_norm": 1.2120859622955322, "learning_rate": 1.1308281401610434e-05, "loss": 1.3335, "step": 8976 }, { "epoch": 0.49079452728838346, "grad_norm": 1.6331650018692017, "learning_rate": 1.1306469728508755e-05, "loss": 1.3145, "step": 8977 }, { "epoch": 0.490849199732105, "grad_norm": 1.4908758401870728, "learning_rate": 1.1304658011780985e-05, "loss": 1.1966, "step": 8978 }, { "epoch": 0.4909038721758266, "grad_norm": 1.3025950193405151, "learning_rate": 1.1302846251487623e-05, "loss": 1.4501, "step": 8979 }, { "epoch": 0.49095854461954813, "grad_norm": 1.2125316858291626, "learning_rate": 1.130103444768917e-05, "loss": 1.5446, "step": 8980 }, { "epoch": 0.49101321706326967, "grad_norm": 1.1376277208328247, "learning_rate": 1.1299222600446122e-05, "loss": 1.5171, "step": 8981 }, { "epoch": 0.49106788950699126, "grad_norm": 1.5921401977539062, "learning_rate": 1.129741070981898e-05, "loss": 1.5411, "step": 8982 }, { "epoch": 0.4911225619507128, "grad_norm": 1.1790990829467773, "learning_rate": 1.1295598775868255e-05, "loss": 1.4525, "step": 8983 }, { "epoch": 0.49117723439443434, "grad_norm": 1.711439609527588, "learning_rate": 1.1293786798654442e-05, "loss": 1.4194, "step": 8984 }, { "epoch": 0.4912319068381559, "grad_norm": 1.7575907707214355, "learning_rate": 1.1291974778238055e-05, "loss": 1.6882, "step": 8985 }, { "epoch": 0.49128657928187747, "grad_norm": 1.363368034362793, "learning_rate": 1.1290162714679596e-05, "loss": 1.2023, "step": 8986 }, { "epoch": 0.491341251725599, "grad_norm": 1.6361868381500244, "learning_rate": 1.1288350608039577e-05, "loss": 1.4351, "step": 8987 }, { "epoch": 0.49139592416932054, "grad_norm": 1.7315125465393066, "learning_rate": 1.1286538458378512e-05, "loss": 1.4585, "step": 8988 }, { "epoch": 0.49145059661304213, "grad_norm": 1.4047974348068237, "learning_rate": 1.1284726265756904e-05, "loss": 1.3548, "step": 8989 }, { "epoch": 0.49150526905676367, "grad_norm": 1.7135006189346313, "learning_rate": 1.1282914030235275e-05, "loss": 1.3637, "step": 8990 }, { "epoch": 0.4915599415004852, "grad_norm": 1.1550047397613525, "learning_rate": 1.1281101751874132e-05, "loss": 1.5508, "step": 8991 }, { "epoch": 0.49161461394420675, "grad_norm": 1.3339701890945435, "learning_rate": 1.1279289430733998e-05, "loss": 1.6816, "step": 8992 }, { "epoch": 0.49166928638792834, "grad_norm": 1.4171308279037476, "learning_rate": 1.1277477066875384e-05, "loss": 1.3166, "step": 8993 }, { "epoch": 0.4917239588316499, "grad_norm": 1.4296579360961914, "learning_rate": 1.1275664660358818e-05, "loss": 1.5487, "step": 8994 }, { "epoch": 0.4917786312753714, "grad_norm": 1.8972058296203613, "learning_rate": 1.1273852211244816e-05, "loss": 1.3366, "step": 8995 }, { "epoch": 0.491833303719093, "grad_norm": 1.233897089958191, "learning_rate": 1.1272039719593898e-05, "loss": 1.4195, "step": 8996 }, { "epoch": 0.49188797616281454, "grad_norm": 1.6973809003829956, "learning_rate": 1.1270227185466586e-05, "loss": 1.3474, "step": 8997 }, { "epoch": 0.4919426486065361, "grad_norm": 1.8016749620437622, "learning_rate": 1.126841460892341e-05, "loss": 1.4081, "step": 8998 }, { "epoch": 0.4919973210502576, "grad_norm": 1.688572645187378, "learning_rate": 1.1266601990024893e-05, "loss": 1.2909, "step": 8999 }, { "epoch": 0.4920519934939792, "grad_norm": 1.8300822973251343, "learning_rate": 1.1264789328831564e-05, "loss": 1.2198, "step": 9000 }, { "epoch": 0.49210666593770075, "grad_norm": 1.981605887413025, "learning_rate": 1.1262976625403954e-05, "loss": 1.3165, "step": 9001 }, { "epoch": 0.4921613383814223, "grad_norm": 1.4486862421035767, "learning_rate": 1.1261163879802587e-05, "loss": 1.609, "step": 9002 }, { "epoch": 0.4922160108251439, "grad_norm": 1.1719231605529785, "learning_rate": 1.1259351092088e-05, "loss": 1.5688, "step": 9003 }, { "epoch": 0.4922706832688654, "grad_norm": 1.3894600868225098, "learning_rate": 1.1257538262320724e-05, "loss": 1.4108, "step": 9004 }, { "epoch": 0.49232535571258695, "grad_norm": 1.4598749876022339, "learning_rate": 1.1255725390561296e-05, "loss": 1.4518, "step": 9005 }, { "epoch": 0.4923800281563085, "grad_norm": 1.5580464601516724, "learning_rate": 1.1253912476870247e-05, "loss": 1.4996, "step": 9006 }, { "epoch": 0.4924347006000301, "grad_norm": 2.1538519859313965, "learning_rate": 1.1252099521308124e-05, "loss": 1.4909, "step": 9007 }, { "epoch": 0.4924893730437516, "grad_norm": 1.777477502822876, "learning_rate": 1.1250286523935456e-05, "loss": 1.3924, "step": 9008 }, { "epoch": 0.49254404548747316, "grad_norm": 1.5677978992462158, "learning_rate": 1.1248473484812787e-05, "loss": 1.4652, "step": 9009 }, { "epoch": 0.49259871793119475, "grad_norm": 1.6123223304748535, "learning_rate": 1.124666040400066e-05, "loss": 1.2545, "step": 9010 }, { "epoch": 0.4926533903749163, "grad_norm": 1.328993320465088, "learning_rate": 1.1244847281559616e-05, "loss": 1.3777, "step": 9011 }, { "epoch": 0.4927080628186378, "grad_norm": 1.5391027927398682, "learning_rate": 1.12430341175502e-05, "loss": 1.1297, "step": 9012 }, { "epoch": 0.49276273526235936, "grad_norm": 1.388431429862976, "learning_rate": 1.1241220912032958e-05, "loss": 1.4443, "step": 9013 }, { "epoch": 0.49281740770608096, "grad_norm": 1.38567054271698, "learning_rate": 1.1239407665068437e-05, "loss": 1.4301, "step": 9014 }, { "epoch": 0.4928720801498025, "grad_norm": 1.8166264295578003, "learning_rate": 1.1237594376717188e-05, "loss": 1.427, "step": 9015 }, { "epoch": 0.49292675259352403, "grad_norm": 1.5648488998413086, "learning_rate": 1.1235781047039756e-05, "loss": 1.5884, "step": 9016 }, { "epoch": 0.4929814250372456, "grad_norm": 1.792878270149231, "learning_rate": 1.1233967676096693e-05, "loss": 1.4656, "step": 9017 }, { "epoch": 0.49303609748096716, "grad_norm": 1.4197098016738892, "learning_rate": 1.1232154263948558e-05, "loss": 1.5731, "step": 9018 }, { "epoch": 0.4930907699246887, "grad_norm": 1.41350257396698, "learning_rate": 1.12303408106559e-05, "loss": 1.291, "step": 9019 }, { "epoch": 0.49314544236841024, "grad_norm": 1.542455792427063, "learning_rate": 1.1228527316279273e-05, "loss": 1.2931, "step": 9020 }, { "epoch": 0.49320011481213183, "grad_norm": 1.3863908052444458, "learning_rate": 1.1226713780879236e-05, "loss": 1.3522, "step": 9021 }, { "epoch": 0.49325478725585337, "grad_norm": 1.4681600332260132, "learning_rate": 1.122490020451635e-05, "loss": 1.234, "step": 9022 }, { "epoch": 0.4933094596995749, "grad_norm": 1.3270893096923828, "learning_rate": 1.122308658725117e-05, "loss": 1.2472, "step": 9023 }, { "epoch": 0.4933641321432965, "grad_norm": 1.4239400625228882, "learning_rate": 1.122127292914426e-05, "loss": 1.5279, "step": 9024 }, { "epoch": 0.49341880458701803, "grad_norm": 1.302234172821045, "learning_rate": 1.1219459230256182e-05, "loss": 1.4061, "step": 9025 }, { "epoch": 0.49347347703073957, "grad_norm": 1.5694916248321533, "learning_rate": 1.1217645490647494e-05, "loss": 1.4414, "step": 9026 }, { "epoch": 0.4935281494744611, "grad_norm": 1.8723273277282715, "learning_rate": 1.1215831710378772e-05, "loss": 1.4385, "step": 9027 }, { "epoch": 0.4935828219181827, "grad_norm": 1.636615514755249, "learning_rate": 1.1214017889510573e-05, "loss": 1.2825, "step": 9028 }, { "epoch": 0.49363749436190424, "grad_norm": 1.839158535003662, "learning_rate": 1.1212204028103469e-05, "loss": 1.1965, "step": 9029 }, { "epoch": 0.4936921668056258, "grad_norm": 1.702659249305725, "learning_rate": 1.1210390126218024e-05, "loss": 1.4018, "step": 9030 }, { "epoch": 0.49374683924934737, "grad_norm": 1.5178446769714355, "learning_rate": 1.1208576183914816e-05, "loss": 1.2794, "step": 9031 }, { "epoch": 0.4938015116930689, "grad_norm": 1.4694486856460571, "learning_rate": 1.1206762201254415e-05, "loss": 1.4043, "step": 9032 }, { "epoch": 0.49385618413679044, "grad_norm": 1.6776901483535767, "learning_rate": 1.1204948178297387e-05, "loss": 1.4053, "step": 9033 }, { "epoch": 0.493910856580512, "grad_norm": 1.3708178997039795, "learning_rate": 1.1203134115104318e-05, "loss": 1.3571, "step": 9034 }, { "epoch": 0.4939655290242336, "grad_norm": 1.5647475719451904, "learning_rate": 1.1201320011735772e-05, "loss": 1.4963, "step": 9035 }, { "epoch": 0.4940202014679551, "grad_norm": 2.1507041454315186, "learning_rate": 1.1199505868252336e-05, "loss": 1.4972, "step": 9036 }, { "epoch": 0.49407487391167665, "grad_norm": 1.3057844638824463, "learning_rate": 1.1197691684714582e-05, "loss": 1.51, "step": 9037 }, { "epoch": 0.49412954635539824, "grad_norm": 1.5054457187652588, "learning_rate": 1.1195877461183091e-05, "loss": 1.2186, "step": 9038 }, { "epoch": 0.4941842187991198, "grad_norm": 1.608855128288269, "learning_rate": 1.119406319771845e-05, "loss": 1.4645, "step": 9039 }, { "epoch": 0.4942388912428413, "grad_norm": 1.6185106039047241, "learning_rate": 1.1192248894381234e-05, "loss": 1.356, "step": 9040 }, { "epoch": 0.49429356368656285, "grad_norm": 1.5696464776992798, "learning_rate": 1.119043455123203e-05, "loss": 1.4141, "step": 9041 }, { "epoch": 0.49434823613028445, "grad_norm": 2.347327947616577, "learning_rate": 1.1188620168331421e-05, "loss": 1.2103, "step": 9042 }, { "epoch": 0.494402908574006, "grad_norm": 1.3986326456069946, "learning_rate": 1.1186805745739999e-05, "loss": 1.2092, "step": 9043 }, { "epoch": 0.4944575810177275, "grad_norm": 1.4172024726867676, "learning_rate": 1.1184991283518346e-05, "loss": 1.2411, "step": 9044 }, { "epoch": 0.4945122534614491, "grad_norm": 2.408799171447754, "learning_rate": 1.1183176781727052e-05, "loss": 1.3611, "step": 9045 }, { "epoch": 0.49456692590517065, "grad_norm": 1.3342657089233398, "learning_rate": 1.1181362240426711e-05, "loss": 1.4051, "step": 9046 }, { "epoch": 0.4946215983488922, "grad_norm": 1.631804347038269, "learning_rate": 1.1179547659677915e-05, "loss": 1.5539, "step": 9047 }, { "epoch": 0.4946762707926137, "grad_norm": 1.49224054813385, "learning_rate": 1.117773303954125e-05, "loss": 1.3153, "step": 9048 }, { "epoch": 0.4947309432363353, "grad_norm": 1.7186802625656128, "learning_rate": 1.1175918380077316e-05, "loss": 1.4179, "step": 9049 }, { "epoch": 0.49478561568005686, "grad_norm": 1.4532610177993774, "learning_rate": 1.1174103681346711e-05, "loss": 1.4735, "step": 9050 }, { "epoch": 0.4948402881237784, "grad_norm": 1.7488905191421509, "learning_rate": 1.1172288943410025e-05, "loss": 1.748, "step": 9051 }, { "epoch": 0.4948949605675, "grad_norm": 1.641648769378662, "learning_rate": 1.117047416632786e-05, "loss": 1.5461, "step": 9052 }, { "epoch": 0.4949496330112215, "grad_norm": 1.4916447401046753, "learning_rate": 1.116865935016082e-05, "loss": 1.5816, "step": 9053 }, { "epoch": 0.49500430545494306, "grad_norm": 1.3967331647872925, "learning_rate": 1.1166844494969501e-05, "loss": 1.4214, "step": 9054 }, { "epoch": 0.4950589778986646, "grad_norm": 1.3990150690078735, "learning_rate": 1.1165029600814505e-05, "loss": 1.3546, "step": 9055 }, { "epoch": 0.4951136503423862, "grad_norm": 1.5502359867095947, "learning_rate": 1.1163214667756437e-05, "loss": 1.2877, "step": 9056 }, { "epoch": 0.49516832278610773, "grad_norm": 1.3613395690917969, "learning_rate": 1.1161399695855903e-05, "loss": 1.4516, "step": 9057 }, { "epoch": 0.49522299522982927, "grad_norm": 1.3557018041610718, "learning_rate": 1.1159584685173506e-05, "loss": 1.2999, "step": 9058 }, { "epoch": 0.49527766767355086, "grad_norm": 2.029775381088257, "learning_rate": 1.1157769635769857e-05, "loss": 1.5577, "step": 9059 }, { "epoch": 0.4953323401172724, "grad_norm": 1.7303380966186523, "learning_rate": 1.1155954547705563e-05, "loss": 1.4636, "step": 9060 }, { "epoch": 0.49538701256099393, "grad_norm": 1.4984532594680786, "learning_rate": 1.1154139421041232e-05, "loss": 1.4863, "step": 9061 }, { "epoch": 0.49544168500471547, "grad_norm": 1.3659262657165527, "learning_rate": 1.115232425583748e-05, "loss": 1.4756, "step": 9062 }, { "epoch": 0.49549635744843706, "grad_norm": 1.3920551538467407, "learning_rate": 1.1150509052154913e-05, "loss": 1.3389, "step": 9063 }, { "epoch": 0.4955510298921586, "grad_norm": 1.6163356304168701, "learning_rate": 1.1148693810054152e-05, "loss": 1.4985, "step": 9064 }, { "epoch": 0.49560570233588014, "grad_norm": 1.865308165550232, "learning_rate": 1.1146878529595808e-05, "loss": 1.5803, "step": 9065 }, { "epoch": 0.49566037477960173, "grad_norm": 2.0961813926696777, "learning_rate": 1.11450632108405e-05, "loss": 1.6006, "step": 9066 }, { "epoch": 0.49571504722332327, "grad_norm": 1.5178142786026, "learning_rate": 1.1143247853848846e-05, "loss": 1.4628, "step": 9067 }, { "epoch": 0.4957697196670448, "grad_norm": 1.615357518196106, "learning_rate": 1.114143245868146e-05, "loss": 1.2844, "step": 9068 }, { "epoch": 0.4958243921107664, "grad_norm": 1.4774140119552612, "learning_rate": 1.1139617025398968e-05, "loss": 1.2911, "step": 9069 }, { "epoch": 0.49587906455448794, "grad_norm": 1.3272842168807983, "learning_rate": 1.1137801554061987e-05, "loss": 1.5982, "step": 9070 }, { "epoch": 0.4959337369982095, "grad_norm": 1.5868749618530273, "learning_rate": 1.1135986044731144e-05, "loss": 1.3711, "step": 9071 }, { "epoch": 0.495988409441931, "grad_norm": 1.471232295036316, "learning_rate": 1.113417049746706e-05, "loss": 1.3595, "step": 9072 }, { "epoch": 0.4960430818856526, "grad_norm": 1.3440427780151367, "learning_rate": 1.1132354912330366e-05, "loss": 1.4187, "step": 9073 }, { "epoch": 0.49609775432937414, "grad_norm": 1.7721571922302246, "learning_rate": 1.113053928938168e-05, "loss": 1.3999, "step": 9074 }, { "epoch": 0.4961524267730957, "grad_norm": 1.7270853519439697, "learning_rate": 1.1128723628681635e-05, "loss": 1.6087, "step": 9075 }, { "epoch": 0.49620709921681727, "grad_norm": 1.4347931146621704, "learning_rate": 1.1126907930290861e-05, "loss": 1.6267, "step": 9076 }, { "epoch": 0.4962617716605388, "grad_norm": 1.1912896633148193, "learning_rate": 1.1125092194269982e-05, "loss": 1.4387, "step": 9077 }, { "epoch": 0.49631644410426035, "grad_norm": 1.3600553274154663, "learning_rate": 1.112327642067964e-05, "loss": 1.549, "step": 9078 }, { "epoch": 0.4963711165479819, "grad_norm": 1.4214699268341064, "learning_rate": 1.1121460609580461e-05, "loss": 1.4667, "step": 9079 }, { "epoch": 0.4964257889917035, "grad_norm": 1.8480945825576782, "learning_rate": 1.1119644761033079e-05, "loss": 1.5709, "step": 9080 }, { "epoch": 0.496480461435425, "grad_norm": 1.1064337491989136, "learning_rate": 1.1117828875098136e-05, "loss": 1.4473, "step": 9081 }, { "epoch": 0.49653513387914655, "grad_norm": 1.270141839981079, "learning_rate": 1.1116012951836257e-05, "loss": 1.4801, "step": 9082 }, { "epoch": 0.49658980632286814, "grad_norm": 1.6380950212478638, "learning_rate": 1.1114196991308091e-05, "loss": 1.5621, "step": 9083 }, { "epoch": 0.4966444787665897, "grad_norm": 1.4206279516220093, "learning_rate": 1.111238099357427e-05, "loss": 1.3893, "step": 9084 }, { "epoch": 0.4966991512103112, "grad_norm": 1.6913789510726929, "learning_rate": 1.111056495869544e-05, "loss": 1.5403, "step": 9085 }, { "epoch": 0.49675382365403276, "grad_norm": 1.4083566665649414, "learning_rate": 1.1108748886732239e-05, "loss": 1.4262, "step": 9086 }, { "epoch": 0.49680849609775435, "grad_norm": 1.7631105184555054, "learning_rate": 1.110693277774531e-05, "loss": 1.2479, "step": 9087 }, { "epoch": 0.4968631685414759, "grad_norm": 1.5675758123397827, "learning_rate": 1.1105116631795301e-05, "loss": 1.4117, "step": 9088 }, { "epoch": 0.4969178409851974, "grad_norm": 1.7802481651306152, "learning_rate": 1.110330044894285e-05, "loss": 1.5414, "step": 9089 }, { "epoch": 0.496972513428919, "grad_norm": 1.5569806098937988, "learning_rate": 1.1101484229248612e-05, "loss": 1.4352, "step": 9090 }, { "epoch": 0.49702718587264055, "grad_norm": 1.6333914995193481, "learning_rate": 1.1099667972773228e-05, "loss": 1.5928, "step": 9091 }, { "epoch": 0.4970818583163621, "grad_norm": 1.678789734840393, "learning_rate": 1.1097851679577351e-05, "loss": 1.4488, "step": 9092 }, { "epoch": 0.49713653076008363, "grad_norm": 1.5255663394927979, "learning_rate": 1.1096035349721633e-05, "loss": 1.4645, "step": 9093 }, { "epoch": 0.4971912032038052, "grad_norm": 1.2110121250152588, "learning_rate": 1.1094218983266718e-05, "loss": 1.498, "step": 9094 }, { "epoch": 0.49724587564752676, "grad_norm": 1.4017770290374756, "learning_rate": 1.1092402580273268e-05, "loss": 1.5361, "step": 9095 }, { "epoch": 0.4973005480912483, "grad_norm": 1.7051507234573364, "learning_rate": 1.109058614080193e-05, "loss": 1.4172, "step": 9096 }, { "epoch": 0.4973552205349699, "grad_norm": 1.3362115621566772, "learning_rate": 1.1088769664913359e-05, "loss": 1.5086, "step": 9097 }, { "epoch": 0.4974098929786914, "grad_norm": 1.5123090744018555, "learning_rate": 1.1086953152668218e-05, "loss": 1.2094, "step": 9098 }, { "epoch": 0.49746456542241296, "grad_norm": 2.0252294540405273, "learning_rate": 1.1085136604127161e-05, "loss": 1.2333, "step": 9099 }, { "epoch": 0.4975192378661345, "grad_norm": 1.1901212930679321, "learning_rate": 1.1083320019350845e-05, "loss": 1.5521, "step": 9100 }, { "epoch": 0.4975739103098561, "grad_norm": 1.6860564947128296, "learning_rate": 1.108150339839993e-05, "loss": 1.3691, "step": 9101 }, { "epoch": 0.49762858275357763, "grad_norm": 1.7642024755477905, "learning_rate": 1.107968674133508e-05, "loss": 1.5044, "step": 9102 }, { "epoch": 0.49768325519729917, "grad_norm": 1.6698628664016724, "learning_rate": 1.1077870048216957e-05, "loss": 1.3308, "step": 9103 }, { "epoch": 0.49773792764102076, "grad_norm": 1.6568622589111328, "learning_rate": 1.107605331910622e-05, "loss": 1.3293, "step": 9104 }, { "epoch": 0.4977926000847423, "grad_norm": 1.5304447412490845, "learning_rate": 1.1074236554063543e-05, "loss": 1.5103, "step": 9105 }, { "epoch": 0.49784727252846384, "grad_norm": 1.3208136558532715, "learning_rate": 1.1072419753149585e-05, "loss": 1.4167, "step": 9106 }, { "epoch": 0.4979019449721854, "grad_norm": 1.520240306854248, "learning_rate": 1.1070602916425013e-05, "loss": 1.1446, "step": 9107 }, { "epoch": 0.49795661741590697, "grad_norm": 1.6496107578277588, "learning_rate": 1.1068786043950496e-05, "loss": 1.2545, "step": 9108 }, { "epoch": 0.4980112898596285, "grad_norm": 1.431809425354004, "learning_rate": 1.1066969135786711e-05, "loss": 1.4792, "step": 9109 }, { "epoch": 0.49806596230335004, "grad_norm": 1.2086946964263916, "learning_rate": 1.1065152191994318e-05, "loss": 1.147, "step": 9110 }, { "epoch": 0.49812063474707163, "grad_norm": 1.6467205286026, "learning_rate": 1.1063335212633996e-05, "loss": 1.1695, "step": 9111 }, { "epoch": 0.49817530719079317, "grad_norm": 1.8861743211746216, "learning_rate": 1.1061518197766415e-05, "loss": 1.2169, "step": 9112 }, { "epoch": 0.4982299796345147, "grad_norm": 1.770715355873108, "learning_rate": 1.105970114745225e-05, "loss": 1.1228, "step": 9113 }, { "epoch": 0.49828465207823625, "grad_norm": 1.351383090019226, "learning_rate": 1.1057884061752177e-05, "loss": 1.5997, "step": 9114 }, { "epoch": 0.49833932452195784, "grad_norm": 1.4457459449768066, "learning_rate": 1.1056066940726874e-05, "loss": 1.4837, "step": 9115 }, { "epoch": 0.4983939969656794, "grad_norm": 1.3363070487976074, "learning_rate": 1.1054249784437018e-05, "loss": 1.3998, "step": 9116 }, { "epoch": 0.4984486694094009, "grad_norm": 1.4282976388931274, "learning_rate": 1.1052432592943287e-05, "loss": 1.147, "step": 9117 }, { "epoch": 0.4985033418531225, "grad_norm": 1.3158029317855835, "learning_rate": 1.1050615366306364e-05, "loss": 1.6013, "step": 9118 }, { "epoch": 0.49855801429684404, "grad_norm": 1.583696722984314, "learning_rate": 1.1048798104586923e-05, "loss": 1.4873, "step": 9119 }, { "epoch": 0.4986126867405656, "grad_norm": 1.6619316339492798, "learning_rate": 1.104698080784566e-05, "loss": 1.4396, "step": 9120 }, { "epoch": 0.4986673591842871, "grad_norm": 1.2742494344711304, "learning_rate": 1.1045163476143247e-05, "loss": 1.509, "step": 9121 }, { "epoch": 0.4987220316280087, "grad_norm": 1.6757882833480835, "learning_rate": 1.104334610954037e-05, "loss": 1.467, "step": 9122 }, { "epoch": 0.49877670407173025, "grad_norm": 1.8435096740722656, "learning_rate": 1.1041528708097722e-05, "loss": 1.3812, "step": 9123 }, { "epoch": 0.4988313765154518, "grad_norm": 1.4071056842803955, "learning_rate": 1.1039711271875986e-05, "loss": 1.4868, "step": 9124 }, { "epoch": 0.4988860489591734, "grad_norm": 1.5839967727661133, "learning_rate": 1.1037893800935851e-05, "loss": 1.2141, "step": 9125 }, { "epoch": 0.4989407214028949, "grad_norm": 1.882143497467041, "learning_rate": 1.1036076295338005e-05, "loss": 1.5257, "step": 9126 }, { "epoch": 0.49899539384661645, "grad_norm": 1.419055700302124, "learning_rate": 1.1034258755143141e-05, "loss": 1.3125, "step": 9127 }, { "epoch": 0.499050066290338, "grad_norm": 1.4257692098617554, "learning_rate": 1.103244118041195e-05, "loss": 1.5785, "step": 9128 }, { "epoch": 0.4991047387340596, "grad_norm": 1.3923879861831665, "learning_rate": 1.1030623571205125e-05, "loss": 1.3884, "step": 9129 }, { "epoch": 0.4991594111777811, "grad_norm": 1.278900146484375, "learning_rate": 1.1028805927583362e-05, "loss": 1.5264, "step": 9130 }, { "epoch": 0.49921408362150266, "grad_norm": 1.4669437408447266, "learning_rate": 1.1026988249607355e-05, "loss": 1.5076, "step": 9131 }, { "epoch": 0.49926875606522425, "grad_norm": 1.4127031564712524, "learning_rate": 1.1025170537337799e-05, "loss": 1.3553, "step": 9132 }, { "epoch": 0.4993234285089458, "grad_norm": 1.7936440706253052, "learning_rate": 1.1023352790835393e-05, "loss": 1.3732, "step": 9133 }, { "epoch": 0.4993781009526673, "grad_norm": 1.4196499586105347, "learning_rate": 1.1021535010160838e-05, "loss": 1.4011, "step": 9134 }, { "epoch": 0.49943277339638886, "grad_norm": 1.5440324544906616, "learning_rate": 1.1019717195374828e-05, "loss": 1.4089, "step": 9135 }, { "epoch": 0.49948744584011046, "grad_norm": 1.5091913938522339, "learning_rate": 1.1017899346538071e-05, "loss": 1.5824, "step": 9136 }, { "epoch": 0.499542118283832, "grad_norm": 1.23623526096344, "learning_rate": 1.1016081463711266e-05, "loss": 1.4588, "step": 9137 }, { "epoch": 0.49959679072755353, "grad_norm": 1.6922953128814697, "learning_rate": 1.1014263546955118e-05, "loss": 1.5044, "step": 9138 }, { "epoch": 0.4996514631712751, "grad_norm": 1.6195347309112549, "learning_rate": 1.1012445596330327e-05, "loss": 1.4709, "step": 9139 }, { "epoch": 0.49970613561499666, "grad_norm": 1.3043804168701172, "learning_rate": 1.1010627611897602e-05, "loss": 1.5164, "step": 9140 }, { "epoch": 0.4997608080587182, "grad_norm": 1.3322519063949585, "learning_rate": 1.1008809593717653e-05, "loss": 1.6986, "step": 9141 }, { "epoch": 0.49981548050243974, "grad_norm": 1.908170223236084, "learning_rate": 1.100699154185118e-05, "loss": 1.2919, "step": 9142 }, { "epoch": 0.49987015294616133, "grad_norm": 1.3069998025894165, "learning_rate": 1.1005173456358898e-05, "loss": 1.4007, "step": 9143 }, { "epoch": 0.49992482538988287, "grad_norm": 1.9527990818023682, "learning_rate": 1.1003355337301517e-05, "loss": 1.1708, "step": 9144 }, { "epoch": 0.4999794978336044, "grad_norm": 1.4292675256729126, "learning_rate": 1.1001537184739748e-05, "loss": 1.5634, "step": 9145 }, { "epoch": 0.500034170277326, "grad_norm": 1.8104937076568604, "learning_rate": 1.0999718998734298e-05, "loss": 1.4255, "step": 9146 }, { "epoch": 0.5000888427210475, "grad_norm": 1.3769959211349487, "learning_rate": 1.0997900779345887e-05, "loss": 1.4897, "step": 9147 }, { "epoch": 0.5001435151647691, "grad_norm": 1.3664121627807617, "learning_rate": 1.0996082526635227e-05, "loss": 1.4065, "step": 9148 }, { "epoch": 0.5001981876084907, "grad_norm": 1.2719597816467285, "learning_rate": 1.0994264240663035e-05, "loss": 1.3907, "step": 9149 }, { "epoch": 0.5002528600522121, "grad_norm": 1.5426864624023438, "learning_rate": 1.0992445921490026e-05, "loss": 1.3048, "step": 9150 }, { "epoch": 0.5003075324959337, "grad_norm": 1.2532882690429688, "learning_rate": 1.0990627569176921e-05, "loss": 1.3576, "step": 9151 }, { "epoch": 0.5003622049396553, "grad_norm": 1.4881041049957275, "learning_rate": 1.0988809183784435e-05, "loss": 1.4654, "step": 9152 }, { "epoch": 0.5004168773833768, "grad_norm": 1.3694936037063599, "learning_rate": 1.0986990765373289e-05, "loss": 1.4539, "step": 9153 }, { "epoch": 0.5004715498270984, "grad_norm": 1.3210910558700562, "learning_rate": 1.0985172314004205e-05, "loss": 1.6179, "step": 9154 }, { "epoch": 0.50052622227082, "grad_norm": 1.2879568338394165, "learning_rate": 1.0983353829737909e-05, "loss": 1.4478, "step": 9155 }, { "epoch": 0.5005808947145415, "grad_norm": 1.5447138547897339, "learning_rate": 1.0981535312635116e-05, "loss": 1.2258, "step": 9156 }, { "epoch": 0.5006355671582631, "grad_norm": 1.4438856840133667, "learning_rate": 1.097971676275656e-05, "loss": 1.5324, "step": 9157 }, { "epoch": 0.5006902396019846, "grad_norm": 1.672016978263855, "learning_rate": 1.0977898180162962e-05, "loss": 1.4789, "step": 9158 }, { "epoch": 0.5007449120457061, "grad_norm": 1.619435429573059, "learning_rate": 1.0976079564915047e-05, "loss": 1.5182, "step": 9159 }, { "epoch": 0.5007995844894277, "grad_norm": 1.5820485353469849, "learning_rate": 1.0974260917073548e-05, "loss": 1.4034, "step": 9160 }, { "epoch": 0.5008542569331492, "grad_norm": 1.3367723226547241, "learning_rate": 1.0972442236699186e-05, "loss": 1.6962, "step": 9161 }, { "epoch": 0.5009089293768708, "grad_norm": 1.7843366861343384, "learning_rate": 1.0970623523852699e-05, "loss": 1.3142, "step": 9162 }, { "epoch": 0.5009636018205924, "grad_norm": 1.3612791299819946, "learning_rate": 1.0968804778594815e-05, "loss": 1.4583, "step": 9163 }, { "epoch": 0.5010182742643139, "grad_norm": 1.5678991079330444, "learning_rate": 1.0966986000986267e-05, "loss": 1.3887, "step": 9164 }, { "epoch": 0.5010729467080355, "grad_norm": 1.3598434925079346, "learning_rate": 1.0965167191087786e-05, "loss": 1.4362, "step": 9165 }, { "epoch": 0.5011276191517571, "grad_norm": 1.257779836654663, "learning_rate": 1.0963348348960109e-05, "loss": 1.4134, "step": 9166 }, { "epoch": 0.5011822915954786, "grad_norm": 1.5769251585006714, "learning_rate": 1.0961529474663966e-05, "loss": 1.5783, "step": 9167 }, { "epoch": 0.5012369640392002, "grad_norm": 1.5245660543441772, "learning_rate": 1.0959710568260098e-05, "loss": 1.3153, "step": 9168 }, { "epoch": 0.5012916364829217, "grad_norm": 2.9814720153808594, "learning_rate": 1.0957891629809248e-05, "loss": 1.3482, "step": 9169 }, { "epoch": 0.5013463089266432, "grad_norm": 1.6343774795532227, "learning_rate": 1.0956072659372144e-05, "loss": 1.4065, "step": 9170 }, { "epoch": 0.5014009813703648, "grad_norm": 1.4008146524429321, "learning_rate": 1.0954253657009531e-05, "loss": 1.7087, "step": 9171 }, { "epoch": 0.5014556538140864, "grad_norm": 1.4108036756515503, "learning_rate": 1.095243462278215e-05, "loss": 1.4031, "step": 9172 }, { "epoch": 0.5015103262578079, "grad_norm": 1.599269986152649, "learning_rate": 1.095061555675074e-05, "loss": 1.4477, "step": 9173 }, { "epoch": 0.5015649987015295, "grad_norm": 1.9476033449172974, "learning_rate": 1.0948796458976048e-05, "loss": 1.4123, "step": 9174 }, { "epoch": 0.501619671145251, "grad_norm": 1.5432026386260986, "learning_rate": 1.0946977329518813e-05, "loss": 1.4152, "step": 9175 }, { "epoch": 0.5016743435889726, "grad_norm": 1.3361787796020508, "learning_rate": 1.0945158168439786e-05, "loss": 1.3024, "step": 9176 }, { "epoch": 0.5017290160326942, "grad_norm": 1.363503098487854, "learning_rate": 1.094333897579971e-05, "loss": 1.399, "step": 9177 }, { "epoch": 0.5017836884764156, "grad_norm": 1.3344775438308716, "learning_rate": 1.094151975165933e-05, "loss": 1.37, "step": 9178 }, { "epoch": 0.5018383609201372, "grad_norm": 1.2415635585784912, "learning_rate": 1.0939700496079399e-05, "loss": 1.5286, "step": 9179 }, { "epoch": 0.5018930333638588, "grad_norm": 1.4196304082870483, "learning_rate": 1.093788120912066e-05, "loss": 1.2598, "step": 9180 }, { "epoch": 0.5019477058075803, "grad_norm": 1.2729747295379639, "learning_rate": 1.0936061890843868e-05, "loss": 1.4682, "step": 9181 }, { "epoch": 0.5020023782513019, "grad_norm": 1.7135388851165771, "learning_rate": 1.0934242541309772e-05, "loss": 1.4693, "step": 9182 }, { "epoch": 0.5020570506950235, "grad_norm": 1.4809695482254028, "learning_rate": 1.0932423160579126e-05, "loss": 1.2332, "step": 9183 }, { "epoch": 0.502111723138745, "grad_norm": 1.7416454553604126, "learning_rate": 1.0930603748712685e-05, "loss": 1.2576, "step": 9184 }, { "epoch": 0.5021663955824666, "grad_norm": 1.5219972133636475, "learning_rate": 1.0928784305771199e-05, "loss": 1.4459, "step": 9185 }, { "epoch": 0.5022210680261882, "grad_norm": 1.512986183166504, "learning_rate": 1.0926964831815425e-05, "loss": 1.5639, "step": 9186 }, { "epoch": 0.5022757404699096, "grad_norm": 1.7153574228286743, "learning_rate": 1.0925145326906121e-05, "loss": 1.2635, "step": 9187 }, { "epoch": 0.5023304129136312, "grad_norm": 1.2067817449569702, "learning_rate": 1.0923325791104042e-05, "loss": 1.5515, "step": 9188 }, { "epoch": 0.5023850853573527, "grad_norm": 1.2703850269317627, "learning_rate": 1.0921506224469952e-05, "loss": 1.4463, "step": 9189 }, { "epoch": 0.5024397578010743, "grad_norm": 1.4821091890335083, "learning_rate": 1.0919686627064603e-05, "loss": 1.3053, "step": 9190 }, { "epoch": 0.5024944302447959, "grad_norm": 1.7214113473892212, "learning_rate": 1.091786699894876e-05, "loss": 1.5185, "step": 9191 }, { "epoch": 0.5025491026885174, "grad_norm": 1.5352967977523804, "learning_rate": 1.0916047340183184e-05, "loss": 1.2333, "step": 9192 }, { "epoch": 0.502603775132239, "grad_norm": 1.818145751953125, "learning_rate": 1.091422765082864e-05, "loss": 1.4243, "step": 9193 }, { "epoch": 0.5026584475759606, "grad_norm": 1.3082342147827148, "learning_rate": 1.0912407930945887e-05, "loss": 1.3721, "step": 9194 }, { "epoch": 0.502713120019682, "grad_norm": 1.8295300006866455, "learning_rate": 1.0910588180595692e-05, "loss": 1.5603, "step": 9195 }, { "epoch": 0.5027677924634036, "grad_norm": 1.3757150173187256, "learning_rate": 1.0908768399838823e-05, "loss": 1.3177, "step": 9196 }, { "epoch": 0.5028224649071252, "grad_norm": 1.3429417610168457, "learning_rate": 1.0906948588736044e-05, "loss": 1.5364, "step": 9197 }, { "epoch": 0.5028771373508467, "grad_norm": 1.3577241897583008, "learning_rate": 1.0905128747348121e-05, "loss": 1.9358, "step": 9198 }, { "epoch": 0.5029318097945683, "grad_norm": 1.6328468322753906, "learning_rate": 1.0903308875735827e-05, "loss": 1.0816, "step": 9199 }, { "epoch": 0.5029864822382899, "grad_norm": 1.3817944526672363, "learning_rate": 1.0901488973959933e-05, "loss": 1.5441, "step": 9200 }, { "epoch": 0.5030411546820114, "grad_norm": 1.4567596912384033, "learning_rate": 1.0899669042081202e-05, "loss": 1.5678, "step": 9201 }, { "epoch": 0.503095827125733, "grad_norm": 1.895716667175293, "learning_rate": 1.0897849080160411e-05, "loss": 1.6464, "step": 9202 }, { "epoch": 0.5031504995694545, "grad_norm": 1.32338547706604, "learning_rate": 1.0896029088258336e-05, "loss": 1.8332, "step": 9203 }, { "epoch": 0.503205172013176, "grad_norm": 1.676030158996582, "learning_rate": 1.0894209066435746e-05, "loss": 1.5642, "step": 9204 }, { "epoch": 0.5032598444568976, "grad_norm": 1.2920998334884644, "learning_rate": 1.0892389014753412e-05, "loss": 1.3674, "step": 9205 }, { "epoch": 0.5033145169006191, "grad_norm": 1.5025438070297241, "learning_rate": 1.0890568933272119e-05, "loss": 1.2247, "step": 9206 }, { "epoch": 0.5033691893443407, "grad_norm": 1.4308288097381592, "learning_rate": 1.0888748822052642e-05, "loss": 1.169, "step": 9207 }, { "epoch": 0.5034238617880623, "grad_norm": 1.6747183799743652, "learning_rate": 1.088692868115575e-05, "loss": 1.5181, "step": 9208 }, { "epoch": 0.5034785342317838, "grad_norm": 1.615469217300415, "learning_rate": 1.0885108510642232e-05, "loss": 1.5832, "step": 9209 }, { "epoch": 0.5035332066755054, "grad_norm": 1.4734227657318115, "learning_rate": 1.0883288310572863e-05, "loss": 1.2426, "step": 9210 }, { "epoch": 0.503587879119227, "grad_norm": 1.7072020769119263, "learning_rate": 1.0881468081008428e-05, "loss": 1.2638, "step": 9211 }, { "epoch": 0.5036425515629485, "grad_norm": 1.3590377569198608, "learning_rate": 1.08796478220097e-05, "loss": 1.2497, "step": 9212 }, { "epoch": 0.50369722400667, "grad_norm": 1.7926315069198608, "learning_rate": 1.0877827533637469e-05, "loss": 1.3971, "step": 9213 }, { "epoch": 0.5037518964503916, "grad_norm": 1.5071686506271362, "learning_rate": 1.0876007215952518e-05, "loss": 1.3489, "step": 9214 }, { "epoch": 0.5038065688941131, "grad_norm": 1.4712698459625244, "learning_rate": 1.0874186869015627e-05, "loss": 1.58, "step": 9215 }, { "epoch": 0.5038612413378347, "grad_norm": 1.3748517036437988, "learning_rate": 1.0872366492887591e-05, "loss": 1.712, "step": 9216 }, { "epoch": 0.5039159137815562, "grad_norm": 1.3499858379364014, "learning_rate": 1.0870546087629185e-05, "loss": 1.2818, "step": 9217 }, { "epoch": 0.5039705862252778, "grad_norm": 1.6537336111068726, "learning_rate": 1.0868725653301206e-05, "loss": 1.2643, "step": 9218 }, { "epoch": 0.5040252586689994, "grad_norm": 1.584787130355835, "learning_rate": 1.0866905189964438e-05, "loss": 1.4374, "step": 9219 }, { "epoch": 0.5040799311127209, "grad_norm": 2.097658634185791, "learning_rate": 1.0865084697679671e-05, "loss": 1.5015, "step": 9220 }, { "epoch": 0.5041346035564425, "grad_norm": 1.738918662071228, "learning_rate": 1.0863264176507695e-05, "loss": 1.351, "step": 9221 }, { "epoch": 0.504189276000164, "grad_norm": 1.6787900924682617, "learning_rate": 1.0861443626509305e-05, "loss": 1.2749, "step": 9222 }, { "epoch": 0.5042439484438855, "grad_norm": 1.637730598449707, "learning_rate": 1.0859623047745289e-05, "loss": 1.2945, "step": 9223 }, { "epoch": 0.5042986208876071, "grad_norm": 1.6038581132888794, "learning_rate": 1.0857802440276442e-05, "loss": 1.3446, "step": 9224 }, { "epoch": 0.5043532933313287, "grad_norm": 1.4295904636383057, "learning_rate": 1.085598180416356e-05, "loss": 1.5782, "step": 9225 }, { "epoch": 0.5044079657750502, "grad_norm": 1.5738162994384766, "learning_rate": 1.0854161139467436e-05, "loss": 1.3139, "step": 9226 }, { "epoch": 0.5044626382187718, "grad_norm": 1.4930096864700317, "learning_rate": 1.0852340446248867e-05, "loss": 1.522, "step": 9227 }, { "epoch": 0.5045173106624934, "grad_norm": 1.8793680667877197, "learning_rate": 1.0850519724568652e-05, "loss": 1.4408, "step": 9228 }, { "epoch": 0.5045719831062149, "grad_norm": 1.4466437101364136, "learning_rate": 1.0848698974487585e-05, "loss": 1.3216, "step": 9229 }, { "epoch": 0.5046266555499365, "grad_norm": 1.4407943487167358, "learning_rate": 1.0846878196066468e-05, "loss": 1.5974, "step": 9230 }, { "epoch": 0.504681327993658, "grad_norm": 1.3724273443222046, "learning_rate": 1.0845057389366102e-05, "loss": 1.6334, "step": 9231 }, { "epoch": 0.5047360004373795, "grad_norm": 1.794918179512024, "learning_rate": 1.0843236554447288e-05, "loss": 1.597, "step": 9232 }, { "epoch": 0.5047906728811011, "grad_norm": 1.682470440864563, "learning_rate": 1.0841415691370825e-05, "loss": 1.3429, "step": 9233 }, { "epoch": 0.5048453453248226, "grad_norm": 1.6406848430633545, "learning_rate": 1.0839594800197516e-05, "loss": 1.2286, "step": 9234 }, { "epoch": 0.5049000177685442, "grad_norm": 1.4210604429244995, "learning_rate": 1.083777388098817e-05, "loss": 1.5445, "step": 9235 }, { "epoch": 0.5049546902122658, "grad_norm": 1.4759691953659058, "learning_rate": 1.0835952933803588e-05, "loss": 1.2183, "step": 9236 }, { "epoch": 0.5050093626559873, "grad_norm": 1.6857409477233887, "learning_rate": 1.0834131958704575e-05, "loss": 1.4164, "step": 9237 }, { "epoch": 0.5050640350997089, "grad_norm": 1.4324668645858765, "learning_rate": 1.0832310955751937e-05, "loss": 1.4467, "step": 9238 }, { "epoch": 0.5051187075434305, "grad_norm": 1.6722524166107178, "learning_rate": 1.0830489925006485e-05, "loss": 1.4308, "step": 9239 }, { "epoch": 0.505173379987152, "grad_norm": 1.9677289724349976, "learning_rate": 1.0828668866529024e-05, "loss": 1.4214, "step": 9240 }, { "epoch": 0.5052280524308735, "grad_norm": 1.7233290672302246, "learning_rate": 1.0826847780380365e-05, "loss": 1.3361, "step": 9241 }, { "epoch": 0.5052827248745951, "grad_norm": 1.1106672286987305, "learning_rate": 1.0825026666621321e-05, "loss": 1.5284, "step": 9242 }, { "epoch": 0.5053373973183166, "grad_norm": 1.2709344625473022, "learning_rate": 1.0823205525312699e-05, "loss": 1.6344, "step": 9243 }, { "epoch": 0.5053920697620382, "grad_norm": 1.5729376077651978, "learning_rate": 1.082138435651531e-05, "loss": 1.3946, "step": 9244 }, { "epoch": 0.5054467422057597, "grad_norm": 1.7237467765808105, "learning_rate": 1.0819563160289975e-05, "loss": 1.4483, "step": 9245 }, { "epoch": 0.5055014146494813, "grad_norm": 1.9707900285720825, "learning_rate": 1.0817741936697499e-05, "loss": 1.3925, "step": 9246 }, { "epoch": 0.5055560870932029, "grad_norm": 2.078331232070923, "learning_rate": 1.0815920685798702e-05, "loss": 1.2537, "step": 9247 }, { "epoch": 0.5056107595369244, "grad_norm": 1.4908004999160767, "learning_rate": 1.0814099407654399e-05, "loss": 1.4423, "step": 9248 }, { "epoch": 0.505665431980646, "grad_norm": 1.225391149520874, "learning_rate": 1.0812278102325408e-05, "loss": 1.4167, "step": 9249 }, { "epoch": 0.5057201044243675, "grad_norm": 2.0461177825927734, "learning_rate": 1.0810456769872544e-05, "loss": 1.3856, "step": 9250 }, { "epoch": 0.505774776868089, "grad_norm": 1.4167261123657227, "learning_rate": 1.0808635410356625e-05, "loss": 1.3374, "step": 9251 }, { "epoch": 0.5058294493118106, "grad_norm": 1.4397258758544922, "learning_rate": 1.0806814023838473e-05, "loss": 1.4757, "step": 9252 }, { "epoch": 0.5058841217555322, "grad_norm": 1.1133447885513306, "learning_rate": 1.0804992610378907e-05, "loss": 1.1842, "step": 9253 }, { "epoch": 0.5059387941992537, "grad_norm": 1.5030605792999268, "learning_rate": 1.0803171170038748e-05, "loss": 1.5514, "step": 9254 }, { "epoch": 0.5059934666429753, "grad_norm": 1.305299162864685, "learning_rate": 1.0801349702878822e-05, "loss": 1.226, "step": 9255 }, { "epoch": 0.5060481390866969, "grad_norm": 1.2670540809631348, "learning_rate": 1.0799528208959949e-05, "loss": 1.634, "step": 9256 }, { "epoch": 0.5061028115304184, "grad_norm": 1.2894244194030762, "learning_rate": 1.079770668834295e-05, "loss": 1.3728, "step": 9257 }, { "epoch": 0.50615748397414, "grad_norm": 1.5469772815704346, "learning_rate": 1.0795885141088653e-05, "loss": 1.4596, "step": 9258 }, { "epoch": 0.5062121564178614, "grad_norm": 1.464271903038025, "learning_rate": 1.0794063567257886e-05, "loss": 1.2935, "step": 9259 }, { "epoch": 0.506266828861583, "grad_norm": 1.5106585025787354, "learning_rate": 1.0792241966911472e-05, "loss": 1.466, "step": 9260 }, { "epoch": 0.5063215013053046, "grad_norm": 1.590548038482666, "learning_rate": 1.079042034011024e-05, "loss": 1.4776, "step": 9261 }, { "epoch": 0.5063761737490261, "grad_norm": 1.4787334203720093, "learning_rate": 1.0788598686915018e-05, "loss": 1.3687, "step": 9262 }, { "epoch": 0.5064308461927477, "grad_norm": 2.742635488510132, "learning_rate": 1.0786777007386636e-05, "loss": 1.2405, "step": 9263 }, { "epoch": 0.5064855186364693, "grad_norm": 1.5406900644302368, "learning_rate": 1.078495530158592e-05, "loss": 1.5556, "step": 9264 }, { "epoch": 0.5065401910801908, "grad_norm": 1.6349668502807617, "learning_rate": 1.0783133569573708e-05, "loss": 1.3211, "step": 9265 }, { "epoch": 0.5065948635239124, "grad_norm": 1.870841383934021, "learning_rate": 1.0781311811410826e-05, "loss": 1.2536, "step": 9266 }, { "epoch": 0.506649535967634, "grad_norm": 1.9175809621810913, "learning_rate": 1.077949002715811e-05, "loss": 1.3031, "step": 9267 }, { "epoch": 0.5067042084113554, "grad_norm": 1.2238861322402954, "learning_rate": 1.0777668216876395e-05, "loss": 1.415, "step": 9268 }, { "epoch": 0.506758880855077, "grad_norm": 1.3635213375091553, "learning_rate": 1.0775846380626512e-05, "loss": 1.0541, "step": 9269 }, { "epoch": 0.5068135532987986, "grad_norm": 1.6410695314407349, "learning_rate": 1.0774024518469297e-05, "loss": 1.4653, "step": 9270 }, { "epoch": 0.5068682257425201, "grad_norm": 1.88834810256958, "learning_rate": 1.0772202630465586e-05, "loss": 1.326, "step": 9271 }, { "epoch": 0.5069228981862417, "grad_norm": 1.6267911195755005, "learning_rate": 1.0770380716676218e-05, "loss": 1.3858, "step": 9272 }, { "epoch": 0.5069775706299632, "grad_norm": 1.7607086896896362, "learning_rate": 1.0768558777162035e-05, "loss": 1.7391, "step": 9273 }, { "epoch": 0.5070322430736848, "grad_norm": 1.5721588134765625, "learning_rate": 1.0766736811983864e-05, "loss": 1.401, "step": 9274 }, { "epoch": 0.5070869155174064, "grad_norm": 1.5932202339172363, "learning_rate": 1.0764914821202556e-05, "loss": 1.4705, "step": 9275 }, { "epoch": 0.5071415879611278, "grad_norm": 1.706814169883728, "learning_rate": 1.0763092804878945e-05, "loss": 1.5413, "step": 9276 }, { "epoch": 0.5071962604048494, "grad_norm": 1.7307748794555664, "learning_rate": 1.0761270763073877e-05, "loss": 1.3199, "step": 9277 }, { "epoch": 0.507250932848571, "grad_norm": 1.2935539484024048, "learning_rate": 1.075944869584819e-05, "loss": 1.4282, "step": 9278 }, { "epoch": 0.5073056052922925, "grad_norm": 1.771756887435913, "learning_rate": 1.0757626603262728e-05, "loss": 1.5369, "step": 9279 }, { "epoch": 0.5073602777360141, "grad_norm": 1.4159214496612549, "learning_rate": 1.0755804485378336e-05, "loss": 1.3371, "step": 9280 }, { "epoch": 0.5074149501797357, "grad_norm": 1.5640146732330322, "learning_rate": 1.0753982342255863e-05, "loss": 1.3808, "step": 9281 }, { "epoch": 0.5074696226234572, "grad_norm": 1.3444966077804565, "learning_rate": 1.0752160173956145e-05, "loss": 1.4762, "step": 9282 }, { "epoch": 0.5075242950671788, "grad_norm": 1.3408606052398682, "learning_rate": 1.0750337980540035e-05, "loss": 1.4229, "step": 9283 }, { "epoch": 0.5075789675109004, "grad_norm": 1.6160829067230225, "learning_rate": 1.0748515762068382e-05, "loss": 1.4491, "step": 9284 }, { "epoch": 0.5076336399546219, "grad_norm": 1.3902089595794678, "learning_rate": 1.0746693518602026e-05, "loss": 1.4216, "step": 9285 }, { "epoch": 0.5076883123983434, "grad_norm": 1.4403091669082642, "learning_rate": 1.0744871250201825e-05, "loss": 1.3527, "step": 9286 }, { "epoch": 0.5077429848420649, "grad_norm": 1.2534019947052002, "learning_rate": 1.0743048956928626e-05, "loss": 1.657, "step": 9287 }, { "epoch": 0.5077976572857865, "grad_norm": 1.7089160680770874, "learning_rate": 1.0741226638843276e-05, "loss": 1.5074, "step": 9288 }, { "epoch": 0.5078523297295081, "grad_norm": 1.5159661769866943, "learning_rate": 1.0739404296006631e-05, "loss": 1.6373, "step": 9289 }, { "epoch": 0.5079070021732296, "grad_norm": 2.1053335666656494, "learning_rate": 1.073758192847954e-05, "loss": 1.2803, "step": 9290 }, { "epoch": 0.5079616746169512, "grad_norm": 1.5300946235656738, "learning_rate": 1.0735759536322859e-05, "loss": 1.4538, "step": 9291 }, { "epoch": 0.5080163470606728, "grad_norm": 1.2037854194641113, "learning_rate": 1.0733937119597439e-05, "loss": 1.3901, "step": 9292 }, { "epoch": 0.5080710195043943, "grad_norm": 1.3626728057861328, "learning_rate": 1.0732114678364135e-05, "loss": 1.6278, "step": 9293 }, { "epoch": 0.5081256919481159, "grad_norm": 1.8458207845687866, "learning_rate": 1.0730292212683808e-05, "loss": 1.4222, "step": 9294 }, { "epoch": 0.5081803643918374, "grad_norm": 2.1589443683624268, "learning_rate": 1.072846972261731e-05, "loss": 1.374, "step": 9295 }, { "epoch": 0.5082350368355589, "grad_norm": 1.8611433506011963, "learning_rate": 1.0726647208225498e-05, "loss": 1.3753, "step": 9296 }, { "epoch": 0.5082897092792805, "grad_norm": 1.8618669509887695, "learning_rate": 1.0724824669569226e-05, "loss": 1.6169, "step": 9297 }, { "epoch": 0.5083443817230021, "grad_norm": 1.3359532356262207, "learning_rate": 1.0723002106709364e-05, "loss": 1.5627, "step": 9298 }, { "epoch": 0.5083990541667236, "grad_norm": 1.619670033454895, "learning_rate": 1.072117951970676e-05, "loss": 1.5391, "step": 9299 }, { "epoch": 0.5084537266104452, "grad_norm": 1.8141053915023804, "learning_rate": 1.0719356908622286e-05, "loss": 1.4495, "step": 9300 }, { "epoch": 0.5085083990541667, "grad_norm": 1.3308489322662354, "learning_rate": 1.0717534273516791e-05, "loss": 1.4257, "step": 9301 }, { "epoch": 0.5085630714978883, "grad_norm": 1.619585394859314, "learning_rate": 1.0715711614451146e-05, "loss": 1.5609, "step": 9302 }, { "epoch": 0.5086177439416099, "grad_norm": 1.2383116483688354, "learning_rate": 1.0713888931486208e-05, "loss": 1.4688, "step": 9303 }, { "epoch": 0.5086724163853313, "grad_norm": 1.189988136291504, "learning_rate": 1.0712066224682842e-05, "loss": 1.4919, "step": 9304 }, { "epoch": 0.5087270888290529, "grad_norm": 1.4760822057724, "learning_rate": 1.0710243494101917e-05, "loss": 1.4749, "step": 9305 }, { "epoch": 0.5087817612727745, "grad_norm": 1.6986690759658813, "learning_rate": 1.0708420739804296e-05, "loss": 1.268, "step": 9306 }, { "epoch": 0.508836433716496, "grad_norm": 1.6686317920684814, "learning_rate": 1.0706597961850842e-05, "loss": 1.3496, "step": 9307 }, { "epoch": 0.5088911061602176, "grad_norm": 1.4460737705230713, "learning_rate": 1.0704775160302425e-05, "loss": 1.2145, "step": 9308 }, { "epoch": 0.5089457786039392, "grad_norm": 1.8901630640029907, "learning_rate": 1.0702952335219912e-05, "loss": 1.361, "step": 9309 }, { "epoch": 0.5090004510476607, "grad_norm": 1.387499213218689, "learning_rate": 1.0701129486664168e-05, "loss": 1.4769, "step": 9310 }, { "epoch": 0.5090551234913823, "grad_norm": 1.1232646703720093, "learning_rate": 1.0699306614696068e-05, "loss": 1.5582, "step": 9311 }, { "epoch": 0.5091097959351039, "grad_norm": 1.3197858333587646, "learning_rate": 1.0697483719376479e-05, "loss": 1.5489, "step": 9312 }, { "epoch": 0.5091644683788253, "grad_norm": 1.5218000411987305, "learning_rate": 1.069566080076627e-05, "loss": 1.294, "step": 9313 }, { "epoch": 0.5092191408225469, "grad_norm": 1.670056700706482, "learning_rate": 1.0693837858926317e-05, "loss": 1.5222, "step": 9314 }, { "epoch": 0.5092738132662684, "grad_norm": 1.2663573026657104, "learning_rate": 1.069201489391749e-05, "loss": 1.5703, "step": 9315 }, { "epoch": 0.50932848570999, "grad_norm": 1.4585604667663574, "learning_rate": 1.0690191905800659e-05, "loss": 1.3789, "step": 9316 }, { "epoch": 0.5093831581537116, "grad_norm": 1.5228699445724487, "learning_rate": 1.0688368894636702e-05, "loss": 1.5371, "step": 9317 }, { "epoch": 0.5094378305974331, "grad_norm": 1.865820288658142, "learning_rate": 1.068654586048649e-05, "loss": 1.3875, "step": 9318 }, { "epoch": 0.5094925030411547, "grad_norm": 1.6971086263656616, "learning_rate": 1.0684722803410904e-05, "loss": 1.4724, "step": 9319 }, { "epoch": 0.5095471754848763, "grad_norm": 1.4890822172164917, "learning_rate": 1.0682899723470818e-05, "loss": 1.2896, "step": 9320 }, { "epoch": 0.5096018479285978, "grad_norm": 1.3524816036224365, "learning_rate": 1.0681076620727104e-05, "loss": 1.3297, "step": 9321 }, { "epoch": 0.5096565203723193, "grad_norm": 1.6265710592269897, "learning_rate": 1.0679253495240646e-05, "loss": 1.1594, "step": 9322 }, { "epoch": 0.5097111928160409, "grad_norm": 1.798346757888794, "learning_rate": 1.067743034707232e-05, "loss": 1.2824, "step": 9323 }, { "epoch": 0.5097658652597624, "grad_norm": 1.5768425464630127, "learning_rate": 1.0675607176283002e-05, "loss": 1.5397, "step": 9324 }, { "epoch": 0.509820537703484, "grad_norm": 1.1569914817810059, "learning_rate": 1.0673783982933576e-05, "loss": 1.5345, "step": 9325 }, { "epoch": 0.5098752101472056, "grad_norm": 1.4224843978881836, "learning_rate": 1.0671960767084925e-05, "loss": 1.3429, "step": 9326 }, { "epoch": 0.5099298825909271, "grad_norm": 1.3583427667617798, "learning_rate": 1.0670137528797927e-05, "loss": 1.3926, "step": 9327 }, { "epoch": 0.5099845550346487, "grad_norm": 1.2096036672592163, "learning_rate": 1.0668314268133462e-05, "loss": 1.3597, "step": 9328 }, { "epoch": 0.5100392274783702, "grad_norm": 1.6269752979278564, "learning_rate": 1.0666490985152416e-05, "loss": 1.4557, "step": 9329 }, { "epoch": 0.5100938999220918, "grad_norm": 1.901692509651184, "learning_rate": 1.066466767991567e-05, "loss": 1.5571, "step": 9330 }, { "epoch": 0.5101485723658133, "grad_norm": 1.5530377626419067, "learning_rate": 1.0662844352484112e-05, "loss": 1.6672, "step": 9331 }, { "epoch": 0.5102032448095348, "grad_norm": 1.4787113666534424, "learning_rate": 1.0661021002918626e-05, "loss": 1.4105, "step": 9332 }, { "epoch": 0.5102579172532564, "grad_norm": 1.5975921154022217, "learning_rate": 1.0659197631280098e-05, "loss": 1.407, "step": 9333 }, { "epoch": 0.510312589696978, "grad_norm": 1.4487526416778564, "learning_rate": 1.0657374237629414e-05, "loss": 1.3847, "step": 9334 }, { "epoch": 0.5103672621406995, "grad_norm": 1.7015917301177979, "learning_rate": 1.065555082202746e-05, "loss": 1.7544, "step": 9335 }, { "epoch": 0.5104219345844211, "grad_norm": 1.7424092292785645, "learning_rate": 1.0653727384535131e-05, "loss": 1.4863, "step": 9336 }, { "epoch": 0.5104766070281427, "grad_norm": 1.6704652309417725, "learning_rate": 1.0651903925213304e-05, "loss": 1.5015, "step": 9337 }, { "epoch": 0.5105312794718642, "grad_norm": 1.3033932447433472, "learning_rate": 1.0650080444122876e-05, "loss": 1.4079, "step": 9338 }, { "epoch": 0.5105859519155858, "grad_norm": 1.4259037971496582, "learning_rate": 1.0648256941324742e-05, "loss": 1.4184, "step": 9339 }, { "epoch": 0.5106406243593073, "grad_norm": 1.4649603366851807, "learning_rate": 1.0646433416879785e-05, "loss": 1.6527, "step": 9340 }, { "epoch": 0.5106952968030288, "grad_norm": 1.3328454494476318, "learning_rate": 1.0644609870848896e-05, "loss": 1.3248, "step": 9341 }, { "epoch": 0.5107499692467504, "grad_norm": 1.3247665166854858, "learning_rate": 1.0642786303292973e-05, "loss": 1.6249, "step": 9342 }, { "epoch": 0.5108046416904719, "grad_norm": 1.4822735786437988, "learning_rate": 1.064096271427291e-05, "loss": 1.7622, "step": 9343 }, { "epoch": 0.5108593141341935, "grad_norm": 1.8110870122909546, "learning_rate": 1.0639139103849591e-05, "loss": 1.3844, "step": 9344 }, { "epoch": 0.5109139865779151, "grad_norm": 1.4743924140930176, "learning_rate": 1.0637315472083921e-05, "loss": 1.3532, "step": 9345 }, { "epoch": 0.5109686590216366, "grad_norm": 1.7430882453918457, "learning_rate": 1.0635491819036794e-05, "loss": 1.3837, "step": 9346 }, { "epoch": 0.5110233314653582, "grad_norm": 1.538386344909668, "learning_rate": 1.0633668144769103e-05, "loss": 1.4289, "step": 9347 }, { "epoch": 0.5110780039090798, "grad_norm": 1.776409387588501, "learning_rate": 1.0631844449341743e-05, "loss": 1.0637, "step": 9348 }, { "epoch": 0.5111326763528012, "grad_norm": 1.5665816068649292, "learning_rate": 1.0630020732815617e-05, "loss": 1.4835, "step": 9349 }, { "epoch": 0.5111873487965228, "grad_norm": 1.9276893138885498, "learning_rate": 1.062819699525162e-05, "loss": 1.677, "step": 9350 }, { "epoch": 0.5112420212402444, "grad_norm": 1.3687766790390015, "learning_rate": 1.062637323671065e-05, "loss": 1.4328, "step": 9351 }, { "epoch": 0.5112966936839659, "grad_norm": 2.125558614730835, "learning_rate": 1.062454945725361e-05, "loss": 1.3464, "step": 9352 }, { "epoch": 0.5113513661276875, "grad_norm": 1.469988465309143, "learning_rate": 1.0622725656941396e-05, "loss": 1.5568, "step": 9353 }, { "epoch": 0.5114060385714091, "grad_norm": 1.3846592903137207, "learning_rate": 1.0620901835834914e-05, "loss": 1.5219, "step": 9354 }, { "epoch": 0.5114607110151306, "grad_norm": 1.5066280364990234, "learning_rate": 1.0619077993995058e-05, "loss": 1.2319, "step": 9355 }, { "epoch": 0.5115153834588522, "grad_norm": 1.4152902364730835, "learning_rate": 1.0617254131482737e-05, "loss": 1.4216, "step": 9356 }, { "epoch": 0.5115700559025737, "grad_norm": 1.4550881385803223, "learning_rate": 1.0615430248358853e-05, "loss": 1.164, "step": 9357 }, { "epoch": 0.5116247283462952, "grad_norm": 1.4420955181121826, "learning_rate": 1.0613606344684309e-05, "loss": 1.2603, "step": 9358 }, { "epoch": 0.5116794007900168, "grad_norm": 1.4366397857666016, "learning_rate": 1.061178242052001e-05, "loss": 1.5059, "step": 9359 }, { "epoch": 0.5117340732337383, "grad_norm": 1.628514289855957, "learning_rate": 1.0609958475926856e-05, "loss": 1.505, "step": 9360 }, { "epoch": 0.5117887456774599, "grad_norm": 1.2656763792037964, "learning_rate": 1.0608134510965762e-05, "loss": 1.6856, "step": 9361 }, { "epoch": 0.5118434181211815, "grad_norm": 2.0102622509002686, "learning_rate": 1.0606310525697627e-05, "loss": 1.4333, "step": 9362 }, { "epoch": 0.511898090564903, "grad_norm": 1.4772052764892578, "learning_rate": 1.0604486520183362e-05, "loss": 1.5458, "step": 9363 }, { "epoch": 0.5119527630086246, "grad_norm": 1.6445695161819458, "learning_rate": 1.0602662494483872e-05, "loss": 1.4245, "step": 9364 }, { "epoch": 0.5120074354523462, "grad_norm": 1.7804358005523682, "learning_rate": 1.060083844866007e-05, "loss": 1.5316, "step": 9365 }, { "epoch": 0.5120621078960677, "grad_norm": 1.5968600511550903, "learning_rate": 1.059901438277286e-05, "loss": 1.3115, "step": 9366 }, { "epoch": 0.5121167803397892, "grad_norm": 1.6167553663253784, "learning_rate": 1.0597190296883156e-05, "loss": 1.323, "step": 9367 }, { "epoch": 0.5121714527835108, "grad_norm": 1.8905695676803589, "learning_rate": 1.0595366191051866e-05, "loss": 1.394, "step": 9368 }, { "epoch": 0.5122261252272323, "grad_norm": 1.4831264019012451, "learning_rate": 1.0593542065339899e-05, "loss": 1.4011, "step": 9369 }, { "epoch": 0.5122807976709539, "grad_norm": 1.4960217475891113, "learning_rate": 1.059171791980817e-05, "loss": 1.5473, "step": 9370 }, { "epoch": 0.5123354701146754, "grad_norm": 1.6744476556777954, "learning_rate": 1.0589893754517597e-05, "loss": 1.29, "step": 9371 }, { "epoch": 0.512390142558397, "grad_norm": 1.2122858762741089, "learning_rate": 1.0588069569529085e-05, "loss": 1.4896, "step": 9372 }, { "epoch": 0.5124448150021186, "grad_norm": 1.7495580911636353, "learning_rate": 1.0586245364903547e-05, "loss": 1.4133, "step": 9373 }, { "epoch": 0.5124994874458401, "grad_norm": 1.9668179750442505, "learning_rate": 1.0584421140701902e-05, "loss": 1.4229, "step": 9374 }, { "epoch": 0.5125541598895617, "grad_norm": 1.867566704750061, "learning_rate": 1.0582596896985065e-05, "loss": 1.5636, "step": 9375 }, { "epoch": 0.5126088323332832, "grad_norm": 1.2111107110977173, "learning_rate": 1.0580772633813947e-05, "loss": 1.2286, "step": 9376 }, { "epoch": 0.5126635047770047, "grad_norm": 1.6241624355316162, "learning_rate": 1.057894835124947e-05, "loss": 1.2747, "step": 9377 }, { "epoch": 0.5127181772207263, "grad_norm": 1.743109107017517, "learning_rate": 1.0577124049352549e-05, "loss": 1.552, "step": 9378 }, { "epoch": 0.5127728496644479, "grad_norm": 2.1097707748413086, "learning_rate": 1.0575299728184105e-05, "loss": 1.4329, "step": 9379 }, { "epoch": 0.5128275221081694, "grad_norm": 1.778602957725525, "learning_rate": 1.0573475387805047e-05, "loss": 1.3302, "step": 9380 }, { "epoch": 0.512882194551891, "grad_norm": 1.6657596826553345, "learning_rate": 1.0571651028276304e-05, "loss": 1.2759, "step": 9381 }, { "epoch": 0.5129368669956126, "grad_norm": 1.7205160856246948, "learning_rate": 1.0569826649658791e-05, "loss": 1.5622, "step": 9382 }, { "epoch": 0.5129915394393341, "grad_norm": 1.9298720359802246, "learning_rate": 1.056800225201343e-05, "loss": 1.2201, "step": 9383 }, { "epoch": 0.5130462118830557, "grad_norm": 1.5051642656326294, "learning_rate": 1.0566177835401136e-05, "loss": 1.4096, "step": 9384 }, { "epoch": 0.5131008843267773, "grad_norm": 1.3158793449401855, "learning_rate": 1.0564353399882843e-05, "loss": 1.542, "step": 9385 }, { "epoch": 0.5131555567704987, "grad_norm": 1.3761348724365234, "learning_rate": 1.0562528945519463e-05, "loss": 1.4593, "step": 9386 }, { "epoch": 0.5132102292142203, "grad_norm": 1.6713262796401978, "learning_rate": 1.0560704472371919e-05, "loss": 1.4459, "step": 9387 }, { "epoch": 0.5132649016579418, "grad_norm": 1.273640751838684, "learning_rate": 1.0558879980501137e-05, "loss": 1.483, "step": 9388 }, { "epoch": 0.5133195741016634, "grad_norm": 1.378542423248291, "learning_rate": 1.0557055469968045e-05, "loss": 1.4594, "step": 9389 }, { "epoch": 0.513374246545385, "grad_norm": 1.5815939903259277, "learning_rate": 1.0555230940833561e-05, "loss": 1.4867, "step": 9390 }, { "epoch": 0.5134289189891065, "grad_norm": 1.948106050491333, "learning_rate": 1.0553406393158614e-05, "loss": 1.3553, "step": 9391 }, { "epoch": 0.5134835914328281, "grad_norm": 1.4457508325576782, "learning_rate": 1.0551581827004129e-05, "loss": 1.4688, "step": 9392 }, { "epoch": 0.5135382638765497, "grad_norm": 2.3028814792633057, "learning_rate": 1.0549757242431032e-05, "loss": 1.3113, "step": 9393 }, { "epoch": 0.5135929363202711, "grad_norm": 1.6581578254699707, "learning_rate": 1.0547932639500246e-05, "loss": 1.2954, "step": 9394 }, { "epoch": 0.5136476087639927, "grad_norm": 1.2784392833709717, "learning_rate": 1.0546108018272707e-05, "loss": 1.6062, "step": 9395 }, { "epoch": 0.5137022812077143, "grad_norm": 1.7681647539138794, "learning_rate": 1.0544283378809343e-05, "loss": 1.3365, "step": 9396 }, { "epoch": 0.5137569536514358, "grad_norm": 1.656112790107727, "learning_rate": 1.0542458721171076e-05, "loss": 1.57, "step": 9397 }, { "epoch": 0.5138116260951574, "grad_norm": 1.7468198537826538, "learning_rate": 1.0540634045418843e-05, "loss": 1.3891, "step": 9398 }, { "epoch": 0.513866298538879, "grad_norm": 1.8448209762573242, "learning_rate": 1.0538809351613567e-05, "loss": 1.5698, "step": 9399 }, { "epoch": 0.5139209709826005, "grad_norm": 1.1245957612991333, "learning_rate": 1.0536984639816183e-05, "loss": 1.4609, "step": 9400 }, { "epoch": 0.5139756434263221, "grad_norm": 1.5605450868606567, "learning_rate": 1.0535159910087623e-05, "loss": 1.3546, "step": 9401 }, { "epoch": 0.5140303158700436, "grad_norm": 1.580923318862915, "learning_rate": 1.0533335162488816e-05, "loss": 1.4088, "step": 9402 }, { "epoch": 0.5140849883137651, "grad_norm": 1.5450184345245361, "learning_rate": 1.0531510397080697e-05, "loss": 1.3642, "step": 9403 }, { "epoch": 0.5141396607574867, "grad_norm": 1.5192582607269287, "learning_rate": 1.0529685613924199e-05, "loss": 1.2586, "step": 9404 }, { "epoch": 0.5141943332012082, "grad_norm": 2.3088152408599854, "learning_rate": 1.0527860813080257e-05, "loss": 1.3764, "step": 9405 }, { "epoch": 0.5142490056449298, "grad_norm": 1.666368842124939, "learning_rate": 1.0526035994609805e-05, "loss": 1.3809, "step": 9406 }, { "epoch": 0.5143036780886514, "grad_norm": 1.2206439971923828, "learning_rate": 1.0524211158573772e-05, "loss": 1.4805, "step": 9407 }, { "epoch": 0.5143583505323729, "grad_norm": 1.4198371171951294, "learning_rate": 1.05223863050331e-05, "loss": 1.4853, "step": 9408 }, { "epoch": 0.5144130229760945, "grad_norm": 1.810757040977478, "learning_rate": 1.0520561434048724e-05, "loss": 1.243, "step": 9409 }, { "epoch": 0.5144676954198161, "grad_norm": 1.6871416568756104, "learning_rate": 1.051873654568158e-05, "loss": 1.4476, "step": 9410 }, { "epoch": 0.5145223678635376, "grad_norm": 2.0705337524414062, "learning_rate": 1.0516911639992607e-05, "loss": 1.2409, "step": 9411 }, { "epoch": 0.5145770403072591, "grad_norm": 1.973109483718872, "learning_rate": 1.051508671704274e-05, "loss": 1.4347, "step": 9412 }, { "epoch": 0.5146317127509807, "grad_norm": 1.44532310962677, "learning_rate": 1.0513261776892918e-05, "loss": 1.576, "step": 9413 }, { "epoch": 0.5146863851947022, "grad_norm": 1.9122734069824219, "learning_rate": 1.0511436819604082e-05, "loss": 1.4228, "step": 9414 }, { "epoch": 0.5147410576384238, "grad_norm": 1.3427507877349854, "learning_rate": 1.0509611845237168e-05, "loss": 1.8368, "step": 9415 }, { "epoch": 0.5147957300821453, "grad_norm": 1.422714114189148, "learning_rate": 1.050778685385312e-05, "loss": 1.3767, "step": 9416 }, { "epoch": 0.5148504025258669, "grad_norm": 1.4786083698272705, "learning_rate": 1.050596184551288e-05, "loss": 1.2354, "step": 9417 }, { "epoch": 0.5149050749695885, "grad_norm": 1.6641521453857422, "learning_rate": 1.0504136820277386e-05, "loss": 1.2587, "step": 9418 }, { "epoch": 0.51495974741331, "grad_norm": 1.2796376943588257, "learning_rate": 1.050231177820758e-05, "loss": 1.5864, "step": 9419 }, { "epoch": 0.5150144198570316, "grad_norm": 1.534932255744934, "learning_rate": 1.0500486719364405e-05, "loss": 1.5774, "step": 9420 }, { "epoch": 0.5150690923007532, "grad_norm": 1.3832858800888062, "learning_rate": 1.0498661643808801e-05, "loss": 1.4438, "step": 9421 }, { "epoch": 0.5151237647444746, "grad_norm": 1.376173496246338, "learning_rate": 1.0496836551601717e-05, "loss": 1.469, "step": 9422 }, { "epoch": 0.5151784371881962, "grad_norm": 1.5743516683578491, "learning_rate": 1.0495011442804094e-05, "loss": 1.3731, "step": 9423 }, { "epoch": 0.5152331096319178, "grad_norm": 1.2450323104858398, "learning_rate": 1.0493186317476878e-05, "loss": 1.591, "step": 9424 }, { "epoch": 0.5152877820756393, "grad_norm": 1.4661372900009155, "learning_rate": 1.0491361175681016e-05, "loss": 1.459, "step": 9425 }, { "epoch": 0.5153424545193609, "grad_norm": 1.363430380821228, "learning_rate": 1.0489536017477448e-05, "loss": 1.5552, "step": 9426 }, { "epoch": 0.5153971269630825, "grad_norm": 1.818297028541565, "learning_rate": 1.0487710842927126e-05, "loss": 1.5126, "step": 9427 }, { "epoch": 0.515451799406804, "grad_norm": 1.3676538467407227, "learning_rate": 1.0485885652090992e-05, "loss": 1.6946, "step": 9428 }, { "epoch": 0.5155064718505256, "grad_norm": 1.4798234701156616, "learning_rate": 1.0484060445029995e-05, "loss": 1.39, "step": 9429 }, { "epoch": 0.515561144294247, "grad_norm": 1.8517006635665894, "learning_rate": 1.0482235221805088e-05, "loss": 1.3246, "step": 9430 }, { "epoch": 0.5156158167379686, "grad_norm": 1.4728336334228516, "learning_rate": 1.0480409982477214e-05, "loss": 1.3985, "step": 9431 }, { "epoch": 0.5156704891816902, "grad_norm": 1.5884807109832764, "learning_rate": 1.0478584727107322e-05, "loss": 1.2462, "step": 9432 }, { "epoch": 0.5157251616254117, "grad_norm": 1.369640827178955, "learning_rate": 1.0476759455756363e-05, "loss": 1.1804, "step": 9433 }, { "epoch": 0.5157798340691333, "grad_norm": 1.6457918882369995, "learning_rate": 1.0474934168485289e-05, "loss": 1.4931, "step": 9434 }, { "epoch": 0.5158345065128549, "grad_norm": 1.7567799091339111, "learning_rate": 1.0473108865355046e-05, "loss": 1.5386, "step": 9435 }, { "epoch": 0.5158891789565764, "grad_norm": 1.4745519161224365, "learning_rate": 1.0471283546426586e-05, "loss": 1.3949, "step": 9436 }, { "epoch": 0.515943851400298, "grad_norm": 1.4113709926605225, "learning_rate": 1.0469458211760868e-05, "loss": 1.5569, "step": 9437 }, { "epoch": 0.5159985238440196, "grad_norm": 1.2835716009140015, "learning_rate": 1.0467632861418837e-05, "loss": 1.3559, "step": 9438 }, { "epoch": 0.516053196287741, "grad_norm": 1.7188888788223267, "learning_rate": 1.0465807495461446e-05, "loss": 1.3296, "step": 9439 }, { "epoch": 0.5161078687314626, "grad_norm": 1.0008816719055176, "learning_rate": 1.046398211394965e-05, "loss": 1.5622, "step": 9440 }, { "epoch": 0.5161625411751842, "grad_norm": 1.7292488813400269, "learning_rate": 1.0462156716944403e-05, "loss": 1.4039, "step": 9441 }, { "epoch": 0.5162172136189057, "grad_norm": 1.521628737449646, "learning_rate": 1.0460331304506658e-05, "loss": 1.3841, "step": 9442 }, { "epoch": 0.5162718860626273, "grad_norm": 1.2784637212753296, "learning_rate": 1.045850587669737e-05, "loss": 1.5073, "step": 9443 }, { "epoch": 0.5163265585063488, "grad_norm": 1.2833824157714844, "learning_rate": 1.0456680433577497e-05, "loss": 1.2568, "step": 9444 }, { "epoch": 0.5163812309500704, "grad_norm": 1.3554548025131226, "learning_rate": 1.0454854975207993e-05, "loss": 1.542, "step": 9445 }, { "epoch": 0.516435903393792, "grad_norm": 1.5528446435928345, "learning_rate": 1.0453029501649812e-05, "loss": 1.2745, "step": 9446 }, { "epoch": 0.5164905758375135, "grad_norm": 1.437011957168579, "learning_rate": 1.0451204012963912e-05, "loss": 1.6846, "step": 9447 }, { "epoch": 0.516545248281235, "grad_norm": 1.621563196182251, "learning_rate": 1.0449378509211254e-05, "loss": 1.3577, "step": 9448 }, { "epoch": 0.5165999207249566, "grad_norm": 1.0368422269821167, "learning_rate": 1.044755299045279e-05, "loss": 1.5284, "step": 9449 }, { "epoch": 0.5166545931686781, "grad_norm": 1.4992650747299194, "learning_rate": 1.0445727456749484e-05, "loss": 1.2409, "step": 9450 }, { "epoch": 0.5167092656123997, "grad_norm": 1.475735068321228, "learning_rate": 1.0443901908162291e-05, "loss": 1.4448, "step": 9451 }, { "epoch": 0.5167639380561213, "grad_norm": 1.784429907798767, "learning_rate": 1.0442076344752173e-05, "loss": 1.7205, "step": 9452 }, { "epoch": 0.5168186104998428, "grad_norm": 1.5033293962478638, "learning_rate": 1.0440250766580086e-05, "loss": 1.3912, "step": 9453 }, { "epoch": 0.5168732829435644, "grad_norm": 1.706118106842041, "learning_rate": 1.0438425173706994e-05, "loss": 1.3432, "step": 9454 }, { "epoch": 0.516927955387286, "grad_norm": 1.379155158996582, "learning_rate": 1.043659956619386e-05, "loss": 1.4195, "step": 9455 }, { "epoch": 0.5169826278310075, "grad_norm": 1.7775615453720093, "learning_rate": 1.0434773944101637e-05, "loss": 1.1966, "step": 9456 }, { "epoch": 0.517037300274729, "grad_norm": 1.190186619758606, "learning_rate": 1.0432948307491296e-05, "loss": 1.7238, "step": 9457 }, { "epoch": 0.5170919727184505, "grad_norm": 1.6858360767364502, "learning_rate": 1.0431122656423791e-05, "loss": 1.366, "step": 9458 }, { "epoch": 0.5171466451621721, "grad_norm": 1.945777177810669, "learning_rate": 1.0429296990960092e-05, "loss": 1.5246, "step": 9459 }, { "epoch": 0.5172013176058937, "grad_norm": 1.5448704957962036, "learning_rate": 1.0427471311161157e-05, "loss": 1.3862, "step": 9460 }, { "epoch": 0.5172559900496152, "grad_norm": 1.3813601732254028, "learning_rate": 1.0425645617087951e-05, "loss": 1.5944, "step": 9461 }, { "epoch": 0.5173106624933368, "grad_norm": 1.6299573183059692, "learning_rate": 1.0423819908801443e-05, "loss": 1.3088, "step": 9462 }, { "epoch": 0.5173653349370584, "grad_norm": 1.39553964138031, "learning_rate": 1.0421994186362591e-05, "loss": 1.3413, "step": 9463 }, { "epoch": 0.5174200073807799, "grad_norm": 2.2075860500335693, "learning_rate": 1.0420168449832362e-05, "loss": 1.4563, "step": 9464 }, { "epoch": 0.5174746798245015, "grad_norm": 1.5709004402160645, "learning_rate": 1.0418342699271724e-05, "loss": 1.6066, "step": 9465 }, { "epoch": 0.517529352268223, "grad_norm": 1.6658031940460205, "learning_rate": 1.0416516934741643e-05, "loss": 1.2363, "step": 9466 }, { "epoch": 0.5175840247119445, "grad_norm": 1.481308937072754, "learning_rate": 1.041469115630308e-05, "loss": 1.4252, "step": 9467 }, { "epoch": 0.5176386971556661, "grad_norm": 1.5036242008209229, "learning_rate": 1.041286536401701e-05, "loss": 1.4445, "step": 9468 }, { "epoch": 0.5176933695993877, "grad_norm": 1.4618089199066162, "learning_rate": 1.0411039557944396e-05, "loss": 1.287, "step": 9469 }, { "epoch": 0.5177480420431092, "grad_norm": 1.9410111904144287, "learning_rate": 1.0409213738146207e-05, "loss": 1.2599, "step": 9470 }, { "epoch": 0.5178027144868308, "grad_norm": 1.404855728149414, "learning_rate": 1.0407387904683408e-05, "loss": 1.3398, "step": 9471 }, { "epoch": 0.5178573869305523, "grad_norm": 1.1959495544433594, "learning_rate": 1.0405562057616972e-05, "loss": 1.5435, "step": 9472 }, { "epoch": 0.5179120593742739, "grad_norm": 1.7927746772766113, "learning_rate": 1.040373619700787e-05, "loss": 1.2168, "step": 9473 }, { "epoch": 0.5179667318179955, "grad_norm": 1.6932177543640137, "learning_rate": 1.0401910322917066e-05, "loss": 1.3152, "step": 9474 }, { "epoch": 0.518021404261717, "grad_norm": 1.5150210857391357, "learning_rate": 1.0400084435405533e-05, "loss": 1.469, "step": 9475 }, { "epoch": 0.5180760767054385, "grad_norm": 1.2970386743545532, "learning_rate": 1.0398258534534245e-05, "loss": 1.4313, "step": 9476 }, { "epoch": 0.5181307491491601, "grad_norm": 1.4123804569244385, "learning_rate": 1.039643262036417e-05, "loss": 1.3921, "step": 9477 }, { "epoch": 0.5181854215928816, "grad_norm": 1.3145794868469238, "learning_rate": 1.0394606692956275e-05, "loss": 1.3032, "step": 9478 }, { "epoch": 0.5182400940366032, "grad_norm": 1.7081230878829956, "learning_rate": 1.0392780752371539e-05, "loss": 1.3339, "step": 9479 }, { "epoch": 0.5182947664803248, "grad_norm": 2.112717628479004, "learning_rate": 1.0390954798670934e-05, "loss": 1.1948, "step": 9480 }, { "epoch": 0.5183494389240463, "grad_norm": 1.3689545392990112, "learning_rate": 1.0389128831915427e-05, "loss": 1.467, "step": 9481 }, { "epoch": 0.5184041113677679, "grad_norm": 2.4553985595703125, "learning_rate": 1.0387302852166e-05, "loss": 1.5484, "step": 9482 }, { "epoch": 0.5184587838114895, "grad_norm": 1.2123903036117554, "learning_rate": 1.038547685948362e-05, "loss": 1.385, "step": 9483 }, { "epoch": 0.518513456255211, "grad_norm": 1.7914663553237915, "learning_rate": 1.0383650853929261e-05, "loss": 1.4627, "step": 9484 }, { "epoch": 0.5185681286989325, "grad_norm": 2.1321911811828613, "learning_rate": 1.0381824835563901e-05, "loss": 1.2335, "step": 9485 }, { "epoch": 0.518622801142654, "grad_norm": 1.3403706550598145, "learning_rate": 1.0379998804448512e-05, "loss": 1.4694, "step": 9486 }, { "epoch": 0.5186774735863756, "grad_norm": 1.3986400365829468, "learning_rate": 1.0378172760644074e-05, "loss": 1.3411, "step": 9487 }, { "epoch": 0.5187321460300972, "grad_norm": 1.784864902496338, "learning_rate": 1.037634670421156e-05, "loss": 1.4832, "step": 9488 }, { "epoch": 0.5187868184738187, "grad_norm": 1.3675917387008667, "learning_rate": 1.0374520635211947e-05, "loss": 1.3706, "step": 9489 }, { "epoch": 0.5188414909175403, "grad_norm": 1.6482994556427002, "learning_rate": 1.037269455370621e-05, "loss": 1.3811, "step": 9490 }, { "epoch": 0.5188961633612619, "grad_norm": 1.8179937601089478, "learning_rate": 1.0370868459755325e-05, "loss": 1.4027, "step": 9491 }, { "epoch": 0.5189508358049834, "grad_norm": 1.5345019102096558, "learning_rate": 1.0369042353420274e-05, "loss": 1.4048, "step": 9492 }, { "epoch": 0.519005508248705, "grad_norm": 1.2836384773254395, "learning_rate": 1.0367216234762032e-05, "loss": 1.6907, "step": 9493 }, { "epoch": 0.5190601806924265, "grad_norm": 1.4855374097824097, "learning_rate": 1.0365390103841579e-05, "loss": 1.1587, "step": 9494 }, { "epoch": 0.519114853136148, "grad_norm": 1.957719326019287, "learning_rate": 1.0363563960719894e-05, "loss": 1.3967, "step": 9495 }, { "epoch": 0.5191695255798696, "grad_norm": 1.3860127925872803, "learning_rate": 1.0361737805457954e-05, "loss": 1.4384, "step": 9496 }, { "epoch": 0.5192241980235912, "grad_norm": 1.24916672706604, "learning_rate": 1.0359911638116742e-05, "loss": 1.6786, "step": 9497 }, { "epoch": 0.5192788704673127, "grad_norm": 1.5831433534622192, "learning_rate": 1.0358085458757233e-05, "loss": 1.5076, "step": 9498 }, { "epoch": 0.5193335429110343, "grad_norm": 1.8731753826141357, "learning_rate": 1.035625926744041e-05, "loss": 1.5025, "step": 9499 }, { "epoch": 0.5193882153547558, "grad_norm": 1.30350661277771, "learning_rate": 1.0354433064227255e-05, "loss": 1.5641, "step": 9500 }, { "epoch": 0.5194428877984774, "grad_norm": 1.9699941873550415, "learning_rate": 1.0352606849178747e-05, "loss": 1.2303, "step": 9501 }, { "epoch": 0.519497560242199, "grad_norm": 1.5715248584747314, "learning_rate": 1.0350780622355874e-05, "loss": 1.5742, "step": 9502 }, { "epoch": 0.5195522326859204, "grad_norm": 1.4119459390640259, "learning_rate": 1.0348954383819607e-05, "loss": 1.2742, "step": 9503 }, { "epoch": 0.519606905129642, "grad_norm": 1.6349090337753296, "learning_rate": 1.034712813363094e-05, "loss": 1.533, "step": 9504 }, { "epoch": 0.5196615775733636, "grad_norm": 1.5597453117370605, "learning_rate": 1.0345301871850843e-05, "loss": 1.4492, "step": 9505 }, { "epoch": 0.5197162500170851, "grad_norm": 1.3719269037246704, "learning_rate": 1.034347559854031e-05, "loss": 1.391, "step": 9506 }, { "epoch": 0.5197709224608067, "grad_norm": 1.7799710035324097, "learning_rate": 1.034164931376032e-05, "loss": 1.5037, "step": 9507 }, { "epoch": 0.5198255949045283, "grad_norm": 1.7704325914382935, "learning_rate": 1.033982301757186e-05, "loss": 1.3674, "step": 9508 }, { "epoch": 0.5198802673482498, "grad_norm": 1.6116118431091309, "learning_rate": 1.0337996710035911e-05, "loss": 1.3485, "step": 9509 }, { "epoch": 0.5199349397919714, "grad_norm": 1.572139024734497, "learning_rate": 1.0336170391213457e-05, "loss": 1.5256, "step": 9510 }, { "epoch": 0.519989612235693, "grad_norm": 1.6956537961959839, "learning_rate": 1.0334344061165486e-05, "loss": 1.3237, "step": 9511 }, { "epoch": 0.5200442846794144, "grad_norm": 1.5688881874084473, "learning_rate": 1.0332517719952982e-05, "loss": 1.3676, "step": 9512 }, { "epoch": 0.520098957123136, "grad_norm": 1.4642541408538818, "learning_rate": 1.0330691367636932e-05, "loss": 1.6199, "step": 9513 }, { "epoch": 0.5201536295668575, "grad_norm": 1.3330248594284058, "learning_rate": 1.0328865004278317e-05, "loss": 1.6837, "step": 9514 }, { "epoch": 0.5202083020105791, "grad_norm": 1.5774924755096436, "learning_rate": 1.0327038629938134e-05, "loss": 1.3608, "step": 9515 }, { "epoch": 0.5202629744543007, "grad_norm": 1.382809042930603, "learning_rate": 1.0325212244677361e-05, "loss": 1.415, "step": 9516 }, { "epoch": 0.5203176468980222, "grad_norm": 1.5463695526123047, "learning_rate": 1.0323385848556989e-05, "loss": 1.328, "step": 9517 }, { "epoch": 0.5203723193417438, "grad_norm": 1.282014012336731, "learning_rate": 1.0321559441638002e-05, "loss": 1.5404, "step": 9518 }, { "epoch": 0.5204269917854654, "grad_norm": 1.4782519340515137, "learning_rate": 1.0319733023981392e-05, "loss": 1.3686, "step": 9519 }, { "epoch": 0.5204816642291868, "grad_norm": 1.6561379432678223, "learning_rate": 1.0317906595648146e-05, "loss": 1.3608, "step": 9520 }, { "epoch": 0.5205363366729084, "grad_norm": 1.409732699394226, "learning_rate": 1.0316080156699253e-05, "loss": 1.3674, "step": 9521 }, { "epoch": 0.52059100911663, "grad_norm": 1.425402283668518, "learning_rate": 1.0314253707195706e-05, "loss": 1.5926, "step": 9522 }, { "epoch": 0.5206456815603515, "grad_norm": 1.4366158246994019, "learning_rate": 1.0312427247198484e-05, "loss": 1.7165, "step": 9523 }, { "epoch": 0.5207003540040731, "grad_norm": 1.391263484954834, "learning_rate": 1.0310600776768585e-05, "loss": 1.3353, "step": 9524 }, { "epoch": 0.5207550264477947, "grad_norm": 1.221497893333435, "learning_rate": 1.0308774295966999e-05, "loss": 1.6156, "step": 9525 }, { "epoch": 0.5208096988915162, "grad_norm": 1.9363654851913452, "learning_rate": 1.030694780485471e-05, "loss": 1.2016, "step": 9526 }, { "epoch": 0.5208643713352378, "grad_norm": 1.343027949333191, "learning_rate": 1.0305121303492718e-05, "loss": 1.3214, "step": 9527 }, { "epoch": 0.5209190437789593, "grad_norm": 1.2388818264007568, "learning_rate": 1.030329479194201e-05, "loss": 1.4892, "step": 9528 }, { "epoch": 0.5209737162226808, "grad_norm": 3.3604884147644043, "learning_rate": 1.0301468270263575e-05, "loss": 1.3087, "step": 9529 }, { "epoch": 0.5210283886664024, "grad_norm": 1.0953480005264282, "learning_rate": 1.0299641738518407e-05, "loss": 1.5038, "step": 9530 }, { "epoch": 0.5210830611101239, "grad_norm": 1.5119776725769043, "learning_rate": 1.0297815196767498e-05, "loss": 1.3582, "step": 9531 }, { "epoch": 0.5211377335538455, "grad_norm": 1.4230530261993408, "learning_rate": 1.0295988645071844e-05, "loss": 1.5598, "step": 9532 }, { "epoch": 0.5211924059975671, "grad_norm": 1.4691795110702515, "learning_rate": 1.0294162083492429e-05, "loss": 1.3461, "step": 9533 }, { "epoch": 0.5212470784412886, "grad_norm": 1.730675458908081, "learning_rate": 1.0292335512090255e-05, "loss": 1.2703, "step": 9534 }, { "epoch": 0.5213017508850102, "grad_norm": 1.4965087175369263, "learning_rate": 1.0290508930926314e-05, "loss": 1.0476, "step": 9535 }, { "epoch": 0.5213564233287318, "grad_norm": 1.881549596786499, "learning_rate": 1.0288682340061598e-05, "loss": 1.2976, "step": 9536 }, { "epoch": 0.5214110957724533, "grad_norm": 1.467572569847107, "learning_rate": 1.0286855739557097e-05, "loss": 1.4224, "step": 9537 }, { "epoch": 0.5214657682161749, "grad_norm": 1.3375370502471924, "learning_rate": 1.0285029129473814e-05, "loss": 1.3119, "step": 9538 }, { "epoch": 0.5215204406598964, "grad_norm": 1.2665774822235107, "learning_rate": 1.0283202509872738e-05, "loss": 1.2658, "step": 9539 }, { "epoch": 0.5215751131036179, "grad_norm": 1.4920939207077026, "learning_rate": 1.0281375880814864e-05, "loss": 1.3978, "step": 9540 }, { "epoch": 0.5216297855473395, "grad_norm": 1.6608456373214722, "learning_rate": 1.0279549242361193e-05, "loss": 1.6323, "step": 9541 }, { "epoch": 0.521684457991061, "grad_norm": 1.5766637325286865, "learning_rate": 1.0277722594572714e-05, "loss": 1.4016, "step": 9542 }, { "epoch": 0.5217391304347826, "grad_norm": 1.8118834495544434, "learning_rate": 1.0275895937510426e-05, "loss": 1.5637, "step": 9543 }, { "epoch": 0.5217938028785042, "grad_norm": 1.6617714166641235, "learning_rate": 1.0274069271235326e-05, "loss": 1.3242, "step": 9544 }, { "epoch": 0.5218484753222257, "grad_norm": 1.3102530241012573, "learning_rate": 1.0272242595808406e-05, "loss": 1.4652, "step": 9545 }, { "epoch": 0.5219031477659473, "grad_norm": 2.647425889968872, "learning_rate": 1.0270415911290673e-05, "loss": 1.5378, "step": 9546 }, { "epoch": 0.5219578202096689, "grad_norm": 1.4659761190414429, "learning_rate": 1.0268589217743114e-05, "loss": 1.2951, "step": 9547 }, { "epoch": 0.5220124926533903, "grad_norm": 1.4257006645202637, "learning_rate": 1.0266762515226734e-05, "loss": 1.6555, "step": 9548 }, { "epoch": 0.5220671650971119, "grad_norm": 1.2420858144760132, "learning_rate": 1.0264935803802527e-05, "loss": 1.6307, "step": 9549 }, { "epoch": 0.5221218375408335, "grad_norm": 1.3390271663665771, "learning_rate": 1.0263109083531489e-05, "loss": 1.3918, "step": 9550 }, { "epoch": 0.522176509984555, "grad_norm": 1.3616902828216553, "learning_rate": 1.0261282354474622e-05, "loss": 1.3897, "step": 9551 }, { "epoch": 0.5222311824282766, "grad_norm": 1.4102479219436646, "learning_rate": 1.0259455616692924e-05, "loss": 1.4568, "step": 9552 }, { "epoch": 0.5222858548719982, "grad_norm": 1.3807568550109863, "learning_rate": 1.0257628870247396e-05, "loss": 1.3907, "step": 9553 }, { "epoch": 0.5223405273157197, "grad_norm": 1.5499027967453003, "learning_rate": 1.0255802115199034e-05, "loss": 1.4855, "step": 9554 }, { "epoch": 0.5223951997594413, "grad_norm": 1.6361329555511475, "learning_rate": 1.0253975351608842e-05, "loss": 1.3942, "step": 9555 }, { "epoch": 0.5224498722031627, "grad_norm": 1.371179461479187, "learning_rate": 1.0252148579537816e-05, "loss": 1.663, "step": 9556 }, { "epoch": 0.5225045446468843, "grad_norm": 1.4841679334640503, "learning_rate": 1.0250321799046953e-05, "loss": 1.6513, "step": 9557 }, { "epoch": 0.5225592170906059, "grad_norm": 1.3501248359680176, "learning_rate": 1.0248495010197262e-05, "loss": 1.4604, "step": 9558 }, { "epoch": 0.5226138895343274, "grad_norm": 1.5969504117965698, "learning_rate": 1.0246668213049737e-05, "loss": 1.3864, "step": 9559 }, { "epoch": 0.522668561978049, "grad_norm": 1.2931658029556274, "learning_rate": 1.0244841407665385e-05, "loss": 1.5996, "step": 9560 }, { "epoch": 0.5227232344217706, "grad_norm": 1.4827301502227783, "learning_rate": 1.0243014594105201e-05, "loss": 1.6319, "step": 9561 }, { "epoch": 0.5227779068654921, "grad_norm": 1.716277837753296, "learning_rate": 1.024118777243019e-05, "loss": 1.3191, "step": 9562 }, { "epoch": 0.5228325793092137, "grad_norm": 1.5473700761795044, "learning_rate": 1.0239360942701356e-05, "loss": 1.289, "step": 9563 }, { "epoch": 0.5228872517529353, "grad_norm": 1.484862208366394, "learning_rate": 1.0237534104979694e-05, "loss": 1.6577, "step": 9564 }, { "epoch": 0.5229419241966567, "grad_norm": 1.631850004196167, "learning_rate": 1.0235707259326211e-05, "loss": 1.5154, "step": 9565 }, { "epoch": 0.5229965966403783, "grad_norm": 1.3578171730041504, "learning_rate": 1.023388040580191e-05, "loss": 1.3508, "step": 9566 }, { "epoch": 0.5230512690840999, "grad_norm": 1.727455973625183, "learning_rate": 1.0232053544467798e-05, "loss": 1.423, "step": 9567 }, { "epoch": 0.5231059415278214, "grad_norm": 1.6165056228637695, "learning_rate": 1.0230226675384868e-05, "loss": 1.261, "step": 9568 }, { "epoch": 0.523160613971543, "grad_norm": 1.7448046207427979, "learning_rate": 1.0228399798614132e-05, "loss": 1.5706, "step": 9569 }, { "epoch": 0.5232152864152645, "grad_norm": 1.4978910684585571, "learning_rate": 1.0226572914216593e-05, "loss": 1.5302, "step": 9570 }, { "epoch": 0.5232699588589861, "grad_norm": 1.5265334844589233, "learning_rate": 1.0224746022253248e-05, "loss": 1.2417, "step": 9571 }, { "epoch": 0.5233246313027077, "grad_norm": 1.422877311706543, "learning_rate": 1.0222919122785107e-05, "loss": 1.4453, "step": 9572 }, { "epoch": 0.5233793037464292, "grad_norm": 1.3293960094451904, "learning_rate": 1.0221092215873171e-05, "loss": 1.4214, "step": 9573 }, { "epoch": 0.5234339761901508, "grad_norm": 1.885008454322815, "learning_rate": 1.0219265301578454e-05, "loss": 1.3905, "step": 9574 }, { "epoch": 0.5234886486338723, "grad_norm": 1.684382438659668, "learning_rate": 1.021743837996195e-05, "loss": 1.3442, "step": 9575 }, { "epoch": 0.5235433210775938, "grad_norm": 1.5597299337387085, "learning_rate": 1.0215611451084668e-05, "loss": 1.5171, "step": 9576 }, { "epoch": 0.5235979935213154, "grad_norm": 1.5859898328781128, "learning_rate": 1.0213784515007616e-05, "loss": 1.496, "step": 9577 }, { "epoch": 0.523652665965037, "grad_norm": 1.3239117860794067, "learning_rate": 1.0211957571791796e-05, "loss": 1.3982, "step": 9578 }, { "epoch": 0.5237073384087585, "grad_norm": 1.642538070678711, "learning_rate": 1.0210130621498214e-05, "loss": 1.8195, "step": 9579 }, { "epoch": 0.5237620108524801, "grad_norm": 1.4519110918045044, "learning_rate": 1.0208303664187877e-05, "loss": 1.4399, "step": 9580 }, { "epoch": 0.5238166832962017, "grad_norm": 1.487790822982788, "learning_rate": 1.0206476699921795e-05, "loss": 1.5282, "step": 9581 }, { "epoch": 0.5238713557399232, "grad_norm": 1.7531850337982178, "learning_rate": 1.0204649728760969e-05, "loss": 1.4368, "step": 9582 }, { "epoch": 0.5239260281836448, "grad_norm": 1.4730682373046875, "learning_rate": 1.0202822750766408e-05, "loss": 1.2223, "step": 9583 }, { "epoch": 0.5239807006273663, "grad_norm": 1.3207170963287354, "learning_rate": 1.0200995765999122e-05, "loss": 1.4763, "step": 9584 }, { "epoch": 0.5240353730710878, "grad_norm": 1.4568802118301392, "learning_rate": 1.0199168774520115e-05, "loss": 1.1815, "step": 9585 }, { "epoch": 0.5240900455148094, "grad_norm": 1.564182162284851, "learning_rate": 1.0197341776390393e-05, "loss": 1.2586, "step": 9586 }, { "epoch": 0.5241447179585309, "grad_norm": 1.946634292602539, "learning_rate": 1.0195514771670967e-05, "loss": 1.49, "step": 9587 }, { "epoch": 0.5241993904022525, "grad_norm": 1.6663957834243774, "learning_rate": 1.0193687760422846e-05, "loss": 1.4926, "step": 9588 }, { "epoch": 0.5242540628459741, "grad_norm": 1.4186947345733643, "learning_rate": 1.0191860742707034e-05, "loss": 1.3109, "step": 9589 }, { "epoch": 0.5243087352896956, "grad_norm": 1.5680357217788696, "learning_rate": 1.0190033718584542e-05, "loss": 1.4002, "step": 9590 }, { "epoch": 0.5243634077334172, "grad_norm": 1.7567137479782104, "learning_rate": 1.018820668811638e-05, "loss": 1.4509, "step": 9591 }, { "epoch": 0.5244180801771388, "grad_norm": 1.4306787252426147, "learning_rate": 1.0186379651363551e-05, "loss": 1.3866, "step": 9592 }, { "epoch": 0.5244727526208602, "grad_norm": 1.5509271621704102, "learning_rate": 1.0184552608387072e-05, "loss": 1.5239, "step": 9593 }, { "epoch": 0.5245274250645818, "grad_norm": 1.7545429468154907, "learning_rate": 1.0182725559247945e-05, "loss": 1.4105, "step": 9594 }, { "epoch": 0.5245820975083034, "grad_norm": 1.3016672134399414, "learning_rate": 1.0180898504007188e-05, "loss": 1.4404, "step": 9595 }, { "epoch": 0.5246367699520249, "grad_norm": 2.2005209922790527, "learning_rate": 1.0179071442725801e-05, "loss": 1.0484, "step": 9596 }, { "epoch": 0.5246914423957465, "grad_norm": 1.4966756105422974, "learning_rate": 1.01772443754648e-05, "loss": 1.5824, "step": 9597 }, { "epoch": 0.5247461148394681, "grad_norm": 1.5005784034729004, "learning_rate": 1.0175417302285194e-05, "loss": 1.5224, "step": 9598 }, { "epoch": 0.5248007872831896, "grad_norm": 1.8239734172821045, "learning_rate": 1.017359022324799e-05, "loss": 1.2779, "step": 9599 }, { "epoch": 0.5248554597269112, "grad_norm": 1.4578038454055786, "learning_rate": 1.0171763138414203e-05, "loss": 1.4084, "step": 9600 }, { "epoch": 0.5249101321706326, "grad_norm": 1.453037142753601, "learning_rate": 1.016993604784484e-05, "loss": 1.3519, "step": 9601 }, { "epoch": 0.5249648046143542, "grad_norm": 1.6057989597320557, "learning_rate": 1.0168108951600917e-05, "loss": 1.4969, "step": 9602 }, { "epoch": 0.5250194770580758, "grad_norm": 1.3942850828170776, "learning_rate": 1.0166281849743438e-05, "loss": 1.5469, "step": 9603 }, { "epoch": 0.5250741495017973, "grad_norm": 1.1347101926803589, "learning_rate": 1.0164454742333419e-05, "loss": 1.4158, "step": 9604 }, { "epoch": 0.5251288219455189, "grad_norm": 1.5395292043685913, "learning_rate": 1.016262762943187e-05, "loss": 1.3679, "step": 9605 }, { "epoch": 0.5251834943892405, "grad_norm": 1.4997031688690186, "learning_rate": 1.0160800511099805e-05, "loss": 1.385, "step": 9606 }, { "epoch": 0.525238166832962, "grad_norm": 1.336421251296997, "learning_rate": 1.0158973387398231e-05, "loss": 1.4057, "step": 9607 }, { "epoch": 0.5252928392766836, "grad_norm": 1.6211873292922974, "learning_rate": 1.0157146258388163e-05, "loss": 1.3818, "step": 9608 }, { "epoch": 0.5253475117204052, "grad_norm": 1.8092206716537476, "learning_rate": 1.0155319124130616e-05, "loss": 1.327, "step": 9609 }, { "epoch": 0.5254021841641267, "grad_norm": 1.762580156326294, "learning_rate": 1.0153491984686595e-05, "loss": 1.3317, "step": 9610 }, { "epoch": 0.5254568566078482, "grad_norm": 1.4258779287338257, "learning_rate": 1.0151664840117118e-05, "loss": 1.4391, "step": 9611 }, { "epoch": 0.5255115290515698, "grad_norm": 1.5103635787963867, "learning_rate": 1.0149837690483195e-05, "loss": 1.3234, "step": 9612 }, { "epoch": 0.5255662014952913, "grad_norm": 1.6144709587097168, "learning_rate": 1.0148010535845842e-05, "loss": 1.0535, "step": 9613 }, { "epoch": 0.5256208739390129, "grad_norm": 1.7262521982192993, "learning_rate": 1.014618337626607e-05, "loss": 1.4907, "step": 9614 }, { "epoch": 0.5256755463827344, "grad_norm": 1.6874160766601562, "learning_rate": 1.0144356211804888e-05, "loss": 1.3634, "step": 9615 }, { "epoch": 0.525730218826456, "grad_norm": 1.4567347764968872, "learning_rate": 1.014252904252332e-05, "loss": 1.402, "step": 9616 }, { "epoch": 0.5257848912701776, "grad_norm": 1.382949948310852, "learning_rate": 1.0140701868482365e-05, "loss": 1.5828, "step": 9617 }, { "epoch": 0.5258395637138991, "grad_norm": 1.747551679611206, "learning_rate": 1.0138874689743048e-05, "loss": 1.6407, "step": 9618 }, { "epoch": 0.5258942361576207, "grad_norm": 1.4860577583312988, "learning_rate": 1.0137047506366382e-05, "loss": 1.3275, "step": 9619 }, { "epoch": 0.5259489086013422, "grad_norm": 1.4895817041397095, "learning_rate": 1.0135220318413377e-05, "loss": 1.4829, "step": 9620 }, { "epoch": 0.5260035810450637, "grad_norm": 1.1512504816055298, "learning_rate": 1.0133393125945045e-05, "loss": 1.5835, "step": 9621 }, { "epoch": 0.5260582534887853, "grad_norm": 1.2913618087768555, "learning_rate": 1.0131565929022405e-05, "loss": 1.4785, "step": 9622 }, { "epoch": 0.5261129259325069, "grad_norm": 1.4470386505126953, "learning_rate": 1.012973872770647e-05, "loss": 1.2727, "step": 9623 }, { "epoch": 0.5261675983762284, "grad_norm": 1.3898831605911255, "learning_rate": 1.0127911522058256e-05, "loss": 1.6013, "step": 9624 }, { "epoch": 0.52622227081995, "grad_norm": 1.528260350227356, "learning_rate": 1.0126084312138774e-05, "loss": 1.3982, "step": 9625 }, { "epoch": 0.5262769432636716, "grad_norm": 1.530029296875, "learning_rate": 1.0124257098009042e-05, "loss": 1.5386, "step": 9626 }, { "epoch": 0.5263316157073931, "grad_norm": 1.757620930671692, "learning_rate": 1.0122429879730075e-05, "loss": 1.4357, "step": 9627 }, { "epoch": 0.5263862881511147, "grad_norm": 2.199023485183716, "learning_rate": 1.0120602657362885e-05, "loss": 1.279, "step": 9628 }, { "epoch": 0.5264409605948361, "grad_norm": 1.476717472076416, "learning_rate": 1.011877543096849e-05, "loss": 1.5018, "step": 9629 }, { "epoch": 0.5264956330385577, "grad_norm": 1.4468457698822021, "learning_rate": 1.0116948200607906e-05, "loss": 1.7629, "step": 9630 }, { "epoch": 0.5265503054822793, "grad_norm": 1.3427790403366089, "learning_rate": 1.0115120966342145e-05, "loss": 1.3386, "step": 9631 }, { "epoch": 0.5266049779260008, "grad_norm": 1.4263873100280762, "learning_rate": 1.0113293728232227e-05, "loss": 1.489, "step": 9632 }, { "epoch": 0.5266596503697224, "grad_norm": 1.5474082231521606, "learning_rate": 1.0111466486339166e-05, "loss": 1.4593, "step": 9633 }, { "epoch": 0.526714322813444, "grad_norm": 1.2884938716888428, "learning_rate": 1.0109639240723974e-05, "loss": 1.5887, "step": 9634 }, { "epoch": 0.5267689952571655, "grad_norm": 1.5263426303863525, "learning_rate": 1.0107811991447672e-05, "loss": 1.4781, "step": 9635 }, { "epoch": 0.5268236677008871, "grad_norm": 1.7393357753753662, "learning_rate": 1.0105984738571274e-05, "loss": 1.339, "step": 9636 }, { "epoch": 0.5268783401446087, "grad_norm": 1.3737356662750244, "learning_rate": 1.0104157482155798e-05, "loss": 1.231, "step": 9637 }, { "epoch": 0.5269330125883301, "grad_norm": 2.5898325443267822, "learning_rate": 1.0102330222262257e-05, "loss": 1.4107, "step": 9638 }, { "epoch": 0.5269876850320517, "grad_norm": 1.4323267936706543, "learning_rate": 1.0100502958951675e-05, "loss": 1.5468, "step": 9639 }, { "epoch": 0.5270423574757733, "grad_norm": 1.579158067703247, "learning_rate": 1.0098675692285061e-05, "loss": 1.6161, "step": 9640 }, { "epoch": 0.5270970299194948, "grad_norm": 1.884071707725525, "learning_rate": 1.009684842232343e-05, "loss": 1.4816, "step": 9641 }, { "epoch": 0.5271517023632164, "grad_norm": 1.4113185405731201, "learning_rate": 1.0095021149127806e-05, "loss": 1.5805, "step": 9642 }, { "epoch": 0.5272063748069379, "grad_norm": 1.4611164331436157, "learning_rate": 1.0093193872759204e-05, "loss": 1.3347, "step": 9643 }, { "epoch": 0.5272610472506595, "grad_norm": 1.5350545644760132, "learning_rate": 1.0091366593278639e-05, "loss": 1.5471, "step": 9644 }, { "epoch": 0.5273157196943811, "grad_norm": 1.4817132949829102, "learning_rate": 1.0089539310747127e-05, "loss": 1.4461, "step": 9645 }, { "epoch": 0.5273703921381026, "grad_norm": 1.7338566780090332, "learning_rate": 1.0087712025225691e-05, "loss": 1.1817, "step": 9646 }, { "epoch": 0.5274250645818241, "grad_norm": 1.4489959478378296, "learning_rate": 1.0085884736775345e-05, "loss": 1.5154, "step": 9647 }, { "epoch": 0.5274797370255457, "grad_norm": 1.9117851257324219, "learning_rate": 1.0084057445457103e-05, "loss": 1.4539, "step": 9648 }, { "epoch": 0.5275344094692672, "grad_norm": 1.1980140209197998, "learning_rate": 1.0082230151331984e-05, "loss": 1.4298, "step": 9649 }, { "epoch": 0.5275890819129888, "grad_norm": 1.922875165939331, "learning_rate": 1.008040285446101e-05, "loss": 1.4004, "step": 9650 }, { "epoch": 0.5276437543567104, "grad_norm": 1.7281657457351685, "learning_rate": 1.0078575554905197e-05, "loss": 1.3626, "step": 9651 }, { "epoch": 0.5276984268004319, "grad_norm": 1.4668527841567993, "learning_rate": 1.007674825272556e-05, "loss": 1.4945, "step": 9652 }, { "epoch": 0.5277530992441535, "grad_norm": 1.7260551452636719, "learning_rate": 1.007492094798312e-05, "loss": 1.2277, "step": 9653 }, { "epoch": 0.5278077716878751, "grad_norm": 1.6430107355117798, "learning_rate": 1.0073093640738896e-05, "loss": 1.411, "step": 9654 }, { "epoch": 0.5278624441315966, "grad_norm": 1.3504682779312134, "learning_rate": 1.00712663310539e-05, "loss": 1.2421, "step": 9655 }, { "epoch": 0.5279171165753181, "grad_norm": 1.46849524974823, "learning_rate": 1.0069439018989153e-05, "loss": 1.2744, "step": 9656 }, { "epoch": 0.5279717890190396, "grad_norm": 1.9746615886688232, "learning_rate": 1.0067611704605675e-05, "loss": 1.5078, "step": 9657 }, { "epoch": 0.5280264614627612, "grad_norm": 1.6549549102783203, "learning_rate": 1.0065784387964486e-05, "loss": 1.5036, "step": 9658 }, { "epoch": 0.5280811339064828, "grad_norm": 1.146820068359375, "learning_rate": 1.0063957069126602e-05, "loss": 1.3067, "step": 9659 }, { "epoch": 0.5281358063502043, "grad_norm": 1.9494142532348633, "learning_rate": 1.006212974815304e-05, "loss": 1.3691, "step": 9660 }, { "epoch": 0.5281904787939259, "grad_norm": 1.2805063724517822, "learning_rate": 1.0060302425104823e-05, "loss": 1.4061, "step": 9661 }, { "epoch": 0.5282451512376475, "grad_norm": 1.3820561170578003, "learning_rate": 1.0058475100042962e-05, "loss": 1.7869, "step": 9662 }, { "epoch": 0.528299823681369, "grad_norm": 1.4829702377319336, "learning_rate": 1.0056647773028478e-05, "loss": 1.7084, "step": 9663 }, { "epoch": 0.5283544961250906, "grad_norm": 1.4105159044265747, "learning_rate": 1.0054820444122395e-05, "loss": 1.5278, "step": 9664 }, { "epoch": 0.5284091685688121, "grad_norm": 1.4314539432525635, "learning_rate": 1.0052993113385732e-05, "loss": 1.5431, "step": 9665 }, { "epoch": 0.5284638410125336, "grad_norm": 1.679502248764038, "learning_rate": 1.0051165780879503e-05, "loss": 1.358, "step": 9666 }, { "epoch": 0.5285185134562552, "grad_norm": 1.5320980548858643, "learning_rate": 1.0049338446664726e-05, "loss": 1.5134, "step": 9667 }, { "epoch": 0.5285731858999768, "grad_norm": 1.3601391315460205, "learning_rate": 1.0047511110802426e-05, "loss": 1.4583, "step": 9668 }, { "epoch": 0.5286278583436983, "grad_norm": 1.7036525011062622, "learning_rate": 1.0045683773353616e-05, "loss": 1.3907, "step": 9669 }, { "epoch": 0.5286825307874199, "grad_norm": 1.8140742778778076, "learning_rate": 1.0043856434379316e-05, "loss": 1.5413, "step": 9670 }, { "epoch": 0.5287372032311414, "grad_norm": 1.9013842344284058, "learning_rate": 1.004202909394055e-05, "loss": 1.3771, "step": 9671 }, { "epoch": 0.528791875674863, "grad_norm": 1.4116123914718628, "learning_rate": 1.0040201752098335e-05, "loss": 1.4888, "step": 9672 }, { "epoch": 0.5288465481185846, "grad_norm": 1.4251140356063843, "learning_rate": 1.0038374408913684e-05, "loss": 1.4364, "step": 9673 }, { "epoch": 0.528901220562306, "grad_norm": 1.2192802429199219, "learning_rate": 1.0036547064447623e-05, "loss": 1.3347, "step": 9674 }, { "epoch": 0.5289558930060276, "grad_norm": 1.504915475845337, "learning_rate": 1.0034719718761174e-05, "loss": 1.4917, "step": 9675 }, { "epoch": 0.5290105654497492, "grad_norm": 1.6427741050720215, "learning_rate": 1.0032892371915348e-05, "loss": 1.5297, "step": 9676 }, { "epoch": 0.5290652378934707, "grad_norm": 1.8929181098937988, "learning_rate": 1.003106502397117e-05, "loss": 1.5271, "step": 9677 }, { "epoch": 0.5291199103371923, "grad_norm": 1.4341788291931152, "learning_rate": 1.0029237674989658e-05, "loss": 1.4539, "step": 9678 }, { "epoch": 0.5291745827809139, "grad_norm": 1.1983416080474854, "learning_rate": 1.0027410325031831e-05, "loss": 1.6748, "step": 9679 }, { "epoch": 0.5292292552246354, "grad_norm": 1.7282695770263672, "learning_rate": 1.002558297415871e-05, "loss": 1.4181, "step": 9680 }, { "epoch": 0.529283927668357, "grad_norm": 1.5782989263534546, "learning_rate": 1.002375562243131e-05, "loss": 1.5153, "step": 9681 }, { "epoch": 0.5293386001120786, "grad_norm": 1.1501888036727905, "learning_rate": 1.0021928269910658e-05, "loss": 1.3746, "step": 9682 }, { "epoch": 0.5293932725558, "grad_norm": 1.5916987657546997, "learning_rate": 1.0020100916657769e-05, "loss": 1.4575, "step": 9683 }, { "epoch": 0.5294479449995216, "grad_norm": 2.131174087524414, "learning_rate": 1.001827356273366e-05, "loss": 1.3629, "step": 9684 }, { "epoch": 0.5295026174432431, "grad_norm": 1.6812496185302734, "learning_rate": 1.0016446208199357e-05, "loss": 1.4123, "step": 9685 }, { "epoch": 0.5295572898869647, "grad_norm": 1.8164215087890625, "learning_rate": 1.0014618853115879e-05, "loss": 1.2801, "step": 9686 }, { "epoch": 0.5296119623306863, "grad_norm": 1.4614495038986206, "learning_rate": 1.0012791497544238e-05, "loss": 1.3484, "step": 9687 }, { "epoch": 0.5296666347744078, "grad_norm": 1.4060724973678589, "learning_rate": 1.0010964141545461e-05, "loss": 1.4903, "step": 9688 }, { "epoch": 0.5297213072181294, "grad_norm": 1.6502046585083008, "learning_rate": 1.0009136785180566e-05, "loss": 1.4731, "step": 9689 }, { "epoch": 0.529775979661851, "grad_norm": 1.4888883829116821, "learning_rate": 1.000730942851057e-05, "loss": 1.1393, "step": 9690 }, { "epoch": 0.5298306521055725, "grad_norm": 1.553694248199463, "learning_rate": 1.0005482071596497e-05, "loss": 1.3961, "step": 9691 }, { "epoch": 0.529885324549294, "grad_norm": 1.4298382997512817, "learning_rate": 1.0003654714499365e-05, "loss": 1.4394, "step": 9692 }, { "epoch": 0.5299399969930156, "grad_norm": 1.4827560186386108, "learning_rate": 1.0001827357280193e-05, "loss": 1.3383, "step": 9693 }, { "epoch": 0.5299946694367371, "grad_norm": 1.3800557851791382, "learning_rate": 1e-05, "loss": 1.5515, "step": 9694 }, { "epoch": 0.5300493418804587, "grad_norm": 1.544997215270996, "learning_rate": 9.998172642719812e-06, "loss": 1.4742, "step": 9695 }, { "epoch": 0.5301040143241803, "grad_norm": 1.888908863067627, "learning_rate": 9.996345285500636e-06, "loss": 1.7185, "step": 9696 }, { "epoch": 0.5301586867679018, "grad_norm": 1.2614631652832031, "learning_rate": 9.994517928403505e-06, "loss": 1.4708, "step": 9697 }, { "epoch": 0.5302133592116234, "grad_norm": 1.4922873973846436, "learning_rate": 9.992690571489432e-06, "loss": 1.5503, "step": 9698 }, { "epoch": 0.5302680316553449, "grad_norm": 1.5969340801239014, "learning_rate": 9.990863214819438e-06, "loss": 1.3429, "step": 9699 }, { "epoch": 0.5303227040990665, "grad_norm": 1.5629125833511353, "learning_rate": 9.989035858454544e-06, "loss": 1.3528, "step": 9700 }, { "epoch": 0.530377376542788, "grad_norm": 1.4514305591583252, "learning_rate": 9.987208502455767e-06, "loss": 1.2493, "step": 9701 }, { "epoch": 0.5304320489865095, "grad_norm": 1.1945945024490356, "learning_rate": 9.985381146884125e-06, "loss": 1.5433, "step": 9702 }, { "epoch": 0.5304867214302311, "grad_norm": 1.511967658996582, "learning_rate": 9.983553791800645e-06, "loss": 1.6326, "step": 9703 }, { "epoch": 0.5305413938739527, "grad_norm": 1.4751447439193726, "learning_rate": 9.98172643726634e-06, "loss": 1.2468, "step": 9704 }, { "epoch": 0.5305960663176742, "grad_norm": 1.4281883239746094, "learning_rate": 9.979899083342234e-06, "loss": 1.5105, "step": 9705 }, { "epoch": 0.5306507387613958, "grad_norm": 1.6356306076049805, "learning_rate": 9.978071730089347e-06, "loss": 1.4663, "step": 9706 }, { "epoch": 0.5307054112051174, "grad_norm": 1.5109626054763794, "learning_rate": 9.976244377568688e-06, "loss": 1.4245, "step": 9707 }, { "epoch": 0.5307600836488389, "grad_norm": 1.4098142385482788, "learning_rate": 9.974417025841293e-06, "loss": 1.1671, "step": 9708 }, { "epoch": 0.5308147560925605, "grad_norm": 1.3405414819717407, "learning_rate": 9.972589674968174e-06, "loss": 1.1445, "step": 9709 }, { "epoch": 0.530869428536282, "grad_norm": 1.3737118244171143, "learning_rate": 9.970762325010344e-06, "loss": 1.5375, "step": 9710 }, { "epoch": 0.5309241009800035, "grad_norm": 1.8373241424560547, "learning_rate": 9.968934976028833e-06, "loss": 1.357, "step": 9711 }, { "epoch": 0.5309787734237251, "grad_norm": 1.6541080474853516, "learning_rate": 9.967107628084657e-06, "loss": 1.5397, "step": 9712 }, { "epoch": 0.5310334458674466, "grad_norm": 2.3751587867736816, "learning_rate": 9.965280281238828e-06, "loss": 1.4242, "step": 9713 }, { "epoch": 0.5310881183111682, "grad_norm": 2.4991395473480225, "learning_rate": 9.963452935552379e-06, "loss": 1.3423, "step": 9714 }, { "epoch": 0.5311427907548898, "grad_norm": 1.7697398662567139, "learning_rate": 9.961625591086321e-06, "loss": 1.4204, "step": 9715 }, { "epoch": 0.5311974631986113, "grad_norm": 1.2898907661437988, "learning_rate": 9.959798247901668e-06, "loss": 1.3696, "step": 9716 }, { "epoch": 0.5312521356423329, "grad_norm": 1.5000828504562378, "learning_rate": 9.957970906059453e-06, "loss": 1.4152, "step": 9717 }, { "epoch": 0.5313068080860545, "grad_norm": 1.9134212732315063, "learning_rate": 9.956143565620684e-06, "loss": 1.2955, "step": 9718 }, { "epoch": 0.5313614805297759, "grad_norm": 1.4589643478393555, "learning_rate": 9.954316226646389e-06, "loss": 1.3889, "step": 9719 }, { "epoch": 0.5314161529734975, "grad_norm": 1.3357988595962524, "learning_rate": 9.952488889197579e-06, "loss": 1.6995, "step": 9720 }, { "epoch": 0.5314708254172191, "grad_norm": 1.3308720588684082, "learning_rate": 9.950661553335275e-06, "loss": 1.1088, "step": 9721 }, { "epoch": 0.5315254978609406, "grad_norm": 1.5592752695083618, "learning_rate": 9.9488342191205e-06, "loss": 1.4671, "step": 9722 }, { "epoch": 0.5315801703046622, "grad_norm": 1.352396845817566, "learning_rate": 9.947006886614271e-06, "loss": 1.2551, "step": 9723 }, { "epoch": 0.5316348427483838, "grad_norm": 1.1861164569854736, "learning_rate": 9.945179555877605e-06, "loss": 1.3651, "step": 9724 }, { "epoch": 0.5316895151921053, "grad_norm": 1.6282981634140015, "learning_rate": 9.943352226971524e-06, "loss": 1.4095, "step": 9725 }, { "epoch": 0.5317441876358269, "grad_norm": 1.668364405632019, "learning_rate": 9.941524899957045e-06, "loss": 1.5315, "step": 9726 }, { "epoch": 0.5317988600795484, "grad_norm": 1.5597217082977295, "learning_rate": 9.939697574895182e-06, "loss": 1.2657, "step": 9727 }, { "epoch": 0.53185353252327, "grad_norm": 1.4864073991775513, "learning_rate": 9.937870251846963e-06, "loss": 1.4569, "step": 9728 }, { "epoch": 0.5319082049669915, "grad_norm": 1.847192406654358, "learning_rate": 9.936042930873403e-06, "loss": 1.5055, "step": 9729 }, { "epoch": 0.531962877410713, "grad_norm": 1.5176526308059692, "learning_rate": 9.934215612035516e-06, "loss": 1.4608, "step": 9730 }, { "epoch": 0.5320175498544346, "grad_norm": 1.7853659391403198, "learning_rate": 9.932388295394328e-06, "loss": 1.3127, "step": 9731 }, { "epoch": 0.5320722222981562, "grad_norm": 1.8563381433486938, "learning_rate": 9.930560981010847e-06, "loss": 1.3354, "step": 9732 }, { "epoch": 0.5321268947418777, "grad_norm": 1.331058382987976, "learning_rate": 9.928733668946104e-06, "loss": 1.415, "step": 9733 }, { "epoch": 0.5321815671855993, "grad_norm": 1.5456092357635498, "learning_rate": 9.926906359261111e-06, "loss": 1.6546, "step": 9734 }, { "epoch": 0.5322362396293209, "grad_norm": 1.819567322731018, "learning_rate": 9.92507905201688e-06, "loss": 1.3243, "step": 9735 }, { "epoch": 0.5322909120730424, "grad_norm": 1.5806185007095337, "learning_rate": 9.923251747274441e-06, "loss": 1.5112, "step": 9736 }, { "epoch": 0.532345584516764, "grad_norm": 1.60003662109375, "learning_rate": 9.921424445094806e-06, "loss": 1.2666, "step": 9737 }, { "epoch": 0.5324002569604855, "grad_norm": 1.694541096687317, "learning_rate": 9.91959714553899e-06, "loss": 1.336, "step": 9738 }, { "epoch": 0.532454929404207, "grad_norm": 1.436950445175171, "learning_rate": 9.917769848668018e-06, "loss": 1.4038, "step": 9739 }, { "epoch": 0.5325096018479286, "grad_norm": 1.7142874002456665, "learning_rate": 9.915942554542902e-06, "loss": 1.6031, "step": 9740 }, { "epoch": 0.5325642742916501, "grad_norm": 1.4366014003753662, "learning_rate": 9.914115263224658e-06, "loss": 1.4485, "step": 9741 }, { "epoch": 0.5326189467353717, "grad_norm": 1.924125075340271, "learning_rate": 9.912287974774312e-06, "loss": 1.5568, "step": 9742 }, { "epoch": 0.5326736191790933, "grad_norm": 2.0005531311035156, "learning_rate": 9.910460689252876e-06, "loss": 1.5631, "step": 9743 }, { "epoch": 0.5327282916228148, "grad_norm": 1.2960891723632812, "learning_rate": 9.908633406721364e-06, "loss": 1.5319, "step": 9744 }, { "epoch": 0.5327829640665364, "grad_norm": 1.7109507322311401, "learning_rate": 9.9068061272408e-06, "loss": 1.1545, "step": 9745 }, { "epoch": 0.532837636510258, "grad_norm": 1.4524304866790771, "learning_rate": 9.904978850872193e-06, "loss": 1.3627, "step": 9746 }, { "epoch": 0.5328923089539794, "grad_norm": 1.6855367422103882, "learning_rate": 9.903151577676571e-06, "loss": 1.1656, "step": 9747 }, { "epoch": 0.532946981397701, "grad_norm": 1.365830898284912, "learning_rate": 9.901324307714944e-06, "loss": 1.3424, "step": 9748 }, { "epoch": 0.5330016538414226, "grad_norm": 1.706243872642517, "learning_rate": 9.899497041048329e-06, "loss": 1.3752, "step": 9749 }, { "epoch": 0.5330563262851441, "grad_norm": 1.5007611513137817, "learning_rate": 9.897669777737745e-06, "loss": 1.3741, "step": 9750 }, { "epoch": 0.5331109987288657, "grad_norm": 1.356778621673584, "learning_rate": 9.895842517844208e-06, "loss": 1.4386, "step": 9751 }, { "epoch": 0.5331656711725873, "grad_norm": 1.4572758674621582, "learning_rate": 9.894015261428728e-06, "loss": 1.6735, "step": 9752 }, { "epoch": 0.5332203436163088, "grad_norm": 1.8623837232589722, "learning_rate": 9.892188008552331e-06, "loss": 1.3385, "step": 9753 }, { "epoch": 0.5332750160600304, "grad_norm": 1.5948976278305054, "learning_rate": 9.890360759276031e-06, "loss": 1.4888, "step": 9754 }, { "epoch": 0.5333296885037518, "grad_norm": 1.705566644668579, "learning_rate": 9.888533513660838e-06, "loss": 1.5033, "step": 9755 }, { "epoch": 0.5333843609474734, "grad_norm": 1.5961072444915771, "learning_rate": 9.886706271767776e-06, "loss": 1.306, "step": 9756 }, { "epoch": 0.533439033391195, "grad_norm": 1.5526747703552246, "learning_rate": 9.884879033657859e-06, "loss": 1.3572, "step": 9757 }, { "epoch": 0.5334937058349165, "grad_norm": 1.390786051750183, "learning_rate": 9.883051799392097e-06, "loss": 1.2844, "step": 9758 }, { "epoch": 0.5335483782786381, "grad_norm": 1.8527731895446777, "learning_rate": 9.881224569031513e-06, "loss": 1.4705, "step": 9759 }, { "epoch": 0.5336030507223597, "grad_norm": 1.8815842866897583, "learning_rate": 9.879397342637115e-06, "loss": 1.3197, "step": 9760 }, { "epoch": 0.5336577231660812, "grad_norm": 1.8052769899368286, "learning_rate": 9.877570120269927e-06, "loss": 1.4531, "step": 9761 }, { "epoch": 0.5337123956098028, "grad_norm": 1.6754822731018066, "learning_rate": 9.87574290199096e-06, "loss": 1.4152, "step": 9762 }, { "epoch": 0.5337670680535244, "grad_norm": 1.5258815288543701, "learning_rate": 9.873915687861228e-06, "loss": 1.361, "step": 9763 }, { "epoch": 0.5338217404972458, "grad_norm": 1.5512683391571045, "learning_rate": 9.872088477941748e-06, "loss": 1.4141, "step": 9764 }, { "epoch": 0.5338764129409674, "grad_norm": 1.9945462942123413, "learning_rate": 9.870261272293533e-06, "loss": 1.5219, "step": 9765 }, { "epoch": 0.533931085384689, "grad_norm": 1.474975347518921, "learning_rate": 9.868434070977597e-06, "loss": 1.6555, "step": 9766 }, { "epoch": 0.5339857578284105, "grad_norm": 1.6550216674804688, "learning_rate": 9.866606874054958e-06, "loss": 1.3376, "step": 9767 }, { "epoch": 0.5340404302721321, "grad_norm": 1.4998217821121216, "learning_rate": 9.86477968158663e-06, "loss": 1.4149, "step": 9768 }, { "epoch": 0.5340951027158536, "grad_norm": 1.4775810241699219, "learning_rate": 9.862952493633621e-06, "loss": 1.4869, "step": 9769 }, { "epoch": 0.5341497751595752, "grad_norm": 1.6079412698745728, "learning_rate": 9.861125310256955e-06, "loss": 1.1773, "step": 9770 }, { "epoch": 0.5342044476032968, "grad_norm": 1.4485292434692383, "learning_rate": 9.859298131517639e-06, "loss": 1.4376, "step": 9771 }, { "epoch": 0.5342591200470183, "grad_norm": 1.3843348026275635, "learning_rate": 9.857470957476685e-06, "loss": 1.5173, "step": 9772 }, { "epoch": 0.5343137924907398, "grad_norm": 1.4070732593536377, "learning_rate": 9.855643788195113e-06, "loss": 1.1921, "step": 9773 }, { "epoch": 0.5343684649344614, "grad_norm": 1.7885401248931885, "learning_rate": 9.853816623733931e-06, "loss": 1.403, "step": 9774 }, { "epoch": 0.5344231373781829, "grad_norm": 1.5342276096343994, "learning_rate": 9.85198946415416e-06, "loss": 1.4072, "step": 9775 }, { "epoch": 0.5344778098219045, "grad_norm": 1.6264829635620117, "learning_rate": 9.850162309516807e-06, "loss": 1.515, "step": 9776 }, { "epoch": 0.5345324822656261, "grad_norm": 1.1985080242156982, "learning_rate": 9.848335159882884e-06, "loss": 1.6033, "step": 9777 }, { "epoch": 0.5345871547093476, "grad_norm": 1.4919410943984985, "learning_rate": 9.846508015313407e-06, "loss": 1.3804, "step": 9778 }, { "epoch": 0.5346418271530692, "grad_norm": 1.5920724868774414, "learning_rate": 9.844680875869389e-06, "loss": 1.1848, "step": 9779 }, { "epoch": 0.5346964995967908, "grad_norm": 1.8396484851837158, "learning_rate": 9.842853741611837e-06, "loss": 1.5624, "step": 9780 }, { "epoch": 0.5347511720405123, "grad_norm": 1.8475524187088013, "learning_rate": 9.84102661260177e-06, "loss": 1.6558, "step": 9781 }, { "epoch": 0.5348058444842338, "grad_norm": 1.6364758014678955, "learning_rate": 9.839199488900198e-06, "loss": 1.1842, "step": 9782 }, { "epoch": 0.5348605169279554, "grad_norm": 1.5474258661270142, "learning_rate": 9.83737237056813e-06, "loss": 1.4251, "step": 9783 }, { "epoch": 0.5349151893716769, "grad_norm": 1.100224256515503, "learning_rate": 9.835545257666585e-06, "loss": 1.5045, "step": 9784 }, { "epoch": 0.5349698618153985, "grad_norm": 1.6763828992843628, "learning_rate": 9.833718150256567e-06, "loss": 1.2093, "step": 9785 }, { "epoch": 0.53502453425912, "grad_norm": 1.5375139713287354, "learning_rate": 9.831891048399087e-06, "loss": 1.3695, "step": 9786 }, { "epoch": 0.5350792067028416, "grad_norm": 1.7502899169921875, "learning_rate": 9.830063952155162e-06, "loss": 1.2808, "step": 9787 }, { "epoch": 0.5351338791465632, "grad_norm": 1.4574393033981323, "learning_rate": 9.8282368615858e-06, "loss": 1.7023, "step": 9788 }, { "epoch": 0.5351885515902847, "grad_norm": 1.3077666759490967, "learning_rate": 9.826409776752014e-06, "loss": 1.9365, "step": 9789 }, { "epoch": 0.5352432240340063, "grad_norm": 1.4451717138290405, "learning_rate": 9.824582697714813e-06, "loss": 1.3572, "step": 9790 }, { "epoch": 0.5352978964777279, "grad_norm": 1.4014354944229126, "learning_rate": 9.822755624535202e-06, "loss": 1.4086, "step": 9791 }, { "epoch": 0.5353525689214493, "grad_norm": 1.3646349906921387, "learning_rate": 9.820928557274202e-06, "loss": 1.3405, "step": 9792 }, { "epoch": 0.5354072413651709, "grad_norm": 1.3491789102554321, "learning_rate": 9.819101495992817e-06, "loss": 1.5311, "step": 9793 }, { "epoch": 0.5354619138088925, "grad_norm": 1.4522207975387573, "learning_rate": 9.817274440752053e-06, "loss": 1.552, "step": 9794 }, { "epoch": 0.535516586252614, "grad_norm": 1.577941656112671, "learning_rate": 9.81544739161293e-06, "loss": 1.5005, "step": 9795 }, { "epoch": 0.5355712586963356, "grad_norm": 1.6103838682174683, "learning_rate": 9.81362034863645e-06, "loss": 1.6434, "step": 9796 }, { "epoch": 0.5356259311400572, "grad_norm": 2.885788917541504, "learning_rate": 9.811793311883624e-06, "loss": 1.3567, "step": 9797 }, { "epoch": 0.5356806035837787, "grad_norm": 1.6489746570587158, "learning_rate": 9.809966281415461e-06, "loss": 1.3565, "step": 9798 }, { "epoch": 0.5357352760275003, "grad_norm": 1.577412486076355, "learning_rate": 9.808139257292971e-06, "loss": 1.2377, "step": 9799 }, { "epoch": 0.5357899484712217, "grad_norm": 1.8819056749343872, "learning_rate": 9.806312239577156e-06, "loss": 1.3752, "step": 9800 }, { "epoch": 0.5358446209149433, "grad_norm": 1.383133888244629, "learning_rate": 9.804485228329035e-06, "loss": 1.7367, "step": 9801 }, { "epoch": 0.5358992933586649, "grad_norm": 1.6669834852218628, "learning_rate": 9.802658223609609e-06, "loss": 1.2838, "step": 9802 }, { "epoch": 0.5359539658023864, "grad_norm": 1.63349187374115, "learning_rate": 9.80083122547989e-06, "loss": 1.4615, "step": 9803 }, { "epoch": 0.536008638246108, "grad_norm": 1.099668264389038, "learning_rate": 9.799004234000883e-06, "loss": 1.6689, "step": 9804 }, { "epoch": 0.5360633106898296, "grad_norm": 1.7450120449066162, "learning_rate": 9.797177249233592e-06, "loss": 1.4449, "step": 9805 }, { "epoch": 0.5361179831335511, "grad_norm": 1.4943392276763916, "learning_rate": 9.795350271239034e-06, "loss": 1.6117, "step": 9806 }, { "epoch": 0.5361726555772727, "grad_norm": 2.3990674018859863, "learning_rate": 9.79352330007821e-06, "loss": 1.2186, "step": 9807 }, { "epoch": 0.5362273280209943, "grad_norm": 1.509750485420227, "learning_rate": 9.791696335812125e-06, "loss": 1.1124, "step": 9808 }, { "epoch": 0.5362820004647157, "grad_norm": 1.6420652866363525, "learning_rate": 9.789869378501791e-06, "loss": 1.4552, "step": 9809 }, { "epoch": 0.5363366729084373, "grad_norm": 1.337786316871643, "learning_rate": 9.788042428208211e-06, "loss": 1.4982, "step": 9810 }, { "epoch": 0.5363913453521589, "grad_norm": 1.4822345972061157, "learning_rate": 9.786215484992387e-06, "loss": 1.4576, "step": 9811 }, { "epoch": 0.5364460177958804, "grad_norm": 1.4234586954116821, "learning_rate": 9.784388548915334e-06, "loss": 1.2987, "step": 9812 }, { "epoch": 0.536500690239602, "grad_norm": 1.5970689058303833, "learning_rate": 9.782561620038055e-06, "loss": 1.4494, "step": 9813 }, { "epoch": 0.5365553626833235, "grad_norm": 2.234164237976074, "learning_rate": 9.78073469842155e-06, "loss": 1.5792, "step": 9814 }, { "epoch": 0.5366100351270451, "grad_norm": 1.5450769662857056, "learning_rate": 9.77890778412683e-06, "loss": 1.2775, "step": 9815 }, { "epoch": 0.5366647075707667, "grad_norm": 1.451454758644104, "learning_rate": 9.777080877214895e-06, "loss": 1.3896, "step": 9816 }, { "epoch": 0.5367193800144882, "grad_norm": 1.5614449977874756, "learning_rate": 9.775253977746756e-06, "loss": 1.3753, "step": 9817 }, { "epoch": 0.5367740524582097, "grad_norm": 1.452893614768982, "learning_rate": 9.773427085783413e-06, "loss": 1.4072, "step": 9818 }, { "epoch": 0.5368287249019313, "grad_norm": 1.4917432069778442, "learning_rate": 9.771600201385868e-06, "loss": 1.3684, "step": 9819 }, { "epoch": 0.5368833973456528, "grad_norm": 1.5772935152053833, "learning_rate": 9.769773324615133e-06, "loss": 1.3063, "step": 9820 }, { "epoch": 0.5369380697893744, "grad_norm": 1.6565786600112915, "learning_rate": 9.767946455532207e-06, "loss": 1.739, "step": 9821 }, { "epoch": 0.536992742233096, "grad_norm": 1.4510024785995483, "learning_rate": 9.76611959419809e-06, "loss": 1.3832, "step": 9822 }, { "epoch": 0.5370474146768175, "grad_norm": 1.4949535131454468, "learning_rate": 9.764292740673792e-06, "loss": 1.2175, "step": 9823 }, { "epoch": 0.5371020871205391, "grad_norm": 1.6923667192459106, "learning_rate": 9.762465895020312e-06, "loss": 1.3505, "step": 9824 }, { "epoch": 0.5371567595642607, "grad_norm": 1.4029161930084229, "learning_rate": 9.76063905729865e-06, "loss": 1.4972, "step": 9825 }, { "epoch": 0.5372114320079822, "grad_norm": 1.5489044189453125, "learning_rate": 9.758812227569813e-06, "loss": 1.3765, "step": 9826 }, { "epoch": 0.5372661044517038, "grad_norm": 1.5786219835281372, "learning_rate": 9.756985405894802e-06, "loss": 1.3492, "step": 9827 }, { "epoch": 0.5373207768954252, "grad_norm": 1.4788380861282349, "learning_rate": 9.755158592334619e-06, "loss": 1.5556, "step": 9828 }, { "epoch": 0.5373754493391468, "grad_norm": 1.3231596946716309, "learning_rate": 9.753331786950266e-06, "loss": 1.8549, "step": 9829 }, { "epoch": 0.5374301217828684, "grad_norm": 1.197874903678894, "learning_rate": 9.75150498980274e-06, "loss": 1.6654, "step": 9830 }, { "epoch": 0.5374847942265899, "grad_norm": 1.3249841928482056, "learning_rate": 9.749678200953048e-06, "loss": 1.3543, "step": 9831 }, { "epoch": 0.5375394666703115, "grad_norm": 1.3409864902496338, "learning_rate": 9.74785142046219e-06, "loss": 1.3866, "step": 9832 }, { "epoch": 0.5375941391140331, "grad_norm": 1.4723291397094727, "learning_rate": 9.746024648391162e-06, "loss": 1.6454, "step": 9833 }, { "epoch": 0.5376488115577546, "grad_norm": 1.4267302751541138, "learning_rate": 9.744197884800968e-06, "loss": 1.5176, "step": 9834 }, { "epoch": 0.5377034840014762, "grad_norm": 1.6560286283493042, "learning_rate": 9.742371129752607e-06, "loss": 1.1254, "step": 9835 }, { "epoch": 0.5377581564451978, "grad_norm": 2.047051429748535, "learning_rate": 9.740544383307077e-06, "loss": 1.4288, "step": 9836 }, { "epoch": 0.5378128288889192, "grad_norm": 1.3211967945098877, "learning_rate": 9.738717645525381e-06, "loss": 1.3429, "step": 9837 }, { "epoch": 0.5378675013326408, "grad_norm": 1.3041101694107056, "learning_rate": 9.736890916468515e-06, "loss": 1.5068, "step": 9838 }, { "epoch": 0.5379221737763624, "grad_norm": 1.3816756010055542, "learning_rate": 9.735064196197477e-06, "loss": 1.4432, "step": 9839 }, { "epoch": 0.5379768462200839, "grad_norm": 1.4933679103851318, "learning_rate": 9.73323748477327e-06, "loss": 1.5614, "step": 9840 }, { "epoch": 0.5380315186638055, "grad_norm": 1.508699893951416, "learning_rate": 9.731410782256889e-06, "loss": 1.6213, "step": 9841 }, { "epoch": 0.538086191107527, "grad_norm": 1.6822915077209473, "learning_rate": 9.72958408870933e-06, "loss": 1.2861, "step": 9842 }, { "epoch": 0.5381408635512486, "grad_norm": 1.434260606765747, "learning_rate": 9.727757404191596e-06, "loss": 1.4367, "step": 9843 }, { "epoch": 0.5381955359949702, "grad_norm": 2.377894163131714, "learning_rate": 9.725930728764676e-06, "loss": 1.6526, "step": 9844 }, { "epoch": 0.5382502084386916, "grad_norm": 1.573715329170227, "learning_rate": 9.724104062489576e-06, "loss": 1.4638, "step": 9845 }, { "epoch": 0.5383048808824132, "grad_norm": 1.8765486478805542, "learning_rate": 9.722277405427291e-06, "loss": 1.4169, "step": 9846 }, { "epoch": 0.5383595533261348, "grad_norm": 1.34388267993927, "learning_rate": 9.72045075763881e-06, "loss": 1.4886, "step": 9847 }, { "epoch": 0.5384142257698563, "grad_norm": 1.3805263042449951, "learning_rate": 9.718624119185138e-06, "loss": 1.4498, "step": 9848 }, { "epoch": 0.5384688982135779, "grad_norm": 1.7152104377746582, "learning_rate": 9.716797490127268e-06, "loss": 1.1772, "step": 9849 }, { "epoch": 0.5385235706572995, "grad_norm": 1.5781632661819458, "learning_rate": 9.714970870526188e-06, "loss": 1.4749, "step": 9850 }, { "epoch": 0.538578243101021, "grad_norm": 1.5223536491394043, "learning_rate": 9.713144260442904e-06, "loss": 1.4598, "step": 9851 }, { "epoch": 0.5386329155447426, "grad_norm": 1.5191102027893066, "learning_rate": 9.711317659938407e-06, "loss": 1.3724, "step": 9852 }, { "epoch": 0.5386875879884642, "grad_norm": 1.7876315116882324, "learning_rate": 9.709491069073688e-06, "loss": 1.4249, "step": 9853 }, { "epoch": 0.5387422604321856, "grad_norm": 2.1374263763427734, "learning_rate": 9.707664487909746e-06, "loss": 1.1378, "step": 9854 }, { "epoch": 0.5387969328759072, "grad_norm": 1.4911675453186035, "learning_rate": 9.705837916507575e-06, "loss": 1.4967, "step": 9855 }, { "epoch": 0.5388516053196287, "grad_norm": 1.428987979888916, "learning_rate": 9.70401135492816e-06, "loss": 1.673, "step": 9856 }, { "epoch": 0.5389062777633503, "grad_norm": 1.6132344007492065, "learning_rate": 9.702184803232506e-06, "loss": 1.4771, "step": 9857 }, { "epoch": 0.5389609502070719, "grad_norm": 1.5478363037109375, "learning_rate": 9.700358261481593e-06, "loss": 1.4938, "step": 9858 }, { "epoch": 0.5390156226507934, "grad_norm": 1.3240827322006226, "learning_rate": 9.698531729736426e-06, "loss": 1.5057, "step": 9859 }, { "epoch": 0.539070295094515, "grad_norm": 1.6529899835586548, "learning_rate": 9.696705208057994e-06, "loss": 1.5967, "step": 9860 }, { "epoch": 0.5391249675382366, "grad_norm": 1.8803037405014038, "learning_rate": 9.694878696507284e-06, "loss": 1.3083, "step": 9861 }, { "epoch": 0.5391796399819581, "grad_norm": 1.5553568601608276, "learning_rate": 9.693052195145292e-06, "loss": 1.2553, "step": 9862 }, { "epoch": 0.5392343124256797, "grad_norm": 1.0783908367156982, "learning_rate": 9.691225704033008e-06, "loss": 1.5115, "step": 9863 }, { "epoch": 0.5392889848694012, "grad_norm": 1.250562310218811, "learning_rate": 9.689399223231416e-06, "loss": 1.2769, "step": 9864 }, { "epoch": 0.5393436573131227, "grad_norm": 1.594657063484192, "learning_rate": 9.68757275280152e-06, "loss": 1.3199, "step": 9865 }, { "epoch": 0.5393983297568443, "grad_norm": 1.407688856124878, "learning_rate": 9.685746292804301e-06, "loss": 1.2307, "step": 9866 }, { "epoch": 0.5394530022005659, "grad_norm": 1.3710402250289917, "learning_rate": 9.683919843300748e-06, "loss": 1.2715, "step": 9867 }, { "epoch": 0.5395076746442874, "grad_norm": 1.3957618474960327, "learning_rate": 9.682093404351856e-06, "loss": 1.3936, "step": 9868 }, { "epoch": 0.539562347088009, "grad_norm": 1.5201544761657715, "learning_rate": 9.680266976018613e-06, "loss": 1.3987, "step": 9869 }, { "epoch": 0.5396170195317305, "grad_norm": 1.2562001943588257, "learning_rate": 9.678440558362e-06, "loss": 1.4544, "step": 9870 }, { "epoch": 0.5396716919754521, "grad_norm": 1.1428720951080322, "learning_rate": 9.676614151443016e-06, "loss": 1.5257, "step": 9871 }, { "epoch": 0.5397263644191737, "grad_norm": 1.7850896120071411, "learning_rate": 9.67478775532264e-06, "loss": 1.3742, "step": 9872 }, { "epoch": 0.5397810368628951, "grad_norm": 1.4478007555007935, "learning_rate": 9.67296137006187e-06, "loss": 1.2277, "step": 9873 }, { "epoch": 0.5398357093066167, "grad_norm": 1.9346799850463867, "learning_rate": 9.671134995721684e-06, "loss": 1.4574, "step": 9874 }, { "epoch": 0.5398903817503383, "grad_norm": 1.4656389951705933, "learning_rate": 9.66930863236307e-06, "loss": 1.3274, "step": 9875 }, { "epoch": 0.5399450541940598, "grad_norm": 1.4649381637573242, "learning_rate": 9.66748228004702e-06, "loss": 1.4504, "step": 9876 }, { "epoch": 0.5399997266377814, "grad_norm": 1.5334126949310303, "learning_rate": 9.665655938834519e-06, "loss": 1.3501, "step": 9877 }, { "epoch": 0.540054399081503, "grad_norm": 1.9911980628967285, "learning_rate": 9.663829608786543e-06, "loss": 1.2123, "step": 9878 }, { "epoch": 0.5401090715252245, "grad_norm": 1.544268012046814, "learning_rate": 9.662003289964092e-06, "loss": 1.5097, "step": 9879 }, { "epoch": 0.5401637439689461, "grad_norm": 1.7107547521591187, "learning_rate": 9.660176982428144e-06, "loss": 1.3462, "step": 9880 }, { "epoch": 0.5402184164126677, "grad_norm": 1.3666619062423706, "learning_rate": 9.658350686239682e-06, "loss": 1.4348, "step": 9881 }, { "epoch": 0.5402730888563891, "grad_norm": 2.3566343784332275, "learning_rate": 9.656524401459692e-06, "loss": 1.4277, "step": 9882 }, { "epoch": 0.5403277613001107, "grad_norm": 1.4208495616912842, "learning_rate": 9.654698128149162e-06, "loss": 1.5405, "step": 9883 }, { "epoch": 0.5403824337438322, "grad_norm": 1.5807991027832031, "learning_rate": 9.652871866369064e-06, "loss": 1.4592, "step": 9884 }, { "epoch": 0.5404371061875538, "grad_norm": 1.8275984525680542, "learning_rate": 9.651045616180395e-06, "loss": 1.3087, "step": 9885 }, { "epoch": 0.5404917786312754, "grad_norm": 1.2725706100463867, "learning_rate": 9.64921937764413e-06, "loss": 1.4081, "step": 9886 }, { "epoch": 0.5405464510749969, "grad_norm": 1.4574005603790283, "learning_rate": 9.647393150821254e-06, "loss": 1.4795, "step": 9887 }, { "epoch": 0.5406011235187185, "grad_norm": 1.5843873023986816, "learning_rate": 9.645566935772749e-06, "loss": 1.4091, "step": 9888 }, { "epoch": 0.5406557959624401, "grad_norm": 1.6051558256149292, "learning_rate": 9.64374073255959e-06, "loss": 1.4577, "step": 9889 }, { "epoch": 0.5407104684061615, "grad_norm": 1.6648906469345093, "learning_rate": 9.64191454124277e-06, "loss": 1.3245, "step": 9890 }, { "epoch": 0.5407651408498831, "grad_norm": 1.4574885368347168, "learning_rate": 9.640088361883263e-06, "loss": 1.2708, "step": 9891 }, { "epoch": 0.5408198132936047, "grad_norm": 1.4617245197296143, "learning_rate": 9.638262194542048e-06, "loss": 1.6307, "step": 9892 }, { "epoch": 0.5408744857373262, "grad_norm": 1.9879637956619263, "learning_rate": 9.636436039280111e-06, "loss": 1.3764, "step": 9893 }, { "epoch": 0.5409291581810478, "grad_norm": 2.124099016189575, "learning_rate": 9.634609896158426e-06, "loss": 1.4369, "step": 9894 }, { "epoch": 0.5409838306247694, "grad_norm": 2.2465176582336426, "learning_rate": 9.632783765237968e-06, "loss": 1.2616, "step": 9895 }, { "epoch": 0.5410385030684909, "grad_norm": 1.4899688959121704, "learning_rate": 9.63095764657973e-06, "loss": 1.4136, "step": 9896 }, { "epoch": 0.5410931755122125, "grad_norm": 1.322479486465454, "learning_rate": 9.62913154024468e-06, "loss": 1.315, "step": 9897 }, { "epoch": 0.541147847955934, "grad_norm": 1.544257402420044, "learning_rate": 9.627305446293793e-06, "loss": 1.5111, "step": 9898 }, { "epoch": 0.5412025203996556, "grad_norm": 1.3395893573760986, "learning_rate": 9.625479364788058e-06, "loss": 1.4319, "step": 9899 }, { "epoch": 0.5412571928433771, "grad_norm": 1.8727455139160156, "learning_rate": 9.623653295788442e-06, "loss": 1.7505, "step": 9900 }, { "epoch": 0.5413118652870986, "grad_norm": 1.324205756187439, "learning_rate": 9.62182723935593e-06, "loss": 1.5754, "step": 9901 }, { "epoch": 0.5413665377308202, "grad_norm": 1.193849802017212, "learning_rate": 9.62000119555149e-06, "loss": 1.6363, "step": 9902 }, { "epoch": 0.5414212101745418, "grad_norm": 1.4246931076049805, "learning_rate": 9.6181751644361e-06, "loss": 1.3853, "step": 9903 }, { "epoch": 0.5414758826182633, "grad_norm": 1.526654601097107, "learning_rate": 9.61634914607074e-06, "loss": 1.3406, "step": 9904 }, { "epoch": 0.5415305550619849, "grad_norm": 2.5304365158081055, "learning_rate": 9.614523140516385e-06, "loss": 1.1963, "step": 9905 }, { "epoch": 0.5415852275057065, "grad_norm": 1.5788021087646484, "learning_rate": 9.612697147834004e-06, "loss": 1.514, "step": 9906 }, { "epoch": 0.541639899949428, "grad_norm": 1.551209807395935, "learning_rate": 9.610871168084575e-06, "loss": 1.4025, "step": 9907 }, { "epoch": 0.5416945723931496, "grad_norm": 1.4974050521850586, "learning_rate": 9.609045201329071e-06, "loss": 1.4798, "step": 9908 }, { "epoch": 0.5417492448368711, "grad_norm": 1.8283594846725464, "learning_rate": 9.607219247628461e-06, "loss": 1.3311, "step": 9909 }, { "epoch": 0.5418039172805926, "grad_norm": 1.4950106143951416, "learning_rate": 9.605393307043726e-06, "loss": 1.5059, "step": 9910 }, { "epoch": 0.5418585897243142, "grad_norm": 1.7032612562179565, "learning_rate": 9.603567379635836e-06, "loss": 1.6029, "step": 9911 }, { "epoch": 0.5419132621680357, "grad_norm": 1.6624298095703125, "learning_rate": 9.601741465465759e-06, "loss": 1.3943, "step": 9912 }, { "epoch": 0.5419679346117573, "grad_norm": 1.2986587285995483, "learning_rate": 9.59991556459447e-06, "loss": 1.5262, "step": 9913 }, { "epoch": 0.5420226070554789, "grad_norm": 3.0173492431640625, "learning_rate": 9.598089677082934e-06, "loss": 1.3305, "step": 9914 }, { "epoch": 0.5420772794992004, "grad_norm": 1.2781615257263184, "learning_rate": 9.596263802992134e-06, "loss": 1.4773, "step": 9915 }, { "epoch": 0.542131951942922, "grad_norm": 1.5099111795425415, "learning_rate": 9.59443794238303e-06, "loss": 1.3673, "step": 9916 }, { "epoch": 0.5421866243866436, "grad_norm": 1.497973084449768, "learning_rate": 9.592612095316592e-06, "loss": 1.4275, "step": 9917 }, { "epoch": 0.542241296830365, "grad_norm": 1.507935643196106, "learning_rate": 9.590786261853798e-06, "loss": 1.3806, "step": 9918 }, { "epoch": 0.5422959692740866, "grad_norm": 1.6466929912567139, "learning_rate": 9.588960442055609e-06, "loss": 1.4953, "step": 9919 }, { "epoch": 0.5423506417178082, "grad_norm": 1.523329496383667, "learning_rate": 9.587134635982992e-06, "loss": 1.1936, "step": 9920 }, { "epoch": 0.5424053141615297, "grad_norm": 1.5960719585418701, "learning_rate": 9.585308843696923e-06, "loss": 1.3723, "step": 9921 }, { "epoch": 0.5424599866052513, "grad_norm": 2.2773749828338623, "learning_rate": 9.583483065258363e-06, "loss": 1.3166, "step": 9922 }, { "epoch": 0.5425146590489729, "grad_norm": 1.264003038406372, "learning_rate": 9.581657300728278e-06, "loss": 1.614, "step": 9923 }, { "epoch": 0.5425693314926944, "grad_norm": 1.473344087600708, "learning_rate": 9.57983155016764e-06, "loss": 1.2111, "step": 9924 }, { "epoch": 0.542624003936416, "grad_norm": 1.3328425884246826, "learning_rate": 9.578005813637414e-06, "loss": 1.4326, "step": 9925 }, { "epoch": 0.5426786763801374, "grad_norm": 1.5272618532180786, "learning_rate": 9.576180091198562e-06, "loss": 1.3649, "step": 9926 }, { "epoch": 0.542733348823859, "grad_norm": 1.8659685850143433, "learning_rate": 9.574354382912052e-06, "loss": 1.2678, "step": 9927 }, { "epoch": 0.5427880212675806, "grad_norm": 1.018336296081543, "learning_rate": 9.572528688838845e-06, "loss": 1.5842, "step": 9928 }, { "epoch": 0.5428426937113021, "grad_norm": 1.4435795545578003, "learning_rate": 9.570703009039911e-06, "loss": 1.1499, "step": 9929 }, { "epoch": 0.5428973661550237, "grad_norm": 1.4860570430755615, "learning_rate": 9.568877343576212e-06, "loss": 1.2118, "step": 9930 }, { "epoch": 0.5429520385987453, "grad_norm": 1.7399110794067383, "learning_rate": 9.56705169250871e-06, "loss": 1.4444, "step": 9931 }, { "epoch": 0.5430067110424668, "grad_norm": 1.721189022064209, "learning_rate": 9.565226055898366e-06, "loss": 1.4793, "step": 9932 }, { "epoch": 0.5430613834861884, "grad_norm": 1.8070369958877563, "learning_rate": 9.563400433806147e-06, "loss": 1.2023, "step": 9933 }, { "epoch": 0.54311605592991, "grad_norm": 1.943055272102356, "learning_rate": 9.561574826293006e-06, "loss": 1.1674, "step": 9934 }, { "epoch": 0.5431707283736315, "grad_norm": 1.896438717842102, "learning_rate": 9.559749233419915e-06, "loss": 1.4095, "step": 9935 }, { "epoch": 0.543225400817353, "grad_norm": 1.4076193571090698, "learning_rate": 9.557923655247832e-06, "loss": 1.2336, "step": 9936 }, { "epoch": 0.5432800732610746, "grad_norm": 1.4404717683792114, "learning_rate": 9.55609809183771e-06, "loss": 1.1827, "step": 9937 }, { "epoch": 0.5433347457047961, "grad_norm": 1.2150214910507202, "learning_rate": 9.554272543250518e-06, "loss": 1.4376, "step": 9938 }, { "epoch": 0.5433894181485177, "grad_norm": 1.552610158920288, "learning_rate": 9.552447009547214e-06, "loss": 1.5448, "step": 9939 }, { "epoch": 0.5434440905922392, "grad_norm": 1.6189756393432617, "learning_rate": 9.550621490788749e-06, "loss": 1.2506, "step": 9940 }, { "epoch": 0.5434987630359608, "grad_norm": 2.3189449310302734, "learning_rate": 9.548795987036091e-06, "loss": 1.1408, "step": 9941 }, { "epoch": 0.5435534354796824, "grad_norm": 1.9888997077941895, "learning_rate": 9.54697049835019e-06, "loss": 1.3973, "step": 9942 }, { "epoch": 0.5436081079234039, "grad_norm": 1.3091098070144653, "learning_rate": 9.545145024792012e-06, "loss": 1.5757, "step": 9943 }, { "epoch": 0.5436627803671255, "grad_norm": 1.6543636322021484, "learning_rate": 9.543319566422507e-06, "loss": 1.1422, "step": 9944 }, { "epoch": 0.543717452810847, "grad_norm": 1.4605592489242554, "learning_rate": 9.541494123302632e-06, "loss": 1.4184, "step": 9945 }, { "epoch": 0.5437721252545685, "grad_norm": 1.4642102718353271, "learning_rate": 9.539668695493344e-06, "loss": 1.3056, "step": 9946 }, { "epoch": 0.5438267976982901, "grad_norm": 1.8370863199234009, "learning_rate": 9.537843283055602e-06, "loss": 1.4075, "step": 9947 }, { "epoch": 0.5438814701420117, "grad_norm": 1.513370156288147, "learning_rate": 9.536017886050352e-06, "loss": 1.4152, "step": 9948 }, { "epoch": 0.5439361425857332, "grad_norm": 1.5071896314620972, "learning_rate": 9.534192504538557e-06, "loss": 1.4601, "step": 9949 }, { "epoch": 0.5439908150294548, "grad_norm": 1.379056453704834, "learning_rate": 9.532367138581168e-06, "loss": 1.3701, "step": 9950 }, { "epoch": 0.5440454874731764, "grad_norm": 1.353258728981018, "learning_rate": 9.530541788239135e-06, "loss": 1.6054, "step": 9951 }, { "epoch": 0.5441001599168979, "grad_norm": 1.8500421047210693, "learning_rate": 9.528716453573415e-06, "loss": 1.4091, "step": 9952 }, { "epoch": 0.5441548323606195, "grad_norm": 1.5832579135894775, "learning_rate": 9.52689113464496e-06, "loss": 1.2839, "step": 9953 }, { "epoch": 0.5442095048043409, "grad_norm": 2.0613808631896973, "learning_rate": 9.525065831514715e-06, "loss": 1.4247, "step": 9954 }, { "epoch": 0.5442641772480625, "grad_norm": 1.4666798114776611, "learning_rate": 9.52324054424364e-06, "loss": 1.4257, "step": 9955 }, { "epoch": 0.5443188496917841, "grad_norm": 1.8645620346069336, "learning_rate": 9.521415272892678e-06, "loss": 1.4238, "step": 9956 }, { "epoch": 0.5443735221355056, "grad_norm": 1.2524358034133911, "learning_rate": 9.519590017522788e-06, "loss": 1.4632, "step": 9957 }, { "epoch": 0.5444281945792272, "grad_norm": 1.6962310075759888, "learning_rate": 9.517764778194915e-06, "loss": 1.3056, "step": 9958 }, { "epoch": 0.5444828670229488, "grad_norm": 1.186475157737732, "learning_rate": 9.515939554970005e-06, "loss": 1.3178, "step": 9959 }, { "epoch": 0.5445375394666703, "grad_norm": 1.336824893951416, "learning_rate": 9.514114347909011e-06, "loss": 1.2252, "step": 9960 }, { "epoch": 0.5445922119103919, "grad_norm": 1.5716705322265625, "learning_rate": 9.512289157072879e-06, "loss": 1.284, "step": 9961 }, { "epoch": 0.5446468843541135, "grad_norm": 1.4743300676345825, "learning_rate": 9.510463982522554e-06, "loss": 1.5129, "step": 9962 }, { "epoch": 0.5447015567978349, "grad_norm": 1.5965157747268677, "learning_rate": 9.508638824318988e-06, "loss": 1.3862, "step": 9963 }, { "epoch": 0.5447562292415565, "grad_norm": 1.4063388109207153, "learning_rate": 9.506813682523124e-06, "loss": 1.4948, "step": 9964 }, { "epoch": 0.5448109016852781, "grad_norm": 1.4901697635650635, "learning_rate": 9.504988557195906e-06, "loss": 1.5528, "step": 9965 }, { "epoch": 0.5448655741289996, "grad_norm": 1.5240486860275269, "learning_rate": 9.503163448398286e-06, "loss": 1.3166, "step": 9966 }, { "epoch": 0.5449202465727212, "grad_norm": 1.433523416519165, "learning_rate": 9.501338356191204e-06, "loss": 1.2953, "step": 9967 }, { "epoch": 0.5449749190164427, "grad_norm": 1.649702787399292, "learning_rate": 9.499513280635598e-06, "loss": 1.2061, "step": 9968 }, { "epoch": 0.5450295914601643, "grad_norm": 1.3957504034042358, "learning_rate": 9.497688221792424e-06, "loss": 1.6532, "step": 9969 }, { "epoch": 0.5450842639038859, "grad_norm": 1.357051134109497, "learning_rate": 9.495863179722616e-06, "loss": 1.4607, "step": 9970 }, { "epoch": 0.5451389363476074, "grad_norm": 1.2396795749664307, "learning_rate": 9.494038154487124e-06, "loss": 1.5492, "step": 9971 }, { "epoch": 0.5451936087913289, "grad_norm": 1.5666626691818237, "learning_rate": 9.492213146146883e-06, "loss": 1.4099, "step": 9972 }, { "epoch": 0.5452482812350505, "grad_norm": 1.2915799617767334, "learning_rate": 9.490388154762832e-06, "loss": 1.273, "step": 9973 }, { "epoch": 0.545302953678772, "grad_norm": 1.5484051704406738, "learning_rate": 9.488563180395922e-06, "loss": 1.2867, "step": 9974 }, { "epoch": 0.5453576261224936, "grad_norm": 1.345611572265625, "learning_rate": 9.486738223107087e-06, "loss": 1.4928, "step": 9975 }, { "epoch": 0.5454122985662152, "grad_norm": 1.6808550357818604, "learning_rate": 9.484913282957262e-06, "loss": 1.1707, "step": 9976 }, { "epoch": 0.5454669710099367, "grad_norm": 1.5499732494354248, "learning_rate": 9.483088360007396e-06, "loss": 1.2744, "step": 9977 }, { "epoch": 0.5455216434536583, "grad_norm": 1.2327936887741089, "learning_rate": 9.481263454318423e-06, "loss": 1.459, "step": 9978 }, { "epoch": 0.5455763158973799, "grad_norm": 1.9539984464645386, "learning_rate": 9.479438565951278e-06, "loss": 1.3258, "step": 9979 }, { "epoch": 0.5456309883411014, "grad_norm": 1.5690593719482422, "learning_rate": 9.477613694966902e-06, "loss": 1.392, "step": 9980 }, { "epoch": 0.545685660784823, "grad_norm": 1.3977174758911133, "learning_rate": 9.475788841426232e-06, "loss": 1.4185, "step": 9981 }, { "epoch": 0.5457403332285444, "grad_norm": 1.6894464492797852, "learning_rate": 9.473964005390198e-06, "loss": 1.2503, "step": 9982 }, { "epoch": 0.545795005672266, "grad_norm": 1.8959236145019531, "learning_rate": 9.472139186919745e-06, "loss": 1.4563, "step": 9983 }, { "epoch": 0.5458496781159876, "grad_norm": 1.6685391664505005, "learning_rate": 9.470314386075801e-06, "loss": 1.3374, "step": 9984 }, { "epoch": 0.5459043505597091, "grad_norm": 1.1047974824905396, "learning_rate": 9.468489602919305e-06, "loss": 1.601, "step": 9985 }, { "epoch": 0.5459590230034307, "grad_norm": 1.744225263595581, "learning_rate": 9.466664837511188e-06, "loss": 1.5088, "step": 9986 }, { "epoch": 0.5460136954471523, "grad_norm": 1.5570628643035889, "learning_rate": 9.464840089912379e-06, "loss": 1.5996, "step": 9987 }, { "epoch": 0.5460683678908738, "grad_norm": 1.5924829244613647, "learning_rate": 9.463015360183819e-06, "loss": 1.2345, "step": 9988 }, { "epoch": 0.5461230403345954, "grad_norm": 1.5577725172042847, "learning_rate": 9.461190648386436e-06, "loss": 1.5644, "step": 9989 }, { "epoch": 0.546177712778317, "grad_norm": 1.4810614585876465, "learning_rate": 9.459365954581162e-06, "loss": 1.5011, "step": 9990 }, { "epoch": 0.5462323852220384, "grad_norm": 1.2347406148910522, "learning_rate": 9.457541278828927e-06, "loss": 1.5137, "step": 9991 }, { "epoch": 0.54628705766576, "grad_norm": 1.4274837970733643, "learning_rate": 9.455716621190662e-06, "loss": 1.5878, "step": 9992 }, { "epoch": 0.5463417301094816, "grad_norm": 1.3600826263427734, "learning_rate": 9.453891981727293e-06, "loss": 1.544, "step": 9993 }, { "epoch": 0.5463964025532031, "grad_norm": 1.5705115795135498, "learning_rate": 9.452067360499755e-06, "loss": 1.1787, "step": 9994 }, { "epoch": 0.5464510749969247, "grad_norm": 1.667967438697815, "learning_rate": 9.450242757568975e-06, "loss": 1.4658, "step": 9995 }, { "epoch": 0.5465057474406463, "grad_norm": 1.4724180698394775, "learning_rate": 9.448418172995875e-06, "loss": 1.3494, "step": 9996 }, { "epoch": 0.5465604198843678, "grad_norm": 1.4509164094924927, "learning_rate": 9.44659360684139e-06, "loss": 1.3545, "step": 9997 }, { "epoch": 0.5466150923280894, "grad_norm": 1.5070143938064575, "learning_rate": 9.44476905916644e-06, "loss": 1.5208, "step": 9998 }, { "epoch": 0.5466697647718108, "grad_norm": 1.4162653684616089, "learning_rate": 9.442944530031957e-06, "loss": 1.49, "step": 9999 }, { "epoch": 0.5467244372155324, "grad_norm": 1.5181773900985718, "learning_rate": 9.441120019498864e-06, "loss": 1.4643, "step": 10000 }, { "epoch": 0.546779109659254, "grad_norm": 1.256861686706543, "learning_rate": 9.439295527628083e-06, "loss": 1.4839, "step": 10001 }, { "epoch": 0.5468337821029755, "grad_norm": 1.4103342294692993, "learning_rate": 9.43747105448054e-06, "loss": 1.5395, "step": 10002 }, { "epoch": 0.5468884545466971, "grad_norm": 1.322877049446106, "learning_rate": 9.43564660011716e-06, "loss": 1.4665, "step": 10003 }, { "epoch": 0.5469431269904187, "grad_norm": 2.314699411392212, "learning_rate": 9.433822164598862e-06, "loss": 1.5076, "step": 10004 }, { "epoch": 0.5469977994341402, "grad_norm": 1.6307332515716553, "learning_rate": 9.431997747986575e-06, "loss": 1.3002, "step": 10005 }, { "epoch": 0.5470524718778618, "grad_norm": 1.9734472036361694, "learning_rate": 9.430173350341214e-06, "loss": 1.2492, "step": 10006 }, { "epoch": 0.5471071443215834, "grad_norm": 1.4094330072402954, "learning_rate": 9.428348971723697e-06, "loss": 1.643, "step": 10007 }, { "epoch": 0.5471618167653048, "grad_norm": 1.4776906967163086, "learning_rate": 9.426524612194954e-06, "loss": 1.2466, "step": 10008 }, { "epoch": 0.5472164892090264, "grad_norm": 1.5767475366592407, "learning_rate": 9.424700271815901e-06, "loss": 1.1239, "step": 10009 }, { "epoch": 0.547271161652748, "grad_norm": 1.4670286178588867, "learning_rate": 9.422875950647453e-06, "loss": 1.5212, "step": 10010 }, { "epoch": 0.5473258340964695, "grad_norm": 1.6395686864852905, "learning_rate": 9.421051648750533e-06, "loss": 1.3305, "step": 10011 }, { "epoch": 0.5473805065401911, "grad_norm": 2.213823080062866, "learning_rate": 9.419227366186058e-06, "loss": 1.4451, "step": 10012 }, { "epoch": 0.5474351789839126, "grad_norm": 1.5433188676834106, "learning_rate": 9.41740310301494e-06, "loss": 1.0618, "step": 10013 }, { "epoch": 0.5474898514276342, "grad_norm": 1.5822430849075317, "learning_rate": 9.415578859298103e-06, "loss": 1.3917, "step": 10014 }, { "epoch": 0.5475445238713558, "grad_norm": 1.7785253524780273, "learning_rate": 9.413754635096454e-06, "loss": 1.2403, "step": 10015 }, { "epoch": 0.5475991963150773, "grad_norm": 1.3325870037078857, "learning_rate": 9.41193043047092e-06, "loss": 1.4301, "step": 10016 }, { "epoch": 0.5476538687587988, "grad_norm": 1.224750280380249, "learning_rate": 9.410106245482406e-06, "loss": 1.3906, "step": 10017 }, { "epoch": 0.5477085412025204, "grad_norm": 1.6051747798919678, "learning_rate": 9.408282080191828e-06, "loss": 1.5451, "step": 10018 }, { "epoch": 0.5477632136462419, "grad_norm": 1.3871984481811523, "learning_rate": 9.406457934660103e-06, "loss": 1.6755, "step": 10019 }, { "epoch": 0.5478178860899635, "grad_norm": 1.6815707683563232, "learning_rate": 9.404633808948139e-06, "loss": 1.3661, "step": 10020 }, { "epoch": 0.5478725585336851, "grad_norm": 1.9106128215789795, "learning_rate": 9.402809703116846e-06, "loss": 1.4954, "step": 10021 }, { "epoch": 0.5479272309774066, "grad_norm": 1.5804286003112793, "learning_rate": 9.400985617227141e-06, "loss": 1.6468, "step": 10022 }, { "epoch": 0.5479819034211282, "grad_norm": 2.0450282096862793, "learning_rate": 9.399161551339933e-06, "loss": 1.359, "step": 10023 }, { "epoch": 0.5480365758648498, "grad_norm": 1.6386382579803467, "learning_rate": 9.397337505516129e-06, "loss": 1.5872, "step": 10024 }, { "epoch": 0.5480912483085713, "grad_norm": 1.4463428258895874, "learning_rate": 9.395513479816642e-06, "loss": 1.5931, "step": 10025 }, { "epoch": 0.5481459207522928, "grad_norm": 1.613360047340393, "learning_rate": 9.393689474302378e-06, "loss": 1.477, "step": 10026 }, { "epoch": 0.5482005931960143, "grad_norm": 1.4281346797943115, "learning_rate": 9.39186548903424e-06, "loss": 1.3341, "step": 10027 }, { "epoch": 0.5482552656397359, "grad_norm": 1.4533661603927612, "learning_rate": 9.390041524073146e-06, "loss": 1.3465, "step": 10028 }, { "epoch": 0.5483099380834575, "grad_norm": 1.131889820098877, "learning_rate": 9.388217579479994e-06, "loss": 1.4944, "step": 10029 }, { "epoch": 0.548364610527179, "grad_norm": 1.4762988090515137, "learning_rate": 9.386393655315696e-06, "loss": 1.4448, "step": 10030 }, { "epoch": 0.5484192829709006, "grad_norm": 1.5813089609146118, "learning_rate": 9.384569751641152e-06, "loss": 1.3222, "step": 10031 }, { "epoch": 0.5484739554146222, "grad_norm": 1.7038118839263916, "learning_rate": 9.382745868517263e-06, "loss": 1.4161, "step": 10032 }, { "epoch": 0.5485286278583437, "grad_norm": 1.3799173831939697, "learning_rate": 9.380922006004944e-06, "loss": 1.8433, "step": 10033 }, { "epoch": 0.5485833003020653, "grad_norm": 1.1941550970077515, "learning_rate": 9.379098164165093e-06, "loss": 1.5813, "step": 10034 }, { "epoch": 0.5486379727457869, "grad_norm": 1.539294719696045, "learning_rate": 9.377274343058604e-06, "loss": 1.4023, "step": 10035 }, { "epoch": 0.5486926451895083, "grad_norm": 2.066324472427368, "learning_rate": 9.375450542746393e-06, "loss": 1.4386, "step": 10036 }, { "epoch": 0.5487473176332299, "grad_norm": 1.548478603363037, "learning_rate": 9.373626763289352e-06, "loss": 1.4027, "step": 10037 }, { "epoch": 0.5488019900769515, "grad_norm": 1.3012975454330444, "learning_rate": 9.371803004748383e-06, "loss": 1.4871, "step": 10038 }, { "epoch": 0.548856662520673, "grad_norm": 1.9123218059539795, "learning_rate": 9.369979267184386e-06, "loss": 1.3816, "step": 10039 }, { "epoch": 0.5489113349643946, "grad_norm": 1.49739670753479, "learning_rate": 9.36815555065826e-06, "loss": 1.5273, "step": 10040 }, { "epoch": 0.5489660074081161, "grad_norm": 1.471885085105896, "learning_rate": 9.366331855230898e-06, "loss": 1.3547, "step": 10041 }, { "epoch": 0.5490206798518377, "grad_norm": 1.9259532690048218, "learning_rate": 9.364508180963209e-06, "loss": 1.347, "step": 10042 }, { "epoch": 0.5490753522955593, "grad_norm": 1.6918141841888428, "learning_rate": 9.36268452791608e-06, "loss": 1.1963, "step": 10043 }, { "epoch": 0.5491300247392807, "grad_norm": 1.8074966669082642, "learning_rate": 9.36086089615041e-06, "loss": 1.4425, "step": 10044 }, { "epoch": 0.5491846971830023, "grad_norm": 2.225398302078247, "learning_rate": 9.359037285727097e-06, "loss": 1.4089, "step": 10045 }, { "epoch": 0.5492393696267239, "grad_norm": 1.2204868793487549, "learning_rate": 9.357213696707028e-06, "loss": 1.6145, "step": 10046 }, { "epoch": 0.5492940420704454, "grad_norm": 1.3861629962921143, "learning_rate": 9.355390129151106e-06, "loss": 1.567, "step": 10047 }, { "epoch": 0.549348714514167, "grad_norm": 1.678189992904663, "learning_rate": 9.35356658312022e-06, "loss": 1.4477, "step": 10048 }, { "epoch": 0.5494033869578886, "grad_norm": 1.9954392910003662, "learning_rate": 9.351743058675263e-06, "loss": 1.3662, "step": 10049 }, { "epoch": 0.5494580594016101, "grad_norm": 1.7148315906524658, "learning_rate": 9.349919555877125e-06, "loss": 1.3643, "step": 10050 }, { "epoch": 0.5495127318453317, "grad_norm": 1.512791633605957, "learning_rate": 9.3480960747867e-06, "loss": 1.4947, "step": 10051 }, { "epoch": 0.5495674042890533, "grad_norm": 1.4631537199020386, "learning_rate": 9.346272615464874e-06, "loss": 1.3126, "step": 10052 }, { "epoch": 0.5496220767327747, "grad_norm": 1.3503801822662354, "learning_rate": 9.344449177972541e-06, "loss": 1.4395, "step": 10053 }, { "epoch": 0.5496767491764963, "grad_norm": 1.6292375326156616, "learning_rate": 9.342625762370589e-06, "loss": 1.2693, "step": 10054 }, { "epoch": 0.5497314216202178, "grad_norm": 1.879607915878296, "learning_rate": 9.340802368719904e-06, "loss": 1.3381, "step": 10055 }, { "epoch": 0.5497860940639394, "grad_norm": 2.1613729000091553, "learning_rate": 9.338978997081378e-06, "loss": 1.4501, "step": 10056 }, { "epoch": 0.549840766507661, "grad_norm": 1.4549834728240967, "learning_rate": 9.337155647515888e-06, "loss": 1.3694, "step": 10057 }, { "epoch": 0.5498954389513825, "grad_norm": 1.497646689414978, "learning_rate": 9.335332320084331e-06, "loss": 1.5541, "step": 10058 }, { "epoch": 0.5499501113951041, "grad_norm": 1.3560034036636353, "learning_rate": 9.333509014847589e-06, "loss": 1.6075, "step": 10059 }, { "epoch": 0.5500047838388257, "grad_norm": 1.2498273849487305, "learning_rate": 9.33168573186654e-06, "loss": 1.4351, "step": 10060 }, { "epoch": 0.5500594562825472, "grad_norm": 1.5684984922409058, "learning_rate": 9.329862471202075e-06, "loss": 1.4213, "step": 10061 }, { "epoch": 0.5501141287262687, "grad_norm": 1.6396583318710327, "learning_rate": 9.328039232915076e-06, "loss": 1.3107, "step": 10062 }, { "epoch": 0.5501688011699903, "grad_norm": 1.7991304397583008, "learning_rate": 9.326216017066422e-06, "loss": 1.2708, "step": 10063 }, { "epoch": 0.5502234736137118, "grad_norm": 1.4068553447723389, "learning_rate": 9.324392823717e-06, "loss": 1.621, "step": 10064 }, { "epoch": 0.5502781460574334, "grad_norm": 1.6910595893859863, "learning_rate": 9.322569652927685e-06, "loss": 1.5548, "step": 10065 }, { "epoch": 0.550332818501155, "grad_norm": 1.6533403396606445, "learning_rate": 9.320746504759355e-06, "loss": 1.1984, "step": 10066 }, { "epoch": 0.5503874909448765, "grad_norm": 1.5824724435806274, "learning_rate": 9.318923379272898e-06, "loss": 1.3596, "step": 10067 }, { "epoch": 0.5504421633885981, "grad_norm": 1.9267393350601196, "learning_rate": 9.317100276529187e-06, "loss": 1.4736, "step": 10068 }, { "epoch": 0.5504968358323196, "grad_norm": 1.6097455024719238, "learning_rate": 9.315277196589097e-06, "loss": 1.4853, "step": 10069 }, { "epoch": 0.5505515082760412, "grad_norm": 1.3956305980682373, "learning_rate": 9.313454139513512e-06, "loss": 1.4272, "step": 10070 }, { "epoch": 0.5506061807197628, "grad_norm": 1.1618753671646118, "learning_rate": 9.3116311053633e-06, "loss": 1.4634, "step": 10071 }, { "epoch": 0.5506608531634842, "grad_norm": 1.5056926012039185, "learning_rate": 9.309808094199343e-06, "loss": 1.4464, "step": 10072 }, { "epoch": 0.5507155256072058, "grad_norm": 1.3432388305664062, "learning_rate": 9.307985106082515e-06, "loss": 1.4305, "step": 10073 }, { "epoch": 0.5507701980509274, "grad_norm": 1.5726432800292969, "learning_rate": 9.306162141073687e-06, "loss": 1.4454, "step": 10074 }, { "epoch": 0.5508248704946489, "grad_norm": 2.1810927391052246, "learning_rate": 9.304339199233733e-06, "loss": 1.776, "step": 10075 }, { "epoch": 0.5508795429383705, "grad_norm": 1.8536906242370605, "learning_rate": 9.302516280623526e-06, "loss": 1.2645, "step": 10076 }, { "epoch": 0.5509342153820921, "grad_norm": 1.5571438074111938, "learning_rate": 9.300693385303934e-06, "loss": 1.5338, "step": 10077 }, { "epoch": 0.5509888878258136, "grad_norm": 1.6717411279678345, "learning_rate": 9.298870513335835e-06, "loss": 1.5485, "step": 10078 }, { "epoch": 0.5510435602695352, "grad_norm": 1.6989104747772217, "learning_rate": 9.297047664780093e-06, "loss": 1.639, "step": 10079 }, { "epoch": 0.5510982327132568, "grad_norm": 1.9004132747650146, "learning_rate": 9.295224839697577e-06, "loss": 1.3907, "step": 10080 }, { "epoch": 0.5511529051569782, "grad_norm": 1.5927913188934326, "learning_rate": 9.293402038149161e-06, "loss": 1.5102, "step": 10081 }, { "epoch": 0.5512075776006998, "grad_norm": 1.699158787727356, "learning_rate": 9.29157926019571e-06, "loss": 1.4987, "step": 10082 }, { "epoch": 0.5512622500444213, "grad_norm": 1.4045331478118896, "learning_rate": 9.289756505898085e-06, "loss": 1.5532, "step": 10083 }, { "epoch": 0.5513169224881429, "grad_norm": 1.1855391263961792, "learning_rate": 9.287933775317161e-06, "loss": 1.685, "step": 10084 }, { "epoch": 0.5513715949318645, "grad_norm": 1.7236173152923584, "learning_rate": 9.286111068513794e-06, "loss": 1.5069, "step": 10085 }, { "epoch": 0.551426267375586, "grad_norm": 1.2666501998901367, "learning_rate": 9.284288385548858e-06, "loss": 1.6134, "step": 10086 }, { "epoch": 0.5514809398193076, "grad_norm": 1.7152971029281616, "learning_rate": 9.282465726483214e-06, "loss": 1.3415, "step": 10087 }, { "epoch": 0.5515356122630292, "grad_norm": 1.1885062456130981, "learning_rate": 9.28064309137772e-06, "loss": 1.7539, "step": 10088 }, { "epoch": 0.5515902847067506, "grad_norm": 1.5920050144195557, "learning_rate": 9.278820480293241e-06, "loss": 1.5881, "step": 10089 }, { "epoch": 0.5516449571504722, "grad_norm": 1.2312198877334595, "learning_rate": 9.276997893290641e-06, "loss": 1.5255, "step": 10090 }, { "epoch": 0.5516996295941938, "grad_norm": 1.6983214616775513, "learning_rate": 9.275175330430774e-06, "loss": 1.2006, "step": 10091 }, { "epoch": 0.5517543020379153, "grad_norm": 1.7131965160369873, "learning_rate": 9.273352791774505e-06, "loss": 1.3306, "step": 10092 }, { "epoch": 0.5518089744816369, "grad_norm": 1.5688927173614502, "learning_rate": 9.271530277382695e-06, "loss": 1.2632, "step": 10093 }, { "epoch": 0.5518636469253585, "grad_norm": 1.4790881872177124, "learning_rate": 9.269707787316194e-06, "loss": 1.242, "step": 10094 }, { "epoch": 0.55191831936908, "grad_norm": 1.546454906463623, "learning_rate": 9.267885321635866e-06, "loss": 1.3047, "step": 10095 }, { "epoch": 0.5519729918128016, "grad_norm": 1.1472312211990356, "learning_rate": 9.266062880402566e-06, "loss": 1.4741, "step": 10096 }, { "epoch": 0.552027664256523, "grad_norm": 1.7257252931594849, "learning_rate": 9.264240463677143e-06, "loss": 1.256, "step": 10097 }, { "epoch": 0.5520823367002446, "grad_norm": 1.6860508918762207, "learning_rate": 9.262418071520464e-06, "loss": 1.5572, "step": 10098 }, { "epoch": 0.5521370091439662, "grad_norm": 1.7395687103271484, "learning_rate": 9.26059570399337e-06, "loss": 1.3531, "step": 10099 }, { "epoch": 0.5521916815876877, "grad_norm": 1.3156652450561523, "learning_rate": 9.258773361156725e-06, "loss": 1.5788, "step": 10100 }, { "epoch": 0.5522463540314093, "grad_norm": 1.646719217300415, "learning_rate": 9.256951043071379e-06, "loss": 1.4711, "step": 10101 }, { "epoch": 0.5523010264751309, "grad_norm": 1.7242763042449951, "learning_rate": 9.255128749798177e-06, "loss": 1.133, "step": 10102 }, { "epoch": 0.5523556989188524, "grad_norm": 1.7076035737991333, "learning_rate": 9.253306481397975e-06, "loss": 1.2643, "step": 10103 }, { "epoch": 0.552410371362574, "grad_norm": 1.7078754901885986, "learning_rate": 9.251484237931625e-06, "loss": 1.3847, "step": 10104 }, { "epoch": 0.5524650438062956, "grad_norm": 1.7568941116333008, "learning_rate": 9.249662019459967e-06, "loss": 1.1674, "step": 10105 }, { "epoch": 0.5525197162500171, "grad_norm": 1.3816266059875488, "learning_rate": 9.247839826043859e-06, "loss": 1.3813, "step": 10106 }, { "epoch": 0.5525743886937386, "grad_norm": 1.7310584783554077, "learning_rate": 9.246017657744142e-06, "loss": 1.3742, "step": 10107 }, { "epoch": 0.5526290611374602, "grad_norm": 1.4482015371322632, "learning_rate": 9.244195514621665e-06, "loss": 1.4835, "step": 10108 }, { "epoch": 0.5526837335811817, "grad_norm": 1.477502703666687, "learning_rate": 9.242373396737277e-06, "loss": 1.4626, "step": 10109 }, { "epoch": 0.5527384060249033, "grad_norm": 1.7090476751327515, "learning_rate": 9.240551304151817e-06, "loss": 1.422, "step": 10110 }, { "epoch": 0.5527930784686248, "grad_norm": 1.4621838331222534, "learning_rate": 9.238729236926126e-06, "loss": 1.3761, "step": 10111 }, { "epoch": 0.5528477509123464, "grad_norm": 1.3907514810562134, "learning_rate": 9.236907195121058e-06, "loss": 1.288, "step": 10112 }, { "epoch": 0.552902423356068, "grad_norm": 1.493072509765625, "learning_rate": 9.235085178797447e-06, "loss": 1.4, "step": 10113 }, { "epoch": 0.5529570957997895, "grad_norm": 1.7974421977996826, "learning_rate": 9.233263188016138e-06, "loss": 1.5514, "step": 10114 }, { "epoch": 0.5530117682435111, "grad_norm": 1.450134038925171, "learning_rate": 9.231441222837971e-06, "loss": 1.5377, "step": 10115 }, { "epoch": 0.5530664406872327, "grad_norm": 1.642806887626648, "learning_rate": 9.22961928332378e-06, "loss": 1.6694, "step": 10116 }, { "epoch": 0.5531211131309541, "grad_norm": 1.4357517957687378, "learning_rate": 9.227797369534415e-06, "loss": 1.3031, "step": 10117 }, { "epoch": 0.5531757855746757, "grad_norm": 1.5613895654678345, "learning_rate": 9.225975481530707e-06, "loss": 1.3897, "step": 10118 }, { "epoch": 0.5532304580183973, "grad_norm": 1.5255120992660522, "learning_rate": 9.22415361937349e-06, "loss": 1.3789, "step": 10119 }, { "epoch": 0.5532851304621188, "grad_norm": 1.3382751941680908, "learning_rate": 9.222331783123608e-06, "loss": 1.4961, "step": 10120 }, { "epoch": 0.5533398029058404, "grad_norm": 1.6044234037399292, "learning_rate": 9.220509972841893e-06, "loss": 1.4963, "step": 10121 }, { "epoch": 0.553394475349562, "grad_norm": 1.4196131229400635, "learning_rate": 9.218688188589176e-06, "loss": 1.213, "step": 10122 }, { "epoch": 0.5534491477932835, "grad_norm": 1.4044297933578491, "learning_rate": 9.216866430426297e-06, "loss": 1.4255, "step": 10123 }, { "epoch": 0.5535038202370051, "grad_norm": 1.7228115797042847, "learning_rate": 9.215044698414086e-06, "loss": 1.407, "step": 10124 }, { "epoch": 0.5535584926807265, "grad_norm": 1.6288203001022339, "learning_rate": 9.213222992613368e-06, "loss": 1.5316, "step": 10125 }, { "epoch": 0.5536131651244481, "grad_norm": 1.5195024013519287, "learning_rate": 9.211401313084986e-06, "loss": 1.5967, "step": 10126 }, { "epoch": 0.5536678375681697, "grad_norm": 1.5081043243408203, "learning_rate": 9.209579659889762e-06, "loss": 1.4715, "step": 10127 }, { "epoch": 0.5537225100118912, "grad_norm": 1.601570963859558, "learning_rate": 9.207758033088533e-06, "loss": 1.4497, "step": 10128 }, { "epoch": 0.5537771824556128, "grad_norm": 1.5259307622909546, "learning_rate": 9.205936432742119e-06, "loss": 1.5729, "step": 10129 }, { "epoch": 0.5538318548993344, "grad_norm": 1.5176753997802734, "learning_rate": 9.204114858911346e-06, "loss": 1.5011, "step": 10130 }, { "epoch": 0.5538865273430559, "grad_norm": 1.3806273937225342, "learning_rate": 9.202293311657053e-06, "loss": 1.4528, "step": 10131 }, { "epoch": 0.5539411997867775, "grad_norm": 1.5473988056182861, "learning_rate": 9.200471791040056e-06, "loss": 1.4526, "step": 10132 }, { "epoch": 0.5539958722304991, "grad_norm": 1.6012505292892456, "learning_rate": 9.198650297121181e-06, "loss": 1.2022, "step": 10133 }, { "epoch": 0.5540505446742205, "grad_norm": 1.4426337480545044, "learning_rate": 9.196828829961254e-06, "loss": 1.3739, "step": 10134 }, { "epoch": 0.5541052171179421, "grad_norm": 1.7538539171218872, "learning_rate": 9.195007389621098e-06, "loss": 1.3825, "step": 10135 }, { "epoch": 0.5541598895616637, "grad_norm": 1.8337126970291138, "learning_rate": 9.193185976161529e-06, "loss": 1.1977, "step": 10136 }, { "epoch": 0.5542145620053852, "grad_norm": 1.6735661029815674, "learning_rate": 9.191364589643378e-06, "loss": 1.422, "step": 10137 }, { "epoch": 0.5542692344491068, "grad_norm": 1.1530814170837402, "learning_rate": 9.189543230127463e-06, "loss": 1.5558, "step": 10138 }, { "epoch": 0.5543239068928283, "grad_norm": 1.428802728652954, "learning_rate": 9.187721897674595e-06, "loss": 1.4602, "step": 10139 }, { "epoch": 0.5543785793365499, "grad_norm": 1.699715495109558, "learning_rate": 9.185900592345603e-06, "loss": 1.2773, "step": 10140 }, { "epoch": 0.5544332517802715, "grad_norm": 1.2656819820404053, "learning_rate": 9.1840793142013e-06, "loss": 1.6918, "step": 10141 }, { "epoch": 0.554487924223993, "grad_norm": 2.5121097564697266, "learning_rate": 9.182258063302504e-06, "loss": 1.087, "step": 10142 }, { "epoch": 0.5545425966677145, "grad_norm": 1.4697957038879395, "learning_rate": 9.18043683971003e-06, "loss": 1.5194, "step": 10143 }, { "epoch": 0.5545972691114361, "grad_norm": 1.4098918437957764, "learning_rate": 9.178615643484689e-06, "loss": 1.3043, "step": 10144 }, { "epoch": 0.5546519415551576, "grad_norm": 1.5386769771575928, "learning_rate": 9.176794474687305e-06, "loss": 1.6401, "step": 10145 }, { "epoch": 0.5547066139988792, "grad_norm": 1.2280553579330444, "learning_rate": 9.174973333378684e-06, "loss": 1.2747, "step": 10146 }, { "epoch": 0.5547612864426008, "grad_norm": 1.6582374572753906, "learning_rate": 9.173152219619637e-06, "loss": 1.586, "step": 10147 }, { "epoch": 0.5548159588863223, "grad_norm": 1.835609793663025, "learning_rate": 9.171331133470979e-06, "loss": 1.4134, "step": 10148 }, { "epoch": 0.5548706313300439, "grad_norm": 2.081333637237549, "learning_rate": 9.16951007499352e-06, "loss": 1.4008, "step": 10149 }, { "epoch": 0.5549253037737655, "grad_norm": 1.6351300477981567, "learning_rate": 9.167689044248065e-06, "loss": 1.554, "step": 10150 }, { "epoch": 0.554979976217487, "grad_norm": 1.4723541736602783, "learning_rate": 9.16586804129543e-06, "loss": 1.2294, "step": 10151 }, { "epoch": 0.5550346486612086, "grad_norm": 1.371894121170044, "learning_rate": 9.164047066196417e-06, "loss": 1.4359, "step": 10152 }, { "epoch": 0.55508932110493, "grad_norm": 1.490033507347107, "learning_rate": 9.162226119011832e-06, "loss": 1.3556, "step": 10153 }, { "epoch": 0.5551439935486516, "grad_norm": 1.1924424171447754, "learning_rate": 9.160405199802487e-06, "loss": 1.4021, "step": 10154 }, { "epoch": 0.5551986659923732, "grad_norm": 1.31260085105896, "learning_rate": 9.158584308629175e-06, "loss": 1.5698, "step": 10155 }, { "epoch": 0.5552533384360947, "grad_norm": 1.4256690740585327, "learning_rate": 9.156763445552714e-06, "loss": 1.5017, "step": 10156 }, { "epoch": 0.5553080108798163, "grad_norm": 1.5318893194198608, "learning_rate": 9.154942610633901e-06, "loss": 1.3265, "step": 10157 }, { "epoch": 0.5553626833235379, "grad_norm": 1.8847428560256958, "learning_rate": 9.153121803933532e-06, "loss": 1.3779, "step": 10158 }, { "epoch": 0.5554173557672594, "grad_norm": 1.566867470741272, "learning_rate": 9.151301025512417e-06, "loss": 1.4169, "step": 10159 }, { "epoch": 0.555472028210981, "grad_norm": 1.2192519903182983, "learning_rate": 9.149480275431353e-06, "loss": 1.487, "step": 10160 }, { "epoch": 0.5555267006547026, "grad_norm": 1.3773128986358643, "learning_rate": 9.147659553751135e-06, "loss": 1.6112, "step": 10161 }, { "epoch": 0.555581373098424, "grad_norm": 1.5758564472198486, "learning_rate": 9.145838860532567e-06, "loss": 1.1936, "step": 10162 }, { "epoch": 0.5556360455421456, "grad_norm": 1.347749948501587, "learning_rate": 9.144018195836445e-06, "loss": 1.2858, "step": 10163 }, { "epoch": 0.5556907179858672, "grad_norm": 1.5834767818450928, "learning_rate": 9.14219755972356e-06, "loss": 1.4659, "step": 10164 }, { "epoch": 0.5557453904295887, "grad_norm": 1.2557556629180908, "learning_rate": 9.140376952254713e-06, "loss": 1.4152, "step": 10165 }, { "epoch": 0.5558000628733103, "grad_norm": 1.317510724067688, "learning_rate": 9.1385563734907e-06, "loss": 1.4916, "step": 10166 }, { "epoch": 0.5558547353170318, "grad_norm": 1.5022515058517456, "learning_rate": 9.136735823492307e-06, "loss": 1.4508, "step": 10167 }, { "epoch": 0.5559094077607534, "grad_norm": 1.3875582218170166, "learning_rate": 9.134915302320334e-06, "loss": 1.3118, "step": 10168 }, { "epoch": 0.555964080204475, "grad_norm": 1.8688433170318604, "learning_rate": 9.133094810035564e-06, "loss": 1.3677, "step": 10169 }, { "epoch": 0.5560187526481964, "grad_norm": 1.383579969406128, "learning_rate": 9.131274346698797e-06, "loss": 1.5388, "step": 10170 }, { "epoch": 0.556073425091918, "grad_norm": 1.4961143732070923, "learning_rate": 9.129453912370817e-06, "loss": 1.6588, "step": 10171 }, { "epoch": 0.5561280975356396, "grad_norm": 1.6921042203903198, "learning_rate": 9.127633507112412e-06, "loss": 1.4322, "step": 10172 }, { "epoch": 0.5561827699793611, "grad_norm": 1.6113874912261963, "learning_rate": 9.125813130984374e-06, "loss": 1.6584, "step": 10173 }, { "epoch": 0.5562374424230827, "grad_norm": 1.918479561805725, "learning_rate": 9.123992784047487e-06, "loss": 1.5215, "step": 10174 }, { "epoch": 0.5562921148668043, "grad_norm": 1.575848937034607, "learning_rate": 9.122172466362533e-06, "loss": 1.5754, "step": 10175 }, { "epoch": 0.5563467873105258, "grad_norm": 1.3997331857681274, "learning_rate": 9.120352177990303e-06, "loss": 1.5464, "step": 10176 }, { "epoch": 0.5564014597542474, "grad_norm": 2.9164175987243652, "learning_rate": 9.118531918991578e-06, "loss": 1.321, "step": 10177 }, { "epoch": 0.556456132197969, "grad_norm": 1.4826604127883911, "learning_rate": 9.116711689427137e-06, "loss": 1.4331, "step": 10178 }, { "epoch": 0.5565108046416904, "grad_norm": 1.3204851150512695, "learning_rate": 9.11489148935777e-06, "loss": 1.2952, "step": 10179 }, { "epoch": 0.556565477085412, "grad_norm": 1.4025074243545532, "learning_rate": 9.113071318844252e-06, "loss": 1.5057, "step": 10180 }, { "epoch": 0.5566201495291335, "grad_norm": 1.740812063217163, "learning_rate": 9.111251177947363e-06, "loss": 1.4415, "step": 10181 }, { "epoch": 0.5566748219728551, "grad_norm": 1.865410327911377, "learning_rate": 9.109431066727883e-06, "loss": 1.2869, "step": 10182 }, { "epoch": 0.5567294944165767, "grad_norm": 1.970666766166687, "learning_rate": 9.107610985246586e-06, "loss": 1.4329, "step": 10183 }, { "epoch": 0.5567841668602982, "grad_norm": 1.310897946357727, "learning_rate": 9.105790933564259e-06, "loss": 1.3562, "step": 10184 }, { "epoch": 0.5568388393040198, "grad_norm": 1.2437679767608643, "learning_rate": 9.103970911741668e-06, "loss": 1.7014, "step": 10185 }, { "epoch": 0.5568935117477414, "grad_norm": 1.5137653350830078, "learning_rate": 9.10215091983959e-06, "loss": 1.2887, "step": 10186 }, { "epoch": 0.5569481841914629, "grad_norm": 1.601913332939148, "learning_rate": 9.100330957918802e-06, "loss": 1.5466, "step": 10187 }, { "epoch": 0.5570028566351845, "grad_norm": 1.746018648147583, "learning_rate": 9.098511026040072e-06, "loss": 1.4516, "step": 10188 }, { "epoch": 0.557057529078906, "grad_norm": 1.2636982202529907, "learning_rate": 9.096691124264173e-06, "loss": 1.5914, "step": 10189 }, { "epoch": 0.5571122015226275, "grad_norm": 1.576630711555481, "learning_rate": 9.09487125265188e-06, "loss": 1.3076, "step": 10190 }, { "epoch": 0.5571668739663491, "grad_norm": 1.491495132446289, "learning_rate": 9.09305141126396e-06, "loss": 1.3562, "step": 10191 }, { "epoch": 0.5572215464100707, "grad_norm": 1.628411054611206, "learning_rate": 9.09123160016118e-06, "loss": 1.4942, "step": 10192 }, { "epoch": 0.5572762188537922, "grad_norm": 1.5465768575668335, "learning_rate": 9.089411819404311e-06, "loss": 1.3398, "step": 10193 }, { "epoch": 0.5573308912975138, "grad_norm": 1.761866807937622, "learning_rate": 9.08759206905412e-06, "loss": 1.3347, "step": 10194 }, { "epoch": 0.5573855637412354, "grad_norm": 1.8044294118881226, "learning_rate": 9.085772349171364e-06, "loss": 1.1762, "step": 10195 }, { "epoch": 0.5574402361849569, "grad_norm": 1.870086431503296, "learning_rate": 9.08395265981682e-06, "loss": 1.4921, "step": 10196 }, { "epoch": 0.5574949086286785, "grad_norm": 1.6588456630706787, "learning_rate": 9.08213300105124e-06, "loss": 1.4629, "step": 10197 }, { "epoch": 0.5575495810723999, "grad_norm": 1.355366587638855, "learning_rate": 9.080313372935399e-06, "loss": 1.5409, "step": 10198 }, { "epoch": 0.5576042535161215, "grad_norm": 1.5791189670562744, "learning_rate": 9.078493775530053e-06, "loss": 1.2596, "step": 10199 }, { "epoch": 0.5576589259598431, "grad_norm": 1.3567909002304077, "learning_rate": 9.07667420889596e-06, "loss": 1.5506, "step": 10200 }, { "epoch": 0.5577135984035646, "grad_norm": 1.3632436990737915, "learning_rate": 9.074854673093882e-06, "loss": 1.3274, "step": 10201 }, { "epoch": 0.5577682708472862, "grad_norm": 1.4194369316101074, "learning_rate": 9.07303516818458e-06, "loss": 1.2601, "step": 10202 }, { "epoch": 0.5578229432910078, "grad_norm": 1.5673977136611938, "learning_rate": 9.071215694228801e-06, "loss": 1.1633, "step": 10203 }, { "epoch": 0.5578776157347293, "grad_norm": 1.56361985206604, "learning_rate": 9.069396251287319e-06, "loss": 1.3808, "step": 10204 }, { "epoch": 0.5579322881784509, "grad_norm": 1.257116675376892, "learning_rate": 9.067576839420876e-06, "loss": 1.3014, "step": 10205 }, { "epoch": 0.5579869606221725, "grad_norm": 1.4106707572937012, "learning_rate": 9.065757458690228e-06, "loss": 1.2832, "step": 10206 }, { "epoch": 0.5580416330658939, "grad_norm": 1.545837163925171, "learning_rate": 9.063938109156135e-06, "loss": 1.4869, "step": 10207 }, { "epoch": 0.5580963055096155, "grad_norm": 1.2563889026641846, "learning_rate": 9.062118790879344e-06, "loss": 1.5857, "step": 10208 }, { "epoch": 0.5581509779533371, "grad_norm": 1.203938603401184, "learning_rate": 9.060299503920603e-06, "loss": 1.2641, "step": 10209 }, { "epoch": 0.5582056503970586, "grad_norm": 1.7657790184020996, "learning_rate": 9.058480248340672e-06, "loss": 1.2723, "step": 10210 }, { "epoch": 0.5582603228407802, "grad_norm": 1.6087089776992798, "learning_rate": 9.056661024200291e-06, "loss": 1.4678, "step": 10211 }, { "epoch": 0.5583149952845017, "grad_norm": 1.8501187562942505, "learning_rate": 9.054841831560216e-06, "loss": 1.2889, "step": 10212 }, { "epoch": 0.5583696677282233, "grad_norm": 1.2991759777069092, "learning_rate": 9.053022670481189e-06, "loss": 1.72, "step": 10213 }, { "epoch": 0.5584243401719449, "grad_norm": 1.6250056028366089, "learning_rate": 9.051203541023952e-06, "loss": 1.602, "step": 10214 }, { "epoch": 0.5584790126156663, "grad_norm": 2.0798656940460205, "learning_rate": 9.049384443249261e-06, "loss": 1.2936, "step": 10215 }, { "epoch": 0.5585336850593879, "grad_norm": 1.852260708808899, "learning_rate": 9.047565377217855e-06, "loss": 1.6036, "step": 10216 }, { "epoch": 0.5585883575031095, "grad_norm": 1.3366044759750366, "learning_rate": 9.04574634299047e-06, "loss": 1.5816, "step": 10217 }, { "epoch": 0.558643029946831, "grad_norm": 1.5431392192840576, "learning_rate": 9.043927340627858e-06, "loss": 1.4832, "step": 10218 }, { "epoch": 0.5586977023905526, "grad_norm": 1.3591896295547485, "learning_rate": 9.042108370190757e-06, "loss": 1.2215, "step": 10219 }, { "epoch": 0.5587523748342742, "grad_norm": 1.3219032287597656, "learning_rate": 9.040289431739902e-06, "loss": 1.4283, "step": 10220 }, { "epoch": 0.5588070472779957, "grad_norm": 1.6061909198760986, "learning_rate": 9.038470525336037e-06, "loss": 1.164, "step": 10221 }, { "epoch": 0.5588617197217173, "grad_norm": 1.7292320728302002, "learning_rate": 9.036651651039898e-06, "loss": 1.517, "step": 10222 }, { "epoch": 0.5589163921654389, "grad_norm": 1.2827235460281372, "learning_rate": 9.034832808912215e-06, "loss": 1.254, "step": 10223 }, { "epoch": 0.5589710646091604, "grad_norm": 1.528208613395691, "learning_rate": 9.033013999013737e-06, "loss": 1.4596, "step": 10224 }, { "epoch": 0.5590257370528819, "grad_norm": 1.552841067314148, "learning_rate": 9.031195221405185e-06, "loss": 1.5249, "step": 10225 }, { "epoch": 0.5590804094966034, "grad_norm": 1.3830195665359497, "learning_rate": 9.029376476147303e-06, "loss": 1.3019, "step": 10226 }, { "epoch": 0.559135081940325, "grad_norm": 1.6877994537353516, "learning_rate": 9.027557763300815e-06, "loss": 1.4691, "step": 10227 }, { "epoch": 0.5591897543840466, "grad_norm": 1.3002861738204956, "learning_rate": 9.025739082926454e-06, "loss": 1.3904, "step": 10228 }, { "epoch": 0.5592444268277681, "grad_norm": 1.5812827348709106, "learning_rate": 9.023920435084955e-06, "loss": 1.3033, "step": 10229 }, { "epoch": 0.5592990992714897, "grad_norm": 1.5880851745605469, "learning_rate": 9.02210181983704e-06, "loss": 1.4959, "step": 10230 }, { "epoch": 0.5593537717152113, "grad_norm": 1.4660130739212036, "learning_rate": 9.020283237243441e-06, "loss": 1.4691, "step": 10231 }, { "epoch": 0.5594084441589328, "grad_norm": 1.724912166595459, "learning_rate": 9.018464687364885e-06, "loss": 1.6986, "step": 10232 }, { "epoch": 0.5594631166026544, "grad_norm": 1.4946784973144531, "learning_rate": 9.016646170262096e-06, "loss": 1.4523, "step": 10233 }, { "epoch": 0.559517789046376, "grad_norm": 1.6684041023254395, "learning_rate": 9.014827685995795e-06, "loss": 1.2945, "step": 10234 }, { "epoch": 0.5595724614900974, "grad_norm": 1.348747730255127, "learning_rate": 9.013009234626715e-06, "loss": 1.3305, "step": 10235 }, { "epoch": 0.559627133933819, "grad_norm": 1.4398428201675415, "learning_rate": 9.01119081621557e-06, "loss": 1.3481, "step": 10236 }, { "epoch": 0.5596818063775406, "grad_norm": 1.5703314542770386, "learning_rate": 9.009372430823082e-06, "loss": 1.3675, "step": 10237 }, { "epoch": 0.5597364788212621, "grad_norm": 1.6875814199447632, "learning_rate": 9.007554078509975e-06, "loss": 1.682, "step": 10238 }, { "epoch": 0.5597911512649837, "grad_norm": 1.2787001132965088, "learning_rate": 9.005735759336965e-06, "loss": 1.6385, "step": 10239 }, { "epoch": 0.5598458237087052, "grad_norm": 1.2063939571380615, "learning_rate": 9.003917473364774e-06, "loss": 1.2084, "step": 10240 }, { "epoch": 0.5599004961524268, "grad_norm": 2.021440267562866, "learning_rate": 9.002099220654116e-06, "loss": 1.0753, "step": 10241 }, { "epoch": 0.5599551685961484, "grad_norm": 1.3465462923049927, "learning_rate": 9.000281001265702e-06, "loss": 1.4556, "step": 10242 }, { "epoch": 0.5600098410398698, "grad_norm": 1.034391164779663, "learning_rate": 8.998462815260255e-06, "loss": 1.3969, "step": 10243 }, { "epoch": 0.5600645134835914, "grad_norm": 1.452406883239746, "learning_rate": 8.996644662698485e-06, "loss": 1.4141, "step": 10244 }, { "epoch": 0.560119185927313, "grad_norm": 1.3483757972717285, "learning_rate": 8.994826543641102e-06, "loss": 1.5243, "step": 10245 }, { "epoch": 0.5601738583710345, "grad_norm": 1.3946632146835327, "learning_rate": 8.993008458148822e-06, "loss": 1.3943, "step": 10246 }, { "epoch": 0.5602285308147561, "grad_norm": 1.4996819496154785, "learning_rate": 8.991190406282352e-06, "loss": 1.3938, "step": 10247 }, { "epoch": 0.5602832032584777, "grad_norm": 1.535549283027649, "learning_rate": 8.989372388102398e-06, "loss": 1.4635, "step": 10248 }, { "epoch": 0.5603378757021992, "grad_norm": 2.050241470336914, "learning_rate": 8.987554403669676e-06, "loss": 1.6618, "step": 10249 }, { "epoch": 0.5603925481459208, "grad_norm": 1.7270468473434448, "learning_rate": 8.985736453044887e-06, "loss": 1.572, "step": 10250 }, { "epoch": 0.5604472205896424, "grad_norm": 1.1822192668914795, "learning_rate": 8.983918536288736e-06, "loss": 1.4629, "step": 10251 }, { "epoch": 0.5605018930333638, "grad_norm": 1.4443479776382446, "learning_rate": 8.982100653461932e-06, "loss": 1.3162, "step": 10252 }, { "epoch": 0.5605565654770854, "grad_norm": 1.8228989839553833, "learning_rate": 8.980282804625172e-06, "loss": 1.4331, "step": 10253 }, { "epoch": 0.5606112379208069, "grad_norm": 1.3430838584899902, "learning_rate": 8.978464989839165e-06, "loss": 1.2998, "step": 10254 }, { "epoch": 0.5606659103645285, "grad_norm": 1.4408869743347168, "learning_rate": 8.97664720916461e-06, "loss": 1.5045, "step": 10255 }, { "epoch": 0.5607205828082501, "grad_norm": 1.5854603052139282, "learning_rate": 8.974829462662201e-06, "loss": 1.3899, "step": 10256 }, { "epoch": 0.5607752552519716, "grad_norm": 1.526417851448059, "learning_rate": 8.973011750392648e-06, "loss": 1.3738, "step": 10257 }, { "epoch": 0.5608299276956932, "grad_norm": 1.251535415649414, "learning_rate": 8.97119407241664e-06, "loss": 1.4957, "step": 10258 }, { "epoch": 0.5608846001394148, "grad_norm": 1.2887848615646362, "learning_rate": 8.969376428794877e-06, "loss": 1.2392, "step": 10259 }, { "epoch": 0.5609392725831363, "grad_norm": 2.0378549098968506, "learning_rate": 8.967558819588052e-06, "loss": 1.379, "step": 10260 }, { "epoch": 0.5609939450268578, "grad_norm": 1.4646871089935303, "learning_rate": 8.965741244856864e-06, "loss": 1.1157, "step": 10261 }, { "epoch": 0.5610486174705794, "grad_norm": 1.417319893836975, "learning_rate": 8.963923704661996e-06, "loss": 1.4558, "step": 10262 }, { "epoch": 0.5611032899143009, "grad_norm": 1.8610366582870483, "learning_rate": 8.962106199064152e-06, "loss": 1.3206, "step": 10263 }, { "epoch": 0.5611579623580225, "grad_norm": 1.7385194301605225, "learning_rate": 8.960288728124018e-06, "loss": 1.4433, "step": 10264 }, { "epoch": 0.5612126348017441, "grad_norm": 1.8778163194656372, "learning_rate": 8.95847129190228e-06, "loss": 1.3716, "step": 10265 }, { "epoch": 0.5612673072454656, "grad_norm": 1.370263934135437, "learning_rate": 8.956653890459632e-06, "loss": 1.5006, "step": 10266 }, { "epoch": 0.5613219796891872, "grad_norm": 1.9464375972747803, "learning_rate": 8.954836523856755e-06, "loss": 1.4161, "step": 10267 }, { "epoch": 0.5613766521329087, "grad_norm": 1.8171974420547485, "learning_rate": 8.953019192154344e-06, "loss": 1.1533, "step": 10268 }, { "epoch": 0.5614313245766303, "grad_norm": 1.664807677268982, "learning_rate": 8.951201895413078e-06, "loss": 1.3591, "step": 10269 }, { "epoch": 0.5614859970203518, "grad_norm": 1.4964793920516968, "learning_rate": 8.94938463369364e-06, "loss": 1.355, "step": 10270 }, { "epoch": 0.5615406694640733, "grad_norm": 1.6272157430648804, "learning_rate": 8.947567407056716e-06, "loss": 1.3792, "step": 10271 }, { "epoch": 0.5615953419077949, "grad_norm": 1.4528886079788208, "learning_rate": 8.945750215562987e-06, "loss": 1.3241, "step": 10272 }, { "epoch": 0.5616500143515165, "grad_norm": 1.7875069379806519, "learning_rate": 8.943933059273127e-06, "loss": 1.3148, "step": 10273 }, { "epoch": 0.561704686795238, "grad_norm": 1.4940978288650513, "learning_rate": 8.942115938247824e-06, "loss": 1.5986, "step": 10274 }, { "epoch": 0.5617593592389596, "grad_norm": 1.6394203901290894, "learning_rate": 8.940298852547753e-06, "loss": 1.3536, "step": 10275 }, { "epoch": 0.5618140316826812, "grad_norm": 1.657840609550476, "learning_rate": 8.938481802233587e-06, "loss": 1.3076, "step": 10276 }, { "epoch": 0.5618687041264027, "grad_norm": 1.4076651334762573, "learning_rate": 8.936664787366007e-06, "loss": 1.5082, "step": 10277 }, { "epoch": 0.5619233765701243, "grad_norm": 1.8322519063949585, "learning_rate": 8.934847808005684e-06, "loss": 1.3644, "step": 10278 }, { "epoch": 0.5619780490138458, "grad_norm": 2.3343310356140137, "learning_rate": 8.933030864213292e-06, "loss": 1.4488, "step": 10279 }, { "epoch": 0.5620327214575673, "grad_norm": 1.6975919008255005, "learning_rate": 8.931213956049505e-06, "loss": 1.6005, "step": 10280 }, { "epoch": 0.5620873939012889, "grad_norm": 1.4335274696350098, "learning_rate": 8.929397083574987e-06, "loss": 1.5611, "step": 10281 }, { "epoch": 0.5621420663450104, "grad_norm": 1.6938797235488892, "learning_rate": 8.927580246850418e-06, "loss": 1.457, "step": 10282 }, { "epoch": 0.562196738788732, "grad_norm": 1.5151687860488892, "learning_rate": 8.92576344593646e-06, "loss": 1.5006, "step": 10283 }, { "epoch": 0.5622514112324536, "grad_norm": 1.7746191024780273, "learning_rate": 8.923946680893781e-06, "loss": 1.1889, "step": 10284 }, { "epoch": 0.5623060836761751, "grad_norm": 2.6894803047180176, "learning_rate": 8.922129951783047e-06, "loss": 1.4515, "step": 10285 }, { "epoch": 0.5623607561198967, "grad_norm": 1.9646070003509521, "learning_rate": 8.920313258664925e-06, "loss": 1.2019, "step": 10286 }, { "epoch": 0.5624154285636183, "grad_norm": 1.7109527587890625, "learning_rate": 8.918496601600072e-06, "loss": 1.4746, "step": 10287 }, { "epoch": 0.5624701010073397, "grad_norm": 1.3863993883132935, "learning_rate": 8.916679980649159e-06, "loss": 1.2888, "step": 10288 }, { "epoch": 0.5625247734510613, "grad_norm": 1.508500337600708, "learning_rate": 8.914863395872844e-06, "loss": 1.4242, "step": 10289 }, { "epoch": 0.5625794458947829, "grad_norm": 1.4451994895935059, "learning_rate": 8.913046847331784e-06, "loss": 1.4649, "step": 10290 }, { "epoch": 0.5626341183385044, "grad_norm": 1.5965832471847534, "learning_rate": 8.911230335086643e-06, "loss": 1.4428, "step": 10291 }, { "epoch": 0.562688790782226, "grad_norm": 1.3350605964660645, "learning_rate": 8.909413859198075e-06, "loss": 1.3545, "step": 10292 }, { "epoch": 0.5627434632259476, "grad_norm": 1.5371381044387817, "learning_rate": 8.907597419726736e-06, "loss": 1.604, "step": 10293 }, { "epoch": 0.5627981356696691, "grad_norm": 1.8959699869155884, "learning_rate": 8.905781016733285e-06, "loss": 1.2279, "step": 10294 }, { "epoch": 0.5628528081133907, "grad_norm": 1.7070667743682861, "learning_rate": 8.90396465027837e-06, "loss": 1.2349, "step": 10295 }, { "epoch": 0.5629074805571121, "grad_norm": 1.931138515472412, "learning_rate": 8.90214832042265e-06, "loss": 1.3182, "step": 10296 }, { "epoch": 0.5629621530008337, "grad_norm": 1.5806093215942383, "learning_rate": 8.900332027226776e-06, "loss": 1.2817, "step": 10297 }, { "epoch": 0.5630168254445553, "grad_norm": 1.4050066471099854, "learning_rate": 8.89851577075139e-06, "loss": 1.4701, "step": 10298 }, { "epoch": 0.5630714978882768, "grad_norm": 1.5120294094085693, "learning_rate": 8.896699551057151e-06, "loss": 1.5809, "step": 10299 }, { "epoch": 0.5631261703319984, "grad_norm": 1.1112676858901978, "learning_rate": 8.894883368204704e-06, "loss": 1.6347, "step": 10300 }, { "epoch": 0.56318084277572, "grad_norm": 2.826122283935547, "learning_rate": 8.89306722225469e-06, "loss": 0.932, "step": 10301 }, { "epoch": 0.5632355152194415, "grad_norm": 1.6910028457641602, "learning_rate": 8.891251113267763e-06, "loss": 1.2849, "step": 10302 }, { "epoch": 0.5632901876631631, "grad_norm": 1.3545515537261963, "learning_rate": 8.889435041304565e-06, "loss": 1.5468, "step": 10303 }, { "epoch": 0.5633448601068847, "grad_norm": 1.2877025604248047, "learning_rate": 8.887619006425732e-06, "loss": 1.7003, "step": 10304 }, { "epoch": 0.5633995325506062, "grad_norm": 1.3036161661148071, "learning_rate": 8.885803008691914e-06, "loss": 1.5841, "step": 10305 }, { "epoch": 0.5634542049943277, "grad_norm": 1.4597536325454712, "learning_rate": 8.883987048163746e-06, "loss": 1.3879, "step": 10306 }, { "epoch": 0.5635088774380493, "grad_norm": 1.4391640424728394, "learning_rate": 8.882171124901867e-06, "loss": 1.3342, "step": 10307 }, { "epoch": 0.5635635498817708, "grad_norm": 2.318840503692627, "learning_rate": 8.880355238966923e-06, "loss": 1.2084, "step": 10308 }, { "epoch": 0.5636182223254924, "grad_norm": 0.9625078439712524, "learning_rate": 8.878539390419542e-06, "loss": 1.7049, "step": 10309 }, { "epoch": 0.5636728947692139, "grad_norm": 1.3685754537582397, "learning_rate": 8.876723579320363e-06, "loss": 1.6701, "step": 10310 }, { "epoch": 0.5637275672129355, "grad_norm": 1.2822046279907227, "learning_rate": 8.87490780573002e-06, "loss": 1.3362, "step": 10311 }, { "epoch": 0.5637822396566571, "grad_norm": 1.6549338102340698, "learning_rate": 8.87309206970914e-06, "loss": 1.4863, "step": 10312 }, { "epoch": 0.5638369121003786, "grad_norm": 1.3755443096160889, "learning_rate": 8.871276371318367e-06, "loss": 1.5769, "step": 10313 }, { "epoch": 0.5638915845441002, "grad_norm": 1.7066845893859863, "learning_rate": 8.869460710618324e-06, "loss": 1.347, "step": 10314 }, { "epoch": 0.5639462569878217, "grad_norm": 1.0433367490768433, "learning_rate": 8.867645087669637e-06, "loss": 1.5633, "step": 10315 }, { "epoch": 0.5640009294315432, "grad_norm": 1.4727782011032104, "learning_rate": 8.865829502532942e-06, "loss": 1.4597, "step": 10316 }, { "epoch": 0.5640556018752648, "grad_norm": 1.8902338743209839, "learning_rate": 8.86401395526886e-06, "loss": 1.1809, "step": 10317 }, { "epoch": 0.5641102743189864, "grad_norm": 1.5439261198043823, "learning_rate": 8.862198445938013e-06, "loss": 1.1984, "step": 10318 }, { "epoch": 0.5641649467627079, "grad_norm": 1.2889909744262695, "learning_rate": 8.860382974601035e-06, "loss": 1.5844, "step": 10319 }, { "epoch": 0.5642196192064295, "grad_norm": 1.5354136228561401, "learning_rate": 8.858567541318543e-06, "loss": 1.2653, "step": 10320 }, { "epoch": 0.5642742916501511, "grad_norm": 1.4414069652557373, "learning_rate": 8.856752146151156e-06, "loss": 1.244, "step": 10321 }, { "epoch": 0.5643289640938726, "grad_norm": 1.5289181470870972, "learning_rate": 8.854936789159501e-06, "loss": 1.3076, "step": 10322 }, { "epoch": 0.5643836365375942, "grad_norm": 1.696273922920227, "learning_rate": 8.853121470404193e-06, "loss": 1.5656, "step": 10323 }, { "epoch": 0.5644383089813156, "grad_norm": 1.6586157083511353, "learning_rate": 8.85130618994585e-06, "loss": 1.4712, "step": 10324 }, { "epoch": 0.5644929814250372, "grad_norm": 1.623436450958252, "learning_rate": 8.849490947845089e-06, "loss": 1.3289, "step": 10325 }, { "epoch": 0.5645476538687588, "grad_norm": 1.3864878416061401, "learning_rate": 8.847675744162522e-06, "loss": 1.5988, "step": 10326 }, { "epoch": 0.5646023263124803, "grad_norm": 1.3920950889587402, "learning_rate": 8.84586057895877e-06, "loss": 1.4553, "step": 10327 }, { "epoch": 0.5646569987562019, "grad_norm": 1.4499863386154175, "learning_rate": 8.844045452294442e-06, "loss": 1.4674, "step": 10328 }, { "epoch": 0.5647116711999235, "grad_norm": 1.8694566488265991, "learning_rate": 8.842230364230146e-06, "loss": 1.37, "step": 10329 }, { "epoch": 0.564766343643645, "grad_norm": 2.024509906768799, "learning_rate": 8.840415314826497e-06, "loss": 1.2185, "step": 10330 }, { "epoch": 0.5648210160873666, "grad_norm": 1.5806922912597656, "learning_rate": 8.838600304144102e-06, "loss": 1.3783, "step": 10331 }, { "epoch": 0.5648756885310882, "grad_norm": 1.2480623722076416, "learning_rate": 8.836785332243563e-06, "loss": 1.599, "step": 10332 }, { "epoch": 0.5649303609748096, "grad_norm": 1.9052082300186157, "learning_rate": 8.834970399185497e-06, "loss": 1.2055, "step": 10333 }, { "epoch": 0.5649850334185312, "grad_norm": 1.486103892326355, "learning_rate": 8.833155505030504e-06, "loss": 1.103, "step": 10334 }, { "epoch": 0.5650397058622528, "grad_norm": 1.2514277696609497, "learning_rate": 8.831340649839182e-06, "loss": 1.4666, "step": 10335 }, { "epoch": 0.5650943783059743, "grad_norm": 1.6012803316116333, "learning_rate": 8.829525833672142e-06, "loss": 1.3233, "step": 10336 }, { "epoch": 0.5651490507496959, "grad_norm": 1.7090646028518677, "learning_rate": 8.82771105658998e-06, "loss": 1.6102, "step": 10337 }, { "epoch": 0.5652037231934174, "grad_norm": 1.7684389352798462, "learning_rate": 8.825896318653294e-06, "loss": 1.3448, "step": 10338 }, { "epoch": 0.565258395637139, "grad_norm": 1.9991945028305054, "learning_rate": 8.824081619922688e-06, "loss": 1.0643, "step": 10339 }, { "epoch": 0.5653130680808606, "grad_norm": 1.7918152809143066, "learning_rate": 8.82226696045875e-06, "loss": 1.4158, "step": 10340 }, { "epoch": 0.565367740524582, "grad_norm": 1.5733520984649658, "learning_rate": 8.82045234032209e-06, "loss": 1.3721, "step": 10341 }, { "epoch": 0.5654224129683036, "grad_norm": 1.4554471969604492, "learning_rate": 8.818637759573292e-06, "loss": 1.4299, "step": 10342 }, { "epoch": 0.5654770854120252, "grad_norm": 1.5720669031143188, "learning_rate": 8.81682321827295e-06, "loss": 1.4001, "step": 10343 }, { "epoch": 0.5655317578557467, "grad_norm": 1.3637272119522095, "learning_rate": 8.815008716481658e-06, "loss": 1.2897, "step": 10344 }, { "epoch": 0.5655864302994683, "grad_norm": 1.5859028100967407, "learning_rate": 8.813194254260006e-06, "loss": 1.3171, "step": 10345 }, { "epoch": 0.5656411027431899, "grad_norm": 1.126280426979065, "learning_rate": 8.81137983166858e-06, "loss": 1.6715, "step": 10346 }, { "epoch": 0.5656957751869114, "grad_norm": 1.6619112491607666, "learning_rate": 8.809565448767975e-06, "loss": 1.3323, "step": 10347 }, { "epoch": 0.565750447630633, "grad_norm": 1.406389832496643, "learning_rate": 8.807751105618771e-06, "loss": 1.5203, "step": 10348 }, { "epoch": 0.5658051200743546, "grad_norm": 1.293522596359253, "learning_rate": 8.805936802281554e-06, "loss": 1.3983, "step": 10349 }, { "epoch": 0.565859792518076, "grad_norm": 1.3835381269454956, "learning_rate": 8.80412253881691e-06, "loss": 1.4493, "step": 10350 }, { "epoch": 0.5659144649617976, "grad_norm": 1.306742787361145, "learning_rate": 8.802308315285423e-06, "loss": 1.4582, "step": 10351 }, { "epoch": 0.5659691374055191, "grad_norm": 1.5862250328063965, "learning_rate": 8.800494131747667e-06, "loss": 1.5009, "step": 10352 }, { "epoch": 0.5660238098492407, "grad_norm": 2.159956216812134, "learning_rate": 8.79867998826423e-06, "loss": 1.2209, "step": 10353 }, { "epoch": 0.5660784822929623, "grad_norm": 1.425481915473938, "learning_rate": 8.796865884895686e-06, "loss": 1.4438, "step": 10354 }, { "epoch": 0.5661331547366838, "grad_norm": 1.5316352844238281, "learning_rate": 8.795051821702614e-06, "loss": 1.6342, "step": 10355 }, { "epoch": 0.5661878271804054, "grad_norm": 1.1888686418533325, "learning_rate": 8.793237798745591e-06, "loss": 1.431, "step": 10356 }, { "epoch": 0.566242499624127, "grad_norm": 1.3754801750183105, "learning_rate": 8.791423816085184e-06, "loss": 1.5362, "step": 10357 }, { "epoch": 0.5662971720678485, "grad_norm": 1.4746134281158447, "learning_rate": 8.789609873781978e-06, "loss": 1.5525, "step": 10358 }, { "epoch": 0.5663518445115701, "grad_norm": 1.4572138786315918, "learning_rate": 8.787795971896536e-06, "loss": 1.377, "step": 10359 }, { "epoch": 0.5664065169552917, "grad_norm": 1.4162580966949463, "learning_rate": 8.785982110489428e-06, "loss": 1.2657, "step": 10360 }, { "epoch": 0.5664611893990131, "grad_norm": 2.0728774070739746, "learning_rate": 8.784168289621231e-06, "loss": 1.2365, "step": 10361 }, { "epoch": 0.5665158618427347, "grad_norm": 1.4919495582580566, "learning_rate": 8.782354509352507e-06, "loss": 1.355, "step": 10362 }, { "epoch": 0.5665705342864563, "grad_norm": 1.1725531816482544, "learning_rate": 8.780540769743821e-06, "loss": 1.3767, "step": 10363 }, { "epoch": 0.5666252067301778, "grad_norm": 1.5739761590957642, "learning_rate": 8.778727070855743e-06, "loss": 1.4876, "step": 10364 }, { "epoch": 0.5666798791738994, "grad_norm": 1.66754949092865, "learning_rate": 8.776913412748833e-06, "loss": 1.4474, "step": 10365 }, { "epoch": 0.5667345516176209, "grad_norm": 1.6413465738296509, "learning_rate": 8.775099795483651e-06, "loss": 1.7468, "step": 10366 }, { "epoch": 0.5667892240613425, "grad_norm": 1.8040690422058105, "learning_rate": 8.773286219120765e-06, "loss": 1.4011, "step": 10367 }, { "epoch": 0.5668438965050641, "grad_norm": 1.557844638824463, "learning_rate": 8.771472683720728e-06, "loss": 1.3436, "step": 10368 }, { "epoch": 0.5668985689487855, "grad_norm": 1.5995372533798218, "learning_rate": 8.769659189344105e-06, "loss": 1.3515, "step": 10369 }, { "epoch": 0.5669532413925071, "grad_norm": 1.5035638809204102, "learning_rate": 8.767845736051447e-06, "loss": 1.3732, "step": 10370 }, { "epoch": 0.5670079138362287, "grad_norm": 1.8507907390594482, "learning_rate": 8.766032323903306e-06, "loss": 1.6077, "step": 10371 }, { "epoch": 0.5670625862799502, "grad_norm": 1.2894885540008545, "learning_rate": 8.764218952960247e-06, "loss": 1.4321, "step": 10372 }, { "epoch": 0.5671172587236718, "grad_norm": 1.5613995790481567, "learning_rate": 8.762405623282817e-06, "loss": 1.1833, "step": 10373 }, { "epoch": 0.5671719311673934, "grad_norm": 1.4898850917816162, "learning_rate": 8.760592334931566e-06, "loss": 1.314, "step": 10374 }, { "epoch": 0.5672266036111149, "grad_norm": 1.1771314144134521, "learning_rate": 8.758779087967047e-06, "loss": 1.765, "step": 10375 }, { "epoch": 0.5672812760548365, "grad_norm": 2.0003700256347656, "learning_rate": 8.756965882449806e-06, "loss": 1.2506, "step": 10376 }, { "epoch": 0.5673359484985581, "grad_norm": 1.2840665578842163, "learning_rate": 8.755152718440387e-06, "loss": 1.4318, "step": 10377 }, { "epoch": 0.5673906209422795, "grad_norm": 1.343368649482727, "learning_rate": 8.753339595999344e-06, "loss": 1.4558, "step": 10378 }, { "epoch": 0.5674452933860011, "grad_norm": 1.8983793258666992, "learning_rate": 8.751526515187218e-06, "loss": 1.4068, "step": 10379 }, { "epoch": 0.5674999658297226, "grad_norm": 2.9073362350463867, "learning_rate": 8.749713476064547e-06, "loss": 1.2844, "step": 10380 }, { "epoch": 0.5675546382734442, "grad_norm": 2.5314879417419434, "learning_rate": 8.74790047869188e-06, "loss": 1.5707, "step": 10381 }, { "epoch": 0.5676093107171658, "grad_norm": 1.7829433679580688, "learning_rate": 8.746087523129752e-06, "loss": 1.1923, "step": 10382 }, { "epoch": 0.5676639831608873, "grad_norm": 1.5299782752990723, "learning_rate": 8.744274609438707e-06, "loss": 1.2755, "step": 10383 }, { "epoch": 0.5677186556046089, "grad_norm": 1.7492461204528809, "learning_rate": 8.742461737679279e-06, "loss": 1.3509, "step": 10384 }, { "epoch": 0.5677733280483305, "grad_norm": 2.0257389545440674, "learning_rate": 8.740648907912002e-06, "loss": 1.4367, "step": 10385 }, { "epoch": 0.567828000492052, "grad_norm": 1.3508683443069458, "learning_rate": 8.738836120197416e-06, "loss": 1.5216, "step": 10386 }, { "epoch": 0.5678826729357735, "grad_norm": 2.0259571075439453, "learning_rate": 8.737023374596051e-06, "loss": 1.4862, "step": 10387 }, { "epoch": 0.5679373453794951, "grad_norm": 1.552311658859253, "learning_rate": 8.735210671168438e-06, "loss": 1.1584, "step": 10388 }, { "epoch": 0.5679920178232166, "grad_norm": 1.5664572715759277, "learning_rate": 8.733398009975109e-06, "loss": 1.3788, "step": 10389 }, { "epoch": 0.5680466902669382, "grad_norm": 1.177076816558838, "learning_rate": 8.731585391076594e-06, "loss": 1.4662, "step": 10390 }, { "epoch": 0.5681013627106598, "grad_norm": 2.111921787261963, "learning_rate": 8.729772814533415e-06, "loss": 1.4624, "step": 10391 }, { "epoch": 0.5681560351543813, "grad_norm": 1.5049020051956177, "learning_rate": 8.727960280406107e-06, "loss": 1.3091, "step": 10392 }, { "epoch": 0.5682107075981029, "grad_norm": 1.794858694076538, "learning_rate": 8.72614778875519e-06, "loss": 1.5007, "step": 10393 }, { "epoch": 0.5682653800418245, "grad_norm": 1.5472630262374878, "learning_rate": 8.724335339641185e-06, "loss": 1.3894, "step": 10394 }, { "epoch": 0.568320052485546, "grad_norm": 1.5766113996505737, "learning_rate": 8.722522933124617e-06, "loss": 1.4273, "step": 10395 }, { "epoch": 0.5683747249292675, "grad_norm": 1.2711023092269897, "learning_rate": 8.720710569266004e-06, "loss": 1.4317, "step": 10396 }, { "epoch": 0.568429397372989, "grad_norm": 1.1931850910186768, "learning_rate": 8.718898248125871e-06, "loss": 1.3789, "step": 10397 }, { "epoch": 0.5684840698167106, "grad_norm": 1.3759554624557495, "learning_rate": 8.717085969764732e-06, "loss": 1.3742, "step": 10398 }, { "epoch": 0.5685387422604322, "grad_norm": 1.4643079042434692, "learning_rate": 8.715273734243098e-06, "loss": 1.324, "step": 10399 }, { "epoch": 0.5685934147041537, "grad_norm": 1.6290874481201172, "learning_rate": 8.713461541621492e-06, "loss": 1.4579, "step": 10400 }, { "epoch": 0.5686480871478753, "grad_norm": 1.1969497203826904, "learning_rate": 8.711649391960424e-06, "loss": 1.721, "step": 10401 }, { "epoch": 0.5687027595915969, "grad_norm": 1.5234695672988892, "learning_rate": 8.709837285320406e-06, "loss": 1.4475, "step": 10402 }, { "epoch": 0.5687574320353184, "grad_norm": 2.1486306190490723, "learning_rate": 8.708025221761949e-06, "loss": 1.3419, "step": 10403 }, { "epoch": 0.56881210447904, "grad_norm": 1.5591331720352173, "learning_rate": 8.706213201345561e-06, "loss": 1.5781, "step": 10404 }, { "epoch": 0.5688667769227616, "grad_norm": 2.0062403678894043, "learning_rate": 8.704401224131747e-06, "loss": 1.1244, "step": 10405 }, { "epoch": 0.568921449366483, "grad_norm": 3.815917491912842, "learning_rate": 8.702589290181021e-06, "loss": 1.372, "step": 10406 }, { "epoch": 0.5689761218102046, "grad_norm": 1.4976348876953125, "learning_rate": 8.700777399553883e-06, "loss": 1.4453, "step": 10407 }, { "epoch": 0.5690307942539262, "grad_norm": 1.119570016860962, "learning_rate": 8.698965552310834e-06, "loss": 1.5083, "step": 10408 }, { "epoch": 0.5690854666976477, "grad_norm": 1.739649772644043, "learning_rate": 8.69715374851238e-06, "loss": 1.4308, "step": 10409 }, { "epoch": 0.5691401391413693, "grad_norm": 1.6018455028533936, "learning_rate": 8.695341988219015e-06, "loss": 1.5084, "step": 10410 }, { "epoch": 0.5691948115850908, "grad_norm": 1.2180026769638062, "learning_rate": 8.693530271491249e-06, "loss": 1.5913, "step": 10411 }, { "epoch": 0.5692494840288124, "grad_norm": 1.482967734336853, "learning_rate": 8.69171859838957e-06, "loss": 1.4974, "step": 10412 }, { "epoch": 0.569304156472534, "grad_norm": 1.8759510517120361, "learning_rate": 8.689906968974476e-06, "loss": 1.2534, "step": 10413 }, { "epoch": 0.5693588289162554, "grad_norm": 1.5139824151992798, "learning_rate": 8.688095383306465e-06, "loss": 1.2877, "step": 10414 }, { "epoch": 0.569413501359977, "grad_norm": 1.7450551986694336, "learning_rate": 8.686283841446027e-06, "loss": 1.3857, "step": 10415 }, { "epoch": 0.5694681738036986, "grad_norm": 1.7555960416793823, "learning_rate": 8.68447234345365e-06, "loss": 1.3983, "step": 10416 }, { "epoch": 0.5695228462474201, "grad_norm": 1.4542902708053589, "learning_rate": 8.682660889389834e-06, "loss": 1.3527, "step": 10417 }, { "epoch": 0.5695775186911417, "grad_norm": 1.5887123346328735, "learning_rate": 8.680849479315061e-06, "loss": 1.4658, "step": 10418 }, { "epoch": 0.5696321911348633, "grad_norm": 2.4320931434631348, "learning_rate": 8.679038113289815e-06, "loss": 1.4588, "step": 10419 }, { "epoch": 0.5696868635785848, "grad_norm": 1.332999348640442, "learning_rate": 8.67722679137459e-06, "loss": 1.3686, "step": 10420 }, { "epoch": 0.5697415360223064, "grad_norm": 1.7304877042770386, "learning_rate": 8.675415513629867e-06, "loss": 1.7145, "step": 10421 }, { "epoch": 0.569796208466028, "grad_norm": 1.4335625171661377, "learning_rate": 8.673604280116127e-06, "loss": 1.5105, "step": 10422 }, { "epoch": 0.5698508809097494, "grad_norm": 1.5292689800262451, "learning_rate": 8.671793090893853e-06, "loss": 1.3555, "step": 10423 }, { "epoch": 0.569905553353471, "grad_norm": 1.4374396800994873, "learning_rate": 8.66998194602352e-06, "loss": 1.482, "step": 10424 }, { "epoch": 0.5699602257971925, "grad_norm": 1.4348669052124023, "learning_rate": 8.668170845565618e-06, "loss": 1.3115, "step": 10425 }, { "epoch": 0.5700148982409141, "grad_norm": 1.3924232721328735, "learning_rate": 8.666359789580613e-06, "loss": 1.5304, "step": 10426 }, { "epoch": 0.5700695706846357, "grad_norm": 2.244903564453125, "learning_rate": 8.664548778128985e-06, "loss": 1.6294, "step": 10427 }, { "epoch": 0.5701242431283572, "grad_norm": 1.3553005456924438, "learning_rate": 8.662737811271208e-06, "loss": 1.5836, "step": 10428 }, { "epoch": 0.5701789155720788, "grad_norm": 1.3583405017852783, "learning_rate": 8.660926889067753e-06, "loss": 1.3044, "step": 10429 }, { "epoch": 0.5702335880158004, "grad_norm": 1.9356197118759155, "learning_rate": 8.659116011579088e-06, "loss": 1.3807, "step": 10430 }, { "epoch": 0.5702882604595219, "grad_norm": 1.3365520238876343, "learning_rate": 8.65730517886569e-06, "loss": 1.4038, "step": 10431 }, { "epoch": 0.5703429329032434, "grad_norm": 1.4658987522125244, "learning_rate": 8.655494390988022e-06, "loss": 1.6636, "step": 10432 }, { "epoch": 0.570397605346965, "grad_norm": 1.1244570016860962, "learning_rate": 8.65368364800655e-06, "loss": 1.4247, "step": 10433 }, { "epoch": 0.5704522777906865, "grad_norm": 1.424681305885315, "learning_rate": 8.651872949981743e-06, "loss": 1.2617, "step": 10434 }, { "epoch": 0.5705069502344081, "grad_norm": 1.4471158981323242, "learning_rate": 8.65006229697406e-06, "loss": 1.2889, "step": 10435 }, { "epoch": 0.5705616226781297, "grad_norm": 1.4201221466064453, "learning_rate": 8.648251689043961e-06, "loss": 1.5739, "step": 10436 }, { "epoch": 0.5706162951218512, "grad_norm": 1.491566777229309, "learning_rate": 8.646441126251914e-06, "loss": 1.1564, "step": 10437 }, { "epoch": 0.5706709675655728, "grad_norm": 1.6576658487319946, "learning_rate": 8.644630608658371e-06, "loss": 1.3152, "step": 10438 }, { "epoch": 0.5707256400092943, "grad_norm": 1.8146721124649048, "learning_rate": 8.642820136323794e-06, "loss": 1.3953, "step": 10439 }, { "epoch": 0.5707803124530159, "grad_norm": 1.3488715887069702, "learning_rate": 8.641009709308641e-06, "loss": 1.3671, "step": 10440 }, { "epoch": 0.5708349848967375, "grad_norm": 1.7455356121063232, "learning_rate": 8.639199327673358e-06, "loss": 1.4055, "step": 10441 }, { "epoch": 0.5708896573404589, "grad_norm": 2.191368579864502, "learning_rate": 8.637388991478406e-06, "loss": 1.2143, "step": 10442 }, { "epoch": 0.5709443297841805, "grad_norm": 1.4320825338363647, "learning_rate": 8.635578700784232e-06, "loss": 1.2115, "step": 10443 }, { "epoch": 0.5709990022279021, "grad_norm": 2.4640426635742188, "learning_rate": 8.633768455651283e-06, "loss": 1.2535, "step": 10444 }, { "epoch": 0.5710536746716236, "grad_norm": 1.5381090641021729, "learning_rate": 8.631958256140017e-06, "loss": 1.3891, "step": 10445 }, { "epoch": 0.5711083471153452, "grad_norm": 1.3780080080032349, "learning_rate": 8.630148102310874e-06, "loss": 1.4387, "step": 10446 }, { "epoch": 0.5711630195590668, "grad_norm": 1.4431167840957642, "learning_rate": 8.628337994224298e-06, "loss": 1.4779, "step": 10447 }, { "epoch": 0.5712176920027883, "grad_norm": 1.7812575101852417, "learning_rate": 8.626527931940736e-06, "loss": 1.4836, "step": 10448 }, { "epoch": 0.5712723644465099, "grad_norm": 1.7018662691116333, "learning_rate": 8.624717915520632e-06, "loss": 1.2832, "step": 10449 }, { "epoch": 0.5713270368902315, "grad_norm": 1.2967761754989624, "learning_rate": 8.622907945024418e-06, "loss": 1.43, "step": 10450 }, { "epoch": 0.5713817093339529, "grad_norm": 1.4180020093917847, "learning_rate": 8.621098020512543e-06, "loss": 1.4553, "step": 10451 }, { "epoch": 0.5714363817776745, "grad_norm": 1.4959774017333984, "learning_rate": 8.61928814204544e-06, "loss": 1.5477, "step": 10452 }, { "epoch": 0.571491054221396, "grad_norm": 1.3964684009552002, "learning_rate": 8.617478309683548e-06, "loss": 1.2791, "step": 10453 }, { "epoch": 0.5715457266651176, "grad_norm": 1.8026955127716064, "learning_rate": 8.615668523487299e-06, "loss": 1.2015, "step": 10454 }, { "epoch": 0.5716003991088392, "grad_norm": 1.4526758193969727, "learning_rate": 8.613858783517122e-06, "loss": 1.4096, "step": 10455 }, { "epoch": 0.5716550715525607, "grad_norm": 1.9431287050247192, "learning_rate": 8.612049089833457e-06, "loss": 1.2712, "step": 10456 }, { "epoch": 0.5717097439962823, "grad_norm": 1.2432061433792114, "learning_rate": 8.61023944249673e-06, "loss": 1.9333, "step": 10457 }, { "epoch": 0.5717644164400039, "grad_norm": 1.590198040008545, "learning_rate": 8.608429841567365e-06, "loss": 1.3257, "step": 10458 }, { "epoch": 0.5718190888837253, "grad_norm": 1.7531039714813232, "learning_rate": 8.606620287105796e-06, "loss": 1.4068, "step": 10459 }, { "epoch": 0.5718737613274469, "grad_norm": 1.4890789985656738, "learning_rate": 8.604810779172447e-06, "loss": 1.4929, "step": 10460 }, { "epoch": 0.5719284337711685, "grad_norm": 1.41488778591156, "learning_rate": 8.603001317827738e-06, "loss": 1.6102, "step": 10461 }, { "epoch": 0.57198310621489, "grad_norm": 1.3851710557937622, "learning_rate": 8.601191903132094e-06, "loss": 1.3785, "step": 10462 }, { "epoch": 0.5720377786586116, "grad_norm": 1.4255285263061523, "learning_rate": 8.599382535145936e-06, "loss": 1.2444, "step": 10463 }, { "epoch": 0.5720924511023332, "grad_norm": 1.6741632223129272, "learning_rate": 8.597573213929677e-06, "loss": 1.5117, "step": 10464 }, { "epoch": 0.5721471235460547, "grad_norm": 1.4439234733581543, "learning_rate": 8.595763939543743e-06, "loss": 1.2322, "step": 10465 }, { "epoch": 0.5722017959897763, "grad_norm": 1.522915005683899, "learning_rate": 8.593954712048544e-06, "loss": 1.061, "step": 10466 }, { "epoch": 0.5722564684334978, "grad_norm": 1.3927968740463257, "learning_rate": 8.592145531504499e-06, "loss": 1.3337, "step": 10467 }, { "epoch": 0.5723111408772193, "grad_norm": 1.5350412130355835, "learning_rate": 8.590336397972018e-06, "loss": 1.6196, "step": 10468 }, { "epoch": 0.5723658133209409, "grad_norm": 1.4714583158493042, "learning_rate": 8.58852731151151e-06, "loss": 1.2201, "step": 10469 }, { "epoch": 0.5724204857646624, "grad_norm": 1.9120676517486572, "learning_rate": 8.586718272183392e-06, "loss": 1.533, "step": 10470 }, { "epoch": 0.572475158208384, "grad_norm": 1.4956202507019043, "learning_rate": 8.584909280048064e-06, "loss": 1.1455, "step": 10471 }, { "epoch": 0.5725298306521056, "grad_norm": 1.7651708126068115, "learning_rate": 8.583100335165936e-06, "loss": 1.76, "step": 10472 }, { "epoch": 0.5725845030958271, "grad_norm": 1.3848448991775513, "learning_rate": 8.581291437597413e-06, "loss": 1.3009, "step": 10473 }, { "epoch": 0.5726391755395487, "grad_norm": 1.2565288543701172, "learning_rate": 8.579482587402899e-06, "loss": 1.473, "step": 10474 }, { "epoch": 0.5726938479832703, "grad_norm": 1.2044398784637451, "learning_rate": 8.577673784642791e-06, "loss": 1.3241, "step": 10475 }, { "epoch": 0.5727485204269918, "grad_norm": 1.6688913106918335, "learning_rate": 8.575865029377498e-06, "loss": 1.2885, "step": 10476 }, { "epoch": 0.5728031928707134, "grad_norm": 1.4168708324432373, "learning_rate": 8.57405632166741e-06, "loss": 1.3001, "step": 10477 }, { "epoch": 0.5728578653144349, "grad_norm": 1.735264778137207, "learning_rate": 8.572247661572926e-06, "loss": 1.3483, "step": 10478 }, { "epoch": 0.5729125377581564, "grad_norm": 1.6099376678466797, "learning_rate": 8.570439049154447e-06, "loss": 1.5165, "step": 10479 }, { "epoch": 0.572967210201878, "grad_norm": 1.5650032758712769, "learning_rate": 8.56863048447236e-06, "loss": 1.4345, "step": 10480 }, { "epoch": 0.5730218826455995, "grad_norm": 1.5059704780578613, "learning_rate": 8.566821967587062e-06, "loss": 1.5304, "step": 10481 }, { "epoch": 0.5730765550893211, "grad_norm": 1.6736027002334595, "learning_rate": 8.565013498558942e-06, "loss": 1.3854, "step": 10482 }, { "epoch": 0.5731312275330427, "grad_norm": 2.9819276332855225, "learning_rate": 8.563205077448385e-06, "loss": 1.4273, "step": 10483 }, { "epoch": 0.5731858999767642, "grad_norm": 1.6771405935287476, "learning_rate": 8.561396704315785e-06, "loss": 1.4395, "step": 10484 }, { "epoch": 0.5732405724204858, "grad_norm": 1.505387544631958, "learning_rate": 8.559588379221525e-06, "loss": 1.3524, "step": 10485 }, { "epoch": 0.5732952448642074, "grad_norm": 2.7225842475891113, "learning_rate": 8.557780102225987e-06, "loss": 1.169, "step": 10486 }, { "epoch": 0.5733499173079288, "grad_norm": 1.6902093887329102, "learning_rate": 8.555971873389558e-06, "loss": 1.4512, "step": 10487 }, { "epoch": 0.5734045897516504, "grad_norm": 1.1854479312896729, "learning_rate": 8.554163692772617e-06, "loss": 1.5767, "step": 10488 }, { "epoch": 0.573459262195372, "grad_norm": 1.4147272109985352, "learning_rate": 8.552355560435538e-06, "loss": 1.6597, "step": 10489 }, { "epoch": 0.5735139346390935, "grad_norm": 1.6316699981689453, "learning_rate": 8.550547476438708e-06, "loss": 1.5212, "step": 10490 }, { "epoch": 0.5735686070828151, "grad_norm": 1.492815613746643, "learning_rate": 8.548739440842499e-06, "loss": 1.382, "step": 10491 }, { "epoch": 0.5736232795265367, "grad_norm": 1.3574339151382446, "learning_rate": 8.546931453707285e-06, "loss": 1.4544, "step": 10492 }, { "epoch": 0.5736779519702582, "grad_norm": 1.39319908618927, "learning_rate": 8.545123515093441e-06, "loss": 1.3688, "step": 10493 }, { "epoch": 0.5737326244139798, "grad_norm": 1.430602788925171, "learning_rate": 8.543315625061332e-06, "loss": 1.6385, "step": 10494 }, { "epoch": 0.5737872968577012, "grad_norm": 1.6502765417099, "learning_rate": 8.541507783671337e-06, "loss": 1.4086, "step": 10495 }, { "epoch": 0.5738419693014228, "grad_norm": 1.803546667098999, "learning_rate": 8.53969999098382e-06, "loss": 1.5546, "step": 10496 }, { "epoch": 0.5738966417451444, "grad_norm": 1.6746989488601685, "learning_rate": 8.537892247059141e-06, "loss": 1.2085, "step": 10497 }, { "epoch": 0.5739513141888659, "grad_norm": 1.2220313549041748, "learning_rate": 8.536084551957676e-06, "loss": 1.7092, "step": 10498 }, { "epoch": 0.5740059866325875, "grad_norm": 1.7095184326171875, "learning_rate": 8.534276905739783e-06, "loss": 1.4777, "step": 10499 }, { "epoch": 0.5740606590763091, "grad_norm": 2.261028528213501, "learning_rate": 8.532469308465823e-06, "loss": 1.3589, "step": 10500 }, { "epoch": 0.5741153315200306, "grad_norm": 1.4665323495864868, "learning_rate": 8.530661760196157e-06, "loss": 1.433, "step": 10501 }, { "epoch": 0.5741700039637522, "grad_norm": 1.461812973022461, "learning_rate": 8.528854260991142e-06, "loss": 1.3575, "step": 10502 }, { "epoch": 0.5742246764074738, "grad_norm": 1.8260929584503174, "learning_rate": 8.527046810911133e-06, "loss": 1.4589, "step": 10503 }, { "epoch": 0.5742793488511952, "grad_norm": 1.5001431703567505, "learning_rate": 8.52523941001649e-06, "loss": 1.2644, "step": 10504 }, { "epoch": 0.5743340212949168, "grad_norm": 1.3360852003097534, "learning_rate": 8.523432058367564e-06, "loss": 1.309, "step": 10505 }, { "epoch": 0.5743886937386384, "grad_norm": 1.5728405714035034, "learning_rate": 8.521624756024706e-06, "loss": 1.3408, "step": 10506 }, { "epoch": 0.5744433661823599, "grad_norm": 2.0995030403137207, "learning_rate": 8.519817503048267e-06, "loss": 1.4003, "step": 10507 }, { "epoch": 0.5744980386260815, "grad_norm": 1.4668341875076294, "learning_rate": 8.518010299498591e-06, "loss": 1.4953, "step": 10508 }, { "epoch": 0.574552711069803, "grad_norm": 1.8244423866271973, "learning_rate": 8.516203145436033e-06, "loss": 1.451, "step": 10509 }, { "epoch": 0.5746073835135246, "grad_norm": 1.9221524000167847, "learning_rate": 8.514396040920934e-06, "loss": 1.3333, "step": 10510 }, { "epoch": 0.5746620559572462, "grad_norm": 1.7251800298690796, "learning_rate": 8.512588986013635e-06, "loss": 1.4722, "step": 10511 }, { "epoch": 0.5747167284009677, "grad_norm": 1.9810409545898438, "learning_rate": 8.510781980774482e-06, "loss": 1.4818, "step": 10512 }, { "epoch": 0.5747714008446893, "grad_norm": 1.360062837600708, "learning_rate": 8.508975025263814e-06, "loss": 1.5217, "step": 10513 }, { "epoch": 0.5748260732884108, "grad_norm": 1.6321980953216553, "learning_rate": 8.507168119541964e-06, "loss": 1.131, "step": 10514 }, { "epoch": 0.5748807457321323, "grad_norm": 1.5432233810424805, "learning_rate": 8.505361263669278e-06, "loss": 1.3787, "step": 10515 }, { "epoch": 0.5749354181758539, "grad_norm": 1.3818750381469727, "learning_rate": 8.503554457706086e-06, "loss": 1.6454, "step": 10516 }, { "epoch": 0.5749900906195755, "grad_norm": 2.160383939743042, "learning_rate": 8.501747701712718e-06, "loss": 1.3807, "step": 10517 }, { "epoch": 0.575044763063297, "grad_norm": 1.3381927013397217, "learning_rate": 8.499940995749514e-06, "loss": 1.3807, "step": 10518 }, { "epoch": 0.5750994355070186, "grad_norm": 1.1344650983810425, "learning_rate": 8.498134339876802e-06, "loss": 1.4031, "step": 10519 }, { "epoch": 0.5751541079507402, "grad_norm": 1.0594582557678223, "learning_rate": 8.496327734154905e-06, "loss": 1.5134, "step": 10520 }, { "epoch": 0.5752087803944617, "grad_norm": 1.9792163372039795, "learning_rate": 8.494521178644155e-06, "loss": 1.3773, "step": 10521 }, { "epoch": 0.5752634528381833, "grad_norm": 1.3827061653137207, "learning_rate": 8.492714673404873e-06, "loss": 1.6381, "step": 10522 }, { "epoch": 0.5753181252819047, "grad_norm": 1.337998867034912, "learning_rate": 8.490908218497387e-06, "loss": 1.6844, "step": 10523 }, { "epoch": 0.5753727977256263, "grad_norm": 1.6707429885864258, "learning_rate": 8.489101813982019e-06, "loss": 1.2201, "step": 10524 }, { "epoch": 0.5754274701693479, "grad_norm": 1.4590345621109009, "learning_rate": 8.487295459919084e-06, "loss": 1.3793, "step": 10525 }, { "epoch": 0.5754821426130694, "grad_norm": 1.245676875114441, "learning_rate": 8.485489156368904e-06, "loss": 1.486, "step": 10526 }, { "epoch": 0.575536815056791, "grad_norm": 1.5627843141555786, "learning_rate": 8.483682903391796e-06, "loss": 1.3492, "step": 10527 }, { "epoch": 0.5755914875005126, "grad_norm": 1.6278212070465088, "learning_rate": 8.481876701048071e-06, "loss": 1.5689, "step": 10528 }, { "epoch": 0.5756461599442341, "grad_norm": 1.4774596691131592, "learning_rate": 8.480070549398048e-06, "loss": 1.2956, "step": 10529 }, { "epoch": 0.5757008323879557, "grad_norm": 1.749250888824463, "learning_rate": 8.478264448502038e-06, "loss": 1.2604, "step": 10530 }, { "epoch": 0.5757555048316773, "grad_norm": 1.3618905544281006, "learning_rate": 8.476458398420344e-06, "loss": 1.3357, "step": 10531 }, { "epoch": 0.5758101772753987, "grad_norm": 1.564925193786621, "learning_rate": 8.474652399213283e-06, "loss": 1.3136, "step": 10532 }, { "epoch": 0.5758648497191203, "grad_norm": 1.2742143869400024, "learning_rate": 8.472846450941158e-06, "loss": 1.2323, "step": 10533 }, { "epoch": 0.5759195221628419, "grad_norm": 1.7418142557144165, "learning_rate": 8.471040553664269e-06, "loss": 1.3792, "step": 10534 }, { "epoch": 0.5759741946065634, "grad_norm": 1.8552515506744385, "learning_rate": 8.469234707442927e-06, "loss": 1.3044, "step": 10535 }, { "epoch": 0.576028867050285, "grad_norm": 1.9830526113510132, "learning_rate": 8.467428912337429e-06, "loss": 1.3543, "step": 10536 }, { "epoch": 0.5760835394940065, "grad_norm": 1.396134376525879, "learning_rate": 8.465623168408077e-06, "loss": 1.8278, "step": 10537 }, { "epoch": 0.5761382119377281, "grad_norm": 1.7217774391174316, "learning_rate": 8.463817475715169e-06, "loss": 1.4699, "step": 10538 }, { "epoch": 0.5761928843814497, "grad_norm": 1.6621787548065186, "learning_rate": 8.462011834318996e-06, "loss": 1.2017, "step": 10539 }, { "epoch": 0.5762475568251711, "grad_norm": 1.4575337171554565, "learning_rate": 8.46020624427986e-06, "loss": 1.4468, "step": 10540 }, { "epoch": 0.5763022292688927, "grad_norm": 1.846940279006958, "learning_rate": 8.458400705658051e-06, "loss": 1.3212, "step": 10541 }, { "epoch": 0.5763569017126143, "grad_norm": 1.5448254346847534, "learning_rate": 8.456595218513857e-06, "loss": 1.3874, "step": 10542 }, { "epoch": 0.5764115741563358, "grad_norm": 1.5866527557373047, "learning_rate": 8.454789782907575e-06, "loss": 1.3345, "step": 10543 }, { "epoch": 0.5764662466000574, "grad_norm": 1.5664399862289429, "learning_rate": 8.452984398899487e-06, "loss": 1.4315, "step": 10544 }, { "epoch": 0.576520919043779, "grad_norm": 1.3706700801849365, "learning_rate": 8.451179066549877e-06, "loss": 1.7353, "step": 10545 }, { "epoch": 0.5765755914875005, "grad_norm": 1.779465913772583, "learning_rate": 8.449373785919034e-06, "loss": 1.3124, "step": 10546 }, { "epoch": 0.5766302639312221, "grad_norm": 2.1020936965942383, "learning_rate": 8.447568557067241e-06, "loss": 1.0412, "step": 10547 }, { "epoch": 0.5766849363749437, "grad_norm": 1.3343130350112915, "learning_rate": 8.445763380054773e-06, "loss": 1.3187, "step": 10548 }, { "epoch": 0.5767396088186652, "grad_norm": 1.4031472206115723, "learning_rate": 8.443958254941915e-06, "loss": 1.1383, "step": 10549 }, { "epoch": 0.5767942812623867, "grad_norm": 1.3684594631195068, "learning_rate": 8.44215318178894e-06, "loss": 1.3685, "step": 10550 }, { "epoch": 0.5768489537061082, "grad_norm": 1.457821011543274, "learning_rate": 8.440348160656132e-06, "loss": 1.5036, "step": 10551 }, { "epoch": 0.5769036261498298, "grad_norm": 1.4312504529953003, "learning_rate": 8.438543191603755e-06, "loss": 1.4547, "step": 10552 }, { "epoch": 0.5769582985935514, "grad_norm": 1.518001914024353, "learning_rate": 8.43673827469208e-06, "loss": 1.293, "step": 10553 }, { "epoch": 0.5770129710372729, "grad_norm": 1.5646278858184814, "learning_rate": 8.43493340998139e-06, "loss": 1.2549, "step": 10554 }, { "epoch": 0.5770676434809945, "grad_norm": 2.0880730152130127, "learning_rate": 8.433128597531943e-06, "loss": 1.2703, "step": 10555 }, { "epoch": 0.5771223159247161, "grad_norm": 1.3588178157806396, "learning_rate": 8.431323837404008e-06, "loss": 1.5367, "step": 10556 }, { "epoch": 0.5771769883684376, "grad_norm": 1.3606986999511719, "learning_rate": 8.429519129657854e-06, "loss": 1.2838, "step": 10557 }, { "epoch": 0.5772316608121592, "grad_norm": 1.5169727802276611, "learning_rate": 8.42771447435374e-06, "loss": 1.2536, "step": 10558 }, { "epoch": 0.5772863332558807, "grad_norm": 1.9502590894699097, "learning_rate": 8.425909871551925e-06, "loss": 1.609, "step": 10559 }, { "epoch": 0.5773410056996022, "grad_norm": 1.2543998956680298, "learning_rate": 8.424105321312678e-06, "loss": 1.8079, "step": 10560 }, { "epoch": 0.5773956781433238, "grad_norm": 1.3947678804397583, "learning_rate": 8.422300823696252e-06, "loss": 1.6585, "step": 10561 }, { "epoch": 0.5774503505870454, "grad_norm": 1.9910974502563477, "learning_rate": 8.420496378762901e-06, "loss": 1.2806, "step": 10562 }, { "epoch": 0.5775050230307669, "grad_norm": 1.3573768138885498, "learning_rate": 8.418691986572884e-06, "loss": 1.546, "step": 10563 }, { "epoch": 0.5775596954744885, "grad_norm": 1.516329288482666, "learning_rate": 8.416887647186452e-06, "loss": 1.581, "step": 10564 }, { "epoch": 0.57761436791821, "grad_norm": 1.2893306016921997, "learning_rate": 8.415083360663858e-06, "loss": 1.4785, "step": 10565 }, { "epoch": 0.5776690403619316, "grad_norm": 1.3674240112304688, "learning_rate": 8.41327912706535e-06, "loss": 1.5585, "step": 10566 }, { "epoch": 0.5777237128056532, "grad_norm": 1.624789834022522, "learning_rate": 8.411474946451169e-06, "loss": 1.3501, "step": 10567 }, { "epoch": 0.5777783852493746, "grad_norm": 1.625670075416565, "learning_rate": 8.409670818881573e-06, "loss": 1.2619, "step": 10568 }, { "epoch": 0.5778330576930962, "grad_norm": 1.334517002105713, "learning_rate": 8.407866744416801e-06, "loss": 1.5916, "step": 10569 }, { "epoch": 0.5778877301368178, "grad_norm": 1.4423317909240723, "learning_rate": 8.40606272311709e-06, "loss": 1.4611, "step": 10570 }, { "epoch": 0.5779424025805393, "grad_norm": 1.3504079580307007, "learning_rate": 8.40425875504269e-06, "loss": 1.3409, "step": 10571 }, { "epoch": 0.5779970750242609, "grad_norm": 1.6366888284683228, "learning_rate": 8.402454840253831e-06, "loss": 1.3959, "step": 10572 }, { "epoch": 0.5780517474679825, "grad_norm": 1.6654086112976074, "learning_rate": 8.400650978810753e-06, "loss": 1.4239, "step": 10573 }, { "epoch": 0.578106419911704, "grad_norm": 1.3107959032058716, "learning_rate": 8.398847170773694e-06, "loss": 1.4663, "step": 10574 }, { "epoch": 0.5781610923554256, "grad_norm": 1.38828706741333, "learning_rate": 8.397043416202887e-06, "loss": 1.5761, "step": 10575 }, { "epoch": 0.5782157647991472, "grad_norm": 1.5639721155166626, "learning_rate": 8.395239715158558e-06, "loss": 1.3861, "step": 10576 }, { "epoch": 0.5782704372428686, "grad_norm": 1.4955434799194336, "learning_rate": 8.393436067700943e-06, "loss": 1.3681, "step": 10577 }, { "epoch": 0.5783251096865902, "grad_norm": 2.0749449729919434, "learning_rate": 8.391632473890264e-06, "loss": 1.1196, "step": 10578 }, { "epoch": 0.5783797821303117, "grad_norm": 1.6948044300079346, "learning_rate": 8.389828933786755e-06, "loss": 1.3152, "step": 10579 }, { "epoch": 0.5784344545740333, "grad_norm": 1.6260844469070435, "learning_rate": 8.388025447450635e-06, "loss": 1.2201, "step": 10580 }, { "epoch": 0.5784891270177549, "grad_norm": 1.8119728565216064, "learning_rate": 8.386222014942125e-06, "loss": 1.1147, "step": 10581 }, { "epoch": 0.5785437994614764, "grad_norm": 1.60270094871521, "learning_rate": 8.384418636321452e-06, "loss": 1.541, "step": 10582 }, { "epoch": 0.578598471905198, "grad_norm": 1.685691475868225, "learning_rate": 8.382615311648833e-06, "loss": 1.353, "step": 10583 }, { "epoch": 0.5786531443489196, "grad_norm": 1.7228299379348755, "learning_rate": 8.380812040984481e-06, "loss": 1.5649, "step": 10584 }, { "epoch": 0.578707816792641, "grad_norm": 1.6231555938720703, "learning_rate": 8.379008824388617e-06, "loss": 1.5671, "step": 10585 }, { "epoch": 0.5787624892363626, "grad_norm": 1.3537001609802246, "learning_rate": 8.377205661921453e-06, "loss": 1.4392, "step": 10586 }, { "epoch": 0.5788171616800842, "grad_norm": 1.5813121795654297, "learning_rate": 8.375402553643194e-06, "loss": 1.5572, "step": 10587 }, { "epoch": 0.5788718341238057, "grad_norm": 2.0441839694976807, "learning_rate": 8.37359949961406e-06, "loss": 1.3334, "step": 10588 }, { "epoch": 0.5789265065675273, "grad_norm": 1.896559476852417, "learning_rate": 8.371796499894259e-06, "loss": 1.6871, "step": 10589 }, { "epoch": 0.5789811790112489, "grad_norm": 1.5311250686645508, "learning_rate": 8.369993554543987e-06, "loss": 1.607, "step": 10590 }, { "epoch": 0.5790358514549704, "grad_norm": 1.6138910055160522, "learning_rate": 8.368190663623458e-06, "loss": 1.4469, "step": 10591 }, { "epoch": 0.579090523898692, "grad_norm": 1.6177140474319458, "learning_rate": 8.36638782719287e-06, "loss": 1.305, "step": 10592 }, { "epoch": 0.5791451963424135, "grad_norm": 1.5568994283676147, "learning_rate": 8.36458504531243e-06, "loss": 1.2797, "step": 10593 }, { "epoch": 0.579199868786135, "grad_norm": 1.879788875579834, "learning_rate": 8.36278231804233e-06, "loss": 1.4447, "step": 10594 }, { "epoch": 0.5792545412298566, "grad_norm": 1.3668780326843262, "learning_rate": 8.360979645442771e-06, "loss": 1.4571, "step": 10595 }, { "epoch": 0.5793092136735781, "grad_norm": 1.3598415851593018, "learning_rate": 8.359177027573948e-06, "loss": 1.4566, "step": 10596 }, { "epoch": 0.5793638861172997, "grad_norm": 1.7715306282043457, "learning_rate": 8.357374464496056e-06, "loss": 1.3084, "step": 10597 }, { "epoch": 0.5794185585610213, "grad_norm": 1.3694958686828613, "learning_rate": 8.355571956269278e-06, "loss": 1.4557, "step": 10598 }, { "epoch": 0.5794732310047428, "grad_norm": 1.305947184562683, "learning_rate": 8.353769502953818e-06, "loss": 1.4767, "step": 10599 }, { "epoch": 0.5795279034484644, "grad_norm": 1.4313459396362305, "learning_rate": 8.351967104609857e-06, "loss": 1.4937, "step": 10600 }, { "epoch": 0.579582575892186, "grad_norm": 1.370552897453308, "learning_rate": 8.350164761297577e-06, "loss": 1.3241, "step": 10601 }, { "epoch": 0.5796372483359075, "grad_norm": 1.536681890487671, "learning_rate": 8.34836247307717e-06, "loss": 1.5539, "step": 10602 }, { "epoch": 0.579691920779629, "grad_norm": 1.481987714767456, "learning_rate": 8.346560240008818e-06, "loss": 1.3218, "step": 10603 }, { "epoch": 0.5797465932233506, "grad_norm": 1.7529720067977905, "learning_rate": 8.344758062152696e-06, "loss": 1.3881, "step": 10604 }, { "epoch": 0.5798012656670721, "grad_norm": 1.5736123323440552, "learning_rate": 8.34295593956899e-06, "loss": 1.4997, "step": 10605 }, { "epoch": 0.5798559381107937, "grad_norm": 1.6279622316360474, "learning_rate": 8.341153872317867e-06, "loss": 1.6077, "step": 10606 }, { "epoch": 0.5799106105545153, "grad_norm": 1.4002838134765625, "learning_rate": 8.339351860459515e-06, "loss": 1.5559, "step": 10607 }, { "epoch": 0.5799652829982368, "grad_norm": 1.6852246522903442, "learning_rate": 8.3375499040541e-06, "loss": 1.5486, "step": 10608 }, { "epoch": 0.5800199554419584, "grad_norm": 1.3909019231796265, "learning_rate": 8.335748003161793e-06, "loss": 1.3782, "step": 10609 }, { "epoch": 0.5800746278856799, "grad_norm": 2.1810390949249268, "learning_rate": 8.333946157842768e-06, "loss": 1.3629, "step": 10610 }, { "epoch": 0.5801293003294015, "grad_norm": 1.6192927360534668, "learning_rate": 8.332144368157192e-06, "loss": 1.5586, "step": 10611 }, { "epoch": 0.5801839727731231, "grad_norm": 1.5250247716903687, "learning_rate": 8.330342634165221e-06, "loss": 1.3627, "step": 10612 }, { "epoch": 0.5802386452168445, "grad_norm": 1.6751948595046997, "learning_rate": 8.328540955927035e-06, "loss": 1.3403, "step": 10613 }, { "epoch": 0.5802933176605661, "grad_norm": 1.6332752704620361, "learning_rate": 8.326739333502787e-06, "loss": 1.5844, "step": 10614 }, { "epoch": 0.5803479901042877, "grad_norm": 1.3269999027252197, "learning_rate": 8.324937766952638e-06, "loss": 1.8297, "step": 10615 }, { "epoch": 0.5804026625480092, "grad_norm": 1.472227692604065, "learning_rate": 8.323136256336747e-06, "loss": 1.3126, "step": 10616 }, { "epoch": 0.5804573349917308, "grad_norm": 1.671311616897583, "learning_rate": 8.321334801715276e-06, "loss": 1.3107, "step": 10617 }, { "epoch": 0.5805120074354524, "grad_norm": 1.2857118844985962, "learning_rate": 8.319533403148368e-06, "loss": 1.424, "step": 10618 }, { "epoch": 0.5805666798791739, "grad_norm": 1.525223970413208, "learning_rate": 8.317732060696186e-06, "loss": 1.4393, "step": 10619 }, { "epoch": 0.5806213523228955, "grad_norm": 2.021709680557251, "learning_rate": 8.315930774418881e-06, "loss": 1.2221, "step": 10620 }, { "epoch": 0.5806760247666171, "grad_norm": 1.1336795091629028, "learning_rate": 8.314129544376593e-06, "loss": 1.6657, "step": 10621 }, { "epoch": 0.5807306972103385, "grad_norm": 1.3109824657440186, "learning_rate": 8.31232837062948e-06, "loss": 1.3578, "step": 10622 }, { "epoch": 0.5807853696540601, "grad_norm": 1.8892911672592163, "learning_rate": 8.31052725323768e-06, "loss": 1.394, "step": 10623 }, { "epoch": 0.5808400420977816, "grad_norm": 1.874518632888794, "learning_rate": 8.308726192261344e-06, "loss": 1.193, "step": 10624 }, { "epoch": 0.5808947145415032, "grad_norm": 1.240864872932434, "learning_rate": 8.306925187760608e-06, "loss": 1.3317, "step": 10625 }, { "epoch": 0.5809493869852248, "grad_norm": 1.4955155849456787, "learning_rate": 8.305124239795609e-06, "loss": 1.4258, "step": 10626 }, { "epoch": 0.5810040594289463, "grad_norm": 1.8929120302200317, "learning_rate": 8.303323348426493e-06, "loss": 1.516, "step": 10627 }, { "epoch": 0.5810587318726679, "grad_norm": 2.145479679107666, "learning_rate": 8.301522513713392e-06, "loss": 1.127, "step": 10628 }, { "epoch": 0.5811134043163895, "grad_norm": 1.5859167575836182, "learning_rate": 8.299721735716437e-06, "loss": 1.569, "step": 10629 }, { "epoch": 0.581168076760111, "grad_norm": 1.7974145412445068, "learning_rate": 8.297921014495764e-06, "loss": 1.5687, "step": 10630 }, { "epoch": 0.5812227492038325, "grad_norm": 1.7903987169265747, "learning_rate": 8.296120350111504e-06, "loss": 1.5651, "step": 10631 }, { "epoch": 0.5812774216475541, "grad_norm": 1.8420259952545166, "learning_rate": 8.29431974262378e-06, "loss": 1.6535, "step": 10632 }, { "epoch": 0.5813320940912756, "grad_norm": 1.8739044666290283, "learning_rate": 8.292519192092725e-06, "loss": 1.4994, "step": 10633 }, { "epoch": 0.5813867665349972, "grad_norm": 1.3487746715545654, "learning_rate": 8.29071869857846e-06, "loss": 1.5171, "step": 10634 }, { "epoch": 0.5814414389787188, "grad_norm": 1.5109609365463257, "learning_rate": 8.28891826214111e-06, "loss": 1.5477, "step": 10635 }, { "epoch": 0.5814961114224403, "grad_norm": 1.396698236465454, "learning_rate": 8.287117882840795e-06, "loss": 1.4465, "step": 10636 }, { "epoch": 0.5815507838661619, "grad_norm": 1.4634777307510376, "learning_rate": 8.285317560737629e-06, "loss": 1.5587, "step": 10637 }, { "epoch": 0.5816054563098834, "grad_norm": 1.6894298791885376, "learning_rate": 8.283517295891737e-06, "loss": 1.1752, "step": 10638 }, { "epoch": 0.581660128753605, "grad_norm": 1.4997369050979614, "learning_rate": 8.28171708836323e-06, "loss": 1.5467, "step": 10639 }, { "epoch": 0.5817148011973265, "grad_norm": 1.496498703956604, "learning_rate": 8.279916938212218e-06, "loss": 1.4348, "step": 10640 }, { "epoch": 0.581769473641048, "grad_norm": 1.8602380752563477, "learning_rate": 8.27811684549882e-06, "loss": 1.4762, "step": 10641 }, { "epoch": 0.5818241460847696, "grad_norm": 1.3947120904922485, "learning_rate": 8.276316810283142e-06, "loss": 1.7359, "step": 10642 }, { "epoch": 0.5818788185284912, "grad_norm": 1.294188141822815, "learning_rate": 8.274516832625287e-06, "loss": 1.33, "step": 10643 }, { "epoch": 0.5819334909722127, "grad_norm": 1.8946828842163086, "learning_rate": 8.272716912585366e-06, "loss": 1.2454, "step": 10644 }, { "epoch": 0.5819881634159343, "grad_norm": 1.7078520059585571, "learning_rate": 8.270917050223481e-06, "loss": 1.3665, "step": 10645 }, { "epoch": 0.5820428358596559, "grad_norm": 1.4853453636169434, "learning_rate": 8.269117245599729e-06, "loss": 1.4604, "step": 10646 }, { "epoch": 0.5820975083033774, "grad_norm": 1.3990626335144043, "learning_rate": 8.267317498774217e-06, "loss": 1.3315, "step": 10647 }, { "epoch": 0.582152180747099, "grad_norm": 1.2338340282440186, "learning_rate": 8.26551780980704e-06, "loss": 1.517, "step": 10648 }, { "epoch": 0.5822068531908206, "grad_norm": 1.432827353477478, "learning_rate": 8.263718178758292e-06, "loss": 1.6491, "step": 10649 }, { "epoch": 0.582261525634542, "grad_norm": 1.4936991930007935, "learning_rate": 8.26191860568807e-06, "loss": 1.3834, "step": 10650 }, { "epoch": 0.5823161980782636, "grad_norm": 1.7505632638931274, "learning_rate": 8.26011909065646e-06, "loss": 1.3792, "step": 10651 }, { "epoch": 0.5823708705219851, "grad_norm": 1.2266278266906738, "learning_rate": 8.258319633723562e-06, "loss": 1.5451, "step": 10652 }, { "epoch": 0.5824255429657067, "grad_norm": 2.0601134300231934, "learning_rate": 8.256520234949456e-06, "loss": 1.4073, "step": 10653 }, { "epoch": 0.5824802154094283, "grad_norm": 1.5803369283676147, "learning_rate": 8.254720894394231e-06, "loss": 1.2408, "step": 10654 }, { "epoch": 0.5825348878531498, "grad_norm": 1.8938136100769043, "learning_rate": 8.25292161211797e-06, "loss": 1.2749, "step": 10655 }, { "epoch": 0.5825895602968714, "grad_norm": 1.3971045017242432, "learning_rate": 8.251122388180758e-06, "loss": 1.4153, "step": 10656 }, { "epoch": 0.582644232740593, "grad_norm": 1.5805217027664185, "learning_rate": 8.249323222642668e-06, "loss": 1.5422, "step": 10657 }, { "epoch": 0.5826989051843144, "grad_norm": 1.5544992685317993, "learning_rate": 8.247524115563789e-06, "loss": 1.467, "step": 10658 }, { "epoch": 0.582753577628036, "grad_norm": 1.8453924655914307, "learning_rate": 8.24572506700419e-06, "loss": 1.517, "step": 10659 }, { "epoch": 0.5828082500717576, "grad_norm": 1.280705451965332, "learning_rate": 8.243926077023945e-06, "loss": 1.6373, "step": 10660 }, { "epoch": 0.5828629225154791, "grad_norm": 1.5722299814224243, "learning_rate": 8.242127145683134e-06, "loss": 1.3589, "step": 10661 }, { "epoch": 0.5829175949592007, "grad_norm": 1.7057163715362549, "learning_rate": 8.240328273041822e-06, "loss": 1.2629, "step": 10662 }, { "epoch": 0.5829722674029223, "grad_norm": 1.5563397407531738, "learning_rate": 8.238529459160076e-06, "loss": 1.0248, "step": 10663 }, { "epoch": 0.5830269398466438, "grad_norm": 1.5785927772521973, "learning_rate": 8.236730704097966e-06, "loss": 1.4711, "step": 10664 }, { "epoch": 0.5830816122903654, "grad_norm": 2.237037181854248, "learning_rate": 8.234932007915552e-06, "loss": 1.3701, "step": 10665 }, { "epoch": 0.5831362847340869, "grad_norm": 2.359060287475586, "learning_rate": 8.233133370672905e-06, "loss": 0.9653, "step": 10666 }, { "epoch": 0.5831909571778084, "grad_norm": 1.444583773612976, "learning_rate": 8.23133479243008e-06, "loss": 1.2924, "step": 10667 }, { "epoch": 0.58324562962153, "grad_norm": 1.9677925109863281, "learning_rate": 8.229536273247133e-06, "loss": 1.4993, "step": 10668 }, { "epoch": 0.5833003020652515, "grad_norm": 1.46457839012146, "learning_rate": 8.227737813184129e-06, "loss": 1.4723, "step": 10669 }, { "epoch": 0.5833549745089731, "grad_norm": 1.26967191696167, "learning_rate": 8.225939412301117e-06, "loss": 1.5764, "step": 10670 }, { "epoch": 0.5834096469526947, "grad_norm": 1.655318260192871, "learning_rate": 8.224141070658147e-06, "loss": 1.2805, "step": 10671 }, { "epoch": 0.5834643193964162, "grad_norm": 1.4433971643447876, "learning_rate": 8.222342788315277e-06, "loss": 1.3724, "step": 10672 }, { "epoch": 0.5835189918401378, "grad_norm": 1.4049521684646606, "learning_rate": 8.220544565332555e-06, "loss": 1.5893, "step": 10673 }, { "epoch": 0.5835736642838594, "grad_norm": 1.685021996498108, "learning_rate": 8.218746401770021e-06, "loss": 1.1036, "step": 10674 }, { "epoch": 0.5836283367275809, "grad_norm": 1.5805792808532715, "learning_rate": 8.216948297687727e-06, "loss": 1.3912, "step": 10675 }, { "epoch": 0.5836830091713024, "grad_norm": 1.3783107995986938, "learning_rate": 8.215150253145715e-06, "loss": 1.2157, "step": 10676 }, { "epoch": 0.583737681615024, "grad_norm": 1.5770313739776611, "learning_rate": 8.21335226820402e-06, "loss": 1.4555, "step": 10677 }, { "epoch": 0.5837923540587455, "grad_norm": 1.5467798709869385, "learning_rate": 8.211554342922688e-06, "loss": 1.4653, "step": 10678 }, { "epoch": 0.5838470265024671, "grad_norm": 1.5639359951019287, "learning_rate": 8.20975647736175e-06, "loss": 1.3165, "step": 10679 }, { "epoch": 0.5839016989461886, "grad_norm": 1.3085625171661377, "learning_rate": 8.207958671581248e-06, "loss": 1.3245, "step": 10680 }, { "epoch": 0.5839563713899102, "grad_norm": 1.6802725791931152, "learning_rate": 8.206160925641211e-06, "loss": 1.4591, "step": 10681 }, { "epoch": 0.5840110438336318, "grad_norm": 1.3156436681747437, "learning_rate": 8.204363239601668e-06, "loss": 1.2628, "step": 10682 }, { "epoch": 0.5840657162773533, "grad_norm": 2.1072683334350586, "learning_rate": 8.202565613522653e-06, "loss": 1.2405, "step": 10683 }, { "epoch": 0.5841203887210749, "grad_norm": 1.7168376445770264, "learning_rate": 8.20076804746419e-06, "loss": 1.3881, "step": 10684 }, { "epoch": 0.5841750611647964, "grad_norm": 1.4719241857528687, "learning_rate": 8.198970541486298e-06, "loss": 1.5645, "step": 10685 }, { "epoch": 0.5842297336085179, "grad_norm": 1.5703552961349487, "learning_rate": 8.197173095649011e-06, "loss": 1.4224, "step": 10686 }, { "epoch": 0.5842844060522395, "grad_norm": 1.6618725061416626, "learning_rate": 8.195375710012345e-06, "loss": 1.4796, "step": 10687 }, { "epoch": 0.5843390784959611, "grad_norm": 1.4002292156219482, "learning_rate": 8.193578384636317e-06, "loss": 1.684, "step": 10688 }, { "epoch": 0.5843937509396826, "grad_norm": 1.500969409942627, "learning_rate": 8.191781119580947e-06, "loss": 1.4643, "step": 10689 }, { "epoch": 0.5844484233834042, "grad_norm": 2.1938235759735107, "learning_rate": 8.18998391490625e-06, "loss": 1.4851, "step": 10690 }, { "epoch": 0.5845030958271258, "grad_norm": 1.6168315410614014, "learning_rate": 8.188186770672231e-06, "loss": 1.4565, "step": 10691 }, { "epoch": 0.5845577682708473, "grad_norm": 1.5778244733810425, "learning_rate": 8.18638968693891e-06, "loss": 1.363, "step": 10692 }, { "epoch": 0.5846124407145689, "grad_norm": 1.2499096393585205, "learning_rate": 8.184592663766296e-06, "loss": 1.5474, "step": 10693 }, { "epoch": 0.5846671131582903, "grad_norm": 1.4718431234359741, "learning_rate": 8.182795701214393e-06, "loss": 1.6089, "step": 10694 }, { "epoch": 0.5847217856020119, "grad_norm": 1.5513182878494263, "learning_rate": 8.180998799343203e-06, "loss": 1.4154, "step": 10695 }, { "epoch": 0.5847764580457335, "grad_norm": 1.946662425994873, "learning_rate": 8.17920195821273e-06, "loss": 1.2267, "step": 10696 }, { "epoch": 0.584831130489455, "grad_norm": 1.9697849750518799, "learning_rate": 8.17740517788298e-06, "loss": 1.2595, "step": 10697 }, { "epoch": 0.5848858029331766, "grad_norm": 2.1316139698028564, "learning_rate": 8.175608458413948e-06, "loss": 1.6306, "step": 10698 }, { "epoch": 0.5849404753768982, "grad_norm": 1.5390557050704956, "learning_rate": 8.173811799865628e-06, "loss": 1.3206, "step": 10699 }, { "epoch": 0.5849951478206197, "grad_norm": 1.3362618684768677, "learning_rate": 8.172015202298019e-06, "loss": 1.5233, "step": 10700 }, { "epoch": 0.5850498202643413, "grad_norm": 1.395766019821167, "learning_rate": 8.170218665771113e-06, "loss": 1.3778, "step": 10701 }, { "epoch": 0.5851044927080629, "grad_norm": 1.5647668838500977, "learning_rate": 8.168422190344896e-06, "loss": 1.339, "step": 10702 }, { "epoch": 0.5851591651517843, "grad_norm": 1.881750464439392, "learning_rate": 8.166625776079365e-06, "loss": 1.3475, "step": 10703 }, { "epoch": 0.5852138375955059, "grad_norm": 1.67024564743042, "learning_rate": 8.1648294230345e-06, "loss": 1.5816, "step": 10704 }, { "epoch": 0.5852685100392275, "grad_norm": 1.6734604835510254, "learning_rate": 8.163033131270281e-06, "loss": 1.3237, "step": 10705 }, { "epoch": 0.585323182482949, "grad_norm": 1.5522233247756958, "learning_rate": 8.161236900846703e-06, "loss": 1.1424, "step": 10706 }, { "epoch": 0.5853778549266706, "grad_norm": 1.9437483549118042, "learning_rate": 8.159440731823735e-06, "loss": 1.2785, "step": 10707 }, { "epoch": 0.5854325273703921, "grad_norm": 1.7373969554901123, "learning_rate": 8.157644624261364e-06, "loss": 1.4521, "step": 10708 }, { "epoch": 0.5854871998141137, "grad_norm": 1.8034257888793945, "learning_rate": 8.155848578219563e-06, "loss": 1.3435, "step": 10709 }, { "epoch": 0.5855418722578353, "grad_norm": 1.485376000404358, "learning_rate": 8.1540525937583e-06, "loss": 1.6126, "step": 10710 }, { "epoch": 0.5855965447015568, "grad_norm": 1.6251498460769653, "learning_rate": 8.152256670937557e-06, "loss": 1.5639, "step": 10711 }, { "epoch": 0.5856512171452783, "grad_norm": 1.6516647338867188, "learning_rate": 8.1504608098173e-06, "loss": 1.5096, "step": 10712 }, { "epoch": 0.5857058895889999, "grad_norm": 1.6750876903533936, "learning_rate": 8.148665010457492e-06, "loss": 1.4198, "step": 10713 }, { "epoch": 0.5857605620327214, "grad_norm": 1.6164692640304565, "learning_rate": 8.146869272918109e-06, "loss": 1.5521, "step": 10714 }, { "epoch": 0.585815234476443, "grad_norm": 1.581917405128479, "learning_rate": 8.145073597259108e-06, "loss": 1.0528, "step": 10715 }, { "epoch": 0.5858699069201646, "grad_norm": 2.2052550315856934, "learning_rate": 8.14327798354045e-06, "loss": 1.394, "step": 10716 }, { "epoch": 0.5859245793638861, "grad_norm": 1.323338270187378, "learning_rate": 8.141482431822098e-06, "loss": 1.5401, "step": 10717 }, { "epoch": 0.5859792518076077, "grad_norm": 1.526709794998169, "learning_rate": 8.13968694216401e-06, "loss": 1.4821, "step": 10718 }, { "epoch": 0.5860339242513293, "grad_norm": 1.3791706562042236, "learning_rate": 8.137891514626137e-06, "loss": 1.5184, "step": 10719 }, { "epoch": 0.5860885966950508, "grad_norm": 1.1687169075012207, "learning_rate": 8.13609614926844e-06, "loss": 1.5523, "step": 10720 }, { "epoch": 0.5861432691387723, "grad_norm": 2.138460159301758, "learning_rate": 8.134300846150862e-06, "loss": 1.0945, "step": 10721 }, { "epoch": 0.5861979415824938, "grad_norm": 2.007720708847046, "learning_rate": 8.132505605333362e-06, "loss": 1.4997, "step": 10722 }, { "epoch": 0.5862526140262154, "grad_norm": 1.8749436140060425, "learning_rate": 8.130710426875881e-06, "loss": 1.423, "step": 10723 }, { "epoch": 0.586307286469937, "grad_norm": 1.7869325876235962, "learning_rate": 8.12891531083836e-06, "loss": 1.4542, "step": 10724 }, { "epoch": 0.5863619589136585, "grad_norm": 1.8914823532104492, "learning_rate": 8.127120257280752e-06, "loss": 1.2379, "step": 10725 }, { "epoch": 0.5864166313573801, "grad_norm": 1.6838653087615967, "learning_rate": 8.125325266262994e-06, "loss": 1.6505, "step": 10726 }, { "epoch": 0.5864713038011017, "grad_norm": 1.8600841760635376, "learning_rate": 8.123530337845022e-06, "loss": 1.2396, "step": 10727 }, { "epoch": 0.5865259762448232, "grad_norm": 1.5696067810058594, "learning_rate": 8.121735472086777e-06, "loss": 1.291, "step": 10728 }, { "epoch": 0.5865806486885448, "grad_norm": 1.4693409204483032, "learning_rate": 8.119940669048194e-06, "loss": 1.3365, "step": 10729 }, { "epoch": 0.5866353211322664, "grad_norm": 1.5995842218399048, "learning_rate": 8.118145928789198e-06, "loss": 1.4092, "step": 10730 }, { "epoch": 0.5866899935759878, "grad_norm": 1.3226011991500854, "learning_rate": 8.11635125136973e-06, "loss": 1.489, "step": 10731 }, { "epoch": 0.5867446660197094, "grad_norm": 1.2304997444152832, "learning_rate": 8.114556636849714e-06, "loss": 1.5151, "step": 10732 }, { "epoch": 0.586799338463431, "grad_norm": 1.5644376277923584, "learning_rate": 8.112762085289073e-06, "loss": 1.4444, "step": 10733 }, { "epoch": 0.5868540109071525, "grad_norm": 1.5516102313995361, "learning_rate": 8.110967596747738e-06, "loss": 1.4721, "step": 10734 }, { "epoch": 0.5869086833508741, "grad_norm": 1.5489771366119385, "learning_rate": 8.109173171285623e-06, "loss": 1.4499, "step": 10735 }, { "epoch": 0.5869633557945956, "grad_norm": 1.363751769065857, "learning_rate": 8.107378808962656e-06, "loss": 1.4987, "step": 10736 }, { "epoch": 0.5870180282383172, "grad_norm": 2.2360010147094727, "learning_rate": 8.105584509838754e-06, "loss": 1.3381, "step": 10737 }, { "epoch": 0.5870727006820388, "grad_norm": 1.1988917589187622, "learning_rate": 8.103790273973824e-06, "loss": 1.6975, "step": 10738 }, { "epoch": 0.5871273731257602, "grad_norm": 1.2641282081604004, "learning_rate": 8.101996101427791e-06, "loss": 1.7093, "step": 10739 }, { "epoch": 0.5871820455694818, "grad_norm": 1.3731735944747925, "learning_rate": 8.100201992260563e-06, "loss": 1.4968, "step": 10740 }, { "epoch": 0.5872367180132034, "grad_norm": 1.14990234375, "learning_rate": 8.098407946532045e-06, "loss": 1.4218, "step": 10741 }, { "epoch": 0.5872913904569249, "grad_norm": 1.3670215606689453, "learning_rate": 8.096613964302152e-06, "loss": 1.6044, "step": 10742 }, { "epoch": 0.5873460629006465, "grad_norm": 1.7537654638290405, "learning_rate": 8.094820045630783e-06, "loss": 1.3704, "step": 10743 }, { "epoch": 0.5874007353443681, "grad_norm": 1.5350825786590576, "learning_rate": 8.093026190577839e-06, "loss": 1.5138, "step": 10744 }, { "epoch": 0.5874554077880896, "grad_norm": 1.5034801959991455, "learning_rate": 8.091232399203232e-06, "loss": 1.4872, "step": 10745 }, { "epoch": 0.5875100802318112, "grad_norm": 1.5073130130767822, "learning_rate": 8.089438671566853e-06, "loss": 1.4757, "step": 10746 }, { "epoch": 0.5875647526755328, "grad_norm": 1.5092267990112305, "learning_rate": 8.087645007728598e-06, "loss": 1.258, "step": 10747 }, { "epoch": 0.5876194251192542, "grad_norm": 1.6911158561706543, "learning_rate": 8.085851407748365e-06, "loss": 1.2622, "step": 10748 }, { "epoch": 0.5876740975629758, "grad_norm": 1.2595165967941284, "learning_rate": 8.084057871686041e-06, "loss": 1.3521, "step": 10749 }, { "epoch": 0.5877287700066973, "grad_norm": 2.1117751598358154, "learning_rate": 8.082264399601527e-06, "loss": 1.1334, "step": 10750 }, { "epoch": 0.5877834424504189, "grad_norm": 1.886659860610962, "learning_rate": 8.080470991554703e-06, "loss": 1.3079, "step": 10751 }, { "epoch": 0.5878381148941405, "grad_norm": 1.376678466796875, "learning_rate": 8.078677647605455e-06, "loss": 1.331, "step": 10752 }, { "epoch": 0.587892787337862, "grad_norm": 1.6477690935134888, "learning_rate": 8.076884367813671e-06, "loss": 1.3154, "step": 10753 }, { "epoch": 0.5879474597815836, "grad_norm": 1.5310040712356567, "learning_rate": 8.075091152239231e-06, "loss": 1.4251, "step": 10754 }, { "epoch": 0.5880021322253052, "grad_norm": 1.469266414642334, "learning_rate": 8.07329800094201e-06, "loss": 1.3674, "step": 10755 }, { "epoch": 0.5880568046690267, "grad_norm": 1.7734594345092773, "learning_rate": 8.071504913981894e-06, "loss": 1.5404, "step": 10756 }, { "epoch": 0.5881114771127482, "grad_norm": 1.7498009204864502, "learning_rate": 8.069711891418753e-06, "loss": 1.5515, "step": 10757 }, { "epoch": 0.5881661495564698, "grad_norm": 1.2389365434646606, "learning_rate": 8.067918933312459e-06, "loss": 1.546, "step": 10758 }, { "epoch": 0.5882208220001913, "grad_norm": 1.6462031602859497, "learning_rate": 8.066126039722889e-06, "loss": 1.3194, "step": 10759 }, { "epoch": 0.5882754944439129, "grad_norm": 1.7772036790847778, "learning_rate": 8.064333210709908e-06, "loss": 1.5261, "step": 10760 }, { "epoch": 0.5883301668876345, "grad_norm": 1.6356691122055054, "learning_rate": 8.062540446333384e-06, "loss": 1.445, "step": 10761 }, { "epoch": 0.588384839331356, "grad_norm": 1.806491494178772, "learning_rate": 8.060747746653181e-06, "loss": 1.4761, "step": 10762 }, { "epoch": 0.5884395117750776, "grad_norm": 1.35731041431427, "learning_rate": 8.058955111729157e-06, "loss": 1.4231, "step": 10763 }, { "epoch": 0.5884941842187991, "grad_norm": 1.862533688545227, "learning_rate": 8.05716254162118e-06, "loss": 1.3123, "step": 10764 }, { "epoch": 0.5885488566625207, "grad_norm": 1.2898991107940674, "learning_rate": 8.055370036389105e-06, "loss": 1.3279, "step": 10765 }, { "epoch": 0.5886035291062423, "grad_norm": 1.344420075416565, "learning_rate": 8.053577596092788e-06, "loss": 1.4638, "step": 10766 }, { "epoch": 0.5886582015499637, "grad_norm": 1.5425920486450195, "learning_rate": 8.051785220792082e-06, "loss": 1.402, "step": 10767 }, { "epoch": 0.5887128739936853, "grad_norm": 1.4778550863265991, "learning_rate": 8.04999291054684e-06, "loss": 1.3302, "step": 10768 }, { "epoch": 0.5887675464374069, "grad_norm": 1.4307307004928589, "learning_rate": 8.048200665416907e-06, "loss": 1.3261, "step": 10769 }, { "epoch": 0.5888222188811284, "grad_norm": 1.674522042274475, "learning_rate": 8.046408485462138e-06, "loss": 1.325, "step": 10770 }, { "epoch": 0.58887689132485, "grad_norm": 1.592525839805603, "learning_rate": 8.044616370742372e-06, "loss": 1.4583, "step": 10771 }, { "epoch": 0.5889315637685716, "grad_norm": 1.5474274158477783, "learning_rate": 8.042824321317453e-06, "loss": 1.5929, "step": 10772 }, { "epoch": 0.5889862362122931, "grad_norm": 1.5386632680892944, "learning_rate": 8.041032337247226e-06, "loss": 1.3357, "step": 10773 }, { "epoch": 0.5890409086560147, "grad_norm": 1.7173515558242798, "learning_rate": 8.039240418591525e-06, "loss": 1.3694, "step": 10774 }, { "epoch": 0.5890955810997363, "grad_norm": 1.2930315732955933, "learning_rate": 8.037448565410183e-06, "loss": 1.3013, "step": 10775 }, { "epoch": 0.5891502535434577, "grad_norm": 1.536392331123352, "learning_rate": 8.035656777763041e-06, "loss": 1.3615, "step": 10776 }, { "epoch": 0.5892049259871793, "grad_norm": 1.469061255455017, "learning_rate": 8.033865055709928e-06, "loss": 1.3722, "step": 10777 }, { "epoch": 0.5892595984309008, "grad_norm": 1.9987796545028687, "learning_rate": 8.032073399310678e-06, "loss": 1.218, "step": 10778 }, { "epoch": 0.5893142708746224, "grad_norm": 2.2733969688415527, "learning_rate": 8.030281808625114e-06, "loss": 1.2895, "step": 10779 }, { "epoch": 0.589368943318344, "grad_norm": 1.7686246633529663, "learning_rate": 8.02849028371306e-06, "loss": 1.3723, "step": 10780 }, { "epoch": 0.5894236157620655, "grad_norm": 1.5048452615737915, "learning_rate": 8.026698824634344e-06, "loss": 1.315, "step": 10781 }, { "epoch": 0.5894782882057871, "grad_norm": 1.4639180898666382, "learning_rate": 8.024907431448786e-06, "loss": 1.6481, "step": 10782 }, { "epoch": 0.5895329606495087, "grad_norm": 1.7235910892486572, "learning_rate": 8.023116104216198e-06, "loss": 1.4139, "step": 10783 }, { "epoch": 0.5895876330932301, "grad_norm": 1.5934175252914429, "learning_rate": 8.021324842996405e-06, "loss": 1.5033, "step": 10784 }, { "epoch": 0.5896423055369517, "grad_norm": 1.5808141231536865, "learning_rate": 8.019533647849221e-06, "loss": 1.3425, "step": 10785 }, { "epoch": 0.5896969779806733, "grad_norm": 1.4626357555389404, "learning_rate": 8.017742518834454e-06, "loss": 1.3741, "step": 10786 }, { "epoch": 0.5897516504243948, "grad_norm": 1.602647304534912, "learning_rate": 8.015951456011917e-06, "loss": 1.2785, "step": 10787 }, { "epoch": 0.5898063228681164, "grad_norm": 1.56632399559021, "learning_rate": 8.014160459441417e-06, "loss": 1.272, "step": 10788 }, { "epoch": 0.589860995311838, "grad_norm": 1.7642778158187866, "learning_rate": 8.012369529182755e-06, "loss": 1.4266, "step": 10789 }, { "epoch": 0.5899156677555595, "grad_norm": 2.1494457721710205, "learning_rate": 8.010578665295742e-06, "loss": 1.3568, "step": 10790 }, { "epoch": 0.5899703401992811, "grad_norm": 1.50413978099823, "learning_rate": 8.008787867840172e-06, "loss": 1.5063, "step": 10791 }, { "epoch": 0.5900250126430026, "grad_norm": 1.5327070951461792, "learning_rate": 8.006997136875854e-06, "loss": 1.6092, "step": 10792 }, { "epoch": 0.5900796850867241, "grad_norm": 1.8452914953231812, "learning_rate": 8.005206472462576e-06, "loss": 1.1319, "step": 10793 }, { "epoch": 0.5901343575304457, "grad_norm": 1.7028553485870361, "learning_rate": 8.00341587466013e-06, "loss": 1.5044, "step": 10794 }, { "epoch": 0.5901890299741672, "grad_norm": 1.5509947538375854, "learning_rate": 8.001625343528318e-06, "loss": 1.4452, "step": 10795 }, { "epoch": 0.5902437024178888, "grad_norm": 1.3281267881393433, "learning_rate": 7.999834879126925e-06, "loss": 1.6035, "step": 10796 }, { "epoch": 0.5902983748616104, "grad_norm": 1.6950066089630127, "learning_rate": 7.998044481515736e-06, "loss": 1.116, "step": 10797 }, { "epoch": 0.5903530473053319, "grad_norm": 1.6761879920959473, "learning_rate": 7.996254150754544e-06, "loss": 1.4057, "step": 10798 }, { "epoch": 0.5904077197490535, "grad_norm": 1.9371588230133057, "learning_rate": 7.994463886903125e-06, "loss": 1.3974, "step": 10799 }, { "epoch": 0.5904623921927751, "grad_norm": 1.791553020477295, "learning_rate": 7.99267369002126e-06, "loss": 1.4549, "step": 10800 }, { "epoch": 0.5905170646364966, "grad_norm": 1.7661710977554321, "learning_rate": 7.990883560168736e-06, "loss": 1.427, "step": 10801 }, { "epoch": 0.5905717370802182, "grad_norm": 1.5894548892974854, "learning_rate": 7.989093497405323e-06, "loss": 1.2328, "step": 10802 }, { "epoch": 0.5906264095239397, "grad_norm": 1.5299128293991089, "learning_rate": 7.987303501790794e-06, "loss": 1.1979, "step": 10803 }, { "epoch": 0.5906810819676612, "grad_norm": 1.658044695854187, "learning_rate": 7.985513573384927e-06, "loss": 1.5771, "step": 10804 }, { "epoch": 0.5907357544113828, "grad_norm": 1.3920234441757202, "learning_rate": 7.983723712247487e-06, "loss": 1.6357, "step": 10805 }, { "epoch": 0.5907904268551044, "grad_norm": 1.4468780755996704, "learning_rate": 7.981933918438246e-06, "loss": 1.5212, "step": 10806 }, { "epoch": 0.5908450992988259, "grad_norm": 1.6849777698516846, "learning_rate": 7.980144192016967e-06, "loss": 1.4229, "step": 10807 }, { "epoch": 0.5908997717425475, "grad_norm": 1.5778557062149048, "learning_rate": 7.97835453304341e-06, "loss": 1.1215, "step": 10808 }, { "epoch": 0.590954444186269, "grad_norm": 1.4867440462112427, "learning_rate": 7.97656494157734e-06, "loss": 1.2086, "step": 10809 }, { "epoch": 0.5910091166299906, "grad_norm": 1.5000061988830566, "learning_rate": 7.974775417678518e-06, "loss": 1.5071, "step": 10810 }, { "epoch": 0.5910637890737122, "grad_norm": 1.6061499118804932, "learning_rate": 7.972985961406693e-06, "loss": 1.3521, "step": 10811 }, { "epoch": 0.5911184615174336, "grad_norm": 1.4963767528533936, "learning_rate": 7.971196572821628e-06, "loss": 1.4417, "step": 10812 }, { "epoch": 0.5911731339611552, "grad_norm": 1.8456445932388306, "learning_rate": 7.969407251983069e-06, "loss": 1.1435, "step": 10813 }, { "epoch": 0.5912278064048768, "grad_norm": 1.085951328277588, "learning_rate": 7.967617998950762e-06, "loss": 1.5902, "step": 10814 }, { "epoch": 0.5912824788485983, "grad_norm": 1.3290318250656128, "learning_rate": 7.965828813784464e-06, "loss": 1.4736, "step": 10815 }, { "epoch": 0.5913371512923199, "grad_norm": 1.6414916515350342, "learning_rate": 7.964039696543914e-06, "loss": 1.287, "step": 10816 }, { "epoch": 0.5913918237360415, "grad_norm": 1.8435300588607788, "learning_rate": 7.962250647288855e-06, "loss": 1.7273, "step": 10817 }, { "epoch": 0.591446496179763, "grad_norm": 1.5698732137680054, "learning_rate": 7.96046166607903e-06, "loss": 1.3513, "step": 10818 }, { "epoch": 0.5915011686234846, "grad_norm": 2.0734012126922607, "learning_rate": 7.958672752974173e-06, "loss": 1.3177, "step": 10819 }, { "epoch": 0.5915558410672062, "grad_norm": 1.8751999139785767, "learning_rate": 7.956883908034026e-06, "loss": 1.489, "step": 10820 }, { "epoch": 0.5916105135109276, "grad_norm": 1.305191993713379, "learning_rate": 7.955095131318319e-06, "loss": 1.7239, "step": 10821 }, { "epoch": 0.5916651859546492, "grad_norm": 1.3631972074508667, "learning_rate": 7.953306422886781e-06, "loss": 1.4482, "step": 10822 }, { "epoch": 0.5917198583983707, "grad_norm": 1.5317314863204956, "learning_rate": 7.951517782799147e-06, "loss": 1.412, "step": 10823 }, { "epoch": 0.5917745308420923, "grad_norm": 1.0410295724868774, "learning_rate": 7.949729211115144e-06, "loss": 1.6521, "step": 10824 }, { "epoch": 0.5918292032858139, "grad_norm": 1.2651036977767944, "learning_rate": 7.947940707894489e-06, "loss": 1.9156, "step": 10825 }, { "epoch": 0.5918838757295354, "grad_norm": 1.7197798490524292, "learning_rate": 7.946152273196912e-06, "loss": 1.3509, "step": 10826 }, { "epoch": 0.591938548173257, "grad_norm": 1.5506134033203125, "learning_rate": 7.94436390708213e-06, "loss": 1.4221, "step": 10827 }, { "epoch": 0.5919932206169786, "grad_norm": 1.606400966644287, "learning_rate": 7.942575609609857e-06, "loss": 1.4977, "step": 10828 }, { "epoch": 0.5920478930607, "grad_norm": 1.5915080308914185, "learning_rate": 7.940787380839818e-06, "loss": 1.4423, "step": 10829 }, { "epoch": 0.5921025655044216, "grad_norm": 1.562288522720337, "learning_rate": 7.938999220831718e-06, "loss": 1.3604, "step": 10830 }, { "epoch": 0.5921572379481432, "grad_norm": 1.6070644855499268, "learning_rate": 7.93721112964527e-06, "loss": 1.7059, "step": 10831 }, { "epoch": 0.5922119103918647, "grad_norm": 2.00640869140625, "learning_rate": 7.935423107340184e-06, "loss": 1.3191, "step": 10832 }, { "epoch": 0.5922665828355863, "grad_norm": 1.4507639408111572, "learning_rate": 7.93363515397616e-06, "loss": 1.3602, "step": 10833 }, { "epoch": 0.5923212552793079, "grad_norm": 1.5528483390808105, "learning_rate": 7.931847269612912e-06, "loss": 1.3844, "step": 10834 }, { "epoch": 0.5923759277230294, "grad_norm": 1.7313340902328491, "learning_rate": 7.930059454310138e-06, "loss": 1.4326, "step": 10835 }, { "epoch": 0.592430600166751, "grad_norm": 1.6686381101608276, "learning_rate": 7.928271708127532e-06, "loss": 1.3993, "step": 10836 }, { "epoch": 0.5924852726104725, "grad_norm": 1.0506153106689453, "learning_rate": 7.926484031124799e-06, "loss": 1.831, "step": 10837 }, { "epoch": 0.592539945054194, "grad_norm": 1.25496244430542, "learning_rate": 7.924696423361629e-06, "loss": 1.6009, "step": 10838 }, { "epoch": 0.5925946174979156, "grad_norm": 1.6915839910507202, "learning_rate": 7.92290888489771e-06, "loss": 1.1701, "step": 10839 }, { "epoch": 0.5926492899416371, "grad_norm": 1.8511031866073608, "learning_rate": 7.921121415792743e-06, "loss": 1.2821, "step": 10840 }, { "epoch": 0.5927039623853587, "grad_norm": 1.7032657861709595, "learning_rate": 7.91933401610641e-06, "loss": 1.4537, "step": 10841 }, { "epoch": 0.5927586348290803, "grad_norm": 1.412563443183899, "learning_rate": 7.917546685898393e-06, "loss": 1.3524, "step": 10842 }, { "epoch": 0.5928133072728018, "grad_norm": 1.7370747327804565, "learning_rate": 7.915759425228382e-06, "loss": 1.303, "step": 10843 }, { "epoch": 0.5928679797165234, "grad_norm": 1.2581095695495605, "learning_rate": 7.913972234156054e-06, "loss": 1.6397, "step": 10844 }, { "epoch": 0.592922652160245, "grad_norm": 1.684508204460144, "learning_rate": 7.912185112741087e-06, "loss": 1.4804, "step": 10845 }, { "epoch": 0.5929773246039665, "grad_norm": 1.3415100574493408, "learning_rate": 7.910398061043162e-06, "loss": 1.3924, "step": 10846 }, { "epoch": 0.593031997047688, "grad_norm": 1.2560794353485107, "learning_rate": 7.908611079121941e-06, "loss": 1.5466, "step": 10847 }, { "epoch": 0.5930866694914096, "grad_norm": 1.4080559015274048, "learning_rate": 7.906824167037112e-06, "loss": 1.5188, "step": 10848 }, { "epoch": 0.5931413419351311, "grad_norm": 1.676485300064087, "learning_rate": 7.905037324848334e-06, "loss": 1.5108, "step": 10849 }, { "epoch": 0.5931960143788527, "grad_norm": 1.3242238759994507, "learning_rate": 7.903250552615273e-06, "loss": 1.3406, "step": 10850 }, { "epoch": 0.5932506868225742, "grad_norm": 1.900640845298767, "learning_rate": 7.901463850397599e-06, "loss": 1.4869, "step": 10851 }, { "epoch": 0.5933053592662958, "grad_norm": 1.1475110054016113, "learning_rate": 7.899677218254971e-06, "loss": 1.518, "step": 10852 }, { "epoch": 0.5933600317100174, "grad_norm": 1.4604731798171997, "learning_rate": 7.897890656247045e-06, "loss": 1.3884, "step": 10853 }, { "epoch": 0.5934147041537389, "grad_norm": 2.0028419494628906, "learning_rate": 7.896104164433488e-06, "loss": 1.1056, "step": 10854 }, { "epoch": 0.5934693765974605, "grad_norm": 1.5479578971862793, "learning_rate": 7.89431774287395e-06, "loss": 1.5211, "step": 10855 }, { "epoch": 0.5935240490411821, "grad_norm": 1.5832934379577637, "learning_rate": 7.89253139162808e-06, "loss": 1.6219, "step": 10856 }, { "epoch": 0.5935787214849035, "grad_norm": 1.4784983396530151, "learning_rate": 7.890745110755535e-06, "loss": 1.3793, "step": 10857 }, { "epoch": 0.5936333939286251, "grad_norm": 1.583077073097229, "learning_rate": 7.88895890031596e-06, "loss": 1.5081, "step": 10858 }, { "epoch": 0.5936880663723467, "grad_norm": 1.3534389734268188, "learning_rate": 7.887172760368998e-06, "loss": 1.3027, "step": 10859 }, { "epoch": 0.5937427388160682, "grad_norm": 1.776088833808899, "learning_rate": 7.885386690974299e-06, "loss": 1.3533, "step": 10860 }, { "epoch": 0.5937974112597898, "grad_norm": 1.7622724771499634, "learning_rate": 7.883600692191496e-06, "loss": 1.4809, "step": 10861 }, { "epoch": 0.5938520837035114, "grad_norm": 1.391247034072876, "learning_rate": 7.881814764080235e-06, "loss": 1.4755, "step": 10862 }, { "epoch": 0.5939067561472329, "grad_norm": 1.4313876628875732, "learning_rate": 7.880028906700153e-06, "loss": 1.3536, "step": 10863 }, { "epoch": 0.5939614285909545, "grad_norm": 1.2971960306167603, "learning_rate": 7.878243120110876e-06, "loss": 1.8099, "step": 10864 }, { "epoch": 0.594016101034676, "grad_norm": 1.5217854976654053, "learning_rate": 7.876457404372042e-06, "loss": 1.3251, "step": 10865 }, { "epoch": 0.5940707734783975, "grad_norm": 1.2545374631881714, "learning_rate": 7.874671759543278e-06, "loss": 1.4551, "step": 10866 }, { "epoch": 0.5941254459221191, "grad_norm": 1.6109845638275146, "learning_rate": 7.872886185684207e-06, "loss": 1.1841, "step": 10867 }, { "epoch": 0.5941801183658406, "grad_norm": 1.421712040901184, "learning_rate": 7.871100682854465e-06, "loss": 1.2771, "step": 10868 }, { "epoch": 0.5942347908095622, "grad_norm": 1.3443866968154907, "learning_rate": 7.869315251113663e-06, "loss": 1.3319, "step": 10869 }, { "epoch": 0.5942894632532838, "grad_norm": 1.8139582872390747, "learning_rate": 7.867529890521424e-06, "loss": 1.4078, "step": 10870 }, { "epoch": 0.5943441356970053, "grad_norm": 1.4061654806137085, "learning_rate": 7.865744601137369e-06, "loss": 1.4131, "step": 10871 }, { "epoch": 0.5943988081407269, "grad_norm": 1.608107566833496, "learning_rate": 7.86395938302111e-06, "loss": 1.5262, "step": 10872 }, { "epoch": 0.5944534805844485, "grad_norm": 1.5148906707763672, "learning_rate": 7.862174236232252e-06, "loss": 1.078, "step": 10873 }, { "epoch": 0.59450815302817, "grad_norm": 1.3674952983856201, "learning_rate": 7.86038916083042e-06, "loss": 1.2685, "step": 10874 }, { "epoch": 0.5945628254718915, "grad_norm": 1.404867172241211, "learning_rate": 7.858604156875212e-06, "loss": 1.3431, "step": 10875 }, { "epoch": 0.5946174979156131, "grad_norm": 1.7046942710876465, "learning_rate": 7.856819224426239e-06, "loss": 1.5509, "step": 10876 }, { "epoch": 0.5946721703593346, "grad_norm": 1.468927264213562, "learning_rate": 7.8550343635431e-06, "loss": 1.1616, "step": 10877 }, { "epoch": 0.5947268428030562, "grad_norm": 1.3582370281219482, "learning_rate": 7.853249574285393e-06, "loss": 1.3006, "step": 10878 }, { "epoch": 0.5947815152467777, "grad_norm": 1.4501069784164429, "learning_rate": 7.851464856712725e-06, "loss": 1.4437, "step": 10879 }, { "epoch": 0.5948361876904993, "grad_norm": 1.3096009492874146, "learning_rate": 7.849680210884687e-06, "loss": 1.3396, "step": 10880 }, { "epoch": 0.5948908601342209, "grad_norm": 1.519492745399475, "learning_rate": 7.847895636860867e-06, "loss": 1.3946, "step": 10881 }, { "epoch": 0.5949455325779424, "grad_norm": 2.7242190837860107, "learning_rate": 7.846111134700867e-06, "loss": 1.349, "step": 10882 }, { "epoch": 0.595000205021664, "grad_norm": 1.4746506214141846, "learning_rate": 7.844326704464271e-06, "loss": 1.2351, "step": 10883 }, { "epoch": 0.5950548774653855, "grad_norm": 1.6470052003860474, "learning_rate": 7.842542346210663e-06, "loss": 1.395, "step": 10884 }, { "epoch": 0.595109549909107, "grad_norm": 1.5347319841384888, "learning_rate": 7.840758059999631e-06, "loss": 1.3767, "step": 10885 }, { "epoch": 0.5951642223528286, "grad_norm": 1.5667247772216797, "learning_rate": 7.838973845890752e-06, "loss": 1.3339, "step": 10886 }, { "epoch": 0.5952188947965502, "grad_norm": 1.3963273763656616, "learning_rate": 7.837189703943604e-06, "loss": 1.4622, "step": 10887 }, { "epoch": 0.5952735672402717, "grad_norm": 1.6956473588943481, "learning_rate": 7.835405634217772e-06, "loss": 1.4, "step": 10888 }, { "epoch": 0.5953282396839933, "grad_norm": 1.5312471389770508, "learning_rate": 7.833621636772824e-06, "loss": 1.3683, "step": 10889 }, { "epoch": 0.5953829121277149, "grad_norm": 1.5647876262664795, "learning_rate": 7.831837711668334e-06, "loss": 1.3895, "step": 10890 }, { "epoch": 0.5954375845714364, "grad_norm": 1.2889162302017212, "learning_rate": 7.83005385896387e-06, "loss": 1.7221, "step": 10891 }, { "epoch": 0.595492257015158, "grad_norm": 1.2934496402740479, "learning_rate": 7.828270078718994e-06, "loss": 1.4286, "step": 10892 }, { "epoch": 0.5955469294588794, "grad_norm": 1.233223795890808, "learning_rate": 7.826486370993284e-06, "loss": 1.3237, "step": 10893 }, { "epoch": 0.595601601902601, "grad_norm": 1.6493622064590454, "learning_rate": 7.824702735846292e-06, "loss": 1.3959, "step": 10894 }, { "epoch": 0.5956562743463226, "grad_norm": 1.7877721786499023, "learning_rate": 7.822919173337579e-06, "loss": 1.2748, "step": 10895 }, { "epoch": 0.5957109467900441, "grad_norm": 1.8683000802993774, "learning_rate": 7.821135683526706e-06, "loss": 1.2759, "step": 10896 }, { "epoch": 0.5957656192337657, "grad_norm": 1.2479217052459717, "learning_rate": 7.819352266473223e-06, "loss": 1.4612, "step": 10897 }, { "epoch": 0.5958202916774873, "grad_norm": 1.5341168642044067, "learning_rate": 7.817568922236683e-06, "loss": 1.2606, "step": 10898 }, { "epoch": 0.5958749641212088, "grad_norm": 1.4057236909866333, "learning_rate": 7.815785650876642e-06, "loss": 1.4799, "step": 10899 }, { "epoch": 0.5959296365649304, "grad_norm": 1.350419521331787, "learning_rate": 7.814002452452643e-06, "loss": 1.4218, "step": 10900 }, { "epoch": 0.595984309008652, "grad_norm": 1.4419972896575928, "learning_rate": 7.812219327024227e-06, "loss": 1.2773, "step": 10901 }, { "epoch": 0.5960389814523734, "grad_norm": 1.3088090419769287, "learning_rate": 7.810436274650946e-06, "loss": 1.3884, "step": 10902 }, { "epoch": 0.596093653896095, "grad_norm": 1.6727104187011719, "learning_rate": 7.808653295392334e-06, "loss": 1.4486, "step": 10903 }, { "epoch": 0.5961483263398166, "grad_norm": 1.4185184240341187, "learning_rate": 7.806870389307933e-06, "loss": 1.6695, "step": 10904 }, { "epoch": 0.5962029987835381, "grad_norm": 1.5495452880859375, "learning_rate": 7.805087556457275e-06, "loss": 1.6582, "step": 10905 }, { "epoch": 0.5962576712272597, "grad_norm": 1.9032363891601562, "learning_rate": 7.80330479689989e-06, "loss": 1.5557, "step": 10906 }, { "epoch": 0.5963123436709812, "grad_norm": 1.4056655168533325, "learning_rate": 7.801522110695317e-06, "loss": 1.386, "step": 10907 }, { "epoch": 0.5963670161147028, "grad_norm": 1.4805320501327515, "learning_rate": 7.79973949790308e-06, "loss": 1.4003, "step": 10908 }, { "epoch": 0.5964216885584244, "grad_norm": 1.2201488018035889, "learning_rate": 7.797956958582702e-06, "loss": 1.5951, "step": 10909 }, { "epoch": 0.5964763610021458, "grad_norm": 1.2538050413131714, "learning_rate": 7.796174492793712e-06, "loss": 1.7351, "step": 10910 }, { "epoch": 0.5965310334458674, "grad_norm": 1.5464171171188354, "learning_rate": 7.794392100595624e-06, "loss": 1.1691, "step": 10911 }, { "epoch": 0.596585705889589, "grad_norm": 1.6364562511444092, "learning_rate": 7.792609782047958e-06, "loss": 1.5533, "step": 10912 }, { "epoch": 0.5966403783333105, "grad_norm": 1.4285098314285278, "learning_rate": 7.790827537210232e-06, "loss": 1.2614, "step": 10913 }, { "epoch": 0.5966950507770321, "grad_norm": 1.6076356172561646, "learning_rate": 7.789045366141963e-06, "loss": 1.2656, "step": 10914 }, { "epoch": 0.5967497232207537, "grad_norm": 1.5331101417541504, "learning_rate": 7.787263268902652e-06, "loss": 1.3066, "step": 10915 }, { "epoch": 0.5968043956644752, "grad_norm": 1.4587910175323486, "learning_rate": 7.785481245551816e-06, "loss": 1.5612, "step": 10916 }, { "epoch": 0.5968590681081968, "grad_norm": 1.709076166152954, "learning_rate": 7.783699296148953e-06, "loss": 1.3758, "step": 10917 }, { "epoch": 0.5969137405519184, "grad_norm": 1.6983699798583984, "learning_rate": 7.781917420753575e-06, "loss": 1.4603, "step": 10918 }, { "epoch": 0.5969684129956399, "grad_norm": 1.3797054290771484, "learning_rate": 7.78013561942518e-06, "loss": 1.3502, "step": 10919 }, { "epoch": 0.5970230854393614, "grad_norm": 1.7943702936172485, "learning_rate": 7.778353892223261e-06, "loss": 1.0972, "step": 10920 }, { "epoch": 0.5970777578830829, "grad_norm": 1.6073659658432007, "learning_rate": 7.776572239207323e-06, "loss": 1.3721, "step": 10921 }, { "epoch": 0.5971324303268045, "grad_norm": 1.6476290225982666, "learning_rate": 7.774790660436857e-06, "loss": 1.2903, "step": 10922 }, { "epoch": 0.5971871027705261, "grad_norm": 1.518267035484314, "learning_rate": 7.773009155971349e-06, "loss": 1.5199, "step": 10923 }, { "epoch": 0.5972417752142476, "grad_norm": 1.7068979740142822, "learning_rate": 7.771227725870293e-06, "loss": 1.523, "step": 10924 }, { "epoch": 0.5972964476579692, "grad_norm": 1.6402628421783447, "learning_rate": 7.769446370193174e-06, "loss": 1.5109, "step": 10925 }, { "epoch": 0.5973511201016908, "grad_norm": 2.087547540664673, "learning_rate": 7.76766508899947e-06, "loss": 1.4046, "step": 10926 }, { "epoch": 0.5974057925454123, "grad_norm": 1.7478678226470947, "learning_rate": 7.765883882348673e-06, "loss": 1.2369, "step": 10927 }, { "epoch": 0.5974604649891339, "grad_norm": 1.4765726327896118, "learning_rate": 7.764102750300253e-06, "loss": 1.3825, "step": 10928 }, { "epoch": 0.5975151374328554, "grad_norm": 1.5093072652816772, "learning_rate": 7.762321692913687e-06, "loss": 1.5006, "step": 10929 }, { "epoch": 0.5975698098765769, "grad_norm": 1.6992172002792358, "learning_rate": 7.760540710248455e-06, "loss": 1.7837, "step": 10930 }, { "epoch": 0.5976244823202985, "grad_norm": 1.6263376474380493, "learning_rate": 7.758759802364022e-06, "loss": 1.1994, "step": 10931 }, { "epoch": 0.5976791547640201, "grad_norm": 1.502871036529541, "learning_rate": 7.756978969319855e-06, "loss": 1.4752, "step": 10932 }, { "epoch": 0.5977338272077416, "grad_norm": 1.8518153429031372, "learning_rate": 7.755198211175428e-06, "loss": 1.2405, "step": 10933 }, { "epoch": 0.5977884996514632, "grad_norm": 1.344430923461914, "learning_rate": 7.753417527990198e-06, "loss": 1.3927, "step": 10934 }, { "epoch": 0.5978431720951847, "grad_norm": 1.967859148979187, "learning_rate": 7.751636919823629e-06, "loss": 1.547, "step": 10935 }, { "epoch": 0.5978978445389063, "grad_norm": 1.5791951417922974, "learning_rate": 7.74985638673518e-06, "loss": 1.4919, "step": 10936 }, { "epoch": 0.5979525169826279, "grad_norm": 1.7026033401489258, "learning_rate": 7.748075928784303e-06, "loss": 1.4632, "step": 10937 }, { "epoch": 0.5980071894263493, "grad_norm": 1.4749161005020142, "learning_rate": 7.746295546030459e-06, "loss": 1.4451, "step": 10938 }, { "epoch": 0.5980618618700709, "grad_norm": 1.63347589969635, "learning_rate": 7.744515238533095e-06, "loss": 1.3976, "step": 10939 }, { "epoch": 0.5981165343137925, "grad_norm": 1.9068188667297363, "learning_rate": 7.742735006351656e-06, "loss": 1.4152, "step": 10940 }, { "epoch": 0.598171206757514, "grad_norm": 1.5201431512832642, "learning_rate": 7.740954849545596e-06, "loss": 1.3172, "step": 10941 }, { "epoch": 0.5982258792012356, "grad_norm": 1.4059083461761475, "learning_rate": 7.739174768174355e-06, "loss": 1.5791, "step": 10942 }, { "epoch": 0.5982805516449572, "grad_norm": 1.4140619039535522, "learning_rate": 7.73739476229737e-06, "loss": 1.3326, "step": 10943 }, { "epoch": 0.5983352240886787, "grad_norm": 1.613248348236084, "learning_rate": 7.735614831974086e-06, "loss": 1.3936, "step": 10944 }, { "epoch": 0.5983898965324003, "grad_norm": 1.3837825059890747, "learning_rate": 7.733834977263938e-06, "loss": 1.3173, "step": 10945 }, { "epoch": 0.5984445689761219, "grad_norm": 1.2523605823516846, "learning_rate": 7.732055198226352e-06, "loss": 1.5738, "step": 10946 }, { "epoch": 0.5984992414198433, "grad_norm": 1.9641042947769165, "learning_rate": 7.73027549492077e-06, "loss": 1.2266, "step": 10947 }, { "epoch": 0.5985539138635649, "grad_norm": 1.747961401939392, "learning_rate": 7.72849586740661e-06, "loss": 1.5933, "step": 10948 }, { "epoch": 0.5986085863072864, "grad_norm": 1.4864298105239868, "learning_rate": 7.72671631574331e-06, "loss": 1.5457, "step": 10949 }, { "epoch": 0.598663258751008, "grad_norm": 1.5405696630477905, "learning_rate": 7.724936839990285e-06, "loss": 1.3318, "step": 10950 }, { "epoch": 0.5987179311947296, "grad_norm": 1.7269091606140137, "learning_rate": 7.723157440206953e-06, "loss": 1.4435, "step": 10951 }, { "epoch": 0.5987726036384511, "grad_norm": 1.4397079944610596, "learning_rate": 7.721378116452741e-06, "loss": 1.657, "step": 10952 }, { "epoch": 0.5988272760821727, "grad_norm": 1.7741097211837769, "learning_rate": 7.71959886878706e-06, "loss": 1.3049, "step": 10953 }, { "epoch": 0.5988819485258943, "grad_norm": 1.4890131950378418, "learning_rate": 7.717819697269322e-06, "loss": 1.4266, "step": 10954 }, { "epoch": 0.5989366209696158, "grad_norm": 1.4532188177108765, "learning_rate": 7.716040601958941e-06, "loss": 1.4018, "step": 10955 }, { "epoch": 0.5989912934133373, "grad_norm": 1.3629423379898071, "learning_rate": 7.714261582915325e-06, "loss": 1.3743, "step": 10956 }, { "epoch": 0.5990459658570589, "grad_norm": 1.3782681226730347, "learning_rate": 7.712482640197874e-06, "loss": 1.5436, "step": 10957 }, { "epoch": 0.5991006383007804, "grad_norm": 1.3994636535644531, "learning_rate": 7.710703773866001e-06, "loss": 1.4792, "step": 10958 }, { "epoch": 0.599155310744502, "grad_norm": 1.9688665866851807, "learning_rate": 7.708924983979099e-06, "loss": 1.1943, "step": 10959 }, { "epoch": 0.5992099831882236, "grad_norm": 1.4985215663909912, "learning_rate": 7.707146270596564e-06, "loss": 1.3497, "step": 10960 }, { "epoch": 0.5992646556319451, "grad_norm": 1.3821377754211426, "learning_rate": 7.7053676337778e-06, "loss": 1.452, "step": 10961 }, { "epoch": 0.5993193280756667, "grad_norm": 1.531480312347412, "learning_rate": 7.703589073582194e-06, "loss": 1.2832, "step": 10962 }, { "epoch": 0.5993740005193882, "grad_norm": 1.6070165634155273, "learning_rate": 7.701810590069138e-06, "loss": 1.2527, "step": 10963 }, { "epoch": 0.5994286729631098, "grad_norm": 1.7603274583816528, "learning_rate": 7.700032183298021e-06, "loss": 1.476, "step": 10964 }, { "epoch": 0.5994833454068313, "grad_norm": 0.972983717918396, "learning_rate": 7.698253853328222e-06, "loss": 1.5627, "step": 10965 }, { "epoch": 0.5995380178505528, "grad_norm": 1.508955717086792, "learning_rate": 7.69647560021913e-06, "loss": 1.4029, "step": 10966 }, { "epoch": 0.5995926902942744, "grad_norm": 1.4625416994094849, "learning_rate": 7.694697424030126e-06, "loss": 1.5106, "step": 10967 }, { "epoch": 0.599647362737996, "grad_norm": 1.8031322956085205, "learning_rate": 7.69291932482058e-06, "loss": 1.5915, "step": 10968 }, { "epoch": 0.5997020351817175, "grad_norm": 2.6684329509735107, "learning_rate": 7.691141302649877e-06, "loss": 1.3542, "step": 10969 }, { "epoch": 0.5997567076254391, "grad_norm": 1.610630750656128, "learning_rate": 7.68936335757738e-06, "loss": 1.4286, "step": 10970 }, { "epoch": 0.5998113800691607, "grad_norm": 1.6181381940841675, "learning_rate": 7.68758548966246e-06, "loss": 1.4836, "step": 10971 }, { "epoch": 0.5998660525128822, "grad_norm": 1.2320438623428345, "learning_rate": 7.685807698964491e-06, "loss": 1.5883, "step": 10972 }, { "epoch": 0.5999207249566038, "grad_norm": 2.2802138328552246, "learning_rate": 7.684029985542833e-06, "loss": 1.1399, "step": 10973 }, { "epoch": 0.5999753974003253, "grad_norm": 1.369992733001709, "learning_rate": 7.682252349456847e-06, "loss": 1.1664, "step": 10974 }, { "epoch": 0.6000300698440468, "grad_norm": 1.4972186088562012, "learning_rate": 7.680474790765895e-06, "loss": 1.5761, "step": 10975 }, { "epoch": 0.6000847422877684, "grad_norm": 1.6128090620040894, "learning_rate": 7.678697309529329e-06, "loss": 1.4855, "step": 10976 }, { "epoch": 0.6001394147314899, "grad_norm": 1.4792814254760742, "learning_rate": 7.676919905806512e-06, "loss": 1.2137, "step": 10977 }, { "epoch": 0.6001940871752115, "grad_norm": 1.395521640777588, "learning_rate": 7.67514257965679e-06, "loss": 1.3557, "step": 10978 }, { "epoch": 0.6002487596189331, "grad_norm": 1.6087783575057983, "learning_rate": 7.673365331139507e-06, "loss": 1.4018, "step": 10979 }, { "epoch": 0.6003034320626546, "grad_norm": 1.5576295852661133, "learning_rate": 7.671588160314021e-06, "loss": 1.2652, "step": 10980 }, { "epoch": 0.6003581045063762, "grad_norm": 1.5921134948730469, "learning_rate": 7.66981106723967e-06, "loss": 1.3875, "step": 10981 }, { "epoch": 0.6004127769500978, "grad_norm": 1.4851047992706299, "learning_rate": 7.668034051975793e-06, "loss": 1.4219, "step": 10982 }, { "epoch": 0.6004674493938192, "grad_norm": 1.2507176399230957, "learning_rate": 7.666257114581732e-06, "loss": 1.5515, "step": 10983 }, { "epoch": 0.6005221218375408, "grad_norm": 1.7699753046035767, "learning_rate": 7.664480255116825e-06, "loss": 1.2579, "step": 10984 }, { "epoch": 0.6005767942812624, "grad_norm": 1.3763625621795654, "learning_rate": 7.662703473640396e-06, "loss": 1.5917, "step": 10985 }, { "epoch": 0.6006314667249839, "grad_norm": 0.9962906241416931, "learning_rate": 7.660926770211788e-06, "loss": 1.5734, "step": 10986 }, { "epoch": 0.6006861391687055, "grad_norm": 1.3500540256500244, "learning_rate": 7.659150144890325e-06, "loss": 1.3328, "step": 10987 }, { "epoch": 0.6007408116124271, "grad_norm": 1.6937346458435059, "learning_rate": 7.657373597735327e-06, "loss": 1.4994, "step": 10988 }, { "epoch": 0.6007954840561486, "grad_norm": 1.41469407081604, "learning_rate": 7.655597128806125e-06, "loss": 1.3669, "step": 10989 }, { "epoch": 0.6008501564998702, "grad_norm": 1.3405647277832031, "learning_rate": 7.653820738162031e-06, "loss": 1.3591, "step": 10990 }, { "epoch": 0.6009048289435917, "grad_norm": 1.538936734199524, "learning_rate": 7.652044425862375e-06, "loss": 1.2929, "step": 10991 }, { "epoch": 0.6009595013873132, "grad_norm": 1.8431814908981323, "learning_rate": 7.650268191966463e-06, "loss": 1.2929, "step": 10992 }, { "epoch": 0.6010141738310348, "grad_norm": 1.6851004362106323, "learning_rate": 7.64849203653361e-06, "loss": 1.4195, "step": 10993 }, { "epoch": 0.6010688462747563, "grad_norm": 1.6631661653518677, "learning_rate": 7.646715959623125e-06, "loss": 1.526, "step": 10994 }, { "epoch": 0.6011235187184779, "grad_norm": 1.7655415534973145, "learning_rate": 7.644939961294318e-06, "loss": 1.4141, "step": 10995 }, { "epoch": 0.6011781911621995, "grad_norm": 1.6460423469543457, "learning_rate": 7.643164041606489e-06, "loss": 1.3998, "step": 10996 }, { "epoch": 0.601232863605921, "grad_norm": 1.8691942691802979, "learning_rate": 7.641388200618944e-06, "loss": 1.2763, "step": 10997 }, { "epoch": 0.6012875360496426, "grad_norm": 1.4803436994552612, "learning_rate": 7.639612438390984e-06, "loss": 1.6329, "step": 10998 }, { "epoch": 0.6013422084933642, "grad_norm": 1.652008056640625, "learning_rate": 7.6378367549819e-06, "loss": 1.3826, "step": 10999 }, { "epoch": 0.6013968809370857, "grad_norm": 1.2831151485443115, "learning_rate": 7.636061150450991e-06, "loss": 1.46, "step": 11000 }, { "epoch": 0.6014515533808072, "grad_norm": 1.70205819606781, "learning_rate": 7.63428562485755e-06, "loss": 1.4231, "step": 11001 }, { "epoch": 0.6015062258245288, "grad_norm": 1.828338384628296, "learning_rate": 7.63251017826086e-06, "loss": 1.5074, "step": 11002 }, { "epoch": 0.6015608982682503, "grad_norm": 1.6128123998641968, "learning_rate": 7.630734810720212e-06, "loss": 1.0603, "step": 11003 }, { "epoch": 0.6016155707119719, "grad_norm": 1.2991154193878174, "learning_rate": 7.6289595222948865e-06, "loss": 1.4452, "step": 11004 }, { "epoch": 0.6016702431556934, "grad_norm": 1.4587610960006714, "learning_rate": 7.627184313044169e-06, "loss": 1.5228, "step": 11005 }, { "epoch": 0.601724915599415, "grad_norm": 1.3448176383972168, "learning_rate": 7.625409183027336e-06, "loss": 1.5285, "step": 11006 }, { "epoch": 0.6017795880431366, "grad_norm": 1.3270424604415894, "learning_rate": 7.623634132303659e-06, "loss": 1.6488, "step": 11007 }, { "epoch": 0.6018342604868581, "grad_norm": 1.417190432548523, "learning_rate": 7.621859160932419e-06, "loss": 1.3036, "step": 11008 }, { "epoch": 0.6018889329305797, "grad_norm": 1.5452516078948975, "learning_rate": 7.62008426897288e-06, "loss": 1.6967, "step": 11009 }, { "epoch": 0.6019436053743012, "grad_norm": 1.1250550746917725, "learning_rate": 7.618309456484309e-06, "loss": 1.376, "step": 11010 }, { "epoch": 0.6019982778180227, "grad_norm": 1.2017459869384766, "learning_rate": 7.616534723525977e-06, "loss": 1.3812, "step": 11011 }, { "epoch": 0.6020529502617443, "grad_norm": 1.2918510437011719, "learning_rate": 7.614760070157145e-06, "loss": 1.4739, "step": 11012 }, { "epoch": 0.6021076227054659, "grad_norm": 2.100167751312256, "learning_rate": 7.612985496437069e-06, "loss": 1.4155, "step": 11013 }, { "epoch": 0.6021622951491874, "grad_norm": 1.3970392942428589, "learning_rate": 7.61121100242501e-06, "loss": 1.4331, "step": 11014 }, { "epoch": 0.602216967592909, "grad_norm": 1.0578336715698242, "learning_rate": 7.609436588180221e-06, "loss": 1.5718, "step": 11015 }, { "epoch": 0.6022716400366306, "grad_norm": 1.5190269947052002, "learning_rate": 7.60766225376195e-06, "loss": 1.4667, "step": 11016 }, { "epoch": 0.6023263124803521, "grad_norm": 1.3819481134414673, "learning_rate": 7.605887999229454e-06, "loss": 1.5408, "step": 11017 }, { "epoch": 0.6023809849240737, "grad_norm": 1.3977829217910767, "learning_rate": 7.604113824641975e-06, "loss": 1.5107, "step": 11018 }, { "epoch": 0.6024356573677953, "grad_norm": 1.2913641929626465, "learning_rate": 7.602339730058759e-06, "loss": 1.4034, "step": 11019 }, { "epoch": 0.6024903298115167, "grad_norm": 1.3235459327697754, "learning_rate": 7.600565715539044e-06, "loss": 1.5184, "step": 11020 }, { "epoch": 0.6025450022552383, "grad_norm": 1.3768515586853027, "learning_rate": 7.5987917811420675e-06, "loss": 1.4353, "step": 11021 }, { "epoch": 0.6025996746989598, "grad_norm": 1.806366205215454, "learning_rate": 7.597017926927073e-06, "loss": 1.27, "step": 11022 }, { "epoch": 0.6026543471426814, "grad_norm": 2.1195273399353027, "learning_rate": 7.595244152953287e-06, "loss": 1.4447, "step": 11023 }, { "epoch": 0.602709019586403, "grad_norm": 1.8228496313095093, "learning_rate": 7.593470459279939e-06, "loss": 1.346, "step": 11024 }, { "epoch": 0.6027636920301245, "grad_norm": 1.8381582498550415, "learning_rate": 7.591696845966263e-06, "loss": 1.2918, "step": 11025 }, { "epoch": 0.6028183644738461, "grad_norm": 1.832097053527832, "learning_rate": 7.58992331307148e-06, "loss": 1.5111, "step": 11026 }, { "epoch": 0.6028730369175677, "grad_norm": 1.5348145961761475, "learning_rate": 7.588149860654812e-06, "loss": 1.3133, "step": 11027 }, { "epoch": 0.6029277093612891, "grad_norm": 1.477232575416565, "learning_rate": 7.586376488775481e-06, "loss": 1.5826, "step": 11028 }, { "epoch": 0.6029823818050107, "grad_norm": 1.419479250907898, "learning_rate": 7.584603197492703e-06, "loss": 1.3892, "step": 11029 }, { "epoch": 0.6030370542487323, "grad_norm": 1.625468134880066, "learning_rate": 7.582829986865688e-06, "loss": 1.8212, "step": 11030 }, { "epoch": 0.6030917266924538, "grad_norm": 1.2553174495697021, "learning_rate": 7.581056856953656e-06, "loss": 1.589, "step": 11031 }, { "epoch": 0.6031463991361754, "grad_norm": 1.494396686553955, "learning_rate": 7.5792838078158094e-06, "loss": 1.708, "step": 11032 }, { "epoch": 0.603201071579897, "grad_norm": 1.3880494832992554, "learning_rate": 7.57751083951136e-06, "loss": 1.4739, "step": 11033 }, { "epoch": 0.6032557440236185, "grad_norm": 1.892447829246521, "learning_rate": 7.575737952099507e-06, "loss": 1.3594, "step": 11034 }, { "epoch": 0.6033104164673401, "grad_norm": 1.5607885122299194, "learning_rate": 7.573965145639448e-06, "loss": 1.0581, "step": 11035 }, { "epoch": 0.6033650889110616, "grad_norm": 2.110814332962036, "learning_rate": 7.57219242019039e-06, "loss": 1.294, "step": 11036 }, { "epoch": 0.6034197613547831, "grad_norm": 1.3573325872421265, "learning_rate": 7.570419775811526e-06, "loss": 1.4411, "step": 11037 }, { "epoch": 0.6034744337985047, "grad_norm": 1.5884621143341064, "learning_rate": 7.568647212562043e-06, "loss": 1.3346, "step": 11038 }, { "epoch": 0.6035291062422262, "grad_norm": 1.4478175640106201, "learning_rate": 7.566874730501137e-06, "loss": 1.2681, "step": 11039 }, { "epoch": 0.6035837786859478, "grad_norm": 2.0733659267425537, "learning_rate": 7.565102329687994e-06, "loss": 1.3377, "step": 11040 }, { "epoch": 0.6036384511296694, "grad_norm": 1.1662282943725586, "learning_rate": 7.563330010181793e-06, "loss": 1.5689, "step": 11041 }, { "epoch": 0.6036931235733909, "grad_norm": 1.4982472658157349, "learning_rate": 7.561557772041725e-06, "loss": 1.3991, "step": 11042 }, { "epoch": 0.6037477960171125, "grad_norm": 1.6847810745239258, "learning_rate": 7.559785615326967e-06, "loss": 1.3626, "step": 11043 }, { "epoch": 0.6038024684608341, "grad_norm": 1.9635106325149536, "learning_rate": 7.558013540096687e-06, "loss": 1.3343, "step": 11044 }, { "epoch": 0.6038571409045556, "grad_norm": 1.5313607454299927, "learning_rate": 7.55624154641007e-06, "loss": 1.3545, "step": 11045 }, { "epoch": 0.6039118133482771, "grad_norm": 1.784651279449463, "learning_rate": 7.5544696343262805e-06, "loss": 1.44, "step": 11046 }, { "epoch": 0.6039664857919987, "grad_norm": 1.387984037399292, "learning_rate": 7.552697803904489e-06, "loss": 1.4109, "step": 11047 }, { "epoch": 0.6040211582357202, "grad_norm": 1.3679102659225464, "learning_rate": 7.550926055203863e-06, "loss": 1.4552, "step": 11048 }, { "epoch": 0.6040758306794418, "grad_norm": 1.8509870767593384, "learning_rate": 7.549154388283557e-06, "loss": 1.3145, "step": 11049 }, { "epoch": 0.6041305031231633, "grad_norm": 1.617070198059082, "learning_rate": 7.547382803202743e-06, "loss": 1.3724, "step": 11050 }, { "epoch": 0.6041851755668849, "grad_norm": 1.6619075536727905, "learning_rate": 7.545611300020571e-06, "loss": 1.461, "step": 11051 }, { "epoch": 0.6042398480106065, "grad_norm": 2.157846212387085, "learning_rate": 7.543839878796195e-06, "loss": 1.2925, "step": 11052 }, { "epoch": 0.604294520454328, "grad_norm": 1.493599534034729, "learning_rate": 7.542068539588771e-06, "loss": 1.465, "step": 11053 }, { "epoch": 0.6043491928980496, "grad_norm": 1.3042370080947876, "learning_rate": 7.5402972824574475e-06, "loss": 1.3476, "step": 11054 }, { "epoch": 0.6044038653417712, "grad_norm": 1.4698779582977295, "learning_rate": 7.5385261074613634e-06, "loss": 1.3738, "step": 11055 }, { "epoch": 0.6044585377854926, "grad_norm": 1.3729479312896729, "learning_rate": 7.536755014659674e-06, "loss": 1.2538, "step": 11056 }, { "epoch": 0.6045132102292142, "grad_norm": 1.4128096103668213, "learning_rate": 7.534984004111515e-06, "loss": 1.3536, "step": 11057 }, { "epoch": 0.6045678826729358, "grad_norm": 1.6260535717010498, "learning_rate": 7.533213075876022e-06, "loss": 1.3216, "step": 11058 }, { "epoch": 0.6046225551166573, "grad_norm": 1.0406028032302856, "learning_rate": 7.531442230012336e-06, "loss": 1.8928, "step": 11059 }, { "epoch": 0.6046772275603789, "grad_norm": 1.9070343971252441, "learning_rate": 7.529671466579581e-06, "loss": 1.2444, "step": 11060 }, { "epoch": 0.6047319000041005, "grad_norm": 2.067253351211548, "learning_rate": 7.527900785636897e-06, "loss": 1.2829, "step": 11061 }, { "epoch": 0.604786572447822, "grad_norm": 1.567018747329712, "learning_rate": 7.526130187243408e-06, "loss": 1.4255, "step": 11062 }, { "epoch": 0.6048412448915436, "grad_norm": 1.855852723121643, "learning_rate": 7.5243596714582315e-06, "loss": 1.2091, "step": 11063 }, { "epoch": 0.604895917335265, "grad_norm": 1.4886178970336914, "learning_rate": 7.522589238340499e-06, "loss": 1.5122, "step": 11064 }, { "epoch": 0.6049505897789866, "grad_norm": 1.2963197231292725, "learning_rate": 7.520818887949326e-06, "loss": 1.4239, "step": 11065 }, { "epoch": 0.6050052622227082, "grad_norm": 1.6851003170013428, "learning_rate": 7.519048620343825e-06, "loss": 1.5065, "step": 11066 }, { "epoch": 0.6050599346664297, "grad_norm": 1.3206840753555298, "learning_rate": 7.517278435583115e-06, "loss": 1.3142, "step": 11067 }, { "epoch": 0.6051146071101513, "grad_norm": 1.7697710990905762, "learning_rate": 7.515508333726304e-06, "loss": 1.3323, "step": 11068 }, { "epoch": 0.6051692795538729, "grad_norm": 1.7284815311431885, "learning_rate": 7.513738314832496e-06, "loss": 1.6251, "step": 11069 }, { "epoch": 0.6052239519975944, "grad_norm": 1.632994532585144, "learning_rate": 7.5119683789608035e-06, "loss": 1.449, "step": 11070 }, { "epoch": 0.605278624441316, "grad_norm": 1.8258181810379028, "learning_rate": 7.510198526170324e-06, "loss": 1.0335, "step": 11071 }, { "epoch": 0.6053332968850376, "grad_norm": 1.3631629943847656, "learning_rate": 7.508428756520158e-06, "loss": 1.7061, "step": 11072 }, { "epoch": 0.605387969328759, "grad_norm": 1.349111557006836, "learning_rate": 7.506659070069404e-06, "loss": 1.2439, "step": 11073 }, { "epoch": 0.6054426417724806, "grad_norm": 1.2384225130081177, "learning_rate": 7.50488946687715e-06, "loss": 1.5979, "step": 11074 }, { "epoch": 0.6054973142162022, "grad_norm": 1.2861207723617554, "learning_rate": 7.503119947002496e-06, "loss": 1.5567, "step": 11075 }, { "epoch": 0.6055519866599237, "grad_norm": 1.4566720724105835, "learning_rate": 7.501350510504526e-06, "loss": 1.2474, "step": 11076 }, { "epoch": 0.6056066591036453, "grad_norm": 1.722357988357544, "learning_rate": 7.4995811574423235e-06, "loss": 1.5725, "step": 11077 }, { "epoch": 0.6056613315473668, "grad_norm": 1.389035940170288, "learning_rate": 7.497811887874976e-06, "loss": 1.3822, "step": 11078 }, { "epoch": 0.6057160039910884, "grad_norm": 1.3420735597610474, "learning_rate": 7.496042701861561e-06, "loss": 1.2129, "step": 11079 }, { "epoch": 0.60577067643481, "grad_norm": 2.193683624267578, "learning_rate": 7.494273599461153e-06, "loss": 1.4846, "step": 11080 }, { "epoch": 0.6058253488785315, "grad_norm": 1.586911916732788, "learning_rate": 7.492504580732831e-06, "loss": 1.2954, "step": 11081 }, { "epoch": 0.605880021322253, "grad_norm": 1.6757527589797974, "learning_rate": 7.490735645735667e-06, "loss": 1.303, "step": 11082 }, { "epoch": 0.6059346937659746, "grad_norm": 1.4150277376174927, "learning_rate": 7.4889667945287224e-06, "loss": 1.4511, "step": 11083 }, { "epoch": 0.6059893662096961, "grad_norm": 1.4598714113235474, "learning_rate": 7.487198027171074e-06, "loss": 1.3462, "step": 11084 }, { "epoch": 0.6060440386534177, "grad_norm": 1.7377405166625977, "learning_rate": 7.485429343721779e-06, "loss": 1.3498, "step": 11085 }, { "epoch": 0.6060987110971393, "grad_norm": 1.3423676490783691, "learning_rate": 7.4836607442398974e-06, "loss": 1.57, "step": 11086 }, { "epoch": 0.6061533835408608, "grad_norm": 1.4534226655960083, "learning_rate": 7.481892228784491e-06, "loss": 1.2878, "step": 11087 }, { "epoch": 0.6062080559845824, "grad_norm": 2.1035146713256836, "learning_rate": 7.480123797414608e-06, "loss": 1.3735, "step": 11088 }, { "epoch": 0.606262728428304, "grad_norm": 1.309571623802185, "learning_rate": 7.478355450189307e-06, "loss": 1.3494, "step": 11089 }, { "epoch": 0.6063174008720255, "grad_norm": 1.868104100227356, "learning_rate": 7.476587187167636e-06, "loss": 1.3601, "step": 11090 }, { "epoch": 0.606372073315747, "grad_norm": 1.1883643865585327, "learning_rate": 7.474819008408638e-06, "loss": 1.3616, "step": 11091 }, { "epoch": 0.6064267457594685, "grad_norm": 1.3831205368041992, "learning_rate": 7.47305091397136e-06, "loss": 1.3977, "step": 11092 }, { "epoch": 0.6064814182031901, "grad_norm": 1.5662891864776611, "learning_rate": 7.4712829039148425e-06, "loss": 1.1487, "step": 11093 }, { "epoch": 0.6065360906469117, "grad_norm": 1.6505019664764404, "learning_rate": 7.469514978298119e-06, "loss": 1.2346, "step": 11094 }, { "epoch": 0.6065907630906332, "grad_norm": 1.600957989692688, "learning_rate": 7.467747137180232e-06, "loss": 1.4102, "step": 11095 }, { "epoch": 0.6066454355343548, "grad_norm": 1.6174092292785645, "learning_rate": 7.46597938062021e-06, "loss": 1.2762, "step": 11096 }, { "epoch": 0.6067001079780764, "grad_norm": 1.4462900161743164, "learning_rate": 7.46421170867708e-06, "loss": 1.45, "step": 11097 }, { "epoch": 0.6067547804217979, "grad_norm": 1.2344083786010742, "learning_rate": 7.462444121409875e-06, "loss": 1.6245, "step": 11098 }, { "epoch": 0.6068094528655195, "grad_norm": 1.370033621788025, "learning_rate": 7.460676618877615e-06, "loss": 1.5236, "step": 11099 }, { "epoch": 0.606864125309241, "grad_norm": 1.5379834175109863, "learning_rate": 7.458909201139317e-06, "loss": 1.4622, "step": 11100 }, { "epoch": 0.6069187977529625, "grad_norm": 1.5298255681991577, "learning_rate": 7.457141868254007e-06, "loss": 1.2773, "step": 11101 }, { "epoch": 0.6069734701966841, "grad_norm": 1.3243625164031982, "learning_rate": 7.455374620280693e-06, "loss": 1.3212, "step": 11102 }, { "epoch": 0.6070281426404057, "grad_norm": 1.3536005020141602, "learning_rate": 7.453607457278398e-06, "loss": 1.62, "step": 11103 }, { "epoch": 0.6070828150841272, "grad_norm": 1.1295573711395264, "learning_rate": 7.451840379306123e-06, "loss": 1.6906, "step": 11104 }, { "epoch": 0.6071374875278488, "grad_norm": 1.7772793769836426, "learning_rate": 7.450073386422876e-06, "loss": 1.6933, "step": 11105 }, { "epoch": 0.6071921599715703, "grad_norm": 1.499808430671692, "learning_rate": 7.448306478687663e-06, "loss": 1.5285, "step": 11106 }, { "epoch": 0.6072468324152919, "grad_norm": 1.4814454317092896, "learning_rate": 7.446539656159486e-06, "loss": 1.3201, "step": 11107 }, { "epoch": 0.6073015048590135, "grad_norm": 1.1856023073196411, "learning_rate": 7.444772918897336e-06, "loss": 1.4243, "step": 11108 }, { "epoch": 0.607356177302735, "grad_norm": 1.1002073287963867, "learning_rate": 7.44300626696022e-06, "loss": 1.5041, "step": 11109 }, { "epoch": 0.6074108497464565, "grad_norm": 1.445713996887207, "learning_rate": 7.441239700407124e-06, "loss": 1.3446, "step": 11110 }, { "epoch": 0.6074655221901781, "grad_norm": 1.917493224143982, "learning_rate": 7.4394732192970375e-06, "loss": 1.3146, "step": 11111 }, { "epoch": 0.6075201946338996, "grad_norm": 1.4284083843231201, "learning_rate": 7.43770682368895e-06, "loss": 1.4437, "step": 11112 }, { "epoch": 0.6075748670776212, "grad_norm": 1.2887603044509888, "learning_rate": 7.435940513641845e-06, "loss": 1.359, "step": 11113 }, { "epoch": 0.6076295395213428, "grad_norm": 1.3382070064544678, "learning_rate": 7.4341742892146976e-06, "loss": 1.5927, "step": 11114 }, { "epoch": 0.6076842119650643, "grad_norm": 2.027270793914795, "learning_rate": 7.432408150466497e-06, "loss": 1.2909, "step": 11115 }, { "epoch": 0.6077388844087859, "grad_norm": 1.2580260038375854, "learning_rate": 7.430642097456211e-06, "loss": 1.5766, "step": 11116 }, { "epoch": 0.6077935568525075, "grad_norm": 1.562485933303833, "learning_rate": 7.428876130242816e-06, "loss": 1.2564, "step": 11117 }, { "epoch": 0.607848229296229, "grad_norm": 1.4204182624816895, "learning_rate": 7.427110248885281e-06, "loss": 1.3927, "step": 11118 }, { "epoch": 0.6079029017399505, "grad_norm": 1.433082938194275, "learning_rate": 7.425344453442566e-06, "loss": 1.4626, "step": 11119 }, { "epoch": 0.607957574183672, "grad_norm": 1.8205856084823608, "learning_rate": 7.423578743973649e-06, "loss": 1.4838, "step": 11120 }, { "epoch": 0.6080122466273936, "grad_norm": 2.0422399044036865, "learning_rate": 7.421813120537482e-06, "loss": 1.4092, "step": 11121 }, { "epoch": 0.6080669190711152, "grad_norm": 1.5986124277114868, "learning_rate": 7.42004758319302e-06, "loss": 1.3285, "step": 11122 }, { "epoch": 0.6081215915148367, "grad_norm": 1.5161195993423462, "learning_rate": 7.418282131999228e-06, "loss": 1.4356, "step": 11123 }, { "epoch": 0.6081762639585583, "grad_norm": 1.544889211654663, "learning_rate": 7.416516767015054e-06, "loss": 1.3362, "step": 11124 }, { "epoch": 0.6082309364022799, "grad_norm": 1.4691088199615479, "learning_rate": 7.414751488299444e-06, "loss": 1.569, "step": 11125 }, { "epoch": 0.6082856088460014, "grad_norm": 1.811532974243164, "learning_rate": 7.4129862959113515e-06, "loss": 1.1849, "step": 11126 }, { "epoch": 0.608340281289723, "grad_norm": 1.7296943664550781, "learning_rate": 7.411221189909718e-06, "loss": 1.6435, "step": 11127 }, { "epoch": 0.6083949537334445, "grad_norm": 1.9450578689575195, "learning_rate": 7.409456170353477e-06, "loss": 1.3357, "step": 11128 }, { "epoch": 0.608449626177166, "grad_norm": 1.6049875020980835, "learning_rate": 7.40769123730158e-06, "loss": 1.4838, "step": 11129 }, { "epoch": 0.6085042986208876, "grad_norm": 1.1974400281906128, "learning_rate": 7.405926390812953e-06, "loss": 1.4724, "step": 11130 }, { "epoch": 0.6085589710646092, "grad_norm": 1.5569289922714233, "learning_rate": 7.404161630946532e-06, "loss": 1.388, "step": 11131 }, { "epoch": 0.6086136435083307, "grad_norm": 1.8314696550369263, "learning_rate": 7.402396957761247e-06, "loss": 1.2246, "step": 11132 }, { "epoch": 0.6086683159520523, "grad_norm": 1.6766256093978882, "learning_rate": 7.400632371316019e-06, "loss": 1.1525, "step": 11133 }, { "epoch": 0.6087229883957738, "grad_norm": 1.4958579540252686, "learning_rate": 7.398867871669778e-06, "loss": 1.2573, "step": 11134 }, { "epoch": 0.6087776608394954, "grad_norm": 1.347160816192627, "learning_rate": 7.397103458881444e-06, "loss": 1.5396, "step": 11135 }, { "epoch": 0.608832333283217, "grad_norm": 1.6461611986160278, "learning_rate": 7.395339133009931e-06, "loss": 1.444, "step": 11136 }, { "epoch": 0.6088870057269384, "grad_norm": 1.565922737121582, "learning_rate": 7.393574894114157e-06, "loss": 1.4604, "step": 11137 }, { "epoch": 0.60894167817066, "grad_norm": 1.4621453285217285, "learning_rate": 7.391810742253036e-06, "loss": 1.456, "step": 11138 }, { "epoch": 0.6089963506143816, "grad_norm": 1.6722652912139893, "learning_rate": 7.3900466774854695e-06, "loss": 1.5908, "step": 11139 }, { "epoch": 0.6090510230581031, "grad_norm": 1.4291824102401733, "learning_rate": 7.388282699870373e-06, "loss": 1.3905, "step": 11140 }, { "epoch": 0.6091056955018247, "grad_norm": 1.491989016532898, "learning_rate": 7.386518809466645e-06, "loss": 1.3303, "step": 11141 }, { "epoch": 0.6091603679455463, "grad_norm": 1.460108757019043, "learning_rate": 7.384755006333183e-06, "loss": 1.2398, "step": 11142 }, { "epoch": 0.6092150403892678, "grad_norm": 1.2160968780517578, "learning_rate": 7.382991290528892e-06, "loss": 1.5235, "step": 11143 }, { "epoch": 0.6092697128329894, "grad_norm": 1.5336984395980835, "learning_rate": 7.3812276621126625e-06, "loss": 1.5861, "step": 11144 }, { "epoch": 0.609324385276711, "grad_norm": 1.7948276996612549, "learning_rate": 7.379464121143386e-06, "loss": 1.2074, "step": 11145 }, { "epoch": 0.6093790577204324, "grad_norm": 1.5361716747283936, "learning_rate": 7.377700667679954e-06, "loss": 1.6596, "step": 11146 }, { "epoch": 0.609433730164154, "grad_norm": 1.3224685192108154, "learning_rate": 7.375937301781244e-06, "loss": 1.3713, "step": 11147 }, { "epoch": 0.6094884026078755, "grad_norm": 1.371509313583374, "learning_rate": 7.374174023506151e-06, "loss": 1.5173, "step": 11148 }, { "epoch": 0.6095430750515971, "grad_norm": 1.5986748933792114, "learning_rate": 7.372410832913548e-06, "loss": 1.3926, "step": 11149 }, { "epoch": 0.6095977474953187, "grad_norm": 1.713915467262268, "learning_rate": 7.370647730062311e-06, "loss": 1.4066, "step": 11150 }, { "epoch": 0.6096524199390402, "grad_norm": 1.3997915983200073, "learning_rate": 7.3688847150113185e-06, "loss": 1.6011, "step": 11151 }, { "epoch": 0.6097070923827618, "grad_norm": 1.5507339239120483, "learning_rate": 7.36712178781944e-06, "loss": 1.4204, "step": 11152 }, { "epoch": 0.6097617648264834, "grad_norm": 1.5809931755065918, "learning_rate": 7.365358948545538e-06, "loss": 1.3556, "step": 11153 }, { "epoch": 0.6098164372702048, "grad_norm": 1.4632424116134644, "learning_rate": 7.363596197248488e-06, "loss": 1.5085, "step": 11154 }, { "epoch": 0.6098711097139264, "grad_norm": 1.1847944259643555, "learning_rate": 7.361833533987148e-06, "loss": 1.4066, "step": 11155 }, { "epoch": 0.609925782157648, "grad_norm": 1.2259920835494995, "learning_rate": 7.360070958820373e-06, "loss": 1.321, "step": 11156 }, { "epoch": 0.6099804546013695, "grad_norm": 1.4884215593338013, "learning_rate": 7.358308471807028e-06, "loss": 1.3036, "step": 11157 }, { "epoch": 0.6100351270450911, "grad_norm": 2.3524868488311768, "learning_rate": 7.356546073005957e-06, "loss": 1.2754, "step": 11158 }, { "epoch": 0.6100897994888127, "grad_norm": 1.4079817533493042, "learning_rate": 7.354783762476019e-06, "loss": 1.4015, "step": 11159 }, { "epoch": 0.6101444719325342, "grad_norm": 1.7348229885101318, "learning_rate": 7.353021540276059e-06, "loss": 1.3022, "step": 11160 }, { "epoch": 0.6101991443762558, "grad_norm": 1.766181230545044, "learning_rate": 7.351259406464917e-06, "loss": 1.3969, "step": 11161 }, { "epoch": 0.6102538168199773, "grad_norm": 2.2779922485351562, "learning_rate": 7.349497361101443e-06, "loss": 1.3924, "step": 11162 }, { "epoch": 0.6103084892636989, "grad_norm": 1.648481845855713, "learning_rate": 7.3477354042444716e-06, "loss": 1.2143, "step": 11163 }, { "epoch": 0.6103631617074204, "grad_norm": 1.461674690246582, "learning_rate": 7.3459735359528366e-06, "loss": 1.4101, "step": 11164 }, { "epoch": 0.6104178341511419, "grad_norm": 1.4399114847183228, "learning_rate": 7.344211756285375e-06, "loss": 1.4947, "step": 11165 }, { "epoch": 0.6104725065948635, "grad_norm": 1.7609580755233765, "learning_rate": 7.342450065300914e-06, "loss": 1.5188, "step": 11166 }, { "epoch": 0.6105271790385851, "grad_norm": 1.2921581268310547, "learning_rate": 7.34068846305828e-06, "loss": 1.4863, "step": 11167 }, { "epoch": 0.6105818514823066, "grad_norm": 1.268578052520752, "learning_rate": 7.338926949616301e-06, "loss": 1.5362, "step": 11168 }, { "epoch": 0.6106365239260282, "grad_norm": 1.6448289155960083, "learning_rate": 7.337165525033796e-06, "loss": 1.4489, "step": 11169 }, { "epoch": 0.6106911963697498, "grad_norm": 2.0746943950653076, "learning_rate": 7.335404189369579e-06, "loss": 1.3653, "step": 11170 }, { "epoch": 0.6107458688134713, "grad_norm": 1.4919710159301758, "learning_rate": 7.333642942682473e-06, "loss": 1.5452, "step": 11171 }, { "epoch": 0.6108005412571929, "grad_norm": 2.391563653945923, "learning_rate": 7.3318817850312804e-06, "loss": 1.1306, "step": 11172 }, { "epoch": 0.6108552137009144, "grad_norm": 1.643783688545227, "learning_rate": 7.33012071647482e-06, "loss": 1.3844, "step": 11173 }, { "epoch": 0.6109098861446359, "grad_norm": 1.4305307865142822, "learning_rate": 7.328359737071895e-06, "loss": 1.0374, "step": 11174 }, { "epoch": 0.6109645585883575, "grad_norm": 1.3788843154907227, "learning_rate": 7.326598846881305e-06, "loss": 1.4674, "step": 11175 }, { "epoch": 0.611019231032079, "grad_norm": 1.2434710264205933, "learning_rate": 7.3248380459618555e-06, "loss": 1.4184, "step": 11176 }, { "epoch": 0.6110739034758006, "grad_norm": 1.316650152206421, "learning_rate": 7.323077334372341e-06, "loss": 1.4604, "step": 11177 }, { "epoch": 0.6111285759195222, "grad_norm": 1.3379020690917969, "learning_rate": 7.3213167121715514e-06, "loss": 1.5821, "step": 11178 }, { "epoch": 0.6111832483632437, "grad_norm": 1.459867000579834, "learning_rate": 7.319556179418286e-06, "loss": 1.4073, "step": 11179 }, { "epoch": 0.6112379208069653, "grad_norm": 1.5399585962295532, "learning_rate": 7.3177957361713305e-06, "loss": 1.4771, "step": 11180 }, { "epoch": 0.6112925932506869, "grad_norm": 1.3775140047073364, "learning_rate": 7.316035382489464e-06, "loss": 1.4728, "step": 11181 }, { "epoch": 0.6113472656944083, "grad_norm": 1.397856593132019, "learning_rate": 7.31427511843148e-06, "loss": 1.3438, "step": 11182 }, { "epoch": 0.6114019381381299, "grad_norm": 1.6408512592315674, "learning_rate": 7.3125149440561505e-06, "loss": 1.4117, "step": 11183 }, { "epoch": 0.6114566105818515, "grad_norm": 1.7992955446243286, "learning_rate": 7.310754859422253e-06, "loss": 1.5733, "step": 11184 }, { "epoch": 0.611511283025573, "grad_norm": 1.4465502500534058, "learning_rate": 7.308994864588562e-06, "loss": 1.3799, "step": 11185 }, { "epoch": 0.6115659554692946, "grad_norm": 1.4119120836257935, "learning_rate": 7.3072349596138435e-06, "loss": 1.4608, "step": 11186 }, { "epoch": 0.6116206279130162, "grad_norm": 1.5574485063552856, "learning_rate": 7.305475144556873e-06, "loss": 1.3945, "step": 11187 }, { "epoch": 0.6116753003567377, "grad_norm": 2.035651206970215, "learning_rate": 7.30371541947641e-06, "loss": 1.3245, "step": 11188 }, { "epoch": 0.6117299728004593, "grad_norm": 1.423349380493164, "learning_rate": 7.301955784431214e-06, "loss": 1.5595, "step": 11189 }, { "epoch": 0.6117846452441807, "grad_norm": 1.9741835594177246, "learning_rate": 7.300196239480046e-06, "loss": 1.3956, "step": 11190 }, { "epoch": 0.6118393176879023, "grad_norm": 2.268160343170166, "learning_rate": 7.298436784681664e-06, "loss": 1.3526, "step": 11191 }, { "epoch": 0.6118939901316239, "grad_norm": 1.4200090169906616, "learning_rate": 7.296677420094811e-06, "loss": 1.5298, "step": 11192 }, { "epoch": 0.6119486625753454, "grad_norm": 1.404711365699768, "learning_rate": 7.2949181457782466e-06, "loss": 1.2534, "step": 11193 }, { "epoch": 0.612003335019067, "grad_norm": 1.478430986404419, "learning_rate": 7.293158961790714e-06, "loss": 1.2965, "step": 11194 }, { "epoch": 0.6120580074627886, "grad_norm": 1.4984391927719116, "learning_rate": 7.291399868190953e-06, "loss": 1.494, "step": 11195 }, { "epoch": 0.6121126799065101, "grad_norm": 1.2938623428344727, "learning_rate": 7.289640865037708e-06, "loss": 1.3798, "step": 11196 }, { "epoch": 0.6121673523502317, "grad_norm": 1.6714009046554565, "learning_rate": 7.287881952389715e-06, "loss": 1.3236, "step": 11197 }, { "epoch": 0.6122220247939533, "grad_norm": 1.2054065465927124, "learning_rate": 7.286123130305702e-06, "loss": 1.6196, "step": 11198 }, { "epoch": 0.6122766972376747, "grad_norm": 1.6284263134002686, "learning_rate": 7.284364398844412e-06, "loss": 1.3857, "step": 11199 }, { "epoch": 0.6123313696813963, "grad_norm": 1.9096485376358032, "learning_rate": 7.282605758064563e-06, "loss": 1.6446, "step": 11200 }, { "epoch": 0.6123860421251179, "grad_norm": 1.6242529153823853, "learning_rate": 7.280847208024888e-06, "loss": 1.576, "step": 11201 }, { "epoch": 0.6124407145688394, "grad_norm": 1.2427804470062256, "learning_rate": 7.279088748784106e-06, "loss": 1.3213, "step": 11202 }, { "epoch": 0.612495387012561, "grad_norm": 1.4922181367874146, "learning_rate": 7.277330380400933e-06, "loss": 1.4745, "step": 11203 }, { "epoch": 0.6125500594562825, "grad_norm": 1.4312071800231934, "learning_rate": 7.275572102934089e-06, "loss": 1.5329, "step": 11204 }, { "epoch": 0.6126047319000041, "grad_norm": 1.6310210227966309, "learning_rate": 7.273813916442286e-06, "loss": 1.1618, "step": 11205 }, { "epoch": 0.6126594043437257, "grad_norm": 1.9578649997711182, "learning_rate": 7.27205582098423e-06, "loss": 1.4286, "step": 11206 }, { "epoch": 0.6127140767874472, "grad_norm": 1.34890878200531, "learning_rate": 7.270297816618634e-06, "loss": 1.4155, "step": 11207 }, { "epoch": 0.6127687492311688, "grad_norm": 1.6578702926635742, "learning_rate": 7.268539903404201e-06, "loss": 1.2349, "step": 11208 }, { "epoch": 0.6128234216748903, "grad_norm": 1.4085493087768555, "learning_rate": 7.266782081399628e-06, "loss": 1.5574, "step": 11209 }, { "epoch": 0.6128780941186118, "grad_norm": 1.351953148841858, "learning_rate": 7.265024350663617e-06, "loss": 1.3789, "step": 11210 }, { "epoch": 0.6129327665623334, "grad_norm": 1.7483232021331787, "learning_rate": 7.263266711254859e-06, "loss": 1.4415, "step": 11211 }, { "epoch": 0.612987439006055, "grad_norm": 1.8164385557174683, "learning_rate": 7.261509163232046e-06, "loss": 1.4192, "step": 11212 }, { "epoch": 0.6130421114497765, "grad_norm": 1.703884243965149, "learning_rate": 7.25975170665387e-06, "loss": 1.2683, "step": 11213 }, { "epoch": 0.6130967838934981, "grad_norm": 1.5824843645095825, "learning_rate": 7.2579943415790134e-06, "loss": 1.2602, "step": 11214 }, { "epoch": 0.6131514563372197, "grad_norm": 1.348307490348816, "learning_rate": 7.256237068066163e-06, "loss": 1.5626, "step": 11215 }, { "epoch": 0.6132061287809412, "grad_norm": 1.5300934314727783, "learning_rate": 7.254479886173994e-06, "loss": 1.3507, "step": 11216 }, { "epoch": 0.6132608012246628, "grad_norm": 2.0473947525024414, "learning_rate": 7.25272279596118e-06, "loss": 1.289, "step": 11217 }, { "epoch": 0.6133154736683843, "grad_norm": 2.1505324840545654, "learning_rate": 7.2509657974864045e-06, "loss": 1.5167, "step": 11218 }, { "epoch": 0.6133701461121058, "grad_norm": 2.4532251358032227, "learning_rate": 7.24920889080833e-06, "loss": 1.4514, "step": 11219 }, { "epoch": 0.6134248185558274, "grad_norm": 1.3016122579574585, "learning_rate": 7.247452075985622e-06, "loss": 1.4795, "step": 11220 }, { "epoch": 0.6134794909995489, "grad_norm": 2.0453572273254395, "learning_rate": 7.245695353076953e-06, "loss": 1.587, "step": 11221 }, { "epoch": 0.6135341634432705, "grad_norm": 1.6187151670455933, "learning_rate": 7.243938722140978e-06, "loss": 1.4711, "step": 11222 }, { "epoch": 0.6135888358869921, "grad_norm": 1.6402263641357422, "learning_rate": 7.242182183236356e-06, "loss": 1.1521, "step": 11223 }, { "epoch": 0.6136435083307136, "grad_norm": 1.4863569736480713, "learning_rate": 7.240425736421743e-06, "loss": 1.4764, "step": 11224 }, { "epoch": 0.6136981807744352, "grad_norm": 1.5104738473892212, "learning_rate": 7.238669381755791e-06, "loss": 1.3473, "step": 11225 }, { "epoch": 0.6137528532181568, "grad_norm": 1.8301535844802856, "learning_rate": 7.236913119297144e-06, "loss": 1.157, "step": 11226 }, { "epoch": 0.6138075256618782, "grad_norm": 1.5497742891311646, "learning_rate": 7.235156949104455e-06, "loss": 1.4216, "step": 11227 }, { "epoch": 0.6138621981055998, "grad_norm": 1.2314895391464233, "learning_rate": 7.233400871236362e-06, "loss": 1.7025, "step": 11228 }, { "epoch": 0.6139168705493214, "grad_norm": 1.6858651638031006, "learning_rate": 7.2316448857515076e-06, "loss": 1.3219, "step": 11229 }, { "epoch": 0.6139715429930429, "grad_norm": 1.4193800687789917, "learning_rate": 7.229888992708527e-06, "loss": 1.4855, "step": 11230 }, { "epoch": 0.6140262154367645, "grad_norm": 1.6348395347595215, "learning_rate": 7.228133192166049e-06, "loss": 1.4584, "step": 11231 }, { "epoch": 0.6140808878804861, "grad_norm": 1.886634111404419, "learning_rate": 7.226377484182712e-06, "loss": 1.423, "step": 11232 }, { "epoch": 0.6141355603242076, "grad_norm": 1.4445784091949463, "learning_rate": 7.224621868817139e-06, "loss": 1.2814, "step": 11233 }, { "epoch": 0.6141902327679292, "grad_norm": 1.4149730205535889, "learning_rate": 7.222866346127952e-06, "loss": 1.578, "step": 11234 }, { "epoch": 0.6142449052116506, "grad_norm": 1.872101902961731, "learning_rate": 7.221110916173778e-06, "loss": 1.2775, "step": 11235 }, { "epoch": 0.6142995776553722, "grad_norm": 1.4633903503417969, "learning_rate": 7.2193555790132296e-06, "loss": 1.4656, "step": 11236 }, { "epoch": 0.6143542500990938, "grad_norm": 1.4956696033477783, "learning_rate": 7.217600334704922e-06, "loss": 1.4926, "step": 11237 }, { "epoch": 0.6144089225428153, "grad_norm": 1.748359203338623, "learning_rate": 7.21584518330747e-06, "loss": 1.5175, "step": 11238 }, { "epoch": 0.6144635949865369, "grad_norm": 1.5305460691452026, "learning_rate": 7.214090124879481e-06, "loss": 1.3826, "step": 11239 }, { "epoch": 0.6145182674302585, "grad_norm": 1.257407784461975, "learning_rate": 7.212335159479557e-06, "loss": 1.6026, "step": 11240 }, { "epoch": 0.61457293987398, "grad_norm": 1.3459250926971436, "learning_rate": 7.210580287166307e-06, "loss": 1.7224, "step": 11241 }, { "epoch": 0.6146276123177016, "grad_norm": 1.8068866729736328, "learning_rate": 7.208825507998326e-06, "loss": 1.2536, "step": 11242 }, { "epoch": 0.6146822847614232, "grad_norm": 1.3003957271575928, "learning_rate": 7.20707082203421e-06, "loss": 1.6646, "step": 11243 }, { "epoch": 0.6147369572051447, "grad_norm": 1.5545612573623657, "learning_rate": 7.205316229332555e-06, "loss": 1.4009, "step": 11244 }, { "epoch": 0.6147916296488662, "grad_norm": 1.6391186714172363, "learning_rate": 7.203561729951944e-06, "loss": 1.2189, "step": 11245 }, { "epoch": 0.6148463020925878, "grad_norm": 1.429194688796997, "learning_rate": 7.201807323950973e-06, "loss": 1.5163, "step": 11246 }, { "epoch": 0.6149009745363093, "grad_norm": 1.5495649576187134, "learning_rate": 7.200053011388223e-06, "loss": 1.2676, "step": 11247 }, { "epoch": 0.6149556469800309, "grad_norm": 1.3402105569839478, "learning_rate": 7.198298792322271e-06, "loss": 1.3596, "step": 11248 }, { "epoch": 0.6150103194237524, "grad_norm": 1.4492799043655396, "learning_rate": 7.196544666811698e-06, "loss": 1.4147, "step": 11249 }, { "epoch": 0.615064991867474, "grad_norm": 1.4169853925704956, "learning_rate": 7.194790634915077e-06, "loss": 1.3677, "step": 11250 }, { "epoch": 0.6151196643111956, "grad_norm": 1.2510043382644653, "learning_rate": 7.193036696690976e-06, "loss": 1.6263, "step": 11251 }, { "epoch": 0.6151743367549171, "grad_norm": 1.760087013244629, "learning_rate": 7.1912828521979695e-06, "loss": 1.593, "step": 11252 }, { "epoch": 0.6152290091986387, "grad_norm": 1.7153080701828003, "learning_rate": 7.18952910149462e-06, "loss": 1.5255, "step": 11253 }, { "epoch": 0.6152836816423602, "grad_norm": 1.3756707906723022, "learning_rate": 7.1877754446394865e-06, "loss": 1.5079, "step": 11254 }, { "epoch": 0.6153383540860817, "grad_norm": 1.5081299543380737, "learning_rate": 7.186021881691132e-06, "loss": 1.3809, "step": 11255 }, { "epoch": 0.6153930265298033, "grad_norm": 1.633537769317627, "learning_rate": 7.184268412708111e-06, "loss": 1.3376, "step": 11256 }, { "epoch": 0.6154476989735249, "grad_norm": 1.6242032051086426, "learning_rate": 7.182515037748969e-06, "loss": 1.3197, "step": 11257 }, { "epoch": 0.6155023714172464, "grad_norm": 2.2417895793914795, "learning_rate": 7.1807617568722674e-06, "loss": 1.2922, "step": 11258 }, { "epoch": 0.615557043860968, "grad_norm": 1.753690242767334, "learning_rate": 7.179008570136543e-06, "loss": 1.4927, "step": 11259 }, { "epoch": 0.6156117163046896, "grad_norm": 1.4424638748168945, "learning_rate": 7.177255477600346e-06, "loss": 1.3944, "step": 11260 }, { "epoch": 0.6156663887484111, "grad_norm": 1.3792285919189453, "learning_rate": 7.175502479322211e-06, "loss": 1.4066, "step": 11261 }, { "epoch": 0.6157210611921327, "grad_norm": 1.3641494512557983, "learning_rate": 7.173749575360671e-06, "loss": 1.5032, "step": 11262 }, { "epoch": 0.6157757336358541, "grad_norm": 1.5398757457733154, "learning_rate": 7.1719967657742696e-06, "loss": 1.5047, "step": 11263 }, { "epoch": 0.6158304060795757, "grad_norm": 1.3802276849746704, "learning_rate": 7.170244050621533e-06, "loss": 1.3341, "step": 11264 }, { "epoch": 0.6158850785232973, "grad_norm": 1.848983883857727, "learning_rate": 7.168491429960983e-06, "loss": 1.4627, "step": 11265 }, { "epoch": 0.6159397509670188, "grad_norm": 1.5379103422164917, "learning_rate": 7.1667389038511535e-06, "loss": 1.2693, "step": 11266 }, { "epoch": 0.6159944234107404, "grad_norm": 1.1315399408340454, "learning_rate": 7.164986472350559e-06, "loss": 1.4761, "step": 11267 }, { "epoch": 0.616049095854462, "grad_norm": 1.6154404878616333, "learning_rate": 7.1632341355177185e-06, "loss": 1.4578, "step": 11268 }, { "epoch": 0.6161037682981835, "grad_norm": 1.6424856185913086, "learning_rate": 7.1614818934111475e-06, "loss": 1.2208, "step": 11269 }, { "epoch": 0.6161584407419051, "grad_norm": 1.4523969888687134, "learning_rate": 7.159729746089356e-06, "loss": 1.3618, "step": 11270 }, { "epoch": 0.6162131131856267, "grad_norm": 1.4379878044128418, "learning_rate": 7.1579776936108516e-06, "loss": 1.4513, "step": 11271 }, { "epoch": 0.6162677856293481, "grad_norm": 1.461208462715149, "learning_rate": 7.1562257360341434e-06, "loss": 1.5904, "step": 11272 }, { "epoch": 0.6163224580730697, "grad_norm": 1.7146226167678833, "learning_rate": 7.15447387341773e-06, "loss": 1.3256, "step": 11273 }, { "epoch": 0.6163771305167913, "grad_norm": 1.705416202545166, "learning_rate": 7.1527221058201136e-06, "loss": 1.4501, "step": 11274 }, { "epoch": 0.6164318029605128, "grad_norm": 1.400843620300293, "learning_rate": 7.150970433299787e-06, "loss": 1.1833, "step": 11275 }, { "epoch": 0.6164864754042344, "grad_norm": 2.544898748397827, "learning_rate": 7.1492188559152364e-06, "loss": 1.3339, "step": 11276 }, { "epoch": 0.6165411478479559, "grad_norm": 1.459667444229126, "learning_rate": 7.147467373724965e-06, "loss": 1.4898, "step": 11277 }, { "epoch": 0.6165958202916775, "grad_norm": 1.3234955072402954, "learning_rate": 7.14571598678745e-06, "loss": 1.4593, "step": 11278 }, { "epoch": 0.6166504927353991, "grad_norm": 1.8402785062789917, "learning_rate": 7.143964695161175e-06, "loss": 1.3026, "step": 11279 }, { "epoch": 0.6167051651791206, "grad_norm": 1.3447704315185547, "learning_rate": 7.142213498904622e-06, "loss": 1.4617, "step": 11280 }, { "epoch": 0.6167598376228421, "grad_norm": 1.5878570079803467, "learning_rate": 7.140462398076267e-06, "loss": 1.6442, "step": 11281 }, { "epoch": 0.6168145100665637, "grad_norm": 1.4656578302383423, "learning_rate": 7.138711392734579e-06, "loss": 1.2487, "step": 11282 }, { "epoch": 0.6168691825102852, "grad_norm": 1.7065987586975098, "learning_rate": 7.136960482938035e-06, "loss": 1.3944, "step": 11283 }, { "epoch": 0.6169238549540068, "grad_norm": 1.3633531332015991, "learning_rate": 7.135209668745097e-06, "loss": 1.4437, "step": 11284 }, { "epoch": 0.6169785273977284, "grad_norm": 1.5763665437698364, "learning_rate": 7.133458950214229e-06, "loss": 1.5746, "step": 11285 }, { "epoch": 0.6170331998414499, "grad_norm": 1.6191112995147705, "learning_rate": 7.131708327403897e-06, "loss": 1.4797, "step": 11286 }, { "epoch": 0.6170878722851715, "grad_norm": 1.4464510679244995, "learning_rate": 7.129957800372554e-06, "loss": 1.3849, "step": 11287 }, { "epoch": 0.6171425447288931, "grad_norm": 1.524493932723999, "learning_rate": 7.128207369178654e-06, "loss": 1.628, "step": 11288 }, { "epoch": 0.6171972171726146, "grad_norm": 1.504202127456665, "learning_rate": 7.12645703388065e-06, "loss": 1.6444, "step": 11289 }, { "epoch": 0.6172518896163361, "grad_norm": 1.375104546546936, "learning_rate": 7.124706794536984e-06, "loss": 1.2285, "step": 11290 }, { "epoch": 0.6173065620600576, "grad_norm": 1.6637234687805176, "learning_rate": 7.12295665120611e-06, "loss": 1.1591, "step": 11291 }, { "epoch": 0.6173612345037792, "grad_norm": 1.3882360458374023, "learning_rate": 7.1212066039464645e-06, "loss": 1.5092, "step": 11292 }, { "epoch": 0.6174159069475008, "grad_norm": 1.4441370964050293, "learning_rate": 7.119456652816483e-06, "loss": 1.2799, "step": 11293 }, { "epoch": 0.6174705793912223, "grad_norm": 1.5820952653884888, "learning_rate": 7.117706797874606e-06, "loss": 1.4333, "step": 11294 }, { "epoch": 0.6175252518349439, "grad_norm": 1.7396843433380127, "learning_rate": 7.115957039179263e-06, "loss": 1.603, "step": 11295 }, { "epoch": 0.6175799242786655, "grad_norm": 1.1819034814834595, "learning_rate": 7.1142073767888774e-06, "loss": 1.7353, "step": 11296 }, { "epoch": 0.617634596722387, "grad_norm": 1.4512218236923218, "learning_rate": 7.112457810761883e-06, "loss": 1.5342, "step": 11297 }, { "epoch": 0.6176892691661086, "grad_norm": 1.528106451034546, "learning_rate": 7.1107083411566994e-06, "loss": 1.4454, "step": 11298 }, { "epoch": 0.6177439416098301, "grad_norm": 1.6390272378921509, "learning_rate": 7.10895896803174e-06, "loss": 1.6175, "step": 11299 }, { "epoch": 0.6177986140535516, "grad_norm": 1.4779529571533203, "learning_rate": 7.107209691445429e-06, "loss": 1.4853, "step": 11300 }, { "epoch": 0.6178532864972732, "grad_norm": 1.757627010345459, "learning_rate": 7.10546051145617e-06, "loss": 1.3064, "step": 11301 }, { "epoch": 0.6179079589409948, "grad_norm": 1.6238553524017334, "learning_rate": 7.10371142812238e-06, "loss": 1.2803, "step": 11302 }, { "epoch": 0.6179626313847163, "grad_norm": 1.4807052612304688, "learning_rate": 7.101962441502462e-06, "loss": 1.5441, "step": 11303 }, { "epoch": 0.6180173038284379, "grad_norm": 2.2373743057250977, "learning_rate": 7.100213551654816e-06, "loss": 1.5027, "step": 11304 }, { "epoch": 0.6180719762721594, "grad_norm": 1.359342098236084, "learning_rate": 7.098464758637846e-06, "loss": 1.3501, "step": 11305 }, { "epoch": 0.618126648715881, "grad_norm": 1.4706637859344482, "learning_rate": 7.096716062509947e-06, "loss": 1.4173, "step": 11306 }, { "epoch": 0.6181813211596026, "grad_norm": 1.4985132217407227, "learning_rate": 7.0949674633295094e-06, "loss": 1.3409, "step": 11307 }, { "epoch": 0.618235993603324, "grad_norm": 1.319406509399414, "learning_rate": 7.093218961154926e-06, "loss": 1.6923, "step": 11308 }, { "epoch": 0.6182906660470456, "grad_norm": 1.8607397079467773, "learning_rate": 7.091470556044584e-06, "loss": 1.5025, "step": 11309 }, { "epoch": 0.6183453384907672, "grad_norm": 1.5655828714370728, "learning_rate": 7.089722248056862e-06, "loss": 1.3748, "step": 11310 }, { "epoch": 0.6184000109344887, "grad_norm": 1.414266586303711, "learning_rate": 7.087974037250146e-06, "loss": 1.3018, "step": 11311 }, { "epoch": 0.6184546833782103, "grad_norm": 1.6345511674880981, "learning_rate": 7.08622592368281e-06, "loss": 1.3063, "step": 11312 }, { "epoch": 0.6185093558219319, "grad_norm": 1.433296799659729, "learning_rate": 7.084477907413226e-06, "loss": 1.4609, "step": 11313 }, { "epoch": 0.6185640282656534, "grad_norm": 1.7406957149505615, "learning_rate": 7.082729988499768e-06, "loss": 1.464, "step": 11314 }, { "epoch": 0.618618700709375, "grad_norm": 1.5185952186584473, "learning_rate": 7.080982167000799e-06, "loss": 1.4834, "step": 11315 }, { "epoch": 0.6186733731530966, "grad_norm": 1.5787160396575928, "learning_rate": 7.079234442974688e-06, "loss": 1.2461, "step": 11316 }, { "epoch": 0.618728045596818, "grad_norm": 1.4674038887023926, "learning_rate": 7.077486816479792e-06, "loss": 1.3489, "step": 11317 }, { "epoch": 0.6187827180405396, "grad_norm": 1.6812870502471924, "learning_rate": 7.075739287574467e-06, "loss": 1.4833, "step": 11318 }, { "epoch": 0.6188373904842611, "grad_norm": 1.4979859590530396, "learning_rate": 7.073991856317072e-06, "loss": 1.2838, "step": 11319 }, { "epoch": 0.6188920629279827, "grad_norm": 1.3491965532302856, "learning_rate": 7.072244522765954e-06, "loss": 1.6518, "step": 11320 }, { "epoch": 0.6189467353717043, "grad_norm": 1.5611926317214966, "learning_rate": 7.070497286979459e-06, "loss": 1.3785, "step": 11321 }, { "epoch": 0.6190014078154258, "grad_norm": 1.6815696954727173, "learning_rate": 7.068750149015937e-06, "loss": 1.3884, "step": 11322 }, { "epoch": 0.6190560802591474, "grad_norm": 1.8264747858047485, "learning_rate": 7.067003108933725e-06, "loss": 1.3347, "step": 11323 }, { "epoch": 0.619110752702869, "grad_norm": 1.4619265794754028, "learning_rate": 7.0652561667911605e-06, "loss": 1.4886, "step": 11324 }, { "epoch": 0.6191654251465905, "grad_norm": 1.512402892112732, "learning_rate": 7.063509322646581e-06, "loss": 1.4211, "step": 11325 }, { "epoch": 0.619220097590312, "grad_norm": 1.3135905265808105, "learning_rate": 7.061762576558316e-06, "loss": 1.3519, "step": 11326 }, { "epoch": 0.6192747700340336, "grad_norm": 1.7494937181472778, "learning_rate": 7.060015928584691e-06, "loss": 1.6922, "step": 11327 }, { "epoch": 0.6193294424777551, "grad_norm": 1.7933592796325684, "learning_rate": 7.058269378784037e-06, "loss": 1.3419, "step": 11328 }, { "epoch": 0.6193841149214767, "grad_norm": 1.6474171876907349, "learning_rate": 7.056522927214666e-06, "loss": 1.3694, "step": 11329 }, { "epoch": 0.6194387873651983, "grad_norm": 1.4458845853805542, "learning_rate": 7.054776573934906e-06, "loss": 1.2825, "step": 11330 }, { "epoch": 0.6194934598089198, "grad_norm": 1.2351984977722168, "learning_rate": 7.053030319003067e-06, "loss": 1.3294, "step": 11331 }, { "epoch": 0.6195481322526414, "grad_norm": 2.1927409172058105, "learning_rate": 7.051284162477459e-06, "loss": 1.332, "step": 11332 }, { "epoch": 0.6196028046963629, "grad_norm": 1.310225486755371, "learning_rate": 7.049538104416395e-06, "loss": 1.4654, "step": 11333 }, { "epoch": 0.6196574771400845, "grad_norm": 1.2794573307037354, "learning_rate": 7.047792144878176e-06, "loss": 1.382, "step": 11334 }, { "epoch": 0.619712149583806, "grad_norm": 1.4466944932937622, "learning_rate": 7.046046283921102e-06, "loss": 1.6144, "step": 11335 }, { "epoch": 0.6197668220275275, "grad_norm": 2.607064962387085, "learning_rate": 7.044300521603476e-06, "loss": 1.1955, "step": 11336 }, { "epoch": 0.6198214944712491, "grad_norm": 1.460787296295166, "learning_rate": 7.042554857983594e-06, "loss": 1.3923, "step": 11337 }, { "epoch": 0.6198761669149707, "grad_norm": 1.8896231651306152, "learning_rate": 7.040809293119741e-06, "loss": 1.3793, "step": 11338 }, { "epoch": 0.6199308393586922, "grad_norm": 1.4281679391860962, "learning_rate": 7.039063827070214e-06, "loss": 1.6191, "step": 11339 }, { "epoch": 0.6199855118024138, "grad_norm": 1.3062673807144165, "learning_rate": 7.037318459893292e-06, "loss": 1.6189, "step": 11340 }, { "epoch": 0.6200401842461354, "grad_norm": 1.1233584880828857, "learning_rate": 7.035573191647256e-06, "loss": 1.5662, "step": 11341 }, { "epoch": 0.6200948566898569, "grad_norm": 1.5763376951217651, "learning_rate": 7.0338280223903895e-06, "loss": 1.3315, "step": 11342 }, { "epoch": 0.6201495291335785, "grad_norm": 2.0261340141296387, "learning_rate": 7.032082952180963e-06, "loss": 1.3861, "step": 11343 }, { "epoch": 0.6202042015773, "grad_norm": 1.2894010543823242, "learning_rate": 7.030337981077255e-06, "loss": 1.3442, "step": 11344 }, { "epoch": 0.6202588740210215, "grad_norm": 1.4944922924041748, "learning_rate": 7.028593109137531e-06, "loss": 1.5176, "step": 11345 }, { "epoch": 0.6203135464647431, "grad_norm": 1.5323817729949951, "learning_rate": 7.026848336420053e-06, "loss": 1.5301, "step": 11346 }, { "epoch": 0.6203682189084646, "grad_norm": 1.4216561317443848, "learning_rate": 7.025103662983088e-06, "loss": 1.5118, "step": 11347 }, { "epoch": 0.6204228913521862, "grad_norm": 1.4222922325134277, "learning_rate": 7.023359088884892e-06, "loss": 1.4509, "step": 11348 }, { "epoch": 0.6204775637959078, "grad_norm": 1.4402666091918945, "learning_rate": 7.021614614183719e-06, "loss": 1.4691, "step": 11349 }, { "epoch": 0.6205322362396293, "grad_norm": 1.696864128112793, "learning_rate": 7.019870238937825e-06, "loss": 1.4771, "step": 11350 }, { "epoch": 0.6205869086833509, "grad_norm": 1.37208092212677, "learning_rate": 7.0181259632054555e-06, "loss": 1.2817, "step": 11351 }, { "epoch": 0.6206415811270725, "grad_norm": 1.2084633111953735, "learning_rate": 7.016381787044857e-06, "loss": 1.5454, "step": 11352 }, { "epoch": 0.6206962535707939, "grad_norm": 1.447717308998108, "learning_rate": 7.014637710514274e-06, "loss": 1.3494, "step": 11353 }, { "epoch": 0.6207509260145155, "grad_norm": 1.9387143850326538, "learning_rate": 7.012893733671944e-06, "loss": 1.4554, "step": 11354 }, { "epoch": 0.6208055984582371, "grad_norm": 1.9197217226028442, "learning_rate": 7.011149856576096e-06, "loss": 1.4102, "step": 11355 }, { "epoch": 0.6208602709019586, "grad_norm": 1.3938220739364624, "learning_rate": 7.009406079284973e-06, "loss": 1.3651, "step": 11356 }, { "epoch": 0.6209149433456802, "grad_norm": 1.5540060997009277, "learning_rate": 7.007662401856796e-06, "loss": 1.1132, "step": 11357 }, { "epoch": 0.6209696157894018, "grad_norm": 1.580112099647522, "learning_rate": 7.005918824349796e-06, "loss": 1.333, "step": 11358 }, { "epoch": 0.6210242882331233, "grad_norm": 1.8824738264083862, "learning_rate": 7.004175346822191e-06, "loss": 1.3496, "step": 11359 }, { "epoch": 0.6210789606768449, "grad_norm": 1.333573818206787, "learning_rate": 7.002431969332197e-06, "loss": 1.4462, "step": 11360 }, { "epoch": 0.6211336331205664, "grad_norm": 1.440523386001587, "learning_rate": 7.00068869193804e-06, "loss": 1.4576, "step": 11361 }, { "epoch": 0.621188305564288, "grad_norm": 2.3728222846984863, "learning_rate": 6.9989455146979236e-06, "loss": 1.3705, "step": 11362 }, { "epoch": 0.6212429780080095, "grad_norm": 1.6373800039291382, "learning_rate": 6.997202437670054e-06, "loss": 1.6768, "step": 11363 }, { "epoch": 0.621297650451731, "grad_norm": 1.6219078302383423, "learning_rate": 6.9954594609126484e-06, "loss": 1.4045, "step": 11364 }, { "epoch": 0.6213523228954526, "grad_norm": 2.1498217582702637, "learning_rate": 6.9937165844838986e-06, "loss": 1.404, "step": 11365 }, { "epoch": 0.6214069953391742, "grad_norm": 1.091376781463623, "learning_rate": 6.9919738084420055e-06, "loss": 1.4965, "step": 11366 }, { "epoch": 0.6214616677828957, "grad_norm": 1.5916694402694702, "learning_rate": 6.990231132845169e-06, "loss": 1.2486, "step": 11367 }, { "epoch": 0.6215163402266173, "grad_norm": 1.8854091167449951, "learning_rate": 6.988488557751576e-06, "loss": 1.4526, "step": 11368 }, { "epoch": 0.6215710126703389, "grad_norm": 1.3796271085739136, "learning_rate": 6.986746083219412e-06, "loss": 1.5548, "step": 11369 }, { "epoch": 0.6216256851140604, "grad_norm": 1.5529210567474365, "learning_rate": 6.985003709306872e-06, "loss": 1.463, "step": 11370 }, { "epoch": 0.621680357557782, "grad_norm": 1.605086326599121, "learning_rate": 6.983261436072132e-06, "loss": 1.1529, "step": 11371 }, { "epoch": 0.6217350300015035, "grad_norm": 1.5614289045333862, "learning_rate": 6.981519263573373e-06, "loss": 1.4256, "step": 11372 }, { "epoch": 0.621789702445225, "grad_norm": 1.3688485622406006, "learning_rate": 6.979777191868768e-06, "loss": 1.3524, "step": 11373 }, { "epoch": 0.6218443748889466, "grad_norm": 2.2328670024871826, "learning_rate": 6.978035221016487e-06, "loss": 1.2061, "step": 11374 }, { "epoch": 0.6218990473326681, "grad_norm": 1.8408626317977905, "learning_rate": 6.976293351074705e-06, "loss": 1.47, "step": 11375 }, { "epoch": 0.6219537197763897, "grad_norm": 1.4408093690872192, "learning_rate": 6.974551582101583e-06, "loss": 1.4281, "step": 11376 }, { "epoch": 0.6220083922201113, "grad_norm": 1.3799002170562744, "learning_rate": 6.97280991415528e-06, "loss": 1.7323, "step": 11377 }, { "epoch": 0.6220630646638328, "grad_norm": 1.4892898797988892, "learning_rate": 6.971068347293961e-06, "loss": 1.4842, "step": 11378 }, { "epoch": 0.6221177371075544, "grad_norm": 1.9336215257644653, "learning_rate": 6.969326881575777e-06, "loss": 1.2305, "step": 11379 }, { "epoch": 0.622172409551276, "grad_norm": 1.5307207107543945, "learning_rate": 6.967585517058877e-06, "loss": 1.4353, "step": 11380 }, { "epoch": 0.6222270819949974, "grad_norm": 1.4494043588638306, "learning_rate": 6.965844253801416e-06, "loss": 1.4938, "step": 11381 }, { "epoch": 0.622281754438719, "grad_norm": 1.57766854763031, "learning_rate": 6.964103091861535e-06, "loss": 1.4663, "step": 11382 }, { "epoch": 0.6223364268824406, "grad_norm": 2.0718719959259033, "learning_rate": 6.962362031297372e-06, "loss": 1.2717, "step": 11383 }, { "epoch": 0.6223910993261621, "grad_norm": 1.4643800258636475, "learning_rate": 6.960621072167071e-06, "loss": 1.4038, "step": 11384 }, { "epoch": 0.6224457717698837, "grad_norm": 1.5238417387008667, "learning_rate": 6.9588802145287645e-06, "loss": 1.3819, "step": 11385 }, { "epoch": 0.6225004442136053, "grad_norm": 1.5932613611221313, "learning_rate": 6.957139458440585e-06, "loss": 1.3441, "step": 11386 }, { "epoch": 0.6225551166573268, "grad_norm": 1.425839900970459, "learning_rate": 6.95539880396066e-06, "loss": 1.2513, "step": 11387 }, { "epoch": 0.6226097891010484, "grad_norm": 1.9941974878311157, "learning_rate": 6.953658251147109e-06, "loss": 1.2813, "step": 11388 }, { "epoch": 0.6226644615447698, "grad_norm": 1.4684650897979736, "learning_rate": 6.951917800058061e-06, "loss": 1.5097, "step": 11389 }, { "epoch": 0.6227191339884914, "grad_norm": 1.5028961896896362, "learning_rate": 6.950177450751631e-06, "loss": 1.4099, "step": 11390 }, { "epoch": 0.622773806432213, "grad_norm": 2.0108110904693604, "learning_rate": 6.948437203285929e-06, "loss": 1.3336, "step": 11391 }, { "epoch": 0.6228284788759345, "grad_norm": 1.771707534790039, "learning_rate": 6.946697057719074e-06, "loss": 1.4304, "step": 11392 }, { "epoch": 0.6228831513196561, "grad_norm": 1.7253416776657104, "learning_rate": 6.944957014109167e-06, "loss": 1.5928, "step": 11393 }, { "epoch": 0.6229378237633777, "grad_norm": 1.8333567380905151, "learning_rate": 6.943217072514311e-06, "loss": 1.3544, "step": 11394 }, { "epoch": 0.6229924962070992, "grad_norm": 1.417013168334961, "learning_rate": 6.941477232992614e-06, "loss": 1.5772, "step": 11395 }, { "epoch": 0.6230471686508208, "grad_norm": 1.6294643878936768, "learning_rate": 6.939737495602169e-06, "loss": 1.5159, "step": 11396 }, { "epoch": 0.6231018410945424, "grad_norm": 1.535080075263977, "learning_rate": 6.937997860401068e-06, "loss": 1.2203, "step": 11397 }, { "epoch": 0.6231565135382638, "grad_norm": 1.4953962564468384, "learning_rate": 6.936258327447406e-06, "loss": 1.5149, "step": 11398 }, { "epoch": 0.6232111859819854, "grad_norm": 2.407630443572998, "learning_rate": 6.934518896799263e-06, "loss": 1.0021, "step": 11399 }, { "epoch": 0.623265858425707, "grad_norm": 1.6352978944778442, "learning_rate": 6.932779568514731e-06, "loss": 1.4094, "step": 11400 }, { "epoch": 0.6233205308694285, "grad_norm": 2.0795938968658447, "learning_rate": 6.9310403426518895e-06, "loss": 1.1786, "step": 11401 }, { "epoch": 0.6233752033131501, "grad_norm": 1.83932363986969, "learning_rate": 6.929301219268806e-06, "loss": 1.4372, "step": 11402 }, { "epoch": 0.6234298757568716, "grad_norm": 1.8943977355957031, "learning_rate": 6.9275621984235654e-06, "loss": 1.3681, "step": 11403 }, { "epoch": 0.6234845482005932, "grad_norm": 1.590958595275879, "learning_rate": 6.925823280174232e-06, "loss": 1.4283, "step": 11404 }, { "epoch": 0.6235392206443148, "grad_norm": 1.5035252571105957, "learning_rate": 6.924084464578871e-06, "loss": 1.399, "step": 11405 }, { "epoch": 0.6235938930880363, "grad_norm": 1.6934242248535156, "learning_rate": 6.92234575169555e-06, "loss": 1.4925, "step": 11406 }, { "epoch": 0.6236485655317578, "grad_norm": 1.8525630235671997, "learning_rate": 6.920607141582327e-06, "loss": 1.4533, "step": 11407 }, { "epoch": 0.6237032379754794, "grad_norm": 1.4430087804794312, "learning_rate": 6.918868634297252e-06, "loss": 1.5359, "step": 11408 }, { "epoch": 0.6237579104192009, "grad_norm": 1.4878010749816895, "learning_rate": 6.917130229898387e-06, "loss": 1.4892, "step": 11409 }, { "epoch": 0.6238125828629225, "grad_norm": 1.154786467552185, "learning_rate": 6.9153919284437795e-06, "loss": 1.5086, "step": 11410 }, { "epoch": 0.6238672553066441, "grad_norm": 1.5799885988235474, "learning_rate": 6.913653729991472e-06, "loss": 1.562, "step": 11411 }, { "epoch": 0.6239219277503656, "grad_norm": 1.6553279161453247, "learning_rate": 6.911915634599511e-06, "loss": 1.4006, "step": 11412 }, { "epoch": 0.6239766001940872, "grad_norm": 1.2883718013763428, "learning_rate": 6.9101776423259285e-06, "loss": 1.4371, "step": 11413 }, { "epoch": 0.6240312726378088, "grad_norm": 1.2390766143798828, "learning_rate": 6.908439753228769e-06, "loss": 1.2564, "step": 11414 }, { "epoch": 0.6240859450815303, "grad_norm": 1.1941630840301514, "learning_rate": 6.906701967366061e-06, "loss": 1.4226, "step": 11415 }, { "epoch": 0.6241406175252519, "grad_norm": 1.6323943138122559, "learning_rate": 6.90496428479583e-06, "loss": 1.4001, "step": 11416 }, { "epoch": 0.6241952899689734, "grad_norm": 1.4083201885223389, "learning_rate": 6.903226705576107e-06, "loss": 1.3608, "step": 11417 }, { "epoch": 0.6242499624126949, "grad_norm": 1.5966194868087769, "learning_rate": 6.90148922976491e-06, "loss": 1.2536, "step": 11418 }, { "epoch": 0.6243046348564165, "grad_norm": 1.5506019592285156, "learning_rate": 6.899751857420256e-06, "loss": 1.322, "step": 11419 }, { "epoch": 0.624359307300138, "grad_norm": 1.8143508434295654, "learning_rate": 6.898014588600166e-06, "loss": 1.723, "step": 11420 }, { "epoch": 0.6244139797438596, "grad_norm": 1.391292691230774, "learning_rate": 6.896277423362648e-06, "loss": 1.7052, "step": 11421 }, { "epoch": 0.6244686521875812, "grad_norm": 1.2139602899551392, "learning_rate": 6.894540361765706e-06, "loss": 1.603, "step": 11422 }, { "epoch": 0.6245233246313027, "grad_norm": 1.4569171667099, "learning_rate": 6.892803403867352e-06, "loss": 1.4927, "step": 11423 }, { "epoch": 0.6245779970750243, "grad_norm": 1.954535961151123, "learning_rate": 6.891066549725585e-06, "loss": 1.4397, "step": 11424 }, { "epoch": 0.6246326695187459, "grad_norm": 1.5651391744613647, "learning_rate": 6.889329799398397e-06, "loss": 1.4794, "step": 11425 }, { "epoch": 0.6246873419624673, "grad_norm": 1.56135892868042, "learning_rate": 6.88759315294379e-06, "loss": 1.3041, "step": 11426 }, { "epoch": 0.6247420144061889, "grad_norm": 1.7732430696487427, "learning_rate": 6.885856610419747e-06, "loss": 1.2408, "step": 11427 }, { "epoch": 0.6247966868499105, "grad_norm": 1.3343873023986816, "learning_rate": 6.884120171884263e-06, "loss": 1.4515, "step": 11428 }, { "epoch": 0.624851359293632, "grad_norm": 1.9700583219528198, "learning_rate": 6.882383837395319e-06, "loss": 1.1482, "step": 11429 }, { "epoch": 0.6249060317373536, "grad_norm": 1.4362844228744507, "learning_rate": 6.8806476070108905e-06, "loss": 1.4002, "step": 11430 }, { "epoch": 0.6249607041810752, "grad_norm": 1.7999708652496338, "learning_rate": 6.878911480788961e-06, "loss": 1.5614, "step": 11431 }, { "epoch": 0.6250153766247967, "grad_norm": 1.5261461734771729, "learning_rate": 6.8771754587875015e-06, "loss": 1.2481, "step": 11432 }, { "epoch": 0.6250700490685183, "grad_norm": 1.357266902923584, "learning_rate": 6.875439541064477e-06, "loss": 1.6517, "step": 11433 }, { "epoch": 0.6251247215122397, "grad_norm": 1.4883133172988892, "learning_rate": 6.873703727677863e-06, "loss": 1.3771, "step": 11434 }, { "epoch": 0.6251793939559613, "grad_norm": 1.3946762084960938, "learning_rate": 6.8719680186856175e-06, "loss": 1.4032, "step": 11435 }, { "epoch": 0.6252340663996829, "grad_norm": 1.6810438632965088, "learning_rate": 6.870232414145696e-06, "loss": 1.5682, "step": 11436 }, { "epoch": 0.6252887388434044, "grad_norm": 1.1611254215240479, "learning_rate": 6.868496914116063e-06, "loss": 1.474, "step": 11437 }, { "epoch": 0.625343411287126, "grad_norm": 1.7051328420639038, "learning_rate": 6.866761518654665e-06, "loss": 1.4901, "step": 11438 }, { "epoch": 0.6253980837308476, "grad_norm": 1.3288072347640991, "learning_rate": 6.865026227819449e-06, "loss": 1.453, "step": 11439 }, { "epoch": 0.6254527561745691, "grad_norm": 1.9481987953186035, "learning_rate": 6.8632910416683674e-06, "loss": 1.2483, "step": 11440 }, { "epoch": 0.6255074286182907, "grad_norm": 1.552666425704956, "learning_rate": 6.861555960259354e-06, "loss": 1.4811, "step": 11441 }, { "epoch": 0.6255621010620123, "grad_norm": 2.0674331188201904, "learning_rate": 6.859820983650356e-06, "loss": 1.4141, "step": 11442 }, { "epoch": 0.6256167735057337, "grad_norm": 1.5129008293151855, "learning_rate": 6.858086111899304e-06, "loss": 1.4325, "step": 11443 }, { "epoch": 0.6256714459494553, "grad_norm": 1.472609043121338, "learning_rate": 6.856351345064127e-06, "loss": 1.5727, "step": 11444 }, { "epoch": 0.6257261183931769, "grad_norm": 1.2482975721359253, "learning_rate": 6.854616683202757e-06, "loss": 1.4493, "step": 11445 }, { "epoch": 0.6257807908368984, "grad_norm": 1.4079794883728027, "learning_rate": 6.852882126373118e-06, "loss": 1.4324, "step": 11446 }, { "epoch": 0.62583546328062, "grad_norm": 1.371649980545044, "learning_rate": 6.851147674633125e-06, "loss": 1.563, "step": 11447 }, { "epoch": 0.6258901357243415, "grad_norm": 1.5779451131820679, "learning_rate": 6.849413328040705e-06, "loss": 1.4943, "step": 11448 }, { "epoch": 0.6259448081680631, "grad_norm": 1.5005210638046265, "learning_rate": 6.8476790866537665e-06, "loss": 1.5656, "step": 11449 }, { "epoch": 0.6259994806117847, "grad_norm": 1.4275161027908325, "learning_rate": 6.845944950530219e-06, "loss": 1.3682, "step": 11450 }, { "epoch": 0.6260541530555062, "grad_norm": 1.5369964838027954, "learning_rate": 6.844210919727971e-06, "loss": 1.4966, "step": 11451 }, { "epoch": 0.6261088254992278, "grad_norm": 1.3661150932312012, "learning_rate": 6.842476994304929e-06, "loss": 1.442, "step": 11452 }, { "epoch": 0.6261634979429493, "grad_norm": 1.7449718713760376, "learning_rate": 6.840743174318982e-06, "loss": 1.4028, "step": 11453 }, { "epoch": 0.6262181703866708, "grad_norm": 1.4580062627792358, "learning_rate": 6.839009459828041e-06, "loss": 1.5874, "step": 11454 }, { "epoch": 0.6262728428303924, "grad_norm": 1.6317428350448608, "learning_rate": 6.837275850889987e-06, "loss": 1.4437, "step": 11455 }, { "epoch": 0.626327515274114, "grad_norm": 2.267240524291992, "learning_rate": 6.835542347562717e-06, "loss": 1.2792, "step": 11456 }, { "epoch": 0.6263821877178355, "grad_norm": 1.4982810020446777, "learning_rate": 6.8338089499041135e-06, "loss": 1.4228, "step": 11457 }, { "epoch": 0.6264368601615571, "grad_norm": 1.6698051691055298, "learning_rate": 6.8320756579720545e-06, "loss": 1.3973, "step": 11458 }, { "epoch": 0.6264915326052787, "grad_norm": 1.534755825996399, "learning_rate": 6.830342471824428e-06, "loss": 1.3853, "step": 11459 }, { "epoch": 0.6265462050490002, "grad_norm": 1.4346082210540771, "learning_rate": 6.828609391519103e-06, "loss": 1.4598, "step": 11460 }, { "epoch": 0.6266008774927218, "grad_norm": 1.6323646306991577, "learning_rate": 6.82687641711395e-06, "loss": 1.4132, "step": 11461 }, { "epoch": 0.6266555499364432, "grad_norm": 1.7301141023635864, "learning_rate": 6.825143548666841e-06, "loss": 1.4473, "step": 11462 }, { "epoch": 0.6267102223801648, "grad_norm": 1.6164966821670532, "learning_rate": 6.823410786235643e-06, "loss": 1.4005, "step": 11463 }, { "epoch": 0.6267648948238864, "grad_norm": 1.6981580257415771, "learning_rate": 6.821678129878206e-06, "loss": 1.3696, "step": 11464 }, { "epoch": 0.6268195672676079, "grad_norm": 1.985021710395813, "learning_rate": 6.819945579652401e-06, "loss": 1.576, "step": 11465 }, { "epoch": 0.6268742397113295, "grad_norm": 1.4360171556472778, "learning_rate": 6.818213135616072e-06, "loss": 1.337, "step": 11466 }, { "epoch": 0.6269289121550511, "grad_norm": 1.675246238708496, "learning_rate": 6.81648079782707e-06, "loss": 1.395, "step": 11467 }, { "epoch": 0.6269835845987726, "grad_norm": 1.338208556175232, "learning_rate": 6.814748566343248e-06, "loss": 1.2513, "step": 11468 }, { "epoch": 0.6270382570424942, "grad_norm": 1.4981613159179688, "learning_rate": 6.813016441222444e-06, "loss": 1.2347, "step": 11469 }, { "epoch": 0.6270929294862158, "grad_norm": 1.4096226692199707, "learning_rate": 6.8112844225225015e-06, "loss": 1.4317, "step": 11470 }, { "epoch": 0.6271476019299372, "grad_norm": 1.5949645042419434, "learning_rate": 6.809552510301255e-06, "loss": 1.3723, "step": 11471 }, { "epoch": 0.6272022743736588, "grad_norm": 1.9212462902069092, "learning_rate": 6.807820704616532e-06, "loss": 1.5802, "step": 11472 }, { "epoch": 0.6272569468173804, "grad_norm": 1.5312633514404297, "learning_rate": 6.806089005526171e-06, "loss": 1.2628, "step": 11473 }, { "epoch": 0.6273116192611019, "grad_norm": 1.3652139902114868, "learning_rate": 6.804357413087993e-06, "loss": 1.2688, "step": 11474 }, { "epoch": 0.6273662917048235, "grad_norm": 1.6030924320220947, "learning_rate": 6.802625927359818e-06, "loss": 1.3129, "step": 11475 }, { "epoch": 0.627420964148545, "grad_norm": 1.5753663778305054, "learning_rate": 6.800894548399467e-06, "loss": 1.3287, "step": 11476 }, { "epoch": 0.6274756365922666, "grad_norm": 1.2664518356323242, "learning_rate": 6.799163276264756e-06, "loss": 1.5778, "step": 11477 }, { "epoch": 0.6275303090359882, "grad_norm": 1.3733340501785278, "learning_rate": 6.797432111013488e-06, "loss": 1.625, "step": 11478 }, { "epoch": 0.6275849814797096, "grad_norm": 1.8457016944885254, "learning_rate": 6.795701052703482e-06, "loss": 1.4996, "step": 11479 }, { "epoch": 0.6276396539234312, "grad_norm": 1.9074910879135132, "learning_rate": 6.793970101392537e-06, "loss": 1.4358, "step": 11480 }, { "epoch": 0.6276943263671528, "grad_norm": 1.475551724433899, "learning_rate": 6.792239257138449e-06, "loss": 1.4753, "step": 11481 }, { "epoch": 0.6277489988108743, "grad_norm": 1.3387821912765503, "learning_rate": 6.790508519999024e-06, "loss": 1.6143, "step": 11482 }, { "epoch": 0.6278036712545959, "grad_norm": 1.645684003829956, "learning_rate": 6.788777890032048e-06, "loss": 1.3667, "step": 11483 }, { "epoch": 0.6278583436983175, "grad_norm": 1.6133942604064941, "learning_rate": 6.787047367295316e-06, "loss": 1.5272, "step": 11484 }, { "epoch": 0.627913016142039, "grad_norm": 2.659137010574341, "learning_rate": 6.785316951846612e-06, "loss": 1.399, "step": 11485 }, { "epoch": 0.6279676885857606, "grad_norm": 1.5471992492675781, "learning_rate": 6.783586643743714e-06, "loss": 1.475, "step": 11486 }, { "epoch": 0.6280223610294822, "grad_norm": 1.4470531940460205, "learning_rate": 6.78185644304441e-06, "loss": 1.6365, "step": 11487 }, { "epoch": 0.6280770334732036, "grad_norm": 1.3149356842041016, "learning_rate": 6.7801263498064705e-06, "loss": 1.617, "step": 11488 }, { "epoch": 0.6281317059169252, "grad_norm": 1.163996934890747, "learning_rate": 6.778396364087667e-06, "loss": 1.61, "step": 11489 }, { "epoch": 0.6281863783606467, "grad_norm": 1.7856242656707764, "learning_rate": 6.77666648594577e-06, "loss": 1.397, "step": 11490 }, { "epoch": 0.6282410508043683, "grad_norm": 1.4327131509780884, "learning_rate": 6.7749367154385424e-06, "loss": 1.3881, "step": 11491 }, { "epoch": 0.6282957232480899, "grad_norm": 1.5731333494186401, "learning_rate": 6.773207052623743e-06, "loss": 1.3402, "step": 11492 }, { "epoch": 0.6283503956918114, "grad_norm": 1.3822391033172607, "learning_rate": 6.7714774975591335e-06, "loss": 1.5593, "step": 11493 }, { "epoch": 0.628405068135533, "grad_norm": 1.3101650476455688, "learning_rate": 6.769748050302469e-06, "loss": 1.5223, "step": 11494 }, { "epoch": 0.6284597405792546, "grad_norm": 2.321798801422119, "learning_rate": 6.7680187109114936e-06, "loss": 1.2978, "step": 11495 }, { "epoch": 0.6285144130229761, "grad_norm": 2.0166776180267334, "learning_rate": 6.766289479443959e-06, "loss": 1.478, "step": 11496 }, { "epoch": 0.6285690854666977, "grad_norm": 1.3920118808746338, "learning_rate": 6.7645603559576045e-06, "loss": 1.515, "step": 11497 }, { "epoch": 0.6286237579104192, "grad_norm": 1.2603713274002075, "learning_rate": 6.762831340510175e-06, "loss": 1.5653, "step": 11498 }, { "epoch": 0.6286784303541407, "grad_norm": 1.7122294902801514, "learning_rate": 6.761102433159403e-06, "loss": 1.3108, "step": 11499 }, { "epoch": 0.6287331027978623, "grad_norm": 1.50960111618042, "learning_rate": 6.75937363396302e-06, "loss": 1.4464, "step": 11500 }, { "epoch": 0.6287877752415839, "grad_norm": 1.3927632570266724, "learning_rate": 6.7576449429787585e-06, "loss": 1.5776, "step": 11501 }, { "epoch": 0.6288424476853054, "grad_norm": 1.5487521886825562, "learning_rate": 6.755916360264339e-06, "loss": 1.4747, "step": 11502 }, { "epoch": 0.628897120129027, "grad_norm": 1.4350312948226929, "learning_rate": 6.754187885877481e-06, "loss": 1.4969, "step": 11503 }, { "epoch": 0.6289517925727485, "grad_norm": 1.733618140220642, "learning_rate": 6.75245951987591e-06, "loss": 1.3389, "step": 11504 }, { "epoch": 0.6290064650164701, "grad_norm": 1.6612236499786377, "learning_rate": 6.750731262317337e-06, "loss": 1.4036, "step": 11505 }, { "epoch": 0.6290611374601917, "grad_norm": 1.6626818180084229, "learning_rate": 6.749003113259467e-06, "loss": 1.29, "step": 11506 }, { "epoch": 0.6291158099039131, "grad_norm": 1.3370862007141113, "learning_rate": 6.7472750727600155e-06, "loss": 1.3158, "step": 11507 }, { "epoch": 0.6291704823476347, "grad_norm": 1.3258804082870483, "learning_rate": 6.745547140876683e-06, "loss": 1.6521, "step": 11508 }, { "epoch": 0.6292251547913563, "grad_norm": 1.9329516887664795, "learning_rate": 6.7438193176671666e-06, "loss": 1.3492, "step": 11509 }, { "epoch": 0.6292798272350778, "grad_norm": 1.2573730945587158, "learning_rate": 6.742091603189165e-06, "loss": 1.5059, "step": 11510 }, { "epoch": 0.6293344996787994, "grad_norm": 1.3879811763763428, "learning_rate": 6.740363997500366e-06, "loss": 1.5218, "step": 11511 }, { "epoch": 0.629389172122521, "grad_norm": 1.4738610982894897, "learning_rate": 6.7386365006584665e-06, "loss": 1.2533, "step": 11512 }, { "epoch": 0.6294438445662425, "grad_norm": 1.3107454776763916, "learning_rate": 6.736909112721146e-06, "loss": 1.5981, "step": 11513 }, { "epoch": 0.6294985170099641, "grad_norm": 1.7460907697677612, "learning_rate": 6.735181833746087e-06, "loss": 1.5444, "step": 11514 }, { "epoch": 0.6295531894536857, "grad_norm": 1.1504848003387451, "learning_rate": 6.733454663790968e-06, "loss": 1.5281, "step": 11515 }, { "epoch": 0.6296078618974071, "grad_norm": 1.6504031419754028, "learning_rate": 6.731727602913465e-06, "loss": 1.3171, "step": 11516 }, { "epoch": 0.6296625343411287, "grad_norm": 1.530430793762207, "learning_rate": 6.730000651171241e-06, "loss": 1.4919, "step": 11517 }, { "epoch": 0.6297172067848502, "grad_norm": 2.0578906536102295, "learning_rate": 6.728273808621973e-06, "loss": 1.5003, "step": 11518 }, { "epoch": 0.6297718792285718, "grad_norm": 1.5798648595809937, "learning_rate": 6.72654707532332e-06, "loss": 1.3561, "step": 11519 }, { "epoch": 0.6298265516722934, "grad_norm": 1.3090345859527588, "learning_rate": 6.72482045133294e-06, "loss": 1.2728, "step": 11520 }, { "epoch": 0.6298812241160149, "grad_norm": 1.5593637228012085, "learning_rate": 6.7230939367084915e-06, "loss": 1.6079, "step": 11521 }, { "epoch": 0.6299358965597365, "grad_norm": 1.4634512662887573, "learning_rate": 6.721367531507627e-06, "loss": 1.522, "step": 11522 }, { "epoch": 0.6299905690034581, "grad_norm": 1.3298096656799316, "learning_rate": 6.7196412357879894e-06, "loss": 1.3597, "step": 11523 }, { "epoch": 0.6300452414471795, "grad_norm": 1.68720281124115, "learning_rate": 6.717915049607233e-06, "loss": 1.478, "step": 11524 }, { "epoch": 0.6300999138909011, "grad_norm": 1.804916262626648, "learning_rate": 6.71618897302299e-06, "loss": 1.4794, "step": 11525 }, { "epoch": 0.6301545863346227, "grad_norm": 1.7714738845825195, "learning_rate": 6.714463006092908e-06, "loss": 1.3426, "step": 11526 }, { "epoch": 0.6302092587783442, "grad_norm": 1.4935613870620728, "learning_rate": 6.7127371488746155e-06, "loss": 1.5173, "step": 11527 }, { "epoch": 0.6302639312220658, "grad_norm": 1.3433363437652588, "learning_rate": 6.711011401425741e-06, "loss": 1.5422, "step": 11528 }, { "epoch": 0.6303186036657874, "grad_norm": 1.116365671157837, "learning_rate": 6.709285763803917e-06, "loss": 1.7399, "step": 11529 }, { "epoch": 0.6303732761095089, "grad_norm": 1.8401069641113281, "learning_rate": 6.7075602360667616e-06, "loss": 1.3978, "step": 11530 }, { "epoch": 0.6304279485532305, "grad_norm": 2.452092409133911, "learning_rate": 6.705834818271893e-06, "loss": 1.4742, "step": 11531 }, { "epoch": 0.630482620996952, "grad_norm": 1.719388484954834, "learning_rate": 6.704109510476933e-06, "loss": 1.5244, "step": 11532 }, { "epoch": 0.6305372934406736, "grad_norm": 1.4341036081314087, "learning_rate": 6.7023843127394905e-06, "loss": 1.5884, "step": 11533 }, { "epoch": 0.6305919658843951, "grad_norm": 1.5963771343231201, "learning_rate": 6.700659225117172e-06, "loss": 1.5834, "step": 11534 }, { "epoch": 0.6306466383281166, "grad_norm": 1.7907205820083618, "learning_rate": 6.698934247667587e-06, "loss": 1.5134, "step": 11535 }, { "epoch": 0.6307013107718382, "grad_norm": 1.8566733598709106, "learning_rate": 6.697209380448333e-06, "loss": 1.6057, "step": 11536 }, { "epoch": 0.6307559832155598, "grad_norm": 1.3927247524261475, "learning_rate": 6.695484623517004e-06, "loss": 1.3545, "step": 11537 }, { "epoch": 0.6308106556592813, "grad_norm": 1.4458105564117432, "learning_rate": 6.693759976931201e-06, "loss": 1.55, "step": 11538 }, { "epoch": 0.6308653281030029, "grad_norm": 1.5395537614822388, "learning_rate": 6.692035440748512e-06, "loss": 1.3941, "step": 11539 }, { "epoch": 0.6309200005467245, "grad_norm": 1.2398395538330078, "learning_rate": 6.69031101502652e-06, "loss": 1.6649, "step": 11540 }, { "epoch": 0.630974672990446, "grad_norm": 1.614393711090088, "learning_rate": 6.68858669982281e-06, "loss": 1.4151, "step": 11541 }, { "epoch": 0.6310293454341676, "grad_norm": 1.641126275062561, "learning_rate": 6.686862495194958e-06, "loss": 1.2823, "step": 11542 }, { "epoch": 0.6310840178778891, "grad_norm": 1.3769943714141846, "learning_rate": 6.685138401200546e-06, "loss": 1.4744, "step": 11543 }, { "epoch": 0.6311386903216106, "grad_norm": 1.419676661491394, "learning_rate": 6.68341441789714e-06, "loss": 1.4558, "step": 11544 }, { "epoch": 0.6311933627653322, "grad_norm": 1.646044135093689, "learning_rate": 6.681690545342305e-06, "loss": 1.4203, "step": 11545 }, { "epoch": 0.6312480352090537, "grad_norm": 1.5664550065994263, "learning_rate": 6.679966783593616e-06, "loss": 1.2728, "step": 11546 }, { "epoch": 0.6313027076527753, "grad_norm": 1.6175025701522827, "learning_rate": 6.678243132708625e-06, "loss": 1.5764, "step": 11547 }, { "epoch": 0.6313573800964969, "grad_norm": 1.7297054529190063, "learning_rate": 6.676519592744888e-06, "loss": 1.7547, "step": 11548 }, { "epoch": 0.6314120525402184, "grad_norm": 1.971126675605774, "learning_rate": 6.6747961637599645e-06, "loss": 1.2342, "step": 11549 }, { "epoch": 0.63146672498394, "grad_norm": 1.7483505010604858, "learning_rate": 6.673072845811398e-06, "loss": 1.3039, "step": 11550 }, { "epoch": 0.6315213974276616, "grad_norm": 1.5820033550262451, "learning_rate": 6.671349638956732e-06, "loss": 1.2795, "step": 11551 }, { "epoch": 0.631576069871383, "grad_norm": 1.1793192625045776, "learning_rate": 6.669626543253518e-06, "loss": 1.4208, "step": 11552 }, { "epoch": 0.6316307423151046, "grad_norm": 1.4925618171691895, "learning_rate": 6.667903558759288e-06, "loss": 1.2003, "step": 11553 }, { "epoch": 0.6316854147588262, "grad_norm": 1.4128402471542358, "learning_rate": 6.666180685531576e-06, "loss": 1.1264, "step": 11554 }, { "epoch": 0.6317400872025477, "grad_norm": 2.027026414871216, "learning_rate": 6.664457923627914e-06, "loss": 1.3914, "step": 11555 }, { "epoch": 0.6317947596462693, "grad_norm": 1.6636021137237549, "learning_rate": 6.662735273105827e-06, "loss": 1.3506, "step": 11556 }, { "epoch": 0.6318494320899909, "grad_norm": 1.45600163936615, "learning_rate": 6.661012734022843e-06, "loss": 1.305, "step": 11557 }, { "epoch": 0.6319041045337124, "grad_norm": 1.5339275598526, "learning_rate": 6.659290306436479e-06, "loss": 1.3204, "step": 11558 }, { "epoch": 0.631958776977434, "grad_norm": 1.686252236366272, "learning_rate": 6.6575679904042504e-06, "loss": 1.4484, "step": 11559 }, { "epoch": 0.6320134494211554, "grad_norm": 1.8424460887908936, "learning_rate": 6.65584578598367e-06, "loss": 1.2771, "step": 11560 }, { "epoch": 0.632068121864877, "grad_norm": 1.2746549844741821, "learning_rate": 6.654123693232247e-06, "loss": 1.5047, "step": 11561 }, { "epoch": 0.6321227943085986, "grad_norm": 1.2180331945419312, "learning_rate": 6.652401712207481e-06, "loss": 1.3173, "step": 11562 }, { "epoch": 0.6321774667523201, "grad_norm": 1.8349031209945679, "learning_rate": 6.650679842966881e-06, "loss": 1.342, "step": 11563 }, { "epoch": 0.6322321391960417, "grad_norm": 1.7356568574905396, "learning_rate": 6.648958085567941e-06, "loss": 1.438, "step": 11564 }, { "epoch": 0.6322868116397633, "grad_norm": 1.5304343700408936, "learning_rate": 6.64723644006815e-06, "loss": 1.3513, "step": 11565 }, { "epoch": 0.6323414840834848, "grad_norm": 1.5503666400909424, "learning_rate": 6.645514906525006e-06, "loss": 1.4142, "step": 11566 }, { "epoch": 0.6323961565272064, "grad_norm": 1.6422210931777954, "learning_rate": 6.643793484995991e-06, "loss": 1.4053, "step": 11567 }, { "epoch": 0.632450828970928, "grad_norm": 1.6336556673049927, "learning_rate": 6.642072175538583e-06, "loss": 1.2624, "step": 11568 }, { "epoch": 0.6325055014146495, "grad_norm": 1.7237211465835571, "learning_rate": 6.640350978210269e-06, "loss": 1.3141, "step": 11569 }, { "epoch": 0.632560173858371, "grad_norm": 1.403486967086792, "learning_rate": 6.638629893068516e-06, "loss": 1.3589, "step": 11570 }, { "epoch": 0.6326148463020926, "grad_norm": 1.553013563156128, "learning_rate": 6.6369089201708e-06, "loss": 1.2815, "step": 11571 }, { "epoch": 0.6326695187458141, "grad_norm": 1.389306664466858, "learning_rate": 6.635188059574589e-06, "loss": 1.3956, "step": 11572 }, { "epoch": 0.6327241911895357, "grad_norm": 1.510880708694458, "learning_rate": 6.633467311337341e-06, "loss": 1.3368, "step": 11573 }, { "epoch": 0.6327788636332572, "grad_norm": 1.6833709478378296, "learning_rate": 6.631746675516522e-06, "loss": 1.2953, "step": 11574 }, { "epoch": 0.6328335360769788, "grad_norm": 1.3782800436019897, "learning_rate": 6.630026152169585e-06, "loss": 1.4295, "step": 11575 }, { "epoch": 0.6328882085207004, "grad_norm": 1.2818152904510498, "learning_rate": 6.628305741353979e-06, "loss": 1.6227, "step": 11576 }, { "epoch": 0.6329428809644219, "grad_norm": 1.6197313070297241, "learning_rate": 6.62658544312716e-06, "loss": 1.3243, "step": 11577 }, { "epoch": 0.6329975534081435, "grad_norm": 1.2518608570098877, "learning_rate": 6.6248652575465696e-06, "loss": 1.5463, "step": 11578 }, { "epoch": 0.633052225851865, "grad_norm": 1.7602202892303467, "learning_rate": 6.623145184669646e-06, "loss": 1.4636, "step": 11579 }, { "epoch": 0.6331068982955865, "grad_norm": 1.5134851932525635, "learning_rate": 6.62142522455383e-06, "loss": 1.5267, "step": 11580 }, { "epoch": 0.6331615707393081, "grad_norm": 1.4115608930587769, "learning_rate": 6.619705377256556e-06, "loss": 1.2494, "step": 11581 }, { "epoch": 0.6332162431830297, "grad_norm": 1.470474362373352, "learning_rate": 6.617985642835245e-06, "loss": 1.5132, "step": 11582 }, { "epoch": 0.6332709156267512, "grad_norm": 1.3828577995300293, "learning_rate": 6.616266021347335e-06, "loss": 1.6164, "step": 11583 }, { "epoch": 0.6333255880704728, "grad_norm": 2.230823278427124, "learning_rate": 6.614546512850237e-06, "loss": 1.5202, "step": 11584 }, { "epoch": 0.6333802605141944, "grad_norm": 1.672909140586853, "learning_rate": 6.612827117401381e-06, "loss": 1.4931, "step": 11585 }, { "epoch": 0.6334349329579159, "grad_norm": 1.5482016801834106, "learning_rate": 6.611107835058175e-06, "loss": 1.5468, "step": 11586 }, { "epoch": 0.6334896054016375, "grad_norm": 1.5535417795181274, "learning_rate": 6.609388665878029e-06, "loss": 1.2568, "step": 11587 }, { "epoch": 0.6335442778453589, "grad_norm": 1.844511866569519, "learning_rate": 6.6076696099183544e-06, "loss": 1.579, "step": 11588 }, { "epoch": 0.6335989502890805, "grad_norm": 1.457638144493103, "learning_rate": 6.6059506672365516e-06, "loss": 1.2752, "step": 11589 }, { "epoch": 0.6336536227328021, "grad_norm": 1.9242253303527832, "learning_rate": 6.604231837890015e-06, "loss": 1.6003, "step": 11590 }, { "epoch": 0.6337082951765236, "grad_norm": 1.6647671461105347, "learning_rate": 6.6025131219361505e-06, "loss": 1.4099, "step": 11591 }, { "epoch": 0.6337629676202452, "grad_norm": 1.6630504131317139, "learning_rate": 6.600794519432346e-06, "loss": 1.4813, "step": 11592 }, { "epoch": 0.6338176400639668, "grad_norm": 1.4111052751541138, "learning_rate": 6.599076030435987e-06, "loss": 1.3295, "step": 11593 }, { "epoch": 0.6338723125076883, "grad_norm": 1.155464768409729, "learning_rate": 6.5973576550044604e-06, "loss": 1.4646, "step": 11594 }, { "epoch": 0.6339269849514099, "grad_norm": 1.566362977027893, "learning_rate": 6.595639393195148e-06, "loss": 1.2191, "step": 11595 }, { "epoch": 0.6339816573951315, "grad_norm": 1.5240483283996582, "learning_rate": 6.59392124506542e-06, "loss": 1.4876, "step": 11596 }, { "epoch": 0.6340363298388529, "grad_norm": 1.2251865863800049, "learning_rate": 6.592203210672657e-06, "loss": 1.3697, "step": 11597 }, { "epoch": 0.6340910022825745, "grad_norm": 1.6296683549880981, "learning_rate": 6.590485290074224e-06, "loss": 1.7121, "step": 11598 }, { "epoch": 0.6341456747262961, "grad_norm": 1.4823267459869385, "learning_rate": 6.588767483327492e-06, "loss": 1.4085, "step": 11599 }, { "epoch": 0.6342003471700176, "grad_norm": 1.533591866493225, "learning_rate": 6.5870497904898165e-06, "loss": 1.3731, "step": 11600 }, { "epoch": 0.6342550196137392, "grad_norm": 1.785418152809143, "learning_rate": 6.585332211618554e-06, "loss": 1.3309, "step": 11601 }, { "epoch": 0.6343096920574607, "grad_norm": 1.606145977973938, "learning_rate": 6.583614746771065e-06, "loss": 1.4342, "step": 11602 }, { "epoch": 0.6343643645011823, "grad_norm": 1.690848708152771, "learning_rate": 6.5818973960046976e-06, "loss": 1.4836, "step": 11603 }, { "epoch": 0.6344190369449039, "grad_norm": 1.7061554193496704, "learning_rate": 6.580180159376792e-06, "loss": 1.6006, "step": 11604 }, { "epoch": 0.6344737093886254, "grad_norm": 1.5695829391479492, "learning_rate": 6.5784630369447e-06, "loss": 1.6919, "step": 11605 }, { "epoch": 0.6345283818323469, "grad_norm": 1.8349483013153076, "learning_rate": 6.576746028765756e-06, "loss": 1.417, "step": 11606 }, { "epoch": 0.6345830542760685, "grad_norm": 1.3213063478469849, "learning_rate": 6.575029134897293e-06, "loss": 1.4694, "step": 11607 }, { "epoch": 0.63463772671979, "grad_norm": 1.4336841106414795, "learning_rate": 6.573312355396646e-06, "loss": 1.3662, "step": 11608 }, { "epoch": 0.6346923991635116, "grad_norm": 1.6748182773590088, "learning_rate": 6.571595690321141e-06, "loss": 1.4289, "step": 11609 }, { "epoch": 0.6347470716072332, "grad_norm": 1.8292977809906006, "learning_rate": 6.569879139728097e-06, "loss": 1.3635, "step": 11610 }, { "epoch": 0.6348017440509547, "grad_norm": 1.2337661981582642, "learning_rate": 6.5681627036748404e-06, "loss": 1.4702, "step": 11611 }, { "epoch": 0.6348564164946763, "grad_norm": 1.1752455234527588, "learning_rate": 6.566446382218683e-06, "loss": 1.4656, "step": 11612 }, { "epoch": 0.6349110889383979, "grad_norm": 1.4603610038757324, "learning_rate": 6.56473017541694e-06, "loss": 1.3591, "step": 11613 }, { "epoch": 0.6349657613821194, "grad_norm": 1.6360630989074707, "learning_rate": 6.5630140833269175e-06, "loss": 1.3177, "step": 11614 }, { "epoch": 0.635020433825841, "grad_norm": 1.4441816806793213, "learning_rate": 6.5612981060059156e-06, "loss": 1.3112, "step": 11615 }, { "epoch": 0.6350751062695624, "grad_norm": 1.2135188579559326, "learning_rate": 6.559582243511244e-06, "loss": 1.4205, "step": 11616 }, { "epoch": 0.635129778713284, "grad_norm": 1.4683067798614502, "learning_rate": 6.557866495900194e-06, "loss": 1.6431, "step": 11617 }, { "epoch": 0.6351844511570056, "grad_norm": 1.6183652877807617, "learning_rate": 6.556150863230055e-06, "loss": 1.6041, "step": 11618 }, { "epoch": 0.6352391236007271, "grad_norm": 1.1631131172180176, "learning_rate": 6.5544353455581245e-06, "loss": 1.3772, "step": 11619 }, { "epoch": 0.6352937960444487, "grad_norm": 1.3490098714828491, "learning_rate": 6.552719942941682e-06, "loss": 1.3744, "step": 11620 }, { "epoch": 0.6353484684881703, "grad_norm": 1.8180584907531738, "learning_rate": 6.551004655438007e-06, "loss": 1.4137, "step": 11621 }, { "epoch": 0.6354031409318918, "grad_norm": 1.464889407157898, "learning_rate": 6.549289483104382e-06, "loss": 1.5974, "step": 11622 }, { "epoch": 0.6354578133756134, "grad_norm": 1.368605375289917, "learning_rate": 6.54757442599808e-06, "loss": 1.6603, "step": 11623 }, { "epoch": 0.635512485819335, "grad_norm": 1.588099479675293, "learning_rate": 6.545859484176364e-06, "loss": 1.2691, "step": 11624 }, { "epoch": 0.6355671582630564, "grad_norm": 1.5619828701019287, "learning_rate": 6.54414465769651e-06, "loss": 1.4908, "step": 11625 }, { "epoch": 0.635621830706778, "grad_norm": 1.8417450189590454, "learning_rate": 6.542429946615774e-06, "loss": 1.3108, "step": 11626 }, { "epoch": 0.6356765031504996, "grad_norm": 1.6799474954605103, "learning_rate": 6.540715350991416e-06, "loss": 1.4867, "step": 11627 }, { "epoch": 0.6357311755942211, "grad_norm": 1.5068858861923218, "learning_rate": 6.539000870880692e-06, "loss": 1.441, "step": 11628 }, { "epoch": 0.6357858480379427, "grad_norm": 1.766708493232727, "learning_rate": 6.537286506340844e-06, "loss": 1.3272, "step": 11629 }, { "epoch": 0.6358405204816643, "grad_norm": 1.8932832479476929, "learning_rate": 6.53557225742913e-06, "loss": 1.2211, "step": 11630 }, { "epoch": 0.6358951929253858, "grad_norm": 1.7147748470306396, "learning_rate": 6.5338581242027885e-06, "loss": 1.6015, "step": 11631 }, { "epoch": 0.6359498653691074, "grad_norm": 1.2320427894592285, "learning_rate": 6.532144106719056e-06, "loss": 1.5468, "step": 11632 }, { "epoch": 0.6360045378128288, "grad_norm": 1.3858109712600708, "learning_rate": 6.53043020503517e-06, "loss": 1.3728, "step": 11633 }, { "epoch": 0.6360592102565504, "grad_norm": 1.82512629032135, "learning_rate": 6.528716419208362e-06, "loss": 1.6064, "step": 11634 }, { "epoch": 0.636113882700272, "grad_norm": 1.4894986152648926, "learning_rate": 6.527002749295854e-06, "loss": 1.3735, "step": 11635 }, { "epoch": 0.6361685551439935, "grad_norm": 1.7015300989151, "learning_rate": 6.525289195354878e-06, "loss": 1.2026, "step": 11636 }, { "epoch": 0.6362232275877151, "grad_norm": 1.6869945526123047, "learning_rate": 6.52357575744265e-06, "loss": 1.1909, "step": 11637 }, { "epoch": 0.6362779000314367, "grad_norm": 1.3064603805541992, "learning_rate": 6.521862435616382e-06, "loss": 1.5431, "step": 11638 }, { "epoch": 0.6363325724751582, "grad_norm": 2.3325302600860596, "learning_rate": 6.520149229933292e-06, "loss": 1.2952, "step": 11639 }, { "epoch": 0.6363872449188798, "grad_norm": 1.4769837856292725, "learning_rate": 6.5184361404505795e-06, "loss": 1.6656, "step": 11640 }, { "epoch": 0.6364419173626014, "grad_norm": 1.5416779518127441, "learning_rate": 6.5167231672254595e-06, "loss": 1.5523, "step": 11641 }, { "epoch": 0.6364965898063228, "grad_norm": 1.696611762046814, "learning_rate": 6.5150103103151265e-06, "loss": 1.2677, "step": 11642 }, { "epoch": 0.6365512622500444, "grad_norm": 1.8572841882705688, "learning_rate": 6.513297569776773e-06, "loss": 1.305, "step": 11643 }, { "epoch": 0.636605934693766, "grad_norm": 1.6195790767669678, "learning_rate": 6.511584945667599e-06, "loss": 1.4894, "step": 11644 }, { "epoch": 0.6366606071374875, "grad_norm": 1.7296091318130493, "learning_rate": 6.509872438044789e-06, "loss": 1.0752, "step": 11645 }, { "epoch": 0.6367152795812091, "grad_norm": 1.5593420267105103, "learning_rate": 6.508160046965527e-06, "loss": 1.4401, "step": 11646 }, { "epoch": 0.6367699520249306, "grad_norm": 1.9230599403381348, "learning_rate": 6.506447772486997e-06, "loss": 1.382, "step": 11647 }, { "epoch": 0.6368246244686522, "grad_norm": 1.6530452966690063, "learning_rate": 6.504735614666373e-06, "loss": 1.2856, "step": 11648 }, { "epoch": 0.6368792969123738, "grad_norm": 1.6277917623519897, "learning_rate": 6.503023573560825e-06, "loss": 1.2745, "step": 11649 }, { "epoch": 0.6369339693560953, "grad_norm": 1.3299500942230225, "learning_rate": 6.501311649227531e-06, "loss": 1.5651, "step": 11650 }, { "epoch": 0.6369886417998168, "grad_norm": 1.4072186946868896, "learning_rate": 6.499599841723649e-06, "loss": 1.6007, "step": 11651 }, { "epoch": 0.6370433142435384, "grad_norm": 1.7331252098083496, "learning_rate": 6.4978881511063416e-06, "loss": 1.5672, "step": 11652 }, { "epoch": 0.6370979866872599, "grad_norm": 1.5279566049575806, "learning_rate": 6.4961765774327676e-06, "loss": 1.3482, "step": 11653 }, { "epoch": 0.6371526591309815, "grad_norm": 1.4454492330551147, "learning_rate": 6.4944651207600765e-06, "loss": 1.419, "step": 11654 }, { "epoch": 0.6372073315747031, "grad_norm": 1.524149775505066, "learning_rate": 6.492753781145425e-06, "loss": 1.4561, "step": 11655 }, { "epoch": 0.6372620040184246, "grad_norm": 1.905961275100708, "learning_rate": 6.491042558645955e-06, "loss": 1.4032, "step": 11656 }, { "epoch": 0.6373166764621462, "grad_norm": 1.399012565612793, "learning_rate": 6.489331453318806e-06, "loss": 1.1697, "step": 11657 }, { "epoch": 0.6373713489058678, "grad_norm": 1.5221819877624512, "learning_rate": 6.487620465221118e-06, "loss": 1.4283, "step": 11658 }, { "epoch": 0.6374260213495893, "grad_norm": 1.903069019317627, "learning_rate": 6.485909594410027e-06, "loss": 1.2825, "step": 11659 }, { "epoch": 0.6374806937933108, "grad_norm": 1.2711418867111206, "learning_rate": 6.484198840942656e-06, "loss": 1.4756, "step": 11660 }, { "epoch": 0.6375353662370323, "grad_norm": 1.578291416168213, "learning_rate": 6.4824882048761406e-06, "loss": 1.5276, "step": 11661 }, { "epoch": 0.6375900386807539, "grad_norm": 1.3335801362991333, "learning_rate": 6.480777686267597e-06, "loss": 1.4631, "step": 11662 }, { "epoch": 0.6376447111244755, "grad_norm": 1.2155261039733887, "learning_rate": 6.479067285174141e-06, "loss": 1.4246, "step": 11663 }, { "epoch": 0.637699383568197, "grad_norm": 1.6271926164627075, "learning_rate": 6.477357001652893e-06, "loss": 1.3024, "step": 11664 }, { "epoch": 0.6377540560119186, "grad_norm": 1.7931969165802002, "learning_rate": 6.475646835760963e-06, "loss": 1.2381, "step": 11665 }, { "epoch": 0.6378087284556402, "grad_norm": 1.599593162536621, "learning_rate": 6.4739367875554526e-06, "loss": 1.394, "step": 11666 }, { "epoch": 0.6378634008993617, "grad_norm": 1.7973483800888062, "learning_rate": 6.472226857093468e-06, "loss": 1.4136, "step": 11667 }, { "epoch": 0.6379180733430833, "grad_norm": 1.7099955081939697, "learning_rate": 6.470517044432104e-06, "loss": 1.4583, "step": 11668 }, { "epoch": 0.6379727457868049, "grad_norm": 1.7102463245391846, "learning_rate": 6.468807349628462e-06, "loss": 1.4954, "step": 11669 }, { "epoch": 0.6380274182305263, "grad_norm": 1.3975777626037598, "learning_rate": 6.467097772739628e-06, "loss": 1.4611, "step": 11670 }, { "epoch": 0.6380820906742479, "grad_norm": 1.378888487815857, "learning_rate": 6.4653883138226895e-06, "loss": 1.8533, "step": 11671 }, { "epoch": 0.6381367631179695, "grad_norm": 1.3757044076919556, "learning_rate": 6.46367897293473e-06, "loss": 1.5437, "step": 11672 }, { "epoch": 0.638191435561691, "grad_norm": 1.5986683368682861, "learning_rate": 6.461969750132827e-06, "loss": 1.5085, "step": 11673 }, { "epoch": 0.6382461080054126, "grad_norm": 1.5627144575119019, "learning_rate": 6.460260645474054e-06, "loss": 1.5332, "step": 11674 }, { "epoch": 0.6383007804491341, "grad_norm": 1.6559416055679321, "learning_rate": 6.458551659015486e-06, "loss": 1.3941, "step": 11675 }, { "epoch": 0.6383554528928557, "grad_norm": 1.317609190940857, "learning_rate": 6.45684279081419e-06, "loss": 1.1983, "step": 11676 }, { "epoch": 0.6384101253365773, "grad_norm": 1.512351632118225, "learning_rate": 6.455134040927227e-06, "loss": 1.4493, "step": 11677 }, { "epoch": 0.6384647977802987, "grad_norm": 1.8795058727264404, "learning_rate": 6.4534254094116555e-06, "loss": 1.371, "step": 11678 }, { "epoch": 0.6385194702240203, "grad_norm": 1.2461366653442383, "learning_rate": 6.451716896324534e-06, "loss": 1.4254, "step": 11679 }, { "epoch": 0.6385741426677419, "grad_norm": 1.2042109966278076, "learning_rate": 6.4500085017229065e-06, "loss": 1.5853, "step": 11680 }, { "epoch": 0.6386288151114634, "grad_norm": 1.0985499620437622, "learning_rate": 6.448300225663831e-06, "loss": 1.604, "step": 11681 }, { "epoch": 0.638683487555185, "grad_norm": 1.4025988578796387, "learning_rate": 6.446592068204341e-06, "loss": 1.5278, "step": 11682 }, { "epoch": 0.6387381599989066, "grad_norm": 1.3394279479980469, "learning_rate": 6.444884029401483e-06, "loss": 1.4042, "step": 11683 }, { "epoch": 0.6387928324426281, "grad_norm": 1.3387092351913452, "learning_rate": 6.44317610931229e-06, "loss": 1.2643, "step": 11684 }, { "epoch": 0.6388475048863497, "grad_norm": 1.9055105447769165, "learning_rate": 6.44146830799379e-06, "loss": 1.5329, "step": 11685 }, { "epoch": 0.6389021773300713, "grad_norm": 1.0862412452697754, "learning_rate": 6.439760625503018e-06, "loss": 1.3169, "step": 11686 }, { "epoch": 0.6389568497737927, "grad_norm": 1.7956229448318481, "learning_rate": 6.438053061896992e-06, "loss": 1.6649, "step": 11687 }, { "epoch": 0.6390115222175143, "grad_norm": 1.386715054512024, "learning_rate": 6.436345617232728e-06, "loss": 1.602, "step": 11688 }, { "epoch": 0.6390661946612358, "grad_norm": 1.6446586847305298, "learning_rate": 6.43463829156725e-06, "loss": 1.4788, "step": 11689 }, { "epoch": 0.6391208671049574, "grad_norm": 1.9004167318344116, "learning_rate": 6.432931084957567e-06, "loss": 1.2643, "step": 11690 }, { "epoch": 0.639175539548679, "grad_norm": 1.4454405307769775, "learning_rate": 6.431223997460683e-06, "loss": 1.5407, "step": 11691 }, { "epoch": 0.6392302119924005, "grad_norm": 1.8865247964859009, "learning_rate": 6.429517029133605e-06, "loss": 1.3751, "step": 11692 }, { "epoch": 0.6392848844361221, "grad_norm": 1.3473360538482666, "learning_rate": 6.427810180033334e-06, "loss": 1.375, "step": 11693 }, { "epoch": 0.6393395568798437, "grad_norm": 1.3761602640151978, "learning_rate": 6.426103450216857e-06, "loss": 1.4934, "step": 11694 }, { "epoch": 0.6393942293235652, "grad_norm": 2.028175115585327, "learning_rate": 6.424396839741178e-06, "loss": 1.1366, "step": 11695 }, { "epoch": 0.6394489017672867, "grad_norm": 1.6654365062713623, "learning_rate": 6.422690348663276e-06, "loss": 1.3627, "step": 11696 }, { "epoch": 0.6395035742110083, "grad_norm": 1.352071762084961, "learning_rate": 6.420983977040141e-06, "loss": 1.3732, "step": 11697 }, { "epoch": 0.6395582466547298, "grad_norm": 1.5928807258605957, "learning_rate": 6.419277724928748e-06, "loss": 1.2852, "step": 11698 }, { "epoch": 0.6396129190984514, "grad_norm": 1.612067461013794, "learning_rate": 6.417571592386071e-06, "loss": 1.3999, "step": 11699 }, { "epoch": 0.639667591542173, "grad_norm": 1.2385815382003784, "learning_rate": 6.415865579469089e-06, "loss": 1.3159, "step": 11700 }, { "epoch": 0.6397222639858945, "grad_norm": 1.4118647575378418, "learning_rate": 6.4141596862347645e-06, "loss": 1.5485, "step": 11701 }, { "epoch": 0.6397769364296161, "grad_norm": 1.4528629779815674, "learning_rate": 6.41245391274006e-06, "loss": 1.3809, "step": 11702 }, { "epoch": 0.6398316088733376, "grad_norm": 1.4622374773025513, "learning_rate": 6.410748259041941e-06, "loss": 1.3541, "step": 11703 }, { "epoch": 0.6398862813170592, "grad_norm": 1.4514764547348022, "learning_rate": 6.409042725197361e-06, "loss": 1.4338, "step": 11704 }, { "epoch": 0.6399409537607808, "grad_norm": 1.647179126739502, "learning_rate": 6.407337311263269e-06, "loss": 1.4278, "step": 11705 }, { "epoch": 0.6399956262045022, "grad_norm": 1.3552486896514893, "learning_rate": 6.4056320172966145e-06, "loss": 1.4329, "step": 11706 }, { "epoch": 0.6400502986482238, "grad_norm": 1.4224910736083984, "learning_rate": 6.4039268433543425e-06, "loss": 1.3784, "step": 11707 }, { "epoch": 0.6401049710919454, "grad_norm": 1.6919952630996704, "learning_rate": 6.402221789493388e-06, "loss": 1.1852, "step": 11708 }, { "epoch": 0.6401596435356669, "grad_norm": 1.6876736879348755, "learning_rate": 6.400516855770694e-06, "loss": 1.4285, "step": 11709 }, { "epoch": 0.6402143159793885, "grad_norm": 1.3456135988235474, "learning_rate": 6.398812042243187e-06, "loss": 1.4537, "step": 11710 }, { "epoch": 0.6402689884231101, "grad_norm": 1.5438064336776733, "learning_rate": 6.3971073489678e-06, "loss": 1.3543, "step": 11711 }, { "epoch": 0.6403236608668316, "grad_norm": 1.3967875242233276, "learning_rate": 6.395402776001449e-06, "loss": 1.4246, "step": 11712 }, { "epoch": 0.6403783333105532, "grad_norm": 1.4360809326171875, "learning_rate": 6.393698323401056e-06, "loss": 1.4402, "step": 11713 }, { "epoch": 0.6404330057542748, "grad_norm": 1.2234156131744385, "learning_rate": 6.391993991223544e-06, "loss": 1.3794, "step": 11714 }, { "epoch": 0.6404876781979962, "grad_norm": 1.821388840675354, "learning_rate": 6.390289779525818e-06, "loss": 1.3286, "step": 11715 }, { "epoch": 0.6405423506417178, "grad_norm": 1.3550622463226318, "learning_rate": 6.388585688364783e-06, "loss": 1.3286, "step": 11716 }, { "epoch": 0.6405970230854393, "grad_norm": 1.571065902709961, "learning_rate": 6.3868817177973505e-06, "loss": 1.3291, "step": 11717 }, { "epoch": 0.6406516955291609, "grad_norm": 1.6248079538345337, "learning_rate": 6.385177867880414e-06, "loss": 1.4564, "step": 11718 }, { "epoch": 0.6407063679728825, "grad_norm": 1.4701563119888306, "learning_rate": 6.383474138670869e-06, "loss": 1.7503, "step": 11719 }, { "epoch": 0.640761040416604, "grad_norm": 1.8519749641418457, "learning_rate": 6.381770530225611e-06, "loss": 1.3486, "step": 11720 }, { "epoch": 0.6408157128603256, "grad_norm": 1.416090965270996, "learning_rate": 6.380067042601526e-06, "loss": 1.4084, "step": 11721 }, { "epoch": 0.6408703853040472, "grad_norm": 1.3601486682891846, "learning_rate": 6.378363675855494e-06, "loss": 1.3809, "step": 11722 }, { "epoch": 0.6409250577477686, "grad_norm": 1.736627221107483, "learning_rate": 6.3766604300444e-06, "loss": 1.5227, "step": 11723 }, { "epoch": 0.6409797301914902, "grad_norm": 1.7156585454940796, "learning_rate": 6.3749573052251155e-06, "loss": 1.6671, "step": 11724 }, { "epoch": 0.6410344026352118, "grad_norm": 1.6100198030471802, "learning_rate": 6.373254301454514e-06, "loss": 1.2936, "step": 11725 }, { "epoch": 0.6410890750789333, "grad_norm": 2.057802438735962, "learning_rate": 6.371551418789463e-06, "loss": 1.2478, "step": 11726 }, { "epoch": 0.6411437475226549, "grad_norm": 1.6810137033462524, "learning_rate": 6.36984865728682e-06, "loss": 1.3928, "step": 11727 }, { "epoch": 0.6411984199663765, "grad_norm": 1.2375749349594116, "learning_rate": 6.368146017003454e-06, "loss": 1.3911, "step": 11728 }, { "epoch": 0.641253092410098, "grad_norm": 1.5615828037261963, "learning_rate": 6.366443497996213e-06, "loss": 1.783, "step": 11729 }, { "epoch": 0.6413077648538196, "grad_norm": 1.4756555557250977, "learning_rate": 6.3647411003219486e-06, "loss": 1.3356, "step": 11730 }, { "epoch": 0.641362437297541, "grad_norm": 1.4194523096084595, "learning_rate": 6.363038824037511e-06, "loss": 1.27, "step": 11731 }, { "epoch": 0.6414171097412626, "grad_norm": 1.364717721939087, "learning_rate": 6.3613366691997426e-06, "loss": 1.6889, "step": 11732 }, { "epoch": 0.6414717821849842, "grad_norm": 1.3946610689163208, "learning_rate": 6.359634635865476e-06, "loss": 1.4337, "step": 11733 }, { "epoch": 0.6415264546287057, "grad_norm": 1.9501056671142578, "learning_rate": 6.357932724091555e-06, "loss": 1.2927, "step": 11734 }, { "epoch": 0.6415811270724273, "grad_norm": 1.4998077154159546, "learning_rate": 6.356230933934808e-06, "loss": 1.5086, "step": 11735 }, { "epoch": 0.6416357995161489, "grad_norm": 1.2614003419876099, "learning_rate": 6.354529265452059e-06, "loss": 1.394, "step": 11736 }, { "epoch": 0.6416904719598704, "grad_norm": 1.4562921524047852, "learning_rate": 6.3528277187001315e-06, "loss": 1.4261, "step": 11737 }, { "epoch": 0.641745144403592, "grad_norm": 1.396529197692871, "learning_rate": 6.351126293735843e-06, "loss": 1.434, "step": 11738 }, { "epoch": 0.6417998168473136, "grad_norm": 1.5959640741348267, "learning_rate": 6.349424990616013e-06, "loss": 1.3846, "step": 11739 }, { "epoch": 0.6418544892910351, "grad_norm": 1.2232110500335693, "learning_rate": 6.34772380939745e-06, "loss": 1.3079, "step": 11740 }, { "epoch": 0.6419091617347567, "grad_norm": 1.3595080375671387, "learning_rate": 6.346022750136956e-06, "loss": 1.6093, "step": 11741 }, { "epoch": 0.6419638341784782, "grad_norm": 1.3717924356460571, "learning_rate": 6.34432181289134e-06, "loss": 1.405, "step": 11742 }, { "epoch": 0.6420185066221997, "grad_norm": 1.3135243654251099, "learning_rate": 6.342620997717397e-06, "loss": 1.4551, "step": 11743 }, { "epoch": 0.6420731790659213, "grad_norm": 1.119202971458435, "learning_rate": 6.340920304671916e-06, "loss": 1.3451, "step": 11744 }, { "epoch": 0.6421278515096428, "grad_norm": 1.4867793321609497, "learning_rate": 6.339219733811697e-06, "loss": 1.4956, "step": 11745 }, { "epoch": 0.6421825239533644, "grad_norm": 1.4177581071853638, "learning_rate": 6.337519285193521e-06, "loss": 1.4811, "step": 11746 }, { "epoch": 0.642237196397086, "grad_norm": 1.7057557106018066, "learning_rate": 6.335818958874167e-06, "loss": 1.4636, "step": 11747 }, { "epoch": 0.6422918688408075, "grad_norm": 1.407886028289795, "learning_rate": 6.334118754910419e-06, "loss": 1.3668, "step": 11748 }, { "epoch": 0.6423465412845291, "grad_norm": 1.425889492034912, "learning_rate": 6.332418673359049e-06, "loss": 1.5785, "step": 11749 }, { "epoch": 0.6424012137282507, "grad_norm": 1.4069045782089233, "learning_rate": 6.330718714276823e-06, "loss": 1.443, "step": 11750 }, { "epoch": 0.6424558861719721, "grad_norm": 1.6750997304916382, "learning_rate": 6.329018877720512e-06, "loss": 1.337, "step": 11751 }, { "epoch": 0.6425105586156937, "grad_norm": 1.3780840635299683, "learning_rate": 6.327319163746871e-06, "loss": 1.5936, "step": 11752 }, { "epoch": 0.6425652310594153, "grad_norm": 1.9152836799621582, "learning_rate": 6.325619572412665e-06, "loss": 1.3753, "step": 11753 }, { "epoch": 0.6426199035031368, "grad_norm": 1.446406602859497, "learning_rate": 6.323920103774644e-06, "loss": 1.2373, "step": 11754 }, { "epoch": 0.6426745759468584, "grad_norm": 1.5818166732788086, "learning_rate": 6.322220757889555e-06, "loss": 1.427, "step": 11755 }, { "epoch": 0.64272924839058, "grad_norm": 1.3115711212158203, "learning_rate": 6.320521534814147e-06, "loss": 1.4235, "step": 11756 }, { "epoch": 0.6427839208343015, "grad_norm": 1.3782869577407837, "learning_rate": 6.318822434605159e-06, "loss": 1.4074, "step": 11757 }, { "epoch": 0.6428385932780231, "grad_norm": 1.555707335472107, "learning_rate": 6.317123457319323e-06, "loss": 1.3887, "step": 11758 }, { "epoch": 0.6428932657217445, "grad_norm": 1.4864524602890015, "learning_rate": 6.315424603013382e-06, "loss": 1.4253, "step": 11759 }, { "epoch": 0.6429479381654661, "grad_norm": 1.4004859924316406, "learning_rate": 6.3137258717440606e-06, "loss": 1.3835, "step": 11760 }, { "epoch": 0.6430026106091877, "grad_norm": 1.2477397918701172, "learning_rate": 6.312027263568079e-06, "loss": 1.3515, "step": 11761 }, { "epoch": 0.6430572830529092, "grad_norm": 1.345450758934021, "learning_rate": 6.310328778542163e-06, "loss": 1.4839, "step": 11762 }, { "epoch": 0.6431119554966308, "grad_norm": 1.2809728384017944, "learning_rate": 6.3086304167230284e-06, "loss": 1.4315, "step": 11763 }, { "epoch": 0.6431666279403524, "grad_norm": 1.6504143476486206, "learning_rate": 6.306932178167382e-06, "loss": 1.4175, "step": 11764 }, { "epoch": 0.6432213003840739, "grad_norm": 1.7708908319473267, "learning_rate": 6.30523406293194e-06, "loss": 1.385, "step": 11765 }, { "epoch": 0.6432759728277955, "grad_norm": 1.4214441776275635, "learning_rate": 6.303536071073397e-06, "loss": 1.5751, "step": 11766 }, { "epoch": 0.6433306452715171, "grad_norm": 1.435289740562439, "learning_rate": 6.3018382026484645e-06, "loss": 1.4872, "step": 11767 }, { "epoch": 0.6433853177152385, "grad_norm": 2.4412078857421875, "learning_rate": 6.3001404577138325e-06, "loss": 1.2412, "step": 11768 }, { "epoch": 0.6434399901589601, "grad_norm": 1.8990353345870972, "learning_rate": 6.29844283632619e-06, "loss": 1.1723, "step": 11769 }, { "epoch": 0.6434946626026817, "grad_norm": 1.7171145677566528, "learning_rate": 6.296745338542229e-06, "loss": 1.3791, "step": 11770 }, { "epoch": 0.6435493350464032, "grad_norm": 1.422669529914856, "learning_rate": 6.295047964418632e-06, "loss": 1.5483, "step": 11771 }, { "epoch": 0.6436040074901248, "grad_norm": 1.3285759687423706, "learning_rate": 6.293350714012073e-06, "loss": 1.4425, "step": 11772 }, { "epoch": 0.6436586799338463, "grad_norm": 1.670052409172058, "learning_rate": 6.291653587379236e-06, "loss": 1.4802, "step": 11773 }, { "epoch": 0.6437133523775679, "grad_norm": 1.168012261390686, "learning_rate": 6.289956584576786e-06, "loss": 1.7781, "step": 11774 }, { "epoch": 0.6437680248212895, "grad_norm": 1.7583467960357666, "learning_rate": 6.288259705661391e-06, "loss": 1.4513, "step": 11775 }, { "epoch": 0.643822697265011, "grad_norm": 1.4063204526901245, "learning_rate": 6.286562950689717e-06, "loss": 1.3178, "step": 11776 }, { "epoch": 0.6438773697087325, "grad_norm": 1.4066462516784668, "learning_rate": 6.284866319718418e-06, "loss": 1.4195, "step": 11777 }, { "epoch": 0.6439320421524541, "grad_norm": 1.588417410850525, "learning_rate": 6.283169812804146e-06, "loss": 1.1828, "step": 11778 }, { "epoch": 0.6439867145961756, "grad_norm": 1.5870550870895386, "learning_rate": 6.281473430003562e-06, "loss": 1.3955, "step": 11779 }, { "epoch": 0.6440413870398972, "grad_norm": 1.3099563121795654, "learning_rate": 6.2797771713733025e-06, "loss": 1.214, "step": 11780 }, { "epoch": 0.6440960594836188, "grad_norm": 1.53079354763031, "learning_rate": 6.2780810369700165e-06, "loss": 1.5383, "step": 11781 }, { "epoch": 0.6441507319273403, "grad_norm": 1.6687623262405396, "learning_rate": 6.276385026850337e-06, "loss": 1.2885, "step": 11782 }, { "epoch": 0.6442054043710619, "grad_norm": 1.8632359504699707, "learning_rate": 6.2746891410708955e-06, "loss": 1.1729, "step": 11783 }, { "epoch": 0.6442600768147835, "grad_norm": 1.1513638496398926, "learning_rate": 6.272993379688329e-06, "loss": 1.4576, "step": 11784 }, { "epoch": 0.644314749258505, "grad_norm": 1.2617729902267456, "learning_rate": 6.271297742759259e-06, "loss": 1.5087, "step": 11785 }, { "epoch": 0.6443694217022266, "grad_norm": 1.7914074659347534, "learning_rate": 6.269602230340305e-06, "loss": 1.3182, "step": 11786 }, { "epoch": 0.644424094145948, "grad_norm": 1.7837468385696411, "learning_rate": 6.267906842488088e-06, "loss": 1.2889, "step": 11787 }, { "epoch": 0.6444787665896696, "grad_norm": 1.8559623956680298, "learning_rate": 6.26621157925922e-06, "loss": 1.2976, "step": 11788 }, { "epoch": 0.6445334390333912, "grad_norm": 1.4725741147994995, "learning_rate": 6.264516440710308e-06, "loss": 1.6989, "step": 11789 }, { "epoch": 0.6445881114771127, "grad_norm": 1.3701618909835815, "learning_rate": 6.262821426897958e-06, "loss": 1.474, "step": 11790 }, { "epoch": 0.6446427839208343, "grad_norm": 1.3223912715911865, "learning_rate": 6.261126537878771e-06, "loss": 1.5677, "step": 11791 }, { "epoch": 0.6446974563645559, "grad_norm": 1.7198219299316406, "learning_rate": 6.259431773709338e-06, "loss": 1.4254, "step": 11792 }, { "epoch": 0.6447521288082774, "grad_norm": 1.9824438095092773, "learning_rate": 6.25773713444626e-06, "loss": 1.4147, "step": 11793 }, { "epoch": 0.644806801251999, "grad_norm": 1.3244247436523438, "learning_rate": 6.256042620146119e-06, "loss": 1.4855, "step": 11794 }, { "epoch": 0.6448614736957206, "grad_norm": 1.8947123289108276, "learning_rate": 6.254348230865501e-06, "loss": 1.2732, "step": 11795 }, { "epoch": 0.644916146139442, "grad_norm": 1.678001046180725, "learning_rate": 6.252653966660987e-06, "loss": 1.1201, "step": 11796 }, { "epoch": 0.6449708185831636, "grad_norm": 1.4280502796173096, "learning_rate": 6.2509598275891445e-06, "loss": 1.4637, "step": 11797 }, { "epoch": 0.6450254910268852, "grad_norm": 1.3790699243545532, "learning_rate": 6.249265813706555e-06, "loss": 1.4117, "step": 11798 }, { "epoch": 0.6450801634706067, "grad_norm": 1.695560097694397, "learning_rate": 6.247571925069782e-06, "loss": 1.568, "step": 11799 }, { "epoch": 0.6451348359143283, "grad_norm": 1.4835132360458374, "learning_rate": 6.245878161735386e-06, "loss": 1.4282, "step": 11800 }, { "epoch": 0.6451895083580498, "grad_norm": 2.0568859577178955, "learning_rate": 6.2441845237599285e-06, "loss": 1.2612, "step": 11801 }, { "epoch": 0.6452441808017714, "grad_norm": 1.7834218740463257, "learning_rate": 6.242491011199964e-06, "loss": 1.5135, "step": 11802 }, { "epoch": 0.645298853245493, "grad_norm": 1.625616192817688, "learning_rate": 6.240797624112037e-06, "loss": 1.3697, "step": 11803 }, { "epoch": 0.6453535256892144, "grad_norm": 2.04061222076416, "learning_rate": 6.239104362552704e-06, "loss": 1.1799, "step": 11804 }, { "epoch": 0.645408198132936, "grad_norm": 1.6229445934295654, "learning_rate": 6.2374112265785e-06, "loss": 1.5774, "step": 11805 }, { "epoch": 0.6454628705766576, "grad_norm": 1.55088472366333, "learning_rate": 6.235718216245961e-06, "loss": 1.7552, "step": 11806 }, { "epoch": 0.6455175430203791, "grad_norm": 1.536487340927124, "learning_rate": 6.2340253316116286e-06, "loss": 1.2726, "step": 11807 }, { "epoch": 0.6455722154641007, "grad_norm": 1.6769137382507324, "learning_rate": 6.232332572732025e-06, "loss": 1.52, "step": 11808 }, { "epoch": 0.6456268879078223, "grad_norm": 1.777392029762268, "learning_rate": 6.230639939663679e-06, "loss": 1.5251, "step": 11809 }, { "epoch": 0.6456815603515438, "grad_norm": 1.9169120788574219, "learning_rate": 6.228947432463112e-06, "loss": 1.36, "step": 11810 }, { "epoch": 0.6457362327952654, "grad_norm": 1.8698647022247314, "learning_rate": 6.227255051186834e-06, "loss": 1.3292, "step": 11811 }, { "epoch": 0.645790905238987, "grad_norm": 1.817751169204712, "learning_rate": 6.2255627958913675e-06, "loss": 1.4461, "step": 11812 }, { "epoch": 0.6458455776827084, "grad_norm": 1.7576758861541748, "learning_rate": 6.223870666633216e-06, "loss": 1.2782, "step": 11813 }, { "epoch": 0.64590025012643, "grad_norm": 1.4173429012298584, "learning_rate": 6.222178663468883e-06, "loss": 1.3024, "step": 11814 }, { "epoch": 0.6459549225701515, "grad_norm": 1.78276526927948, "learning_rate": 6.22048678645487e-06, "loss": 1.4059, "step": 11815 }, { "epoch": 0.6460095950138731, "grad_norm": 1.5021231174468994, "learning_rate": 6.218795035647672e-06, "loss": 1.3058, "step": 11816 }, { "epoch": 0.6460642674575947, "grad_norm": 1.4375779628753662, "learning_rate": 6.2171034111037776e-06, "loss": 1.5811, "step": 11817 }, { "epoch": 0.6461189399013162, "grad_norm": 1.5563243627548218, "learning_rate": 6.215411912879681e-06, "loss": 1.4463, "step": 11818 }, { "epoch": 0.6461736123450378, "grad_norm": 1.8854469060897827, "learning_rate": 6.2137205410318605e-06, "loss": 1.222, "step": 11819 }, { "epoch": 0.6462282847887594, "grad_norm": 1.3828797340393066, "learning_rate": 6.212029295616795e-06, "loss": 1.4086, "step": 11820 }, { "epoch": 0.6462829572324809, "grad_norm": 1.4603755474090576, "learning_rate": 6.210338176690962e-06, "loss": 1.4056, "step": 11821 }, { "epoch": 0.6463376296762025, "grad_norm": 1.2513970136642456, "learning_rate": 6.208647184310826e-06, "loss": 1.5927, "step": 11822 }, { "epoch": 0.646392302119924, "grad_norm": 1.657896876335144, "learning_rate": 6.206956318532859e-06, "loss": 1.279, "step": 11823 }, { "epoch": 0.6464469745636455, "grad_norm": 1.5170204639434814, "learning_rate": 6.205265579413524e-06, "loss": 1.3416, "step": 11824 }, { "epoch": 0.6465016470073671, "grad_norm": 1.6880728006362915, "learning_rate": 6.203574967009271e-06, "loss": 1.4874, "step": 11825 }, { "epoch": 0.6465563194510887, "grad_norm": 1.9018152952194214, "learning_rate": 6.2018844813765635e-06, "loss": 1.2422, "step": 11826 }, { "epoch": 0.6466109918948102, "grad_norm": 1.5066596269607544, "learning_rate": 6.200194122571843e-06, "loss": 1.7267, "step": 11827 }, { "epoch": 0.6466656643385318, "grad_norm": 1.4988828897476196, "learning_rate": 6.198503890651557e-06, "loss": 1.3614, "step": 11828 }, { "epoch": 0.6467203367822534, "grad_norm": 1.3483457565307617, "learning_rate": 6.196813785672149e-06, "loss": 1.5258, "step": 11829 }, { "epoch": 0.6467750092259749, "grad_norm": 1.5362169742584229, "learning_rate": 6.195123807690053e-06, "loss": 1.5169, "step": 11830 }, { "epoch": 0.6468296816696965, "grad_norm": 1.800097942352295, "learning_rate": 6.193433956761697e-06, "loss": 1.3365, "step": 11831 }, { "epoch": 0.6468843541134179, "grad_norm": 1.316272258758545, "learning_rate": 6.1917442329435175e-06, "loss": 1.5032, "step": 11832 }, { "epoch": 0.6469390265571395, "grad_norm": 1.6258978843688965, "learning_rate": 6.190054636291935e-06, "loss": 1.4534, "step": 11833 }, { "epoch": 0.6469936990008611, "grad_norm": 2.5958988666534424, "learning_rate": 6.188365166863366e-06, "loss": 1.1872, "step": 11834 }, { "epoch": 0.6470483714445826, "grad_norm": 1.532740592956543, "learning_rate": 6.18667582471423e-06, "loss": 1.4295, "step": 11835 }, { "epoch": 0.6471030438883042, "grad_norm": 1.9698147773742676, "learning_rate": 6.184986609900934e-06, "loss": 1.4464, "step": 11836 }, { "epoch": 0.6471577163320258, "grad_norm": 1.4920932054519653, "learning_rate": 6.18329752247989e-06, "loss": 1.3768, "step": 11837 }, { "epoch": 0.6472123887757473, "grad_norm": 1.5735349655151367, "learning_rate": 6.181608562507497e-06, "loss": 1.4171, "step": 11838 }, { "epoch": 0.6472670612194689, "grad_norm": 1.6518940925598145, "learning_rate": 6.179919730040154e-06, "loss": 1.4618, "step": 11839 }, { "epoch": 0.6473217336631905, "grad_norm": 1.5363473892211914, "learning_rate": 6.178231025134256e-06, "loss": 1.3813, "step": 11840 }, { "epoch": 0.6473764061069119, "grad_norm": 1.1413565874099731, "learning_rate": 6.176542447846193e-06, "loss": 1.5691, "step": 11841 }, { "epoch": 0.6474310785506335, "grad_norm": 1.752362608909607, "learning_rate": 6.174853998232346e-06, "loss": 1.3634, "step": 11842 }, { "epoch": 0.6474857509943551, "grad_norm": 1.5757791996002197, "learning_rate": 6.173165676349103e-06, "loss": 1.3746, "step": 11843 }, { "epoch": 0.6475404234380766, "grad_norm": 1.5804468393325806, "learning_rate": 6.171477482252839e-06, "loss": 1.3313, "step": 11844 }, { "epoch": 0.6475950958817982, "grad_norm": 1.4589723348617554, "learning_rate": 6.169789415999921e-06, "loss": 1.5642, "step": 11845 }, { "epoch": 0.6476497683255197, "grad_norm": 1.5314565896987915, "learning_rate": 6.168101477646726e-06, "loss": 1.3539, "step": 11846 }, { "epoch": 0.6477044407692413, "grad_norm": 1.548403024673462, "learning_rate": 6.166413667249615e-06, "loss": 1.4776, "step": 11847 }, { "epoch": 0.6477591132129629, "grad_norm": 1.8409345149993896, "learning_rate": 6.164725984864947e-06, "loss": 1.2567, "step": 11848 }, { "epoch": 0.6478137856566843, "grad_norm": 1.5391865968704224, "learning_rate": 6.163038430549077e-06, "loss": 1.3182, "step": 11849 }, { "epoch": 0.6478684581004059, "grad_norm": 1.3401139974594116, "learning_rate": 6.16135100435836e-06, "loss": 1.34, "step": 11850 }, { "epoch": 0.6479231305441275, "grad_norm": 1.414918065071106, "learning_rate": 6.159663706349136e-06, "loss": 1.4206, "step": 11851 }, { "epoch": 0.647977802987849, "grad_norm": 1.3705183267593384, "learning_rate": 6.157976536577757e-06, "loss": 1.3509, "step": 11852 }, { "epoch": 0.6480324754315706, "grad_norm": 1.6052770614624023, "learning_rate": 6.156289495100553e-06, "loss": 1.7133, "step": 11853 }, { "epoch": 0.6480871478752922, "grad_norm": 1.9029974937438965, "learning_rate": 6.154602581973865e-06, "loss": 1.211, "step": 11854 }, { "epoch": 0.6481418203190137, "grad_norm": 1.432786464691162, "learning_rate": 6.152915797254022e-06, "loss": 1.4535, "step": 11855 }, { "epoch": 0.6481964927627353, "grad_norm": 1.8388527631759644, "learning_rate": 6.151229140997343e-06, "loss": 1.4015, "step": 11856 }, { "epoch": 0.6482511652064569, "grad_norm": 1.456394076347351, "learning_rate": 6.149542613260157e-06, "loss": 1.2843, "step": 11857 }, { "epoch": 0.6483058376501784, "grad_norm": 1.674798607826233, "learning_rate": 6.147856214098781e-06, "loss": 1.3566, "step": 11858 }, { "epoch": 0.6483605100938999, "grad_norm": 1.5468641519546509, "learning_rate": 6.146169943569522e-06, "loss": 1.332, "step": 11859 }, { "epoch": 0.6484151825376214, "grad_norm": 1.3338901996612549, "learning_rate": 6.144483801728693e-06, "loss": 1.25, "step": 11860 }, { "epoch": 0.648469854981343, "grad_norm": 1.5648499727249146, "learning_rate": 6.1427977886326e-06, "loss": 1.5388, "step": 11861 }, { "epoch": 0.6485245274250646, "grad_norm": 1.3418759107589722, "learning_rate": 6.141111904337534e-06, "loss": 1.4274, "step": 11862 }, { "epoch": 0.6485791998687861, "grad_norm": 1.3702094554901123, "learning_rate": 6.139426148899801e-06, "loss": 1.3355, "step": 11863 }, { "epoch": 0.6486338723125077, "grad_norm": 1.4240065813064575, "learning_rate": 6.137740522375687e-06, "loss": 1.3844, "step": 11864 }, { "epoch": 0.6486885447562293, "grad_norm": 1.6844758987426758, "learning_rate": 6.136055024821477e-06, "loss": 1.4578, "step": 11865 }, { "epoch": 0.6487432171999508, "grad_norm": 1.4419972896575928, "learning_rate": 6.134369656293461e-06, "loss": 1.525, "step": 11866 }, { "epoch": 0.6487978896436724, "grad_norm": 1.9979888200759888, "learning_rate": 6.1326844168479104e-06, "loss": 1.3331, "step": 11867 }, { "epoch": 0.648852562087394, "grad_norm": 1.4232821464538574, "learning_rate": 6.130999306541104e-06, "loss": 1.375, "step": 11868 }, { "epoch": 0.6489072345311154, "grad_norm": 2.0947682857513428, "learning_rate": 6.129314325429311e-06, "loss": 1.1247, "step": 11869 }, { "epoch": 0.648961906974837, "grad_norm": 2.3770604133605957, "learning_rate": 6.12762947356879e-06, "loss": 1.2996, "step": 11870 }, { "epoch": 0.6490165794185586, "grad_norm": 1.5905689001083374, "learning_rate": 6.1259447510158136e-06, "loss": 1.2586, "step": 11871 }, { "epoch": 0.6490712518622801, "grad_norm": 1.4158105850219727, "learning_rate": 6.124260157826631e-06, "loss": 1.4735, "step": 11872 }, { "epoch": 0.6491259243060017, "grad_norm": 1.443447470664978, "learning_rate": 6.122575694057495e-06, "loss": 1.3204, "step": 11873 }, { "epoch": 0.6491805967497232, "grad_norm": 1.41665518283844, "learning_rate": 6.120891359764655e-06, "loss": 1.4467, "step": 11874 }, { "epoch": 0.6492352691934448, "grad_norm": 1.300585150718689, "learning_rate": 6.1192071550043584e-06, "loss": 1.6218, "step": 11875 }, { "epoch": 0.6492899416371664, "grad_norm": 1.5837560892105103, "learning_rate": 6.1175230798328365e-06, "loss": 1.4508, "step": 11876 }, { "epoch": 0.6493446140808878, "grad_norm": 1.514548897743225, "learning_rate": 6.1158391343063335e-06, "loss": 1.4006, "step": 11877 }, { "epoch": 0.6493992865246094, "grad_norm": 2.3050856590270996, "learning_rate": 6.114155318481076e-06, "loss": 1.2092, "step": 11878 }, { "epoch": 0.649453958968331, "grad_norm": 1.6483783721923828, "learning_rate": 6.1124716324132885e-06, "loss": 1.2602, "step": 11879 }, { "epoch": 0.6495086314120525, "grad_norm": 1.7207310199737549, "learning_rate": 6.110788076159198e-06, "loss": 1.2512, "step": 11880 }, { "epoch": 0.6495633038557741, "grad_norm": 1.4896210432052612, "learning_rate": 6.109104649775016e-06, "loss": 1.3991, "step": 11881 }, { "epoch": 0.6496179762994957, "grad_norm": 1.627722144126892, "learning_rate": 6.107421353316965e-06, "loss": 1.3008, "step": 11882 }, { "epoch": 0.6496726487432172, "grad_norm": 1.2658908367156982, "learning_rate": 6.105738186841248e-06, "loss": 1.4566, "step": 11883 }, { "epoch": 0.6497273211869388, "grad_norm": 1.965219259262085, "learning_rate": 6.104055150404067e-06, "loss": 1.3734, "step": 11884 }, { "epoch": 0.6497819936306604, "grad_norm": 1.7664871215820312, "learning_rate": 6.102372244061631e-06, "loss": 1.5376, "step": 11885 }, { "epoch": 0.6498366660743818, "grad_norm": 1.4904072284698486, "learning_rate": 6.1006894678701314e-06, "loss": 1.577, "step": 11886 }, { "epoch": 0.6498913385181034, "grad_norm": 1.2345103025436401, "learning_rate": 6.099006821885758e-06, "loss": 1.6924, "step": 11887 }, { "epoch": 0.6499460109618249, "grad_norm": 1.8501098155975342, "learning_rate": 6.097324306164705e-06, "loss": 1.4349, "step": 11888 }, { "epoch": 0.6500006834055465, "grad_norm": 1.7280815839767456, "learning_rate": 6.095641920763149e-06, "loss": 1.5197, "step": 11889 }, { "epoch": 0.6500553558492681, "grad_norm": 1.3750699758529663, "learning_rate": 6.093959665737268e-06, "loss": 1.4732, "step": 11890 }, { "epoch": 0.6501100282929896, "grad_norm": 1.437949538230896, "learning_rate": 6.092277541143243e-06, "loss": 1.5528, "step": 11891 }, { "epoch": 0.6501647007367112, "grad_norm": 1.623184084892273, "learning_rate": 6.090595547037242e-06, "loss": 1.3589, "step": 11892 }, { "epoch": 0.6502193731804328, "grad_norm": 1.6447793245315552, "learning_rate": 6.088913683475427e-06, "loss": 1.4848, "step": 11893 }, { "epoch": 0.6502740456241543, "grad_norm": 1.408897042274475, "learning_rate": 6.0872319505139635e-06, "loss": 1.3075, "step": 11894 }, { "epoch": 0.6503287180678758, "grad_norm": 1.493553876876831, "learning_rate": 6.0855503482090025e-06, "loss": 1.5924, "step": 11895 }, { "epoch": 0.6503833905115974, "grad_norm": 1.5857387781143188, "learning_rate": 6.083868876616706e-06, "loss": 1.3467, "step": 11896 }, { "epoch": 0.6504380629553189, "grad_norm": 1.6179704666137695, "learning_rate": 6.082187535793216e-06, "loss": 1.4515, "step": 11897 }, { "epoch": 0.6504927353990405, "grad_norm": 1.4088605642318726, "learning_rate": 6.080506325794675e-06, "loss": 1.4266, "step": 11898 }, { "epoch": 0.6505474078427621, "grad_norm": 1.8593302965164185, "learning_rate": 6.078825246677229e-06, "loss": 1.591, "step": 11899 }, { "epoch": 0.6506020802864836, "grad_norm": 1.5757522583007812, "learning_rate": 6.077144298497009e-06, "loss": 1.3788, "step": 11900 }, { "epoch": 0.6506567527302052, "grad_norm": 2.185573101043701, "learning_rate": 6.075463481310141e-06, "loss": 1.0475, "step": 11901 }, { "epoch": 0.6507114251739267, "grad_norm": 1.2483404874801636, "learning_rate": 6.073782795172761e-06, "loss": 1.4604, "step": 11902 }, { "epoch": 0.6507660976176483, "grad_norm": 1.892669439315796, "learning_rate": 6.0721022401409864e-06, "loss": 1.1802, "step": 11903 }, { "epoch": 0.6508207700613698, "grad_norm": 1.4660954475402832, "learning_rate": 6.070421816270933e-06, "loss": 1.18, "step": 11904 }, { "epoch": 0.6508754425050913, "grad_norm": 1.6608761548995972, "learning_rate": 6.068741523618718e-06, "loss": 1.339, "step": 11905 }, { "epoch": 0.6509301149488129, "grad_norm": 1.8264044523239136, "learning_rate": 6.06706136224045e-06, "loss": 1.3697, "step": 11906 }, { "epoch": 0.6509847873925345, "grad_norm": 2.0878653526306152, "learning_rate": 6.065381332192228e-06, "loss": 1.4077, "step": 11907 }, { "epoch": 0.651039459836256, "grad_norm": 1.804368495941162, "learning_rate": 6.06370143353016e-06, "loss": 1.4278, "step": 11908 }, { "epoch": 0.6510941322799776, "grad_norm": 1.2988201379776, "learning_rate": 6.0620216663103336e-06, "loss": 1.3557, "step": 11909 }, { "epoch": 0.6511488047236992, "grad_norm": 1.5168888568878174, "learning_rate": 6.0603420305888484e-06, "loss": 1.3436, "step": 11910 }, { "epoch": 0.6512034771674207, "grad_norm": 1.455779790878296, "learning_rate": 6.058662526421787e-06, "loss": 1.5123, "step": 11911 }, { "epoch": 0.6512581496111423, "grad_norm": 1.533927083015442, "learning_rate": 6.0569831538652306e-06, "loss": 1.2612, "step": 11912 }, { "epoch": 0.6513128220548638, "grad_norm": 1.846177577972412, "learning_rate": 6.055303912975261e-06, "loss": 1.2881, "step": 11913 }, { "epoch": 0.6513674944985853, "grad_norm": 1.6602200269699097, "learning_rate": 6.053624803807951e-06, "loss": 1.1282, "step": 11914 }, { "epoch": 0.6514221669423069, "grad_norm": 1.7323508262634277, "learning_rate": 6.051945826419366e-06, "loss": 1.3194, "step": 11915 }, { "epoch": 0.6514768393860284, "grad_norm": 1.2998801469802856, "learning_rate": 6.0502669808655774e-06, "loss": 1.5811, "step": 11916 }, { "epoch": 0.65153151182975, "grad_norm": 1.4179670810699463, "learning_rate": 6.0485882672026415e-06, "loss": 1.4555, "step": 11917 }, { "epoch": 0.6515861842734716, "grad_norm": 1.5869084596633911, "learning_rate": 6.046909685486615e-06, "loss": 1.373, "step": 11918 }, { "epoch": 0.6516408567171931, "grad_norm": 1.6601088047027588, "learning_rate": 6.045231235773552e-06, "loss": 1.4297, "step": 11919 }, { "epoch": 0.6516955291609147, "grad_norm": 1.5045121908187866, "learning_rate": 6.0435529181195e-06, "loss": 1.4225, "step": 11920 }, { "epoch": 0.6517502016046363, "grad_norm": 1.487714171409607, "learning_rate": 6.041874732580493e-06, "loss": 1.4829, "step": 11921 }, { "epoch": 0.6518048740483577, "grad_norm": 1.2837220430374146, "learning_rate": 6.040196679212582e-06, "loss": 1.1411, "step": 11922 }, { "epoch": 0.6518595464920793, "grad_norm": 1.6223487854003906, "learning_rate": 6.0385187580717915e-06, "loss": 1.1179, "step": 11923 }, { "epoch": 0.6519142189358009, "grad_norm": 1.5426580905914307, "learning_rate": 6.0368409692141615e-06, "loss": 1.5886, "step": 11924 }, { "epoch": 0.6519688913795224, "grad_norm": 1.586471676826477, "learning_rate": 6.035163312695709e-06, "loss": 1.2055, "step": 11925 }, { "epoch": 0.652023563823244, "grad_norm": 1.7748394012451172, "learning_rate": 6.0334857885724575e-06, "loss": 1.2392, "step": 11926 }, { "epoch": 0.6520782362669656, "grad_norm": 1.6060189008712769, "learning_rate": 6.031808396900422e-06, "loss": 1.4241, "step": 11927 }, { "epoch": 0.6521329087106871, "grad_norm": 1.6616065502166748, "learning_rate": 6.030131137735618e-06, "loss": 1.3239, "step": 11928 }, { "epoch": 0.6521875811544087, "grad_norm": 1.6595416069030762, "learning_rate": 6.028454011134047e-06, "loss": 1.3419, "step": 11929 }, { "epoch": 0.6522422535981302, "grad_norm": 1.8252718448638916, "learning_rate": 6.026777017151719e-06, "loss": 1.4623, "step": 11930 }, { "epoch": 0.6522969260418517, "grad_norm": 1.6115835905075073, "learning_rate": 6.025100155844632e-06, "loss": 1.4397, "step": 11931 }, { "epoch": 0.6523515984855733, "grad_norm": 1.5424368381500244, "learning_rate": 6.023423427268774e-06, "loss": 1.3871, "step": 11932 }, { "epoch": 0.6524062709292948, "grad_norm": 1.772252082824707, "learning_rate": 6.021746831480142e-06, "loss": 1.6826, "step": 11933 }, { "epoch": 0.6524609433730164, "grad_norm": 1.4647035598754883, "learning_rate": 6.020070368534719e-06, "loss": 1.3923, "step": 11934 }, { "epoch": 0.652515615816738, "grad_norm": 1.3210632801055908, "learning_rate": 6.01839403848848e-06, "loss": 1.553, "step": 11935 }, { "epoch": 0.6525702882604595, "grad_norm": 1.6419020891189575, "learning_rate": 6.016717841397413e-06, "loss": 1.3222, "step": 11936 }, { "epoch": 0.6526249607041811, "grad_norm": 1.6318418979644775, "learning_rate": 6.015041777317481e-06, "loss": 1.5498, "step": 11937 }, { "epoch": 0.6526796331479027, "grad_norm": 1.984390139579773, "learning_rate": 6.013365846304657e-06, "loss": 1.5568, "step": 11938 }, { "epoch": 0.6527343055916242, "grad_norm": 1.4347399473190308, "learning_rate": 6.0116900484149046e-06, "loss": 1.3668, "step": 11939 }, { "epoch": 0.6527889780353457, "grad_norm": 1.4682183265686035, "learning_rate": 6.010014383704174e-06, "loss": 1.2201, "step": 11940 }, { "epoch": 0.6528436504790673, "grad_norm": 1.3276896476745605, "learning_rate": 6.00833885222843e-06, "loss": 1.6344, "step": 11941 }, { "epoch": 0.6528983229227888, "grad_norm": 1.7420257329940796, "learning_rate": 6.00666345404362e-06, "loss": 1.3114, "step": 11942 }, { "epoch": 0.6529529953665104, "grad_norm": 1.668851613998413, "learning_rate": 6.004988189205683e-06, "loss": 1.0834, "step": 11943 }, { "epoch": 0.6530076678102319, "grad_norm": 1.5480402708053589, "learning_rate": 6.003313057770568e-06, "loss": 1.4516, "step": 11944 }, { "epoch": 0.6530623402539535, "grad_norm": 1.4023340940475464, "learning_rate": 6.001638059794211e-06, "loss": 1.1631, "step": 11945 }, { "epoch": 0.6531170126976751, "grad_norm": 1.4658300876617432, "learning_rate": 5.999963195332536e-06, "loss": 1.5994, "step": 11946 }, { "epoch": 0.6531716851413966, "grad_norm": 1.3853566646575928, "learning_rate": 5.9982884644414815e-06, "loss": 1.3432, "step": 11947 }, { "epoch": 0.6532263575851182, "grad_norm": 1.8246289491653442, "learning_rate": 5.996613867176964e-06, "loss": 1.3667, "step": 11948 }, { "epoch": 0.6532810300288397, "grad_norm": 1.987900733947754, "learning_rate": 5.994939403594899e-06, "loss": 1.3063, "step": 11949 }, { "epoch": 0.6533357024725612, "grad_norm": 1.3195329904556274, "learning_rate": 5.993265073751211e-06, "loss": 1.4528, "step": 11950 }, { "epoch": 0.6533903749162828, "grad_norm": 1.6496508121490479, "learning_rate": 5.9915908777018026e-06, "loss": 1.4197, "step": 11951 }, { "epoch": 0.6534450473600044, "grad_norm": 1.3245387077331543, "learning_rate": 5.989916815502581e-06, "loss": 1.4668, "step": 11952 }, { "epoch": 0.6534997198037259, "grad_norm": 2.637561321258545, "learning_rate": 5.9882428872094475e-06, "loss": 1.4859, "step": 11953 }, { "epoch": 0.6535543922474475, "grad_norm": 1.711378574371338, "learning_rate": 5.986569092878296e-06, "loss": 1.5857, "step": 11954 }, { "epoch": 0.6536090646911691, "grad_norm": 1.1321585178375244, "learning_rate": 5.984895432565022e-06, "loss": 1.8404, "step": 11955 }, { "epoch": 0.6536637371348906, "grad_norm": 1.151641845703125, "learning_rate": 5.983221906325512e-06, "loss": 1.5742, "step": 11956 }, { "epoch": 0.6537184095786122, "grad_norm": 1.4998878240585327, "learning_rate": 5.981548514215646e-06, "loss": 1.7482, "step": 11957 }, { "epoch": 0.6537730820223336, "grad_norm": 1.6815060377120972, "learning_rate": 5.979875256291307e-06, "loss": 1.456, "step": 11958 }, { "epoch": 0.6538277544660552, "grad_norm": 1.4590885639190674, "learning_rate": 5.9782021326083665e-06, "loss": 1.3777, "step": 11959 }, { "epoch": 0.6538824269097768, "grad_norm": 1.4922147989273071, "learning_rate": 5.976529143222689e-06, "loss": 1.5224, "step": 11960 }, { "epoch": 0.6539370993534983, "grad_norm": 1.4728329181671143, "learning_rate": 5.9748562881901504e-06, "loss": 1.3614, "step": 11961 }, { "epoch": 0.6539917717972199, "grad_norm": 1.6008269786834717, "learning_rate": 5.973183567566605e-06, "loss": 1.536, "step": 11962 }, { "epoch": 0.6540464442409415, "grad_norm": 1.440186858177185, "learning_rate": 5.9715109814079085e-06, "loss": 1.5595, "step": 11963 }, { "epoch": 0.654101116684663, "grad_norm": 1.1733314990997314, "learning_rate": 5.969838529769914e-06, "loss": 1.5722, "step": 11964 }, { "epoch": 0.6541557891283846, "grad_norm": 1.8383333683013916, "learning_rate": 5.968166212708465e-06, "loss": 1.3245, "step": 11965 }, { "epoch": 0.6542104615721062, "grad_norm": 1.6137703657150269, "learning_rate": 5.966494030279411e-06, "loss": 1.4011, "step": 11966 }, { "epoch": 0.6542651340158276, "grad_norm": 1.4674742221832275, "learning_rate": 5.964821982538586e-06, "loss": 1.3385, "step": 11967 }, { "epoch": 0.6543198064595492, "grad_norm": 1.6432452201843262, "learning_rate": 5.96315006954182e-06, "loss": 1.281, "step": 11968 }, { "epoch": 0.6543744789032708, "grad_norm": 1.250967025756836, "learning_rate": 5.96147829134495e-06, "loss": 1.342, "step": 11969 }, { "epoch": 0.6544291513469923, "grad_norm": 1.394784927368164, "learning_rate": 5.959806648003796e-06, "loss": 1.3035, "step": 11970 }, { "epoch": 0.6544838237907139, "grad_norm": 1.471872091293335, "learning_rate": 5.958135139574177e-06, "loss": 1.4699, "step": 11971 }, { "epoch": 0.6545384962344354, "grad_norm": 1.384512186050415, "learning_rate": 5.956463766111913e-06, "loss": 1.5979, "step": 11972 }, { "epoch": 0.654593168678157, "grad_norm": 1.2789437770843506, "learning_rate": 5.954792527672812e-06, "loss": 1.3587, "step": 11973 }, { "epoch": 0.6546478411218786, "grad_norm": 1.6192399263381958, "learning_rate": 5.953121424312676e-06, "loss": 1.4164, "step": 11974 }, { "epoch": 0.6547025135656, "grad_norm": 1.8392940759658813, "learning_rate": 5.951450456087317e-06, "loss": 1.5099, "step": 11975 }, { "epoch": 0.6547571860093216, "grad_norm": 1.7633211612701416, "learning_rate": 5.949779623052526e-06, "loss": 1.4176, "step": 11976 }, { "epoch": 0.6548118584530432, "grad_norm": 1.675702691078186, "learning_rate": 5.948108925264096e-06, "loss": 1.5992, "step": 11977 }, { "epoch": 0.6548665308967647, "grad_norm": 2.723862886428833, "learning_rate": 5.94643836277782e-06, "loss": 1.4742, "step": 11978 }, { "epoch": 0.6549212033404863, "grad_norm": 1.4571515321731567, "learning_rate": 5.944767935649475e-06, "loss": 1.386, "step": 11979 }, { "epoch": 0.6549758757842079, "grad_norm": 1.5666673183441162, "learning_rate": 5.943097643934847e-06, "loss": 1.4758, "step": 11980 }, { "epoch": 0.6550305482279294, "grad_norm": 1.5589848756790161, "learning_rate": 5.941427487689711e-06, "loss": 1.2631, "step": 11981 }, { "epoch": 0.655085220671651, "grad_norm": 1.3881264925003052, "learning_rate": 5.939757466969831e-06, "loss": 1.293, "step": 11982 }, { "epoch": 0.6551398931153726, "grad_norm": 1.6569242477416992, "learning_rate": 5.9380875818309805e-06, "loss": 1.3038, "step": 11983 }, { "epoch": 0.655194565559094, "grad_norm": 1.6147555112838745, "learning_rate": 5.9364178323289155e-06, "loss": 1.3763, "step": 11984 }, { "epoch": 0.6552492380028156, "grad_norm": 1.4955973625183105, "learning_rate": 5.934748218519391e-06, "loss": 1.359, "step": 11985 }, { "epoch": 0.6553039104465371, "grad_norm": 1.4481713771820068, "learning_rate": 5.933078740458167e-06, "loss": 1.6326, "step": 11986 }, { "epoch": 0.6553585828902587, "grad_norm": 1.523202896118164, "learning_rate": 5.931409398200987e-06, "loss": 1.1522, "step": 11987 }, { "epoch": 0.6554132553339803, "grad_norm": 1.61641526222229, "learning_rate": 5.92974019180359e-06, "loss": 1.3559, "step": 11988 }, { "epoch": 0.6554679277777018, "grad_norm": 2.0543038845062256, "learning_rate": 5.928071121321723e-06, "loss": 1.4166, "step": 11989 }, { "epoch": 0.6555226002214234, "grad_norm": 1.306642770767212, "learning_rate": 5.926402186811118e-06, "loss": 1.3489, "step": 11990 }, { "epoch": 0.655577272665145, "grad_norm": 1.5196539163589478, "learning_rate": 5.924733388327501e-06, "loss": 1.4756, "step": 11991 }, { "epoch": 0.6556319451088665, "grad_norm": 1.296738862991333, "learning_rate": 5.9230647259266e-06, "loss": 1.3976, "step": 11992 }, { "epoch": 0.6556866175525881, "grad_norm": 1.3878648281097412, "learning_rate": 5.9213961996641315e-06, "loss": 1.4493, "step": 11993 }, { "epoch": 0.6557412899963097, "grad_norm": 1.3239595890045166, "learning_rate": 5.919727809595816e-06, "loss": 1.4133, "step": 11994 }, { "epoch": 0.6557959624400311, "grad_norm": 1.365876317024231, "learning_rate": 5.918059555777367e-06, "loss": 1.4986, "step": 11995 }, { "epoch": 0.6558506348837527, "grad_norm": 1.5580432415008545, "learning_rate": 5.916391438264484e-06, "loss": 1.2815, "step": 11996 }, { "epoch": 0.6559053073274743, "grad_norm": 1.4264194965362549, "learning_rate": 5.914723457112877e-06, "loss": 1.422, "step": 11997 }, { "epoch": 0.6559599797711958, "grad_norm": 2.675088882446289, "learning_rate": 5.913055612378238e-06, "loss": 1.5257, "step": 11998 }, { "epoch": 0.6560146522149174, "grad_norm": 1.2868541479110718, "learning_rate": 5.9113879041162595e-06, "loss": 1.5333, "step": 11999 }, { "epoch": 0.6560693246586389, "grad_norm": 1.8503056764602661, "learning_rate": 5.909720332382638e-06, "loss": 1.427, "step": 12000 }, { "epoch": 0.6561239971023605, "grad_norm": 1.4661052227020264, "learning_rate": 5.908052897233052e-06, "loss": 1.497, "step": 12001 }, { "epoch": 0.6561786695460821, "grad_norm": 1.4872066974639893, "learning_rate": 5.9063855987231785e-06, "loss": 1.3464, "step": 12002 }, { "epoch": 0.6562333419898035, "grad_norm": 1.6247732639312744, "learning_rate": 5.9047184369086994e-06, "loss": 1.3941, "step": 12003 }, { "epoch": 0.6562880144335251, "grad_norm": 1.3875752687454224, "learning_rate": 5.903051411845282e-06, "loss": 1.4339, "step": 12004 }, { "epoch": 0.6563426868772467, "grad_norm": 1.244138240814209, "learning_rate": 5.901384523588586e-06, "loss": 1.4437, "step": 12005 }, { "epoch": 0.6563973593209682, "grad_norm": 1.5675097703933716, "learning_rate": 5.899717772194283e-06, "loss": 1.4787, "step": 12006 }, { "epoch": 0.6564520317646898, "grad_norm": 2.1797726154327393, "learning_rate": 5.898051157718022e-06, "loss": 1.3901, "step": 12007 }, { "epoch": 0.6565067042084114, "grad_norm": 1.4440727233886719, "learning_rate": 5.896384680215461e-06, "loss": 1.5205, "step": 12008 }, { "epoch": 0.6565613766521329, "grad_norm": 1.6824625730514526, "learning_rate": 5.894718339742247e-06, "loss": 1.1427, "step": 12009 }, { "epoch": 0.6566160490958545, "grad_norm": 1.315338134765625, "learning_rate": 5.893052136354019e-06, "loss": 1.5916, "step": 12010 }, { "epoch": 0.6566707215395761, "grad_norm": 1.5711307525634766, "learning_rate": 5.8913860701064175e-06, "loss": 1.4844, "step": 12011 }, { "epoch": 0.6567253939832975, "grad_norm": 1.4354310035705566, "learning_rate": 5.889720141055077e-06, "loss": 1.4556, "step": 12012 }, { "epoch": 0.6567800664270191, "grad_norm": 1.592449426651001, "learning_rate": 5.888054349255622e-06, "loss": 1.3078, "step": 12013 }, { "epoch": 0.6568347388707406, "grad_norm": 1.736023187637329, "learning_rate": 5.886388694763685e-06, "loss": 1.455, "step": 12014 }, { "epoch": 0.6568894113144622, "grad_norm": 1.5282416343688965, "learning_rate": 5.884723177634884e-06, "loss": 1.4481, "step": 12015 }, { "epoch": 0.6569440837581838, "grad_norm": 1.644724726676941, "learning_rate": 5.883057797924829e-06, "loss": 1.397, "step": 12016 }, { "epoch": 0.6569987562019053, "grad_norm": 1.2958024740219116, "learning_rate": 5.881392555689137e-06, "loss": 1.5107, "step": 12017 }, { "epoch": 0.6570534286456269, "grad_norm": 1.6304316520690918, "learning_rate": 5.879727450983412e-06, "loss": 1.3781, "step": 12018 }, { "epoch": 0.6571081010893485, "grad_norm": 1.4562532901763916, "learning_rate": 5.878062483863254e-06, "loss": 1.271, "step": 12019 }, { "epoch": 0.65716277353307, "grad_norm": 1.6043150424957275, "learning_rate": 5.876397654384265e-06, "loss": 1.3714, "step": 12020 }, { "epoch": 0.6572174459767915, "grad_norm": 1.4486232995986938, "learning_rate": 5.874732962602032e-06, "loss": 1.402, "step": 12021 }, { "epoch": 0.6572721184205131, "grad_norm": 1.4000006914138794, "learning_rate": 5.873068408572148e-06, "loss": 1.3089, "step": 12022 }, { "epoch": 0.6573267908642346, "grad_norm": 1.4693667888641357, "learning_rate": 5.871403992350194e-06, "loss": 1.3962, "step": 12023 }, { "epoch": 0.6573814633079562, "grad_norm": 1.366199254989624, "learning_rate": 5.8697397139917446e-06, "loss": 1.606, "step": 12024 }, { "epoch": 0.6574361357516778, "grad_norm": 1.9867805242538452, "learning_rate": 5.868075573552383e-06, "loss": 1.5367, "step": 12025 }, { "epoch": 0.6574908081953993, "grad_norm": 1.437361717224121, "learning_rate": 5.866411571087672e-06, "loss": 1.3454, "step": 12026 }, { "epoch": 0.6575454806391209, "grad_norm": 1.4757027626037598, "learning_rate": 5.864747706653176e-06, "loss": 1.3617, "step": 12027 }, { "epoch": 0.6576001530828425, "grad_norm": 1.3771698474884033, "learning_rate": 5.8630839803044615e-06, "loss": 1.5109, "step": 12028 }, { "epoch": 0.657654825526564, "grad_norm": 1.9669160842895508, "learning_rate": 5.86142039209708e-06, "loss": 1.3839, "step": 12029 }, { "epoch": 0.6577094979702856, "grad_norm": 1.500860333442688, "learning_rate": 5.85975694208658e-06, "loss": 1.7151, "step": 12030 }, { "epoch": 0.657764170414007, "grad_norm": 1.4009333848953247, "learning_rate": 5.8580936303285165e-06, "loss": 1.6145, "step": 12031 }, { "epoch": 0.6578188428577286, "grad_norm": 1.2419381141662598, "learning_rate": 5.856430456878424e-06, "loss": 1.4096, "step": 12032 }, { "epoch": 0.6578735153014502, "grad_norm": 1.602396011352539, "learning_rate": 5.8547674217918374e-06, "loss": 1.1418, "step": 12033 }, { "epoch": 0.6579281877451717, "grad_norm": 1.7050883769989014, "learning_rate": 5.853104525124298e-06, "loss": 1.197, "step": 12034 }, { "epoch": 0.6579828601888933, "grad_norm": 1.7219836711883545, "learning_rate": 5.851441766931328e-06, "loss": 1.39, "step": 12035 }, { "epoch": 0.6580375326326149, "grad_norm": 1.5571435689926147, "learning_rate": 5.849779147268453e-06, "loss": 1.4123, "step": 12036 }, { "epoch": 0.6580922050763364, "grad_norm": 1.629448413848877, "learning_rate": 5.8481166661911915e-06, "loss": 1.2057, "step": 12037 }, { "epoch": 0.658146877520058, "grad_norm": 1.5458275079727173, "learning_rate": 5.846454323755053e-06, "loss": 1.4039, "step": 12038 }, { "epoch": 0.6582015499637796, "grad_norm": 1.4883966445922852, "learning_rate": 5.844792120015556e-06, "loss": 1.3393, "step": 12039 }, { "epoch": 0.658256222407501, "grad_norm": 1.4581388235092163, "learning_rate": 5.843130055028201e-06, "loss": 1.4924, "step": 12040 }, { "epoch": 0.6583108948512226, "grad_norm": 1.5560376644134521, "learning_rate": 5.841468128848484e-06, "loss": 1.206, "step": 12041 }, { "epoch": 0.6583655672949442, "grad_norm": 1.5463119745254517, "learning_rate": 5.839806341531908e-06, "loss": 1.3643, "step": 12042 }, { "epoch": 0.6584202397386657, "grad_norm": 1.2664047479629517, "learning_rate": 5.838144693133958e-06, "loss": 1.4424, "step": 12043 }, { "epoch": 0.6584749121823873, "grad_norm": 1.4404665231704712, "learning_rate": 5.836483183710122e-06, "loss": 1.705, "step": 12044 }, { "epoch": 0.6585295846261088, "grad_norm": 1.8440437316894531, "learning_rate": 5.8348218133158855e-06, "loss": 1.3838, "step": 12045 }, { "epoch": 0.6585842570698304, "grad_norm": 1.3671433925628662, "learning_rate": 5.833160582006722e-06, "loss": 1.6272, "step": 12046 }, { "epoch": 0.658638929513552, "grad_norm": 1.4434351921081543, "learning_rate": 5.831499489838105e-06, "loss": 1.5883, "step": 12047 }, { "epoch": 0.6586936019572734, "grad_norm": 1.473868727684021, "learning_rate": 5.829838536865502e-06, "loss": 1.3187, "step": 12048 }, { "epoch": 0.658748274400995, "grad_norm": 1.3608500957489014, "learning_rate": 5.82817772314437e-06, "loss": 1.3387, "step": 12049 }, { "epoch": 0.6588029468447166, "grad_norm": 1.7632980346679688, "learning_rate": 5.8265170487301806e-06, "loss": 1.3929, "step": 12050 }, { "epoch": 0.6588576192884381, "grad_norm": 1.7866023778915405, "learning_rate": 5.8248565136783786e-06, "loss": 1.2398, "step": 12051 }, { "epoch": 0.6589122917321597, "grad_norm": 1.4866822957992554, "learning_rate": 5.82319611804441e-06, "loss": 1.5535, "step": 12052 }, { "epoch": 0.6589669641758813, "grad_norm": 1.7007722854614258, "learning_rate": 5.821535861883729e-06, "loss": 1.5802, "step": 12053 }, { "epoch": 0.6590216366196028, "grad_norm": 2.0082762241363525, "learning_rate": 5.819875745251771e-06, "loss": 1.3248, "step": 12054 }, { "epoch": 0.6590763090633244, "grad_norm": 1.3365920782089233, "learning_rate": 5.8182157682039665e-06, "loss": 1.4696, "step": 12055 }, { "epoch": 0.659130981507046, "grad_norm": 1.4590216875076294, "learning_rate": 5.816555930795754e-06, "loss": 1.5423, "step": 12056 }, { "epoch": 0.6591856539507674, "grad_norm": 1.779335856437683, "learning_rate": 5.814896233082556e-06, "loss": 1.4593, "step": 12057 }, { "epoch": 0.659240326394489, "grad_norm": 1.5438748598098755, "learning_rate": 5.813236675119793e-06, "loss": 1.4673, "step": 12058 }, { "epoch": 0.6592949988382105, "grad_norm": 1.7530308961868286, "learning_rate": 5.811577256962883e-06, "loss": 1.419, "step": 12059 }, { "epoch": 0.6593496712819321, "grad_norm": 1.5082032680511475, "learning_rate": 5.8099179786672365e-06, "loss": 1.3886, "step": 12060 }, { "epoch": 0.6594043437256537, "grad_norm": 1.6472216844558716, "learning_rate": 5.808258840288257e-06, "loss": 1.389, "step": 12061 }, { "epoch": 0.6594590161693752, "grad_norm": 1.4185876846313477, "learning_rate": 5.806599841881355e-06, "loss": 1.3984, "step": 12062 }, { "epoch": 0.6595136886130968, "grad_norm": 1.9750804901123047, "learning_rate": 5.8049409835019215e-06, "loss": 1.4231, "step": 12063 }, { "epoch": 0.6595683610568184, "grad_norm": 1.5988421440124512, "learning_rate": 5.803282265205354e-06, "loss": 1.4018, "step": 12064 }, { "epoch": 0.6596230335005399, "grad_norm": 1.3620637655258179, "learning_rate": 5.801623687047041e-06, "loss": 1.523, "step": 12065 }, { "epoch": 0.6596777059442614, "grad_norm": 2.5948362350463867, "learning_rate": 5.799965249082361e-06, "loss": 1.266, "step": 12066 }, { "epoch": 0.659732378387983, "grad_norm": 1.3025591373443604, "learning_rate": 5.798306951366701e-06, "loss": 1.3499, "step": 12067 }, { "epoch": 0.6597870508317045, "grad_norm": 1.445683479309082, "learning_rate": 5.79664879395543e-06, "loss": 1.3868, "step": 12068 }, { "epoch": 0.6598417232754261, "grad_norm": 1.4865905046463013, "learning_rate": 5.794990776903917e-06, "loss": 1.4869, "step": 12069 }, { "epoch": 0.6598963957191477, "grad_norm": 1.5748732089996338, "learning_rate": 5.793332900267534e-06, "loss": 1.1667, "step": 12070 }, { "epoch": 0.6599510681628692, "grad_norm": 1.6850248575210571, "learning_rate": 5.7916751641016356e-06, "loss": 1.559, "step": 12071 }, { "epoch": 0.6600057406065908, "grad_norm": 1.5064666271209717, "learning_rate": 5.7900175684615786e-06, "loss": 1.3946, "step": 12072 }, { "epoch": 0.6600604130503123, "grad_norm": 1.5392558574676514, "learning_rate": 5.788360113402713e-06, "loss": 1.3758, "step": 12073 }, { "epoch": 0.6601150854940339, "grad_norm": 1.7288234233856201, "learning_rate": 5.786702798980388e-06, "loss": 1.5442, "step": 12074 }, { "epoch": 0.6601697579377555, "grad_norm": 1.6386128664016724, "learning_rate": 5.7850456252499386e-06, "loss": 1.2834, "step": 12075 }, { "epoch": 0.6602244303814769, "grad_norm": 1.8285084962844849, "learning_rate": 5.78338859226671e-06, "loss": 1.5876, "step": 12076 }, { "epoch": 0.6602791028251985, "grad_norm": 1.5989793539047241, "learning_rate": 5.781731700086028e-06, "loss": 1.3371, "step": 12077 }, { "epoch": 0.6603337752689201, "grad_norm": 1.3434630632400513, "learning_rate": 5.780074948763226e-06, "loss": 1.4157, "step": 12078 }, { "epoch": 0.6603884477126416, "grad_norm": 1.3466718196868896, "learning_rate": 5.778418338353624e-06, "loss": 1.4248, "step": 12079 }, { "epoch": 0.6604431201563632, "grad_norm": 1.695168375968933, "learning_rate": 5.776761868912537e-06, "loss": 1.1667, "step": 12080 }, { "epoch": 0.6604977926000848, "grad_norm": 1.5173496007919312, "learning_rate": 5.775105540495284e-06, "loss": 1.5285, "step": 12081 }, { "epoch": 0.6605524650438063, "grad_norm": 1.5253844261169434, "learning_rate": 5.773449353157172e-06, "loss": 1.3048, "step": 12082 }, { "epoch": 0.6606071374875279, "grad_norm": 1.9434552192687988, "learning_rate": 5.771793306953504e-06, "loss": 1.6386, "step": 12083 }, { "epoch": 0.6606618099312495, "grad_norm": 1.6152019500732422, "learning_rate": 5.770137401939577e-06, "loss": 1.4311, "step": 12084 }, { "epoch": 0.6607164823749709, "grad_norm": 1.6832712888717651, "learning_rate": 5.768481638170691e-06, "loss": 1.4112, "step": 12085 }, { "epoch": 0.6607711548186925, "grad_norm": 1.5285453796386719, "learning_rate": 5.766826015702127e-06, "loss": 1.3669, "step": 12086 }, { "epoch": 0.660825827262414, "grad_norm": 1.3742454051971436, "learning_rate": 5.7651705345891795e-06, "loss": 1.3762, "step": 12087 }, { "epoch": 0.6608804997061356, "grad_norm": 1.7446893453598022, "learning_rate": 5.763515194887126e-06, "loss": 1.4904, "step": 12088 }, { "epoch": 0.6609351721498572, "grad_norm": 1.589267611503601, "learning_rate": 5.761859996651237e-06, "loss": 1.5031, "step": 12089 }, { "epoch": 0.6609898445935787, "grad_norm": 1.2929884195327759, "learning_rate": 5.760204939936791e-06, "loss": 1.5126, "step": 12090 }, { "epoch": 0.6610445170373003, "grad_norm": 1.2038476467132568, "learning_rate": 5.758550024799049e-06, "loss": 1.5786, "step": 12091 }, { "epoch": 0.6610991894810219, "grad_norm": 1.5063589811325073, "learning_rate": 5.756895251293277e-06, "loss": 1.3027, "step": 12092 }, { "epoch": 0.6611538619247433, "grad_norm": 1.3750665187835693, "learning_rate": 5.75524061947473e-06, "loss": 1.5345, "step": 12093 }, { "epoch": 0.6612085343684649, "grad_norm": 1.7126686573028564, "learning_rate": 5.753586129398655e-06, "loss": 1.3418, "step": 12094 }, { "epoch": 0.6612632068121865, "grad_norm": 1.5826178789138794, "learning_rate": 5.751931781120308e-06, "loss": 1.5157, "step": 12095 }, { "epoch": 0.661317879255908, "grad_norm": 1.3497427701950073, "learning_rate": 5.750277574694927e-06, "loss": 1.5259, "step": 12096 }, { "epoch": 0.6613725516996296, "grad_norm": 2.0002171993255615, "learning_rate": 5.748623510177751e-06, "loss": 1.23, "step": 12097 }, { "epoch": 0.6614272241433512, "grad_norm": 1.4556158781051636, "learning_rate": 5.746969587624011e-06, "loss": 1.2289, "step": 12098 }, { "epoch": 0.6614818965870727, "grad_norm": 1.4904645681381226, "learning_rate": 5.745315807088936e-06, "loss": 1.4723, "step": 12099 }, { "epoch": 0.6615365690307943, "grad_norm": 1.617295742034912, "learning_rate": 5.743662168627747e-06, "loss": 1.5013, "step": 12100 }, { "epoch": 0.6615912414745158, "grad_norm": 1.4649217128753662, "learning_rate": 5.74200867229567e-06, "loss": 1.5175, "step": 12101 }, { "epoch": 0.6616459139182373, "grad_norm": 1.2569127082824707, "learning_rate": 5.740355318147916e-06, "loss": 1.2402, "step": 12102 }, { "epoch": 0.6617005863619589, "grad_norm": 1.2753028869628906, "learning_rate": 5.738702106239689e-06, "loss": 1.4201, "step": 12103 }, { "epoch": 0.6617552588056804, "grad_norm": 1.648135781288147, "learning_rate": 5.737049036626201e-06, "loss": 1.4166, "step": 12104 }, { "epoch": 0.661809931249402, "grad_norm": 1.8688918352127075, "learning_rate": 5.735396109362646e-06, "loss": 1.356, "step": 12105 }, { "epoch": 0.6618646036931236, "grad_norm": 1.489917278289795, "learning_rate": 5.733743324504225e-06, "loss": 1.4921, "step": 12106 }, { "epoch": 0.6619192761368451, "grad_norm": 1.657806396484375, "learning_rate": 5.732090682106126e-06, "loss": 1.4004, "step": 12107 }, { "epoch": 0.6619739485805667, "grad_norm": 1.9707926511764526, "learning_rate": 5.73043818222353e-06, "loss": 1.3719, "step": 12108 }, { "epoch": 0.6620286210242883, "grad_norm": 1.886759877204895, "learning_rate": 5.728785824911627e-06, "loss": 1.4859, "step": 12109 }, { "epoch": 0.6620832934680098, "grad_norm": 1.8432739973068237, "learning_rate": 5.727133610225588e-06, "loss": 1.5902, "step": 12110 }, { "epoch": 0.6621379659117314, "grad_norm": 1.7231563329696655, "learning_rate": 5.725481538220583e-06, "loss": 1.2005, "step": 12111 }, { "epoch": 0.662192638355453, "grad_norm": 1.7727627754211426, "learning_rate": 5.723829608951781e-06, "loss": 1.1505, "step": 12112 }, { "epoch": 0.6622473107991744, "grad_norm": 1.642792820930481, "learning_rate": 5.722177822474342e-06, "loss": 1.379, "step": 12113 }, { "epoch": 0.662301983242896, "grad_norm": 1.7064976692199707, "learning_rate": 5.72052617884342e-06, "loss": 1.4346, "step": 12114 }, { "epoch": 0.6623566556866175, "grad_norm": 1.7280932664871216, "learning_rate": 5.718874678114174e-06, "loss": 1.3053, "step": 12115 }, { "epoch": 0.6624113281303391, "grad_norm": 1.4468194246292114, "learning_rate": 5.717223320341751e-06, "loss": 1.5382, "step": 12116 }, { "epoch": 0.6624660005740607, "grad_norm": 2.6355273723602295, "learning_rate": 5.7155721055812856e-06, "loss": 1.2929, "step": 12117 }, { "epoch": 0.6625206730177822, "grad_norm": 1.4812425374984741, "learning_rate": 5.713921033887925e-06, "loss": 1.3609, "step": 12118 }, { "epoch": 0.6625753454615038, "grad_norm": 1.4804903268814087, "learning_rate": 5.712270105316795e-06, "loss": 1.4993, "step": 12119 }, { "epoch": 0.6626300179052254, "grad_norm": 1.3943744897842407, "learning_rate": 5.710619319923032e-06, "loss": 1.3584, "step": 12120 }, { "epoch": 0.6626846903489468, "grad_norm": 1.3056950569152832, "learning_rate": 5.708968677761755e-06, "loss": 1.5148, "step": 12121 }, { "epoch": 0.6627393627926684, "grad_norm": 1.554796814918518, "learning_rate": 5.707318178888083e-06, "loss": 1.4489, "step": 12122 }, { "epoch": 0.66279403523639, "grad_norm": 1.6425048112869263, "learning_rate": 5.70566782335713e-06, "loss": 1.494, "step": 12123 }, { "epoch": 0.6628487076801115, "grad_norm": 1.3299050331115723, "learning_rate": 5.704017611224005e-06, "loss": 1.3892, "step": 12124 }, { "epoch": 0.6629033801238331, "grad_norm": 1.6290013790130615, "learning_rate": 5.7023675425438096e-06, "loss": 1.431, "step": 12125 }, { "epoch": 0.6629580525675547, "grad_norm": 1.6072624921798706, "learning_rate": 5.70071761737165e-06, "loss": 1.6395, "step": 12126 }, { "epoch": 0.6630127250112762, "grad_norm": 1.4740890264511108, "learning_rate": 5.699067835762618e-06, "loss": 1.2819, "step": 12127 }, { "epoch": 0.6630673974549978, "grad_norm": 2.142591953277588, "learning_rate": 5.6974181977718e-06, "loss": 1.3918, "step": 12128 }, { "epoch": 0.6631220698987192, "grad_norm": 1.3623825311660767, "learning_rate": 5.695768703454288e-06, "loss": 1.4404, "step": 12129 }, { "epoch": 0.6631767423424408, "grad_norm": 1.6766631603240967, "learning_rate": 5.69411935286516e-06, "loss": 1.4086, "step": 12130 }, { "epoch": 0.6632314147861624, "grad_norm": 1.3703902959823608, "learning_rate": 5.6924701460594855e-06, "loss": 1.4351, "step": 12131 }, { "epoch": 0.6632860872298839, "grad_norm": 1.59036123752594, "learning_rate": 5.690821083092346e-06, "loss": 1.3032, "step": 12132 }, { "epoch": 0.6633407596736055, "grad_norm": 1.6549124717712402, "learning_rate": 5.689172164018797e-06, "loss": 1.4719, "step": 12133 }, { "epoch": 0.6633954321173271, "grad_norm": 1.5316046476364136, "learning_rate": 5.687523388893911e-06, "loss": 1.3356, "step": 12134 }, { "epoch": 0.6634501045610486, "grad_norm": 2.018561840057373, "learning_rate": 5.685874757772737e-06, "loss": 1.4468, "step": 12135 }, { "epoch": 0.6635047770047702, "grad_norm": 1.7426447868347168, "learning_rate": 5.684226270710329e-06, "loss": 1.4002, "step": 12136 }, { "epoch": 0.6635594494484918, "grad_norm": 1.5156333446502686, "learning_rate": 5.682577927761732e-06, "loss": 1.293, "step": 12137 }, { "epoch": 0.6636141218922132, "grad_norm": 2.396571159362793, "learning_rate": 5.680929728981991e-06, "loss": 1.0423, "step": 12138 }, { "epoch": 0.6636687943359348, "grad_norm": 1.4804692268371582, "learning_rate": 5.679281674426136e-06, "loss": 1.3642, "step": 12139 }, { "epoch": 0.6637234667796564, "grad_norm": 1.3602488040924072, "learning_rate": 5.67763376414921e-06, "loss": 1.5981, "step": 12140 }, { "epoch": 0.6637781392233779, "grad_norm": 1.9769195318222046, "learning_rate": 5.6759859982062345e-06, "loss": 1.3664, "step": 12141 }, { "epoch": 0.6638328116670995, "grad_norm": 1.4784761667251587, "learning_rate": 5.674338376652228e-06, "loss": 1.4677, "step": 12142 }, { "epoch": 0.663887484110821, "grad_norm": 2.377629280090332, "learning_rate": 5.672690899542219e-06, "loss": 1.3134, "step": 12143 }, { "epoch": 0.6639421565545426, "grad_norm": 1.4710594415664673, "learning_rate": 5.671043566931216e-06, "loss": 1.4666, "step": 12144 }, { "epoch": 0.6639968289982642, "grad_norm": 1.6167817115783691, "learning_rate": 5.6693963788742215e-06, "loss": 1.4172, "step": 12145 }, { "epoch": 0.6640515014419857, "grad_norm": 1.3822754621505737, "learning_rate": 5.667749335426247e-06, "loss": 1.3847, "step": 12146 }, { "epoch": 0.6641061738857073, "grad_norm": 1.8660651445388794, "learning_rate": 5.666102436642285e-06, "loss": 1.1663, "step": 12147 }, { "epoch": 0.6641608463294288, "grad_norm": 1.2968817949295044, "learning_rate": 5.6644556825773365e-06, "loss": 1.4884, "step": 12148 }, { "epoch": 0.6642155187731503, "grad_norm": 1.5884146690368652, "learning_rate": 5.662809073286387e-06, "loss": 1.2553, "step": 12149 }, { "epoch": 0.6642701912168719, "grad_norm": 1.8558324575424194, "learning_rate": 5.66116260882442e-06, "loss": 1.3265, "step": 12150 }, { "epoch": 0.6643248636605935, "grad_norm": 1.577185034751892, "learning_rate": 5.659516289246414e-06, "loss": 1.5348, "step": 12151 }, { "epoch": 0.664379536104315, "grad_norm": 1.4034521579742432, "learning_rate": 5.657870114607346e-06, "loss": 1.4178, "step": 12152 }, { "epoch": 0.6644342085480366, "grad_norm": 1.3328691720962524, "learning_rate": 5.6562240849621785e-06, "loss": 1.4269, "step": 12153 }, { "epoch": 0.6644888809917582, "grad_norm": 1.462756872177124, "learning_rate": 5.654578200365886e-06, "loss": 1.4912, "step": 12154 }, { "epoch": 0.6645435534354797, "grad_norm": 1.3446351289749146, "learning_rate": 5.652932460873424e-06, "loss": 1.4933, "step": 12155 }, { "epoch": 0.6645982258792013, "grad_norm": 1.4385786056518555, "learning_rate": 5.651286866539745e-06, "loss": 1.3974, "step": 12156 }, { "epoch": 0.6646528983229227, "grad_norm": 1.4345612525939941, "learning_rate": 5.649641417419805e-06, "loss": 1.4717, "step": 12157 }, { "epoch": 0.6647075707666443, "grad_norm": 1.2241461277008057, "learning_rate": 5.647996113568547e-06, "loss": 1.2376, "step": 12158 }, { "epoch": 0.6647622432103659, "grad_norm": 1.626462459564209, "learning_rate": 5.6463509550409065e-06, "loss": 1.4217, "step": 12159 }, { "epoch": 0.6648169156540874, "grad_norm": 1.2478407621383667, "learning_rate": 5.644705941891828e-06, "loss": 1.5708, "step": 12160 }, { "epoch": 0.664871588097809, "grad_norm": 1.553855538368225, "learning_rate": 5.643061074176239e-06, "loss": 1.4615, "step": 12161 }, { "epoch": 0.6649262605415306, "grad_norm": 1.5053558349609375, "learning_rate": 5.641416351949063e-06, "loss": 1.2633, "step": 12162 }, { "epoch": 0.6649809329852521, "grad_norm": 2.0083534717559814, "learning_rate": 5.639771775265223e-06, "loss": 1.4978, "step": 12163 }, { "epoch": 0.6650356054289737, "grad_norm": 1.7791166305541992, "learning_rate": 5.638127344179632e-06, "loss": 1.1732, "step": 12164 }, { "epoch": 0.6650902778726953, "grad_norm": 1.3395832777023315, "learning_rate": 5.636483058747209e-06, "loss": 1.3944, "step": 12165 }, { "epoch": 0.6651449503164167, "grad_norm": 1.5104377269744873, "learning_rate": 5.634838919022855e-06, "loss": 1.5333, "step": 12166 }, { "epoch": 0.6651996227601383, "grad_norm": 1.9314504861831665, "learning_rate": 5.633194925061469e-06, "loss": 1.3191, "step": 12167 }, { "epoch": 0.6652542952038599, "grad_norm": 1.6229639053344727, "learning_rate": 5.631551076917955e-06, "loss": 1.4473, "step": 12168 }, { "epoch": 0.6653089676475814, "grad_norm": 1.4801926612854004, "learning_rate": 5.629907374647201e-06, "loss": 1.448, "step": 12169 }, { "epoch": 0.665363640091303, "grad_norm": 1.4169074296951294, "learning_rate": 5.628263818304091e-06, "loss": 1.2868, "step": 12170 }, { "epoch": 0.6654183125350245, "grad_norm": 1.7999719381332397, "learning_rate": 5.626620407943515e-06, "loss": 1.5472, "step": 12171 }, { "epoch": 0.6654729849787461, "grad_norm": 1.4313859939575195, "learning_rate": 5.624977143620347e-06, "loss": 1.378, "step": 12172 }, { "epoch": 0.6655276574224677, "grad_norm": 1.416064739227295, "learning_rate": 5.623334025389453e-06, "loss": 1.3608, "step": 12173 }, { "epoch": 0.6655823298661891, "grad_norm": 1.516015648841858, "learning_rate": 5.62169105330571e-06, "loss": 1.6056, "step": 12174 }, { "epoch": 0.6656370023099107, "grad_norm": 1.360317587852478, "learning_rate": 5.620048227423977e-06, "loss": 1.4538, "step": 12175 }, { "epoch": 0.6656916747536323, "grad_norm": 1.1608883142471313, "learning_rate": 5.618405547799113e-06, "loss": 1.5864, "step": 12176 }, { "epoch": 0.6657463471973538, "grad_norm": 1.5320895910263062, "learning_rate": 5.616763014485967e-06, "loss": 1.596, "step": 12177 }, { "epoch": 0.6658010196410754, "grad_norm": 1.484194040298462, "learning_rate": 5.615120627539387e-06, "loss": 1.6287, "step": 12178 }, { "epoch": 0.665855692084797, "grad_norm": 1.809399127960205, "learning_rate": 5.613478387014223e-06, "loss": 1.1679, "step": 12179 }, { "epoch": 0.6659103645285185, "grad_norm": 1.2523614168167114, "learning_rate": 5.611836292965308e-06, "loss": 1.6259, "step": 12180 }, { "epoch": 0.6659650369722401, "grad_norm": 1.4499657154083252, "learning_rate": 5.6101943454474725e-06, "loss": 1.4784, "step": 12181 }, { "epoch": 0.6660197094159617, "grad_norm": 1.317726492881775, "learning_rate": 5.608552544515553e-06, "loss": 1.2879, "step": 12182 }, { "epoch": 0.6660743818596832, "grad_norm": 1.9943500757217407, "learning_rate": 5.60691089022437e-06, "loss": 1.38, "step": 12183 }, { "epoch": 0.6661290543034047, "grad_norm": 1.1899404525756836, "learning_rate": 5.605269382628735e-06, "loss": 1.527, "step": 12184 }, { "epoch": 0.6661837267471262, "grad_norm": 1.6106950044631958, "learning_rate": 5.603628021783474e-06, "loss": 1.3917, "step": 12185 }, { "epoch": 0.6662383991908478, "grad_norm": 1.4335910081863403, "learning_rate": 5.601986807743388e-06, "loss": 1.4514, "step": 12186 }, { "epoch": 0.6662930716345694, "grad_norm": 1.6267098188400269, "learning_rate": 5.60034574056328e-06, "loss": 1.6106, "step": 12187 }, { "epoch": 0.6663477440782909, "grad_norm": 1.7828073501586914, "learning_rate": 5.598704820297955e-06, "loss": 1.4638, "step": 12188 }, { "epoch": 0.6664024165220125, "grad_norm": 1.6124626398086548, "learning_rate": 5.597064047002204e-06, "loss": 1.3343, "step": 12189 }, { "epoch": 0.6664570889657341, "grad_norm": 2.0586681365966797, "learning_rate": 5.595423420730816e-06, "loss": 1.3469, "step": 12190 }, { "epoch": 0.6665117614094556, "grad_norm": 1.5995744466781616, "learning_rate": 5.593782941538577e-06, "loss": 1.2725, "step": 12191 }, { "epoch": 0.6665664338531772, "grad_norm": 1.6007660627365112, "learning_rate": 5.59214260948026e-06, "loss": 1.3078, "step": 12192 }, { "epoch": 0.6666211062968987, "grad_norm": 1.769324541091919, "learning_rate": 5.5905024246106485e-06, "loss": 1.4532, "step": 12193 }, { "epoch": 0.6666757787406202, "grad_norm": 1.3002605438232422, "learning_rate": 5.5888623869845095e-06, "loss": 1.2427, "step": 12194 }, { "epoch": 0.6667304511843418, "grad_norm": 1.5007576942443848, "learning_rate": 5.587222496656601e-06, "loss": 1.3279, "step": 12195 }, { "epoch": 0.6667851236280634, "grad_norm": 1.6439192295074463, "learning_rate": 5.585582753681693e-06, "loss": 1.3913, "step": 12196 }, { "epoch": 0.6668397960717849, "grad_norm": 1.6414145231246948, "learning_rate": 5.583943158114535e-06, "loss": 1.3746, "step": 12197 }, { "epoch": 0.6668944685155065, "grad_norm": 1.7100814580917358, "learning_rate": 5.5823037100098756e-06, "loss": 1.4091, "step": 12198 }, { "epoch": 0.666949140959228, "grad_norm": 1.2611875534057617, "learning_rate": 5.580664409422464e-06, "loss": 1.4629, "step": 12199 }, { "epoch": 0.6670038134029496, "grad_norm": 1.5312256813049316, "learning_rate": 5.579025256407038e-06, "loss": 1.2706, "step": 12200 }, { "epoch": 0.6670584858466712, "grad_norm": 1.5522429943084717, "learning_rate": 5.577386251018333e-06, "loss": 1.4635, "step": 12201 }, { "epoch": 0.6671131582903926, "grad_norm": 1.1411468982696533, "learning_rate": 5.57574739331108e-06, "loss": 1.4701, "step": 12202 }, { "epoch": 0.6671678307341142, "grad_norm": 1.4341486692428589, "learning_rate": 5.5741086833400015e-06, "loss": 1.5341, "step": 12203 }, { "epoch": 0.6672225031778358, "grad_norm": 1.356351375579834, "learning_rate": 5.572470121159816e-06, "loss": 1.5006, "step": 12204 }, { "epoch": 0.6672771756215573, "grad_norm": 1.3320902585983276, "learning_rate": 5.570831706825248e-06, "loss": 1.3368, "step": 12205 }, { "epoch": 0.6673318480652789, "grad_norm": 1.5211914777755737, "learning_rate": 5.569193440390999e-06, "loss": 1.6438, "step": 12206 }, { "epoch": 0.6673865205090005, "grad_norm": 2.3269155025482178, "learning_rate": 5.567555321911782e-06, "loss": 1.2688, "step": 12207 }, { "epoch": 0.667441192952722, "grad_norm": 1.4690513610839844, "learning_rate": 5.565917351442291e-06, "loss": 1.4421, "step": 12208 }, { "epoch": 0.6674958653964436, "grad_norm": 2.335571050643921, "learning_rate": 5.5642795290372245e-06, "loss": 1.3278, "step": 12209 }, { "epoch": 0.6675505378401652, "grad_norm": 1.6911932229995728, "learning_rate": 5.562641854751275e-06, "loss": 1.592, "step": 12210 }, { "epoch": 0.6676052102838866, "grad_norm": 1.4880110025405884, "learning_rate": 5.561004328639128e-06, "loss": 1.5296, "step": 12211 }, { "epoch": 0.6676598827276082, "grad_norm": 1.742121934890747, "learning_rate": 5.559366950755457e-06, "loss": 1.3773, "step": 12212 }, { "epoch": 0.6677145551713297, "grad_norm": 2.2444169521331787, "learning_rate": 5.557729721154949e-06, "loss": 1.1827, "step": 12213 }, { "epoch": 0.6677692276150513, "grad_norm": 1.2130794525146484, "learning_rate": 5.55609263989227e-06, "loss": 1.46, "step": 12214 }, { "epoch": 0.6678239000587729, "grad_norm": 1.5042965412139893, "learning_rate": 5.554455707022084e-06, "loss": 1.3574, "step": 12215 }, { "epoch": 0.6678785725024944, "grad_norm": 1.8900206089019775, "learning_rate": 5.552818922599056e-06, "loss": 1.3845, "step": 12216 }, { "epoch": 0.667933244946216, "grad_norm": 0.9287706613540649, "learning_rate": 5.551182286677839e-06, "loss": 1.5906, "step": 12217 }, { "epoch": 0.6679879173899376, "grad_norm": 1.6593878269195557, "learning_rate": 5.549545799313081e-06, "loss": 1.3778, "step": 12218 }, { "epoch": 0.668042589833659, "grad_norm": 1.4694801568984985, "learning_rate": 5.5479094605594355e-06, "loss": 1.6769, "step": 12219 }, { "epoch": 0.6680972622773806, "grad_norm": 1.3284258842468262, "learning_rate": 5.546273270471536e-06, "loss": 1.5437, "step": 12220 }, { "epoch": 0.6681519347211022, "grad_norm": 1.3755772113800049, "learning_rate": 5.544637229104027e-06, "loss": 1.4223, "step": 12221 }, { "epoch": 0.6682066071648237, "grad_norm": 1.5881521701812744, "learning_rate": 5.543001336511537e-06, "loss": 1.2664, "step": 12222 }, { "epoch": 0.6682612796085453, "grad_norm": 1.7445231676101685, "learning_rate": 5.541365592748686e-06, "loss": 1.6784, "step": 12223 }, { "epoch": 0.6683159520522669, "grad_norm": 1.871976613998413, "learning_rate": 5.539729997870104e-06, "loss": 1.3, "step": 12224 }, { "epoch": 0.6683706244959884, "grad_norm": 1.5972912311553955, "learning_rate": 5.538094551930405e-06, "loss": 1.4748, "step": 12225 }, { "epoch": 0.66842529693971, "grad_norm": 1.8808499574661255, "learning_rate": 5.536459254984195e-06, "loss": 1.5403, "step": 12226 }, { "epoch": 0.6684799693834315, "grad_norm": 1.1842334270477295, "learning_rate": 5.534824107086088e-06, "loss": 1.4082, "step": 12227 }, { "epoch": 0.668534641827153, "grad_norm": 1.264107584953308, "learning_rate": 5.533189108290682e-06, "loss": 1.6529, "step": 12228 }, { "epoch": 0.6685893142708746, "grad_norm": 1.2289408445358276, "learning_rate": 5.531554258652574e-06, "loss": 1.4581, "step": 12229 }, { "epoch": 0.6686439867145961, "grad_norm": 1.5698376893997192, "learning_rate": 5.529919558226353e-06, "loss": 1.5477, "step": 12230 }, { "epoch": 0.6686986591583177, "grad_norm": 1.516357421875, "learning_rate": 5.528285007066609e-06, "loss": 1.4153, "step": 12231 }, { "epoch": 0.6687533316020393, "grad_norm": 1.5343589782714844, "learning_rate": 5.5266506052279165e-06, "loss": 1.2968, "step": 12232 }, { "epoch": 0.6688080040457608, "grad_norm": 1.787589430809021, "learning_rate": 5.525016352764863e-06, "loss": 1.5943, "step": 12233 }, { "epoch": 0.6688626764894824, "grad_norm": 1.4066479206085205, "learning_rate": 5.52338224973201e-06, "loss": 1.1367, "step": 12234 }, { "epoch": 0.668917348933204, "grad_norm": 1.5701404809951782, "learning_rate": 5.5217482961839305e-06, "loss": 1.3592, "step": 12235 }, { "epoch": 0.6689720213769255, "grad_norm": 1.2049862146377563, "learning_rate": 5.520114492175186e-06, "loss": 1.5722, "step": 12236 }, { "epoch": 0.6690266938206471, "grad_norm": 1.8336036205291748, "learning_rate": 5.518480837760327e-06, "loss": 1.2556, "step": 12237 }, { "epoch": 0.6690813662643686, "grad_norm": 1.6567349433898926, "learning_rate": 5.516847332993912e-06, "loss": 1.2391, "step": 12238 }, { "epoch": 0.6691360387080901, "grad_norm": 1.661092758178711, "learning_rate": 5.515213977930485e-06, "loss": 1.6179, "step": 12239 }, { "epoch": 0.6691907111518117, "grad_norm": 1.5224519968032837, "learning_rate": 5.513580772624587e-06, "loss": 1.3881, "step": 12240 }, { "epoch": 0.6692453835955333, "grad_norm": 1.4495607614517212, "learning_rate": 5.511947717130755e-06, "loss": 1.457, "step": 12241 }, { "epoch": 0.6693000560392548, "grad_norm": 2.26312255859375, "learning_rate": 5.51031481150352e-06, "loss": 1.2458, "step": 12242 }, { "epoch": 0.6693547284829764, "grad_norm": 1.6873369216918945, "learning_rate": 5.508682055797405e-06, "loss": 1.6107, "step": 12243 }, { "epoch": 0.6694094009266979, "grad_norm": 1.4857900142669678, "learning_rate": 5.507049450066939e-06, "loss": 1.3072, "step": 12244 }, { "epoch": 0.6694640733704195, "grad_norm": 1.5149304866790771, "learning_rate": 5.505416994366634e-06, "loss": 1.4603, "step": 12245 }, { "epoch": 0.6695187458141411, "grad_norm": 1.5847426652908325, "learning_rate": 5.5037846887510015e-06, "loss": 1.5031, "step": 12246 }, { "epoch": 0.6695734182578625, "grad_norm": 1.6309399604797363, "learning_rate": 5.502152533274551e-06, "loss": 1.3876, "step": 12247 }, { "epoch": 0.6696280907015841, "grad_norm": 1.3107998371124268, "learning_rate": 5.500520527991777e-06, "loss": 1.6017, "step": 12248 }, { "epoch": 0.6696827631453057, "grad_norm": 1.400789499282837, "learning_rate": 5.498888672957187e-06, "loss": 1.4599, "step": 12249 }, { "epoch": 0.6697374355890272, "grad_norm": 1.1884047985076904, "learning_rate": 5.497256968225264e-06, "loss": 1.5366, "step": 12250 }, { "epoch": 0.6697921080327488, "grad_norm": 1.6402968168258667, "learning_rate": 5.495625413850495e-06, "loss": 1.1625, "step": 12251 }, { "epoch": 0.6698467804764704, "grad_norm": 1.3503166437149048, "learning_rate": 5.493994009887367e-06, "loss": 1.3824, "step": 12252 }, { "epoch": 0.6699014529201919, "grad_norm": 1.559302568435669, "learning_rate": 5.492362756390353e-06, "loss": 1.3935, "step": 12253 }, { "epoch": 0.6699561253639135, "grad_norm": 1.6320048570632935, "learning_rate": 5.490731653413922e-06, "loss": 1.0912, "step": 12254 }, { "epoch": 0.6700107978076351, "grad_norm": 1.4678887128829956, "learning_rate": 5.489100701012544e-06, "loss": 1.3878, "step": 12255 }, { "epoch": 0.6700654702513565, "grad_norm": 1.313405990600586, "learning_rate": 5.487469899240678e-06, "loss": 1.3967, "step": 12256 }, { "epoch": 0.6701201426950781, "grad_norm": 1.4662837982177734, "learning_rate": 5.485839248152778e-06, "loss": 1.3143, "step": 12257 }, { "epoch": 0.6701748151387996, "grad_norm": 1.4413197040557861, "learning_rate": 5.484208747803301e-06, "loss": 1.4949, "step": 12258 }, { "epoch": 0.6702294875825212, "grad_norm": 1.7594208717346191, "learning_rate": 5.482578398246692e-06, "loss": 1.4867, "step": 12259 }, { "epoch": 0.6702841600262428, "grad_norm": 1.7042005062103271, "learning_rate": 5.480948199537386e-06, "loss": 1.5455, "step": 12260 }, { "epoch": 0.6703388324699643, "grad_norm": 1.214881420135498, "learning_rate": 5.479318151729828e-06, "loss": 1.7344, "step": 12261 }, { "epoch": 0.6703935049136859, "grad_norm": 1.4072952270507812, "learning_rate": 5.47768825487844e-06, "loss": 1.3311, "step": 12262 }, { "epoch": 0.6704481773574075, "grad_norm": 1.6128332614898682, "learning_rate": 5.476058509037658e-06, "loss": 1.4422, "step": 12263 }, { "epoch": 0.670502849801129, "grad_norm": 1.301649570465088, "learning_rate": 5.474428914261899e-06, "loss": 1.3581, "step": 12264 }, { "epoch": 0.6705575222448505, "grad_norm": 1.3267234563827515, "learning_rate": 5.472799470605573e-06, "loss": 1.4252, "step": 12265 }, { "epoch": 0.6706121946885721, "grad_norm": 2.0608808994293213, "learning_rate": 5.4711701781231e-06, "loss": 1.1437, "step": 12266 }, { "epoch": 0.6706668671322936, "grad_norm": 1.616317868232727, "learning_rate": 5.469541036868883e-06, "loss": 1.3845, "step": 12267 }, { "epoch": 0.6707215395760152, "grad_norm": 1.5965152978897095, "learning_rate": 5.467912046897321e-06, "loss": 1.234, "step": 12268 }, { "epoch": 0.6707762120197368, "grad_norm": 1.4556565284729004, "learning_rate": 5.46628320826281e-06, "loss": 1.4248, "step": 12269 }, { "epoch": 0.6708308844634583, "grad_norm": 1.7066107988357544, "learning_rate": 5.4646545210197435e-06, "loss": 1.5351, "step": 12270 }, { "epoch": 0.6708855569071799, "grad_norm": 1.4529401063919067, "learning_rate": 5.4630259852225e-06, "loss": 1.3796, "step": 12271 }, { "epoch": 0.6709402293509014, "grad_norm": 1.3265618085861206, "learning_rate": 5.461397600925469e-06, "loss": 1.6493, "step": 12272 }, { "epoch": 0.670994901794623, "grad_norm": 1.3768421411514282, "learning_rate": 5.459769368183022e-06, "loss": 1.3546, "step": 12273 }, { "epoch": 0.6710495742383445, "grad_norm": 1.516342282295227, "learning_rate": 5.458141287049525e-06, "loss": 1.5092, "step": 12274 }, { "epoch": 0.671104246682066, "grad_norm": 1.6940211057662964, "learning_rate": 5.456513357579354e-06, "loss": 1.3598, "step": 12275 }, { "epoch": 0.6711589191257876, "grad_norm": 1.8234533071517944, "learning_rate": 5.4548855798268595e-06, "loss": 1.4529, "step": 12276 }, { "epoch": 0.6712135915695092, "grad_norm": 1.2647418975830078, "learning_rate": 5.453257953846405e-06, "loss": 1.5069, "step": 12277 }, { "epoch": 0.6712682640132307, "grad_norm": 1.2252978086471558, "learning_rate": 5.451630479692336e-06, "loss": 1.4814, "step": 12278 }, { "epoch": 0.6713229364569523, "grad_norm": 1.67441725730896, "learning_rate": 5.450003157418997e-06, "loss": 1.4098, "step": 12279 }, { "epoch": 0.6713776089006739, "grad_norm": 1.7447211742401123, "learning_rate": 5.448375987080732e-06, "loss": 1.4492, "step": 12280 }, { "epoch": 0.6714322813443954, "grad_norm": 1.4126735925674438, "learning_rate": 5.446748968731872e-06, "loss": 1.4222, "step": 12281 }, { "epoch": 0.671486953788117, "grad_norm": 1.6124677658081055, "learning_rate": 5.445122102426745e-06, "loss": 1.4365, "step": 12282 }, { "epoch": 0.6715416262318386, "grad_norm": 1.627593994140625, "learning_rate": 5.443495388219684e-06, "loss": 1.4092, "step": 12283 }, { "epoch": 0.67159629867556, "grad_norm": 1.0850398540496826, "learning_rate": 5.441868826165002e-06, "loss": 1.646, "step": 12284 }, { "epoch": 0.6716509711192816, "grad_norm": 1.838063359260559, "learning_rate": 5.4402424163170145e-06, "loss": 1.3689, "step": 12285 }, { "epoch": 0.6717056435630031, "grad_norm": 1.238438606262207, "learning_rate": 5.438616158730034e-06, "loss": 1.1364, "step": 12286 }, { "epoch": 0.6717603160067247, "grad_norm": 1.5999597311019897, "learning_rate": 5.436990053458365e-06, "loss": 1.2774, "step": 12287 }, { "epoch": 0.6718149884504463, "grad_norm": 1.3472402095794678, "learning_rate": 5.435364100556302e-06, "loss": 1.3921, "step": 12288 }, { "epoch": 0.6718696608941678, "grad_norm": 1.44089674949646, "learning_rate": 5.433738300078146e-06, "loss": 1.1534, "step": 12289 }, { "epoch": 0.6719243333378894, "grad_norm": 1.4721095561981201, "learning_rate": 5.43211265207818e-06, "loss": 1.471, "step": 12290 }, { "epoch": 0.671979005781611, "grad_norm": 1.6850459575653076, "learning_rate": 5.430487156610695e-06, "loss": 1.3489, "step": 12291 }, { "epoch": 0.6720336782253324, "grad_norm": 1.4865138530731201, "learning_rate": 5.428861813729966e-06, "loss": 1.4324, "step": 12292 }, { "epoch": 0.672088350669054, "grad_norm": 1.4157487154006958, "learning_rate": 5.4272366234902685e-06, "loss": 1.4004, "step": 12293 }, { "epoch": 0.6721430231127756, "grad_norm": 1.5327165126800537, "learning_rate": 5.42561158594587e-06, "loss": 1.4758, "step": 12294 }, { "epoch": 0.6721976955564971, "grad_norm": 1.9353094100952148, "learning_rate": 5.423986701151035e-06, "loss": 1.4193, "step": 12295 }, { "epoch": 0.6722523680002187, "grad_norm": 1.288905143737793, "learning_rate": 5.4223619691600185e-06, "loss": 1.5868, "step": 12296 }, { "epoch": 0.6723070404439403, "grad_norm": 1.7716435194015503, "learning_rate": 5.42073739002708e-06, "loss": 1.7581, "step": 12297 }, { "epoch": 0.6723617128876618, "grad_norm": 1.3433459997177124, "learning_rate": 5.419112963806468e-06, "loss": 1.5921, "step": 12298 }, { "epoch": 0.6724163853313834, "grad_norm": 1.5754069089889526, "learning_rate": 5.417488690552417e-06, "loss": 1.6511, "step": 12299 }, { "epoch": 0.6724710577751049, "grad_norm": 1.6966722011566162, "learning_rate": 5.415864570319177e-06, "loss": 1.3263, "step": 12300 }, { "epoch": 0.6725257302188264, "grad_norm": 1.786993145942688, "learning_rate": 5.414240603160976e-06, "loss": 1.6845, "step": 12301 }, { "epoch": 0.672580402662548, "grad_norm": 1.1169158220291138, "learning_rate": 5.412616789132038e-06, "loss": 1.4055, "step": 12302 }, { "epoch": 0.6726350751062695, "grad_norm": 2.0829944610595703, "learning_rate": 5.4109931282865945e-06, "loss": 1.2942, "step": 12303 }, { "epoch": 0.6726897475499911, "grad_norm": 1.3864277601242065, "learning_rate": 5.409369620678855e-06, "loss": 1.4708, "step": 12304 }, { "epoch": 0.6727444199937127, "grad_norm": 1.3183780908584595, "learning_rate": 5.407746266363039e-06, "loss": 1.7722, "step": 12305 }, { "epoch": 0.6727990924374342, "grad_norm": 1.3659145832061768, "learning_rate": 5.406123065393352e-06, "loss": 1.5742, "step": 12306 }, { "epoch": 0.6728537648811558, "grad_norm": 1.4902780055999756, "learning_rate": 5.4045000178239945e-06, "loss": 1.482, "step": 12307 }, { "epoch": 0.6729084373248774, "grad_norm": 1.6137360334396362, "learning_rate": 5.402877123709167e-06, "loss": 1.3751, "step": 12308 }, { "epoch": 0.6729631097685989, "grad_norm": 1.2940419912338257, "learning_rate": 5.401254383103058e-06, "loss": 1.3511, "step": 12309 }, { "epoch": 0.6730177822123204, "grad_norm": 1.3772392272949219, "learning_rate": 5.399631796059854e-06, "loss": 1.3071, "step": 12310 }, { "epoch": 0.673072454656042, "grad_norm": 1.2920444011688232, "learning_rate": 5.398009362633743e-06, "loss": 1.4861, "step": 12311 }, { "epoch": 0.6731271270997635, "grad_norm": 1.4777247905731201, "learning_rate": 5.3963870828788975e-06, "loss": 1.541, "step": 12312 }, { "epoch": 0.6731817995434851, "grad_norm": 2.027665853500366, "learning_rate": 5.394764956849488e-06, "loss": 1.3809, "step": 12313 }, { "epoch": 0.6732364719872066, "grad_norm": 1.345978021621704, "learning_rate": 5.393142984599684e-06, "loss": 1.3606, "step": 12314 }, { "epoch": 0.6732911444309282, "grad_norm": 1.5363051891326904, "learning_rate": 5.3915211661836485e-06, "loss": 1.403, "step": 12315 }, { "epoch": 0.6733458168746498, "grad_norm": 1.719859004020691, "learning_rate": 5.389899501655531e-06, "loss": 1.2172, "step": 12316 }, { "epoch": 0.6734004893183713, "grad_norm": 1.2588889598846436, "learning_rate": 5.388277991069491e-06, "loss": 1.5556, "step": 12317 }, { "epoch": 0.6734551617620929, "grad_norm": 1.2089159488677979, "learning_rate": 5.38665663447967e-06, "loss": 1.6019, "step": 12318 }, { "epoch": 0.6735098342058145, "grad_norm": 1.5790746212005615, "learning_rate": 5.3850354319402095e-06, "loss": 1.3753, "step": 12319 }, { "epoch": 0.6735645066495359, "grad_norm": 1.7344518899917603, "learning_rate": 5.383414383505245e-06, "loss": 1.3757, "step": 12320 }, { "epoch": 0.6736191790932575, "grad_norm": 1.2122364044189453, "learning_rate": 5.381793489228906e-06, "loss": 1.5356, "step": 12321 }, { "epoch": 0.6736738515369791, "grad_norm": 1.4227327108383179, "learning_rate": 5.380172749165321e-06, "loss": 1.5663, "step": 12322 }, { "epoch": 0.6737285239807006, "grad_norm": 1.4448094367980957, "learning_rate": 5.378552163368609e-06, "loss": 1.2402, "step": 12323 }, { "epoch": 0.6737831964244222, "grad_norm": 1.5741370916366577, "learning_rate": 5.3769317318928805e-06, "loss": 1.5236, "step": 12324 }, { "epoch": 0.6738378688681438, "grad_norm": 1.7196943759918213, "learning_rate": 5.375311454792255e-06, "loss": 1.3882, "step": 12325 }, { "epoch": 0.6738925413118653, "grad_norm": 1.2727867364883423, "learning_rate": 5.373691332120832e-06, "loss": 1.4904, "step": 12326 }, { "epoch": 0.6739472137555869, "grad_norm": 2.018242835998535, "learning_rate": 5.372071363932706e-06, "loss": 1.254, "step": 12327 }, { "epoch": 0.6740018861993083, "grad_norm": 1.2841796875, "learning_rate": 5.370451550281982e-06, "loss": 1.3117, "step": 12328 }, { "epoch": 0.6740565586430299, "grad_norm": 1.3194984197616577, "learning_rate": 5.368831891222744e-06, "loss": 1.5471, "step": 12329 }, { "epoch": 0.6741112310867515, "grad_norm": 1.3384456634521484, "learning_rate": 5.367212386809073e-06, "loss": 1.3716, "step": 12330 }, { "epoch": 0.674165903530473, "grad_norm": 1.4565335512161255, "learning_rate": 5.365593037095055e-06, "loss": 1.3757, "step": 12331 }, { "epoch": 0.6742205759741946, "grad_norm": 2.2196204662323, "learning_rate": 5.363973842134761e-06, "loss": 1.6651, "step": 12332 }, { "epoch": 0.6742752484179162, "grad_norm": 1.6124517917633057, "learning_rate": 5.362354801982259e-06, "loss": 1.2835, "step": 12333 }, { "epoch": 0.6743299208616377, "grad_norm": 1.404422402381897, "learning_rate": 5.360735916691613e-06, "loss": 1.6177, "step": 12334 }, { "epoch": 0.6743845933053593, "grad_norm": 1.3243099451065063, "learning_rate": 5.359117186316875e-06, "loss": 1.245, "step": 12335 }, { "epoch": 0.6744392657490809, "grad_norm": 2.006117582321167, "learning_rate": 5.357498610912111e-06, "loss": 1.3924, "step": 12336 }, { "epoch": 0.6744939381928023, "grad_norm": 1.481713891029358, "learning_rate": 5.355880190531362e-06, "loss": 1.5281, "step": 12337 }, { "epoch": 0.6745486106365239, "grad_norm": 1.8438794612884521, "learning_rate": 5.354261925228666e-06, "loss": 1.1297, "step": 12338 }, { "epoch": 0.6746032830802455, "grad_norm": 1.345502495765686, "learning_rate": 5.3526438150580705e-06, "loss": 1.6343, "step": 12339 }, { "epoch": 0.674657955523967, "grad_norm": 1.5234577655792236, "learning_rate": 5.351025860073604e-06, "loss": 1.5426, "step": 12340 }, { "epoch": 0.6747126279676886, "grad_norm": 1.2156351804733276, "learning_rate": 5.349408060329288e-06, "loss": 1.6097, "step": 12341 }, { "epoch": 0.6747673004114101, "grad_norm": 1.4496279954910278, "learning_rate": 5.347790415879155e-06, "loss": 1.3739, "step": 12342 }, { "epoch": 0.6748219728551317, "grad_norm": 1.7974647283554077, "learning_rate": 5.346172926777215e-06, "loss": 1.4937, "step": 12343 }, { "epoch": 0.6748766452988533, "grad_norm": 1.5310777425765991, "learning_rate": 5.344555593077483e-06, "loss": 1.4555, "step": 12344 }, { "epoch": 0.6749313177425748, "grad_norm": 1.6412338018417358, "learning_rate": 5.342938414833965e-06, "loss": 1.3873, "step": 12345 }, { "epoch": 0.6749859901862963, "grad_norm": 1.4825141429901123, "learning_rate": 5.341321392100656e-06, "loss": 1.337, "step": 12346 }, { "epoch": 0.6750406626300179, "grad_norm": 1.596977949142456, "learning_rate": 5.3397045249315615e-06, "loss": 1.6374, "step": 12347 }, { "epoch": 0.6750953350737394, "grad_norm": 1.2656211853027344, "learning_rate": 5.338087813380669e-06, "loss": 1.5933, "step": 12348 }, { "epoch": 0.675150007517461, "grad_norm": 1.4623587131500244, "learning_rate": 5.336471257501961e-06, "loss": 1.4948, "step": 12349 }, { "epoch": 0.6752046799611826, "grad_norm": 1.724960446357727, "learning_rate": 5.334854857349423e-06, "loss": 1.1037, "step": 12350 }, { "epoch": 0.6752593524049041, "grad_norm": 1.536849856376648, "learning_rate": 5.3332386129770295e-06, "loss": 1.468, "step": 12351 }, { "epoch": 0.6753140248486257, "grad_norm": 1.4348474740982056, "learning_rate": 5.331622524438745e-06, "loss": 1.34, "step": 12352 }, { "epoch": 0.6753686972923473, "grad_norm": 1.4613944292068481, "learning_rate": 5.330006591788543e-06, "loss": 1.4195, "step": 12353 }, { "epoch": 0.6754233697360688, "grad_norm": 1.9148497581481934, "learning_rate": 5.328390815080381e-06, "loss": 1.388, "step": 12354 }, { "epoch": 0.6754780421797904, "grad_norm": 1.5474531650543213, "learning_rate": 5.326775194368208e-06, "loss": 1.3951, "step": 12355 }, { "epoch": 0.6755327146235118, "grad_norm": 1.7509589195251465, "learning_rate": 5.32515972970598e-06, "loss": 1.3639, "step": 12356 }, { "epoch": 0.6755873870672334, "grad_norm": 1.6659542322158813, "learning_rate": 5.323544421147637e-06, "loss": 1.1214, "step": 12357 }, { "epoch": 0.675642059510955, "grad_norm": 1.6499069929122925, "learning_rate": 5.3219292687471226e-06, "loss": 1.4348, "step": 12358 }, { "epoch": 0.6756967319546765, "grad_norm": 1.3635741472244263, "learning_rate": 5.320314272558366e-06, "loss": 1.1384, "step": 12359 }, { "epoch": 0.6757514043983981, "grad_norm": 1.366146206855774, "learning_rate": 5.3186994326352926e-06, "loss": 1.5253, "step": 12360 }, { "epoch": 0.6758060768421197, "grad_norm": 1.407118797302246, "learning_rate": 5.317084749031835e-06, "loss": 1.4973, "step": 12361 }, { "epoch": 0.6758607492858412, "grad_norm": 1.361838936805725, "learning_rate": 5.315470221801906e-06, "loss": 1.456, "step": 12362 }, { "epoch": 0.6759154217295628, "grad_norm": 1.3764841556549072, "learning_rate": 5.313855850999414e-06, "loss": 1.6007, "step": 12363 }, { "epoch": 0.6759700941732844, "grad_norm": 1.7141127586364746, "learning_rate": 5.312241636678277e-06, "loss": 1.531, "step": 12364 }, { "epoch": 0.6760247666170058, "grad_norm": 1.5818771123886108, "learning_rate": 5.310627578892391e-06, "loss": 1.4828, "step": 12365 }, { "epoch": 0.6760794390607274, "grad_norm": 1.5398412942886353, "learning_rate": 5.309013677695651e-06, "loss": 1.4644, "step": 12366 }, { "epoch": 0.676134111504449, "grad_norm": 1.5608062744140625, "learning_rate": 5.307399933141955e-06, "loss": 1.2227, "step": 12367 }, { "epoch": 0.6761887839481705, "grad_norm": 1.4536558389663696, "learning_rate": 5.3057863452851875e-06, "loss": 1.3076, "step": 12368 }, { "epoch": 0.6762434563918921, "grad_norm": 1.4573805332183838, "learning_rate": 5.304172914179224e-06, "loss": 1.6323, "step": 12369 }, { "epoch": 0.6762981288356136, "grad_norm": 1.9625160694122314, "learning_rate": 5.302559639877952e-06, "loss": 1.3293, "step": 12370 }, { "epoch": 0.6763528012793352, "grad_norm": 1.4909673929214478, "learning_rate": 5.3009465224352355e-06, "loss": 1.4962, "step": 12371 }, { "epoch": 0.6764074737230568, "grad_norm": 1.3776720762252808, "learning_rate": 5.2993335619049415e-06, "loss": 1.4351, "step": 12372 }, { "epoch": 0.6764621461667782, "grad_norm": 1.5626566410064697, "learning_rate": 5.297720758340929e-06, "loss": 1.1254, "step": 12373 }, { "epoch": 0.6765168186104998, "grad_norm": 1.5809255838394165, "learning_rate": 5.296108111797052e-06, "loss": 1.5506, "step": 12374 }, { "epoch": 0.6765714910542214, "grad_norm": 1.3069766759872437, "learning_rate": 5.294495622327167e-06, "loss": 1.4662, "step": 12375 }, { "epoch": 0.6766261634979429, "grad_norm": 1.489415168762207, "learning_rate": 5.292883289985116e-06, "loss": 1.41, "step": 12376 }, { "epoch": 0.6766808359416645, "grad_norm": 1.4239593744277954, "learning_rate": 5.291271114824732e-06, "loss": 1.2513, "step": 12377 }, { "epoch": 0.6767355083853861, "grad_norm": 1.5266790390014648, "learning_rate": 5.289659096899859e-06, "loss": 1.4178, "step": 12378 }, { "epoch": 0.6767901808291076, "grad_norm": 2.188927412033081, "learning_rate": 5.288047236264322e-06, "loss": 1.2301, "step": 12379 }, { "epoch": 0.6768448532728292, "grad_norm": 2.0938265323638916, "learning_rate": 5.286435532971941e-06, "loss": 1.2396, "step": 12380 }, { "epoch": 0.6768995257165508, "grad_norm": 1.3794804811477661, "learning_rate": 5.284823987076542e-06, "loss": 1.4595, "step": 12381 }, { "epoch": 0.6769541981602722, "grad_norm": 1.9037944078445435, "learning_rate": 5.283212598631935e-06, "loss": 1.3201, "step": 12382 }, { "epoch": 0.6770088706039938, "grad_norm": 1.5407311916351318, "learning_rate": 5.281601367691928e-06, "loss": 1.3282, "step": 12383 }, { "epoch": 0.6770635430477153, "grad_norm": 1.3062665462493896, "learning_rate": 5.279990294310322e-06, "loss": 1.441, "step": 12384 }, { "epoch": 0.6771182154914369, "grad_norm": 2.2665629386901855, "learning_rate": 5.278379378540917e-06, "loss": 1.3812, "step": 12385 }, { "epoch": 0.6771728879351585, "grad_norm": 1.674009084701538, "learning_rate": 5.2767686204375e-06, "loss": 1.3034, "step": 12386 }, { "epoch": 0.67722756037888, "grad_norm": 1.5561586618423462, "learning_rate": 5.275158020053865e-06, "loss": 1.5508, "step": 12387 }, { "epoch": 0.6772822328226016, "grad_norm": 1.497793197631836, "learning_rate": 5.273547577443789e-06, "loss": 1.3451, "step": 12388 }, { "epoch": 0.6773369052663232, "grad_norm": 1.6768540143966675, "learning_rate": 5.271937292661054e-06, "loss": 1.2506, "step": 12389 }, { "epoch": 0.6773915777100447, "grad_norm": 1.3581068515777588, "learning_rate": 5.2703271657594255e-06, "loss": 1.411, "step": 12390 }, { "epoch": 0.6774462501537662, "grad_norm": 1.515602946281433, "learning_rate": 5.26871719679267e-06, "loss": 1.5287, "step": 12391 }, { "epoch": 0.6775009225974878, "grad_norm": 1.7404206991195679, "learning_rate": 5.267107385814552e-06, "loss": 1.5095, "step": 12392 }, { "epoch": 0.6775555950412093, "grad_norm": 1.5725558996200562, "learning_rate": 5.265497732878826e-06, "loss": 1.2643, "step": 12393 }, { "epoch": 0.6776102674849309, "grad_norm": 1.4486827850341797, "learning_rate": 5.263888238039234e-06, "loss": 1.339, "step": 12394 }, { "epoch": 0.6776649399286525, "grad_norm": 1.3690528869628906, "learning_rate": 5.262278901349533e-06, "loss": 1.6792, "step": 12395 }, { "epoch": 0.677719612372374, "grad_norm": 1.6437742710113525, "learning_rate": 5.260669722863457e-06, "loss": 1.4278, "step": 12396 }, { "epoch": 0.6777742848160956, "grad_norm": 1.6923877000808716, "learning_rate": 5.2590607026347395e-06, "loss": 1.3187, "step": 12397 }, { "epoch": 0.6778289572598171, "grad_norm": 1.313812017440796, "learning_rate": 5.2574518407171115e-06, "loss": 1.4176, "step": 12398 }, { "epoch": 0.6778836297035387, "grad_norm": 1.5403270721435547, "learning_rate": 5.255843137164294e-06, "loss": 1.2893, "step": 12399 }, { "epoch": 0.6779383021472603, "grad_norm": 1.4520127773284912, "learning_rate": 5.254234592030003e-06, "loss": 1.4327, "step": 12400 }, { "epoch": 0.6779929745909817, "grad_norm": 1.4465060234069824, "learning_rate": 5.252626205367959e-06, "loss": 1.672, "step": 12401 }, { "epoch": 0.6780476470347033, "grad_norm": 1.4618690013885498, "learning_rate": 5.251017977231862e-06, "loss": 1.4413, "step": 12402 }, { "epoch": 0.6781023194784249, "grad_norm": 1.5479596853256226, "learning_rate": 5.249409907675422e-06, "loss": 1.5259, "step": 12403 }, { "epoch": 0.6781569919221464, "grad_norm": 1.4841405153274536, "learning_rate": 5.2478019967523355e-06, "loss": 1.9082, "step": 12404 }, { "epoch": 0.678211664365868, "grad_norm": 1.508545994758606, "learning_rate": 5.246194244516285e-06, "loss": 1.4914, "step": 12405 }, { "epoch": 0.6782663368095896, "grad_norm": 1.4404202699661255, "learning_rate": 5.244586651020969e-06, "loss": 1.4382, "step": 12406 }, { "epoch": 0.6783210092533111, "grad_norm": 1.3857789039611816, "learning_rate": 5.242979216320063e-06, "loss": 1.3636, "step": 12407 }, { "epoch": 0.6783756816970327, "grad_norm": 1.3211307525634766, "learning_rate": 5.241371940467239e-06, "loss": 1.376, "step": 12408 }, { "epoch": 0.6784303541407543, "grad_norm": 1.696366310119629, "learning_rate": 5.239764823516178e-06, "loss": 1.2661, "step": 12409 }, { "epoch": 0.6784850265844757, "grad_norm": 1.9447582960128784, "learning_rate": 5.238157865520539e-06, "loss": 1.5413, "step": 12410 }, { "epoch": 0.6785396990281973, "grad_norm": 1.7574880123138428, "learning_rate": 5.236551066533983e-06, "loss": 1.3263, "step": 12411 }, { "epoch": 0.6785943714719188, "grad_norm": 1.793891429901123, "learning_rate": 5.234944426610165e-06, "loss": 1.2651, "step": 12412 }, { "epoch": 0.6786490439156404, "grad_norm": 1.8189672231674194, "learning_rate": 5.233337945802734e-06, "loss": 1.1721, "step": 12413 }, { "epoch": 0.678703716359362, "grad_norm": 1.3969852924346924, "learning_rate": 5.2317316241653304e-06, "loss": 1.4236, "step": 12414 }, { "epoch": 0.6787583888030835, "grad_norm": 1.2362186908721924, "learning_rate": 5.2301254617516e-06, "loss": 1.4188, "step": 12415 }, { "epoch": 0.6788130612468051, "grad_norm": 1.5749342441558838, "learning_rate": 5.228519458615171e-06, "loss": 1.3671, "step": 12416 }, { "epoch": 0.6788677336905267, "grad_norm": 2.8090155124664307, "learning_rate": 5.226913614809677e-06, "loss": 1.3396, "step": 12417 }, { "epoch": 0.6789224061342481, "grad_norm": 1.2052764892578125, "learning_rate": 5.225307930388737e-06, "loss": 1.4541, "step": 12418 }, { "epoch": 0.6789770785779697, "grad_norm": 1.623572826385498, "learning_rate": 5.223702405405966e-06, "loss": 1.5032, "step": 12419 }, { "epoch": 0.6790317510216913, "grad_norm": 1.951576590538025, "learning_rate": 5.222097039914984e-06, "loss": 1.5549, "step": 12420 }, { "epoch": 0.6790864234654128, "grad_norm": 1.4492119550704956, "learning_rate": 5.2204918339693925e-06, "loss": 1.4758, "step": 12421 }, { "epoch": 0.6791410959091344, "grad_norm": 1.681439757347107, "learning_rate": 5.218886787622794e-06, "loss": 1.5225, "step": 12422 }, { "epoch": 0.679195768352856, "grad_norm": 1.4513373374938965, "learning_rate": 5.217281900928787e-06, "loss": 1.4427, "step": 12423 }, { "epoch": 0.6792504407965775, "grad_norm": 1.4099498987197876, "learning_rate": 5.215677173940959e-06, "loss": 1.5765, "step": 12424 }, { "epoch": 0.6793051132402991, "grad_norm": 1.2287105321884155, "learning_rate": 5.214072606712893e-06, "loss": 1.5646, "step": 12425 }, { "epoch": 0.6793597856840206, "grad_norm": 1.843375325202942, "learning_rate": 5.212468199298178e-06, "loss": 1.1607, "step": 12426 }, { "epoch": 0.6794144581277421, "grad_norm": 1.6560420989990234, "learning_rate": 5.210863951750382e-06, "loss": 1.2656, "step": 12427 }, { "epoch": 0.6794691305714637, "grad_norm": 1.4799600839614868, "learning_rate": 5.209259864123075e-06, "loss": 1.1724, "step": 12428 }, { "epoch": 0.6795238030151852, "grad_norm": 1.4617072343826294, "learning_rate": 5.207655936469825e-06, "loss": 1.3389, "step": 12429 }, { "epoch": 0.6795784754589068, "grad_norm": 1.5629291534423828, "learning_rate": 5.206052168844187e-06, "loss": 1.4966, "step": 12430 }, { "epoch": 0.6796331479026284, "grad_norm": 1.37758207321167, "learning_rate": 5.204448561299718e-06, "loss": 1.4829, "step": 12431 }, { "epoch": 0.6796878203463499, "grad_norm": 1.5960644483566284, "learning_rate": 5.202845113889967e-06, "loss": 1.3387, "step": 12432 }, { "epoch": 0.6797424927900715, "grad_norm": 1.3287311792373657, "learning_rate": 5.201241826668469e-06, "loss": 1.5194, "step": 12433 }, { "epoch": 0.6797971652337931, "grad_norm": 1.6315284967422485, "learning_rate": 5.199638699688772e-06, "loss": 1.5296, "step": 12434 }, { "epoch": 0.6798518376775146, "grad_norm": 1.5174769163131714, "learning_rate": 5.198035733004403e-06, "loss": 1.4844, "step": 12435 }, { "epoch": 0.6799065101212362, "grad_norm": 1.5002089738845825, "learning_rate": 5.1964329266688885e-06, "loss": 1.2938, "step": 12436 }, { "epoch": 0.6799611825649577, "grad_norm": 1.6209354400634766, "learning_rate": 5.19483028073575e-06, "loss": 1.7096, "step": 12437 }, { "epoch": 0.6800158550086792, "grad_norm": 1.6378135681152344, "learning_rate": 5.193227795258505e-06, "loss": 1.3213, "step": 12438 }, { "epoch": 0.6800705274524008, "grad_norm": 1.6389973163604736, "learning_rate": 5.19162547029066e-06, "loss": 1.4114, "step": 12439 }, { "epoch": 0.6801251998961224, "grad_norm": 1.7491753101348877, "learning_rate": 5.190023305885727e-06, "loss": 1.1354, "step": 12440 }, { "epoch": 0.6801798723398439, "grad_norm": 1.6268068552017212, "learning_rate": 5.188421302097202e-06, "loss": 1.5108, "step": 12441 }, { "epoch": 0.6802345447835655, "grad_norm": 1.693733811378479, "learning_rate": 5.186819458978578e-06, "loss": 1.3835, "step": 12442 }, { "epoch": 0.680289217227287, "grad_norm": 1.6079329252243042, "learning_rate": 5.185217776583349e-06, "loss": 1.486, "step": 12443 }, { "epoch": 0.6803438896710086, "grad_norm": 1.2775977849960327, "learning_rate": 5.183616254964994e-06, "loss": 1.6672, "step": 12444 }, { "epoch": 0.6803985621147302, "grad_norm": 1.8097543716430664, "learning_rate": 5.182014894176999e-06, "loss": 1.4534, "step": 12445 }, { "epoch": 0.6804532345584516, "grad_norm": 1.403462290763855, "learning_rate": 5.18041369427283e-06, "loss": 1.5416, "step": 12446 }, { "epoch": 0.6805079070021732, "grad_norm": 2.361922025680542, "learning_rate": 5.178812655305954e-06, "loss": 1.4014, "step": 12447 }, { "epoch": 0.6805625794458948, "grad_norm": 1.4884659051895142, "learning_rate": 5.177211777329842e-06, "loss": 1.3111, "step": 12448 }, { "epoch": 0.6806172518896163, "grad_norm": 1.7564148902893066, "learning_rate": 5.1756110603979445e-06, "loss": 1.2515, "step": 12449 }, { "epoch": 0.6806719243333379, "grad_norm": 1.476680874824524, "learning_rate": 5.174010504563716e-06, "loss": 1.5162, "step": 12450 }, { "epoch": 0.6807265967770595, "grad_norm": 1.4092342853546143, "learning_rate": 5.1724101098806e-06, "loss": 1.2173, "step": 12451 }, { "epoch": 0.680781269220781, "grad_norm": 1.4502538442611694, "learning_rate": 5.170809876402039e-06, "loss": 1.464, "step": 12452 }, { "epoch": 0.6808359416645026, "grad_norm": 1.6792610883712769, "learning_rate": 5.169209804181465e-06, "loss": 1.1758, "step": 12453 }, { "epoch": 0.6808906141082242, "grad_norm": 1.5489318370819092, "learning_rate": 5.167609893272314e-06, "loss": 1.5027, "step": 12454 }, { "epoch": 0.6809452865519456, "grad_norm": 1.7425744533538818, "learning_rate": 5.166010143728009e-06, "loss": 1.4511, "step": 12455 }, { "epoch": 0.6809999589956672, "grad_norm": 1.6366654634475708, "learning_rate": 5.1644105556019655e-06, "loss": 1.4498, "step": 12456 }, { "epoch": 0.6810546314393887, "grad_norm": 2.1688859462738037, "learning_rate": 5.1628111289476025e-06, "loss": 1.3703, "step": 12457 }, { "epoch": 0.6811093038831103, "grad_norm": 1.4113068580627441, "learning_rate": 5.161211863818328e-06, "loss": 1.4687, "step": 12458 }, { "epoch": 0.6811639763268319, "grad_norm": 1.1089907884597778, "learning_rate": 5.159612760267541e-06, "loss": 1.5007, "step": 12459 }, { "epoch": 0.6812186487705534, "grad_norm": 1.521805763244629, "learning_rate": 5.158013818348645e-06, "loss": 1.4518, "step": 12460 }, { "epoch": 0.681273321214275, "grad_norm": 1.3168845176696777, "learning_rate": 5.1564150381150305e-06, "loss": 1.4237, "step": 12461 }, { "epoch": 0.6813279936579966, "grad_norm": 1.4169317483901978, "learning_rate": 5.154816419620082e-06, "loss": 1.6191, "step": 12462 }, { "epoch": 0.681382666101718, "grad_norm": 1.5237879753112793, "learning_rate": 5.153217962917184e-06, "loss": 1.5444, "step": 12463 }, { "epoch": 0.6814373385454396, "grad_norm": 1.5912346839904785, "learning_rate": 5.151619668059707e-06, "loss": 1.3904, "step": 12464 }, { "epoch": 0.6814920109891612, "grad_norm": 1.3596515655517578, "learning_rate": 5.15002153510103e-06, "loss": 1.5289, "step": 12465 }, { "epoch": 0.6815466834328827, "grad_norm": 1.7522273063659668, "learning_rate": 5.148423564094517e-06, "loss": 1.4087, "step": 12466 }, { "epoch": 0.6816013558766043, "grad_norm": 1.6765048503875732, "learning_rate": 5.146825755093521e-06, "loss": 1.5657, "step": 12467 }, { "epoch": 0.6816560283203259, "grad_norm": 1.6037687063217163, "learning_rate": 5.145228108151403e-06, "loss": 1.6197, "step": 12468 }, { "epoch": 0.6817107007640474, "grad_norm": 1.79555082321167, "learning_rate": 5.143630623321514e-06, "loss": 1.4455, "step": 12469 }, { "epoch": 0.681765373207769, "grad_norm": 2.1374905109405518, "learning_rate": 5.142033300657188e-06, "loss": 1.4624, "step": 12470 }, { "epoch": 0.6818200456514905, "grad_norm": 1.8845553398132324, "learning_rate": 5.1404361402117745e-06, "loss": 1.2466, "step": 12471 }, { "epoch": 0.681874718095212, "grad_norm": 1.6504926681518555, "learning_rate": 5.138839142038601e-06, "loss": 1.3108, "step": 12472 }, { "epoch": 0.6819293905389336, "grad_norm": 1.7134264707565308, "learning_rate": 5.137242306190991e-06, "loss": 1.285, "step": 12473 }, { "epoch": 0.6819840629826551, "grad_norm": 1.8070099353790283, "learning_rate": 5.135645632722277e-06, "loss": 1.7604, "step": 12474 }, { "epoch": 0.6820387354263767, "grad_norm": 1.424241304397583, "learning_rate": 5.134049121685769e-06, "loss": 1.4946, "step": 12475 }, { "epoch": 0.6820934078700983, "grad_norm": 1.3869636058807373, "learning_rate": 5.132452773134779e-06, "loss": 1.3945, "step": 12476 }, { "epoch": 0.6821480803138198, "grad_norm": 1.2607172727584839, "learning_rate": 5.130856587122613e-06, "loss": 1.4272, "step": 12477 }, { "epoch": 0.6822027527575414, "grad_norm": 1.6451624631881714, "learning_rate": 5.129260563702568e-06, "loss": 1.5587, "step": 12478 }, { "epoch": 0.682257425201263, "grad_norm": 1.7466986179351807, "learning_rate": 5.127664702927946e-06, "loss": 1.3083, "step": 12479 }, { "epoch": 0.6823120976449845, "grad_norm": 1.312599539756775, "learning_rate": 5.126069004852033e-06, "loss": 1.6163, "step": 12480 }, { "epoch": 0.682366770088706, "grad_norm": 1.6746363639831543, "learning_rate": 5.124473469528108e-06, "loss": 1.523, "step": 12481 }, { "epoch": 0.6824214425324276, "grad_norm": 1.4332605600357056, "learning_rate": 5.122878097009459e-06, "loss": 1.6333, "step": 12482 }, { "epoch": 0.6824761149761491, "grad_norm": 1.4671945571899414, "learning_rate": 5.121282887349354e-06, "loss": 1.4093, "step": 12483 }, { "epoch": 0.6825307874198707, "grad_norm": 1.326985239982605, "learning_rate": 5.11968784060106e-06, "loss": 1.1431, "step": 12484 }, { "epoch": 0.6825854598635922, "grad_norm": 1.647729754447937, "learning_rate": 5.118092956817844e-06, "loss": 1.6722, "step": 12485 }, { "epoch": 0.6826401323073138, "grad_norm": 1.4424940347671509, "learning_rate": 5.11649823605296e-06, "loss": 1.334, "step": 12486 }, { "epoch": 0.6826948047510354, "grad_norm": 1.570557713508606, "learning_rate": 5.114903678359655e-06, "loss": 1.3349, "step": 12487 }, { "epoch": 0.6827494771947569, "grad_norm": 2.0701920986175537, "learning_rate": 5.1133092837911835e-06, "loss": 1.4476, "step": 12488 }, { "epoch": 0.6828041496384785, "grad_norm": 1.7161251306533813, "learning_rate": 5.111715052400783e-06, "loss": 1.4445, "step": 12489 }, { "epoch": 0.6828588220822001, "grad_norm": 1.137705683708191, "learning_rate": 5.110120984241687e-06, "loss": 1.5127, "step": 12490 }, { "epoch": 0.6829134945259215, "grad_norm": 1.4827938079833984, "learning_rate": 5.108527079367125e-06, "loss": 1.3015, "step": 12491 }, { "epoch": 0.6829681669696431, "grad_norm": 1.6554234027862549, "learning_rate": 5.106933337830318e-06, "loss": 1.5411, "step": 12492 }, { "epoch": 0.6830228394133647, "grad_norm": 1.7134422063827515, "learning_rate": 5.105339759684493e-06, "loss": 1.3418, "step": 12493 }, { "epoch": 0.6830775118570862, "grad_norm": 1.77813720703125, "learning_rate": 5.103746344982859e-06, "loss": 1.4109, "step": 12494 }, { "epoch": 0.6831321843008078, "grad_norm": 1.4516851902008057, "learning_rate": 5.1021530937786215e-06, "loss": 1.3522, "step": 12495 }, { "epoch": 0.6831868567445294, "grad_norm": 1.590302586555481, "learning_rate": 5.100560006124988e-06, "loss": 1.362, "step": 12496 }, { "epoch": 0.6832415291882509, "grad_norm": 1.878716230392456, "learning_rate": 5.098967082075153e-06, "loss": 1.4111, "step": 12497 }, { "epoch": 0.6832962016319725, "grad_norm": 1.2558112144470215, "learning_rate": 5.097374321682304e-06, "loss": 1.5918, "step": 12498 }, { "epoch": 0.683350874075694, "grad_norm": 1.403426170349121, "learning_rate": 5.095781724999633e-06, "loss": 1.4049, "step": 12499 }, { "epoch": 0.6834055465194155, "grad_norm": 1.5211642980575562, "learning_rate": 5.094189292080321e-06, "loss": 1.2504, "step": 12500 }, { "epoch": 0.6834602189631371, "grad_norm": 1.734389066696167, "learning_rate": 5.092597022977539e-06, "loss": 1.2603, "step": 12501 }, { "epoch": 0.6835148914068586, "grad_norm": 1.613938570022583, "learning_rate": 5.091004917744457e-06, "loss": 1.2403, "step": 12502 }, { "epoch": 0.6835695638505802, "grad_norm": 1.8780999183654785, "learning_rate": 5.089412976434238e-06, "loss": 1.5394, "step": 12503 }, { "epoch": 0.6836242362943018, "grad_norm": 1.2090928554534912, "learning_rate": 5.087821199100047e-06, "loss": 1.6547, "step": 12504 }, { "epoch": 0.6836789087380233, "grad_norm": 2.1071088314056396, "learning_rate": 5.086229585795032e-06, "loss": 1.2115, "step": 12505 }, { "epoch": 0.6837335811817449, "grad_norm": 1.390873908996582, "learning_rate": 5.0846381365723375e-06, "loss": 1.3316, "step": 12506 }, { "epoch": 0.6837882536254665, "grad_norm": 1.4836323261260986, "learning_rate": 5.083046851485114e-06, "loss": 1.2765, "step": 12507 }, { "epoch": 0.683842926069188, "grad_norm": 1.170444130897522, "learning_rate": 5.081455730586495e-06, "loss": 1.2003, "step": 12508 }, { "epoch": 0.6838975985129095, "grad_norm": 1.1618530750274658, "learning_rate": 5.079864773929606e-06, "loss": 1.493, "step": 12509 }, { "epoch": 0.6839522709566311, "grad_norm": 1.383016586303711, "learning_rate": 5.07827398156758e-06, "loss": 1.4373, "step": 12510 }, { "epoch": 0.6840069434003526, "grad_norm": 1.4215152263641357, "learning_rate": 5.076683353553538e-06, "loss": 1.3419, "step": 12511 }, { "epoch": 0.6840616158440742, "grad_norm": 1.7754480838775635, "learning_rate": 5.075092889940587e-06, "loss": 1.3227, "step": 12512 }, { "epoch": 0.6841162882877957, "grad_norm": 1.421597957611084, "learning_rate": 5.073502590781844e-06, "loss": 1.6354, "step": 12513 }, { "epoch": 0.6841709607315173, "grad_norm": 1.3665159940719604, "learning_rate": 5.07191245613041e-06, "loss": 1.4146, "step": 12514 }, { "epoch": 0.6842256331752389, "grad_norm": 1.6467760801315308, "learning_rate": 5.070322486039383e-06, "loss": 1.5279, "step": 12515 }, { "epoch": 0.6842803056189604, "grad_norm": 1.4379091262817383, "learning_rate": 5.0687326805618575e-06, "loss": 1.4055, "step": 12516 }, { "epoch": 0.684334978062682, "grad_norm": 1.582737684249878, "learning_rate": 5.067143039750914e-06, "loss": 1.4968, "step": 12517 }, { "epoch": 0.6843896505064035, "grad_norm": 1.2364009618759155, "learning_rate": 5.065553563659644e-06, "loss": 1.634, "step": 12518 }, { "epoch": 0.684444322950125, "grad_norm": 1.6011687517166138, "learning_rate": 5.06396425234112e-06, "loss": 1.5723, "step": 12519 }, { "epoch": 0.6844989953938466, "grad_norm": 1.4202096462249756, "learning_rate": 5.062375105848409e-06, "loss": 1.4546, "step": 12520 }, { "epoch": 0.6845536678375682, "grad_norm": 2.1087381839752197, "learning_rate": 5.060786124234582e-06, "loss": 1.7023, "step": 12521 }, { "epoch": 0.6846083402812897, "grad_norm": 1.3929787874221802, "learning_rate": 5.059197307552698e-06, "loss": 1.7054, "step": 12522 }, { "epoch": 0.6846630127250113, "grad_norm": 1.651878833770752, "learning_rate": 5.057608655855806e-06, "loss": 1.5456, "step": 12523 }, { "epoch": 0.6847176851687329, "grad_norm": 1.5039455890655518, "learning_rate": 5.056020169196962e-06, "loss": 1.5335, "step": 12524 }, { "epoch": 0.6847723576124544, "grad_norm": 1.6535098552703857, "learning_rate": 5.054431847629204e-06, "loss": 1.2337, "step": 12525 }, { "epoch": 0.684827030056176, "grad_norm": 1.4578468799591064, "learning_rate": 5.052843691205571e-06, "loss": 1.3801, "step": 12526 }, { "epoch": 0.6848817024998974, "grad_norm": 1.3489853143692017, "learning_rate": 5.051255699979099e-06, "loss": 1.3738, "step": 12527 }, { "epoch": 0.684936374943619, "grad_norm": 1.7501164674758911, "learning_rate": 5.049667874002811e-06, "loss": 1.4665, "step": 12528 }, { "epoch": 0.6849910473873406, "grad_norm": 1.452688455581665, "learning_rate": 5.048080213329729e-06, "loss": 1.2162, "step": 12529 }, { "epoch": 0.6850457198310621, "grad_norm": 1.6642059087753296, "learning_rate": 5.04649271801287e-06, "loss": 1.5052, "step": 12530 }, { "epoch": 0.6851003922747837, "grad_norm": 1.4737273454666138, "learning_rate": 5.0449053881052365e-06, "loss": 1.4241, "step": 12531 }, { "epoch": 0.6851550647185053, "grad_norm": 1.1391117572784424, "learning_rate": 5.043318223659846e-06, "loss": 1.5415, "step": 12532 }, { "epoch": 0.6852097371622268, "grad_norm": 1.4321916103363037, "learning_rate": 5.04173122472969e-06, "loss": 1.4647, "step": 12533 }, { "epoch": 0.6852644096059484, "grad_norm": 2.180356502532959, "learning_rate": 5.0401443913677605e-06, "loss": 1.5117, "step": 12534 }, { "epoch": 0.68531908204967, "grad_norm": 1.6371873617172241, "learning_rate": 5.038557723627051e-06, "loss": 1.3909, "step": 12535 }, { "epoch": 0.6853737544933914, "grad_norm": 1.5968891382217407, "learning_rate": 5.036971221560543e-06, "loss": 1.3765, "step": 12536 }, { "epoch": 0.685428426937113, "grad_norm": 1.7752450704574585, "learning_rate": 5.035384885221206e-06, "loss": 1.5373, "step": 12537 }, { "epoch": 0.6854830993808346, "grad_norm": 1.845893383026123, "learning_rate": 5.033798714662023e-06, "loss": 1.4123, "step": 12538 }, { "epoch": 0.6855377718245561, "grad_norm": 1.493908166885376, "learning_rate": 5.0322127099359554e-06, "loss": 1.3507, "step": 12539 }, { "epoch": 0.6855924442682777, "grad_norm": 1.4337128400802612, "learning_rate": 5.030626871095961e-06, "loss": 1.2657, "step": 12540 }, { "epoch": 0.6856471167119992, "grad_norm": 1.525985836982727, "learning_rate": 5.029041198194997e-06, "loss": 1.326, "step": 12541 }, { "epoch": 0.6857017891557208, "grad_norm": 2.224147319793701, "learning_rate": 5.027455691286012e-06, "loss": 1.1818, "step": 12542 }, { "epoch": 0.6857564615994424, "grad_norm": 1.5517847537994385, "learning_rate": 5.025870350421945e-06, "loss": 1.3585, "step": 12543 }, { "epoch": 0.6858111340431639, "grad_norm": 1.3239595890045166, "learning_rate": 5.0242851756557446e-06, "loss": 1.479, "step": 12544 }, { "epoch": 0.6858658064868854, "grad_norm": 1.5694642066955566, "learning_rate": 5.022700167040333e-06, "loss": 1.4238, "step": 12545 }, { "epoch": 0.685920478930607, "grad_norm": 1.5523356199264526, "learning_rate": 5.021115324628647e-06, "loss": 1.6481, "step": 12546 }, { "epoch": 0.6859751513743285, "grad_norm": 1.350695252418518, "learning_rate": 5.019530648473604e-06, "loss": 1.5284, "step": 12547 }, { "epoch": 0.6860298238180501, "grad_norm": 1.2698252201080322, "learning_rate": 5.017946138628116e-06, "loss": 1.6778, "step": 12548 }, { "epoch": 0.6860844962617717, "grad_norm": 1.6505908966064453, "learning_rate": 5.0163617951451e-06, "loss": 1.4554, "step": 12549 }, { "epoch": 0.6861391687054932, "grad_norm": 1.2658072710037231, "learning_rate": 5.0147776180774575e-06, "loss": 1.4029, "step": 12550 }, { "epoch": 0.6861938411492148, "grad_norm": 1.7103910446166992, "learning_rate": 5.0131936074780865e-06, "loss": 1.4413, "step": 12551 }, { "epoch": 0.6862485135929364, "grad_norm": 1.3372751474380493, "learning_rate": 5.011609763399885e-06, "loss": 1.0985, "step": 12552 }, { "epoch": 0.6863031860366579, "grad_norm": 1.816835880279541, "learning_rate": 5.010026085895741e-06, "loss": 1.4493, "step": 12553 }, { "epoch": 0.6863578584803794, "grad_norm": 2.3036868572235107, "learning_rate": 5.008442575018534e-06, "loss": 1.467, "step": 12554 }, { "epoch": 0.6864125309241009, "grad_norm": 1.3405009508132935, "learning_rate": 5.0068592308211425e-06, "loss": 1.5152, "step": 12555 }, { "epoch": 0.6864672033678225, "grad_norm": 1.700920820236206, "learning_rate": 5.005276053356438e-06, "loss": 1.5625, "step": 12556 }, { "epoch": 0.6865218758115441, "grad_norm": 1.8405238389968872, "learning_rate": 5.003693042677283e-06, "loss": 1.5686, "step": 12557 }, { "epoch": 0.6865765482552656, "grad_norm": 1.4794178009033203, "learning_rate": 5.0021101988365465e-06, "loss": 1.3917, "step": 12558 }, { "epoch": 0.6866312206989872, "grad_norm": 1.2062163352966309, "learning_rate": 5.000527521887073e-06, "loss": 1.4703, "step": 12559 }, { "epoch": 0.6866858931427088, "grad_norm": 2.0572257041931152, "learning_rate": 4.998945011881722e-06, "loss": 1.1801, "step": 12560 }, { "epoch": 0.6867405655864303, "grad_norm": 1.3230005502700806, "learning_rate": 4.997362668873331e-06, "loss": 1.5821, "step": 12561 }, { "epoch": 0.6867952380301519, "grad_norm": 1.668958306312561, "learning_rate": 4.995780492914737e-06, "loss": 1.4244, "step": 12562 }, { "epoch": 0.6868499104738734, "grad_norm": 1.666780710220337, "learning_rate": 4.9941984840587786e-06, "loss": 1.6136, "step": 12563 }, { "epoch": 0.6869045829175949, "grad_norm": 1.7330964803695679, "learning_rate": 4.992616642358279e-06, "loss": 1.4819, "step": 12564 }, { "epoch": 0.6869592553613165, "grad_norm": 1.920805811882019, "learning_rate": 4.991034967866061e-06, "loss": 1.281, "step": 12565 }, { "epoch": 0.6870139278050381, "grad_norm": 1.3894338607788086, "learning_rate": 4.989453460634938e-06, "loss": 1.2859, "step": 12566 }, { "epoch": 0.6870686002487596, "grad_norm": 1.8000701665878296, "learning_rate": 4.987872120717721e-06, "loss": 1.2948, "step": 12567 }, { "epoch": 0.6871232726924812, "grad_norm": 1.5720303058624268, "learning_rate": 4.9862909481672126e-06, "loss": 1.5918, "step": 12568 }, { "epoch": 0.6871779451362027, "grad_norm": 1.3810125589370728, "learning_rate": 4.984709943036219e-06, "loss": 1.5656, "step": 12569 }, { "epoch": 0.6872326175799243, "grad_norm": 1.5220848321914673, "learning_rate": 4.9831291053775275e-06, "loss": 1.3418, "step": 12570 }, { "epoch": 0.6872872900236459, "grad_norm": 1.1962546110153198, "learning_rate": 4.9815484352439244e-06, "loss": 1.4379, "step": 12571 }, { "epoch": 0.6873419624673673, "grad_norm": 1.4892257452011108, "learning_rate": 4.9799679326882e-06, "loss": 1.2117, "step": 12572 }, { "epoch": 0.6873966349110889, "grad_norm": 1.453993320465088, "learning_rate": 4.978387597763121e-06, "loss": 1.3773, "step": 12573 }, { "epoch": 0.6874513073548105, "grad_norm": 1.634856939315796, "learning_rate": 4.976807430521469e-06, "loss": 1.3763, "step": 12574 }, { "epoch": 0.687505979798532, "grad_norm": 1.437628984451294, "learning_rate": 4.975227431016003e-06, "loss": 1.403, "step": 12575 }, { "epoch": 0.6875606522422536, "grad_norm": 1.3171216249465942, "learning_rate": 4.97364759929948e-06, "loss": 1.3213, "step": 12576 }, { "epoch": 0.6876153246859752, "grad_norm": 1.8596640825271606, "learning_rate": 4.972067935424664e-06, "loss": 1.4748, "step": 12577 }, { "epoch": 0.6876699971296967, "grad_norm": 2.146611452102661, "learning_rate": 4.970488439444296e-06, "loss": 1.3578, "step": 12578 }, { "epoch": 0.6877246695734183, "grad_norm": 1.5548129081726074, "learning_rate": 4.968909111411122e-06, "loss": 1.171, "step": 12579 }, { "epoch": 0.6877793420171399, "grad_norm": 1.6232317686080933, "learning_rate": 4.967329951377878e-06, "loss": 1.2597, "step": 12580 }, { "epoch": 0.6878340144608613, "grad_norm": 1.9121216535568237, "learning_rate": 4.965750959397297e-06, "loss": 1.3296, "step": 12581 }, { "epoch": 0.6878886869045829, "grad_norm": 1.4060227870941162, "learning_rate": 4.964172135522102e-06, "loss": 1.5842, "step": 12582 }, { "epoch": 0.6879433593483044, "grad_norm": 1.2545909881591797, "learning_rate": 4.962593479805018e-06, "loss": 1.61, "step": 12583 }, { "epoch": 0.687998031792026, "grad_norm": 1.6700055599212646, "learning_rate": 4.961014992298759e-06, "loss": 1.127, "step": 12584 }, { "epoch": 0.6880527042357476, "grad_norm": 1.6396535634994507, "learning_rate": 4.95943667305603e-06, "loss": 1.3978, "step": 12585 }, { "epoch": 0.6881073766794691, "grad_norm": 1.332152009010315, "learning_rate": 4.957858522129544e-06, "loss": 1.4691, "step": 12586 }, { "epoch": 0.6881620491231907, "grad_norm": 1.6465990543365479, "learning_rate": 4.956280539571988e-06, "loss": 1.5417, "step": 12587 }, { "epoch": 0.6882167215669123, "grad_norm": 1.749463677406311, "learning_rate": 4.954702725436065e-06, "loss": 1.3585, "step": 12588 }, { "epoch": 0.6882713940106338, "grad_norm": 1.8550879955291748, "learning_rate": 4.953125079774457e-06, "loss": 1.3674, "step": 12589 }, { "epoch": 0.6883260664543553, "grad_norm": 1.4210492372512817, "learning_rate": 4.9515476026398415e-06, "loss": 1.3335, "step": 12590 }, { "epoch": 0.6883807388980769, "grad_norm": 1.4815773963928223, "learning_rate": 4.9499702940849016e-06, "loss": 1.4897, "step": 12591 }, { "epoch": 0.6884354113417984, "grad_norm": 1.6239986419677734, "learning_rate": 4.948393154162303e-06, "loss": 1.1803, "step": 12592 }, { "epoch": 0.68849008378552, "grad_norm": 1.6670019626617432, "learning_rate": 4.946816182924713e-06, "loss": 1.3522, "step": 12593 }, { "epoch": 0.6885447562292416, "grad_norm": 1.419473648071289, "learning_rate": 4.945239380424787e-06, "loss": 1.4984, "step": 12594 }, { "epoch": 0.6885994286729631, "grad_norm": 1.8619145154953003, "learning_rate": 4.9436627467151795e-06, "loss": 1.3568, "step": 12595 }, { "epoch": 0.6886541011166847, "grad_norm": 1.2264503240585327, "learning_rate": 4.942086281848535e-06, "loss": 1.4007, "step": 12596 }, { "epoch": 0.6887087735604062, "grad_norm": 1.3392298221588135, "learning_rate": 4.9405099858775e-06, "loss": 1.5584, "step": 12597 }, { "epoch": 0.6887634460041278, "grad_norm": 1.42440927028656, "learning_rate": 4.938933858854712e-06, "loss": 1.4787, "step": 12598 }, { "epoch": 0.6888181184478493, "grad_norm": 1.2466241121292114, "learning_rate": 4.937357900832793e-06, "loss": 1.6936, "step": 12599 }, { "epoch": 0.6888727908915708, "grad_norm": 1.4445393085479736, "learning_rate": 4.935782111864378e-06, "loss": 1.6015, "step": 12600 }, { "epoch": 0.6889274633352924, "grad_norm": 1.5077568292617798, "learning_rate": 4.934206492002077e-06, "loss": 1.3854, "step": 12601 }, { "epoch": 0.688982135779014, "grad_norm": 1.4581223726272583, "learning_rate": 4.932631041298513e-06, "loss": 1.4546, "step": 12602 }, { "epoch": 0.6890368082227355, "grad_norm": 1.7212785482406616, "learning_rate": 4.93105575980629e-06, "loss": 1.4489, "step": 12603 }, { "epoch": 0.6890914806664571, "grad_norm": 1.4724382162094116, "learning_rate": 4.92948064757801e-06, "loss": 1.3732, "step": 12604 }, { "epoch": 0.6891461531101787, "grad_norm": 1.4988281726837158, "learning_rate": 4.927905704666268e-06, "loss": 1.5253, "step": 12605 }, { "epoch": 0.6892008255539002, "grad_norm": 2.2284748554229736, "learning_rate": 4.926330931123659e-06, "loss": 1.2278, "step": 12606 }, { "epoch": 0.6892554979976218, "grad_norm": 1.1824764013290405, "learning_rate": 4.92475632700276e-06, "loss": 1.4506, "step": 12607 }, { "epoch": 0.6893101704413434, "grad_norm": 1.4412226676940918, "learning_rate": 4.92318189235616e-06, "loss": 1.5257, "step": 12608 }, { "epoch": 0.6893648428850648, "grad_norm": 1.2659608125686646, "learning_rate": 4.921607627236431e-06, "loss": 1.393, "step": 12609 }, { "epoch": 0.6894195153287864, "grad_norm": 1.5515546798706055, "learning_rate": 4.920033531696137e-06, "loss": 1.3961, "step": 12610 }, { "epoch": 0.6894741877725079, "grad_norm": 1.394658088684082, "learning_rate": 4.918459605787847e-06, "loss": 1.4537, "step": 12611 }, { "epoch": 0.6895288602162295, "grad_norm": 1.3232530355453491, "learning_rate": 4.916885849564115e-06, "loss": 1.5624, "step": 12612 }, { "epoch": 0.6895835326599511, "grad_norm": 1.4956464767456055, "learning_rate": 4.915312263077488e-06, "loss": 1.6511, "step": 12613 }, { "epoch": 0.6896382051036726, "grad_norm": 1.638763666152954, "learning_rate": 4.91373884638052e-06, "loss": 1.376, "step": 12614 }, { "epoch": 0.6896928775473942, "grad_norm": 1.409972906112671, "learning_rate": 4.9121655995257445e-06, "loss": 1.5882, "step": 12615 }, { "epoch": 0.6897475499911158, "grad_norm": 1.7976423501968384, "learning_rate": 4.910592522565702e-06, "loss": 1.2111, "step": 12616 }, { "epoch": 0.6898022224348372, "grad_norm": 1.510457992553711, "learning_rate": 4.9090196155529165e-06, "loss": 1.4431, "step": 12617 }, { "epoch": 0.6898568948785588, "grad_norm": 1.868168592453003, "learning_rate": 4.907446878539913e-06, "loss": 1.3633, "step": 12618 }, { "epoch": 0.6899115673222804, "grad_norm": 1.235390543937683, "learning_rate": 4.905874311579209e-06, "loss": 1.5575, "step": 12619 }, { "epoch": 0.6899662397660019, "grad_norm": 1.4007991552352905, "learning_rate": 4.904301914723315e-06, "loss": 1.5362, "step": 12620 }, { "epoch": 0.6900209122097235, "grad_norm": 1.7089463472366333, "learning_rate": 4.902729688024734e-06, "loss": 1.2782, "step": 12621 }, { "epoch": 0.6900755846534451, "grad_norm": 1.1983234882354736, "learning_rate": 4.9011576315359736e-06, "loss": 1.5428, "step": 12622 }, { "epoch": 0.6901302570971666, "grad_norm": 1.35087251663208, "learning_rate": 4.899585745309525e-06, "loss": 1.4039, "step": 12623 }, { "epoch": 0.6901849295408882, "grad_norm": 1.6986385583877563, "learning_rate": 4.898014029397873e-06, "loss": 1.234, "step": 12624 }, { "epoch": 0.6902396019846097, "grad_norm": 1.7667343616485596, "learning_rate": 4.896442483853507e-06, "loss": 1.5156, "step": 12625 }, { "epoch": 0.6902942744283312, "grad_norm": 1.8183183670043945, "learning_rate": 4.894871108728903e-06, "loss": 1.2423, "step": 12626 }, { "epoch": 0.6903489468720528, "grad_norm": 2.2654247283935547, "learning_rate": 4.89329990407653e-06, "loss": 1.3627, "step": 12627 }, { "epoch": 0.6904036193157743, "grad_norm": 1.5819158554077148, "learning_rate": 4.8917288699488596e-06, "loss": 1.3341, "step": 12628 }, { "epoch": 0.6904582917594959, "grad_norm": 1.6342432498931885, "learning_rate": 4.890158006398345e-06, "loss": 1.3506, "step": 12629 }, { "epoch": 0.6905129642032175, "grad_norm": 1.4250808954238892, "learning_rate": 4.888587313477449e-06, "loss": 1.4864, "step": 12630 }, { "epoch": 0.690567636646939, "grad_norm": 1.2429865598678589, "learning_rate": 4.887016791238617e-06, "loss": 1.523, "step": 12631 }, { "epoch": 0.6906223090906606, "grad_norm": 1.9262362718582153, "learning_rate": 4.8854464397342914e-06, "loss": 1.4375, "step": 12632 }, { "epoch": 0.6906769815343822, "grad_norm": 1.4076569080352783, "learning_rate": 4.883876259016912e-06, "loss": 1.4122, "step": 12633 }, { "epoch": 0.6907316539781037, "grad_norm": 2.00494647026062, "learning_rate": 4.882306249138909e-06, "loss": 1.6292, "step": 12634 }, { "epoch": 0.6907863264218252, "grad_norm": 1.248789668083191, "learning_rate": 4.880736410152707e-06, "loss": 1.4474, "step": 12635 }, { "epoch": 0.6908409988655468, "grad_norm": 1.6558001041412354, "learning_rate": 4.879166742110731e-06, "loss": 1.4893, "step": 12636 }, { "epoch": 0.6908956713092683, "grad_norm": 1.5938280820846558, "learning_rate": 4.877597245065394e-06, "loss": 1.4078, "step": 12637 }, { "epoch": 0.6909503437529899, "grad_norm": 1.5867503881454468, "learning_rate": 4.876027919069103e-06, "loss": 1.2237, "step": 12638 }, { "epoch": 0.6910050161967115, "grad_norm": 1.744295358657837, "learning_rate": 4.874458764174266e-06, "loss": 1.3984, "step": 12639 }, { "epoch": 0.691059688640433, "grad_norm": 1.6952379941940308, "learning_rate": 4.872889780433279e-06, "loss": 1.3963, "step": 12640 }, { "epoch": 0.6911143610841546, "grad_norm": 1.6067415475845337, "learning_rate": 4.871320967898528e-06, "loss": 1.1646, "step": 12641 }, { "epoch": 0.6911690335278761, "grad_norm": 2.0945491790771484, "learning_rate": 4.86975232662241e-06, "loss": 1.5325, "step": 12642 }, { "epoch": 0.6912237059715977, "grad_norm": 1.0668234825134277, "learning_rate": 4.8681838566573e-06, "loss": 1.5902, "step": 12643 }, { "epoch": 0.6912783784153193, "grad_norm": 2.0629639625549316, "learning_rate": 4.866615558055573e-06, "loss": 1.696, "step": 12644 }, { "epoch": 0.6913330508590407, "grad_norm": 1.3047901391983032, "learning_rate": 4.865047430869598e-06, "loss": 1.3301, "step": 12645 }, { "epoch": 0.6913877233027623, "grad_norm": 1.5434547662734985, "learning_rate": 4.863479475151737e-06, "loss": 1.3174, "step": 12646 }, { "epoch": 0.6914423957464839, "grad_norm": 1.6252211332321167, "learning_rate": 4.861911690954351e-06, "loss": 1.4652, "step": 12647 }, { "epoch": 0.6914970681902054, "grad_norm": 1.458909034729004, "learning_rate": 4.860344078329791e-06, "loss": 1.303, "step": 12648 }, { "epoch": 0.691551740633927, "grad_norm": 1.321506142616272, "learning_rate": 4.8587766373304e-06, "loss": 1.4285, "step": 12649 }, { "epoch": 0.6916064130776486, "grad_norm": 1.3001154661178589, "learning_rate": 4.8572093680085245e-06, "loss": 1.4669, "step": 12650 }, { "epoch": 0.6916610855213701, "grad_norm": 1.7688932418823242, "learning_rate": 4.855642270416496e-06, "loss": 1.3844, "step": 12651 }, { "epoch": 0.6917157579650917, "grad_norm": 1.6632856130599976, "learning_rate": 4.85407534460664e-06, "loss": 1.496, "step": 12652 }, { "epoch": 0.6917704304088133, "grad_norm": 1.747863531112671, "learning_rate": 4.852508590631288e-06, "loss": 1.5783, "step": 12653 }, { "epoch": 0.6918251028525347, "grad_norm": 1.1005607843399048, "learning_rate": 4.850942008542753e-06, "loss": 1.6173, "step": 12654 }, { "epoch": 0.6918797752962563, "grad_norm": 1.5454061031341553, "learning_rate": 4.849375598393342e-06, "loss": 1.2681, "step": 12655 }, { "epoch": 0.6919344477399778, "grad_norm": 1.6939740180969238, "learning_rate": 4.8478093602353715e-06, "loss": 1.476, "step": 12656 }, { "epoch": 0.6919891201836994, "grad_norm": 1.6384978294372559, "learning_rate": 4.846243294121136e-06, "loss": 1.5549, "step": 12657 }, { "epoch": 0.692043792627421, "grad_norm": 1.4814103841781616, "learning_rate": 4.8446774001029295e-06, "loss": 1.4215, "step": 12658 }, { "epoch": 0.6920984650711425, "grad_norm": 1.36307954788208, "learning_rate": 4.843111678233042e-06, "loss": 1.4881, "step": 12659 }, { "epoch": 0.6921531375148641, "grad_norm": 1.240866780281067, "learning_rate": 4.841546128563754e-06, "loss": 1.4936, "step": 12660 }, { "epoch": 0.6922078099585857, "grad_norm": 1.4985038042068481, "learning_rate": 4.8399807511473486e-06, "loss": 1.5466, "step": 12661 }, { "epoch": 0.6922624824023071, "grad_norm": 1.8732330799102783, "learning_rate": 4.838415546036095e-06, "loss": 1.4884, "step": 12662 }, { "epoch": 0.6923171548460287, "grad_norm": 1.7302956581115723, "learning_rate": 4.836850513282253e-06, "loss": 1.0119, "step": 12663 }, { "epoch": 0.6923718272897503, "grad_norm": 1.9875271320343018, "learning_rate": 4.835285652938093e-06, "loss": 1.2509, "step": 12664 }, { "epoch": 0.6924264997334718, "grad_norm": 1.6380938291549683, "learning_rate": 4.833720965055865e-06, "loss": 1.4072, "step": 12665 }, { "epoch": 0.6924811721771934, "grad_norm": 1.3365561962127686, "learning_rate": 4.832156449687812e-06, "loss": 1.2853, "step": 12666 }, { "epoch": 0.692535844620915, "grad_norm": 1.468501329421997, "learning_rate": 4.830592106886186e-06, "loss": 1.558, "step": 12667 }, { "epoch": 0.6925905170646365, "grad_norm": 1.544593095779419, "learning_rate": 4.82902793670322e-06, "loss": 1.4936, "step": 12668 }, { "epoch": 0.6926451895083581, "grad_norm": 1.1926863193511963, "learning_rate": 4.827463939191141e-06, "loss": 1.4361, "step": 12669 }, { "epoch": 0.6926998619520796, "grad_norm": 1.5370532274246216, "learning_rate": 4.825900114402185e-06, "loss": 1.4982, "step": 12670 }, { "epoch": 0.6927545343958011, "grad_norm": 1.484313726425171, "learning_rate": 4.824336462388563e-06, "loss": 1.2887, "step": 12671 }, { "epoch": 0.6928092068395227, "grad_norm": 1.5064891576766968, "learning_rate": 4.8227729832024914e-06, "loss": 1.4119, "step": 12672 }, { "epoch": 0.6928638792832442, "grad_norm": 1.8778185844421387, "learning_rate": 4.82120967689618e-06, "loss": 1.4154, "step": 12673 }, { "epoch": 0.6929185517269658, "grad_norm": 1.8026202917099, "learning_rate": 4.819646543521825e-06, "loss": 1.4367, "step": 12674 }, { "epoch": 0.6929732241706874, "grad_norm": 1.835309624671936, "learning_rate": 4.818083583131633e-06, "loss": 1.6062, "step": 12675 }, { "epoch": 0.6930278966144089, "grad_norm": 1.5553886890411377, "learning_rate": 4.816520795777789e-06, "loss": 1.5453, "step": 12676 }, { "epoch": 0.6930825690581305, "grad_norm": 1.7196775674819946, "learning_rate": 4.8149581815124756e-06, "loss": 1.4699, "step": 12677 }, { "epoch": 0.6931372415018521, "grad_norm": 1.5748529434204102, "learning_rate": 4.81339574038788e-06, "loss": 1.6451, "step": 12678 }, { "epoch": 0.6931919139455736, "grad_norm": 1.5286134481430054, "learning_rate": 4.811833472456171e-06, "loss": 1.4187, "step": 12679 }, { "epoch": 0.6932465863892951, "grad_norm": 1.4742441177368164, "learning_rate": 4.810271377769512e-06, "loss": 1.2781, "step": 12680 }, { "epoch": 0.6933012588330167, "grad_norm": 1.64246666431427, "learning_rate": 4.808709456380075e-06, "loss": 1.1895, "step": 12681 }, { "epoch": 0.6933559312767382, "grad_norm": 1.6934070587158203, "learning_rate": 4.80714770834001e-06, "loss": 1.4042, "step": 12682 }, { "epoch": 0.6934106037204598, "grad_norm": 2.1495697498321533, "learning_rate": 4.805586133701468e-06, "loss": 1.2988, "step": 12683 }, { "epoch": 0.6934652761641813, "grad_norm": 1.278846263885498, "learning_rate": 4.804024732516596e-06, "loss": 1.4771, "step": 12684 }, { "epoch": 0.6935199486079029, "grad_norm": 1.7572641372680664, "learning_rate": 4.802463504837526e-06, "loss": 1.353, "step": 12685 }, { "epoch": 0.6935746210516245, "grad_norm": 1.8116804361343384, "learning_rate": 4.8009024507163996e-06, "loss": 1.4915, "step": 12686 }, { "epoch": 0.693629293495346, "grad_norm": 1.3498157262802124, "learning_rate": 4.79934157020534e-06, "loss": 1.398, "step": 12687 }, { "epoch": 0.6936839659390676, "grad_norm": 1.491816520690918, "learning_rate": 4.797780863356466e-06, "loss": 1.3867, "step": 12688 }, { "epoch": 0.6937386383827892, "grad_norm": 1.4136720895767212, "learning_rate": 4.7962203302219e-06, "loss": 1.2045, "step": 12689 }, { "epoch": 0.6937933108265106, "grad_norm": 1.4319031238555908, "learning_rate": 4.7946599708537485e-06, "loss": 1.2419, "step": 12690 }, { "epoch": 0.6938479832702322, "grad_norm": 1.6021711826324463, "learning_rate": 4.793099785304111e-06, "loss": 1.4909, "step": 12691 }, { "epoch": 0.6939026557139538, "grad_norm": 1.6268543004989624, "learning_rate": 4.791539773625094e-06, "loss": 1.3207, "step": 12692 }, { "epoch": 0.6939573281576753, "grad_norm": 1.7642922401428223, "learning_rate": 4.789979935868786e-06, "loss": 1.5359, "step": 12693 }, { "epoch": 0.6940120006013969, "grad_norm": 1.7820699214935303, "learning_rate": 4.788420272087271e-06, "loss": 1.5649, "step": 12694 }, { "epoch": 0.6940666730451185, "grad_norm": 1.3481712341308594, "learning_rate": 4.786860782332634e-06, "loss": 1.5439, "step": 12695 }, { "epoch": 0.69412134548884, "grad_norm": 1.4623656272888184, "learning_rate": 4.78530146665695e-06, "loss": 1.3809, "step": 12696 }, { "epoch": 0.6941760179325616, "grad_norm": 1.5711939334869385, "learning_rate": 4.783742325112286e-06, "loss": 1.2585, "step": 12697 }, { "epoch": 0.694230690376283, "grad_norm": 1.5291223526000977, "learning_rate": 4.782183357750707e-06, "loss": 1.2324, "step": 12698 }, { "epoch": 0.6942853628200046, "grad_norm": 1.6033287048339844, "learning_rate": 4.780624564624265e-06, "loss": 1.423, "step": 12699 }, { "epoch": 0.6943400352637262, "grad_norm": 1.4562594890594482, "learning_rate": 4.779065945785021e-06, "loss": 1.4213, "step": 12700 }, { "epoch": 0.6943947077074477, "grad_norm": 1.6553555727005005, "learning_rate": 4.777507501285016e-06, "loss": 1.2628, "step": 12701 }, { "epoch": 0.6944493801511693, "grad_norm": 1.8042089939117432, "learning_rate": 4.775949231176287e-06, "loss": 1.4454, "step": 12702 }, { "epoch": 0.6945040525948909, "grad_norm": 1.598296880722046, "learning_rate": 4.7743911355108745e-06, "loss": 1.4646, "step": 12703 }, { "epoch": 0.6945587250386124, "grad_norm": 1.7102911472320557, "learning_rate": 4.772833214340805e-06, "loss": 1.4213, "step": 12704 }, { "epoch": 0.694613397482334, "grad_norm": 1.4847114086151123, "learning_rate": 4.771275467718096e-06, "loss": 1.4195, "step": 12705 }, { "epoch": 0.6946680699260556, "grad_norm": 1.6634209156036377, "learning_rate": 4.769717895694774e-06, "loss": 1.2223, "step": 12706 }, { "epoch": 0.694722742369777, "grad_norm": 1.4733433723449707, "learning_rate": 4.768160498322843e-06, "loss": 1.3737, "step": 12707 }, { "epoch": 0.6947774148134986, "grad_norm": 1.1770492792129517, "learning_rate": 4.766603275654308e-06, "loss": 1.5434, "step": 12708 }, { "epoch": 0.6948320872572202, "grad_norm": 1.5203608274459839, "learning_rate": 4.765046227741173e-06, "loss": 1.3966, "step": 12709 }, { "epoch": 0.6948867597009417, "grad_norm": 1.3662126064300537, "learning_rate": 4.7634893546354275e-06, "loss": 1.4787, "step": 12710 }, { "epoch": 0.6949414321446633, "grad_norm": 1.2992182970046997, "learning_rate": 4.761932656389061e-06, "loss": 1.5835, "step": 12711 }, { "epoch": 0.6949961045883848, "grad_norm": 1.7215087413787842, "learning_rate": 4.760376133054056e-06, "loss": 1.317, "step": 12712 }, { "epoch": 0.6950507770321064, "grad_norm": 1.7425305843353271, "learning_rate": 4.7588197846823814e-06, "loss": 1.5458, "step": 12713 }, { "epoch": 0.695105449475828, "grad_norm": 1.289212703704834, "learning_rate": 4.757263611326018e-06, "loss": 1.5617, "step": 12714 }, { "epoch": 0.6951601219195495, "grad_norm": 1.9541789293289185, "learning_rate": 4.755707613036925e-06, "loss": 1.4953, "step": 12715 }, { "epoch": 0.695214794363271, "grad_norm": 1.2796339988708496, "learning_rate": 4.7541517898670565e-06, "loss": 1.6391, "step": 12716 }, { "epoch": 0.6952694668069926, "grad_norm": 1.523383378982544, "learning_rate": 4.7525961418683734e-06, "loss": 1.4003, "step": 12717 }, { "epoch": 0.6953241392507141, "grad_norm": 1.5383334159851074, "learning_rate": 4.751040669092819e-06, "loss": 1.3301, "step": 12718 }, { "epoch": 0.6953788116944357, "grad_norm": 1.4462547302246094, "learning_rate": 4.74948537159233e-06, "loss": 1.2519, "step": 12719 }, { "epoch": 0.6954334841381573, "grad_norm": 1.3527721166610718, "learning_rate": 4.747930249418848e-06, "loss": 1.558, "step": 12720 }, { "epoch": 0.6954881565818788, "grad_norm": 1.64442777633667, "learning_rate": 4.7463753026243e-06, "loss": 1.4063, "step": 12721 }, { "epoch": 0.6955428290256004, "grad_norm": 1.9241105318069458, "learning_rate": 4.744820531260609e-06, "loss": 1.3613, "step": 12722 }, { "epoch": 0.695597501469322, "grad_norm": 1.4811170101165771, "learning_rate": 4.743265935379692e-06, "loss": 1.3005, "step": 12723 }, { "epoch": 0.6956521739130435, "grad_norm": 1.1322433948516846, "learning_rate": 4.7417115150334606e-06, "loss": 1.6034, "step": 12724 }, { "epoch": 0.695706846356765, "grad_norm": 1.6310012340545654, "learning_rate": 4.740157270273816e-06, "loss": 1.7579, "step": 12725 }, { "epoch": 0.6957615188004865, "grad_norm": 1.4015922546386719, "learning_rate": 4.7386032011526674e-06, "loss": 1.3989, "step": 12726 }, { "epoch": 0.6958161912442081, "grad_norm": 1.5554341077804565, "learning_rate": 4.737049307721901e-06, "loss": 1.3718, "step": 12727 }, { "epoch": 0.6958708636879297, "grad_norm": 1.5579568147659302, "learning_rate": 4.735495590033411e-06, "loss": 1.2715, "step": 12728 }, { "epoch": 0.6959255361316512, "grad_norm": 1.8437106609344482, "learning_rate": 4.733942048139077e-06, "loss": 1.3662, "step": 12729 }, { "epoch": 0.6959802085753728, "grad_norm": 1.6895474195480347, "learning_rate": 4.7323886820907715e-06, "loss": 1.1948, "step": 12730 }, { "epoch": 0.6960348810190944, "grad_norm": 1.3665560483932495, "learning_rate": 4.730835491940372e-06, "loss": 1.3092, "step": 12731 }, { "epoch": 0.6960895534628159, "grad_norm": 1.907260537147522, "learning_rate": 4.729282477739741e-06, "loss": 1.5834, "step": 12732 }, { "epoch": 0.6961442259065375, "grad_norm": 1.500174641609192, "learning_rate": 4.7277296395407316e-06, "loss": 1.372, "step": 12733 }, { "epoch": 0.696198898350259, "grad_norm": 1.5817062854766846, "learning_rate": 4.726176977395205e-06, "loss": 1.4769, "step": 12734 }, { "epoch": 0.6962535707939805, "grad_norm": 1.2766607999801636, "learning_rate": 4.724624491355005e-06, "loss": 1.7019, "step": 12735 }, { "epoch": 0.6963082432377021, "grad_norm": 1.4940091371536255, "learning_rate": 4.723072181471971e-06, "loss": 1.5096, "step": 12736 }, { "epoch": 0.6963629156814237, "grad_norm": 1.718835711479187, "learning_rate": 4.7215200477979415e-06, "loss": 1.3405, "step": 12737 }, { "epoch": 0.6964175881251452, "grad_norm": 1.2489728927612305, "learning_rate": 4.719968090384743e-06, "loss": 1.332, "step": 12738 }, { "epoch": 0.6964722605688668, "grad_norm": 1.1985788345336914, "learning_rate": 4.718416309284196e-06, "loss": 1.4096, "step": 12739 }, { "epoch": 0.6965269330125883, "grad_norm": 1.745671033859253, "learning_rate": 4.7168647045481264e-06, "loss": 1.3454, "step": 12740 }, { "epoch": 0.6965816054563099, "grad_norm": 1.2373236417770386, "learning_rate": 4.715313276228337e-06, "loss": 1.4611, "step": 12741 }, { "epoch": 0.6966362779000315, "grad_norm": 1.8744149208068848, "learning_rate": 4.7137620243766425e-06, "loss": 1.411, "step": 12742 }, { "epoch": 0.696690950343753, "grad_norm": 1.4719878435134888, "learning_rate": 4.712210949044839e-06, "loss": 1.4548, "step": 12743 }, { "epoch": 0.6967456227874745, "grad_norm": 1.4534131288528442, "learning_rate": 4.710660050284716e-06, "loss": 1.259, "step": 12744 }, { "epoch": 0.6968002952311961, "grad_norm": 1.332668423652649, "learning_rate": 4.709109328148069e-06, "loss": 1.5626, "step": 12745 }, { "epoch": 0.6968549676749176, "grad_norm": 1.9504358768463135, "learning_rate": 4.707558782686677e-06, "loss": 1.3184, "step": 12746 }, { "epoch": 0.6969096401186392, "grad_norm": 1.9232803583145142, "learning_rate": 4.7060084139523135e-06, "loss": 1.406, "step": 12747 }, { "epoch": 0.6969643125623608, "grad_norm": 1.6569311618804932, "learning_rate": 4.704458221996755e-06, "loss": 1.3408, "step": 12748 }, { "epoch": 0.6970189850060823, "grad_norm": 1.618598222732544, "learning_rate": 4.702908206871763e-06, "loss": 1.5357, "step": 12749 }, { "epoch": 0.6970736574498039, "grad_norm": 1.6472865343093872, "learning_rate": 4.701358368629095e-06, "loss": 1.2029, "step": 12750 }, { "epoch": 0.6971283298935255, "grad_norm": 1.5747805833816528, "learning_rate": 4.699808707320506e-06, "loss": 1.3075, "step": 12751 }, { "epoch": 0.697183002337247, "grad_norm": 1.4008738994598389, "learning_rate": 4.6982592229977405e-06, "loss": 1.5263, "step": 12752 }, { "epoch": 0.6972376747809685, "grad_norm": 1.4055861234664917, "learning_rate": 4.6967099157125384e-06, "loss": 1.3857, "step": 12753 }, { "epoch": 0.69729234722469, "grad_norm": 1.344089388847351, "learning_rate": 4.695160785516639e-06, "loss": 1.547, "step": 12754 }, { "epoch": 0.6973470196684116, "grad_norm": 1.5378425121307373, "learning_rate": 4.693611832461766e-06, "loss": 1.535, "step": 12755 }, { "epoch": 0.6974016921121332, "grad_norm": 1.4711363315582275, "learning_rate": 4.6920630565996495e-06, "loss": 1.2851, "step": 12756 }, { "epoch": 0.6974563645558547, "grad_norm": 1.6673074960708618, "learning_rate": 4.690514457982003e-06, "loss": 1.5724, "step": 12757 }, { "epoch": 0.6975110369995763, "grad_norm": 1.6667522192001343, "learning_rate": 4.688966036660534e-06, "loss": 1.301, "step": 12758 }, { "epoch": 0.6975657094432979, "grad_norm": 1.7169296741485596, "learning_rate": 4.687417792686954e-06, "loss": 1.3836, "step": 12759 }, { "epoch": 0.6976203818870194, "grad_norm": 1.6222695112228394, "learning_rate": 4.685869726112963e-06, "loss": 1.3339, "step": 12760 }, { "epoch": 0.697675054330741, "grad_norm": 1.578577995300293, "learning_rate": 4.684321836990251e-06, "loss": 1.4811, "step": 12761 }, { "epoch": 0.6977297267744625, "grad_norm": 1.3373721837997437, "learning_rate": 4.682774125370506e-06, "loss": 1.5012, "step": 12762 }, { "epoch": 0.697784399218184, "grad_norm": 1.7723313570022583, "learning_rate": 4.68122659130541e-06, "loss": 1.4356, "step": 12763 }, { "epoch": 0.6978390716619056, "grad_norm": 1.9137063026428223, "learning_rate": 4.679679234846636e-06, "loss": 1.5869, "step": 12764 }, { "epoch": 0.6978937441056272, "grad_norm": 1.5968172550201416, "learning_rate": 4.67813205604586e-06, "loss": 1.6296, "step": 12765 }, { "epoch": 0.6979484165493487, "grad_norm": 1.3303455114364624, "learning_rate": 4.676585054954743e-06, "loss": 1.4501, "step": 12766 }, { "epoch": 0.6980030889930703, "grad_norm": 1.727392315864563, "learning_rate": 4.675038231624939e-06, "loss": 1.5178, "step": 12767 }, { "epoch": 0.6980577614367918, "grad_norm": 1.4867327213287354, "learning_rate": 4.673491586108108e-06, "loss": 1.3823, "step": 12768 }, { "epoch": 0.6981124338805134, "grad_norm": 1.429046869277954, "learning_rate": 4.671945118455891e-06, "loss": 1.5149, "step": 12769 }, { "epoch": 0.698167106324235, "grad_norm": 1.647051215171814, "learning_rate": 4.670398828719926e-06, "loss": 1.3081, "step": 12770 }, { "epoch": 0.6982217787679564, "grad_norm": 1.5529237985610962, "learning_rate": 4.668852716951854e-06, "loss": 1.5198, "step": 12771 }, { "epoch": 0.698276451211678, "grad_norm": 1.5658879280090332, "learning_rate": 4.667306783203296e-06, "loss": 1.4862, "step": 12772 }, { "epoch": 0.6983311236553996, "grad_norm": 1.6652830839157104, "learning_rate": 4.6657610275258826e-06, "loss": 1.5734, "step": 12773 }, { "epoch": 0.6983857960991211, "grad_norm": 1.6911954879760742, "learning_rate": 4.664215449971225e-06, "loss": 1.3158, "step": 12774 }, { "epoch": 0.6984404685428427, "grad_norm": 1.9850026369094849, "learning_rate": 4.662670050590934e-06, "loss": 1.2497, "step": 12775 }, { "epoch": 0.6984951409865643, "grad_norm": 1.4313645362854004, "learning_rate": 4.661124829436615e-06, "loss": 1.3561, "step": 12776 }, { "epoch": 0.6985498134302858, "grad_norm": 1.4326313734054565, "learning_rate": 4.6595797865598655e-06, "loss": 1.4973, "step": 12777 }, { "epoch": 0.6986044858740074, "grad_norm": 1.6553641557693481, "learning_rate": 4.658034922012276e-06, "loss": 1.3827, "step": 12778 }, { "epoch": 0.698659158317729, "grad_norm": 1.5940351486206055, "learning_rate": 4.656490235845438e-06, "loss": 1.2107, "step": 12779 }, { "epoch": 0.6987138307614504, "grad_norm": 1.414070725440979, "learning_rate": 4.654945728110931e-06, "loss": 1.3556, "step": 12780 }, { "epoch": 0.698768503205172, "grad_norm": 1.724316954612732, "learning_rate": 4.653401398860324e-06, "loss": 1.368, "step": 12781 }, { "epoch": 0.6988231756488935, "grad_norm": 1.570587396621704, "learning_rate": 4.651857248145195e-06, "loss": 1.3525, "step": 12782 }, { "epoch": 0.6988778480926151, "grad_norm": 1.6161019802093506, "learning_rate": 4.650313276017102e-06, "loss": 1.5355, "step": 12783 }, { "epoch": 0.6989325205363367, "grad_norm": 1.5922067165374756, "learning_rate": 4.6487694825275985e-06, "loss": 1.3767, "step": 12784 }, { "epoch": 0.6989871929800582, "grad_norm": 1.7401517629623413, "learning_rate": 4.6472258677282434e-06, "loss": 1.4483, "step": 12785 }, { "epoch": 0.6990418654237798, "grad_norm": 1.8334330320358276, "learning_rate": 4.645682431670573e-06, "loss": 1.2482, "step": 12786 }, { "epoch": 0.6990965378675014, "grad_norm": 1.7443101406097412, "learning_rate": 4.644139174406134e-06, "loss": 1.4002, "step": 12787 }, { "epoch": 0.6991512103112228, "grad_norm": 1.511154055595398, "learning_rate": 4.6425960959864556e-06, "loss": 1.4166, "step": 12788 }, { "epoch": 0.6992058827549444, "grad_norm": 1.5054175853729248, "learning_rate": 4.6410531964630665e-06, "loss": 1.522, "step": 12789 }, { "epoch": 0.699260555198666, "grad_norm": 1.8640514612197876, "learning_rate": 4.639510475887486e-06, "loss": 1.3856, "step": 12790 }, { "epoch": 0.6993152276423875, "grad_norm": 1.6020773649215698, "learning_rate": 4.637967934311228e-06, "loss": 1.2262, "step": 12791 }, { "epoch": 0.6993699000861091, "grad_norm": 1.9053049087524414, "learning_rate": 4.636425571785801e-06, "loss": 1.2932, "step": 12792 }, { "epoch": 0.6994245725298307, "grad_norm": 1.6620594263076782, "learning_rate": 4.634883388362712e-06, "loss": 1.3784, "step": 12793 }, { "epoch": 0.6994792449735522, "grad_norm": 2.140702962875366, "learning_rate": 4.633341384093459e-06, "loss": 1.2755, "step": 12794 }, { "epoch": 0.6995339174172738, "grad_norm": 1.5509300231933594, "learning_rate": 4.631799559029524e-06, "loss": 1.3482, "step": 12795 }, { "epoch": 0.6995885898609953, "grad_norm": 3.134249210357666, "learning_rate": 4.630257913222403e-06, "loss": 1.2225, "step": 12796 }, { "epoch": 0.6996432623047169, "grad_norm": 2.0128681659698486, "learning_rate": 4.628716446723572e-06, "loss": 1.5822, "step": 12797 }, { "epoch": 0.6996979347484384, "grad_norm": 1.5462439060211182, "learning_rate": 4.627175159584498e-06, "loss": 1.5016, "step": 12798 }, { "epoch": 0.6997526071921599, "grad_norm": 2.0284245014190674, "learning_rate": 4.625634051856658e-06, "loss": 1.4632, "step": 12799 }, { "epoch": 0.6998072796358815, "grad_norm": 1.690616250038147, "learning_rate": 4.624093123591508e-06, "loss": 1.3584, "step": 12800 }, { "epoch": 0.6998619520796031, "grad_norm": 1.1000736951828003, "learning_rate": 4.622552374840503e-06, "loss": 1.5489, "step": 12801 }, { "epoch": 0.6999166245233246, "grad_norm": 1.6710424423217773, "learning_rate": 4.621011805655093e-06, "loss": 1.4424, "step": 12802 }, { "epoch": 0.6999712969670462, "grad_norm": 1.4986389875411987, "learning_rate": 4.619471416086717e-06, "loss": 1.5455, "step": 12803 }, { "epoch": 0.7000259694107678, "grad_norm": 1.7427260875701904, "learning_rate": 4.617931206186821e-06, "loss": 1.3963, "step": 12804 }, { "epoch": 0.7000806418544893, "grad_norm": 1.5815637111663818, "learning_rate": 4.61639117600683e-06, "loss": 1.3909, "step": 12805 }, { "epoch": 0.7001353142982109, "grad_norm": 1.4249818325042725, "learning_rate": 4.614851325598168e-06, "loss": 1.2809, "step": 12806 }, { "epoch": 0.7001899867419324, "grad_norm": 1.6753259897232056, "learning_rate": 4.61331165501226e-06, "loss": 1.3816, "step": 12807 }, { "epoch": 0.7002446591856539, "grad_norm": 1.4126423597335815, "learning_rate": 4.611772164300516e-06, "loss": 1.5208, "step": 12808 }, { "epoch": 0.7002993316293755, "grad_norm": 1.4986631870269775, "learning_rate": 4.61023285351434e-06, "loss": 1.4745, "step": 12809 }, { "epoch": 0.700354004073097, "grad_norm": 1.7343002557754517, "learning_rate": 4.608693722705141e-06, "loss": 1.6109, "step": 12810 }, { "epoch": 0.7004086765168186, "grad_norm": 1.1756982803344727, "learning_rate": 4.6071547719243095e-06, "loss": 1.4094, "step": 12811 }, { "epoch": 0.7004633489605402, "grad_norm": 1.5638890266418457, "learning_rate": 4.60561600122323e-06, "loss": 1.2644, "step": 12812 }, { "epoch": 0.7005180214042617, "grad_norm": 1.5771852731704712, "learning_rate": 4.604077410653295e-06, "loss": 1.2466, "step": 12813 }, { "epoch": 0.7005726938479833, "grad_norm": 1.4234778881072998, "learning_rate": 4.6025390002658764e-06, "loss": 1.2536, "step": 12814 }, { "epoch": 0.7006273662917049, "grad_norm": 1.2298778295516968, "learning_rate": 4.601000770112347e-06, "loss": 1.3527, "step": 12815 }, { "epoch": 0.7006820387354263, "grad_norm": 1.8226596117019653, "learning_rate": 4.599462720244071e-06, "loss": 1.4293, "step": 12816 }, { "epoch": 0.7007367111791479, "grad_norm": 1.5412269830703735, "learning_rate": 4.597924850712403e-06, "loss": 1.3486, "step": 12817 }, { "epoch": 0.7007913836228695, "grad_norm": 1.8683112859725952, "learning_rate": 4.596387161568705e-06, "loss": 1.2293, "step": 12818 }, { "epoch": 0.700846056066591, "grad_norm": 1.6822775602340698, "learning_rate": 4.59484965286432e-06, "loss": 1.4189, "step": 12819 }, { "epoch": 0.7009007285103126, "grad_norm": 1.6729650497436523, "learning_rate": 4.593312324650584e-06, "loss": 1.2878, "step": 12820 }, { "epoch": 0.7009554009540342, "grad_norm": 1.2319488525390625, "learning_rate": 4.591775176978841e-06, "loss": 1.2586, "step": 12821 }, { "epoch": 0.7010100733977557, "grad_norm": 2.2727553844451904, "learning_rate": 4.590238209900416e-06, "loss": 1.7291, "step": 12822 }, { "epoch": 0.7010647458414773, "grad_norm": 1.3683640956878662, "learning_rate": 4.5887014234666275e-06, "loss": 1.7155, "step": 12823 }, { "epoch": 0.7011194182851987, "grad_norm": 1.315358281135559, "learning_rate": 4.5871648177287995e-06, "loss": 1.368, "step": 12824 }, { "epoch": 0.7011740907289203, "grad_norm": 1.5823765993118286, "learning_rate": 4.58562839273824e-06, "loss": 1.3028, "step": 12825 }, { "epoch": 0.7012287631726419, "grad_norm": 1.8206716775894165, "learning_rate": 4.584092148546254e-06, "loss": 1.1603, "step": 12826 }, { "epoch": 0.7012834356163634, "grad_norm": 1.602925419807434, "learning_rate": 4.582556085204141e-06, "loss": 1.3717, "step": 12827 }, { "epoch": 0.701338108060085, "grad_norm": 1.4602528810501099, "learning_rate": 4.581020202763188e-06, "loss": 1.4229, "step": 12828 }, { "epoch": 0.7013927805038066, "grad_norm": 1.7419896125793457, "learning_rate": 4.579484501274691e-06, "loss": 0.958, "step": 12829 }, { "epoch": 0.7014474529475281, "grad_norm": 2.146143674850464, "learning_rate": 4.577948980789924e-06, "loss": 1.6227, "step": 12830 }, { "epoch": 0.7015021253912497, "grad_norm": 1.6972190141677856, "learning_rate": 4.576413641360162e-06, "loss": 1.6576, "step": 12831 }, { "epoch": 0.7015567978349713, "grad_norm": 1.2924764156341553, "learning_rate": 4.574878483036679e-06, "loss": 1.5856, "step": 12832 }, { "epoch": 0.7016114702786928, "grad_norm": 1.910319209098816, "learning_rate": 4.573343505870733e-06, "loss": 1.59, "step": 12833 }, { "epoch": 0.7016661427224143, "grad_norm": 1.7365899085998535, "learning_rate": 4.571808709913578e-06, "loss": 1.4015, "step": 12834 }, { "epoch": 0.7017208151661359, "grad_norm": 1.3744560480117798, "learning_rate": 4.570274095216472e-06, "loss": 1.5221, "step": 12835 }, { "epoch": 0.7017754876098574, "grad_norm": 2.2668778896331787, "learning_rate": 4.5687396618306545e-06, "loss": 1.4177, "step": 12836 }, { "epoch": 0.701830160053579, "grad_norm": 1.4218246936798096, "learning_rate": 4.567205409807362e-06, "loss": 1.4655, "step": 12837 }, { "epoch": 0.7018848324973005, "grad_norm": 1.567436695098877, "learning_rate": 4.565671339197831e-06, "loss": 1.439, "step": 12838 }, { "epoch": 0.7019395049410221, "grad_norm": 1.6614432334899902, "learning_rate": 4.564137450053288e-06, "loss": 1.3681, "step": 12839 }, { "epoch": 0.7019941773847437, "grad_norm": 1.4622663259506226, "learning_rate": 4.56260374242495e-06, "loss": 1.4062, "step": 12840 }, { "epoch": 0.7020488498284652, "grad_norm": 1.4532227516174316, "learning_rate": 4.561070216364033e-06, "loss": 1.2695, "step": 12841 }, { "epoch": 0.7021035222721868, "grad_norm": 1.7060751914978027, "learning_rate": 4.55953687192174e-06, "loss": 1.5589, "step": 12842 }, { "epoch": 0.7021581947159083, "grad_norm": 1.306462287902832, "learning_rate": 4.558003709149282e-06, "loss": 1.4095, "step": 12843 }, { "epoch": 0.7022128671596298, "grad_norm": 1.4162094593048096, "learning_rate": 4.556470728097849e-06, "loss": 1.5005, "step": 12844 }, { "epoch": 0.7022675396033514, "grad_norm": 2.0688679218292236, "learning_rate": 4.55493792881863e-06, "loss": 1.1848, "step": 12845 }, { "epoch": 0.702322212047073, "grad_norm": 1.518751859664917, "learning_rate": 4.553405311362813e-06, "loss": 1.2996, "step": 12846 }, { "epoch": 0.7023768844907945, "grad_norm": 1.573828935623169, "learning_rate": 4.551872875781575e-06, "loss": 1.6913, "step": 12847 }, { "epoch": 0.7024315569345161, "grad_norm": 1.6320340633392334, "learning_rate": 4.5503406221260805e-06, "loss": 1.2887, "step": 12848 }, { "epoch": 0.7024862293782377, "grad_norm": 1.6093711853027344, "learning_rate": 4.548808550447505e-06, "loss": 1.3961, "step": 12849 }, { "epoch": 0.7025409018219592, "grad_norm": 1.4948979616165161, "learning_rate": 4.547276660797003e-06, "loss": 1.3164, "step": 12850 }, { "epoch": 0.7025955742656808, "grad_norm": 1.5044726133346558, "learning_rate": 4.545744953225726e-06, "loss": 1.4553, "step": 12851 }, { "epoch": 0.7026502467094023, "grad_norm": 1.4135174751281738, "learning_rate": 4.544213427784827e-06, "loss": 1.4821, "step": 12852 }, { "epoch": 0.7027049191531238, "grad_norm": 1.7758618593215942, "learning_rate": 4.5426820845254446e-06, "loss": 1.4277, "step": 12853 }, { "epoch": 0.7027595915968454, "grad_norm": 1.4694688320159912, "learning_rate": 4.5411509234987124e-06, "loss": 1.4315, "step": 12854 }, { "epoch": 0.7028142640405669, "grad_norm": 1.4166760444641113, "learning_rate": 4.539619944755762e-06, "loss": 1.5628, "step": 12855 }, { "epoch": 0.7028689364842885, "grad_norm": 1.4757834672927856, "learning_rate": 4.538089148347709e-06, "loss": 1.3592, "step": 12856 }, { "epoch": 0.7029236089280101, "grad_norm": 1.5728168487548828, "learning_rate": 4.536558534325681e-06, "loss": 1.3808, "step": 12857 }, { "epoch": 0.7029782813717316, "grad_norm": 1.6319292783737183, "learning_rate": 4.535028102740785e-06, "loss": 1.3346, "step": 12858 }, { "epoch": 0.7030329538154532, "grad_norm": 1.8873611688613892, "learning_rate": 4.533497853644119e-06, "loss": 1.258, "step": 12859 }, { "epoch": 0.7030876262591748, "grad_norm": 1.5546364784240723, "learning_rate": 4.531967787086791e-06, "loss": 1.544, "step": 12860 }, { "epoch": 0.7031422987028962, "grad_norm": 2.2110707759857178, "learning_rate": 4.5304379031198906e-06, "loss": 1.5811, "step": 12861 }, { "epoch": 0.7031969711466178, "grad_norm": 1.8567248582839966, "learning_rate": 4.5289082017944995e-06, "loss": 1.4788, "step": 12862 }, { "epoch": 0.7032516435903394, "grad_norm": 1.5874285697937012, "learning_rate": 4.527378683161706e-06, "loss": 1.247, "step": 12863 }, { "epoch": 0.7033063160340609, "grad_norm": 1.7242692708969116, "learning_rate": 4.52584934727258e-06, "loss": 1.4873, "step": 12864 }, { "epoch": 0.7033609884777825, "grad_norm": 2.008718490600586, "learning_rate": 4.524320194178189e-06, "loss": 1.1842, "step": 12865 }, { "epoch": 0.7034156609215041, "grad_norm": 1.3504997491836548, "learning_rate": 4.522791223929597e-06, "loss": 1.422, "step": 12866 }, { "epoch": 0.7034703333652256, "grad_norm": 1.6141334772109985, "learning_rate": 4.521262436577858e-06, "loss": 1.4693, "step": 12867 }, { "epoch": 0.7035250058089472, "grad_norm": 1.409889578819275, "learning_rate": 4.519733832174018e-06, "loss": 1.6606, "step": 12868 }, { "epoch": 0.7035796782526686, "grad_norm": 1.53965163230896, "learning_rate": 4.51820541076913e-06, "loss": 1.2721, "step": 12869 }, { "epoch": 0.7036343506963902, "grad_norm": 1.365310788154602, "learning_rate": 4.516677172414223e-06, "loss": 1.3586, "step": 12870 }, { "epoch": 0.7036890231401118, "grad_norm": 1.6535084247589111, "learning_rate": 4.515149117160335e-06, "loss": 1.4026, "step": 12871 }, { "epoch": 0.7037436955838333, "grad_norm": 1.5879582166671753, "learning_rate": 4.5136212450584895e-06, "loss": 1.3272, "step": 12872 }, { "epoch": 0.7037983680275549, "grad_norm": 2.0124857425689697, "learning_rate": 4.512093556159702e-06, "loss": 1.3195, "step": 12873 }, { "epoch": 0.7038530404712765, "grad_norm": 1.4590390920639038, "learning_rate": 4.510566050514991e-06, "loss": 1.495, "step": 12874 }, { "epoch": 0.703907712914998, "grad_norm": 1.9378256797790527, "learning_rate": 4.50903872817536e-06, "loss": 1.1677, "step": 12875 }, { "epoch": 0.7039623853587196, "grad_norm": 1.505427360534668, "learning_rate": 4.507511589191809e-06, "loss": 1.2978, "step": 12876 }, { "epoch": 0.7040170578024412, "grad_norm": 1.2560837268829346, "learning_rate": 4.505984633615337e-06, "loss": 1.5482, "step": 12877 }, { "epoch": 0.7040717302461627, "grad_norm": 1.5191179513931274, "learning_rate": 4.504457861496931e-06, "loss": 1.3998, "step": 12878 }, { "epoch": 0.7041264026898842, "grad_norm": 1.5468720197677612, "learning_rate": 4.502931272887572e-06, "loss": 1.3664, "step": 12879 }, { "epoch": 0.7041810751336058, "grad_norm": 1.225547194480896, "learning_rate": 4.501404867838237e-06, "loss": 1.4736, "step": 12880 }, { "epoch": 0.7042357475773273, "grad_norm": 1.3593522310256958, "learning_rate": 4.499878646399897e-06, "loss": 1.5223, "step": 12881 }, { "epoch": 0.7042904200210489, "grad_norm": 1.8514665365219116, "learning_rate": 4.498352608623511e-06, "loss": 1.2224, "step": 12882 }, { "epoch": 0.7043450924647704, "grad_norm": 1.5905619859695435, "learning_rate": 4.496826754560046e-06, "loss": 1.3381, "step": 12883 }, { "epoch": 0.704399764908492, "grad_norm": 1.4016813039779663, "learning_rate": 4.495301084260444e-06, "loss": 1.5989, "step": 12884 }, { "epoch": 0.7044544373522136, "grad_norm": 1.3955971002578735, "learning_rate": 4.493775597775661e-06, "loss": 1.4204, "step": 12885 }, { "epoch": 0.7045091097959351, "grad_norm": 1.686371088027954, "learning_rate": 4.492250295156632e-06, "loss": 1.4578, "step": 12886 }, { "epoch": 0.7045637822396567, "grad_norm": 1.5557737350463867, "learning_rate": 4.490725176454285e-06, "loss": 1.3691, "step": 12887 }, { "epoch": 0.7046184546833782, "grad_norm": 2.319868803024292, "learning_rate": 4.489200241719556e-06, "loss": 1.3482, "step": 12888 }, { "epoch": 0.7046731271270997, "grad_norm": 1.3690400123596191, "learning_rate": 4.487675491003362e-06, "loss": 1.406, "step": 12889 }, { "epoch": 0.7047277995708213, "grad_norm": 1.473117470741272, "learning_rate": 4.486150924356617e-06, "loss": 1.4799, "step": 12890 }, { "epoch": 0.7047824720145429, "grad_norm": 1.5560169219970703, "learning_rate": 4.484626541830234e-06, "loss": 1.4025, "step": 12891 }, { "epoch": 0.7048371444582644, "grad_norm": 1.4372471570968628, "learning_rate": 4.483102343475112e-06, "loss": 1.4324, "step": 12892 }, { "epoch": 0.704891816901986, "grad_norm": 1.5847817659378052, "learning_rate": 4.481578329342149e-06, "loss": 1.3595, "step": 12893 }, { "epoch": 0.7049464893457076, "grad_norm": 1.6171536445617676, "learning_rate": 4.480054499482236e-06, "loss": 1.465, "step": 12894 }, { "epoch": 0.7050011617894291, "grad_norm": 1.9649159908294678, "learning_rate": 4.478530853946255e-06, "loss": 1.4814, "step": 12895 }, { "epoch": 0.7050558342331507, "grad_norm": 1.8072075843811035, "learning_rate": 4.477007392785082e-06, "loss": 1.3075, "step": 12896 }, { "epoch": 0.7051105066768721, "grad_norm": 1.4656169414520264, "learning_rate": 4.475484116049596e-06, "loss": 1.4595, "step": 12897 }, { "epoch": 0.7051651791205937, "grad_norm": 1.2286829948425293, "learning_rate": 4.473961023790655e-06, "loss": 1.5443, "step": 12898 }, { "epoch": 0.7052198515643153, "grad_norm": 1.2865217924118042, "learning_rate": 4.472438116059127e-06, "loss": 1.648, "step": 12899 }, { "epoch": 0.7052745240080368, "grad_norm": 1.4378585815429688, "learning_rate": 4.470915392905862e-06, "loss": 1.4223, "step": 12900 }, { "epoch": 0.7053291964517584, "grad_norm": 2.7172305583953857, "learning_rate": 4.4693928543817e-06, "loss": 1.1722, "step": 12901 }, { "epoch": 0.70538386889548, "grad_norm": 1.4933980703353882, "learning_rate": 4.467870500537494e-06, "loss": 1.3307, "step": 12902 }, { "epoch": 0.7054385413392015, "grad_norm": 1.801367163658142, "learning_rate": 4.466348331424074e-06, "loss": 1.4164, "step": 12903 }, { "epoch": 0.7054932137829231, "grad_norm": 1.7865984439849854, "learning_rate": 4.464826347092267e-06, "loss": 1.604, "step": 12904 }, { "epoch": 0.7055478862266447, "grad_norm": 1.5108917951583862, "learning_rate": 4.463304547592898e-06, "loss": 1.4752, "step": 12905 }, { "epoch": 0.7056025586703661, "grad_norm": 1.703012228012085, "learning_rate": 4.461782932976783e-06, "loss": 1.3606, "step": 12906 }, { "epoch": 0.7056572311140877, "grad_norm": 1.3010979890823364, "learning_rate": 4.460261503294726e-06, "loss": 1.6322, "step": 12907 }, { "epoch": 0.7057119035578093, "grad_norm": 1.396550178527832, "learning_rate": 4.458740258597541e-06, "loss": 1.3612, "step": 12908 }, { "epoch": 0.7057665760015308, "grad_norm": 1.538435459136963, "learning_rate": 4.457219198936024e-06, "loss": 1.4208, "step": 12909 }, { "epoch": 0.7058212484452524, "grad_norm": 1.8675768375396729, "learning_rate": 4.455698324360959e-06, "loss": 1.1413, "step": 12910 }, { "epoch": 0.7058759208889739, "grad_norm": 1.57273530960083, "learning_rate": 4.45417763492314e-06, "loss": 1.5519, "step": 12911 }, { "epoch": 0.7059305933326955, "grad_norm": 1.2523547410964966, "learning_rate": 4.452657130673341e-06, "loss": 1.3273, "step": 12912 }, { "epoch": 0.7059852657764171, "grad_norm": 1.6151121854782104, "learning_rate": 4.451136811662341e-06, "loss": 1.4131, "step": 12913 }, { "epoch": 0.7060399382201386, "grad_norm": 1.3656272888183594, "learning_rate": 4.449616677940904e-06, "loss": 1.5361, "step": 12914 }, { "epoch": 0.7060946106638601, "grad_norm": 1.651762843132019, "learning_rate": 4.4480967295597856e-06, "loss": 1.2485, "step": 12915 }, { "epoch": 0.7061492831075817, "grad_norm": 1.453285813331604, "learning_rate": 4.44657696656975e-06, "loss": 1.7962, "step": 12916 }, { "epoch": 0.7062039555513032, "grad_norm": 1.6276624202728271, "learning_rate": 4.445057389021541e-06, "loss": 1.384, "step": 12917 }, { "epoch": 0.7062586279950248, "grad_norm": 1.2552943229675293, "learning_rate": 4.4435379969659005e-06, "loss": 1.4324, "step": 12918 }, { "epoch": 0.7063133004387464, "grad_norm": 2.16542649269104, "learning_rate": 4.442018790453566e-06, "loss": 1.2223, "step": 12919 }, { "epoch": 0.7063679728824679, "grad_norm": 1.527808666229248, "learning_rate": 4.440499769535265e-06, "loss": 1.2392, "step": 12920 }, { "epoch": 0.7064226453261895, "grad_norm": 1.327310562133789, "learning_rate": 4.4389809342617195e-06, "loss": 1.5764, "step": 12921 }, { "epoch": 0.7064773177699111, "grad_norm": 1.3627837896347046, "learning_rate": 4.437462284683653e-06, "loss": 1.5542, "step": 12922 }, { "epoch": 0.7065319902136326, "grad_norm": 1.6280994415283203, "learning_rate": 4.435943820851775e-06, "loss": 1.4375, "step": 12923 }, { "epoch": 0.7065866626573541, "grad_norm": 1.4378395080566406, "learning_rate": 4.4344255428167845e-06, "loss": 1.5994, "step": 12924 }, { "epoch": 0.7066413351010756, "grad_norm": 1.479616641998291, "learning_rate": 4.432907450629389e-06, "loss": 1.1962, "step": 12925 }, { "epoch": 0.7066960075447972, "grad_norm": 1.2053474187850952, "learning_rate": 4.431389544340273e-06, "loss": 1.4867, "step": 12926 }, { "epoch": 0.7067506799885188, "grad_norm": 1.388782262802124, "learning_rate": 4.429871824000133e-06, "loss": 1.3899, "step": 12927 }, { "epoch": 0.7068053524322403, "grad_norm": 1.5227998495101929, "learning_rate": 4.428354289659641e-06, "loss": 1.6676, "step": 12928 }, { "epoch": 0.7068600248759619, "grad_norm": 1.4832687377929688, "learning_rate": 4.426836941369471e-06, "loss": 1.586, "step": 12929 }, { "epoch": 0.7069146973196835, "grad_norm": 1.7035026550292969, "learning_rate": 4.425319779180297e-06, "loss": 1.3484, "step": 12930 }, { "epoch": 0.706969369763405, "grad_norm": 1.6093268394470215, "learning_rate": 4.423802803142777e-06, "loss": 1.5057, "step": 12931 }, { "epoch": 0.7070240422071266, "grad_norm": 1.6547176837921143, "learning_rate": 4.422286013307567e-06, "loss": 1.5267, "step": 12932 }, { "epoch": 0.7070787146508482, "grad_norm": 1.6877371072769165, "learning_rate": 4.420769409725315e-06, "loss": 1.5501, "step": 12933 }, { "epoch": 0.7071333870945696, "grad_norm": 1.712621808052063, "learning_rate": 4.419252992446664e-06, "loss": 1.4638, "step": 12934 }, { "epoch": 0.7071880595382912, "grad_norm": 1.8034744262695312, "learning_rate": 4.417736761522249e-06, "loss": 1.3086, "step": 12935 }, { "epoch": 0.7072427319820128, "grad_norm": 1.8474081754684448, "learning_rate": 4.416220717002705e-06, "loss": 1.5775, "step": 12936 }, { "epoch": 0.7072974044257343, "grad_norm": 1.8715158700942993, "learning_rate": 4.414704858938655e-06, "loss": 1.2716, "step": 12937 }, { "epoch": 0.7073520768694559, "grad_norm": 1.7623205184936523, "learning_rate": 4.413189187380711e-06, "loss": 1.4945, "step": 12938 }, { "epoch": 0.7074067493131774, "grad_norm": 1.6537625789642334, "learning_rate": 4.411673702379495e-06, "loss": 1.3849, "step": 12939 }, { "epoch": 0.707461421756899, "grad_norm": 1.6303832530975342, "learning_rate": 4.410158403985603e-06, "loss": 1.47, "step": 12940 }, { "epoch": 0.7075160942006206, "grad_norm": 1.5189369916915894, "learning_rate": 4.408643292249642e-06, "loss": 1.3897, "step": 12941 }, { "epoch": 0.707570766644342, "grad_norm": 1.540238618850708, "learning_rate": 4.407128367222203e-06, "loss": 1.2798, "step": 12942 }, { "epoch": 0.7076254390880636, "grad_norm": 1.5757743120193481, "learning_rate": 4.405613628953871e-06, "loss": 1.2601, "step": 12943 }, { "epoch": 0.7076801115317852, "grad_norm": 1.7370080947875977, "learning_rate": 4.404099077495229e-06, "loss": 1.3583, "step": 12944 }, { "epoch": 0.7077347839755067, "grad_norm": 1.503017783164978, "learning_rate": 4.402584712896849e-06, "loss": 1.5964, "step": 12945 }, { "epoch": 0.7077894564192283, "grad_norm": 1.5965157747268677, "learning_rate": 4.401070535209296e-06, "loss": 1.2676, "step": 12946 }, { "epoch": 0.7078441288629499, "grad_norm": 1.524096965789795, "learning_rate": 4.399556544483141e-06, "loss": 1.5047, "step": 12947 }, { "epoch": 0.7078988013066714, "grad_norm": 1.2247945070266724, "learning_rate": 4.3980427407689345e-06, "loss": 1.4717, "step": 12948 }, { "epoch": 0.707953473750393, "grad_norm": 1.1979789733886719, "learning_rate": 4.396529124117223e-06, "loss": 1.4413, "step": 12949 }, { "epoch": 0.7080081461941146, "grad_norm": 1.3714076280593872, "learning_rate": 4.395015694578555e-06, "loss": 1.4505, "step": 12950 }, { "epoch": 0.708062818637836, "grad_norm": 1.5063214302062988, "learning_rate": 4.393502452203466e-06, "loss": 1.6294, "step": 12951 }, { "epoch": 0.7081174910815576, "grad_norm": 1.7662904262542725, "learning_rate": 4.391989397042485e-06, "loss": 1.5383, "step": 12952 }, { "epoch": 0.7081721635252791, "grad_norm": 1.7580623626708984, "learning_rate": 4.390476529146138e-06, "loss": 1.5006, "step": 12953 }, { "epoch": 0.7082268359690007, "grad_norm": 1.0528520345687866, "learning_rate": 4.388963848564941e-06, "loss": 1.6941, "step": 12954 }, { "epoch": 0.7082815084127223, "grad_norm": 1.4947420358657837, "learning_rate": 4.38745135534941e-06, "loss": 1.4028, "step": 12955 }, { "epoch": 0.7083361808564438, "grad_norm": 1.4387059211730957, "learning_rate": 4.38593904955005e-06, "loss": 1.4265, "step": 12956 }, { "epoch": 0.7083908533001654, "grad_norm": 1.4675474166870117, "learning_rate": 4.384426931217359e-06, "loss": 1.3773, "step": 12957 }, { "epoch": 0.708445525743887, "grad_norm": 1.3775005340576172, "learning_rate": 4.382915000401829e-06, "loss": 1.5151, "step": 12958 }, { "epoch": 0.7085001981876085, "grad_norm": 1.8404361009597778, "learning_rate": 4.381403257153949e-06, "loss": 1.4802, "step": 12959 }, { "epoch": 0.70855487063133, "grad_norm": 1.3959356546401978, "learning_rate": 4.3798917015241944e-06, "loss": 1.3696, "step": 12960 }, { "epoch": 0.7086095430750516, "grad_norm": 1.4307880401611328, "learning_rate": 4.378380333563048e-06, "loss": 1.5608, "step": 12961 }, { "epoch": 0.7086642155187731, "grad_norm": 2.0291032791137695, "learning_rate": 4.376869153320974e-06, "loss": 1.0504, "step": 12962 }, { "epoch": 0.7087188879624947, "grad_norm": 1.7349841594696045, "learning_rate": 4.37535816084843e-06, "loss": 1.4633, "step": 12963 }, { "epoch": 0.7087735604062163, "grad_norm": 1.4212687015533447, "learning_rate": 4.3738473561958795e-06, "loss": 1.5202, "step": 12964 }, { "epoch": 0.7088282328499378, "grad_norm": 1.7710639238357544, "learning_rate": 4.372336739413767e-06, "loss": 1.3866, "step": 12965 }, { "epoch": 0.7088829052936594, "grad_norm": 1.3896467685699463, "learning_rate": 4.370826310552534e-06, "loss": 1.2031, "step": 12966 }, { "epoch": 0.7089375777373809, "grad_norm": 1.4714066982269287, "learning_rate": 4.369316069662623e-06, "loss": 1.3901, "step": 12967 }, { "epoch": 0.7089922501811025, "grad_norm": 1.664633870124817, "learning_rate": 4.367806016794458e-06, "loss": 1.2945, "step": 12968 }, { "epoch": 0.709046922624824, "grad_norm": 1.3634967803955078, "learning_rate": 4.366296151998469e-06, "loss": 1.5658, "step": 12969 }, { "epoch": 0.7091015950685455, "grad_norm": 1.3499486446380615, "learning_rate": 4.364786475325072e-06, "loss": 1.2852, "step": 12970 }, { "epoch": 0.7091562675122671, "grad_norm": 1.6107146739959717, "learning_rate": 4.3632769868246784e-06, "loss": 1.2384, "step": 12971 }, { "epoch": 0.7092109399559887, "grad_norm": 1.6726281642913818, "learning_rate": 4.361767686547693e-06, "loss": 1.346, "step": 12972 }, { "epoch": 0.7092656123997102, "grad_norm": 1.8754267692565918, "learning_rate": 4.360258574544516e-06, "loss": 1.596, "step": 12973 }, { "epoch": 0.7093202848434318, "grad_norm": 1.4080723524093628, "learning_rate": 4.358749650865534e-06, "loss": 1.3097, "step": 12974 }, { "epoch": 0.7093749572871534, "grad_norm": 1.8096126317977905, "learning_rate": 4.357240915561143e-06, "loss": 1.1602, "step": 12975 }, { "epoch": 0.7094296297308749, "grad_norm": 1.575574278831482, "learning_rate": 4.3557323686817185e-06, "loss": 1.3565, "step": 12976 }, { "epoch": 0.7094843021745965, "grad_norm": 1.3906363248825073, "learning_rate": 4.354224010277632e-06, "loss": 1.3466, "step": 12977 }, { "epoch": 0.709538974618318, "grad_norm": 1.3338375091552734, "learning_rate": 4.352715840399257e-06, "loss": 1.4482, "step": 12978 }, { "epoch": 0.7095936470620395, "grad_norm": 1.4119879007339478, "learning_rate": 4.351207859096953e-06, "loss": 1.4544, "step": 12979 }, { "epoch": 0.7096483195057611, "grad_norm": 1.5289626121520996, "learning_rate": 4.3497000664210695e-06, "loss": 1.3589, "step": 12980 }, { "epoch": 0.7097029919494826, "grad_norm": 2.146629571914673, "learning_rate": 4.348192462421963e-06, "loss": 1.2886, "step": 12981 }, { "epoch": 0.7097576643932042, "grad_norm": 1.5451719760894775, "learning_rate": 4.346685047149973e-06, "loss": 1.4386, "step": 12982 }, { "epoch": 0.7098123368369258, "grad_norm": 1.5390052795410156, "learning_rate": 4.345177820655435e-06, "loss": 1.4902, "step": 12983 }, { "epoch": 0.7098670092806473, "grad_norm": 1.5086863040924072, "learning_rate": 4.343670782988679e-06, "loss": 1.4303, "step": 12984 }, { "epoch": 0.7099216817243689, "grad_norm": 1.6990498304367065, "learning_rate": 4.3421639342000255e-06, "loss": 1.5203, "step": 12985 }, { "epoch": 0.7099763541680905, "grad_norm": 1.5351747274398804, "learning_rate": 4.340657274339798e-06, "loss": 1.3748, "step": 12986 }, { "epoch": 0.7100310266118119, "grad_norm": 1.3799407482147217, "learning_rate": 4.339150803458304e-06, "loss": 1.3819, "step": 12987 }, { "epoch": 0.7100856990555335, "grad_norm": 3.463329315185547, "learning_rate": 4.337644521605845e-06, "loss": 1.5326, "step": 12988 }, { "epoch": 0.7101403714992551, "grad_norm": 1.920434832572937, "learning_rate": 4.336138428832726e-06, "loss": 1.5305, "step": 12989 }, { "epoch": 0.7101950439429766, "grad_norm": 1.8256875276565552, "learning_rate": 4.334632525189235e-06, "loss": 1.5122, "step": 12990 }, { "epoch": 0.7102497163866982, "grad_norm": 1.369055986404419, "learning_rate": 4.333126810725655e-06, "loss": 1.4118, "step": 12991 }, { "epoch": 0.7103043888304198, "grad_norm": 1.5586426258087158, "learning_rate": 4.331621285492272e-06, "loss": 1.3865, "step": 12992 }, { "epoch": 0.7103590612741413, "grad_norm": 1.8498543500900269, "learning_rate": 4.330115949539356e-06, "loss": 1.4907, "step": 12993 }, { "epoch": 0.7104137337178629, "grad_norm": 1.5464859008789062, "learning_rate": 4.328610802917169e-06, "loss": 1.5249, "step": 12994 }, { "epoch": 0.7104684061615844, "grad_norm": 1.825377106666565, "learning_rate": 4.327105845675979e-06, "loss": 1.4299, "step": 12995 }, { "epoch": 0.710523078605306, "grad_norm": 1.2584011554718018, "learning_rate": 4.325601077866039e-06, "loss": 1.5011, "step": 12996 }, { "epoch": 0.7105777510490275, "grad_norm": 1.2406750917434692, "learning_rate": 4.324096499537592e-06, "loss": 1.337, "step": 12997 }, { "epoch": 0.710632423492749, "grad_norm": 1.5117435455322266, "learning_rate": 4.322592110740882e-06, "loss": 1.4721, "step": 12998 }, { "epoch": 0.7106870959364706, "grad_norm": 1.7497484683990479, "learning_rate": 4.32108791152614e-06, "loss": 1.6253, "step": 12999 }, { "epoch": 0.7107417683801922, "grad_norm": 1.6584298610687256, "learning_rate": 4.319583901943604e-06, "loss": 1.5049, "step": 13000 }, { "epoch": 0.7107964408239137, "grad_norm": 1.4470082521438599, "learning_rate": 4.31808008204349e-06, "loss": 1.554, "step": 13001 }, { "epoch": 0.7108511132676353, "grad_norm": 1.565205693244934, "learning_rate": 4.316576451876011e-06, "loss": 1.3919, "step": 13002 }, { "epoch": 0.7109057857113569, "grad_norm": 1.856737732887268, "learning_rate": 4.315073011491385e-06, "loss": 1.36, "step": 13003 }, { "epoch": 0.7109604581550784, "grad_norm": 1.987911343574524, "learning_rate": 4.313569760939811e-06, "loss": 1.2777, "step": 13004 }, { "epoch": 0.7110151305988, "grad_norm": 1.3961198329925537, "learning_rate": 4.312066700271483e-06, "loss": 1.404, "step": 13005 }, { "epoch": 0.7110698030425215, "grad_norm": 1.6007118225097656, "learning_rate": 4.310563829536598e-06, "loss": 1.4651, "step": 13006 }, { "epoch": 0.711124475486243, "grad_norm": 1.4360889196395874, "learning_rate": 4.3090611487853385e-06, "loss": 1.6883, "step": 13007 }, { "epoch": 0.7111791479299646, "grad_norm": 1.4333101511001587, "learning_rate": 4.307558658067878e-06, "loss": 1.6573, "step": 13008 }, { "epoch": 0.7112338203736861, "grad_norm": 1.3879152536392212, "learning_rate": 4.306056357434394e-06, "loss": 1.4058, "step": 13009 }, { "epoch": 0.7112884928174077, "grad_norm": 1.59772789478302, "learning_rate": 4.3045542469350495e-06, "loss": 1.433, "step": 13010 }, { "epoch": 0.7113431652611293, "grad_norm": 1.185322880744934, "learning_rate": 4.303052326620004e-06, "loss": 1.7181, "step": 13011 }, { "epoch": 0.7113978377048508, "grad_norm": 1.432188630104065, "learning_rate": 4.30155059653941e-06, "loss": 1.3489, "step": 13012 }, { "epoch": 0.7114525101485724, "grad_norm": 1.610805630683899, "learning_rate": 4.300049056743409e-06, "loss": 1.24, "step": 13013 }, { "epoch": 0.711507182592294, "grad_norm": 1.3728917837142944, "learning_rate": 4.298547707282149e-06, "loss": 1.5408, "step": 13014 }, { "epoch": 0.7115618550360154, "grad_norm": 1.5782471895217896, "learning_rate": 4.297046548205761e-06, "loss": 1.3812, "step": 13015 }, { "epoch": 0.711616527479737, "grad_norm": 2.1379570960998535, "learning_rate": 4.295545579564366e-06, "loss": 1.6007, "step": 13016 }, { "epoch": 0.7116711999234586, "grad_norm": 1.5253758430480957, "learning_rate": 4.294044801408095e-06, "loss": 1.3126, "step": 13017 }, { "epoch": 0.7117258723671801, "grad_norm": 1.8289847373962402, "learning_rate": 4.292544213787056e-06, "loss": 1.3875, "step": 13018 }, { "epoch": 0.7117805448109017, "grad_norm": 1.4446147680282593, "learning_rate": 4.291043816751357e-06, "loss": 1.4069, "step": 13019 }, { "epoch": 0.7118352172546233, "grad_norm": 1.6641379594802856, "learning_rate": 4.289543610351104e-06, "loss": 1.1648, "step": 13020 }, { "epoch": 0.7118898896983448, "grad_norm": 1.649307131767273, "learning_rate": 4.288043594636389e-06, "loss": 1.4726, "step": 13021 }, { "epoch": 0.7119445621420664, "grad_norm": 1.9846134185791016, "learning_rate": 4.2865437696573046e-06, "loss": 1.5446, "step": 13022 }, { "epoch": 0.7119992345857878, "grad_norm": 1.650775671005249, "learning_rate": 4.285044135463929e-06, "loss": 1.516, "step": 13023 }, { "epoch": 0.7120539070295094, "grad_norm": 1.3088324069976807, "learning_rate": 4.283544692106336e-06, "loss": 1.332, "step": 13024 }, { "epoch": 0.712108579473231, "grad_norm": 1.4229859113693237, "learning_rate": 4.282045439634605e-06, "loss": 1.4589, "step": 13025 }, { "epoch": 0.7121632519169525, "grad_norm": 1.6838507652282715, "learning_rate": 4.280546378098792e-06, "loss": 1.357, "step": 13026 }, { "epoch": 0.7122179243606741, "grad_norm": 1.7036807537078857, "learning_rate": 4.279047507548955e-06, "loss": 1.4213, "step": 13027 }, { "epoch": 0.7122725968043957, "grad_norm": 2.1668648719787598, "learning_rate": 4.27754882803515e-06, "loss": 1.2564, "step": 13028 }, { "epoch": 0.7123272692481172, "grad_norm": 1.621050238609314, "learning_rate": 4.276050339607416e-06, "loss": 1.4437, "step": 13029 }, { "epoch": 0.7123819416918388, "grad_norm": 1.6008498668670654, "learning_rate": 4.274552042315791e-06, "loss": 1.6201, "step": 13030 }, { "epoch": 0.7124366141355604, "grad_norm": 1.4540698528289795, "learning_rate": 4.273053936210312e-06, "loss": 1.6092, "step": 13031 }, { "epoch": 0.7124912865792818, "grad_norm": 1.7538021802902222, "learning_rate": 4.271556021341e-06, "loss": 1.4874, "step": 13032 }, { "epoch": 0.7125459590230034, "grad_norm": 1.7347134351730347, "learning_rate": 4.270058297757871e-06, "loss": 1.3596, "step": 13033 }, { "epoch": 0.712600631466725, "grad_norm": 2.026984453201294, "learning_rate": 4.2685607655109455e-06, "loss": 1.5166, "step": 13034 }, { "epoch": 0.7126553039104465, "grad_norm": 1.7528300285339355, "learning_rate": 4.267063424650224e-06, "loss": 1.5194, "step": 13035 }, { "epoch": 0.7127099763541681, "grad_norm": 1.7807849645614624, "learning_rate": 4.265566275225709e-06, "loss": 1.3665, "step": 13036 }, { "epoch": 0.7127646487978896, "grad_norm": 1.6902408599853516, "learning_rate": 4.2640693172873914e-06, "loss": 1.5229, "step": 13037 }, { "epoch": 0.7128193212416112, "grad_norm": 1.4603968858718872, "learning_rate": 4.2625725508852555e-06, "loss": 1.3438, "step": 13038 }, { "epoch": 0.7128739936853328, "grad_norm": 1.7737563848495483, "learning_rate": 4.26107597606929e-06, "loss": 1.4175, "step": 13039 }, { "epoch": 0.7129286661290543, "grad_norm": 1.6434943675994873, "learning_rate": 4.259579592889464e-06, "loss": 1.3008, "step": 13040 }, { "epoch": 0.7129833385727758, "grad_norm": 1.503547191619873, "learning_rate": 4.258083401395742e-06, "loss": 1.4406, "step": 13041 }, { "epoch": 0.7130380110164974, "grad_norm": 1.2216439247131348, "learning_rate": 4.2565874016380915e-06, "loss": 1.3673, "step": 13042 }, { "epoch": 0.7130926834602189, "grad_norm": 1.3555474281311035, "learning_rate": 4.255091593666466e-06, "loss": 1.3112, "step": 13043 }, { "epoch": 0.7131473559039405, "grad_norm": 1.557491660118103, "learning_rate": 4.25359597753081e-06, "loss": 1.6206, "step": 13044 }, { "epoch": 0.7132020283476621, "grad_norm": 1.6353782415390015, "learning_rate": 4.252100553281072e-06, "loss": 1.2689, "step": 13045 }, { "epoch": 0.7132567007913836, "grad_norm": 1.5552008152008057, "learning_rate": 4.250605320967184e-06, "loss": 1.4998, "step": 13046 }, { "epoch": 0.7133113732351052, "grad_norm": 2.518937587738037, "learning_rate": 4.249110280639076e-06, "loss": 1.6316, "step": 13047 }, { "epoch": 0.7133660456788268, "grad_norm": 1.8492215871810913, "learning_rate": 4.24761543234667e-06, "loss": 1.2027, "step": 13048 }, { "epoch": 0.7134207181225483, "grad_norm": 1.732041358947754, "learning_rate": 4.246120776139884e-06, "loss": 1.2005, "step": 13049 }, { "epoch": 0.7134753905662699, "grad_norm": 1.4347587823867798, "learning_rate": 4.244626312068623e-06, "loss": 1.3833, "step": 13050 }, { "epoch": 0.7135300630099914, "grad_norm": 1.751307487487793, "learning_rate": 4.243132040182798e-06, "loss": 1.448, "step": 13051 }, { "epoch": 0.7135847354537129, "grad_norm": 1.4493370056152344, "learning_rate": 4.241637960532301e-06, "loss": 1.4475, "step": 13052 }, { "epoch": 0.7136394078974345, "grad_norm": 1.7930301427841187, "learning_rate": 4.240144073167028e-06, "loss": 1.4808, "step": 13053 }, { "epoch": 0.713694080341156, "grad_norm": 1.348717212677002, "learning_rate": 4.238650378136859e-06, "loss": 1.4834, "step": 13054 }, { "epoch": 0.7137487527848776, "grad_norm": 2.2665209770202637, "learning_rate": 4.237156875491671e-06, "loss": 1.2466, "step": 13055 }, { "epoch": 0.7138034252285992, "grad_norm": 1.332502841949463, "learning_rate": 4.23566356528134e-06, "loss": 1.3655, "step": 13056 }, { "epoch": 0.7138580976723207, "grad_norm": 1.5995386838912964, "learning_rate": 4.234170447555731e-06, "loss": 1.4204, "step": 13057 }, { "epoch": 0.7139127701160423, "grad_norm": 2.340543270111084, "learning_rate": 4.2326775223646965e-06, "loss": 1.4306, "step": 13058 }, { "epoch": 0.7139674425597639, "grad_norm": 1.777036190032959, "learning_rate": 4.231184789758096e-06, "loss": 1.5005, "step": 13059 }, { "epoch": 0.7140221150034853, "grad_norm": 1.4818358421325684, "learning_rate": 4.229692249785773e-06, "loss": 1.5098, "step": 13060 }, { "epoch": 0.7140767874472069, "grad_norm": 1.4625965356826782, "learning_rate": 4.228199902497565e-06, "loss": 1.6428, "step": 13061 }, { "epoch": 0.7141314598909285, "grad_norm": 1.3460607528686523, "learning_rate": 4.2267077479433075e-06, "loss": 1.719, "step": 13062 }, { "epoch": 0.71418613233465, "grad_norm": 1.571618914604187, "learning_rate": 4.225215786172825e-06, "loss": 1.3881, "step": 13063 }, { "epoch": 0.7142408047783716, "grad_norm": 1.9725521802902222, "learning_rate": 4.223724017235935e-06, "loss": 1.3147, "step": 13064 }, { "epoch": 0.7142954772220932, "grad_norm": 1.5514495372772217, "learning_rate": 4.222232441182459e-06, "loss": 1.3425, "step": 13065 }, { "epoch": 0.7143501496658147, "grad_norm": 1.4537649154663086, "learning_rate": 4.220741058062194e-06, "loss": 1.5031, "step": 13066 }, { "epoch": 0.7144048221095363, "grad_norm": 1.1955963373184204, "learning_rate": 4.219249867924953e-06, "loss": 1.2949, "step": 13067 }, { "epoch": 0.7144594945532577, "grad_norm": 1.700016736984253, "learning_rate": 4.217758870820522e-06, "loss": 1.7081, "step": 13068 }, { "epoch": 0.7145141669969793, "grad_norm": 1.6004928350448608, "learning_rate": 4.216268066798687e-06, "loss": 1.4125, "step": 13069 }, { "epoch": 0.7145688394407009, "grad_norm": 1.2556535005569458, "learning_rate": 4.214777455909237e-06, "loss": 1.4996, "step": 13070 }, { "epoch": 0.7146235118844224, "grad_norm": 1.4991428852081299, "learning_rate": 4.213287038201943e-06, "loss": 1.3275, "step": 13071 }, { "epoch": 0.714678184328144, "grad_norm": 1.9153786897659302, "learning_rate": 4.211796813726571e-06, "loss": 1.2185, "step": 13072 }, { "epoch": 0.7147328567718656, "grad_norm": 2.556302547454834, "learning_rate": 4.210306782532889e-06, "loss": 1.3581, "step": 13073 }, { "epoch": 0.7147875292155871, "grad_norm": 1.5868682861328125, "learning_rate": 4.208816944670649e-06, "loss": 1.3115, "step": 13074 }, { "epoch": 0.7148422016593087, "grad_norm": 1.7718687057495117, "learning_rate": 4.207327300189602e-06, "loss": 1.4332, "step": 13075 }, { "epoch": 0.7148968741030303, "grad_norm": 1.4841418266296387, "learning_rate": 4.205837849139488e-06, "loss": 1.3721, "step": 13076 }, { "epoch": 0.7149515465467517, "grad_norm": 1.9138010740280151, "learning_rate": 4.204348591570046e-06, "loss": 1.3561, "step": 13077 }, { "epoch": 0.7150062189904733, "grad_norm": 1.5209813117980957, "learning_rate": 4.202859527530999e-06, "loss": 1.1049, "step": 13078 }, { "epoch": 0.7150608914341949, "grad_norm": 1.512353777885437, "learning_rate": 4.2013706570720815e-06, "loss": 1.5295, "step": 13079 }, { "epoch": 0.7151155638779164, "grad_norm": 1.3664517402648926, "learning_rate": 4.199881980243003e-06, "loss": 1.5791, "step": 13080 }, { "epoch": 0.715170236321638, "grad_norm": 2.025752305984497, "learning_rate": 4.1983934970934725e-06, "loss": 1.2911, "step": 13081 }, { "epoch": 0.7152249087653595, "grad_norm": 2.0199151039123535, "learning_rate": 4.196905207673201e-06, "loss": 1.3963, "step": 13082 }, { "epoch": 0.7152795812090811, "grad_norm": 1.8977910280227661, "learning_rate": 4.195417112031878e-06, "loss": 1.6553, "step": 13083 }, { "epoch": 0.7153342536528027, "grad_norm": 2.0006349086761475, "learning_rate": 4.193929210219202e-06, "loss": 1.2697, "step": 13084 }, { "epoch": 0.7153889260965242, "grad_norm": 1.7989511489868164, "learning_rate": 4.1924415022848545e-06, "loss": 1.183, "step": 13085 }, { "epoch": 0.7154435985402458, "grad_norm": 1.3713520765304565, "learning_rate": 4.1909539882785135e-06, "loss": 1.3998, "step": 13086 }, { "epoch": 0.7154982709839673, "grad_norm": 2.0835046768188477, "learning_rate": 4.1894666682498485e-06, "loss": 1.1449, "step": 13087 }, { "epoch": 0.7155529434276888, "grad_norm": 1.743453860282898, "learning_rate": 4.187979542248528e-06, "loss": 1.3926, "step": 13088 }, { "epoch": 0.7156076158714104, "grad_norm": 1.6514334678649902, "learning_rate": 4.186492610324204e-06, "loss": 1.7642, "step": 13089 }, { "epoch": 0.715662288315132, "grad_norm": 1.0458632707595825, "learning_rate": 4.185005872526538e-06, "loss": 1.5568, "step": 13090 }, { "epoch": 0.7157169607588535, "grad_norm": 1.8238434791564941, "learning_rate": 4.183519328905171e-06, "loss": 1.4784, "step": 13091 }, { "epoch": 0.7157716332025751, "grad_norm": 1.7725404500961304, "learning_rate": 4.18203297950974e-06, "loss": 1.5226, "step": 13092 }, { "epoch": 0.7158263056462967, "grad_norm": 1.4563573598861694, "learning_rate": 4.180546824389881e-06, "loss": 1.657, "step": 13093 }, { "epoch": 0.7158809780900182, "grad_norm": 1.1741341352462769, "learning_rate": 4.1790608635952214e-06, "loss": 1.4286, "step": 13094 }, { "epoch": 0.7159356505337398, "grad_norm": 1.99628484249115, "learning_rate": 4.177575097175375e-06, "loss": 1.5251, "step": 13095 }, { "epoch": 0.7159903229774612, "grad_norm": 1.5910179615020752, "learning_rate": 4.176089525179961e-06, "loss": 1.4588, "step": 13096 }, { "epoch": 0.7160449954211828, "grad_norm": 1.3599843978881836, "learning_rate": 4.174604147658582e-06, "loss": 1.4925, "step": 13097 }, { "epoch": 0.7160996678649044, "grad_norm": 1.4794307947158813, "learning_rate": 4.173118964660844e-06, "loss": 1.393, "step": 13098 }, { "epoch": 0.7161543403086259, "grad_norm": 2.0041146278381348, "learning_rate": 4.171633976236335e-06, "loss": 1.3589, "step": 13099 }, { "epoch": 0.7162090127523475, "grad_norm": 1.4312691688537598, "learning_rate": 4.1701491824346465e-06, "loss": 1.4819, "step": 13100 }, { "epoch": 0.7162636851960691, "grad_norm": 1.7094635963439941, "learning_rate": 4.168664583305357e-06, "loss": 1.1958, "step": 13101 }, { "epoch": 0.7163183576397906, "grad_norm": 1.6341941356658936, "learning_rate": 4.167180178898039e-06, "loss": 1.2903, "step": 13102 }, { "epoch": 0.7163730300835122, "grad_norm": 1.6572216749191284, "learning_rate": 4.165695969262259e-06, "loss": 1.3902, "step": 13103 }, { "epoch": 0.7164277025272338, "grad_norm": 1.231297492980957, "learning_rate": 4.164211954447585e-06, "loss": 1.4461, "step": 13104 }, { "epoch": 0.7164823749709552, "grad_norm": 1.5642775297164917, "learning_rate": 4.162728134503568e-06, "loss": 1.4523, "step": 13105 }, { "epoch": 0.7165370474146768, "grad_norm": 1.2448431253433228, "learning_rate": 4.1612445094797515e-06, "loss": 1.4523, "step": 13106 }, { "epoch": 0.7165917198583984, "grad_norm": 1.5766957998275757, "learning_rate": 4.159761079425687e-06, "loss": 1.5349, "step": 13107 }, { "epoch": 0.7166463923021199, "grad_norm": 1.6162474155426025, "learning_rate": 4.158277844390904e-06, "loss": 1.3561, "step": 13108 }, { "epoch": 0.7167010647458415, "grad_norm": 1.3766419887542725, "learning_rate": 4.1567948044249284e-06, "loss": 1.3327, "step": 13109 }, { "epoch": 0.716755737189563, "grad_norm": 1.4164868593215942, "learning_rate": 4.155311959577289e-06, "loss": 1.4598, "step": 13110 }, { "epoch": 0.7168104096332846, "grad_norm": 1.5088766813278198, "learning_rate": 4.153829309897494e-06, "loss": 1.414, "step": 13111 }, { "epoch": 0.7168650820770062, "grad_norm": 1.3413660526275635, "learning_rate": 4.152346855435062e-06, "loss": 1.5801, "step": 13112 }, { "epoch": 0.7169197545207276, "grad_norm": 1.8013386726379395, "learning_rate": 4.15086459623949e-06, "loss": 1.3265, "step": 13113 }, { "epoch": 0.7169744269644492, "grad_norm": 1.8879318237304688, "learning_rate": 4.149382532360275e-06, "loss": 1.6645, "step": 13114 }, { "epoch": 0.7170290994081708, "grad_norm": 1.5942336320877075, "learning_rate": 4.147900663846904e-06, "loss": 1.3157, "step": 13115 }, { "epoch": 0.7170837718518923, "grad_norm": 2.3005638122558594, "learning_rate": 4.146418990748865e-06, "loss": 1.056, "step": 13116 }, { "epoch": 0.7171384442956139, "grad_norm": 1.94976806640625, "learning_rate": 4.144937513115627e-06, "loss": 1.145, "step": 13117 }, { "epoch": 0.7171931167393355, "grad_norm": 2.825240135192871, "learning_rate": 4.143456230996667e-06, "loss": 1.4222, "step": 13118 }, { "epoch": 0.717247789183057, "grad_norm": 1.3548169136047363, "learning_rate": 4.141975144441448e-06, "loss": 1.4107, "step": 13119 }, { "epoch": 0.7173024616267786, "grad_norm": 1.6902952194213867, "learning_rate": 4.140494253499421e-06, "loss": 1.2276, "step": 13120 }, { "epoch": 0.7173571340705002, "grad_norm": 1.5151259899139404, "learning_rate": 4.139013558220044e-06, "loss": 1.2766, "step": 13121 }, { "epoch": 0.7174118065142217, "grad_norm": 1.6733319759368896, "learning_rate": 4.137533058652758e-06, "loss": 1.3824, "step": 13122 }, { "epoch": 0.7174664789579432, "grad_norm": 1.4595671892166138, "learning_rate": 4.136052754846996e-06, "loss": 1.4702, "step": 13123 }, { "epoch": 0.7175211514016647, "grad_norm": 1.6629650592803955, "learning_rate": 4.134572646852196e-06, "loss": 1.4072, "step": 13124 }, { "epoch": 0.7175758238453863, "grad_norm": 1.2872122526168823, "learning_rate": 4.1330927347177795e-06, "loss": 1.4474, "step": 13125 }, { "epoch": 0.7176304962891079, "grad_norm": 1.4161393642425537, "learning_rate": 4.1316130184931646e-06, "loss": 1.2667, "step": 13126 }, { "epoch": 0.7176851687328294, "grad_norm": 1.5236388444900513, "learning_rate": 4.130133498227761e-06, "loss": 1.3218, "step": 13127 }, { "epoch": 0.717739841176551, "grad_norm": 1.5023614168167114, "learning_rate": 4.12865417397097e-06, "loss": 1.275, "step": 13128 }, { "epoch": 0.7177945136202726, "grad_norm": 1.9173674583435059, "learning_rate": 4.127175045772196e-06, "loss": 1.4246, "step": 13129 }, { "epoch": 0.7178491860639941, "grad_norm": 1.4764140844345093, "learning_rate": 4.125696113680831e-06, "loss": 1.4461, "step": 13130 }, { "epoch": 0.7179038585077157, "grad_norm": 1.6932861804962158, "learning_rate": 4.124217377746251e-06, "loss": 1.1957, "step": 13131 }, { "epoch": 0.7179585309514372, "grad_norm": 2.1920742988586426, "learning_rate": 4.122738838017845e-06, "loss": 1.0633, "step": 13132 }, { "epoch": 0.7180132033951587, "grad_norm": 1.6044148206710815, "learning_rate": 4.121260494544982e-06, "loss": 1.3691, "step": 13133 }, { "epoch": 0.7180678758388803, "grad_norm": 1.3428899049758911, "learning_rate": 4.1197823473770215e-06, "loss": 1.3627, "step": 13134 }, { "epoch": 0.7181225482826019, "grad_norm": 1.5031927824020386, "learning_rate": 4.11830439656333e-06, "loss": 1.5581, "step": 13135 }, { "epoch": 0.7181772207263234, "grad_norm": 1.894054889678955, "learning_rate": 4.116826642153256e-06, "loss": 1.4087, "step": 13136 }, { "epoch": 0.718231893170045, "grad_norm": 1.5173742771148682, "learning_rate": 4.115349084196144e-06, "loss": 1.3614, "step": 13137 }, { "epoch": 0.7182865656137665, "grad_norm": 1.584535002708435, "learning_rate": 4.113871722741337e-06, "loss": 1.4237, "step": 13138 }, { "epoch": 0.7183412380574881, "grad_norm": 1.21879243850708, "learning_rate": 4.1123945578381665e-06, "loss": 1.5257, "step": 13139 }, { "epoch": 0.7183959105012097, "grad_norm": 1.9141887426376343, "learning_rate": 4.110917589535957e-06, "loss": 1.3262, "step": 13140 }, { "epoch": 0.7184505829449311, "grad_norm": 1.4620792865753174, "learning_rate": 4.109440817884027e-06, "loss": 1.4631, "step": 13141 }, { "epoch": 0.7185052553886527, "grad_norm": 1.7888407707214355, "learning_rate": 4.107964242931689e-06, "loss": 1.2624, "step": 13142 }, { "epoch": 0.7185599278323743, "grad_norm": 1.5554474592208862, "learning_rate": 4.106487864728254e-06, "loss": 1.4057, "step": 13143 }, { "epoch": 0.7186146002760958, "grad_norm": 1.6247258186340332, "learning_rate": 4.105011683323018e-06, "loss": 1.2255, "step": 13144 }, { "epoch": 0.7186692727198174, "grad_norm": 2.224553108215332, "learning_rate": 4.103535698765272e-06, "loss": 1.3954, "step": 13145 }, { "epoch": 0.718723945163539, "grad_norm": 1.5401540994644165, "learning_rate": 4.102059911104309e-06, "loss": 1.478, "step": 13146 }, { "epoch": 0.7187786176072605, "grad_norm": 1.3024486303329468, "learning_rate": 4.100584320389406e-06, "loss": 1.6712, "step": 13147 }, { "epoch": 0.7188332900509821, "grad_norm": 1.7967286109924316, "learning_rate": 4.099108926669832e-06, "loss": 1.4367, "step": 13148 }, { "epoch": 0.7188879624947037, "grad_norm": 1.3830907344818115, "learning_rate": 4.097633729994861e-06, "loss": 1.4128, "step": 13149 }, { "epoch": 0.7189426349384251, "grad_norm": 1.6056491136550903, "learning_rate": 4.096158730413751e-06, "loss": 1.211, "step": 13150 }, { "epoch": 0.7189973073821467, "grad_norm": 1.3682245016098022, "learning_rate": 4.09468392797575e-06, "loss": 1.444, "step": 13151 }, { "epoch": 0.7190519798258682, "grad_norm": 1.9028812646865845, "learning_rate": 4.093209322730114e-06, "loss": 1.3804, "step": 13152 }, { "epoch": 0.7191066522695898, "grad_norm": 1.7997421026229858, "learning_rate": 4.0917349147260796e-06, "loss": 1.2911, "step": 13153 }, { "epoch": 0.7191613247133114, "grad_norm": 1.2743617296218872, "learning_rate": 4.09026070401288e-06, "loss": 1.4845, "step": 13154 }, { "epoch": 0.7192159971570329, "grad_norm": 1.8288884162902832, "learning_rate": 4.088786690639744e-06, "loss": 1.2605, "step": 13155 }, { "epoch": 0.7192706696007545, "grad_norm": 1.6930879354476929, "learning_rate": 4.087312874655888e-06, "loss": 1.4282, "step": 13156 }, { "epoch": 0.7193253420444761, "grad_norm": 1.5000991821289062, "learning_rate": 4.085839256110533e-06, "loss": 1.4398, "step": 13157 }, { "epoch": 0.7193800144881976, "grad_norm": 1.8835686445236206, "learning_rate": 4.084365835052883e-06, "loss": 1.2599, "step": 13158 }, { "epoch": 0.7194346869319191, "grad_norm": 1.5167373418807983, "learning_rate": 4.082892611532136e-06, "loss": 1.1222, "step": 13159 }, { "epoch": 0.7194893593756407, "grad_norm": 1.459479570388794, "learning_rate": 4.081419585597493e-06, "loss": 1.3601, "step": 13160 }, { "epoch": 0.7195440318193622, "grad_norm": 1.3647804260253906, "learning_rate": 4.079946757298138e-06, "loss": 1.5055, "step": 13161 }, { "epoch": 0.7195987042630838, "grad_norm": 1.3003500699996948, "learning_rate": 4.078474126683249e-06, "loss": 1.6012, "step": 13162 }, { "epoch": 0.7196533767068054, "grad_norm": 1.7383638620376587, "learning_rate": 4.077001693802008e-06, "loss": 1.5029, "step": 13163 }, { "epoch": 0.7197080491505269, "grad_norm": 1.4050856828689575, "learning_rate": 4.0755294587035796e-06, "loss": 1.7256, "step": 13164 }, { "epoch": 0.7197627215942485, "grad_norm": 1.1387624740600586, "learning_rate": 4.074057421437124e-06, "loss": 1.3898, "step": 13165 }, { "epoch": 0.71981739403797, "grad_norm": 1.752578854560852, "learning_rate": 4.072585582051798e-06, "loss": 1.2232, "step": 13166 }, { "epoch": 0.7198720664816916, "grad_norm": 1.6564584970474243, "learning_rate": 4.071113940596744e-06, "loss": 1.4536, "step": 13167 }, { "epoch": 0.7199267389254131, "grad_norm": 1.6277743577957153, "learning_rate": 4.0696424971211124e-06, "loss": 1.2761, "step": 13168 }, { "epoch": 0.7199814113691346, "grad_norm": 1.767584204673767, "learning_rate": 4.068171251674033e-06, "loss": 1.4269, "step": 13169 }, { "epoch": 0.7200360838128562, "grad_norm": 1.3014353513717651, "learning_rate": 4.066700204304631e-06, "loss": 1.3532, "step": 13170 }, { "epoch": 0.7200907562565778, "grad_norm": 1.4757925271987915, "learning_rate": 4.065229355062037e-06, "loss": 1.4058, "step": 13171 }, { "epoch": 0.7201454287002993, "grad_norm": 1.4018433094024658, "learning_rate": 4.063758703995361e-06, "loss": 1.5882, "step": 13172 }, { "epoch": 0.7202001011440209, "grad_norm": 1.3741333484649658, "learning_rate": 4.0622882511537076e-06, "loss": 1.5691, "step": 13173 }, { "epoch": 0.7202547735877425, "grad_norm": 1.5935720205307007, "learning_rate": 4.060817996586186e-06, "loss": 1.4119, "step": 13174 }, { "epoch": 0.720309446031464, "grad_norm": 1.5932645797729492, "learning_rate": 4.059347940341889e-06, "loss": 1.414, "step": 13175 }, { "epoch": 0.7203641184751856, "grad_norm": 1.7022491693496704, "learning_rate": 4.0578780824699005e-06, "loss": 1.3758, "step": 13176 }, { "epoch": 0.7204187909189071, "grad_norm": 1.574481725692749, "learning_rate": 4.05640842301931e-06, "loss": 1.4426, "step": 13177 }, { "epoch": 0.7204734633626286, "grad_norm": 1.944775104522705, "learning_rate": 4.05493896203919e-06, "loss": 1.4926, "step": 13178 }, { "epoch": 0.7205281358063502, "grad_norm": 1.823385238647461, "learning_rate": 4.053469699578608e-06, "loss": 1.2521, "step": 13179 }, { "epoch": 0.7205828082500717, "grad_norm": 1.7361842393875122, "learning_rate": 4.052000635686627e-06, "loss": 1.4747, "step": 13180 }, { "epoch": 0.7206374806937933, "grad_norm": 1.5174134969711304, "learning_rate": 4.050531770412298e-06, "loss": 1.5473, "step": 13181 }, { "epoch": 0.7206921531375149, "grad_norm": 1.5464372634887695, "learning_rate": 4.049063103804678e-06, "loss": 1.5101, "step": 13182 }, { "epoch": 0.7207468255812364, "grad_norm": 1.7491601705551147, "learning_rate": 4.047594635912805e-06, "loss": 1.3327, "step": 13183 }, { "epoch": 0.720801498024958, "grad_norm": 1.579646110534668, "learning_rate": 4.046126366785712e-06, "loss": 1.5771, "step": 13184 }, { "epoch": 0.7208561704686796, "grad_norm": 1.8054627180099487, "learning_rate": 4.044658296472433e-06, "loss": 1.1695, "step": 13185 }, { "epoch": 0.720910842912401, "grad_norm": 1.6048283576965332, "learning_rate": 4.04319042502199e-06, "loss": 1.3414, "step": 13186 }, { "epoch": 0.7209655153561226, "grad_norm": 1.7252092361450195, "learning_rate": 4.0417227524833925e-06, "loss": 1.1995, "step": 13187 }, { "epoch": 0.7210201877998442, "grad_norm": 1.574913740158081, "learning_rate": 4.040255278905657e-06, "loss": 1.4214, "step": 13188 }, { "epoch": 0.7210748602435657, "grad_norm": 1.6417274475097656, "learning_rate": 4.038788004337783e-06, "loss": 1.7297, "step": 13189 }, { "epoch": 0.7211295326872873, "grad_norm": 1.2615656852722168, "learning_rate": 4.0373209288287616e-06, "loss": 1.5149, "step": 13190 }, { "epoch": 0.7211842051310089, "grad_norm": 1.6423075199127197, "learning_rate": 4.035854052427591e-06, "loss": 1.4183, "step": 13191 }, { "epoch": 0.7212388775747304, "grad_norm": 1.4005051851272583, "learning_rate": 4.034387375183248e-06, "loss": 1.4145, "step": 13192 }, { "epoch": 0.721293550018452, "grad_norm": 1.934293508529663, "learning_rate": 4.0329208971447095e-06, "loss": 1.409, "step": 13193 }, { "epoch": 0.7213482224621734, "grad_norm": 1.4840316772460938, "learning_rate": 4.031454618360945e-06, "loss": 1.331, "step": 13194 }, { "epoch": 0.721402894905895, "grad_norm": 1.6239439249038696, "learning_rate": 4.029988538880913e-06, "loss": 1.2802, "step": 13195 }, { "epoch": 0.7214575673496166, "grad_norm": 2.2341716289520264, "learning_rate": 4.028522658753575e-06, "loss": 1.0966, "step": 13196 }, { "epoch": 0.7215122397933381, "grad_norm": 1.4564895629882812, "learning_rate": 4.027056978027879e-06, "loss": 1.3976, "step": 13197 }, { "epoch": 0.7215669122370597, "grad_norm": 1.6495503187179565, "learning_rate": 4.025591496752763e-06, "loss": 1.4086, "step": 13198 }, { "epoch": 0.7216215846807813, "grad_norm": 1.7608219385147095, "learning_rate": 4.024126214977169e-06, "loss": 1.4772, "step": 13199 }, { "epoch": 0.7216762571245028, "grad_norm": 1.570813775062561, "learning_rate": 4.0226611327500245e-06, "loss": 1.5717, "step": 13200 }, { "epoch": 0.7217309295682244, "grad_norm": 1.5762763023376465, "learning_rate": 4.021196250120248e-06, "loss": 1.4603, "step": 13201 }, { "epoch": 0.721785602011946, "grad_norm": 1.5437902212142944, "learning_rate": 4.01973156713676e-06, "loss": 1.615, "step": 13202 }, { "epoch": 0.7218402744556675, "grad_norm": 1.5564897060394287, "learning_rate": 4.018267083848468e-06, "loss": 1.3765, "step": 13203 }, { "epoch": 0.721894946899389, "grad_norm": 1.636733889579773, "learning_rate": 4.016802800304277e-06, "loss": 1.1737, "step": 13204 }, { "epoch": 0.7219496193431106, "grad_norm": 2.206372022628784, "learning_rate": 4.015338716553079e-06, "loss": 1.3635, "step": 13205 }, { "epoch": 0.7220042917868321, "grad_norm": 1.4626208543777466, "learning_rate": 4.0138748326437645e-06, "loss": 1.4955, "step": 13206 }, { "epoch": 0.7220589642305537, "grad_norm": 1.8205957412719727, "learning_rate": 4.012411148625214e-06, "loss": 1.3802, "step": 13207 }, { "epoch": 0.7221136366742752, "grad_norm": 1.4042855501174927, "learning_rate": 4.0109476645463076e-06, "loss": 1.3132, "step": 13208 }, { "epoch": 0.7221683091179968, "grad_norm": 1.5263396501541138, "learning_rate": 4.0094843804559095e-06, "loss": 1.6061, "step": 13209 }, { "epoch": 0.7222229815617184, "grad_norm": 1.308106541633606, "learning_rate": 4.0080212964028884e-06, "loss": 1.2684, "step": 13210 }, { "epoch": 0.7222776540054399, "grad_norm": 1.7024270296096802, "learning_rate": 4.006558412436098e-06, "loss": 1.0953, "step": 13211 }, { "epoch": 0.7223323264491615, "grad_norm": 1.4629367589950562, "learning_rate": 4.0050957286043815e-06, "loss": 1.3437, "step": 13212 }, { "epoch": 0.722386998892883, "grad_norm": 1.4495165348052979, "learning_rate": 4.00363324495659e-06, "loss": 1.444, "step": 13213 }, { "epoch": 0.7224416713366045, "grad_norm": 1.4405531883239746, "learning_rate": 4.002170961541555e-06, "loss": 1.5586, "step": 13214 }, { "epoch": 0.7224963437803261, "grad_norm": 1.908592939376831, "learning_rate": 4.000708878408103e-06, "loss": 1.2164, "step": 13215 }, { "epoch": 0.7225510162240477, "grad_norm": 1.72487211227417, "learning_rate": 3.999246995605063e-06, "loss": 1.2814, "step": 13216 }, { "epoch": 0.7226056886677692, "grad_norm": 1.7007008790969849, "learning_rate": 3.997785313181246e-06, "loss": 1.4731, "step": 13217 }, { "epoch": 0.7226603611114908, "grad_norm": 1.41431725025177, "learning_rate": 3.996323831185462e-06, "loss": 1.3108, "step": 13218 }, { "epoch": 0.7227150335552124, "grad_norm": 1.4878973960876465, "learning_rate": 3.994862549666515e-06, "loss": 1.4804, "step": 13219 }, { "epoch": 0.7227697059989339, "grad_norm": 1.3890753984451294, "learning_rate": 3.9934014686731985e-06, "loss": 1.4556, "step": 13220 }, { "epoch": 0.7228243784426555, "grad_norm": 1.427299976348877, "learning_rate": 3.991940588254297e-06, "loss": 1.4347, "step": 13221 }, { "epoch": 0.7228790508863769, "grad_norm": 1.8652747869491577, "learning_rate": 3.990479908458602e-06, "loss": 1.2945, "step": 13222 }, { "epoch": 0.7229337233300985, "grad_norm": 1.2997232675552368, "learning_rate": 3.989019429334881e-06, "loss": 1.4409, "step": 13223 }, { "epoch": 0.7229883957738201, "grad_norm": 1.6342004537582397, "learning_rate": 3.98755915093191e-06, "loss": 1.3668, "step": 13224 }, { "epoch": 0.7230430682175416, "grad_norm": 1.7260514497756958, "learning_rate": 3.986099073298447e-06, "loss": 1.4277, "step": 13225 }, { "epoch": 0.7230977406612632, "grad_norm": 1.2909525632858276, "learning_rate": 3.984639196483245e-06, "loss": 1.667, "step": 13226 }, { "epoch": 0.7231524131049848, "grad_norm": 1.5538020133972168, "learning_rate": 3.98317952053506e-06, "loss": 1.375, "step": 13227 }, { "epoch": 0.7232070855487063, "grad_norm": 1.5351625680923462, "learning_rate": 3.9817200455026295e-06, "loss": 1.4169, "step": 13228 }, { "epoch": 0.7232617579924279, "grad_norm": 1.333662509918213, "learning_rate": 3.980260771434685e-06, "loss": 1.4116, "step": 13229 }, { "epoch": 0.7233164304361495, "grad_norm": 1.3336762189865112, "learning_rate": 3.978801698379963e-06, "loss": 1.5461, "step": 13230 }, { "epoch": 0.7233711028798709, "grad_norm": 1.7068204879760742, "learning_rate": 3.977342826387181e-06, "loss": 1.4305, "step": 13231 }, { "epoch": 0.7234257753235925, "grad_norm": 1.5270531177520752, "learning_rate": 3.975884155505054e-06, "loss": 1.5833, "step": 13232 }, { "epoch": 0.7234804477673141, "grad_norm": 1.631502389907837, "learning_rate": 3.974425685782293e-06, "loss": 1.3912, "step": 13233 }, { "epoch": 0.7235351202110356, "grad_norm": 1.430442452430725, "learning_rate": 3.972967417267596e-06, "loss": 1.2903, "step": 13234 }, { "epoch": 0.7235897926547572, "grad_norm": 1.3024101257324219, "learning_rate": 3.971509350009657e-06, "loss": 1.3167, "step": 13235 }, { "epoch": 0.7236444650984787, "grad_norm": 1.171523928642273, "learning_rate": 3.970051484057171e-06, "loss": 1.5939, "step": 13236 }, { "epoch": 0.7236991375422003, "grad_norm": 1.4185653924942017, "learning_rate": 3.968593819458812e-06, "loss": 1.2746, "step": 13237 }, { "epoch": 0.7237538099859219, "grad_norm": 2.081918239593506, "learning_rate": 3.967136356263261e-06, "loss": 1.5154, "step": 13238 }, { "epoch": 0.7238084824296434, "grad_norm": 1.667275071144104, "learning_rate": 3.965679094519184e-06, "loss": 1.3392, "step": 13239 }, { "epoch": 0.723863154873365, "grad_norm": 1.7559940814971924, "learning_rate": 3.964222034275239e-06, "loss": 1.2496, "step": 13240 }, { "epoch": 0.7239178273170865, "grad_norm": 1.5085465908050537, "learning_rate": 3.962765175580088e-06, "loss": 1.468, "step": 13241 }, { "epoch": 0.723972499760808, "grad_norm": 2.365527868270874, "learning_rate": 3.961308518482373e-06, "loss": 1.3132, "step": 13242 }, { "epoch": 0.7240271722045296, "grad_norm": 1.4448906183242798, "learning_rate": 3.959852063030738e-06, "loss": 1.3479, "step": 13243 }, { "epoch": 0.7240818446482512, "grad_norm": 1.701143741607666, "learning_rate": 3.958395809273815e-06, "loss": 1.3495, "step": 13244 }, { "epoch": 0.7241365170919727, "grad_norm": 1.391364574432373, "learning_rate": 3.956939757260234e-06, "loss": 1.5189, "step": 13245 }, { "epoch": 0.7241911895356943, "grad_norm": 1.5633621215820312, "learning_rate": 3.955483907038612e-06, "loss": 1.4336, "step": 13246 }, { "epoch": 0.7242458619794159, "grad_norm": 1.4406219720840454, "learning_rate": 3.954028258657568e-06, "loss": 1.4412, "step": 13247 }, { "epoch": 0.7243005344231374, "grad_norm": 1.6205672025680542, "learning_rate": 3.952572812165709e-06, "loss": 1.2938, "step": 13248 }, { "epoch": 0.724355206866859, "grad_norm": 1.5235742330551147, "learning_rate": 3.951117567611631e-06, "loss": 1.548, "step": 13249 }, { "epoch": 0.7244098793105805, "grad_norm": 1.7096258401870728, "learning_rate": 3.949662525043935e-06, "loss": 1.3871, "step": 13250 }, { "epoch": 0.724464551754302, "grad_norm": 1.5432415008544922, "learning_rate": 3.9482076845112006e-06, "loss": 1.6772, "step": 13251 }, { "epoch": 0.7245192241980236, "grad_norm": 1.3271650075912476, "learning_rate": 3.946753046062017e-06, "loss": 1.5113, "step": 13252 }, { "epoch": 0.7245738966417451, "grad_norm": 1.3217859268188477, "learning_rate": 3.945298609744953e-06, "loss": 1.6044, "step": 13253 }, { "epoch": 0.7246285690854667, "grad_norm": 1.6715611219406128, "learning_rate": 3.943844375608573e-06, "loss": 1.2553, "step": 13254 }, { "epoch": 0.7246832415291883, "grad_norm": 1.4024730920791626, "learning_rate": 3.942390343701444e-06, "loss": 1.277, "step": 13255 }, { "epoch": 0.7247379139729098, "grad_norm": 1.670888900756836, "learning_rate": 3.940936514072117e-06, "loss": 1.4175, "step": 13256 }, { "epoch": 0.7247925864166314, "grad_norm": 1.5939277410507202, "learning_rate": 3.939482886769136e-06, "loss": 1.3471, "step": 13257 }, { "epoch": 0.724847258860353, "grad_norm": 1.5092369318008423, "learning_rate": 3.938029461841044e-06, "loss": 1.4014, "step": 13258 }, { "epoch": 0.7249019313040744, "grad_norm": 1.102150321006775, "learning_rate": 3.9365762393363725e-06, "loss": 1.6035, "step": 13259 }, { "epoch": 0.724956603747796, "grad_norm": 1.216997742652893, "learning_rate": 3.935123219303646e-06, "loss": 1.5721, "step": 13260 }, { "epoch": 0.7250112761915176, "grad_norm": 1.4621061086654663, "learning_rate": 3.9336704017913895e-06, "loss": 1.3895, "step": 13261 }, { "epoch": 0.7250659486352391, "grad_norm": 1.4769136905670166, "learning_rate": 3.932217786848114e-06, "loss": 1.4886, "step": 13262 }, { "epoch": 0.7251206210789607, "grad_norm": 1.7751065492630005, "learning_rate": 3.930765374522322e-06, "loss": 1.4832, "step": 13263 }, { "epoch": 0.7251752935226823, "grad_norm": 1.3125413656234741, "learning_rate": 3.929313164862518e-06, "loss": 1.2807, "step": 13264 }, { "epoch": 0.7252299659664038, "grad_norm": 1.3609232902526855, "learning_rate": 3.92786115791719e-06, "loss": 1.2703, "step": 13265 }, { "epoch": 0.7252846384101254, "grad_norm": 1.794008731842041, "learning_rate": 3.9264093537348305e-06, "loss": 1.5936, "step": 13266 }, { "epoch": 0.7253393108538468, "grad_norm": 1.723110556602478, "learning_rate": 3.924957752363915e-06, "loss": 1.5285, "step": 13267 }, { "epoch": 0.7253939832975684, "grad_norm": 1.7140294313430786, "learning_rate": 3.923506353852912e-06, "loss": 1.4098, "step": 13268 }, { "epoch": 0.72544865574129, "grad_norm": 1.3087215423583984, "learning_rate": 3.9220551582502934e-06, "loss": 1.4811, "step": 13269 }, { "epoch": 0.7255033281850115, "grad_norm": 1.8749809265136719, "learning_rate": 3.9206041656045155e-06, "loss": 1.4499, "step": 13270 }, { "epoch": 0.7255580006287331, "grad_norm": 1.4965792894363403, "learning_rate": 3.919153375964032e-06, "loss": 1.3761, "step": 13271 }, { "epoch": 0.7256126730724547, "grad_norm": 1.5496716499328613, "learning_rate": 3.917702789377284e-06, "loss": 1.6072, "step": 13272 }, { "epoch": 0.7256673455161762, "grad_norm": 1.2554991245269775, "learning_rate": 3.916252405892714e-06, "loss": 1.4952, "step": 13273 }, { "epoch": 0.7257220179598978, "grad_norm": 1.5789523124694824, "learning_rate": 3.91480222555875e-06, "loss": 1.3571, "step": 13274 }, { "epoch": 0.7257766904036194, "grad_norm": 1.7675422430038452, "learning_rate": 3.91335224842382e-06, "loss": 1.4666, "step": 13275 }, { "epoch": 0.7258313628473408, "grad_norm": 1.7985053062438965, "learning_rate": 3.911902474536342e-06, "loss": 1.3051, "step": 13276 }, { "epoch": 0.7258860352910624, "grad_norm": 1.5346981287002563, "learning_rate": 3.910452903944722e-06, "loss": 1.1734, "step": 13277 }, { "epoch": 0.725940707734784, "grad_norm": 1.3151674270629883, "learning_rate": 3.909003536697374e-06, "loss": 1.3289, "step": 13278 }, { "epoch": 0.7259953801785055, "grad_norm": 1.7108608484268188, "learning_rate": 3.907554372842688e-06, "loss": 1.5936, "step": 13279 }, { "epoch": 0.7260500526222271, "grad_norm": 1.617008924484253, "learning_rate": 3.90610541242906e-06, "loss": 1.4229, "step": 13280 }, { "epoch": 0.7261047250659486, "grad_norm": 1.3216065168380737, "learning_rate": 3.904656655504872e-06, "loss": 1.2802, "step": 13281 }, { "epoch": 0.7261593975096702, "grad_norm": 1.5641577243804932, "learning_rate": 3.903208102118503e-06, "loss": 1.0851, "step": 13282 }, { "epoch": 0.7262140699533918, "grad_norm": 1.5111459493637085, "learning_rate": 3.90175975231832e-06, "loss": 1.4503, "step": 13283 }, { "epoch": 0.7262687423971133, "grad_norm": 1.3318755626678467, "learning_rate": 3.90031160615269e-06, "loss": 1.4598, "step": 13284 }, { "epoch": 0.7263234148408348, "grad_norm": 2.104567050933838, "learning_rate": 3.898863663669965e-06, "loss": 1.363, "step": 13285 }, { "epoch": 0.7263780872845564, "grad_norm": 1.1383119821548462, "learning_rate": 3.897415924918503e-06, "loss": 1.5095, "step": 13286 }, { "epoch": 0.7264327597282779, "grad_norm": 1.6930205821990967, "learning_rate": 3.895968389946644e-06, "loss": 1.4009, "step": 13287 }, { "epoch": 0.7264874321719995, "grad_norm": 1.8278529644012451, "learning_rate": 3.894521058802719e-06, "loss": 1.313, "step": 13288 }, { "epoch": 0.7265421046157211, "grad_norm": 1.493000864982605, "learning_rate": 3.893073931535068e-06, "loss": 1.509, "step": 13289 }, { "epoch": 0.7265967770594426, "grad_norm": 1.7448621988296509, "learning_rate": 3.891627008192007e-06, "loss": 1.5381, "step": 13290 }, { "epoch": 0.7266514495031642, "grad_norm": 1.8190313577651978, "learning_rate": 3.890180288821851e-06, "loss": 1.2748, "step": 13291 }, { "epoch": 0.7267061219468858, "grad_norm": 1.4589226245880127, "learning_rate": 3.888733773472916e-06, "loss": 1.4683, "step": 13292 }, { "epoch": 0.7267607943906073, "grad_norm": 1.5863604545593262, "learning_rate": 3.8872874621934976e-06, "loss": 1.0498, "step": 13293 }, { "epoch": 0.7268154668343288, "grad_norm": 1.5709010362625122, "learning_rate": 3.885841355031897e-06, "loss": 1.4581, "step": 13294 }, { "epoch": 0.7268701392780503, "grad_norm": 1.7446595430374146, "learning_rate": 3.8843954520364026e-06, "loss": 1.3102, "step": 13295 }, { "epoch": 0.7269248117217719, "grad_norm": 1.564049243927002, "learning_rate": 3.882949753255294e-06, "loss": 1.3727, "step": 13296 }, { "epoch": 0.7269794841654935, "grad_norm": 1.574391484260559, "learning_rate": 3.881504258736847e-06, "loss": 1.5927, "step": 13297 }, { "epoch": 0.727034156609215, "grad_norm": 1.422340750694275, "learning_rate": 3.88005896852933e-06, "loss": 1.4669, "step": 13298 }, { "epoch": 0.7270888290529366, "grad_norm": 1.3806185722351074, "learning_rate": 3.878613882681002e-06, "loss": 1.54, "step": 13299 }, { "epoch": 0.7271435014966582, "grad_norm": 1.4404374361038208, "learning_rate": 3.877169001240124e-06, "loss": 1.4825, "step": 13300 }, { "epoch": 0.7271981739403797, "grad_norm": 1.4736770391464233, "learning_rate": 3.875724324254941e-06, "loss": 1.5253, "step": 13301 }, { "epoch": 0.7272528463841013, "grad_norm": 1.4293367862701416, "learning_rate": 3.874279851773691e-06, "loss": 1.3842, "step": 13302 }, { "epoch": 0.7273075188278229, "grad_norm": 1.1970146894454956, "learning_rate": 3.872835583844614e-06, "loss": 1.6317, "step": 13303 }, { "epoch": 0.7273621912715443, "grad_norm": 1.5315020084381104, "learning_rate": 3.871391520515935e-06, "loss": 1.4327, "step": 13304 }, { "epoch": 0.7274168637152659, "grad_norm": 1.6559420824050903, "learning_rate": 3.86994766183587e-06, "loss": 1.4498, "step": 13305 }, { "epoch": 0.7274715361589875, "grad_norm": 2.0756187438964844, "learning_rate": 3.868504007852641e-06, "loss": 1.2492, "step": 13306 }, { "epoch": 0.727526208602709, "grad_norm": 1.4353759288787842, "learning_rate": 3.867060558614451e-06, "loss": 1.5716, "step": 13307 }, { "epoch": 0.7275808810464306, "grad_norm": 1.9083967208862305, "learning_rate": 3.865617314169502e-06, "loss": 1.5421, "step": 13308 }, { "epoch": 0.7276355534901521, "grad_norm": 1.686513066291809, "learning_rate": 3.864174274565984e-06, "loss": 1.3061, "step": 13309 }, { "epoch": 0.7276902259338737, "grad_norm": 1.2748061418533325, "learning_rate": 3.862731439852082e-06, "loss": 1.3574, "step": 13310 }, { "epoch": 0.7277448983775953, "grad_norm": 1.7979427576065063, "learning_rate": 3.861288810075983e-06, "loss": 1.3433, "step": 13311 }, { "epoch": 0.7277995708213167, "grad_norm": 1.6322555541992188, "learning_rate": 3.859846385285855e-06, "loss": 1.1169, "step": 13312 }, { "epoch": 0.7278542432650383, "grad_norm": 1.6549919843673706, "learning_rate": 3.8584041655298606e-06, "loss": 1.4283, "step": 13313 }, { "epoch": 0.7279089157087599, "grad_norm": 1.8039865493774414, "learning_rate": 3.856962150856167e-06, "loss": 1.3963, "step": 13314 }, { "epoch": 0.7279635881524814, "grad_norm": 1.9602131843566895, "learning_rate": 3.855520341312922e-06, "loss": 1.2362, "step": 13315 }, { "epoch": 0.728018260596203, "grad_norm": 1.6588704586029053, "learning_rate": 3.854078736948268e-06, "loss": 1.3915, "step": 13316 }, { "epoch": 0.7280729330399246, "grad_norm": 1.8047354221343994, "learning_rate": 3.85263733781035e-06, "loss": 1.2679, "step": 13317 }, { "epoch": 0.7281276054836461, "grad_norm": 2.027789831161499, "learning_rate": 3.851196143947296e-06, "loss": 1.1927, "step": 13318 }, { "epoch": 0.7281822779273677, "grad_norm": 1.6320313215255737, "learning_rate": 3.849755155407229e-06, "loss": 1.4681, "step": 13319 }, { "epoch": 0.7282369503710893, "grad_norm": 1.9826712608337402, "learning_rate": 3.848314372238272e-06, "loss": 1.3611, "step": 13320 }, { "epoch": 0.7282916228148107, "grad_norm": 1.6231688261032104, "learning_rate": 3.846873794488534e-06, "loss": 1.5235, "step": 13321 }, { "epoch": 0.7283462952585323, "grad_norm": 1.824975609779358, "learning_rate": 3.845433422206119e-06, "loss": 1.1907, "step": 13322 }, { "epoch": 0.7284009677022538, "grad_norm": 1.6025755405426025, "learning_rate": 3.843993255439124e-06, "loss": 1.4614, "step": 13323 }, { "epoch": 0.7284556401459754, "grad_norm": 1.5368493795394897, "learning_rate": 3.842553294235635e-06, "loss": 1.4847, "step": 13324 }, { "epoch": 0.728510312589697, "grad_norm": 1.552394986152649, "learning_rate": 3.841113538643745e-06, "loss": 1.5496, "step": 13325 }, { "epoch": 0.7285649850334185, "grad_norm": 1.3439037799835205, "learning_rate": 3.839673988711526e-06, "loss": 1.2889, "step": 13326 }, { "epoch": 0.7286196574771401, "grad_norm": 2.2818148136138916, "learning_rate": 3.838234644487045e-06, "loss": 1.5024, "step": 13327 }, { "epoch": 0.7286743299208617, "grad_norm": 1.601462721824646, "learning_rate": 3.836795506018371e-06, "loss": 1.4361, "step": 13328 }, { "epoch": 0.7287290023645832, "grad_norm": 1.8057128190994263, "learning_rate": 3.835356573353558e-06, "loss": 1.3379, "step": 13329 }, { "epoch": 0.7287836748083047, "grad_norm": 1.4885857105255127, "learning_rate": 3.833917846540651e-06, "loss": 1.5021, "step": 13330 }, { "epoch": 0.7288383472520263, "grad_norm": 1.8642915487289429, "learning_rate": 3.8324793256277e-06, "loss": 1.3324, "step": 13331 }, { "epoch": 0.7288930196957478, "grad_norm": 1.9981985092163086, "learning_rate": 3.831041010662737e-06, "loss": 1.3055, "step": 13332 }, { "epoch": 0.7289476921394694, "grad_norm": 2.1516358852386475, "learning_rate": 3.829602901693788e-06, "loss": 1.2496, "step": 13333 }, { "epoch": 0.729002364583191, "grad_norm": 1.6001802682876587, "learning_rate": 3.828164998768879e-06, "loss": 1.5897, "step": 13334 }, { "epoch": 0.7290570370269125, "grad_norm": 1.3349990844726562, "learning_rate": 3.826727301936025e-06, "loss": 1.7112, "step": 13335 }, { "epoch": 0.7291117094706341, "grad_norm": 1.3337342739105225, "learning_rate": 3.8252898112432315e-06, "loss": 1.6235, "step": 13336 }, { "epoch": 0.7291663819143556, "grad_norm": 1.4041022062301636, "learning_rate": 3.823852526738501e-06, "loss": 1.3002, "step": 13337 }, { "epoch": 0.7292210543580772, "grad_norm": 1.7310172319412231, "learning_rate": 3.822415448469824e-06, "loss": 1.5107, "step": 13338 }, { "epoch": 0.7292757268017988, "grad_norm": 1.7004755735397339, "learning_rate": 3.820978576485194e-06, "loss": 1.3065, "step": 13339 }, { "epoch": 0.7293303992455202, "grad_norm": 1.3039065599441528, "learning_rate": 3.8195419108325896e-06, "loss": 1.4937, "step": 13340 }, { "epoch": 0.7293850716892418, "grad_norm": 1.2129333019256592, "learning_rate": 3.8181054515599806e-06, "loss": 1.6381, "step": 13341 }, { "epoch": 0.7294397441329634, "grad_norm": 1.4363198280334473, "learning_rate": 3.816669198715339e-06, "loss": 1.2674, "step": 13342 }, { "epoch": 0.7294944165766849, "grad_norm": 1.6046864986419678, "learning_rate": 3.815233152346623e-06, "loss": 1.484, "step": 13343 }, { "epoch": 0.7295490890204065, "grad_norm": 1.4860330820083618, "learning_rate": 3.8137973125017825e-06, "loss": 1.3934, "step": 13344 }, { "epoch": 0.7296037614641281, "grad_norm": 1.525133490562439, "learning_rate": 3.812361679228769e-06, "loss": 1.6285, "step": 13345 }, { "epoch": 0.7296584339078496, "grad_norm": 1.5167624950408936, "learning_rate": 3.810926252575519e-06, "loss": 1.4984, "step": 13346 }, { "epoch": 0.7297131063515712, "grad_norm": 1.5757241249084473, "learning_rate": 3.8094910325899637e-06, "loss": 1.4154, "step": 13347 }, { "epoch": 0.7297677787952928, "grad_norm": 1.550381064414978, "learning_rate": 3.8080560193200288e-06, "loss": 1.5046, "step": 13348 }, { "epoch": 0.7298224512390142, "grad_norm": 1.3431015014648438, "learning_rate": 3.8066212128136315e-06, "loss": 1.5824, "step": 13349 }, { "epoch": 0.7298771236827358, "grad_norm": 1.4108973741531372, "learning_rate": 3.8051866131186876e-06, "loss": 1.4684, "step": 13350 }, { "epoch": 0.7299317961264573, "grad_norm": 1.978116750717163, "learning_rate": 3.8037522202831002e-06, "loss": 1.2645, "step": 13351 }, { "epoch": 0.7299864685701789, "grad_norm": 1.2512375116348267, "learning_rate": 3.8023180343547615e-06, "loss": 1.4754, "step": 13352 }, { "epoch": 0.7300411410139005, "grad_norm": 1.56326162815094, "learning_rate": 3.8008840553815707e-06, "loss": 1.3587, "step": 13353 }, { "epoch": 0.730095813457622, "grad_norm": 1.66725492477417, "learning_rate": 3.799450283411409e-06, "loss": 1.2069, "step": 13354 }, { "epoch": 0.7301504859013436, "grad_norm": 2.1113641262054443, "learning_rate": 3.798016718492148e-06, "loss": 1.2464, "step": 13355 }, { "epoch": 0.7302051583450652, "grad_norm": 1.401994228363037, "learning_rate": 3.796583360671665e-06, "loss": 1.4538, "step": 13356 }, { "epoch": 0.7302598307887866, "grad_norm": 1.2476732730865479, "learning_rate": 3.795150209997822e-06, "loss": 1.666, "step": 13357 }, { "epoch": 0.7303145032325082, "grad_norm": 1.6986415386199951, "learning_rate": 3.7937172665184684e-06, "loss": 1.3875, "step": 13358 }, { "epoch": 0.7303691756762298, "grad_norm": 1.7376981973648071, "learning_rate": 3.7922845302814636e-06, "loss": 1.3458, "step": 13359 }, { "epoch": 0.7304238481199513, "grad_norm": 1.9605079889297485, "learning_rate": 3.790852001334645e-06, "loss": 1.3097, "step": 13360 }, { "epoch": 0.7304785205636729, "grad_norm": 1.5474213361740112, "learning_rate": 3.7894196797258475e-06, "loss": 1.1867, "step": 13361 }, { "epoch": 0.7305331930073945, "grad_norm": 1.8221198320388794, "learning_rate": 3.7879875655029018e-06, "loss": 1.2728, "step": 13362 }, { "epoch": 0.730587865451116, "grad_norm": 1.6893861293792725, "learning_rate": 3.7865556587136233e-06, "loss": 1.3582, "step": 13363 }, { "epoch": 0.7306425378948376, "grad_norm": 2.0517146587371826, "learning_rate": 3.7851239594058365e-06, "loss": 1.1882, "step": 13364 }, { "epoch": 0.7306972103385591, "grad_norm": 1.5270187854766846, "learning_rate": 3.7836924676273433e-06, "loss": 1.5181, "step": 13365 }, { "epoch": 0.7307518827822806, "grad_norm": 1.798729658126831, "learning_rate": 3.7822611834259425e-06, "loss": 1.2547, "step": 13366 }, { "epoch": 0.7308065552260022, "grad_norm": 1.344048261642456, "learning_rate": 3.7808301068494347e-06, "loss": 1.3611, "step": 13367 }, { "epoch": 0.7308612276697237, "grad_norm": 1.5107592344284058, "learning_rate": 3.7793992379456033e-06, "loss": 1.2966, "step": 13368 }, { "epoch": 0.7309159001134453, "grad_norm": 1.1827212572097778, "learning_rate": 3.7779685767622255e-06, "loss": 1.5749, "step": 13369 }, { "epoch": 0.7309705725571669, "grad_norm": 2.106147050857544, "learning_rate": 3.77653812334708e-06, "loss": 1.4541, "step": 13370 }, { "epoch": 0.7310252450008884, "grad_norm": 1.6212233304977417, "learning_rate": 3.77510787774793e-06, "loss": 1.3892, "step": 13371 }, { "epoch": 0.73107991744461, "grad_norm": 1.7985893487930298, "learning_rate": 3.7736778400125328e-06, "loss": 1.2029, "step": 13372 }, { "epoch": 0.7311345898883316, "grad_norm": 1.269769549369812, "learning_rate": 3.772248010188646e-06, "loss": 1.6607, "step": 13373 }, { "epoch": 0.7311892623320531, "grad_norm": 1.5898311138153076, "learning_rate": 3.7708183883240123e-06, "loss": 1.493, "step": 13374 }, { "epoch": 0.7312439347757747, "grad_norm": 1.5796600580215454, "learning_rate": 3.769388974466369e-06, "loss": 1.2228, "step": 13375 }, { "epoch": 0.7312986072194962, "grad_norm": 1.7692946195602417, "learning_rate": 3.7679597686634495e-06, "loss": 1.7061, "step": 13376 }, { "epoch": 0.7313532796632177, "grad_norm": 1.7341071367263794, "learning_rate": 3.766530770962974e-06, "loss": 1.5103, "step": 13377 }, { "epoch": 0.7314079521069393, "grad_norm": 1.8177311420440674, "learning_rate": 3.7651019814126656e-06, "loss": 1.434, "step": 13378 }, { "epoch": 0.7314626245506608, "grad_norm": 1.927527904510498, "learning_rate": 3.763673400060234e-06, "loss": 1.2466, "step": 13379 }, { "epoch": 0.7315172969943824, "grad_norm": 1.5988593101501465, "learning_rate": 3.7622450269533773e-06, "loss": 1.3814, "step": 13380 }, { "epoch": 0.731571969438104, "grad_norm": 1.7677712440490723, "learning_rate": 3.7608168621398e-06, "loss": 1.4185, "step": 13381 }, { "epoch": 0.7316266418818255, "grad_norm": 1.6534308195114136, "learning_rate": 3.759388905667188e-06, "loss": 1.4666, "step": 13382 }, { "epoch": 0.7316813143255471, "grad_norm": 1.497624158859253, "learning_rate": 3.757961157583221e-06, "loss": 1.4968, "step": 13383 }, { "epoch": 0.7317359867692687, "grad_norm": 1.648060917854309, "learning_rate": 3.756533617935583e-06, "loss": 1.4029, "step": 13384 }, { "epoch": 0.7317906592129901, "grad_norm": 1.2373054027557373, "learning_rate": 3.7551062867719367e-06, "loss": 1.4718, "step": 13385 }, { "epoch": 0.7318453316567117, "grad_norm": 1.771254062652588, "learning_rate": 3.753679164139947e-06, "loss": 1.026, "step": 13386 }, { "epoch": 0.7319000041004333, "grad_norm": 1.6982083320617676, "learning_rate": 3.752252250087267e-06, "loss": 1.5446, "step": 13387 }, { "epoch": 0.7319546765441548, "grad_norm": 1.5193634033203125, "learning_rate": 3.750825544661545e-06, "loss": 1.4124, "step": 13388 }, { "epoch": 0.7320093489878764, "grad_norm": 1.6670897006988525, "learning_rate": 3.749399047910418e-06, "loss": 1.1894, "step": 13389 }, { "epoch": 0.732064021431598, "grad_norm": 1.5662552118301392, "learning_rate": 3.7479727598815287e-06, "loss": 1.2553, "step": 13390 }, { "epoch": 0.7321186938753195, "grad_norm": 1.3548439741134644, "learning_rate": 3.7465466806225006e-06, "loss": 1.3931, "step": 13391 }, { "epoch": 0.7321733663190411, "grad_norm": 1.4551600217819214, "learning_rate": 3.7451208101809477e-06, "loss": 1.5757, "step": 13392 }, { "epoch": 0.7322280387627625, "grad_norm": 1.4905716180801392, "learning_rate": 3.7436951486044927e-06, "loss": 1.3616, "step": 13393 }, { "epoch": 0.7322827112064841, "grad_norm": 1.5610300302505493, "learning_rate": 3.7422696959407347e-06, "loss": 1.4619, "step": 13394 }, { "epoch": 0.7323373836502057, "grad_norm": 1.2549875974655151, "learning_rate": 3.740844452237279e-06, "loss": 1.6937, "step": 13395 }, { "epoch": 0.7323920560939272, "grad_norm": 2.1311752796173096, "learning_rate": 3.739419417541714e-06, "loss": 1.3906, "step": 13396 }, { "epoch": 0.7324467285376488, "grad_norm": 1.659342885017395, "learning_rate": 3.7379945919016225e-06, "loss": 1.578, "step": 13397 }, { "epoch": 0.7325014009813704, "grad_norm": 1.4102318286895752, "learning_rate": 3.7365699753645888e-06, "loss": 1.3442, "step": 13398 }, { "epoch": 0.7325560734250919, "grad_norm": 1.366795539855957, "learning_rate": 3.7351455679781823e-06, "loss": 1.2206, "step": 13399 }, { "epoch": 0.7326107458688135, "grad_norm": 1.6186286211013794, "learning_rate": 3.7337213697899656e-06, "loss": 1.1739, "step": 13400 }, { "epoch": 0.7326654183125351, "grad_norm": 1.5103859901428223, "learning_rate": 3.732297380847496e-06, "loss": 1.5901, "step": 13401 }, { "epoch": 0.7327200907562565, "grad_norm": 1.607422947883606, "learning_rate": 3.730873601198326e-06, "loss": 1.3845, "step": 13402 }, { "epoch": 0.7327747631999781, "grad_norm": 1.9244545698165894, "learning_rate": 3.729450030889993e-06, "loss": 1.5069, "step": 13403 }, { "epoch": 0.7328294356436997, "grad_norm": 1.5367941856384277, "learning_rate": 3.7280266699700406e-06, "loss": 1.508, "step": 13404 }, { "epoch": 0.7328841080874212, "grad_norm": 1.3955307006835938, "learning_rate": 3.726603518485996e-06, "loss": 1.2602, "step": 13405 }, { "epoch": 0.7329387805311428, "grad_norm": 2.6283321380615234, "learning_rate": 3.7251805764853776e-06, "loss": 1.3409, "step": 13406 }, { "epoch": 0.7329934529748643, "grad_norm": 1.3405768871307373, "learning_rate": 3.7237578440157076e-06, "loss": 1.5968, "step": 13407 }, { "epoch": 0.7330481254185859, "grad_norm": 1.8799700736999512, "learning_rate": 3.722335321124487e-06, "loss": 1.308, "step": 13408 }, { "epoch": 0.7331027978623075, "grad_norm": 1.992583990097046, "learning_rate": 3.720913007859225e-06, "loss": 1.2866, "step": 13409 }, { "epoch": 0.733157470306029, "grad_norm": 1.6721669435501099, "learning_rate": 3.7194909042674123e-06, "loss": 1.221, "step": 13410 }, { "epoch": 0.7332121427497506, "grad_norm": 1.7500479221343994, "learning_rate": 3.7180690103965313e-06, "loss": 1.1833, "step": 13411 }, { "epoch": 0.7332668151934721, "grad_norm": 1.6998869180679321, "learning_rate": 3.7166473262940717e-06, "loss": 1.5714, "step": 13412 }, { "epoch": 0.7333214876371936, "grad_norm": 1.4111469984054565, "learning_rate": 3.715225852007501e-06, "loss": 1.2304, "step": 13413 }, { "epoch": 0.7333761600809152, "grad_norm": 1.3129470348358154, "learning_rate": 3.7138045875842877e-06, "loss": 1.5654, "step": 13414 }, { "epoch": 0.7334308325246368, "grad_norm": 1.5616236925125122, "learning_rate": 3.7123835330718903e-06, "loss": 1.3268, "step": 13415 }, { "epoch": 0.7334855049683583, "grad_norm": 1.2846808433532715, "learning_rate": 3.7109626885177606e-06, "loss": 1.6412, "step": 13416 }, { "epoch": 0.7335401774120799, "grad_norm": 1.5043578147888184, "learning_rate": 3.7095420539693417e-06, "loss": 1.1239, "step": 13417 }, { "epoch": 0.7335948498558015, "grad_norm": 1.4785650968551636, "learning_rate": 3.7081216294740773e-06, "loss": 1.4409, "step": 13418 }, { "epoch": 0.733649522299523, "grad_norm": 1.9407212734222412, "learning_rate": 3.7067014150793955e-06, "loss": 1.4128, "step": 13419 }, { "epoch": 0.7337041947432446, "grad_norm": 1.7697774171829224, "learning_rate": 3.7052814108327194e-06, "loss": 1.3032, "step": 13420 }, { "epoch": 0.733758867186966, "grad_norm": 1.6065253019332886, "learning_rate": 3.70386161678147e-06, "loss": 1.3468, "step": 13421 }, { "epoch": 0.7338135396306876, "grad_norm": 1.5548341274261475, "learning_rate": 3.7024420329730527e-06, "loss": 1.3164, "step": 13422 }, { "epoch": 0.7338682120744092, "grad_norm": 1.5115553140640259, "learning_rate": 3.701022659454877e-06, "loss": 1.3244, "step": 13423 }, { "epoch": 0.7339228845181307, "grad_norm": 4.622364044189453, "learning_rate": 3.6996034962743354e-06, "loss": 1.7015, "step": 13424 }, { "epoch": 0.7339775569618523, "grad_norm": 1.490113615989685, "learning_rate": 3.6981845434788188e-06, "loss": 1.616, "step": 13425 }, { "epoch": 0.7340322294055739, "grad_norm": 1.4345636367797852, "learning_rate": 3.696765801115706e-06, "loss": 1.7014, "step": 13426 }, { "epoch": 0.7340869018492954, "grad_norm": 1.1025011539459229, "learning_rate": 3.6953472692323757e-06, "loss": 1.6743, "step": 13427 }, { "epoch": 0.734141574293017, "grad_norm": 1.6181553602218628, "learning_rate": 3.69392894787619e-06, "loss": 1.5237, "step": 13428 }, { "epoch": 0.7341962467367386, "grad_norm": 1.409825325012207, "learning_rate": 3.6925108370945183e-06, "loss": 1.4263, "step": 13429 }, { "epoch": 0.73425091918046, "grad_norm": 1.5509446859359741, "learning_rate": 3.6910929369347105e-06, "loss": 1.6129, "step": 13430 }, { "epoch": 0.7343055916241816, "grad_norm": 1.602268099784851, "learning_rate": 3.68967524744411e-06, "loss": 1.1004, "step": 13431 }, { "epoch": 0.7343602640679032, "grad_norm": 1.4160020351409912, "learning_rate": 3.688257768670065e-06, "loss": 1.5527, "step": 13432 }, { "epoch": 0.7344149365116247, "grad_norm": 1.6466127634048462, "learning_rate": 3.686840500659904e-06, "loss": 1.2639, "step": 13433 }, { "epoch": 0.7344696089553463, "grad_norm": 1.6440119743347168, "learning_rate": 3.685423443460948e-06, "loss": 1.2518, "step": 13434 }, { "epoch": 0.7345242813990678, "grad_norm": 1.356046438217163, "learning_rate": 3.6840065971205263e-06, "loss": 1.392, "step": 13435 }, { "epoch": 0.7345789538427894, "grad_norm": 1.5335968732833862, "learning_rate": 3.6825899616859404e-06, "loss": 1.3497, "step": 13436 }, { "epoch": 0.734633626286511, "grad_norm": 1.5190215110778809, "learning_rate": 3.6811735372045043e-06, "loss": 1.5124, "step": 13437 }, { "epoch": 0.7346882987302324, "grad_norm": 1.703229308128357, "learning_rate": 3.6797573237235108e-06, "loss": 1.152, "step": 13438 }, { "epoch": 0.734742971173954, "grad_norm": 1.5331933498382568, "learning_rate": 3.678341321290252e-06, "loss": 1.4051, "step": 13439 }, { "epoch": 0.7347976436176756, "grad_norm": 1.468104362487793, "learning_rate": 3.676925529952009e-06, "loss": 1.4086, "step": 13440 }, { "epoch": 0.7348523160613971, "grad_norm": 2.1606807708740234, "learning_rate": 3.675509949756062e-06, "loss": 1.4379, "step": 13441 }, { "epoch": 0.7349069885051187, "grad_norm": 1.6631544828414917, "learning_rate": 3.674094580749674e-06, "loss": 1.5207, "step": 13442 }, { "epoch": 0.7349616609488403, "grad_norm": 1.5174651145935059, "learning_rate": 3.6726794229801168e-06, "loss": 1.3201, "step": 13443 }, { "epoch": 0.7350163333925618, "grad_norm": 1.560724139213562, "learning_rate": 3.671264476494639e-06, "loss": 1.4023, "step": 13444 }, { "epoch": 0.7350710058362834, "grad_norm": 1.3898471593856812, "learning_rate": 3.66984974134049e-06, "loss": 1.5314, "step": 13445 }, { "epoch": 0.735125678280005, "grad_norm": 1.2355798482894897, "learning_rate": 3.668435217564915e-06, "loss": 1.3643, "step": 13446 }, { "epoch": 0.7351803507237265, "grad_norm": 1.5972449779510498, "learning_rate": 3.6670209052151452e-06, "loss": 1.5068, "step": 13447 }, { "epoch": 0.735235023167448, "grad_norm": 1.581895351409912, "learning_rate": 3.665606804338405e-06, "loss": 1.2688, "step": 13448 }, { "epoch": 0.7352896956111695, "grad_norm": 1.5509510040283203, "learning_rate": 3.664192914981921e-06, "loss": 1.526, "step": 13449 }, { "epoch": 0.7353443680548911, "grad_norm": 1.5724318027496338, "learning_rate": 3.6627792371928993e-06, "loss": 1.213, "step": 13450 }, { "epoch": 0.7353990404986127, "grad_norm": 1.772079586982727, "learning_rate": 3.6613657710185537e-06, "loss": 1.328, "step": 13451 }, { "epoch": 0.7354537129423342, "grad_norm": 1.643064022064209, "learning_rate": 3.659952516506079e-06, "loss": 1.6596, "step": 13452 }, { "epoch": 0.7355083853860558, "grad_norm": 1.4507125616073608, "learning_rate": 3.658539473702667e-06, "loss": 1.6145, "step": 13453 }, { "epoch": 0.7355630578297774, "grad_norm": 2.1838700771331787, "learning_rate": 3.657126642655503e-06, "loss": 1.0783, "step": 13454 }, { "epoch": 0.7356177302734989, "grad_norm": 1.5680873394012451, "learning_rate": 3.655714023411764e-06, "loss": 1.2787, "step": 13455 }, { "epoch": 0.7356724027172205, "grad_norm": 1.712561011314392, "learning_rate": 3.654301616018617e-06, "loss": 1.4301, "step": 13456 }, { "epoch": 0.735727075160942, "grad_norm": 1.463991641998291, "learning_rate": 3.6528894205232346e-06, "loss": 1.3821, "step": 13457 }, { "epoch": 0.7357817476046635, "grad_norm": 1.3867990970611572, "learning_rate": 3.6514774369727678e-06, "loss": 1.3916, "step": 13458 }, { "epoch": 0.7358364200483851, "grad_norm": 1.7601863145828247, "learning_rate": 3.650065665414363e-06, "loss": 1.5103, "step": 13459 }, { "epoch": 0.7358910924921067, "grad_norm": 1.244372010231018, "learning_rate": 3.6486541058951696e-06, "loss": 1.5873, "step": 13460 }, { "epoch": 0.7359457649358282, "grad_norm": 1.9133782386779785, "learning_rate": 3.6472427584623194e-06, "loss": 1.1649, "step": 13461 }, { "epoch": 0.7360004373795498, "grad_norm": 1.4645684957504272, "learning_rate": 3.6458316231629377e-06, "loss": 1.5105, "step": 13462 }, { "epoch": 0.7360551098232714, "grad_norm": 1.4951261281967163, "learning_rate": 3.6444207000441524e-06, "loss": 1.5341, "step": 13463 }, { "epoch": 0.7361097822669929, "grad_norm": 1.3820195198059082, "learning_rate": 3.6430099891530735e-06, "loss": 1.4365, "step": 13464 }, { "epoch": 0.7361644547107145, "grad_norm": 1.6181714534759521, "learning_rate": 3.641599490536808e-06, "loss": 1.2135, "step": 13465 }, { "epoch": 0.7362191271544359, "grad_norm": 1.7597836256027222, "learning_rate": 3.6401892042424557e-06, "loss": 1.4457, "step": 13466 }, { "epoch": 0.7362737995981575, "grad_norm": 1.1715734004974365, "learning_rate": 3.638779130317106e-06, "loss": 1.4406, "step": 13467 }, { "epoch": 0.7363284720418791, "grad_norm": 1.6965012550354004, "learning_rate": 3.637369268807852e-06, "loss": 1.3319, "step": 13468 }, { "epoch": 0.7363831444856006, "grad_norm": 1.9514814615249634, "learning_rate": 3.6359596197617687e-06, "loss": 1.5047, "step": 13469 }, { "epoch": 0.7364378169293222, "grad_norm": 1.9153701066970825, "learning_rate": 3.6345501832259233e-06, "loss": 1.3205, "step": 13470 }, { "epoch": 0.7364924893730438, "grad_norm": 1.4845762252807617, "learning_rate": 3.6331409592473887e-06, "loss": 1.2914, "step": 13471 }, { "epoch": 0.7365471618167653, "grad_norm": 1.6162456274032593, "learning_rate": 3.631731947873217e-06, "loss": 1.4806, "step": 13472 }, { "epoch": 0.7366018342604869, "grad_norm": 1.562296986579895, "learning_rate": 3.6303231491504566e-06, "loss": 1.0671, "step": 13473 }, { "epoch": 0.7366565067042085, "grad_norm": 1.5186187028884888, "learning_rate": 3.628914563126156e-06, "loss": 1.2942, "step": 13474 }, { "epoch": 0.7367111791479299, "grad_norm": 1.3029588460922241, "learning_rate": 3.6275061898473484e-06, "loss": 1.7007, "step": 13475 }, { "epoch": 0.7367658515916515, "grad_norm": 1.5925626754760742, "learning_rate": 3.626098029361059e-06, "loss": 1.4871, "step": 13476 }, { "epoch": 0.7368205240353731, "grad_norm": 1.2404453754425049, "learning_rate": 3.624690081714317e-06, "loss": 1.4849, "step": 13477 }, { "epoch": 0.7368751964790946, "grad_norm": 1.6431008577346802, "learning_rate": 3.6232823469541333e-06, "loss": 1.446, "step": 13478 }, { "epoch": 0.7369298689228162, "grad_norm": 1.7064913511276245, "learning_rate": 3.621874825127515e-06, "loss": 1.4746, "step": 13479 }, { "epoch": 0.7369845413665377, "grad_norm": 1.458400011062622, "learning_rate": 3.620467516281464e-06, "loss": 1.4453, "step": 13480 }, { "epoch": 0.7370392138102593, "grad_norm": 1.9800348281860352, "learning_rate": 3.6190604204629685e-06, "loss": 1.3857, "step": 13481 }, { "epoch": 0.7370938862539809, "grad_norm": 1.8506778478622437, "learning_rate": 3.6176535377190226e-06, "loss": 1.341, "step": 13482 }, { "epoch": 0.7371485586977023, "grad_norm": 1.364090085029602, "learning_rate": 3.616246868096601e-06, "loss": 1.4859, "step": 13483 }, { "epoch": 0.7372032311414239, "grad_norm": 1.6225225925445557, "learning_rate": 3.614840411642674e-06, "loss": 1.101, "step": 13484 }, { "epoch": 0.7372579035851455, "grad_norm": 1.3040224313735962, "learning_rate": 3.6134341684042116e-06, "loss": 1.512, "step": 13485 }, { "epoch": 0.737312576028867, "grad_norm": 1.6643542051315308, "learning_rate": 3.6120281384281685e-06, "loss": 1.3688, "step": 13486 }, { "epoch": 0.7373672484725886, "grad_norm": 1.382470726966858, "learning_rate": 3.6106223217614934e-06, "loss": 1.556, "step": 13487 }, { "epoch": 0.7374219209163102, "grad_norm": 1.221373438835144, "learning_rate": 3.6092167184511352e-06, "loss": 1.5764, "step": 13488 }, { "epoch": 0.7374765933600317, "grad_norm": 1.5211342573165894, "learning_rate": 3.6078113285440277e-06, "loss": 1.3245, "step": 13489 }, { "epoch": 0.7375312658037533, "grad_norm": 1.624675989151001, "learning_rate": 3.606406152087095e-06, "loss": 1.4162, "step": 13490 }, { "epoch": 0.7375859382474749, "grad_norm": 1.5390158891677856, "learning_rate": 3.6050011891272686e-06, "loss": 1.5937, "step": 13491 }, { "epoch": 0.7376406106911964, "grad_norm": 1.934910774230957, "learning_rate": 3.6035964397114577e-06, "loss": 1.3832, "step": 13492 }, { "epoch": 0.737695283134918, "grad_norm": 1.5100919008255005, "learning_rate": 3.6021919038865716e-06, "loss": 1.126, "step": 13493 }, { "epoch": 0.7377499555786394, "grad_norm": 1.7542366981506348, "learning_rate": 3.6007875816995108e-06, "loss": 1.5663, "step": 13494 }, { "epoch": 0.737804628022361, "grad_norm": 1.4365586042404175, "learning_rate": 3.5993834731971654e-06, "loss": 1.4011, "step": 13495 }, { "epoch": 0.7378593004660826, "grad_norm": 1.588577151298523, "learning_rate": 3.5979795784264294e-06, "loss": 1.2615, "step": 13496 }, { "epoch": 0.7379139729098041, "grad_norm": 1.8528023958206177, "learning_rate": 3.596575897434178e-06, "loss": 1.5088, "step": 13497 }, { "epoch": 0.7379686453535257, "grad_norm": 1.3788039684295654, "learning_rate": 3.5951724302672796e-06, "loss": 1.4939, "step": 13498 }, { "epoch": 0.7380233177972473, "grad_norm": 1.529624581336975, "learning_rate": 3.593769176972607e-06, "loss": 1.3236, "step": 13499 }, { "epoch": 0.7380779902409688, "grad_norm": 1.408488392829895, "learning_rate": 3.5923661375970142e-06, "loss": 1.4749, "step": 13500 }, { "epoch": 0.7381326626846904, "grad_norm": 1.7565381526947021, "learning_rate": 3.590963312187348e-06, "loss": 1.3647, "step": 13501 }, { "epoch": 0.738187335128412, "grad_norm": 1.4143335819244385, "learning_rate": 3.5895607007904597e-06, "loss": 1.3767, "step": 13502 }, { "epoch": 0.7382420075721334, "grad_norm": 1.9343674182891846, "learning_rate": 3.5881583034531832e-06, "loss": 1.4766, "step": 13503 }, { "epoch": 0.738296680015855, "grad_norm": 1.332167148590088, "learning_rate": 3.5867561202223455e-06, "loss": 1.5373, "step": 13504 }, { "epoch": 0.7383513524595766, "grad_norm": 1.206083059310913, "learning_rate": 3.585354151144771e-06, "loss": 1.5104, "step": 13505 }, { "epoch": 0.7384060249032981, "grad_norm": 2.230211019515991, "learning_rate": 3.5839523962672694e-06, "loss": 1.347, "step": 13506 }, { "epoch": 0.7384606973470197, "grad_norm": 1.135600209236145, "learning_rate": 3.5825508556366574e-06, "loss": 1.3798, "step": 13507 }, { "epoch": 0.7385153697907412, "grad_norm": 1.4127624034881592, "learning_rate": 3.581149529299731e-06, "loss": 1.5927, "step": 13508 }, { "epoch": 0.7385700422344628, "grad_norm": 1.892106533050537, "learning_rate": 3.5797484173032806e-06, "loss": 1.4906, "step": 13509 }, { "epoch": 0.7386247146781844, "grad_norm": 1.4667727947235107, "learning_rate": 3.5783475196940997e-06, "loss": 1.3235, "step": 13510 }, { "epoch": 0.7386793871219058, "grad_norm": 1.6838603019714355, "learning_rate": 3.576946836518964e-06, "loss": 1.2958, "step": 13511 }, { "epoch": 0.7387340595656274, "grad_norm": 1.361174464225769, "learning_rate": 3.5755463678246417e-06, "loss": 1.3761, "step": 13512 }, { "epoch": 0.738788732009349, "grad_norm": 1.5248119831085205, "learning_rate": 3.574146113657906e-06, "loss": 1.3077, "step": 13513 }, { "epoch": 0.7388434044530705, "grad_norm": 1.4682247638702393, "learning_rate": 3.5727460740655097e-06, "loss": 1.4418, "step": 13514 }, { "epoch": 0.7388980768967921, "grad_norm": 1.6824880838394165, "learning_rate": 3.5713462490942006e-06, "loss": 1.4765, "step": 13515 }, { "epoch": 0.7389527493405137, "grad_norm": 2.196153402328491, "learning_rate": 3.569946638790729e-06, "loss": 1.4508, "step": 13516 }, { "epoch": 0.7390074217842352, "grad_norm": 1.7618907690048218, "learning_rate": 3.5685472432018274e-06, "loss": 1.5386, "step": 13517 }, { "epoch": 0.7390620942279568, "grad_norm": 1.314444661140442, "learning_rate": 3.567148062374226e-06, "loss": 1.6583, "step": 13518 }, { "epoch": 0.7391167666716784, "grad_norm": 1.7091648578643799, "learning_rate": 3.565749096354645e-06, "loss": 1.231, "step": 13519 }, { "epoch": 0.7391714391153998, "grad_norm": 1.5299899578094482, "learning_rate": 3.5643503451897975e-06, "loss": 1.3441, "step": 13520 }, { "epoch": 0.7392261115591214, "grad_norm": 2.63421368598938, "learning_rate": 3.562951808926397e-06, "loss": 1.6098, "step": 13521 }, { "epoch": 0.7392807840028429, "grad_norm": 1.5225796699523926, "learning_rate": 3.56155348761114e-06, "loss": 1.6428, "step": 13522 }, { "epoch": 0.7393354564465645, "grad_norm": 1.6080015897750854, "learning_rate": 3.5601553812907174e-06, "loss": 1.3653, "step": 13523 }, { "epoch": 0.7393901288902861, "grad_norm": 1.526403546333313, "learning_rate": 3.5587574900118215e-06, "loss": 1.3722, "step": 13524 }, { "epoch": 0.7394448013340076, "grad_norm": 1.561110019683838, "learning_rate": 3.5573598138211284e-06, "loss": 1.3563, "step": 13525 }, { "epoch": 0.7394994737777292, "grad_norm": 1.532820463180542, "learning_rate": 3.5559623527653054e-06, "loss": 1.2565, "step": 13526 }, { "epoch": 0.7395541462214508, "grad_norm": 1.5602478981018066, "learning_rate": 3.5545651068910245e-06, "loss": 1.4015, "step": 13527 }, { "epoch": 0.7396088186651723, "grad_norm": 1.188008189201355, "learning_rate": 3.553168076244938e-06, "loss": 1.5573, "step": 13528 }, { "epoch": 0.7396634911088938, "grad_norm": 1.776179552078247, "learning_rate": 3.5517712608737e-06, "loss": 1.3207, "step": 13529 }, { "epoch": 0.7397181635526154, "grad_norm": 1.3176203966140747, "learning_rate": 3.5503746608239487e-06, "loss": 1.6428, "step": 13530 }, { "epoch": 0.7397728359963369, "grad_norm": 1.5964936017990112, "learning_rate": 3.548978276142323e-06, "loss": 1.4403, "step": 13531 }, { "epoch": 0.7398275084400585, "grad_norm": 1.2579708099365234, "learning_rate": 3.547582106875447e-06, "loss": 1.4692, "step": 13532 }, { "epoch": 0.7398821808837801, "grad_norm": 1.3692007064819336, "learning_rate": 3.546186153069948e-06, "loss": 1.3644, "step": 13533 }, { "epoch": 0.7399368533275016, "grad_norm": 1.9322381019592285, "learning_rate": 3.5447904147724344e-06, "loss": 1.2912, "step": 13534 }, { "epoch": 0.7399915257712232, "grad_norm": 1.4075138568878174, "learning_rate": 3.5433948920295216e-06, "loss": 1.1945, "step": 13535 }, { "epoch": 0.7400461982149447, "grad_norm": 1.5540462732315063, "learning_rate": 3.541999584887802e-06, "loss": 1.411, "step": 13536 }, { "epoch": 0.7401008706586663, "grad_norm": 1.5138949155807495, "learning_rate": 3.5406044933938688e-06, "loss": 1.2328, "step": 13537 }, { "epoch": 0.7401555431023878, "grad_norm": 1.5216350555419922, "learning_rate": 3.5392096175943113e-06, "loss": 1.4655, "step": 13538 }, { "epoch": 0.7402102155461093, "grad_norm": 1.4991062879562378, "learning_rate": 3.5378149575357058e-06, "loss": 1.2767, "step": 13539 }, { "epoch": 0.7402648879898309, "grad_norm": 1.67349112033844, "learning_rate": 3.536420513264619e-06, "loss": 1.4178, "step": 13540 }, { "epoch": 0.7403195604335525, "grad_norm": 1.8078038692474365, "learning_rate": 3.535026284827623e-06, "loss": 1.3079, "step": 13541 }, { "epoch": 0.740374232877274, "grad_norm": 1.5880168676376343, "learning_rate": 3.533632272271269e-06, "loss": 1.1402, "step": 13542 }, { "epoch": 0.7404289053209956, "grad_norm": 1.6088190078735352, "learning_rate": 3.532238475642108e-06, "loss": 1.4189, "step": 13543 }, { "epoch": 0.7404835777647172, "grad_norm": 1.735884428024292, "learning_rate": 3.5308448949866805e-06, "loss": 1.3789, "step": 13544 }, { "epoch": 0.7405382502084387, "grad_norm": 1.4509994983673096, "learning_rate": 3.529451530351522e-06, "loss": 1.3305, "step": 13545 }, { "epoch": 0.7405929226521603, "grad_norm": 1.8176679611206055, "learning_rate": 3.528058381783158e-06, "loss": 1.513, "step": 13546 }, { "epoch": 0.7406475950958818, "grad_norm": 1.3901574611663818, "learning_rate": 3.526665449328115e-06, "loss": 1.6705, "step": 13547 }, { "epoch": 0.7407022675396033, "grad_norm": 1.6666545867919922, "learning_rate": 3.5252727330328996e-06, "loss": 1.4454, "step": 13548 }, { "epoch": 0.7407569399833249, "grad_norm": 1.5832329988479614, "learning_rate": 3.5238802329440234e-06, "loss": 1.3667, "step": 13549 }, { "epoch": 0.7408116124270464, "grad_norm": 1.3959941864013672, "learning_rate": 3.522487949107983e-06, "loss": 1.7597, "step": 13550 }, { "epoch": 0.740866284870768, "grad_norm": 1.4889028072357178, "learning_rate": 3.5210958815712672e-06, "loss": 1.4614, "step": 13551 }, { "epoch": 0.7409209573144896, "grad_norm": 1.4439337253570557, "learning_rate": 3.5197040303803665e-06, "loss": 1.4799, "step": 13552 }, { "epoch": 0.7409756297582111, "grad_norm": 1.624355435371399, "learning_rate": 3.5183123955817545e-06, "loss": 1.435, "step": 13553 }, { "epoch": 0.7410303022019327, "grad_norm": 1.70034658908844, "learning_rate": 3.516920977221898e-06, "loss": 1.2123, "step": 13554 }, { "epoch": 0.7410849746456543, "grad_norm": 1.2722814083099365, "learning_rate": 3.515529775347267e-06, "loss": 1.5166, "step": 13555 }, { "epoch": 0.7411396470893757, "grad_norm": 1.7521774768829346, "learning_rate": 3.514138790004312e-06, "loss": 1.255, "step": 13556 }, { "epoch": 0.7411943195330973, "grad_norm": 1.8585548400878906, "learning_rate": 3.5127480212394836e-06, "loss": 1.2852, "step": 13557 }, { "epoch": 0.7412489919768189, "grad_norm": 1.82469642162323, "learning_rate": 3.5113574690992203e-06, "loss": 1.4236, "step": 13558 }, { "epoch": 0.7413036644205404, "grad_norm": 1.6712270975112915, "learning_rate": 3.509967133629958e-06, "loss": 1.2493, "step": 13559 }, { "epoch": 0.741358336864262, "grad_norm": 1.5236343145370483, "learning_rate": 3.5085770148781195e-06, "loss": 1.4272, "step": 13560 }, { "epoch": 0.7414130093079836, "grad_norm": 1.3542912006378174, "learning_rate": 3.507187112890129e-06, "loss": 1.1785, "step": 13561 }, { "epoch": 0.7414676817517051, "grad_norm": 1.9394100904464722, "learning_rate": 3.505797427712394e-06, "loss": 1.4574, "step": 13562 }, { "epoch": 0.7415223541954267, "grad_norm": 2.5658528804779053, "learning_rate": 3.504407959391326e-06, "loss": 1.1454, "step": 13563 }, { "epoch": 0.7415770266391482, "grad_norm": 1.5081866979599, "learning_rate": 3.503018707973318e-06, "loss": 1.3671, "step": 13564 }, { "epoch": 0.7416316990828697, "grad_norm": 1.4876673221588135, "learning_rate": 3.5016296735047584e-06, "loss": 1.4572, "step": 13565 }, { "epoch": 0.7416863715265913, "grad_norm": 1.3201175928115845, "learning_rate": 3.5002408560320356e-06, "loss": 1.5064, "step": 13566 }, { "epoch": 0.7417410439703128, "grad_norm": 1.7966924905776978, "learning_rate": 3.4988522556015223e-06, "loss": 1.3292, "step": 13567 }, { "epoch": 0.7417957164140344, "grad_norm": 1.564852237701416, "learning_rate": 3.4974638722595887e-06, "loss": 1.4773, "step": 13568 }, { "epoch": 0.741850388857756, "grad_norm": 1.4722715616226196, "learning_rate": 3.496075706052594e-06, "loss": 1.2647, "step": 13569 }, { "epoch": 0.7419050613014775, "grad_norm": 1.3867597579956055, "learning_rate": 3.4946877570268943e-06, "loss": 1.6044, "step": 13570 }, { "epoch": 0.7419597337451991, "grad_norm": 1.8604497909545898, "learning_rate": 3.493300025228832e-06, "loss": 1.2397, "step": 13571 }, { "epoch": 0.7420144061889207, "grad_norm": 1.7271703481674194, "learning_rate": 3.4919125107047537e-06, "loss": 1.5172, "step": 13572 }, { "epoch": 0.7420690786326422, "grad_norm": 1.4127299785614014, "learning_rate": 3.490525213500987e-06, "loss": 1.4169, "step": 13573 }, { "epoch": 0.7421237510763637, "grad_norm": 1.3686453104019165, "learning_rate": 3.4891381336638565e-06, "loss": 1.4439, "step": 13574 }, { "epoch": 0.7421784235200853, "grad_norm": 1.9545707702636719, "learning_rate": 3.4877512712396856e-06, "loss": 1.4619, "step": 13575 }, { "epoch": 0.7422330959638068, "grad_norm": 1.30344820022583, "learning_rate": 3.486364626274776e-06, "loss": 1.2125, "step": 13576 }, { "epoch": 0.7422877684075284, "grad_norm": 1.3317506313323975, "learning_rate": 3.484978198815442e-06, "loss": 1.5396, "step": 13577 }, { "epoch": 0.7423424408512499, "grad_norm": 1.3931316137313843, "learning_rate": 3.483591988907973e-06, "loss": 1.4504, "step": 13578 }, { "epoch": 0.7423971132949715, "grad_norm": 1.5802812576293945, "learning_rate": 3.482205996598654e-06, "loss": 1.461, "step": 13579 }, { "epoch": 0.7424517857386931, "grad_norm": 1.4615793228149414, "learning_rate": 3.480820221933776e-06, "loss": 1.4842, "step": 13580 }, { "epoch": 0.7425064581824146, "grad_norm": 1.4125545024871826, "learning_rate": 3.4794346649596088e-06, "loss": 1.5326, "step": 13581 }, { "epoch": 0.7425611306261362, "grad_norm": 1.6498862504959106, "learning_rate": 3.4780493257224192e-06, "loss": 1.3635, "step": 13582 }, { "epoch": 0.7426158030698577, "grad_norm": 1.5120043754577637, "learning_rate": 3.4766642042684652e-06, "loss": 1.3962, "step": 13583 }, { "epoch": 0.7426704755135792, "grad_norm": 1.476724624633789, "learning_rate": 3.4752793006440024e-06, "loss": 1.5339, "step": 13584 }, { "epoch": 0.7427251479573008, "grad_norm": 1.6825047731399536, "learning_rate": 3.4738946148952703e-06, "loss": 1.527, "step": 13585 }, { "epoch": 0.7427798204010224, "grad_norm": 1.2830976247787476, "learning_rate": 3.472510147068515e-06, "loss": 1.3855, "step": 13586 }, { "epoch": 0.7428344928447439, "grad_norm": 1.1320676803588867, "learning_rate": 3.4711258972099624e-06, "loss": 1.6221, "step": 13587 }, { "epoch": 0.7428891652884655, "grad_norm": 1.4066823720932007, "learning_rate": 3.4697418653658345e-06, "loss": 1.2801, "step": 13588 }, { "epoch": 0.7429438377321871, "grad_norm": 1.6347168684005737, "learning_rate": 3.468358051582352e-06, "loss": 1.5873, "step": 13589 }, { "epoch": 0.7429985101759086, "grad_norm": 1.7929556369781494, "learning_rate": 3.4669744559057173e-06, "loss": 1.3544, "step": 13590 }, { "epoch": 0.7430531826196302, "grad_norm": 1.7498598098754883, "learning_rate": 3.46559107838214e-06, "loss": 1.3372, "step": 13591 }, { "epoch": 0.7431078550633516, "grad_norm": 1.1612576246261597, "learning_rate": 3.4642079190578094e-06, "loss": 1.4362, "step": 13592 }, { "epoch": 0.7431625275070732, "grad_norm": 1.5785152912139893, "learning_rate": 3.4628249779789105e-06, "loss": 1.188, "step": 13593 }, { "epoch": 0.7432171999507948, "grad_norm": 1.654600739479065, "learning_rate": 3.461442255191628e-06, "loss": 1.236, "step": 13594 }, { "epoch": 0.7432718723945163, "grad_norm": 1.3949247598648071, "learning_rate": 3.4600597507421317e-06, "loss": 1.4004, "step": 13595 }, { "epoch": 0.7433265448382379, "grad_norm": 1.6394078731536865, "learning_rate": 3.4586774646765875e-06, "loss": 1.6483, "step": 13596 }, { "epoch": 0.7433812172819595, "grad_norm": 1.7385858297348022, "learning_rate": 3.4572953970411527e-06, "loss": 1.1306, "step": 13597 }, { "epoch": 0.743435889725681, "grad_norm": 2.089045286178589, "learning_rate": 3.4559135478819772e-06, "loss": 1.521, "step": 13598 }, { "epoch": 0.7434905621694026, "grad_norm": 1.5358326435089111, "learning_rate": 3.4545319172452005e-06, "loss": 1.3037, "step": 13599 }, { "epoch": 0.7435452346131242, "grad_norm": 2.08040189743042, "learning_rate": 3.4531505051769665e-06, "loss": 1.3059, "step": 13600 }, { "epoch": 0.7435999070568456, "grad_norm": 1.4381836652755737, "learning_rate": 3.4517693117233995e-06, "loss": 1.6923, "step": 13601 }, { "epoch": 0.7436545795005672, "grad_norm": 1.1990896463394165, "learning_rate": 3.450388336930618e-06, "loss": 1.4545, "step": 13602 }, { "epoch": 0.7437092519442888, "grad_norm": 2.6189894676208496, "learning_rate": 3.449007580844742e-06, "loss": 1.1154, "step": 13603 }, { "epoch": 0.7437639243880103, "grad_norm": 1.5987629890441895, "learning_rate": 3.447627043511872e-06, "loss": 1.3592, "step": 13604 }, { "epoch": 0.7438185968317319, "grad_norm": 1.3633403778076172, "learning_rate": 3.446246724978115e-06, "loss": 1.5922, "step": 13605 }, { "epoch": 0.7438732692754534, "grad_norm": 1.813118577003479, "learning_rate": 3.444866625289558e-06, "loss": 1.1562, "step": 13606 }, { "epoch": 0.743927941719175, "grad_norm": 1.267056941986084, "learning_rate": 3.4434867444922857e-06, "loss": 1.6672, "step": 13607 }, { "epoch": 0.7439826141628966, "grad_norm": 1.4700329303741455, "learning_rate": 3.4421070826323775e-06, "loss": 1.4359, "step": 13608 }, { "epoch": 0.744037286606618, "grad_norm": 1.4213042259216309, "learning_rate": 3.440727639755902e-06, "loss": 1.6237, "step": 13609 }, { "epoch": 0.7440919590503396, "grad_norm": 1.7571675777435303, "learning_rate": 3.4393484159089187e-06, "loss": 1.2901, "step": 13610 }, { "epoch": 0.7441466314940612, "grad_norm": 1.6765058040618896, "learning_rate": 3.4379694111374904e-06, "loss": 1.4384, "step": 13611 }, { "epoch": 0.7442013039377827, "grad_norm": 1.9187114238739014, "learning_rate": 3.4365906254876623e-06, "loss": 1.2658, "step": 13612 }, { "epoch": 0.7442559763815043, "grad_norm": 1.8240877389907837, "learning_rate": 3.4352120590054705e-06, "loss": 1.2175, "step": 13613 }, { "epoch": 0.7443106488252259, "grad_norm": 1.2070815563201904, "learning_rate": 3.433833711736957e-06, "loss": 1.4869, "step": 13614 }, { "epoch": 0.7443653212689474, "grad_norm": 1.4321837425231934, "learning_rate": 3.4324555837281435e-06, "loss": 1.4466, "step": 13615 }, { "epoch": 0.744419993712669, "grad_norm": 1.495457410812378, "learning_rate": 3.431077675025045e-06, "loss": 1.4724, "step": 13616 }, { "epoch": 0.7444746661563906, "grad_norm": 1.1253821849822998, "learning_rate": 3.4296999856736824e-06, "loss": 1.5345, "step": 13617 }, { "epoch": 0.7445293386001121, "grad_norm": 1.4269272089004517, "learning_rate": 3.4283225157200507e-06, "loss": 1.5258, "step": 13618 }, { "epoch": 0.7445840110438336, "grad_norm": 1.4522448778152466, "learning_rate": 3.4269452652101543e-06, "loss": 1.4353, "step": 13619 }, { "epoch": 0.7446386834875551, "grad_norm": 1.704318642616272, "learning_rate": 3.42556823418998e-06, "loss": 1.3417, "step": 13620 }, { "epoch": 0.7446933559312767, "grad_norm": 1.4120241403579712, "learning_rate": 3.4241914227055096e-06, "loss": 1.354, "step": 13621 }, { "epoch": 0.7447480283749983, "grad_norm": 1.8327820301055908, "learning_rate": 3.4228148308027186e-06, "loss": 1.7151, "step": 13622 }, { "epoch": 0.7448027008187198, "grad_norm": 1.576308012008667, "learning_rate": 3.421438458527574e-06, "loss": 1.1646, "step": 13623 }, { "epoch": 0.7448573732624414, "grad_norm": 1.5321438312530518, "learning_rate": 3.4200623059260328e-06, "loss": 1.5216, "step": 13624 }, { "epoch": 0.744912045706163, "grad_norm": 3.599454879760742, "learning_rate": 3.4186863730440554e-06, "loss": 1.285, "step": 13625 }, { "epoch": 0.7449667181498845, "grad_norm": 1.3737690448760986, "learning_rate": 3.417310659927583e-06, "loss": 1.2918, "step": 13626 }, { "epoch": 0.7450213905936061, "grad_norm": 1.611441731452942, "learning_rate": 3.4159351666225515e-06, "loss": 1.669, "step": 13627 }, { "epoch": 0.7450760630373277, "grad_norm": 1.5925129652023315, "learning_rate": 3.414559893174898e-06, "loss": 1.6669, "step": 13628 }, { "epoch": 0.7451307354810491, "grad_norm": 1.8802721500396729, "learning_rate": 3.4131848396305423e-06, "loss": 1.1815, "step": 13629 }, { "epoch": 0.7451854079247707, "grad_norm": 2.0738937854766846, "learning_rate": 3.4118100060353985e-06, "loss": 1.2962, "step": 13630 }, { "epoch": 0.7452400803684923, "grad_norm": 1.7517629861831665, "learning_rate": 3.4104353924353818e-06, "loss": 1.507, "step": 13631 }, { "epoch": 0.7452947528122138, "grad_norm": 1.515252709388733, "learning_rate": 3.4090609988763867e-06, "loss": 1.4763, "step": 13632 }, { "epoch": 0.7453494252559354, "grad_norm": 1.226948857307434, "learning_rate": 3.4076868254043138e-06, "loss": 1.768, "step": 13633 }, { "epoch": 0.7454040976996569, "grad_norm": 1.5745137929916382, "learning_rate": 3.4063128720650475e-06, "loss": 1.6242, "step": 13634 }, { "epoch": 0.7454587701433785, "grad_norm": 1.351421594619751, "learning_rate": 3.4049391389044674e-06, "loss": 1.3723, "step": 13635 }, { "epoch": 0.7455134425871001, "grad_norm": 1.3543399572372437, "learning_rate": 3.4035656259684446e-06, "loss": 1.2532, "step": 13636 }, { "epoch": 0.7455681150308215, "grad_norm": 1.7207005023956299, "learning_rate": 3.402192333302845e-06, "loss": 1.5625, "step": 13637 }, { "epoch": 0.7456227874745431, "grad_norm": 1.4596071243286133, "learning_rate": 3.4008192609535216e-06, "loss": 1.7283, "step": 13638 }, { "epoch": 0.7456774599182647, "grad_norm": 1.083076000213623, "learning_rate": 3.3994464089663327e-06, "loss": 1.6231, "step": 13639 }, { "epoch": 0.7457321323619862, "grad_norm": 1.5638295412063599, "learning_rate": 3.3980737773871163e-06, "loss": 1.4725, "step": 13640 }, { "epoch": 0.7457868048057078, "grad_norm": 1.3534950017929077, "learning_rate": 3.3967013662617053e-06, "loss": 1.3455, "step": 13641 }, { "epoch": 0.7458414772494294, "grad_norm": 1.4822155237197876, "learning_rate": 3.3953291756359354e-06, "loss": 1.529, "step": 13642 }, { "epoch": 0.7458961496931509, "grad_norm": 2.0696473121643066, "learning_rate": 3.3939572055556203e-06, "loss": 1.2939, "step": 13643 }, { "epoch": 0.7459508221368725, "grad_norm": 1.6597447395324707, "learning_rate": 3.392585456066574e-06, "loss": 1.4107, "step": 13644 }, { "epoch": 0.7460054945805941, "grad_norm": 1.9138832092285156, "learning_rate": 3.3912139272146073e-06, "loss": 1.426, "step": 13645 }, { "epoch": 0.7460601670243155, "grad_norm": 1.3622866868972778, "learning_rate": 3.3898426190455147e-06, "loss": 1.2857, "step": 13646 }, { "epoch": 0.7461148394680371, "grad_norm": 1.6666678190231323, "learning_rate": 3.3884715316050886e-06, "loss": 1.472, "step": 13647 }, { "epoch": 0.7461695119117586, "grad_norm": 1.162802815437317, "learning_rate": 3.3871006649391126e-06, "loss": 1.6325, "step": 13648 }, { "epoch": 0.7462241843554802, "grad_norm": 1.3127491474151611, "learning_rate": 3.3857300190933606e-06, "loss": 1.3262, "step": 13649 }, { "epoch": 0.7462788567992018, "grad_norm": 1.372223973274231, "learning_rate": 3.3843595941136065e-06, "loss": 1.4561, "step": 13650 }, { "epoch": 0.7463335292429233, "grad_norm": 1.4721317291259766, "learning_rate": 3.382989390045609e-06, "loss": 1.5091, "step": 13651 }, { "epoch": 0.7463882016866449, "grad_norm": 1.8330332040786743, "learning_rate": 3.3816194069351204e-06, "loss": 1.2832, "step": 13652 }, { "epoch": 0.7464428741303665, "grad_norm": 1.399855613708496, "learning_rate": 3.380249644827894e-06, "loss": 1.2951, "step": 13653 }, { "epoch": 0.746497546574088, "grad_norm": 1.4749071598052979, "learning_rate": 3.378880103769666e-06, "loss": 1.4842, "step": 13654 }, { "epoch": 0.7465522190178095, "grad_norm": 1.7126834392547607, "learning_rate": 3.3775107838061637e-06, "loss": 1.6495, "step": 13655 }, { "epoch": 0.7466068914615311, "grad_norm": 1.5637179613113403, "learning_rate": 3.376141684983121e-06, "loss": 1.1921, "step": 13656 }, { "epoch": 0.7466615639052526, "grad_norm": 1.94596266746521, "learning_rate": 3.3747728073462506e-06, "loss": 1.3631, "step": 13657 }, { "epoch": 0.7467162363489742, "grad_norm": 1.2834396362304688, "learning_rate": 3.3734041509412584e-06, "loss": 1.3369, "step": 13658 }, { "epoch": 0.7467709087926958, "grad_norm": 1.270963191986084, "learning_rate": 3.372035715813856e-06, "loss": 1.4027, "step": 13659 }, { "epoch": 0.7468255812364173, "grad_norm": 1.0542513132095337, "learning_rate": 3.3706675020097335e-06, "loss": 1.6482, "step": 13660 }, { "epoch": 0.7468802536801389, "grad_norm": 2.1488406658172607, "learning_rate": 3.3692995095745796e-06, "loss": 1.3004, "step": 13661 }, { "epoch": 0.7469349261238605, "grad_norm": 1.6281055212020874, "learning_rate": 3.3679317385540744e-06, "loss": 1.2139, "step": 13662 }, { "epoch": 0.746989598567582, "grad_norm": 1.6876896619796753, "learning_rate": 3.366564188993887e-06, "loss": 1.3403, "step": 13663 }, { "epoch": 0.7470442710113036, "grad_norm": 1.3831307888031006, "learning_rate": 3.365196860939691e-06, "loss": 1.5468, "step": 13664 }, { "epoch": 0.747098943455025, "grad_norm": 3.585930109024048, "learning_rate": 3.363829754437141e-06, "loss": 1.1996, "step": 13665 }, { "epoch": 0.7471536158987466, "grad_norm": 1.4678212404251099, "learning_rate": 3.362462869531885e-06, "loss": 1.4568, "step": 13666 }, { "epoch": 0.7472082883424682, "grad_norm": 1.1671239137649536, "learning_rate": 3.361096206269572e-06, "loss": 1.5611, "step": 13667 }, { "epoch": 0.7472629607861897, "grad_norm": 1.580531358718872, "learning_rate": 3.3597297646958348e-06, "loss": 1.4407, "step": 13668 }, { "epoch": 0.7473176332299113, "grad_norm": 1.371770977973938, "learning_rate": 3.3583635448563e-06, "loss": 1.0866, "step": 13669 }, { "epoch": 0.7473723056736329, "grad_norm": 1.5712302923202515, "learning_rate": 3.3569975467965955e-06, "loss": 1.1871, "step": 13670 }, { "epoch": 0.7474269781173544, "grad_norm": 1.8037811517715454, "learning_rate": 3.35563177056233e-06, "loss": 1.2625, "step": 13671 }, { "epoch": 0.747481650561076, "grad_norm": 1.778247356414795, "learning_rate": 3.354266216199108e-06, "loss": 1.4202, "step": 13672 }, { "epoch": 0.7475363230047976, "grad_norm": 1.6226818561553955, "learning_rate": 3.3529008837525355e-06, "loss": 1.3703, "step": 13673 }, { "epoch": 0.747590995448519, "grad_norm": 1.4543037414550781, "learning_rate": 3.3515357732682008e-06, "loss": 1.4146, "step": 13674 }, { "epoch": 0.7476456678922406, "grad_norm": 1.6778147220611572, "learning_rate": 3.350170884791687e-06, "loss": 1.3473, "step": 13675 }, { "epoch": 0.7477003403359622, "grad_norm": 1.6756054162979126, "learning_rate": 3.348806218368571e-06, "loss": 1.3728, "step": 13676 }, { "epoch": 0.7477550127796837, "grad_norm": 1.7617075443267822, "learning_rate": 3.347441774044421e-06, "loss": 1.3215, "step": 13677 }, { "epoch": 0.7478096852234053, "grad_norm": 1.3916724920272827, "learning_rate": 3.3460775518648037e-06, "loss": 1.5928, "step": 13678 }, { "epoch": 0.7478643576671268, "grad_norm": 1.6429704427719116, "learning_rate": 3.3447135518752705e-06, "loss": 1.1328, "step": 13679 }, { "epoch": 0.7479190301108484, "grad_norm": 1.427652359008789, "learning_rate": 3.343349774121366e-06, "loss": 1.5175, "step": 13680 }, { "epoch": 0.74797370255457, "grad_norm": 1.4778786897659302, "learning_rate": 3.3419862186486364e-06, "loss": 1.3452, "step": 13681 }, { "epoch": 0.7480283749982914, "grad_norm": 2.1093289852142334, "learning_rate": 3.34062288550261e-06, "loss": 1.3274, "step": 13682 }, { "epoch": 0.748083047442013, "grad_norm": 1.6169041395187378, "learning_rate": 3.339259774728809e-06, "loss": 1.4695, "step": 13683 }, { "epoch": 0.7481377198857346, "grad_norm": 1.4300767183303833, "learning_rate": 3.337896886372757e-06, "loss": 1.5447, "step": 13684 }, { "epoch": 0.7481923923294561, "grad_norm": 1.9026358127593994, "learning_rate": 3.3365342204799613e-06, "loss": 1.6689, "step": 13685 }, { "epoch": 0.7482470647731777, "grad_norm": 1.5619843006134033, "learning_rate": 3.3351717770959246e-06, "loss": 1.4221, "step": 13686 }, { "epoch": 0.7483017372168993, "grad_norm": 1.7301435470581055, "learning_rate": 3.333809556266142e-06, "loss": 1.3198, "step": 13687 }, { "epoch": 0.7483564096606208, "grad_norm": 1.4740502834320068, "learning_rate": 3.3324475580361005e-06, "loss": 1.3543, "step": 13688 }, { "epoch": 0.7484110821043424, "grad_norm": 1.725497841835022, "learning_rate": 3.3310857824512776e-06, "loss": 1.3043, "step": 13689 }, { "epoch": 0.748465754548064, "grad_norm": 1.2807096242904663, "learning_rate": 3.329724229557153e-06, "loss": 1.5247, "step": 13690 }, { "epoch": 0.7485204269917854, "grad_norm": 1.575558066368103, "learning_rate": 3.3283628993991846e-06, "loss": 1.3931, "step": 13691 }, { "epoch": 0.748575099435507, "grad_norm": 1.9188698530197144, "learning_rate": 3.327001792022839e-06, "loss": 1.2909, "step": 13692 }, { "epoch": 0.7486297718792285, "grad_norm": 1.3057096004486084, "learning_rate": 3.325640907473562e-06, "loss": 1.5761, "step": 13693 }, { "epoch": 0.7486844443229501, "grad_norm": 1.291449785232544, "learning_rate": 3.3242802457967928e-06, "loss": 1.4119, "step": 13694 }, { "epoch": 0.7487391167666717, "grad_norm": 1.3479605913162231, "learning_rate": 3.3229198070379754e-06, "loss": 1.646, "step": 13695 }, { "epoch": 0.7487937892103932, "grad_norm": 1.6369962692260742, "learning_rate": 3.3215595912425336e-06, "loss": 1.4887, "step": 13696 }, { "epoch": 0.7488484616541148, "grad_norm": 1.783353567123413, "learning_rate": 3.3201995984558854e-06, "loss": 1.4469, "step": 13697 }, { "epoch": 0.7489031340978364, "grad_norm": 1.6281707286834717, "learning_rate": 3.3188398287234504e-06, "loss": 1.3994, "step": 13698 }, { "epoch": 0.7489578065415579, "grad_norm": 1.276099443435669, "learning_rate": 3.3174802820906315e-06, "loss": 1.5284, "step": 13699 }, { "epoch": 0.7490124789852795, "grad_norm": 2.190469741821289, "learning_rate": 3.3161209586028265e-06, "loss": 1.4257, "step": 13700 }, { "epoch": 0.749067151429001, "grad_norm": 1.930935263633728, "learning_rate": 3.3147618583054277e-06, "loss": 1.4344, "step": 13701 }, { "epoch": 0.7491218238727225, "grad_norm": 1.687554121017456, "learning_rate": 3.313402981243817e-06, "loss": 1.3408, "step": 13702 }, { "epoch": 0.7491764963164441, "grad_norm": 1.7586299180984497, "learning_rate": 3.3120443274633683e-06, "loss": 1.4499, "step": 13703 }, { "epoch": 0.7492311687601657, "grad_norm": 1.7521345615386963, "learning_rate": 3.310685897009457e-06, "loss": 1.6472, "step": 13704 }, { "epoch": 0.7492858412038872, "grad_norm": 1.4009414911270142, "learning_rate": 3.3093276899274373e-06, "loss": 1.3263, "step": 13705 }, { "epoch": 0.7493405136476088, "grad_norm": 1.7229812145233154, "learning_rate": 3.307969706262669e-06, "loss": 1.4752, "step": 13706 }, { "epoch": 0.7493951860913303, "grad_norm": 1.3958314657211304, "learning_rate": 3.306611946060496e-06, "loss": 1.4587, "step": 13707 }, { "epoch": 0.7494498585350519, "grad_norm": 1.559659481048584, "learning_rate": 3.3052544093662533e-06, "loss": 1.5449, "step": 13708 }, { "epoch": 0.7495045309787735, "grad_norm": 2.231919288635254, "learning_rate": 3.3038970962252793e-06, "loss": 1.2429, "step": 13709 }, { "epoch": 0.7495592034224949, "grad_norm": 1.9746184349060059, "learning_rate": 3.3025400066828926e-06, "loss": 1.544, "step": 13710 }, { "epoch": 0.7496138758662165, "grad_norm": 1.466894507408142, "learning_rate": 3.3011831407844085e-06, "loss": 1.3063, "step": 13711 }, { "epoch": 0.7496685483099381, "grad_norm": 1.9071428775787354, "learning_rate": 3.2998264985751425e-06, "loss": 1.6616, "step": 13712 }, { "epoch": 0.7497232207536596, "grad_norm": 1.8070018291473389, "learning_rate": 3.298470080100392e-06, "loss": 1.4163, "step": 13713 }, { "epoch": 0.7497778931973812, "grad_norm": 1.330928087234497, "learning_rate": 3.2971138854054506e-06, "loss": 1.3915, "step": 13714 }, { "epoch": 0.7498325656411028, "grad_norm": 1.316348671913147, "learning_rate": 3.2957579145356067e-06, "loss": 1.5624, "step": 13715 }, { "epoch": 0.7498872380848243, "grad_norm": 1.898536205291748, "learning_rate": 3.2944021675361372e-06, "loss": 1.2207, "step": 13716 }, { "epoch": 0.7499419105285459, "grad_norm": 1.5090997219085693, "learning_rate": 3.2930466444523112e-06, "loss": 1.4268, "step": 13717 }, { "epoch": 0.7499965829722675, "grad_norm": 1.8553205728530884, "learning_rate": 3.2916913453293984e-06, "loss": 1.4177, "step": 13718 }, { "epoch": 0.7500512554159889, "grad_norm": 1.3892796039581299, "learning_rate": 3.2903362702126516e-06, "loss": 1.2604, "step": 13719 }, { "epoch": 0.7501059278597105, "grad_norm": 1.199763298034668, "learning_rate": 3.2889814191473234e-06, "loss": 1.7247, "step": 13720 }, { "epoch": 0.750160600303432, "grad_norm": 1.5914437770843506, "learning_rate": 3.2876267921786544e-06, "loss": 1.4662, "step": 13721 }, { "epoch": 0.7502152727471536, "grad_norm": 1.3186964988708496, "learning_rate": 3.2862723893518743e-06, "loss": 1.2296, "step": 13722 }, { "epoch": 0.7502699451908752, "grad_norm": 1.683821678161621, "learning_rate": 3.284918210712217e-06, "loss": 1.1262, "step": 13723 }, { "epoch": 0.7503246176345967, "grad_norm": 1.5014830827713013, "learning_rate": 3.2835642563048977e-06, "loss": 1.5973, "step": 13724 }, { "epoch": 0.7503792900783183, "grad_norm": 1.545253872871399, "learning_rate": 3.282210526175128e-06, "loss": 1.3942, "step": 13725 }, { "epoch": 0.7504339625220399, "grad_norm": 1.4291982650756836, "learning_rate": 3.2808570203681135e-06, "loss": 1.5421, "step": 13726 }, { "epoch": 0.7504886349657613, "grad_norm": 1.7969146966934204, "learning_rate": 3.2795037389290498e-06, "loss": 1.3732, "step": 13727 }, { "epoch": 0.7505433074094829, "grad_norm": 1.4965879917144775, "learning_rate": 3.278150681903123e-06, "loss": 1.4824, "step": 13728 }, { "epoch": 0.7505979798532045, "grad_norm": 1.4640697240829468, "learning_rate": 3.2767978493355214e-06, "loss": 1.4182, "step": 13729 }, { "epoch": 0.750652652296926, "grad_norm": 1.7241545915603638, "learning_rate": 3.2754452412714153e-06, "loss": 1.3721, "step": 13730 }, { "epoch": 0.7507073247406476, "grad_norm": 1.599420428276062, "learning_rate": 3.2740928577559705e-06, "loss": 1.4138, "step": 13731 }, { "epoch": 0.7507619971843692, "grad_norm": 1.488205909729004, "learning_rate": 3.2727406988343504e-06, "loss": 1.7127, "step": 13732 }, { "epoch": 0.7508166696280907, "grad_norm": 2.5412755012512207, "learning_rate": 3.271388764551702e-06, "loss": 1.3242, "step": 13733 }, { "epoch": 0.7508713420718123, "grad_norm": 1.2091634273529053, "learning_rate": 3.2700370549531734e-06, "loss": 1.5073, "step": 13734 }, { "epoch": 0.7509260145155338, "grad_norm": 1.5718967914581299, "learning_rate": 3.2686855700839017e-06, "loss": 1.6063, "step": 13735 }, { "epoch": 0.7509806869592554, "grad_norm": 1.4009191989898682, "learning_rate": 3.26733430998901e-06, "loss": 1.4991, "step": 13736 }, { "epoch": 0.7510353594029769, "grad_norm": 1.499403476715088, "learning_rate": 3.2659832747136276e-06, "loss": 1.3713, "step": 13737 }, { "epoch": 0.7510900318466984, "grad_norm": 2.1855592727661133, "learning_rate": 3.264632464302867e-06, "loss": 1.6092, "step": 13738 }, { "epoch": 0.75114470429042, "grad_norm": 1.7625455856323242, "learning_rate": 3.2632818788018317e-06, "loss": 1.3493, "step": 13739 }, { "epoch": 0.7511993767341416, "grad_norm": 2.129417896270752, "learning_rate": 3.2619315182556234e-06, "loss": 1.2185, "step": 13740 }, { "epoch": 0.7512540491778631, "grad_norm": 1.2752453088760376, "learning_rate": 3.2605813827093335e-06, "loss": 1.6051, "step": 13741 }, { "epoch": 0.7513087216215847, "grad_norm": 1.5530290603637695, "learning_rate": 3.259231472208042e-06, "loss": 1.7039, "step": 13742 }, { "epoch": 0.7513633940653063, "grad_norm": 1.6528407335281372, "learning_rate": 3.2578817867968327e-06, "loss": 1.3032, "step": 13743 }, { "epoch": 0.7514180665090278, "grad_norm": 1.5645307302474976, "learning_rate": 3.2565323265207718e-06, "loss": 1.5534, "step": 13744 }, { "epoch": 0.7514727389527494, "grad_norm": 1.5465730428695679, "learning_rate": 3.255183091424916e-06, "loss": 1.5722, "step": 13745 }, { "epoch": 0.751527411396471, "grad_norm": 1.4325956106185913, "learning_rate": 3.253834081554329e-06, "loss": 1.2501, "step": 13746 }, { "epoch": 0.7515820838401924, "grad_norm": 1.7130287885665894, "learning_rate": 3.2524852969540477e-06, "loss": 1.4343, "step": 13747 }, { "epoch": 0.751636756283914, "grad_norm": 1.7148895263671875, "learning_rate": 3.2511367376691194e-06, "loss": 1.3314, "step": 13748 }, { "epoch": 0.7516914287276355, "grad_norm": 1.5225884914398193, "learning_rate": 3.2497884037445726e-06, "loss": 1.4373, "step": 13749 }, { "epoch": 0.7517461011713571, "grad_norm": 1.5137255191802979, "learning_rate": 3.248440295225428e-06, "loss": 1.5114, "step": 13750 }, { "epoch": 0.7518007736150787, "grad_norm": 1.4964587688446045, "learning_rate": 3.2470924121567072e-06, "loss": 1.2456, "step": 13751 }, { "epoch": 0.7518554460588002, "grad_norm": 1.956808090209961, "learning_rate": 3.2457447545834177e-06, "loss": 1.4921, "step": 13752 }, { "epoch": 0.7519101185025218, "grad_norm": 1.727400302886963, "learning_rate": 3.24439732255056e-06, "loss": 1.4508, "step": 13753 }, { "epoch": 0.7519647909462434, "grad_norm": 1.8295550346374512, "learning_rate": 3.243050116103128e-06, "loss": 1.5563, "step": 13754 }, { "epoch": 0.7520194633899648, "grad_norm": 1.3557209968566895, "learning_rate": 3.2417031352861085e-06, "loss": 1.3285, "step": 13755 }, { "epoch": 0.7520741358336864, "grad_norm": 1.839091420173645, "learning_rate": 3.2403563801444772e-06, "loss": 1.6197, "step": 13756 }, { "epoch": 0.752128808277408, "grad_norm": 1.6035319566726685, "learning_rate": 3.2390098507232113e-06, "loss": 1.4148, "step": 13757 }, { "epoch": 0.7521834807211295, "grad_norm": 1.3415647745132446, "learning_rate": 3.2376635470672713e-06, "loss": 1.4776, "step": 13758 }, { "epoch": 0.7522381531648511, "grad_norm": 2.275798797607422, "learning_rate": 3.2363174692216113e-06, "loss": 1.4546, "step": 13759 }, { "epoch": 0.7522928256085727, "grad_norm": 1.4871656894683838, "learning_rate": 3.234971617231185e-06, "loss": 1.3982, "step": 13760 }, { "epoch": 0.7523474980522942, "grad_norm": 1.6274604797363281, "learning_rate": 3.2336259911409283e-06, "loss": 1.4548, "step": 13761 }, { "epoch": 0.7524021704960158, "grad_norm": 1.583625316619873, "learning_rate": 3.23228059099578e-06, "loss": 1.3619, "step": 13762 }, { "epoch": 0.7524568429397372, "grad_norm": 1.5012956857681274, "learning_rate": 3.230935416840665e-06, "loss": 1.3533, "step": 13763 }, { "epoch": 0.7525115153834588, "grad_norm": 1.4197345972061157, "learning_rate": 3.2295904687204995e-06, "loss": 1.2153, "step": 13764 }, { "epoch": 0.7525661878271804, "grad_norm": 1.4477652311325073, "learning_rate": 3.2282457466801962e-06, "loss": 1.1766, "step": 13765 }, { "epoch": 0.7526208602709019, "grad_norm": 1.2675694227218628, "learning_rate": 3.226901250764657e-06, "loss": 1.5229, "step": 13766 }, { "epoch": 0.7526755327146235, "grad_norm": 1.4438453912734985, "learning_rate": 3.225556981018776e-06, "loss": 1.2217, "step": 13767 }, { "epoch": 0.7527302051583451, "grad_norm": 1.3436875343322754, "learning_rate": 3.2242129374874478e-06, "loss": 1.3654, "step": 13768 }, { "epoch": 0.7527848776020666, "grad_norm": 1.8386369943618774, "learning_rate": 3.222869120215548e-06, "loss": 1.5697, "step": 13769 }, { "epoch": 0.7528395500457882, "grad_norm": 1.927699089050293, "learning_rate": 3.2215255292479496e-06, "loss": 1.6468, "step": 13770 }, { "epoch": 0.7528942224895098, "grad_norm": 1.3263012170791626, "learning_rate": 3.2201821646295227e-06, "loss": 1.3801, "step": 13771 }, { "epoch": 0.7529488949332312, "grad_norm": 1.682349681854248, "learning_rate": 3.2188390264051226e-06, "loss": 1.4496, "step": 13772 }, { "epoch": 0.7530035673769528, "grad_norm": 1.7375985383987427, "learning_rate": 3.217496114619596e-06, "loss": 1.2597, "step": 13773 }, { "epoch": 0.7530582398206744, "grad_norm": 1.73342764377594, "learning_rate": 3.2161534293177942e-06, "loss": 1.4346, "step": 13774 }, { "epoch": 0.7531129122643959, "grad_norm": 1.9089289903640747, "learning_rate": 3.2148109705445442e-06, "loss": 1.2132, "step": 13775 }, { "epoch": 0.7531675847081175, "grad_norm": 1.461821436882019, "learning_rate": 3.2134687383446815e-06, "loss": 1.4133, "step": 13776 }, { "epoch": 0.753222257151839, "grad_norm": 1.4043556451797485, "learning_rate": 3.2121267327630222e-06, "loss": 1.4984, "step": 13777 }, { "epoch": 0.7532769295955606, "grad_norm": 1.5735878944396973, "learning_rate": 3.2107849538443802e-06, "loss": 1.3869, "step": 13778 }, { "epoch": 0.7533316020392822, "grad_norm": 1.4362183809280396, "learning_rate": 3.20944340163356e-06, "loss": 1.4375, "step": 13779 }, { "epoch": 0.7533862744830037, "grad_norm": 1.6790763139724731, "learning_rate": 3.208102076175358e-06, "loss": 1.4154, "step": 13780 }, { "epoch": 0.7534409469267253, "grad_norm": 1.399375319480896, "learning_rate": 3.2067609775145625e-06, "loss": 1.4681, "step": 13781 }, { "epoch": 0.7534956193704468, "grad_norm": 1.5409517288208008, "learning_rate": 3.205420105695963e-06, "loss": 1.4094, "step": 13782 }, { "epoch": 0.7535502918141683, "grad_norm": 1.3467038869857788, "learning_rate": 3.20407946076433e-06, "loss": 1.345, "step": 13783 }, { "epoch": 0.7536049642578899, "grad_norm": 1.3640235662460327, "learning_rate": 3.2027390427644267e-06, "loss": 1.2769, "step": 13784 }, { "epoch": 0.7536596367016115, "grad_norm": 1.7876993417739868, "learning_rate": 3.201398851741021e-06, "loss": 1.2469, "step": 13785 }, { "epoch": 0.753714309145333, "grad_norm": 2.0603582859039307, "learning_rate": 3.2000588877388606e-06, "loss": 1.3986, "step": 13786 }, { "epoch": 0.7537689815890546, "grad_norm": 1.4711371660232544, "learning_rate": 3.1987191508026884e-06, "loss": 1.2606, "step": 13787 }, { "epoch": 0.7538236540327762, "grad_norm": 1.343940258026123, "learning_rate": 3.197379640977245e-06, "loss": 1.2598, "step": 13788 }, { "epoch": 0.7538783264764977, "grad_norm": 1.4796950817108154, "learning_rate": 3.1960403583072596e-06, "loss": 1.165, "step": 13789 }, { "epoch": 0.7539329989202193, "grad_norm": 1.8583106994628906, "learning_rate": 3.1947013028374517e-06, "loss": 1.3549, "step": 13790 }, { "epoch": 0.7539876713639407, "grad_norm": 1.7393699884414673, "learning_rate": 3.1933624746125368e-06, "loss": 1.3858, "step": 13791 }, { "epoch": 0.7540423438076623, "grad_norm": 1.4314560890197754, "learning_rate": 3.192023873677218e-06, "loss": 1.3246, "step": 13792 }, { "epoch": 0.7540970162513839, "grad_norm": 1.96035897731781, "learning_rate": 3.1906855000762005e-06, "loss": 1.3199, "step": 13793 }, { "epoch": 0.7541516886951054, "grad_norm": 1.449947476387024, "learning_rate": 3.189347353854173e-06, "loss": 1.5945, "step": 13794 }, { "epoch": 0.754206361138827, "grad_norm": 1.8972961902618408, "learning_rate": 3.1880094350558155e-06, "loss": 1.3917, "step": 13795 }, { "epoch": 0.7542610335825486, "grad_norm": 1.5753962993621826, "learning_rate": 3.186671743725812e-06, "loss": 1.3645, "step": 13796 }, { "epoch": 0.7543157060262701, "grad_norm": 1.5114176273345947, "learning_rate": 3.185334279908826e-06, "loss": 1.4877, "step": 13797 }, { "epoch": 0.7543703784699917, "grad_norm": 1.5964394807815552, "learning_rate": 3.183997043649516e-06, "loss": 1.5235, "step": 13798 }, { "epoch": 0.7544250509137133, "grad_norm": 1.3900210857391357, "learning_rate": 3.1826600349925427e-06, "loss": 1.4401, "step": 13799 }, { "epoch": 0.7544797233574347, "grad_norm": 1.3663345575332642, "learning_rate": 3.181323253982549e-06, "loss": 1.1767, "step": 13800 }, { "epoch": 0.7545343958011563, "grad_norm": 1.4626935720443726, "learning_rate": 3.1799867006641684e-06, "loss": 1.5415, "step": 13801 }, { "epoch": 0.7545890682448779, "grad_norm": 1.684802532196045, "learning_rate": 3.1786503750820384e-06, "loss": 1.5437, "step": 13802 }, { "epoch": 0.7546437406885994, "grad_norm": 1.7226022481918335, "learning_rate": 3.1773142772807796e-06, "loss": 1.3848, "step": 13803 }, { "epoch": 0.754698413132321, "grad_norm": 1.3843212127685547, "learning_rate": 3.175978407305006e-06, "loss": 1.5707, "step": 13804 }, { "epoch": 0.7547530855760425, "grad_norm": 1.6995973587036133, "learning_rate": 3.1746427651993273e-06, "loss": 1.4391, "step": 13805 }, { "epoch": 0.7548077580197641, "grad_norm": 1.5628124475479126, "learning_rate": 3.17330735100834e-06, "loss": 1.3494, "step": 13806 }, { "epoch": 0.7548624304634857, "grad_norm": 1.5511233806610107, "learning_rate": 3.171972164776642e-06, "loss": 1.4709, "step": 13807 }, { "epoch": 0.7549171029072071, "grad_norm": 1.2502201795578003, "learning_rate": 3.1706372065488166e-06, "loss": 1.536, "step": 13808 }, { "epoch": 0.7549717753509287, "grad_norm": 2.276087999343872, "learning_rate": 3.1693024763694368e-06, "loss": 1.6795, "step": 13809 }, { "epoch": 0.7550264477946503, "grad_norm": 1.2885620594024658, "learning_rate": 3.1679679742830806e-06, "loss": 1.512, "step": 13810 }, { "epoch": 0.7550811202383718, "grad_norm": 1.765763759613037, "learning_rate": 3.166633700334304e-06, "loss": 1.3469, "step": 13811 }, { "epoch": 0.7551357926820934, "grad_norm": 1.383729100227356, "learning_rate": 3.1652996545676605e-06, "loss": 1.4637, "step": 13812 }, { "epoch": 0.755190465125815, "grad_norm": 1.268416404724121, "learning_rate": 3.163965837027703e-06, "loss": 1.4791, "step": 13813 }, { "epoch": 0.7552451375695365, "grad_norm": 1.5486681461334229, "learning_rate": 3.1626322477589667e-06, "loss": 1.4864, "step": 13814 }, { "epoch": 0.7552998100132581, "grad_norm": 1.876439094543457, "learning_rate": 3.161298886805981e-06, "loss": 1.367, "step": 13815 }, { "epoch": 0.7553544824569797, "grad_norm": 1.7246284484863281, "learning_rate": 3.159965754213277e-06, "loss": 1.3119, "step": 13816 }, { "epoch": 0.7554091549007012, "grad_norm": 1.5230846405029297, "learning_rate": 3.158632850025367e-06, "loss": 1.5537, "step": 13817 }, { "epoch": 0.7554638273444227, "grad_norm": 1.1778775453567505, "learning_rate": 3.1573001742867594e-06, "loss": 1.278, "step": 13818 }, { "epoch": 0.7555184997881442, "grad_norm": 1.1300848722457886, "learning_rate": 3.1559677270419564e-06, "loss": 1.5452, "step": 13819 }, { "epoch": 0.7555731722318658, "grad_norm": 2.0264298915863037, "learning_rate": 3.1546355083354474e-06, "loss": 1.2024, "step": 13820 }, { "epoch": 0.7556278446755874, "grad_norm": 1.4216713905334473, "learning_rate": 3.1533035182117254e-06, "loss": 1.3923, "step": 13821 }, { "epoch": 0.7556825171193089, "grad_norm": 2.8602652549743652, "learning_rate": 3.151971756715264e-06, "loss": 0.9595, "step": 13822 }, { "epoch": 0.7557371895630305, "grad_norm": 1.6740131378173828, "learning_rate": 3.150640223890533e-06, "loss": 1.4662, "step": 13823 }, { "epoch": 0.7557918620067521, "grad_norm": 1.3547533750534058, "learning_rate": 3.1493089197820015e-06, "loss": 1.4393, "step": 13824 }, { "epoch": 0.7558465344504736, "grad_norm": 1.6620842218399048, "learning_rate": 3.147977844434119e-06, "loss": 1.3327, "step": 13825 }, { "epoch": 0.7559012068941952, "grad_norm": 2.445037841796875, "learning_rate": 3.146646997891333e-06, "loss": 1.1298, "step": 13826 }, { "epoch": 0.7559558793379167, "grad_norm": 1.7147465944290161, "learning_rate": 3.145316380198088e-06, "loss": 1.4454, "step": 13827 }, { "epoch": 0.7560105517816382, "grad_norm": 1.3808529376983643, "learning_rate": 3.143985991398815e-06, "loss": 1.3952, "step": 13828 }, { "epoch": 0.7560652242253598, "grad_norm": 1.38247811794281, "learning_rate": 3.1426558315379375e-06, "loss": 1.6162, "step": 13829 }, { "epoch": 0.7561198966690814, "grad_norm": 1.5495057106018066, "learning_rate": 3.141325900659873e-06, "loss": 1.4619, "step": 13830 }, { "epoch": 0.7561745691128029, "grad_norm": 1.6499334573745728, "learning_rate": 3.139996198809028e-06, "loss": 1.425, "step": 13831 }, { "epoch": 0.7562292415565245, "grad_norm": 1.7150402069091797, "learning_rate": 3.138666726029811e-06, "loss": 1.2961, "step": 13832 }, { "epoch": 0.756283914000246, "grad_norm": 1.3349294662475586, "learning_rate": 3.1373374823666113e-06, "loss": 1.4326, "step": 13833 }, { "epoch": 0.7563385864439676, "grad_norm": 1.4214433431625366, "learning_rate": 3.136008467863815e-06, "loss": 1.3835, "step": 13834 }, { "epoch": 0.7563932588876892, "grad_norm": 1.475632667541504, "learning_rate": 3.1346796825658053e-06, "loss": 1.5561, "step": 13835 }, { "epoch": 0.7564479313314106, "grad_norm": 1.500924825668335, "learning_rate": 3.1333511265169513e-06, "loss": 1.3936, "step": 13836 }, { "epoch": 0.7565026037751322, "grad_norm": 1.5718344449996948, "learning_rate": 3.1320227997616127e-06, "loss": 1.3775, "step": 13837 }, { "epoch": 0.7565572762188538, "grad_norm": 1.4328083992004395, "learning_rate": 3.1306947023441524e-06, "loss": 1.4356, "step": 13838 }, { "epoch": 0.7566119486625753, "grad_norm": 1.6344598531723022, "learning_rate": 3.1293668343089157e-06, "loss": 1.6195, "step": 13839 }, { "epoch": 0.7566666211062969, "grad_norm": 1.4780203104019165, "learning_rate": 3.1280391957002387e-06, "loss": 1.4253, "step": 13840 }, { "epoch": 0.7567212935500185, "grad_norm": 1.307924747467041, "learning_rate": 3.126711786562463e-06, "loss": 1.5582, "step": 13841 }, { "epoch": 0.75677596599374, "grad_norm": 2.4645748138427734, "learning_rate": 3.1253846069399084e-06, "loss": 1.3971, "step": 13842 }, { "epoch": 0.7568306384374616, "grad_norm": 2.175464153289795, "learning_rate": 3.1240576568768943e-06, "loss": 1.4436, "step": 13843 }, { "epoch": 0.7568853108811832, "grad_norm": 1.7605109214782715, "learning_rate": 3.1227309364177293e-06, "loss": 1.5051, "step": 13844 }, { "epoch": 0.7569399833249046, "grad_norm": 1.775769829750061, "learning_rate": 3.121404445606714e-06, "loss": 1.2198, "step": 13845 }, { "epoch": 0.7569946557686262, "grad_norm": 1.5456706285476685, "learning_rate": 3.1200781844881477e-06, "loss": 1.539, "step": 13846 }, { "epoch": 0.7570493282123477, "grad_norm": 1.5354636907577515, "learning_rate": 3.1187521531063146e-06, "loss": 1.4499, "step": 13847 }, { "epoch": 0.7571040006560693, "grad_norm": 1.5382083654403687, "learning_rate": 3.1174263515054927e-06, "loss": 1.6664, "step": 13848 }, { "epoch": 0.7571586730997909, "grad_norm": 1.9808703660964966, "learning_rate": 3.1161007797299583e-06, "loss": 1.7319, "step": 13849 }, { "epoch": 0.7572133455435124, "grad_norm": 1.5331404209136963, "learning_rate": 3.1147754378239716e-06, "loss": 1.1834, "step": 13850 }, { "epoch": 0.757268017987234, "grad_norm": 1.514835238456726, "learning_rate": 3.1134503258317872e-06, "loss": 1.4085, "step": 13851 }, { "epoch": 0.7573226904309556, "grad_norm": 1.7730833292007446, "learning_rate": 3.112125443797659e-06, "loss": 1.2044, "step": 13852 }, { "epoch": 0.757377362874677, "grad_norm": 2.4545085430145264, "learning_rate": 3.1108007917658257e-06, "loss": 1.1917, "step": 13853 }, { "epoch": 0.7574320353183986, "grad_norm": 1.488092064857483, "learning_rate": 3.1094763697805165e-06, "loss": 1.4086, "step": 13854 }, { "epoch": 0.7574867077621202, "grad_norm": 1.762419581413269, "learning_rate": 3.1081521778859624e-06, "loss": 1.2888, "step": 13855 }, { "epoch": 0.7575413802058417, "grad_norm": 1.6495901346206665, "learning_rate": 3.1068282161263806e-06, "loss": 1.2101, "step": 13856 }, { "epoch": 0.7575960526495633, "grad_norm": 1.3901093006134033, "learning_rate": 3.1055044845459804e-06, "loss": 1.4042, "step": 13857 }, { "epoch": 0.7576507250932849, "grad_norm": 1.5293233394622803, "learning_rate": 3.1041809831889637e-06, "loss": 1.3792, "step": 13858 }, { "epoch": 0.7577053975370064, "grad_norm": 1.443127155303955, "learning_rate": 3.1028577120995216e-06, "loss": 1.6189, "step": 13859 }, { "epoch": 0.757760069980728, "grad_norm": 1.5028696060180664, "learning_rate": 3.1015346713218488e-06, "loss": 1.3385, "step": 13860 }, { "epoch": 0.7578147424244496, "grad_norm": 1.190799355506897, "learning_rate": 3.100211860900121e-06, "loss": 1.6518, "step": 13861 }, { "epoch": 0.757869414868171, "grad_norm": 2.427645683288574, "learning_rate": 3.0988892808785063e-06, "loss": 1.2568, "step": 13862 }, { "epoch": 0.7579240873118926, "grad_norm": 1.3436048030853271, "learning_rate": 3.0975669313011768e-06, "loss": 1.5135, "step": 13863 }, { "epoch": 0.7579787597556141, "grad_norm": 1.5694515705108643, "learning_rate": 3.0962448122122834e-06, "loss": 1.3616, "step": 13864 }, { "epoch": 0.7580334321993357, "grad_norm": 1.713532567024231, "learning_rate": 3.094922923655973e-06, "loss": 1.6921, "step": 13865 }, { "epoch": 0.7580881046430573, "grad_norm": 1.7176258563995361, "learning_rate": 3.0936012656763937e-06, "loss": 1.5117, "step": 13866 }, { "epoch": 0.7581427770867788, "grad_norm": 1.3612267971038818, "learning_rate": 3.0922798383176733e-06, "loss": 1.5742, "step": 13867 }, { "epoch": 0.7581974495305004, "grad_norm": 1.8901361227035522, "learning_rate": 3.090958641623939e-06, "loss": 1.4344, "step": 13868 }, { "epoch": 0.758252121974222, "grad_norm": 1.433823823928833, "learning_rate": 3.0896376756393074e-06, "loss": 1.4761, "step": 13869 }, { "epoch": 0.7583067944179435, "grad_norm": 1.504874348640442, "learning_rate": 3.0883169404078906e-06, "loss": 1.2798, "step": 13870 }, { "epoch": 0.7583614668616651, "grad_norm": 2.1789774894714355, "learning_rate": 3.086996435973787e-06, "loss": 1.4731, "step": 13871 }, { "epoch": 0.7584161393053866, "grad_norm": 2.5592923164367676, "learning_rate": 3.085676162381096e-06, "loss": 1.3684, "step": 13872 }, { "epoch": 0.7584708117491081, "grad_norm": 1.3984644412994385, "learning_rate": 3.0843561196739013e-06, "loss": 1.372, "step": 13873 }, { "epoch": 0.7585254841928297, "grad_norm": 1.5207233428955078, "learning_rate": 3.0830363078962854e-06, "loss": 1.3541, "step": 13874 }, { "epoch": 0.7585801566365513, "grad_norm": 1.3756356239318848, "learning_rate": 3.0817167270923197e-06, "loss": 1.2896, "step": 13875 }, { "epoch": 0.7586348290802728, "grad_norm": 1.5742297172546387, "learning_rate": 3.0803973773060634e-06, "loss": 1.4467, "step": 13876 }, { "epoch": 0.7586895015239944, "grad_norm": 3.085265636444092, "learning_rate": 3.079078258581579e-06, "loss": 1.3026, "step": 13877 }, { "epoch": 0.7587441739677159, "grad_norm": 1.3932430744171143, "learning_rate": 3.0777593709629115e-06, "loss": 1.3187, "step": 13878 }, { "epoch": 0.7587988464114375, "grad_norm": 1.3779101371765137, "learning_rate": 3.0764407144941e-06, "loss": 1.5174, "step": 13879 }, { "epoch": 0.7588535188551591, "grad_norm": 1.5357142686843872, "learning_rate": 3.075122289219181e-06, "loss": 1.3678, "step": 13880 }, { "epoch": 0.7589081912988805, "grad_norm": 1.8892511129379272, "learning_rate": 3.07380409518218e-06, "loss": 1.3254, "step": 13881 }, { "epoch": 0.7589628637426021, "grad_norm": 1.5257424116134644, "learning_rate": 3.0724861324271137e-06, "loss": 1.2992, "step": 13882 }, { "epoch": 0.7590175361863237, "grad_norm": 1.45597243309021, "learning_rate": 3.0711684009979904e-06, "loss": 1.3282, "step": 13883 }, { "epoch": 0.7590722086300452, "grad_norm": 2.140176296234131, "learning_rate": 3.0698509009388134e-06, "loss": 1.4397, "step": 13884 }, { "epoch": 0.7591268810737668, "grad_norm": 1.441058874130249, "learning_rate": 3.068533632293573e-06, "loss": 1.3493, "step": 13885 }, { "epoch": 0.7591815535174884, "grad_norm": 1.6173263788223267, "learning_rate": 3.067216595106264e-06, "loss": 1.1898, "step": 13886 }, { "epoch": 0.7592362259612099, "grad_norm": 1.6529797315597534, "learning_rate": 3.0658997894208573e-06, "loss": 1.4102, "step": 13887 }, { "epoch": 0.7592908984049315, "grad_norm": 1.376926064491272, "learning_rate": 3.0645832152813315e-06, "loss": 1.826, "step": 13888 }, { "epoch": 0.7593455708486531, "grad_norm": 1.7085126638412476, "learning_rate": 3.063266872731646e-06, "loss": 1.2721, "step": 13889 }, { "epoch": 0.7594002432923745, "grad_norm": 1.578748106956482, "learning_rate": 3.061950761815755e-06, "loss": 1.3389, "step": 13890 }, { "epoch": 0.7594549157360961, "grad_norm": 1.6261868476867676, "learning_rate": 3.060634882577612e-06, "loss": 1.4422, "step": 13891 }, { "epoch": 0.7595095881798176, "grad_norm": 1.5148292779922485, "learning_rate": 3.0593192350611533e-06, "loss": 1.5542, "step": 13892 }, { "epoch": 0.7595642606235392, "grad_norm": 1.4621049165725708, "learning_rate": 3.058003819310309e-06, "loss": 1.4551, "step": 13893 }, { "epoch": 0.7596189330672608, "grad_norm": 1.6451842784881592, "learning_rate": 3.0566886353690106e-06, "loss": 1.135, "step": 13894 }, { "epoch": 0.7596736055109823, "grad_norm": 1.300235390663147, "learning_rate": 3.055373683281171e-06, "loss": 1.3179, "step": 13895 }, { "epoch": 0.7597282779547039, "grad_norm": 1.7534313201904297, "learning_rate": 3.0540589630907016e-06, "loss": 1.6276, "step": 13896 }, { "epoch": 0.7597829503984255, "grad_norm": 1.3543646335601807, "learning_rate": 3.0527444748415016e-06, "loss": 1.5893, "step": 13897 }, { "epoch": 0.759837622842147, "grad_norm": 1.6270142793655396, "learning_rate": 3.051430218577466e-06, "loss": 1.509, "step": 13898 }, { "epoch": 0.7598922952858685, "grad_norm": 1.9058738946914673, "learning_rate": 3.050116194342476e-06, "loss": 1.5005, "step": 13899 }, { "epoch": 0.7599469677295901, "grad_norm": 1.4361294507980347, "learning_rate": 3.0488024021804197e-06, "loss": 1.3683, "step": 13900 }, { "epoch": 0.7600016401733116, "grad_norm": 1.824150800704956, "learning_rate": 3.047488842135159e-06, "loss": 1.7857, "step": 13901 }, { "epoch": 0.7600563126170332, "grad_norm": 1.275162696838379, "learning_rate": 3.0461755142505643e-06, "loss": 1.6772, "step": 13902 }, { "epoch": 0.7601109850607548, "grad_norm": 1.38834810256958, "learning_rate": 3.0448624185704857e-06, "loss": 1.3122, "step": 13903 }, { "epoch": 0.7601656575044763, "grad_norm": 1.6453479528427124, "learning_rate": 3.0435495551387694e-06, "loss": 1.2568, "step": 13904 }, { "epoch": 0.7602203299481979, "grad_norm": 2.2054240703582764, "learning_rate": 3.04223692399926e-06, "loss": 1.1896, "step": 13905 }, { "epoch": 0.7602750023919194, "grad_norm": 1.7720366716384888, "learning_rate": 3.0409245251957865e-06, "loss": 1.4132, "step": 13906 }, { "epoch": 0.760329674835641, "grad_norm": 1.2432547807693481, "learning_rate": 3.0396123587721737e-06, "loss": 1.421, "step": 13907 }, { "epoch": 0.7603843472793625, "grad_norm": 0.9788808226585388, "learning_rate": 3.038300424772237e-06, "loss": 1.6007, "step": 13908 }, { "epoch": 0.760439019723084, "grad_norm": 1.9687689542770386, "learning_rate": 3.0369887232397855e-06, "loss": 1.2876, "step": 13909 }, { "epoch": 0.7604936921668056, "grad_norm": 1.4101406335830688, "learning_rate": 3.0356772542186165e-06, "loss": 1.4461, "step": 13910 }, { "epoch": 0.7605483646105272, "grad_norm": 1.7537654638290405, "learning_rate": 3.034366017752528e-06, "loss": 1.4405, "step": 13911 }, { "epoch": 0.7606030370542487, "grad_norm": 1.8192473649978638, "learning_rate": 3.0330550138853053e-06, "loss": 1.6213, "step": 13912 }, { "epoch": 0.7606577094979703, "grad_norm": 1.8274519443511963, "learning_rate": 3.0317442426607203e-06, "loss": 1.8443, "step": 13913 }, { "epoch": 0.7607123819416919, "grad_norm": 1.7082507610321045, "learning_rate": 3.0304337041225497e-06, "loss": 1.5548, "step": 13914 }, { "epoch": 0.7607670543854134, "grad_norm": 1.3885447978973389, "learning_rate": 3.0291233983145494e-06, "loss": 1.477, "step": 13915 }, { "epoch": 0.760821726829135, "grad_norm": 1.4254634380340576, "learning_rate": 3.0278133252804797e-06, "loss": 1.4887, "step": 13916 }, { "epoch": 0.7608763992728566, "grad_norm": 2.508354902267456, "learning_rate": 3.026503485064084e-06, "loss": 1.3426, "step": 13917 }, { "epoch": 0.760931071716578, "grad_norm": 1.513440728187561, "learning_rate": 3.0251938777090974e-06, "loss": 1.4877, "step": 13918 }, { "epoch": 0.7609857441602996, "grad_norm": 1.9391037225723267, "learning_rate": 3.0238845032592566e-06, "loss": 1.8081, "step": 13919 }, { "epoch": 0.7610404166040211, "grad_norm": 1.7420790195465088, "learning_rate": 3.0225753617582833e-06, "loss": 1.3134, "step": 13920 }, { "epoch": 0.7610950890477427, "grad_norm": 1.6496714353561401, "learning_rate": 3.0212664532498903e-06, "loss": 1.4777, "step": 13921 }, { "epoch": 0.7611497614914643, "grad_norm": 1.5126761198043823, "learning_rate": 3.019957777777788e-06, "loss": 1.3131, "step": 13922 }, { "epoch": 0.7612044339351858, "grad_norm": 1.8445721864700317, "learning_rate": 3.0186493353856737e-06, "loss": 1.3161, "step": 13923 }, { "epoch": 0.7612591063789074, "grad_norm": 1.4935287237167358, "learning_rate": 3.017341126117238e-06, "loss": 1.4548, "step": 13924 }, { "epoch": 0.761313778822629, "grad_norm": 1.4713528156280518, "learning_rate": 3.01603315001617e-06, "loss": 1.2732, "step": 13925 }, { "epoch": 0.7613684512663504, "grad_norm": 1.4802122116088867, "learning_rate": 3.014725407126143e-06, "loss": 1.3997, "step": 13926 }, { "epoch": 0.761423123710072, "grad_norm": 1.6971131563186646, "learning_rate": 3.0134178974908237e-06, "loss": 1.4438, "step": 13927 }, { "epoch": 0.7614777961537936, "grad_norm": 1.508750557899475, "learning_rate": 3.0121106211538786e-06, "loss": 1.6348, "step": 13928 }, { "epoch": 0.7615324685975151, "grad_norm": 1.8909659385681152, "learning_rate": 3.010803578158954e-06, "loss": 1.2819, "step": 13929 }, { "epoch": 0.7615871410412367, "grad_norm": 1.5211549997329712, "learning_rate": 3.0094967685497022e-06, "loss": 1.5975, "step": 13930 }, { "epoch": 0.7616418134849583, "grad_norm": 1.4736042022705078, "learning_rate": 3.0081901923697564e-06, "loss": 1.2845, "step": 13931 }, { "epoch": 0.7616964859286798, "grad_norm": 1.6034784317016602, "learning_rate": 3.006883849662744e-06, "loss": 1.4267, "step": 13932 }, { "epoch": 0.7617511583724014, "grad_norm": 1.59469473361969, "learning_rate": 3.005577740472293e-06, "loss": 1.2164, "step": 13933 }, { "epoch": 0.7618058308161229, "grad_norm": 2.092582941055298, "learning_rate": 3.0042718648420145e-06, "loss": 1.3544, "step": 13934 }, { "epoch": 0.7618605032598444, "grad_norm": 2.832568883895874, "learning_rate": 3.002966222815513e-06, "loss": 1.0839, "step": 13935 }, { "epoch": 0.761915175703566, "grad_norm": 1.4276665449142456, "learning_rate": 3.00166081443639e-06, "loss": 1.4252, "step": 13936 }, { "epoch": 0.7619698481472875, "grad_norm": 1.6599271297454834, "learning_rate": 3.0003556397482336e-06, "loss": 1.3739, "step": 13937 }, { "epoch": 0.7620245205910091, "grad_norm": 1.178638219833374, "learning_rate": 2.9990506987946244e-06, "loss": 1.7363, "step": 13938 }, { "epoch": 0.7620791930347307, "grad_norm": 1.532500982284546, "learning_rate": 2.9977459916191444e-06, "loss": 1.5477, "step": 13939 }, { "epoch": 0.7621338654784522, "grad_norm": 1.395026683807373, "learning_rate": 2.9964415182653562e-06, "loss": 1.5806, "step": 13940 }, { "epoch": 0.7621885379221738, "grad_norm": 1.3864721059799194, "learning_rate": 2.9951372787768176e-06, "loss": 1.8174, "step": 13941 }, { "epoch": 0.7622432103658954, "grad_norm": 1.3843066692352295, "learning_rate": 2.9938332731970854e-06, "loss": 1.5758, "step": 13942 }, { "epoch": 0.7622978828096169, "grad_norm": 1.7773356437683105, "learning_rate": 2.9925295015696978e-06, "loss": 1.2872, "step": 13943 }, { "epoch": 0.7623525552533384, "grad_norm": 1.4504841566085815, "learning_rate": 2.9912259639381967e-06, "loss": 1.4799, "step": 13944 }, { "epoch": 0.76240722769706, "grad_norm": 1.4946502447128296, "learning_rate": 2.9899226603461074e-06, "loss": 1.3626, "step": 13945 }, { "epoch": 0.7624619001407815, "grad_norm": 1.3076142072677612, "learning_rate": 2.988619590836951e-06, "loss": 1.373, "step": 13946 }, { "epoch": 0.7625165725845031, "grad_norm": 1.403079867362976, "learning_rate": 2.987316755454238e-06, "loss": 1.3507, "step": 13947 }, { "epoch": 0.7625712450282246, "grad_norm": 1.5058166980743408, "learning_rate": 2.9860141542414745e-06, "loss": 1.5175, "step": 13948 }, { "epoch": 0.7626259174719462, "grad_norm": 1.2548092603683472, "learning_rate": 2.9847117872421537e-06, "loss": 1.5831, "step": 13949 }, { "epoch": 0.7626805899156678, "grad_norm": 1.156008005142212, "learning_rate": 2.9834096544997725e-06, "loss": 1.2495, "step": 13950 }, { "epoch": 0.7627352623593893, "grad_norm": 1.467136025428772, "learning_rate": 2.982107756057807e-06, "loss": 1.5939, "step": 13951 }, { "epoch": 0.7627899348031109, "grad_norm": 1.3158364295959473, "learning_rate": 2.9808060919597282e-06, "loss": 1.131, "step": 13952 }, { "epoch": 0.7628446072468325, "grad_norm": 1.2691236734390259, "learning_rate": 2.979504662249009e-06, "loss": 1.5021, "step": 13953 }, { "epoch": 0.7628992796905539, "grad_norm": 1.457353115081787, "learning_rate": 2.978203466969103e-06, "loss": 1.2304, "step": 13954 }, { "epoch": 0.7629539521342755, "grad_norm": 1.6132675409317017, "learning_rate": 2.9769025061634573e-06, "loss": 1.4766, "step": 13955 }, { "epoch": 0.7630086245779971, "grad_norm": 1.407851219177246, "learning_rate": 2.97560177987552e-06, "loss": 1.5388, "step": 13956 }, { "epoch": 0.7630632970217186, "grad_norm": 1.665766716003418, "learning_rate": 2.9743012881487187e-06, "loss": 1.5325, "step": 13957 }, { "epoch": 0.7631179694654402, "grad_norm": 1.5782042741775513, "learning_rate": 2.9730010310264878e-06, "loss": 1.4642, "step": 13958 }, { "epoch": 0.7631726419091618, "grad_norm": 2.0485973358154297, "learning_rate": 2.9717010085522415e-06, "loss": 1.5436, "step": 13959 }, { "epoch": 0.7632273143528833, "grad_norm": 1.4791382551193237, "learning_rate": 2.970401220769391e-06, "loss": 1.701, "step": 13960 }, { "epoch": 0.7632819867966049, "grad_norm": 1.6607012748718262, "learning_rate": 2.969101667721339e-06, "loss": 1.6648, "step": 13961 }, { "epoch": 0.7633366592403263, "grad_norm": 1.3546513319015503, "learning_rate": 2.967802349451482e-06, "loss": 1.4928, "step": 13962 }, { "epoch": 0.7633913316840479, "grad_norm": 1.6609328985214233, "learning_rate": 2.966503266003201e-06, "loss": 1.1589, "step": 13963 }, { "epoch": 0.7634460041277695, "grad_norm": 1.5850739479064941, "learning_rate": 2.965204417419886e-06, "loss": 1.5987, "step": 13964 }, { "epoch": 0.763500676571491, "grad_norm": 1.5748035907745361, "learning_rate": 2.9639058037449008e-06, "loss": 1.3924, "step": 13965 }, { "epoch": 0.7635553490152126, "grad_norm": 1.455334186553955, "learning_rate": 2.96260742502161e-06, "loss": 1.3703, "step": 13966 }, { "epoch": 0.7636100214589342, "grad_norm": 1.4884940385818481, "learning_rate": 2.961309281293374e-06, "loss": 1.4227, "step": 13967 }, { "epoch": 0.7636646939026557, "grad_norm": 1.8690909147262573, "learning_rate": 2.9600113726035374e-06, "loss": 1.2831, "step": 13968 }, { "epoch": 0.7637193663463773, "grad_norm": 1.3902696371078491, "learning_rate": 2.958713698995438e-06, "loss": 1.5071, "step": 13969 }, { "epoch": 0.7637740387900989, "grad_norm": 1.5610462427139282, "learning_rate": 2.9574162605124147e-06, "loss": 1.5987, "step": 13970 }, { "epoch": 0.7638287112338203, "grad_norm": 1.5192023515701294, "learning_rate": 2.956119057197785e-06, "loss": 1.5922, "step": 13971 }, { "epoch": 0.7638833836775419, "grad_norm": 1.9318122863769531, "learning_rate": 2.9548220890948707e-06, "loss": 1.4096, "step": 13972 }, { "epoch": 0.7639380561212635, "grad_norm": 1.6352468729019165, "learning_rate": 2.953525356246981e-06, "loss": 1.6081, "step": 13973 }, { "epoch": 0.763992728564985, "grad_norm": 1.5122662782669067, "learning_rate": 2.9522288586974136e-06, "loss": 1.4461, "step": 13974 }, { "epoch": 0.7640474010087066, "grad_norm": 1.6554105281829834, "learning_rate": 2.950932596489463e-06, "loss": 1.305, "step": 13975 }, { "epoch": 0.7641020734524281, "grad_norm": 1.9175711870193481, "learning_rate": 2.9496365696664143e-06, "loss": 1.4182, "step": 13976 }, { "epoch": 0.7641567458961497, "grad_norm": 1.1184016466140747, "learning_rate": 2.948340778271541e-06, "loss": 1.5591, "step": 13977 }, { "epoch": 0.7642114183398713, "grad_norm": 1.6203097105026245, "learning_rate": 2.9470452223481206e-06, "loss": 1.1552, "step": 13978 }, { "epoch": 0.7642660907835928, "grad_norm": 1.9191956520080566, "learning_rate": 2.9457499019394088e-06, "loss": 1.561, "step": 13979 }, { "epoch": 0.7643207632273143, "grad_norm": 1.4146026372909546, "learning_rate": 2.9444548170886588e-06, "loss": 1.5564, "step": 13980 }, { "epoch": 0.7643754356710359, "grad_norm": 1.9874544143676758, "learning_rate": 2.943159967839122e-06, "loss": 1.5169, "step": 13981 }, { "epoch": 0.7644301081147574, "grad_norm": 1.4092237949371338, "learning_rate": 2.9418653542340336e-06, "loss": 1.6706, "step": 13982 }, { "epoch": 0.764484780558479, "grad_norm": 1.3957122564315796, "learning_rate": 2.94057097631662e-06, "loss": 1.3096, "step": 13983 }, { "epoch": 0.7645394530022006, "grad_norm": 1.5791709423065186, "learning_rate": 2.93927683413011e-06, "loss": 1.6407, "step": 13984 }, { "epoch": 0.7645941254459221, "grad_norm": 1.456817865371704, "learning_rate": 2.9379829277177152e-06, "loss": 1.3679, "step": 13985 }, { "epoch": 0.7646487978896437, "grad_norm": 1.8079811334609985, "learning_rate": 2.9366892571226424e-06, "loss": 1.6179, "step": 13986 }, { "epoch": 0.7647034703333653, "grad_norm": 1.236598253250122, "learning_rate": 2.9353958223880895e-06, "loss": 1.4638, "step": 13987 }, { "epoch": 0.7647581427770868, "grad_norm": 1.4840863943099976, "learning_rate": 2.9341026235572446e-06, "loss": 1.4494, "step": 13988 }, { "epoch": 0.7648128152208084, "grad_norm": 1.4141664505004883, "learning_rate": 2.932809660673297e-06, "loss": 1.5299, "step": 13989 }, { "epoch": 0.7648674876645298, "grad_norm": 1.3778928518295288, "learning_rate": 2.9315169337794183e-06, "loss": 1.4083, "step": 13990 }, { "epoch": 0.7649221601082514, "grad_norm": 1.5392026901245117, "learning_rate": 2.9302244429187723e-06, "loss": 1.3624, "step": 13991 }, { "epoch": 0.764976832551973, "grad_norm": 1.6928104162216187, "learning_rate": 2.9289321881345257e-06, "loss": 1.4856, "step": 13992 }, { "epoch": 0.7650315049956945, "grad_norm": 1.5586090087890625, "learning_rate": 2.9276401694698255e-06, "loss": 1.5235, "step": 13993 }, { "epoch": 0.7650861774394161, "grad_norm": 1.2702770233154297, "learning_rate": 2.9263483869678133e-06, "loss": 1.5246, "step": 13994 }, { "epoch": 0.7651408498831377, "grad_norm": 1.7450920343399048, "learning_rate": 2.9250568406716305e-06, "loss": 1.3099, "step": 13995 }, { "epoch": 0.7651955223268592, "grad_norm": 2.171797037124634, "learning_rate": 2.9237655306244017e-06, "loss": 1.4996, "step": 13996 }, { "epoch": 0.7652501947705808, "grad_norm": 1.5354247093200684, "learning_rate": 2.922474456869243e-06, "loss": 1.4165, "step": 13997 }, { "epoch": 0.7653048672143024, "grad_norm": 1.6495929956436157, "learning_rate": 2.921183619449274e-06, "loss": 1.3806, "step": 13998 }, { "epoch": 0.7653595396580238, "grad_norm": 1.5425199270248413, "learning_rate": 2.9198930184075944e-06, "loss": 1.5567, "step": 13999 }, { "epoch": 0.7654142121017454, "grad_norm": 1.600356936454773, "learning_rate": 2.9186026537873003e-06, "loss": 1.3365, "step": 14000 }, { "epoch": 0.765468884545467, "grad_norm": 1.2663168907165527, "learning_rate": 2.9173125256314817e-06, "loss": 1.5899, "step": 14001 }, { "epoch": 0.7655235569891885, "grad_norm": 1.402220606803894, "learning_rate": 2.916022633983214e-06, "loss": 1.6762, "step": 14002 }, { "epoch": 0.7655782294329101, "grad_norm": 1.606041669845581, "learning_rate": 2.914732978885577e-06, "loss": 1.373, "step": 14003 }, { "epoch": 0.7656329018766316, "grad_norm": 1.3927830457687378, "learning_rate": 2.9134435603816324e-06, "loss": 1.5875, "step": 14004 }, { "epoch": 0.7656875743203532, "grad_norm": 1.7539349794387817, "learning_rate": 2.9121543785144333e-06, "loss": 1.3232, "step": 14005 }, { "epoch": 0.7657422467640748, "grad_norm": 1.7325466871261597, "learning_rate": 2.9108654333270346e-06, "loss": 1.1217, "step": 14006 }, { "epoch": 0.7657969192077962, "grad_norm": 1.4492131471633911, "learning_rate": 2.909576724862474e-06, "loss": 1.4378, "step": 14007 }, { "epoch": 0.7658515916515178, "grad_norm": 1.5985273122787476, "learning_rate": 2.9082882531637813e-06, "loss": 1.4241, "step": 14008 }, { "epoch": 0.7659062640952394, "grad_norm": 2.1525986194610596, "learning_rate": 2.9070000182739886e-06, "loss": 1.4255, "step": 14009 }, { "epoch": 0.7659609365389609, "grad_norm": 1.775611400604248, "learning_rate": 2.90571202023611e-06, "loss": 1.2308, "step": 14010 }, { "epoch": 0.7660156089826825, "grad_norm": 1.291718602180481, "learning_rate": 2.904424259093154e-06, "loss": 1.4697, "step": 14011 }, { "epoch": 0.7660702814264041, "grad_norm": 1.6725127696990967, "learning_rate": 2.9031367348881224e-06, "loss": 1.3638, "step": 14012 }, { "epoch": 0.7661249538701256, "grad_norm": 1.9187532663345337, "learning_rate": 2.901849447664008e-06, "loss": 1.2995, "step": 14013 }, { "epoch": 0.7661796263138472, "grad_norm": 1.455527901649475, "learning_rate": 2.9005623974637943e-06, "loss": 1.4379, "step": 14014 }, { "epoch": 0.7662342987575688, "grad_norm": 2.0410594940185547, "learning_rate": 2.8992755843304643e-06, "loss": 1.4252, "step": 14015 }, { "epoch": 0.7662889712012902, "grad_norm": 1.2398964166641235, "learning_rate": 2.8979890083069817e-06, "loss": 1.4692, "step": 14016 }, { "epoch": 0.7663436436450118, "grad_norm": 1.5979645252227783, "learning_rate": 2.8967026694363156e-06, "loss": 1.4959, "step": 14017 }, { "epoch": 0.7663983160887333, "grad_norm": 1.3164504766464233, "learning_rate": 2.8954165677614143e-06, "loss": 1.335, "step": 14018 }, { "epoch": 0.7664529885324549, "grad_norm": 1.5666167736053467, "learning_rate": 2.894130703325223e-06, "loss": 1.3981, "step": 14019 }, { "epoch": 0.7665076609761765, "grad_norm": 1.3209196329116821, "learning_rate": 2.892845076170685e-06, "loss": 1.5493, "step": 14020 }, { "epoch": 0.766562333419898, "grad_norm": 1.2231014966964722, "learning_rate": 2.891559686340727e-06, "loss": 1.5727, "step": 14021 }, { "epoch": 0.7666170058636196, "grad_norm": 1.4084205627441406, "learning_rate": 2.890274533878269e-06, "loss": 1.3805, "step": 14022 }, { "epoch": 0.7666716783073412, "grad_norm": 1.5124868154525757, "learning_rate": 2.8889896188262303e-06, "loss": 1.5047, "step": 14023 }, { "epoch": 0.7667263507510627, "grad_norm": 1.5824742317199707, "learning_rate": 2.8877049412275147e-06, "loss": 1.4171, "step": 14024 }, { "epoch": 0.7667810231947843, "grad_norm": 1.5846515893936157, "learning_rate": 2.8864205011250214e-06, "loss": 1.3759, "step": 14025 }, { "epoch": 0.7668356956385058, "grad_norm": 1.0793836116790771, "learning_rate": 2.8851362985616395e-06, "loss": 1.5522, "step": 14026 }, { "epoch": 0.7668903680822273, "grad_norm": 1.3430465459823608, "learning_rate": 2.8838523335802525e-06, "loss": 1.3901, "step": 14027 }, { "epoch": 0.7669450405259489, "grad_norm": 2.0427262783050537, "learning_rate": 2.8825686062237315e-06, "loss": 1.117, "step": 14028 }, { "epoch": 0.7669997129696705, "grad_norm": 1.6562012434005737, "learning_rate": 2.881285116534949e-06, "loss": 1.2608, "step": 14029 }, { "epoch": 0.767054385413392, "grad_norm": 1.46016263961792, "learning_rate": 2.8800018645567572e-06, "loss": 1.2932, "step": 14030 }, { "epoch": 0.7671090578571136, "grad_norm": 1.8381158113479614, "learning_rate": 2.878718850332015e-06, "loss": 1.3146, "step": 14031 }, { "epoch": 0.7671637303008351, "grad_norm": 1.4313479661941528, "learning_rate": 2.877436073903561e-06, "loss": 1.3197, "step": 14032 }, { "epoch": 0.7672184027445567, "grad_norm": 1.4149600267410278, "learning_rate": 2.8761535353142266e-06, "loss": 1.5474, "step": 14033 }, { "epoch": 0.7672730751882783, "grad_norm": 1.518837571144104, "learning_rate": 2.8748712346068464e-06, "loss": 1.3193, "step": 14034 }, { "epoch": 0.7673277476319997, "grad_norm": 1.8534305095672607, "learning_rate": 2.8735891718242347e-06, "loss": 1.3665, "step": 14035 }, { "epoch": 0.7673824200757213, "grad_norm": 1.5776150226593018, "learning_rate": 2.8723073470092e-06, "loss": 1.3554, "step": 14036 }, { "epoch": 0.7674370925194429, "grad_norm": 1.6446495056152344, "learning_rate": 2.8710257602045512e-06, "loss": 1.4892, "step": 14037 }, { "epoch": 0.7674917649631644, "grad_norm": 1.3725342750549316, "learning_rate": 2.8697444114530814e-06, "loss": 1.3947, "step": 14038 }, { "epoch": 0.767546437406886, "grad_norm": 1.368491768836975, "learning_rate": 2.8684633007975772e-06, "loss": 1.6789, "step": 14039 }, { "epoch": 0.7676011098506076, "grad_norm": 1.4228754043579102, "learning_rate": 2.867182428280818e-06, "loss": 1.2622, "step": 14040 }, { "epoch": 0.7676557822943291, "grad_norm": 1.8638420104980469, "learning_rate": 2.865901793945576e-06, "loss": 1.2613, "step": 14041 }, { "epoch": 0.7677104547380507, "grad_norm": 1.5018192529678345, "learning_rate": 2.8646213978346104e-06, "loss": 1.35, "step": 14042 }, { "epoch": 0.7677651271817723, "grad_norm": 2.0884947776794434, "learning_rate": 2.8633412399906825e-06, "loss": 1.4525, "step": 14043 }, { "epoch": 0.7678197996254937, "grad_norm": 1.7322969436645508, "learning_rate": 2.862061320456535e-06, "loss": 1.2971, "step": 14044 }, { "epoch": 0.7678744720692153, "grad_norm": 1.0991215705871582, "learning_rate": 2.860781639274912e-06, "loss": 1.4839, "step": 14045 }, { "epoch": 0.7679291445129368, "grad_norm": 1.4012709856033325, "learning_rate": 2.8595021964885426e-06, "loss": 1.343, "step": 14046 }, { "epoch": 0.7679838169566584, "grad_norm": 1.3242378234863281, "learning_rate": 2.8582229921401484e-06, "loss": 1.4674, "step": 14047 }, { "epoch": 0.76803848940038, "grad_norm": 2.0324835777282715, "learning_rate": 2.8569440262724502e-06, "loss": 1.6137, "step": 14048 }, { "epoch": 0.7680931618441015, "grad_norm": 1.9055287837982178, "learning_rate": 2.8556652989281517e-06, "loss": 1.2416, "step": 14049 }, { "epoch": 0.7681478342878231, "grad_norm": 2.120898723602295, "learning_rate": 2.854386810149955e-06, "loss": 1.5554, "step": 14050 }, { "epoch": 0.7682025067315447, "grad_norm": 1.486877202987671, "learning_rate": 2.8531085599805496e-06, "loss": 1.5983, "step": 14051 }, { "epoch": 0.7682571791752661, "grad_norm": 1.1810815334320068, "learning_rate": 2.8518305484626196e-06, "loss": 1.7092, "step": 14052 }, { "epoch": 0.7683118516189877, "grad_norm": 1.4842191934585571, "learning_rate": 2.8505527756388384e-06, "loss": 1.4603, "step": 14053 }, { "epoch": 0.7683665240627093, "grad_norm": 2.0591070652008057, "learning_rate": 2.8492752415518808e-06, "loss": 1.1024, "step": 14054 }, { "epoch": 0.7684211965064308, "grad_norm": 1.716732144355774, "learning_rate": 2.8479979462444017e-06, "loss": 1.6299, "step": 14055 }, { "epoch": 0.7684758689501524, "grad_norm": 1.4629284143447876, "learning_rate": 2.8467208897590513e-06, "loss": 1.2989, "step": 14056 }, { "epoch": 0.768530541393874, "grad_norm": 1.6479284763336182, "learning_rate": 2.845444072138479e-06, "loss": 1.3397, "step": 14057 }, { "epoch": 0.7685852138375955, "grad_norm": 1.3987897634506226, "learning_rate": 2.844167493425314e-06, "loss": 1.3694, "step": 14058 }, { "epoch": 0.7686398862813171, "grad_norm": 1.719570279121399, "learning_rate": 2.8428911536621916e-06, "loss": 1.5446, "step": 14059 }, { "epoch": 0.7686945587250386, "grad_norm": 1.3894249200820923, "learning_rate": 2.8416150528917288e-06, "loss": 1.5177, "step": 14060 }, { "epoch": 0.7687492311687601, "grad_norm": 1.7129170894622803, "learning_rate": 2.8403391911565335e-06, "loss": 1.6037, "step": 14061 }, { "epoch": 0.7688039036124817, "grad_norm": 1.5936462879180908, "learning_rate": 2.8390635684992163e-06, "loss": 1.2069, "step": 14062 }, { "epoch": 0.7688585760562032, "grad_norm": 1.5574599504470825, "learning_rate": 2.8377881849623714e-06, "loss": 1.2677, "step": 14063 }, { "epoch": 0.7689132484999248, "grad_norm": 1.6091632843017578, "learning_rate": 2.8365130405885843e-06, "loss": 1.4507, "step": 14064 }, { "epoch": 0.7689679209436464, "grad_norm": 1.618964433670044, "learning_rate": 2.835238135420436e-06, "loss": 1.481, "step": 14065 }, { "epoch": 0.7690225933873679, "grad_norm": 1.9721782207489014, "learning_rate": 2.8339634695005e-06, "loss": 1.2158, "step": 14066 }, { "epoch": 0.7690772658310895, "grad_norm": 1.72300124168396, "learning_rate": 2.832689042871336e-06, "loss": 1.4573, "step": 14067 }, { "epoch": 0.7691319382748111, "grad_norm": 1.2985966205596924, "learning_rate": 2.831414855575507e-06, "loss": 1.6254, "step": 14068 }, { "epoch": 0.7691866107185326, "grad_norm": 1.474837303161621, "learning_rate": 2.8301409076555574e-06, "loss": 1.543, "step": 14069 }, { "epoch": 0.7692412831622542, "grad_norm": 1.4855685234069824, "learning_rate": 2.828867199154024e-06, "loss": 1.3202, "step": 14070 }, { "epoch": 0.7692959556059757, "grad_norm": 1.5253045558929443, "learning_rate": 2.827593730113446e-06, "loss": 1.2525, "step": 14071 }, { "epoch": 0.7693506280496972, "grad_norm": 1.3554705381393433, "learning_rate": 2.8263205005763405e-06, "loss": 1.498, "step": 14072 }, { "epoch": 0.7694053004934188, "grad_norm": 1.2395970821380615, "learning_rate": 2.8250475105852306e-06, "loss": 1.5103, "step": 14073 }, { "epoch": 0.7694599729371404, "grad_norm": 1.4431400299072266, "learning_rate": 2.8237747601826193e-06, "loss": 1.4419, "step": 14074 }, { "epoch": 0.7695146453808619, "grad_norm": 1.3231496810913086, "learning_rate": 2.8225022494110067e-06, "loss": 1.4364, "step": 14075 }, { "epoch": 0.7695693178245835, "grad_norm": 1.6818122863769531, "learning_rate": 2.821229978312889e-06, "loss": 1.3378, "step": 14076 }, { "epoch": 0.769623990268305, "grad_norm": 1.5175825357437134, "learning_rate": 2.819957946930748e-06, "loss": 1.4531, "step": 14077 }, { "epoch": 0.7696786627120266, "grad_norm": 1.9106172323226929, "learning_rate": 2.81868615530706e-06, "loss": 1.4856, "step": 14078 }, { "epoch": 0.7697333351557482, "grad_norm": 1.5121746063232422, "learning_rate": 2.8174146034842933e-06, "loss": 1.4078, "step": 14079 }, { "epoch": 0.7697880075994696, "grad_norm": 1.7615643739700317, "learning_rate": 2.816143291504906e-06, "loss": 1.4021, "step": 14080 }, { "epoch": 0.7698426800431912, "grad_norm": 1.512718915939331, "learning_rate": 2.8148722194113498e-06, "loss": 1.3337, "step": 14081 }, { "epoch": 0.7698973524869128, "grad_norm": 1.8834302425384521, "learning_rate": 2.8136013872460733e-06, "loss": 1.738, "step": 14082 }, { "epoch": 0.7699520249306343, "grad_norm": 1.4915872812271118, "learning_rate": 2.8123307950515087e-06, "loss": 1.6034, "step": 14083 }, { "epoch": 0.7700066973743559, "grad_norm": 1.3831084966659546, "learning_rate": 2.811060442870084e-06, "loss": 1.4409, "step": 14084 }, { "epoch": 0.7700613698180775, "grad_norm": 1.4438042640686035, "learning_rate": 2.809790330744222e-06, "loss": 1.6027, "step": 14085 }, { "epoch": 0.770116042261799, "grad_norm": 1.7456575632095337, "learning_rate": 2.8085204587163317e-06, "loss": 1.4854, "step": 14086 }, { "epoch": 0.7701707147055206, "grad_norm": 1.6296577453613281, "learning_rate": 2.8072508268288212e-06, "loss": 1.4766, "step": 14087 }, { "epoch": 0.7702253871492422, "grad_norm": 1.4105298519134521, "learning_rate": 2.805981435124083e-06, "loss": 1.382, "step": 14088 }, { "epoch": 0.7702800595929636, "grad_norm": 1.6401110887527466, "learning_rate": 2.8047122836445063e-06, "loss": 1.3452, "step": 14089 }, { "epoch": 0.7703347320366852, "grad_norm": 1.5464749336242676, "learning_rate": 2.8034433724324716e-06, "loss": 1.4984, "step": 14090 }, { "epoch": 0.7703894044804067, "grad_norm": 1.260025143623352, "learning_rate": 2.802174701530349e-06, "loss": 1.4644, "step": 14091 }, { "epoch": 0.7704440769241283, "grad_norm": 1.012300729751587, "learning_rate": 2.8009062709805014e-06, "loss": 1.6758, "step": 14092 }, { "epoch": 0.7704987493678499, "grad_norm": 2.0829217433929443, "learning_rate": 2.7996380808252887e-06, "loss": 1.5083, "step": 14093 }, { "epoch": 0.7705534218115714, "grad_norm": 1.6327377557754517, "learning_rate": 2.7983701311070564e-06, "loss": 1.3217, "step": 14094 }, { "epoch": 0.770608094255293, "grad_norm": 1.4816116094589233, "learning_rate": 2.797102421868142e-06, "loss": 1.2939, "step": 14095 }, { "epoch": 0.7706627666990146, "grad_norm": 1.5039154291152954, "learning_rate": 2.7958349531508833e-06, "loss": 1.8387, "step": 14096 }, { "epoch": 0.770717439142736, "grad_norm": 1.9766818284988403, "learning_rate": 2.7945677249976e-06, "loss": 1.5674, "step": 14097 }, { "epoch": 0.7707721115864576, "grad_norm": 1.3545225858688354, "learning_rate": 2.793300737450605e-06, "loss": 1.6576, "step": 14098 }, { "epoch": 0.7708267840301792, "grad_norm": 1.600582480430603, "learning_rate": 2.792033990552213e-06, "loss": 1.381, "step": 14099 }, { "epoch": 0.7708814564739007, "grad_norm": 1.3792165517807007, "learning_rate": 2.7907674843447172e-06, "loss": 1.5598, "step": 14100 }, { "epoch": 0.7709361289176223, "grad_norm": 1.7543495893478394, "learning_rate": 2.789501218870415e-06, "loss": 1.375, "step": 14101 }, { "epoch": 0.7709908013613439, "grad_norm": 1.4135253429412842, "learning_rate": 2.788235194171588e-06, "loss": 1.6094, "step": 14102 }, { "epoch": 0.7710454738050654, "grad_norm": 1.4905821084976196, "learning_rate": 2.78696941029051e-06, "loss": 1.5079, "step": 14103 }, { "epoch": 0.771100146248787, "grad_norm": 1.4992458820343018, "learning_rate": 2.7857038672694492e-06, "loss": 1.3959, "step": 14104 }, { "epoch": 0.7711548186925085, "grad_norm": 1.9425673484802246, "learning_rate": 2.7844385651506643e-06, "loss": 1.4374, "step": 14105 }, { "epoch": 0.77120949113623, "grad_norm": 1.5406032800674438, "learning_rate": 2.7831735039764054e-06, "loss": 1.3522, "step": 14106 }, { "epoch": 0.7712641635799516, "grad_norm": 1.7984110116958618, "learning_rate": 2.781908683788921e-06, "loss": 1.261, "step": 14107 }, { "epoch": 0.7713188360236731, "grad_norm": 1.5447542667388916, "learning_rate": 2.7806441046304432e-06, "loss": 1.3289, "step": 14108 }, { "epoch": 0.7713735084673947, "grad_norm": 1.355462670326233, "learning_rate": 2.7793797665431977e-06, "loss": 1.5357, "step": 14109 }, { "epoch": 0.7714281809111163, "grad_norm": 1.3854693174362183, "learning_rate": 2.7781156695694066e-06, "loss": 1.6052, "step": 14110 }, { "epoch": 0.7714828533548378, "grad_norm": 1.552841067314148, "learning_rate": 2.776851813751281e-06, "loss": 1.6709, "step": 14111 }, { "epoch": 0.7715375257985594, "grad_norm": 1.7935436964035034, "learning_rate": 2.7755881991310206e-06, "loss": 1.1388, "step": 14112 }, { "epoch": 0.771592198242281, "grad_norm": 2.2599666118621826, "learning_rate": 2.774324825750825e-06, "loss": 1.4402, "step": 14113 }, { "epoch": 0.7716468706860025, "grad_norm": 2.05316424369812, "learning_rate": 2.7730616936528765e-06, "loss": 1.3455, "step": 14114 }, { "epoch": 0.771701543129724, "grad_norm": 1.9916051626205444, "learning_rate": 2.7717988028793587e-06, "loss": 1.2716, "step": 14115 }, { "epoch": 0.7717562155734456, "grad_norm": 1.522382140159607, "learning_rate": 2.770536153472441e-06, "loss": 1.2569, "step": 14116 }, { "epoch": 0.7718108880171671, "grad_norm": 1.4257352352142334, "learning_rate": 2.7692737454742858e-06, "loss": 1.4957, "step": 14117 }, { "epoch": 0.7718655604608887, "grad_norm": 1.3908722400665283, "learning_rate": 2.7680115789270478e-06, "loss": 1.4664, "step": 14118 }, { "epoch": 0.7719202329046102, "grad_norm": 1.722440242767334, "learning_rate": 2.766749653872873e-06, "loss": 1.2197, "step": 14119 }, { "epoch": 0.7719749053483318, "grad_norm": 1.6402033567428589, "learning_rate": 2.7654879703538974e-06, "loss": 1.2933, "step": 14120 }, { "epoch": 0.7720295777920534, "grad_norm": 1.350925087928772, "learning_rate": 2.7642265284122584e-06, "loss": 1.3099, "step": 14121 }, { "epoch": 0.7720842502357749, "grad_norm": 1.9538525342941284, "learning_rate": 2.762965328090075e-06, "loss": 1.6599, "step": 14122 }, { "epoch": 0.7721389226794965, "grad_norm": 1.7814126014709473, "learning_rate": 2.761704369429458e-06, "loss": 1.6425, "step": 14123 }, { "epoch": 0.7721935951232181, "grad_norm": 1.7435855865478516, "learning_rate": 2.760443652472521e-06, "loss": 1.5897, "step": 14124 }, { "epoch": 0.7722482675669395, "grad_norm": 1.6655161380767822, "learning_rate": 2.7591831772613576e-06, "loss": 1.4059, "step": 14125 }, { "epoch": 0.7723029400106611, "grad_norm": 2.100853443145752, "learning_rate": 2.7579229438380563e-06, "loss": 1.3558, "step": 14126 }, { "epoch": 0.7723576124543827, "grad_norm": 1.595104455947876, "learning_rate": 2.7566629522447054e-06, "loss": 1.2532, "step": 14127 }, { "epoch": 0.7724122848981042, "grad_norm": 1.3144826889038086, "learning_rate": 2.7554032025233756e-06, "loss": 1.3347, "step": 14128 }, { "epoch": 0.7724669573418258, "grad_norm": 1.4976121187210083, "learning_rate": 2.754143694716133e-06, "loss": 1.2768, "step": 14129 }, { "epoch": 0.7725216297855474, "grad_norm": 1.9907829761505127, "learning_rate": 2.7528844288650347e-06, "loss": 1.0894, "step": 14130 }, { "epoch": 0.7725763022292689, "grad_norm": 1.3611623048782349, "learning_rate": 2.7516254050121284e-06, "loss": 1.3197, "step": 14131 }, { "epoch": 0.7726309746729905, "grad_norm": 1.4934130907058716, "learning_rate": 2.750366623199462e-06, "loss": 1.2057, "step": 14132 }, { "epoch": 0.772685647116712, "grad_norm": 1.9346332550048828, "learning_rate": 2.7491080834690655e-06, "loss": 1.6892, "step": 14133 }, { "epoch": 0.7727403195604335, "grad_norm": 1.5809321403503418, "learning_rate": 2.7478497858629617e-06, "loss": 1.3497, "step": 14134 }, { "epoch": 0.7727949920041551, "grad_norm": 1.2316416501998901, "learning_rate": 2.7465917304231747e-06, "loss": 1.3044, "step": 14135 }, { "epoch": 0.7728496644478766, "grad_norm": 1.6359935998916626, "learning_rate": 2.7453339171917106e-06, "loss": 1.4895, "step": 14136 }, { "epoch": 0.7729043368915982, "grad_norm": 1.374935507774353, "learning_rate": 2.744076346210567e-06, "loss": 1.2, "step": 14137 }, { "epoch": 0.7729590093353198, "grad_norm": 1.1885666847229004, "learning_rate": 2.742819017521744e-06, "loss": 1.57, "step": 14138 }, { "epoch": 0.7730136817790413, "grad_norm": 1.1250048875808716, "learning_rate": 2.7415619311672236e-06, "loss": 1.7482, "step": 14139 }, { "epoch": 0.7730683542227629, "grad_norm": 1.7028595209121704, "learning_rate": 2.740305087188979e-06, "loss": 1.2849, "step": 14140 }, { "epoch": 0.7731230266664845, "grad_norm": 1.622040033340454, "learning_rate": 2.7390484856289867e-06, "loss": 1.3476, "step": 14141 }, { "epoch": 0.773177699110206, "grad_norm": 1.7268683910369873, "learning_rate": 2.737792126529204e-06, "loss": 1.6261, "step": 14142 }, { "epoch": 0.7732323715539275, "grad_norm": 2.0296571254730225, "learning_rate": 2.736536009931583e-06, "loss": 1.3315, "step": 14143 }, { "epoch": 0.7732870439976491, "grad_norm": 1.5179471969604492, "learning_rate": 2.73528013587807e-06, "loss": 1.1614, "step": 14144 }, { "epoch": 0.7733417164413706, "grad_norm": 1.5931613445281982, "learning_rate": 2.734024504410596e-06, "loss": 1.5277, "step": 14145 }, { "epoch": 0.7733963888850922, "grad_norm": 1.8636101484298706, "learning_rate": 2.7327691155710978e-06, "loss": 1.2108, "step": 14146 }, { "epoch": 0.7734510613288137, "grad_norm": 1.59565269947052, "learning_rate": 2.7315139694014913e-06, "loss": 1.3384, "step": 14147 }, { "epoch": 0.7735057337725353, "grad_norm": 1.3265137672424316, "learning_rate": 2.730259065943688e-06, "loss": 1.4837, "step": 14148 }, { "epoch": 0.7735604062162569, "grad_norm": 1.5696502923965454, "learning_rate": 2.729004405239595e-06, "loss": 1.4656, "step": 14149 }, { "epoch": 0.7736150786599784, "grad_norm": 1.6032437086105347, "learning_rate": 2.7277499873311064e-06, "loss": 1.5618, "step": 14150 }, { "epoch": 0.7736697511037, "grad_norm": 1.6558756828308105, "learning_rate": 2.7264958122601083e-06, "loss": 1.5643, "step": 14151 }, { "epoch": 0.7737244235474215, "grad_norm": 1.7408970594406128, "learning_rate": 2.7252418800684865e-06, "loss": 1.4362, "step": 14152 }, { "epoch": 0.773779095991143, "grad_norm": 1.7457499504089355, "learning_rate": 2.723988190798108e-06, "loss": 1.0869, "step": 14153 }, { "epoch": 0.7738337684348646, "grad_norm": 1.707546353340149, "learning_rate": 2.722734744490835e-06, "loss": 1.4341, "step": 14154 }, { "epoch": 0.7738884408785862, "grad_norm": 1.3236615657806396, "learning_rate": 2.7214815411885287e-06, "loss": 1.6318, "step": 14155 }, { "epoch": 0.7739431133223077, "grad_norm": 1.3952579498291016, "learning_rate": 2.720228580933033e-06, "loss": 1.0185, "step": 14156 }, { "epoch": 0.7739977857660293, "grad_norm": 1.3782023191452026, "learning_rate": 2.718975863766188e-06, "loss": 1.4371, "step": 14157 }, { "epoch": 0.7740524582097509, "grad_norm": 1.9142329692840576, "learning_rate": 2.717723389729823e-06, "loss": 1.3142, "step": 14158 }, { "epoch": 0.7741071306534724, "grad_norm": 1.5672451257705688, "learning_rate": 2.716471158865761e-06, "loss": 1.1271, "step": 14159 }, { "epoch": 0.774161803097194, "grad_norm": 1.1649264097213745, "learning_rate": 2.7152191712158207e-06, "loss": 1.802, "step": 14160 }, { "epoch": 0.7742164755409154, "grad_norm": 1.6366204023361206, "learning_rate": 2.713967426821806e-06, "loss": 1.4084, "step": 14161 }, { "epoch": 0.774271147984637, "grad_norm": 1.5479668378829956, "learning_rate": 2.712715925725514e-06, "loss": 1.4788, "step": 14162 }, { "epoch": 0.7743258204283586, "grad_norm": 1.8057479858398438, "learning_rate": 2.7114646679687393e-06, "loss": 1.4002, "step": 14163 }, { "epoch": 0.7743804928720801, "grad_norm": 1.447081446647644, "learning_rate": 2.7102136535932633e-06, "loss": 1.0611, "step": 14164 }, { "epoch": 0.7744351653158017, "grad_norm": 1.4558660984039307, "learning_rate": 2.7089628826408563e-06, "loss": 1.4686, "step": 14165 }, { "epoch": 0.7744898377595233, "grad_norm": 1.7468111515045166, "learning_rate": 2.7077123551532913e-06, "loss": 1.3554, "step": 14166 }, { "epoch": 0.7745445102032448, "grad_norm": 1.6837499141693115, "learning_rate": 2.706462071172322e-06, "loss": 1.5629, "step": 14167 }, { "epoch": 0.7745991826469664, "grad_norm": 1.2285635471343994, "learning_rate": 2.705212030739699e-06, "loss": 1.5766, "step": 14168 }, { "epoch": 0.774653855090688, "grad_norm": 1.5692315101623535, "learning_rate": 2.7039622338971637e-06, "loss": 1.2824, "step": 14169 }, { "epoch": 0.7747085275344094, "grad_norm": 1.680774450302124, "learning_rate": 2.7027126806864467e-06, "loss": 1.3714, "step": 14170 }, { "epoch": 0.774763199978131, "grad_norm": 1.4225749969482422, "learning_rate": 2.701463371149281e-06, "loss": 1.4329, "step": 14171 }, { "epoch": 0.7748178724218526, "grad_norm": 1.323266863822937, "learning_rate": 2.70021430532738e-06, "loss": 1.7347, "step": 14172 }, { "epoch": 0.7748725448655741, "grad_norm": 1.250338077545166, "learning_rate": 2.698965483262449e-06, "loss": 1.3697, "step": 14173 }, { "epoch": 0.7749272173092957, "grad_norm": 1.7577210664749146, "learning_rate": 2.697716904996196e-06, "loss": 1.2549, "step": 14174 }, { "epoch": 0.7749818897530172, "grad_norm": 1.9714148044586182, "learning_rate": 2.6964685705703107e-06, "loss": 1.6372, "step": 14175 }, { "epoch": 0.7750365621967388, "grad_norm": 1.7511699199676514, "learning_rate": 2.695220480026476e-06, "loss": 1.37, "step": 14176 }, { "epoch": 0.7750912346404604, "grad_norm": 1.7002619504928589, "learning_rate": 2.693972633406373e-06, "loss": 1.2097, "step": 14177 }, { "epoch": 0.7751459070841819, "grad_norm": 1.628452181816101, "learning_rate": 2.6927250307516685e-06, "loss": 1.4082, "step": 14178 }, { "epoch": 0.7752005795279034, "grad_norm": 1.8177145719528198, "learning_rate": 2.691477672104018e-06, "loss": 1.5662, "step": 14179 }, { "epoch": 0.775255251971625, "grad_norm": 1.4181338548660278, "learning_rate": 2.6902305575050822e-06, "loss": 1.2418, "step": 14180 }, { "epoch": 0.7753099244153465, "grad_norm": 1.102957010269165, "learning_rate": 2.6889836869965016e-06, "loss": 1.6989, "step": 14181 }, { "epoch": 0.7753645968590681, "grad_norm": 1.190359354019165, "learning_rate": 2.6877370606199094e-06, "loss": 1.6927, "step": 14182 }, { "epoch": 0.7754192693027897, "grad_norm": 1.6873403787612915, "learning_rate": 2.6864906784169375e-06, "loss": 1.5663, "step": 14183 }, { "epoch": 0.7754739417465112, "grad_norm": 1.9708367586135864, "learning_rate": 2.685244540429199e-06, "loss": 1.4799, "step": 14184 }, { "epoch": 0.7755286141902328, "grad_norm": 1.693282127380371, "learning_rate": 2.683998646698314e-06, "loss": 1.1889, "step": 14185 }, { "epoch": 0.7755832866339544, "grad_norm": 2.578188180923462, "learning_rate": 2.6827529972658816e-06, "loss": 1.0998, "step": 14186 }, { "epoch": 0.7756379590776759, "grad_norm": 1.4997103214263916, "learning_rate": 2.6815075921734936e-06, "loss": 1.2984, "step": 14187 }, { "epoch": 0.7756926315213974, "grad_norm": 1.5106147527694702, "learning_rate": 2.6802624314627436e-06, "loss": 1.4168, "step": 14188 }, { "epoch": 0.7757473039651189, "grad_norm": 1.3948898315429688, "learning_rate": 2.6790175151752073e-06, "loss": 1.56, "step": 14189 }, { "epoch": 0.7758019764088405, "grad_norm": 1.9451828002929688, "learning_rate": 2.677772843352453e-06, "loss": 1.2258, "step": 14190 }, { "epoch": 0.7758566488525621, "grad_norm": 1.8284364938735962, "learning_rate": 2.6765284160360483e-06, "loss": 1.4161, "step": 14191 }, { "epoch": 0.7759113212962836, "grad_norm": 1.4915364980697632, "learning_rate": 2.6752842332675446e-06, "loss": 1.5327, "step": 14192 }, { "epoch": 0.7759659937400052, "grad_norm": 2.0361251831054688, "learning_rate": 2.6740402950884848e-06, "loss": 1.6057, "step": 14193 }, { "epoch": 0.7760206661837268, "grad_norm": 1.76774263381958, "learning_rate": 2.672796601540415e-06, "loss": 1.5146, "step": 14194 }, { "epoch": 0.7760753386274483, "grad_norm": 1.884859561920166, "learning_rate": 2.6715531526648585e-06, "loss": 1.3268, "step": 14195 }, { "epoch": 0.7761300110711699, "grad_norm": 2.077415943145752, "learning_rate": 2.67030994850334e-06, "loss": 1.3087, "step": 14196 }, { "epoch": 0.7761846835148914, "grad_norm": 1.4769076108932495, "learning_rate": 2.669066989097373e-06, "loss": 1.4267, "step": 14197 }, { "epoch": 0.7762393559586129, "grad_norm": 1.7780063152313232, "learning_rate": 2.6678242744884575e-06, "loss": 1.4356, "step": 14198 }, { "epoch": 0.7762940284023345, "grad_norm": 1.4405455589294434, "learning_rate": 2.666581804718098e-06, "loss": 1.4271, "step": 14199 }, { "epoch": 0.7763487008460561, "grad_norm": 2.934390068054199, "learning_rate": 2.6653395798277814e-06, "loss": 1.4825, "step": 14200 }, { "epoch": 0.7764033732897776, "grad_norm": 2.059854745864868, "learning_rate": 2.6640975998589836e-06, "loss": 1.3954, "step": 14201 }, { "epoch": 0.7764580457334992, "grad_norm": 1.7691781520843506, "learning_rate": 2.6628558648531845e-06, "loss": 1.3302, "step": 14202 }, { "epoch": 0.7765127181772207, "grad_norm": 2.742183208465576, "learning_rate": 2.6616143748518453e-06, "loss": 1.3163, "step": 14203 }, { "epoch": 0.7765673906209423, "grad_norm": 1.5143684148788452, "learning_rate": 2.6603731298964186e-06, "loss": 1.5215, "step": 14204 }, { "epoch": 0.7766220630646639, "grad_norm": 1.6674648523330688, "learning_rate": 2.6591321300283603e-06, "loss": 1.3542, "step": 14205 }, { "epoch": 0.7766767355083853, "grad_norm": 1.9442970752716064, "learning_rate": 2.6578913752891044e-06, "loss": 1.5253, "step": 14206 }, { "epoch": 0.7767314079521069, "grad_norm": 1.3923319578170776, "learning_rate": 2.656650865720085e-06, "loss": 1.3879, "step": 14207 }, { "epoch": 0.7767860803958285, "grad_norm": 1.7391022443771362, "learning_rate": 2.6554106013627256e-06, "loss": 1.3268, "step": 14208 }, { "epoch": 0.77684075283955, "grad_norm": 1.3335809707641602, "learning_rate": 2.654170582258441e-06, "loss": 1.5132, "step": 14209 }, { "epoch": 0.7768954252832716, "grad_norm": 1.6687557697296143, "learning_rate": 2.652930808448634e-06, "loss": 1.2025, "step": 14210 }, { "epoch": 0.7769500977269932, "grad_norm": 1.8208298683166504, "learning_rate": 2.6516912799747106e-06, "loss": 1.3332, "step": 14211 }, { "epoch": 0.7770047701707147, "grad_norm": 1.1136749982833862, "learning_rate": 2.650451996878056e-06, "loss": 1.3674, "step": 14212 }, { "epoch": 0.7770594426144363, "grad_norm": 1.6518714427947998, "learning_rate": 2.649212959200059e-06, "loss": 1.4123, "step": 14213 }, { "epoch": 0.7771141150581579, "grad_norm": 1.3613379001617432, "learning_rate": 2.6479741669820915e-06, "loss": 1.5142, "step": 14214 }, { "epoch": 0.7771687875018793, "grad_norm": 1.5217090845108032, "learning_rate": 2.6467356202655135e-06, "loss": 1.2429, "step": 14215 }, { "epoch": 0.7772234599456009, "grad_norm": 1.7503962516784668, "learning_rate": 2.645497319091692e-06, "loss": 1.2065, "step": 14216 }, { "epoch": 0.7772781323893224, "grad_norm": 1.4665918350219727, "learning_rate": 2.6442592635019724e-06, "loss": 1.6285, "step": 14217 }, { "epoch": 0.777332804833044, "grad_norm": 2.044517755508423, "learning_rate": 2.6430214535376954e-06, "loss": 1.374, "step": 14218 }, { "epoch": 0.7773874772767656, "grad_norm": 1.3726452589035034, "learning_rate": 2.641783889240197e-06, "loss": 1.278, "step": 14219 }, { "epoch": 0.7774421497204871, "grad_norm": 1.756410837173462, "learning_rate": 2.6405465706508014e-06, "loss": 1.1942, "step": 14220 }, { "epoch": 0.7774968221642087, "grad_norm": 1.9550831317901611, "learning_rate": 2.639309497810827e-06, "loss": 1.4937, "step": 14221 }, { "epoch": 0.7775514946079303, "grad_norm": 1.1955859661102295, "learning_rate": 2.6380726707615787e-06, "loss": 1.5526, "step": 14222 }, { "epoch": 0.7776061670516518, "grad_norm": 1.5035147666931152, "learning_rate": 2.63683608954436e-06, "loss": 1.3302, "step": 14223 }, { "epoch": 0.7776608394953733, "grad_norm": 1.3696240186691284, "learning_rate": 2.6355997542004596e-06, "loss": 1.3592, "step": 14224 }, { "epoch": 0.7777155119390949, "grad_norm": 1.310051441192627, "learning_rate": 2.634363664771168e-06, "loss": 1.616, "step": 14225 }, { "epoch": 0.7777701843828164, "grad_norm": 1.8227009773254395, "learning_rate": 2.633127821297754e-06, "loss": 1.3037, "step": 14226 }, { "epoch": 0.777824856826538, "grad_norm": 1.8204705715179443, "learning_rate": 2.6318922238214915e-06, "loss": 1.2421, "step": 14227 }, { "epoch": 0.7778795292702596, "grad_norm": 1.8724584579467773, "learning_rate": 2.630656872383639e-06, "loss": 1.6459, "step": 14228 }, { "epoch": 0.7779342017139811, "grad_norm": 1.5883982181549072, "learning_rate": 2.629421767025442e-06, "loss": 1.3053, "step": 14229 }, { "epoch": 0.7779888741577027, "grad_norm": 1.7201435565948486, "learning_rate": 2.6281869077881507e-06, "loss": 1.3523, "step": 14230 }, { "epoch": 0.7780435466014242, "grad_norm": 1.4092401266098022, "learning_rate": 2.6269522947129976e-06, "loss": 1.2294, "step": 14231 }, { "epoch": 0.7780982190451458, "grad_norm": 1.3889881372451782, "learning_rate": 2.6257179278412084e-06, "loss": 1.4776, "step": 14232 }, { "epoch": 0.7781528914888673, "grad_norm": 1.4933545589447021, "learning_rate": 2.6244838072140023e-06, "loss": 1.5475, "step": 14233 }, { "epoch": 0.7782075639325888, "grad_norm": 1.3472745418548584, "learning_rate": 2.623249932872589e-06, "loss": 1.2758, "step": 14234 }, { "epoch": 0.7782622363763104, "grad_norm": 1.5280966758728027, "learning_rate": 2.622016304858167e-06, "loss": 1.4039, "step": 14235 }, { "epoch": 0.778316908820032, "grad_norm": 1.4916151762008667, "learning_rate": 2.6207829232119373e-06, "loss": 1.4207, "step": 14236 }, { "epoch": 0.7783715812637535, "grad_norm": 2.0017170906066895, "learning_rate": 2.619549787975081e-06, "loss": 1.3265, "step": 14237 }, { "epoch": 0.7784262537074751, "grad_norm": 1.3634238243103027, "learning_rate": 2.618316899188773e-06, "loss": 1.5759, "step": 14238 }, { "epoch": 0.7784809261511967, "grad_norm": 1.1723105907440186, "learning_rate": 2.617084256894189e-06, "loss": 1.2647, "step": 14239 }, { "epoch": 0.7785355985949182, "grad_norm": 1.765128254890442, "learning_rate": 2.6158518611324836e-06, "loss": 1.1555, "step": 14240 }, { "epoch": 0.7785902710386398, "grad_norm": 1.3062020540237427, "learning_rate": 2.6146197119448135e-06, "loss": 1.4327, "step": 14241 }, { "epoch": 0.7786449434823614, "grad_norm": 1.3423779010772705, "learning_rate": 2.613387809372323e-06, "loss": 1.6077, "step": 14242 }, { "epoch": 0.7786996159260828, "grad_norm": 2.0831143856048584, "learning_rate": 2.6121561534561423e-06, "loss": 1.6193, "step": 14243 }, { "epoch": 0.7787542883698044, "grad_norm": 1.3717221021652222, "learning_rate": 2.6109247442374088e-06, "loss": 1.5038, "step": 14244 }, { "epoch": 0.7788089608135259, "grad_norm": 1.6094838380813599, "learning_rate": 2.6096935817572357e-06, "loss": 1.441, "step": 14245 }, { "epoch": 0.7788636332572475, "grad_norm": 1.5171443223953247, "learning_rate": 2.608462666056736e-06, "loss": 1.1952, "step": 14246 }, { "epoch": 0.7789183057009691, "grad_norm": 1.7119241952896118, "learning_rate": 2.6072319971770122e-06, "loss": 1.1481, "step": 14247 }, { "epoch": 0.7789729781446906, "grad_norm": 1.6028069257736206, "learning_rate": 2.6060015751591605e-06, "loss": 1.2511, "step": 14248 }, { "epoch": 0.7790276505884122, "grad_norm": 1.4375964403152466, "learning_rate": 2.6047714000442634e-06, "loss": 1.5959, "step": 14249 }, { "epoch": 0.7790823230321338, "grad_norm": 1.6896071434020996, "learning_rate": 2.6035414718734052e-06, "loss": 1.3734, "step": 14250 }, { "epoch": 0.7791369954758552, "grad_norm": 1.6944103240966797, "learning_rate": 2.602311790687655e-06, "loss": 1.3741, "step": 14251 }, { "epoch": 0.7791916679195768, "grad_norm": 1.6759332418441772, "learning_rate": 2.6010823565280695e-06, "loss": 1.4174, "step": 14252 }, { "epoch": 0.7792463403632984, "grad_norm": 1.5565831661224365, "learning_rate": 2.599853169435709e-06, "loss": 1.4536, "step": 14253 }, { "epoch": 0.7793010128070199, "grad_norm": 1.4995336532592773, "learning_rate": 2.5986242294516127e-06, "loss": 1.2691, "step": 14254 }, { "epoch": 0.7793556852507415, "grad_norm": 1.9208227396011353, "learning_rate": 2.5973955366168257e-06, "loss": 1.3963, "step": 14255 }, { "epoch": 0.7794103576944631, "grad_norm": 1.7440561056137085, "learning_rate": 2.5961670909723714e-06, "loss": 1.1532, "step": 14256 }, { "epoch": 0.7794650301381846, "grad_norm": 1.3096487522125244, "learning_rate": 2.5949388925592687e-06, "loss": 1.3728, "step": 14257 }, { "epoch": 0.7795197025819062, "grad_norm": 1.7395820617675781, "learning_rate": 2.593710941418537e-06, "loss": 1.4181, "step": 14258 }, { "epoch": 0.7795743750256277, "grad_norm": 1.5513547658920288, "learning_rate": 2.5924832375911746e-06, "loss": 1.428, "step": 14259 }, { "epoch": 0.7796290474693492, "grad_norm": 1.6569347381591797, "learning_rate": 2.5912557811181802e-06, "loss": 1.3774, "step": 14260 }, { "epoch": 0.7796837199130708, "grad_norm": 1.7769534587860107, "learning_rate": 2.5900285720405403e-06, "loss": 1.3232, "step": 14261 }, { "epoch": 0.7797383923567923, "grad_norm": 1.6800910234451294, "learning_rate": 2.588801610399234e-06, "loss": 1.2909, "step": 14262 }, { "epoch": 0.7797930648005139, "grad_norm": 1.616310477256775, "learning_rate": 2.587574896235231e-06, "loss": 1.2982, "step": 14263 }, { "epoch": 0.7798477372442355, "grad_norm": 1.6119836568832397, "learning_rate": 2.586348429589498e-06, "loss": 1.3435, "step": 14264 }, { "epoch": 0.779902409687957, "grad_norm": 1.2667016983032227, "learning_rate": 2.585122210502987e-06, "loss": 1.464, "step": 14265 }, { "epoch": 0.7799570821316786, "grad_norm": 1.3756452798843384, "learning_rate": 2.5838962390166433e-06, "loss": 1.5924, "step": 14266 }, { "epoch": 0.7800117545754002, "grad_norm": 1.435422420501709, "learning_rate": 2.582670515171409e-06, "loss": 1.4184, "step": 14267 }, { "epoch": 0.7800664270191217, "grad_norm": 1.422361135482788, "learning_rate": 2.581445039008209e-06, "loss": 1.4484, "step": 14268 }, { "epoch": 0.7801210994628432, "grad_norm": 1.6974722146987915, "learning_rate": 2.58021981056797e-06, "loss": 1.4831, "step": 14269 }, { "epoch": 0.7801757719065648, "grad_norm": 1.2501338720321655, "learning_rate": 2.5789948298916025e-06, "loss": 1.5339, "step": 14270 }, { "epoch": 0.7802304443502863, "grad_norm": 1.5429850816726685, "learning_rate": 2.5777700970200115e-06, "loss": 1.4307, "step": 14271 }, { "epoch": 0.7802851167940079, "grad_norm": 1.098373532295227, "learning_rate": 2.5765456119940933e-06, "loss": 1.8514, "step": 14272 }, { "epoch": 0.7803397892377295, "grad_norm": 1.6723811626434326, "learning_rate": 2.575321374854738e-06, "loss": 1.1816, "step": 14273 }, { "epoch": 0.780394461681451, "grad_norm": 1.9657666683197021, "learning_rate": 2.5740973856428207e-06, "loss": 1.6869, "step": 14274 }, { "epoch": 0.7804491341251726, "grad_norm": 1.3659183979034424, "learning_rate": 2.57287364439922e-06, "loss": 1.4629, "step": 14275 }, { "epoch": 0.7805038065688941, "grad_norm": 1.17374849319458, "learning_rate": 2.5716501511647975e-06, "loss": 1.566, "step": 14276 }, { "epoch": 0.7805584790126157, "grad_norm": 1.6404047012329102, "learning_rate": 2.5704269059804034e-06, "loss": 1.5287, "step": 14277 }, { "epoch": 0.7806131514563373, "grad_norm": 1.4410643577575684, "learning_rate": 2.5692039088868927e-06, "loss": 1.1645, "step": 14278 }, { "epoch": 0.7806678239000587, "grad_norm": 1.3816388845443726, "learning_rate": 2.5679811599251003e-06, "loss": 1.4642, "step": 14279 }, { "epoch": 0.7807224963437803, "grad_norm": 1.7187724113464355, "learning_rate": 2.566758659135854e-06, "loss": 1.4513, "step": 14280 }, { "epoch": 0.7807771687875019, "grad_norm": 1.4346404075622559, "learning_rate": 2.565536406559982e-06, "loss": 1.4754, "step": 14281 }, { "epoch": 0.7808318412312234, "grad_norm": 1.538944959640503, "learning_rate": 2.5643144022382904e-06, "loss": 1.4474, "step": 14282 }, { "epoch": 0.780886513674945, "grad_norm": 1.6941123008728027, "learning_rate": 2.5630926462115934e-06, "loss": 1.3778, "step": 14283 }, { "epoch": 0.7809411861186666, "grad_norm": 1.8435944318771362, "learning_rate": 2.561871138520684e-06, "loss": 1.3615, "step": 14284 }, { "epoch": 0.7809958585623881, "grad_norm": 1.8020150661468506, "learning_rate": 2.5606498792063515e-06, "loss": 1.4568, "step": 14285 }, { "epoch": 0.7810505310061097, "grad_norm": 1.3044617176055908, "learning_rate": 2.559428868309377e-06, "loss": 1.3485, "step": 14286 }, { "epoch": 0.7811052034498313, "grad_norm": 1.4517065286636353, "learning_rate": 2.558208105870531e-06, "loss": 1.4101, "step": 14287 }, { "epoch": 0.7811598758935527, "grad_norm": 1.6463978290557861, "learning_rate": 2.5569875919305777e-06, "loss": 1.3696, "step": 14288 }, { "epoch": 0.7812145483372743, "grad_norm": 1.776179313659668, "learning_rate": 2.555767326530276e-06, "loss": 1.3097, "step": 14289 }, { "epoch": 0.7812692207809958, "grad_norm": 1.6960777044296265, "learning_rate": 2.5545473097103725e-06, "loss": 1.4844, "step": 14290 }, { "epoch": 0.7813238932247174, "grad_norm": 1.483888030052185, "learning_rate": 2.553327541511602e-06, "loss": 1.3214, "step": 14291 }, { "epoch": 0.781378565668439, "grad_norm": 1.395814299583435, "learning_rate": 2.552108021974703e-06, "loss": 1.486, "step": 14292 }, { "epoch": 0.7814332381121605, "grad_norm": 1.6023424863815308, "learning_rate": 2.5508887511403936e-06, "loss": 1.3146, "step": 14293 }, { "epoch": 0.7814879105558821, "grad_norm": 1.6473551988601685, "learning_rate": 2.5496697290493855e-06, "loss": 1.3487, "step": 14294 }, { "epoch": 0.7815425829996037, "grad_norm": 1.956660509109497, "learning_rate": 2.548450955742391e-06, "loss": 1.6614, "step": 14295 }, { "epoch": 0.7815972554433251, "grad_norm": 1.2880895137786865, "learning_rate": 2.5472324312601017e-06, "loss": 1.5036, "step": 14296 }, { "epoch": 0.7816519278870467, "grad_norm": 1.6049076318740845, "learning_rate": 2.5460141556432127e-06, "loss": 1.2776, "step": 14297 }, { "epoch": 0.7817066003307683, "grad_norm": 2.09953236579895, "learning_rate": 2.544796128932403e-06, "loss": 1.524, "step": 14298 }, { "epoch": 0.7817612727744898, "grad_norm": 1.440183162689209, "learning_rate": 2.5435783511683444e-06, "loss": 1.3699, "step": 14299 }, { "epoch": 0.7818159452182114, "grad_norm": 1.3818849325180054, "learning_rate": 2.542360822391702e-06, "loss": 1.2838, "step": 14300 }, { "epoch": 0.781870617661933, "grad_norm": 1.7127238512039185, "learning_rate": 2.541143542643132e-06, "loss": 1.2501, "step": 14301 }, { "epoch": 0.7819252901056545, "grad_norm": 2.25644588470459, "learning_rate": 2.539926511963278e-06, "loss": 1.4418, "step": 14302 }, { "epoch": 0.7819799625493761, "grad_norm": 1.2927086353302002, "learning_rate": 2.5387097303927864e-06, "loss": 1.5986, "step": 14303 }, { "epoch": 0.7820346349930976, "grad_norm": 1.8889024257659912, "learning_rate": 2.5374931979722863e-06, "loss": 1.2486, "step": 14304 }, { "epoch": 0.7820893074368191, "grad_norm": 1.6802107095718384, "learning_rate": 2.536276914742395e-06, "loss": 1.1948, "step": 14305 }, { "epoch": 0.7821439798805407, "grad_norm": 1.5311671495437622, "learning_rate": 2.535060880743736e-06, "loss": 1.3761, "step": 14306 }, { "epoch": 0.7821986523242622, "grad_norm": 1.5143561363220215, "learning_rate": 2.5338450960169105e-06, "loss": 1.5306, "step": 14307 }, { "epoch": 0.7822533247679838, "grad_norm": 1.7681277990341187, "learning_rate": 2.532629560602514e-06, "loss": 1.3966, "step": 14308 }, { "epoch": 0.7823079972117054, "grad_norm": 1.6656593084335327, "learning_rate": 2.531414274541143e-06, "loss": 1.6168, "step": 14309 }, { "epoch": 0.7823626696554269, "grad_norm": 1.6310207843780518, "learning_rate": 2.5301992378733753e-06, "loss": 1.5255, "step": 14310 }, { "epoch": 0.7824173420991485, "grad_norm": 1.404841661453247, "learning_rate": 2.528984450639782e-06, "loss": 1.5426, "step": 14311 }, { "epoch": 0.7824720145428701, "grad_norm": 1.6495234966278076, "learning_rate": 2.5277699128809307e-06, "loss": 1.4167, "step": 14312 }, { "epoch": 0.7825266869865916, "grad_norm": 1.538318395614624, "learning_rate": 2.5265556246373724e-06, "loss": 1.3527, "step": 14313 }, { "epoch": 0.7825813594303132, "grad_norm": 1.3354932069778442, "learning_rate": 2.525341585949662e-06, "loss": 1.4625, "step": 14314 }, { "epoch": 0.7826360318740347, "grad_norm": 1.397013783454895, "learning_rate": 2.5241277968583355e-06, "loss": 1.4435, "step": 14315 }, { "epoch": 0.7826907043177562, "grad_norm": 1.6792548894882202, "learning_rate": 2.5229142574039224e-06, "loss": 1.4587, "step": 14316 }, { "epoch": 0.7827453767614778, "grad_norm": 1.8692845106124878, "learning_rate": 2.52170096762695e-06, "loss": 1.6247, "step": 14317 }, { "epoch": 0.7828000492051993, "grad_norm": 1.290311574935913, "learning_rate": 2.5204879275679307e-06, "loss": 1.5117, "step": 14318 }, { "epoch": 0.7828547216489209, "grad_norm": 1.517314076423645, "learning_rate": 2.5192751372673673e-06, "loss": 1.2773, "step": 14319 }, { "epoch": 0.7829093940926425, "grad_norm": 1.7253949642181396, "learning_rate": 2.5180625967657647e-06, "loss": 1.4054, "step": 14320 }, { "epoch": 0.782964066536364, "grad_norm": 1.5131502151489258, "learning_rate": 2.5168503061036086e-06, "loss": 1.4632, "step": 14321 }, { "epoch": 0.7830187389800856, "grad_norm": 1.275309681892395, "learning_rate": 2.5156382653213786e-06, "loss": 1.4477, "step": 14322 }, { "epoch": 0.7830734114238072, "grad_norm": 1.8501379489898682, "learning_rate": 2.5144264744595515e-06, "loss": 1.2184, "step": 14323 }, { "epoch": 0.7831280838675286, "grad_norm": 1.4395029544830322, "learning_rate": 2.5132149335585896e-06, "loss": 1.3726, "step": 14324 }, { "epoch": 0.7831827563112502, "grad_norm": 1.2715471982955933, "learning_rate": 2.51200364265895e-06, "loss": 1.4062, "step": 14325 }, { "epoch": 0.7832374287549718, "grad_norm": 1.4521082639694214, "learning_rate": 2.5107926018010796e-06, "loss": 1.268, "step": 14326 }, { "epoch": 0.7832921011986933, "grad_norm": 1.4125211238861084, "learning_rate": 2.5095818110254155e-06, "loss": 1.3077, "step": 14327 }, { "epoch": 0.7833467736424149, "grad_norm": 1.2308874130249023, "learning_rate": 2.5083712703723952e-06, "loss": 1.5716, "step": 14328 }, { "epoch": 0.7834014460861365, "grad_norm": 1.8164981603622437, "learning_rate": 2.507160979882436e-06, "loss": 1.3033, "step": 14329 }, { "epoch": 0.783456118529858, "grad_norm": 1.3559166193008423, "learning_rate": 2.5059509395959523e-06, "loss": 1.2854, "step": 14330 }, { "epoch": 0.7835107909735796, "grad_norm": 1.5904029607772827, "learning_rate": 2.5047411495533556e-06, "loss": 1.3033, "step": 14331 }, { "epoch": 0.783565463417301, "grad_norm": 1.8146862983703613, "learning_rate": 2.503531609795039e-06, "loss": 1.4347, "step": 14332 }, { "epoch": 0.7836201358610226, "grad_norm": 1.206821322441101, "learning_rate": 2.502322320361391e-06, "loss": 1.5386, "step": 14333 }, { "epoch": 0.7836748083047442, "grad_norm": 1.358331561088562, "learning_rate": 2.5011132812927963e-06, "loss": 1.2613, "step": 14334 }, { "epoch": 0.7837294807484657, "grad_norm": 1.4039099216461182, "learning_rate": 2.499904492629627e-06, "loss": 1.5625, "step": 14335 }, { "epoch": 0.7837841531921873, "grad_norm": 1.2203456163406372, "learning_rate": 2.4986959544122423e-06, "loss": 1.4039, "step": 14336 }, { "epoch": 0.7838388256359089, "grad_norm": 1.4022629261016846, "learning_rate": 2.4974876666810053e-06, "loss": 1.646, "step": 14337 }, { "epoch": 0.7838934980796304, "grad_norm": 1.6292685270309448, "learning_rate": 2.4962796294762615e-06, "loss": 1.3541, "step": 14338 }, { "epoch": 0.783948170523352, "grad_norm": 1.538772702217102, "learning_rate": 2.495071842838348e-06, "loss": 1.1984, "step": 14339 }, { "epoch": 0.7840028429670736, "grad_norm": 2.04923152923584, "learning_rate": 2.4938643068075962e-06, "loss": 1.2018, "step": 14340 }, { "epoch": 0.784057515410795, "grad_norm": 1.6818040609359741, "learning_rate": 2.4926570214243264e-06, "loss": 1.3969, "step": 14341 }, { "epoch": 0.7841121878545166, "grad_norm": 1.574825644493103, "learning_rate": 2.4914499867288577e-06, "loss": 1.4334, "step": 14342 }, { "epoch": 0.7841668602982382, "grad_norm": 1.7807246446609497, "learning_rate": 2.4902432027614933e-06, "loss": 1.6334, "step": 14343 }, { "epoch": 0.7842215327419597, "grad_norm": 1.8496071100234985, "learning_rate": 2.489036669562528e-06, "loss": 1.3573, "step": 14344 }, { "epoch": 0.7842762051856813, "grad_norm": 1.4458708763122559, "learning_rate": 2.4878303871722564e-06, "loss": 1.5775, "step": 14345 }, { "epoch": 0.7843308776294028, "grad_norm": 2.053234815597534, "learning_rate": 2.4866243556309557e-06, "loss": 1.3071, "step": 14346 }, { "epoch": 0.7843855500731244, "grad_norm": 1.4540295600891113, "learning_rate": 2.485418574978895e-06, "loss": 1.5979, "step": 14347 }, { "epoch": 0.784440222516846, "grad_norm": 1.4121414422988892, "learning_rate": 2.4842130452563453e-06, "loss": 1.4633, "step": 14348 }, { "epoch": 0.7844948949605675, "grad_norm": 1.5035182237625122, "learning_rate": 2.483007766503558e-06, "loss": 1.1615, "step": 14349 }, { "epoch": 0.784549567404289, "grad_norm": 1.4692165851593018, "learning_rate": 2.4818027387607814e-06, "loss": 1.3647, "step": 14350 }, { "epoch": 0.7846042398480106, "grad_norm": 1.4380232095718384, "learning_rate": 2.480597962068252e-06, "loss": 1.6077, "step": 14351 }, { "epoch": 0.7846589122917321, "grad_norm": 1.7171542644500732, "learning_rate": 2.479393436466202e-06, "loss": 1.157, "step": 14352 }, { "epoch": 0.7847135847354537, "grad_norm": 1.2672233581542969, "learning_rate": 2.4781891619948506e-06, "loss": 1.4741, "step": 14353 }, { "epoch": 0.7847682571791753, "grad_norm": 1.4525134563446045, "learning_rate": 2.4769851386944157e-06, "loss": 1.4763, "step": 14354 }, { "epoch": 0.7848229296228968, "grad_norm": 1.797775149345398, "learning_rate": 2.475781366605098e-06, "loss": 1.2371, "step": 14355 }, { "epoch": 0.7848776020666184, "grad_norm": 1.602307677268982, "learning_rate": 2.474577845767099e-06, "loss": 1.5406, "step": 14356 }, { "epoch": 0.78493227451034, "grad_norm": 1.4178353548049927, "learning_rate": 2.4733745762206042e-06, "loss": 1.5225, "step": 14357 }, { "epoch": 0.7849869469540615, "grad_norm": 1.3418866395950317, "learning_rate": 2.4721715580057926e-06, "loss": 1.477, "step": 14358 }, { "epoch": 0.785041619397783, "grad_norm": 1.3547921180725098, "learning_rate": 2.470968791162839e-06, "loss": 1.6391, "step": 14359 }, { "epoch": 0.7850962918415045, "grad_norm": 1.80789315700531, "learning_rate": 2.4697662757319053e-06, "loss": 1.3763, "step": 14360 }, { "epoch": 0.7851509642852261, "grad_norm": 1.9936504364013672, "learning_rate": 2.4685640117531427e-06, "loss": 1.2576, "step": 14361 }, { "epoch": 0.7852056367289477, "grad_norm": 1.870713710784912, "learning_rate": 2.467361999266704e-06, "loss": 1.3659, "step": 14362 }, { "epoch": 0.7852603091726692, "grad_norm": 1.679748296737671, "learning_rate": 2.4661602383127235e-06, "loss": 1.261, "step": 14363 }, { "epoch": 0.7853149816163908, "grad_norm": 1.5527557134628296, "learning_rate": 2.4649587289313325e-06, "loss": 1.6825, "step": 14364 }, { "epoch": 0.7853696540601124, "grad_norm": 1.7121535539627075, "learning_rate": 2.46375747116265e-06, "loss": 1.4134, "step": 14365 }, { "epoch": 0.7854243265038339, "grad_norm": 1.5345141887664795, "learning_rate": 2.4625564650467904e-06, "loss": 1.1656, "step": 14366 }, { "epoch": 0.7854789989475555, "grad_norm": 2.0826609134674072, "learning_rate": 2.461355710623855e-06, "loss": 1.1634, "step": 14367 }, { "epoch": 0.785533671391277, "grad_norm": 2.4275710582733154, "learning_rate": 2.4601552079339453e-06, "loss": 1.2193, "step": 14368 }, { "epoch": 0.7855883438349985, "grad_norm": 1.3355960845947266, "learning_rate": 2.4589549570171423e-06, "loss": 1.2747, "step": 14369 }, { "epoch": 0.7856430162787201, "grad_norm": 1.7854524850845337, "learning_rate": 2.4577549579135318e-06, "loss": 1.278, "step": 14370 }, { "epoch": 0.7856976887224417, "grad_norm": 1.4893159866333008, "learning_rate": 2.456555210663183e-06, "loss": 1.485, "step": 14371 }, { "epoch": 0.7857523611661632, "grad_norm": 1.8036621809005737, "learning_rate": 2.4553557153061527e-06, "loss": 1.4259, "step": 14372 }, { "epoch": 0.7858070336098848, "grad_norm": 2.1746416091918945, "learning_rate": 2.4541564718825028e-06, "loss": 1.4047, "step": 14373 }, { "epoch": 0.7858617060536063, "grad_norm": 1.6000133752822876, "learning_rate": 2.4529574804322744e-06, "loss": 1.4211, "step": 14374 }, { "epoch": 0.7859163784973279, "grad_norm": 1.6513097286224365, "learning_rate": 2.4517587409955036e-06, "loss": 1.3262, "step": 14375 }, { "epoch": 0.7859710509410495, "grad_norm": 1.33161461353302, "learning_rate": 2.450560253612223e-06, "loss": 1.5686, "step": 14376 }, { "epoch": 0.786025723384771, "grad_norm": 1.5329455137252808, "learning_rate": 2.449362018322451e-06, "loss": 1.3307, "step": 14377 }, { "epoch": 0.7860803958284925, "grad_norm": 1.5831124782562256, "learning_rate": 2.4481640351661995e-06, "loss": 1.2499, "step": 14378 }, { "epoch": 0.7861350682722141, "grad_norm": 1.5341092348098755, "learning_rate": 2.4469663041834713e-06, "loss": 1.5222, "step": 14379 }, { "epoch": 0.7861897407159356, "grad_norm": 1.6137449741363525, "learning_rate": 2.445768825414263e-06, "loss": 1.5718, "step": 14380 }, { "epoch": 0.7862444131596572, "grad_norm": 1.7689378261566162, "learning_rate": 2.4445715988985562e-06, "loss": 1.657, "step": 14381 }, { "epoch": 0.7862990856033788, "grad_norm": 1.6993921995162964, "learning_rate": 2.443374624676337e-06, "loss": 1.3571, "step": 14382 }, { "epoch": 0.7863537580471003, "grad_norm": 1.446311354637146, "learning_rate": 2.4421779027875668e-06, "loss": 1.5724, "step": 14383 }, { "epoch": 0.7864084304908219, "grad_norm": 1.6986141204833984, "learning_rate": 2.440981433272216e-06, "loss": 1.6514, "step": 14384 }, { "epoch": 0.7864631029345435, "grad_norm": 1.5986829996109009, "learning_rate": 2.4397852161702317e-06, "loss": 1.4437, "step": 14385 }, { "epoch": 0.786517775378265, "grad_norm": 1.282852053642273, "learning_rate": 2.4385892515215583e-06, "loss": 1.2828, "step": 14386 }, { "epoch": 0.7865724478219865, "grad_norm": 1.5894067287445068, "learning_rate": 2.437393539366134e-06, "loss": 1.5948, "step": 14387 }, { "epoch": 0.786627120265708, "grad_norm": 1.874026894569397, "learning_rate": 2.4361980797438868e-06, "loss": 1.5999, "step": 14388 }, { "epoch": 0.7866817927094296, "grad_norm": 1.060943603515625, "learning_rate": 2.435002872694735e-06, "loss": 1.5922, "step": 14389 }, { "epoch": 0.7867364651531512, "grad_norm": 2.196265935897827, "learning_rate": 2.433807918258588e-06, "loss": 1.3147, "step": 14390 }, { "epoch": 0.7867911375968727, "grad_norm": 1.544533133506775, "learning_rate": 2.43261321647535e-06, "loss": 1.335, "step": 14391 }, { "epoch": 0.7868458100405943, "grad_norm": 1.2028939723968506, "learning_rate": 2.4314187673849122e-06, "loss": 1.5117, "step": 14392 }, { "epoch": 0.7869004824843159, "grad_norm": 1.153672456741333, "learning_rate": 2.4302245710271634e-06, "loss": 1.4638, "step": 14393 }, { "epoch": 0.7869551549280374, "grad_norm": 1.5643953084945679, "learning_rate": 2.4290306274419794e-06, "loss": 1.2939, "step": 14394 }, { "epoch": 0.787009827371759, "grad_norm": 1.517522931098938, "learning_rate": 2.4278369366692268e-06, "loss": 1.4477, "step": 14395 }, { "epoch": 0.7870644998154805, "grad_norm": 1.3546706438064575, "learning_rate": 2.4266434987487697e-06, "loss": 1.3679, "step": 14396 }, { "epoch": 0.787119172259202, "grad_norm": 1.5459636449813843, "learning_rate": 2.4254503137204544e-06, "loss": 1.3885, "step": 14397 }, { "epoch": 0.7871738447029236, "grad_norm": 2.0630459785461426, "learning_rate": 2.42425738162413e-06, "loss": 1.4941, "step": 14398 }, { "epoch": 0.7872285171466452, "grad_norm": 1.7740092277526855, "learning_rate": 2.423064702499629e-06, "loss": 1.2068, "step": 14399 }, { "epoch": 0.7872831895903667, "grad_norm": 1.534175992012024, "learning_rate": 2.4218722763867754e-06, "loss": 1.3198, "step": 14400 }, { "epoch": 0.7873378620340883, "grad_norm": 1.3468799591064453, "learning_rate": 2.4206801033253914e-06, "loss": 1.4316, "step": 14401 }, { "epoch": 0.7873925344778098, "grad_norm": 2.1107993125915527, "learning_rate": 2.419488183355284e-06, "loss": 1.3884, "step": 14402 }, { "epoch": 0.7874472069215314, "grad_norm": 1.8777209520339966, "learning_rate": 2.418296516516254e-06, "loss": 1.4416, "step": 14403 }, { "epoch": 0.787501879365253, "grad_norm": 1.8957847356796265, "learning_rate": 2.4171051028480953e-06, "loss": 1.4466, "step": 14404 }, { "epoch": 0.7875565518089744, "grad_norm": 1.2577866315841675, "learning_rate": 2.4159139423905898e-06, "loss": 1.4527, "step": 14405 }, { "epoch": 0.787611224252696, "grad_norm": 1.5304925441741943, "learning_rate": 2.414723035183513e-06, "loss": 1.3436, "step": 14406 }, { "epoch": 0.7876658966964176, "grad_norm": 1.5760250091552734, "learning_rate": 2.4135323812666357e-06, "loss": 1.3833, "step": 14407 }, { "epoch": 0.7877205691401391, "grad_norm": 1.8443424701690674, "learning_rate": 2.4123419806797143e-06, "loss": 1.2885, "step": 14408 }, { "epoch": 0.7877752415838607, "grad_norm": 1.627661943435669, "learning_rate": 2.411151833462496e-06, "loss": 1.4528, "step": 14409 }, { "epoch": 0.7878299140275823, "grad_norm": 1.43581223487854, "learning_rate": 2.4099619396547293e-06, "loss": 1.6955, "step": 14410 }, { "epoch": 0.7878845864713038, "grad_norm": 1.9620567560195923, "learning_rate": 2.4087722992961406e-06, "loss": 1.4512, "step": 14411 }, { "epoch": 0.7879392589150254, "grad_norm": 1.769837498664856, "learning_rate": 2.4075829124264606e-06, "loss": 1.3883, "step": 14412 }, { "epoch": 0.787993931358747, "grad_norm": 1.2235286235809326, "learning_rate": 2.406393779085404e-06, "loss": 1.4082, "step": 14413 }, { "epoch": 0.7880486038024684, "grad_norm": 1.924101710319519, "learning_rate": 2.4052048993126754e-06, "loss": 1.3381, "step": 14414 }, { "epoch": 0.78810327624619, "grad_norm": 2.0335981845855713, "learning_rate": 2.4040162731479786e-06, "loss": 1.223, "step": 14415 }, { "epoch": 0.7881579486899115, "grad_norm": 1.4670997858047485, "learning_rate": 2.4028279006310053e-06, "loss": 1.1905, "step": 14416 }, { "epoch": 0.7882126211336331, "grad_norm": 1.2855198383331299, "learning_rate": 2.4016397818014336e-06, "loss": 1.4607, "step": 14417 }, { "epoch": 0.7882672935773547, "grad_norm": 1.7662301063537598, "learning_rate": 2.4004519166989405e-06, "loss": 1.5212, "step": 14418 }, { "epoch": 0.7883219660210762, "grad_norm": 1.3195486068725586, "learning_rate": 2.3992643053631904e-06, "loss": 1.3438, "step": 14419 }, { "epoch": 0.7883766384647978, "grad_norm": 1.2304534912109375, "learning_rate": 2.398076947833838e-06, "loss": 1.4258, "step": 14420 }, { "epoch": 0.7884313109085194, "grad_norm": 1.544570803642273, "learning_rate": 2.3968898441505384e-06, "loss": 1.4805, "step": 14421 }, { "epoch": 0.7884859833522408, "grad_norm": 1.3518378734588623, "learning_rate": 2.3957029943529276e-06, "loss": 1.3625, "step": 14422 }, { "epoch": 0.7885406557959624, "grad_norm": 1.647032618522644, "learning_rate": 2.3945163984806354e-06, "loss": 1.3754, "step": 14423 }, { "epoch": 0.788595328239684, "grad_norm": 1.659212589263916, "learning_rate": 2.39333005657329e-06, "loss": 1.3387, "step": 14424 }, { "epoch": 0.7886500006834055, "grad_norm": 1.5093039274215698, "learning_rate": 2.3921439686705005e-06, "loss": 1.4668, "step": 14425 }, { "epoch": 0.7887046731271271, "grad_norm": 1.551802635192871, "learning_rate": 2.3909581348118803e-06, "loss": 1.3056, "step": 14426 }, { "epoch": 0.7887593455708487, "grad_norm": 1.440335988998413, "learning_rate": 2.389772555037022e-06, "loss": 1.3571, "step": 14427 }, { "epoch": 0.7888140180145702, "grad_norm": 1.4017654657363892, "learning_rate": 2.388587229385516e-06, "loss": 1.3175, "step": 14428 }, { "epoch": 0.7888686904582918, "grad_norm": 1.345873475074768, "learning_rate": 2.3874021578969443e-06, "loss": 1.5565, "step": 14429 }, { "epoch": 0.7889233629020133, "grad_norm": 1.5367021560668945, "learning_rate": 2.3862173406108767e-06, "loss": 1.4156, "step": 14430 }, { "epoch": 0.7889780353457349, "grad_norm": 1.4522329568862915, "learning_rate": 2.3850327775668758e-06, "loss": 1.5133, "step": 14431 }, { "epoch": 0.7890327077894564, "grad_norm": 2.0103015899658203, "learning_rate": 2.3838484688045026e-06, "loss": 1.4981, "step": 14432 }, { "epoch": 0.7890873802331779, "grad_norm": 1.424394130706787, "learning_rate": 2.3826644143633017e-06, "loss": 1.5214, "step": 14433 }, { "epoch": 0.7891420526768995, "grad_norm": 1.5772018432617188, "learning_rate": 2.381480614282807e-06, "loss": 1.3399, "step": 14434 }, { "epoch": 0.7891967251206211, "grad_norm": 1.2426283359527588, "learning_rate": 2.380297068602555e-06, "loss": 1.3693, "step": 14435 }, { "epoch": 0.7892513975643426, "grad_norm": 1.628997564315796, "learning_rate": 2.3791137773620644e-06, "loss": 1.3787, "step": 14436 }, { "epoch": 0.7893060700080642, "grad_norm": 1.7590360641479492, "learning_rate": 2.3779307406008444e-06, "loss": 1.4939, "step": 14437 }, { "epoch": 0.7893607424517858, "grad_norm": 1.7375463247299194, "learning_rate": 2.376747958358405e-06, "loss": 1.2453, "step": 14438 }, { "epoch": 0.7894154148955073, "grad_norm": 1.8072806596755981, "learning_rate": 2.375565430674238e-06, "loss": 1.2004, "step": 14439 }, { "epoch": 0.7894700873392289, "grad_norm": 1.555383324623108, "learning_rate": 2.3743831575878352e-06, "loss": 1.3606, "step": 14440 }, { "epoch": 0.7895247597829504, "grad_norm": 1.436621904373169, "learning_rate": 2.3732011391386724e-06, "loss": 1.5889, "step": 14441 }, { "epoch": 0.7895794322266719, "grad_norm": 1.4584776163101196, "learning_rate": 2.37201937536622e-06, "loss": 1.2904, "step": 14442 }, { "epoch": 0.7896341046703935, "grad_norm": 1.6995240449905396, "learning_rate": 2.370837866309942e-06, "loss": 1.3276, "step": 14443 }, { "epoch": 0.789688777114115, "grad_norm": 1.2833704948425293, "learning_rate": 2.369656612009289e-06, "loss": 1.5313, "step": 14444 }, { "epoch": 0.7897434495578366, "grad_norm": 1.6826555728912354, "learning_rate": 2.3684756125037033e-06, "loss": 1.4766, "step": 14445 }, { "epoch": 0.7897981220015582, "grad_norm": 1.7097491025924683, "learning_rate": 2.367294867832629e-06, "loss": 1.3954, "step": 14446 }, { "epoch": 0.7898527944452797, "grad_norm": 1.5060955286026, "learning_rate": 2.366114378035489e-06, "loss": 1.6575, "step": 14447 }, { "epoch": 0.7899074668890013, "grad_norm": 1.814615249633789, "learning_rate": 2.3649341431517005e-06, "loss": 1.3851, "step": 14448 }, { "epoch": 0.7899621393327229, "grad_norm": 1.3062546253204346, "learning_rate": 2.3637541632206804e-06, "loss": 1.4217, "step": 14449 }, { "epoch": 0.7900168117764443, "grad_norm": 1.771244764328003, "learning_rate": 2.362574438281827e-06, "loss": 1.3714, "step": 14450 }, { "epoch": 0.7900714842201659, "grad_norm": 1.6916409730911255, "learning_rate": 2.361394968374533e-06, "loss": 1.512, "step": 14451 }, { "epoch": 0.7901261566638875, "grad_norm": 1.5429967641830444, "learning_rate": 2.360215753538189e-06, "loss": 1.3192, "step": 14452 }, { "epoch": 0.790180829107609, "grad_norm": 1.3843356370925903, "learning_rate": 2.3590367938121637e-06, "loss": 1.8741, "step": 14453 }, { "epoch": 0.7902355015513306, "grad_norm": 1.5235755443572998, "learning_rate": 2.3578580892358337e-06, "loss": 1.4202, "step": 14454 }, { "epoch": 0.7902901739950522, "grad_norm": 1.8724192380905151, "learning_rate": 2.356679639848555e-06, "loss": 1.2612, "step": 14455 }, { "epoch": 0.7903448464387737, "grad_norm": 1.6724611520767212, "learning_rate": 2.3555014456896786e-06, "loss": 1.3097, "step": 14456 }, { "epoch": 0.7903995188824953, "grad_norm": 1.6865472793579102, "learning_rate": 2.354323506798547e-06, "loss": 1.3176, "step": 14457 }, { "epoch": 0.7904541913262167, "grad_norm": 2.319700002670288, "learning_rate": 2.3531458232144953e-06, "loss": 1.3289, "step": 14458 }, { "epoch": 0.7905088637699383, "grad_norm": 1.9091438055038452, "learning_rate": 2.351968394976846e-06, "loss": 1.4936, "step": 14459 }, { "epoch": 0.7905635362136599, "grad_norm": 1.8976317644119263, "learning_rate": 2.3507912221249206e-06, "loss": 1.5325, "step": 14460 }, { "epoch": 0.7906182086573814, "grad_norm": 1.2974464893341064, "learning_rate": 2.3496143046980256e-06, "loss": 1.5037, "step": 14461 }, { "epoch": 0.790672881101103, "grad_norm": 3.561682939529419, "learning_rate": 2.348437642735458e-06, "loss": 1.1549, "step": 14462 }, { "epoch": 0.7907275535448246, "grad_norm": 1.5368492603302002, "learning_rate": 2.347261236276517e-06, "loss": 1.6983, "step": 14463 }, { "epoch": 0.7907822259885461, "grad_norm": 1.4464761018753052, "learning_rate": 2.34608508536048e-06, "loss": 1.4246, "step": 14464 }, { "epoch": 0.7908368984322677, "grad_norm": 1.292886734008789, "learning_rate": 2.3449091900266196e-06, "loss": 1.3422, "step": 14465 }, { "epoch": 0.7908915708759893, "grad_norm": 1.7452436685562134, "learning_rate": 2.3437335503142065e-06, "loss": 1.4522, "step": 14466 }, { "epoch": 0.7909462433197108, "grad_norm": 1.6786590814590454, "learning_rate": 2.3425581662624975e-06, "loss": 1.242, "step": 14467 }, { "epoch": 0.7910009157634323, "grad_norm": 1.222509503364563, "learning_rate": 2.3413830379107395e-06, "loss": 1.5237, "step": 14468 }, { "epoch": 0.7910555882071539, "grad_norm": 1.5425969362258911, "learning_rate": 2.3402081652981733e-06, "loss": 1.6477, "step": 14469 }, { "epoch": 0.7911102606508754, "grad_norm": 1.7548938989639282, "learning_rate": 2.339033548464028e-06, "loss": 1.2831, "step": 14470 }, { "epoch": 0.791164933094597, "grad_norm": 1.3259456157684326, "learning_rate": 2.337859187447533e-06, "loss": 1.5584, "step": 14471 }, { "epoch": 0.7912196055383186, "grad_norm": 1.8993898630142212, "learning_rate": 2.3366850822878996e-06, "loss": 1.1728, "step": 14472 }, { "epoch": 0.7912742779820401, "grad_norm": 1.2029780149459839, "learning_rate": 2.33551123302433e-06, "loss": 1.488, "step": 14473 }, { "epoch": 0.7913289504257617, "grad_norm": 1.8533052206039429, "learning_rate": 2.3343376396960282e-06, "loss": 1.4346, "step": 14474 }, { "epoch": 0.7913836228694832, "grad_norm": 1.509109377861023, "learning_rate": 2.3331643023421813e-06, "loss": 1.4472, "step": 14475 }, { "epoch": 0.7914382953132048, "grad_norm": 1.374616026878357, "learning_rate": 2.331991221001967e-06, "loss": 1.4743, "step": 14476 }, { "epoch": 0.7914929677569263, "grad_norm": 1.254118800163269, "learning_rate": 2.3308183957145613e-06, "loss": 1.1088, "step": 14477 }, { "epoch": 0.7915476402006478, "grad_norm": 1.7804540395736694, "learning_rate": 2.329645826519126e-06, "loss": 1.4945, "step": 14478 }, { "epoch": 0.7916023126443694, "grad_norm": 1.9789938926696777, "learning_rate": 2.328473513454812e-06, "loss": 1.2644, "step": 14479 }, { "epoch": 0.791656985088091, "grad_norm": 1.1307673454284668, "learning_rate": 2.3273014565607734e-06, "loss": 1.6425, "step": 14480 }, { "epoch": 0.7917116575318125, "grad_norm": 1.8187443017959595, "learning_rate": 2.3261296558761427e-06, "loss": 1.2791, "step": 14481 }, { "epoch": 0.7917663299755341, "grad_norm": 1.5764691829681396, "learning_rate": 2.324958111440051e-06, "loss": 1.3004, "step": 14482 }, { "epoch": 0.7918210024192557, "grad_norm": 1.716589331626892, "learning_rate": 2.323786823291617e-06, "loss": 1.1313, "step": 14483 }, { "epoch": 0.7918756748629772, "grad_norm": 1.922706961631775, "learning_rate": 2.322615791469951e-06, "loss": 1.3645, "step": 14484 }, { "epoch": 0.7919303473066988, "grad_norm": 1.728736162185669, "learning_rate": 2.321445016014162e-06, "loss": 1.2563, "step": 14485 }, { "epoch": 0.7919850197504203, "grad_norm": 1.7229167222976685, "learning_rate": 2.3202744969633427e-06, "loss": 1.3319, "step": 14486 }, { "epoch": 0.7920396921941418, "grad_norm": 1.3764337301254272, "learning_rate": 2.319104234356576e-06, "loss": 1.4749, "step": 14487 }, { "epoch": 0.7920943646378634, "grad_norm": 2.4839866161346436, "learning_rate": 2.3179342282329463e-06, "loss": 1.5449, "step": 14488 }, { "epoch": 0.7921490370815849, "grad_norm": 1.8201745748519897, "learning_rate": 2.316764478631518e-06, "loss": 1.2915, "step": 14489 }, { "epoch": 0.7922037095253065, "grad_norm": 1.337091326713562, "learning_rate": 2.3155949855913516e-06, "loss": 1.356, "step": 14490 }, { "epoch": 0.7922583819690281, "grad_norm": 1.858113169670105, "learning_rate": 2.314425749151502e-06, "loss": 1.3432, "step": 14491 }, { "epoch": 0.7923130544127496, "grad_norm": 1.6462651491165161, "learning_rate": 2.3132567693510123e-06, "loss": 1.4393, "step": 14492 }, { "epoch": 0.7923677268564712, "grad_norm": 1.5260154008865356, "learning_rate": 2.3120880462289165e-06, "loss": 1.3121, "step": 14493 }, { "epoch": 0.7924223993001928, "grad_norm": 1.779125690460205, "learning_rate": 2.310919579824241e-06, "loss": 1.3967, "step": 14494 }, { "epoch": 0.7924770717439142, "grad_norm": 1.5400913953781128, "learning_rate": 2.309751370176001e-06, "loss": 1.2123, "step": 14495 }, { "epoch": 0.7925317441876358, "grad_norm": 1.534110188484192, "learning_rate": 2.30858341732321e-06, "loss": 1.3353, "step": 14496 }, { "epoch": 0.7925864166313574, "grad_norm": 1.363612413406372, "learning_rate": 2.3074157213048686e-06, "loss": 1.3804, "step": 14497 }, { "epoch": 0.7926410890750789, "grad_norm": 1.5106351375579834, "learning_rate": 2.306248282159965e-06, "loss": 1.3759, "step": 14498 }, { "epoch": 0.7926957615188005, "grad_norm": 1.4985545873641968, "learning_rate": 2.3050810999274874e-06, "loss": 1.2386, "step": 14499 }, { "epoch": 0.7927504339625221, "grad_norm": 1.7855212688446045, "learning_rate": 2.303914174646409e-06, "loss": 1.4557, "step": 14500 }, { "epoch": 0.7928051064062436, "grad_norm": 1.3934684991836548, "learning_rate": 2.3027475063556913e-06, "loss": 1.4204, "step": 14501 }, { "epoch": 0.7928597788499652, "grad_norm": 1.5463507175445557, "learning_rate": 2.301581095094301e-06, "loss": 1.5105, "step": 14502 }, { "epoch": 0.7929144512936867, "grad_norm": 1.461025595664978, "learning_rate": 2.300414940901182e-06, "loss": 1.4878, "step": 14503 }, { "epoch": 0.7929691237374082, "grad_norm": 1.4156100749969482, "learning_rate": 2.2992490438152735e-06, "loss": 1.4177, "step": 14504 }, { "epoch": 0.7930237961811298, "grad_norm": 1.469057559967041, "learning_rate": 2.298083403875513e-06, "loss": 1.539, "step": 14505 }, { "epoch": 0.7930784686248513, "grad_norm": 1.4668606519699097, "learning_rate": 2.2969180211208195e-06, "loss": 1.3651, "step": 14506 }, { "epoch": 0.7931331410685729, "grad_norm": 1.59121572971344, "learning_rate": 2.2957528955901097e-06, "loss": 1.3971, "step": 14507 }, { "epoch": 0.7931878135122945, "grad_norm": 1.7307279109954834, "learning_rate": 2.29458802732229e-06, "loss": 1.5015, "step": 14508 }, { "epoch": 0.793242485956016, "grad_norm": 1.784677505493164, "learning_rate": 2.293423416356254e-06, "loss": 1.5964, "step": 14509 }, { "epoch": 0.7932971583997376, "grad_norm": 1.3916009664535522, "learning_rate": 2.292259062730897e-06, "loss": 1.5763, "step": 14510 }, { "epoch": 0.7933518308434592, "grad_norm": 1.6441783905029297, "learning_rate": 2.2910949664850967e-06, "loss": 1.3264, "step": 14511 }, { "epoch": 0.7934065032871807, "grad_norm": 1.3153916597366333, "learning_rate": 2.2899311276577217e-06, "loss": 1.3889, "step": 14512 }, { "epoch": 0.7934611757309022, "grad_norm": 1.6222894191741943, "learning_rate": 2.2887675462876425e-06, "loss": 1.3982, "step": 14513 }, { "epoch": 0.7935158481746238, "grad_norm": 1.6473207473754883, "learning_rate": 2.2876042224137085e-06, "loss": 1.3657, "step": 14514 }, { "epoch": 0.7935705206183453, "grad_norm": 2.048691749572754, "learning_rate": 2.2864411560747655e-06, "loss": 1.362, "step": 14515 }, { "epoch": 0.7936251930620669, "grad_norm": 1.4247218370437622, "learning_rate": 2.285278347309655e-06, "loss": 1.4085, "step": 14516 }, { "epoch": 0.7936798655057884, "grad_norm": 1.5732970237731934, "learning_rate": 2.2841157961572034e-06, "loss": 1.4159, "step": 14517 }, { "epoch": 0.79373453794951, "grad_norm": 1.9670603275299072, "learning_rate": 2.2829535026562287e-06, "loss": 1.264, "step": 14518 }, { "epoch": 0.7937892103932316, "grad_norm": 1.326433777809143, "learning_rate": 2.2817914668455486e-06, "loss": 1.5306, "step": 14519 }, { "epoch": 0.7938438828369531, "grad_norm": 1.5689692497253418, "learning_rate": 2.2806296887639622e-06, "loss": 1.4762, "step": 14520 }, { "epoch": 0.7938985552806747, "grad_norm": 1.11900794506073, "learning_rate": 2.279468168450265e-06, "loss": 1.795, "step": 14521 }, { "epoch": 0.7939532277243962, "grad_norm": 1.8260880708694458, "learning_rate": 2.2783069059432417e-06, "loss": 1.4286, "step": 14522 }, { "epoch": 0.7940079001681177, "grad_norm": 1.5583690404891968, "learning_rate": 2.277145901281668e-06, "loss": 1.3902, "step": 14523 }, { "epoch": 0.7940625726118393, "grad_norm": 1.38956618309021, "learning_rate": 2.2759851545043175e-06, "loss": 1.4246, "step": 14524 }, { "epoch": 0.7941172450555609, "grad_norm": 2.2514984607696533, "learning_rate": 2.2748246656499485e-06, "loss": 1.3167, "step": 14525 }, { "epoch": 0.7941719174992824, "grad_norm": 1.4304192066192627, "learning_rate": 2.273664434757308e-06, "loss": 1.3689, "step": 14526 }, { "epoch": 0.794226589943004, "grad_norm": 1.3813211917877197, "learning_rate": 2.272504461865145e-06, "loss": 1.3044, "step": 14527 }, { "epoch": 0.7942812623867256, "grad_norm": 1.602142572402954, "learning_rate": 2.2713447470121917e-06, "loss": 1.4087, "step": 14528 }, { "epoch": 0.7943359348304471, "grad_norm": 1.4213275909423828, "learning_rate": 2.27018529023717e-06, "loss": 1.4597, "step": 14529 }, { "epoch": 0.7943906072741687, "grad_norm": 1.5238922834396362, "learning_rate": 2.269026091578803e-06, "loss": 1.4246, "step": 14530 }, { "epoch": 0.7944452797178901, "grad_norm": 1.4782887697219849, "learning_rate": 2.2678671510757953e-06, "loss": 1.4418, "step": 14531 }, { "epoch": 0.7944999521616117, "grad_norm": 1.418144702911377, "learning_rate": 2.266708468766848e-06, "loss": 1.5065, "step": 14532 }, { "epoch": 0.7945546246053333, "grad_norm": 2.2569875717163086, "learning_rate": 2.265550044690653e-06, "loss": 1.2168, "step": 14533 }, { "epoch": 0.7946092970490548, "grad_norm": 1.631178617477417, "learning_rate": 2.26439187888589e-06, "loss": 1.38, "step": 14534 }, { "epoch": 0.7946639694927764, "grad_norm": 1.9533450603485107, "learning_rate": 2.263233971391232e-06, "loss": 1.2913, "step": 14535 }, { "epoch": 0.794718641936498, "grad_norm": 2.3968851566314697, "learning_rate": 2.262076322245349e-06, "loss": 1.1837, "step": 14536 }, { "epoch": 0.7947733143802195, "grad_norm": 3.0035388469696045, "learning_rate": 2.2609189314868927e-06, "loss": 1.0448, "step": 14537 }, { "epoch": 0.7948279868239411, "grad_norm": 1.5791468620300293, "learning_rate": 2.259761799154516e-06, "loss": 1.2235, "step": 14538 }, { "epoch": 0.7948826592676627, "grad_norm": 1.8563108444213867, "learning_rate": 2.258604925286857e-06, "loss": 1.5462, "step": 14539 }, { "epoch": 0.7949373317113841, "grad_norm": 1.7922688722610474, "learning_rate": 2.257448309922542e-06, "loss": 1.5458, "step": 14540 }, { "epoch": 0.7949920041551057, "grad_norm": 1.7215832471847534, "learning_rate": 2.2562919531001983e-06, "loss": 1.4747, "step": 14541 }, { "epoch": 0.7950466765988273, "grad_norm": 1.9837696552276611, "learning_rate": 2.255135854858438e-06, "loss": 1.3717, "step": 14542 }, { "epoch": 0.7951013490425488, "grad_norm": 1.544701337814331, "learning_rate": 2.2539800152358626e-06, "loss": 1.5904, "step": 14543 }, { "epoch": 0.7951560214862704, "grad_norm": 1.4341254234313965, "learning_rate": 2.252824434271075e-06, "loss": 1.4027, "step": 14544 }, { "epoch": 0.7952106939299919, "grad_norm": 1.2833406925201416, "learning_rate": 2.251669112002657e-06, "loss": 1.3424, "step": 14545 }, { "epoch": 0.7952653663737135, "grad_norm": 1.5666736364364624, "learning_rate": 2.25051404846919e-06, "loss": 1.2001, "step": 14546 }, { "epoch": 0.7953200388174351, "grad_norm": 1.3524889945983887, "learning_rate": 2.249359243709245e-06, "loss": 1.3929, "step": 14547 }, { "epoch": 0.7953747112611566, "grad_norm": 1.5866401195526123, "learning_rate": 2.2482046977613805e-06, "loss": 1.3381, "step": 14548 }, { "epoch": 0.7954293837048781, "grad_norm": 1.4250359535217285, "learning_rate": 2.2470504106641487e-06, "loss": 1.3624, "step": 14549 }, { "epoch": 0.7954840561485997, "grad_norm": 1.5489425659179688, "learning_rate": 2.2458963824561007e-06, "loss": 1.5859, "step": 14550 }, { "epoch": 0.7955387285923212, "grad_norm": 1.318873643875122, "learning_rate": 2.244742613175764e-06, "loss": 1.5393, "step": 14551 }, { "epoch": 0.7955934010360428, "grad_norm": 1.5872694253921509, "learning_rate": 2.243589102861673e-06, "loss": 1.6832, "step": 14552 }, { "epoch": 0.7956480734797644, "grad_norm": 1.6723113059997559, "learning_rate": 2.2424358515523426e-06, "loss": 1.2165, "step": 14553 }, { "epoch": 0.7957027459234859, "grad_norm": 1.7362900972366333, "learning_rate": 2.24128285928628e-06, "loss": 1.4612, "step": 14554 }, { "epoch": 0.7957574183672075, "grad_norm": 1.384545922279358, "learning_rate": 2.2401301261019927e-06, "loss": 1.3334, "step": 14555 }, { "epoch": 0.7958120908109291, "grad_norm": 1.6884698867797852, "learning_rate": 2.238977652037969e-06, "loss": 1.5712, "step": 14556 }, { "epoch": 0.7958667632546506, "grad_norm": 1.5582749843597412, "learning_rate": 2.23782543713269e-06, "loss": 1.6202, "step": 14557 }, { "epoch": 0.7959214356983721, "grad_norm": 1.5929900407791138, "learning_rate": 2.2366734814246383e-06, "loss": 1.2551, "step": 14558 }, { "epoch": 0.7959761081420936, "grad_norm": 1.5260816812515259, "learning_rate": 2.235521784952275e-06, "loss": 1.5994, "step": 14559 }, { "epoch": 0.7960307805858152, "grad_norm": 1.8469150066375732, "learning_rate": 2.2343703477540603e-06, "loss": 1.4818, "step": 14560 }, { "epoch": 0.7960854530295368, "grad_norm": 1.888213038444519, "learning_rate": 2.2332191698684413e-06, "loss": 1.1722, "step": 14561 }, { "epoch": 0.7961401254732583, "grad_norm": 1.4056904315948486, "learning_rate": 2.23206825133386e-06, "loss": 1.3894, "step": 14562 }, { "epoch": 0.7961947979169799, "grad_norm": 1.8506566286087036, "learning_rate": 2.2309175921887447e-06, "loss": 1.4029, "step": 14563 }, { "epoch": 0.7962494703607015, "grad_norm": 1.6067049503326416, "learning_rate": 2.229767192471525e-06, "loss": 1.4672, "step": 14564 }, { "epoch": 0.796304142804423, "grad_norm": 1.6382850408554077, "learning_rate": 2.2286170522206086e-06, "loss": 1.3819, "step": 14565 }, { "epoch": 0.7963588152481446, "grad_norm": 1.465841293334961, "learning_rate": 2.227467171474409e-06, "loss": 1.5921, "step": 14566 }, { "epoch": 0.7964134876918662, "grad_norm": 1.8856858015060425, "learning_rate": 2.2263175502713187e-06, "loss": 1.5595, "step": 14567 }, { "epoch": 0.7964681601355876, "grad_norm": 1.6958849430084229, "learning_rate": 2.2251681886497235e-06, "loss": 1.4228, "step": 14568 }, { "epoch": 0.7965228325793092, "grad_norm": 1.1447113752365112, "learning_rate": 2.2240190866480105e-06, "loss": 1.4137, "step": 14569 }, { "epoch": 0.7965775050230308, "grad_norm": 1.8865801095962524, "learning_rate": 2.2228702443045456e-06, "loss": 1.2901, "step": 14570 }, { "epoch": 0.7966321774667523, "grad_norm": 1.4471529722213745, "learning_rate": 2.2217216616576944e-06, "loss": 1.6134, "step": 14571 }, { "epoch": 0.7966868499104739, "grad_norm": 1.6954920291900635, "learning_rate": 2.2205733387458083e-06, "loss": 1.5197, "step": 14572 }, { "epoch": 0.7967415223541954, "grad_norm": 2.0060017108917236, "learning_rate": 2.2194252756072343e-06, "loss": 1.5644, "step": 14573 }, { "epoch": 0.796796194797917, "grad_norm": 1.364967703819275, "learning_rate": 2.218277472280305e-06, "loss": 1.3765, "step": 14574 }, { "epoch": 0.7968508672416386, "grad_norm": 1.305652141571045, "learning_rate": 2.217129928803353e-06, "loss": 1.554, "step": 14575 }, { "epoch": 0.79690553968536, "grad_norm": 1.400315523147583, "learning_rate": 2.215982645214697e-06, "loss": 1.3871, "step": 14576 }, { "epoch": 0.7969602121290816, "grad_norm": 1.2827585935592651, "learning_rate": 2.2148356215526436e-06, "loss": 1.3363, "step": 14577 }, { "epoch": 0.7970148845728032, "grad_norm": 1.2023149728775024, "learning_rate": 2.2136888578554993e-06, "loss": 1.5997, "step": 14578 }, { "epoch": 0.7970695570165247, "grad_norm": 1.285703182220459, "learning_rate": 2.212542354161552e-06, "loss": 1.4909, "step": 14579 }, { "epoch": 0.7971242294602463, "grad_norm": 1.8321266174316406, "learning_rate": 2.2113961105090933e-06, "loss": 1.2449, "step": 14580 }, { "epoch": 0.7971789019039679, "grad_norm": 1.4924595355987549, "learning_rate": 2.210250126936394e-06, "loss": 1.3992, "step": 14581 }, { "epoch": 0.7972335743476894, "grad_norm": 1.5605539083480835, "learning_rate": 2.20910440348172e-06, "loss": 1.3577, "step": 14582 }, { "epoch": 0.797288246791411, "grad_norm": 1.5380969047546387, "learning_rate": 2.2079589401833348e-06, "loss": 1.3313, "step": 14583 }, { "epoch": 0.7973429192351326, "grad_norm": 1.5026339292526245, "learning_rate": 2.206813737079485e-06, "loss": 1.612, "step": 14584 }, { "epoch": 0.797397591678854, "grad_norm": 1.6050442457199097, "learning_rate": 2.2056687942084108e-06, "loss": 1.4474, "step": 14585 }, { "epoch": 0.7974522641225756, "grad_norm": 1.8521637916564941, "learning_rate": 2.2045241116083472e-06, "loss": 1.3017, "step": 14586 }, { "epoch": 0.7975069365662971, "grad_norm": 1.2891929149627686, "learning_rate": 2.2033796893175152e-06, "loss": 1.5887, "step": 14587 }, { "epoch": 0.7975616090100187, "grad_norm": 1.4845423698425293, "learning_rate": 2.202235527374128e-06, "loss": 1.5718, "step": 14588 }, { "epoch": 0.7976162814537403, "grad_norm": 1.7934237718582153, "learning_rate": 2.201091625816397e-06, "loss": 1.5251, "step": 14589 }, { "epoch": 0.7976709538974618, "grad_norm": 1.4435275793075562, "learning_rate": 2.199947984682518e-06, "loss": 1.5067, "step": 14590 }, { "epoch": 0.7977256263411834, "grad_norm": 1.5896587371826172, "learning_rate": 2.198804604010677e-06, "loss": 1.6704, "step": 14591 }, { "epoch": 0.797780298784905, "grad_norm": 1.7873533964157104, "learning_rate": 2.1976614838390576e-06, "loss": 1.3849, "step": 14592 }, { "epoch": 0.7978349712286265, "grad_norm": 1.7428821325302124, "learning_rate": 2.196518624205828e-06, "loss": 1.3725, "step": 14593 }, { "epoch": 0.797889643672348, "grad_norm": 1.3350293636322021, "learning_rate": 2.195376025149156e-06, "loss": 1.3125, "step": 14594 }, { "epoch": 0.7979443161160696, "grad_norm": 1.5900250673294067, "learning_rate": 2.194233686707192e-06, "loss": 1.3839, "step": 14595 }, { "epoch": 0.7979989885597911, "grad_norm": 2.151435375213623, "learning_rate": 2.19309160891808e-06, "loss": 1.175, "step": 14596 }, { "epoch": 0.7980536610035127, "grad_norm": 1.6287648677825928, "learning_rate": 2.1919497918199605e-06, "loss": 1.3284, "step": 14597 }, { "epoch": 0.7981083334472343, "grad_norm": 1.4275641441345215, "learning_rate": 2.190808235450961e-06, "loss": 1.3841, "step": 14598 }, { "epoch": 0.7981630058909558, "grad_norm": 1.2647485733032227, "learning_rate": 2.1896669398491975e-06, "loss": 1.3914, "step": 14599 }, { "epoch": 0.7982176783346774, "grad_norm": 1.9704737663269043, "learning_rate": 2.188525905052784e-06, "loss": 1.4444, "step": 14600 }, { "epoch": 0.7982723507783989, "grad_norm": 1.7952556610107422, "learning_rate": 2.1873851310998194e-06, "loss": 1.3436, "step": 14601 }, { "epoch": 0.7983270232221205, "grad_norm": 1.3656587600708008, "learning_rate": 2.186244618028397e-06, "loss": 1.3215, "step": 14602 }, { "epoch": 0.798381695665842, "grad_norm": 1.872086763381958, "learning_rate": 2.1851043658766034e-06, "loss": 1.4686, "step": 14603 }, { "epoch": 0.7984363681095635, "grad_norm": 1.6027202606201172, "learning_rate": 2.1839643746825145e-06, "loss": 1.153, "step": 14604 }, { "epoch": 0.7984910405532851, "grad_norm": 1.533797025680542, "learning_rate": 2.1828246444841925e-06, "loss": 1.6137, "step": 14605 }, { "epoch": 0.7985457129970067, "grad_norm": 1.2503046989440918, "learning_rate": 2.1816851753197023e-06, "loss": 1.2512, "step": 14606 }, { "epoch": 0.7986003854407282, "grad_norm": 1.5827606916427612, "learning_rate": 2.1805459672270913e-06, "loss": 1.3595, "step": 14607 }, { "epoch": 0.7986550578844498, "grad_norm": 1.3590549230575562, "learning_rate": 2.179407020244395e-06, "loss": 1.572, "step": 14608 }, { "epoch": 0.7987097303281714, "grad_norm": 1.7127277851104736, "learning_rate": 2.178268334409653e-06, "loss": 1.2575, "step": 14609 }, { "epoch": 0.7987644027718929, "grad_norm": 1.2624711990356445, "learning_rate": 2.1771299097608866e-06, "loss": 1.4852, "step": 14610 }, { "epoch": 0.7988190752156145, "grad_norm": 1.3996257781982422, "learning_rate": 2.175991746336108e-06, "loss": 1.2102, "step": 14611 }, { "epoch": 0.798873747659336, "grad_norm": 1.5119701623916626, "learning_rate": 2.174853844173326e-06, "loss": 1.7182, "step": 14612 }, { "epoch": 0.7989284201030575, "grad_norm": 1.4280372858047485, "learning_rate": 2.173716203310533e-06, "loss": 1.538, "step": 14613 }, { "epoch": 0.7989830925467791, "grad_norm": 1.4617384672164917, "learning_rate": 2.1725788237857235e-06, "loss": 1.5606, "step": 14614 }, { "epoch": 0.7990377649905006, "grad_norm": 1.4330670833587646, "learning_rate": 2.1714417056368752e-06, "loss": 1.5164, "step": 14615 }, { "epoch": 0.7990924374342222, "grad_norm": 3.219959020614624, "learning_rate": 2.170304848901955e-06, "loss": 1.017, "step": 14616 }, { "epoch": 0.7991471098779438, "grad_norm": 1.6928207874298096, "learning_rate": 2.1691682536189327e-06, "loss": 1.4273, "step": 14617 }, { "epoch": 0.7992017823216653, "grad_norm": 1.3767644166946411, "learning_rate": 2.1680319198257573e-06, "loss": 1.4816, "step": 14618 }, { "epoch": 0.7992564547653869, "grad_norm": 1.6563365459442139, "learning_rate": 2.166895847560372e-06, "loss": 1.7092, "step": 14619 }, { "epoch": 0.7993111272091085, "grad_norm": 1.2674803733825684, "learning_rate": 2.165760036860718e-06, "loss": 1.3995, "step": 14620 }, { "epoch": 0.79936579965283, "grad_norm": 2.094254493713379, "learning_rate": 2.1646244877647195e-06, "loss": 1.4815, "step": 14621 }, { "epoch": 0.7994204720965515, "grad_norm": 1.5570261478424072, "learning_rate": 2.1634892003102935e-06, "loss": 1.2965, "step": 14622 }, { "epoch": 0.7994751445402731, "grad_norm": 1.3723483085632324, "learning_rate": 2.1623541745353547e-06, "loss": 1.3625, "step": 14623 }, { "epoch": 0.7995298169839946, "grad_norm": 1.326209306716919, "learning_rate": 2.1612194104778016e-06, "loss": 1.2746, "step": 14624 }, { "epoch": 0.7995844894277162, "grad_norm": 1.8836311101913452, "learning_rate": 2.160084908175526e-06, "loss": 1.6345, "step": 14625 }, { "epoch": 0.7996391618714378, "grad_norm": 1.7955504655838013, "learning_rate": 2.1589506676664128e-06, "loss": 1.3901, "step": 14626 }, { "epoch": 0.7996938343151593, "grad_norm": 1.9696463346481323, "learning_rate": 2.1578166889883336e-06, "loss": 1.3183, "step": 14627 }, { "epoch": 0.7997485067588809, "grad_norm": 1.5872788429260254, "learning_rate": 2.1566829721791603e-06, "loss": 1.3091, "step": 14628 }, { "epoch": 0.7998031792026024, "grad_norm": 1.4334437847137451, "learning_rate": 2.155549517276747e-06, "loss": 1.4633, "step": 14629 }, { "epoch": 0.799857851646324, "grad_norm": 1.5773515701293945, "learning_rate": 2.154416324318941e-06, "loss": 1.5975, "step": 14630 }, { "epoch": 0.7999125240900455, "grad_norm": 1.866248607635498, "learning_rate": 2.153283393343587e-06, "loss": 1.3876, "step": 14631 }, { "epoch": 0.799967196533767, "grad_norm": 1.5353350639343262, "learning_rate": 2.152150724388512e-06, "loss": 1.5019, "step": 14632 }, { "epoch": 0.8000218689774886, "grad_norm": 1.756296157836914, "learning_rate": 2.151018317491539e-06, "loss": 1.4386, "step": 14633 }, { "epoch": 0.8000765414212102, "grad_norm": 1.413584589958191, "learning_rate": 2.149886172690484e-06, "loss": 1.6964, "step": 14634 }, { "epoch": 0.8001312138649317, "grad_norm": 1.6717593669891357, "learning_rate": 2.1487542900231508e-06, "loss": 1.2533, "step": 14635 }, { "epoch": 0.8001858863086533, "grad_norm": 1.4448034763336182, "learning_rate": 2.1476226695273326e-06, "loss": 1.5163, "step": 14636 }, { "epoch": 0.8002405587523749, "grad_norm": 1.2551612854003906, "learning_rate": 2.1464913112408225e-06, "loss": 1.4723, "step": 14637 }, { "epoch": 0.8002952311960964, "grad_norm": 1.5991673469543457, "learning_rate": 2.1453602152013965e-06, "loss": 1.4277, "step": 14638 }, { "epoch": 0.800349903639818, "grad_norm": 1.4930211305618286, "learning_rate": 2.144229381446824e-06, "loss": 1.6257, "step": 14639 }, { "epoch": 0.8004045760835395, "grad_norm": 1.8110246658325195, "learning_rate": 2.1430988100148663e-06, "loss": 1.3567, "step": 14640 }, { "epoch": 0.800459248527261, "grad_norm": 1.5315767526626587, "learning_rate": 2.1419685009432746e-06, "loss": 1.4299, "step": 14641 }, { "epoch": 0.8005139209709826, "grad_norm": 1.6889896392822266, "learning_rate": 2.140838454269796e-06, "loss": 1.1368, "step": 14642 }, { "epoch": 0.8005685934147041, "grad_norm": 1.6383765935897827, "learning_rate": 2.1397086700321635e-06, "loss": 1.3959, "step": 14643 }, { "epoch": 0.8006232658584257, "grad_norm": 1.3097689151763916, "learning_rate": 2.1385791482681007e-06, "loss": 1.6966, "step": 14644 }, { "epoch": 0.8006779383021473, "grad_norm": 1.574973702430725, "learning_rate": 2.1374498890153305e-06, "loss": 1.624, "step": 14645 }, { "epoch": 0.8007326107458688, "grad_norm": 1.6915479898452759, "learning_rate": 2.136320892311559e-06, "loss": 1.4373, "step": 14646 }, { "epoch": 0.8007872831895904, "grad_norm": 1.8899106979370117, "learning_rate": 2.1351921581944813e-06, "loss": 1.2431, "step": 14647 }, { "epoch": 0.800841955633312, "grad_norm": 1.7030277252197266, "learning_rate": 2.134063686701797e-06, "loss": 1.2541, "step": 14648 }, { "epoch": 0.8008966280770334, "grad_norm": 1.5263049602508545, "learning_rate": 2.132935477871183e-06, "loss": 1.3981, "step": 14649 }, { "epoch": 0.800951300520755, "grad_norm": 1.113485336303711, "learning_rate": 2.1318075317403152e-06, "loss": 1.4556, "step": 14650 }, { "epoch": 0.8010059729644766, "grad_norm": 1.5718843936920166, "learning_rate": 2.130679848346857e-06, "loss": 1.3111, "step": 14651 }, { "epoch": 0.8010606454081981, "grad_norm": 1.741403341293335, "learning_rate": 2.1295524277284617e-06, "loss": 1.3323, "step": 14652 }, { "epoch": 0.8011153178519197, "grad_norm": 1.2125771045684814, "learning_rate": 2.1284252699227813e-06, "loss": 1.3969, "step": 14653 }, { "epoch": 0.8011699902956413, "grad_norm": 1.7260031700134277, "learning_rate": 2.1272983749674537e-06, "loss": 1.2198, "step": 14654 }, { "epoch": 0.8012246627393628, "grad_norm": 1.401117205619812, "learning_rate": 2.126171742900104e-06, "loss": 1.3126, "step": 14655 }, { "epoch": 0.8012793351830844, "grad_norm": 1.42336106300354, "learning_rate": 2.125045373758359e-06, "loss": 1.5866, "step": 14656 }, { "epoch": 0.8013340076268058, "grad_norm": 1.723563313484192, "learning_rate": 2.123919267579828e-06, "loss": 1.1355, "step": 14657 }, { "epoch": 0.8013886800705274, "grad_norm": 1.5591007471084595, "learning_rate": 2.1227934244021108e-06, "loss": 1.4135, "step": 14658 }, { "epoch": 0.801443352514249, "grad_norm": 1.4691718816757202, "learning_rate": 2.1216678442628104e-06, "loss": 1.2794, "step": 14659 }, { "epoch": 0.8014980249579705, "grad_norm": 1.8633184432983398, "learning_rate": 2.120542527199506e-06, "loss": 1.3249, "step": 14660 }, { "epoch": 0.8015526974016921, "grad_norm": 1.5038443803787231, "learning_rate": 2.119417473249774e-06, "loss": 1.6781, "step": 14661 }, { "epoch": 0.8016073698454137, "grad_norm": 1.651145100593567, "learning_rate": 2.1182926824511887e-06, "loss": 1.483, "step": 14662 }, { "epoch": 0.8016620422891352, "grad_norm": 1.4728435277938843, "learning_rate": 2.1171681548413046e-06, "loss": 1.3946, "step": 14663 }, { "epoch": 0.8017167147328568, "grad_norm": 1.3450908660888672, "learning_rate": 2.1160438904576743e-06, "loss": 1.5677, "step": 14664 }, { "epoch": 0.8017713871765784, "grad_norm": 1.136269211769104, "learning_rate": 2.1149198893378377e-06, "loss": 1.5712, "step": 14665 }, { "epoch": 0.8018260596202998, "grad_norm": 1.3077707290649414, "learning_rate": 2.1137961515193274e-06, "loss": 1.288, "step": 14666 }, { "epoch": 0.8018807320640214, "grad_norm": 1.7385832071304321, "learning_rate": 2.1126726770396712e-06, "loss": 1.2921, "step": 14667 }, { "epoch": 0.801935404507743, "grad_norm": 1.8059738874435425, "learning_rate": 2.1115494659363824e-06, "loss": 1.4284, "step": 14668 }, { "epoch": 0.8019900769514645, "grad_norm": 1.4993834495544434, "learning_rate": 2.110426518246965e-06, "loss": 1.0554, "step": 14669 }, { "epoch": 0.8020447493951861, "grad_norm": 1.1996814012527466, "learning_rate": 2.1093038340089235e-06, "loss": 1.5774, "step": 14670 }, { "epoch": 0.8020994218389076, "grad_norm": 1.5525788068771362, "learning_rate": 2.108181413259741e-06, "loss": 1.6448, "step": 14671 }, { "epoch": 0.8021540942826292, "grad_norm": 1.9923793077468872, "learning_rate": 2.1070592560368986e-06, "loss": 1.2519, "step": 14672 }, { "epoch": 0.8022087667263508, "grad_norm": 1.424383282661438, "learning_rate": 2.1059373623778722e-06, "loss": 1.3932, "step": 14673 }, { "epoch": 0.8022634391700723, "grad_norm": 1.6482700109481812, "learning_rate": 2.10481573232012e-06, "loss": 1.3076, "step": 14674 }, { "epoch": 0.8023181116137938, "grad_norm": 1.3021900653839111, "learning_rate": 2.1036943659010945e-06, "loss": 1.6674, "step": 14675 }, { "epoch": 0.8023727840575154, "grad_norm": 2.247389316558838, "learning_rate": 2.1025732631582475e-06, "loss": 1.3748, "step": 14676 }, { "epoch": 0.8024274565012369, "grad_norm": 1.79152512550354, "learning_rate": 2.101452424129009e-06, "loss": 1.1967, "step": 14677 }, { "epoch": 0.8024821289449585, "grad_norm": 1.6458027362823486, "learning_rate": 2.1003318488508107e-06, "loss": 1.1159, "step": 14678 }, { "epoch": 0.8025368013886801, "grad_norm": 1.4214565753936768, "learning_rate": 2.0992115373610677e-06, "loss": 1.4031, "step": 14679 }, { "epoch": 0.8025914738324016, "grad_norm": 1.8135242462158203, "learning_rate": 2.0980914896971892e-06, "loss": 1.4284, "step": 14680 }, { "epoch": 0.8026461462761232, "grad_norm": 1.289487361907959, "learning_rate": 2.096971705896581e-06, "loss": 1.5186, "step": 14681 }, { "epoch": 0.8027008187198448, "grad_norm": 1.598982810974121, "learning_rate": 2.095852185996632e-06, "loss": 1.5254, "step": 14682 }, { "epoch": 0.8027554911635663, "grad_norm": 1.3200318813323975, "learning_rate": 2.094732930034724e-06, "loss": 1.4118, "step": 14683 }, { "epoch": 0.8028101636072879, "grad_norm": 1.1925015449523926, "learning_rate": 2.0936139380482357e-06, "loss": 1.5686, "step": 14684 }, { "epoch": 0.8028648360510094, "grad_norm": 2.021153211593628, "learning_rate": 2.092495210074532e-06, "loss": 1.4769, "step": 14685 }, { "epoch": 0.8029195084947309, "grad_norm": 1.9056347608566284, "learning_rate": 2.0913767461509647e-06, "loss": 1.3688, "step": 14686 }, { "epoch": 0.8029741809384525, "grad_norm": 1.45630943775177, "learning_rate": 2.0902585463148907e-06, "loss": 1.4824, "step": 14687 }, { "epoch": 0.803028853382174, "grad_norm": 1.6539843082427979, "learning_rate": 2.089140610603643e-06, "loss": 1.3949, "step": 14688 }, { "epoch": 0.8030835258258956, "grad_norm": 1.4836703538894653, "learning_rate": 2.0880229390545536e-06, "loss": 1.447, "step": 14689 }, { "epoch": 0.8031381982696172, "grad_norm": 1.8271079063415527, "learning_rate": 2.086905531704946e-06, "loss": 1.4827, "step": 14690 }, { "epoch": 0.8031928707133387, "grad_norm": 1.7992768287658691, "learning_rate": 2.085788388592129e-06, "loss": 1.1974, "step": 14691 }, { "epoch": 0.8032475431570603, "grad_norm": 1.275221347808838, "learning_rate": 2.0846715097534087e-06, "loss": 1.4965, "step": 14692 }, { "epoch": 0.8033022156007819, "grad_norm": 1.5845239162445068, "learning_rate": 2.0835548952260822e-06, "loss": 1.5448, "step": 14693 }, { "epoch": 0.8033568880445033, "grad_norm": 1.7345385551452637, "learning_rate": 2.0824385450474314e-06, "loss": 1.3218, "step": 14694 }, { "epoch": 0.8034115604882249, "grad_norm": 1.4106043577194214, "learning_rate": 2.081322459254739e-06, "loss": 1.2884, "step": 14695 }, { "epoch": 0.8034662329319465, "grad_norm": 1.4020867347717285, "learning_rate": 2.0802066378852725e-06, "loss": 1.4806, "step": 14696 }, { "epoch": 0.803520905375668, "grad_norm": 1.3795254230499268, "learning_rate": 2.0790910809762876e-06, "loss": 1.5663, "step": 14697 }, { "epoch": 0.8035755778193896, "grad_norm": 1.7990634441375732, "learning_rate": 2.077975788565041e-06, "loss": 1.1763, "step": 14698 }, { "epoch": 0.8036302502631112, "grad_norm": 1.1708483695983887, "learning_rate": 2.0768607606887724e-06, "loss": 1.5667, "step": 14699 }, { "epoch": 0.8036849227068327, "grad_norm": 2.137441873550415, "learning_rate": 2.075745997384713e-06, "loss": 1.1859, "step": 14700 }, { "epoch": 0.8037395951505543, "grad_norm": 1.7651773691177368, "learning_rate": 2.074631498690092e-06, "loss": 1.4115, "step": 14701 }, { "epoch": 0.8037942675942757, "grad_norm": 1.6607866287231445, "learning_rate": 2.073517264642122e-06, "loss": 1.61, "step": 14702 }, { "epoch": 0.8038489400379973, "grad_norm": 1.4440940618515015, "learning_rate": 2.0724032952780115e-06, "loss": 1.5776, "step": 14703 }, { "epoch": 0.8039036124817189, "grad_norm": 1.5942145586013794, "learning_rate": 2.071289590634957e-06, "loss": 1.5884, "step": 14704 }, { "epoch": 0.8039582849254404, "grad_norm": 1.3029720783233643, "learning_rate": 2.0701761507501495e-06, "loss": 1.3464, "step": 14705 }, { "epoch": 0.804012957369162, "grad_norm": 1.7014654874801636, "learning_rate": 2.069062975660765e-06, "loss": 1.3544, "step": 14706 }, { "epoch": 0.8040676298128836, "grad_norm": 1.5607664585113525, "learning_rate": 2.067950065403981e-06, "loss": 1.4431, "step": 14707 }, { "epoch": 0.8041223022566051, "grad_norm": 1.8336639404296875, "learning_rate": 2.066837420016954e-06, "loss": 1.3242, "step": 14708 }, { "epoch": 0.8041769747003267, "grad_norm": 1.9284698963165283, "learning_rate": 2.0657250395368443e-06, "loss": 1.5748, "step": 14709 }, { "epoch": 0.8042316471440483, "grad_norm": 1.5210374593734741, "learning_rate": 2.064612924000795e-06, "loss": 1.5465, "step": 14710 }, { "epoch": 0.8042863195877697, "grad_norm": 1.713189721107483, "learning_rate": 2.0635010734459372e-06, "loss": 1.1944, "step": 14711 }, { "epoch": 0.8043409920314913, "grad_norm": 1.519115924835205, "learning_rate": 2.062389487909405e-06, "loss": 1.5872, "step": 14712 }, { "epoch": 0.8043956644752129, "grad_norm": 1.8929002285003662, "learning_rate": 2.0612781674283142e-06, "loss": 1.3145, "step": 14713 }, { "epoch": 0.8044503369189344, "grad_norm": 1.2876640558242798, "learning_rate": 2.060167112039775e-06, "loss": 1.5662, "step": 14714 }, { "epoch": 0.804505009362656, "grad_norm": 1.8204675912857056, "learning_rate": 2.0590563217808858e-06, "loss": 1.4847, "step": 14715 }, { "epoch": 0.8045596818063775, "grad_norm": 1.766680359840393, "learning_rate": 2.0579457966887406e-06, "loss": 1.5255, "step": 14716 }, { "epoch": 0.8046143542500991, "grad_norm": 1.8158494234085083, "learning_rate": 2.05683553680042e-06, "loss": 1.3438, "step": 14717 }, { "epoch": 0.8046690266938207, "grad_norm": 1.4649118185043335, "learning_rate": 2.055725542153002e-06, "loss": 1.3289, "step": 14718 }, { "epoch": 0.8047236991375422, "grad_norm": 1.5049619674682617, "learning_rate": 2.0546158127835503e-06, "loss": 1.2445, "step": 14719 }, { "epoch": 0.8047783715812638, "grad_norm": 1.4314175844192505, "learning_rate": 2.0535063487291176e-06, "loss": 1.6134, "step": 14720 }, { "epoch": 0.8048330440249853, "grad_norm": 1.4670453071594238, "learning_rate": 2.0523971500267583e-06, "loss": 1.7413, "step": 14721 }, { "epoch": 0.8048877164687068, "grad_norm": 1.7324426174163818, "learning_rate": 2.051288216713505e-06, "loss": 1.5163, "step": 14722 }, { "epoch": 0.8049423889124284, "grad_norm": 2.1811318397521973, "learning_rate": 2.050179548826393e-06, "loss": 1.699, "step": 14723 }, { "epoch": 0.80499706135615, "grad_norm": 1.230141043663025, "learning_rate": 2.0490711464024403e-06, "loss": 1.3409, "step": 14724 }, { "epoch": 0.8050517337998715, "grad_norm": 1.4833555221557617, "learning_rate": 2.047963009478657e-06, "loss": 1.3023, "step": 14725 }, { "epoch": 0.8051064062435931, "grad_norm": 1.8850003480911255, "learning_rate": 2.046855138092052e-06, "loss": 1.3022, "step": 14726 }, { "epoch": 0.8051610786873147, "grad_norm": 1.338453769683838, "learning_rate": 2.045747532279616e-06, "loss": 1.3852, "step": 14727 }, { "epoch": 0.8052157511310362, "grad_norm": 1.7417634725570679, "learning_rate": 2.0446401920783353e-06, "loss": 1.481, "step": 14728 }, { "epoch": 0.8052704235747578, "grad_norm": 1.2864092588424683, "learning_rate": 2.0435331175251873e-06, "loss": 1.5689, "step": 14729 }, { "epoch": 0.8053250960184792, "grad_norm": 1.4638034105300903, "learning_rate": 2.042426308657138e-06, "loss": 1.2404, "step": 14730 }, { "epoch": 0.8053797684622008, "grad_norm": 1.4794013500213623, "learning_rate": 2.041319765511145e-06, "loss": 1.4087, "step": 14731 }, { "epoch": 0.8054344409059224, "grad_norm": 1.5243632793426514, "learning_rate": 2.040213488124163e-06, "loss": 1.4078, "step": 14732 }, { "epoch": 0.8054891133496439, "grad_norm": 2.1903061866760254, "learning_rate": 2.0391074765331307e-06, "loss": 1.3818, "step": 14733 }, { "epoch": 0.8055437857933655, "grad_norm": 2.151567220687866, "learning_rate": 2.038001730774978e-06, "loss": 1.3644, "step": 14734 }, { "epoch": 0.8055984582370871, "grad_norm": 1.6755069494247437, "learning_rate": 2.036896250886634e-06, "loss": 1.327, "step": 14735 }, { "epoch": 0.8056531306808086, "grad_norm": 1.7964152097702026, "learning_rate": 2.035791036905007e-06, "loss": 1.5285, "step": 14736 }, { "epoch": 0.8057078031245302, "grad_norm": 1.2909990549087524, "learning_rate": 2.0346860888670095e-06, "loss": 1.3168, "step": 14737 }, { "epoch": 0.8057624755682518, "grad_norm": 1.935603141784668, "learning_rate": 2.033581406809534e-06, "loss": 1.2724, "step": 14738 }, { "epoch": 0.8058171480119732, "grad_norm": 1.3732057809829712, "learning_rate": 2.0324769907694665e-06, "loss": 1.505, "step": 14739 }, { "epoch": 0.8058718204556948, "grad_norm": 1.2674797773361206, "learning_rate": 2.031372840783691e-06, "loss": 1.8251, "step": 14740 }, { "epoch": 0.8059264928994164, "grad_norm": 1.63645339012146, "learning_rate": 2.0302689568890753e-06, "loss": 1.28, "step": 14741 }, { "epoch": 0.8059811653431379, "grad_norm": 1.6503654718399048, "learning_rate": 2.029165339122482e-06, "loss": 1.4467, "step": 14742 }, { "epoch": 0.8060358377868595, "grad_norm": 1.6305081844329834, "learning_rate": 2.028061987520761e-06, "loss": 1.3649, "step": 14743 }, { "epoch": 0.806090510230581, "grad_norm": 1.3109935522079468, "learning_rate": 2.026958902120757e-06, "loss": 1.7478, "step": 14744 }, { "epoch": 0.8061451826743026, "grad_norm": 1.4344316720962524, "learning_rate": 2.025856082959302e-06, "loss": 1.5774, "step": 14745 }, { "epoch": 0.8061998551180242, "grad_norm": 1.3403488397598267, "learning_rate": 2.0247535300732267e-06, "loss": 1.5647, "step": 14746 }, { "epoch": 0.8062545275617456, "grad_norm": 1.7443305253982544, "learning_rate": 2.023651243499346e-06, "loss": 1.1138, "step": 14747 }, { "epoch": 0.8063092000054672, "grad_norm": 1.4485065937042236, "learning_rate": 2.022549223274465e-06, "loss": 1.3678, "step": 14748 }, { "epoch": 0.8063638724491888, "grad_norm": 1.8742009401321411, "learning_rate": 2.0214474694353868e-06, "loss": 1.2541, "step": 14749 }, { "epoch": 0.8064185448929103, "grad_norm": 1.524145483970642, "learning_rate": 2.0203459820188974e-06, "loss": 1.4485, "step": 14750 }, { "epoch": 0.8064732173366319, "grad_norm": 1.689023494720459, "learning_rate": 2.019244761061784e-06, "loss": 1.2049, "step": 14751 }, { "epoch": 0.8065278897803535, "grad_norm": 1.7883949279785156, "learning_rate": 2.0181438066008154e-06, "loss": 1.2421, "step": 14752 }, { "epoch": 0.806582562224075, "grad_norm": 1.3338104486465454, "learning_rate": 2.0170431186727545e-06, "loss": 1.605, "step": 14753 }, { "epoch": 0.8066372346677966, "grad_norm": 1.5312597751617432, "learning_rate": 2.015942697314357e-06, "loss": 1.1815, "step": 14754 }, { "epoch": 0.8066919071115182, "grad_norm": 1.6775104999542236, "learning_rate": 2.0148425425623673e-06, "loss": 1.4493, "step": 14755 }, { "epoch": 0.8067465795552397, "grad_norm": 1.3329734802246094, "learning_rate": 2.013742654453521e-06, "loss": 1.285, "step": 14756 }, { "epoch": 0.8068012519989612, "grad_norm": 1.4619799852371216, "learning_rate": 2.0126430330245493e-06, "loss": 1.4871, "step": 14757 }, { "epoch": 0.8068559244426827, "grad_norm": 1.9375324249267578, "learning_rate": 2.011543678312171e-06, "loss": 1.0502, "step": 14758 }, { "epoch": 0.8069105968864043, "grad_norm": 1.791000485420227, "learning_rate": 2.0104445903530912e-06, "loss": 1.3572, "step": 14759 }, { "epoch": 0.8069652693301259, "grad_norm": 1.761809229850769, "learning_rate": 2.0093457691840178e-06, "loss": 1.3971, "step": 14760 }, { "epoch": 0.8070199417738474, "grad_norm": 1.688962697982788, "learning_rate": 2.0082472148416387e-06, "loss": 1.6345, "step": 14761 }, { "epoch": 0.807074614217569, "grad_norm": 1.4101481437683105, "learning_rate": 2.0071489273626376e-06, "loss": 1.4131, "step": 14762 }, { "epoch": 0.8071292866612906, "grad_norm": 1.4040300846099854, "learning_rate": 2.0060509067836907e-06, "loss": 1.2393, "step": 14763 }, { "epoch": 0.8071839591050121, "grad_norm": 1.590902328491211, "learning_rate": 2.00495315314146e-06, "loss": 1.1723, "step": 14764 }, { "epoch": 0.8072386315487337, "grad_norm": 1.7669429779052734, "learning_rate": 2.0038556664726083e-06, "loss": 1.2752, "step": 14765 }, { "epoch": 0.8072933039924552, "grad_norm": 1.6182818412780762, "learning_rate": 2.0027584468137784e-06, "loss": 1.3807, "step": 14766 }, { "epoch": 0.8073479764361767, "grad_norm": 1.380653977394104, "learning_rate": 2.00166149420161e-06, "loss": 1.4393, "step": 14767 }, { "epoch": 0.8074026488798983, "grad_norm": 1.79204261302948, "learning_rate": 2.0005648086727337e-06, "loss": 1.4429, "step": 14768 }, { "epoch": 0.8074573213236199, "grad_norm": 1.583616852760315, "learning_rate": 1.999468390263769e-06, "loss": 1.324, "step": 14769 }, { "epoch": 0.8075119937673414, "grad_norm": 1.6885440349578857, "learning_rate": 1.9983722390113257e-06, "loss": 1.5433, "step": 14770 }, { "epoch": 0.807566666211063, "grad_norm": 1.6457794904708862, "learning_rate": 1.9972763549520137e-06, "loss": 1.3908, "step": 14771 }, { "epoch": 0.8076213386547845, "grad_norm": 1.794700264930725, "learning_rate": 1.996180738122422e-06, "loss": 1.6334, "step": 14772 }, { "epoch": 0.8076760110985061, "grad_norm": 2.252203941345215, "learning_rate": 1.9950853885591346e-06, "loss": 1.4624, "step": 14773 }, { "epoch": 0.8077306835422277, "grad_norm": 1.7058460712432861, "learning_rate": 1.993990306298733e-06, "loss": 1.515, "step": 14774 }, { "epoch": 0.8077853559859491, "grad_norm": 1.5257271528244019, "learning_rate": 1.992895491377782e-06, "loss": 1.3582, "step": 14775 }, { "epoch": 0.8078400284296707, "grad_norm": 1.5068882703781128, "learning_rate": 1.9918009438328365e-06, "loss": 1.3222, "step": 14776 }, { "epoch": 0.8078947008733923, "grad_norm": 1.4584720134735107, "learning_rate": 1.9907066637004526e-06, "loss": 1.6028, "step": 14777 }, { "epoch": 0.8079493733171138, "grad_norm": 1.6736977100372314, "learning_rate": 1.989612651017164e-06, "loss": 1.2937, "step": 14778 }, { "epoch": 0.8080040457608354, "grad_norm": 1.7548980712890625, "learning_rate": 1.98851890581951e-06, "loss": 1.18, "step": 14779 }, { "epoch": 0.808058718204557, "grad_norm": 1.551605463027954, "learning_rate": 1.9874254281440085e-06, "loss": 1.4756, "step": 14780 }, { "epoch": 0.8081133906482785, "grad_norm": 2.398449182510376, "learning_rate": 1.986332218027174e-06, "loss": 1.5038, "step": 14781 }, { "epoch": 0.8081680630920001, "grad_norm": 2.6530964374542236, "learning_rate": 1.9852392755055117e-06, "loss": 1.0499, "step": 14782 }, { "epoch": 0.8082227355357217, "grad_norm": 1.6391987800598145, "learning_rate": 1.9841466006155162e-06, "loss": 1.601, "step": 14783 }, { "epoch": 0.8082774079794431, "grad_norm": 1.90339195728302, "learning_rate": 1.983054193393674e-06, "loss": 1.403, "step": 14784 }, { "epoch": 0.8083320804231647, "grad_norm": 1.7003517150878906, "learning_rate": 1.981962053876467e-06, "loss": 1.3285, "step": 14785 }, { "epoch": 0.8083867528668862, "grad_norm": 1.856784701347351, "learning_rate": 1.9808701821003615e-06, "loss": 1.4186, "step": 14786 }, { "epoch": 0.8084414253106078, "grad_norm": 1.334435224533081, "learning_rate": 1.9797785781018164e-06, "loss": 1.4243, "step": 14787 }, { "epoch": 0.8084960977543294, "grad_norm": 2.0566213130950928, "learning_rate": 1.9786872419172863e-06, "loss": 1.2362, "step": 14788 }, { "epoch": 0.8085507701980509, "grad_norm": 1.6487011909484863, "learning_rate": 1.9775961735832126e-06, "loss": 1.5156, "step": 14789 }, { "epoch": 0.8086054426417725, "grad_norm": 1.429248332977295, "learning_rate": 1.976505373136025e-06, "loss": 1.6395, "step": 14790 }, { "epoch": 0.8086601150854941, "grad_norm": 1.4374017715454102, "learning_rate": 1.975414840612153e-06, "loss": 1.494, "step": 14791 }, { "epoch": 0.8087147875292156, "grad_norm": 1.4673504829406738, "learning_rate": 1.97432457604801e-06, "loss": 1.6947, "step": 14792 }, { "epoch": 0.8087694599729371, "grad_norm": 1.9549901485443115, "learning_rate": 1.973234579480001e-06, "loss": 1.4186, "step": 14793 }, { "epoch": 0.8088241324166587, "grad_norm": 1.554093599319458, "learning_rate": 1.9721448509445264e-06, "loss": 1.6142, "step": 14794 }, { "epoch": 0.8088788048603802, "grad_norm": 1.2853567600250244, "learning_rate": 1.9710553904779708e-06, "loss": 1.6202, "step": 14795 }, { "epoch": 0.8089334773041018, "grad_norm": 1.3363689184188843, "learning_rate": 1.969966198116717e-06, "loss": 1.487, "step": 14796 }, { "epoch": 0.8089881497478234, "grad_norm": 1.4565935134887695, "learning_rate": 1.968877273897136e-06, "loss": 1.6019, "step": 14797 }, { "epoch": 0.8090428221915449, "grad_norm": 2.062960386276245, "learning_rate": 1.967788617855586e-06, "loss": 1.2148, "step": 14798 }, { "epoch": 0.8090974946352665, "grad_norm": 1.948569416999817, "learning_rate": 1.9667002300284255e-06, "loss": 1.3764, "step": 14799 }, { "epoch": 0.809152167078988, "grad_norm": 1.4072643518447876, "learning_rate": 1.965612110451994e-06, "loss": 1.6985, "step": 14800 }, { "epoch": 0.8092068395227096, "grad_norm": 1.5886023044586182, "learning_rate": 1.9645242591626244e-06, "loss": 1.4996, "step": 14801 }, { "epoch": 0.8092615119664311, "grad_norm": 1.2801650762557983, "learning_rate": 1.9634366761966495e-06, "loss": 1.1796, "step": 14802 }, { "epoch": 0.8093161844101526, "grad_norm": 1.9155257940292358, "learning_rate": 1.962349361590381e-06, "loss": 1.4766, "step": 14803 }, { "epoch": 0.8093708568538742, "grad_norm": 1.367571473121643, "learning_rate": 1.9612623153801267e-06, "loss": 1.2728, "step": 14804 }, { "epoch": 0.8094255292975958, "grad_norm": 4.608108997344971, "learning_rate": 1.960175537602189e-06, "loss": 1.5789, "step": 14805 }, { "epoch": 0.8094802017413173, "grad_norm": 1.4073821306228638, "learning_rate": 1.9590890282928574e-06, "loss": 1.4746, "step": 14806 }, { "epoch": 0.8095348741850389, "grad_norm": 1.4316571950912476, "learning_rate": 1.9580027874884112e-06, "loss": 1.4032, "step": 14807 }, { "epoch": 0.8095895466287605, "grad_norm": 1.7158491611480713, "learning_rate": 1.956916815225122e-06, "loss": 1.384, "step": 14808 }, { "epoch": 0.809644219072482, "grad_norm": 1.4977805614471436, "learning_rate": 1.9558311115392524e-06, "loss": 1.2448, "step": 14809 }, { "epoch": 0.8096988915162036, "grad_norm": 1.4886633157730103, "learning_rate": 1.95474567646706e-06, "loss": 1.4652, "step": 14810 }, { "epoch": 0.8097535639599251, "grad_norm": 1.6878379583358765, "learning_rate": 1.953660510044789e-06, "loss": 1.3825, "step": 14811 }, { "epoch": 0.8098082364036466, "grad_norm": 1.7487365007400513, "learning_rate": 1.9525756123086726e-06, "loss": 1.494, "step": 14812 }, { "epoch": 0.8098629088473682, "grad_norm": 1.5566186904907227, "learning_rate": 1.9514909832949427e-06, "loss": 1.2931, "step": 14813 }, { "epoch": 0.8099175812910897, "grad_norm": 1.5839616060256958, "learning_rate": 1.9504066230398156e-06, "loss": 1.4736, "step": 14814 }, { "epoch": 0.8099722537348113, "grad_norm": 1.530752182006836, "learning_rate": 1.949322531579496e-06, "loss": 1.501, "step": 14815 }, { "epoch": 0.8100269261785329, "grad_norm": 2.211151361465454, "learning_rate": 1.948238708950193e-06, "loss": 1.4256, "step": 14816 }, { "epoch": 0.8100815986222544, "grad_norm": 1.554413914680481, "learning_rate": 1.947155155188093e-06, "loss": 1.1295, "step": 14817 }, { "epoch": 0.810136271065976, "grad_norm": 2.3918063640594482, "learning_rate": 1.946071870329377e-06, "loss": 1.3099, "step": 14818 }, { "epoch": 0.8101909435096976, "grad_norm": 1.5896148681640625, "learning_rate": 1.9449888544102215e-06, "loss": 1.2436, "step": 14819 }, { "epoch": 0.810245615953419, "grad_norm": 1.4209911823272705, "learning_rate": 1.943906107466791e-06, "loss": 1.2073, "step": 14820 }, { "epoch": 0.8103002883971406, "grad_norm": 1.6116282939910889, "learning_rate": 1.9428236295352388e-06, "loss": 1.6545, "step": 14821 }, { "epoch": 0.8103549608408622, "grad_norm": 1.6182990074157715, "learning_rate": 1.941741420651714e-06, "loss": 1.4117, "step": 14822 }, { "epoch": 0.8104096332845837, "grad_norm": 1.5398621559143066, "learning_rate": 1.9406594808523484e-06, "loss": 1.5046, "step": 14823 }, { "epoch": 0.8104643057283053, "grad_norm": 1.4181160926818848, "learning_rate": 1.9395778101732777e-06, "loss": 1.4886, "step": 14824 }, { "epoch": 0.8105189781720269, "grad_norm": 2.0557072162628174, "learning_rate": 1.9384964086506185e-06, "loss": 1.276, "step": 14825 }, { "epoch": 0.8105736506157484, "grad_norm": 1.3736714124679565, "learning_rate": 1.9374152763204777e-06, "loss": 1.4246, "step": 14826 }, { "epoch": 0.81062832305947, "grad_norm": 1.4162009954452515, "learning_rate": 1.9363344132189633e-06, "loss": 1.4055, "step": 14827 }, { "epoch": 0.8106829955031915, "grad_norm": 1.4508545398712158, "learning_rate": 1.9352538193821645e-06, "loss": 1.4449, "step": 14828 }, { "epoch": 0.810737667946913, "grad_norm": 1.596169114112854, "learning_rate": 1.9341734948461633e-06, "loss": 1.3052, "step": 14829 }, { "epoch": 0.8107923403906346, "grad_norm": 1.7116124629974365, "learning_rate": 1.933093439647039e-06, "loss": 1.3346, "step": 14830 }, { "epoch": 0.8108470128343561, "grad_norm": 1.5346076488494873, "learning_rate": 1.9320136538208535e-06, "loss": 1.2403, "step": 14831 }, { "epoch": 0.8109016852780777, "grad_norm": 1.5302507877349854, "learning_rate": 1.930934137403665e-06, "loss": 1.473, "step": 14832 }, { "epoch": 0.8109563577217993, "grad_norm": 1.4845325946807861, "learning_rate": 1.9298548904315197e-06, "loss": 1.3017, "step": 14833 }, { "epoch": 0.8110110301655208, "grad_norm": 1.365268349647522, "learning_rate": 1.928775912940454e-06, "loss": 1.345, "step": 14834 }, { "epoch": 0.8110657026092424, "grad_norm": 1.4944590330123901, "learning_rate": 1.9276972049665033e-06, "loss": 1.2567, "step": 14835 }, { "epoch": 0.811120375052964, "grad_norm": 1.3674432039260864, "learning_rate": 1.9266187665456857e-06, "loss": 1.3131, "step": 14836 }, { "epoch": 0.8111750474966855, "grad_norm": 1.781707763671875, "learning_rate": 1.9255405977140083e-06, "loss": 1.6431, "step": 14837 }, { "epoch": 0.811229719940407, "grad_norm": 1.8606183528900146, "learning_rate": 1.924462698507481e-06, "loss": 1.0845, "step": 14838 }, { "epoch": 0.8112843923841286, "grad_norm": 1.4570163488388062, "learning_rate": 1.923385068962095e-06, "loss": 1.2029, "step": 14839 }, { "epoch": 0.8113390648278501, "grad_norm": 1.2344635725021362, "learning_rate": 1.92230770911383e-06, "loss": 1.4225, "step": 14840 }, { "epoch": 0.8113937372715717, "grad_norm": 2.279348850250244, "learning_rate": 1.9212306189986686e-06, "loss": 1.4332, "step": 14841 }, { "epoch": 0.8114484097152932, "grad_norm": 1.5726336240768433, "learning_rate": 1.9201537986525743e-06, "loss": 1.4324, "step": 14842 }, { "epoch": 0.8115030821590148, "grad_norm": 1.898971676826477, "learning_rate": 1.9190772481115017e-06, "loss": 1.531, "step": 14843 }, { "epoch": 0.8115577546027364, "grad_norm": 1.7162779569625854, "learning_rate": 1.9180009674114055e-06, "loss": 1.2111, "step": 14844 }, { "epoch": 0.8116124270464579, "grad_norm": 1.6375552415847778, "learning_rate": 1.916924956588221e-06, "loss": 1.5225, "step": 14845 }, { "epoch": 0.8116670994901795, "grad_norm": 1.5430512428283691, "learning_rate": 1.9158492156778807e-06, "loss": 1.4762, "step": 14846 }, { "epoch": 0.811721771933901, "grad_norm": 1.6365617513656616, "learning_rate": 1.914773744716304e-06, "loss": 1.4129, "step": 14847 }, { "epoch": 0.8117764443776225, "grad_norm": 1.5514543056488037, "learning_rate": 1.913698543739403e-06, "loss": 1.3147, "step": 14848 }, { "epoch": 0.8118311168213441, "grad_norm": 1.728190302848816, "learning_rate": 1.9126236127830843e-06, "loss": 1.5776, "step": 14849 }, { "epoch": 0.8118857892650657, "grad_norm": 1.3990471363067627, "learning_rate": 1.911548951883242e-06, "loss": 1.4692, "step": 14850 }, { "epoch": 0.8119404617087872, "grad_norm": 1.3535374402999878, "learning_rate": 1.910474561075757e-06, "loss": 1.4462, "step": 14851 }, { "epoch": 0.8119951341525088, "grad_norm": 1.7814581394195557, "learning_rate": 1.9094004403965116e-06, "loss": 1.3417, "step": 14852 }, { "epoch": 0.8120498065962304, "grad_norm": 1.7160412073135376, "learning_rate": 1.908326589881372e-06, "loss": 1.2753, "step": 14853 }, { "epoch": 0.8121044790399519, "grad_norm": 1.8211954832077026, "learning_rate": 1.9072530095661912e-06, "loss": 1.3123, "step": 14854 }, { "epoch": 0.8121591514836735, "grad_norm": 1.5133914947509766, "learning_rate": 1.9061796994868254e-06, "loss": 1.4871, "step": 14855 }, { "epoch": 0.8122138239273949, "grad_norm": 1.4606564044952393, "learning_rate": 1.9051066596791124e-06, "loss": 1.5609, "step": 14856 }, { "epoch": 0.8122684963711165, "grad_norm": 1.639121413230896, "learning_rate": 1.9040338901788813e-06, "loss": 1.4096, "step": 14857 }, { "epoch": 0.8123231688148381, "grad_norm": 1.744126796722412, "learning_rate": 1.902961391021958e-06, "loss": 1.5535, "step": 14858 }, { "epoch": 0.8123778412585596, "grad_norm": 1.6377849578857422, "learning_rate": 1.901889162244155e-06, "loss": 1.4476, "step": 14859 }, { "epoch": 0.8124325137022812, "grad_norm": 2.129722833633423, "learning_rate": 1.9008172038812744e-06, "loss": 1.2044, "step": 14860 }, { "epoch": 0.8124871861460028, "grad_norm": 2.0857722759246826, "learning_rate": 1.8997455159691135e-06, "loss": 1.3779, "step": 14861 }, { "epoch": 0.8125418585897243, "grad_norm": 1.4131579399108887, "learning_rate": 1.898674098543456e-06, "loss": 1.3019, "step": 14862 }, { "epoch": 0.8125965310334459, "grad_norm": 1.4321832656860352, "learning_rate": 1.897602951640082e-06, "loss": 1.6408, "step": 14863 }, { "epoch": 0.8126512034771675, "grad_norm": 1.4675805568695068, "learning_rate": 1.8965320752947592e-06, "loss": 1.2899, "step": 14864 }, { "epoch": 0.8127058759208889, "grad_norm": 1.6194961071014404, "learning_rate": 1.8954614695432427e-06, "loss": 1.4045, "step": 14865 }, { "epoch": 0.8127605483646105, "grad_norm": 1.4199141263961792, "learning_rate": 1.8943911344212873e-06, "loss": 1.3903, "step": 14866 }, { "epoch": 0.8128152208083321, "grad_norm": 1.5715140104293823, "learning_rate": 1.8933210699646342e-06, "loss": 1.1154, "step": 14867 }, { "epoch": 0.8128698932520536, "grad_norm": 1.1578865051269531, "learning_rate": 1.8922512762090096e-06, "loss": 1.4527, "step": 14868 }, { "epoch": 0.8129245656957752, "grad_norm": 1.2723734378814697, "learning_rate": 1.8911817531901432e-06, "loss": 1.4292, "step": 14869 }, { "epoch": 0.8129792381394967, "grad_norm": 1.3669531345367432, "learning_rate": 1.890112500943746e-06, "loss": 1.6138, "step": 14870 }, { "epoch": 0.8130339105832183, "grad_norm": 1.7045109272003174, "learning_rate": 1.8890435195055235e-06, "loss": 1.5229, "step": 14871 }, { "epoch": 0.8130885830269399, "grad_norm": 2.0141441822052, "learning_rate": 1.8879748089111693e-06, "loss": 1.6382, "step": 14872 }, { "epoch": 0.8131432554706614, "grad_norm": 1.413406491279602, "learning_rate": 1.886906369196373e-06, "loss": 1.498, "step": 14873 }, { "epoch": 0.813197927914383, "grad_norm": 1.5446794033050537, "learning_rate": 1.885838200396808e-06, "loss": 1.3838, "step": 14874 }, { "epoch": 0.8132526003581045, "grad_norm": 2.2638590335845947, "learning_rate": 1.8847703025481489e-06, "loss": 1.4212, "step": 14875 }, { "epoch": 0.813307272801826, "grad_norm": 1.767890453338623, "learning_rate": 1.88370267568605e-06, "loss": 1.409, "step": 14876 }, { "epoch": 0.8133619452455476, "grad_norm": 1.368931770324707, "learning_rate": 1.8826353198461655e-06, "loss": 1.3842, "step": 14877 }, { "epoch": 0.8134166176892692, "grad_norm": 1.6802937984466553, "learning_rate": 1.8815682350641373e-06, "loss": 1.2281, "step": 14878 }, { "epoch": 0.8134712901329907, "grad_norm": 1.7231611013412476, "learning_rate": 1.8805014213755924e-06, "loss": 1.4382, "step": 14879 }, { "epoch": 0.8135259625767123, "grad_norm": 1.3715484142303467, "learning_rate": 1.879434878816161e-06, "loss": 1.4998, "step": 14880 }, { "epoch": 0.8135806350204339, "grad_norm": 1.2391117811203003, "learning_rate": 1.8783686074214546e-06, "loss": 1.4421, "step": 14881 }, { "epoch": 0.8136353074641554, "grad_norm": 1.499085545539856, "learning_rate": 1.8773026072270762e-06, "loss": 1.3249, "step": 14882 }, { "epoch": 0.813689979907877, "grad_norm": 1.6242737770080566, "learning_rate": 1.8762368782686258e-06, "loss": 1.4699, "step": 14883 }, { "epoch": 0.8137446523515985, "grad_norm": 1.4613869190216064, "learning_rate": 1.8751714205816897e-06, "loss": 1.414, "step": 14884 }, { "epoch": 0.81379932479532, "grad_norm": 1.4749191999435425, "learning_rate": 1.8741062342018458e-06, "loss": 1.4035, "step": 14885 }, { "epoch": 0.8138539972390416, "grad_norm": 1.325481653213501, "learning_rate": 1.8730413191646623e-06, "loss": 1.7021, "step": 14886 }, { "epoch": 0.8139086696827631, "grad_norm": 1.4979959726333618, "learning_rate": 1.871976675505699e-06, "loss": 1.4816, "step": 14887 }, { "epoch": 0.8139633421264847, "grad_norm": 1.322066307067871, "learning_rate": 1.8709123032605058e-06, "loss": 1.4993, "step": 14888 }, { "epoch": 0.8140180145702063, "grad_norm": 1.3800325393676758, "learning_rate": 1.8698482024646291e-06, "loss": 1.5596, "step": 14889 }, { "epoch": 0.8140726870139278, "grad_norm": 1.4836759567260742, "learning_rate": 1.868784373153596e-06, "loss": 1.3948, "step": 14890 }, { "epoch": 0.8141273594576494, "grad_norm": 1.5436679124832153, "learning_rate": 1.8677208153629356e-06, "loss": 1.5316, "step": 14891 }, { "epoch": 0.814182031901371, "grad_norm": 1.4158085584640503, "learning_rate": 1.8666575291281597e-06, "loss": 1.5197, "step": 14892 }, { "epoch": 0.8142367043450924, "grad_norm": 1.854990839958191, "learning_rate": 1.865594514484772e-06, "loss": 1.4067, "step": 14893 }, { "epoch": 0.814291376788814, "grad_norm": 1.6032822132110596, "learning_rate": 1.8645317714682742e-06, "loss": 1.3868, "step": 14894 }, { "epoch": 0.8143460492325356, "grad_norm": 1.393853783607483, "learning_rate": 1.8634693001141513e-06, "loss": 1.4969, "step": 14895 }, { "epoch": 0.8144007216762571, "grad_norm": 1.312023401260376, "learning_rate": 1.8624071004578792e-06, "loss": 1.6296, "step": 14896 }, { "epoch": 0.8144553941199787, "grad_norm": 1.7096223831176758, "learning_rate": 1.8613451725349318e-06, "loss": 1.4832, "step": 14897 }, { "epoch": 0.8145100665637003, "grad_norm": 1.7257556915283203, "learning_rate": 1.8602835163807664e-06, "loss": 1.1908, "step": 14898 }, { "epoch": 0.8145647390074218, "grad_norm": 2.199565887451172, "learning_rate": 1.8592221320308358e-06, "loss": 1.255, "step": 14899 }, { "epoch": 0.8146194114511434, "grad_norm": 1.4457768201828003, "learning_rate": 1.85816101952058e-06, "loss": 1.4738, "step": 14900 }, { "epoch": 0.8146740838948648, "grad_norm": 1.621409296989441, "learning_rate": 1.8571001788854338e-06, "loss": 1.4471, "step": 14901 }, { "epoch": 0.8147287563385864, "grad_norm": 1.8732936382293701, "learning_rate": 1.856039610160818e-06, "loss": 1.2847, "step": 14902 }, { "epoch": 0.814783428782308, "grad_norm": 2.4581046104431152, "learning_rate": 1.8549793133821525e-06, "loss": 1.2361, "step": 14903 }, { "epoch": 0.8148381012260295, "grad_norm": 2.012296199798584, "learning_rate": 1.8539192885848377e-06, "loss": 1.071, "step": 14904 }, { "epoch": 0.8148927736697511, "grad_norm": 1.3621641397476196, "learning_rate": 1.8528595358042768e-06, "loss": 1.6285, "step": 14905 }, { "epoch": 0.8149474461134727, "grad_norm": 1.607576608657837, "learning_rate": 1.851800055075853e-06, "loss": 1.3639, "step": 14906 }, { "epoch": 0.8150021185571942, "grad_norm": 1.3792855739593506, "learning_rate": 1.850740846434943e-06, "loss": 1.4777, "step": 14907 }, { "epoch": 0.8150567910009158, "grad_norm": 1.7536417245864868, "learning_rate": 1.8496819099169227e-06, "loss": 1.438, "step": 14908 }, { "epoch": 0.8151114634446374, "grad_norm": 1.1218374967575073, "learning_rate": 1.8486232455571473e-06, "loss": 1.5697, "step": 14909 }, { "epoch": 0.8151661358883588, "grad_norm": 1.670645833015442, "learning_rate": 1.8475648533909707e-06, "loss": 1.6063, "step": 14910 }, { "epoch": 0.8152208083320804, "grad_norm": 1.4397556781768799, "learning_rate": 1.8465067334537335e-06, "loss": 1.4369, "step": 14911 }, { "epoch": 0.815275480775802, "grad_norm": 1.7478876113891602, "learning_rate": 1.8454488857807684e-06, "loss": 1.5339, "step": 14912 }, { "epoch": 0.8153301532195235, "grad_norm": 1.399569034576416, "learning_rate": 1.8443913104073984e-06, "loss": 1.3056, "step": 14913 }, { "epoch": 0.8153848256632451, "grad_norm": 1.766680121421814, "learning_rate": 1.8433340073689432e-06, "loss": 1.4819, "step": 14914 }, { "epoch": 0.8154394981069666, "grad_norm": 1.1226476430892944, "learning_rate": 1.8422769767007053e-06, "loss": 1.4119, "step": 14915 }, { "epoch": 0.8154941705506882, "grad_norm": 1.372752070426941, "learning_rate": 1.8412202184379801e-06, "loss": 1.2479, "step": 14916 }, { "epoch": 0.8155488429944098, "grad_norm": 1.3040306568145752, "learning_rate": 1.8401637326160582e-06, "loss": 1.5749, "step": 14917 }, { "epoch": 0.8156035154381313, "grad_norm": 1.5827975273132324, "learning_rate": 1.8391075192702179e-06, "loss": 1.453, "step": 14918 }, { "epoch": 0.8156581878818528, "grad_norm": 1.360478401184082, "learning_rate": 1.8380515784357245e-06, "loss": 1.2567, "step": 14919 }, { "epoch": 0.8157128603255744, "grad_norm": 1.3944910764694214, "learning_rate": 1.836995910147845e-06, "loss": 1.5667, "step": 14920 }, { "epoch": 0.8157675327692959, "grad_norm": 1.5425097942352295, "learning_rate": 1.8359405144418241e-06, "loss": 1.3725, "step": 14921 }, { "epoch": 0.8158222052130175, "grad_norm": 1.3940343856811523, "learning_rate": 1.8348853913529085e-06, "loss": 1.5835, "step": 14922 }, { "epoch": 0.8158768776567391, "grad_norm": 1.7457313537597656, "learning_rate": 1.8338305409163314e-06, "loss": 1.5933, "step": 14923 }, { "epoch": 0.8159315501004606, "grad_norm": 1.5767369270324707, "learning_rate": 1.8327759631673136e-06, "loss": 1.5401, "step": 14924 }, { "epoch": 0.8159862225441822, "grad_norm": 1.5588761568069458, "learning_rate": 1.8317216581410725e-06, "loss": 1.3345, "step": 14925 }, { "epoch": 0.8160408949879038, "grad_norm": 1.3472002744674683, "learning_rate": 1.8306676258728118e-06, "loss": 1.2517, "step": 14926 }, { "epoch": 0.8160955674316253, "grad_norm": 1.6708173751831055, "learning_rate": 1.829613866397727e-06, "loss": 1.3752, "step": 14927 }, { "epoch": 0.8161502398753469, "grad_norm": 1.2539175748825073, "learning_rate": 1.8285603797510098e-06, "loss": 1.4881, "step": 14928 }, { "epoch": 0.8162049123190683, "grad_norm": 1.4299139976501465, "learning_rate": 1.8275071659678367e-06, "loss": 1.4438, "step": 14929 }, { "epoch": 0.8162595847627899, "grad_norm": 1.7052541971206665, "learning_rate": 1.826454225083375e-06, "loss": 1.3752, "step": 14930 }, { "epoch": 0.8163142572065115, "grad_norm": 2.2435548305511475, "learning_rate": 1.8254015571327876e-06, "loss": 1.3216, "step": 14931 }, { "epoch": 0.816368929650233, "grad_norm": 1.4217942953109741, "learning_rate": 1.8243491621512255e-06, "loss": 1.7372, "step": 14932 }, { "epoch": 0.8164236020939546, "grad_norm": 1.3955280780792236, "learning_rate": 1.823297040173826e-06, "loss": 1.5393, "step": 14933 }, { "epoch": 0.8164782745376762, "grad_norm": 1.8924213647842407, "learning_rate": 1.8222451912357287e-06, "loss": 1.2472, "step": 14934 }, { "epoch": 0.8165329469813977, "grad_norm": 1.9870953559875488, "learning_rate": 1.8211936153720523e-06, "loss": 1.3957, "step": 14935 }, { "epoch": 0.8165876194251193, "grad_norm": 1.4041632413864136, "learning_rate": 1.820142312617915e-06, "loss": 1.4576, "step": 14936 }, { "epoch": 0.8166422918688409, "grad_norm": 1.6430104970932007, "learning_rate": 1.8190912830084207e-06, "loss": 1.3365, "step": 14937 }, { "epoch": 0.8166969643125623, "grad_norm": 1.6047948598861694, "learning_rate": 1.818040526578666e-06, "loss": 1.1796, "step": 14938 }, { "epoch": 0.8167516367562839, "grad_norm": 2.5284435749053955, "learning_rate": 1.8169900433637366e-06, "loss": 1.3285, "step": 14939 }, { "epoch": 0.8168063092000055, "grad_norm": 1.6277893781661987, "learning_rate": 1.8159398333987133e-06, "loss": 1.4722, "step": 14940 }, { "epoch": 0.816860981643727, "grad_norm": 1.6950756311416626, "learning_rate": 1.814889896718659e-06, "loss": 1.5583, "step": 14941 }, { "epoch": 0.8169156540874486, "grad_norm": 1.6813544034957886, "learning_rate": 1.8138402333586425e-06, "loss": 1.3968, "step": 14942 }, { "epoch": 0.8169703265311701, "grad_norm": 1.4777780771255493, "learning_rate": 1.8127908433537088e-06, "loss": 1.5878, "step": 14943 }, { "epoch": 0.8170249989748917, "grad_norm": 1.9605796337127686, "learning_rate": 1.811741726738898e-06, "loss": 1.2174, "step": 14944 }, { "epoch": 0.8170796714186133, "grad_norm": 1.278592824935913, "learning_rate": 1.8106928835492488e-06, "loss": 1.6027, "step": 14945 }, { "epoch": 0.8171343438623347, "grad_norm": 1.8733216524124146, "learning_rate": 1.8096443138197806e-06, "loss": 1.4206, "step": 14946 }, { "epoch": 0.8171890163060563, "grad_norm": 1.2272202968597412, "learning_rate": 1.8085960175855056e-06, "loss": 1.431, "step": 14947 }, { "epoch": 0.8172436887497779, "grad_norm": 1.4500892162322998, "learning_rate": 1.8075479948814334e-06, "loss": 1.4001, "step": 14948 }, { "epoch": 0.8172983611934994, "grad_norm": 1.8085169792175293, "learning_rate": 1.8065002457425583e-06, "loss": 1.3403, "step": 14949 }, { "epoch": 0.817353033637221, "grad_norm": 1.2572602033615112, "learning_rate": 1.805452770203866e-06, "loss": 1.6017, "step": 14950 }, { "epoch": 0.8174077060809426, "grad_norm": 1.4727051258087158, "learning_rate": 1.8044055683003358e-06, "loss": 1.4026, "step": 14951 }, { "epoch": 0.8174623785246641, "grad_norm": 1.7094738483428955, "learning_rate": 1.8033586400669322e-06, "loss": 1.3296, "step": 14952 }, { "epoch": 0.8175170509683857, "grad_norm": 1.7630393505096436, "learning_rate": 1.8023119855386196e-06, "loss": 1.4079, "step": 14953 }, { "epoch": 0.8175717234121073, "grad_norm": 1.8473546504974365, "learning_rate": 1.8012656047503475e-06, "loss": 1.1694, "step": 14954 }, { "epoch": 0.8176263958558287, "grad_norm": 1.5115573406219482, "learning_rate": 1.8002194977370523e-06, "loss": 1.6331, "step": 14955 }, { "epoch": 0.8176810682995503, "grad_norm": 1.4675099849700928, "learning_rate": 1.799173664533672e-06, "loss": 1.4868, "step": 14956 }, { "epoch": 0.8177357407432718, "grad_norm": 1.3802176713943481, "learning_rate": 1.7981281051751276e-06, "loss": 1.4489, "step": 14957 }, { "epoch": 0.8177904131869934, "grad_norm": 1.4271668195724487, "learning_rate": 1.7970828196963286e-06, "loss": 1.2999, "step": 14958 }, { "epoch": 0.817845085630715, "grad_norm": 1.7908577919006348, "learning_rate": 1.796037808132186e-06, "loss": 1.7981, "step": 14959 }, { "epoch": 0.8178997580744365, "grad_norm": 1.345895767211914, "learning_rate": 1.7949930705175922e-06, "loss": 1.5226, "step": 14960 }, { "epoch": 0.8179544305181581, "grad_norm": 2.18926739692688, "learning_rate": 1.7939486068874311e-06, "loss": 1.3196, "step": 14961 }, { "epoch": 0.8180091029618797, "grad_norm": 1.0981839895248413, "learning_rate": 1.792904417276584e-06, "loss": 1.7785, "step": 14962 }, { "epoch": 0.8180637754056012, "grad_norm": 1.3972983360290527, "learning_rate": 1.7918605017199176e-06, "loss": 1.4907, "step": 14963 }, { "epoch": 0.8181184478493227, "grad_norm": 2.0709848403930664, "learning_rate": 1.7908168602522903e-06, "loss": 1.5038, "step": 14964 }, { "epoch": 0.8181731202930443, "grad_norm": 1.716544270515442, "learning_rate": 1.7897734929085508e-06, "loss": 1.3488, "step": 14965 }, { "epoch": 0.8182277927367658, "grad_norm": 2.272505044937134, "learning_rate": 1.7887303997235372e-06, "loss": 1.4315, "step": 14966 }, { "epoch": 0.8182824651804874, "grad_norm": 1.6141269207000732, "learning_rate": 1.7876875807320881e-06, "loss": 1.2432, "step": 14967 }, { "epoch": 0.818337137624209, "grad_norm": 1.1532588005065918, "learning_rate": 1.7866450359690203e-06, "loss": 1.6913, "step": 14968 }, { "epoch": 0.8183918100679305, "grad_norm": 1.518720269203186, "learning_rate": 1.7856027654691454e-06, "loss": 1.4921, "step": 14969 }, { "epoch": 0.8184464825116521, "grad_norm": 1.7560019493103027, "learning_rate": 1.784560769267273e-06, "loss": 1.4572, "step": 14970 }, { "epoch": 0.8185011549553736, "grad_norm": 1.4785895347595215, "learning_rate": 1.7835190473981945e-06, "loss": 1.5255, "step": 14971 }, { "epoch": 0.8185558273990952, "grad_norm": 1.3808858394622803, "learning_rate": 1.7824775998966926e-06, "loss": 1.4175, "step": 14972 }, { "epoch": 0.8186104998428168, "grad_norm": 1.3745962381362915, "learning_rate": 1.7814364267975493e-06, "loss": 1.4434, "step": 14973 }, { "epoch": 0.8186651722865382, "grad_norm": 1.659261703491211, "learning_rate": 1.7803955281355302e-06, "loss": 1.6441, "step": 14974 }, { "epoch": 0.8187198447302598, "grad_norm": 1.981749415397644, "learning_rate": 1.7793549039453905e-06, "loss": 1.2157, "step": 14975 }, { "epoch": 0.8187745171739814, "grad_norm": 1.7530975341796875, "learning_rate": 1.7783145542618819e-06, "loss": 1.4443, "step": 14976 }, { "epoch": 0.8188291896177029, "grad_norm": 1.561483383178711, "learning_rate": 1.7772744791197406e-06, "loss": 1.5533, "step": 14977 }, { "epoch": 0.8188838620614245, "grad_norm": 1.627913236618042, "learning_rate": 1.776234678553702e-06, "loss": 1.4668, "step": 14978 }, { "epoch": 0.8189385345051461, "grad_norm": 1.8386328220367432, "learning_rate": 1.7751951525984857e-06, "loss": 1.5848, "step": 14979 }, { "epoch": 0.8189932069488676, "grad_norm": 1.6495826244354248, "learning_rate": 1.774155901288801e-06, "loss": 1.2967, "step": 14980 }, { "epoch": 0.8190478793925892, "grad_norm": 2.1482253074645996, "learning_rate": 1.773116924659355e-06, "loss": 1.3355, "step": 14981 }, { "epoch": 0.8191025518363108, "grad_norm": 1.2767086029052734, "learning_rate": 1.7720782227448407e-06, "loss": 1.5809, "step": 14982 }, { "epoch": 0.8191572242800322, "grad_norm": 1.673573613166809, "learning_rate": 1.7710397955799386e-06, "loss": 1.5339, "step": 14983 }, { "epoch": 0.8192118967237538, "grad_norm": 1.8650500774383545, "learning_rate": 1.7700016431993305e-06, "loss": 1.4615, "step": 14984 }, { "epoch": 0.8192665691674753, "grad_norm": 2.4117703437805176, "learning_rate": 1.768963765637679e-06, "loss": 1.3533, "step": 14985 }, { "epoch": 0.8193212416111969, "grad_norm": 1.5899204015731812, "learning_rate": 1.7679261629296408e-06, "loss": 1.2326, "step": 14986 }, { "epoch": 0.8193759140549185, "grad_norm": 1.6241919994354248, "learning_rate": 1.7668888351098678e-06, "loss": 1.5076, "step": 14987 }, { "epoch": 0.81943058649864, "grad_norm": 1.3953628540039062, "learning_rate": 1.765851782212995e-06, "loss": 1.7176, "step": 14988 }, { "epoch": 0.8194852589423616, "grad_norm": 1.5740333795547485, "learning_rate": 1.7648150042736546e-06, "loss": 1.3058, "step": 14989 }, { "epoch": 0.8195399313860832, "grad_norm": 1.4802085161209106, "learning_rate": 1.763778501326464e-06, "loss": 1.5168, "step": 14990 }, { "epoch": 0.8195946038298046, "grad_norm": 2.0631802082061768, "learning_rate": 1.7627422734060352e-06, "loss": 1.3648, "step": 14991 }, { "epoch": 0.8196492762735262, "grad_norm": 1.3672784566879272, "learning_rate": 1.761706320546973e-06, "loss": 1.4259, "step": 14992 }, { "epoch": 0.8197039487172478, "grad_norm": 1.6191004514694214, "learning_rate": 1.7606706427838682e-06, "loss": 1.1136, "step": 14993 }, { "epoch": 0.8197586211609693, "grad_norm": 1.2080897092819214, "learning_rate": 1.7596352401513027e-06, "loss": 1.6043, "step": 14994 }, { "epoch": 0.8198132936046909, "grad_norm": 1.7873570919036865, "learning_rate": 1.7586001126838558e-06, "loss": 1.1356, "step": 14995 }, { "epoch": 0.8198679660484125, "grad_norm": 1.4454959630966187, "learning_rate": 1.7575652604160898e-06, "loss": 1.4336, "step": 14996 }, { "epoch": 0.819922638492134, "grad_norm": 1.7116316556930542, "learning_rate": 1.756530683382559e-06, "loss": 1.3614, "step": 14997 }, { "epoch": 0.8199773109358556, "grad_norm": 1.5091569423675537, "learning_rate": 1.7554963816178162e-06, "loss": 1.3194, "step": 14998 }, { "epoch": 0.8200319833795771, "grad_norm": 1.9282560348510742, "learning_rate": 1.7544623551563932e-06, "loss": 1.3413, "step": 14999 }, { "epoch": 0.8200866558232986, "grad_norm": 1.633510947227478, "learning_rate": 1.7534286040328208e-06, "loss": 1.4323, "step": 15000 }, { "epoch": 0.8201413282670202, "grad_norm": 1.7138088941574097, "learning_rate": 1.7523951282816199e-06, "loss": 1.3747, "step": 15001 }, { "epoch": 0.8201960007107417, "grad_norm": 1.4756289720535278, "learning_rate": 1.7513619279372984e-06, "loss": 1.5266, "step": 15002 }, { "epoch": 0.8202506731544633, "grad_norm": 1.9847631454467773, "learning_rate": 1.750329003034359e-06, "loss": 1.2417, "step": 15003 }, { "epoch": 0.8203053455981849, "grad_norm": 1.8687888383865356, "learning_rate": 1.7492963536072927e-06, "loss": 1.2287, "step": 15004 }, { "epoch": 0.8203600180419064, "grad_norm": 1.5645359754562378, "learning_rate": 1.7482639796905798e-06, "loss": 1.357, "step": 15005 }, { "epoch": 0.820414690485628, "grad_norm": 1.9593055248260498, "learning_rate": 1.7472318813186984e-06, "loss": 1.5881, "step": 15006 }, { "epoch": 0.8204693629293496, "grad_norm": 1.5878357887268066, "learning_rate": 1.7462000585261096e-06, "loss": 1.218, "step": 15007 }, { "epoch": 0.8205240353730711, "grad_norm": 1.4793121814727783, "learning_rate": 1.7451685113472673e-06, "loss": 1.5673, "step": 15008 }, { "epoch": 0.8205787078167927, "grad_norm": 1.6588600873947144, "learning_rate": 1.74413723981662e-06, "loss": 1.3978, "step": 15009 }, { "epoch": 0.8206333802605142, "grad_norm": 1.6065438985824585, "learning_rate": 1.7431062439686052e-06, "loss": 1.3904, "step": 15010 }, { "epoch": 0.8206880527042357, "grad_norm": 1.346671462059021, "learning_rate": 1.742075523837644e-06, "loss": 1.4772, "step": 15011 }, { "epoch": 0.8207427251479573, "grad_norm": 1.5801355838775635, "learning_rate": 1.7410450794581623e-06, "loss": 1.4432, "step": 15012 }, { "epoch": 0.8207973975916788, "grad_norm": 1.5389597415924072, "learning_rate": 1.7400149108645658e-06, "loss": 1.3499, "step": 15013 }, { "epoch": 0.8208520700354004, "grad_norm": 1.4381380081176758, "learning_rate": 1.7389850180912537e-06, "loss": 1.315, "step": 15014 }, { "epoch": 0.820906742479122, "grad_norm": 1.856224775314331, "learning_rate": 1.7379554011726175e-06, "loss": 1.5784, "step": 15015 }, { "epoch": 0.8209614149228435, "grad_norm": 1.533500075340271, "learning_rate": 1.7369260601430371e-06, "loss": 1.4269, "step": 15016 }, { "epoch": 0.8210160873665651, "grad_norm": 1.5013587474822998, "learning_rate": 1.7358969950368842e-06, "loss": 1.6411, "step": 15017 }, { "epoch": 0.8210707598102867, "grad_norm": 1.3977473974227905, "learning_rate": 1.7348682058885247e-06, "loss": 1.4934, "step": 15018 }, { "epoch": 0.8211254322540081, "grad_norm": 1.7798048257827759, "learning_rate": 1.7338396927323076e-06, "loss": 1.6196, "step": 15019 }, { "epoch": 0.8211801046977297, "grad_norm": 1.62934410572052, "learning_rate": 1.7328114556025832e-06, "loss": 1.341, "step": 15020 }, { "epoch": 0.8212347771414513, "grad_norm": 2.268944025039673, "learning_rate": 1.7317834945336843e-06, "loss": 1.1236, "step": 15021 }, { "epoch": 0.8212894495851728, "grad_norm": 1.5163646936416626, "learning_rate": 1.7307558095599332e-06, "loss": 1.3489, "step": 15022 }, { "epoch": 0.8213441220288944, "grad_norm": 1.407531499862671, "learning_rate": 1.7297284007156533e-06, "loss": 1.1954, "step": 15023 }, { "epoch": 0.821398794472616, "grad_norm": 1.307034969329834, "learning_rate": 1.7287012680351479e-06, "loss": 1.5695, "step": 15024 }, { "epoch": 0.8214534669163375, "grad_norm": 1.8112014532089233, "learning_rate": 1.7276744115527144e-06, "loss": 1.4467, "step": 15025 }, { "epoch": 0.8215081393600591, "grad_norm": 1.5631928443908691, "learning_rate": 1.7266478313026469e-06, "loss": 1.5761, "step": 15026 }, { "epoch": 0.8215628118037805, "grad_norm": 1.0738457441329956, "learning_rate": 1.7256215273192223e-06, "loss": 1.4956, "step": 15027 }, { "epoch": 0.8216174842475021, "grad_norm": 1.6521073579788208, "learning_rate": 1.724595499636711e-06, "loss": 1.2926, "step": 15028 }, { "epoch": 0.8216721566912237, "grad_norm": 1.5041625499725342, "learning_rate": 1.7235697482893743e-06, "loss": 1.4592, "step": 15029 }, { "epoch": 0.8217268291349452, "grad_norm": 1.4354093074798584, "learning_rate": 1.722544273311465e-06, "loss": 1.3944, "step": 15030 }, { "epoch": 0.8217815015786668, "grad_norm": 1.3857470750808716, "learning_rate": 1.7215190747372246e-06, "loss": 1.4664, "step": 15031 }, { "epoch": 0.8218361740223884, "grad_norm": 1.5470558404922485, "learning_rate": 1.72049415260089e-06, "loss": 1.2903, "step": 15032 }, { "epoch": 0.8218908464661099, "grad_norm": 1.1206464767456055, "learning_rate": 1.7194695069366818e-06, "loss": 1.6586, "step": 15033 }, { "epoch": 0.8219455189098315, "grad_norm": 1.3802363872528076, "learning_rate": 1.7184451377788202e-06, "loss": 1.3693, "step": 15034 }, { "epoch": 0.8220001913535531, "grad_norm": 1.529888391494751, "learning_rate": 1.7174210451615091e-06, "loss": 1.3665, "step": 15035 }, { "epoch": 0.8220548637972745, "grad_norm": 1.8060747385025024, "learning_rate": 1.7163972291189423e-06, "loss": 1.3324, "step": 15036 }, { "epoch": 0.8221095362409961, "grad_norm": 1.1966975927352905, "learning_rate": 1.7153736896853124e-06, "loss": 1.3539, "step": 15037 }, { "epoch": 0.8221642086847177, "grad_norm": 2.1640124320983887, "learning_rate": 1.7143504268947952e-06, "loss": 1.527, "step": 15038 }, { "epoch": 0.8222188811284392, "grad_norm": 1.2725389003753662, "learning_rate": 1.7133274407815581e-06, "loss": 1.4031, "step": 15039 }, { "epoch": 0.8222735535721608, "grad_norm": 1.5772544145584106, "learning_rate": 1.7123047313797657e-06, "loss": 1.5969, "step": 15040 }, { "epoch": 0.8223282260158823, "grad_norm": 1.1923902034759521, "learning_rate": 1.7112822987235656e-06, "loss": 1.5443, "step": 15041 }, { "epoch": 0.8223828984596039, "grad_norm": 1.4004853963851929, "learning_rate": 1.7102601428470988e-06, "loss": 1.282, "step": 15042 }, { "epoch": 0.8224375709033255, "grad_norm": 1.2434544563293457, "learning_rate": 1.7092382637844995e-06, "loss": 1.4835, "step": 15043 }, { "epoch": 0.822492243347047, "grad_norm": 2.0813605785369873, "learning_rate": 1.7082166615698893e-06, "loss": 1.3878, "step": 15044 }, { "epoch": 0.8225469157907686, "grad_norm": 1.5799589157104492, "learning_rate": 1.7071953362373795e-06, "loss": 1.7046, "step": 15045 }, { "epoch": 0.8226015882344901, "grad_norm": 1.9524565935134888, "learning_rate": 1.7061742878210797e-06, "loss": 1.5176, "step": 15046 }, { "epoch": 0.8226562606782116, "grad_norm": 1.6204607486724854, "learning_rate": 1.7051535163550804e-06, "loss": 1.3696, "step": 15047 }, { "epoch": 0.8227109331219332, "grad_norm": 1.635118007659912, "learning_rate": 1.704133021873471e-06, "loss": 1.5934, "step": 15048 }, { "epoch": 0.8227656055656548, "grad_norm": 1.5767203569412231, "learning_rate": 1.7031128044103272e-06, "loss": 1.3564, "step": 15049 }, { "epoch": 0.8228202780093763, "grad_norm": 1.5896800756454468, "learning_rate": 1.7020928639997136e-06, "loss": 1.3515, "step": 15050 }, { "epoch": 0.8228749504530979, "grad_norm": 1.3958868980407715, "learning_rate": 1.7010732006756948e-06, "loss": 1.5944, "step": 15051 }, { "epoch": 0.8229296228968195, "grad_norm": 1.4271385669708252, "learning_rate": 1.7000538144723145e-06, "loss": 1.3963, "step": 15052 }, { "epoch": 0.822984295340541, "grad_norm": 1.3413927555084229, "learning_rate": 1.6990347054236134e-06, "loss": 1.6424, "step": 15053 }, { "epoch": 0.8230389677842626, "grad_norm": 1.6009918451309204, "learning_rate": 1.698015873563623e-06, "loss": 1.4034, "step": 15054 }, { "epoch": 0.823093640227984, "grad_norm": 1.3993295431137085, "learning_rate": 1.6969973189263644e-06, "loss": 1.3509, "step": 15055 }, { "epoch": 0.8231483126717056, "grad_norm": 1.629136085510254, "learning_rate": 1.6959790415458454e-06, "loss": 1.2998, "step": 15056 }, { "epoch": 0.8232029851154272, "grad_norm": 1.4852023124694824, "learning_rate": 1.6949610414560746e-06, "loss": 1.499, "step": 15057 }, { "epoch": 0.8232576575591487, "grad_norm": 1.6557408571243286, "learning_rate": 1.6939433186910436e-06, "loss": 1.2012, "step": 15058 }, { "epoch": 0.8233123300028703, "grad_norm": 1.5446436405181885, "learning_rate": 1.6929258732847332e-06, "loss": 1.3834, "step": 15059 }, { "epoch": 0.8233670024465919, "grad_norm": 1.5258162021636963, "learning_rate": 1.6919087052711236e-06, "loss": 1.6764, "step": 15060 }, { "epoch": 0.8234216748903134, "grad_norm": 1.726033091545105, "learning_rate": 1.6908918146841758e-06, "loss": 1.5631, "step": 15061 }, { "epoch": 0.823476347334035, "grad_norm": 1.6232640743255615, "learning_rate": 1.68987520155785e-06, "loss": 1.2717, "step": 15062 }, { "epoch": 0.8235310197777566, "grad_norm": 1.721613883972168, "learning_rate": 1.6888588659260929e-06, "loss": 1.4721, "step": 15063 }, { "epoch": 0.823585692221478, "grad_norm": 1.3910332918167114, "learning_rate": 1.687842807822837e-06, "loss": 1.5621, "step": 15064 }, { "epoch": 0.8236403646651996, "grad_norm": 3.1236486434936523, "learning_rate": 1.6868270272820175e-06, "loss": 1.3464, "step": 15065 }, { "epoch": 0.8236950371089212, "grad_norm": 1.4895700216293335, "learning_rate": 1.6858115243375516e-06, "loss": 1.3805, "step": 15066 }, { "epoch": 0.8237497095526427, "grad_norm": 2.030181884765625, "learning_rate": 1.684796299023349e-06, "loss": 1.4358, "step": 15067 }, { "epoch": 0.8238043819963643, "grad_norm": 1.586319088935852, "learning_rate": 1.6837813513733093e-06, "loss": 1.3095, "step": 15068 }, { "epoch": 0.8238590544400858, "grad_norm": 1.5135315656661987, "learning_rate": 1.682766681421325e-06, "loss": 1.4648, "step": 15069 }, { "epoch": 0.8239137268838074, "grad_norm": 2.034872055053711, "learning_rate": 1.6817522892012762e-06, "loss": 1.2156, "step": 15070 }, { "epoch": 0.823968399327529, "grad_norm": 1.8316742181777954, "learning_rate": 1.6807381747470408e-06, "loss": 1.2185, "step": 15071 }, { "epoch": 0.8240230717712504, "grad_norm": 1.8089942932128906, "learning_rate": 1.6797243380924788e-06, "loss": 1.5591, "step": 15072 }, { "epoch": 0.824077744214972, "grad_norm": 1.8100264072418213, "learning_rate": 1.678710779271443e-06, "loss": 1.1886, "step": 15073 }, { "epoch": 0.8241324166586936, "grad_norm": 1.8206799030303955, "learning_rate": 1.677697498317783e-06, "loss": 1.5771, "step": 15074 }, { "epoch": 0.8241870891024151, "grad_norm": 1.6306854486465454, "learning_rate": 1.6766844952653294e-06, "loss": 1.3593, "step": 15075 }, { "epoch": 0.8242417615461367, "grad_norm": 1.7627569437026978, "learning_rate": 1.6756717701479152e-06, "loss": 1.5986, "step": 15076 }, { "epoch": 0.8242964339898583, "grad_norm": 1.4184668064117432, "learning_rate": 1.6746593229993545e-06, "loss": 1.3827, "step": 15077 }, { "epoch": 0.8243511064335798, "grad_norm": 1.617637276649475, "learning_rate": 1.6736471538534516e-06, "loss": 1.4047, "step": 15078 }, { "epoch": 0.8244057788773014, "grad_norm": 1.670559287071228, "learning_rate": 1.6726352627440122e-06, "loss": 1.4469, "step": 15079 }, { "epoch": 0.824460451321023, "grad_norm": 1.1523040533065796, "learning_rate": 1.6716236497048211e-06, "loss": 1.4778, "step": 15080 }, { "epoch": 0.8245151237647445, "grad_norm": 1.3972673416137695, "learning_rate": 1.6706123147696596e-06, "loss": 1.2537, "step": 15081 }, { "epoch": 0.824569796208466, "grad_norm": 1.4465230703353882, "learning_rate": 1.6696012579722986e-06, "loss": 1.2937, "step": 15082 }, { "epoch": 0.8246244686521876, "grad_norm": 1.65803062915802, "learning_rate": 1.6685904793465003e-06, "loss": 1.4383, "step": 15083 }, { "epoch": 0.8246791410959091, "grad_norm": 1.5290396213531494, "learning_rate": 1.6675799789260128e-06, "loss": 1.5213, "step": 15084 }, { "epoch": 0.8247338135396307, "grad_norm": 1.2775617837905884, "learning_rate": 1.6665697567445848e-06, "loss": 1.7235, "step": 15085 }, { "epoch": 0.8247884859833522, "grad_norm": 1.1792149543762207, "learning_rate": 1.6655598128359486e-06, "loss": 1.4469, "step": 15086 }, { "epoch": 0.8248431584270738, "grad_norm": 1.364193320274353, "learning_rate": 1.6645501472338243e-06, "loss": 1.6294, "step": 15087 }, { "epoch": 0.8248978308707954, "grad_norm": 1.3842021226882935, "learning_rate": 1.6635407599719332e-06, "loss": 1.4244, "step": 15088 }, { "epoch": 0.8249525033145169, "grad_norm": 1.584132432937622, "learning_rate": 1.6625316510839752e-06, "loss": 1.477, "step": 15089 }, { "epoch": 0.8250071757582385, "grad_norm": 1.9626396894454956, "learning_rate": 1.6615228206036527e-06, "loss": 1.4755, "step": 15090 }, { "epoch": 0.82506184820196, "grad_norm": 1.5700838565826416, "learning_rate": 1.6605142685646503e-06, "loss": 1.4527, "step": 15091 }, { "epoch": 0.8251165206456815, "grad_norm": 1.7520768642425537, "learning_rate": 1.6595059950006454e-06, "loss": 1.4513, "step": 15092 }, { "epoch": 0.8251711930894031, "grad_norm": 1.552108883857727, "learning_rate": 1.6584979999453065e-06, "loss": 1.48, "step": 15093 }, { "epoch": 0.8252258655331247, "grad_norm": 1.3058634996414185, "learning_rate": 1.6574902834322937e-06, "loss": 1.5999, "step": 15094 }, { "epoch": 0.8252805379768462, "grad_norm": 1.3691312074661255, "learning_rate": 1.656482845495254e-06, "loss": 1.3911, "step": 15095 }, { "epoch": 0.8253352104205678, "grad_norm": 1.215587854385376, "learning_rate": 1.6554756861678345e-06, "loss": 1.4717, "step": 15096 }, { "epoch": 0.8253898828642894, "grad_norm": 1.7615493535995483, "learning_rate": 1.6544688054836611e-06, "loss": 1.4153, "step": 15097 }, { "epoch": 0.8254445553080109, "grad_norm": 1.3912702798843384, "learning_rate": 1.6534622034763558e-06, "loss": 1.3371, "step": 15098 }, { "epoch": 0.8254992277517325, "grad_norm": 1.6873447895050049, "learning_rate": 1.6524558801795366e-06, "loss": 1.3874, "step": 15099 }, { "epoch": 0.8255539001954539, "grad_norm": 1.7431704998016357, "learning_rate": 1.6514498356268027e-06, "loss": 1.2031, "step": 15100 }, { "epoch": 0.8256085726391755, "grad_norm": 1.594075083732605, "learning_rate": 1.6504440698517477e-06, "loss": 1.6072, "step": 15101 }, { "epoch": 0.8256632450828971, "grad_norm": 1.5299710035324097, "learning_rate": 1.64943858288796e-06, "loss": 1.4977, "step": 15102 }, { "epoch": 0.8257179175266186, "grad_norm": 1.8478952646255493, "learning_rate": 1.6484333747690107e-06, "loss": 1.4351, "step": 15103 }, { "epoch": 0.8257725899703402, "grad_norm": 1.3401174545288086, "learning_rate": 1.6474284455284707e-06, "loss": 1.2994, "step": 15104 }, { "epoch": 0.8258272624140618, "grad_norm": 1.3900691270828247, "learning_rate": 1.6464237951998952e-06, "loss": 1.7493, "step": 15105 }, { "epoch": 0.8258819348577833, "grad_norm": 1.5989068746566772, "learning_rate": 1.645419423816832e-06, "loss": 1.3746, "step": 15106 }, { "epoch": 0.8259366073015049, "grad_norm": 2.1624927520751953, "learning_rate": 1.6444153314128175e-06, "loss": 1.2524, "step": 15107 }, { "epoch": 0.8259912797452265, "grad_norm": 1.4254601001739502, "learning_rate": 1.6434115180213828e-06, "loss": 1.4837, "step": 15108 }, { "epoch": 0.8260459521889479, "grad_norm": 1.5762628316879272, "learning_rate": 1.6424079836760454e-06, "loss": 1.3246, "step": 15109 }, { "epoch": 0.8261006246326695, "grad_norm": 2.1664764881134033, "learning_rate": 1.6414047284103185e-06, "loss": 1.3254, "step": 15110 }, { "epoch": 0.8261552970763911, "grad_norm": 1.9948267936706543, "learning_rate": 1.640401752257702e-06, "loss": 1.4153, "step": 15111 }, { "epoch": 0.8262099695201126, "grad_norm": 2.0782933235168457, "learning_rate": 1.6393990552516848e-06, "loss": 1.3862, "step": 15112 }, { "epoch": 0.8262646419638342, "grad_norm": 1.8716461658477783, "learning_rate": 1.6383966374257544e-06, "loss": 1.4295, "step": 15113 }, { "epoch": 0.8263193144075557, "grad_norm": 1.4918498992919922, "learning_rate": 1.6373944988133817e-06, "loss": 1.4638, "step": 15114 }, { "epoch": 0.8263739868512773, "grad_norm": 1.4208561182022095, "learning_rate": 1.636392639448028e-06, "loss": 1.2113, "step": 15115 }, { "epoch": 0.8264286592949989, "grad_norm": 1.4834097623825073, "learning_rate": 1.6353910593631507e-06, "loss": 1.4041, "step": 15116 }, { "epoch": 0.8264833317387204, "grad_norm": 1.3729043006896973, "learning_rate": 1.634389758592193e-06, "loss": 1.6128, "step": 15117 }, { "epoch": 0.8265380041824419, "grad_norm": 1.8252846002578735, "learning_rate": 1.633388737168594e-06, "loss": 1.1203, "step": 15118 }, { "epoch": 0.8265926766261635, "grad_norm": 1.3369181156158447, "learning_rate": 1.6323879951257783e-06, "loss": 1.5639, "step": 15119 }, { "epoch": 0.826647349069885, "grad_norm": 1.659250259399414, "learning_rate": 1.6313875324971618e-06, "loss": 1.3509, "step": 15120 }, { "epoch": 0.8267020215136066, "grad_norm": 1.473406195640564, "learning_rate": 1.6303873493161538e-06, "loss": 1.497, "step": 15121 }, { "epoch": 0.8267566939573282, "grad_norm": 1.610121250152588, "learning_rate": 1.6293874456161518e-06, "loss": 1.453, "step": 15122 }, { "epoch": 0.8268113664010497, "grad_norm": 1.1596318483352661, "learning_rate": 1.6283878214305438e-06, "loss": 1.5471, "step": 15123 }, { "epoch": 0.8268660388447713, "grad_norm": 1.5167474746704102, "learning_rate": 1.6273884767927117e-06, "loss": 1.4949, "step": 15124 }, { "epoch": 0.8269207112884929, "grad_norm": 1.5532985925674438, "learning_rate": 1.6263894117360268e-06, "loss": 1.5776, "step": 15125 }, { "epoch": 0.8269753837322144, "grad_norm": 1.715897798538208, "learning_rate": 1.6253906262938457e-06, "loss": 1.2939, "step": 15126 }, { "epoch": 0.827030056175936, "grad_norm": 1.506844401359558, "learning_rate": 1.624392120499526e-06, "loss": 1.3529, "step": 15127 }, { "epoch": 0.8270847286196574, "grad_norm": 1.4364073276519775, "learning_rate": 1.623393894386407e-06, "loss": 1.5237, "step": 15128 }, { "epoch": 0.827139401063379, "grad_norm": 1.468774676322937, "learning_rate": 1.6223959479878193e-06, "loss": 1.6171, "step": 15129 }, { "epoch": 0.8271940735071006, "grad_norm": 1.5253379344940186, "learning_rate": 1.6213982813370931e-06, "loss": 1.2449, "step": 15130 }, { "epoch": 0.8272487459508221, "grad_norm": 1.3625893592834473, "learning_rate": 1.6204008944675387e-06, "loss": 1.2782, "step": 15131 }, { "epoch": 0.8273034183945437, "grad_norm": 1.536072850227356, "learning_rate": 1.6194037874124612e-06, "loss": 1.569, "step": 15132 }, { "epoch": 0.8273580908382653, "grad_norm": 1.5133256912231445, "learning_rate": 1.6184069602051578e-06, "loss": 1.1743, "step": 15133 }, { "epoch": 0.8274127632819868, "grad_norm": 1.6322362422943115, "learning_rate": 1.6174104128789115e-06, "loss": 1.6402, "step": 15134 }, { "epoch": 0.8274674357257084, "grad_norm": 1.6940628290176392, "learning_rate": 1.6164141454670034e-06, "loss": 1.6429, "step": 15135 }, { "epoch": 0.82752210816943, "grad_norm": 1.2598581314086914, "learning_rate": 1.6154181580027006e-06, "loss": 1.5397, "step": 15136 }, { "epoch": 0.8275767806131514, "grad_norm": 1.4580168724060059, "learning_rate": 1.6144224505192586e-06, "loss": 1.4716, "step": 15137 }, { "epoch": 0.827631453056873, "grad_norm": 1.9747486114501953, "learning_rate": 1.6134270230499294e-06, "loss": 1.4577, "step": 15138 }, { "epoch": 0.8276861255005946, "grad_norm": 1.2426854372024536, "learning_rate": 1.6124318756279533e-06, "loss": 1.4959, "step": 15139 }, { "epoch": 0.8277407979443161, "grad_norm": 1.6412984132766724, "learning_rate": 1.611437008286555e-06, "loss": 1.3503, "step": 15140 }, { "epoch": 0.8277954703880377, "grad_norm": 1.2116762399673462, "learning_rate": 1.610442421058962e-06, "loss": 1.5901, "step": 15141 }, { "epoch": 0.8278501428317592, "grad_norm": 1.60866379737854, "learning_rate": 1.6094481139783836e-06, "loss": 1.309, "step": 15142 }, { "epoch": 0.8279048152754808, "grad_norm": 1.407594084739685, "learning_rate": 1.6084540870780197e-06, "loss": 1.5774, "step": 15143 }, { "epoch": 0.8279594877192024, "grad_norm": 1.6772452592849731, "learning_rate": 1.607460340391067e-06, "loss": 1.269, "step": 15144 }, { "epoch": 0.8280141601629238, "grad_norm": 1.6167945861816406, "learning_rate": 1.6064668739507072e-06, "loss": 1.4238, "step": 15145 }, { "epoch": 0.8280688326066454, "grad_norm": 1.5143487453460693, "learning_rate": 1.6054736877901156e-06, "loss": 1.0521, "step": 15146 }, { "epoch": 0.828123505050367, "grad_norm": 1.602346420288086, "learning_rate": 1.6044807819424545e-06, "loss": 1.3598, "step": 15147 }, { "epoch": 0.8281781774940885, "grad_norm": 1.3558142185211182, "learning_rate": 1.603488156440879e-06, "loss": 1.4693, "step": 15148 }, { "epoch": 0.8282328499378101, "grad_norm": 1.3828636407852173, "learning_rate": 1.6024958113185395e-06, "loss": 1.4044, "step": 15149 }, { "epoch": 0.8282875223815317, "grad_norm": 1.2184898853302002, "learning_rate": 1.6015037466085704e-06, "loss": 1.5625, "step": 15150 }, { "epoch": 0.8283421948252532, "grad_norm": 1.8275725841522217, "learning_rate": 1.6005119623440956e-06, "loss": 1.1809, "step": 15151 }, { "epoch": 0.8283968672689748, "grad_norm": 1.7366024255752563, "learning_rate": 1.5995204585582392e-06, "loss": 1.5077, "step": 15152 }, { "epoch": 0.8284515397126964, "grad_norm": 1.5323628187179565, "learning_rate": 1.5985292352841074e-06, "loss": 1.3491, "step": 15153 }, { "epoch": 0.8285062121564178, "grad_norm": 2.447742462158203, "learning_rate": 1.5975382925547966e-06, "loss": 1.5391, "step": 15154 }, { "epoch": 0.8285608846001394, "grad_norm": 1.407070279121399, "learning_rate": 1.5965476304034023e-06, "loss": 1.3136, "step": 15155 }, { "epoch": 0.8286155570438609, "grad_norm": 1.6134029626846313, "learning_rate": 1.5955572488630012e-06, "loss": 1.4588, "step": 15156 }, { "epoch": 0.8286702294875825, "grad_norm": 1.915199875831604, "learning_rate": 1.5945671479666625e-06, "loss": 1.3924, "step": 15157 }, { "epoch": 0.8287249019313041, "grad_norm": 1.4507172107696533, "learning_rate": 1.593577327747453e-06, "loss": 1.3994, "step": 15158 }, { "epoch": 0.8287795743750256, "grad_norm": 1.9441643953323364, "learning_rate": 1.5925877882384232e-06, "loss": 1.3201, "step": 15159 }, { "epoch": 0.8288342468187472, "grad_norm": 1.482886552810669, "learning_rate": 1.5915985294726156e-06, "loss": 1.6166, "step": 15160 }, { "epoch": 0.8288889192624688, "grad_norm": 2.1291651725769043, "learning_rate": 1.5906095514830645e-06, "loss": 1.3567, "step": 15161 }, { "epoch": 0.8289435917061903, "grad_norm": 1.5995925664901733, "learning_rate": 1.5896208543027912e-06, "loss": 1.5079, "step": 15162 }, { "epoch": 0.8289982641499118, "grad_norm": 1.7309128046035767, "learning_rate": 1.5886324379648156e-06, "loss": 1.4055, "step": 15163 }, { "epoch": 0.8290529365936334, "grad_norm": 1.6556774377822876, "learning_rate": 1.5876443025021404e-06, "loss": 1.4311, "step": 15164 }, { "epoch": 0.8291076090373549, "grad_norm": 1.4786343574523926, "learning_rate": 1.5866564479477599e-06, "loss": 1.6458, "step": 15165 }, { "epoch": 0.8291622814810765, "grad_norm": 1.2496776580810547, "learning_rate": 1.585668874334665e-06, "loss": 1.3339, "step": 15166 }, { "epoch": 0.8292169539247981, "grad_norm": 1.589271068572998, "learning_rate": 1.5846815816958317e-06, "loss": 1.317, "step": 15167 }, { "epoch": 0.8292716263685196, "grad_norm": 1.2668856382369995, "learning_rate": 1.5836945700642248e-06, "loss": 1.4, "step": 15168 }, { "epoch": 0.8293262988122412, "grad_norm": 1.5467153787612915, "learning_rate": 1.582707839472809e-06, "loss": 1.5704, "step": 15169 }, { "epoch": 0.8293809712559627, "grad_norm": 1.3871797323226929, "learning_rate": 1.5817213899545293e-06, "loss": 1.6137, "step": 15170 }, { "epoch": 0.8294356436996843, "grad_norm": 1.4606585502624512, "learning_rate": 1.5807352215423278e-06, "loss": 1.3407, "step": 15171 }, { "epoch": 0.8294903161434058, "grad_norm": 1.2955312728881836, "learning_rate": 1.5797493342691328e-06, "loss": 1.469, "step": 15172 }, { "epoch": 0.8295449885871273, "grad_norm": 1.581470251083374, "learning_rate": 1.5787637281678637e-06, "loss": 1.3384, "step": 15173 }, { "epoch": 0.8295996610308489, "grad_norm": 1.8657357692718506, "learning_rate": 1.577778403271437e-06, "loss": 1.6858, "step": 15174 }, { "epoch": 0.8296543334745705, "grad_norm": 1.5132490396499634, "learning_rate": 1.5767933596127528e-06, "loss": 1.5198, "step": 15175 }, { "epoch": 0.829709005918292, "grad_norm": 1.4998811483383179, "learning_rate": 1.5758085972247017e-06, "loss": 1.2931, "step": 15176 }, { "epoch": 0.8297636783620136, "grad_norm": 1.6335060596466064, "learning_rate": 1.5748241161401723e-06, "loss": 1.4395, "step": 15177 }, { "epoch": 0.8298183508057352, "grad_norm": 1.6095014810562134, "learning_rate": 1.5738399163920359e-06, "loss": 1.419, "step": 15178 }, { "epoch": 0.8298730232494567, "grad_norm": 1.4557374715805054, "learning_rate": 1.5728559980131553e-06, "loss": 1.3617, "step": 15179 }, { "epoch": 0.8299276956931783, "grad_norm": 1.9533787965774536, "learning_rate": 1.5718723610363895e-06, "loss": 1.3476, "step": 15180 }, { "epoch": 0.8299823681368999, "grad_norm": 1.954954743385315, "learning_rate": 1.5708890054945824e-06, "loss": 1.1823, "step": 15181 }, { "epoch": 0.8300370405806213, "grad_norm": 1.886499285697937, "learning_rate": 1.56990593142057e-06, "loss": 1.6995, "step": 15182 }, { "epoch": 0.8300917130243429, "grad_norm": 1.2590783834457397, "learning_rate": 1.5689231388471816e-06, "loss": 1.6758, "step": 15183 }, { "epoch": 0.8301463854680644, "grad_norm": 1.5142996311187744, "learning_rate": 1.567940627807234e-06, "loss": 1.3649, "step": 15184 }, { "epoch": 0.830201057911786, "grad_norm": 2.4190850257873535, "learning_rate": 1.5669583983335356e-06, "loss": 1.1548, "step": 15185 }, { "epoch": 0.8302557303555076, "grad_norm": 1.6158391237258911, "learning_rate": 1.5659764504588848e-06, "loss": 1.3379, "step": 15186 }, { "epoch": 0.8303104027992291, "grad_norm": 1.5123969316482544, "learning_rate": 1.5649947842160683e-06, "loss": 1.225, "step": 15187 }, { "epoch": 0.8303650752429507, "grad_norm": 2.392321825027466, "learning_rate": 1.5640133996378725e-06, "loss": 1.3012, "step": 15188 }, { "epoch": 0.8304197476866723, "grad_norm": 1.733174443244934, "learning_rate": 1.5630322967570655e-06, "loss": 1.2393, "step": 15189 }, { "epoch": 0.8304744201303937, "grad_norm": 1.6551235914230347, "learning_rate": 1.5620514756064043e-06, "loss": 1.3559, "step": 15190 }, { "epoch": 0.8305290925741153, "grad_norm": 1.2706795930862427, "learning_rate": 1.5610709362186482e-06, "loss": 1.7232, "step": 15191 }, { "epoch": 0.8305837650178369, "grad_norm": 3.3033552169799805, "learning_rate": 1.5600906786265358e-06, "loss": 1.0848, "step": 15192 }, { "epoch": 0.8306384374615584, "grad_norm": 1.195281982421875, "learning_rate": 1.5591107028627972e-06, "loss": 1.5735, "step": 15193 }, { "epoch": 0.83069310990528, "grad_norm": 1.351867914199829, "learning_rate": 1.558131008960163e-06, "loss": 1.7356, "step": 15194 }, { "epoch": 0.8307477823490016, "grad_norm": 1.4962899684906006, "learning_rate": 1.5571515969513428e-06, "loss": 1.4955, "step": 15195 }, { "epoch": 0.8308024547927231, "grad_norm": 1.4002234935760498, "learning_rate": 1.5561724668690436e-06, "loss": 1.4516, "step": 15196 }, { "epoch": 0.8308571272364447, "grad_norm": 2.155369997024536, "learning_rate": 1.5551936187459594e-06, "loss": 1.593, "step": 15197 }, { "epoch": 0.8309117996801662, "grad_norm": 1.4288103580474854, "learning_rate": 1.554215052614776e-06, "loss": 1.3761, "step": 15198 }, { "epoch": 0.8309664721238877, "grad_norm": 1.5202289819717407, "learning_rate": 1.5532367685081685e-06, "loss": 1.5273, "step": 15199 }, { "epoch": 0.8310211445676093, "grad_norm": 1.5471632480621338, "learning_rate": 1.5522587664588097e-06, "loss": 1.4427, "step": 15200 }, { "epoch": 0.8310758170113308, "grad_norm": 1.2648615837097168, "learning_rate": 1.55128104649935e-06, "loss": 1.5891, "step": 15201 }, { "epoch": 0.8311304894550524, "grad_norm": 1.8966217041015625, "learning_rate": 1.5503036086624456e-06, "loss": 1.3968, "step": 15202 }, { "epoch": 0.831185161898774, "grad_norm": 1.438636302947998, "learning_rate": 1.5493264529807305e-06, "loss": 1.4375, "step": 15203 }, { "epoch": 0.8312398343424955, "grad_norm": 1.4667656421661377, "learning_rate": 1.548349579486833e-06, "loss": 1.4212, "step": 15204 }, { "epoch": 0.8312945067862171, "grad_norm": 1.4403231143951416, "learning_rate": 1.547372988213378e-06, "loss": 1.4835, "step": 15205 }, { "epoch": 0.8313491792299387, "grad_norm": 1.7528800964355469, "learning_rate": 1.546396679192974e-06, "loss": 1.3394, "step": 15206 }, { "epoch": 0.8314038516736602, "grad_norm": 1.3967580795288086, "learning_rate": 1.5454206524582194e-06, "loss": 1.5557, "step": 15207 }, { "epoch": 0.8314585241173817, "grad_norm": 1.6965221166610718, "learning_rate": 1.54444490804171e-06, "loss": 1.625, "step": 15208 }, { "epoch": 0.8315131965611033, "grad_norm": 1.427737832069397, "learning_rate": 1.543469445976028e-06, "loss": 1.5916, "step": 15209 }, { "epoch": 0.8315678690048248, "grad_norm": 1.5086917877197266, "learning_rate": 1.5424942662937436e-06, "loss": 1.5255, "step": 15210 }, { "epoch": 0.8316225414485464, "grad_norm": 1.6625697612762451, "learning_rate": 1.5415193690274234e-06, "loss": 1.5686, "step": 15211 }, { "epoch": 0.8316772138922679, "grad_norm": 1.556440830230713, "learning_rate": 1.5405447542096187e-06, "loss": 1.2782, "step": 15212 }, { "epoch": 0.8317318863359895, "grad_norm": 1.7028863430023193, "learning_rate": 1.5395704218728736e-06, "loss": 1.201, "step": 15213 }, { "epoch": 0.8317865587797111, "grad_norm": 1.6288996934890747, "learning_rate": 1.5385963720497278e-06, "loss": 1.6864, "step": 15214 }, { "epoch": 0.8318412312234326, "grad_norm": 1.3805714845657349, "learning_rate": 1.5376226047727005e-06, "loss": 1.3465, "step": 15215 }, { "epoch": 0.8318959036671542, "grad_norm": 1.8114473819732666, "learning_rate": 1.536649120074316e-06, "loss": 1.1784, "step": 15216 }, { "epoch": 0.8319505761108758, "grad_norm": 1.6258395910263062, "learning_rate": 1.5356759179870762e-06, "loss": 1.2636, "step": 15217 }, { "epoch": 0.8320052485545972, "grad_norm": 1.5845927000045776, "learning_rate": 1.5347029985434781e-06, "loss": 1.195, "step": 15218 }, { "epoch": 0.8320599209983188, "grad_norm": 2.448988914489746, "learning_rate": 1.5337303617760136e-06, "loss": 1.4828, "step": 15219 }, { "epoch": 0.8321145934420404, "grad_norm": 1.7490309476852417, "learning_rate": 1.5327580077171589e-06, "loss": 1.3233, "step": 15220 }, { "epoch": 0.8321692658857619, "grad_norm": 1.373438835144043, "learning_rate": 1.5317859363993814e-06, "loss": 1.5347, "step": 15221 }, { "epoch": 0.8322239383294835, "grad_norm": 1.4590362310409546, "learning_rate": 1.5308141478551441e-06, "loss": 1.2292, "step": 15222 }, { "epoch": 0.8322786107732051, "grad_norm": 1.4236905574798584, "learning_rate": 1.5298426421168965e-06, "loss": 1.4607, "step": 15223 }, { "epoch": 0.8323332832169266, "grad_norm": 1.7153509855270386, "learning_rate": 1.5288714192170796e-06, "loss": 1.4566, "step": 15224 }, { "epoch": 0.8323879556606482, "grad_norm": 1.3811819553375244, "learning_rate": 1.5279004791881236e-06, "loss": 1.5432, "step": 15225 }, { "epoch": 0.8324426281043696, "grad_norm": 1.9395036697387695, "learning_rate": 1.5269298220624506e-06, "loss": 1.3451, "step": 15226 }, { "epoch": 0.8324973005480912, "grad_norm": 1.6693718433380127, "learning_rate": 1.5259594478724715e-06, "loss": 1.3257, "step": 15227 }, { "epoch": 0.8325519729918128, "grad_norm": 1.8583264350891113, "learning_rate": 1.5249893566505935e-06, "loss": 1.2027, "step": 15228 }, { "epoch": 0.8326066454355343, "grad_norm": 1.326662302017212, "learning_rate": 1.5240195484292087e-06, "loss": 1.3993, "step": 15229 }, { "epoch": 0.8326613178792559, "grad_norm": 1.699533224105835, "learning_rate": 1.5230500232406975e-06, "loss": 1.3668, "step": 15230 }, { "epoch": 0.8327159903229775, "grad_norm": 1.3761390447616577, "learning_rate": 1.522080781117441e-06, "loss": 1.4619, "step": 15231 }, { "epoch": 0.832770662766699, "grad_norm": 1.6678646802902222, "learning_rate": 1.5211118220917987e-06, "loss": 1.305, "step": 15232 }, { "epoch": 0.8328253352104206, "grad_norm": 1.6106113195419312, "learning_rate": 1.5201431461961314e-06, "loss": 1.5676, "step": 15233 }, { "epoch": 0.8328800076541422, "grad_norm": 1.3674261569976807, "learning_rate": 1.5191747534627822e-06, "loss": 1.5431, "step": 15234 }, { "epoch": 0.8329346800978636, "grad_norm": 1.3642082214355469, "learning_rate": 1.5182066439240894e-06, "loss": 1.2709, "step": 15235 }, { "epoch": 0.8329893525415852, "grad_norm": 1.5934123992919922, "learning_rate": 1.5172388176123808e-06, "loss": 1.2083, "step": 15236 }, { "epoch": 0.8330440249853068, "grad_norm": 1.7334853410720825, "learning_rate": 1.5162712745599728e-06, "loss": 1.3002, "step": 15237 }, { "epoch": 0.8330986974290283, "grad_norm": 1.8748289346694946, "learning_rate": 1.5153040147991716e-06, "loss": 1.3649, "step": 15238 }, { "epoch": 0.8331533698727499, "grad_norm": 1.3339723348617554, "learning_rate": 1.5143370383622825e-06, "loss": 1.7242, "step": 15239 }, { "epoch": 0.8332080423164714, "grad_norm": 1.5536274909973145, "learning_rate": 1.5133703452815917e-06, "loss": 1.5059, "step": 15240 }, { "epoch": 0.833262714760193, "grad_norm": 2.025552988052368, "learning_rate": 1.512403935589377e-06, "loss": 1.303, "step": 15241 }, { "epoch": 0.8333173872039146, "grad_norm": 2.021122694015503, "learning_rate": 1.5114378093179148e-06, "loss": 1.2142, "step": 15242 }, { "epoch": 0.833372059647636, "grad_norm": 1.6647329330444336, "learning_rate": 1.5104719664994626e-06, "loss": 1.3673, "step": 15243 }, { "epoch": 0.8334267320913576, "grad_norm": 1.8758735656738281, "learning_rate": 1.5095064071662702e-06, "loss": 1.5038, "step": 15244 }, { "epoch": 0.8334814045350792, "grad_norm": 2.1140854358673096, "learning_rate": 1.5085411313505849e-06, "loss": 1.5093, "step": 15245 }, { "epoch": 0.8335360769788007, "grad_norm": 1.9228687286376953, "learning_rate": 1.5075761390846344e-06, "loss": 1.5532, "step": 15246 }, { "epoch": 0.8335907494225223, "grad_norm": 1.9416892528533936, "learning_rate": 1.5066114304006473e-06, "loss": 1.4238, "step": 15247 }, { "epoch": 0.8336454218662439, "grad_norm": 1.2394887208938599, "learning_rate": 1.5056470053308358e-06, "loss": 1.4646, "step": 15248 }, { "epoch": 0.8337000943099654, "grad_norm": 1.500209093093872, "learning_rate": 1.5046828639074028e-06, "loss": 1.6445, "step": 15249 }, { "epoch": 0.833754766753687, "grad_norm": 1.2838999032974243, "learning_rate": 1.5037190061625429e-06, "loss": 1.3077, "step": 15250 }, { "epoch": 0.8338094391974086, "grad_norm": 1.795974850654602, "learning_rate": 1.5027554321284442e-06, "loss": 1.4603, "step": 15251 }, { "epoch": 0.8338641116411301, "grad_norm": 1.6163580417633057, "learning_rate": 1.5017921418372772e-06, "loss": 1.5204, "step": 15252 }, { "epoch": 0.8339187840848516, "grad_norm": 1.5140081644058228, "learning_rate": 1.5008291353212157e-06, "loss": 1.4206, "step": 15253 }, { "epoch": 0.8339734565285731, "grad_norm": 1.570229172706604, "learning_rate": 1.499866412612413e-06, "loss": 1.4289, "step": 15254 }, { "epoch": 0.8340281289722947, "grad_norm": 1.5628763437271118, "learning_rate": 1.4989039737430144e-06, "loss": 1.6698, "step": 15255 }, { "epoch": 0.8340828014160163, "grad_norm": 1.504272699356079, "learning_rate": 1.4979418187451632e-06, "loss": 1.2148, "step": 15256 }, { "epoch": 0.8341374738597378, "grad_norm": 2.1091954708099365, "learning_rate": 1.496979947650985e-06, "loss": 1.4332, "step": 15257 }, { "epoch": 0.8341921463034594, "grad_norm": 1.4916727542877197, "learning_rate": 1.4960183604925972e-06, "loss": 1.3414, "step": 15258 }, { "epoch": 0.834246818747181, "grad_norm": 1.6442588567733765, "learning_rate": 1.4950570573021138e-06, "loss": 1.2841, "step": 15259 }, { "epoch": 0.8343014911909025, "grad_norm": 1.4516913890838623, "learning_rate": 1.4940960381116299e-06, "loss": 1.2332, "step": 15260 }, { "epoch": 0.8343561636346241, "grad_norm": 1.6058374643325806, "learning_rate": 1.4931353029532425e-06, "loss": 1.7655, "step": 15261 }, { "epoch": 0.8344108360783457, "grad_norm": 1.4705616235733032, "learning_rate": 1.4921748518590284e-06, "loss": 1.785, "step": 15262 }, { "epoch": 0.8344655085220671, "grad_norm": 1.6143800020217896, "learning_rate": 1.491214684861061e-06, "loss": 1.2437, "step": 15263 }, { "epoch": 0.8345201809657887, "grad_norm": 1.899355411529541, "learning_rate": 1.490254801991401e-06, "loss": 1.3296, "step": 15264 }, { "epoch": 0.8345748534095103, "grad_norm": 1.6580774784088135, "learning_rate": 1.4892952032821017e-06, "loss": 1.52, "step": 15265 }, { "epoch": 0.8346295258532318, "grad_norm": 1.4524123668670654, "learning_rate": 1.4883358887652044e-06, "loss": 1.2678, "step": 15266 }, { "epoch": 0.8346841982969534, "grad_norm": 1.3850778341293335, "learning_rate": 1.4873768584727478e-06, "loss": 1.3968, "step": 15267 }, { "epoch": 0.8347388707406749, "grad_norm": 1.4856351613998413, "learning_rate": 1.4864181124367538e-06, "loss": 1.4613, "step": 15268 }, { "epoch": 0.8347935431843965, "grad_norm": 1.6553168296813965, "learning_rate": 1.4854596506892338e-06, "loss": 1.1936, "step": 15269 }, { "epoch": 0.8348482156281181, "grad_norm": 3.029529571533203, "learning_rate": 1.4845014732621987e-06, "loss": 1.4343, "step": 15270 }, { "epoch": 0.8349028880718395, "grad_norm": 1.3505514860153198, "learning_rate": 1.4835435801876409e-06, "loss": 1.2232, "step": 15271 }, { "epoch": 0.8349575605155611, "grad_norm": 1.573935627937317, "learning_rate": 1.4825859714975455e-06, "loss": 1.5479, "step": 15272 }, { "epoch": 0.8350122329592827, "grad_norm": 1.2258808612823486, "learning_rate": 1.4816286472238939e-06, "loss": 1.5918, "step": 15273 }, { "epoch": 0.8350669054030042, "grad_norm": 1.7003061771392822, "learning_rate": 1.4806716073986504e-06, "loss": 1.5551, "step": 15274 }, { "epoch": 0.8351215778467258, "grad_norm": 1.4307981729507446, "learning_rate": 1.479714852053774e-06, "loss": 1.3688, "step": 15275 }, { "epoch": 0.8351762502904474, "grad_norm": 2.1008145809173584, "learning_rate": 1.4787583812212114e-06, "loss": 1.0174, "step": 15276 }, { "epoch": 0.8352309227341689, "grad_norm": 1.5186243057250977, "learning_rate": 1.4778021949329003e-06, "loss": 1.3609, "step": 15277 }, { "epoch": 0.8352855951778905, "grad_norm": 1.5208117961883545, "learning_rate": 1.4768462932207727e-06, "loss": 1.2925, "step": 15278 }, { "epoch": 0.8353402676216121, "grad_norm": 1.4559513330459595, "learning_rate": 1.475890676116749e-06, "loss": 1.443, "step": 15279 }, { "epoch": 0.8353949400653335, "grad_norm": 1.5309637784957886, "learning_rate": 1.474935343652736e-06, "loss": 1.2649, "step": 15280 }, { "epoch": 0.8354496125090551, "grad_norm": 1.3787131309509277, "learning_rate": 1.4739802958606386e-06, "loss": 1.4786, "step": 15281 }, { "epoch": 0.8355042849527766, "grad_norm": 1.5640136003494263, "learning_rate": 1.4730255327723452e-06, "loss": 1.4726, "step": 15282 }, { "epoch": 0.8355589573964982, "grad_norm": 1.3600715398788452, "learning_rate": 1.4720710544197369e-06, "loss": 1.2321, "step": 15283 }, { "epoch": 0.8356136298402198, "grad_norm": 1.2296009063720703, "learning_rate": 1.4711168608346893e-06, "loss": 1.394, "step": 15284 }, { "epoch": 0.8356683022839413, "grad_norm": 1.862430453300476, "learning_rate": 1.4701629520490646e-06, "loss": 1.5429, "step": 15285 }, { "epoch": 0.8357229747276629, "grad_norm": 1.5668227672576904, "learning_rate": 1.4692093280947106e-06, "loss": 1.4111, "step": 15286 }, { "epoch": 0.8357776471713845, "grad_norm": 1.4583629369735718, "learning_rate": 1.4682559890034787e-06, "loss": 1.5834, "step": 15287 }, { "epoch": 0.835832319615106, "grad_norm": 1.3960803747177124, "learning_rate": 1.4673029348072e-06, "loss": 1.2646, "step": 15288 }, { "epoch": 0.8358869920588275, "grad_norm": 1.774870753288269, "learning_rate": 1.4663501655376989e-06, "loss": 1.4131, "step": 15289 }, { "epoch": 0.8359416645025491, "grad_norm": 1.7275484800338745, "learning_rate": 1.46539768122679e-06, "loss": 1.2589, "step": 15290 }, { "epoch": 0.8359963369462706, "grad_norm": 2.6507339477539062, "learning_rate": 1.464445481906277e-06, "loss": 1.5577, "step": 15291 }, { "epoch": 0.8360510093899922, "grad_norm": 1.50113844871521, "learning_rate": 1.463493567607962e-06, "loss": 1.6066, "step": 15292 }, { "epoch": 0.8361056818337138, "grad_norm": 1.3513165712356567, "learning_rate": 1.4625419383636275e-06, "loss": 1.3922, "step": 15293 }, { "epoch": 0.8361603542774353, "grad_norm": 1.7748878002166748, "learning_rate": 1.461590594205049e-06, "loss": 1.4114, "step": 15294 }, { "epoch": 0.8362150267211569, "grad_norm": 1.381321668624878, "learning_rate": 1.4606395351640002e-06, "loss": 1.3217, "step": 15295 }, { "epoch": 0.8362696991648785, "grad_norm": 1.5972102880477905, "learning_rate": 1.4596887612722345e-06, "loss": 1.5291, "step": 15296 }, { "epoch": 0.8363243716086, "grad_norm": 1.4172172546386719, "learning_rate": 1.4587382725614997e-06, "loss": 1.1331, "step": 15297 }, { "epoch": 0.8363790440523216, "grad_norm": 1.5633726119995117, "learning_rate": 1.4577880690635381e-06, "loss": 1.2957, "step": 15298 }, { "epoch": 0.836433716496043, "grad_norm": 1.365059733390808, "learning_rate": 1.4568381508100782e-06, "loss": 1.3222, "step": 15299 }, { "epoch": 0.8364883889397646, "grad_norm": 1.4436962604522705, "learning_rate": 1.4558885178328374e-06, "loss": 1.6611, "step": 15300 }, { "epoch": 0.8365430613834862, "grad_norm": 1.803922176361084, "learning_rate": 1.4549391701635308e-06, "loss": 1.5307, "step": 15301 }, { "epoch": 0.8365977338272077, "grad_norm": 1.5963205099105835, "learning_rate": 1.453990107833857e-06, "loss": 1.4411, "step": 15302 }, { "epoch": 0.8366524062709293, "grad_norm": 1.6293120384216309, "learning_rate": 1.4530413308755075e-06, "loss": 1.4593, "step": 15303 }, { "epoch": 0.8367070787146509, "grad_norm": 3.250093936920166, "learning_rate": 1.4520928393201638e-06, "loss": 1.3355, "step": 15304 }, { "epoch": 0.8367617511583724, "grad_norm": 1.566453456878662, "learning_rate": 1.4511446331994961e-06, "loss": 1.1804, "step": 15305 }, { "epoch": 0.836816423602094, "grad_norm": 1.3327239751815796, "learning_rate": 1.450196712545172e-06, "loss": 1.4245, "step": 15306 }, { "epoch": 0.8368710960458156, "grad_norm": 1.5300233364105225, "learning_rate": 1.4492490773888424e-06, "loss": 1.5017, "step": 15307 }, { "epoch": 0.836925768489537, "grad_norm": 1.5335379838943481, "learning_rate": 1.4483017277621482e-06, "loss": 1.307, "step": 15308 }, { "epoch": 0.8369804409332586, "grad_norm": 1.3107649087905884, "learning_rate": 1.4473546636967296e-06, "loss": 1.3784, "step": 15309 }, { "epoch": 0.8370351133769802, "grad_norm": 1.4071036577224731, "learning_rate": 1.446407885224208e-06, "loss": 1.4473, "step": 15310 }, { "epoch": 0.8370897858207017, "grad_norm": 1.590739130973816, "learning_rate": 1.4454613923761962e-06, "loss": 1.2281, "step": 15311 }, { "epoch": 0.8371444582644233, "grad_norm": 1.1936216354370117, "learning_rate": 1.4445151851843042e-06, "loss": 1.5286, "step": 15312 }, { "epoch": 0.8371991307081448, "grad_norm": 2.0379958152770996, "learning_rate": 1.4435692636801268e-06, "loss": 1.4168, "step": 15313 }, { "epoch": 0.8372538031518664, "grad_norm": 1.2593923807144165, "learning_rate": 1.442623627895251e-06, "loss": 1.372, "step": 15314 }, { "epoch": 0.837308475595588, "grad_norm": 1.6230723857879639, "learning_rate": 1.4416782778612514e-06, "loss": 1.4672, "step": 15315 }, { "epoch": 0.8373631480393094, "grad_norm": 1.8775665760040283, "learning_rate": 1.4407332136096953e-06, "loss": 1.3996, "step": 15316 }, { "epoch": 0.837417820483031, "grad_norm": 1.6893996000289917, "learning_rate": 1.4397884351721436e-06, "loss": 1.3438, "step": 15317 }, { "epoch": 0.8374724929267526, "grad_norm": 1.7817232608795166, "learning_rate": 1.4388439425801437e-06, "loss": 1.538, "step": 15318 }, { "epoch": 0.8375271653704741, "grad_norm": 1.6408641338348389, "learning_rate": 1.4378997358652313e-06, "loss": 1.5246, "step": 15319 }, { "epoch": 0.8375818378141957, "grad_norm": 1.3702954053878784, "learning_rate": 1.4369558150589413e-06, "loss": 1.4987, "step": 15320 }, { "epoch": 0.8376365102579173, "grad_norm": 1.5690767765045166, "learning_rate": 1.4360121801927907e-06, "loss": 1.2844, "step": 15321 }, { "epoch": 0.8376911827016388, "grad_norm": 1.5112583637237549, "learning_rate": 1.4350688312982864e-06, "loss": 1.4192, "step": 15322 }, { "epoch": 0.8377458551453604, "grad_norm": 1.2453818321228027, "learning_rate": 1.4341257684069344e-06, "loss": 1.5276, "step": 15323 }, { "epoch": 0.837800527589082, "grad_norm": 1.9557241201400757, "learning_rate": 1.4331829915502226e-06, "loss": 0.969, "step": 15324 }, { "epoch": 0.8378552000328034, "grad_norm": 1.9338549375534058, "learning_rate": 1.4322405007596329e-06, "loss": 1.5108, "step": 15325 }, { "epoch": 0.837909872476525, "grad_norm": 1.6445834636688232, "learning_rate": 1.4312982960666388e-06, "loss": 1.298, "step": 15326 }, { "epoch": 0.8379645449202465, "grad_norm": 1.6339422464370728, "learning_rate": 1.430356377502702e-06, "loss": 1.4361, "step": 15327 }, { "epoch": 0.8380192173639681, "grad_norm": 2.159125804901123, "learning_rate": 1.4294147450992757e-06, "loss": 1.0052, "step": 15328 }, { "epoch": 0.8380738898076897, "grad_norm": 1.4004088640213013, "learning_rate": 1.428473398887802e-06, "loss": 1.5493, "step": 15329 }, { "epoch": 0.8381285622514112, "grad_norm": 1.7468364238739014, "learning_rate": 1.427532338899712e-06, "loss": 1.226, "step": 15330 }, { "epoch": 0.8381832346951328, "grad_norm": 1.360181212425232, "learning_rate": 1.4265915651664363e-06, "loss": 1.5244, "step": 15331 }, { "epoch": 0.8382379071388544, "grad_norm": 1.6247715950012207, "learning_rate": 1.4256510777193866e-06, "loss": 1.5246, "step": 15332 }, { "epoch": 0.8382925795825759, "grad_norm": 1.734907865524292, "learning_rate": 1.4247108765899654e-06, "loss": 1.2933, "step": 15333 }, { "epoch": 0.8383472520262975, "grad_norm": 1.948674201965332, "learning_rate": 1.423770961809573e-06, "loss": 1.5032, "step": 15334 }, { "epoch": 0.838401924470019, "grad_norm": 1.4395616054534912, "learning_rate": 1.4228313334095923e-06, "loss": 1.5053, "step": 15335 }, { "epoch": 0.8384565969137405, "grad_norm": 1.7687989473342896, "learning_rate": 1.421891991421399e-06, "loss": 1.5475, "step": 15336 }, { "epoch": 0.8385112693574621, "grad_norm": 1.4115511178970337, "learning_rate": 1.420952935876363e-06, "loss": 1.5542, "step": 15337 }, { "epoch": 0.8385659418011837, "grad_norm": 1.6627613306045532, "learning_rate": 1.4200141668058397e-06, "loss": 1.4859, "step": 15338 }, { "epoch": 0.8386206142449052, "grad_norm": 1.5914093255996704, "learning_rate": 1.4190756842411746e-06, "loss": 1.477, "step": 15339 }, { "epoch": 0.8386752866886268, "grad_norm": 1.380300760269165, "learning_rate": 1.41813748821371e-06, "loss": 1.3657, "step": 15340 }, { "epoch": 0.8387299591323483, "grad_norm": 1.8830103874206543, "learning_rate": 1.4171995787547732e-06, "loss": 1.5774, "step": 15341 }, { "epoch": 0.8387846315760699, "grad_norm": 2.2210211753845215, "learning_rate": 1.4162619558956836e-06, "loss": 1.1818, "step": 15342 }, { "epoch": 0.8388393040197915, "grad_norm": 1.7602882385253906, "learning_rate": 1.4153246196677483e-06, "loss": 1.4117, "step": 15343 }, { "epoch": 0.8388939764635129, "grad_norm": 1.3652821779251099, "learning_rate": 1.414387570102267e-06, "loss": 1.4724, "step": 15344 }, { "epoch": 0.8389486489072345, "grad_norm": 1.4906407594680786, "learning_rate": 1.413450807230533e-06, "loss": 1.1323, "step": 15345 }, { "epoch": 0.8390033213509561, "grad_norm": 1.371275544166565, "learning_rate": 1.4125143310838262e-06, "loss": 1.4324, "step": 15346 }, { "epoch": 0.8390579937946776, "grad_norm": 1.5195518732070923, "learning_rate": 1.4115781416934148e-06, "loss": 1.3558, "step": 15347 }, { "epoch": 0.8391126662383992, "grad_norm": 1.7112942934036255, "learning_rate": 1.4106422390905649e-06, "loss": 1.2495, "step": 15348 }, { "epoch": 0.8391673386821208, "grad_norm": 1.4560831785202026, "learning_rate": 1.409706623306526e-06, "loss": 1.4029, "step": 15349 }, { "epoch": 0.8392220111258423, "grad_norm": 1.3496294021606445, "learning_rate": 1.4087712943725384e-06, "loss": 1.1428, "step": 15350 }, { "epoch": 0.8392766835695639, "grad_norm": 1.3221886157989502, "learning_rate": 1.4078362523198385e-06, "loss": 1.5957, "step": 15351 }, { "epoch": 0.8393313560132855, "grad_norm": 1.2024469375610352, "learning_rate": 1.4069014971796502e-06, "loss": 1.4838, "step": 15352 }, { "epoch": 0.8393860284570069, "grad_norm": 1.2774232625961304, "learning_rate": 1.405967028983184e-06, "loss": 1.6123, "step": 15353 }, { "epoch": 0.8394407009007285, "grad_norm": 1.6569035053253174, "learning_rate": 1.405032847761646e-06, "loss": 1.5117, "step": 15354 }, { "epoch": 0.83949537334445, "grad_norm": 1.714403748512268, "learning_rate": 1.404098953546229e-06, "loss": 1.3147, "step": 15355 }, { "epoch": 0.8395500457881716, "grad_norm": 1.3783024549484253, "learning_rate": 1.4031653463681172e-06, "loss": 1.476, "step": 15356 }, { "epoch": 0.8396047182318932, "grad_norm": 1.5369558334350586, "learning_rate": 1.40223202625849e-06, "loss": 1.3809, "step": 15357 }, { "epoch": 0.8396593906756147, "grad_norm": 1.5678229331970215, "learning_rate": 1.4012989932485077e-06, "loss": 1.4985, "step": 15358 }, { "epoch": 0.8397140631193363, "grad_norm": 1.474717140197754, "learning_rate": 1.4003662473693324e-06, "loss": 1.6138, "step": 15359 }, { "epoch": 0.8397687355630579, "grad_norm": 1.2893670797348022, "learning_rate": 1.399433788652107e-06, "loss": 1.3627, "step": 15360 }, { "epoch": 0.8398234080067793, "grad_norm": 1.731675624847412, "learning_rate": 1.3985016171279675e-06, "loss": 1.2043, "step": 15361 }, { "epoch": 0.8398780804505009, "grad_norm": 1.3998703956604004, "learning_rate": 1.3975697328280457e-06, "loss": 1.3217, "step": 15362 }, { "epoch": 0.8399327528942225, "grad_norm": 1.6664021015167236, "learning_rate": 1.3966381357834568e-06, "loss": 1.1257, "step": 15363 }, { "epoch": 0.839987425337944, "grad_norm": 1.782823085784912, "learning_rate": 1.395706826025306e-06, "loss": 1.3155, "step": 15364 }, { "epoch": 0.8400420977816656, "grad_norm": 1.4654875993728638, "learning_rate": 1.3947758035846981e-06, "loss": 1.4168, "step": 15365 }, { "epoch": 0.8400967702253872, "grad_norm": 1.329766869544983, "learning_rate": 1.3938450684927185e-06, "loss": 1.4539, "step": 15366 }, { "epoch": 0.8401514426691087, "grad_norm": 1.4624427556991577, "learning_rate": 1.3929146207804468e-06, "loss": 1.7219, "step": 15367 }, { "epoch": 0.8402061151128303, "grad_norm": 1.252023696899414, "learning_rate": 1.3919844604789534e-06, "loss": 1.5503, "step": 15368 }, { "epoch": 0.8402607875565518, "grad_norm": 1.969358205795288, "learning_rate": 1.3910545876192971e-06, "loss": 1.4763, "step": 15369 }, { "epoch": 0.8403154600002734, "grad_norm": 2.1024527549743652, "learning_rate": 1.3901250022325286e-06, "loss": 1.467, "step": 15370 }, { "epoch": 0.8403701324439949, "grad_norm": 1.2242565155029297, "learning_rate": 1.3891957043496917e-06, "loss": 1.3954, "step": 15371 }, { "epoch": 0.8404248048877164, "grad_norm": 1.7204440832138062, "learning_rate": 1.3882666940018141e-06, "loss": 1.3454, "step": 15372 }, { "epoch": 0.840479477331438, "grad_norm": 1.6130149364471436, "learning_rate": 1.387337971219922e-06, "loss": 1.5626, "step": 15373 }, { "epoch": 0.8405341497751596, "grad_norm": 1.6266002655029297, "learning_rate": 1.3864095360350249e-06, "loss": 1.6476, "step": 15374 }, { "epoch": 0.8405888222188811, "grad_norm": 2.5047850608825684, "learning_rate": 1.3854813884781238e-06, "loss": 1.31, "step": 15375 }, { "epoch": 0.8406434946626027, "grad_norm": 1.3435423374176025, "learning_rate": 1.384553528580216e-06, "loss": 1.8203, "step": 15376 }, { "epoch": 0.8406981671063243, "grad_norm": 1.4261820316314697, "learning_rate": 1.3836259563722832e-06, "loss": 1.516, "step": 15377 }, { "epoch": 0.8407528395500458, "grad_norm": 1.5221333503723145, "learning_rate": 1.3826986718852952e-06, "loss": 1.5231, "step": 15378 }, { "epoch": 0.8408075119937674, "grad_norm": 1.561692237854004, "learning_rate": 1.381771675150223e-06, "loss": 1.2948, "step": 15379 }, { "epoch": 0.840862184437489, "grad_norm": 1.3880318403244019, "learning_rate": 1.3808449661980173e-06, "loss": 1.305, "step": 15380 }, { "epoch": 0.8409168568812104, "grad_norm": 1.61362624168396, "learning_rate": 1.3799185450596243e-06, "loss": 1.4977, "step": 15381 }, { "epoch": 0.840971529324932, "grad_norm": 1.7579604387283325, "learning_rate": 1.3789924117659782e-06, "loss": 1.661, "step": 15382 }, { "epoch": 0.8410262017686535, "grad_norm": 1.7350941896438599, "learning_rate": 1.3780665663480052e-06, "loss": 1.2115, "step": 15383 }, { "epoch": 0.8410808742123751, "grad_norm": 1.9379953145980835, "learning_rate": 1.377141008836619e-06, "loss": 1.574, "step": 15384 }, { "epoch": 0.8411355466560967, "grad_norm": 1.507198691368103, "learning_rate": 1.3762157392627317e-06, "loss": 1.4789, "step": 15385 }, { "epoch": 0.8411902190998182, "grad_norm": 1.8005722761154175, "learning_rate": 1.375290757657235e-06, "loss": 1.4396, "step": 15386 }, { "epoch": 0.8412448915435398, "grad_norm": 1.4199800491333008, "learning_rate": 1.3743660640510205e-06, "loss": 1.5219, "step": 15387 }, { "epoch": 0.8412995639872614, "grad_norm": 1.362399935722351, "learning_rate": 1.3734416584749633e-06, "loss": 1.1864, "step": 15388 }, { "epoch": 0.8413542364309828, "grad_norm": 1.631096601486206, "learning_rate": 1.37251754095993e-06, "loss": 1.3829, "step": 15389 }, { "epoch": 0.8414089088747044, "grad_norm": 2.003635883331299, "learning_rate": 1.3715937115367829e-06, "loss": 1.4049, "step": 15390 }, { "epoch": 0.841463581318426, "grad_norm": 1.7177640199661255, "learning_rate": 1.3706701702363701e-06, "loss": 1.3427, "step": 15391 }, { "epoch": 0.8415182537621475, "grad_norm": 1.5787874460220337, "learning_rate": 1.3697469170895282e-06, "loss": 1.2001, "step": 15392 }, { "epoch": 0.8415729262058691, "grad_norm": 1.7741289138793945, "learning_rate": 1.3688239521270897e-06, "loss": 1.6146, "step": 15393 }, { "epoch": 0.8416275986495907, "grad_norm": 1.5705665349960327, "learning_rate": 1.3679012753798726e-06, "loss": 1.346, "step": 15394 }, { "epoch": 0.8416822710933122, "grad_norm": 1.2715020179748535, "learning_rate": 1.366978886878685e-06, "loss": 1.4218, "step": 15395 }, { "epoch": 0.8417369435370338, "grad_norm": 1.4833226203918457, "learning_rate": 1.3660567866543328e-06, "loss": 1.296, "step": 15396 }, { "epoch": 0.8417916159807552, "grad_norm": 1.8272062540054321, "learning_rate": 1.3651349747376053e-06, "loss": 1.3307, "step": 15397 }, { "epoch": 0.8418462884244768, "grad_norm": 1.5468940734863281, "learning_rate": 1.364213451159281e-06, "loss": 1.3266, "step": 15398 }, { "epoch": 0.8419009608681984, "grad_norm": 1.5135313272476196, "learning_rate": 1.363292215950135e-06, "loss": 1.4522, "step": 15399 }, { "epoch": 0.8419556333119199, "grad_norm": 1.6744146347045898, "learning_rate": 1.3623712691409274e-06, "loss": 1.3644, "step": 15400 }, { "epoch": 0.8420103057556415, "grad_norm": 1.4275907278060913, "learning_rate": 1.3614506107624148e-06, "loss": 1.6051, "step": 15401 }, { "epoch": 0.8420649781993631, "grad_norm": 1.6242828369140625, "learning_rate": 1.3605302408453359e-06, "loss": 1.484, "step": 15402 }, { "epoch": 0.8421196506430846, "grad_norm": 1.4670401811599731, "learning_rate": 1.3596101594204248e-06, "loss": 1.4467, "step": 15403 }, { "epoch": 0.8421743230868062, "grad_norm": 1.5367194414138794, "learning_rate": 1.358690366518407e-06, "loss": 1.664, "step": 15404 }, { "epoch": 0.8422289955305278, "grad_norm": 1.6038661003112793, "learning_rate": 1.3577708621699948e-06, "loss": 1.4081, "step": 15405 }, { "epoch": 0.8422836679742493, "grad_norm": 1.8132917881011963, "learning_rate": 1.3568516464058946e-06, "loss": 1.4663, "step": 15406 }, { "epoch": 0.8423383404179708, "grad_norm": 1.3231209516525269, "learning_rate": 1.3559327192567984e-06, "loss": 1.4679, "step": 15407 }, { "epoch": 0.8423930128616924, "grad_norm": 1.977494478225708, "learning_rate": 1.355014080753393e-06, "loss": 1.232, "step": 15408 }, { "epoch": 0.8424476853054139, "grad_norm": 1.914673924446106, "learning_rate": 1.3540957309263513e-06, "loss": 1.4024, "step": 15409 }, { "epoch": 0.8425023577491355, "grad_norm": 2.0186562538146973, "learning_rate": 1.3531776698063436e-06, "loss": 1.319, "step": 15410 }, { "epoch": 0.842557030192857, "grad_norm": 1.356503963470459, "learning_rate": 1.3522598974240241e-06, "loss": 1.4494, "step": 15411 }, { "epoch": 0.8426117026365786, "grad_norm": 1.751542329788208, "learning_rate": 1.3513424138100372e-06, "loss": 1.3063, "step": 15412 }, { "epoch": 0.8426663750803002, "grad_norm": 1.616470456123352, "learning_rate": 1.350425218995024e-06, "loss": 1.5427, "step": 15413 }, { "epoch": 0.8427210475240217, "grad_norm": 1.691029667854309, "learning_rate": 1.3495083130096066e-06, "loss": 1.1299, "step": 15414 }, { "epoch": 0.8427757199677433, "grad_norm": 1.7284047603607178, "learning_rate": 1.3485916958844093e-06, "loss": 1.3528, "step": 15415 }, { "epoch": 0.8428303924114648, "grad_norm": 1.5047924518585205, "learning_rate": 1.3476753676500355e-06, "loss": 1.374, "step": 15416 }, { "epoch": 0.8428850648551863, "grad_norm": 2.0682454109191895, "learning_rate": 1.3467593283370817e-06, "loss": 1.4365, "step": 15417 }, { "epoch": 0.8429397372989079, "grad_norm": 1.3625319004058838, "learning_rate": 1.3458435779761425e-06, "loss": 1.4158, "step": 15418 }, { "epoch": 0.8429944097426295, "grad_norm": 1.237083911895752, "learning_rate": 1.3449281165977935e-06, "loss": 1.4857, "step": 15419 }, { "epoch": 0.843049082186351, "grad_norm": 1.6913762092590332, "learning_rate": 1.3440129442326045e-06, "loss": 1.4703, "step": 15420 }, { "epoch": 0.8431037546300726, "grad_norm": 1.2773717641830444, "learning_rate": 1.3430980609111354e-06, "loss": 1.5189, "step": 15421 }, { "epoch": 0.8431584270737942, "grad_norm": 1.7658586502075195, "learning_rate": 1.3421834666639355e-06, "loss": 1.5288, "step": 15422 }, { "epoch": 0.8432130995175157, "grad_norm": 1.5585072040557861, "learning_rate": 1.3412691615215445e-06, "loss": 1.2945, "step": 15423 }, { "epoch": 0.8432677719612373, "grad_norm": 1.4255528450012207, "learning_rate": 1.3403551455144958e-06, "loss": 1.5448, "step": 15424 }, { "epoch": 0.8433224444049587, "grad_norm": 1.6220598220825195, "learning_rate": 1.3394414186733096e-06, "loss": 1.56, "step": 15425 }, { "epoch": 0.8433771168486803, "grad_norm": 1.2661895751953125, "learning_rate": 1.3385279810284956e-06, "loss": 1.4608, "step": 15426 }, { "epoch": 0.8434317892924019, "grad_norm": 1.4076529741287231, "learning_rate": 1.3376148326105586e-06, "loss": 1.3213, "step": 15427 }, { "epoch": 0.8434864617361234, "grad_norm": 1.5933805704116821, "learning_rate": 1.3367019734499876e-06, "loss": 1.3529, "step": 15428 }, { "epoch": 0.843541134179845, "grad_norm": 1.500656247138977, "learning_rate": 1.3357894035772678e-06, "loss": 1.3797, "step": 15429 }, { "epoch": 0.8435958066235666, "grad_norm": 1.2022041082382202, "learning_rate": 1.3348771230228718e-06, "loss": 1.7725, "step": 15430 }, { "epoch": 0.8436504790672881, "grad_norm": 1.67759370803833, "learning_rate": 1.333965131817263e-06, "loss": 1.2072, "step": 15431 }, { "epoch": 0.8437051515110097, "grad_norm": 1.4266878366470337, "learning_rate": 1.3330534299908925e-06, "loss": 1.5387, "step": 15432 }, { "epoch": 0.8437598239547313, "grad_norm": 1.612736701965332, "learning_rate": 1.3321420175742061e-06, "loss": 1.4929, "step": 15433 }, { "epoch": 0.8438144963984527, "grad_norm": 1.6792644262313843, "learning_rate": 1.3312308945976348e-06, "loss": 1.2667, "step": 15434 }, { "epoch": 0.8438691688421743, "grad_norm": 1.6032403707504272, "learning_rate": 1.330320061091609e-06, "loss": 1.2542, "step": 15435 }, { "epoch": 0.8439238412858959, "grad_norm": 1.3086203336715698, "learning_rate": 1.3294095170865395e-06, "loss": 1.3273, "step": 15436 }, { "epoch": 0.8439785137296174, "grad_norm": 1.6800607442855835, "learning_rate": 1.3284992626128312e-06, "loss": 1.423, "step": 15437 }, { "epoch": 0.844033186173339, "grad_norm": 1.873826503753662, "learning_rate": 1.327589297700882e-06, "loss": 1.4321, "step": 15438 }, { "epoch": 0.8440878586170605, "grad_norm": 1.6328426599502563, "learning_rate": 1.3266796223810773e-06, "loss": 1.6846, "step": 15439 }, { "epoch": 0.8441425310607821, "grad_norm": 1.6621408462524414, "learning_rate": 1.32577023668379e-06, "loss": 1.4153, "step": 15440 }, { "epoch": 0.8441972035045037, "grad_norm": 1.6831947565078735, "learning_rate": 1.3248611406393918e-06, "loss": 1.1926, "step": 15441 }, { "epoch": 0.8442518759482252, "grad_norm": 1.701908826828003, "learning_rate": 1.3239523342782345e-06, "loss": 1.3638, "step": 15442 }, { "epoch": 0.8443065483919467, "grad_norm": 1.6753263473510742, "learning_rate": 1.3230438176306693e-06, "loss": 1.1993, "step": 15443 }, { "epoch": 0.8443612208356683, "grad_norm": 1.4137009382247925, "learning_rate": 1.3221355907270329e-06, "loss": 1.573, "step": 15444 }, { "epoch": 0.8444158932793898, "grad_norm": 1.548721194267273, "learning_rate": 1.321227653597653e-06, "loss": 1.4287, "step": 15445 }, { "epoch": 0.8444705657231114, "grad_norm": 1.8484152555465698, "learning_rate": 1.320320006272846e-06, "loss": 1.3074, "step": 15446 }, { "epoch": 0.844525238166833, "grad_norm": 1.598567008972168, "learning_rate": 1.3194126487829218e-06, "loss": 1.4437, "step": 15447 }, { "epoch": 0.8445799106105545, "grad_norm": 2.1814112663269043, "learning_rate": 1.318505581158177e-06, "loss": 1.072, "step": 15448 }, { "epoch": 0.8446345830542761, "grad_norm": 1.4165444374084473, "learning_rate": 1.3175988034289045e-06, "loss": 1.5629, "step": 15449 }, { "epoch": 0.8446892554979977, "grad_norm": 1.3968770503997803, "learning_rate": 1.316692315625382e-06, "loss": 1.3177, "step": 15450 }, { "epoch": 0.8447439279417192, "grad_norm": 1.6911821365356445, "learning_rate": 1.3157861177778764e-06, "loss": 1.4516, "step": 15451 }, { "epoch": 0.8447986003854407, "grad_norm": 1.8830907344818115, "learning_rate": 1.3148802099166535e-06, "loss": 1.2961, "step": 15452 }, { "epoch": 0.8448532728291622, "grad_norm": 1.3222113847732544, "learning_rate": 1.3139745920719605e-06, "loss": 1.6097, "step": 15453 }, { "epoch": 0.8449079452728838, "grad_norm": 1.5608091354370117, "learning_rate": 1.313069264274035e-06, "loss": 1.258, "step": 15454 }, { "epoch": 0.8449626177166054, "grad_norm": 1.4931985139846802, "learning_rate": 1.3121642265531154e-06, "loss": 1.4442, "step": 15455 }, { "epoch": 0.8450172901603269, "grad_norm": 1.7981230020523071, "learning_rate": 1.3112594789394174e-06, "loss": 1.4377, "step": 15456 }, { "epoch": 0.8450719626040485, "grad_norm": 1.547136902809143, "learning_rate": 1.3103550214631544e-06, "loss": 1.4026, "step": 15457 }, { "epoch": 0.8451266350477701, "grad_norm": 1.6521700620651245, "learning_rate": 1.3094508541545282e-06, "loss": 1.2707, "step": 15458 }, { "epoch": 0.8451813074914916, "grad_norm": 1.7656625509262085, "learning_rate": 1.3085469770437286e-06, "loss": 1.3785, "step": 15459 }, { "epoch": 0.8452359799352132, "grad_norm": 1.3036162853240967, "learning_rate": 1.307643390160943e-06, "loss": 1.5655, "step": 15460 }, { "epoch": 0.8452906523789347, "grad_norm": 1.459502100944519, "learning_rate": 1.3067400935363427e-06, "loss": 1.4727, "step": 15461 }, { "epoch": 0.8453453248226562, "grad_norm": 1.4901204109191895, "learning_rate": 1.305837087200087e-06, "loss": 1.3644, "step": 15462 }, { "epoch": 0.8453999972663778, "grad_norm": 1.5853270292282104, "learning_rate": 1.3049343711823347e-06, "loss": 1.4809, "step": 15463 }, { "epoch": 0.8454546697100994, "grad_norm": 1.4628480672836304, "learning_rate": 1.3040319455132277e-06, "loss": 1.3656, "step": 15464 }, { "epoch": 0.8455093421538209, "grad_norm": 1.2731235027313232, "learning_rate": 1.3031298102228974e-06, "loss": 1.3923, "step": 15465 }, { "epoch": 0.8455640145975425, "grad_norm": 1.48848295211792, "learning_rate": 1.3022279653414728e-06, "loss": 1.6166, "step": 15466 }, { "epoch": 0.845618687041264, "grad_norm": 1.6069830656051636, "learning_rate": 1.3013264108990654e-06, "loss": 1.4009, "step": 15467 }, { "epoch": 0.8456733594849856, "grad_norm": 2.0145740509033203, "learning_rate": 1.30042514692578e-06, "loss": 1.4631, "step": 15468 }, { "epoch": 0.8457280319287072, "grad_norm": 1.0524876117706299, "learning_rate": 1.299524173451715e-06, "loss": 1.6598, "step": 15469 }, { "epoch": 0.8457827043724286, "grad_norm": 1.6630042791366577, "learning_rate": 1.298623490506955e-06, "loss": 1.4704, "step": 15470 }, { "epoch": 0.8458373768161502, "grad_norm": 1.6784764528274536, "learning_rate": 1.2977230981215738e-06, "loss": 1.3723, "step": 15471 }, { "epoch": 0.8458920492598718, "grad_norm": 1.4266225099563599, "learning_rate": 1.2968229963256395e-06, "loss": 1.4279, "step": 15472 }, { "epoch": 0.8459467217035933, "grad_norm": 1.859710454940796, "learning_rate": 1.295923185149206e-06, "loss": 1.3781, "step": 15473 }, { "epoch": 0.8460013941473149, "grad_norm": 1.212804913520813, "learning_rate": 1.2950236646223246e-06, "loss": 1.6075, "step": 15474 }, { "epoch": 0.8460560665910365, "grad_norm": 1.4396719932556152, "learning_rate": 1.2941244347750292e-06, "loss": 1.3539, "step": 15475 }, { "epoch": 0.846110739034758, "grad_norm": 1.5475932359695435, "learning_rate": 1.2932254956373457e-06, "loss": 1.2649, "step": 15476 }, { "epoch": 0.8461654114784796, "grad_norm": 1.5481009483337402, "learning_rate": 1.292326847239297e-06, "loss": 1.5206, "step": 15477 }, { "epoch": 0.8462200839222012, "grad_norm": 1.6952221393585205, "learning_rate": 1.2914284896108875e-06, "loss": 1.3926, "step": 15478 }, { "epoch": 0.8462747563659226, "grad_norm": 1.393894076347351, "learning_rate": 1.2905304227821136e-06, "loss": 1.3393, "step": 15479 }, { "epoch": 0.8463294288096442, "grad_norm": 1.7598875761032104, "learning_rate": 1.289632646782969e-06, "loss": 1.4641, "step": 15480 }, { "epoch": 0.8463841012533657, "grad_norm": 1.928632140159607, "learning_rate": 1.2887351616434285e-06, "loss": 1.3162, "step": 15481 }, { "epoch": 0.8464387736970873, "grad_norm": 1.3270207643508911, "learning_rate": 1.2878379673934616e-06, "loss": 1.7002, "step": 15482 }, { "epoch": 0.8464934461408089, "grad_norm": 2.025179147720337, "learning_rate": 1.286941064063031e-06, "loss": 1.2764, "step": 15483 }, { "epoch": 0.8465481185845304, "grad_norm": 1.6504391431808472, "learning_rate": 1.2860444516820835e-06, "loss": 1.2717, "step": 15484 }, { "epoch": 0.846602791028252, "grad_norm": 1.3895865678787231, "learning_rate": 1.2851481302805603e-06, "loss": 1.2784, "step": 15485 }, { "epoch": 0.8466574634719736, "grad_norm": 2.0216078758239746, "learning_rate": 1.2842520998883912e-06, "loss": 1.5296, "step": 15486 }, { "epoch": 0.846712135915695, "grad_norm": 1.3379398584365845, "learning_rate": 1.2833563605354938e-06, "loss": 1.397, "step": 15487 }, { "epoch": 0.8467668083594166, "grad_norm": 1.5142167806625366, "learning_rate": 1.282460912251784e-06, "loss": 1.1406, "step": 15488 }, { "epoch": 0.8468214808031382, "grad_norm": 1.6632070541381836, "learning_rate": 1.2815657550671612e-06, "loss": 1.513, "step": 15489 }, { "epoch": 0.8468761532468597, "grad_norm": 1.355773687362671, "learning_rate": 1.2806708890115138e-06, "loss": 1.5349, "step": 15490 }, { "epoch": 0.8469308256905813, "grad_norm": 1.4410984516143799, "learning_rate": 1.279776314114728e-06, "loss": 1.4232, "step": 15491 }, { "epoch": 0.8469854981343029, "grad_norm": 1.3004225492477417, "learning_rate": 1.278882030406674e-06, "loss": 1.6166, "step": 15492 }, { "epoch": 0.8470401705780244, "grad_norm": 2.0155184268951416, "learning_rate": 1.2779880379172105e-06, "loss": 1.3648, "step": 15493 }, { "epoch": 0.847094843021746, "grad_norm": 1.5925012826919556, "learning_rate": 1.2770943366761968e-06, "loss": 1.4049, "step": 15494 }, { "epoch": 0.8471495154654676, "grad_norm": 1.5387015342712402, "learning_rate": 1.2762009267134712e-06, "loss": 1.4704, "step": 15495 }, { "epoch": 0.847204187909189, "grad_norm": 1.4622080326080322, "learning_rate": 1.2753078080588688e-06, "loss": 1.3287, "step": 15496 }, { "epoch": 0.8472588603529106, "grad_norm": 1.5524020195007324, "learning_rate": 1.2744149807422113e-06, "loss": 1.5004, "step": 15497 }, { "epoch": 0.8473135327966321, "grad_norm": 1.722151279449463, "learning_rate": 1.2735224447933104e-06, "loss": 1.4213, "step": 15498 }, { "epoch": 0.8473682052403537, "grad_norm": 1.3947193622589111, "learning_rate": 1.2726302002419744e-06, "loss": 1.4883, "step": 15499 }, { "epoch": 0.8474228776840753, "grad_norm": 1.7491389513015747, "learning_rate": 1.2717382471179961e-06, "loss": 1.3667, "step": 15500 }, { "epoch": 0.8474775501277968, "grad_norm": 1.387588620185852, "learning_rate": 1.2708465854511565e-06, "loss": 1.4989, "step": 15501 }, { "epoch": 0.8475322225715184, "grad_norm": 1.5118634700775146, "learning_rate": 1.2699552152712348e-06, "loss": 1.689, "step": 15502 }, { "epoch": 0.84758689501524, "grad_norm": 1.9005094766616821, "learning_rate": 1.2690641366079947e-06, "loss": 1.3582, "step": 15503 }, { "epoch": 0.8476415674589615, "grad_norm": 1.435091495513916, "learning_rate": 1.2681733494911897e-06, "loss": 1.5756, "step": 15504 }, { "epoch": 0.8476962399026831, "grad_norm": 1.3270124197006226, "learning_rate": 1.2672828539505665e-06, "loss": 1.6129, "step": 15505 }, { "epoch": 0.8477509123464047, "grad_norm": 1.624215841293335, "learning_rate": 1.2663926500158618e-06, "loss": 1.1691, "step": 15506 }, { "epoch": 0.8478055847901261, "grad_norm": 1.5486159324645996, "learning_rate": 1.2655027377167994e-06, "loss": 1.3719, "step": 15507 }, { "epoch": 0.8478602572338477, "grad_norm": 1.790474534034729, "learning_rate": 1.2646131170830977e-06, "loss": 1.4302, "step": 15508 }, { "epoch": 0.8479149296775693, "grad_norm": 1.5959739685058594, "learning_rate": 1.2637237881444619e-06, "loss": 1.2683, "step": 15509 }, { "epoch": 0.8479696021212908, "grad_norm": 1.5559134483337402, "learning_rate": 1.2628347509305905e-06, "loss": 1.3367, "step": 15510 }, { "epoch": 0.8480242745650124, "grad_norm": 1.9802991151809692, "learning_rate": 1.2619460054711685e-06, "loss": 1.2784, "step": 15511 }, { "epoch": 0.8480789470087339, "grad_norm": 1.6049365997314453, "learning_rate": 1.2610575517958713e-06, "loss": 1.5636, "step": 15512 }, { "epoch": 0.8481336194524555, "grad_norm": 1.2906121015548706, "learning_rate": 1.2601693899343714e-06, "loss": 1.5217, "step": 15513 }, { "epoch": 0.8481882918961771, "grad_norm": 1.4848467111587524, "learning_rate": 1.2592815199163244e-06, "loss": 1.2661, "step": 15514 }, { "epoch": 0.8482429643398985, "grad_norm": 1.4941537380218506, "learning_rate": 1.2583939417713764e-06, "loss": 1.3416, "step": 15515 }, { "epoch": 0.8482976367836201, "grad_norm": 1.6875128746032715, "learning_rate": 1.257506655529168e-06, "loss": 1.4468, "step": 15516 }, { "epoch": 0.8483523092273417, "grad_norm": 1.6759800910949707, "learning_rate": 1.2566196612193293e-06, "loss": 1.2461, "step": 15517 }, { "epoch": 0.8484069816710632, "grad_norm": 1.7184228897094727, "learning_rate": 1.2557329588714739e-06, "loss": 1.2855, "step": 15518 }, { "epoch": 0.8484616541147848, "grad_norm": 1.9881389141082764, "learning_rate": 1.2548465485152162e-06, "loss": 1.3367, "step": 15519 }, { "epoch": 0.8485163265585064, "grad_norm": 1.9008796215057373, "learning_rate": 1.2539604301801544e-06, "loss": 1.351, "step": 15520 }, { "epoch": 0.8485709990022279, "grad_norm": 1.8864543437957764, "learning_rate": 1.2530746038958741e-06, "loss": 1.3699, "step": 15521 }, { "epoch": 0.8486256714459495, "grad_norm": 1.3336983919143677, "learning_rate": 1.2521890696919604e-06, "loss": 1.6456, "step": 15522 }, { "epoch": 0.8486803438896711, "grad_norm": 1.5936357975006104, "learning_rate": 1.2513038275979805e-06, "loss": 1.4512, "step": 15523 }, { "epoch": 0.8487350163333925, "grad_norm": 1.0482499599456787, "learning_rate": 1.2504188776434955e-06, "loss": 1.6399, "step": 15524 }, { "epoch": 0.8487896887771141, "grad_norm": 1.44845712184906, "learning_rate": 1.2495342198580562e-06, "loss": 1.5315, "step": 15525 }, { "epoch": 0.8488443612208356, "grad_norm": 2.0978238582611084, "learning_rate": 1.2486498542712e-06, "loss": 1.249, "step": 15526 }, { "epoch": 0.8488990336645572, "grad_norm": 1.6689738035202026, "learning_rate": 1.2477657809124632e-06, "loss": 1.3858, "step": 15527 }, { "epoch": 0.8489537061082788, "grad_norm": 1.7388783693313599, "learning_rate": 1.2468819998113658e-06, "loss": 1.4498, "step": 15528 }, { "epoch": 0.8490083785520003, "grad_norm": 1.5630146265029907, "learning_rate": 1.2459985109974149e-06, "loss": 1.3282, "step": 15529 }, { "epoch": 0.8490630509957219, "grad_norm": 1.0875455141067505, "learning_rate": 1.2451153145001183e-06, "loss": 1.5469, "step": 15530 }, { "epoch": 0.8491177234394435, "grad_norm": 1.6376665830612183, "learning_rate": 1.2442324103489656e-06, "loss": 1.4128, "step": 15531 }, { "epoch": 0.849172395883165, "grad_norm": 1.5223013162612915, "learning_rate": 1.2433497985734356e-06, "loss": 1.1201, "step": 15532 }, { "epoch": 0.8492270683268865, "grad_norm": 1.444273829460144, "learning_rate": 1.2424674792030067e-06, "loss": 1.7399, "step": 15533 }, { "epoch": 0.8492817407706081, "grad_norm": 1.2615699768066406, "learning_rate": 1.2415854522671388e-06, "loss": 1.5823, "step": 15534 }, { "epoch": 0.8493364132143296, "grad_norm": 1.6524631977081299, "learning_rate": 1.2407037177952852e-06, "loss": 1.3543, "step": 15535 }, { "epoch": 0.8493910856580512, "grad_norm": 1.3083679676055908, "learning_rate": 1.2398222758168887e-06, "loss": 1.3466, "step": 15536 }, { "epoch": 0.8494457581017728, "grad_norm": 1.704068660736084, "learning_rate": 1.2389411263613826e-06, "loss": 1.4252, "step": 15537 }, { "epoch": 0.8495004305454943, "grad_norm": 1.5666509866714478, "learning_rate": 1.238060269458189e-06, "loss": 1.4928, "step": 15538 }, { "epoch": 0.8495551029892159, "grad_norm": 1.3688794374465942, "learning_rate": 1.237179705136725e-06, "loss": 1.4629, "step": 15539 }, { "epoch": 0.8496097754329374, "grad_norm": 1.3532181978225708, "learning_rate": 1.2362994334263933e-06, "loss": 1.54, "step": 15540 }, { "epoch": 0.849664447876659, "grad_norm": 1.6980558633804321, "learning_rate": 1.2354194543565868e-06, "loss": 1.4842, "step": 15541 }, { "epoch": 0.8497191203203805, "grad_norm": 1.4010869264602661, "learning_rate": 1.2345397679566927e-06, "loss": 1.3767, "step": 15542 }, { "epoch": 0.849773792764102, "grad_norm": 2.7626771926879883, "learning_rate": 1.2336603742560826e-06, "loss": 1.3251, "step": 15543 }, { "epoch": 0.8498284652078236, "grad_norm": 1.6403443813323975, "learning_rate": 1.232781273284126e-06, "loss": 1.5247, "step": 15544 }, { "epoch": 0.8498831376515452, "grad_norm": 2.028909921646118, "learning_rate": 1.231902465070176e-06, "loss": 1.308, "step": 15545 }, { "epoch": 0.8499378100952667, "grad_norm": 1.7964037656784058, "learning_rate": 1.2310239496435749e-06, "loss": 1.4724, "step": 15546 }, { "epoch": 0.8499924825389883, "grad_norm": 1.6644350290298462, "learning_rate": 1.2301457270336637e-06, "loss": 1.3915, "step": 15547 }, { "epoch": 0.8500471549827099, "grad_norm": 1.6602263450622559, "learning_rate": 1.2292677972697654e-06, "loss": 1.3553, "step": 15548 }, { "epoch": 0.8501018274264314, "grad_norm": 1.6021170616149902, "learning_rate": 1.2283901603811964e-06, "loss": 1.5339, "step": 15549 }, { "epoch": 0.850156499870153, "grad_norm": 1.7924628257751465, "learning_rate": 1.2275128163972638e-06, "loss": 1.3527, "step": 15550 }, { "epoch": 0.8502111723138746, "grad_norm": 1.3912701606750488, "learning_rate": 1.2266357653472626e-06, "loss": 1.381, "step": 15551 }, { "epoch": 0.850265844757596, "grad_norm": 1.8956935405731201, "learning_rate": 1.2257590072604796e-06, "loss": 1.5085, "step": 15552 }, { "epoch": 0.8503205172013176, "grad_norm": 1.7642837762832642, "learning_rate": 1.2248825421661937e-06, "loss": 1.1807, "step": 15553 }, { "epoch": 0.8503751896450391, "grad_norm": 1.4618220329284668, "learning_rate": 1.2240063700936722e-06, "loss": 1.5814, "step": 15554 }, { "epoch": 0.8504298620887607, "grad_norm": 1.725816011428833, "learning_rate": 1.2231304910721686e-06, "loss": 1.2442, "step": 15555 }, { "epoch": 0.8504845345324823, "grad_norm": 1.4354103803634644, "learning_rate": 1.2222549051309362e-06, "loss": 1.3282, "step": 15556 }, { "epoch": 0.8505392069762038, "grad_norm": 1.9539611339569092, "learning_rate": 1.2213796122992072e-06, "loss": 1.4088, "step": 15557 }, { "epoch": 0.8505938794199254, "grad_norm": 1.7802268266677856, "learning_rate": 1.220504612606216e-06, "loss": 1.4394, "step": 15558 }, { "epoch": 0.850648551863647, "grad_norm": 1.6467660665512085, "learning_rate": 1.2196299060811756e-06, "loss": 1.2541, "step": 15559 }, { "epoch": 0.8507032243073684, "grad_norm": 1.251509666442871, "learning_rate": 1.2187554927532963e-06, "loss": 1.5889, "step": 15560 }, { "epoch": 0.85075789675109, "grad_norm": 1.771125078201294, "learning_rate": 1.2178813726517779e-06, "loss": 1.1426, "step": 15561 }, { "epoch": 0.8508125691948116, "grad_norm": 1.6583870649337769, "learning_rate": 1.2170075458058084e-06, "loss": 1.3461, "step": 15562 }, { "epoch": 0.8508672416385331, "grad_norm": 1.3408383131027222, "learning_rate": 1.2161340122445674e-06, "loss": 1.3092, "step": 15563 }, { "epoch": 0.8509219140822547, "grad_norm": 1.4545516967773438, "learning_rate": 1.215260771997223e-06, "loss": 1.5825, "step": 15564 }, { "epoch": 0.8509765865259763, "grad_norm": 1.5793379545211792, "learning_rate": 1.2143878250929364e-06, "loss": 1.4689, "step": 15565 }, { "epoch": 0.8510312589696978, "grad_norm": 1.4897576570510864, "learning_rate": 1.213515171560854e-06, "loss": 1.5944, "step": 15566 }, { "epoch": 0.8510859314134194, "grad_norm": 1.652698040008545, "learning_rate": 1.2126428114301204e-06, "loss": 1.3776, "step": 15567 }, { "epoch": 0.8511406038571409, "grad_norm": 1.4604648351669312, "learning_rate": 1.2117707447298633e-06, "loss": 1.3857, "step": 15568 }, { "epoch": 0.8511952763008624, "grad_norm": 1.3742344379425049, "learning_rate": 1.2108989714892006e-06, "loss": 1.2558, "step": 15569 }, { "epoch": 0.851249948744584, "grad_norm": 1.513429880142212, "learning_rate": 1.2100274917372479e-06, "loss": 1.342, "step": 15570 }, { "epoch": 0.8513046211883055, "grad_norm": 1.8408119678497314, "learning_rate": 1.209156305503102e-06, "loss": 1.3875, "step": 15571 }, { "epoch": 0.8513592936320271, "grad_norm": 1.6978034973144531, "learning_rate": 1.2082854128158572e-06, "loss": 1.295, "step": 15572 }, { "epoch": 0.8514139660757487, "grad_norm": 1.677520751953125, "learning_rate": 1.2074148137045926e-06, "loss": 1.4479, "step": 15573 }, { "epoch": 0.8514686385194702, "grad_norm": 1.776813268661499, "learning_rate": 1.2065445081983795e-06, "loss": 1.4658, "step": 15574 }, { "epoch": 0.8515233109631918, "grad_norm": 1.5684971809387207, "learning_rate": 1.2056744963262813e-06, "loss": 1.2217, "step": 15575 }, { "epoch": 0.8515779834069134, "grad_norm": 1.5274404287338257, "learning_rate": 1.2048047781173467e-06, "loss": 1.5862, "step": 15576 }, { "epoch": 0.8516326558506349, "grad_norm": 1.6205157041549683, "learning_rate": 1.2039353536006171e-06, "loss": 1.5528, "step": 15577 }, { "epoch": 0.8516873282943564, "grad_norm": 1.2744168043136597, "learning_rate": 1.2030662228051292e-06, "loss": 1.3184, "step": 15578 }, { "epoch": 0.851742000738078, "grad_norm": 1.4320741891860962, "learning_rate": 1.2021973857599034e-06, "loss": 1.4249, "step": 15579 }, { "epoch": 0.8517966731817995, "grad_norm": 1.462714433670044, "learning_rate": 1.2013288424939484e-06, "loss": 1.5431, "step": 15580 }, { "epoch": 0.8518513456255211, "grad_norm": 1.329333782196045, "learning_rate": 1.2004605930362724e-06, "loss": 1.444, "step": 15581 }, { "epoch": 0.8519060180692426, "grad_norm": 1.57558274269104, "learning_rate": 1.1995926374158663e-06, "loss": 1.1559, "step": 15582 }, { "epoch": 0.8519606905129642, "grad_norm": 1.56205153465271, "learning_rate": 1.1987249756617103e-06, "loss": 1.2156, "step": 15583 }, { "epoch": 0.8520153629566858, "grad_norm": 2.5797574520111084, "learning_rate": 1.1978576078027825e-06, "loss": 1.3413, "step": 15584 }, { "epoch": 0.8520700354004073, "grad_norm": 1.7464017868041992, "learning_rate": 1.1969905338680421e-06, "loss": 1.5981, "step": 15585 }, { "epoch": 0.8521247078441289, "grad_norm": 1.7521499395370483, "learning_rate": 1.1961237538864468e-06, "loss": 1.4357, "step": 15586 }, { "epoch": 0.8521793802878505, "grad_norm": 1.528541088104248, "learning_rate": 1.195257267886939e-06, "loss": 1.2479, "step": 15587 }, { "epoch": 0.8522340527315719, "grad_norm": 1.433243989944458, "learning_rate": 1.194391075898451e-06, "loss": 1.3463, "step": 15588 }, { "epoch": 0.8522887251752935, "grad_norm": 1.600630521774292, "learning_rate": 1.193525177949908e-06, "loss": 1.358, "step": 15589 }, { "epoch": 0.8523433976190151, "grad_norm": 2.5842504501342773, "learning_rate": 1.192659574070225e-06, "loss": 1.2373, "step": 15590 }, { "epoch": 0.8523980700627366, "grad_norm": 1.2566864490509033, "learning_rate": 1.1917942642883028e-06, "loss": 1.4594, "step": 15591 }, { "epoch": 0.8524527425064582, "grad_norm": 2.2524547576904297, "learning_rate": 1.1909292486330427e-06, "loss": 1.1443, "step": 15592 }, { "epoch": 0.8525074149501798, "grad_norm": 1.556929111480713, "learning_rate": 1.1900645271333245e-06, "loss": 1.1818, "step": 15593 }, { "epoch": 0.8525620873939013, "grad_norm": 1.6212987899780273, "learning_rate": 1.1892000998180242e-06, "loss": 1.6296, "step": 15594 }, { "epoch": 0.8526167598376229, "grad_norm": 1.4762725830078125, "learning_rate": 1.1883359667160087e-06, "loss": 1.3206, "step": 15595 }, { "epoch": 0.8526714322813443, "grad_norm": 1.5923386812210083, "learning_rate": 1.187472127856133e-06, "loss": 1.4871, "step": 15596 }, { "epoch": 0.8527261047250659, "grad_norm": 1.4120672941207886, "learning_rate": 1.1866085832672403e-06, "loss": 1.2703, "step": 15597 }, { "epoch": 0.8527807771687875, "grad_norm": 1.3546041250228882, "learning_rate": 1.185745332978171e-06, "loss": 1.7041, "step": 15598 }, { "epoch": 0.852835449612509, "grad_norm": 1.4406468868255615, "learning_rate": 1.1848823770177453e-06, "loss": 1.6449, "step": 15599 }, { "epoch": 0.8528901220562306, "grad_norm": 1.3651725053787231, "learning_rate": 1.1840197154147836e-06, "loss": 1.6346, "step": 15600 }, { "epoch": 0.8529447944999522, "grad_norm": 1.2431374788284302, "learning_rate": 1.1831573481980929e-06, "loss": 1.4753, "step": 15601 }, { "epoch": 0.8529994669436737, "grad_norm": 1.9008902311325073, "learning_rate": 1.1822952753964667e-06, "loss": 1.3921, "step": 15602 }, { "epoch": 0.8530541393873953, "grad_norm": 1.5440938472747803, "learning_rate": 1.1814334970386921e-06, "loss": 1.4809, "step": 15603 }, { "epoch": 0.8531088118311169, "grad_norm": 1.4474420547485352, "learning_rate": 1.180572013153547e-06, "loss": 1.5588, "step": 15604 }, { "epoch": 0.8531634842748383, "grad_norm": 2.253509759902954, "learning_rate": 1.1797108237697963e-06, "loss": 1.3882, "step": 15605 }, { "epoch": 0.8532181567185599, "grad_norm": 1.34122633934021, "learning_rate": 1.1788499289162003e-06, "loss": 1.5749, "step": 15606 }, { "epoch": 0.8532728291622815, "grad_norm": 1.1117315292358398, "learning_rate": 1.1779893286215039e-06, "loss": 1.7398, "step": 15607 }, { "epoch": 0.853327501606003, "grad_norm": 1.7799029350280762, "learning_rate": 1.1771290229144438e-06, "loss": 1.3607, "step": 15608 }, { "epoch": 0.8533821740497246, "grad_norm": 1.5136641263961792, "learning_rate": 1.1762690118237518e-06, "loss": 1.5561, "step": 15609 }, { "epoch": 0.8534368464934461, "grad_norm": 1.7512952089309692, "learning_rate": 1.1754092953781426e-06, "loss": 1.3721, "step": 15610 }, { "epoch": 0.8534915189371677, "grad_norm": 1.7577725648880005, "learning_rate": 1.1745498736063222e-06, "loss": 1.2695, "step": 15611 }, { "epoch": 0.8535461913808893, "grad_norm": 1.3396568298339844, "learning_rate": 1.173690746536994e-06, "loss": 1.5184, "step": 15612 }, { "epoch": 0.8536008638246108, "grad_norm": 1.3674228191375732, "learning_rate": 1.172831914198842e-06, "loss": 1.3519, "step": 15613 }, { "epoch": 0.8536555362683323, "grad_norm": 1.4124908447265625, "learning_rate": 1.1719733766205477e-06, "loss": 1.1229, "step": 15614 }, { "epoch": 0.8537102087120539, "grad_norm": 1.4363620281219482, "learning_rate": 1.171115133830777e-06, "loss": 1.3615, "step": 15615 }, { "epoch": 0.8537648811557754, "grad_norm": 1.4979737997055054, "learning_rate": 1.1702571858581867e-06, "loss": 1.4596, "step": 15616 }, { "epoch": 0.853819553599497, "grad_norm": 1.5989960432052612, "learning_rate": 1.1693995327314322e-06, "loss": 1.4321, "step": 15617 }, { "epoch": 0.8538742260432186, "grad_norm": 1.6680002212524414, "learning_rate": 1.1685421744791481e-06, "loss": 1.5197, "step": 15618 }, { "epoch": 0.8539288984869401, "grad_norm": 1.5511305332183838, "learning_rate": 1.1676851111299625e-06, "loss": 1.44, "step": 15619 }, { "epoch": 0.8539835709306617, "grad_norm": 1.3384524583816528, "learning_rate": 1.1668283427124994e-06, "loss": 1.3702, "step": 15620 }, { "epoch": 0.8540382433743833, "grad_norm": 1.685823678970337, "learning_rate": 1.1659718692553646e-06, "loss": 1.3133, "step": 15621 }, { "epoch": 0.8540929158181048, "grad_norm": 1.520796537399292, "learning_rate": 1.1651156907871575e-06, "loss": 1.3658, "step": 15622 }, { "epoch": 0.8541475882618264, "grad_norm": 1.2093225717544556, "learning_rate": 1.1642598073364707e-06, "loss": 1.4302, "step": 15623 }, { "epoch": 0.8542022607055478, "grad_norm": 1.8359310626983643, "learning_rate": 1.1634042189318828e-06, "loss": 1.5638, "step": 15624 }, { "epoch": 0.8542569331492694, "grad_norm": 1.4714103937149048, "learning_rate": 1.1625489256019618e-06, "loss": 1.4971, "step": 15625 }, { "epoch": 0.854311605592991, "grad_norm": 1.4408222436904907, "learning_rate": 1.1616939273752715e-06, "loss": 1.4814, "step": 15626 }, { "epoch": 0.8543662780367125, "grad_norm": 1.6713000535964966, "learning_rate": 1.1608392242803613e-06, "loss": 1.4557, "step": 15627 }, { "epoch": 0.8544209504804341, "grad_norm": 1.2415738105773926, "learning_rate": 1.1599848163457716e-06, "loss": 1.3737, "step": 15628 }, { "epoch": 0.8544756229241557, "grad_norm": 1.7133245468139648, "learning_rate": 1.1591307036000321e-06, "loss": 1.3573, "step": 15629 }, { "epoch": 0.8545302953678772, "grad_norm": 1.6239893436431885, "learning_rate": 1.158276886071662e-06, "loss": 1.384, "step": 15630 }, { "epoch": 0.8545849678115988, "grad_norm": 1.6826437711715698, "learning_rate": 1.1574233637891762e-06, "loss": 1.3558, "step": 15631 }, { "epoch": 0.8546396402553204, "grad_norm": 1.647236704826355, "learning_rate": 1.1565701367810744e-06, "loss": 1.4269, "step": 15632 }, { "epoch": 0.8546943126990418, "grad_norm": 1.6141092777252197, "learning_rate": 1.1557172050758447e-06, "loss": 1.4502, "step": 15633 }, { "epoch": 0.8547489851427634, "grad_norm": 1.8486884832382202, "learning_rate": 1.1548645687019745e-06, "loss": 1.2703, "step": 15634 }, { "epoch": 0.854803657586485, "grad_norm": 1.5382238626480103, "learning_rate": 1.1540122276879317e-06, "loss": 1.5818, "step": 15635 }, { "epoch": 0.8548583300302065, "grad_norm": 1.6754635572433472, "learning_rate": 1.1531601820621763e-06, "loss": 1.3009, "step": 15636 }, { "epoch": 0.8549130024739281, "grad_norm": 1.2764066457748413, "learning_rate": 1.1523084318531641e-06, "loss": 1.397, "step": 15637 }, { "epoch": 0.8549676749176496, "grad_norm": 1.8466031551361084, "learning_rate": 1.1514569770893347e-06, "loss": 1.4089, "step": 15638 }, { "epoch": 0.8550223473613712, "grad_norm": 1.5062586069107056, "learning_rate": 1.1506058177991198e-06, "loss": 1.2454, "step": 15639 }, { "epoch": 0.8550770198050928, "grad_norm": 1.7992295026779175, "learning_rate": 1.149754954010943e-06, "loss": 1.2877, "step": 15640 }, { "epoch": 0.8551316922488142, "grad_norm": 1.431720495223999, "learning_rate": 1.1489043857532167e-06, "loss": 1.4774, "step": 15641 }, { "epoch": 0.8551863646925358, "grad_norm": 1.4530699253082275, "learning_rate": 1.1480541130543433e-06, "loss": 1.3933, "step": 15642 }, { "epoch": 0.8552410371362574, "grad_norm": 1.7436813116073608, "learning_rate": 1.1472041359427145e-06, "loss": 1.426, "step": 15643 }, { "epoch": 0.8552957095799789, "grad_norm": 1.8338443040847778, "learning_rate": 1.1463544544467109e-06, "loss": 1.2397, "step": 15644 }, { "epoch": 0.8553503820237005, "grad_norm": 1.9008156061172485, "learning_rate": 1.14550506859471e-06, "loss": 1.3718, "step": 15645 }, { "epoch": 0.8554050544674221, "grad_norm": 1.3716787099838257, "learning_rate": 1.1446559784150724e-06, "loss": 1.3939, "step": 15646 }, { "epoch": 0.8554597269111436, "grad_norm": 1.6654170751571655, "learning_rate": 1.1438071839361498e-06, "loss": 1.3218, "step": 15647 }, { "epoch": 0.8555143993548652, "grad_norm": 1.4366424083709717, "learning_rate": 1.1429586851862884e-06, "loss": 1.7923, "step": 15648 }, { "epoch": 0.8555690717985868, "grad_norm": 1.7605977058410645, "learning_rate": 1.14211048219382e-06, "loss": 1.2333, "step": 15649 }, { "epoch": 0.8556237442423082, "grad_norm": 1.7539584636688232, "learning_rate": 1.1412625749870676e-06, "loss": 1.5291, "step": 15650 }, { "epoch": 0.8556784166860298, "grad_norm": 2.359428644180298, "learning_rate": 1.1404149635943462e-06, "loss": 1.2216, "step": 15651 }, { "epoch": 0.8557330891297513, "grad_norm": 1.512433409690857, "learning_rate": 1.13956764804396e-06, "loss": 1.1142, "step": 15652 }, { "epoch": 0.8557877615734729, "grad_norm": 1.333977460861206, "learning_rate": 1.1387206283642005e-06, "loss": 1.4346, "step": 15653 }, { "epoch": 0.8558424340171945, "grad_norm": 1.5998746156692505, "learning_rate": 1.1378739045833543e-06, "loss": 1.494, "step": 15654 }, { "epoch": 0.855897106460916, "grad_norm": 1.6058882474899292, "learning_rate": 1.1370274767296907e-06, "loss": 1.3095, "step": 15655 }, { "epoch": 0.8559517789046376, "grad_norm": 1.3367747068405151, "learning_rate": 1.1361813448314796e-06, "loss": 1.2326, "step": 15656 }, { "epoch": 0.8560064513483592, "grad_norm": 1.9356515407562256, "learning_rate": 1.1353355089169737e-06, "loss": 1.2623, "step": 15657 }, { "epoch": 0.8560611237920807, "grad_norm": 1.6124110221862793, "learning_rate": 1.134489969014414e-06, "loss": 1.4996, "step": 15658 }, { "epoch": 0.8561157962358023, "grad_norm": 1.3781843185424805, "learning_rate": 1.1336447251520398e-06, "loss": 1.3715, "step": 15659 }, { "epoch": 0.8561704686795238, "grad_norm": 1.9242993593215942, "learning_rate": 1.1327997773580733e-06, "loss": 1.3921, "step": 15660 }, { "epoch": 0.8562251411232453, "grad_norm": 1.6990537643432617, "learning_rate": 1.1319551256607286e-06, "loss": 1.3755, "step": 15661 }, { "epoch": 0.8562798135669669, "grad_norm": 1.6524773836135864, "learning_rate": 1.131110770088214e-06, "loss": 1.2122, "step": 15662 }, { "epoch": 0.8563344860106885, "grad_norm": 1.5070695877075195, "learning_rate": 1.1302667106687227e-06, "loss": 1.3812, "step": 15663 }, { "epoch": 0.85638915845441, "grad_norm": 1.5844327211380005, "learning_rate": 1.1294229474304364e-06, "loss": 1.4784, "step": 15664 }, { "epoch": 0.8564438308981316, "grad_norm": 1.6050915718078613, "learning_rate": 1.1285794804015349e-06, "loss": 1.1576, "step": 15665 }, { "epoch": 0.8564985033418531, "grad_norm": 1.3083035945892334, "learning_rate": 1.1277363096101834e-06, "loss": 1.413, "step": 15666 }, { "epoch": 0.8565531757855747, "grad_norm": 1.5585012435913086, "learning_rate": 1.1268934350845351e-06, "loss": 1.4874, "step": 15667 }, { "epoch": 0.8566078482292963, "grad_norm": 1.603309988975525, "learning_rate": 1.1260508568527362e-06, "loss": 1.6653, "step": 15668 }, { "epoch": 0.8566625206730177, "grad_norm": 1.7506108283996582, "learning_rate": 1.125208574942921e-06, "loss": 1.4347, "step": 15669 }, { "epoch": 0.8567171931167393, "grad_norm": 2.0300357341766357, "learning_rate": 1.124366589383219e-06, "loss": 1.396, "step": 15670 }, { "epoch": 0.8567718655604609, "grad_norm": 1.502341866493225, "learning_rate": 1.1235249002017434e-06, "loss": 1.2119, "step": 15671 }, { "epoch": 0.8568265380041824, "grad_norm": 1.4826072454452515, "learning_rate": 1.1226835074265985e-06, "loss": 1.5202, "step": 15672 }, { "epoch": 0.856881210447904, "grad_norm": 1.4331015348434448, "learning_rate": 1.121842411085885e-06, "loss": 1.5812, "step": 15673 }, { "epoch": 0.8569358828916256, "grad_norm": 1.9250909090042114, "learning_rate": 1.1210016112076871e-06, "loss": 1.1989, "step": 15674 }, { "epoch": 0.8569905553353471, "grad_norm": 1.5417876243591309, "learning_rate": 1.120161107820078e-06, "loss": 1.4815, "step": 15675 }, { "epoch": 0.8570452277790687, "grad_norm": 1.7507879734039307, "learning_rate": 1.1193209009511285e-06, "loss": 1.43, "step": 15676 }, { "epoch": 0.8570999002227903, "grad_norm": 1.7228668928146362, "learning_rate": 1.118480990628894e-06, "loss": 1.4767, "step": 15677 }, { "epoch": 0.8571545726665117, "grad_norm": 2.047400951385498, "learning_rate": 1.1176413768814198e-06, "loss": 1.3356, "step": 15678 }, { "epoch": 0.8572092451102333, "grad_norm": 1.552094578742981, "learning_rate": 1.1168020597367435e-06, "loss": 1.3568, "step": 15679 }, { "epoch": 0.8572639175539548, "grad_norm": 1.2165888547897339, "learning_rate": 1.1159630392228904e-06, "loss": 1.6032, "step": 15680 }, { "epoch": 0.8573185899976764, "grad_norm": 2.1676297187805176, "learning_rate": 1.1151243153678782e-06, "loss": 1.105, "step": 15681 }, { "epoch": 0.857373262441398, "grad_norm": 1.216141939163208, "learning_rate": 1.1142858881997155e-06, "loss": 1.6641, "step": 15682 }, { "epoch": 0.8574279348851195, "grad_norm": 1.3655232191085815, "learning_rate": 1.1134477577463954e-06, "loss": 1.3824, "step": 15683 }, { "epoch": 0.8574826073288411, "grad_norm": 1.8295179605484009, "learning_rate": 1.1126099240359101e-06, "loss": 1.5266, "step": 15684 }, { "epoch": 0.8575372797725627, "grad_norm": 1.6787551641464233, "learning_rate": 1.1117723870962337e-06, "loss": 1.1093, "step": 15685 }, { "epoch": 0.8575919522162841, "grad_norm": 1.527976632118225, "learning_rate": 1.1109351469553331e-06, "loss": 1.362, "step": 15686 }, { "epoch": 0.8576466246600057, "grad_norm": 1.5045489072799683, "learning_rate": 1.1100982036411679e-06, "loss": 1.4965, "step": 15687 }, { "epoch": 0.8577012971037273, "grad_norm": 1.6139494180679321, "learning_rate": 1.1092615571816855e-06, "loss": 1.4088, "step": 15688 }, { "epoch": 0.8577559695474488, "grad_norm": 1.319451928138733, "learning_rate": 1.1084252076048195e-06, "loss": 1.4284, "step": 15689 }, { "epoch": 0.8578106419911704, "grad_norm": 1.3261393308639526, "learning_rate": 1.1075891549385032e-06, "loss": 1.3612, "step": 15690 }, { "epoch": 0.857865314434892, "grad_norm": 1.4554258584976196, "learning_rate": 1.1067533992106516e-06, "loss": 1.4852, "step": 15691 }, { "epoch": 0.8579199868786135, "grad_norm": 1.7822754383087158, "learning_rate": 1.1059179404491726e-06, "loss": 1.3521, "step": 15692 }, { "epoch": 0.8579746593223351, "grad_norm": 1.9019917249679565, "learning_rate": 1.105082778681964e-06, "loss": 1.4604, "step": 15693 }, { "epoch": 0.8580293317660567, "grad_norm": 1.288818120956421, "learning_rate": 1.1042479139369144e-06, "loss": 1.482, "step": 15694 }, { "epoch": 0.8580840042097782, "grad_norm": 1.4305152893066406, "learning_rate": 1.1034133462418995e-06, "loss": 1.225, "step": 15695 }, { "epoch": 0.8581386766534997, "grad_norm": 1.3951092958450317, "learning_rate": 1.1025790756247912e-06, "loss": 1.4663, "step": 15696 }, { "epoch": 0.8581933490972212, "grad_norm": 3.274777412414551, "learning_rate": 1.101745102113444e-06, "loss": 1.2531, "step": 15697 }, { "epoch": 0.8582480215409428, "grad_norm": 1.289015293121338, "learning_rate": 1.1009114257357101e-06, "loss": 1.4873, "step": 15698 }, { "epoch": 0.8583026939846644, "grad_norm": 1.5461162328720093, "learning_rate": 1.100078046519426e-06, "loss": 1.4361, "step": 15699 }, { "epoch": 0.8583573664283859, "grad_norm": 2.108248233795166, "learning_rate": 1.0992449644924186e-06, "loss": 1.3754, "step": 15700 }, { "epoch": 0.8584120388721075, "grad_norm": 1.6141103506088257, "learning_rate": 1.0984121796825099e-06, "loss": 1.2576, "step": 15701 }, { "epoch": 0.8584667113158291, "grad_norm": 1.4828898906707764, "learning_rate": 1.0975796921175065e-06, "loss": 1.314, "step": 15702 }, { "epoch": 0.8585213837595506, "grad_norm": 1.6101397275924683, "learning_rate": 1.096747501825206e-06, "loss": 1.3582, "step": 15703 }, { "epoch": 0.8585760562032722, "grad_norm": 1.511634349822998, "learning_rate": 1.0959156088334e-06, "loss": 1.492, "step": 15704 }, { "epoch": 0.8586307286469937, "grad_norm": 1.466030240058899, "learning_rate": 1.095084013169867e-06, "loss": 1.5711, "step": 15705 }, { "epoch": 0.8586854010907152, "grad_norm": 1.6674346923828125, "learning_rate": 1.0942527148623738e-06, "loss": 1.5218, "step": 15706 }, { "epoch": 0.8587400735344368, "grad_norm": 1.5600687265396118, "learning_rate": 1.0934217139386805e-06, "loss": 1.3573, "step": 15707 }, { "epoch": 0.8587947459781584, "grad_norm": 1.21479070186615, "learning_rate": 1.0925910104265359e-06, "loss": 1.5711, "step": 15708 }, { "epoch": 0.8588494184218799, "grad_norm": 1.8872489929199219, "learning_rate": 1.0917606043536777e-06, "loss": 1.2515, "step": 15709 }, { "epoch": 0.8589040908656015, "grad_norm": 1.82659912109375, "learning_rate": 1.0909304957478394e-06, "loss": 1.4173, "step": 15710 }, { "epoch": 0.858958763309323, "grad_norm": 1.3233753442764282, "learning_rate": 1.0901006846367347e-06, "loss": 1.4651, "step": 15711 }, { "epoch": 0.8590134357530446, "grad_norm": 1.5488560199737549, "learning_rate": 1.0892711710480785e-06, "loss": 1.5381, "step": 15712 }, { "epoch": 0.8590681081967662, "grad_norm": 1.8603752851486206, "learning_rate": 1.088441955009567e-06, "loss": 1.2652, "step": 15713 }, { "epoch": 0.8591227806404876, "grad_norm": 1.5294644832611084, "learning_rate": 1.087613036548888e-06, "loss": 1.306, "step": 15714 }, { "epoch": 0.8591774530842092, "grad_norm": 1.30081307888031, "learning_rate": 1.0867844156937257e-06, "loss": 1.3368, "step": 15715 }, { "epoch": 0.8592321255279308, "grad_norm": 1.4679101705551147, "learning_rate": 1.0859560924717483e-06, "loss": 1.3874, "step": 15716 }, { "epoch": 0.8592867979716523, "grad_norm": 1.39518141746521, "learning_rate": 1.0851280669106135e-06, "loss": 1.4282, "step": 15717 }, { "epoch": 0.8593414704153739, "grad_norm": 1.5968470573425293, "learning_rate": 1.0843003390379714e-06, "loss": 1.1317, "step": 15718 }, { "epoch": 0.8593961428590955, "grad_norm": 1.5239862203598022, "learning_rate": 1.0834729088814632e-06, "loss": 1.6146, "step": 15719 }, { "epoch": 0.859450815302817, "grad_norm": 1.7725660800933838, "learning_rate": 1.0826457764687148e-06, "loss": 1.4826, "step": 15720 }, { "epoch": 0.8595054877465386, "grad_norm": 1.5038366317749023, "learning_rate": 1.0818189418273527e-06, "loss": 1.42, "step": 15721 }, { "epoch": 0.8595601601902602, "grad_norm": 1.3712741136550903, "learning_rate": 1.0809924049849819e-06, "loss": 1.3152, "step": 15722 }, { "epoch": 0.8596148326339816, "grad_norm": 1.5548793077468872, "learning_rate": 1.080166165969202e-06, "loss": 1.6406, "step": 15723 }, { "epoch": 0.8596695050777032, "grad_norm": 1.4922899007797241, "learning_rate": 1.079340224807608e-06, "loss": 1.4656, "step": 15724 }, { "epoch": 0.8597241775214247, "grad_norm": 1.8366061449050903, "learning_rate": 1.0785145815277741e-06, "loss": 1.6932, "step": 15725 }, { "epoch": 0.8597788499651463, "grad_norm": 1.6490478515625, "learning_rate": 1.0776892361572756e-06, "loss": 1.2035, "step": 15726 }, { "epoch": 0.8598335224088679, "grad_norm": 1.4873038530349731, "learning_rate": 1.0768641887236697e-06, "loss": 1.3548, "step": 15727 }, { "epoch": 0.8598881948525894, "grad_norm": 1.3873151540756226, "learning_rate": 1.0760394392545058e-06, "loss": 1.4147, "step": 15728 }, { "epoch": 0.859942867296311, "grad_norm": 1.4735816717147827, "learning_rate": 1.0752149877773278e-06, "loss": 1.3885, "step": 15729 }, { "epoch": 0.8599975397400326, "grad_norm": 1.426140546798706, "learning_rate": 1.0743908343196629e-06, "loss": 1.4773, "step": 15730 }, { "epoch": 0.860052212183754, "grad_norm": 1.6346303224563599, "learning_rate": 1.0735669789090341e-06, "loss": 1.1423, "step": 15731 }, { "epoch": 0.8601068846274756, "grad_norm": 1.458738088607788, "learning_rate": 1.0727434215729494e-06, "loss": 1.3263, "step": 15732 }, { "epoch": 0.8601615570711972, "grad_norm": 1.4930756092071533, "learning_rate": 1.0719201623389107e-06, "loss": 1.3415, "step": 15733 }, { "epoch": 0.8602162295149187, "grad_norm": 1.0761024951934814, "learning_rate": 1.0710972012344067e-06, "loss": 1.339, "step": 15734 }, { "epoch": 0.8602709019586403, "grad_norm": 1.3225263357162476, "learning_rate": 1.0702745382869207e-06, "loss": 1.6588, "step": 15735 }, { "epoch": 0.8603255744023619, "grad_norm": 1.6296000480651855, "learning_rate": 1.0694521735239226e-06, "loss": 1.7344, "step": 15736 }, { "epoch": 0.8603802468460834, "grad_norm": 1.2573457956314087, "learning_rate": 1.068630106972871e-06, "loss": 1.5263, "step": 15737 }, { "epoch": 0.860434919289805, "grad_norm": 1.7778220176696777, "learning_rate": 1.0678083386612193e-06, "loss": 1.1347, "step": 15738 }, { "epoch": 0.8604895917335265, "grad_norm": 1.7339873313903809, "learning_rate": 1.0669868686164053e-06, "loss": 1.2998, "step": 15739 }, { "epoch": 0.860544264177248, "grad_norm": 1.230200171470642, "learning_rate": 1.0661656968658641e-06, "loss": 1.4632, "step": 15740 }, { "epoch": 0.8605989366209696, "grad_norm": 1.4196158647537231, "learning_rate": 1.0653448234370144e-06, "loss": 1.5309, "step": 15741 }, { "epoch": 0.8606536090646911, "grad_norm": 1.1887856721878052, "learning_rate": 1.064524248357265e-06, "loss": 1.519, "step": 15742 }, { "epoch": 0.8607082815084127, "grad_norm": 1.4374383687973022, "learning_rate": 1.0637039716540197e-06, "loss": 1.4005, "step": 15743 }, { "epoch": 0.8607629539521343, "grad_norm": 1.5238280296325684, "learning_rate": 1.0628839933546697e-06, "loss": 1.5449, "step": 15744 }, { "epoch": 0.8608176263958558, "grad_norm": 1.1546193361282349, "learning_rate": 1.0620643134865937e-06, "loss": 1.4401, "step": 15745 }, { "epoch": 0.8608722988395774, "grad_norm": 1.66725754737854, "learning_rate": 1.0612449320771645e-06, "loss": 1.1993, "step": 15746 }, { "epoch": 0.860926971283299, "grad_norm": 1.5618244409561157, "learning_rate": 1.0604258491537423e-06, "loss": 1.3723, "step": 15747 }, { "epoch": 0.8609816437270205, "grad_norm": 1.6001543998718262, "learning_rate": 1.0596070647436763e-06, "loss": 1.4082, "step": 15748 }, { "epoch": 0.861036316170742, "grad_norm": 1.9443618059158325, "learning_rate": 1.0587885788743112e-06, "loss": 1.2707, "step": 15749 }, { "epoch": 0.8610909886144636, "grad_norm": 1.8976519107818604, "learning_rate": 1.0579703915729777e-06, "loss": 1.3282, "step": 15750 }, { "epoch": 0.8611456610581851, "grad_norm": 2.020477056503296, "learning_rate": 1.0571525028669927e-06, "loss": 1.6224, "step": 15751 }, { "epoch": 0.8612003335019067, "grad_norm": 1.660987138748169, "learning_rate": 1.0563349127836731e-06, "loss": 1.4394, "step": 15752 }, { "epoch": 0.8612550059456282, "grad_norm": 1.6950957775115967, "learning_rate": 1.055517621350316e-06, "loss": 1.3375, "step": 15753 }, { "epoch": 0.8613096783893498, "grad_norm": 1.15762197971344, "learning_rate": 1.0547006285942163e-06, "loss": 1.2968, "step": 15754 }, { "epoch": 0.8613643508330714, "grad_norm": 1.407430648803711, "learning_rate": 1.0538839345426543e-06, "loss": 1.3265, "step": 15755 }, { "epoch": 0.8614190232767929, "grad_norm": 1.8280385732650757, "learning_rate": 1.0530675392228995e-06, "loss": 1.3918, "step": 15756 }, { "epoch": 0.8614736957205145, "grad_norm": 1.5053424835205078, "learning_rate": 1.0522514426622143e-06, "loss": 1.4746, "step": 15757 }, { "epoch": 0.8615283681642361, "grad_norm": 1.292336106300354, "learning_rate": 1.0514356448878505e-06, "loss": 1.6093, "step": 15758 }, { "epoch": 0.8615830406079575, "grad_norm": 1.57828950881958, "learning_rate": 1.050620145927046e-06, "loss": 1.6271, "step": 15759 }, { "epoch": 0.8616377130516791, "grad_norm": 1.4035435914993286, "learning_rate": 1.049804945807038e-06, "loss": 1.4131, "step": 15760 }, { "epoch": 0.8616923854954007, "grad_norm": 2.0183475017547607, "learning_rate": 1.048990044555045e-06, "loss": 1.4556, "step": 15761 }, { "epoch": 0.8617470579391222, "grad_norm": 1.697495698928833, "learning_rate": 1.0481754421982758e-06, "loss": 1.5221, "step": 15762 }, { "epoch": 0.8618017303828438, "grad_norm": 1.3183038234710693, "learning_rate": 1.047361138763937e-06, "loss": 1.4476, "step": 15763 }, { "epoch": 0.8618564028265654, "grad_norm": 1.6107213497161865, "learning_rate": 1.0465471342792188e-06, "loss": 1.2766, "step": 15764 }, { "epoch": 0.8619110752702869, "grad_norm": 1.661487340927124, "learning_rate": 1.0457334287712994e-06, "loss": 1.2509, "step": 15765 }, { "epoch": 0.8619657477140085, "grad_norm": 1.8907784223556519, "learning_rate": 1.0449200222673538e-06, "loss": 1.4439, "step": 15766 }, { "epoch": 0.86202042015773, "grad_norm": 1.490586519241333, "learning_rate": 1.0441069147945414e-06, "loss": 1.689, "step": 15767 }, { "epoch": 0.8620750926014515, "grad_norm": 1.8925056457519531, "learning_rate": 1.043294106380016e-06, "loss": 1.1957, "step": 15768 }, { "epoch": 0.8621297650451731, "grad_norm": 1.5949695110321045, "learning_rate": 1.0424815970509184e-06, "loss": 1.3156, "step": 15769 }, { "epoch": 0.8621844374888946, "grad_norm": 2.0618326663970947, "learning_rate": 1.0416693868343796e-06, "loss": 1.307, "step": 15770 }, { "epoch": 0.8622391099326162, "grad_norm": 1.4142837524414062, "learning_rate": 1.0408574757575218e-06, "loss": 1.419, "step": 15771 }, { "epoch": 0.8622937823763378, "grad_norm": 1.4972542524337769, "learning_rate": 1.0400458638474552e-06, "loss": 1.3698, "step": 15772 }, { "epoch": 0.8623484548200593, "grad_norm": 1.6436150074005127, "learning_rate": 1.0392345511312806e-06, "loss": 1.5168, "step": 15773 }, { "epoch": 0.8624031272637809, "grad_norm": 1.7794817686080933, "learning_rate": 1.038423537636094e-06, "loss": 1.3948, "step": 15774 }, { "epoch": 0.8624577997075025, "grad_norm": 1.5788719654083252, "learning_rate": 1.0376128233889747e-06, "loss": 1.6764, "step": 15775 }, { "epoch": 0.862512472151224, "grad_norm": 1.4228711128234863, "learning_rate": 1.0368024084169914e-06, "loss": 1.4695, "step": 15776 }, { "epoch": 0.8625671445949455, "grad_norm": 1.4698293209075928, "learning_rate": 1.035992292747211e-06, "loss": 1.5336, "step": 15777 }, { "epoch": 0.8626218170386671, "grad_norm": 1.6967401504516602, "learning_rate": 1.035182476406682e-06, "loss": 1.2195, "step": 15778 }, { "epoch": 0.8626764894823886, "grad_norm": 1.8609726428985596, "learning_rate": 1.034372959422445e-06, "loss": 1.3527, "step": 15779 }, { "epoch": 0.8627311619261102, "grad_norm": 1.4014815092086792, "learning_rate": 1.033563741821536e-06, "loss": 1.2237, "step": 15780 }, { "epoch": 0.8627858343698317, "grad_norm": 1.6586713790893555, "learning_rate": 1.0327548236309714e-06, "loss": 1.638, "step": 15781 }, { "epoch": 0.8628405068135533, "grad_norm": 1.430985689163208, "learning_rate": 1.0319462048777685e-06, "loss": 1.3644, "step": 15782 }, { "epoch": 0.8628951792572749, "grad_norm": 1.2073136568069458, "learning_rate": 1.0311378855889243e-06, "loss": 1.4161, "step": 15783 }, { "epoch": 0.8629498517009964, "grad_norm": 1.2910094261169434, "learning_rate": 1.030329865791434e-06, "loss": 1.4187, "step": 15784 }, { "epoch": 0.863004524144718, "grad_norm": 1.443084478378296, "learning_rate": 1.0295221455122762e-06, "loss": 1.4282, "step": 15785 }, { "epoch": 0.8630591965884395, "grad_norm": 1.5122671127319336, "learning_rate": 1.0287147247784246e-06, "loss": 1.4917, "step": 15786 }, { "epoch": 0.863113869032161, "grad_norm": 2.0763888359069824, "learning_rate": 1.0279076036168379e-06, "loss": 1.4062, "step": 15787 }, { "epoch": 0.8631685414758826, "grad_norm": 1.5870025157928467, "learning_rate": 1.027100782054471e-06, "loss": 1.5272, "step": 15788 }, { "epoch": 0.8632232139196042, "grad_norm": 1.741638422012329, "learning_rate": 1.0262942601182657e-06, "loss": 1.3436, "step": 15789 }, { "epoch": 0.8632778863633257, "grad_norm": 1.7884879112243652, "learning_rate": 1.0254880378351506e-06, "loss": 1.4629, "step": 15790 }, { "epoch": 0.8633325588070473, "grad_norm": 1.335029125213623, "learning_rate": 1.0246821152320507e-06, "loss": 1.4293, "step": 15791 }, { "epoch": 0.8633872312507689, "grad_norm": 1.4045685529708862, "learning_rate": 1.0238764923358768e-06, "loss": 1.5673, "step": 15792 }, { "epoch": 0.8634419036944904, "grad_norm": 1.6244763135910034, "learning_rate": 1.0230711691735273e-06, "loss": 1.5762, "step": 15793 }, { "epoch": 0.863496576138212, "grad_norm": 1.4766401052474976, "learning_rate": 1.0222661457718986e-06, "loss": 1.4904, "step": 15794 }, { "epoch": 0.8635512485819334, "grad_norm": 1.4862287044525146, "learning_rate": 1.0214614221578701e-06, "loss": 1.6332, "step": 15795 }, { "epoch": 0.863605921025655, "grad_norm": 1.1667160987854004, "learning_rate": 1.0206569983583137e-06, "loss": 1.5967, "step": 15796 }, { "epoch": 0.8636605934693766, "grad_norm": 1.7601758241653442, "learning_rate": 1.0198528744000913e-06, "loss": 1.2224, "step": 15797 }, { "epoch": 0.8637152659130981, "grad_norm": 1.47032630443573, "learning_rate": 1.0190490503100515e-06, "loss": 1.5325, "step": 15798 }, { "epoch": 0.8637699383568197, "grad_norm": 2.342670440673828, "learning_rate": 1.0182455261150404e-06, "loss": 1.4263, "step": 15799 }, { "epoch": 0.8638246108005413, "grad_norm": 1.583114743232727, "learning_rate": 1.0174423018418877e-06, "loss": 1.4514, "step": 15800 }, { "epoch": 0.8638792832442628, "grad_norm": 1.2942579984664917, "learning_rate": 1.0166393775174121e-06, "loss": 1.6895, "step": 15801 }, { "epoch": 0.8639339556879844, "grad_norm": 1.74464750289917, "learning_rate": 1.015836753168431e-06, "loss": 1.3084, "step": 15802 }, { "epoch": 0.863988628131706, "grad_norm": 1.62068510055542, "learning_rate": 1.0150344288217418e-06, "loss": 1.5401, "step": 15803 }, { "epoch": 0.8640433005754274, "grad_norm": 1.382451057434082, "learning_rate": 1.0142324045041351e-06, "loss": 1.2587, "step": 15804 }, { "epoch": 0.864097973019149, "grad_norm": 1.6535271406173706, "learning_rate": 1.0134306802423965e-06, "loss": 1.334, "step": 15805 }, { "epoch": 0.8641526454628706, "grad_norm": 1.323848843574524, "learning_rate": 1.0126292560632944e-06, "loss": 1.5635, "step": 15806 }, { "epoch": 0.8642073179065921, "grad_norm": 1.469641923904419, "learning_rate": 1.0118281319935896e-06, "loss": 1.4547, "step": 15807 }, { "epoch": 0.8642619903503137, "grad_norm": 1.3958979845046997, "learning_rate": 1.0110273080600374e-06, "loss": 1.4612, "step": 15808 }, { "epoch": 0.8643166627940352, "grad_norm": 1.4969687461853027, "learning_rate": 1.0102267842893753e-06, "loss": 1.4664, "step": 15809 }, { "epoch": 0.8643713352377568, "grad_norm": 2.5879979133605957, "learning_rate": 1.0094265607083375e-06, "loss": 1.5281, "step": 15810 }, { "epoch": 0.8644260076814784, "grad_norm": 1.3737198114395142, "learning_rate": 1.0086266373436427e-06, "loss": 1.5512, "step": 15811 }, { "epoch": 0.8644806801251999, "grad_norm": 1.5519227981567383, "learning_rate": 1.0078270142220015e-06, "loss": 1.362, "step": 15812 }, { "epoch": 0.8645353525689214, "grad_norm": 3.2682268619537354, "learning_rate": 1.0070276913701193e-06, "loss": 1.6831, "step": 15813 }, { "epoch": 0.864590025012643, "grad_norm": 1.7379610538482666, "learning_rate": 1.006228668814686e-06, "loss": 1.4698, "step": 15814 }, { "epoch": 0.8646446974563645, "grad_norm": 1.5768671035766602, "learning_rate": 1.0054299465823791e-06, "loss": 1.557, "step": 15815 }, { "epoch": 0.8646993699000861, "grad_norm": 1.4985588788986206, "learning_rate": 1.004631524699875e-06, "loss": 1.3582, "step": 15816 }, { "epoch": 0.8647540423438077, "grad_norm": 1.4828726053237915, "learning_rate": 1.0038334031938324e-06, "loss": 1.3912, "step": 15817 }, { "epoch": 0.8648087147875292, "grad_norm": 1.6154570579528809, "learning_rate": 1.0030355820908998e-06, "loss": 1.3673, "step": 15818 }, { "epoch": 0.8648633872312508, "grad_norm": 1.6261804103851318, "learning_rate": 1.0022380614177251e-06, "loss": 1.323, "step": 15819 }, { "epoch": 0.8649180596749724, "grad_norm": 3.6170156002044678, "learning_rate": 1.0014408412009335e-06, "loss": 1.216, "step": 15820 }, { "epoch": 0.8649727321186939, "grad_norm": 1.545191764831543, "learning_rate": 1.0006439214671471e-06, "loss": 1.5901, "step": 15821 }, { "epoch": 0.8650274045624154, "grad_norm": 1.5706154108047485, "learning_rate": 9.9984730224298e-07, "loss": 1.6456, "step": 15822 }, { "epoch": 0.8650820770061369, "grad_norm": 1.3516931533813477, "learning_rate": 9.9905098355503e-07, "loss": 1.3258, "step": 15823 }, { "epoch": 0.8651367494498585, "grad_norm": 1.5149073600769043, "learning_rate": 9.9825496542989e-07, "loss": 1.3868, "step": 15824 }, { "epoch": 0.8651914218935801, "grad_norm": 1.6742697954177856, "learning_rate": 9.974592478941393e-07, "loss": 1.1753, "step": 15825 }, { "epoch": 0.8652460943373016, "grad_norm": 1.340734601020813, "learning_rate": 9.966638309743481e-07, "loss": 1.6027, "step": 15826 }, { "epoch": 0.8653007667810232, "grad_norm": 2.0077872276306152, "learning_rate": 9.958687146970802e-07, "loss": 1.2761, "step": 15827 }, { "epoch": 0.8653554392247448, "grad_norm": 1.4752604961395264, "learning_rate": 9.950738990888841e-07, "loss": 1.5436, "step": 15828 }, { "epoch": 0.8654101116684663, "grad_norm": 1.2189416885375977, "learning_rate": 9.942793841762999e-07, "loss": 1.409, "step": 15829 }, { "epoch": 0.8654647841121879, "grad_norm": 1.528498888015747, "learning_rate": 9.934851699858616e-07, "loss": 1.4262, "step": 15830 }, { "epoch": 0.8655194565559094, "grad_norm": 2.295426607131958, "learning_rate": 9.926912565440883e-07, "loss": 1.2886, "step": 15831 }, { "epoch": 0.8655741289996309, "grad_norm": 1.2841764688491821, "learning_rate": 9.918976438774884e-07, "loss": 1.3625, "step": 15832 }, { "epoch": 0.8656288014433525, "grad_norm": 2.218204975128174, "learning_rate": 9.911043320125657e-07, "loss": 1.0793, "step": 15833 }, { "epoch": 0.8656834738870741, "grad_norm": 1.3061387538909912, "learning_rate": 9.903113209758098e-07, "loss": 1.4964, "step": 15834 }, { "epoch": 0.8657381463307956, "grad_norm": 1.5274991989135742, "learning_rate": 9.895186107937005e-07, "loss": 1.3702, "step": 15835 }, { "epoch": 0.8657928187745172, "grad_norm": 2.104583501815796, "learning_rate": 9.887262014927079e-07, "loss": 1.3923, "step": 15836 }, { "epoch": 0.8658474912182387, "grad_norm": 1.190788984298706, "learning_rate": 9.879340930992943e-07, "loss": 1.8061, "step": 15837 }, { "epoch": 0.8659021636619603, "grad_norm": 1.762525200843811, "learning_rate": 9.87142285639906e-07, "loss": 1.4636, "step": 15838 }, { "epoch": 0.8659568361056819, "grad_norm": 3.128016233444214, "learning_rate": 9.863507791409876e-07, "loss": 1.2206, "step": 15839 }, { "epoch": 0.8660115085494033, "grad_norm": 1.701404333114624, "learning_rate": 9.85559573628967e-07, "loss": 1.2438, "step": 15840 }, { "epoch": 0.8660661809931249, "grad_norm": 1.12663996219635, "learning_rate": 9.847686691302671e-07, "loss": 1.5393, "step": 15841 }, { "epoch": 0.8661208534368465, "grad_norm": 2.274451971054077, "learning_rate": 9.83978065671296e-07, "loss": 1.4443, "step": 15842 }, { "epoch": 0.866175525880568, "grad_norm": 1.4696574211120605, "learning_rate": 9.831877632784525e-07, "loss": 1.3177, "step": 15843 }, { "epoch": 0.8662301983242896, "grad_norm": 1.7105928659439087, "learning_rate": 9.823977619781288e-07, "loss": 1.1772, "step": 15844 }, { "epoch": 0.8662848707680112, "grad_norm": 1.55292809009552, "learning_rate": 9.81608061796706e-07, "loss": 1.3301, "step": 15845 }, { "epoch": 0.8663395432117327, "grad_norm": 1.7092649936676025, "learning_rate": 9.808186627605498e-07, "loss": 1.3962, "step": 15846 }, { "epoch": 0.8663942156554543, "grad_norm": 1.2996505498886108, "learning_rate": 9.800295648960245e-07, "loss": 1.4748, "step": 15847 }, { "epoch": 0.8664488880991759, "grad_norm": 1.226630449295044, "learning_rate": 9.79240768229478e-07, "loss": 1.5677, "step": 15848 }, { "epoch": 0.8665035605428973, "grad_norm": 1.2599800825119019, "learning_rate": 9.784522727872493e-07, "loss": 1.3744, "step": 15849 }, { "epoch": 0.8665582329866189, "grad_norm": 1.4896647930145264, "learning_rate": 9.776640785956703e-07, "loss": 1.5257, "step": 15850 }, { "epoch": 0.8666129054303404, "grad_norm": 1.3459309339523315, "learning_rate": 9.768761856810581e-07, "loss": 1.5379, "step": 15851 }, { "epoch": 0.866667577874062, "grad_norm": 1.849753975868225, "learning_rate": 9.760885940697229e-07, "loss": 1.6459, "step": 15852 }, { "epoch": 0.8667222503177836, "grad_norm": 1.7031742334365845, "learning_rate": 9.753013037879655e-07, "loss": 1.1799, "step": 15853 }, { "epoch": 0.8667769227615051, "grad_norm": 1.3725061416625977, "learning_rate": 9.74514314862074e-07, "loss": 1.3551, "step": 15854 }, { "epoch": 0.8668315952052267, "grad_norm": 2.109408140182495, "learning_rate": 9.737276273183294e-07, "loss": 1.2663, "step": 15855 }, { "epoch": 0.8668862676489483, "grad_norm": 1.2286834716796875, "learning_rate": 9.729412411829998e-07, "loss": 1.4274, "step": 15856 }, { "epoch": 0.8669409400926698, "grad_norm": 1.9437578916549683, "learning_rate": 9.72155156482344e-07, "loss": 1.4098, "step": 15857 }, { "epoch": 0.8669956125363913, "grad_norm": 1.562943696975708, "learning_rate": 9.713693732426132e-07, "loss": 1.3677, "step": 15858 }, { "epoch": 0.8670502849801129, "grad_norm": 1.6797316074371338, "learning_rate": 9.705838914900456e-07, "loss": 1.48, "step": 15859 }, { "epoch": 0.8671049574238344, "grad_norm": 1.8725637197494507, "learning_rate": 9.697987112508688e-07, "loss": 1.2449, "step": 15860 }, { "epoch": 0.867159629867556, "grad_norm": 2.150347948074341, "learning_rate": 9.690138325513043e-07, "loss": 1.3498, "step": 15861 }, { "epoch": 0.8672143023112776, "grad_norm": 1.4479942321777344, "learning_rate": 9.6822925541756e-07, "loss": 1.397, "step": 15862 }, { "epoch": 0.8672689747549991, "grad_norm": 1.581668496131897, "learning_rate": 9.674449798758334e-07, "loss": 1.3129, "step": 15863 }, { "epoch": 0.8673236471987207, "grad_norm": 1.5205225944519043, "learning_rate": 9.666610059523163e-07, "loss": 1.5208, "step": 15864 }, { "epoch": 0.8673783196424422, "grad_norm": 2.0221192836761475, "learning_rate": 9.65877333673184e-07, "loss": 1.427, "step": 15865 }, { "epoch": 0.8674329920861638, "grad_norm": 1.8373998403549194, "learning_rate": 9.65093963064606e-07, "loss": 1.1806, "step": 15866 }, { "epoch": 0.8674876645298853, "grad_norm": 1.2424814701080322, "learning_rate": 9.643108941527435e-07, "loss": 1.4519, "step": 15867 }, { "epoch": 0.8675423369736068, "grad_norm": 1.4279063940048218, "learning_rate": 9.635281269637409e-07, "loss": 1.5458, "step": 15868 }, { "epoch": 0.8675970094173284, "grad_norm": 1.6951847076416016, "learning_rate": 9.627456615237406e-07, "loss": 1.4971, "step": 15869 }, { "epoch": 0.86765168186105, "grad_norm": 1.4466060400009155, "learning_rate": 9.6196349785887e-07, "loss": 1.4929, "step": 15870 }, { "epoch": 0.8677063543047715, "grad_norm": 1.934106469154358, "learning_rate": 9.611816359952442e-07, "loss": 1.4176, "step": 15871 }, { "epoch": 0.8677610267484931, "grad_norm": 1.3137474060058594, "learning_rate": 9.604000759589748e-07, "loss": 1.7019, "step": 15872 }, { "epoch": 0.8678156991922147, "grad_norm": 1.3934584856033325, "learning_rate": 9.5961881777616e-07, "loss": 1.5319, "step": 15873 }, { "epoch": 0.8678703716359362, "grad_norm": 1.2982889413833618, "learning_rate": 9.588378614728867e-07, "loss": 1.4975, "step": 15874 }, { "epoch": 0.8679250440796578, "grad_norm": 1.8129116296768188, "learning_rate": 9.580572070752335e-07, "loss": 1.3632, "step": 15875 }, { "epoch": 0.8679797165233794, "grad_norm": 1.6017999649047852, "learning_rate": 9.57276854609267e-07, "loss": 1.175, "step": 15876 }, { "epoch": 0.8680343889671008, "grad_norm": 1.3669463396072388, "learning_rate": 9.564968041010435e-07, "loss": 1.3794, "step": 15877 }, { "epoch": 0.8680890614108224, "grad_norm": 1.3595460653305054, "learning_rate": 9.55717055576616e-07, "loss": 1.5802, "step": 15878 }, { "epoch": 0.8681437338545439, "grad_norm": 1.9086017608642578, "learning_rate": 9.549376090620188e-07, "loss": 1.344, "step": 15879 }, { "epoch": 0.8681984062982655, "grad_norm": 1.5946885347366333, "learning_rate": 9.541584645832768e-07, "loss": 1.4817, "step": 15880 }, { "epoch": 0.8682530787419871, "grad_norm": 1.7263123989105225, "learning_rate": 9.533796221664137e-07, "loss": 1.4205, "step": 15881 }, { "epoch": 0.8683077511857086, "grad_norm": 1.7403614521026611, "learning_rate": 9.52601081837431e-07, "loss": 1.3901, "step": 15882 }, { "epoch": 0.8683624236294302, "grad_norm": 1.335142731666565, "learning_rate": 9.518228436223298e-07, "loss": 1.5338, "step": 15883 }, { "epoch": 0.8684170960731518, "grad_norm": 1.3654563426971436, "learning_rate": 9.510449075470973e-07, "loss": 1.4179, "step": 15884 }, { "epoch": 0.8684717685168732, "grad_norm": 1.5006963014602661, "learning_rate": 9.502672736377061e-07, "loss": 1.486, "step": 15885 }, { "epoch": 0.8685264409605948, "grad_norm": 2.02874493598938, "learning_rate": 9.494899419201298e-07, "loss": 1.0881, "step": 15886 }, { "epoch": 0.8685811134043164, "grad_norm": 1.8334788084030151, "learning_rate": 9.487129124203209e-07, "loss": 1.4123, "step": 15887 }, { "epoch": 0.8686357858480379, "grad_norm": 1.8086862564086914, "learning_rate": 9.479361851642277e-07, "loss": 1.3744, "step": 15888 }, { "epoch": 0.8686904582917595, "grad_norm": 1.140268087387085, "learning_rate": 9.471597601777871e-07, "loss": 1.4502, "step": 15889 }, { "epoch": 0.8687451307354811, "grad_norm": 1.609101414680481, "learning_rate": 9.46383637486925e-07, "loss": 1.3352, "step": 15890 }, { "epoch": 0.8687998031792026, "grad_norm": 1.5202100276947021, "learning_rate": 9.456078171175564e-07, "loss": 1.1485, "step": 15891 }, { "epoch": 0.8688544756229242, "grad_norm": 1.416116714477539, "learning_rate": 9.448322990955916e-07, "loss": 1.3869, "step": 15892 }, { "epoch": 0.8689091480666457, "grad_norm": 1.896180272102356, "learning_rate": 9.440570834469243e-07, "loss": 1.3566, "step": 15893 }, { "epoch": 0.8689638205103672, "grad_norm": 2.1133627891540527, "learning_rate": 9.432821701974393e-07, "loss": 1.598, "step": 15894 }, { "epoch": 0.8690184929540888, "grad_norm": 1.5553367137908936, "learning_rate": 9.425075593730181e-07, "loss": 1.4902, "step": 15895 }, { "epoch": 0.8690731653978103, "grad_norm": 1.6758003234863281, "learning_rate": 9.417332509995203e-07, "loss": 1.3925, "step": 15896 }, { "epoch": 0.8691278378415319, "grad_norm": 1.4459120035171509, "learning_rate": 9.409592451028082e-07, "loss": 1.4357, "step": 15897 }, { "epoch": 0.8691825102852535, "grad_norm": 1.8633474111557007, "learning_rate": 9.401855417087236e-07, "loss": 1.516, "step": 15898 }, { "epoch": 0.869237182728975, "grad_norm": 2.4069712162017822, "learning_rate": 9.394121408431022e-07, "loss": 1.3919, "step": 15899 }, { "epoch": 0.8692918551726966, "grad_norm": 1.722833275794983, "learning_rate": 9.386390425317726e-07, "loss": 1.2081, "step": 15900 }, { "epoch": 0.8693465276164182, "grad_norm": 1.8496885299682617, "learning_rate": 9.378662468005484e-07, "loss": 1.5892, "step": 15901 }, { "epoch": 0.8694012000601397, "grad_norm": 1.715798258781433, "learning_rate": 9.370937536752344e-07, "loss": 1.4823, "step": 15902 }, { "epoch": 0.8694558725038612, "grad_norm": 1.746500849723816, "learning_rate": 9.36321563181628e-07, "loss": 1.374, "step": 15903 }, { "epoch": 0.8695105449475828, "grad_norm": 2.036984920501709, "learning_rate": 9.355496753455118e-07, "loss": 1.2431, "step": 15904 }, { "epoch": 0.8695652173913043, "grad_norm": 2.582167625427246, "learning_rate": 9.347780901926617e-07, "loss": 1.4267, "step": 15905 }, { "epoch": 0.8696198898350259, "grad_norm": 1.5678106546401978, "learning_rate": 9.340068077488451e-07, "loss": 1.5265, "step": 15906 }, { "epoch": 0.8696745622787475, "grad_norm": 1.7732319831848145, "learning_rate": 9.332358280398146e-07, "loss": 1.2206, "step": 15907 }, { "epoch": 0.869729234722469, "grad_norm": 1.4277673959732056, "learning_rate": 9.32465151091314e-07, "loss": 1.5067, "step": 15908 }, { "epoch": 0.8697839071661906, "grad_norm": 1.6197069883346558, "learning_rate": 9.316947769290819e-07, "loss": 1.3459, "step": 15909 }, { "epoch": 0.8698385796099121, "grad_norm": 1.1924493312835693, "learning_rate": 9.309247055788384e-07, "loss": 1.4463, "step": 15910 }, { "epoch": 0.8698932520536337, "grad_norm": 1.676851749420166, "learning_rate": 9.301549370663022e-07, "loss": 1.5256, "step": 15911 }, { "epoch": 0.8699479244973553, "grad_norm": 1.4750797748565674, "learning_rate": 9.293854714171758e-07, "loss": 1.3844, "step": 15912 }, { "epoch": 0.8700025969410767, "grad_norm": 1.387049674987793, "learning_rate": 9.286163086571531e-07, "loss": 1.402, "step": 15913 }, { "epoch": 0.8700572693847983, "grad_norm": 1.4249281883239746, "learning_rate": 9.278474488119182e-07, "loss": 1.7936, "step": 15914 }, { "epoch": 0.8701119418285199, "grad_norm": 1.5849816799163818, "learning_rate": 9.270788919071461e-07, "loss": 1.3864, "step": 15915 }, { "epoch": 0.8701666142722414, "grad_norm": 1.4045138359069824, "learning_rate": 9.263106379684972e-07, "loss": 1.3874, "step": 15916 }, { "epoch": 0.870221286715963, "grad_norm": 1.3742246627807617, "learning_rate": 9.255426870216311e-07, "loss": 1.653, "step": 15917 }, { "epoch": 0.8702759591596846, "grad_norm": 1.4196832180023193, "learning_rate": 9.247750390921883e-07, "loss": 1.4522, "step": 15918 }, { "epoch": 0.8703306316034061, "grad_norm": 1.4119001626968384, "learning_rate": 9.240076942058008e-07, "loss": 1.4537, "step": 15919 }, { "epoch": 0.8703853040471277, "grad_norm": 1.391873836517334, "learning_rate": 9.232406523880954e-07, "loss": 1.4828, "step": 15920 }, { "epoch": 0.8704399764908493, "grad_norm": 1.5555177927017212, "learning_rate": 9.224739136646843e-07, "loss": 1.3761, "step": 15921 }, { "epoch": 0.8704946489345707, "grad_norm": 1.5638103485107422, "learning_rate": 9.21707478061169e-07, "loss": 1.5193, "step": 15922 }, { "epoch": 0.8705493213782923, "grad_norm": 1.4227157831192017, "learning_rate": 9.209413456031446e-07, "loss": 1.6479, "step": 15923 }, { "epoch": 0.8706039938220138, "grad_norm": 1.8598774671554565, "learning_rate": 9.201755163161918e-07, "loss": 1.5073, "step": 15924 }, { "epoch": 0.8706586662657354, "grad_norm": 1.3464380502700806, "learning_rate": 9.19409990225888e-07, "loss": 1.5638, "step": 15925 }, { "epoch": 0.870713338709457, "grad_norm": 2.066070079803467, "learning_rate": 9.186447673577914e-07, "loss": 1.5122, "step": 15926 }, { "epoch": 0.8707680111531785, "grad_norm": 1.8078628778457642, "learning_rate": 9.178798477374562e-07, "loss": 1.3182, "step": 15927 }, { "epoch": 0.8708226835969001, "grad_norm": 1.4127665758132935, "learning_rate": 9.171152313904253e-07, "loss": 1.4694, "step": 15928 }, { "epoch": 0.8708773560406217, "grad_norm": 1.947813868522644, "learning_rate": 9.163509183422303e-07, "loss": 1.1248, "step": 15929 }, { "epoch": 0.8709320284843431, "grad_norm": 1.7457187175750732, "learning_rate": 9.155869086183922e-07, "loss": 1.0909, "step": 15930 }, { "epoch": 0.8709867009280647, "grad_norm": 1.7488617897033691, "learning_rate": 9.148232022444259e-07, "loss": 1.356, "step": 15931 }, { "epoch": 0.8710413733717863, "grad_norm": 1.606048583984375, "learning_rate": 9.140597992458322e-07, "loss": 1.3117, "step": 15932 }, { "epoch": 0.8710960458155078, "grad_norm": 1.2240632772445679, "learning_rate": 9.132966996480996e-07, "loss": 1.491, "step": 15933 }, { "epoch": 0.8711507182592294, "grad_norm": 1.3550699949264526, "learning_rate": 9.125339034767155e-07, "loss": 1.3628, "step": 15934 }, { "epoch": 0.871205390702951, "grad_norm": 1.6991865634918213, "learning_rate": 9.117714107571496e-07, "loss": 1.2714, "step": 15935 }, { "epoch": 0.8712600631466725, "grad_norm": 1.4144551753997803, "learning_rate": 9.110092215148592e-07, "loss": 1.2423, "step": 15936 }, { "epoch": 0.8713147355903941, "grad_norm": 1.6938176155090332, "learning_rate": 9.102473357753017e-07, "loss": 1.3008, "step": 15937 }, { "epoch": 0.8713694080341156, "grad_norm": 1.6482033729553223, "learning_rate": 9.094857535639157e-07, "loss": 1.3768, "step": 15938 }, { "epoch": 0.8714240804778371, "grad_norm": 1.9169001579284668, "learning_rate": 9.087244749061308e-07, "loss": 1.3501, "step": 15939 }, { "epoch": 0.8714787529215587, "grad_norm": 1.7311415672302246, "learning_rate": 9.079634998273701e-07, "loss": 1.1724, "step": 15940 }, { "epoch": 0.8715334253652802, "grad_norm": 1.3288551568984985, "learning_rate": 9.072028283530399e-07, "loss": 1.5452, "step": 15941 }, { "epoch": 0.8715880978090018, "grad_norm": 1.5104682445526123, "learning_rate": 9.064424605085476e-07, "loss": 1.3613, "step": 15942 }, { "epoch": 0.8716427702527234, "grad_norm": 1.3767752647399902, "learning_rate": 9.056823963192796e-07, "loss": 1.5696, "step": 15943 }, { "epoch": 0.8716974426964449, "grad_norm": 1.6300262212753296, "learning_rate": 9.049226358106156e-07, "loss": 1.4416, "step": 15944 }, { "epoch": 0.8717521151401665, "grad_norm": 1.7044987678527832, "learning_rate": 9.041631790079275e-07, "loss": 1.2924, "step": 15945 }, { "epoch": 0.8718067875838881, "grad_norm": 1.4624217748641968, "learning_rate": 9.034040259365762e-07, "loss": 1.4173, "step": 15946 }, { "epoch": 0.8718614600276096, "grad_norm": 1.261171817779541, "learning_rate": 9.02645176621908e-07, "loss": 1.4368, "step": 15947 }, { "epoch": 0.8719161324713312, "grad_norm": 1.5312111377716064, "learning_rate": 9.018866310892671e-07, "loss": 1.4393, "step": 15948 }, { "epoch": 0.8719708049150527, "grad_norm": 1.8751081228256226, "learning_rate": 9.011283893639811e-07, "loss": 1.4175, "step": 15949 }, { "epoch": 0.8720254773587742, "grad_norm": 1.627576470375061, "learning_rate": 9.003704514713663e-07, "loss": 1.2272, "step": 15950 }, { "epoch": 0.8720801498024958, "grad_norm": 1.471059799194336, "learning_rate": 8.99612817436738e-07, "loss": 1.4499, "step": 15951 }, { "epoch": 0.8721348222462173, "grad_norm": 1.923527479171753, "learning_rate": 8.988554872853927e-07, "loss": 1.4585, "step": 15952 }, { "epoch": 0.8721894946899389, "grad_norm": 1.543432354927063, "learning_rate": 8.98098461042618e-07, "loss": 1.3926, "step": 15953 }, { "epoch": 0.8722441671336605, "grad_norm": 1.333208680152893, "learning_rate": 8.973417387336947e-07, "loss": 1.545, "step": 15954 }, { "epoch": 0.872298839577382, "grad_norm": 1.4511717557907104, "learning_rate": 8.965853203838892e-07, "loss": 1.1816, "step": 15955 }, { "epoch": 0.8723535120211036, "grad_norm": 1.3216910362243652, "learning_rate": 8.958292060184637e-07, "loss": 1.5491, "step": 15956 }, { "epoch": 0.8724081844648252, "grad_norm": 1.3854362964630127, "learning_rate": 8.950733956626634e-07, "loss": 1.4531, "step": 15957 }, { "epoch": 0.8724628569085466, "grad_norm": 1.7880537509918213, "learning_rate": 8.94317889341727e-07, "loss": 1.421, "step": 15958 }, { "epoch": 0.8725175293522682, "grad_norm": 1.494240164756775, "learning_rate": 8.935626870808856e-07, "loss": 1.6032, "step": 15959 }, { "epoch": 0.8725722017959898, "grad_norm": 1.5616278648376465, "learning_rate": 8.928077889053544e-07, "loss": 1.5577, "step": 15960 }, { "epoch": 0.8726268742397113, "grad_norm": 1.8441916704177856, "learning_rate": 8.9205319484034e-07, "loss": 1.4298, "step": 15961 }, { "epoch": 0.8726815466834329, "grad_norm": 1.8475663661956787, "learning_rate": 8.912989049110432e-07, "loss": 1.4024, "step": 15962 }, { "epoch": 0.8727362191271545, "grad_norm": 1.6123744249343872, "learning_rate": 8.905449191426507e-07, "loss": 1.5267, "step": 15963 }, { "epoch": 0.872790891570876, "grad_norm": 1.5439919233322144, "learning_rate": 8.897912375603379e-07, "loss": 1.3284, "step": 15964 }, { "epoch": 0.8728455640145976, "grad_norm": 1.6701059341430664, "learning_rate": 8.890378601892746e-07, "loss": 1.3715, "step": 15965 }, { "epoch": 0.872900236458319, "grad_norm": 1.625794529914856, "learning_rate": 8.882847870546174e-07, "loss": 1.4409, "step": 15966 }, { "epoch": 0.8729549089020406, "grad_norm": 1.578655481338501, "learning_rate": 8.875320181815117e-07, "loss": 1.2863, "step": 15967 }, { "epoch": 0.8730095813457622, "grad_norm": 1.3277939558029175, "learning_rate": 8.867795535950951e-07, "loss": 1.6626, "step": 15968 }, { "epoch": 0.8730642537894837, "grad_norm": 1.383943796157837, "learning_rate": 8.860273933204933e-07, "loss": 1.3908, "step": 15969 }, { "epoch": 0.8731189262332053, "grad_norm": 1.337479829788208, "learning_rate": 8.852755373828236e-07, "loss": 1.4704, "step": 15970 }, { "epoch": 0.8731735986769269, "grad_norm": 1.5367658138275146, "learning_rate": 8.84523985807193e-07, "loss": 1.422, "step": 15971 }, { "epoch": 0.8732282711206484, "grad_norm": 1.5401954650878906, "learning_rate": 8.837727386186956e-07, "loss": 1.433, "step": 15972 }, { "epoch": 0.87328294356437, "grad_norm": 1.3798434734344482, "learning_rate": 8.830217958424192e-07, "loss": 1.5804, "step": 15973 }, { "epoch": 0.8733376160080916, "grad_norm": 1.3601988554000854, "learning_rate": 8.822711575034381e-07, "loss": 1.6704, "step": 15974 }, { "epoch": 0.873392288451813, "grad_norm": 1.463279128074646, "learning_rate": 8.81520823626818e-07, "loss": 1.4849, "step": 15975 }, { "epoch": 0.8734469608955346, "grad_norm": 1.832444190979004, "learning_rate": 8.807707942376165e-07, "loss": 1.4727, "step": 15976 }, { "epoch": 0.8735016333392562, "grad_norm": 1.2362284660339355, "learning_rate": 8.800210693608758e-07, "loss": 1.5077, "step": 15977 }, { "epoch": 0.8735563057829777, "grad_norm": 1.3462200164794922, "learning_rate": 8.792716490216335e-07, "loss": 1.2267, "step": 15978 }, { "epoch": 0.8736109782266993, "grad_norm": 1.5821532011032104, "learning_rate": 8.785225332449133e-07, "loss": 1.3776, "step": 15979 }, { "epoch": 0.8736656506704208, "grad_norm": 1.303293228149414, "learning_rate": 8.77773722055727e-07, "loss": 1.3881, "step": 15980 }, { "epoch": 0.8737203231141424, "grad_norm": 1.5498024225234985, "learning_rate": 8.770252154790848e-07, "loss": 1.3733, "step": 15981 }, { "epoch": 0.873774995557864, "grad_norm": 1.2662700414657593, "learning_rate": 8.762770135399778e-07, "loss": 1.2982, "step": 15982 }, { "epoch": 0.8738296680015855, "grad_norm": 1.3701785802841187, "learning_rate": 8.755291162633894e-07, "loss": 1.4313, "step": 15983 }, { "epoch": 0.873884340445307, "grad_norm": 1.3896946907043457, "learning_rate": 8.747815236742974e-07, "loss": 1.2628, "step": 15984 }, { "epoch": 0.8739390128890286, "grad_norm": 1.6785565614700317, "learning_rate": 8.740342357976628e-07, "loss": 1.3566, "step": 15985 }, { "epoch": 0.8739936853327501, "grad_norm": 1.4764893054962158, "learning_rate": 8.73287252658438e-07, "loss": 1.4353, "step": 15986 }, { "epoch": 0.8740483577764717, "grad_norm": 1.7346335649490356, "learning_rate": 8.725405742815695e-07, "loss": 1.2362, "step": 15987 }, { "epoch": 0.8741030302201933, "grad_norm": 1.2577561140060425, "learning_rate": 8.717942006919911e-07, "loss": 1.4808, "step": 15988 }, { "epoch": 0.8741577026639148, "grad_norm": 2.7583673000335693, "learning_rate": 8.710481319146213e-07, "loss": 1.6008, "step": 15989 }, { "epoch": 0.8742123751076364, "grad_norm": 1.4303404092788696, "learning_rate": 8.703023679743783e-07, "loss": 1.2893, "step": 15990 }, { "epoch": 0.874267047551358, "grad_norm": 1.5505205392837524, "learning_rate": 8.69556908896162e-07, "loss": 1.3695, "step": 15991 }, { "epoch": 0.8743217199950795, "grad_norm": 1.6236917972564697, "learning_rate": 8.688117547048669e-07, "loss": 1.4075, "step": 15992 }, { "epoch": 0.874376392438801, "grad_norm": 1.6606340408325195, "learning_rate": 8.680669054253732e-07, "loss": 1.5375, "step": 15993 }, { "epoch": 0.8744310648825225, "grad_norm": 1.7677973508834839, "learning_rate": 8.673223610825532e-07, "loss": 1.5993, "step": 15994 }, { "epoch": 0.8744857373262441, "grad_norm": 1.0776591300964355, "learning_rate": 8.665781217012725e-07, "loss": 1.6343, "step": 15995 }, { "epoch": 0.8745404097699657, "grad_norm": 1.7048648595809937, "learning_rate": 8.658341873063792e-07, "loss": 1.3144, "step": 15996 }, { "epoch": 0.8745950822136872, "grad_norm": 1.46261727809906, "learning_rate": 8.650905579227154e-07, "loss": 1.5187, "step": 15997 }, { "epoch": 0.8746497546574088, "grad_norm": 1.836346983909607, "learning_rate": 8.643472335751157e-07, "loss": 1.384, "step": 15998 }, { "epoch": 0.8747044271011304, "grad_norm": 1.2944309711456299, "learning_rate": 8.636042142883982e-07, "loss": 1.481, "step": 15999 }, { "epoch": 0.8747590995448519, "grad_norm": 1.5460268259048462, "learning_rate": 8.628615000873741e-07, "loss": 1.5202, "step": 16000 }, { "epoch": 0.8748137719885735, "grad_norm": 1.5390995740890503, "learning_rate": 8.62119090996848e-07, "loss": 1.4884, "step": 16001 }, { "epoch": 0.8748684444322951, "grad_norm": 1.2699832916259766, "learning_rate": 8.613769870416067e-07, "loss": 1.4569, "step": 16002 }, { "epoch": 0.8749231168760165, "grad_norm": 1.542840600013733, "learning_rate": 8.606351882464314e-07, "loss": 1.5976, "step": 16003 }, { "epoch": 0.8749777893197381, "grad_norm": 1.7956700325012207, "learning_rate": 8.598936946360948e-07, "loss": 1.453, "step": 16004 }, { "epoch": 0.8750324617634597, "grad_norm": 1.3614709377288818, "learning_rate": 8.591525062353557e-07, "loss": 1.4299, "step": 16005 }, { "epoch": 0.8750871342071812, "grad_norm": 1.946043848991394, "learning_rate": 8.584116230689643e-07, "loss": 1.3829, "step": 16006 }, { "epoch": 0.8751418066509028, "grad_norm": 2.123373508453369, "learning_rate": 8.576710451616599e-07, "loss": 1.3376, "step": 16007 }, { "epoch": 0.8751964790946243, "grad_norm": 1.6365623474121094, "learning_rate": 8.569307725381715e-07, "loss": 1.3125, "step": 16008 }, { "epoch": 0.8752511515383459, "grad_norm": 1.6047027111053467, "learning_rate": 8.561908052232204e-07, "loss": 1.4414, "step": 16009 }, { "epoch": 0.8753058239820675, "grad_norm": 1.3083487749099731, "learning_rate": 8.554511432415147e-07, "loss": 1.4879, "step": 16010 }, { "epoch": 0.875360496425789, "grad_norm": 2.0666630268096924, "learning_rate": 8.547117866177524e-07, "loss": 1.3323, "step": 16011 }, { "epoch": 0.8754151688695105, "grad_norm": 1.4620565176010132, "learning_rate": 8.539727353766259e-07, "loss": 1.246, "step": 16012 }, { "epoch": 0.8754698413132321, "grad_norm": 1.9069370031356812, "learning_rate": 8.53233989542811e-07, "loss": 1.3903, "step": 16013 }, { "epoch": 0.8755245137569536, "grad_norm": 1.3902755975723267, "learning_rate": 8.524955491409748e-07, "loss": 1.3265, "step": 16014 }, { "epoch": 0.8755791862006752, "grad_norm": 1.9650760889053345, "learning_rate": 8.517574141957796e-07, "loss": 1.1324, "step": 16015 }, { "epoch": 0.8756338586443968, "grad_norm": 1.2944903373718262, "learning_rate": 8.510195847318714e-07, "loss": 1.5285, "step": 16016 }, { "epoch": 0.8756885310881183, "grad_norm": 1.3904684782028198, "learning_rate": 8.502820607738871e-07, "loss": 1.4682, "step": 16017 }, { "epoch": 0.8757432035318399, "grad_norm": 1.4137674570083618, "learning_rate": 8.495448423464569e-07, "loss": 1.4424, "step": 16018 }, { "epoch": 0.8757978759755615, "grad_norm": 1.4804260730743408, "learning_rate": 8.488079294741957e-07, "loss": 1.4193, "step": 16019 }, { "epoch": 0.875852548419283, "grad_norm": 1.495768427848816, "learning_rate": 8.480713221817095e-07, "loss": 1.4418, "step": 16020 }, { "epoch": 0.8759072208630045, "grad_norm": 1.8893909454345703, "learning_rate": 8.473350204936004e-07, "loss": 1.415, "step": 16021 }, { "epoch": 0.875961893306726, "grad_norm": 1.327212929725647, "learning_rate": 8.46599024434449e-07, "loss": 1.5145, "step": 16022 }, { "epoch": 0.8760165657504476, "grad_norm": 1.6796905994415283, "learning_rate": 8.458633340288391e-07, "loss": 1.4079, "step": 16023 }, { "epoch": 0.8760712381941692, "grad_norm": 1.4202960729599, "learning_rate": 8.45127949301332e-07, "loss": 1.2702, "step": 16024 }, { "epoch": 0.8761259106378907, "grad_norm": 2.953913688659668, "learning_rate": 8.443928702764836e-07, "loss": 1.2183, "step": 16025 }, { "epoch": 0.8761805830816123, "grad_norm": 2.097775459289551, "learning_rate": 8.436580969788432e-07, "loss": 1.4539, "step": 16026 }, { "epoch": 0.8762352555253339, "grad_norm": 2.1325719356536865, "learning_rate": 8.429236294329457e-07, "loss": 1.5702, "step": 16027 }, { "epoch": 0.8762899279690554, "grad_norm": 1.9645382165908813, "learning_rate": 8.421894676633136e-07, "loss": 1.5861, "step": 16028 }, { "epoch": 0.876344600412777, "grad_norm": 1.5836485624313354, "learning_rate": 8.414556116944672e-07, "loss": 1.1944, "step": 16029 }, { "epoch": 0.8763992728564985, "grad_norm": 1.5224781036376953, "learning_rate": 8.407220615509081e-07, "loss": 1.2055, "step": 16030 }, { "epoch": 0.87645394530022, "grad_norm": 1.6033594608306885, "learning_rate": 8.399888172571324e-07, "loss": 1.5714, "step": 16031 }, { "epoch": 0.8765086177439416, "grad_norm": 1.7519668340682983, "learning_rate": 8.392558788376248e-07, "loss": 1.4399, "step": 16032 }, { "epoch": 0.8765632901876632, "grad_norm": 1.7014634609222412, "learning_rate": 8.385232463168602e-07, "loss": 1.4089, "step": 16033 }, { "epoch": 0.8766179626313847, "grad_norm": 1.4207683801651, "learning_rate": 8.377909197193013e-07, "loss": 1.5723, "step": 16034 }, { "epoch": 0.8766726350751063, "grad_norm": 1.36712646484375, "learning_rate": 8.37058899069404e-07, "loss": 1.4127, "step": 16035 }, { "epoch": 0.8767273075188278, "grad_norm": 1.5830094814300537, "learning_rate": 8.363271843916099e-07, "loss": 1.4759, "step": 16036 }, { "epoch": 0.8767819799625494, "grad_norm": 1.5967196226119995, "learning_rate": 8.355957757103562e-07, "loss": 1.4534, "step": 16037 }, { "epoch": 0.876836652406271, "grad_norm": 1.6759175062179565, "learning_rate": 8.348646730500654e-07, "loss": 1.373, "step": 16038 }, { "epoch": 0.8768913248499924, "grad_norm": 2.146050453186035, "learning_rate": 8.34133876435147e-07, "loss": 1.3469, "step": 16039 }, { "epoch": 0.876945997293714, "grad_norm": 1.5254672765731812, "learning_rate": 8.334033858900092e-07, "loss": 1.1465, "step": 16040 }, { "epoch": 0.8770006697374356, "grad_norm": 1.5075881481170654, "learning_rate": 8.326732014390415e-07, "loss": 1.4566, "step": 16041 }, { "epoch": 0.8770553421811571, "grad_norm": 1.4803144931793213, "learning_rate": 8.319433231066265e-07, "loss": 1.4488, "step": 16042 }, { "epoch": 0.8771100146248787, "grad_norm": 1.3596714735031128, "learning_rate": 8.312137509171392e-07, "loss": 1.4928, "step": 16043 }, { "epoch": 0.8771646870686003, "grad_norm": 1.5136661529541016, "learning_rate": 8.3048448489494e-07, "loss": 1.3725, "step": 16044 }, { "epoch": 0.8772193595123218, "grad_norm": 1.9489134550094604, "learning_rate": 8.297555250643808e-07, "loss": 1.0722, "step": 16045 }, { "epoch": 0.8772740319560434, "grad_norm": 1.6837894916534424, "learning_rate": 8.290268714498029e-07, "loss": 1.0487, "step": 16046 }, { "epoch": 0.877328704399765, "grad_norm": 1.4810280799865723, "learning_rate": 8.282985240755381e-07, "loss": 1.5557, "step": 16047 }, { "epoch": 0.8773833768434864, "grad_norm": 1.5242552757263184, "learning_rate": 8.27570482965906e-07, "loss": 1.5112, "step": 16048 }, { "epoch": 0.877438049287208, "grad_norm": 1.341620922088623, "learning_rate": 8.268427481452213e-07, "loss": 1.3674, "step": 16049 }, { "epoch": 0.8774927217309295, "grad_norm": 1.6180315017700195, "learning_rate": 8.261153196377814e-07, "loss": 1.3349, "step": 16050 }, { "epoch": 0.8775473941746511, "grad_norm": 1.3610759973526, "learning_rate": 8.25388197467879e-07, "loss": 1.6667, "step": 16051 }, { "epoch": 0.8776020666183727, "grad_norm": 1.6153305768966675, "learning_rate": 8.246613816597937e-07, "loss": 1.4971, "step": 16052 }, { "epoch": 0.8776567390620942, "grad_norm": 1.745551586151123, "learning_rate": 8.239348722377937e-07, "loss": 1.3478, "step": 16053 }, { "epoch": 0.8777114115058158, "grad_norm": 1.5013301372528076, "learning_rate": 8.232086692261432e-07, "loss": 1.5437, "step": 16054 }, { "epoch": 0.8777660839495374, "grad_norm": 1.5842006206512451, "learning_rate": 8.224827726490891e-07, "loss": 1.5361, "step": 16055 }, { "epoch": 0.8778207563932588, "grad_norm": 1.5760222673416138, "learning_rate": 8.217571825308701e-07, "loss": 1.444, "step": 16056 }, { "epoch": 0.8778754288369804, "grad_norm": 1.3808649778366089, "learning_rate": 8.210318988957166e-07, "loss": 1.3774, "step": 16057 }, { "epoch": 0.877930101280702, "grad_norm": 1.337301254272461, "learning_rate": 8.203069217678472e-07, "loss": 1.4437, "step": 16058 }, { "epoch": 0.8779847737244235, "grad_norm": 1.557403564453125, "learning_rate": 8.195822511714691e-07, "loss": 1.5555, "step": 16059 }, { "epoch": 0.8780394461681451, "grad_norm": 1.4057118892669678, "learning_rate": 8.188578871307829e-07, "loss": 1.1596, "step": 16060 }, { "epoch": 0.8780941186118667, "grad_norm": 1.3352550268173218, "learning_rate": 8.18133829669977e-07, "loss": 1.4238, "step": 16061 }, { "epoch": 0.8781487910555882, "grad_norm": 1.8574851751327515, "learning_rate": 8.174100788132266e-07, "loss": 1.6079, "step": 16062 }, { "epoch": 0.8782034634993098, "grad_norm": 1.609622836112976, "learning_rate": 8.166866345847025e-07, "loss": 1.3359, "step": 16063 }, { "epoch": 0.8782581359430313, "grad_norm": 1.5202362537384033, "learning_rate": 8.159634970085595e-07, "loss": 1.5376, "step": 16064 }, { "epoch": 0.8783128083867529, "grad_norm": 1.6096042394638062, "learning_rate": 8.152406661089485e-07, "loss": 1.4328, "step": 16065 }, { "epoch": 0.8783674808304744, "grad_norm": 1.50528085231781, "learning_rate": 8.145181419100034e-07, "loss": 1.1933, "step": 16066 }, { "epoch": 0.8784221532741959, "grad_norm": 1.6426520347595215, "learning_rate": 8.137959244358506e-07, "loss": 1.3725, "step": 16067 }, { "epoch": 0.8784768257179175, "grad_norm": 1.457970142364502, "learning_rate": 8.130740137106108e-07, "loss": 1.3527, "step": 16068 }, { "epoch": 0.8785314981616391, "grad_norm": 1.675803780555725, "learning_rate": 8.123524097583857e-07, "loss": 1.512, "step": 16069 }, { "epoch": 0.8785861706053606, "grad_norm": 1.344671368598938, "learning_rate": 8.11631112603275e-07, "loss": 1.4238, "step": 16070 }, { "epoch": 0.8786408430490822, "grad_norm": 1.5314067602157593, "learning_rate": 8.109101222693616e-07, "loss": 1.3805, "step": 16071 }, { "epoch": 0.8786955154928038, "grad_norm": 1.8439807891845703, "learning_rate": 8.101894387807219e-07, "loss": 1.3188, "step": 16072 }, { "epoch": 0.8787501879365253, "grad_norm": 1.4069162607192993, "learning_rate": 8.094690621614199e-07, "loss": 1.6059, "step": 16073 }, { "epoch": 0.8788048603802469, "grad_norm": 1.7046037912368774, "learning_rate": 8.087489924355141e-07, "loss": 1.2924, "step": 16074 }, { "epoch": 0.8788595328239684, "grad_norm": 1.5393441915512085, "learning_rate": 8.080292296270476e-07, "loss": 1.4571, "step": 16075 }, { "epoch": 0.8789142052676899, "grad_norm": 1.4235206842422485, "learning_rate": 8.073097737600522e-07, "loss": 1.3349, "step": 16076 }, { "epoch": 0.8789688777114115, "grad_norm": 1.8941185474395752, "learning_rate": 8.065906248585564e-07, "loss": 1.295, "step": 16077 }, { "epoch": 0.879023550155133, "grad_norm": 1.3858675956726074, "learning_rate": 8.058717829465723e-07, "loss": 1.4396, "step": 16078 }, { "epoch": 0.8790782225988546, "grad_norm": 1.7443833351135254, "learning_rate": 8.05153248048105e-07, "loss": 1.3959, "step": 16079 }, { "epoch": 0.8791328950425762, "grad_norm": 1.5564854145050049, "learning_rate": 8.044350201871465e-07, "loss": 1.5627, "step": 16080 }, { "epoch": 0.8791875674862977, "grad_norm": 1.4882594347000122, "learning_rate": 8.037170993876797e-07, "loss": 1.1251, "step": 16081 }, { "epoch": 0.8792422399300193, "grad_norm": 1.7977774143218994, "learning_rate": 8.029994856736811e-07, "loss": 1.4518, "step": 16082 }, { "epoch": 0.8792969123737409, "grad_norm": 1.9310569763183594, "learning_rate": 8.022821790691104e-07, "loss": 1.3934, "step": 16083 }, { "epoch": 0.8793515848174623, "grad_norm": 1.4234081506729126, "learning_rate": 8.015651795979206e-07, "loss": 1.3175, "step": 16084 }, { "epoch": 0.8794062572611839, "grad_norm": 1.6007812023162842, "learning_rate": 8.008484872840538e-07, "loss": 1.5393, "step": 16085 }, { "epoch": 0.8794609297049055, "grad_norm": 1.7645204067230225, "learning_rate": 8.001321021514442e-07, "loss": 1.3925, "step": 16086 }, { "epoch": 0.879515602148627, "grad_norm": 1.9860844612121582, "learning_rate": 7.99416024224009e-07, "loss": 1.3157, "step": 16087 }, { "epoch": 0.8795702745923486, "grad_norm": 1.6134170293807983, "learning_rate": 7.987002535256638e-07, "loss": 1.3494, "step": 16088 }, { "epoch": 0.8796249470360702, "grad_norm": 1.4434903860092163, "learning_rate": 7.979847900803095e-07, "loss": 1.297, "step": 16089 }, { "epoch": 0.8796796194797917, "grad_norm": 2.022446632385254, "learning_rate": 7.972696339118346e-07, "loss": 1.3568, "step": 16090 }, { "epoch": 0.8797342919235133, "grad_norm": 1.2982940673828125, "learning_rate": 7.965547850441224e-07, "loss": 1.5902, "step": 16091 }, { "epoch": 0.8797889643672347, "grad_norm": 1.6791396141052246, "learning_rate": 7.958402435010415e-07, "loss": 1.5665, "step": 16092 }, { "epoch": 0.8798436368109563, "grad_norm": 1.4758800268173218, "learning_rate": 7.95126009306455e-07, "loss": 1.6613, "step": 16093 }, { "epoch": 0.8798983092546779, "grad_norm": 1.7117832899093628, "learning_rate": 7.944120824842106e-07, "loss": 1.512, "step": 16094 }, { "epoch": 0.8799529816983994, "grad_norm": 1.725670337677002, "learning_rate": 7.93698463058149e-07, "loss": 1.0922, "step": 16095 }, { "epoch": 0.880007654142121, "grad_norm": 1.6191564798355103, "learning_rate": 7.92985151052098e-07, "loss": 1.3198, "step": 16096 }, { "epoch": 0.8800623265858426, "grad_norm": 1.2915763854980469, "learning_rate": 7.922721464898786e-07, "loss": 1.4596, "step": 16097 }, { "epoch": 0.8801169990295641, "grad_norm": 1.6122548580169678, "learning_rate": 7.91559449395296e-07, "loss": 1.3585, "step": 16098 }, { "epoch": 0.8801716714732857, "grad_norm": 1.436396598815918, "learning_rate": 7.908470597921547e-07, "loss": 1.4729, "step": 16099 }, { "epoch": 0.8802263439170073, "grad_norm": 1.4609637260437012, "learning_rate": 7.901349777042389e-07, "loss": 1.4, "step": 16100 }, { "epoch": 0.8802810163607288, "grad_norm": 1.3823570013046265, "learning_rate": 7.894232031553262e-07, "loss": 1.4674, "step": 16101 }, { "epoch": 0.8803356888044503, "grad_norm": 1.311440348625183, "learning_rate": 7.887117361691888e-07, "loss": 1.3983, "step": 16102 }, { "epoch": 0.8803903612481719, "grad_norm": 1.6938611268997192, "learning_rate": 7.880005767695809e-07, "loss": 1.3184, "step": 16103 }, { "epoch": 0.8804450336918934, "grad_norm": 1.2393243312835693, "learning_rate": 7.87289724980248e-07, "loss": 1.7677, "step": 16104 }, { "epoch": 0.880499706135615, "grad_norm": 1.525234580039978, "learning_rate": 7.865791808249324e-07, "loss": 1.5097, "step": 16105 }, { "epoch": 0.8805543785793366, "grad_norm": 1.6413702964782715, "learning_rate": 7.858689443273548e-07, "loss": 1.2608, "step": 16106 }, { "epoch": 0.8806090510230581, "grad_norm": 1.8404006958007812, "learning_rate": 7.851590155112376e-07, "loss": 1.1815, "step": 16107 }, { "epoch": 0.8806637234667797, "grad_norm": 1.2620656490325928, "learning_rate": 7.84449394400284e-07, "loss": 1.5361, "step": 16108 }, { "epoch": 0.8807183959105012, "grad_norm": 1.7930272817611694, "learning_rate": 7.837400810181894e-07, "loss": 1.1788, "step": 16109 }, { "epoch": 0.8807730683542228, "grad_norm": 1.4407320022583008, "learning_rate": 7.830310753886406e-07, "loss": 1.4718, "step": 16110 }, { "epoch": 0.8808277407979443, "grad_norm": 1.566057562828064, "learning_rate": 7.823223775353128e-07, "loss": 1.5057, "step": 16111 }, { "epoch": 0.8808824132416658, "grad_norm": 1.48175847530365, "learning_rate": 7.816139874818696e-07, "loss": 1.2228, "step": 16112 }, { "epoch": 0.8809370856853874, "grad_norm": 1.9009766578674316, "learning_rate": 7.809059052519674e-07, "loss": 1.3335, "step": 16113 }, { "epoch": 0.880991758129109, "grad_norm": 1.3990684747695923, "learning_rate": 7.801981308692508e-07, "loss": 1.6111, "step": 16114 }, { "epoch": 0.8810464305728305, "grad_norm": 1.2091511487960815, "learning_rate": 7.794906643573519e-07, "loss": 1.6305, "step": 16115 }, { "epoch": 0.8811011030165521, "grad_norm": 1.553544521331787, "learning_rate": 7.787835057398985e-07, "loss": 1.1597, "step": 16116 }, { "epoch": 0.8811557754602737, "grad_norm": 1.3782835006713867, "learning_rate": 7.780766550405006e-07, "loss": 1.4752, "step": 16117 }, { "epoch": 0.8812104479039952, "grad_norm": 1.506284236907959, "learning_rate": 7.773701122827626e-07, "loss": 1.2712, "step": 16118 }, { "epoch": 0.8812651203477168, "grad_norm": 1.4728096723556519, "learning_rate": 7.766638774902802e-07, "loss": 1.5464, "step": 16119 }, { "epoch": 0.8813197927914384, "grad_norm": 1.7084262371063232, "learning_rate": 7.759579506866311e-07, "loss": 1.4797, "step": 16120 }, { "epoch": 0.8813744652351598, "grad_norm": 1.178518295288086, "learning_rate": 7.752523318953942e-07, "loss": 1.5507, "step": 16121 }, { "epoch": 0.8814291376788814, "grad_norm": 1.516869068145752, "learning_rate": 7.745470211401274e-07, "loss": 1.3295, "step": 16122 }, { "epoch": 0.8814838101226029, "grad_norm": 1.7065727710723877, "learning_rate": 7.73842018444384e-07, "loss": 1.37, "step": 16123 }, { "epoch": 0.8815384825663245, "grad_norm": 1.9661942720413208, "learning_rate": 7.731373238317053e-07, "loss": 1.1634, "step": 16124 }, { "epoch": 0.8815931550100461, "grad_norm": 1.74003005027771, "learning_rate": 7.724329373256234e-07, "loss": 1.4884, "step": 16125 }, { "epoch": 0.8816478274537676, "grad_norm": 1.6813846826553345, "learning_rate": 7.717288589496563e-07, "loss": 1.4983, "step": 16126 }, { "epoch": 0.8817024998974892, "grad_norm": 1.6142544746398926, "learning_rate": 7.710250887273196e-07, "loss": 1.5149, "step": 16127 }, { "epoch": 0.8817571723412108, "grad_norm": 2.3071632385253906, "learning_rate": 7.703216266821123e-07, "loss": 1.3972, "step": 16128 }, { "epoch": 0.8818118447849322, "grad_norm": 1.30046546459198, "learning_rate": 7.696184728375222e-07, "loss": 1.5409, "step": 16129 }, { "epoch": 0.8818665172286538, "grad_norm": 1.4829002618789673, "learning_rate": 7.689156272170318e-07, "loss": 1.5019, "step": 16130 }, { "epoch": 0.8819211896723754, "grad_norm": 1.7012699842453003, "learning_rate": 7.68213089844111e-07, "loss": 1.5194, "step": 16131 }, { "epoch": 0.8819758621160969, "grad_norm": 1.9026724100112915, "learning_rate": 7.675108607422154e-07, "loss": 1.5325, "step": 16132 }, { "epoch": 0.8820305345598185, "grad_norm": 1.3580659627914429, "learning_rate": 7.668089399348e-07, "loss": 1.3165, "step": 16133 }, { "epoch": 0.8820852070035401, "grad_norm": 1.2854470014572144, "learning_rate": 7.66107327445299e-07, "loss": 1.3843, "step": 16134 }, { "epoch": 0.8821398794472616, "grad_norm": 1.4008708000183105, "learning_rate": 7.654060232971427e-07, "loss": 1.2878, "step": 16135 }, { "epoch": 0.8821945518909832, "grad_norm": 1.517735242843628, "learning_rate": 7.647050275137502e-07, "loss": 1.695, "step": 16136 }, { "epoch": 0.8822492243347047, "grad_norm": 1.3776884078979492, "learning_rate": 7.640043401185249e-07, "loss": 1.5303, "step": 16137 }, { "epoch": 0.8823038967784262, "grad_norm": 1.5792346000671387, "learning_rate": 7.633039611348702e-07, "loss": 1.382, "step": 16138 }, { "epoch": 0.8823585692221478, "grad_norm": 1.601231336593628, "learning_rate": 7.626038905861699e-07, "loss": 1.3365, "step": 16139 }, { "epoch": 0.8824132416658693, "grad_norm": 1.1920496225357056, "learning_rate": 7.619041284958017e-07, "loss": 1.4799, "step": 16140 }, { "epoch": 0.8824679141095909, "grad_norm": 1.2653759717941284, "learning_rate": 7.612046748871327e-07, "loss": 1.5664, "step": 16141 }, { "epoch": 0.8825225865533125, "grad_norm": 1.9692319631576538, "learning_rate": 7.605055297835196e-07, "loss": 1.2641, "step": 16142 }, { "epoch": 0.882577258997034, "grad_norm": 2.1086127758026123, "learning_rate": 7.59806693208307e-07, "loss": 1.5755, "step": 16143 }, { "epoch": 0.8826319314407556, "grad_norm": 1.390384316444397, "learning_rate": 7.591081651848331e-07, "loss": 1.4513, "step": 16144 }, { "epoch": 0.8826866038844772, "grad_norm": 1.315721869468689, "learning_rate": 7.584099457364213e-07, "loss": 1.466, "step": 16145 }, { "epoch": 0.8827412763281987, "grad_norm": 2.0338430404663086, "learning_rate": 7.577120348863864e-07, "loss": 1.2453, "step": 16146 }, { "epoch": 0.8827959487719202, "grad_norm": 1.654976725578308, "learning_rate": 7.570144326580365e-07, "loss": 1.4325, "step": 16147 }, { "epoch": 0.8828506212156418, "grad_norm": 1.5620847940444946, "learning_rate": 7.563171390746627e-07, "loss": 1.4059, "step": 16148 }, { "epoch": 0.8829052936593633, "grad_norm": 1.1981215476989746, "learning_rate": 7.556201541595521e-07, "loss": 1.6327, "step": 16149 }, { "epoch": 0.8829599661030849, "grad_norm": 1.6385241746902466, "learning_rate": 7.54923477935976e-07, "loss": 1.5964, "step": 16150 }, { "epoch": 0.8830146385468064, "grad_norm": 1.4145948886871338, "learning_rate": 7.542271104271981e-07, "loss": 1.3022, "step": 16151 }, { "epoch": 0.883069310990528, "grad_norm": 1.6293373107910156, "learning_rate": 7.535310516564742e-07, "loss": 1.3509, "step": 16152 }, { "epoch": 0.8831239834342496, "grad_norm": 1.8172976970672607, "learning_rate": 7.528353016470468e-07, "loss": 1.4924, "step": 16153 }, { "epoch": 0.8831786558779711, "grad_norm": 1.5083625316619873, "learning_rate": 7.521398604221453e-07, "loss": 1.5105, "step": 16154 }, { "epoch": 0.8832333283216927, "grad_norm": 1.7518891096115112, "learning_rate": 7.514447280049964e-07, "loss": 1.462, "step": 16155 }, { "epoch": 0.8832880007654142, "grad_norm": 1.656593680381775, "learning_rate": 7.507499044188105e-07, "loss": 1.4636, "step": 16156 }, { "epoch": 0.8833426732091357, "grad_norm": 1.8336448669433594, "learning_rate": 7.50055389686788e-07, "loss": 1.4325, "step": 16157 }, { "epoch": 0.8833973456528573, "grad_norm": 1.5417159795761108, "learning_rate": 7.493611838321236e-07, "loss": 1.3218, "step": 16158 }, { "epoch": 0.8834520180965789, "grad_norm": 1.41335129737854, "learning_rate": 7.486672868779954e-07, "loss": 1.335, "step": 16159 }, { "epoch": 0.8835066905403004, "grad_norm": 1.6858866214752197, "learning_rate": 7.479736988475772e-07, "loss": 1.2916, "step": 16160 }, { "epoch": 0.883561362984022, "grad_norm": 1.3194105625152588, "learning_rate": 7.47280419764026e-07, "loss": 1.49, "step": 16161 }, { "epoch": 0.8836160354277436, "grad_norm": 1.3875484466552734, "learning_rate": 7.465874496504944e-07, "loss": 1.4141, "step": 16162 }, { "epoch": 0.8836707078714651, "grad_norm": 1.6974067687988281, "learning_rate": 7.458947885301204e-07, "loss": 1.1956, "step": 16163 }, { "epoch": 0.8837253803151867, "grad_norm": 1.4533442258834839, "learning_rate": 7.452024364260368e-07, "loss": 1.3704, "step": 16164 }, { "epoch": 0.8837800527589081, "grad_norm": 1.2537803649902344, "learning_rate": 7.445103933613585e-07, "loss": 1.4797, "step": 16165 }, { "epoch": 0.8838347252026297, "grad_norm": 1.5248514413833618, "learning_rate": 7.438186593591989e-07, "loss": 1.4535, "step": 16166 }, { "epoch": 0.8838893976463513, "grad_norm": 1.7117741107940674, "learning_rate": 7.431272344426544e-07, "loss": 1.2253, "step": 16167 }, { "epoch": 0.8839440700900728, "grad_norm": 1.405895709991455, "learning_rate": 7.424361186348117e-07, "loss": 1.5581, "step": 16168 }, { "epoch": 0.8839987425337944, "grad_norm": 1.6048481464385986, "learning_rate": 7.417453119587525e-07, "loss": 1.5007, "step": 16169 }, { "epoch": 0.884053414977516, "grad_norm": 1.5666561126708984, "learning_rate": 7.410548144375418e-07, "loss": 1.26, "step": 16170 }, { "epoch": 0.8841080874212375, "grad_norm": 1.4783906936645508, "learning_rate": 7.403646260942366e-07, "loss": 1.7528, "step": 16171 }, { "epoch": 0.8841627598649591, "grad_norm": 1.754060983657837, "learning_rate": 7.396747469518862e-07, "loss": 1.4449, "step": 16172 }, { "epoch": 0.8842174323086807, "grad_norm": 1.8038649559020996, "learning_rate": 7.389851770335266e-07, "loss": 1.1793, "step": 16173 }, { "epoch": 0.8842721047524021, "grad_norm": 1.7021232843399048, "learning_rate": 7.382959163621828e-07, "loss": 1.4295, "step": 16174 }, { "epoch": 0.8843267771961237, "grad_norm": 1.78322434425354, "learning_rate": 7.37606964960873e-07, "loss": 1.456, "step": 16175 }, { "epoch": 0.8843814496398453, "grad_norm": 1.5175617933273315, "learning_rate": 7.369183228526e-07, "loss": 1.3701, "step": 16176 }, { "epoch": 0.8844361220835668, "grad_norm": 1.7030975818634033, "learning_rate": 7.362299900603598e-07, "loss": 1.4078, "step": 16177 }, { "epoch": 0.8844907945272884, "grad_norm": 1.2577649354934692, "learning_rate": 7.355419666071406e-07, "loss": 1.4927, "step": 16178 }, { "epoch": 0.8845454669710099, "grad_norm": 1.5270894765853882, "learning_rate": 7.348542525159119e-07, "loss": 1.3085, "step": 16179 }, { "epoch": 0.8846001394147315, "grad_norm": 1.3884767293930054, "learning_rate": 7.341668478096431e-07, "loss": 1.5134, "step": 16180 }, { "epoch": 0.8846548118584531, "grad_norm": 1.5631179809570312, "learning_rate": 7.334797525112868e-07, "loss": 1.3118, "step": 16181 }, { "epoch": 0.8847094843021746, "grad_norm": 1.5021337270736694, "learning_rate": 7.327929666437839e-07, "loss": 1.3078, "step": 16182 }, { "epoch": 0.8847641567458961, "grad_norm": 1.4072331190109253, "learning_rate": 7.321064902300723e-07, "loss": 1.3244, "step": 16183 }, { "epoch": 0.8848188291896177, "grad_norm": 1.3902164697647095, "learning_rate": 7.314203232930728e-07, "loss": 1.2985, "step": 16184 }, { "epoch": 0.8848735016333392, "grad_norm": 1.4274799823760986, "learning_rate": 7.307344658556959e-07, "loss": 1.3007, "step": 16185 }, { "epoch": 0.8849281740770608, "grad_norm": 1.7633315324783325, "learning_rate": 7.300489179408477e-07, "loss": 1.3357, "step": 16186 }, { "epoch": 0.8849828465207824, "grad_norm": 1.9348642826080322, "learning_rate": 7.293636795714187e-07, "loss": 1.3922, "step": 16187 }, { "epoch": 0.8850375189645039, "grad_norm": 1.5940223932266235, "learning_rate": 7.28678750770292e-07, "loss": 1.3343, "step": 16188 }, { "epoch": 0.8850921914082255, "grad_norm": 1.6438640356063843, "learning_rate": 7.279941315603356e-07, "loss": 1.2634, "step": 16189 }, { "epoch": 0.8851468638519471, "grad_norm": 2.76332688331604, "learning_rate": 7.273098219644137e-07, "loss": 1.2859, "step": 16190 }, { "epoch": 0.8852015362956686, "grad_norm": 1.465606689453125, "learning_rate": 7.266258220053746e-07, "loss": 1.3355, "step": 16191 }, { "epoch": 0.8852562087393901, "grad_norm": 1.6865270137786865, "learning_rate": 7.259421317060611e-07, "loss": 1.5009, "step": 16192 }, { "epoch": 0.8853108811831116, "grad_norm": 1.6346170902252197, "learning_rate": 7.252587510893005e-07, "loss": 1.7356, "step": 16193 }, { "epoch": 0.8853655536268332, "grad_norm": 1.2741484642028809, "learning_rate": 7.245756801779158e-07, "loss": 1.4081, "step": 16194 }, { "epoch": 0.8854202260705548, "grad_norm": 1.4437925815582275, "learning_rate": 7.238929189947153e-07, "loss": 1.5124, "step": 16195 }, { "epoch": 0.8854748985142763, "grad_norm": 1.5784236192703247, "learning_rate": 7.23210467562494e-07, "loss": 1.594, "step": 16196 }, { "epoch": 0.8855295709579979, "grad_norm": 1.8158162832260132, "learning_rate": 7.225283259040472e-07, "loss": 1.4384, "step": 16197 }, { "epoch": 0.8855842434017195, "grad_norm": 1.1964582204818726, "learning_rate": 7.218464940421488e-07, "loss": 1.4538, "step": 16198 }, { "epoch": 0.885638915845441, "grad_norm": 1.7614775896072388, "learning_rate": 7.211649719995684e-07, "loss": 1.7113, "step": 16199 }, { "epoch": 0.8856935882891626, "grad_norm": 1.418942928314209, "learning_rate": 7.204837597990622e-07, "loss": 1.4901, "step": 16200 }, { "epoch": 0.8857482607328842, "grad_norm": 1.6993542909622192, "learning_rate": 7.198028574633787e-07, "loss": 1.14, "step": 16201 }, { "epoch": 0.8858029331766056, "grad_norm": 2.017587184906006, "learning_rate": 7.19122265015253e-07, "loss": 1.0712, "step": 16202 }, { "epoch": 0.8858576056203272, "grad_norm": 1.6554423570632935, "learning_rate": 7.184419824774147e-07, "loss": 1.848, "step": 16203 }, { "epoch": 0.8859122780640488, "grad_norm": 1.6947686672210693, "learning_rate": 7.17762009872579e-07, "loss": 1.2229, "step": 16204 }, { "epoch": 0.8859669505077703, "grad_norm": 1.6103397607803345, "learning_rate": 7.17082347223449e-07, "loss": 1.4842, "step": 16205 }, { "epoch": 0.8860216229514919, "grad_norm": 1.4005099534988403, "learning_rate": 7.16402994552724e-07, "loss": 1.2656, "step": 16206 }, { "epoch": 0.8860762953952134, "grad_norm": 1.4865059852600098, "learning_rate": 7.157239518830872e-07, "loss": 1.3212, "step": 16207 }, { "epoch": 0.886130967838935, "grad_norm": 1.6614372730255127, "learning_rate": 7.150452192372138e-07, "loss": 1.4548, "step": 16208 }, { "epoch": 0.8861856402826566, "grad_norm": 1.3658373355865479, "learning_rate": 7.143667966377699e-07, "loss": 1.4629, "step": 16209 }, { "epoch": 0.886240312726378, "grad_norm": 1.9021021127700806, "learning_rate": 7.136886841074053e-07, "loss": 1.6285, "step": 16210 }, { "epoch": 0.8862949851700996, "grad_norm": 1.80097234249115, "learning_rate": 7.130108816687687e-07, "loss": 1.4255, "step": 16211 }, { "epoch": 0.8863496576138212, "grad_norm": 1.3183479309082031, "learning_rate": 7.123333893444906e-07, "loss": 1.4393, "step": 16212 }, { "epoch": 0.8864043300575427, "grad_norm": 1.3667680025100708, "learning_rate": 7.116562071571964e-07, "loss": 1.4486, "step": 16213 }, { "epoch": 0.8864590025012643, "grad_norm": 1.6786586046218872, "learning_rate": 7.109793351294958e-07, "loss": 1.3588, "step": 16214 }, { "epoch": 0.8865136749449859, "grad_norm": 1.574827790260315, "learning_rate": 7.103027732839929e-07, "loss": 1.3386, "step": 16215 }, { "epoch": 0.8865683473887074, "grad_norm": 1.4839398860931396, "learning_rate": 7.096265216432786e-07, "loss": 1.1813, "step": 16216 }, { "epoch": 0.886623019832429, "grad_norm": 1.3877277374267578, "learning_rate": 7.089505802299357e-07, "loss": 1.3819, "step": 16217 }, { "epoch": 0.8866776922761506, "grad_norm": 2.0038130283355713, "learning_rate": 7.082749490665353e-07, "loss": 1.1487, "step": 16218 }, { "epoch": 0.886732364719872, "grad_norm": 1.4421648979187012, "learning_rate": 7.07599628175637e-07, "loss": 1.5176, "step": 16219 }, { "epoch": 0.8867870371635936, "grad_norm": 1.5638078451156616, "learning_rate": 7.069246175797939e-07, "loss": 1.4729, "step": 16220 }, { "epoch": 0.8868417096073151, "grad_norm": 1.381190299987793, "learning_rate": 7.062499173015425e-07, "loss": 1.4519, "step": 16221 }, { "epoch": 0.8868963820510367, "grad_norm": 1.5059056282043457, "learning_rate": 7.055755273634169e-07, "loss": 1.4658, "step": 16222 }, { "epoch": 0.8869510544947583, "grad_norm": 1.320225715637207, "learning_rate": 7.049014477879346e-07, "loss": 1.4687, "step": 16223 }, { "epoch": 0.8870057269384798, "grad_norm": 1.6945858001708984, "learning_rate": 7.042276785976032e-07, "loss": 1.5493, "step": 16224 }, { "epoch": 0.8870603993822014, "grad_norm": 1.6800435781478882, "learning_rate": 7.035542198149237e-07, "loss": 1.5475, "step": 16225 }, { "epoch": 0.887115071825923, "grad_norm": 1.4978519678115845, "learning_rate": 7.028810714623846e-07, "loss": 1.8268, "step": 16226 }, { "epoch": 0.8871697442696445, "grad_norm": 1.7104655504226685, "learning_rate": 7.022082335624614e-07, "loss": 1.4571, "step": 16227 }, { "epoch": 0.887224416713366, "grad_norm": 1.5836342573165894, "learning_rate": 7.015357061376249e-07, "loss": 1.3587, "step": 16228 }, { "epoch": 0.8872790891570876, "grad_norm": 1.4941147565841675, "learning_rate": 7.008634892103294e-07, "loss": 1.5761, "step": 16229 }, { "epoch": 0.8873337616008091, "grad_norm": 1.298690915107727, "learning_rate": 7.001915828030225e-07, "loss": 1.5434, "step": 16230 }, { "epoch": 0.8873884340445307, "grad_norm": 1.7018946409225464, "learning_rate": 6.995199869381419e-07, "loss": 1.4737, "step": 16231 }, { "epoch": 0.8874431064882523, "grad_norm": 1.6770133972167969, "learning_rate": 6.98848701638114e-07, "loss": 1.3754, "step": 16232 }, { "epoch": 0.8874977789319738, "grad_norm": 1.6189155578613281, "learning_rate": 6.98177726925352e-07, "loss": 1.4756, "step": 16233 }, { "epoch": 0.8875524513756954, "grad_norm": 2.4436562061309814, "learning_rate": 6.975070628222646e-07, "loss": 1.4235, "step": 16234 }, { "epoch": 0.8876071238194169, "grad_norm": 1.9261891841888428, "learning_rate": 6.96836709351244e-07, "loss": 1.4556, "step": 16235 }, { "epoch": 0.8876617962631385, "grad_norm": 1.525667428970337, "learning_rate": 6.961666665346767e-07, "loss": 1.2605, "step": 16236 }, { "epoch": 0.88771646870686, "grad_norm": 1.5760756731033325, "learning_rate": 6.954969343949381e-07, "loss": 1.2343, "step": 16237 }, { "epoch": 0.8877711411505815, "grad_norm": 1.5234981775283813, "learning_rate": 6.948275129543902e-07, "loss": 1.3691, "step": 16238 }, { "epoch": 0.8878258135943031, "grad_norm": 1.238466501235962, "learning_rate": 6.941584022353865e-07, "loss": 1.7091, "step": 16239 }, { "epoch": 0.8878804860380247, "grad_norm": 1.405739665031433, "learning_rate": 6.934896022602699e-07, "loss": 1.3434, "step": 16240 }, { "epoch": 0.8879351584817462, "grad_norm": 2.009936809539795, "learning_rate": 6.92821113051374e-07, "loss": 1.1681, "step": 16241 }, { "epoch": 0.8879898309254678, "grad_norm": 1.7665680646896362, "learning_rate": 6.921529346310218e-07, "loss": 1.6145, "step": 16242 }, { "epoch": 0.8880445033691894, "grad_norm": 1.2715649604797363, "learning_rate": 6.914850670215245e-07, "loss": 1.43, "step": 16243 }, { "epoch": 0.8880991758129109, "grad_norm": 1.6926347017288208, "learning_rate": 6.90817510245182e-07, "loss": 1.4684, "step": 16244 }, { "epoch": 0.8881538482566325, "grad_norm": 1.7719252109527588, "learning_rate": 6.901502643242897e-07, "loss": 1.4977, "step": 16245 }, { "epoch": 0.888208520700354, "grad_norm": 1.602313756942749, "learning_rate": 6.894833292811265e-07, "loss": 1.389, "step": 16246 }, { "epoch": 0.8882631931440755, "grad_norm": 1.7102543115615845, "learning_rate": 6.888167051379602e-07, "loss": 1.6502, "step": 16247 }, { "epoch": 0.8883178655877971, "grad_norm": 1.8305360078811646, "learning_rate": 6.881503919170563e-07, "loss": 1.2926, "step": 16248 }, { "epoch": 0.8883725380315186, "grad_norm": 1.7325173616409302, "learning_rate": 6.874843896406591e-07, "loss": 1.5754, "step": 16249 }, { "epoch": 0.8884272104752402, "grad_norm": 1.6954829692840576, "learning_rate": 6.868186983310133e-07, "loss": 1.389, "step": 16250 }, { "epoch": 0.8884818829189618, "grad_norm": 2.376127243041992, "learning_rate": 6.861533180103441e-07, "loss": 1.257, "step": 16251 }, { "epoch": 0.8885365553626833, "grad_norm": 1.7601958513259888, "learning_rate": 6.854882487008718e-07, "loss": 1.3689, "step": 16252 }, { "epoch": 0.8885912278064049, "grad_norm": 1.5321125984191895, "learning_rate": 6.848234904248041e-07, "loss": 1.3915, "step": 16253 }, { "epoch": 0.8886459002501265, "grad_norm": 1.9435197114944458, "learning_rate": 6.841590432043388e-07, "loss": 1.3735, "step": 16254 }, { "epoch": 0.888700572693848, "grad_norm": 1.5884473323822021, "learning_rate": 6.834949070616626e-07, "loss": 1.3271, "step": 16255 }, { "epoch": 0.8887552451375695, "grad_norm": 1.5546612739562988, "learning_rate": 6.828310820189533e-07, "loss": 1.2586, "step": 16256 }, { "epoch": 0.8888099175812911, "grad_norm": 1.8010343313217163, "learning_rate": 6.821675680983786e-07, "loss": 1.4052, "step": 16257 }, { "epoch": 0.8888645900250126, "grad_norm": 1.2958178520202637, "learning_rate": 6.815043653220921e-07, "loss": 1.4601, "step": 16258 }, { "epoch": 0.8889192624687342, "grad_norm": 2.1699891090393066, "learning_rate": 6.808414737122426e-07, "loss": 1.2126, "step": 16259 }, { "epoch": 0.8889739349124558, "grad_norm": 1.3253753185272217, "learning_rate": 6.801788932909648e-07, "loss": 1.4334, "step": 16260 }, { "epoch": 0.8890286073561773, "grad_norm": 1.6336606740951538, "learning_rate": 6.795166240803819e-07, "loss": 1.3344, "step": 16261 }, { "epoch": 0.8890832797998989, "grad_norm": 1.3822728395462036, "learning_rate": 6.788546661026108e-07, "loss": 1.3997, "step": 16262 }, { "epoch": 0.8891379522436204, "grad_norm": 1.718665599822998, "learning_rate": 6.781930193797548e-07, "loss": 1.1098, "step": 16263 }, { "epoch": 0.889192624687342, "grad_norm": 1.5644536018371582, "learning_rate": 6.775316839339086e-07, "loss": 1.3234, "step": 16264 }, { "epoch": 0.8892472971310635, "grad_norm": 2.0888497829437256, "learning_rate": 6.768706597871555e-07, "loss": 1.3865, "step": 16265 }, { "epoch": 0.889301969574785, "grad_norm": 1.382938027381897, "learning_rate": 6.76209946961569e-07, "loss": 1.5473, "step": 16266 }, { "epoch": 0.8893566420185066, "grad_norm": 1.4423718452453613, "learning_rate": 6.755495454792116e-07, "loss": 1.2924, "step": 16267 }, { "epoch": 0.8894113144622282, "grad_norm": 1.3591681718826294, "learning_rate": 6.748894553621344e-07, "loss": 1.3463, "step": 16268 }, { "epoch": 0.8894659869059497, "grad_norm": 1.6465853452682495, "learning_rate": 6.742296766323797e-07, "loss": 1.3037, "step": 16269 }, { "epoch": 0.8895206593496713, "grad_norm": 1.3486946821212769, "learning_rate": 6.735702093119811e-07, "loss": 1.4354, "step": 16270 }, { "epoch": 0.8895753317933929, "grad_norm": 1.572460651397705, "learning_rate": 6.729110534229577e-07, "loss": 1.2658, "step": 16271 }, { "epoch": 0.8896300042371144, "grad_norm": 1.6625027656555176, "learning_rate": 6.722522089873196e-07, "loss": 1.0924, "step": 16272 }, { "epoch": 0.889684676680836, "grad_norm": 1.6199158430099487, "learning_rate": 6.715936760270703e-07, "loss": 1.3136, "step": 16273 }, { "epoch": 0.8897393491245575, "grad_norm": 2.394347667694092, "learning_rate": 6.709354545641989e-07, "loss": 1.4388, "step": 16274 }, { "epoch": 0.889794021568279, "grad_norm": 1.67934250831604, "learning_rate": 6.702775446206811e-07, "loss": 1.4059, "step": 16275 }, { "epoch": 0.8898486940120006, "grad_norm": 1.3311256170272827, "learning_rate": 6.696199462184905e-07, "loss": 1.3948, "step": 16276 }, { "epoch": 0.8899033664557221, "grad_norm": 1.6466326713562012, "learning_rate": 6.68962659379585e-07, "loss": 1.2632, "step": 16277 }, { "epoch": 0.8899580388994437, "grad_norm": 1.3625248670578003, "learning_rate": 6.683056841259117e-07, "loss": 1.4539, "step": 16278 }, { "epoch": 0.8900127113431653, "grad_norm": 1.8793907165527344, "learning_rate": 6.676490204794094e-07, "loss": 1.3282, "step": 16279 }, { "epoch": 0.8900673837868868, "grad_norm": 1.5604199171066284, "learning_rate": 6.669926684620043e-07, "loss": 1.3243, "step": 16280 }, { "epoch": 0.8901220562306084, "grad_norm": 1.5564024448394775, "learning_rate": 6.663366280956152e-07, "loss": 1.6934, "step": 16281 }, { "epoch": 0.89017672867433, "grad_norm": 1.4390974044799805, "learning_rate": 6.656808994021491e-07, "loss": 1.3986, "step": 16282 }, { "epoch": 0.8902314011180514, "grad_norm": 1.3300080299377441, "learning_rate": 6.650254824034996e-07, "loss": 1.461, "step": 16283 }, { "epoch": 0.890286073561773, "grad_norm": 1.5723555088043213, "learning_rate": 6.643703771215548e-07, "loss": 1.317, "step": 16284 }, { "epoch": 0.8903407460054946, "grad_norm": 1.6552824974060059, "learning_rate": 6.637155835781917e-07, "loss": 1.1923, "step": 16285 }, { "epoch": 0.8903954184492161, "grad_norm": 2.1116368770599365, "learning_rate": 6.630611017952704e-07, "loss": 1.2901, "step": 16286 }, { "epoch": 0.8904500908929377, "grad_norm": 1.339522361755371, "learning_rate": 6.624069317946513e-07, "loss": 1.6024, "step": 16287 }, { "epoch": 0.8905047633366593, "grad_norm": 1.8044137954711914, "learning_rate": 6.617530735981758e-07, "loss": 1.5729, "step": 16288 }, { "epoch": 0.8905594357803808, "grad_norm": 2.4294817447662354, "learning_rate": 6.610995272276765e-07, "loss": 1.5613, "step": 16289 }, { "epoch": 0.8906141082241024, "grad_norm": 2.2970027923583984, "learning_rate": 6.604462927049804e-07, "loss": 1.347, "step": 16290 }, { "epoch": 0.8906687806678238, "grad_norm": 1.603891134262085, "learning_rate": 6.597933700518977e-07, "loss": 1.3366, "step": 16291 }, { "epoch": 0.8907234531115454, "grad_norm": 1.3278374671936035, "learning_rate": 6.591407592902321e-07, "loss": 1.3464, "step": 16292 }, { "epoch": 0.890778125555267, "grad_norm": 1.2719634771347046, "learning_rate": 6.584884604417763e-07, "loss": 1.4829, "step": 16293 }, { "epoch": 0.8908327979989885, "grad_norm": 1.442678689956665, "learning_rate": 6.578364735283094e-07, "loss": 1.4014, "step": 16294 }, { "epoch": 0.8908874704427101, "grad_norm": 1.9611893892288208, "learning_rate": 6.571847985716063e-07, "loss": 1.3927, "step": 16295 }, { "epoch": 0.8909421428864317, "grad_norm": 1.6398500204086304, "learning_rate": 6.565334355934261e-07, "loss": 1.59, "step": 16296 }, { "epoch": 0.8909968153301532, "grad_norm": 1.8288812637329102, "learning_rate": 6.558823846155182e-07, "loss": 1.23, "step": 16297 }, { "epoch": 0.8910514877738748, "grad_norm": 1.5498132705688477, "learning_rate": 6.552316456596252e-07, "loss": 1.435, "step": 16298 }, { "epoch": 0.8911061602175964, "grad_norm": 1.1217808723449707, "learning_rate": 6.545812187474765e-07, "loss": 1.7367, "step": 16299 }, { "epoch": 0.8911608326613178, "grad_norm": 1.4207172393798828, "learning_rate": 6.53931103900789e-07, "loss": 1.4722, "step": 16300 }, { "epoch": 0.8912155051050394, "grad_norm": 1.499112606048584, "learning_rate": 6.532813011412742e-07, "loss": 1.3932, "step": 16301 }, { "epoch": 0.891270177548761, "grad_norm": 1.3138062953948975, "learning_rate": 6.526318104906293e-07, "loss": 1.5125, "step": 16302 }, { "epoch": 0.8913248499924825, "grad_norm": 1.6500941514968872, "learning_rate": 6.519826319705413e-07, "loss": 1.342, "step": 16303 }, { "epoch": 0.8913795224362041, "grad_norm": 1.57505202293396, "learning_rate": 6.513337656026908e-07, "loss": 1.5632, "step": 16304 }, { "epoch": 0.8914341948799257, "grad_norm": 1.96633780002594, "learning_rate": 6.506852114087436e-07, "loss": 1.2586, "step": 16305 }, { "epoch": 0.8914888673236472, "grad_norm": 2.0727832317352295, "learning_rate": 6.500369694103559e-07, "loss": 1.5417, "step": 16306 }, { "epoch": 0.8915435397673688, "grad_norm": 1.5779343843460083, "learning_rate": 6.493890396291736e-07, "loss": 1.2746, "step": 16307 }, { "epoch": 0.8915982122110903, "grad_norm": 1.74300217628479, "learning_rate": 6.487414220868315e-07, "loss": 1.3321, "step": 16308 }, { "epoch": 0.8916528846548119, "grad_norm": 1.623907208442688, "learning_rate": 6.480941168049593e-07, "loss": 1.6366, "step": 16309 }, { "epoch": 0.8917075570985334, "grad_norm": 1.6108176708221436, "learning_rate": 6.474471238051683e-07, "loss": 1.5525, "step": 16310 }, { "epoch": 0.8917622295422549, "grad_norm": 1.4580059051513672, "learning_rate": 6.468004431090636e-07, "loss": 1.5091, "step": 16311 }, { "epoch": 0.8918169019859765, "grad_norm": 1.6517390012741089, "learning_rate": 6.461540747382411e-07, "loss": 1.2827, "step": 16312 }, { "epoch": 0.8918715744296981, "grad_norm": 1.45038902759552, "learning_rate": 6.455080187142837e-07, "loss": 1.6808, "step": 16313 }, { "epoch": 0.8919262468734196, "grad_norm": 1.288709282875061, "learning_rate": 6.44862275058763e-07, "loss": 1.5123, "step": 16314 }, { "epoch": 0.8919809193171412, "grad_norm": 1.7436226606369019, "learning_rate": 6.44216843793245e-07, "loss": 1.4702, "step": 16315 }, { "epoch": 0.8920355917608628, "grad_norm": 1.3805280923843384, "learning_rate": 6.435717249392803e-07, "loss": 1.4662, "step": 16316 }, { "epoch": 0.8920902642045843, "grad_norm": 2.0607457160949707, "learning_rate": 6.429269185184117e-07, "loss": 1.3446, "step": 16317 }, { "epoch": 0.8921449366483059, "grad_norm": 1.4319864511489868, "learning_rate": 6.422824245521708e-07, "loss": 1.4175, "step": 16318 }, { "epoch": 0.8921996090920274, "grad_norm": 1.3742090463638306, "learning_rate": 6.41638243062076e-07, "loss": 1.4117, "step": 16319 }, { "epoch": 0.8922542815357489, "grad_norm": 1.391963005065918, "learning_rate": 6.409943740696423e-07, "loss": 1.2685, "step": 16320 }, { "epoch": 0.8923089539794705, "grad_norm": 1.6455230712890625, "learning_rate": 6.40350817596368e-07, "loss": 1.4193, "step": 16321 }, { "epoch": 0.892363626423192, "grad_norm": 1.0667802095413208, "learning_rate": 6.397075736637404e-07, "loss": 1.5363, "step": 16322 }, { "epoch": 0.8924182988669136, "grad_norm": 1.8813079595565796, "learning_rate": 6.390646422932445e-07, "loss": 1.3704, "step": 16323 }, { "epoch": 0.8924729713106352, "grad_norm": 1.383958339691162, "learning_rate": 6.384220235063454e-07, "loss": 1.6929, "step": 16324 }, { "epoch": 0.8925276437543567, "grad_norm": 1.9135234355926514, "learning_rate": 6.377797173245015e-07, "loss": 1.4555, "step": 16325 }, { "epoch": 0.8925823161980783, "grad_norm": 1.6767776012420654, "learning_rate": 6.371377237691634e-07, "loss": 1.3269, "step": 16326 }, { "epoch": 0.8926369886417999, "grad_norm": 1.80661141872406, "learning_rate": 6.364960428617673e-07, "loss": 1.2164, "step": 16327 }, { "epoch": 0.8926916610855213, "grad_norm": 1.363000750541687, "learning_rate": 6.358546746237393e-07, "loss": 1.5099, "step": 16328 }, { "epoch": 0.8927463335292429, "grad_norm": 1.1866618394851685, "learning_rate": 6.352136190764991e-07, "loss": 1.5339, "step": 16329 }, { "epoch": 0.8928010059729645, "grad_norm": 2.030229330062866, "learning_rate": 6.345728762414504e-07, "loss": 1.4107, "step": 16330 }, { "epoch": 0.892855678416686, "grad_norm": 1.3393702507019043, "learning_rate": 6.33932446139991e-07, "loss": 1.6131, "step": 16331 }, { "epoch": 0.8929103508604076, "grad_norm": 1.9692015647888184, "learning_rate": 6.332923287935044e-07, "loss": 1.5706, "step": 16332 }, { "epoch": 0.8929650233041292, "grad_norm": 1.2813924551010132, "learning_rate": 6.326525242233661e-07, "loss": 1.3717, "step": 16333 }, { "epoch": 0.8930196957478507, "grad_norm": 1.521979570388794, "learning_rate": 6.320130324509421e-07, "loss": 1.5283, "step": 16334 }, { "epoch": 0.8930743681915723, "grad_norm": 1.9641698598861694, "learning_rate": 6.313738534975855e-07, "loss": 1.2916, "step": 16335 }, { "epoch": 0.8931290406352937, "grad_norm": 2.3551318645477295, "learning_rate": 6.307349873846392e-07, "loss": 1.2368, "step": 16336 }, { "epoch": 0.8931837130790153, "grad_norm": 1.424936056137085, "learning_rate": 6.300964341334382e-07, "loss": 1.5101, "step": 16337 }, { "epoch": 0.8932383855227369, "grad_norm": 1.4908777475357056, "learning_rate": 6.294581937653044e-07, "loss": 1.4012, "step": 16338 }, { "epoch": 0.8932930579664584, "grad_norm": 1.6179429292678833, "learning_rate": 6.288202663015485e-07, "loss": 1.4828, "step": 16339 }, { "epoch": 0.89334773041018, "grad_norm": 1.283454418182373, "learning_rate": 6.281826517634759e-07, "loss": 1.5195, "step": 16340 }, { "epoch": 0.8934024028539016, "grad_norm": 1.36334228515625, "learning_rate": 6.275453501723771e-07, "loss": 1.4834, "step": 16341 }, { "epoch": 0.8934570752976231, "grad_norm": 1.6278963088989258, "learning_rate": 6.269083615495297e-07, "loss": 1.3783, "step": 16342 }, { "epoch": 0.8935117477413447, "grad_norm": 1.6852368116378784, "learning_rate": 6.262716859162088e-07, "loss": 1.4335, "step": 16343 }, { "epoch": 0.8935664201850663, "grad_norm": 1.3125196695327759, "learning_rate": 6.256353232936718e-07, "loss": 1.4933, "step": 16344 }, { "epoch": 0.8936210926287877, "grad_norm": 1.307681679725647, "learning_rate": 6.249992737031695e-07, "loss": 1.3802, "step": 16345 }, { "epoch": 0.8936757650725093, "grad_norm": 1.6389412879943848, "learning_rate": 6.243635371659396e-07, "loss": 1.235, "step": 16346 }, { "epoch": 0.8937304375162309, "grad_norm": 1.6521072387695312, "learning_rate": 6.237281137032114e-07, "loss": 1.3669, "step": 16347 }, { "epoch": 0.8937851099599524, "grad_norm": 1.7100118398666382, "learning_rate": 6.230930033362048e-07, "loss": 1.4375, "step": 16348 }, { "epoch": 0.893839782403674, "grad_norm": 2.009955883026123, "learning_rate": 6.224582060861262e-07, "loss": 1.5175, "step": 16349 }, { "epoch": 0.8938944548473955, "grad_norm": 1.56537926197052, "learning_rate": 6.218237219741718e-07, "loss": 1.5962, "step": 16350 }, { "epoch": 0.8939491272911171, "grad_norm": 1.7645970582962036, "learning_rate": 6.211895510215316e-07, "loss": 1.3767, "step": 16351 }, { "epoch": 0.8940037997348387, "grad_norm": 1.3524776697158813, "learning_rate": 6.205556932493806e-07, "loss": 1.3619, "step": 16352 }, { "epoch": 0.8940584721785602, "grad_norm": 1.2733485698699951, "learning_rate": 6.199221486788831e-07, "loss": 1.5911, "step": 16353 }, { "epoch": 0.8941131446222818, "grad_norm": 1.1687874794006348, "learning_rate": 6.192889173311966e-07, "loss": 1.7388, "step": 16354 }, { "epoch": 0.8941678170660033, "grad_norm": 1.3692312240600586, "learning_rate": 6.186559992274665e-07, "loss": 1.3929, "step": 16355 }, { "epoch": 0.8942224895097248, "grad_norm": 2.230574607849121, "learning_rate": 6.18023394388827e-07, "loss": 1.3049, "step": 16356 }, { "epoch": 0.8942771619534464, "grad_norm": 1.7380588054656982, "learning_rate": 6.17391102836401e-07, "loss": 1.3739, "step": 16357 }, { "epoch": 0.894331834397168, "grad_norm": 1.7204711437225342, "learning_rate": 6.167591245913029e-07, "loss": 1.3118, "step": 16358 }, { "epoch": 0.8943865068408895, "grad_norm": 1.6216760873794556, "learning_rate": 6.161274596746347e-07, "loss": 1.4795, "step": 16359 }, { "epoch": 0.8944411792846111, "grad_norm": 1.3896489143371582, "learning_rate": 6.154961081074929e-07, "loss": 1.3214, "step": 16360 }, { "epoch": 0.8944958517283327, "grad_norm": 1.5223702192306519, "learning_rate": 6.148650699109559e-07, "loss": 1.4197, "step": 16361 }, { "epoch": 0.8945505241720542, "grad_norm": 1.3954499959945679, "learning_rate": 6.142343451060973e-07, "loss": 1.4107, "step": 16362 }, { "epoch": 0.8946051966157758, "grad_norm": 1.8896493911743164, "learning_rate": 6.136039337139799e-07, "loss": 1.5043, "step": 16363 }, { "epoch": 0.8946598690594972, "grad_norm": 1.6041836738586426, "learning_rate": 6.129738357556503e-07, "loss": 1.2975, "step": 16364 }, { "epoch": 0.8947145415032188, "grad_norm": 1.4760396480560303, "learning_rate": 6.123440512521539e-07, "loss": 1.5669, "step": 16365 }, { "epoch": 0.8947692139469404, "grad_norm": 1.4450603723526, "learning_rate": 6.117145802245183e-07, "loss": 1.3417, "step": 16366 }, { "epoch": 0.8948238863906619, "grad_norm": 1.475795865058899, "learning_rate": 6.110854226937613e-07, "loss": 1.3265, "step": 16367 }, { "epoch": 0.8948785588343835, "grad_norm": 1.380240797996521, "learning_rate": 6.104565786808959e-07, "loss": 1.2023, "step": 16368 }, { "epoch": 0.8949332312781051, "grad_norm": 2.1343917846679688, "learning_rate": 6.098280482069186e-07, "loss": 1.5247, "step": 16369 }, { "epoch": 0.8949879037218266, "grad_norm": 1.684791088104248, "learning_rate": 6.091998312928171e-07, "loss": 1.3988, "step": 16370 }, { "epoch": 0.8950425761655482, "grad_norm": 1.7073367834091187, "learning_rate": 6.085719279595692e-07, "loss": 1.436, "step": 16371 }, { "epoch": 0.8950972486092698, "grad_norm": 1.3592839241027832, "learning_rate": 6.079443382281424e-07, "loss": 1.4933, "step": 16372 }, { "epoch": 0.8951519210529912, "grad_norm": 1.5414767265319824, "learning_rate": 6.073170621194924e-07, "loss": 1.5882, "step": 16373 }, { "epoch": 0.8952065934967128, "grad_norm": 1.7873135805130005, "learning_rate": 6.066900996545677e-07, "loss": 1.4056, "step": 16374 }, { "epoch": 0.8952612659404344, "grad_norm": 1.407545566558838, "learning_rate": 6.060634508543006e-07, "loss": 1.5315, "step": 16375 }, { "epoch": 0.8953159383841559, "grad_norm": 1.5145049095153809, "learning_rate": 6.0543711573962e-07, "loss": 1.3803, "step": 16376 }, { "epoch": 0.8953706108278775, "grad_norm": 1.4738298654556274, "learning_rate": 6.0481109433144e-07, "loss": 1.2564, "step": 16377 }, { "epoch": 0.895425283271599, "grad_norm": 1.5735889673233032, "learning_rate": 6.041853866506619e-07, "loss": 1.6646, "step": 16378 }, { "epoch": 0.8954799557153206, "grad_norm": 2.0908701419830322, "learning_rate": 6.035599927181834e-07, "loss": 1.1764, "step": 16379 }, { "epoch": 0.8955346281590422, "grad_norm": 1.466360092163086, "learning_rate": 6.029349125548856e-07, "loss": 1.4466, "step": 16380 }, { "epoch": 0.8955893006027636, "grad_norm": 1.4216142892837524, "learning_rate": 6.023101461816427e-07, "loss": 1.3487, "step": 16381 }, { "epoch": 0.8956439730464852, "grad_norm": 1.6580408811569214, "learning_rate": 6.016856936193161e-07, "loss": 1.4714, "step": 16382 }, { "epoch": 0.8956986454902068, "grad_norm": 1.3738465309143066, "learning_rate": 6.010615548887577e-07, "loss": 1.3621, "step": 16383 }, { "epoch": 0.8957533179339283, "grad_norm": 1.7945772409439087, "learning_rate": 6.004377300108078e-07, "loss": 1.2491, "step": 16384 }, { "epoch": 0.8958079903776499, "grad_norm": 1.8883998394012451, "learning_rate": 5.998142190063017e-07, "loss": 1.3198, "step": 16385 }, { "epoch": 0.8958626628213715, "grad_norm": 1.3530107736587524, "learning_rate": 5.99191021896055e-07, "loss": 1.4837, "step": 16386 }, { "epoch": 0.895917335265093, "grad_norm": 1.9068760871887207, "learning_rate": 5.985681387008802e-07, "loss": 1.1823, "step": 16387 }, { "epoch": 0.8959720077088146, "grad_norm": 2.053483486175537, "learning_rate": 5.97945569441577e-07, "loss": 1.3463, "step": 16388 }, { "epoch": 0.8960266801525362, "grad_norm": 1.1730116605758667, "learning_rate": 5.973233141389323e-07, "loss": 1.4911, "step": 16389 }, { "epoch": 0.8960813525962577, "grad_norm": 1.8331319093704224, "learning_rate": 5.967013728137272e-07, "loss": 1.5524, "step": 16390 }, { "epoch": 0.8961360250399792, "grad_norm": 1.9374891519546509, "learning_rate": 5.960797454867295e-07, "loss": 1.273, "step": 16391 }, { "epoch": 0.8961906974837007, "grad_norm": 1.203613042831421, "learning_rate": 5.954584321786949e-07, "loss": 1.6108, "step": 16392 }, { "epoch": 0.8962453699274223, "grad_norm": 1.4551466703414917, "learning_rate": 5.948374329103723e-07, "loss": 1.3184, "step": 16393 }, { "epoch": 0.8963000423711439, "grad_norm": 1.4957966804504395, "learning_rate": 5.942167477024985e-07, "loss": 1.3779, "step": 16394 }, { "epoch": 0.8963547148148654, "grad_norm": 1.227599859237671, "learning_rate": 5.93596376575798e-07, "loss": 1.3609, "step": 16395 }, { "epoch": 0.896409387258587, "grad_norm": 1.3161181211471558, "learning_rate": 5.929763195509875e-07, "loss": 1.5255, "step": 16396 }, { "epoch": 0.8964640597023086, "grad_norm": 1.748676061630249, "learning_rate": 5.923565766487727e-07, "loss": 1.5249, "step": 16397 }, { "epoch": 0.8965187321460301, "grad_norm": 1.122402548789978, "learning_rate": 5.91737147889846e-07, "loss": 1.5907, "step": 16398 }, { "epoch": 0.8965734045897517, "grad_norm": 1.405145525932312, "learning_rate": 5.91118033294894e-07, "loss": 1.3112, "step": 16399 }, { "epoch": 0.8966280770334732, "grad_norm": 1.3650639057159424, "learning_rate": 5.904992328845893e-07, "loss": 1.5853, "step": 16400 }, { "epoch": 0.8966827494771947, "grad_norm": 1.436468243598938, "learning_rate": 5.898807466795941e-07, "loss": 1.2645, "step": 16401 }, { "epoch": 0.8967374219209163, "grad_norm": 1.4798504114151, "learning_rate": 5.892625747005642e-07, "loss": 1.5188, "step": 16402 }, { "epoch": 0.8967920943646379, "grad_norm": 1.5928910970687866, "learning_rate": 5.886447169681386e-07, "loss": 1.2523, "step": 16403 }, { "epoch": 0.8968467668083594, "grad_norm": 1.5566154718399048, "learning_rate": 5.880271735029508e-07, "loss": 1.5182, "step": 16404 }, { "epoch": 0.896901439252081, "grad_norm": 1.4478093385696411, "learning_rate": 5.874099443256221e-07, "loss": 1.4515, "step": 16405 }, { "epoch": 0.8969561116958025, "grad_norm": 1.3264245986938477, "learning_rate": 5.867930294567614e-07, "loss": 1.7392, "step": 16406 }, { "epoch": 0.8970107841395241, "grad_norm": 1.5597387552261353, "learning_rate": 5.861764289169713e-07, "loss": 1.4604, "step": 16407 }, { "epoch": 0.8970654565832457, "grad_norm": 1.6291773319244385, "learning_rate": 5.855601427268409e-07, "loss": 1.5182, "step": 16408 }, { "epoch": 0.8971201290269671, "grad_norm": 1.4277316331863403, "learning_rate": 5.849441709069492e-07, "loss": 1.3873, "step": 16409 }, { "epoch": 0.8971748014706887, "grad_norm": 1.674257516860962, "learning_rate": 5.843285134778643e-07, "loss": 1.2605, "step": 16410 }, { "epoch": 0.8972294739144103, "grad_norm": 1.2224466800689697, "learning_rate": 5.837131704601451e-07, "loss": 1.4424, "step": 16411 }, { "epoch": 0.8972841463581318, "grad_norm": 1.5747071504592896, "learning_rate": 5.830981418743376e-07, "loss": 1.2847, "step": 16412 }, { "epoch": 0.8973388188018534, "grad_norm": 1.4442346096038818, "learning_rate": 5.824834277409818e-07, "loss": 1.3021, "step": 16413 }, { "epoch": 0.897393491245575, "grad_norm": 1.6830570697784424, "learning_rate": 5.818690280806038e-07, "loss": 1.5508, "step": 16414 }, { "epoch": 0.8974481636892965, "grad_norm": 1.633730173110962, "learning_rate": 5.812549429137171e-07, "loss": 1.6228, "step": 16415 }, { "epoch": 0.8975028361330181, "grad_norm": 1.3234134912490845, "learning_rate": 5.806411722608318e-07, "loss": 1.3812, "step": 16416 }, { "epoch": 0.8975575085767397, "grad_norm": 1.7527351379394531, "learning_rate": 5.800277161424383e-07, "loss": 1.4105, "step": 16417 }, { "epoch": 0.8976121810204611, "grad_norm": 1.893819808959961, "learning_rate": 5.794145745790269e-07, "loss": 1.2351, "step": 16418 }, { "epoch": 0.8976668534641827, "grad_norm": 2.059130907058716, "learning_rate": 5.788017475910679e-07, "loss": 1.2401, "step": 16419 }, { "epoch": 0.8977215259079042, "grad_norm": 1.4279206991195679, "learning_rate": 5.781892351990271e-07, "loss": 1.5204, "step": 16420 }, { "epoch": 0.8977761983516258, "grad_norm": 1.3260788917541504, "learning_rate": 5.775770374233558e-07, "loss": 1.3751, "step": 16421 }, { "epoch": 0.8978308707953474, "grad_norm": 1.9131245613098145, "learning_rate": 5.769651542844989e-07, "loss": 1.5013, "step": 16422 }, { "epoch": 0.8978855432390689, "grad_norm": 1.5035195350646973, "learning_rate": 5.763535858028846e-07, "loss": 1.3938, "step": 16423 }, { "epoch": 0.8979402156827905, "grad_norm": 1.768898606300354, "learning_rate": 5.757423319989397e-07, "loss": 1.4818, "step": 16424 }, { "epoch": 0.8979948881265121, "grad_norm": 1.3267662525177002, "learning_rate": 5.751313928930724e-07, "loss": 1.4665, "step": 16425 }, { "epoch": 0.8980495605702336, "grad_norm": 1.5870859622955322, "learning_rate": 5.74520768505682e-07, "loss": 1.1776, "step": 16426 }, { "epoch": 0.8981042330139551, "grad_norm": 1.464686632156372, "learning_rate": 5.739104588571631e-07, "loss": 1.2588, "step": 16427 }, { "epoch": 0.8981589054576767, "grad_norm": 1.4918707609176636, "learning_rate": 5.73300463967893e-07, "loss": 1.5151, "step": 16428 }, { "epoch": 0.8982135779013982, "grad_norm": 2.3325705528259277, "learning_rate": 5.726907838582396e-07, "loss": 1.2309, "step": 16429 }, { "epoch": 0.8982682503451198, "grad_norm": 1.651923418045044, "learning_rate": 5.720814185485635e-07, "loss": 1.2979, "step": 16430 }, { "epoch": 0.8983229227888414, "grad_norm": 1.6905969381332397, "learning_rate": 5.714723680592116e-07, "loss": 1.4098, "step": 16431 }, { "epoch": 0.8983775952325629, "grad_norm": 1.3344913721084595, "learning_rate": 5.708636324105222e-07, "loss": 1.5617, "step": 16432 }, { "epoch": 0.8984322676762845, "grad_norm": 1.446609377861023, "learning_rate": 5.702552116228232e-07, "loss": 1.3557, "step": 16433 }, { "epoch": 0.898486940120006, "grad_norm": 1.7122068405151367, "learning_rate": 5.696471057164299e-07, "loss": 1.4227, "step": 16434 }, { "epoch": 0.8985416125637276, "grad_norm": 1.4648253917694092, "learning_rate": 5.690393147116491e-07, "loss": 1.4081, "step": 16435 }, { "epoch": 0.8985962850074491, "grad_norm": 1.689065933227539, "learning_rate": 5.684318386287758e-07, "loss": 1.513, "step": 16436 }, { "epoch": 0.8986509574511706, "grad_norm": 1.826540470123291, "learning_rate": 5.678246774880946e-07, "loss": 1.4778, "step": 16437 }, { "epoch": 0.8987056298948922, "grad_norm": 1.627642273902893, "learning_rate": 5.672178313098808e-07, "loss": 1.5411, "step": 16438 }, { "epoch": 0.8987603023386138, "grad_norm": 1.370408296585083, "learning_rate": 5.666113001143991e-07, "loss": 1.468, "step": 16439 }, { "epoch": 0.8988149747823353, "grad_norm": 1.8844987154006958, "learning_rate": 5.660050839219011e-07, "loss": 1.109, "step": 16440 }, { "epoch": 0.8988696472260569, "grad_norm": 1.307870864868164, "learning_rate": 5.653991827526318e-07, "loss": 1.5517, "step": 16441 }, { "epoch": 0.8989243196697785, "grad_norm": 1.6994805335998535, "learning_rate": 5.647935966268225e-07, "loss": 1.5042, "step": 16442 }, { "epoch": 0.8989789921135, "grad_norm": 1.568772792816162, "learning_rate": 5.641883255646952e-07, "loss": 1.3181, "step": 16443 }, { "epoch": 0.8990336645572216, "grad_norm": 1.547914743423462, "learning_rate": 5.635833695864623e-07, "loss": 1.2053, "step": 16444 }, { "epoch": 0.8990883370009431, "grad_norm": 1.229280948638916, "learning_rate": 5.629787287123223e-07, "loss": 1.5156, "step": 16445 }, { "epoch": 0.8991430094446646, "grad_norm": 1.6534205675125122, "learning_rate": 5.623744029624678e-07, "loss": 1.5542, "step": 16446 }, { "epoch": 0.8991976818883862, "grad_norm": 1.4937658309936523, "learning_rate": 5.617703923570795e-07, "loss": 1.3264, "step": 16447 }, { "epoch": 0.8992523543321077, "grad_norm": 1.5283740758895874, "learning_rate": 5.611666969163243e-07, "loss": 1.6176, "step": 16448 }, { "epoch": 0.8993070267758293, "grad_norm": 1.5020068883895874, "learning_rate": 5.60563316660363e-07, "loss": 1.6317, "step": 16449 }, { "epoch": 0.8993616992195509, "grad_norm": 1.4486051797866821, "learning_rate": 5.599602516093427e-07, "loss": 1.4344, "step": 16450 }, { "epoch": 0.8994163716632724, "grad_norm": 1.639160394668579, "learning_rate": 5.593575017833997e-07, "loss": 1.3334, "step": 16451 }, { "epoch": 0.899471044106994, "grad_norm": 1.7429254055023193, "learning_rate": 5.587550672026643e-07, "loss": 1.5531, "step": 16452 }, { "epoch": 0.8995257165507156, "grad_norm": 1.304549217224121, "learning_rate": 5.581529478872516e-07, "loss": 1.5017, "step": 16453 }, { "epoch": 0.899580388994437, "grad_norm": 1.5234801769256592, "learning_rate": 5.575511438572656e-07, "loss": 1.1691, "step": 16454 }, { "epoch": 0.8996350614381586, "grad_norm": 1.6381886005401611, "learning_rate": 5.569496551328069e-07, "loss": 1.3039, "step": 16455 }, { "epoch": 0.8996897338818802, "grad_norm": 1.428593635559082, "learning_rate": 5.563484817339581e-07, "loss": 1.5449, "step": 16456 }, { "epoch": 0.8997444063256017, "grad_norm": 1.8961806297302246, "learning_rate": 5.557476236807923e-07, "loss": 1.2473, "step": 16457 }, { "epoch": 0.8997990787693233, "grad_norm": 1.318363070487976, "learning_rate": 5.551470809933757e-07, "loss": 1.5073, "step": 16458 }, { "epoch": 0.8998537512130449, "grad_norm": 1.355640172958374, "learning_rate": 5.545468536917619e-07, "loss": 1.6084, "step": 16459 }, { "epoch": 0.8999084236567664, "grad_norm": 1.5685925483703613, "learning_rate": 5.539469417959931e-07, "loss": 1.3353, "step": 16460 }, { "epoch": 0.899963096100488, "grad_norm": 1.777762770652771, "learning_rate": 5.533473453261007e-07, "loss": 1.3277, "step": 16461 }, { "epoch": 0.9000177685442095, "grad_norm": 1.544084072113037, "learning_rate": 5.527480643021077e-07, "loss": 1.3267, "step": 16462 }, { "epoch": 0.900072440987931, "grad_norm": 1.5883774757385254, "learning_rate": 5.521490987440259e-07, "loss": 1.2963, "step": 16463 }, { "epoch": 0.9001271134316526, "grad_norm": 1.7831016778945923, "learning_rate": 5.515504486718559e-07, "loss": 1.1052, "step": 16464 }, { "epoch": 0.9001817858753741, "grad_norm": 1.388871669769287, "learning_rate": 5.509521141055874e-07, "loss": 1.5605, "step": 16465 }, { "epoch": 0.9002364583190957, "grad_norm": 1.3552418947219849, "learning_rate": 5.503540950652009e-07, "loss": 1.3457, "step": 16466 }, { "epoch": 0.9002911307628173, "grad_norm": 1.643951416015625, "learning_rate": 5.497563915706661e-07, "loss": 1.3723, "step": 16467 }, { "epoch": 0.9003458032065388, "grad_norm": 2.186229944229126, "learning_rate": 5.491590036419392e-07, "loss": 1.885, "step": 16468 }, { "epoch": 0.9004004756502604, "grad_norm": 2.2492244243621826, "learning_rate": 5.48561931298972e-07, "loss": 1.6803, "step": 16469 }, { "epoch": 0.900455148093982, "grad_norm": 1.4630248546600342, "learning_rate": 5.47965174561701e-07, "loss": 1.2766, "step": 16470 }, { "epoch": 0.9005098205377035, "grad_norm": 1.6183815002441406, "learning_rate": 5.473687334500499e-07, "loss": 1.3046, "step": 16471 }, { "epoch": 0.900564492981425, "grad_norm": 1.4765903949737549, "learning_rate": 5.467726079839408e-07, "loss": 1.4279, "step": 16472 }, { "epoch": 0.9006191654251466, "grad_norm": 1.222308874130249, "learning_rate": 5.461767981832766e-07, "loss": 1.3365, "step": 16473 }, { "epoch": 0.9006738378688681, "grad_norm": 1.475111961364746, "learning_rate": 5.455813040679536e-07, "loss": 1.3492, "step": 16474 }, { "epoch": 0.9007285103125897, "grad_norm": 1.3160005807876587, "learning_rate": 5.449861256578559e-07, "loss": 1.5263, "step": 16475 }, { "epoch": 0.9007831827563112, "grad_norm": 1.5505794286727905, "learning_rate": 5.443912629728565e-07, "loss": 1.6872, "step": 16476 }, { "epoch": 0.9008378552000328, "grad_norm": 1.7743003368377686, "learning_rate": 5.437967160328228e-07, "loss": 0.9797, "step": 16477 }, { "epoch": 0.9008925276437544, "grad_norm": 1.5124369859695435, "learning_rate": 5.432024848576067e-07, "loss": 1.3408, "step": 16478 }, { "epoch": 0.9009472000874759, "grad_norm": 1.6080435514450073, "learning_rate": 5.426085694670491e-07, "loss": 1.5339, "step": 16479 }, { "epoch": 0.9010018725311975, "grad_norm": 1.6944833993911743, "learning_rate": 5.420149698809851e-07, "loss": 1.1625, "step": 16480 }, { "epoch": 0.901056544974919, "grad_norm": 1.6989272832870483, "learning_rate": 5.414216861192356e-07, "loss": 1.3946, "step": 16481 }, { "epoch": 0.9011112174186405, "grad_norm": 1.394623875617981, "learning_rate": 5.408287182016092e-07, "loss": 1.3445, "step": 16482 }, { "epoch": 0.9011658898623621, "grad_norm": 1.3551808595657349, "learning_rate": 5.402360661479101e-07, "loss": 1.5456, "step": 16483 }, { "epoch": 0.9012205623060837, "grad_norm": 1.3957984447479248, "learning_rate": 5.396437299779278e-07, "loss": 1.4368, "step": 16484 }, { "epoch": 0.9012752347498052, "grad_norm": 1.547749638557434, "learning_rate": 5.390517097114378e-07, "loss": 1.2956, "step": 16485 }, { "epoch": 0.9013299071935268, "grad_norm": 1.8446228504180908, "learning_rate": 5.384600053682143e-07, "loss": 1.229, "step": 16486 }, { "epoch": 0.9013845796372484, "grad_norm": 1.4649897813796997, "learning_rate": 5.378686169680137e-07, "loss": 1.2105, "step": 16487 }, { "epoch": 0.9014392520809699, "grad_norm": 1.3445391654968262, "learning_rate": 5.372775445305833e-07, "loss": 1.617, "step": 16488 }, { "epoch": 0.9014939245246915, "grad_norm": 1.5135794878005981, "learning_rate": 5.366867880756599e-07, "loss": 1.6017, "step": 16489 }, { "epoch": 0.9015485969684129, "grad_norm": 1.3108670711517334, "learning_rate": 5.360963476229708e-07, "loss": 1.4116, "step": 16490 }, { "epoch": 0.9016032694121345, "grad_norm": 1.3083161115646362, "learning_rate": 5.355062231922326e-07, "loss": 1.3332, "step": 16491 }, { "epoch": 0.9016579418558561, "grad_norm": 2.17621111869812, "learning_rate": 5.349164148031515e-07, "loss": 1.1342, "step": 16492 }, { "epoch": 0.9017126142995776, "grad_norm": 1.6682372093200684, "learning_rate": 5.3432692247542e-07, "loss": 1.3785, "step": 16493 }, { "epoch": 0.9017672867432992, "grad_norm": 1.40687894821167, "learning_rate": 5.337377462287263e-07, "loss": 1.562, "step": 16494 }, { "epoch": 0.9018219591870208, "grad_norm": 1.5537198781967163, "learning_rate": 5.331488860827427e-07, "loss": 1.4081, "step": 16495 }, { "epoch": 0.9018766316307423, "grad_norm": 1.8507758378982544, "learning_rate": 5.325603420571302e-07, "loss": 1.5398, "step": 16496 }, { "epoch": 0.9019313040744639, "grad_norm": 2.2297606468200684, "learning_rate": 5.319721141715461e-07, "loss": 1.4502, "step": 16497 }, { "epoch": 0.9019859765181855, "grad_norm": 1.1139899492263794, "learning_rate": 5.313842024456306e-07, "loss": 1.5053, "step": 16498 }, { "epoch": 0.9020406489619069, "grad_norm": 1.5351258516311646, "learning_rate": 5.307966068990144e-07, "loss": 1.4745, "step": 16499 }, { "epoch": 0.9020953214056285, "grad_norm": 1.397210955619812, "learning_rate": 5.302093275513209e-07, "loss": 1.6156, "step": 16500 }, { "epoch": 0.9021499938493501, "grad_norm": 1.658030390739441, "learning_rate": 5.296223644221588e-07, "loss": 1.3082, "step": 16501 }, { "epoch": 0.9022046662930716, "grad_norm": 1.5737526416778564, "learning_rate": 5.290357175311278e-07, "loss": 1.4027, "step": 16502 }, { "epoch": 0.9022593387367932, "grad_norm": 1.3440802097320557, "learning_rate": 5.284493868978191e-07, "loss": 1.8852, "step": 16503 }, { "epoch": 0.9023140111805147, "grad_norm": 1.9603768587112427, "learning_rate": 5.278633725418103e-07, "loss": 1.397, "step": 16504 }, { "epoch": 0.9023686836242363, "grad_norm": 1.5632085800170898, "learning_rate": 5.272776744826724e-07, "loss": 1.2239, "step": 16505 }, { "epoch": 0.9024233560679579, "grad_norm": 2.15541934967041, "learning_rate": 5.26692292739961e-07, "loss": 1.4677, "step": 16506 }, { "epoch": 0.9024780285116794, "grad_norm": 1.597106695175171, "learning_rate": 5.261072273332224e-07, "loss": 1.504, "step": 16507 }, { "epoch": 0.902532700955401, "grad_norm": 1.3964635133743286, "learning_rate": 5.255224782819957e-07, "loss": 1.4708, "step": 16508 }, { "epoch": 0.9025873733991225, "grad_norm": 1.4766414165496826, "learning_rate": 5.249380456058062e-07, "loss": 1.4713, "step": 16509 }, { "epoch": 0.902642045842844, "grad_norm": 1.8219863176345825, "learning_rate": 5.243539293241684e-07, "loss": 1.5061, "step": 16510 }, { "epoch": 0.9026967182865656, "grad_norm": 1.5431277751922607, "learning_rate": 5.237701294565889e-07, "loss": 1.3526, "step": 16511 }, { "epoch": 0.9027513907302872, "grad_norm": 1.3386858701705933, "learning_rate": 5.231866460225621e-07, "loss": 1.3401, "step": 16512 }, { "epoch": 0.9028060631740087, "grad_norm": 2.306795358657837, "learning_rate": 5.226034790415702e-07, "loss": 1.3833, "step": 16513 }, { "epoch": 0.9028607356177303, "grad_norm": 1.968298077583313, "learning_rate": 5.220206285330887e-07, "loss": 1.3236, "step": 16514 }, { "epoch": 0.9029154080614519, "grad_norm": 1.729156494140625, "learning_rate": 5.214380945165787e-07, "loss": 1.1757, "step": 16515 }, { "epoch": 0.9029700805051734, "grad_norm": 1.7359291315078735, "learning_rate": 5.208558770114913e-07, "loss": 1.3338, "step": 16516 }, { "epoch": 0.903024752948895, "grad_norm": 1.5435514450073242, "learning_rate": 5.20273976037271e-07, "loss": 1.1242, "step": 16517 }, { "epoch": 0.9030794253926165, "grad_norm": 1.4455920457839966, "learning_rate": 5.196923916133467e-07, "loss": 1.6686, "step": 16518 }, { "epoch": 0.903134097836338, "grad_norm": 1.2685222625732422, "learning_rate": 5.191111237591406e-07, "loss": 1.4617, "step": 16519 }, { "epoch": 0.9031887702800596, "grad_norm": 1.0343114137649536, "learning_rate": 5.185301724940617e-07, "loss": 1.585, "step": 16520 }, { "epoch": 0.9032434427237811, "grad_norm": 1.760619044303894, "learning_rate": 5.179495378375077e-07, "loss": 1.5385, "step": 16521 }, { "epoch": 0.9032981151675027, "grad_norm": 1.510510802268982, "learning_rate": 5.173692198088709e-07, "loss": 1.2125, "step": 16522 }, { "epoch": 0.9033527876112243, "grad_norm": 1.6881604194641113, "learning_rate": 5.167892184275269e-07, "loss": 1.4573, "step": 16523 }, { "epoch": 0.9034074600549458, "grad_norm": 1.420272707939148, "learning_rate": 5.162095337128426e-07, "loss": 1.4644, "step": 16524 }, { "epoch": 0.9034621324986674, "grad_norm": 1.9532113075256348, "learning_rate": 5.156301656841789e-07, "loss": 1.5464, "step": 16525 }, { "epoch": 0.903516804942389, "grad_norm": 2.002976894378662, "learning_rate": 5.150511143608782e-07, "loss": 1.376, "step": 16526 }, { "epoch": 0.9035714773861104, "grad_norm": 1.6459132432937622, "learning_rate": 5.144723797622786e-07, "loss": 1.5972, "step": 16527 }, { "epoch": 0.903626149829832, "grad_norm": 1.5681440830230713, "learning_rate": 5.138939619077055e-07, "loss": 1.2736, "step": 16528 }, { "epoch": 0.9036808222735536, "grad_norm": 1.4087953567504883, "learning_rate": 5.133158608164724e-07, "loss": 1.5197, "step": 16529 }, { "epoch": 0.9037354947172751, "grad_norm": 1.6503654718399048, "learning_rate": 5.127380765078815e-07, "loss": 1.4479, "step": 16530 }, { "epoch": 0.9037901671609967, "grad_norm": 1.4448853731155396, "learning_rate": 5.121606090012309e-07, "loss": 1.422, "step": 16531 }, { "epoch": 0.9038448396047183, "grad_norm": 1.4918770790100098, "learning_rate": 5.115834583158008e-07, "loss": 1.4171, "step": 16532 }, { "epoch": 0.9038995120484398, "grad_norm": 1.571473240852356, "learning_rate": 5.110066244708645e-07, "loss": 1.4067, "step": 16533 }, { "epoch": 0.9039541844921614, "grad_norm": 1.5370111465454102, "learning_rate": 5.104301074856843e-07, "loss": 1.5584, "step": 16534 }, { "epoch": 0.9040088569358828, "grad_norm": 1.3606804609298706, "learning_rate": 5.098539073795095e-07, "loss": 1.558, "step": 16535 }, { "epoch": 0.9040635293796044, "grad_norm": 1.8135968446731567, "learning_rate": 5.092780241715833e-07, "loss": 1.3921, "step": 16536 }, { "epoch": 0.904118201823326, "grad_norm": 1.6355761289596558, "learning_rate": 5.08702457881134e-07, "loss": 1.5351, "step": 16537 }, { "epoch": 0.9041728742670475, "grad_norm": 1.6069148778915405, "learning_rate": 5.081272085273825e-07, "loss": 1.4575, "step": 16538 }, { "epoch": 0.9042275467107691, "grad_norm": 1.7183185815811157, "learning_rate": 5.075522761295359e-07, "loss": 1.5429, "step": 16539 }, { "epoch": 0.9042822191544907, "grad_norm": 1.6478146314620972, "learning_rate": 5.069776607067944e-07, "loss": 1.502, "step": 16540 }, { "epoch": 0.9043368915982122, "grad_norm": 1.5696213245391846, "learning_rate": 5.064033622783426e-07, "loss": 1.2455, "step": 16541 }, { "epoch": 0.9043915640419338, "grad_norm": 1.956442952156067, "learning_rate": 5.058293808633629e-07, "loss": 1.3893, "step": 16542 }, { "epoch": 0.9044462364856554, "grad_norm": 1.5709798336029053, "learning_rate": 5.052557164810179e-07, "loss": 1.5039, "step": 16543 }, { "epoch": 0.9045009089293768, "grad_norm": 1.2884713411331177, "learning_rate": 5.046823691504632e-07, "loss": 1.7977, "step": 16544 }, { "epoch": 0.9045555813730984, "grad_norm": 1.5215462446212769, "learning_rate": 5.041093388908469e-07, "loss": 1.4367, "step": 16545 }, { "epoch": 0.90461025381682, "grad_norm": 1.2548428773880005, "learning_rate": 5.035366257213015e-07, "loss": 1.382, "step": 16546 }, { "epoch": 0.9046649262605415, "grad_norm": 2.341442346572876, "learning_rate": 5.029642296609538e-07, "loss": 1.604, "step": 16547 }, { "epoch": 0.9047195987042631, "grad_norm": 2.126220464706421, "learning_rate": 5.023921507289153e-07, "loss": 1.5431, "step": 16548 }, { "epoch": 0.9047742711479846, "grad_norm": 1.606012225151062, "learning_rate": 5.018203889442896e-07, "loss": 1.3805, "step": 16549 }, { "epoch": 0.9048289435917062, "grad_norm": 1.857549786567688, "learning_rate": 5.012489443261693e-07, "loss": 1.3794, "step": 16550 }, { "epoch": 0.9048836160354278, "grad_norm": 1.6797915697097778, "learning_rate": 5.006778168936377e-07, "loss": 1.321, "step": 16551 }, { "epoch": 0.9049382884791493, "grad_norm": 1.3435040712356567, "learning_rate": 5.001070066657631e-07, "loss": 1.4304, "step": 16552 }, { "epoch": 0.9049929609228708, "grad_norm": 1.74184250831604, "learning_rate": 4.995365136616092e-07, "loss": 1.4832, "step": 16553 }, { "epoch": 0.9050476333665924, "grad_norm": 1.9326666593551636, "learning_rate": 4.98966337900224e-07, "loss": 1.5554, "step": 16554 }, { "epoch": 0.9051023058103139, "grad_norm": 1.2239744663238525, "learning_rate": 4.983964794006457e-07, "loss": 1.3278, "step": 16555 }, { "epoch": 0.9051569782540355, "grad_norm": 1.863661289215088, "learning_rate": 4.978269381819068e-07, "loss": 1.3529, "step": 16556 }, { "epoch": 0.9052116506977571, "grad_norm": 1.6698230504989624, "learning_rate": 4.972577142630242e-07, "loss": 1.6026, "step": 16557 }, { "epoch": 0.9052663231414786, "grad_norm": 1.3769081830978394, "learning_rate": 4.96688807663005e-07, "loss": 1.5821, "step": 16558 }, { "epoch": 0.9053209955852002, "grad_norm": 1.5157575607299805, "learning_rate": 4.961202184008462e-07, "loss": 1.5427, "step": 16559 }, { "epoch": 0.9053756680289218, "grad_norm": 1.3372422456741333, "learning_rate": 4.95551946495535e-07, "loss": 1.5799, "step": 16560 }, { "epoch": 0.9054303404726433, "grad_norm": 1.8294214010238647, "learning_rate": 4.949839919660481e-07, "loss": 1.3181, "step": 16561 }, { "epoch": 0.9054850129163649, "grad_norm": 1.734204649925232, "learning_rate": 4.944163548313496e-07, "loss": 1.201, "step": 16562 }, { "epoch": 0.9055396853600863, "grad_norm": 1.439473032951355, "learning_rate": 4.938490351103931e-07, "loss": 1.1788, "step": 16563 }, { "epoch": 0.9055943578038079, "grad_norm": 1.9036130905151367, "learning_rate": 4.932820328221266e-07, "loss": 1.2707, "step": 16564 }, { "epoch": 0.9056490302475295, "grad_norm": 1.5845063924789429, "learning_rate": 4.927153479854807e-07, "loss": 1.0713, "step": 16565 }, { "epoch": 0.905703702691251, "grad_norm": 1.700848937034607, "learning_rate": 4.921489806193779e-07, "loss": 1.5296, "step": 16566 }, { "epoch": 0.9057583751349726, "grad_norm": 1.5132973194122314, "learning_rate": 4.915829307427333e-07, "loss": 1.3003, "step": 16567 }, { "epoch": 0.9058130475786942, "grad_norm": 1.5386513471603394, "learning_rate": 4.91017198374445e-07, "loss": 1.4426, "step": 16568 }, { "epoch": 0.9058677200224157, "grad_norm": 1.7894456386566162, "learning_rate": 4.904517835334055e-07, "loss": 1.0728, "step": 16569 }, { "epoch": 0.9059223924661373, "grad_norm": 1.8062995672225952, "learning_rate": 4.898866862384976e-07, "loss": 1.2448, "step": 16570 }, { "epoch": 0.9059770649098589, "grad_norm": 1.640221118927002, "learning_rate": 4.893219065085886e-07, "loss": 1.4822, "step": 16571 }, { "epoch": 0.9060317373535803, "grad_norm": 1.3892401456832886, "learning_rate": 4.887574443625376e-07, "loss": 1.3575, "step": 16572 }, { "epoch": 0.9060864097973019, "grad_norm": 2.8582866191864014, "learning_rate": 4.881932998191963e-07, "loss": 1.2914, "step": 16573 }, { "epoch": 0.9061410822410235, "grad_norm": 1.3301184177398682, "learning_rate": 4.876294728973984e-07, "loss": 1.6719, "step": 16574 }, { "epoch": 0.906195754684745, "grad_norm": 1.299572229385376, "learning_rate": 4.870659636159758e-07, "loss": 1.3179, "step": 16575 }, { "epoch": 0.9062504271284666, "grad_norm": 1.3896080255508423, "learning_rate": 4.865027719937443e-07, "loss": 1.2677, "step": 16576 }, { "epoch": 0.9063050995721881, "grad_norm": 1.5738561153411865, "learning_rate": 4.859398980495078e-07, "loss": 1.4153, "step": 16577 }, { "epoch": 0.9063597720159097, "grad_norm": 1.5207903385162354, "learning_rate": 4.853773418020646e-07, "loss": 1.5598, "step": 16578 }, { "epoch": 0.9064144444596313, "grad_norm": 1.5870654582977295, "learning_rate": 4.848151032701987e-07, "loss": 1.239, "step": 16579 }, { "epoch": 0.9064691169033527, "grad_norm": 1.8638839721679688, "learning_rate": 4.842531824726826e-07, "loss": 1.4959, "step": 16580 }, { "epoch": 0.9065237893470743, "grad_norm": 1.9179751873016357, "learning_rate": 4.836915794282838e-07, "loss": 1.345, "step": 16581 }, { "epoch": 0.9065784617907959, "grad_norm": 1.7575801610946655, "learning_rate": 4.831302941557537e-07, "loss": 1.2791, "step": 16582 }, { "epoch": 0.9066331342345174, "grad_norm": 1.4075876474380493, "learning_rate": 4.82569326673834e-07, "loss": 1.6183, "step": 16583 }, { "epoch": 0.906687806678239, "grad_norm": 1.6304129362106323, "learning_rate": 4.8200867700126e-07, "loss": 1.4719, "step": 16584 }, { "epoch": 0.9067424791219606, "grad_norm": 1.3053994178771973, "learning_rate": 4.814483451567498e-07, "loss": 1.5116, "step": 16585 }, { "epoch": 0.9067971515656821, "grad_norm": 1.7064666748046875, "learning_rate": 4.80888331159014e-07, "loss": 1.4327, "step": 16586 }, { "epoch": 0.9068518240094037, "grad_norm": 1.7090270519256592, "learning_rate": 4.803286350267555e-07, "loss": 1.5103, "step": 16587 }, { "epoch": 0.9069064964531253, "grad_norm": 1.573121428489685, "learning_rate": 4.797692567786616e-07, "loss": 1.4355, "step": 16588 }, { "epoch": 0.9069611688968467, "grad_norm": 1.4327139854431152, "learning_rate": 4.79210196433414e-07, "loss": 1.8333, "step": 16589 }, { "epoch": 0.9070158413405683, "grad_norm": 1.6927433013916016, "learning_rate": 4.786514540096776e-07, "loss": 1.5857, "step": 16590 }, { "epoch": 0.9070705137842898, "grad_norm": 1.3924360275268555, "learning_rate": 4.780930295261133e-07, "loss": 1.6726, "step": 16591 }, { "epoch": 0.9071251862280114, "grad_norm": 1.7622803449630737, "learning_rate": 4.77534923001366e-07, "loss": 1.456, "step": 16592 }, { "epoch": 0.907179858671733, "grad_norm": 1.2328003644943237, "learning_rate": 4.769771344540719e-07, "loss": 1.5691, "step": 16593 }, { "epoch": 0.9072345311154545, "grad_norm": 1.9049626588821411, "learning_rate": 4.764196639028573e-07, "loss": 1.4517, "step": 16594 }, { "epoch": 0.9072892035591761, "grad_norm": 1.6930794715881348, "learning_rate": 4.7586251136633956e-07, "loss": 1.2171, "step": 16595 }, { "epoch": 0.9073438760028977, "grad_norm": 1.5550028085708618, "learning_rate": 4.7530567686312035e-07, "loss": 1.7342, "step": 16596 }, { "epoch": 0.9073985484466192, "grad_norm": 1.3274203538894653, "learning_rate": 4.7474916041179487e-07, "loss": 1.2667, "step": 16597 }, { "epoch": 0.9074532208903408, "grad_norm": 1.4790090322494507, "learning_rate": 4.7419296203094713e-07, "loss": 1.5623, "step": 16598 }, { "epoch": 0.9075078933340623, "grad_norm": 1.4698883295059204, "learning_rate": 4.7363708173915e-07, "loss": 1.4084, "step": 16599 }, { "epoch": 0.9075625657777838, "grad_norm": 1.546741008758545, "learning_rate": 4.730815195549643e-07, "loss": 1.3715, "step": 16600 }, { "epoch": 0.9076172382215054, "grad_norm": 1.3622486591339111, "learning_rate": 4.7252627549694285e-07, "loss": 1.423, "step": 16601 }, { "epoch": 0.907671910665227, "grad_norm": 2.2046432495117188, "learning_rate": 4.719713495836242e-07, "loss": 1.4161, "step": 16602 }, { "epoch": 0.9077265831089485, "grad_norm": 1.4276542663574219, "learning_rate": 4.7141674183354247e-07, "loss": 1.4589, "step": 16603 }, { "epoch": 0.9077812555526701, "grad_norm": 1.396234393119812, "learning_rate": 4.70862452265215e-07, "loss": 1.7399, "step": 16604 }, { "epoch": 0.9078359279963916, "grad_norm": 1.441173791885376, "learning_rate": 4.703084808971503e-07, "loss": 1.3397, "step": 16605 }, { "epoch": 0.9078906004401132, "grad_norm": 1.4841268062591553, "learning_rate": 4.697548277478481e-07, "loss": 1.6709, "step": 16606 }, { "epoch": 0.9079452728838348, "grad_norm": 1.4539169073104858, "learning_rate": 4.6920149283579574e-07, "loss": 1.1894, "step": 16607 }, { "epoch": 0.9079999453275562, "grad_norm": 2.053162097930908, "learning_rate": 4.6864847617946964e-07, "loss": 1.5658, "step": 16608 }, { "epoch": 0.9080546177712778, "grad_norm": 1.19681715965271, "learning_rate": 4.6809577779733715e-07, "loss": 1.5247, "step": 16609 }, { "epoch": 0.9081092902149994, "grad_norm": 1.5151227712631226, "learning_rate": 4.6754339770785474e-07, "loss": 1.3749, "step": 16610 }, { "epoch": 0.9081639626587209, "grad_norm": 1.6041439771652222, "learning_rate": 4.6699133592946535e-07, "loss": 1.2684, "step": 16611 }, { "epoch": 0.9082186351024425, "grad_norm": 1.3391880989074707, "learning_rate": 4.6643959248060643e-07, "loss": 1.423, "step": 16612 }, { "epoch": 0.9082733075461641, "grad_norm": 1.4560059309005737, "learning_rate": 4.658881673797e-07, "loss": 1.3237, "step": 16613 }, { "epoch": 0.9083279799898856, "grad_norm": 1.3120683431625366, "learning_rate": 4.6533706064516015e-07, "loss": 1.5318, "step": 16614 }, { "epoch": 0.9083826524336072, "grad_norm": 1.4233171939849854, "learning_rate": 4.6478627229539e-07, "loss": 1.4678, "step": 16615 }, { "epoch": 0.9084373248773288, "grad_norm": 1.4539706707000732, "learning_rate": 4.6423580234878143e-07, "loss": 1.3733, "step": 16616 }, { "epoch": 0.9084919973210502, "grad_norm": 1.3830630779266357, "learning_rate": 4.636856508237164e-07, "loss": 1.4483, "step": 16617 }, { "epoch": 0.9085466697647718, "grad_norm": 1.3894853591918945, "learning_rate": 4.6313581773856474e-07, "loss": 1.5255, "step": 16618 }, { "epoch": 0.9086013422084933, "grad_norm": 1.9091026782989502, "learning_rate": 4.6258630311168505e-07, "loss": 1.0978, "step": 16619 }, { "epoch": 0.9086560146522149, "grad_norm": 1.7937828302383423, "learning_rate": 4.6203710696143153e-07, "loss": 1.3953, "step": 16620 }, { "epoch": 0.9087106870959365, "grad_norm": 1.7968764305114746, "learning_rate": 4.614882293061396e-07, "loss": 1.395, "step": 16621 }, { "epoch": 0.908765359539658, "grad_norm": 1.3787866830825806, "learning_rate": 4.6093967016413774e-07, "loss": 1.3145, "step": 16622 }, { "epoch": 0.9088200319833796, "grad_norm": 1.3137239217758179, "learning_rate": 4.6039142955374483e-07, "loss": 1.4032, "step": 16623 }, { "epoch": 0.9088747044271012, "grad_norm": 1.651828646659851, "learning_rate": 4.5984350749326835e-07, "loss": 1.3396, "step": 16624 }, { "epoch": 0.9089293768708226, "grad_norm": 1.4599113464355469, "learning_rate": 4.592959040010025e-07, "loss": 1.458, "step": 16625 }, { "epoch": 0.9089840493145442, "grad_norm": 1.8987985849380493, "learning_rate": 4.5874861909523506e-07, "loss": 1.3451, "step": 16626 }, { "epoch": 0.9090387217582658, "grad_norm": 1.643448829650879, "learning_rate": 4.582016527942412e-07, "loss": 1.3768, "step": 16627 }, { "epoch": 0.9090933942019873, "grad_norm": 1.681941270828247, "learning_rate": 4.5765500511628314e-07, "loss": 1.5075, "step": 16628 }, { "epoch": 0.9091480666457089, "grad_norm": 1.655876874923706, "learning_rate": 4.571086760796173e-07, "loss": 1.4037, "step": 16629 }, { "epoch": 0.9092027390894305, "grad_norm": 1.3418081998825073, "learning_rate": 4.5656266570248687e-07, "loss": 1.492, "step": 16630 }, { "epoch": 0.909257411533152, "grad_norm": 1.9535952806472778, "learning_rate": 4.5601697400312175e-07, "loss": 1.3053, "step": 16631 }, { "epoch": 0.9093120839768736, "grad_norm": 1.5897226333618164, "learning_rate": 4.554716009997473e-07, "loss": 1.6606, "step": 16632 }, { "epoch": 0.9093667564205951, "grad_norm": 1.6154744625091553, "learning_rate": 4.5492654671057014e-07, "loss": 1.3894, "step": 16633 }, { "epoch": 0.9094214288643166, "grad_norm": 1.773646593093872, "learning_rate": 4.5438181115379564e-07, "loss": 1.6566, "step": 16634 }, { "epoch": 0.9094761013080382, "grad_norm": 2.015716314315796, "learning_rate": 4.5383739434761265e-07, "loss": 1.2267, "step": 16635 }, { "epoch": 0.9095307737517597, "grad_norm": 1.5597485303878784, "learning_rate": 4.532932963101977e-07, "loss": 1.507, "step": 16636 }, { "epoch": 0.9095854461954813, "grad_norm": 1.630782127380371, "learning_rate": 4.5274951705972294e-07, "loss": 1.51, "step": 16637 }, { "epoch": 0.9096401186392029, "grad_norm": 1.5101618766784668, "learning_rate": 4.5220605661434605e-07, "loss": 1.5778, "step": 16638 }, { "epoch": 0.9096947910829244, "grad_norm": 1.5606930255889893, "learning_rate": 4.5166291499221137e-07, "loss": 1.5781, "step": 16639 }, { "epoch": 0.909749463526646, "grad_norm": 1.3597331047058105, "learning_rate": 4.511200922114589e-07, "loss": 1.2589, "step": 16640 }, { "epoch": 0.9098041359703676, "grad_norm": 1.1355311870574951, "learning_rate": 4.505775882902141e-07, "loss": 1.6787, "step": 16641 }, { "epoch": 0.9098588084140891, "grad_norm": 1.537216067314148, "learning_rate": 4.5003540324659255e-07, "loss": 1.2415, "step": 16642 }, { "epoch": 0.9099134808578107, "grad_norm": 1.794493317604065, "learning_rate": 4.494935370986986e-07, "loss": 1.2327, "step": 16643 }, { "epoch": 0.9099681533015322, "grad_norm": 1.196413516998291, "learning_rate": 4.489519898646244e-07, "loss": 1.436, "step": 16644 }, { "epoch": 0.9100228257452537, "grad_norm": 1.617159366607666, "learning_rate": 4.4841076156245665e-07, "loss": 1.5495, "step": 16645 }, { "epoch": 0.9100774981889753, "grad_norm": 2.0456831455230713, "learning_rate": 4.4786985221026756e-07, "loss": 1.4087, "step": 16646 }, { "epoch": 0.9101321706326968, "grad_norm": 1.5449864864349365, "learning_rate": 4.4732926182611826e-07, "loss": 1.3771, "step": 16647 }, { "epoch": 0.9101868430764184, "grad_norm": 1.3695236444473267, "learning_rate": 4.467889904280609e-07, "loss": 1.4226, "step": 16648 }, { "epoch": 0.91024151552014, "grad_norm": 1.5505691766738892, "learning_rate": 4.4624903803413667e-07, "loss": 1.481, "step": 16649 }, { "epoch": 0.9102961879638615, "grad_norm": 1.3591258525848389, "learning_rate": 4.457094046623756e-07, "loss": 1.351, "step": 16650 }, { "epoch": 0.9103508604075831, "grad_norm": 1.416089415550232, "learning_rate": 4.451700903307976e-07, "loss": 1.7561, "step": 16651 }, { "epoch": 0.9104055328513047, "grad_norm": 1.8965438604354858, "learning_rate": 4.4463109505741177e-07, "loss": 1.1844, "step": 16652 }, { "epoch": 0.9104602052950261, "grad_norm": 1.8273614645004272, "learning_rate": 4.440924188602136e-07, "loss": 1.4626, "step": 16653 }, { "epoch": 0.9105148777387477, "grad_norm": 1.9390416145324707, "learning_rate": 4.435540617571965e-07, "loss": 1.2179, "step": 16654 }, { "epoch": 0.9105695501824693, "grad_norm": 1.401183843612671, "learning_rate": 4.430160237663328e-07, "loss": 1.407, "step": 16655 }, { "epoch": 0.9106242226261908, "grad_norm": 1.3185125589370728, "learning_rate": 4.424783049055903e-07, "loss": 1.5352, "step": 16656 }, { "epoch": 0.9106788950699124, "grad_norm": 1.63759446144104, "learning_rate": 4.4194090519292467e-07, "loss": 1.2402, "step": 16657 }, { "epoch": 0.910733567513634, "grad_norm": 1.7715144157409668, "learning_rate": 4.414038246462804e-07, "loss": 1.5471, "step": 16658 }, { "epoch": 0.9107882399573555, "grad_norm": 1.5086596012115479, "learning_rate": 4.408670632835932e-07, "loss": 1.6074, "step": 16659 }, { "epoch": 0.9108429124010771, "grad_norm": 1.3194512128829956, "learning_rate": 4.4033062112278537e-07, "loss": 1.3136, "step": 16660 }, { "epoch": 0.9108975848447985, "grad_norm": 1.4400713443756104, "learning_rate": 4.397944981817703e-07, "loss": 1.4809, "step": 16661 }, { "epoch": 0.9109522572885201, "grad_norm": 1.3039532899856567, "learning_rate": 4.392586944784505e-07, "loss": 1.5568, "step": 16662 }, { "epoch": 0.9110069297322417, "grad_norm": 1.5574913024902344, "learning_rate": 4.387232100307193e-07, "loss": 1.4171, "step": 16663 }, { "epoch": 0.9110616021759632, "grad_norm": 1.5703712701797485, "learning_rate": 4.3818804485645463e-07, "loss": 1.5991, "step": 16664 }, { "epoch": 0.9111162746196848, "grad_norm": 2.019428014755249, "learning_rate": 4.3765319897352997e-07, "loss": 1.5944, "step": 16665 }, { "epoch": 0.9111709470634064, "grad_norm": 1.749310851097107, "learning_rate": 4.3711867239980335e-07, "loss": 1.6306, "step": 16666 }, { "epoch": 0.9112256195071279, "grad_norm": 1.8226525783538818, "learning_rate": 4.365844651531237e-07, "loss": 1.4333, "step": 16667 }, { "epoch": 0.9112802919508495, "grad_norm": 1.497145175933838, "learning_rate": 4.3605057725133015e-07, "loss": 1.4238, "step": 16668 }, { "epoch": 0.9113349643945711, "grad_norm": 1.605974793434143, "learning_rate": 4.3551700871225177e-07, "loss": 1.4149, "step": 16669 }, { "epoch": 0.9113896368382925, "grad_norm": 1.7877991199493408, "learning_rate": 4.349837595537032e-07, "loss": 1.5167, "step": 16670 }, { "epoch": 0.9114443092820141, "grad_norm": 1.4658747911453247, "learning_rate": 4.344508297934924e-07, "loss": 1.5608, "step": 16671 }, { "epoch": 0.9114989817257357, "grad_norm": 1.5191874504089355, "learning_rate": 4.339182194494129e-07, "loss": 1.4299, "step": 16672 }, { "epoch": 0.9115536541694572, "grad_norm": 1.3113900423049927, "learning_rate": 4.3338592853925277e-07, "loss": 1.3282, "step": 16673 }, { "epoch": 0.9116083266131788, "grad_norm": 1.3599398136138916, "learning_rate": 4.3285395708078547e-07, "loss": 1.2779, "step": 16674 }, { "epoch": 0.9116629990569003, "grad_norm": 2.310763120651245, "learning_rate": 4.323223050917735e-07, "loss": 1.3003, "step": 16675 }, { "epoch": 0.9117176715006219, "grad_norm": 1.8604522943496704, "learning_rate": 4.317909725899727e-07, "loss": 1.2892, "step": 16676 }, { "epoch": 0.9117723439443435, "grad_norm": 2.1275899410247803, "learning_rate": 4.312599595931233e-07, "loss": 1.0962, "step": 16677 }, { "epoch": 0.911827016388065, "grad_norm": 1.0483390092849731, "learning_rate": 4.307292661189566e-07, "loss": 1.4496, "step": 16678 }, { "epoch": 0.9118816888317866, "grad_norm": 1.659881353378296, "learning_rate": 4.3019889218519516e-07, "loss": 1.391, "step": 16679 }, { "epoch": 0.9119363612755081, "grad_norm": 1.4143106937408447, "learning_rate": 4.296688378095493e-07, "loss": 1.2979, "step": 16680 }, { "epoch": 0.9119910337192296, "grad_norm": 1.9742952585220337, "learning_rate": 4.291391030097192e-07, "loss": 1.46, "step": 16681 }, { "epoch": 0.9120457061629512, "grad_norm": 1.6499687433242798, "learning_rate": 4.2860968780339296e-07, "loss": 1.4322, "step": 16682 }, { "epoch": 0.9121003786066728, "grad_norm": 1.7045649290084839, "learning_rate": 4.280805922082487e-07, "loss": 1.4598, "step": 16683 }, { "epoch": 0.9121550510503943, "grad_norm": 1.6228810548782349, "learning_rate": 4.2755181624195344e-07, "loss": 1.3676, "step": 16684 }, { "epoch": 0.9122097234941159, "grad_norm": 2.1037588119506836, "learning_rate": 4.270233599221674e-07, "loss": 1.3429, "step": 16685 }, { "epoch": 0.9122643959378375, "grad_norm": 1.7258116006851196, "learning_rate": 4.2649522326653315e-07, "loss": 1.3756, "step": 16686 }, { "epoch": 0.912319068381559, "grad_norm": 1.7158690690994263, "learning_rate": 4.2596740629268997e-07, "loss": 1.3541, "step": 16687 }, { "epoch": 0.9123737408252806, "grad_norm": 1.6529183387756348, "learning_rate": 4.2543990901826035e-07, "loss": 1.3399, "step": 16688 }, { "epoch": 0.912428413269002, "grad_norm": 1.7329024076461792, "learning_rate": 4.249127314608592e-07, "loss": 1.2868, "step": 16689 }, { "epoch": 0.9124830857127236, "grad_norm": 1.286052942276001, "learning_rate": 4.2438587363809127e-07, "loss": 1.8741, "step": 16690 }, { "epoch": 0.9125377581564452, "grad_norm": 1.3738549947738647, "learning_rate": 4.238593355675502e-07, "loss": 1.5331, "step": 16691 }, { "epoch": 0.9125924306001667, "grad_norm": 1.8029978275299072, "learning_rate": 4.2333311726681426e-07, "loss": 1.3678, "step": 16692 }, { "epoch": 0.9126471030438883, "grad_norm": 1.7446283102035522, "learning_rate": 4.228072187534604e-07, "loss": 1.1546, "step": 16693 }, { "epoch": 0.9127017754876099, "grad_norm": 1.3026713132858276, "learning_rate": 4.222816400450458e-07, "loss": 1.4803, "step": 16694 }, { "epoch": 0.9127564479313314, "grad_norm": 1.3911018371582031, "learning_rate": 4.2175638115912296e-07, "loss": 1.62, "step": 16695 }, { "epoch": 0.912811120375053, "grad_norm": 1.4580296277999878, "learning_rate": 4.212314421132302e-07, "loss": 1.311, "step": 16696 }, { "epoch": 0.9128657928187746, "grad_norm": 1.7270686626434326, "learning_rate": 4.2070682292489674e-07, "loss": 1.4292, "step": 16697 }, { "epoch": 0.912920465262496, "grad_norm": 2.2409932613372803, "learning_rate": 4.201825236116408e-07, "loss": 1.3865, "step": 16698 }, { "epoch": 0.9129751377062176, "grad_norm": 1.6483283042907715, "learning_rate": 4.196585441909706e-07, "loss": 1.3329, "step": 16699 }, { "epoch": 0.9130298101499392, "grad_norm": 1.578640103340149, "learning_rate": 4.191348846803811e-07, "loss": 1.6093, "step": 16700 }, { "epoch": 0.9130844825936607, "grad_norm": 1.5585330724716187, "learning_rate": 4.186115450973616e-07, "loss": 1.0399, "step": 16701 }, { "epoch": 0.9131391550373823, "grad_norm": 1.562576413154602, "learning_rate": 4.1808852545938583e-07, "loss": 1.3578, "step": 16702 }, { "epoch": 0.9131938274811038, "grad_norm": 2.1670007705688477, "learning_rate": 4.175658257839177e-07, "loss": 1.1394, "step": 16703 }, { "epoch": 0.9132484999248254, "grad_norm": 1.541587471961975, "learning_rate": 4.170434460884132e-07, "loss": 1.6024, "step": 16704 }, { "epoch": 0.913303172368547, "grad_norm": 1.4361870288848877, "learning_rate": 4.1652138639031614e-07, "loss": 1.3807, "step": 16705 }, { "epoch": 0.9133578448122684, "grad_norm": 1.6644262075424194, "learning_rate": 4.1599964670705705e-07, "loss": 1.3476, "step": 16706 }, { "epoch": 0.91341251725599, "grad_norm": 1.5356694459915161, "learning_rate": 4.154782270560598e-07, "loss": 1.6325, "step": 16707 }, { "epoch": 0.9134671896997116, "grad_norm": 1.4180601835250854, "learning_rate": 4.1495712745473595e-07, "loss": 1.4308, "step": 16708 }, { "epoch": 0.9135218621434331, "grad_norm": 1.9980286359786987, "learning_rate": 4.144363479204849e-07, "loss": 1.6124, "step": 16709 }, { "epoch": 0.9135765345871547, "grad_norm": 1.7703903913497925, "learning_rate": 4.1391588847069844e-07, "loss": 1.2113, "step": 16710 }, { "epoch": 0.9136312070308763, "grad_norm": 1.3409004211425781, "learning_rate": 4.133957491227547e-07, "loss": 1.3845, "step": 16711 }, { "epoch": 0.9136858794745978, "grad_norm": 2.1337244510650635, "learning_rate": 4.12875929894021e-07, "loss": 1.5379, "step": 16712 }, { "epoch": 0.9137405519183194, "grad_norm": 2.2374157905578613, "learning_rate": 4.1235643080185797e-07, "loss": 1.2414, "step": 16713 }, { "epoch": 0.913795224362041, "grad_norm": 1.4117558002471924, "learning_rate": 4.1183725186361044e-07, "loss": 1.3578, "step": 16714 }, { "epoch": 0.9138498968057625, "grad_norm": 2.3010053634643555, "learning_rate": 4.1131839309661803e-07, "loss": 1.279, "step": 16715 }, { "epoch": 0.913904569249484, "grad_norm": 1.4863476753234863, "learning_rate": 4.107998545182046e-07, "loss": 1.4275, "step": 16716 }, { "epoch": 0.9139592416932056, "grad_norm": 1.7563602924346924, "learning_rate": 4.1028163614568516e-07, "loss": 1.4568, "step": 16717 }, { "epoch": 0.9140139141369271, "grad_norm": 1.5528830289840698, "learning_rate": 4.097637379963659e-07, "loss": 1.4092, "step": 16718 }, { "epoch": 0.9140685865806487, "grad_norm": 1.3988109827041626, "learning_rate": 4.092461600875397e-07, "loss": 1.3476, "step": 16719 }, { "epoch": 0.9141232590243702, "grad_norm": 1.7017477750778198, "learning_rate": 4.0872890243648933e-07, "loss": 1.2297, "step": 16720 }, { "epoch": 0.9141779314680918, "grad_norm": 1.5743147134780884, "learning_rate": 4.0821196506048764e-07, "loss": 1.4473, "step": 16721 }, { "epoch": 0.9142326039118134, "grad_norm": 1.7131701707839966, "learning_rate": 4.076953479767964e-07, "loss": 1.4382, "step": 16722 }, { "epoch": 0.9142872763555349, "grad_norm": 1.352236032485962, "learning_rate": 4.071790512026652e-07, "loss": 1.4428, "step": 16723 }, { "epoch": 0.9143419487992565, "grad_norm": 2.1103343963623047, "learning_rate": 4.0666307475533686e-07, "loss": 1.5181, "step": 16724 }, { "epoch": 0.914396621242978, "grad_norm": 1.6075289249420166, "learning_rate": 4.061474186520409e-07, "loss": 1.3837, "step": 16725 }, { "epoch": 0.9144512936866995, "grad_norm": 1.6198288202285767, "learning_rate": 4.056320829099925e-07, "loss": 1.4033, "step": 16726 }, { "epoch": 0.9145059661304211, "grad_norm": 1.4968825578689575, "learning_rate": 4.0511706754640557e-07, "loss": 1.3962, "step": 16727 }, { "epoch": 0.9145606385741427, "grad_norm": 1.2258909940719604, "learning_rate": 4.0460237257847203e-07, "loss": 1.4164, "step": 16728 }, { "epoch": 0.9146153110178642, "grad_norm": 1.6400146484375, "learning_rate": 4.040879980233836e-07, "loss": 1.5047, "step": 16729 }, { "epoch": 0.9146699834615858, "grad_norm": 1.3263318538665771, "learning_rate": 4.035739438983144e-07, "loss": 1.4882, "step": 16730 }, { "epoch": 0.9147246559053074, "grad_norm": 1.3852243423461914, "learning_rate": 4.030602102204284e-07, "loss": 1.491, "step": 16731 }, { "epoch": 0.9147793283490289, "grad_norm": 1.8205897808074951, "learning_rate": 4.0254679700688414e-07, "loss": 1.1227, "step": 16732 }, { "epoch": 0.9148340007927505, "grad_norm": 1.3836643695831299, "learning_rate": 4.020337042748224e-07, "loss": 1.5162, "step": 16733 }, { "epoch": 0.9148886732364719, "grad_norm": 1.279140830039978, "learning_rate": 4.015209320413782e-07, "loss": 1.4302, "step": 16734 }, { "epoch": 0.9149433456801935, "grad_norm": 1.611670970916748, "learning_rate": 4.010084803236736e-07, "loss": 1.4251, "step": 16735 }, { "epoch": 0.9149980181239151, "grad_norm": 1.7428711652755737, "learning_rate": 4.004963491388203e-07, "loss": 1.3821, "step": 16736 }, { "epoch": 0.9150526905676366, "grad_norm": 1.9813381433486938, "learning_rate": 3.9998453850391807e-07, "loss": 1.5461, "step": 16737 }, { "epoch": 0.9151073630113582, "grad_norm": 1.3858249187469482, "learning_rate": 3.9947304843606093e-07, "loss": 1.3344, "step": 16738 }, { "epoch": 0.9151620354550798, "grad_norm": 1.464697241783142, "learning_rate": 3.989618789523275e-07, "loss": 1.4962, "step": 16739 }, { "epoch": 0.9152167078988013, "grad_norm": 2.8413233757019043, "learning_rate": 3.9845103006978525e-07, "loss": 1.1777, "step": 16740 }, { "epoch": 0.9152713803425229, "grad_norm": 1.7882808446884155, "learning_rate": 3.979405018054949e-07, "loss": 1.4093, "step": 16741 }, { "epoch": 0.9153260527862445, "grad_norm": 1.6580075025558472, "learning_rate": 3.974302941765007e-07, "loss": 1.4101, "step": 16742 }, { "epoch": 0.9153807252299659, "grad_norm": 1.4094822406768799, "learning_rate": 3.969204071998445e-07, "loss": 1.6207, "step": 16743 }, { "epoch": 0.9154353976736875, "grad_norm": 1.6308536529541016, "learning_rate": 3.9641084089255045e-07, "loss": 1.3349, "step": 16744 }, { "epoch": 0.9154900701174091, "grad_norm": 1.3754209280014038, "learning_rate": 3.959015952716327e-07, "loss": 1.3726, "step": 16745 }, { "epoch": 0.9155447425611306, "grad_norm": 1.6124838590621948, "learning_rate": 3.953926703540978e-07, "loss": 1.4442, "step": 16746 }, { "epoch": 0.9155994150048522, "grad_norm": 1.4231599569320679, "learning_rate": 3.9488406615694084e-07, "loss": 2.0504, "step": 16747 }, { "epoch": 0.9156540874485737, "grad_norm": 1.4599785804748535, "learning_rate": 3.943757826971428e-07, "loss": 1.4183, "step": 16748 }, { "epoch": 0.9157087598922953, "grad_norm": 1.949154019355774, "learning_rate": 3.938678199916779e-07, "loss": 1.5024, "step": 16749 }, { "epoch": 0.9157634323360169, "grad_norm": 1.3372658491134644, "learning_rate": 3.93360178057508e-07, "loss": 1.2999, "step": 16750 }, { "epoch": 0.9158181047797384, "grad_norm": 1.417833685874939, "learning_rate": 3.928528569115841e-07, "loss": 1.3914, "step": 16751 }, { "epoch": 0.9158727772234599, "grad_norm": 1.929021954536438, "learning_rate": 3.923458565708471e-07, "loss": 1.4168, "step": 16752 }, { "epoch": 0.9159274496671815, "grad_norm": 1.239367127418518, "learning_rate": 3.9183917705222787e-07, "loss": 1.5142, "step": 16753 }, { "epoch": 0.915982122110903, "grad_norm": 1.5949889421463013, "learning_rate": 3.9133281837264394e-07, "loss": 1.1955, "step": 16754 }, { "epoch": 0.9160367945546246, "grad_norm": 1.2307580709457397, "learning_rate": 3.908267805490051e-07, "loss": 1.4982, "step": 16755 }, { "epoch": 0.9160914669983462, "grad_norm": 2.4329261779785156, "learning_rate": 3.903210635982091e-07, "loss": 1.5675, "step": 16756 }, { "epoch": 0.9161461394420677, "grad_norm": 1.4354161024093628, "learning_rate": 3.8981566753714116e-07, "loss": 1.5197, "step": 16757 }, { "epoch": 0.9162008118857893, "grad_norm": 1.7462834119796753, "learning_rate": 3.893105923826801e-07, "loss": 1.371, "step": 16758 }, { "epoch": 0.9162554843295109, "grad_norm": 1.6267688274383545, "learning_rate": 3.8880583815169014e-07, "loss": 1.1685, "step": 16759 }, { "epoch": 0.9163101567732324, "grad_norm": 1.351618766784668, "learning_rate": 3.8830140486102785e-07, "loss": 1.5551, "step": 16760 }, { "epoch": 0.916364829216954, "grad_norm": 1.4808480739593506, "learning_rate": 3.877972925275353e-07, "loss": 1.5942, "step": 16761 }, { "epoch": 0.9164195016606754, "grad_norm": 1.3924177885055542, "learning_rate": 3.8729350116804564e-07, "loss": 1.4498, "step": 16762 }, { "epoch": 0.916474174104397, "grad_norm": 1.4081202745437622, "learning_rate": 3.8679003079938437e-07, "loss": 1.2869, "step": 16763 }, { "epoch": 0.9165288465481186, "grad_norm": 1.5672627687454224, "learning_rate": 3.8628688143836244e-07, "loss": 1.4561, "step": 16764 }, { "epoch": 0.9165835189918401, "grad_norm": 1.800162434577942, "learning_rate": 3.857840531017798e-07, "loss": 1.6115, "step": 16765 }, { "epoch": 0.9166381914355617, "grad_norm": 1.6795474290847778, "learning_rate": 3.8528154580642853e-07, "loss": 1.5606, "step": 16766 }, { "epoch": 0.9166928638792833, "grad_norm": 1.4389642477035522, "learning_rate": 3.847793595690885e-07, "loss": 1.3198, "step": 16767 }, { "epoch": 0.9167475363230048, "grad_norm": 1.5722671747207642, "learning_rate": 3.842774944065264e-07, "loss": 1.3898, "step": 16768 }, { "epoch": 0.9168022087667264, "grad_norm": 1.6002815961837769, "learning_rate": 3.837759503355054e-07, "loss": 1.5577, "step": 16769 }, { "epoch": 0.916856881210448, "grad_norm": 1.3872145414352417, "learning_rate": 3.8327472737276995e-07, "loss": 1.3955, "step": 16770 }, { "epoch": 0.9169115536541694, "grad_norm": 1.8385080099105835, "learning_rate": 3.827738255350555e-07, "loss": 1.2926, "step": 16771 }, { "epoch": 0.916966226097891, "grad_norm": 1.4440723657608032, "learning_rate": 3.8227324483909313e-07, "loss": 1.5533, "step": 16772 }, { "epoch": 0.9170208985416126, "grad_norm": 1.779536485671997, "learning_rate": 3.81772985301595e-07, "loss": 1.4673, "step": 16773 }, { "epoch": 0.9170755709853341, "grad_norm": 1.4387468099594116, "learning_rate": 3.812730469392678e-07, "loss": 1.3842, "step": 16774 }, { "epoch": 0.9171302434290557, "grad_norm": 1.4279903173446655, "learning_rate": 3.807734297688037e-07, "loss": 1.4544, "step": 16775 }, { "epoch": 0.9171849158727772, "grad_norm": 1.4592766761779785, "learning_rate": 3.8027413380688603e-07, "loss": 1.2892, "step": 16776 }, { "epoch": 0.9172395883164988, "grad_norm": 1.6359398365020752, "learning_rate": 3.7977515907018927e-07, "loss": 1.4773, "step": 16777 }, { "epoch": 0.9172942607602204, "grad_norm": 1.4575684070587158, "learning_rate": 3.792765055753755e-07, "loss": 1.3296, "step": 16778 }, { "epoch": 0.9173489332039418, "grad_norm": 1.8398922681808472, "learning_rate": 3.7877817333909275e-07, "loss": 1.2186, "step": 16779 }, { "epoch": 0.9174036056476634, "grad_norm": 1.7891590595245361, "learning_rate": 3.7828016237798525e-07, "loss": 1.4945, "step": 16780 }, { "epoch": 0.917458278091385, "grad_norm": 1.5324571132659912, "learning_rate": 3.77782472708681e-07, "loss": 1.4066, "step": 16781 }, { "epoch": 0.9175129505351065, "grad_norm": 1.7294954061508179, "learning_rate": 3.7728510434779876e-07, "loss": 1.3835, "step": 16782 }, { "epoch": 0.9175676229788281, "grad_norm": 1.3306232690811157, "learning_rate": 3.7678805731194754e-07, "loss": 1.49, "step": 16783 }, { "epoch": 0.9176222954225497, "grad_norm": 1.7061375379562378, "learning_rate": 3.7629133161772525e-07, "loss": 1.3866, "step": 16784 }, { "epoch": 0.9176769678662712, "grad_norm": 1.4625885486602783, "learning_rate": 3.757949272817174e-07, "loss": 1.5357, "step": 16785 }, { "epoch": 0.9177316403099928, "grad_norm": 1.61588716506958, "learning_rate": 3.7529884432050077e-07, "loss": 1.4608, "step": 16786 }, { "epoch": 0.9177863127537144, "grad_norm": 1.2651762962341309, "learning_rate": 3.7480308275064214e-07, "loss": 1.5546, "step": 16787 }, { "epoch": 0.9178409851974358, "grad_norm": 1.4331305027008057, "learning_rate": 3.7430764258869377e-07, "loss": 1.2619, "step": 16788 }, { "epoch": 0.9178956576411574, "grad_norm": 1.4656398296356201, "learning_rate": 3.738125238512014e-07, "loss": 1.4326, "step": 16789 }, { "epoch": 0.9179503300848789, "grad_norm": 1.503548502922058, "learning_rate": 3.73317726554695e-07, "loss": 1.3575, "step": 16790 }, { "epoch": 0.9180050025286005, "grad_norm": 1.556815266609192, "learning_rate": 3.7282325071570145e-07, "loss": 1.5444, "step": 16791 }, { "epoch": 0.9180596749723221, "grad_norm": 1.6569757461547852, "learning_rate": 3.723290963507309e-07, "loss": 1.3433, "step": 16792 }, { "epoch": 0.9181143474160436, "grad_norm": 1.1221833229064941, "learning_rate": 3.718352634762823e-07, "loss": 1.5682, "step": 16793 }, { "epoch": 0.9181690198597652, "grad_norm": 1.5947823524475098, "learning_rate": 3.7134175210884807e-07, "loss": 1.3364, "step": 16794 }, { "epoch": 0.9182236923034868, "grad_norm": 1.675612211227417, "learning_rate": 3.7084856226490716e-07, "loss": 1.1586, "step": 16795 }, { "epoch": 0.9182783647472083, "grad_norm": 1.7574479579925537, "learning_rate": 3.7035569396092763e-07, "loss": 1.562, "step": 16796 }, { "epoch": 0.9183330371909298, "grad_norm": 1.5785620212554932, "learning_rate": 3.698631472133696e-07, "loss": 1.1883, "step": 16797 }, { "epoch": 0.9183877096346514, "grad_norm": 1.6323288679122925, "learning_rate": 3.6937092203867874e-07, "loss": 1.3298, "step": 16798 }, { "epoch": 0.9184423820783729, "grad_norm": 1.2045701742172241, "learning_rate": 3.688790184532909e-07, "loss": 1.4788, "step": 16799 }, { "epoch": 0.9184970545220945, "grad_norm": 2.8317131996154785, "learning_rate": 3.68387436473634e-07, "loss": 1.4055, "step": 16800 }, { "epoch": 0.9185517269658161, "grad_norm": 1.3855788707733154, "learning_rate": 3.678961761161193e-07, "loss": 1.3925, "step": 16801 }, { "epoch": 0.9186063994095376, "grad_norm": 1.548475980758667, "learning_rate": 3.674052373971559e-07, "loss": 1.506, "step": 16802 }, { "epoch": 0.9186610718532592, "grad_norm": 1.4442479610443115, "learning_rate": 3.669146203331353e-07, "loss": 1.5991, "step": 16803 }, { "epoch": 0.9187157442969807, "grad_norm": 1.2680447101593018, "learning_rate": 3.664243249404387e-07, "loss": 1.6272, "step": 16804 }, { "epoch": 0.9187704167407023, "grad_norm": 1.5998296737670898, "learning_rate": 3.6593435123544075e-07, "loss": 1.414, "step": 16805 }, { "epoch": 0.9188250891844238, "grad_norm": 1.403468132019043, "learning_rate": 3.654446992345018e-07, "loss": 1.3209, "step": 16806 }, { "epoch": 0.9188797616281453, "grad_norm": 1.2631754875183105, "learning_rate": 3.64955368953972e-07, "loss": 1.5156, "step": 16807 }, { "epoch": 0.9189344340718669, "grad_norm": 1.292921781539917, "learning_rate": 3.6446636041019276e-07, "loss": 1.2839, "step": 16808 }, { "epoch": 0.9189891065155885, "grad_norm": 1.3539974689483643, "learning_rate": 3.639776736194922e-07, "loss": 1.4542, "step": 16809 }, { "epoch": 0.91904377895931, "grad_norm": 1.2711155414581299, "learning_rate": 3.634893085981872e-07, "loss": 1.4358, "step": 16810 }, { "epoch": 0.9190984514030316, "grad_norm": 1.6785253286361694, "learning_rate": 3.6300126536258806e-07, "loss": 1.2167, "step": 16811 }, { "epoch": 0.9191531238467532, "grad_norm": 1.6256407499313354, "learning_rate": 3.625135439289917e-07, "loss": 1.4102, "step": 16812 }, { "epoch": 0.9192077962904747, "grad_norm": 1.319028615951538, "learning_rate": 3.620261443136819e-07, "loss": 1.5518, "step": 16813 }, { "epoch": 0.9192624687341963, "grad_norm": 1.6398403644561768, "learning_rate": 3.6153906653293544e-07, "loss": 1.2638, "step": 16814 }, { "epoch": 0.9193171411779179, "grad_norm": 1.476245641708374, "learning_rate": 3.6105231060301613e-07, "loss": 1.2048, "step": 16815 }, { "epoch": 0.9193718136216393, "grad_norm": 1.4920986890792847, "learning_rate": 3.6056587654018094e-07, "loss": 1.291, "step": 16816 }, { "epoch": 0.9194264860653609, "grad_norm": 1.8387541770935059, "learning_rate": 3.6007976436066903e-07, "loss": 1.6266, "step": 16817 }, { "epoch": 0.9194811585090824, "grad_norm": 1.4676846265792847, "learning_rate": 3.5959397408071416e-07, "loss": 1.3869, "step": 16818 }, { "epoch": 0.919535830952804, "grad_norm": 1.7143198251724243, "learning_rate": 3.5910850571653997e-07, "loss": 1.436, "step": 16819 }, { "epoch": 0.9195905033965256, "grad_norm": 2.450364112854004, "learning_rate": 3.5862335928435465e-07, "loss": 1.5215, "step": 16820 }, { "epoch": 0.9196451758402471, "grad_norm": 1.238338589668274, "learning_rate": 3.5813853480035966e-07, "loss": 1.3204, "step": 16821 }, { "epoch": 0.9196998482839687, "grad_norm": 1.1649073362350464, "learning_rate": 3.5765403228074424e-07, "loss": 1.399, "step": 16822 }, { "epoch": 0.9197545207276903, "grad_norm": 1.5197997093200684, "learning_rate": 3.5716985174168884e-07, "loss": 1.3485, "step": 16823 }, { "epoch": 0.9198091931714117, "grad_norm": 1.4583913087844849, "learning_rate": 3.5668599319935717e-07, "loss": 1.3068, "step": 16824 }, { "epoch": 0.9198638656151333, "grad_norm": 1.438517689704895, "learning_rate": 3.562024566699107e-07, "loss": 1.457, "step": 16825 }, { "epoch": 0.9199185380588549, "grad_norm": 1.6780035495758057, "learning_rate": 3.5571924216949327e-07, "loss": 1.0908, "step": 16826 }, { "epoch": 0.9199732105025764, "grad_norm": 1.7881152629852295, "learning_rate": 3.5523634971424194e-07, "loss": 1.7568, "step": 16827 }, { "epoch": 0.920027882946298, "grad_norm": 1.3247047662734985, "learning_rate": 3.5475377932028155e-07, "loss": 1.5485, "step": 16828 }, { "epoch": 0.9200825553900196, "grad_norm": 1.5566704273223877, "learning_rate": 3.5427153100372367e-07, "loss": 1.3109, "step": 16829 }, { "epoch": 0.9201372278337411, "grad_norm": 1.4333516359329224, "learning_rate": 3.5378960478067547e-07, "loss": 1.5633, "step": 16830 }, { "epoch": 0.9201919002774627, "grad_norm": 1.5787222385406494, "learning_rate": 3.533080006672285e-07, "loss": 1.2887, "step": 16831 }, { "epoch": 0.9202465727211842, "grad_norm": 1.3268331289291382, "learning_rate": 3.5282671867946206e-07, "loss": 1.2608, "step": 16832 }, { "epoch": 0.9203012451649057, "grad_norm": 1.6852943897247314, "learning_rate": 3.5234575883345e-07, "loss": 1.3687, "step": 16833 }, { "epoch": 0.9203559176086273, "grad_norm": 1.591079592704773, "learning_rate": 3.5186512114525283e-07, "loss": 1.7244, "step": 16834 }, { "epoch": 0.9204105900523488, "grad_norm": 1.2496124505996704, "learning_rate": 3.513848056309177e-07, "loss": 1.3777, "step": 16835 }, { "epoch": 0.9204652624960704, "grad_norm": 1.412105917930603, "learning_rate": 3.509048123064862e-07, "loss": 1.5207, "step": 16836 }, { "epoch": 0.920519934939792, "grad_norm": 1.2466669082641602, "learning_rate": 3.504251411879855e-07, "loss": 1.4932, "step": 16837 }, { "epoch": 0.9205746073835135, "grad_norm": 1.7188669443130493, "learning_rate": 3.4994579229143287e-07, "loss": 1.4144, "step": 16838 }, { "epoch": 0.9206292798272351, "grad_norm": 1.4461796283721924, "learning_rate": 3.4946676563283545e-07, "loss": 1.4347, "step": 16839 }, { "epoch": 0.9206839522709567, "grad_norm": 1.698000431060791, "learning_rate": 3.489880612281871e-07, "loss": 1.4619, "step": 16840 }, { "epoch": 0.9207386247146782, "grad_norm": 2.5066020488739014, "learning_rate": 3.485096790934739e-07, "loss": 1.306, "step": 16841 }, { "epoch": 0.9207932971583997, "grad_norm": 1.8854286670684814, "learning_rate": 3.48031619244672e-07, "loss": 1.3542, "step": 16842 }, { "epoch": 0.9208479696021213, "grad_norm": 1.923026442527771, "learning_rate": 3.4755388169774086e-07, "loss": 1.3215, "step": 16843 }, { "epoch": 0.9209026420458428, "grad_norm": 1.6762691736221313, "learning_rate": 3.470764664686377e-07, "loss": 1.6794, "step": 16844 }, { "epoch": 0.9209573144895644, "grad_norm": 1.722036600112915, "learning_rate": 3.465993735733031e-07, "loss": 1.3783, "step": 16845 }, { "epoch": 0.9210119869332859, "grad_norm": 1.2402960062026978, "learning_rate": 3.4612260302766653e-07, "loss": 1.4439, "step": 16846 }, { "epoch": 0.9210666593770075, "grad_norm": 1.5897390842437744, "learning_rate": 3.4564615484764975e-07, "loss": 1.1207, "step": 16847 }, { "epoch": 0.9211213318207291, "grad_norm": 2.063009262084961, "learning_rate": 3.451700290491633e-07, "loss": 1.4063, "step": 16848 }, { "epoch": 0.9211760042644506, "grad_norm": 1.4636058807373047, "learning_rate": 3.4469422564810453e-07, "loss": 1.2934, "step": 16849 }, { "epoch": 0.9212306767081722, "grad_norm": 1.8042947053909302, "learning_rate": 3.4421874466036286e-07, "loss": 1.3037, "step": 16850 }, { "epoch": 0.9212853491518938, "grad_norm": 1.2914550304412842, "learning_rate": 3.437435861018168e-07, "loss": 1.5896, "step": 16851 }, { "epoch": 0.9213400215956152, "grad_norm": 1.4506839513778687, "learning_rate": 3.4326874998833026e-07, "loss": 1.4233, "step": 16852 }, { "epoch": 0.9213946940393368, "grad_norm": 1.502768874168396, "learning_rate": 3.427942363357606e-07, "loss": 1.5079, "step": 16853 }, { "epoch": 0.9214493664830584, "grad_norm": 1.515377402305603, "learning_rate": 3.4232004515995287e-07, "loss": 1.1489, "step": 16854 }, { "epoch": 0.9215040389267799, "grad_norm": 1.2813153266906738, "learning_rate": 3.4184617647674e-07, "loss": 1.5206, "step": 16855 }, { "epoch": 0.9215587113705015, "grad_norm": 1.5012931823730469, "learning_rate": 3.4137263030194713e-07, "loss": 1.3802, "step": 16856 }, { "epoch": 0.9216133838142231, "grad_norm": 1.7664721012115479, "learning_rate": 3.408994066513871e-07, "loss": 1.3454, "step": 16857 }, { "epoch": 0.9216680562579446, "grad_norm": 1.4547200202941895, "learning_rate": 3.404265055408618e-07, "loss": 1.4836, "step": 16858 }, { "epoch": 0.9217227287016662, "grad_norm": 1.434249758720398, "learning_rate": 3.399539269861629e-07, "loss": 1.3442, "step": 16859 }, { "epoch": 0.9217774011453876, "grad_norm": 1.6652153730392456, "learning_rate": 3.3948167100306906e-07, "loss": 1.3334, "step": 16860 }, { "epoch": 0.9218320735891092, "grad_norm": 1.5735293626785278, "learning_rate": 3.3900973760735313e-07, "loss": 1.3761, "step": 16861 }, { "epoch": 0.9218867460328308, "grad_norm": 1.2424180507659912, "learning_rate": 3.3853812681477136e-07, "loss": 1.5973, "step": 16862 }, { "epoch": 0.9219414184765523, "grad_norm": 1.6874204874038696, "learning_rate": 3.3806683864107347e-07, "loss": 1.2702, "step": 16863 }, { "epoch": 0.9219960909202739, "grad_norm": 1.452156662940979, "learning_rate": 3.375958731019957e-07, "loss": 1.261, "step": 16864 }, { "epoch": 0.9220507633639955, "grad_norm": 1.6145622730255127, "learning_rate": 3.371252302132666e-07, "loss": 1.4864, "step": 16865 }, { "epoch": 0.922105435807717, "grad_norm": 1.8465993404388428, "learning_rate": 3.36654909990598e-07, "loss": 1.2816, "step": 16866 }, { "epoch": 0.9221601082514386, "grad_norm": 1.707112193107605, "learning_rate": 3.3618491244969965e-07, "loss": 1.4815, "step": 16867 }, { "epoch": 0.9222147806951602, "grad_norm": 1.542455792427063, "learning_rate": 3.3571523760626333e-07, "loss": 1.4984, "step": 16868 }, { "epoch": 0.9222694531388816, "grad_norm": 1.513833999633789, "learning_rate": 3.3524588547597327e-07, "loss": 1.2451, "step": 16869 }, { "epoch": 0.9223241255826032, "grad_norm": 1.592275619506836, "learning_rate": 3.347768560745024e-07, "loss": 1.256, "step": 16870 }, { "epoch": 0.9223787980263248, "grad_norm": 2.4459307193756104, "learning_rate": 3.3430814941751153e-07, "loss": 1.4587, "step": 16871 }, { "epoch": 0.9224334704700463, "grad_norm": 1.5023242235183716, "learning_rate": 3.3383976552065376e-07, "loss": 1.4693, "step": 16872 }, { "epoch": 0.9224881429137679, "grad_norm": 1.6811027526855469, "learning_rate": 3.333717043995677e-07, "loss": 1.2212, "step": 16873 }, { "epoch": 0.9225428153574894, "grad_norm": 1.7036731243133545, "learning_rate": 3.3290396606988404e-07, "loss": 1.4042, "step": 16874 }, { "epoch": 0.922597487801211, "grad_norm": 1.340524673461914, "learning_rate": 3.324365505472227e-07, "loss": 1.3878, "step": 16875 }, { "epoch": 0.9226521602449326, "grad_norm": 1.2330667972564697, "learning_rate": 3.3196945784718993e-07, "loss": 1.7087, "step": 16876 }, { "epoch": 0.922706832688654, "grad_norm": 1.5121718645095825, "learning_rate": 3.315026879853833e-07, "loss": 1.588, "step": 16877 }, { "epoch": 0.9227615051323756, "grad_norm": 1.8566299676895142, "learning_rate": 3.310362409773904e-07, "loss": 1.2381, "step": 16878 }, { "epoch": 0.9228161775760972, "grad_norm": 1.6528979539871216, "learning_rate": 3.3057011683878647e-07, "loss": 1.4471, "step": 16879 }, { "epoch": 0.9228708500198187, "grad_norm": 1.4208111763000488, "learning_rate": 3.3010431558513577e-07, "loss": 1.3843, "step": 16880 }, { "epoch": 0.9229255224635403, "grad_norm": 1.4950848817825317, "learning_rate": 3.2963883723199364e-07, "loss": 1.2765, "step": 16881 }, { "epoch": 0.9229801949072619, "grad_norm": 1.555299997329712, "learning_rate": 3.291736817949021e-07, "loss": 1.4005, "step": 16882 }, { "epoch": 0.9230348673509834, "grad_norm": 1.548776626586914, "learning_rate": 3.287088492893942e-07, "loss": 1.3692, "step": 16883 }, { "epoch": 0.923089539794705, "grad_norm": 1.5874587297439575, "learning_rate": 3.2824433973099425e-07, "loss": 1.5053, "step": 16884 }, { "epoch": 0.9231442122384266, "grad_norm": 1.668285608291626, "learning_rate": 3.277801531352087e-07, "loss": 1.6056, "step": 16885 }, { "epoch": 0.9231988846821481, "grad_norm": 1.6495513916015625, "learning_rate": 3.2731628951754193e-07, "loss": 1.4493, "step": 16886 }, { "epoch": 0.9232535571258697, "grad_norm": 1.6276451349258423, "learning_rate": 3.268527488934825e-07, "loss": 1.3382, "step": 16887 }, { "epoch": 0.9233082295695911, "grad_norm": 1.6029053926467896, "learning_rate": 3.26389531278507e-07, "loss": 1.4176, "step": 16888 }, { "epoch": 0.9233629020133127, "grad_norm": 1.8334386348724365, "learning_rate": 3.2592663668808645e-07, "loss": 1.1451, "step": 16889 }, { "epoch": 0.9234175744570343, "grad_norm": 1.5913270711898804, "learning_rate": 3.25464065137675e-07, "loss": 1.0999, "step": 16890 }, { "epoch": 0.9234722469007558, "grad_norm": 1.5018041133880615, "learning_rate": 3.250018166427216e-07, "loss": 1.3358, "step": 16891 }, { "epoch": 0.9235269193444774, "grad_norm": 1.404934287071228, "learning_rate": 3.245398912186604e-07, "loss": 1.3678, "step": 16892 }, { "epoch": 0.923581591788199, "grad_norm": 1.2248287200927734, "learning_rate": 3.2407828888091687e-07, "loss": 1.5407, "step": 16893 }, { "epoch": 0.9236362642319205, "grad_norm": 2.012948751449585, "learning_rate": 3.236170096449032e-07, "loss": 1.0204, "step": 16894 }, { "epoch": 0.9236909366756421, "grad_norm": 1.9313957691192627, "learning_rate": 3.2315605352602474e-07, "loss": 1.365, "step": 16895 }, { "epoch": 0.9237456091193637, "grad_norm": 1.6486830711364746, "learning_rate": 3.226954205396737e-07, "loss": 1.4474, "step": 16896 }, { "epoch": 0.9238002815630851, "grad_norm": 1.3501936197280884, "learning_rate": 3.2223511070122893e-07, "loss": 1.5891, "step": 16897 }, { "epoch": 0.9238549540068067, "grad_norm": 1.3942595720291138, "learning_rate": 3.217751240260647e-07, "loss": 1.4746, "step": 16898 }, { "epoch": 0.9239096264505283, "grad_norm": 1.2266839742660522, "learning_rate": 3.2131546052953987e-07, "loss": 1.2862, "step": 16899 }, { "epoch": 0.9239642988942498, "grad_norm": 1.3801283836364746, "learning_rate": 3.208561202270033e-07, "loss": 1.489, "step": 16900 }, { "epoch": 0.9240189713379714, "grad_norm": 1.6108325719833374, "learning_rate": 3.203971031337938e-07, "loss": 1.3744, "step": 16901 }, { "epoch": 0.9240736437816929, "grad_norm": 1.6214022636413574, "learning_rate": 3.199384092652402e-07, "loss": 1.3353, "step": 16902 }, { "epoch": 0.9241283162254145, "grad_norm": 1.8491846323013306, "learning_rate": 3.19480038636657e-07, "loss": 1.358, "step": 16903 }, { "epoch": 0.9241829886691361, "grad_norm": 1.3584930896759033, "learning_rate": 3.190219912633519e-07, "loss": 1.3439, "step": 16904 }, { "epoch": 0.9242376611128575, "grad_norm": 1.2850857973098755, "learning_rate": 3.185642671606182e-07, "loss": 1.415, "step": 16905 }, { "epoch": 0.9242923335565791, "grad_norm": 1.430030345916748, "learning_rate": 3.1810686634374253e-07, "loss": 1.4695, "step": 16906 }, { "epoch": 0.9243470060003007, "grad_norm": 2.1084413528442383, "learning_rate": 3.1764978882799833e-07, "loss": 1.3724, "step": 16907 }, { "epoch": 0.9244016784440222, "grad_norm": 1.7133159637451172, "learning_rate": 3.171930346286467e-07, "loss": 1.6223, "step": 16908 }, { "epoch": 0.9244563508877438, "grad_norm": 1.3895219564437866, "learning_rate": 3.167366037609421e-07, "loss": 1.5165, "step": 16909 }, { "epoch": 0.9245110233314654, "grad_norm": 1.8962616920471191, "learning_rate": 3.162804962401256e-07, "loss": 1.4035, "step": 16910 }, { "epoch": 0.9245656957751869, "grad_norm": 2.4160773754119873, "learning_rate": 3.158247120814251e-07, "loss": 1.1726, "step": 16911 }, { "epoch": 0.9246203682189085, "grad_norm": 1.486120581626892, "learning_rate": 3.153692513000628e-07, "loss": 1.0713, "step": 16912 }, { "epoch": 0.9246750406626301, "grad_norm": 1.4646869897842407, "learning_rate": 3.149141139112466e-07, "loss": 1.1132, "step": 16913 }, { "epoch": 0.9247297131063515, "grad_norm": 1.8036775588989258, "learning_rate": 3.1445929993017545e-07, "loss": 1.4755, "step": 16914 }, { "epoch": 0.9247843855500731, "grad_norm": 1.4337536096572876, "learning_rate": 3.1400480937203604e-07, "loss": 1.2713, "step": 16915 }, { "epoch": 0.9248390579937947, "grad_norm": 1.519912600517273, "learning_rate": 3.1355064225200516e-07, "loss": 1.4007, "step": 16916 }, { "epoch": 0.9248937304375162, "grad_norm": 1.5284128189086914, "learning_rate": 3.1309679858524846e-07, "loss": 1.368, "step": 16917 }, { "epoch": 0.9249484028812378, "grad_norm": 1.8831695318222046, "learning_rate": 3.1264327838692153e-07, "loss": 1.3751, "step": 16918 }, { "epoch": 0.9250030753249593, "grad_norm": 1.3022352457046509, "learning_rate": 3.121900816721646e-07, "loss": 1.5598, "step": 16919 }, { "epoch": 0.9250577477686809, "grad_norm": 1.9823516607284546, "learning_rate": 3.1173720845611654e-07, "loss": 1.4773, "step": 16920 }, { "epoch": 0.9251124202124025, "grad_norm": 1.548496127128601, "learning_rate": 3.1128465875389646e-07, "loss": 1.5189, "step": 16921 }, { "epoch": 0.925167092656124, "grad_norm": 1.564959168434143, "learning_rate": 3.108324325806167e-07, "loss": 1.3104, "step": 16922 }, { "epoch": 0.9252217650998456, "grad_norm": 1.4961097240447998, "learning_rate": 3.103805299513796e-07, "loss": 1.4937, "step": 16923 }, { "epoch": 0.9252764375435671, "grad_norm": 1.5476223230361938, "learning_rate": 3.0992895088127306e-07, "loss": 1.1865, "step": 16924 }, { "epoch": 0.9253311099872886, "grad_norm": 1.5125662088394165, "learning_rate": 3.094776953853762e-07, "loss": 1.2973, "step": 16925 }, { "epoch": 0.9253857824310102, "grad_norm": 1.3563451766967773, "learning_rate": 3.0902676347876025e-07, "loss": 1.4409, "step": 16926 }, { "epoch": 0.9254404548747318, "grad_norm": 1.845848798751831, "learning_rate": 3.085761551764799e-07, "loss": 1.5399, "step": 16927 }, { "epoch": 0.9254951273184533, "grad_norm": 1.5727819204330444, "learning_rate": 3.081258704935841e-07, "loss": 1.2708, "step": 16928 }, { "epoch": 0.9255497997621749, "grad_norm": 1.7589880228042603, "learning_rate": 3.076759094451087e-07, "loss": 1.4318, "step": 16929 }, { "epoch": 0.9256044722058965, "grad_norm": 2.104557514190674, "learning_rate": 3.0722627204607834e-07, "loss": 1.3973, "step": 16930 }, { "epoch": 0.925659144649618, "grad_norm": 1.4581443071365356, "learning_rate": 3.0677695831150767e-07, "loss": 1.3554, "step": 16931 }, { "epoch": 0.9257138170933396, "grad_norm": 1.746952772140503, "learning_rate": 3.063279682564002e-07, "loss": 1.3427, "step": 16932 }, { "epoch": 0.925768489537061, "grad_norm": 1.6697747707366943, "learning_rate": 3.0587930189574734e-07, "loss": 1.644, "step": 16933 }, { "epoch": 0.9258231619807826, "grad_norm": 1.3381712436676025, "learning_rate": 3.054309592445348e-07, "loss": 1.3594, "step": 16934 }, { "epoch": 0.9258778344245042, "grad_norm": 1.4787322282791138, "learning_rate": 3.049829403177307e-07, "loss": 1.4016, "step": 16935 }, { "epoch": 0.9259325068682257, "grad_norm": 1.5860545635223389, "learning_rate": 3.045352451302952e-07, "loss": 1.498, "step": 16936 }, { "epoch": 0.9259871793119473, "grad_norm": 1.274759292602539, "learning_rate": 3.040878736971797e-07, "loss": 1.7793, "step": 16937 }, { "epoch": 0.9260418517556689, "grad_norm": 1.2990806102752686, "learning_rate": 3.0364082603332235e-07, "loss": 1.5344, "step": 16938 }, { "epoch": 0.9260965241993904, "grad_norm": 1.3845744132995605, "learning_rate": 3.0319410215365e-07, "loss": 1.4852, "step": 16939 }, { "epoch": 0.926151196643112, "grad_norm": 2.1491940021514893, "learning_rate": 3.027477020730829e-07, "loss": 1.1849, "step": 16940 }, { "epoch": 0.9262058690868336, "grad_norm": 1.8074712753295898, "learning_rate": 3.0230162580652367e-07, "loss": 1.3145, "step": 16941 }, { "epoch": 0.926260541530555, "grad_norm": 1.561821460723877, "learning_rate": 3.0185587336887034e-07, "loss": 1.5892, "step": 16942 }, { "epoch": 0.9263152139742766, "grad_norm": 1.421929955482483, "learning_rate": 3.014104447750077e-07, "loss": 1.6253, "step": 16943 }, { "epoch": 0.9263698864179982, "grad_norm": 1.4144147634506226, "learning_rate": 3.0096534003980606e-07, "loss": 1.3928, "step": 16944 }, { "epoch": 0.9264245588617197, "grad_norm": 1.7236312627792358, "learning_rate": 3.005205591781335e-07, "loss": 1.3194, "step": 16945 }, { "epoch": 0.9264792313054413, "grad_norm": 1.5407288074493408, "learning_rate": 3.0007610220483927e-07, "loss": 1.329, "step": 16946 }, { "epoch": 0.9265339037491628, "grad_norm": 1.2289642095565796, "learning_rate": 2.996319691347649e-07, "loss": 1.3351, "step": 16947 }, { "epoch": 0.9265885761928844, "grad_norm": 1.7951041460037231, "learning_rate": 2.991881599827429e-07, "loss": 1.3841, "step": 16948 }, { "epoch": 0.926643248636606, "grad_norm": 1.4415132999420166, "learning_rate": 2.987446747635925e-07, "loss": 1.3296, "step": 16949 }, { "epoch": 0.9266979210803274, "grad_norm": 1.6476333141326904, "learning_rate": 2.983015134921197e-07, "loss": 1.4727, "step": 16950 }, { "epoch": 0.926752593524049, "grad_norm": 1.9029113054275513, "learning_rate": 2.9785867618312705e-07, "loss": 1.5415, "step": 16951 }, { "epoch": 0.9268072659677706, "grad_norm": 1.5371452569961548, "learning_rate": 2.9741616285139943e-07, "loss": 1.3753, "step": 16952 }, { "epoch": 0.9268619384114921, "grad_norm": 1.5439085960388184, "learning_rate": 2.969739735117128e-07, "loss": 1.3723, "step": 16953 }, { "epoch": 0.9269166108552137, "grad_norm": 1.4370830059051514, "learning_rate": 2.965321081788364e-07, "loss": 1.3478, "step": 16954 }, { "epoch": 0.9269712832989353, "grad_norm": 2.3432323932647705, "learning_rate": 2.960905668675218e-07, "loss": 1.2782, "step": 16955 }, { "epoch": 0.9270259557426568, "grad_norm": 1.51012122631073, "learning_rate": 2.956493495925139e-07, "loss": 1.4365, "step": 16956 }, { "epoch": 0.9270806281863784, "grad_norm": 1.6960607767105103, "learning_rate": 2.9520845636854644e-07, "loss": 1.3769, "step": 16957 }, { "epoch": 0.9271353006301, "grad_norm": 1.661009669303894, "learning_rate": 2.94767887210341e-07, "loss": 1.5379, "step": 16958 }, { "epoch": 0.9271899730738214, "grad_norm": 1.3797279596328735, "learning_rate": 2.9432764213261025e-07, "loss": 1.389, "step": 16959 }, { "epoch": 0.927244645517543, "grad_norm": 1.3543403148651123, "learning_rate": 2.9388772115005457e-07, "loss": 1.452, "step": 16960 }, { "epoch": 0.9272993179612645, "grad_norm": 1.664858102798462, "learning_rate": 2.934481242773635e-07, "loss": 1.5068, "step": 16961 }, { "epoch": 0.9273539904049861, "grad_norm": 1.9647449254989624, "learning_rate": 2.930088515292173e-07, "loss": 1.5413, "step": 16962 }, { "epoch": 0.9274086628487077, "grad_norm": 1.8947803974151611, "learning_rate": 2.925699029202844e-07, "loss": 1.3354, "step": 16963 }, { "epoch": 0.9274633352924292, "grad_norm": 1.438117504119873, "learning_rate": 2.921312784652197e-07, "loss": 1.4095, "step": 16964 }, { "epoch": 0.9275180077361508, "grad_norm": 1.422637701034546, "learning_rate": 2.916929781786737e-07, "loss": 1.5532, "step": 16965 }, { "epoch": 0.9275726801798724, "grad_norm": 1.6991451978683472, "learning_rate": 2.912550020752791e-07, "loss": 1.1674, "step": 16966 }, { "epoch": 0.9276273526235939, "grad_norm": 1.5255964994430542, "learning_rate": 2.9081735016966205e-07, "loss": 1.2483, "step": 16967 }, { "epoch": 0.9276820250673155, "grad_norm": 1.398790717124939, "learning_rate": 2.9038002247643857e-07, "loss": 1.5246, "step": 16968 }, { "epoch": 0.927736697511037, "grad_norm": 1.5836360454559326, "learning_rate": 2.8994301901021035e-07, "loss": 1.6021, "step": 16969 }, { "epoch": 0.9277913699547585, "grad_norm": 1.4660249948501587, "learning_rate": 2.8950633978556907e-07, "loss": 1.3843, "step": 16970 }, { "epoch": 0.9278460423984801, "grad_norm": 2.71907114982605, "learning_rate": 2.8906998481709857e-07, "loss": 1.1704, "step": 16971 }, { "epoch": 0.9279007148422017, "grad_norm": 1.7414355278015137, "learning_rate": 2.886339541193672e-07, "loss": 1.3372, "step": 16972 }, { "epoch": 0.9279553872859232, "grad_norm": 1.5970592498779297, "learning_rate": 2.881982477069378e-07, "loss": 1.4452, "step": 16973 }, { "epoch": 0.9280100597296448, "grad_norm": 1.7047613859176636, "learning_rate": 2.877628655943576e-07, "loss": 1.3122, "step": 16974 }, { "epoch": 0.9280647321733663, "grad_norm": 1.6986467838287354, "learning_rate": 2.873278077961661e-07, "loss": 1.3335, "step": 16975 }, { "epoch": 0.9281194046170879, "grad_norm": 1.951292634010315, "learning_rate": 2.8689307432689053e-07, "loss": 1.6847, "step": 16976 }, { "epoch": 0.9281740770608095, "grad_norm": 1.528344750404358, "learning_rate": 2.8645866520104815e-07, "loss": 1.4603, "step": 16977 }, { "epoch": 0.9282287495045309, "grad_norm": 1.4819180965423584, "learning_rate": 2.8602458043314296e-07, "loss": 1.7224, "step": 16978 }, { "epoch": 0.9282834219482525, "grad_norm": 1.377677083015442, "learning_rate": 2.8559082003767334e-07, "loss": 1.351, "step": 16979 }, { "epoch": 0.9283380943919741, "grad_norm": 1.2723820209503174, "learning_rate": 2.851573840291211e-07, "loss": 1.5629, "step": 16980 }, { "epoch": 0.9283927668356956, "grad_norm": 1.4689654111862183, "learning_rate": 2.847242724219612e-07, "loss": 1.4012, "step": 16981 }, { "epoch": 0.9284474392794172, "grad_norm": 1.372783899307251, "learning_rate": 2.8429148523065443e-07, "loss": 1.5192, "step": 16982 }, { "epoch": 0.9285021117231388, "grad_norm": 1.6028703451156616, "learning_rate": 2.8385902246965357e-07, "loss": 1.2652, "step": 16983 }, { "epoch": 0.9285567841668603, "grad_norm": 1.647618055343628, "learning_rate": 2.834268841534005e-07, "loss": 1.4319, "step": 16984 }, { "epoch": 0.9286114566105819, "grad_norm": 2.4381103515625, "learning_rate": 2.8299507029632356e-07, "loss": 1.4272, "step": 16985 }, { "epoch": 0.9286661290543035, "grad_norm": 1.4371365308761597, "learning_rate": 2.825635809128424e-07, "loss": 1.5587, "step": 16986 }, { "epoch": 0.9287208014980249, "grad_norm": 1.6604527235031128, "learning_rate": 2.8213241601736775e-07, "loss": 1.4244, "step": 16987 }, { "epoch": 0.9287754739417465, "grad_norm": 1.6056216955184937, "learning_rate": 2.8170157562429466e-07, "loss": 1.5289, "step": 16988 }, { "epoch": 0.928830146385468, "grad_norm": 1.872426986694336, "learning_rate": 2.812710597480095e-07, "loss": 1.3651, "step": 16989 }, { "epoch": 0.9288848188291896, "grad_norm": 1.5982909202575684, "learning_rate": 2.8084086840289074e-07, "loss": 1.3624, "step": 16990 }, { "epoch": 0.9289394912729112, "grad_norm": 1.553562879562378, "learning_rate": 2.8041100160330127e-07, "loss": 1.341, "step": 16991 }, { "epoch": 0.9289941637166327, "grad_norm": 2.293686866760254, "learning_rate": 2.7998145936359635e-07, "loss": 1.4815, "step": 16992 }, { "epoch": 0.9290488361603543, "grad_norm": 1.5472825765609741, "learning_rate": 2.7955224169812e-07, "loss": 1.3599, "step": 16993 }, { "epoch": 0.9291035086040759, "grad_norm": 1.2663922309875488, "learning_rate": 2.7912334862120305e-07, "loss": 1.6582, "step": 16994 }, { "epoch": 0.9291581810477973, "grad_norm": 1.6402407884597778, "learning_rate": 2.7869478014716953e-07, "loss": 1.6203, "step": 16995 }, { "epoch": 0.9292128534915189, "grad_norm": 1.033169150352478, "learning_rate": 2.7826653629032806e-07, "loss": 1.6302, "step": 16996 }, { "epoch": 0.9292675259352405, "grad_norm": 1.8053597211837769, "learning_rate": 2.778386170649794e-07, "loss": 1.3398, "step": 16997 }, { "epoch": 0.929322198378962, "grad_norm": 1.3641623258590698, "learning_rate": 2.774110224854132e-07, "loss": 1.6941, "step": 16998 }, { "epoch": 0.9293768708226836, "grad_norm": 1.4183917045593262, "learning_rate": 2.7698375256590916e-07, "loss": 1.6108, "step": 16999 }, { "epoch": 0.9294315432664052, "grad_norm": 1.286077857017517, "learning_rate": 2.765568073207314e-07, "loss": 1.5758, "step": 17000 }, { "epoch": 0.9294862157101267, "grad_norm": 1.5850650072097778, "learning_rate": 2.761301867641397e-07, "loss": 1.5356, "step": 17001 }, { "epoch": 0.9295408881538483, "grad_norm": 1.1255924701690674, "learning_rate": 2.757038909103793e-07, "loss": 1.7326, "step": 17002 }, { "epoch": 0.9295955605975698, "grad_norm": 1.3410826921463013, "learning_rate": 2.752779197736832e-07, "loss": 1.1837, "step": 17003 }, { "epoch": 0.9296502330412914, "grad_norm": 1.832296371459961, "learning_rate": 2.748522733682779e-07, "loss": 1.2751, "step": 17004 }, { "epoch": 0.929704905485013, "grad_norm": 1.784128189086914, "learning_rate": 2.744269517083764e-07, "loss": 1.4565, "step": 17005 }, { "epoch": 0.9297595779287344, "grad_norm": 1.42039155960083, "learning_rate": 2.740019548081796e-07, "loss": 1.3721, "step": 17006 }, { "epoch": 0.929814250372456, "grad_norm": 1.3258800506591797, "learning_rate": 2.7357728268188167e-07, "loss": 1.5438, "step": 17007 }, { "epoch": 0.9298689228161776, "grad_norm": 1.9701472520828247, "learning_rate": 2.731529353436624e-07, "loss": 1.382, "step": 17008 }, { "epoch": 0.9299235952598991, "grad_norm": 1.581326961517334, "learning_rate": 2.7272891280769044e-07, "loss": 1.2055, "step": 17009 }, { "epoch": 0.9299782677036207, "grad_norm": 2.8358097076416016, "learning_rate": 2.7230521508812556e-07, "loss": 1.5938, "step": 17010 }, { "epoch": 0.9300329401473423, "grad_norm": 1.8140239715576172, "learning_rate": 2.718818421991165e-07, "loss": 1.5969, "step": 17011 }, { "epoch": 0.9300876125910638, "grad_norm": 1.7168338298797607, "learning_rate": 2.714587941548008e-07, "loss": 1.1279, "step": 17012 }, { "epoch": 0.9301422850347854, "grad_norm": 2.0492100715637207, "learning_rate": 2.7103607096930497e-07, "loss": 1.4005, "step": 17013 }, { "epoch": 0.930196957478507, "grad_norm": 1.4203009605407715, "learning_rate": 2.7061367265674323e-07, "loss": 1.4034, "step": 17014 }, { "epoch": 0.9302516299222284, "grad_norm": 1.2984009981155396, "learning_rate": 2.701915992312221e-07, "loss": 1.519, "step": 17015 }, { "epoch": 0.93030630236595, "grad_norm": 1.2292239665985107, "learning_rate": 2.697698507068358e-07, "loss": 1.5051, "step": 17016 }, { "epoch": 0.9303609748096715, "grad_norm": 1.670448660850525, "learning_rate": 2.693484270976665e-07, "loss": 1.4954, "step": 17017 }, { "epoch": 0.9304156472533931, "grad_norm": 1.4864835739135742, "learning_rate": 2.6892732841778736e-07, "loss": 1.3364, "step": 17018 }, { "epoch": 0.9304703196971147, "grad_norm": 2.0441153049468994, "learning_rate": 2.685065546812593e-07, "loss": 1.6322, "step": 17019 }, { "epoch": 0.9305249921408362, "grad_norm": 1.6437910795211792, "learning_rate": 2.6808610590213336e-07, "loss": 1.3278, "step": 17020 }, { "epoch": 0.9305796645845578, "grad_norm": 1.5192991495132446, "learning_rate": 2.6766598209444825e-07, "loss": 1.3923, "step": 17021 }, { "epoch": 0.9306343370282794, "grad_norm": 1.4299066066741943, "learning_rate": 2.6724618327223394e-07, "loss": 1.8474, "step": 17022 }, { "epoch": 0.9306890094720008, "grad_norm": 1.3795050382614136, "learning_rate": 2.6682670944950804e-07, "loss": 1.3172, "step": 17023 }, { "epoch": 0.9307436819157224, "grad_norm": 1.8314684629440308, "learning_rate": 2.664075606402783e-07, "loss": 1.4516, "step": 17024 }, { "epoch": 0.930798354359444, "grad_norm": 1.644826889038086, "learning_rate": 2.6598873685853897e-07, "loss": 1.3431, "step": 17025 }, { "epoch": 0.9308530268031655, "grad_norm": 2.1984314918518066, "learning_rate": 2.6557023811827897e-07, "loss": 1.4078, "step": 17026 }, { "epoch": 0.9309076992468871, "grad_norm": 1.8937722444534302, "learning_rate": 2.6515206443347153e-07, "loss": 1.2755, "step": 17027 }, { "epoch": 0.9309623716906087, "grad_norm": 1.2985639572143555, "learning_rate": 2.6473421581807877e-07, "loss": 1.6455, "step": 17028 }, { "epoch": 0.9310170441343302, "grad_norm": 1.9035656452178955, "learning_rate": 2.6431669228605625e-07, "loss": 1.5412, "step": 17029 }, { "epoch": 0.9310717165780518, "grad_norm": 1.4787416458129883, "learning_rate": 2.638994938513451e-07, "loss": 1.4809, "step": 17030 }, { "epoch": 0.9311263890217732, "grad_norm": 1.6728317737579346, "learning_rate": 2.634826205278751e-07, "loss": 1.4651, "step": 17031 }, { "epoch": 0.9311810614654948, "grad_norm": 2.030141592025757, "learning_rate": 2.630660723295686e-07, "loss": 1.284, "step": 17032 }, { "epoch": 0.9312357339092164, "grad_norm": 1.214188814163208, "learning_rate": 2.6264984927033445e-07, "loss": 1.5161, "step": 17033 }, { "epoch": 0.9312904063529379, "grad_norm": 1.7407335042953491, "learning_rate": 2.6223395136407146e-07, "loss": 1.4438, "step": 17034 }, { "epoch": 0.9313450787966595, "grad_norm": 1.4990037679672241, "learning_rate": 2.618183786246675e-07, "loss": 1.4952, "step": 17035 }, { "epoch": 0.9313997512403811, "grad_norm": 1.5269898176193237, "learning_rate": 2.6140313106599813e-07, "loss": 1.5498, "step": 17036 }, { "epoch": 0.9314544236841026, "grad_norm": 1.4626282453536987, "learning_rate": 2.609882087019311e-07, "loss": 1.4736, "step": 17037 }, { "epoch": 0.9315090961278242, "grad_norm": 1.3459430932998657, "learning_rate": 2.605736115463209e-07, "loss": 1.5009, "step": 17038 }, { "epoch": 0.9315637685715458, "grad_norm": 1.6181511878967285, "learning_rate": 2.601593396130109e-07, "loss": 1.6881, "step": 17039 }, { "epoch": 0.9316184410152673, "grad_norm": 1.672519326210022, "learning_rate": 2.597453929158378e-07, "loss": 1.5275, "step": 17040 }, { "epoch": 0.9316731134589888, "grad_norm": 1.5307925939559937, "learning_rate": 2.5933177146862167e-07, "loss": 1.5534, "step": 17041 }, { "epoch": 0.9317277859027104, "grad_norm": 1.8009244203567505, "learning_rate": 2.589184752851748e-07, "loss": 1.3682, "step": 17042 }, { "epoch": 0.9317824583464319, "grad_norm": 1.6571897268295288, "learning_rate": 2.5850550437929834e-07, "loss": 1.6314, "step": 17043 }, { "epoch": 0.9318371307901535, "grad_norm": 1.6109730005264282, "learning_rate": 2.580928587647824e-07, "loss": 1.2514, "step": 17044 }, { "epoch": 0.931891803233875, "grad_norm": 1.484797477722168, "learning_rate": 2.5768053845540484e-07, "loss": 1.4365, "step": 17045 }, { "epoch": 0.9319464756775966, "grad_norm": 1.227248191833496, "learning_rate": 2.572685434649358e-07, "loss": 1.3151, "step": 17046 }, { "epoch": 0.9320011481213182, "grad_norm": 1.4423112869262695, "learning_rate": 2.568568738071331e-07, "loss": 1.3239, "step": 17047 }, { "epoch": 0.9320558205650397, "grad_norm": 1.2859137058258057, "learning_rate": 2.564455294957413e-07, "loss": 1.5771, "step": 17048 }, { "epoch": 0.9321104930087613, "grad_norm": 1.765508770942688, "learning_rate": 2.5603451054449835e-07, "loss": 1.4634, "step": 17049 }, { "epoch": 0.9321651654524828, "grad_norm": 1.7413889169692993, "learning_rate": 2.556238169671266e-07, "loss": 1.231, "step": 17050 }, { "epoch": 0.9322198378962043, "grad_norm": 1.5482324361801147, "learning_rate": 2.5521344877734165e-07, "loss": 1.4792, "step": 17051 }, { "epoch": 0.9322745103399259, "grad_norm": 1.5449097156524658, "learning_rate": 2.548034059888471e-07, "loss": 1.4168, "step": 17052 }, { "epoch": 0.9323291827836475, "grad_norm": 1.3562781810760498, "learning_rate": 2.543936886153342e-07, "loss": 1.5669, "step": 17053 }, { "epoch": 0.932383855227369, "grad_norm": 1.5355817079544067, "learning_rate": 2.539842966704853e-07, "loss": 1.5633, "step": 17054 }, { "epoch": 0.9324385276710906, "grad_norm": 1.874508023262024, "learning_rate": 2.535752301679706e-07, "loss": 1.3849, "step": 17055 }, { "epoch": 0.9324932001148122, "grad_norm": 1.5922417640686035, "learning_rate": 2.531664891214491e-07, "loss": 1.2538, "step": 17056 }, { "epoch": 0.9325478725585337, "grad_norm": 1.327863097190857, "learning_rate": 2.527580735445701e-07, "loss": 1.6376, "step": 17057 }, { "epoch": 0.9326025450022553, "grad_norm": 1.2361948490142822, "learning_rate": 2.523499834509724e-07, "loss": 1.2903, "step": 17058 }, { "epoch": 0.9326572174459767, "grad_norm": 1.0577616691589355, "learning_rate": 2.519422188542819e-07, "loss": 1.6546, "step": 17059 }, { "epoch": 0.9327118898896983, "grad_norm": 1.9595085382461548, "learning_rate": 2.515347797681156e-07, "loss": 1.3505, "step": 17060 }, { "epoch": 0.9327665623334199, "grad_norm": 1.4965412616729736, "learning_rate": 2.511276662060791e-07, "loss": 1.4592, "step": 17061 }, { "epoch": 0.9328212347771414, "grad_norm": 1.670721173286438, "learning_rate": 2.507208781817638e-07, "loss": 1.6803, "step": 17062 }, { "epoch": 0.932875907220863, "grad_norm": 2.412517786026001, "learning_rate": 2.5031441570875783e-07, "loss": 1.15, "step": 17063 }, { "epoch": 0.9329305796645846, "grad_norm": 1.4343246221542358, "learning_rate": 2.499082788006313e-07, "loss": 1.5205, "step": 17064 }, { "epoch": 0.9329852521083061, "grad_norm": 1.6380983591079712, "learning_rate": 2.495024674709468e-07, "loss": 1.217, "step": 17065 }, { "epoch": 0.9330399245520277, "grad_norm": 1.5464434623718262, "learning_rate": 2.490969817332545e-07, "loss": 1.6098, "step": 17066 }, { "epoch": 0.9330945969957493, "grad_norm": 1.3945696353912354, "learning_rate": 2.4869182160109696e-07, "loss": 1.388, "step": 17067 }, { "epoch": 0.9331492694394707, "grad_norm": 1.777267336845398, "learning_rate": 2.482869870879989e-07, "loss": 1.3453, "step": 17068 }, { "epoch": 0.9332039418831923, "grad_norm": 1.6090912818908691, "learning_rate": 2.47882478207484e-07, "loss": 1.2712, "step": 17069 }, { "epoch": 0.9332586143269139, "grad_norm": 1.9279491901397705, "learning_rate": 2.4747829497305477e-07, "loss": 1.2864, "step": 17070 }, { "epoch": 0.9333132867706354, "grad_norm": 1.2412865161895752, "learning_rate": 2.470744373982126e-07, "loss": 1.4699, "step": 17071 }, { "epoch": 0.933367959214357, "grad_norm": 1.3776918649673462, "learning_rate": 2.4667090549644e-07, "loss": 1.5497, "step": 17072 }, { "epoch": 0.9334226316580785, "grad_norm": 1.3225244283676147, "learning_rate": 2.46267699281213e-07, "loss": 1.3039, "step": 17073 }, { "epoch": 0.9334773041018001, "grad_norm": 1.626362919807434, "learning_rate": 2.458648187659962e-07, "loss": 1.4277, "step": 17074 }, { "epoch": 0.9335319765455217, "grad_norm": 1.7977606058120728, "learning_rate": 2.454622639642412e-07, "loss": 1.4366, "step": 17075 }, { "epoch": 0.9335866489892432, "grad_norm": 1.52866792678833, "learning_rate": 2.4506003488938943e-07, "loss": 1.3804, "step": 17076 }, { "epoch": 0.9336413214329647, "grad_norm": 1.358351469039917, "learning_rate": 2.4465813155487574e-07, "loss": 1.5523, "step": 17077 }, { "epoch": 0.9336959938766863, "grad_norm": 1.550951600074768, "learning_rate": 2.442565539741182e-07, "loss": 1.341, "step": 17078 }, { "epoch": 0.9337506663204078, "grad_norm": 1.5538746118545532, "learning_rate": 2.43855302160525e-07, "loss": 1.3455, "step": 17079 }, { "epoch": 0.9338053387641294, "grad_norm": 1.534084439277649, "learning_rate": 2.434543761274988e-07, "loss": 1.3162, "step": 17080 }, { "epoch": 0.933860011207851, "grad_norm": 1.823851227760315, "learning_rate": 2.4305377588842547e-07, "loss": 1.208, "step": 17081 }, { "epoch": 0.9339146836515725, "grad_norm": 1.2907510995864868, "learning_rate": 2.426535014566811e-07, "loss": 1.5854, "step": 17082 }, { "epoch": 0.9339693560952941, "grad_norm": 1.7432751655578613, "learning_rate": 2.4225355284563265e-07, "loss": 1.3521, "step": 17083 }, { "epoch": 0.9340240285390157, "grad_norm": 1.693070888519287, "learning_rate": 2.418539300686351e-07, "loss": 1.562, "step": 17084 }, { "epoch": 0.9340787009827372, "grad_norm": 1.5401982069015503, "learning_rate": 2.414546331390344e-07, "loss": 1.3438, "step": 17085 }, { "epoch": 0.9341333734264587, "grad_norm": 1.7088799476623535, "learning_rate": 2.4105566207016207e-07, "loss": 1.3151, "step": 17086 }, { "epoch": 0.9341880458701802, "grad_norm": 1.7564139366149902, "learning_rate": 2.406570168753408e-07, "loss": 1.472, "step": 17087 }, { "epoch": 0.9342427183139018, "grad_norm": 1.404274344444275, "learning_rate": 2.4025869756788333e-07, "loss": 1.5351, "step": 17088 }, { "epoch": 0.9342973907576234, "grad_norm": 1.4995185136795044, "learning_rate": 2.398607041610901e-07, "loss": 1.2452, "step": 17089 }, { "epoch": 0.9343520632013449, "grad_norm": 1.3148127794265747, "learning_rate": 2.3946303666824934e-07, "loss": 1.4356, "step": 17090 }, { "epoch": 0.9344067356450665, "grad_norm": 1.267540454864502, "learning_rate": 2.3906569510264375e-07, "loss": 1.392, "step": 17091 }, { "epoch": 0.9344614080887881, "grad_norm": 1.5908145904541016, "learning_rate": 2.3866867947753836e-07, "loss": 1.5934, "step": 17092 }, { "epoch": 0.9345160805325096, "grad_norm": 1.3766535520553589, "learning_rate": 2.3827198980619025e-07, "loss": 1.4663, "step": 17093 }, { "epoch": 0.9345707529762312, "grad_norm": 1.2288062572479248, "learning_rate": 2.3787562610184888e-07, "loss": 1.5187, "step": 17094 }, { "epoch": 0.9346254254199527, "grad_norm": 1.6701228618621826, "learning_rate": 2.37479588377747e-07, "loss": 1.4546, "step": 17095 }, { "epoch": 0.9346800978636742, "grad_norm": 1.2195923328399658, "learning_rate": 2.3708387664710952e-07, "loss": 1.3409, "step": 17096 }, { "epoch": 0.9347347703073958, "grad_norm": 1.9137753248214722, "learning_rate": 2.366884909231515e-07, "loss": 1.1744, "step": 17097 }, { "epoch": 0.9347894427511174, "grad_norm": 1.6493068933486938, "learning_rate": 2.3629343121907566e-07, "loss": 1.3085, "step": 17098 }, { "epoch": 0.9348441151948389, "grad_norm": 1.5717309713363647, "learning_rate": 2.358986975480726e-07, "loss": 1.1015, "step": 17099 }, { "epoch": 0.9348987876385605, "grad_norm": 1.4816033840179443, "learning_rate": 2.3550428992332508e-07, "loss": 1.224, "step": 17100 }, { "epoch": 0.934953460082282, "grad_norm": 1.4847633838653564, "learning_rate": 2.3511020835800147e-07, "loss": 1.3774, "step": 17101 }, { "epoch": 0.9350081325260036, "grad_norm": 1.5754534006118774, "learning_rate": 2.3471645286526233e-07, "loss": 1.1169, "step": 17102 }, { "epoch": 0.9350628049697252, "grad_norm": 1.5142403841018677, "learning_rate": 2.3432302345825608e-07, "loss": 1.6515, "step": 17103 }, { "epoch": 0.9351174774134466, "grad_norm": 1.2746790647506714, "learning_rate": 2.3392992015011883e-07, "loss": 1.562, "step": 17104 }, { "epoch": 0.9351721498571682, "grad_norm": 1.5419992208480835, "learning_rate": 2.33537142953979e-07, "loss": 1.5069, "step": 17105 }, { "epoch": 0.9352268223008898, "grad_norm": 1.6894184350967407, "learning_rate": 2.3314469188295273e-07, "loss": 1.3964, "step": 17106 }, { "epoch": 0.9352814947446113, "grad_norm": 1.4494630098342896, "learning_rate": 2.327525669501418e-07, "loss": 1.4695, "step": 17107 }, { "epoch": 0.9353361671883329, "grad_norm": 1.701129674911499, "learning_rate": 2.323607681686446e-07, "loss": 1.2469, "step": 17108 }, { "epoch": 0.9353908396320545, "grad_norm": 1.3619590997695923, "learning_rate": 2.3196929555154068e-07, "loss": 1.4321, "step": 17109 }, { "epoch": 0.935445512075776, "grad_norm": 1.888107180595398, "learning_rate": 2.315781491119029e-07, "loss": 1.5744, "step": 17110 }, { "epoch": 0.9355001845194976, "grad_norm": 1.369128704071045, "learning_rate": 2.3118732886279304e-07, "loss": 1.2714, "step": 17111 }, { "epoch": 0.9355548569632192, "grad_norm": 1.6357181072235107, "learning_rate": 2.307968348172629e-07, "loss": 1.3185, "step": 17112 }, { "epoch": 0.9356095294069406, "grad_norm": 1.9969345331192017, "learning_rate": 2.304066669883498e-07, "loss": 1.2196, "step": 17113 }, { "epoch": 0.9356642018506622, "grad_norm": 1.5373518466949463, "learning_rate": 2.3001682538908333e-07, "loss": 1.437, "step": 17114 }, { "epoch": 0.9357188742943837, "grad_norm": 1.2196682691574097, "learning_rate": 2.2962731003247972e-07, "loss": 1.4353, "step": 17115 }, { "epoch": 0.9357735467381053, "grad_norm": 1.8754934072494507, "learning_rate": 2.2923812093154861e-07, "loss": 1.4455, "step": 17116 }, { "epoch": 0.9358282191818269, "grad_norm": 2.001253128051758, "learning_rate": 2.2884925809928404e-07, "loss": 1.1126, "step": 17117 }, { "epoch": 0.9358828916255484, "grad_norm": 1.6464951038360596, "learning_rate": 2.2846072154867117e-07, "loss": 1.4142, "step": 17118 }, { "epoch": 0.93593756406927, "grad_norm": 1.2315162420272827, "learning_rate": 2.2807251129268404e-07, "loss": 1.621, "step": 17119 }, { "epoch": 0.9359922365129916, "grad_norm": 2.0078256130218506, "learning_rate": 2.2768462734428786e-07, "loss": 1.5465, "step": 17120 }, { "epoch": 0.936046908956713, "grad_norm": 1.272542953491211, "learning_rate": 2.2729706971643117e-07, "loss": 1.4763, "step": 17121 }, { "epoch": 0.9361015814004346, "grad_norm": 1.480703592300415, "learning_rate": 2.2690983842205916e-07, "loss": 1.5488, "step": 17122 }, { "epoch": 0.9361562538441562, "grad_norm": 1.6964199542999268, "learning_rate": 2.2652293347410148e-07, "loss": 1.6489, "step": 17123 }, { "epoch": 0.9362109262878777, "grad_norm": 1.3883394002914429, "learning_rate": 2.261363548854767e-07, "loss": 1.4304, "step": 17124 }, { "epoch": 0.9362655987315993, "grad_norm": 1.2958452701568604, "learning_rate": 2.2575010266909448e-07, "loss": 1.2452, "step": 17125 }, { "epoch": 0.9363202711753209, "grad_norm": 1.2986869812011719, "learning_rate": 2.2536417683785117e-07, "loss": 1.6671, "step": 17126 }, { "epoch": 0.9363749436190424, "grad_norm": 1.7514954805374146, "learning_rate": 2.2497857740463536e-07, "loss": 1.4089, "step": 17127 }, { "epoch": 0.936429616062764, "grad_norm": 1.408970594406128, "learning_rate": 2.245933043823234e-07, "loss": 1.4502, "step": 17128 }, { "epoch": 0.9364842885064856, "grad_norm": 1.3734267950057983, "learning_rate": 2.2420835778377837e-07, "loss": 1.4885, "step": 17129 }, { "epoch": 0.9365389609502071, "grad_norm": 1.4928687810897827, "learning_rate": 2.238237376218566e-07, "loss": 1.6471, "step": 17130 }, { "epoch": 0.9365936333939286, "grad_norm": 1.4180246591567993, "learning_rate": 2.2343944390940119e-07, "loss": 1.4655, "step": 17131 }, { "epoch": 0.9366483058376501, "grad_norm": 1.292860746383667, "learning_rate": 2.2305547665924298e-07, "loss": 1.4441, "step": 17132 }, { "epoch": 0.9367029782813717, "grad_norm": 1.60586678981781, "learning_rate": 2.2267183588420616e-07, "loss": 1.6421, "step": 17133 }, { "epoch": 0.9367576507250933, "grad_norm": 1.479722499847412, "learning_rate": 2.2228852159709935e-07, "loss": 1.2691, "step": 17134 }, { "epoch": 0.9368123231688148, "grad_norm": 1.4160076379776, "learning_rate": 2.2190553381072234e-07, "loss": 1.1904, "step": 17135 }, { "epoch": 0.9368669956125364, "grad_norm": 1.0303229093551636, "learning_rate": 2.2152287253786598e-07, "loss": 1.6497, "step": 17136 }, { "epoch": 0.936921668056258, "grad_norm": 1.701043725013733, "learning_rate": 2.2114053779130561e-07, "loss": 1.386, "step": 17137 }, { "epoch": 0.9369763404999795, "grad_norm": 2.583463191986084, "learning_rate": 2.2075852958380995e-07, "loss": 1.2436, "step": 17138 }, { "epoch": 0.9370310129437011, "grad_norm": 1.6147089004516602, "learning_rate": 2.2037684792813542e-07, "loss": 1.2128, "step": 17139 }, { "epoch": 0.9370856853874227, "grad_norm": 2.4004085063934326, "learning_rate": 2.1999549283702514e-07, "loss": 1.3355, "step": 17140 }, { "epoch": 0.9371403578311441, "grad_norm": 1.7630983591079712, "learning_rate": 2.1961446432321564e-07, "loss": 1.5232, "step": 17141 }, { "epoch": 0.9371950302748657, "grad_norm": 1.8960936069488525, "learning_rate": 2.1923376239942895e-07, "loss": 1.4175, "step": 17142 }, { "epoch": 0.9372497027185873, "grad_norm": 1.5074429512023926, "learning_rate": 2.1885338707837822e-07, "loss": 1.4283, "step": 17143 }, { "epoch": 0.9373043751623088, "grad_norm": 1.553139328956604, "learning_rate": 2.1847333837276552e-07, "loss": 1.4312, "step": 17144 }, { "epoch": 0.9373590476060304, "grad_norm": 1.3708469867706299, "learning_rate": 2.180936162952818e-07, "loss": 1.5469, "step": 17145 }, { "epoch": 0.9374137200497519, "grad_norm": 1.4905191659927368, "learning_rate": 2.1771422085860473e-07, "loss": 1.4531, "step": 17146 }, { "epoch": 0.9374683924934735, "grad_norm": 1.3874664306640625, "learning_rate": 2.1733515207540634e-07, "loss": 1.5704, "step": 17147 }, { "epoch": 0.9375230649371951, "grad_norm": 1.3320668935775757, "learning_rate": 2.169564099583421e-07, "loss": 1.5888, "step": 17148 }, { "epoch": 0.9375777373809165, "grad_norm": 1.4144213199615479, "learning_rate": 2.1657799452005856e-07, "loss": 1.427, "step": 17149 }, { "epoch": 0.9376324098246381, "grad_norm": 1.4288675785064697, "learning_rate": 2.1619990577319562e-07, "loss": 1.3795, "step": 17150 }, { "epoch": 0.9376870822683597, "grad_norm": 2.202871322631836, "learning_rate": 2.1582214373037536e-07, "loss": 1.3518, "step": 17151 }, { "epoch": 0.9377417547120812, "grad_norm": 1.679815411567688, "learning_rate": 2.154447084042133e-07, "loss": 1.5118, "step": 17152 }, { "epoch": 0.9377964271558028, "grad_norm": 1.4861258268356323, "learning_rate": 2.150675998073126e-07, "loss": 1.4025, "step": 17153 }, { "epoch": 0.9378510995995244, "grad_norm": 1.0119227170944214, "learning_rate": 2.1469081795226443e-07, "loss": 1.4364, "step": 17154 }, { "epoch": 0.9379057720432459, "grad_norm": 1.430290699005127, "learning_rate": 2.1431436285165307e-07, "loss": 1.5583, "step": 17155 }, { "epoch": 0.9379604444869675, "grad_norm": 1.2143793106079102, "learning_rate": 2.139382345180474e-07, "loss": 1.3973, "step": 17156 }, { "epoch": 0.9380151169306891, "grad_norm": 1.9151829481124878, "learning_rate": 2.1356243296400846e-07, "loss": 1.6523, "step": 17157 }, { "epoch": 0.9380697893744105, "grad_norm": 1.6451537609100342, "learning_rate": 2.13186958202084e-07, "loss": 1.3115, "step": 17158 }, { "epoch": 0.9381244618181321, "grad_norm": 1.5117441415786743, "learning_rate": 2.128118102448129e-07, "loss": 1.3574, "step": 17159 }, { "epoch": 0.9381791342618536, "grad_norm": 1.4625197649002075, "learning_rate": 2.1243698910472067e-07, "loss": 1.2208, "step": 17160 }, { "epoch": 0.9382338067055752, "grad_norm": 1.530109167098999, "learning_rate": 2.1206249479432617e-07, "loss": 1.3862, "step": 17161 }, { "epoch": 0.9382884791492968, "grad_norm": 1.430206537246704, "learning_rate": 2.1168832732613164e-07, "loss": 1.4749, "step": 17162 }, { "epoch": 0.9383431515930183, "grad_norm": 1.4786748886108398, "learning_rate": 2.1131448671263378e-07, "loss": 1.6286, "step": 17163 }, { "epoch": 0.9383978240367399, "grad_norm": 1.5269646644592285, "learning_rate": 2.1094097296631587e-07, "loss": 1.4098, "step": 17164 }, { "epoch": 0.9384524964804615, "grad_norm": 1.588222622871399, "learning_rate": 2.105677860996491e-07, "loss": 1.5894, "step": 17165 }, { "epoch": 0.938507168924183, "grad_norm": 1.4297736883163452, "learning_rate": 2.101949261250935e-07, "loss": 1.5273, "step": 17166 }, { "epoch": 0.9385618413679045, "grad_norm": 1.8770256042480469, "learning_rate": 2.0982239305510355e-07, "loss": 1.2508, "step": 17167 }, { "epoch": 0.9386165138116261, "grad_norm": 1.770817518234253, "learning_rate": 2.0945018690211706e-07, "loss": 1.4439, "step": 17168 }, { "epoch": 0.9386711862553476, "grad_norm": 1.6578975915908813, "learning_rate": 2.0907830767856295e-07, "loss": 1.5968, "step": 17169 }, { "epoch": 0.9387258586990692, "grad_norm": 1.3511251211166382, "learning_rate": 2.0870675539686024e-07, "loss": 1.2821, "step": 17170 }, { "epoch": 0.9387805311427908, "grad_norm": 1.7664077281951904, "learning_rate": 2.0833553006941343e-07, "loss": 1.3121, "step": 17171 }, { "epoch": 0.9388352035865123, "grad_norm": 2.3965444564819336, "learning_rate": 2.0796463170862147e-07, "loss": 1.3605, "step": 17172 }, { "epoch": 0.9388898760302339, "grad_norm": 1.9294261932373047, "learning_rate": 2.075940603268678e-07, "loss": 1.4331, "step": 17173 }, { "epoch": 0.9389445484739554, "grad_norm": 2.1554088592529297, "learning_rate": 2.0722381593652586e-07, "loss": 1.0413, "step": 17174 }, { "epoch": 0.938999220917677, "grad_norm": 1.2581411600112915, "learning_rate": 2.068538985499613e-07, "loss": 1.5254, "step": 17175 }, { "epoch": 0.9390538933613986, "grad_norm": 1.5358036756515503, "learning_rate": 2.0648430817952537e-07, "loss": 1.4852, "step": 17176 }, { "epoch": 0.93910856580512, "grad_norm": 1.1991705894470215, "learning_rate": 2.0611504483756038e-07, "loss": 1.4995, "step": 17177 }, { "epoch": 0.9391632382488416, "grad_norm": 1.979041576385498, "learning_rate": 2.0574610853639544e-07, "loss": 1.4262, "step": 17178 }, { "epoch": 0.9392179106925632, "grad_norm": 1.3907040357589722, "learning_rate": 2.053774992883506e-07, "loss": 1.5262, "step": 17179 }, { "epoch": 0.9392725831362847, "grad_norm": 1.699953317642212, "learning_rate": 2.0500921710573385e-07, "loss": 1.3148, "step": 17180 }, { "epoch": 0.9393272555800063, "grad_norm": 1.7208398580551147, "learning_rate": 2.0464126200084532e-07, "loss": 1.2037, "step": 17181 }, { "epoch": 0.9393819280237279, "grad_norm": 1.6199301481246948, "learning_rate": 2.0427363398596966e-07, "loss": 1.4333, "step": 17182 }, { "epoch": 0.9394366004674494, "grad_norm": 1.5860899686813354, "learning_rate": 2.039063330733848e-07, "loss": 1.3621, "step": 17183 }, { "epoch": 0.939491272911171, "grad_norm": 1.6308354139328003, "learning_rate": 2.0353935927535428e-07, "loss": 1.3899, "step": 17184 }, { "epoch": 0.9395459453548926, "grad_norm": 1.7628268003463745, "learning_rate": 2.0317271260413273e-07, "loss": 1.3328, "step": 17185 }, { "epoch": 0.939600617798614, "grad_norm": 1.5417033433914185, "learning_rate": 2.028063930719637e-07, "loss": 1.2967, "step": 17186 }, { "epoch": 0.9396552902423356, "grad_norm": 1.8882912397384644, "learning_rate": 2.024404006910785e-07, "loss": 1.288, "step": 17187 }, { "epoch": 0.9397099626860571, "grad_norm": 1.4520903825759888, "learning_rate": 2.020747354736985e-07, "loss": 1.3668, "step": 17188 }, { "epoch": 0.9397646351297787, "grad_norm": 1.6515958309173584, "learning_rate": 2.0170939743203499e-07, "loss": 1.4132, "step": 17189 }, { "epoch": 0.9398193075735003, "grad_norm": 1.5659997463226318, "learning_rate": 2.0134438657828824e-07, "loss": 1.4338, "step": 17190 }, { "epoch": 0.9398739800172218, "grad_norm": 1.759922742843628, "learning_rate": 2.009797029246452e-07, "loss": 1.3973, "step": 17191 }, { "epoch": 0.9399286524609434, "grad_norm": 1.7295544147491455, "learning_rate": 2.0061534648328384e-07, "loss": 1.428, "step": 17192 }, { "epoch": 0.939983324904665, "grad_norm": 1.5059142112731934, "learning_rate": 2.0025131726637116e-07, "loss": 1.8781, "step": 17193 }, { "epoch": 0.9400379973483864, "grad_norm": 1.6065335273742676, "learning_rate": 1.9988761528606182e-07, "loss": 1.3368, "step": 17194 }, { "epoch": 0.940092669792108, "grad_norm": 1.8713375329971313, "learning_rate": 1.9952424055450282e-07, "loss": 1.4198, "step": 17195 }, { "epoch": 0.9401473422358296, "grad_norm": 1.5297640562057495, "learning_rate": 1.9916119308382553e-07, "loss": 1.3779, "step": 17196 }, { "epoch": 0.9402020146795511, "grad_norm": 1.4232839345932007, "learning_rate": 1.9879847288615583e-07, "loss": 1.4159, "step": 17197 }, { "epoch": 0.9402566871232727, "grad_norm": 1.7768433094024658, "learning_rate": 1.9843607997360403e-07, "loss": 1.2554, "step": 17198 }, { "epoch": 0.9403113595669943, "grad_norm": 2.1644883155822754, "learning_rate": 1.9807401435827045e-07, "loss": 1.4345, "step": 17199 }, { "epoch": 0.9403660320107158, "grad_norm": 1.9651479721069336, "learning_rate": 1.9771227605224763e-07, "loss": 1.1877, "step": 17200 }, { "epoch": 0.9404207044544374, "grad_norm": 1.5579348802566528, "learning_rate": 1.9735086506761368e-07, "loss": 1.4169, "step": 17201 }, { "epoch": 0.9404753768981589, "grad_norm": 1.5351160764694214, "learning_rate": 1.9698978141643786e-07, "loss": 1.3353, "step": 17202 }, { "epoch": 0.9405300493418804, "grad_norm": 1.732803225517273, "learning_rate": 1.9662902511077607e-07, "loss": 1.4657, "step": 17203 }, { "epoch": 0.940584721785602, "grad_norm": 1.358406901359558, "learning_rate": 1.9626859616267536e-07, "loss": 1.2101, "step": 17204 }, { "epoch": 0.9406393942293235, "grad_norm": 1.48006272315979, "learning_rate": 1.959084945841705e-07, "loss": 1.3599, "step": 17205 }, { "epoch": 0.9406940666730451, "grad_norm": 1.9526370763778687, "learning_rate": 1.9554872038728746e-07, "loss": 1.5254, "step": 17206 }, { "epoch": 0.9407487391167667, "grad_norm": 1.7060538530349731, "learning_rate": 1.9518927358403994e-07, "loss": 1.2964, "step": 17207 }, { "epoch": 0.9408034115604882, "grad_norm": 1.5757006406784058, "learning_rate": 1.9483015418642947e-07, "loss": 1.4196, "step": 17208 }, { "epoch": 0.9408580840042098, "grad_norm": 2.1956162452697754, "learning_rate": 1.9447136220644979e-07, "loss": 1.2355, "step": 17209 }, { "epoch": 0.9409127564479314, "grad_norm": 1.3074387311935425, "learning_rate": 1.941128976560791e-07, "loss": 1.5804, "step": 17210 }, { "epoch": 0.9409674288916529, "grad_norm": 1.6554274559020996, "learning_rate": 1.9375476054729115e-07, "loss": 1.5434, "step": 17211 }, { "epoch": 0.9410221013353745, "grad_norm": 1.4491361379623413, "learning_rate": 1.9339695089204192e-07, "loss": 1.4538, "step": 17212 }, { "epoch": 0.941076773779096, "grad_norm": 1.4850945472717285, "learning_rate": 1.9303946870227964e-07, "loss": 1.4635, "step": 17213 }, { "epoch": 0.9411314462228175, "grad_norm": 1.6456819772720337, "learning_rate": 1.9268231398994363e-07, "loss": 1.3835, "step": 17214 }, { "epoch": 0.9411861186665391, "grad_norm": 1.5030914545059204, "learning_rate": 1.9232548676695772e-07, "loss": 1.2579, "step": 17215 }, { "epoch": 0.9412407911102606, "grad_norm": 2.2308387756347656, "learning_rate": 1.9196898704523902e-07, "loss": 1.5995, "step": 17216 }, { "epoch": 0.9412954635539822, "grad_norm": 1.6261513233184814, "learning_rate": 1.9161281483669025e-07, "loss": 1.3639, "step": 17217 }, { "epoch": 0.9413501359977038, "grad_norm": 1.4622770547866821, "learning_rate": 1.9125697015320632e-07, "loss": 1.4729, "step": 17218 }, { "epoch": 0.9414048084414253, "grad_norm": 1.5165892839431763, "learning_rate": 1.9090145300666885e-07, "loss": 1.4883, "step": 17219 }, { "epoch": 0.9414594808851469, "grad_norm": 1.3594727516174316, "learning_rate": 1.9054626340894943e-07, "loss": 1.076, "step": 17220 }, { "epoch": 0.9415141533288685, "grad_norm": 1.7195696830749512, "learning_rate": 1.9019140137190973e-07, "loss": 1.2133, "step": 17221 }, { "epoch": 0.9415688257725899, "grad_norm": 1.7100478410720825, "learning_rate": 1.8983686690739688e-07, "loss": 1.1669, "step": 17222 }, { "epoch": 0.9416234982163115, "grad_norm": 1.5824905633926392, "learning_rate": 1.8948266002725258e-07, "loss": 1.4535, "step": 17223 }, { "epoch": 0.9416781706600331, "grad_norm": 1.67178475856781, "learning_rate": 1.8912878074330288e-07, "loss": 1.6449, "step": 17224 }, { "epoch": 0.9417328431037546, "grad_norm": 1.6553643941879272, "learning_rate": 1.8877522906736612e-07, "loss": 1.4105, "step": 17225 }, { "epoch": 0.9417875155474762, "grad_norm": 1.669266700744629, "learning_rate": 1.884220050112462e-07, "loss": 1.506, "step": 17226 }, { "epoch": 0.9418421879911978, "grad_norm": 1.679336428642273, "learning_rate": 1.880691085867392e-07, "loss": 1.4879, "step": 17227 }, { "epoch": 0.9418968604349193, "grad_norm": 1.4398107528686523, "learning_rate": 1.8771653980562908e-07, "loss": 1.4097, "step": 17228 }, { "epoch": 0.9419515328786409, "grad_norm": 1.2772245407104492, "learning_rate": 1.8736429867968976e-07, "loss": 1.5529, "step": 17229 }, { "epoch": 0.9420062053223623, "grad_norm": 1.2587872743606567, "learning_rate": 1.8701238522068176e-07, "loss": 1.4104, "step": 17230 }, { "epoch": 0.9420608777660839, "grad_norm": 1.4714181423187256, "learning_rate": 1.8666079944035797e-07, "loss": 1.3774, "step": 17231 }, { "epoch": 0.9421155502098055, "grad_norm": 1.5144166946411133, "learning_rate": 1.8630954135045677e-07, "loss": 1.199, "step": 17232 }, { "epoch": 0.942170222653527, "grad_norm": 1.3428125381469727, "learning_rate": 1.8595861096270874e-07, "loss": 1.2798, "step": 17233 }, { "epoch": 0.9422248950972486, "grad_norm": 1.6797773838043213, "learning_rate": 1.8560800828883229e-07, "loss": 1.51, "step": 17234 }, { "epoch": 0.9422795675409702, "grad_norm": 1.7882206439971924, "learning_rate": 1.8525773334053476e-07, "loss": 1.4421, "step": 17235 }, { "epoch": 0.9423342399846917, "grad_norm": 1.1781219244003296, "learning_rate": 1.849077861295123e-07, "loss": 1.4982, "step": 17236 }, { "epoch": 0.9423889124284133, "grad_norm": 2.1065421104431152, "learning_rate": 1.8455816666745119e-07, "loss": 1.3743, "step": 17237 }, { "epoch": 0.9424435848721349, "grad_norm": 1.5961045026779175, "learning_rate": 1.8420887496602424e-07, "loss": 1.5513, "step": 17238 }, { "epoch": 0.9424982573158563, "grad_norm": 1.1467782258987427, "learning_rate": 1.838599110368977e-07, "loss": 1.71, "step": 17239 }, { "epoch": 0.9425529297595779, "grad_norm": 1.9707345962524414, "learning_rate": 1.8351127489172227e-07, "loss": 1.4068, "step": 17240 }, { "epoch": 0.9426076022032995, "grad_norm": 1.4554829597473145, "learning_rate": 1.8316296654214084e-07, "loss": 1.4187, "step": 17241 }, { "epoch": 0.942662274647021, "grad_norm": 1.6399961709976196, "learning_rate": 1.828149859997841e-07, "loss": 1.4917, "step": 17242 }, { "epoch": 0.9427169470907426, "grad_norm": 1.9606283903121948, "learning_rate": 1.8246733327627275e-07, "loss": 1.5131, "step": 17243 }, { "epoch": 0.9427716195344641, "grad_norm": 1.4627751111984253, "learning_rate": 1.8212000838321197e-07, "loss": 1.3286, "step": 17244 }, { "epoch": 0.9428262919781857, "grad_norm": 1.460056185722351, "learning_rate": 1.8177301133220472e-07, "loss": 1.5845, "step": 17245 }, { "epoch": 0.9428809644219073, "grad_norm": 1.5001277923583984, "learning_rate": 1.8142634213483502e-07, "loss": 1.1458, "step": 17246 }, { "epoch": 0.9429356368656288, "grad_norm": 1.4088212251663208, "learning_rate": 1.8108000080267918e-07, "loss": 1.2918, "step": 17247 }, { "epoch": 0.9429903093093503, "grad_norm": 1.8250846862792969, "learning_rate": 1.807339873473035e-07, "loss": 1.1521, "step": 17248 }, { "epoch": 0.9430449817530719, "grad_norm": 1.1366206407546997, "learning_rate": 1.8038830178026213e-07, "loss": 1.7295, "step": 17249 }, { "epoch": 0.9430996541967934, "grad_norm": 1.4772709608078003, "learning_rate": 1.8004294411309687e-07, "loss": 1.4262, "step": 17250 }, { "epoch": 0.943154326640515, "grad_norm": 1.5118082761764526, "learning_rate": 1.7969791435734184e-07, "loss": 1.5814, "step": 17251 }, { "epoch": 0.9432089990842366, "grad_norm": 1.4103820323944092, "learning_rate": 1.7935321252451677e-07, "loss": 1.2182, "step": 17252 }, { "epoch": 0.9432636715279581, "grad_norm": 1.620160460472107, "learning_rate": 1.7900883862613348e-07, "loss": 1.2346, "step": 17253 }, { "epoch": 0.9433183439716797, "grad_norm": 1.4941604137420654, "learning_rate": 1.7866479267369062e-07, "loss": 1.4894, "step": 17254 }, { "epoch": 0.9433730164154013, "grad_norm": 2.065631628036499, "learning_rate": 1.7832107467867676e-07, "loss": 1.5822, "step": 17255 }, { "epoch": 0.9434276888591228, "grad_norm": 1.7406818866729736, "learning_rate": 1.7797768465256938e-07, "loss": 1.3083, "step": 17256 }, { "epoch": 0.9434823613028444, "grad_norm": 2.2343838214874268, "learning_rate": 1.7763462260683483e-07, "loss": 1.1246, "step": 17257 }, { "epoch": 0.9435370337465658, "grad_norm": 1.8600939512252808, "learning_rate": 1.7729188855292957e-07, "loss": 1.3291, "step": 17258 }, { "epoch": 0.9435917061902874, "grad_norm": 1.3243321180343628, "learning_rate": 1.7694948250229772e-07, "loss": 1.641, "step": 17259 }, { "epoch": 0.943646378634009, "grad_norm": 1.8512529134750366, "learning_rate": 1.7660740446637348e-07, "loss": 1.6143, "step": 17260 }, { "epoch": 0.9437010510777305, "grad_norm": 1.4179112911224365, "learning_rate": 1.7626565445657883e-07, "loss": 1.4122, "step": 17261 }, { "epoch": 0.9437557235214521, "grad_norm": 1.7522650957107544, "learning_rate": 1.7592423248432577e-07, "loss": 1.2347, "step": 17262 }, { "epoch": 0.9438103959651737, "grad_norm": 2.138803005218506, "learning_rate": 1.7558313856101627e-07, "loss": 1.3791, "step": 17263 }, { "epoch": 0.9438650684088952, "grad_norm": 1.9855072498321533, "learning_rate": 1.75242372698039e-07, "loss": 1.4993, "step": 17264 }, { "epoch": 0.9439197408526168, "grad_norm": 2.476308584213257, "learning_rate": 1.7490193490677377e-07, "loss": 1.4563, "step": 17265 }, { "epoch": 0.9439744132963384, "grad_norm": 1.592214822769165, "learning_rate": 1.7456182519858812e-07, "loss": 1.1195, "step": 17266 }, { "epoch": 0.9440290857400598, "grad_norm": 1.2836847305297852, "learning_rate": 1.7422204358483962e-07, "loss": 1.4918, "step": 17267 }, { "epoch": 0.9440837581837814, "grad_norm": 1.6735223531723022, "learning_rate": 1.7388259007687368e-07, "loss": 1.359, "step": 17268 }, { "epoch": 0.944138430627503, "grad_norm": 1.4128252267837524, "learning_rate": 1.7354346468602567e-07, "loss": 1.5432, "step": 17269 }, { "epoch": 0.9441931030712245, "grad_norm": 1.739180088043213, "learning_rate": 1.7320466742361984e-07, "loss": 1.3097, "step": 17270 }, { "epoch": 0.9442477755149461, "grad_norm": 1.268173336982727, "learning_rate": 1.728661983009705e-07, "loss": 1.1538, "step": 17271 }, { "epoch": 0.9443024479586676, "grad_norm": 1.8257334232330322, "learning_rate": 1.7252805732937749e-07, "loss": 1.6573, "step": 17272 }, { "epoch": 0.9443571204023892, "grad_norm": 1.4112980365753174, "learning_rate": 1.72190244520134e-07, "loss": 1.535, "step": 17273 }, { "epoch": 0.9444117928461108, "grad_norm": 1.368749737739563, "learning_rate": 1.7185275988451987e-07, "loss": 1.4458, "step": 17274 }, { "epoch": 0.9444664652898322, "grad_norm": 1.6812782287597656, "learning_rate": 1.715156034338039e-07, "loss": 1.4576, "step": 17275 }, { "epoch": 0.9445211377335538, "grad_norm": 1.3378100395202637, "learning_rate": 1.7117877517924597e-07, "loss": 1.4017, "step": 17276 }, { "epoch": 0.9445758101772754, "grad_norm": 1.6647051572799683, "learning_rate": 1.7084227513209374e-07, "loss": 1.3864, "step": 17277 }, { "epoch": 0.9446304826209969, "grad_norm": 1.556023359298706, "learning_rate": 1.7050610330358043e-07, "loss": 1.3642, "step": 17278 }, { "epoch": 0.9446851550647185, "grad_norm": 2.1420624256134033, "learning_rate": 1.7017025970493595e-07, "loss": 1.4375, "step": 17279 }, { "epoch": 0.9447398275084401, "grad_norm": 1.4212844371795654, "learning_rate": 1.6983474434737246e-07, "loss": 1.364, "step": 17280 }, { "epoch": 0.9447944999521616, "grad_norm": 1.489741325378418, "learning_rate": 1.6949955724209433e-07, "loss": 1.5637, "step": 17281 }, { "epoch": 0.9448491723958832, "grad_norm": 1.576572060585022, "learning_rate": 1.691646984002937e-07, "loss": 1.334, "step": 17282 }, { "epoch": 0.9449038448396048, "grad_norm": 1.363443374633789, "learning_rate": 1.6883016783315165e-07, "loss": 1.57, "step": 17283 }, { "epoch": 0.9449585172833262, "grad_norm": 1.4978619813919067, "learning_rate": 1.6849596555184033e-07, "loss": 1.3501, "step": 17284 }, { "epoch": 0.9450131897270478, "grad_norm": 1.854430079460144, "learning_rate": 1.6816209156751973e-07, "loss": 1.2591, "step": 17285 }, { "epoch": 0.9450678621707693, "grad_norm": 1.2920362949371338, "learning_rate": 1.6782854589133645e-07, "loss": 1.5491, "step": 17286 }, { "epoch": 0.9451225346144909, "grad_norm": 3.173762083053589, "learning_rate": 1.6749532853443163e-07, "loss": 1.1074, "step": 17287 }, { "epoch": 0.9451772070582125, "grad_norm": 1.6573657989501953, "learning_rate": 1.6716243950793077e-07, "loss": 1.4709, "step": 17288 }, { "epoch": 0.945231879501934, "grad_norm": 1.5836642980575562, "learning_rate": 1.6682987882294722e-07, "loss": 1.2622, "step": 17289 }, { "epoch": 0.9452865519456556, "grad_norm": 3.1207213401794434, "learning_rate": 1.6649764649059097e-07, "loss": 1.2057, "step": 17290 }, { "epoch": 0.9453412243893772, "grad_norm": 1.7995555400848389, "learning_rate": 1.6616574252195205e-07, "loss": 1.3936, "step": 17291 }, { "epoch": 0.9453958968330987, "grad_norm": 1.6861021518707275, "learning_rate": 1.6583416692811382e-07, "loss": 1.3993, "step": 17292 }, { "epoch": 0.9454505692768203, "grad_norm": 2.1252381801605225, "learning_rate": 1.6550291972015077e-07, "loss": 1.3392, "step": 17293 }, { "epoch": 0.9455052417205418, "grad_norm": 1.5774991512298584, "learning_rate": 1.651720009091229e-07, "loss": 1.3284, "step": 17294 }, { "epoch": 0.9455599141642633, "grad_norm": 1.5218031406402588, "learning_rate": 1.6484141050607915e-07, "loss": 1.6296, "step": 17295 }, { "epoch": 0.9456145866079849, "grad_norm": 1.3276002407073975, "learning_rate": 1.6451114852206073e-07, "loss": 1.5384, "step": 17296 }, { "epoch": 0.9456692590517065, "grad_norm": 1.5782445669174194, "learning_rate": 1.6418121496809324e-07, "loss": 1.5004, "step": 17297 }, { "epoch": 0.945723931495428, "grad_norm": 1.3465347290039062, "learning_rate": 1.6385160985519566e-07, "loss": 1.5561, "step": 17298 }, { "epoch": 0.9457786039391496, "grad_norm": 1.3177013397216797, "learning_rate": 1.6352233319437473e-07, "loss": 1.2736, "step": 17299 }, { "epoch": 0.9458332763828711, "grad_norm": 1.3151229619979858, "learning_rate": 1.6319338499662496e-07, "loss": 1.4219, "step": 17300 }, { "epoch": 0.9458879488265927, "grad_norm": 1.3196696043014526, "learning_rate": 1.6286476527293095e-07, "loss": 1.3956, "step": 17301 }, { "epoch": 0.9459426212703143, "grad_norm": 1.611135721206665, "learning_rate": 1.625364740342661e-07, "loss": 1.2465, "step": 17302 }, { "epoch": 0.9459972937140357, "grad_norm": 2.017899990081787, "learning_rate": 1.6220851129159164e-07, "loss": 1.2898, "step": 17303 }, { "epoch": 0.9460519661577573, "grad_norm": 2.0072250366210938, "learning_rate": 1.6188087705586108e-07, "loss": 1.184, "step": 17304 }, { "epoch": 0.9461066386014789, "grad_norm": 1.1870527267456055, "learning_rate": 1.6155357133801342e-07, "loss": 1.3945, "step": 17305 }, { "epoch": 0.9461613110452004, "grad_norm": 1.392067551612854, "learning_rate": 1.6122659414897878e-07, "loss": 1.5407, "step": 17306 }, { "epoch": 0.946215983488922, "grad_norm": 1.656069278717041, "learning_rate": 1.6089994549967625e-07, "loss": 1.3728, "step": 17307 }, { "epoch": 0.9462706559326436, "grad_norm": 2.2169198989868164, "learning_rate": 1.6057362540101262e-07, "loss": 1.3703, "step": 17308 }, { "epoch": 0.9463253283763651, "grad_norm": 1.5438865423202515, "learning_rate": 1.6024763386388365e-07, "loss": 1.5225, "step": 17309 }, { "epoch": 0.9463800008200867, "grad_norm": 1.6396962404251099, "learning_rate": 1.5992197089917727e-07, "loss": 1.2594, "step": 17310 }, { "epoch": 0.9464346732638083, "grad_norm": 1.5764402151107788, "learning_rate": 1.595966365177648e-07, "loss": 1.5064, "step": 17311 }, { "epoch": 0.9464893457075297, "grad_norm": 1.5802706480026245, "learning_rate": 1.5927163073051312e-07, "loss": 1.47, "step": 17312 }, { "epoch": 0.9465440181512513, "grad_norm": 1.7830681800842285, "learning_rate": 1.589469535482735e-07, "loss": 1.6806, "step": 17313 }, { "epoch": 0.9465986905949728, "grad_norm": 1.43413245677948, "learning_rate": 1.586226049818873e-07, "loss": 1.5402, "step": 17314 }, { "epoch": 0.9466533630386944, "grad_norm": 1.8236738443374634, "learning_rate": 1.58298585042187e-07, "loss": 1.5975, "step": 17315 }, { "epoch": 0.946708035482416, "grad_norm": 1.778962254524231, "learning_rate": 1.5797489373999053e-07, "loss": 1.5211, "step": 17316 }, { "epoch": 0.9467627079261375, "grad_norm": 1.9986038208007812, "learning_rate": 1.576515310861071e-07, "loss": 1.3794, "step": 17317 }, { "epoch": 0.9468173803698591, "grad_norm": 1.2957944869995117, "learning_rate": 1.573284970913358e-07, "loss": 1.6833, "step": 17318 }, { "epoch": 0.9468720528135807, "grad_norm": 1.6030644178390503, "learning_rate": 1.5700579176646246e-07, "loss": 1.395, "step": 17319 }, { "epoch": 0.9469267252573021, "grad_norm": 1.5214239358901978, "learning_rate": 1.5668341512226182e-07, "loss": 1.4435, "step": 17320 }, { "epoch": 0.9469813977010237, "grad_norm": 1.4675339460372925, "learning_rate": 1.563613671695019e-07, "loss": 1.2438, "step": 17321 }, { "epoch": 0.9470360701447453, "grad_norm": 1.5119811296463013, "learning_rate": 1.56039647918933e-07, "loss": 1.2112, "step": 17322 }, { "epoch": 0.9470907425884668, "grad_norm": 1.2993046045303345, "learning_rate": 1.5571825738129987e-07, "loss": 1.4766, "step": 17323 }, { "epoch": 0.9471454150321884, "grad_norm": 1.5526769161224365, "learning_rate": 1.55397195567335e-07, "loss": 1.5578, "step": 17324 }, { "epoch": 0.94720008747591, "grad_norm": 1.3798779249191284, "learning_rate": 1.5507646248775875e-07, "loss": 1.4659, "step": 17325 }, { "epoch": 0.9472547599196315, "grad_norm": 1.1726787090301514, "learning_rate": 1.5475605815328142e-07, "loss": 1.6303, "step": 17326 }, { "epoch": 0.9473094323633531, "grad_norm": 1.5804920196533203, "learning_rate": 1.5443598257460225e-07, "loss": 1.3831, "step": 17327 }, { "epoch": 0.9473641048070747, "grad_norm": 1.4169187545776367, "learning_rate": 1.541162357624082e-07, "loss": 1.5092, "step": 17328 }, { "epoch": 0.9474187772507962, "grad_norm": 1.6182217597961426, "learning_rate": 1.5379681772737743e-07, "loss": 1.488, "step": 17329 }, { "epoch": 0.9474734496945177, "grad_norm": 1.6993556022644043, "learning_rate": 1.5347772848017584e-07, "loss": 1.4661, "step": 17330 }, { "epoch": 0.9475281221382392, "grad_norm": 2.005218267440796, "learning_rate": 1.5315896803145824e-07, "loss": 1.4454, "step": 17331 }, { "epoch": 0.9475827945819608, "grad_norm": 1.3349016904830933, "learning_rate": 1.5284053639186947e-07, "loss": 1.4641, "step": 17332 }, { "epoch": 0.9476374670256824, "grad_norm": 2.1234376430511475, "learning_rate": 1.5252243357204212e-07, "loss": 1.1683, "step": 17333 }, { "epoch": 0.9476921394694039, "grad_norm": 1.7425408363342285, "learning_rate": 1.5220465958259878e-07, "loss": 1.4048, "step": 17334 }, { "epoch": 0.9477468119131255, "grad_norm": 1.3675554990768433, "learning_rate": 1.5188721443414988e-07, "loss": 1.2868, "step": 17335 }, { "epoch": 0.9478014843568471, "grad_norm": 1.5011850595474243, "learning_rate": 1.5157009813729585e-07, "loss": 1.2037, "step": 17336 }, { "epoch": 0.9478561568005686, "grad_norm": 1.3544135093688965, "learning_rate": 1.5125331070262706e-07, "loss": 1.7736, "step": 17337 }, { "epoch": 0.9479108292442902, "grad_norm": 1.301040768623352, "learning_rate": 1.5093685214072173e-07, "loss": 1.4952, "step": 17338 }, { "epoch": 0.9479655016880117, "grad_norm": 1.8076975345611572, "learning_rate": 1.5062072246214476e-07, "loss": 1.3675, "step": 17339 }, { "epoch": 0.9480201741317332, "grad_norm": 1.5052213668823242, "learning_rate": 1.5030492167745547e-07, "loss": 1.2998, "step": 17340 }, { "epoch": 0.9480748465754548, "grad_norm": 1.7728323936462402, "learning_rate": 1.4998944979719765e-07, "loss": 1.2586, "step": 17341 }, { "epoch": 0.9481295190191764, "grad_norm": 1.5402034521102905, "learning_rate": 1.496743068319051e-07, "loss": 1.2898, "step": 17342 }, { "epoch": 0.9481841914628979, "grad_norm": 2.259567975997925, "learning_rate": 1.4935949279210272e-07, "loss": 1.5018, "step": 17343 }, { "epoch": 0.9482388639066195, "grad_norm": 1.6044565439224243, "learning_rate": 1.490450076883021e-07, "loss": 1.2501, "step": 17344 }, { "epoch": 0.948293536350341, "grad_norm": 1.6761183738708496, "learning_rate": 1.4873085153100485e-07, "loss": 1.519, "step": 17345 }, { "epoch": 0.9483482087940626, "grad_norm": 1.4359166622161865, "learning_rate": 1.4841702433070038e-07, "loss": 1.4788, "step": 17346 }, { "epoch": 0.9484028812377842, "grad_norm": 1.3469748497009277, "learning_rate": 1.4810352609787028e-07, "loss": 1.3828, "step": 17347 }, { "epoch": 0.9484575536815056, "grad_norm": 1.3997695446014404, "learning_rate": 1.477903568429795e-07, "loss": 1.4604, "step": 17348 }, { "epoch": 0.9485122261252272, "grad_norm": 1.8171831369400024, "learning_rate": 1.4747751657648968e-07, "loss": 1.5183, "step": 17349 }, { "epoch": 0.9485668985689488, "grad_norm": 1.6427499055862427, "learning_rate": 1.471650053088436e-07, "loss": 1.3738, "step": 17350 }, { "epoch": 0.9486215710126703, "grad_norm": 1.4222424030303955, "learning_rate": 1.4685282305047956e-07, "loss": 1.5068, "step": 17351 }, { "epoch": 0.9486762434563919, "grad_norm": 1.3259484767913818, "learning_rate": 1.4654096981182031e-07, "loss": 1.6597, "step": 17352 }, { "epoch": 0.9487309159001135, "grad_norm": 1.986556887626648, "learning_rate": 1.462294456032798e-07, "loss": 1.5338, "step": 17353 }, { "epoch": 0.948785588343835, "grad_norm": 1.4318779706954956, "learning_rate": 1.4591825043526075e-07, "loss": 1.2454, "step": 17354 }, { "epoch": 0.9488402607875566, "grad_norm": 1.5087004899978638, "learning_rate": 1.45607384318156e-07, "loss": 1.3812, "step": 17355 }, { "epoch": 0.9488949332312782, "grad_norm": 1.3225516080856323, "learning_rate": 1.4529684726234284e-07, "loss": 1.4056, "step": 17356 }, { "epoch": 0.9489496056749996, "grad_norm": 1.6883430480957031, "learning_rate": 1.44986639278194e-07, "loss": 1.3894, "step": 17357 }, { "epoch": 0.9490042781187212, "grad_norm": 1.674404501914978, "learning_rate": 1.4467676037606682e-07, "loss": 1.5443, "step": 17358 }, { "epoch": 0.9490589505624427, "grad_norm": 1.3851677179336548, "learning_rate": 1.4436721056630853e-07, "loss": 1.4708, "step": 17359 }, { "epoch": 0.9491136230061643, "grad_norm": 1.5868773460388184, "learning_rate": 1.4405798985925533e-07, "loss": 1.3001, "step": 17360 }, { "epoch": 0.9491682954498859, "grad_norm": 1.748565673828125, "learning_rate": 1.4374909826523453e-07, "loss": 1.4509, "step": 17361 }, { "epoch": 0.9492229678936074, "grad_norm": 1.7967135906219482, "learning_rate": 1.4344053579455897e-07, "loss": 1.2711, "step": 17362 }, { "epoch": 0.949277640337329, "grad_norm": 1.4353922605514526, "learning_rate": 1.4313230245753374e-07, "loss": 1.4206, "step": 17363 }, { "epoch": 0.9493323127810506, "grad_norm": 1.5351747274398804, "learning_rate": 1.428243982644506e-07, "loss": 1.426, "step": 17364 }, { "epoch": 0.949386985224772, "grad_norm": 1.9264953136444092, "learning_rate": 1.4251682322559134e-07, "loss": 1.4484, "step": 17365 }, { "epoch": 0.9494416576684936, "grad_norm": 2.0861613750457764, "learning_rate": 1.4220957735122663e-07, "loss": 1.2427, "step": 17366 }, { "epoch": 0.9494963301122152, "grad_norm": 1.7459875345230103, "learning_rate": 1.4190266065161607e-07, "loss": 1.3832, "step": 17367 }, { "epoch": 0.9495510025559367, "grad_norm": 1.245659351348877, "learning_rate": 1.4159607313700808e-07, "loss": 1.7409, "step": 17368 }, { "epoch": 0.9496056749996583, "grad_norm": 1.2656512260437012, "learning_rate": 1.4128981481764115e-07, "loss": 1.4132, "step": 17369 }, { "epoch": 0.9496603474433799, "grad_norm": 1.461157202720642, "learning_rate": 1.4098388570374154e-07, "loss": 1.3984, "step": 17370 }, { "epoch": 0.9497150198871014, "grad_norm": 1.570082664489746, "learning_rate": 1.4067828580552445e-07, "loss": 1.381, "step": 17371 }, { "epoch": 0.949769692330823, "grad_norm": 1.6940218210220337, "learning_rate": 1.4037301513319613e-07, "loss": 1.234, "step": 17372 }, { "epoch": 0.9498243647745445, "grad_norm": 1.3631062507629395, "learning_rate": 1.400680736969484e-07, "loss": 1.2141, "step": 17373 }, { "epoch": 0.949879037218266, "grad_norm": 1.3376948833465576, "learning_rate": 1.397634615069654e-07, "loss": 1.6302, "step": 17374 }, { "epoch": 0.9499337096619876, "grad_norm": 1.7953240871429443, "learning_rate": 1.3945917857341673e-07, "loss": 1.1768, "step": 17375 }, { "epoch": 0.9499883821057091, "grad_norm": 1.5210509300231934, "learning_rate": 1.3915522490646538e-07, "loss": 1.5663, "step": 17376 }, { "epoch": 0.9500430545494307, "grad_norm": 1.9397388696670532, "learning_rate": 1.38851600516261e-07, "loss": 1.5385, "step": 17377 }, { "epoch": 0.9500977269931523, "grad_norm": 2.9904658794403076, "learning_rate": 1.3854830541294105e-07, "loss": 1.3296, "step": 17378 }, { "epoch": 0.9501523994368738, "grad_norm": 1.604390025138855, "learning_rate": 1.38245339606633e-07, "loss": 1.3921, "step": 17379 }, { "epoch": 0.9502070718805954, "grad_norm": 1.4751851558685303, "learning_rate": 1.3794270310745538e-07, "loss": 1.4399, "step": 17380 }, { "epoch": 0.950261744324317, "grad_norm": 1.4077051877975464, "learning_rate": 1.3764039592551125e-07, "loss": 1.5396, "step": 17381 }, { "epoch": 0.9503164167680385, "grad_norm": 1.8404072523117065, "learning_rate": 1.3733841807089921e-07, "loss": 1.5603, "step": 17382 }, { "epoch": 0.9503710892117601, "grad_norm": 1.6140527725219727, "learning_rate": 1.3703676955370003e-07, "loss": 1.4756, "step": 17383 }, { "epoch": 0.9504257616554816, "grad_norm": 1.7257187366485596, "learning_rate": 1.3673545038398683e-07, "loss": 1.4149, "step": 17384 }, { "epoch": 0.9504804340992031, "grad_norm": 1.271759271621704, "learning_rate": 1.3643446057182264e-07, "loss": 1.5957, "step": 17385 }, { "epoch": 0.9505351065429247, "grad_norm": 1.2290958166122437, "learning_rate": 1.3613380012725718e-07, "loss": 1.413, "step": 17386 }, { "epoch": 0.9505897789866462, "grad_norm": 1.317884922027588, "learning_rate": 1.3583346906033024e-07, "loss": 1.6181, "step": 17387 }, { "epoch": 0.9506444514303678, "grad_norm": 1.2963099479675293, "learning_rate": 1.3553346738107044e-07, "loss": 1.3979, "step": 17388 }, { "epoch": 0.9506991238740894, "grad_norm": 1.2764418125152588, "learning_rate": 1.352337950994964e-07, "loss": 1.5266, "step": 17389 }, { "epoch": 0.9507537963178109, "grad_norm": 1.8502956628799438, "learning_rate": 1.3493445222561353e-07, "loss": 1.5098, "step": 17390 }, { "epoch": 0.9508084687615325, "grad_norm": 1.165892243385315, "learning_rate": 1.346354387694193e-07, "loss": 1.458, "step": 17391 }, { "epoch": 0.9508631412052541, "grad_norm": 1.308336853981018, "learning_rate": 1.3433675474089803e-07, "loss": 1.6597, "step": 17392 }, { "epoch": 0.9509178136489755, "grad_norm": 1.2414157390594482, "learning_rate": 1.3403840015002168e-07, "loss": 1.7461, "step": 17393 }, { "epoch": 0.9509724860926971, "grad_norm": 1.4176967144012451, "learning_rate": 1.3374037500675452e-07, "loss": 1.5118, "step": 17394 }, { "epoch": 0.9510271585364187, "grad_norm": 1.9906154870986938, "learning_rate": 1.334426793210486e-07, "loss": 1.266, "step": 17395 }, { "epoch": 0.9510818309801402, "grad_norm": 1.2860534191131592, "learning_rate": 1.3314531310284485e-07, "loss": 1.3856, "step": 17396 }, { "epoch": 0.9511365034238618, "grad_norm": 1.3343631029129028, "learning_rate": 1.3284827636207198e-07, "loss": 1.4078, "step": 17397 }, { "epoch": 0.9511911758675834, "grad_norm": 2.0418710708618164, "learning_rate": 1.3255156910864874e-07, "loss": 1.3865, "step": 17398 }, { "epoch": 0.9512458483113049, "grad_norm": 1.101989984512329, "learning_rate": 1.322551913524839e-07, "loss": 1.5552, "step": 17399 }, { "epoch": 0.9513005207550265, "grad_norm": 2.124753713607788, "learning_rate": 1.319591431034728e-07, "loss": 1.4903, "step": 17400 }, { "epoch": 0.951355193198748, "grad_norm": 1.4074350595474243, "learning_rate": 1.3166342437150204e-07, "loss": 1.7526, "step": 17401 }, { "epoch": 0.9514098656424695, "grad_norm": 1.5530991554260254, "learning_rate": 1.3136803516644704e-07, "loss": 1.4816, "step": 17402 }, { "epoch": 0.9514645380861911, "grad_norm": 1.6141290664672852, "learning_rate": 1.310729754981699e-07, "loss": 1.3102, "step": 17403 }, { "epoch": 0.9515192105299126, "grad_norm": 1.5296083688735962, "learning_rate": 1.3077824537652494e-07, "loss": 1.215, "step": 17404 }, { "epoch": 0.9515738829736342, "grad_norm": 1.5614758729934692, "learning_rate": 1.3048384481135323e-07, "loss": 1.4713, "step": 17405 }, { "epoch": 0.9516285554173558, "grad_norm": 1.5725505352020264, "learning_rate": 1.3018977381248576e-07, "loss": 1.3224, "step": 17406 }, { "epoch": 0.9516832278610773, "grad_norm": 1.6957147121429443, "learning_rate": 1.2989603238974024e-07, "loss": 1.5749, "step": 17407 }, { "epoch": 0.9517379003047989, "grad_norm": 2.3359758853912354, "learning_rate": 1.2960262055292884e-07, "loss": 1.477, "step": 17408 }, { "epoch": 0.9517925727485205, "grad_norm": 0.9914867281913757, "learning_rate": 1.2930953831184701e-07, "loss": 1.5408, "step": 17409 }, { "epoch": 0.951847245192242, "grad_norm": 1.9866180419921875, "learning_rate": 1.290167856762825e-07, "loss": 1.3581, "step": 17410 }, { "epoch": 0.9519019176359635, "grad_norm": 1.4384222030639648, "learning_rate": 1.2872436265600973e-07, "loss": 1.1469, "step": 17411 }, { "epoch": 0.9519565900796851, "grad_norm": 1.5824651718139648, "learning_rate": 1.2843226926079532e-07, "loss": 1.2332, "step": 17412 }, { "epoch": 0.9520112625234066, "grad_norm": 1.7253414392471313, "learning_rate": 1.2814050550039148e-07, "loss": 1.4551, "step": 17413 }, { "epoch": 0.9520659349671282, "grad_norm": 2.238996982574463, "learning_rate": 1.278490713845404e-07, "loss": 1.4047, "step": 17414 }, { "epoch": 0.9521206074108497, "grad_norm": 1.485696792602539, "learning_rate": 1.275579669229743e-07, "loss": 1.1942, "step": 17415 }, { "epoch": 0.9521752798545713, "grad_norm": 2.1452414989471436, "learning_rate": 1.2726719212541538e-07, "loss": 1.0768, "step": 17416 }, { "epoch": 0.9522299522982929, "grad_norm": 1.4953473806381226, "learning_rate": 1.2697674700157148e-07, "loss": 1.4284, "step": 17417 }, { "epoch": 0.9522846247420144, "grad_norm": 1.7563396692276, "learning_rate": 1.2668663156114036e-07, "loss": 1.1195, "step": 17418 }, { "epoch": 0.952339297185736, "grad_norm": 1.5226013660430908, "learning_rate": 1.2639684581381317e-07, "loss": 1.2888, "step": 17419 }, { "epoch": 0.9523939696294575, "grad_norm": 1.5432265996932983, "learning_rate": 1.261073897692633e-07, "loss": 1.6516, "step": 17420 }, { "epoch": 0.952448642073179, "grad_norm": 1.1994613409042358, "learning_rate": 1.258182634371574e-07, "loss": 1.4336, "step": 17421 }, { "epoch": 0.9525033145169006, "grad_norm": 1.6514062881469727, "learning_rate": 1.2552946682715116e-07, "loss": 1.3503, "step": 17422 }, { "epoch": 0.9525579869606222, "grad_norm": 1.759434461593628, "learning_rate": 1.2524099994888683e-07, "loss": 1.2771, "step": 17423 }, { "epoch": 0.9526126594043437, "grad_norm": 1.4237364530563354, "learning_rate": 1.249528628119967e-07, "loss": 1.342, "step": 17424 }, { "epoch": 0.9526673318480653, "grad_norm": 1.4029662609100342, "learning_rate": 1.2466505542610419e-07, "loss": 1.4783, "step": 17425 }, { "epoch": 0.9527220042917869, "grad_norm": 1.7603795528411865, "learning_rate": 1.2437757780081717e-07, "loss": 1.2952, "step": 17426 }, { "epoch": 0.9527766767355084, "grad_norm": 1.629775047302246, "learning_rate": 1.2409042994573795e-07, "loss": 1.7857, "step": 17427 }, { "epoch": 0.95283134917923, "grad_norm": 1.5652638673782349, "learning_rate": 1.238036118704533e-07, "loss": 1.4016, "step": 17428 }, { "epoch": 0.9528860216229514, "grad_norm": 1.407573938369751, "learning_rate": 1.2351712358454115e-07, "loss": 1.4898, "step": 17429 }, { "epoch": 0.952940694066673, "grad_norm": 1.8449550867080688, "learning_rate": 1.2323096509756827e-07, "loss": 1.2661, "step": 17430 }, { "epoch": 0.9529953665103946, "grad_norm": 1.525801420211792, "learning_rate": 1.2294513641909034e-07, "loss": 1.5141, "step": 17431 }, { "epoch": 0.9530500389541161, "grad_norm": 1.5022960901260376, "learning_rate": 1.2265963755865083e-07, "loss": 1.2992, "step": 17432 }, { "epoch": 0.9531047113978377, "grad_norm": 1.4420037269592285, "learning_rate": 1.2237446852578438e-07, "loss": 1.5067, "step": 17433 }, { "epoch": 0.9531593838415593, "grad_norm": 1.272944688796997, "learning_rate": 1.2208962933001333e-07, "loss": 1.4781, "step": 17434 }, { "epoch": 0.9532140562852808, "grad_norm": 1.406153917312622, "learning_rate": 1.2180511998084788e-07, "loss": 1.4099, "step": 17435 }, { "epoch": 0.9532687287290024, "grad_norm": 1.7589031457901, "learning_rate": 1.215209404877904e-07, "loss": 1.4918, "step": 17436 }, { "epoch": 0.953323401172724, "grad_norm": 1.8165531158447266, "learning_rate": 1.2123709086032887e-07, "loss": 1.3763, "step": 17437 }, { "epoch": 0.9533780736164454, "grad_norm": 1.316490650177002, "learning_rate": 1.2095357110794238e-07, "loss": 1.4999, "step": 17438 }, { "epoch": 0.953432746060167, "grad_norm": 1.5174591541290283, "learning_rate": 1.2067038124009778e-07, "loss": 1.2883, "step": 17439 }, { "epoch": 0.9534874185038886, "grad_norm": 1.7129734754562378, "learning_rate": 1.2038752126625087e-07, "loss": 1.2155, "step": 17440 }, { "epoch": 0.9535420909476101, "grad_norm": 1.5638917684555054, "learning_rate": 1.2010499119584963e-07, "loss": 1.4163, "step": 17441 }, { "epoch": 0.9535967633913317, "grad_norm": 1.3755930662155151, "learning_rate": 1.198227910383254e-07, "loss": 1.5324, "step": 17442 }, { "epoch": 0.9536514358350532, "grad_norm": 1.604656457901001, "learning_rate": 1.1954092080310288e-07, "loss": 1.5142, "step": 17443 }, { "epoch": 0.9537061082787748, "grad_norm": 1.5429866313934326, "learning_rate": 1.192593804995945e-07, "loss": 1.4748, "step": 17444 }, { "epoch": 0.9537607807224964, "grad_norm": 1.4704986810684204, "learning_rate": 1.189781701372017e-07, "loss": 1.245, "step": 17445 }, { "epoch": 0.9538154531662179, "grad_norm": 1.5254805088043213, "learning_rate": 1.1869728972531247e-07, "loss": 1.3911, "step": 17446 }, { "epoch": 0.9538701256099394, "grad_norm": 1.4067809581756592, "learning_rate": 1.1841673927331043e-07, "loss": 1.2889, "step": 17447 }, { "epoch": 0.953924798053661, "grad_norm": 1.805158019065857, "learning_rate": 1.1813651879056031e-07, "loss": 1.3139, "step": 17448 }, { "epoch": 0.9539794704973825, "grad_norm": 1.9331554174423218, "learning_rate": 1.1785662828641908e-07, "loss": 1.6057, "step": 17449 }, { "epoch": 0.9540341429411041, "grad_norm": 1.7020938396453857, "learning_rate": 1.1757706777023592e-07, "loss": 1.3325, "step": 17450 }, { "epoch": 0.9540888153848257, "grad_norm": 1.6317055225372314, "learning_rate": 1.1729783725134336e-07, "loss": 1.3426, "step": 17451 }, { "epoch": 0.9541434878285472, "grad_norm": 1.4385815858840942, "learning_rate": 1.1701893673906729e-07, "loss": 1.3852, "step": 17452 }, { "epoch": 0.9541981602722688, "grad_norm": 1.707906723022461, "learning_rate": 1.1674036624272023e-07, "loss": 1.5969, "step": 17453 }, { "epoch": 0.9542528327159904, "grad_norm": 1.6605297327041626, "learning_rate": 1.1646212577160254e-07, "loss": 1.3402, "step": 17454 }, { "epoch": 0.9543075051597119, "grad_norm": 1.6197961568832397, "learning_rate": 1.1618421533500901e-07, "loss": 1.4088, "step": 17455 }, { "epoch": 0.9543621776034334, "grad_norm": 1.7486169338226318, "learning_rate": 1.1590663494221665e-07, "loss": 1.3756, "step": 17456 }, { "epoch": 0.9544168500471549, "grad_norm": 1.3311212062835693, "learning_rate": 1.1562938460249473e-07, "loss": 1.4676, "step": 17457 }, { "epoch": 0.9544715224908765, "grad_norm": 1.340414047241211, "learning_rate": 1.153524643251025e-07, "loss": 1.3139, "step": 17458 }, { "epoch": 0.9545261949345981, "grad_norm": 1.3878620862960815, "learning_rate": 1.15075874119287e-07, "loss": 1.4651, "step": 17459 }, { "epoch": 0.9545808673783196, "grad_norm": 1.6905694007873535, "learning_rate": 1.1479961399428308e-07, "loss": 1.2828, "step": 17460 }, { "epoch": 0.9546355398220412, "grad_norm": 1.5974628925323486, "learning_rate": 1.1452368395931668e-07, "loss": 1.4542, "step": 17461 }, { "epoch": 0.9546902122657628, "grad_norm": 1.5771551132202148, "learning_rate": 1.1424808402360266e-07, "loss": 1.3563, "step": 17462 }, { "epoch": 0.9547448847094843, "grad_norm": 1.4351445436477661, "learning_rate": 1.1397281419634143e-07, "loss": 1.3418, "step": 17463 }, { "epoch": 0.9547995571532059, "grad_norm": 1.2787994146347046, "learning_rate": 1.1369787448672675e-07, "loss": 1.5098, "step": 17464 }, { "epoch": 0.9548542295969275, "grad_norm": 1.6294251680374146, "learning_rate": 1.1342326490393796e-07, "loss": 1.5061, "step": 17465 }, { "epoch": 0.9549089020406489, "grad_norm": 2.40165638923645, "learning_rate": 1.1314898545714769e-07, "loss": 1.4248, "step": 17466 }, { "epoch": 0.9549635744843705, "grad_norm": 1.5148097276687622, "learning_rate": 1.1287503615551199e-07, "loss": 1.323, "step": 17467 }, { "epoch": 0.9550182469280921, "grad_norm": 1.633987307548523, "learning_rate": 1.1260141700817906e-07, "loss": 1.3541, "step": 17468 }, { "epoch": 0.9550729193718136, "grad_norm": 1.6371678113937378, "learning_rate": 1.1232812802428716e-07, "loss": 1.2822, "step": 17469 }, { "epoch": 0.9551275918155352, "grad_norm": 1.6261415481567383, "learning_rate": 1.1205516921296122e-07, "loss": 1.5349, "step": 17470 }, { "epoch": 0.9551822642592567, "grad_norm": 1.733928918838501, "learning_rate": 1.1178254058331616e-07, "loss": 1.2786, "step": 17471 }, { "epoch": 0.9552369367029783, "grad_norm": 1.461744785308838, "learning_rate": 1.1151024214445472e-07, "loss": 1.1982, "step": 17472 }, { "epoch": 0.9552916091466999, "grad_norm": 1.5451020002365112, "learning_rate": 1.1123827390547071e-07, "loss": 1.3353, "step": 17473 }, { "epoch": 0.9553462815904213, "grad_norm": 1.4944528341293335, "learning_rate": 1.1096663587544576e-07, "loss": 1.5836, "step": 17474 }, { "epoch": 0.9554009540341429, "grad_norm": 1.3157716989517212, "learning_rate": 1.106953280634504e-07, "loss": 1.3622, "step": 17475 }, { "epoch": 0.9554556264778645, "grad_norm": 1.3350770473480225, "learning_rate": 1.1042435047854295e-07, "loss": 1.4874, "step": 17476 }, { "epoch": 0.955510298921586, "grad_norm": 1.436903715133667, "learning_rate": 1.1015370312977392e-07, "loss": 1.2766, "step": 17477 }, { "epoch": 0.9555649713653076, "grad_norm": 1.657612919807434, "learning_rate": 1.0988338602618053e-07, "loss": 1.0604, "step": 17478 }, { "epoch": 0.9556196438090292, "grad_norm": 1.4626275300979614, "learning_rate": 1.096133991767867e-07, "loss": 1.438, "step": 17479 }, { "epoch": 0.9556743162527507, "grad_norm": 1.531590461730957, "learning_rate": 1.0934374259061187e-07, "loss": 1.3503, "step": 17480 }, { "epoch": 0.9557289886964723, "grad_norm": 1.6102181673049927, "learning_rate": 1.0907441627665883e-07, "loss": 1.1733, "step": 17481 }, { "epoch": 0.9557836611401939, "grad_norm": 2.022961139678955, "learning_rate": 1.0880542024391927e-07, "loss": 1.3791, "step": 17482 }, { "epoch": 0.9558383335839153, "grad_norm": 1.4999147653579712, "learning_rate": 1.0853675450137824e-07, "loss": 1.4672, "step": 17483 }, { "epoch": 0.9558930060276369, "grad_norm": 1.5808871984481812, "learning_rate": 1.0826841905800522e-07, "loss": 1.2893, "step": 17484 }, { "epoch": 0.9559476784713584, "grad_norm": 1.7207852602005005, "learning_rate": 1.0800041392276194e-07, "loss": 1.4007, "step": 17485 }, { "epoch": 0.95600235091508, "grad_norm": 1.377780795097351, "learning_rate": 1.0773273910459681e-07, "loss": 1.4824, "step": 17486 }, { "epoch": 0.9560570233588016, "grad_norm": 1.2729235887527466, "learning_rate": 1.0746539461244932e-07, "loss": 1.3233, "step": 17487 }, { "epoch": 0.9561116958025231, "grad_norm": 1.2718158960342407, "learning_rate": 1.0719838045524456e-07, "loss": 1.4477, "step": 17488 }, { "epoch": 0.9561663682462447, "grad_norm": 1.401106357574463, "learning_rate": 1.0693169664190095e-07, "loss": 1.2504, "step": 17489 }, { "epoch": 0.9562210406899663, "grad_norm": 1.6507076025009155, "learning_rate": 1.0666534318132249e-07, "loss": 1.3207, "step": 17490 }, { "epoch": 0.9562757131336878, "grad_norm": 1.4298324584960938, "learning_rate": 1.0639932008240428e-07, "loss": 1.3033, "step": 17491 }, { "epoch": 0.9563303855774093, "grad_norm": 1.5806547403335571, "learning_rate": 1.061336273540281e-07, "loss": 1.337, "step": 17492 }, { "epoch": 0.9563850580211309, "grad_norm": 1.6934452056884766, "learning_rate": 1.0586826500506686e-07, "loss": 1.4943, "step": 17493 }, { "epoch": 0.9564397304648524, "grad_norm": 1.6062157154083252, "learning_rate": 1.0560323304438125e-07, "loss": 1.2096, "step": 17494 }, { "epoch": 0.956494402908574, "grad_norm": 1.7368274927139282, "learning_rate": 1.0533853148082197e-07, "loss": 1.106, "step": 17495 }, { "epoch": 0.9565490753522956, "grad_norm": 1.7108640670776367, "learning_rate": 1.0507416032322748e-07, "loss": 1.4568, "step": 17496 }, { "epoch": 0.9566037477960171, "grad_norm": 1.4876011610031128, "learning_rate": 1.048101195804263e-07, "loss": 1.3551, "step": 17497 }, { "epoch": 0.9566584202397387, "grad_norm": 1.5795460939407349, "learning_rate": 1.0454640926123583e-07, "loss": 1.5699, "step": 17498 }, { "epoch": 0.9567130926834602, "grad_norm": 1.4469879865646362, "learning_rate": 1.0428302937445899e-07, "loss": 1.5021, "step": 17499 }, { "epoch": 0.9567677651271818, "grad_norm": 2.0895142555236816, "learning_rate": 1.0401997992889434e-07, "loss": 1.3002, "step": 17500 }, { "epoch": 0.9568224375709034, "grad_norm": 1.4983196258544922, "learning_rate": 1.0375726093332484e-07, "loss": 1.2885, "step": 17501 }, { "epoch": 0.9568771100146248, "grad_norm": 1.4952207803726196, "learning_rate": 1.0349487239652123e-07, "loss": 1.2821, "step": 17502 }, { "epoch": 0.9569317824583464, "grad_norm": 1.7172807455062866, "learning_rate": 1.0323281432724763e-07, "loss": 1.5643, "step": 17503 }, { "epoch": 0.956986454902068, "grad_norm": 1.2485263347625732, "learning_rate": 1.0297108673425371e-07, "loss": 1.5326, "step": 17504 }, { "epoch": 0.9570411273457895, "grad_norm": 1.6645402908325195, "learning_rate": 1.0270968962627914e-07, "loss": 1.5804, "step": 17505 }, { "epoch": 0.9570957997895111, "grad_norm": 1.4323521852493286, "learning_rate": 1.024486230120525e-07, "loss": 1.5946, "step": 17506 }, { "epoch": 0.9571504722332327, "grad_norm": 1.7976620197296143, "learning_rate": 1.0218788690029124e-07, "loss": 1.3472, "step": 17507 }, { "epoch": 0.9572051446769542, "grad_norm": 1.6210484504699707, "learning_rate": 1.0192748129970287e-07, "loss": 1.4393, "step": 17508 }, { "epoch": 0.9572598171206758, "grad_norm": 1.2961739301681519, "learning_rate": 1.0166740621898374e-07, "loss": 1.4064, "step": 17509 }, { "epoch": 0.9573144895643974, "grad_norm": 1.5794166326522827, "learning_rate": 1.014076616668147e-07, "loss": 1.3346, "step": 17510 }, { "epoch": 0.9573691620081188, "grad_norm": 1.7723753452301025, "learning_rate": 1.0114824765187326e-07, "loss": 1.1807, "step": 17511 }, { "epoch": 0.9574238344518404, "grad_norm": 1.478861689567566, "learning_rate": 1.0088916418282024e-07, "loss": 1.3684, "step": 17512 }, { "epoch": 0.9574785068955619, "grad_norm": 1.518378496170044, "learning_rate": 1.0063041126830542e-07, "loss": 1.4039, "step": 17513 }, { "epoch": 0.9575331793392835, "grad_norm": 2.3422675132751465, "learning_rate": 1.0037198891697298e-07, "loss": 1.4893, "step": 17514 }, { "epoch": 0.9575878517830051, "grad_norm": 1.6307684183120728, "learning_rate": 1.0011389713744824e-07, "loss": 1.4388, "step": 17515 }, { "epoch": 0.9576425242267266, "grad_norm": 1.3664789199829102, "learning_rate": 9.985613593835319e-08, "loss": 1.5445, "step": 17516 }, { "epoch": 0.9576971966704482, "grad_norm": 1.5340052843093872, "learning_rate": 9.959870532829208e-08, "loss": 1.5228, "step": 17517 }, { "epoch": 0.9577518691141698, "grad_norm": 1.549741506576538, "learning_rate": 9.934160531586134e-08, "loss": 1.1921, "step": 17518 }, { "epoch": 0.9578065415578912, "grad_norm": 1.551722526550293, "learning_rate": 9.908483590964746e-08, "loss": 1.4453, "step": 17519 }, { "epoch": 0.9578612140016128, "grad_norm": 1.4778258800506592, "learning_rate": 9.882839711822468e-08, "loss": 1.4652, "step": 17520 }, { "epoch": 0.9579158864453344, "grad_norm": 1.6564818620681763, "learning_rate": 9.857228895015503e-08, "loss": 1.7074, "step": 17521 }, { "epoch": 0.9579705588890559, "grad_norm": 1.6531648635864258, "learning_rate": 9.831651141399167e-08, "loss": 1.4259, "step": 17522 }, { "epoch": 0.9580252313327775, "grad_norm": 1.1662710905075073, "learning_rate": 9.806106451827557e-08, "loss": 1.5104, "step": 17523 }, { "epoch": 0.9580799037764991, "grad_norm": 1.426468849182129, "learning_rate": 9.780594827153434e-08, "loss": 1.2098, "step": 17524 }, { "epoch": 0.9581345762202206, "grad_norm": 1.2411556243896484, "learning_rate": 9.755116268229003e-08, "loss": 1.5404, "step": 17525 }, { "epoch": 0.9581892486639422, "grad_norm": 1.3594927787780762, "learning_rate": 9.72967077590492e-08, "loss": 1.5281, "step": 17526 }, { "epoch": 0.9582439211076638, "grad_norm": 1.331589698791504, "learning_rate": 9.704258351030838e-08, "loss": 1.5624, "step": 17527 }, { "epoch": 0.9582985935513852, "grad_norm": 1.419104814529419, "learning_rate": 9.678878994455298e-08, "loss": 1.4579, "step": 17528 }, { "epoch": 0.9583532659951068, "grad_norm": 1.3725148439407349, "learning_rate": 9.653532707025959e-08, "loss": 1.2679, "step": 17529 }, { "epoch": 0.9584079384388283, "grad_norm": 1.799760341644287, "learning_rate": 9.628219489589141e-08, "loss": 1.5448, "step": 17530 }, { "epoch": 0.9584626108825499, "grad_norm": 1.418286919593811, "learning_rate": 9.602939342989948e-08, "loss": 1.5584, "step": 17531 }, { "epoch": 0.9585172833262715, "grad_norm": 1.6597708463668823, "learning_rate": 9.577692268072702e-08, "loss": 1.2565, "step": 17532 }, { "epoch": 0.958571955769993, "grad_norm": 1.5096572637557983, "learning_rate": 9.552478265680287e-08, "loss": 1.3814, "step": 17533 }, { "epoch": 0.9586266282137146, "grad_norm": 1.1999062299728394, "learning_rate": 9.527297336654917e-08, "loss": 1.6635, "step": 17534 }, { "epoch": 0.9586813006574362, "grad_norm": 1.2752246856689453, "learning_rate": 9.502149481837252e-08, "loss": 1.4546, "step": 17535 }, { "epoch": 0.9587359731011577, "grad_norm": 1.6483373641967773, "learning_rate": 9.477034702067067e-08, "loss": 1.3304, "step": 17536 }, { "epoch": 0.9587906455448792, "grad_norm": 1.26239013671875, "learning_rate": 9.451952998183022e-08, "loss": 1.4686, "step": 17537 }, { "epoch": 0.9588453179886008, "grad_norm": 1.3488013744354248, "learning_rate": 9.426904371022672e-08, "loss": 1.5505, "step": 17538 }, { "epoch": 0.9588999904323223, "grad_norm": 1.5973412990570068, "learning_rate": 9.401888821422566e-08, "loss": 1.2397, "step": 17539 }, { "epoch": 0.9589546628760439, "grad_norm": 1.9464695453643799, "learning_rate": 9.376906350217819e-08, "loss": 1.2395, "step": 17540 }, { "epoch": 0.9590093353197655, "grad_norm": 1.2574294805526733, "learning_rate": 9.351956958242648e-08, "loss": 1.5332, "step": 17541 }, { "epoch": 0.959064007763487, "grad_norm": 1.3051835298538208, "learning_rate": 9.327040646330388e-08, "loss": 1.4623, "step": 17542 }, { "epoch": 0.9591186802072086, "grad_norm": 1.555262804031372, "learning_rate": 9.302157415312929e-08, "loss": 1.3147, "step": 17543 }, { "epoch": 0.9591733526509301, "grad_norm": 1.6330512762069702, "learning_rate": 9.277307266021052e-08, "loss": 1.3968, "step": 17544 }, { "epoch": 0.9592280250946517, "grad_norm": 1.3202968835830688, "learning_rate": 9.252490199284758e-08, "loss": 1.4449, "step": 17545 }, { "epoch": 0.9592826975383733, "grad_norm": 1.4638392925262451, "learning_rate": 9.227706215932719e-08, "loss": 1.4127, "step": 17546 }, { "epoch": 0.9593373699820947, "grad_norm": 1.700455665588379, "learning_rate": 9.202955316792384e-08, "loss": 1.1097, "step": 17547 }, { "epoch": 0.9593920424258163, "grad_norm": 1.6902954578399658, "learning_rate": 9.178237502690423e-08, "loss": 1.4244, "step": 17548 }, { "epoch": 0.9594467148695379, "grad_norm": 1.7376657724380493, "learning_rate": 9.153552774452179e-08, "loss": 1.389, "step": 17549 }, { "epoch": 0.9595013873132594, "grad_norm": 1.4308598041534424, "learning_rate": 9.12890113290188e-08, "loss": 1.4661, "step": 17550 }, { "epoch": 0.959556059756981, "grad_norm": 1.390383243560791, "learning_rate": 9.104282578862644e-08, "loss": 1.4347, "step": 17551 }, { "epoch": 0.9596107322007026, "grad_norm": 1.467254638671875, "learning_rate": 9.079697113156705e-08, "loss": 1.1575, "step": 17552 }, { "epoch": 0.9596654046444241, "grad_norm": 1.5333646535873413, "learning_rate": 9.05514473660496e-08, "loss": 1.1592, "step": 17553 }, { "epoch": 0.9597200770881457, "grad_norm": 1.4841656684875488, "learning_rate": 9.030625450027198e-08, "loss": 1.4476, "step": 17554 }, { "epoch": 0.9597747495318673, "grad_norm": 1.3928639888763428, "learning_rate": 9.006139254242319e-08, "loss": 1.5696, "step": 17555 }, { "epoch": 0.9598294219755887, "grad_norm": 2.08754563331604, "learning_rate": 8.981686150067781e-08, "loss": 1.4226, "step": 17556 }, { "epoch": 0.9598840944193103, "grad_norm": 1.3988102674484253, "learning_rate": 8.957266138320375e-08, "loss": 1.4882, "step": 17557 }, { "epoch": 0.9599387668630318, "grad_norm": 1.480908989906311, "learning_rate": 8.932879219815227e-08, "loss": 1.4691, "step": 17558 }, { "epoch": 0.9599934393067534, "grad_norm": 1.6037040948867798, "learning_rate": 8.908525395367018e-08, "loss": 1.4852, "step": 17559 }, { "epoch": 0.960048111750475, "grad_norm": 1.5547003746032715, "learning_rate": 8.884204665788765e-08, "loss": 1.322, "step": 17560 }, { "epoch": 0.9601027841941965, "grad_norm": 1.653672456741333, "learning_rate": 8.859917031892595e-08, "loss": 1.3102, "step": 17561 }, { "epoch": 0.9601574566379181, "grad_norm": 1.7725130319595337, "learning_rate": 8.835662494489638e-08, "loss": 1.4331, "step": 17562 }, { "epoch": 0.9602121290816397, "grad_norm": 1.7358920574188232, "learning_rate": 8.81144105438958e-08, "loss": 1.4426, "step": 17563 }, { "epoch": 0.9602668015253611, "grad_norm": 1.7217140197753906, "learning_rate": 8.787252712401662e-08, "loss": 1.4376, "step": 17564 }, { "epoch": 0.9603214739690827, "grad_norm": 1.9532097578048706, "learning_rate": 8.763097469333237e-08, "loss": 1.3303, "step": 17565 }, { "epoch": 0.9603761464128043, "grad_norm": 1.583094835281372, "learning_rate": 8.738975325990884e-08, "loss": 1.5446, "step": 17566 }, { "epoch": 0.9604308188565258, "grad_norm": 1.4750185012817383, "learning_rate": 8.714886283180291e-08, "loss": 1.5194, "step": 17567 }, { "epoch": 0.9604854913002474, "grad_norm": 1.606682300567627, "learning_rate": 8.690830341705814e-08, "loss": 1.3045, "step": 17568 }, { "epoch": 0.960540163743969, "grad_norm": 1.5414623022079468, "learning_rate": 8.6668075023707e-08, "loss": 1.488, "step": 17569 }, { "epoch": 0.9605948361876905, "grad_norm": 1.4102731943130493, "learning_rate": 8.642817765977086e-08, "loss": 1.519, "step": 17570 }, { "epoch": 0.9606495086314121, "grad_norm": 1.4821834564208984, "learning_rate": 8.618861133326106e-08, "loss": 1.2556, "step": 17571 }, { "epoch": 0.9607041810751336, "grad_norm": 1.6476529836654663, "learning_rate": 8.594937605217568e-08, "loss": 1.3635, "step": 17572 }, { "epoch": 0.9607588535188551, "grad_norm": 1.244246244430542, "learning_rate": 8.571047182450609e-08, "loss": 1.454, "step": 17573 }, { "epoch": 0.9608135259625767, "grad_norm": 1.9752520322799683, "learning_rate": 8.547189865822814e-08, "loss": 1.1827, "step": 17574 }, { "epoch": 0.9608681984062982, "grad_norm": 1.9721500873565674, "learning_rate": 8.523365656130767e-08, "loss": 1.3485, "step": 17575 }, { "epoch": 0.9609228708500198, "grad_norm": 1.2110501527786255, "learning_rate": 8.499574554170276e-08, "loss": 1.5475, "step": 17576 }, { "epoch": 0.9609775432937414, "grad_norm": 1.4257735013961792, "learning_rate": 8.475816560735484e-08, "loss": 1.742, "step": 17577 }, { "epoch": 0.9610322157374629, "grad_norm": 2.0523521900177, "learning_rate": 8.452091676619978e-08, "loss": 1.3516, "step": 17578 }, { "epoch": 0.9610868881811845, "grad_norm": 1.7334249019622803, "learning_rate": 8.428399902615791e-08, "loss": 1.0803, "step": 17579 }, { "epoch": 0.9611415606249061, "grad_norm": 2.0474095344543457, "learning_rate": 8.404741239514181e-08, "loss": 1.4811, "step": 17580 }, { "epoch": 0.9611962330686276, "grad_norm": 1.245914101600647, "learning_rate": 8.381115688105068e-08, "loss": 1.4485, "step": 17581 }, { "epoch": 0.9612509055123492, "grad_norm": 1.43980073928833, "learning_rate": 8.35752324917749e-08, "loss": 1.5852, "step": 17582 }, { "epoch": 0.9613055779560707, "grad_norm": 1.772572636604309, "learning_rate": 8.333963923519039e-08, "loss": 1.6272, "step": 17583 }, { "epoch": 0.9613602503997922, "grad_norm": 1.342598795890808, "learning_rate": 8.310437711916641e-08, "loss": 1.5176, "step": 17584 }, { "epoch": 0.9614149228435138, "grad_norm": 1.756399154663086, "learning_rate": 8.286944615155778e-08, "loss": 1.5126, "step": 17585 }, { "epoch": 0.9614695952872353, "grad_norm": 1.633721947669983, "learning_rate": 8.263484634020935e-08, "loss": 1.385, "step": 17586 }, { "epoch": 0.9615242677309569, "grad_norm": 1.5665794610977173, "learning_rate": 8.240057769295485e-08, "loss": 1.419, "step": 17587 }, { "epoch": 0.9615789401746785, "grad_norm": 3.444302797317505, "learning_rate": 8.2166640217618e-08, "loss": 1.5238, "step": 17588 }, { "epoch": 0.9616336126184, "grad_norm": 1.8458918333053589, "learning_rate": 8.193303392201036e-08, "loss": 1.4763, "step": 17589 }, { "epoch": 0.9616882850621216, "grad_norm": 1.5755006074905396, "learning_rate": 8.169975881393122e-08, "loss": 1.3578, "step": 17590 }, { "epoch": 0.9617429575058432, "grad_norm": 2.1347122192382812, "learning_rate": 8.146681490117214e-08, "loss": 1.3256, "step": 17591 }, { "epoch": 0.9617976299495646, "grad_norm": 1.825838327407837, "learning_rate": 8.123420219151023e-08, "loss": 1.3798, "step": 17592 }, { "epoch": 0.9618523023932862, "grad_norm": 1.509526252746582, "learning_rate": 8.100192069271374e-08, "loss": 1.4757, "step": 17593 }, { "epoch": 0.9619069748370078, "grad_norm": 2.0690677165985107, "learning_rate": 8.076997041253865e-08, "loss": 1.3616, "step": 17594 }, { "epoch": 0.9619616472807293, "grad_norm": 1.4519015550613403, "learning_rate": 8.05383513587299e-08, "loss": 1.3114, "step": 17595 }, { "epoch": 0.9620163197244509, "grad_norm": 1.4916983842849731, "learning_rate": 8.030706353902351e-08, "loss": 1.4358, "step": 17596 }, { "epoch": 0.9620709921681725, "grad_norm": 1.4684829711914062, "learning_rate": 8.007610696114e-08, "loss": 1.3628, "step": 17597 }, { "epoch": 0.962125664611894, "grad_norm": 1.6656814813613892, "learning_rate": 7.984548163279426e-08, "loss": 1.3894, "step": 17598 }, { "epoch": 0.9621803370556156, "grad_norm": 1.49281907081604, "learning_rate": 7.961518756168574e-08, "loss": 1.2825, "step": 17599 }, { "epoch": 0.962235009499337, "grad_norm": 1.5292199850082397, "learning_rate": 7.938522475550492e-08, "loss": 1.3923, "step": 17600 }, { "epoch": 0.9622896819430586, "grad_norm": 1.5194284915924072, "learning_rate": 7.915559322193123e-08, "loss": 1.3004, "step": 17601 }, { "epoch": 0.9623443543867802, "grad_norm": 1.4658564329147339, "learning_rate": 7.892629296863297e-08, "loss": 1.4404, "step": 17602 }, { "epoch": 0.9623990268305017, "grad_norm": 1.6110901832580566, "learning_rate": 7.869732400326513e-08, "loss": 1.492, "step": 17603 }, { "epoch": 0.9624536992742233, "grad_norm": 1.284136176109314, "learning_rate": 7.846868633347492e-08, "loss": 1.2792, "step": 17604 }, { "epoch": 0.9625083717179449, "grad_norm": 1.448891520500183, "learning_rate": 7.824037996689738e-08, "loss": 1.2524, "step": 17605 }, { "epoch": 0.9625630441616664, "grad_norm": 1.6624964475631714, "learning_rate": 7.801240491115525e-08, "loss": 1.4234, "step": 17606 }, { "epoch": 0.962617716605388, "grad_norm": 1.4414095878601074, "learning_rate": 7.778476117386247e-08, "loss": 1.4293, "step": 17607 }, { "epoch": 0.9626723890491096, "grad_norm": 1.6945644617080688, "learning_rate": 7.75574487626185e-08, "loss": 1.44, "step": 17608 }, { "epoch": 0.962727061492831, "grad_norm": 1.763076901435852, "learning_rate": 7.733046768501617e-08, "loss": 1.472, "step": 17609 }, { "epoch": 0.9627817339365526, "grad_norm": 1.641221284866333, "learning_rate": 7.710381794863275e-08, "loss": 1.3935, "step": 17610 }, { "epoch": 0.9628364063802742, "grad_norm": 1.5247321128845215, "learning_rate": 7.687749956103774e-08, "loss": 1.4088, "step": 17611 }, { "epoch": 0.9628910788239957, "grad_norm": 1.7570075988769531, "learning_rate": 7.665151252978842e-08, "loss": 1.3495, "step": 17612 }, { "epoch": 0.9629457512677173, "grad_norm": 2.182600259780884, "learning_rate": 7.642585686243209e-08, "loss": 1.3435, "step": 17613 }, { "epoch": 0.9630004237114388, "grad_norm": 1.4746707677841187, "learning_rate": 7.620053256650162e-08, "loss": 1.5075, "step": 17614 }, { "epoch": 0.9630550961551604, "grad_norm": 1.6140069961547852, "learning_rate": 7.59755396495221e-08, "loss": 1.5638, "step": 17615 }, { "epoch": 0.963109768598882, "grad_norm": 1.2698837518692017, "learning_rate": 7.57508781190075e-08, "loss": 1.4878, "step": 17616 }, { "epoch": 0.9631644410426035, "grad_norm": 1.5442827939987183, "learning_rate": 7.55265479824585e-08, "loss": 1.5122, "step": 17617 }, { "epoch": 0.963219113486325, "grad_norm": 1.6593844890594482, "learning_rate": 7.530254924736691e-08, "loss": 1.3971, "step": 17618 }, { "epoch": 0.9632737859300466, "grad_norm": 1.5004079341888428, "learning_rate": 7.507888192121338e-08, "loss": 1.5411, "step": 17619 }, { "epoch": 0.9633284583737681, "grad_norm": 1.4149134159088135, "learning_rate": 7.485554601146417e-08, "loss": 1.4366, "step": 17620 }, { "epoch": 0.9633831308174897, "grad_norm": 1.452541470527649, "learning_rate": 7.463254152557885e-08, "loss": 1.3582, "step": 17621 }, { "epoch": 0.9634378032612113, "grad_norm": 1.1362769603729248, "learning_rate": 7.440986847100262e-08, "loss": 1.6386, "step": 17622 }, { "epoch": 0.9634924757049328, "grad_norm": 1.5898962020874023, "learning_rate": 7.418752685517283e-08, "loss": 1.4513, "step": 17623 }, { "epoch": 0.9635471481486544, "grad_norm": 2.7012453079223633, "learning_rate": 7.396551668551355e-08, "loss": 1.3042, "step": 17624 }, { "epoch": 0.963601820592376, "grad_norm": 1.5266202688217163, "learning_rate": 7.374383796943663e-08, "loss": 1.3672, "step": 17625 }, { "epoch": 0.9636564930360975, "grad_norm": 1.741851806640625, "learning_rate": 7.352249071434614e-08, "loss": 1.15, "step": 17626 }, { "epoch": 0.963711165479819, "grad_norm": 1.1697373390197754, "learning_rate": 7.330147492763396e-08, "loss": 1.3892, "step": 17627 }, { "epoch": 0.9637658379235405, "grad_norm": 1.4483145475387573, "learning_rate": 7.30807906166775e-08, "loss": 1.5877, "step": 17628 }, { "epoch": 0.9638205103672621, "grad_norm": 1.2476519346237183, "learning_rate": 7.286043778884865e-08, "loss": 1.5154, "step": 17629 }, { "epoch": 0.9638751828109837, "grad_norm": 2.5563905239105225, "learning_rate": 7.264041645150488e-08, "loss": 1.4629, "step": 17630 }, { "epoch": 0.9639298552547052, "grad_norm": 1.401776671409607, "learning_rate": 7.242072661199251e-08, "loss": 1.4164, "step": 17631 }, { "epoch": 0.9639845276984268, "grad_norm": 1.2109779119491577, "learning_rate": 7.22013682776479e-08, "loss": 1.4777, "step": 17632 }, { "epoch": 0.9640392001421484, "grad_norm": 1.5778563022613525, "learning_rate": 7.198234145579519e-08, "loss": 1.2257, "step": 17633 }, { "epoch": 0.9640938725858699, "grad_norm": 1.6946752071380615, "learning_rate": 7.176364615374964e-08, "loss": 1.3981, "step": 17634 }, { "epoch": 0.9641485450295915, "grad_norm": 2.0862841606140137, "learning_rate": 7.154528237881431e-08, "loss": 1.1831, "step": 17635 }, { "epoch": 0.9642032174733131, "grad_norm": 1.3811781406402588, "learning_rate": 7.132725013827779e-08, "loss": 1.4142, "step": 17636 }, { "epoch": 0.9642578899170345, "grad_norm": 1.51423978805542, "learning_rate": 7.110954943942428e-08, "loss": 1.3542, "step": 17637 }, { "epoch": 0.9643125623607561, "grad_norm": 2.3537886142730713, "learning_rate": 7.089218028952128e-08, "loss": 0.9719, "step": 17638 }, { "epoch": 0.9643672348044777, "grad_norm": 2.0009167194366455, "learning_rate": 7.067514269582743e-08, "loss": 1.4088, "step": 17639 }, { "epoch": 0.9644219072481992, "grad_norm": 1.7501171827316284, "learning_rate": 7.045843666559027e-08, "loss": 1.4296, "step": 17640 }, { "epoch": 0.9644765796919208, "grad_norm": 1.4035241603851318, "learning_rate": 7.024206220604734e-08, "loss": 1.4212, "step": 17641 }, { "epoch": 0.9645312521356423, "grad_norm": 1.2657136917114258, "learning_rate": 7.002601932442177e-08, "loss": 1.3324, "step": 17642 }, { "epoch": 0.9645859245793639, "grad_norm": 1.550315499305725, "learning_rate": 6.981030802792998e-08, "loss": 1.4617, "step": 17643 }, { "epoch": 0.9646405970230855, "grad_norm": 1.2710996866226196, "learning_rate": 6.95949283237729e-08, "loss": 1.4458, "step": 17644 }, { "epoch": 0.964695269466807, "grad_norm": 1.3909333944320679, "learning_rate": 6.937988021914477e-08, "loss": 1.5462, "step": 17645 }, { "epoch": 0.9647499419105285, "grad_norm": 1.5059159994125366, "learning_rate": 6.916516372122429e-08, "loss": 1.2158, "step": 17646 }, { "epoch": 0.9648046143542501, "grad_norm": 1.8634225130081177, "learning_rate": 6.895077883718237e-08, "loss": 1.4398, "step": 17647 }, { "epoch": 0.9648592867979716, "grad_norm": 1.4161852598190308, "learning_rate": 6.873672557417777e-08, "loss": 1.4418, "step": 17648 }, { "epoch": 0.9649139592416932, "grad_norm": 1.6962547302246094, "learning_rate": 6.852300393935918e-08, "loss": 1.2663, "step": 17649 }, { "epoch": 0.9649686316854148, "grad_norm": 1.8180997371673584, "learning_rate": 6.830961393986202e-08, "loss": 1.2773, "step": 17650 }, { "epoch": 0.9650233041291363, "grad_norm": 1.1958378553390503, "learning_rate": 6.80965555828128e-08, "loss": 1.3125, "step": 17651 }, { "epoch": 0.9650779765728579, "grad_norm": 1.372659683227539, "learning_rate": 6.788382887532475e-08, "loss": 1.5864, "step": 17652 }, { "epoch": 0.9651326490165795, "grad_norm": 1.2222652435302734, "learning_rate": 6.767143382450214e-08, "loss": 1.5106, "step": 17653 }, { "epoch": 0.965187321460301, "grad_norm": 1.2547932863235474, "learning_rate": 6.745937043743712e-08, "loss": 1.3925, "step": 17654 }, { "epoch": 0.9652419939040225, "grad_norm": 1.6846762895584106, "learning_rate": 6.724763872121177e-08, "loss": 1.2588, "step": 17655 }, { "epoch": 0.965296666347744, "grad_norm": 2.101292371749878, "learning_rate": 6.7036238682896e-08, "loss": 1.1095, "step": 17656 }, { "epoch": 0.9653513387914656, "grad_norm": 1.5216988325119019, "learning_rate": 6.68251703295475e-08, "loss": 1.468, "step": 17657 }, { "epoch": 0.9654060112351872, "grad_norm": 1.5032918453216553, "learning_rate": 6.661443366821618e-08, "loss": 1.4068, "step": 17658 }, { "epoch": 0.9654606836789087, "grad_norm": 2.032724142074585, "learning_rate": 6.640402870593865e-08, "loss": 1.0466, "step": 17659 }, { "epoch": 0.9655153561226303, "grad_norm": 1.5440706014633179, "learning_rate": 6.619395544974039e-08, "loss": 1.527, "step": 17660 }, { "epoch": 0.9655700285663519, "grad_norm": 1.3055408000946045, "learning_rate": 6.598421390663578e-08, "loss": 1.4597, "step": 17661 }, { "epoch": 0.9656247010100734, "grad_norm": 2.0285630226135254, "learning_rate": 6.577480408363035e-08, "loss": 1.4293, "step": 17662 }, { "epoch": 0.965679373453795, "grad_norm": 1.983587384223938, "learning_rate": 6.556572598771404e-08, "loss": 1.2515, "step": 17663 }, { "epoch": 0.9657340458975165, "grad_norm": 1.9419960975646973, "learning_rate": 6.535697962587129e-08, "loss": 1.3892, "step": 17664 }, { "epoch": 0.965788718341238, "grad_norm": 1.7447158098220825, "learning_rate": 6.514856500507094e-08, "loss": 1.4866, "step": 17665 }, { "epoch": 0.9658433907849596, "grad_norm": 1.3138785362243652, "learning_rate": 6.4940482132273e-08, "loss": 1.4261, "step": 17666 }, { "epoch": 0.9658980632286812, "grad_norm": 4.076321601867676, "learning_rate": 6.473273101442412e-08, "loss": 1.2824, "step": 17667 }, { "epoch": 0.9659527356724027, "grad_norm": 1.6110919713974, "learning_rate": 6.452531165846543e-08, "loss": 1.2126, "step": 17668 }, { "epoch": 0.9660074081161243, "grad_norm": 1.8643022775650024, "learning_rate": 6.431822407132027e-08, "loss": 1.3528, "step": 17669 }, { "epoch": 0.9660620805598458, "grad_norm": 1.4721651077270508, "learning_rate": 6.411146825990311e-08, "loss": 1.2655, "step": 17670 }, { "epoch": 0.9661167530035674, "grad_norm": 1.485880732536316, "learning_rate": 6.390504423112065e-08, "loss": 1.489, "step": 17671 }, { "epoch": 0.966171425447289, "grad_norm": 1.671973705291748, "learning_rate": 6.369895199186404e-08, "loss": 1.2766, "step": 17672 }, { "epoch": 0.9662260978910104, "grad_norm": 1.7237061262130737, "learning_rate": 6.349319154901668e-08, "loss": 1.5155, "step": 17673 }, { "epoch": 0.966280770334732, "grad_norm": 1.7851009368896484, "learning_rate": 6.32877629094475e-08, "loss": 1.4508, "step": 17674 }, { "epoch": 0.9663354427784536, "grad_norm": 1.6229989528656006, "learning_rate": 6.308266608001656e-08, "loss": 1.5218, "step": 17675 }, { "epoch": 0.9663901152221751, "grad_norm": 1.2462825775146484, "learning_rate": 6.287790106757396e-08, "loss": 1.3623, "step": 17676 }, { "epoch": 0.9664447876658967, "grad_norm": 1.5755794048309326, "learning_rate": 6.267346787895645e-08, "loss": 1.4022, "step": 17677 }, { "epoch": 0.9664994601096183, "grad_norm": 1.5674142837524414, "learning_rate": 6.246936652099078e-08, "loss": 1.5418, "step": 17678 }, { "epoch": 0.9665541325533398, "grad_norm": 1.6569266319274902, "learning_rate": 6.226559700049151e-08, "loss": 1.3833, "step": 17679 }, { "epoch": 0.9666088049970614, "grad_norm": 2.0690507888793945, "learning_rate": 6.206215932426319e-08, "loss": 1.3671, "step": 17680 }, { "epoch": 0.966663477440783, "grad_norm": 1.2377992868423462, "learning_rate": 6.185905349910038e-08, "loss": 1.5094, "step": 17681 }, { "epoch": 0.9667181498845044, "grad_norm": 1.5431770086288452, "learning_rate": 6.165627953178433e-08, "loss": 1.4206, "step": 17682 }, { "epoch": 0.966772822328226, "grad_norm": 1.699636459350586, "learning_rate": 6.145383742908517e-08, "loss": 1.3507, "step": 17683 }, { "epoch": 0.9668274947719475, "grad_norm": 1.3134335279464722, "learning_rate": 6.125172719776529e-08, "loss": 1.3213, "step": 17684 }, { "epoch": 0.9668821672156691, "grad_norm": 1.4102811813354492, "learning_rate": 6.10499488445715e-08, "loss": 1.3398, "step": 17685 }, { "epoch": 0.9669368396593907, "grad_norm": 1.4306832551956177, "learning_rate": 6.084850237624285e-08, "loss": 1.7307, "step": 17686 }, { "epoch": 0.9669915121031122, "grad_norm": 2.0601983070373535, "learning_rate": 6.064738779950397e-08, "loss": 1.6483, "step": 17687 }, { "epoch": 0.9670461845468338, "grad_norm": 1.4487922191619873, "learning_rate": 6.044660512107392e-08, "loss": 1.4785, "step": 17688 }, { "epoch": 0.9671008569905554, "grad_norm": 2.050546884536743, "learning_rate": 6.024615434765513e-08, "loss": 1.2181, "step": 17689 }, { "epoch": 0.9671555294342769, "grad_norm": 1.2275534868240356, "learning_rate": 6.004603548594112e-08, "loss": 1.5108, "step": 17690 }, { "epoch": 0.9672102018779984, "grad_norm": 1.2629872560501099, "learning_rate": 5.984624854261545e-08, "loss": 1.6354, "step": 17691 }, { "epoch": 0.96726487432172, "grad_norm": 1.7484848499298096, "learning_rate": 5.964679352434833e-08, "loss": 1.517, "step": 17692 }, { "epoch": 0.9673195467654415, "grad_norm": 1.7295427322387695, "learning_rate": 5.944767043780109e-08, "loss": 1.5309, "step": 17693 }, { "epoch": 0.9673742192091631, "grad_norm": 1.537903904914856, "learning_rate": 5.924887928962286e-08, "loss": 1.3695, "step": 17694 }, { "epoch": 0.9674288916528847, "grad_norm": 1.7030705213546753, "learning_rate": 5.905042008645057e-08, "loss": 1.4009, "step": 17695 }, { "epoch": 0.9674835640966062, "grad_norm": 1.4615558385849, "learning_rate": 5.885229283491223e-08, "loss": 1.3075, "step": 17696 }, { "epoch": 0.9675382365403278, "grad_norm": 1.4488245248794556, "learning_rate": 5.865449754162256e-08, "loss": 1.2825, "step": 17697 }, { "epoch": 0.9675929089840493, "grad_norm": 1.535038948059082, "learning_rate": 5.845703421318849e-08, "loss": 1.5166, "step": 17698 }, { "epoch": 0.9676475814277709, "grad_norm": 1.2325645685195923, "learning_rate": 5.825990285620253e-08, "loss": 1.5185, "step": 17699 }, { "epoch": 0.9677022538714924, "grad_norm": 1.907923936843872, "learning_rate": 5.806310347724609e-08, "loss": 1.2418, "step": 17700 }, { "epoch": 0.9677569263152139, "grad_norm": 1.101952314376831, "learning_rate": 5.786663608289278e-08, "loss": 1.5338, "step": 17701 }, { "epoch": 0.9678115987589355, "grad_norm": 1.5771632194519043, "learning_rate": 5.7670500679702925e-08, "loss": 1.5023, "step": 17702 }, { "epoch": 0.9678662712026571, "grad_norm": 1.4272786378860474, "learning_rate": 5.747469727422572e-08, "loss": 1.4451, "step": 17703 }, { "epoch": 0.9679209436463786, "grad_norm": 1.6772351264953613, "learning_rate": 5.727922587299817e-08, "loss": 1.2356, "step": 17704 }, { "epoch": 0.9679756160901002, "grad_norm": 1.5496119260787964, "learning_rate": 5.7084086482549486e-08, "loss": 1.5349, "step": 17705 }, { "epoch": 0.9680302885338218, "grad_norm": 1.3130161762237549, "learning_rate": 5.688927910939446e-08, "loss": 1.4655, "step": 17706 }, { "epoch": 0.9680849609775433, "grad_norm": 2.0057907104492188, "learning_rate": 5.6694803760039e-08, "loss": 1.4402, "step": 17707 }, { "epoch": 0.9681396334212649, "grad_norm": 1.2588914632797241, "learning_rate": 5.6500660440975684e-08, "loss": 1.5607, "step": 17708 }, { "epoch": 0.9681943058649864, "grad_norm": 1.849194884300232, "learning_rate": 5.630684915868934e-08, "loss": 1.4176, "step": 17709 }, { "epoch": 0.9682489783087079, "grad_norm": 1.3801850080490112, "learning_rate": 5.611336991965144e-08, "loss": 1.519, "step": 17710 }, { "epoch": 0.9683036507524295, "grad_norm": 1.4264971017837524, "learning_rate": 5.5920222730321275e-08, "loss": 1.4685, "step": 17711 }, { "epoch": 0.968358323196151, "grad_norm": 1.4696999788284302, "learning_rate": 5.572740759715034e-08, "loss": 1.1839, "step": 17712 }, { "epoch": 0.9684129956398726, "grad_norm": 1.346588373184204, "learning_rate": 5.5534924526575716e-08, "loss": 1.2934, "step": 17713 }, { "epoch": 0.9684676680835942, "grad_norm": 1.2142183780670166, "learning_rate": 5.5342773525024484e-08, "loss": 1.3609, "step": 17714 }, { "epoch": 0.9685223405273157, "grad_norm": 1.8712592124938965, "learning_rate": 5.515095459891484e-08, "loss": 1.6236, "step": 17715 }, { "epoch": 0.9685770129710373, "grad_norm": 1.4366453886032104, "learning_rate": 5.4959467754651665e-08, "loss": 1.4062, "step": 17716 }, { "epoch": 0.9686316854147589, "grad_norm": 1.554095983505249, "learning_rate": 5.4768312998627616e-08, "loss": 1.6542, "step": 17717 }, { "epoch": 0.9686863578584803, "grad_norm": 1.552372694015503, "learning_rate": 5.457749033722648e-08, "loss": 1.5289, "step": 17718 }, { "epoch": 0.9687410303022019, "grad_norm": 1.4868985414505005, "learning_rate": 5.438699977682205e-08, "loss": 1.5015, "step": 17719 }, { "epoch": 0.9687957027459235, "grad_norm": 1.4527677297592163, "learning_rate": 5.4196841323772565e-08, "loss": 1.4725, "step": 17720 }, { "epoch": 0.968850375189645, "grad_norm": 1.5117064714431763, "learning_rate": 5.400701498442962e-08, "loss": 1.5553, "step": 17721 }, { "epoch": 0.9689050476333666, "grad_norm": 1.663395643234253, "learning_rate": 5.381752076513147e-08, "loss": 1.682, "step": 17722 }, { "epoch": 0.9689597200770882, "grad_norm": 1.5239737033843994, "learning_rate": 5.3628358672205285e-08, "loss": 1.5077, "step": 17723 }, { "epoch": 0.9690143925208097, "grad_norm": 1.4096767902374268, "learning_rate": 5.343952871196934e-08, "loss": 1.3087, "step": 17724 }, { "epoch": 0.9690690649645313, "grad_norm": 1.4942022562026978, "learning_rate": 5.3251030890727474e-08, "loss": 1.4052, "step": 17725 }, { "epoch": 0.9691237374082528, "grad_norm": 1.2127835750579834, "learning_rate": 5.306286521477355e-08, "loss": 1.5149, "step": 17726 }, { "epoch": 0.9691784098519743, "grad_norm": 1.718955159187317, "learning_rate": 5.287503169039143e-08, "loss": 1.2974, "step": 17727 }, { "epoch": 0.9692330822956959, "grad_norm": 1.8102153539657593, "learning_rate": 5.2687530323854985e-08, "loss": 1.5681, "step": 17728 }, { "epoch": 0.9692877547394174, "grad_norm": 1.8883538246154785, "learning_rate": 5.250036112142365e-08, "loss": 1.4737, "step": 17729 }, { "epoch": 0.969342427183139, "grad_norm": 1.262197494506836, "learning_rate": 5.231352408934687e-08, "loss": 1.4982, "step": 17730 }, { "epoch": 0.9693970996268606, "grad_norm": 1.8923572301864624, "learning_rate": 5.2127019233866316e-08, "loss": 1.5404, "step": 17731 }, { "epoch": 0.9694517720705821, "grad_norm": 1.2950103282928467, "learning_rate": 5.1940846561205907e-08, "loss": 1.6173, "step": 17732 }, { "epoch": 0.9695064445143037, "grad_norm": 1.7245277166366577, "learning_rate": 5.175500607758621e-08, "loss": 1.3558, "step": 17733 }, { "epoch": 0.9695611169580253, "grad_norm": 1.3861297369003296, "learning_rate": 5.156949778921006e-08, "loss": 1.4035, "step": 17734 }, { "epoch": 0.9696157894017468, "grad_norm": 1.408612608909607, "learning_rate": 5.1384321702273586e-08, "loss": 1.279, "step": 17735 }, { "epoch": 0.9696704618454683, "grad_norm": 1.6647156476974487, "learning_rate": 5.119947782295964e-08, "loss": 1.2336, "step": 17736 }, { "epoch": 0.9697251342891899, "grad_norm": 2.3792545795440674, "learning_rate": 5.101496615744106e-08, "loss": 1.406, "step": 17737 }, { "epoch": 0.9697798067329114, "grad_norm": 1.6975440979003906, "learning_rate": 5.083078671187846e-08, "loss": 1.28, "step": 17738 }, { "epoch": 0.969834479176633, "grad_norm": 1.3131566047668457, "learning_rate": 5.06469394924225e-08, "loss": 1.6072, "step": 17739 }, { "epoch": 0.9698891516203546, "grad_norm": 1.3987858295440674, "learning_rate": 5.046342450521158e-08, "loss": 1.5686, "step": 17740 }, { "epoch": 0.9699438240640761, "grad_norm": 2.0478551387786865, "learning_rate": 5.028024175637525e-08, "loss": 1.2842, "step": 17741 }, { "epoch": 0.9699984965077977, "grad_norm": 1.2141908407211304, "learning_rate": 5.0097391252028616e-08, "loss": 1.633, "step": 17742 }, { "epoch": 0.9700531689515192, "grad_norm": 1.285644292831421, "learning_rate": 4.9914872998277906e-08, "loss": 1.573, "step": 17743 }, { "epoch": 0.9701078413952408, "grad_norm": 1.8055853843688965, "learning_rate": 4.973268700121936e-08, "loss": 1.3139, "step": 17744 }, { "epoch": 0.9701625138389623, "grad_norm": 1.6784533262252808, "learning_rate": 4.955083326693477e-08, "loss": 1.4696, "step": 17745 }, { "epoch": 0.9702171862826838, "grad_norm": 1.5205113887786865, "learning_rate": 4.9369311801497065e-08, "loss": 1.3584, "step": 17746 }, { "epoch": 0.9702718587264054, "grad_norm": 1.2737698554992676, "learning_rate": 4.918812261096806e-08, "loss": 1.5562, "step": 17747 }, { "epoch": 0.970326531170127, "grad_norm": 1.6747198104858398, "learning_rate": 4.9007265701397357e-08, "loss": 1.3141, "step": 17748 }, { "epoch": 0.9703812036138485, "grad_norm": 1.2885509729385376, "learning_rate": 4.882674107882568e-08, "loss": 1.4972, "step": 17749 }, { "epoch": 0.9704358760575701, "grad_norm": 1.5712860822677612, "learning_rate": 4.864654874928043e-08, "loss": 1.5416, "step": 17750 }, { "epoch": 0.9704905485012917, "grad_norm": 1.4589227437973022, "learning_rate": 4.846668871877902e-08, "loss": 1.3282, "step": 17751 }, { "epoch": 0.9705452209450132, "grad_norm": 1.710400104522705, "learning_rate": 4.8287160993325535e-08, "loss": 1.3982, "step": 17752 }, { "epoch": 0.9705998933887348, "grad_norm": 1.6100825071334839, "learning_rate": 4.8107965578917395e-08, "loss": 1.3935, "step": 17753 }, { "epoch": 0.9706545658324564, "grad_norm": 1.406397819519043, "learning_rate": 4.792910248153537e-08, "loss": 1.5532, "step": 17754 }, { "epoch": 0.9707092382761778, "grad_norm": 1.8022291660308838, "learning_rate": 4.77505717071558e-08, "loss": 1.2196, "step": 17755 }, { "epoch": 0.9707639107198994, "grad_norm": 1.9238239526748657, "learning_rate": 4.757237326173725e-08, "loss": 1.3983, "step": 17756 }, { "epoch": 0.9708185831636209, "grad_norm": 1.7398232221603394, "learning_rate": 4.739450715123162e-08, "loss": 1.4232, "step": 17757 }, { "epoch": 0.9708732556073425, "grad_norm": 1.3226555585861206, "learning_rate": 4.721697338157749e-08, "loss": 1.5519, "step": 17758 }, { "epoch": 0.9709279280510641, "grad_norm": 1.4558900594711304, "learning_rate": 4.703977195870346e-08, "loss": 1.5258, "step": 17759 }, { "epoch": 0.9709826004947856, "grad_norm": 1.8683254718780518, "learning_rate": 4.6862902888527016e-08, "loss": 1.1428, "step": 17760 }, { "epoch": 0.9710372729385072, "grad_norm": 1.0739237070083618, "learning_rate": 4.668636617695454e-08, "loss": 1.5678, "step": 17761 }, { "epoch": 0.9710919453822288, "grad_norm": 1.6141842603683472, "learning_rate": 4.651016182988022e-08, "loss": 1.4266, "step": 17762 }, { "epoch": 0.9711466178259502, "grad_norm": 1.5663414001464844, "learning_rate": 4.6334289853188216e-08, "loss": 1.2993, "step": 17763 }, { "epoch": 0.9712012902696718, "grad_norm": 1.304161787033081, "learning_rate": 4.615875025275163e-08, "loss": 1.2859, "step": 17764 }, { "epoch": 0.9712559627133934, "grad_norm": 1.7108176946640015, "learning_rate": 4.59835430344302e-08, "loss": 1.4644, "step": 17765 }, { "epoch": 0.9713106351571149, "grad_norm": 1.3257421255111694, "learning_rate": 4.5808668204078146e-08, "loss": 1.4262, "step": 17766 }, { "epoch": 0.9713653076008365, "grad_norm": 1.5277045965194702, "learning_rate": 4.56341257675319e-08, "loss": 1.3782, "step": 17767 }, { "epoch": 0.9714199800445581, "grad_norm": 1.6402091979980469, "learning_rate": 4.545991573062014e-08, "loss": 1.5088, "step": 17768 }, { "epoch": 0.9714746524882796, "grad_norm": 1.696717619895935, "learning_rate": 4.528603809916154e-08, "loss": 1.4939, "step": 17769 }, { "epoch": 0.9715293249320012, "grad_norm": 1.8135316371917725, "learning_rate": 4.5112492878962574e-08, "loss": 1.4041, "step": 17770 }, { "epoch": 0.9715839973757227, "grad_norm": 1.3945828676223755, "learning_rate": 4.493928007581527e-08, "loss": 1.6179, "step": 17771 }, { "epoch": 0.9716386698194442, "grad_norm": 1.6601696014404297, "learning_rate": 4.476639969550722e-08, "loss": 1.4083, "step": 17772 }, { "epoch": 0.9716933422631658, "grad_norm": 1.7439481019973755, "learning_rate": 4.459385174380937e-08, "loss": 1.2707, "step": 17773 }, { "epoch": 0.9717480147068873, "grad_norm": 1.5073331594467163, "learning_rate": 4.442163622648377e-08, "loss": 1.2897, "step": 17774 }, { "epoch": 0.9718026871506089, "grad_norm": 1.3732067346572876, "learning_rate": 4.424975314928137e-08, "loss": 1.3677, "step": 17775 }, { "epoch": 0.9718573595943305, "grad_norm": 1.295974612236023, "learning_rate": 4.4078202517942037e-08, "loss": 1.2926, "step": 17776 }, { "epoch": 0.971912032038052, "grad_norm": 1.7986865043640137, "learning_rate": 4.39069843381934e-08, "loss": 1.3026, "step": 17777 }, { "epoch": 0.9719667044817736, "grad_norm": 1.5595011711120605, "learning_rate": 4.373609861575423e-08, "loss": 1.4582, "step": 17778 }, { "epoch": 0.9720213769254952, "grad_norm": 1.462053656578064, "learning_rate": 4.3565545356327734e-08, "loss": 1.2694, "step": 17779 }, { "epoch": 0.9720760493692167, "grad_norm": 1.5094507932662964, "learning_rate": 4.3395324565612686e-08, "loss": 1.3985, "step": 17780 }, { "epoch": 0.9721307218129382, "grad_norm": 1.3933378458023071, "learning_rate": 4.322543624929121e-08, "loss": 1.6512, "step": 17781 }, { "epoch": 0.9721853942566598, "grad_norm": 1.6240816116333008, "learning_rate": 4.3055880413036545e-08, "loss": 1.3498, "step": 17782 }, { "epoch": 0.9722400667003813, "grad_norm": 1.279499888420105, "learning_rate": 4.288665706251194e-08, "loss": 1.3539, "step": 17783 }, { "epoch": 0.9722947391441029, "grad_norm": 1.635591983795166, "learning_rate": 4.271776620336621e-08, "loss": 1.4195, "step": 17784 }, { "epoch": 0.9723494115878244, "grad_norm": 1.7104164361953735, "learning_rate": 4.2549207841239284e-08, "loss": 1.4658, "step": 17785 }, { "epoch": 0.972404084031546, "grad_norm": 1.442463994026184, "learning_rate": 4.2380981981759994e-08, "loss": 1.3325, "step": 17786 }, { "epoch": 0.9724587564752676, "grad_norm": 1.3911930322647095, "learning_rate": 4.2213088630547185e-08, "loss": 1.5132, "step": 17787 }, { "epoch": 0.9725134289189891, "grad_norm": 1.3653154373168945, "learning_rate": 4.2045527793204145e-08, "loss": 1.4304, "step": 17788 }, { "epoch": 0.9725681013627107, "grad_norm": 1.647416114807129, "learning_rate": 4.187829947532973e-08, "loss": 1.5076, "step": 17789 }, { "epoch": 0.9726227738064323, "grad_norm": 1.2193357944488525, "learning_rate": 4.171140368250615e-08, "loss": 1.2941, "step": 17790 }, { "epoch": 0.9726774462501537, "grad_norm": 1.4791990518569946, "learning_rate": 4.154484042030671e-08, "loss": 1.4243, "step": 17791 }, { "epoch": 0.9727321186938753, "grad_norm": 1.8034459352493286, "learning_rate": 4.1378609694292526e-08, "loss": 1.4086, "step": 17792 }, { "epoch": 0.9727867911375969, "grad_norm": 1.9543774127960205, "learning_rate": 4.1212711510015826e-08, "loss": 1.6877, "step": 17793 }, { "epoch": 0.9728414635813184, "grad_norm": 1.8939765691757202, "learning_rate": 4.1047145873015504e-08, "loss": 1.157, "step": 17794 }, { "epoch": 0.97289613602504, "grad_norm": 1.666404366493225, "learning_rate": 4.0881912788820476e-08, "loss": 1.2439, "step": 17795 }, { "epoch": 0.9729508084687616, "grad_norm": 1.5775043964385986, "learning_rate": 4.071701226294744e-08, "loss": 1.4194, "step": 17796 }, { "epoch": 0.9730054809124831, "grad_norm": 1.843092679977417, "learning_rate": 4.0552444300904213e-08, "loss": 1.4776, "step": 17797 }, { "epoch": 0.9730601533562047, "grad_norm": 1.7746061086654663, "learning_rate": 4.038820890818529e-08, "loss": 1.4199, "step": 17798 }, { "epoch": 0.9731148257999261, "grad_norm": 1.2849247455596924, "learning_rate": 4.0224306090275165e-08, "loss": 1.5335, "step": 17799 }, { "epoch": 0.9731694982436477, "grad_norm": 1.5883171558380127, "learning_rate": 4.006073585264725e-08, "loss": 1.3988, "step": 17800 }, { "epoch": 0.9732241706873693, "grad_norm": 1.2596603631973267, "learning_rate": 3.989749820076272e-08, "loss": 1.4735, "step": 17801 }, { "epoch": 0.9732788431310908, "grad_norm": 1.8886772394180298, "learning_rate": 3.973459314007278e-08, "loss": 1.5513, "step": 17802 }, { "epoch": 0.9733335155748124, "grad_norm": 1.3106341361999512, "learning_rate": 3.957202067601751e-08, "loss": 1.2785, "step": 17803 }, { "epoch": 0.973388188018534, "grad_norm": 1.2328603267669678, "learning_rate": 3.94097808140248e-08, "loss": 1.6041, "step": 17804 }, { "epoch": 0.9734428604622555, "grad_norm": 1.1496193408966064, "learning_rate": 3.924787355951254e-08, "loss": 1.5092, "step": 17805 }, { "epoch": 0.9734975329059771, "grad_norm": 1.9287999868392944, "learning_rate": 3.908629891788862e-08, "loss": 1.7171, "step": 17806 }, { "epoch": 0.9735522053496987, "grad_norm": 1.7227897644042969, "learning_rate": 3.89250568945454e-08, "loss": 1.0939, "step": 17807 }, { "epoch": 0.9736068777934201, "grad_norm": 2.3601934909820557, "learning_rate": 3.8764147494870787e-08, "loss": 1.3553, "step": 17808 }, { "epoch": 0.9736615502371417, "grad_norm": 1.3234057426452637, "learning_rate": 3.860357072423493e-08, "loss": 1.4209, "step": 17809 }, { "epoch": 0.9737162226808633, "grad_norm": 1.4021871089935303, "learning_rate": 3.844332658800132e-08, "loss": 1.4649, "step": 17810 }, { "epoch": 0.9737708951245848, "grad_norm": 1.833090901374817, "learning_rate": 3.8283415091521224e-08, "loss": 1.4408, "step": 17811 }, { "epoch": 0.9738255675683064, "grad_norm": 1.3263771533966064, "learning_rate": 3.812383624013261e-08, "loss": 1.4955, "step": 17812 }, { "epoch": 0.9738802400120279, "grad_norm": 1.595232367515564, "learning_rate": 3.7964590039165637e-08, "loss": 1.3581, "step": 17813 }, { "epoch": 0.9739349124557495, "grad_norm": 1.2540528774261475, "learning_rate": 3.7805676493938294e-08, "loss": 1.4862, "step": 17814 }, { "epoch": 0.9739895848994711, "grad_norm": 1.7947572469711304, "learning_rate": 3.764709560975632e-08, "loss": 1.5537, "step": 17815 }, { "epoch": 0.9740442573431926, "grad_norm": 1.9485076665878296, "learning_rate": 3.7488847391915494e-08, "loss": 1.31, "step": 17816 }, { "epoch": 0.9740989297869141, "grad_norm": 2.126317262649536, "learning_rate": 3.733093184569936e-08, "loss": 1.2624, "step": 17817 }, { "epoch": 0.9741536022306357, "grad_norm": 1.085263967514038, "learning_rate": 3.717334897638147e-08, "loss": 1.627, "step": 17818 }, { "epoch": 0.9742082746743572, "grad_norm": 1.634132981300354, "learning_rate": 3.701609878922319e-08, "loss": 1.3694, "step": 17819 }, { "epoch": 0.9742629471180788, "grad_norm": 1.6577041149139404, "learning_rate": 3.6859181289478077e-08, "loss": 1.3583, "step": 17820 }, { "epoch": 0.9743176195618004, "grad_norm": 2.0888044834136963, "learning_rate": 3.6702596482381946e-08, "loss": 1.3368, "step": 17821 }, { "epoch": 0.9743722920055219, "grad_norm": 1.161181092262268, "learning_rate": 3.6546344373167286e-08, "loss": 1.6486, "step": 17822 }, { "epoch": 0.9744269644492435, "grad_norm": 1.8998188972473145, "learning_rate": 3.639042496704992e-08, "loss": 1.3169, "step": 17823 }, { "epoch": 0.9744816368929651, "grad_norm": 1.3481769561767578, "learning_rate": 3.6234838269236795e-08, "loss": 1.3167, "step": 17824 }, { "epoch": 0.9745363093366866, "grad_norm": 1.3648701906204224, "learning_rate": 3.607958428492264e-08, "loss": 1.3399, "step": 17825 }, { "epoch": 0.9745909817804081, "grad_norm": 1.7288782596588135, "learning_rate": 3.59246630192922e-08, "loss": 1.5693, "step": 17826 }, { "epoch": 0.9746456542241296, "grad_norm": 1.5565264225006104, "learning_rate": 3.5770074477519124e-08, "loss": 1.6432, "step": 17827 }, { "epoch": 0.9747003266678512, "grad_norm": 1.6029396057128906, "learning_rate": 3.5615818664764825e-08, "loss": 1.209, "step": 17828 }, { "epoch": 0.9747549991115728, "grad_norm": 1.461669683456421, "learning_rate": 3.546189558618074e-08, "loss": 1.6862, "step": 17829 }, { "epoch": 0.9748096715552943, "grad_norm": 1.4375864267349243, "learning_rate": 3.5308305246906096e-08, "loss": 1.3831, "step": 17830 }, { "epoch": 0.9748643439990159, "grad_norm": 2.599393606185913, "learning_rate": 3.515504765207012e-08, "loss": 1.2308, "step": 17831 }, { "epoch": 0.9749190164427375, "grad_norm": 1.6024290323257446, "learning_rate": 3.500212280678983e-08, "loss": 1.297, "step": 17832 }, { "epoch": 0.974973688886459, "grad_norm": 1.6786870956420898, "learning_rate": 3.4849530716173365e-08, "loss": 1.4347, "step": 17833 }, { "epoch": 0.9750283613301806, "grad_norm": 1.6804192066192627, "learning_rate": 3.469727138531442e-08, "loss": 1.4327, "step": 17834 }, { "epoch": 0.9750830337739022, "grad_norm": 3.7815470695495605, "learning_rate": 3.454534481929783e-08, "loss": 1.2382, "step": 17835 }, { "epoch": 0.9751377062176236, "grad_norm": 1.4173476696014404, "learning_rate": 3.439375102319731e-08, "loss": 1.4865, "step": 17836 }, { "epoch": 0.9751923786613452, "grad_norm": 1.544769048690796, "learning_rate": 3.424249000207436e-08, "loss": 1.3464, "step": 17837 }, { "epoch": 0.9752470511050668, "grad_norm": 1.6874969005584717, "learning_rate": 3.4091561760979384e-08, "loss": 1.3094, "step": 17838 }, { "epoch": 0.9753017235487883, "grad_norm": 1.3732404708862305, "learning_rate": 3.394096630495281e-08, "loss": 1.5857, "step": 17839 }, { "epoch": 0.9753563959925099, "grad_norm": 1.502505898475647, "learning_rate": 3.379070363902504e-08, "loss": 1.2828, "step": 17840 }, { "epoch": 0.9754110684362314, "grad_norm": 1.2939122915267944, "learning_rate": 3.3640773768210953e-08, "loss": 1.8024, "step": 17841 }, { "epoch": 0.975465740879953, "grad_norm": 1.7371206283569336, "learning_rate": 3.349117669751767e-08, "loss": 1.3486, "step": 17842 }, { "epoch": 0.9755204133236746, "grad_norm": 1.7774932384490967, "learning_rate": 3.3341912431942294e-08, "loss": 1.4299, "step": 17843 }, { "epoch": 0.975575085767396, "grad_norm": 1.2864664793014526, "learning_rate": 3.31929809764675e-08, "loss": 1.3002, "step": 17844 }, { "epoch": 0.9756297582111176, "grad_norm": 1.292427659034729, "learning_rate": 3.30443823360671e-08, "loss": 1.5726, "step": 17845 }, { "epoch": 0.9756844306548392, "grad_norm": 1.4528664350509644, "learning_rate": 3.289611651570268e-08, "loss": 1.4387, "step": 17846 }, { "epoch": 0.9757391030985607, "grad_norm": 1.6398639678955078, "learning_rate": 3.2748183520325827e-08, "loss": 1.4653, "step": 17847 }, { "epoch": 0.9757937755422823, "grad_norm": 1.6573312282562256, "learning_rate": 3.2600583354874814e-08, "loss": 1.3118, "step": 17848 }, { "epoch": 0.9758484479860039, "grad_norm": 1.5079282522201538, "learning_rate": 3.245331602428126e-08, "loss": 1.3429, "step": 17849 }, { "epoch": 0.9759031204297254, "grad_norm": 1.406488299369812, "learning_rate": 3.230638153346011e-08, "loss": 1.3502, "step": 17850 }, { "epoch": 0.975957792873447, "grad_norm": 1.4608453512191772, "learning_rate": 3.215977988731855e-08, "loss": 1.3516, "step": 17851 }, { "epoch": 0.9760124653171686, "grad_norm": 2.015228033065796, "learning_rate": 3.201351109075268e-08, "loss": 1.3589, "step": 17852 }, { "epoch": 0.97606713776089, "grad_norm": 2.230661630630493, "learning_rate": 3.186757514864636e-08, "loss": 1.3582, "step": 17853 }, { "epoch": 0.9761218102046116, "grad_norm": 1.4894473552703857, "learning_rate": 3.172197206587235e-08, "loss": 1.3565, "step": 17854 }, { "epoch": 0.9761764826483331, "grad_norm": 1.385818362236023, "learning_rate": 3.157670184729344e-08, "loss": 1.2437, "step": 17855 }, { "epoch": 0.9762311550920547, "grad_norm": 1.5761396884918213, "learning_rate": 3.14317644977602e-08, "loss": 1.2445, "step": 17856 }, { "epoch": 0.9762858275357763, "grad_norm": 1.6664276123046875, "learning_rate": 3.128716002211207e-08, "loss": 1.653, "step": 17857 }, { "epoch": 0.9763404999794978, "grad_norm": 1.3042232990264893, "learning_rate": 3.114288842517743e-08, "loss": 1.3776, "step": 17858 }, { "epoch": 0.9763951724232194, "grad_norm": 1.5841442346572876, "learning_rate": 3.099894971177464e-08, "loss": 1.2784, "step": 17859 }, { "epoch": 0.976449844866941, "grad_norm": 1.666940450668335, "learning_rate": 3.085534388670986e-08, "loss": 1.4925, "step": 17860 }, { "epoch": 0.9765045173106625, "grad_norm": 1.0762501955032349, "learning_rate": 3.071207095477924e-08, "loss": 1.6436, "step": 17861 }, { "epoch": 0.976559189754384, "grad_norm": 1.3837840557098389, "learning_rate": 3.056913092076563e-08, "loss": 1.4147, "step": 17862 }, { "epoch": 0.9766138621981056, "grad_norm": 1.7308019399642944, "learning_rate": 3.0426523789442994e-08, "loss": 1.3039, "step": 17863 }, { "epoch": 0.9766685346418271, "grad_norm": 1.9574164152145386, "learning_rate": 3.0284249565573076e-08, "loss": 1.3783, "step": 17864 }, { "epoch": 0.9767232070855487, "grad_norm": 1.5782139301300049, "learning_rate": 3.0142308253906515e-08, "loss": 1.793, "step": 17865 }, { "epoch": 0.9767778795292703, "grad_norm": 1.300113320350647, "learning_rate": 3.000069985918397e-08, "loss": 1.374, "step": 17866 }, { "epoch": 0.9768325519729918, "grad_norm": 1.304113745689392, "learning_rate": 2.985942438613276e-08, "loss": 1.34, "step": 17867 }, { "epoch": 0.9768872244167134, "grad_norm": 1.621015191078186, "learning_rate": 2.9718481839470236e-08, "loss": 1.432, "step": 17868 }, { "epoch": 0.9769418968604349, "grad_norm": 1.402894377708435, "learning_rate": 2.9577872223904846e-08, "loss": 1.5909, "step": 17869 }, { "epoch": 0.9769965693041565, "grad_norm": 1.4410102367401123, "learning_rate": 2.9437595544130615e-08, "loss": 1.2139, "step": 17870 }, { "epoch": 0.977051241747878, "grad_norm": 1.405729055404663, "learning_rate": 2.9297651804830464e-08, "loss": 1.3476, "step": 17871 }, { "epoch": 0.9771059141915995, "grad_norm": 1.7226358652114868, "learning_rate": 2.915804101067954e-08, "loss": 1.445, "step": 17872 }, { "epoch": 0.9771605866353211, "grad_norm": 1.5264705419540405, "learning_rate": 2.901876316633967e-08, "loss": 1.3738, "step": 17873 }, { "epoch": 0.9772152590790427, "grad_norm": 1.318333625793457, "learning_rate": 2.887981827645936e-08, "loss": 1.5761, "step": 17874 }, { "epoch": 0.9772699315227642, "grad_norm": 1.7286330461502075, "learning_rate": 2.874120634568156e-08, "loss": 1.3252, "step": 17875 }, { "epoch": 0.9773246039664858, "grad_norm": 1.2334520816802979, "learning_rate": 2.860292737863146e-08, "loss": 1.5288, "step": 17876 }, { "epoch": 0.9773792764102074, "grad_norm": 1.486405849456787, "learning_rate": 2.8464981379929814e-08, "loss": 1.4733, "step": 17877 }, { "epoch": 0.9774339488539289, "grad_norm": 1.4493448734283447, "learning_rate": 2.8327368354180707e-08, "loss": 1.4746, "step": 17878 }, { "epoch": 0.9774886212976505, "grad_norm": 1.363076090812683, "learning_rate": 2.8190088305979357e-08, "loss": 1.5334, "step": 17879 }, { "epoch": 0.977543293741372, "grad_norm": 1.322636604309082, "learning_rate": 2.8053141239912095e-08, "loss": 1.4348, "step": 17880 }, { "epoch": 0.9775979661850935, "grad_norm": 1.6702755689620972, "learning_rate": 2.791652716054971e-08, "loss": 1.2642, "step": 17881 }, { "epoch": 0.9776526386288151, "grad_norm": 1.760593056678772, "learning_rate": 2.778024607245411e-08, "loss": 1.4054, "step": 17882 }, { "epoch": 0.9777073110725366, "grad_norm": 1.7432548999786377, "learning_rate": 2.7644297980177204e-08, "loss": 1.3451, "step": 17883 }, { "epoch": 0.9777619835162582, "grad_norm": 1.8043389320373535, "learning_rate": 2.7508682888257587e-08, "loss": 1.3003, "step": 17884 }, { "epoch": 0.9778166559599798, "grad_norm": 1.853650689125061, "learning_rate": 2.737340080122497e-08, "loss": 1.6215, "step": 17885 }, { "epoch": 0.9778713284037013, "grad_norm": 1.5819785594940186, "learning_rate": 2.7238451723594628e-08, "loss": 1.3014, "step": 17886 }, { "epoch": 0.9779260008474229, "grad_norm": 1.5469056367874146, "learning_rate": 2.7103835659875177e-08, "loss": 1.4406, "step": 17887 }, { "epoch": 0.9779806732911445, "grad_norm": 2.0876615047454834, "learning_rate": 2.69695526145608e-08, "loss": 1.2778, "step": 17888 }, { "epoch": 0.978035345734866, "grad_norm": 1.1641660928726196, "learning_rate": 2.683560259213569e-08, "loss": 1.8035, "step": 17889 }, { "epoch": 0.9780900181785875, "grad_norm": 1.8019523620605469, "learning_rate": 2.6701985597071822e-08, "loss": 1.4637, "step": 17890 }, { "epoch": 0.9781446906223091, "grad_norm": 2.4470112323760986, "learning_rate": 2.6568701633832295e-08, "loss": 1.2127, "step": 17891 }, { "epoch": 0.9781993630660306, "grad_norm": 1.5747740268707275, "learning_rate": 2.643575070686688e-08, "loss": 1.271, "step": 17892 }, { "epoch": 0.9782540355097522, "grad_norm": 1.6535826921463013, "learning_rate": 2.6303132820616474e-08, "loss": 1.4818, "step": 17893 }, { "epoch": 0.9783087079534738, "grad_norm": 1.886962652206421, "learning_rate": 2.617084797950753e-08, "loss": 1.3545, "step": 17894 }, { "epoch": 0.9783633803971953, "grad_norm": 1.706724762916565, "learning_rate": 2.603889618795874e-08, "loss": 1.46, "step": 17895 }, { "epoch": 0.9784180528409169, "grad_norm": 1.623003363609314, "learning_rate": 2.590727745037547e-08, "loss": 1.4379, "step": 17896 }, { "epoch": 0.9784727252846384, "grad_norm": 1.297120451927185, "learning_rate": 2.5775991771153085e-08, "loss": 1.7353, "step": 17897 }, { "epoch": 0.97852739772836, "grad_norm": 1.5200263261795044, "learning_rate": 2.5645039154675867e-08, "loss": 1.5331, "step": 17898 }, { "epoch": 0.9785820701720815, "grad_norm": 1.5973516702651978, "learning_rate": 2.5514419605315867e-08, "loss": 1.4059, "step": 17899 }, { "epoch": 0.978636742615803, "grad_norm": 1.8369144201278687, "learning_rate": 2.538413312743515e-08, "loss": 1.3462, "step": 17900 }, { "epoch": 0.9786914150595246, "grad_norm": 2.9678268432617188, "learning_rate": 2.5254179725384686e-08, "loss": 1.1493, "step": 17901 }, { "epoch": 0.9787460875032462, "grad_norm": 1.514784574508667, "learning_rate": 2.5124559403504334e-08, "loss": 1.5502, "step": 17902 }, { "epoch": 0.9788007599469677, "grad_norm": 1.2019810676574707, "learning_rate": 2.499527216612063e-08, "loss": 1.9727, "step": 17903 }, { "epoch": 0.9788554323906893, "grad_norm": 1.9355292320251465, "learning_rate": 2.486631801755235e-08, "loss": 1.281, "step": 17904 }, { "epoch": 0.9789101048344109, "grad_norm": 2.1251108646392822, "learning_rate": 2.4737696962106038e-08, "loss": 1.4389, "step": 17905 }, { "epoch": 0.9789647772781324, "grad_norm": 1.2858073711395264, "learning_rate": 2.4609409004074937e-08, "loss": 1.26, "step": 17906 }, { "epoch": 0.979019449721854, "grad_norm": 1.1351463794708252, "learning_rate": 2.448145414774339e-08, "loss": 1.7799, "step": 17907 }, { "epoch": 0.9790741221655755, "grad_norm": 1.7406569719314575, "learning_rate": 2.4353832397384647e-08, "loss": 1.1933, "step": 17908 }, { "epoch": 0.979128794609297, "grad_norm": 1.3376554250717163, "learning_rate": 2.4226543757259746e-08, "loss": 1.2731, "step": 17909 }, { "epoch": 0.9791834670530186, "grad_norm": 1.338301181793213, "learning_rate": 2.4099588231619732e-08, "loss": 1.2628, "step": 17910 }, { "epoch": 0.9792381394967401, "grad_norm": 1.8440454006195068, "learning_rate": 2.3972965824703433e-08, "loss": 1.5723, "step": 17911 }, { "epoch": 0.9792928119404617, "grad_norm": 1.65099036693573, "learning_rate": 2.3846676540739687e-08, "loss": 1.4897, "step": 17912 }, { "epoch": 0.9793474843841833, "grad_norm": 1.5495057106018066, "learning_rate": 2.372072038394402e-08, "loss": 1.588, "step": 17913 }, { "epoch": 0.9794021568279048, "grad_norm": 1.5127828121185303, "learning_rate": 2.3595097358525275e-08, "loss": 1.3428, "step": 17914 }, { "epoch": 0.9794568292716264, "grad_norm": 1.4956047534942627, "learning_rate": 2.3469807468675664e-08, "loss": 1.1545, "step": 17915 }, { "epoch": 0.979511501715348, "grad_norm": 1.3831324577331543, "learning_rate": 2.3344850718579613e-08, "loss": 1.2119, "step": 17916 }, { "epoch": 0.9795661741590694, "grad_norm": 1.5195146799087524, "learning_rate": 2.3220227112410455e-08, "loss": 1.4128, "step": 17917 }, { "epoch": 0.979620846602791, "grad_norm": 1.397866129875183, "learning_rate": 2.3095936654328187e-08, "loss": 1.5793, "step": 17918 }, { "epoch": 0.9796755190465126, "grad_norm": 1.621806263923645, "learning_rate": 2.2971979348485053e-08, "loss": 1.4959, "step": 17919 }, { "epoch": 0.9797301914902341, "grad_norm": 1.7665177583694458, "learning_rate": 2.2848355199019954e-08, "loss": 1.5296, "step": 17920 }, { "epoch": 0.9797848639339557, "grad_norm": 1.2389758825302124, "learning_rate": 2.27250642100596e-08, "loss": 1.5902, "step": 17921 }, { "epoch": 0.9798395363776773, "grad_norm": 1.6811963319778442, "learning_rate": 2.26021063857218e-08, "loss": 1.2996, "step": 17922 }, { "epoch": 0.9798942088213988, "grad_norm": 1.927470326423645, "learning_rate": 2.2479481730112162e-08, "loss": 1.4609, "step": 17923 }, { "epoch": 0.9799488812651204, "grad_norm": 1.6951028108596802, "learning_rate": 2.23571902473263e-08, "loss": 1.3032, "step": 17924 }, { "epoch": 0.9800035537088418, "grad_norm": 1.357633113861084, "learning_rate": 2.22352319414465e-08, "loss": 1.1026, "step": 17925 }, { "epoch": 0.9800582261525634, "grad_norm": 1.7870064973831177, "learning_rate": 2.2113606816546172e-08, "loss": 1.4494, "step": 17926 }, { "epoch": 0.980112898596285, "grad_norm": 1.3164647817611694, "learning_rate": 2.1992314876686515e-08, "loss": 1.3311, "step": 17927 }, { "epoch": 0.9801675710400065, "grad_norm": 1.8596994876861572, "learning_rate": 2.187135612591651e-08, "loss": 1.2223, "step": 17928 }, { "epoch": 0.9802222434837281, "grad_norm": 1.5337936878204346, "learning_rate": 2.1750730568277366e-08, "loss": 1.3662, "step": 17929 }, { "epoch": 0.9802769159274497, "grad_norm": 1.681774377822876, "learning_rate": 2.1630438207795866e-08, "loss": 1.3639, "step": 17930 }, { "epoch": 0.9803315883711712, "grad_norm": 1.4892507791519165, "learning_rate": 2.1510479048488797e-08, "loss": 1.5134, "step": 17931 }, { "epoch": 0.9803862608148928, "grad_norm": 1.6335560083389282, "learning_rate": 2.139085309436184e-08, "loss": 1.3642, "step": 17932 }, { "epoch": 0.9804409332586144, "grad_norm": 1.8561586141586304, "learning_rate": 2.127156034941069e-08, "loss": 1.4587, "step": 17933 }, { "epoch": 0.9804956057023358, "grad_norm": 1.6662170886993408, "learning_rate": 2.115260081761772e-08, "loss": 1.222, "step": 17934 }, { "epoch": 0.9805502781460574, "grad_norm": 1.9665299654006958, "learning_rate": 2.10339745029553e-08, "loss": 1.2826, "step": 17935 }, { "epoch": 0.980604950589779, "grad_norm": 1.6869221925735474, "learning_rate": 2.0915681409384713e-08, "loss": 1.5011, "step": 17936 }, { "epoch": 0.9806596230335005, "grad_norm": 1.296487808227539, "learning_rate": 2.0797721540856132e-08, "loss": 1.4492, "step": 17937 }, { "epoch": 0.9807142954772221, "grad_norm": 1.9137346744537354, "learning_rate": 2.0680094901308622e-08, "loss": 1.411, "step": 17938 }, { "epoch": 0.9807689679209437, "grad_norm": 1.5716849565505981, "learning_rate": 2.056280149467016e-08, "loss": 1.4501, "step": 17939 }, { "epoch": 0.9808236403646652, "grad_norm": 1.2412749528884888, "learning_rate": 2.0445841324856497e-08, "loss": 1.4337, "step": 17940 }, { "epoch": 0.9808783128083868, "grad_norm": 1.5331758260726929, "learning_rate": 2.032921439577562e-08, "loss": 1.4774, "step": 17941 }, { "epoch": 0.9809329852521083, "grad_norm": 1.3442000150680542, "learning_rate": 2.021292071131886e-08, "loss": 1.2563, "step": 17942 }, { "epoch": 0.9809876576958299, "grad_norm": 1.8061213493347168, "learning_rate": 2.0096960275370893e-08, "loss": 1.2423, "step": 17943 }, { "epoch": 0.9810423301395514, "grad_norm": 1.6430000066757202, "learning_rate": 1.9981333091805278e-08, "loss": 1.4118, "step": 17944 }, { "epoch": 0.9810970025832729, "grad_norm": 1.3543916940689087, "learning_rate": 1.9866039164481156e-08, "loss": 1.3193, "step": 17945 }, { "epoch": 0.9811516750269945, "grad_norm": 1.532978892326355, "learning_rate": 1.9751078497248777e-08, "loss": 1.6747, "step": 17946 }, { "epoch": 0.9812063474707161, "grad_norm": 1.2137712240219116, "learning_rate": 1.9636451093947296e-08, "loss": 1.2627, "step": 17947 }, { "epoch": 0.9812610199144376, "grad_norm": 1.4089337587356567, "learning_rate": 1.9522156958404758e-08, "loss": 1.5112, "step": 17948 }, { "epoch": 0.9813156923581592, "grad_norm": 1.7991256713867188, "learning_rate": 1.9408196094437004e-08, "loss": 1.5587, "step": 17949 }, { "epoch": 0.9813703648018808, "grad_norm": 1.3879830837249756, "learning_rate": 1.9294568505849876e-08, "loss": 1.6827, "step": 17950 }, { "epoch": 0.9814250372456023, "grad_norm": 1.507717490196228, "learning_rate": 1.9181274196437005e-08, "loss": 1.2802, "step": 17951 }, { "epoch": 0.9814797096893239, "grad_norm": 1.498985767364502, "learning_rate": 1.9068313169983144e-08, "loss": 1.4947, "step": 17952 }, { "epoch": 0.9815343821330454, "grad_norm": 1.6877261400222778, "learning_rate": 1.8955685430258608e-08, "loss": 1.3076, "step": 17953 }, { "epoch": 0.9815890545767669, "grad_norm": 1.9093154668807983, "learning_rate": 1.8843390981024835e-08, "loss": 1.1912, "step": 17954 }, { "epoch": 0.9816437270204885, "grad_norm": 2.056450843811035, "learning_rate": 1.8731429826032154e-08, "loss": 1.3017, "step": 17955 }, { "epoch": 0.98169839946421, "grad_norm": 1.6255135536193848, "learning_rate": 1.8619801969018692e-08, "loss": 1.2362, "step": 17956 }, { "epoch": 0.9817530719079316, "grad_norm": 1.3518010377883911, "learning_rate": 1.8508507413712572e-08, "loss": 1.3385, "step": 17957 }, { "epoch": 0.9818077443516532, "grad_norm": 1.6434788703918457, "learning_rate": 1.8397546163829716e-08, "loss": 1.645, "step": 17958 }, { "epoch": 0.9818624167953747, "grad_norm": 1.3146088123321533, "learning_rate": 1.8286918223074935e-08, "loss": 1.2523, "step": 17959 }, { "epoch": 0.9819170892390963, "grad_norm": 1.3558400869369507, "learning_rate": 1.817662359514194e-08, "loss": 1.4267, "step": 17960 }, { "epoch": 0.9819717616828179, "grad_norm": 1.7684684991836548, "learning_rate": 1.8066662283715562e-08, "loss": 1.4683, "step": 17961 }, { "epoch": 0.9820264341265393, "grad_norm": 2.0031800270080566, "learning_rate": 1.7957034292466204e-08, "loss": 1.3929, "step": 17962 }, { "epoch": 0.9820811065702609, "grad_norm": 1.4373403787612915, "learning_rate": 1.7847739625055372e-08, "loss": 1.5287, "step": 17963 }, { "epoch": 0.9821357790139825, "grad_norm": 1.3889638185501099, "learning_rate": 1.7738778285132373e-08, "loss": 1.4132, "step": 17964 }, { "epoch": 0.982190451457704, "grad_norm": 1.6253094673156738, "learning_rate": 1.7630150276336523e-08, "loss": 1.3461, "step": 17965 }, { "epoch": 0.9822451239014256, "grad_norm": 1.4520784616470337, "learning_rate": 1.7521855602292693e-08, "loss": 1.5586, "step": 17966 }, { "epoch": 0.9822997963451472, "grad_norm": 1.967530608177185, "learning_rate": 1.7413894266619104e-08, "loss": 1.3617, "step": 17967 }, { "epoch": 0.9823544687888687, "grad_norm": 1.3957571983337402, "learning_rate": 1.7306266272921756e-08, "loss": 1.4318, "step": 17968 }, { "epoch": 0.9824091412325903, "grad_norm": 1.8974521160125732, "learning_rate": 1.7198971624792226e-08, "loss": 1.5685, "step": 17969 }, { "epoch": 0.9824638136763117, "grad_norm": 1.4807569980621338, "learning_rate": 1.7092010325814312e-08, "loss": 1.5587, "step": 17970 }, { "epoch": 0.9825184861200333, "grad_norm": 2.0879271030426025, "learning_rate": 1.6985382379559602e-08, "loss": 1.3786, "step": 17971 }, { "epoch": 0.9825731585637549, "grad_norm": 1.6270197629928589, "learning_rate": 1.6879087789589687e-08, "loss": 1.4464, "step": 17972 }, { "epoch": 0.9826278310074764, "grad_norm": 1.3743985891342163, "learning_rate": 1.6773126559452845e-08, "loss": 1.4131, "step": 17973 }, { "epoch": 0.982682503451198, "grad_norm": 1.9514613151550293, "learning_rate": 1.6667498692687358e-08, "loss": 1.4204, "step": 17974 }, { "epoch": 0.9827371758949196, "grad_norm": 1.7910974025726318, "learning_rate": 1.6562204192821507e-08, "loss": 1.306, "step": 17975 }, { "epoch": 0.9827918483386411, "grad_norm": 2.060019016265869, "learning_rate": 1.6457243063370265e-08, "loss": 1.3353, "step": 17976 }, { "epoch": 0.9828465207823627, "grad_norm": 1.5601413249969482, "learning_rate": 1.6352615307838604e-08, "loss": 1.6904, "step": 17977 }, { "epoch": 0.9829011932260843, "grad_norm": 1.3107197284698486, "learning_rate": 1.6248320929719285e-08, "loss": 1.6113, "step": 17978 }, { "epoch": 0.9829558656698058, "grad_norm": 1.9069409370422363, "learning_rate": 1.61443599324973e-08, "loss": 1.2506, "step": 17979 }, { "epoch": 0.9830105381135273, "grad_norm": 1.4609949588775635, "learning_rate": 1.6040732319643204e-08, "loss": 1.4327, "step": 17980 }, { "epoch": 0.9830652105572489, "grad_norm": 1.4182603359222412, "learning_rate": 1.5937438094617564e-08, "loss": 1.4959, "step": 17981 }, { "epoch": 0.9831198830009704, "grad_norm": 1.5321567058563232, "learning_rate": 1.583447726086762e-08, "loss": 1.409, "step": 17982 }, { "epoch": 0.983174555444692, "grad_norm": 1.3039867877960205, "learning_rate": 1.5731849821833955e-08, "loss": 1.4127, "step": 17983 }, { "epoch": 0.9832292278884135, "grad_norm": 1.714293360710144, "learning_rate": 1.5629555780942717e-08, "loss": 1.4355, "step": 17984 }, { "epoch": 0.9832839003321351, "grad_norm": 1.5117080211639404, "learning_rate": 1.5527595141610064e-08, "loss": 1.4329, "step": 17985 }, { "epoch": 0.9833385727758567, "grad_norm": 1.6906718015670776, "learning_rate": 1.5425967907239935e-08, "loss": 1.4383, "step": 17986 }, { "epoch": 0.9833932452195782, "grad_norm": 1.9458582401275635, "learning_rate": 1.5324674081226286e-08, "loss": 1.1776, "step": 17987 }, { "epoch": 0.9834479176632998, "grad_norm": 1.6807782649993896, "learning_rate": 1.5223713666950857e-08, "loss": 1.4832, "step": 17988 }, { "epoch": 0.9835025901070213, "grad_norm": 1.2925760746002197, "learning_rate": 1.5123086667786502e-08, "loss": 1.3055, "step": 17989 }, { "epoch": 0.9835572625507428, "grad_norm": 1.4930256605148315, "learning_rate": 1.5022793087092757e-08, "loss": 1.3321, "step": 17990 }, { "epoch": 0.9836119349944644, "grad_norm": 1.785171627998352, "learning_rate": 1.4922832928218058e-08, "loss": 1.3945, "step": 17991 }, { "epoch": 0.983666607438186, "grad_norm": 1.5308020114898682, "learning_rate": 1.4823206194499728e-08, "loss": 1.4621, "step": 17992 }, { "epoch": 0.9837212798819075, "grad_norm": 1.637094497680664, "learning_rate": 1.4723912889266224e-08, "loss": 1.4373, "step": 17993 }, { "epoch": 0.9837759523256291, "grad_norm": 1.6727687120437622, "learning_rate": 1.4624953015832666e-08, "loss": 1.3567, "step": 17994 }, { "epoch": 0.9838306247693507, "grad_norm": 1.5458228588104248, "learning_rate": 1.452632657750308e-08, "loss": 1.2177, "step": 17995 }, { "epoch": 0.9838852972130722, "grad_norm": 1.540348768234253, "learning_rate": 1.4428033577571498e-08, "loss": 1.2315, "step": 17996 }, { "epoch": 0.9839399696567938, "grad_norm": 2.6443634033203125, "learning_rate": 1.433007401931974e-08, "loss": 1.0925, "step": 17997 }, { "epoch": 0.9839946421005152, "grad_norm": 1.7380043268203735, "learning_rate": 1.423244790601852e-08, "loss": 1.3435, "step": 17998 }, { "epoch": 0.9840493145442368, "grad_norm": 1.4856677055358887, "learning_rate": 1.4135155240928566e-08, "loss": 1.5572, "step": 17999 }, { "epoch": 0.9841039869879584, "grad_norm": 1.2537745237350464, "learning_rate": 1.4038196027298389e-08, "loss": 1.5757, "step": 18000 }, { "epoch": 0.9841586594316799, "grad_norm": 1.246057152748108, "learning_rate": 1.3941570268365401e-08, "loss": 1.3587, "step": 18001 }, { "epoch": 0.9842133318754015, "grad_norm": 1.598376750946045, "learning_rate": 1.3845277967355908e-08, "loss": 1.3236, "step": 18002 }, { "epoch": 0.9842680043191231, "grad_norm": 1.7594540119171143, "learning_rate": 1.3749319127486228e-08, "loss": 1.4297, "step": 18003 }, { "epoch": 0.9843226767628446, "grad_norm": 1.7274212837219238, "learning_rate": 1.3653693751960461e-08, "loss": 1.646, "step": 18004 }, { "epoch": 0.9843773492065662, "grad_norm": 1.6518539190292358, "learning_rate": 1.3558401843971613e-08, "loss": 1.3434, "step": 18005 }, { "epoch": 0.9844320216502878, "grad_norm": 1.4709036350250244, "learning_rate": 1.3463443406701581e-08, "loss": 1.5098, "step": 18006 }, { "epoch": 0.9844866940940092, "grad_norm": 1.3632540702819824, "learning_rate": 1.3368818443321163e-08, "loss": 1.4982, "step": 18007 }, { "epoch": 0.9845413665377308, "grad_norm": 1.9034194946289062, "learning_rate": 1.3274526956990052e-08, "loss": 1.4176, "step": 18008 }, { "epoch": 0.9845960389814524, "grad_norm": 1.5080121755599976, "learning_rate": 1.3180568950856843e-08, "loss": 1.5084, "step": 18009 }, { "epoch": 0.9846507114251739, "grad_norm": 1.527359127998352, "learning_rate": 1.3086944428060132e-08, "loss": 1.3376, "step": 18010 }, { "epoch": 0.9847053838688955, "grad_norm": 2.068265914916992, "learning_rate": 1.2993653391725204e-08, "loss": 1.5851, "step": 18011 }, { "epoch": 0.984760056312617, "grad_norm": 1.263920545578003, "learning_rate": 1.2900695844967336e-08, "loss": 1.4835, "step": 18012 }, { "epoch": 0.9848147287563386, "grad_norm": 1.8794318437576294, "learning_rate": 1.2808071790889609e-08, "loss": 1.4689, "step": 18013 }, { "epoch": 0.9848694012000602, "grad_norm": 1.5197460651397705, "learning_rate": 1.271578123258732e-08, "loss": 1.284, "step": 18014 }, { "epoch": 0.9849240736437817, "grad_norm": 1.688350796699524, "learning_rate": 1.2623824173140231e-08, "loss": 1.3562, "step": 18015 }, { "epoch": 0.9849787460875032, "grad_norm": 1.5959820747375488, "learning_rate": 1.2532200615620327e-08, "loss": 1.4678, "step": 18016 }, { "epoch": 0.9850334185312248, "grad_norm": 1.3211737871170044, "learning_rate": 1.2440910563086273e-08, "loss": 1.2059, "step": 18017 }, { "epoch": 0.9850880909749463, "grad_norm": 3.10579252243042, "learning_rate": 1.234995401858785e-08, "loss": 1.2743, "step": 18018 }, { "epoch": 0.9851427634186679, "grad_norm": 1.6871672868728638, "learning_rate": 1.2259330985159302e-08, "loss": 1.5452, "step": 18019 }, { "epoch": 0.9851974358623895, "grad_norm": 1.5300266742706299, "learning_rate": 1.2169041465830423e-08, "loss": 1.3809, "step": 18020 }, { "epoch": 0.985252108306111, "grad_norm": 1.578886866569519, "learning_rate": 1.2079085463613249e-08, "loss": 1.4352, "step": 18021 }, { "epoch": 0.9853067807498326, "grad_norm": 1.458963394165039, "learning_rate": 1.1989462981513156e-08, "loss": 1.3404, "step": 18022 }, { "epoch": 0.9853614531935542, "grad_norm": 1.3969447612762451, "learning_rate": 1.1900174022522192e-08, "loss": 1.7182, "step": 18023 }, { "epoch": 0.9854161256372757, "grad_norm": 1.8913888931274414, "learning_rate": 1.181121858962353e-08, "loss": 1.4212, "step": 18024 }, { "epoch": 0.9854707980809972, "grad_norm": 1.7075563669204712, "learning_rate": 1.1722596685784793e-08, "loss": 1.5361, "step": 18025 }, { "epoch": 0.9855254705247187, "grad_norm": 1.8877577781677246, "learning_rate": 1.1634308313966947e-08, "loss": 1.2929, "step": 18026 }, { "epoch": 0.9855801429684403, "grad_norm": 1.6296515464782715, "learning_rate": 1.1546353477118743e-08, "loss": 1.2506, "step": 18027 }, { "epoch": 0.9856348154121619, "grad_norm": 1.624483346939087, "learning_rate": 1.1458732178175613e-08, "loss": 1.6111, "step": 18028 }, { "epoch": 0.9856894878558834, "grad_norm": 1.6400288343429565, "learning_rate": 1.1371444420065214e-08, "loss": 1.3797, "step": 18029 }, { "epoch": 0.985744160299605, "grad_norm": 1.399943232536316, "learning_rate": 1.1284490205700771e-08, "loss": 1.3655, "step": 18030 }, { "epoch": 0.9857988327433266, "grad_norm": 2.2539315223693848, "learning_rate": 1.1197869537986627e-08, "loss": 1.3102, "step": 18031 }, { "epoch": 0.9858535051870481, "grad_norm": 1.5693538188934326, "learning_rate": 1.1111582419814914e-08, "loss": 1.4145, "step": 18032 }, { "epoch": 0.9859081776307697, "grad_norm": 1.3730047941207886, "learning_rate": 1.102562885406666e-08, "loss": 1.2991, "step": 18033 }, { "epoch": 0.9859628500744912, "grad_norm": 1.6133290529251099, "learning_rate": 1.0940008843612904e-08, "loss": 1.3354, "step": 18034 }, { "epoch": 0.9860175225182127, "grad_norm": 1.352428674697876, "learning_rate": 1.0854722391312467e-08, "loss": 1.4896, "step": 18035 }, { "epoch": 0.9860721949619343, "grad_norm": 1.2281467914581299, "learning_rate": 1.0769769500013072e-08, "loss": 1.391, "step": 18036 }, { "epoch": 0.9861268674056559, "grad_norm": 1.5104163885116577, "learning_rate": 1.068515017255245e-08, "loss": 1.3098, "step": 18037 }, { "epoch": 0.9861815398493774, "grad_norm": 2.0637481212615967, "learning_rate": 1.0600864411753897e-08, "loss": 1.3448, "step": 18038 }, { "epoch": 0.986236212293099, "grad_norm": 1.790215253829956, "learning_rate": 1.051691222043405e-08, "loss": 1.4254, "step": 18039 }, { "epoch": 0.9862908847368205, "grad_norm": 1.5604932308197021, "learning_rate": 1.0433293601395112e-08, "loss": 1.4298, "step": 18040 }, { "epoch": 0.9863455571805421, "grad_norm": 1.4858559370040894, "learning_rate": 1.0350008557430402e-08, "loss": 1.4925, "step": 18041 }, { "epoch": 0.9864002296242637, "grad_norm": 1.3948603868484497, "learning_rate": 1.0267057091319921e-08, "loss": 1.5659, "step": 18042 }, { "epoch": 0.9864549020679851, "grad_norm": 1.4020525217056274, "learning_rate": 1.0184439205833675e-08, "loss": 1.5513, "step": 18043 }, { "epoch": 0.9865095745117067, "grad_norm": 1.890539288520813, "learning_rate": 1.0102154903731676e-08, "loss": 1.4782, "step": 18044 }, { "epoch": 0.9865642469554283, "grad_norm": 1.511174201965332, "learning_rate": 1.0020204187759507e-08, "loss": 1.4189, "step": 18045 }, { "epoch": 0.9866189193991498, "grad_norm": 1.3908288478851318, "learning_rate": 9.93858706065609e-09, "loss": 1.3812, "step": 18046 }, { "epoch": 0.9866735918428714, "grad_norm": 2.0429847240448, "learning_rate": 9.857303525145911e-09, "loss": 1.1925, "step": 18047 }, { "epoch": 0.986728264286593, "grad_norm": 1.8387985229492188, "learning_rate": 9.776353583942356e-09, "loss": 1.2525, "step": 18048 }, { "epoch": 0.9867829367303145, "grad_norm": 1.716924786567688, "learning_rate": 9.695737239748815e-09, "loss": 1.4338, "step": 18049 }, { "epoch": 0.9868376091740361, "grad_norm": 1.593924880027771, "learning_rate": 9.615454495257581e-09, "loss": 1.4496, "step": 18050 }, { "epoch": 0.9868922816177577, "grad_norm": 1.586184024810791, "learning_rate": 9.535505353149843e-09, "loss": 1.1942, "step": 18051 }, { "epoch": 0.9869469540614791, "grad_norm": 1.3215216398239136, "learning_rate": 9.455889816095687e-09, "loss": 1.2033, "step": 18052 }, { "epoch": 0.9870016265052007, "grad_norm": 1.625131368637085, "learning_rate": 9.376607886751876e-09, "loss": 1.8134, "step": 18053 }, { "epoch": 0.9870562989489222, "grad_norm": 1.7009878158569336, "learning_rate": 9.297659567767403e-09, "loss": 1.3044, "step": 18054 }, { "epoch": 0.9871109713926438, "grad_norm": 1.2099274396896362, "learning_rate": 9.219044861777937e-09, "loss": 1.6292, "step": 18055 }, { "epoch": 0.9871656438363654, "grad_norm": 1.6765145063400269, "learning_rate": 9.140763771408045e-09, "loss": 1.4756, "step": 18056 }, { "epoch": 0.9872203162800869, "grad_norm": 1.6392048597335815, "learning_rate": 9.062816299272304e-09, "loss": 1.2656, "step": 18057 }, { "epoch": 0.9872749887238085, "grad_norm": 1.6783756017684937, "learning_rate": 8.985202447974183e-09, "loss": 1.403, "step": 18058 }, { "epoch": 0.9873296611675301, "grad_norm": 1.884721279144287, "learning_rate": 8.907922220104947e-09, "loss": 1.2962, "step": 18059 }, { "epoch": 0.9873843336112516, "grad_norm": 1.3053677082061768, "learning_rate": 8.830975618244752e-09, "loss": 1.4756, "step": 18060 }, { "epoch": 0.9874390060549731, "grad_norm": 1.6717702150344849, "learning_rate": 8.754362644963765e-09, "loss": 1.3994, "step": 18061 }, { "epoch": 0.9874936784986947, "grad_norm": 1.3614416122436523, "learning_rate": 8.67808330281883e-09, "loss": 1.3982, "step": 18062 }, { "epoch": 0.9875483509424162, "grad_norm": 1.477095127105713, "learning_rate": 8.602137594359016e-09, "loss": 1.3565, "step": 18063 }, { "epoch": 0.9876030233861378, "grad_norm": 1.700590968132019, "learning_rate": 8.526525522118967e-09, "loss": 1.3562, "step": 18064 }, { "epoch": 0.9876576958298594, "grad_norm": 1.962034821510315, "learning_rate": 8.451247088623327e-09, "loss": 1.3763, "step": 18065 }, { "epoch": 0.9877123682735809, "grad_norm": 1.376393437385559, "learning_rate": 8.376302296387862e-09, "loss": 1.4634, "step": 18066 }, { "epoch": 0.9877670407173025, "grad_norm": 1.4616056680679321, "learning_rate": 8.301691147912794e-09, "loss": 1.436, "step": 18067 }, { "epoch": 0.987821713161024, "grad_norm": 1.717472791671753, "learning_rate": 8.227413645690574e-09, "loss": 1.3723, "step": 18068 }, { "epoch": 0.9878763856047456, "grad_norm": 1.320258617401123, "learning_rate": 8.15346979220144e-09, "loss": 1.3632, "step": 18069 }, { "epoch": 0.9879310580484671, "grad_norm": 1.3680460453033447, "learning_rate": 8.07985958991453e-09, "loss": 1.4131, "step": 18070 }, { "epoch": 0.9879857304921886, "grad_norm": 1.5699872970581055, "learning_rate": 8.006583041287874e-09, "loss": 1.427, "step": 18071 }, { "epoch": 0.9880404029359102, "grad_norm": 1.3466259241104126, "learning_rate": 7.933640148768406e-09, "loss": 1.3904, "step": 18072 }, { "epoch": 0.9880950753796318, "grad_norm": 1.860918402671814, "learning_rate": 7.861030914791956e-09, "loss": 1.4165, "step": 18073 }, { "epoch": 0.9881497478233533, "grad_norm": 1.8890427350997925, "learning_rate": 7.78875534178325e-09, "loss": 1.4559, "step": 18074 }, { "epoch": 0.9882044202670749, "grad_norm": 1.4862157106399536, "learning_rate": 7.716813432154802e-09, "loss": 1.4981, "step": 18075 }, { "epoch": 0.9882590927107965, "grad_norm": 1.683876633644104, "learning_rate": 7.645205188310245e-09, "loss": 1.4054, "step": 18076 }, { "epoch": 0.988313765154518, "grad_norm": 1.3162603378295898, "learning_rate": 7.57393061263989e-09, "loss": 1.5334, "step": 18077 }, { "epoch": 0.9883684375982396, "grad_norm": 1.7347527742385864, "learning_rate": 7.502989707524056e-09, "loss": 1.3234, "step": 18078 }, { "epoch": 0.9884231100419612, "grad_norm": 1.4160617589950562, "learning_rate": 7.432382475330846e-09, "loss": 1.2594, "step": 18079 }, { "epoch": 0.9884777824856826, "grad_norm": 1.7525818347930908, "learning_rate": 7.362108918418376e-09, "loss": 1.3439, "step": 18080 }, { "epoch": 0.9885324549294042, "grad_norm": 1.555890440940857, "learning_rate": 7.292169039134767e-09, "loss": 1.2768, "step": 18081 }, { "epoch": 0.9885871273731257, "grad_norm": 1.6633644104003906, "learning_rate": 7.222562839813707e-09, "loss": 1.5723, "step": 18082 }, { "epoch": 0.9886417998168473, "grad_norm": 1.4135562181472778, "learning_rate": 7.153290322780004e-09, "loss": 1.62, "step": 18083 }, { "epoch": 0.9886964722605689, "grad_norm": 1.5075526237487793, "learning_rate": 7.084351490347363e-09, "loss": 1.4819, "step": 18084 }, { "epoch": 0.9887511447042904, "grad_norm": 1.6275900602340698, "learning_rate": 7.015746344816166e-09, "loss": 1.5583, "step": 18085 }, { "epoch": 0.988805817148012, "grad_norm": 1.723885416984558, "learning_rate": 6.947474888480132e-09, "loss": 1.4312, "step": 18086 }, { "epoch": 0.9888604895917336, "grad_norm": 1.2539421319961548, "learning_rate": 6.8795371236163315e-09, "loss": 1.6816, "step": 18087 }, { "epoch": 0.988915162035455, "grad_norm": 1.9384711980819702, "learning_rate": 6.811933052494057e-09, "loss": 1.6991, "step": 18088 }, { "epoch": 0.9889698344791766, "grad_norm": 1.434517502784729, "learning_rate": 6.744662677371505e-09, "loss": 1.4955, "step": 18089 }, { "epoch": 0.9890245069228982, "grad_norm": 1.6390244960784912, "learning_rate": 6.6777260004946555e-09, "loss": 1.4965, "step": 18090 }, { "epoch": 0.9890791793666197, "grad_norm": 1.6335809230804443, "learning_rate": 6.611123024098387e-09, "loss": 1.1713, "step": 18091 }, { "epoch": 0.9891338518103413, "grad_norm": 2.520124912261963, "learning_rate": 6.544853750407587e-09, "loss": 1.105, "step": 18092 }, { "epoch": 0.9891885242540629, "grad_norm": 1.5429149866104126, "learning_rate": 6.478918181633819e-09, "loss": 1.6006, "step": 18093 }, { "epoch": 0.9892431966977844, "grad_norm": 1.5056575536727905, "learning_rate": 6.413316319979768e-09, "loss": 1.6273, "step": 18094 }, { "epoch": 0.989297869141506, "grad_norm": 1.2895790338516235, "learning_rate": 6.3480481676359006e-09, "loss": 1.4521, "step": 18095 }, { "epoch": 0.9893525415852275, "grad_norm": 1.7994160652160645, "learning_rate": 6.283113726781587e-09, "loss": 1.2093, "step": 18096 }, { "epoch": 0.989407214028949, "grad_norm": 1.33642578125, "learning_rate": 6.218512999583981e-09, "loss": 1.4344, "step": 18097 }, { "epoch": 0.9894618864726706, "grad_norm": 1.7698307037353516, "learning_rate": 6.154245988202467e-09, "loss": 1.3207, "step": 18098 }, { "epoch": 0.9895165589163921, "grad_norm": 1.6382042169570923, "learning_rate": 6.0903126947819965e-09, "loss": 1.4397, "step": 18099 }, { "epoch": 0.9895712313601137, "grad_norm": 1.5232594013214111, "learning_rate": 6.026713121457528e-09, "loss": 1.455, "step": 18100 }, { "epoch": 0.9896259038038353, "grad_norm": 1.8474066257476807, "learning_rate": 5.9634472703518075e-09, "loss": 1.2625, "step": 18101 }, { "epoch": 0.9896805762475568, "grad_norm": 1.5721298456192017, "learning_rate": 5.90051514357981e-09, "loss": 1.5156, "step": 18102 }, { "epoch": 0.9897352486912784, "grad_norm": 1.5552442073822021, "learning_rate": 5.837916743239857e-09, "loss": 1.418, "step": 18103 }, { "epoch": 0.989789921135, "grad_norm": 1.3311223983764648, "learning_rate": 5.77565207142472e-09, "loss": 1.4863, "step": 18104 }, { "epoch": 0.9898445935787215, "grad_norm": 1.5611991882324219, "learning_rate": 5.713721130212735e-09, "loss": 1.4181, "step": 18105 }, { "epoch": 0.989899266022443, "grad_norm": 1.7443827390670776, "learning_rate": 5.6521239216722476e-09, "loss": 1.4006, "step": 18106 }, { "epoch": 0.9899539384661646, "grad_norm": 1.286914348602295, "learning_rate": 5.590860447858282e-09, "loss": 1.4607, "step": 18107 }, { "epoch": 0.9900086109098861, "grad_norm": 1.3641780614852905, "learning_rate": 5.529930710820308e-09, "loss": 1.4217, "step": 18108 }, { "epoch": 0.9900632833536077, "grad_norm": 1.6571065187454224, "learning_rate": 5.4693347125889255e-09, "loss": 1.3005, "step": 18109 }, { "epoch": 0.9901179557973292, "grad_norm": 1.970203161239624, "learning_rate": 5.409072455190289e-09, "loss": 1.4486, "step": 18110 }, { "epoch": 0.9901726282410508, "grad_norm": 1.4076430797576904, "learning_rate": 5.349143940635015e-09, "loss": 1.2029, "step": 18111 }, { "epoch": 0.9902273006847724, "grad_norm": 1.7806812524795532, "learning_rate": 5.289549170925945e-09, "loss": 1.4439, "step": 18112 }, { "epoch": 0.9902819731284939, "grad_norm": 1.3828914165496826, "learning_rate": 5.230288148051488e-09, "loss": 1.2419, "step": 18113 }, { "epoch": 0.9903366455722155, "grad_norm": 1.3813464641571045, "learning_rate": 5.1713608739911714e-09, "loss": 1.5231, "step": 18114 }, { "epoch": 0.990391318015937, "grad_norm": 1.5727882385253906, "learning_rate": 5.112767350713421e-09, "loss": 1.4964, "step": 18115 }, { "epoch": 0.9904459904596585, "grad_norm": 1.5687355995178223, "learning_rate": 5.05450758017334e-09, "loss": 1.4619, "step": 18116 }, { "epoch": 0.9905006629033801, "grad_norm": 1.507287621498108, "learning_rate": 4.996581564318259e-09, "loss": 1.6505, "step": 18117 }, { "epoch": 0.9905553353471017, "grad_norm": 1.3901402950286865, "learning_rate": 4.938989305079966e-09, "loss": 1.4808, "step": 18118 }, { "epoch": 0.9906100077908232, "grad_norm": 1.4920463562011719, "learning_rate": 4.881730804383589e-09, "loss": 1.4212, "step": 18119 }, { "epoch": 0.9906646802345448, "grad_norm": 1.1959203481674194, "learning_rate": 4.82480606413982e-09, "loss": 1.5185, "step": 18120 }, { "epoch": 0.9907193526782664, "grad_norm": 1.5520111322402954, "learning_rate": 4.7682150862515816e-09, "loss": 1.461, "step": 18121 }, { "epoch": 0.9907740251219879, "grad_norm": 1.4971095323562622, "learning_rate": 4.711957872606254e-09, "loss": 1.4436, "step": 18122 }, { "epoch": 0.9908286975657095, "grad_norm": 2.21820330619812, "learning_rate": 4.656034425083445e-09, "loss": 1.2902, "step": 18123 }, { "epoch": 0.9908833700094309, "grad_norm": 1.9642655849456787, "learning_rate": 4.600444745550548e-09, "loss": 1.3559, "step": 18124 }, { "epoch": 0.9909380424531525, "grad_norm": 1.6996628046035767, "learning_rate": 4.5451888358627465e-09, "loss": 1.4571, "step": 18125 }, { "epoch": 0.9909927148968741, "grad_norm": 1.4467061758041382, "learning_rate": 4.490266697867451e-09, "loss": 1.8258, "step": 18126 }, { "epoch": 0.9910473873405956, "grad_norm": 1.7566323280334473, "learning_rate": 4.435678333397641e-09, "loss": 1.423, "step": 18127 }, { "epoch": 0.9911020597843172, "grad_norm": 1.7552000284194946, "learning_rate": 4.381423744275193e-09, "loss": 1.279, "step": 18128 }, { "epoch": 0.9911567322280388, "grad_norm": 1.5930323600769043, "learning_rate": 4.327502932311989e-09, "loss": 1.3531, "step": 18129 }, { "epoch": 0.9912114046717603, "grad_norm": 1.650418758392334, "learning_rate": 4.273915899309922e-09, "loss": 1.3604, "step": 18130 }, { "epoch": 0.9912660771154819, "grad_norm": 1.3875501155853271, "learning_rate": 4.220662647056451e-09, "loss": 1.4299, "step": 18131 }, { "epoch": 0.9913207495592035, "grad_norm": 1.6476867198944092, "learning_rate": 4.167743177331262e-09, "loss": 1.309, "step": 18132 }, { "epoch": 0.9913754220029249, "grad_norm": 1.7114713191986084, "learning_rate": 4.115157491901834e-09, "loss": 1.5811, "step": 18133 }, { "epoch": 0.9914300944466465, "grad_norm": 1.8430522680282593, "learning_rate": 4.062905592522315e-09, "loss": 1.3977, "step": 18134 }, { "epoch": 0.9914847668903681, "grad_norm": 1.7030376195907593, "learning_rate": 4.010987480939088e-09, "loss": 1.3716, "step": 18135 }, { "epoch": 0.9915394393340896, "grad_norm": 1.4452911615371704, "learning_rate": 3.959403158885211e-09, "loss": 1.2723, "step": 18136 }, { "epoch": 0.9915941117778112, "grad_norm": 1.5900694131851196, "learning_rate": 3.9081526280837504e-09, "loss": 1.2537, "step": 18137 }, { "epoch": 0.9916487842215328, "grad_norm": 1.3726465702056885, "learning_rate": 3.857235890245559e-09, "loss": 1.3986, "step": 18138 }, { "epoch": 0.9917034566652543, "grad_norm": 1.6679412126541138, "learning_rate": 3.8066529470703885e-09, "loss": 1.4356, "step": 18139 }, { "epoch": 0.9917581291089759, "grad_norm": 1.6793885231018066, "learning_rate": 3.756403800249109e-09, "loss": 1.1265, "step": 18140 }, { "epoch": 0.9918128015526974, "grad_norm": 1.6322728395462036, "learning_rate": 3.7064884514570464e-09, "loss": 1.2851, "step": 18141 }, { "epoch": 0.991867473996419, "grad_norm": 1.183749794960022, "learning_rate": 3.656906902362867e-09, "loss": 1.503, "step": 18142 }, { "epoch": 0.9919221464401405, "grad_norm": 1.658958077430725, "learning_rate": 3.607659154621912e-09, "loss": 1.4031, "step": 18143 }, { "epoch": 0.991976818883862, "grad_norm": 2.4630651473999023, "learning_rate": 3.5587452098784225e-09, "loss": 1.2942, "step": 18144 }, { "epoch": 0.9920314913275836, "grad_norm": 1.6995733976364136, "learning_rate": 3.5101650697655363e-09, "loss": 1.2956, "step": 18145 }, { "epoch": 0.9920861637713052, "grad_norm": 1.7650723457336426, "learning_rate": 3.46191873590529e-09, "loss": 1.3131, "step": 18146 }, { "epoch": 0.9921408362150267, "grad_norm": 1.8817861080169678, "learning_rate": 3.414006209909726e-09, "loss": 1.3226, "step": 18147 }, { "epoch": 0.9921955086587483, "grad_norm": 1.722183108329773, "learning_rate": 3.366427493378677e-09, "loss": 1.4935, "step": 18148 }, { "epoch": 0.9922501811024699, "grad_norm": 2.115231513977051, "learning_rate": 3.3191825878997606e-09, "loss": 1.2734, "step": 18149 }, { "epoch": 0.9923048535461914, "grad_norm": 1.4192482233047485, "learning_rate": 3.2722714950517154e-09, "loss": 1.5362, "step": 18150 }, { "epoch": 0.992359525989913, "grad_norm": 1.761160135269165, "learning_rate": 3.225694216401065e-09, "loss": 1.3869, "step": 18151 }, { "epoch": 0.9924141984336345, "grad_norm": 1.5872819423675537, "learning_rate": 3.1794507535010122e-09, "loss": 1.4736, "step": 18152 }, { "epoch": 0.992468870877356, "grad_norm": 1.758249282836914, "learning_rate": 3.1335411078992074e-09, "loss": 1.3534, "step": 18153 }, { "epoch": 0.9925235433210776, "grad_norm": 1.2632085084915161, "learning_rate": 3.0879652811255377e-09, "loss": 1.5138, "step": 18154 }, { "epoch": 0.9925782157647991, "grad_norm": 1.222015619277954, "learning_rate": 3.0427232747043402e-09, "loss": 1.3727, "step": 18155 }, { "epoch": 0.9926328882085207, "grad_norm": 1.6264420747756958, "learning_rate": 2.997815090144407e-09, "loss": 1.2681, "step": 18156 }, { "epoch": 0.9926875606522423, "grad_norm": 1.7212046384811401, "learning_rate": 2.95324072894565e-09, "loss": 1.3123, "step": 18157 }, { "epoch": 0.9927422330959638, "grad_norm": 1.5213534832000732, "learning_rate": 2.909000192597988e-09, "loss": 1.1844, "step": 18158 }, { "epoch": 0.9927969055396854, "grad_norm": 1.4827032089233398, "learning_rate": 2.865093482576908e-09, "loss": 1.2603, "step": 18159 }, { "epoch": 0.992851577983407, "grad_norm": 1.5709662437438965, "learning_rate": 2.821520600350125e-09, "loss": 1.3459, "step": 18160 }, { "epoch": 0.9929062504271284, "grad_norm": 1.5352723598480225, "learning_rate": 2.7782815473720304e-09, "loss": 1.3755, "step": 18161 }, { "epoch": 0.99296092287085, "grad_norm": 1.3729197978973389, "learning_rate": 2.7353763250848044e-09, "loss": 1.6148, "step": 18162 }, { "epoch": 0.9930155953145716, "grad_norm": 1.8073668479919434, "learning_rate": 2.692804934923965e-09, "loss": 1.4886, "step": 18163 }, { "epoch": 0.9930702677582931, "grad_norm": 1.3957973718643188, "learning_rate": 2.6505673783094875e-09, "loss": 1.6121, "step": 18164 }, { "epoch": 0.9931249402020147, "grad_norm": 1.7311738729476929, "learning_rate": 2.608663656652466e-09, "loss": 1.3246, "step": 18165 }, { "epoch": 0.9931796126457363, "grad_norm": 1.562618613243103, "learning_rate": 2.5670937713517807e-09, "loss": 1.4522, "step": 18166 }, { "epoch": 0.9932342850894578, "grad_norm": 1.7529690265655518, "learning_rate": 2.525857723795211e-09, "loss": 1.2945, "step": 18167 }, { "epoch": 0.9932889575331794, "grad_norm": 1.358880877494812, "learning_rate": 2.4849555153594328e-09, "loss": 1.3575, "step": 18168 }, { "epoch": 0.9933436299769008, "grad_norm": 1.6148569583892822, "learning_rate": 2.4443871474122415e-09, "loss": 1.5422, "step": 18169 }, { "epoch": 0.9933983024206224, "grad_norm": 1.6303845643997192, "learning_rate": 2.4041526213058885e-09, "loss": 1.4106, "step": 18170 }, { "epoch": 0.993452974864344, "grad_norm": 1.8089519739151, "learning_rate": 2.364251938384854e-09, "loss": 1.3607, "step": 18171 }, { "epoch": 0.9935076473080655, "grad_norm": 1.447957992553711, "learning_rate": 2.3246850999825153e-09, "loss": 1.1702, "step": 18172 }, { "epoch": 0.9935623197517871, "grad_norm": 1.590786337852478, "learning_rate": 2.2854521074189284e-09, "loss": 1.3586, "step": 18173 }, { "epoch": 0.9936169921955087, "grad_norm": 1.3657647371292114, "learning_rate": 2.246552962004156e-09, "loss": 1.4832, "step": 18174 }, { "epoch": 0.9936716646392302, "grad_norm": 1.3317488431930542, "learning_rate": 2.207987665037159e-09, "loss": 1.2829, "step": 18175 }, { "epoch": 0.9937263370829518, "grad_norm": 1.1285803318023682, "learning_rate": 2.1697562178069067e-09, "loss": 1.5015, "step": 18176 }, { "epoch": 0.9937810095266734, "grad_norm": 1.7562052011489868, "learning_rate": 2.1318586215890447e-09, "loss": 1.3882, "step": 18177 }, { "epoch": 0.9938356819703948, "grad_norm": 1.6808725595474243, "learning_rate": 2.0942948776481175e-09, "loss": 1.594, "step": 18178 }, { "epoch": 0.9938903544141164, "grad_norm": 1.560161828994751, "learning_rate": 2.0570649872408977e-09, "loss": 1.3041, "step": 18179 }, { "epoch": 0.993945026857838, "grad_norm": 2.6145780086517334, "learning_rate": 2.020168951608614e-09, "loss": 1.4714, "step": 18180 }, { "epoch": 0.9939996993015595, "grad_norm": 2.066871404647827, "learning_rate": 1.983606771983615e-09, "loss": 1.3759, "step": 18181 }, { "epoch": 0.9940543717452811, "grad_norm": 2.055436611175537, "learning_rate": 1.947378449587145e-09, "loss": 1.3643, "step": 18182 }, { "epoch": 0.9941090441890026, "grad_norm": 1.7505477666854858, "learning_rate": 1.9114839856293475e-09, "loss": 1.4003, "step": 18183 }, { "epoch": 0.9941637166327242, "grad_norm": 3.3583602905273438, "learning_rate": 1.875923381307043e-09, "loss": 1.3091, "step": 18184 }, { "epoch": 0.9942183890764458, "grad_norm": 2.259441375732422, "learning_rate": 1.8406966378103909e-09, "loss": 1.1654, "step": 18185 }, { "epoch": 0.9942730615201673, "grad_norm": 1.576789140701294, "learning_rate": 1.8058037563140064e-09, "loss": 1.2768, "step": 18186 }, { "epoch": 0.9943277339638888, "grad_norm": 1.6126034259796143, "learning_rate": 1.7712447379825137e-09, "loss": 1.4422, "step": 18187 }, { "epoch": 0.9943824064076104, "grad_norm": 1.3190374374389648, "learning_rate": 1.7370195839716552e-09, "loss": 1.3703, "step": 18188 }, { "epoch": 0.9944370788513319, "grad_norm": 1.4161146879196167, "learning_rate": 1.7031282954227402e-09, "loss": 1.5144, "step": 18189 }, { "epoch": 0.9944917512950535, "grad_norm": 1.8257801532745361, "learning_rate": 1.6695708734693061e-09, "loss": 1.5044, "step": 18190 }, { "epoch": 0.9945464237387751, "grad_norm": 1.3141149282455444, "learning_rate": 1.6363473192293478e-09, "loss": 1.5389, "step": 18191 }, { "epoch": 0.9946010961824966, "grad_norm": 1.4147919416427612, "learning_rate": 1.6034576338141982e-09, "loss": 1.4677, "step": 18192 }, { "epoch": 0.9946557686262182, "grad_norm": 1.3307799100875854, "learning_rate": 1.5709018183218684e-09, "loss": 1.6296, "step": 18193 }, { "epoch": 0.9947104410699398, "grad_norm": 1.6473345756530762, "learning_rate": 1.5386798738381558e-09, "loss": 1.3737, "step": 18194 }, { "epoch": 0.9947651135136613, "grad_norm": 1.476227879524231, "learning_rate": 1.5067918014410877e-09, "loss": 1.2512, "step": 18195 }, { "epoch": 0.9948197859573829, "grad_norm": 1.3839751482009888, "learning_rate": 1.475237602194257e-09, "loss": 1.195, "step": 18196 }, { "epoch": 0.9948744584011043, "grad_norm": 1.5173226594924927, "learning_rate": 1.444017277151266e-09, "loss": 1.1772, "step": 18197 }, { "epoch": 0.9949291308448259, "grad_norm": 1.627895474433899, "learning_rate": 1.413130827354614e-09, "loss": 1.511, "step": 18198 }, { "epoch": 0.9949838032885475, "grad_norm": 1.6661964654922485, "learning_rate": 1.3825782538368083e-09, "loss": 1.5418, "step": 18199 }, { "epoch": 0.995038475732269, "grad_norm": 1.728510856628418, "learning_rate": 1.3523595576159232e-09, "loss": 1.4258, "step": 18200 }, { "epoch": 0.9950931481759906, "grad_norm": 1.6354501247406006, "learning_rate": 1.3224747397033721e-09, "loss": 1.3992, "step": 18201 }, { "epoch": 0.9951478206197122, "grad_norm": 1.3571319580078125, "learning_rate": 1.2929238010961354e-09, "loss": 1.304, "step": 18202 }, { "epoch": 0.9952024930634337, "grad_norm": 1.6992820501327515, "learning_rate": 1.2637067427800909e-09, "loss": 1.5651, "step": 18203 }, { "epoch": 0.9952571655071553, "grad_norm": 1.8207906484603882, "learning_rate": 1.234823565732235e-09, "loss": 1.4344, "step": 18204 }, { "epoch": 0.9953118379508769, "grad_norm": 2.0581157207489014, "learning_rate": 1.206274270916241e-09, "loss": 1.475, "step": 18205 }, { "epoch": 0.9953665103945983, "grad_norm": 1.760464072227478, "learning_rate": 1.178058859285791e-09, "loss": 1.6178, "step": 18206 }, { "epoch": 0.9954211828383199, "grad_norm": 1.4405735731124878, "learning_rate": 1.150177331782354e-09, "loss": 1.4638, "step": 18207 }, { "epoch": 0.9954758552820415, "grad_norm": 2.1314380168914795, "learning_rate": 1.1226296893374068e-09, "loss": 1.4124, "step": 18208 }, { "epoch": 0.995530527725763, "grad_norm": 1.3683748245239258, "learning_rate": 1.0954159328724345e-09, "loss": 1.401, "step": 18209 }, { "epoch": 0.9955852001694846, "grad_norm": 1.548554539680481, "learning_rate": 1.0685360632933794e-09, "loss": 1.3978, "step": 18210 }, { "epoch": 0.9956398726132061, "grad_norm": 1.5133002996444702, "learning_rate": 1.041990081499522e-09, "loss": 1.411, "step": 18211 }, { "epoch": 0.9956945450569277, "grad_norm": 1.411568284034729, "learning_rate": 1.0157779883768203e-09, "loss": 1.5306, "step": 18212 }, { "epoch": 0.9957492175006493, "grad_norm": 1.620316743850708, "learning_rate": 9.898997848001302e-10, "loss": 1.212, "step": 18213 }, { "epoch": 0.9958038899443707, "grad_norm": 1.8198615312576294, "learning_rate": 9.643554716354253e-10, "loss": 1.2896, "step": 18214 }, { "epoch": 0.9958585623880923, "grad_norm": 1.3624953031539917, "learning_rate": 9.391450497331367e-10, "loss": 1.4223, "step": 18215 }, { "epoch": 0.9959132348318139, "grad_norm": 1.4147337675094604, "learning_rate": 9.142685199370338e-10, "loss": 1.503, "step": 18216 }, { "epoch": 0.9959679072755354, "grad_norm": 1.348541259765625, "learning_rate": 8.897258830764533e-10, "loss": 1.4958, "step": 18217 }, { "epoch": 0.996022579719257, "grad_norm": 1.8107455968856812, "learning_rate": 8.655171399718498e-10, "loss": 1.3042, "step": 18218 }, { "epoch": 0.9960772521629786, "grad_norm": 1.3405689001083374, "learning_rate": 8.416422914325762e-10, "loss": 1.5416, "step": 18219 }, { "epoch": 0.9961319246067001, "grad_norm": 2.2244532108306885, "learning_rate": 8.181013382524416e-10, "loss": 1.3089, "step": 18220 }, { "epoch": 0.9961865970504217, "grad_norm": 1.5637465715408325, "learning_rate": 7.94894281220815e-10, "loss": 1.4086, "step": 18221 }, { "epoch": 0.9962412694941433, "grad_norm": 1.534604787826538, "learning_rate": 7.720211211115214e-10, "loss": 1.5261, "step": 18222 }, { "epoch": 0.9962959419378647, "grad_norm": 1.7484489679336548, "learning_rate": 7.494818586883945e-10, "loss": 1.3583, "step": 18223 }, { "epoch": 0.9963506143815863, "grad_norm": 1.5565276145935059, "learning_rate": 7.272764947041655e-10, "loss": 1.2001, "step": 18224 }, { "epoch": 0.9964052868253078, "grad_norm": 1.3965981006622314, "learning_rate": 7.054050299004633e-10, "loss": 1.4343, "step": 18225 }, { "epoch": 0.9964599592690294, "grad_norm": 1.1862423419952393, "learning_rate": 6.838674650067045e-10, "loss": 1.4202, "step": 18226 }, { "epoch": 0.996514631712751, "grad_norm": 1.7889617681503296, "learning_rate": 6.626638007434239e-10, "loss": 1.3761, "step": 18227 }, { "epoch": 0.9965693041564725, "grad_norm": 1.4247843027114868, "learning_rate": 6.417940378167231e-10, "loss": 1.238, "step": 18228 }, { "epoch": 0.9966239766001941, "grad_norm": 1.6208032369613647, "learning_rate": 6.212581769260428e-10, "loss": 1.526, "step": 18229 }, { "epoch": 0.9966786490439157, "grad_norm": 1.709917664527893, "learning_rate": 6.010562187552804e-10, "loss": 1.4, "step": 18230 }, { "epoch": 0.9967333214876372, "grad_norm": 1.54747474193573, "learning_rate": 5.811881639794515e-10, "loss": 1.2914, "step": 18231 }, { "epoch": 0.9967879939313588, "grad_norm": 1.523590087890625, "learning_rate": 5.616540132624693e-10, "loss": 1.6083, "step": 18232 }, { "epoch": 0.9968426663750803, "grad_norm": 1.0487951040267944, "learning_rate": 5.424537672560349e-10, "loss": 1.5961, "step": 18233 }, { "epoch": 0.9968973388188018, "grad_norm": 1.9288138151168823, "learning_rate": 5.23587426601857e-10, "loss": 1.472, "step": 18234 }, { "epoch": 0.9969520112625234, "grad_norm": 1.6796330213546753, "learning_rate": 5.050549919294323e-10, "loss": 1.3824, "step": 18235 }, { "epoch": 0.997006683706245, "grad_norm": 1.6207281351089478, "learning_rate": 4.868564638571549e-10, "loss": 1.5272, "step": 18236 }, { "epoch": 0.9970613561499665, "grad_norm": 1.343766212463379, "learning_rate": 4.689918429945373e-10, "loss": 1.3847, "step": 18237 }, { "epoch": 0.9971160285936881, "grad_norm": 1.5538972616195679, "learning_rate": 4.514611299355487e-10, "loss": 1.4421, "step": 18238 }, { "epoch": 0.9971707010374096, "grad_norm": 1.5431804656982422, "learning_rate": 4.342643252686074e-10, "loss": 1.4797, "step": 18239 }, { "epoch": 0.9972253734811312, "grad_norm": 1.7481824159622192, "learning_rate": 4.17401429564368e-10, "loss": 1.3958, "step": 18240 }, { "epoch": 0.9972800459248528, "grad_norm": 1.5405104160308838, "learning_rate": 4.008724433890443e-10, "loss": 1.3592, "step": 18241 }, { "epoch": 0.9973347183685742, "grad_norm": 1.4432076215744019, "learning_rate": 3.8467736729330687e-10, "loss": 1.4755, "step": 18242 }, { "epoch": 0.9973893908122958, "grad_norm": 1.3469876050949097, "learning_rate": 3.688162018178343e-10, "loss": 1.3112, "step": 18243 }, { "epoch": 0.9974440632560174, "grad_norm": 1.7492750883102417, "learning_rate": 3.53288947492203e-10, "loss": 1.233, "step": 18244 }, { "epoch": 0.9974987356997389, "grad_norm": 1.898117184638977, "learning_rate": 3.3809560483599735e-10, "loss": 1.283, "step": 18245 }, { "epoch": 0.9975534081434605, "grad_norm": 1.5992342233657837, "learning_rate": 3.2323617435547905e-10, "loss": 1.5061, "step": 18246 }, { "epoch": 0.9976080805871821, "grad_norm": 1.5131334066390991, "learning_rate": 3.087106565469178e-10, "loss": 1.5606, "step": 18247 }, { "epoch": 0.9976627530309036, "grad_norm": 1.2450956106185913, "learning_rate": 2.94519051895481e-10, "loss": 1.53, "step": 18248 }, { "epoch": 0.9977174254746252, "grad_norm": 1.7178770303726196, "learning_rate": 2.806613608741238e-10, "loss": 1.2606, "step": 18249 }, { "epoch": 0.9977720979183468, "grad_norm": 1.541345477104187, "learning_rate": 2.6713758394802945e-10, "loss": 1.4603, "step": 18250 }, { "epoch": 0.9978267703620682, "grad_norm": 1.927592396736145, "learning_rate": 2.5394772156683845e-10, "loss": 1.5854, "step": 18251 }, { "epoch": 0.9978814428057898, "grad_norm": 1.4477406740188599, "learning_rate": 2.4109177417130925e-10, "loss": 1.4289, "step": 18252 }, { "epoch": 0.9979361152495113, "grad_norm": 1.6439918279647827, "learning_rate": 2.2856974219109817e-10, "loss": 1.4578, "step": 18253 }, { "epoch": 0.9979907876932329, "grad_norm": 1.7704306840896606, "learning_rate": 2.163816260436491e-10, "loss": 1.5645, "step": 18254 }, { "epoch": 0.9980454601369545, "grad_norm": 1.674269437789917, "learning_rate": 2.0452742613641386e-10, "loss": 1.425, "step": 18255 }, { "epoch": 0.998100132580676, "grad_norm": 2.2213521003723145, "learning_rate": 1.9300714286574207e-10, "loss": 1.2718, "step": 18256 }, { "epoch": 0.9981548050243976, "grad_norm": 1.2323691844940186, "learning_rate": 1.818207766157709e-10, "loss": 1.3534, "step": 18257 }, { "epoch": 0.9982094774681192, "grad_norm": 1.4477019309997559, "learning_rate": 1.7096832776064554e-10, "loss": 1.6391, "step": 18258 }, { "epoch": 0.9982641499118406, "grad_norm": 1.710619568824768, "learning_rate": 1.6044979666118843e-10, "loss": 1.5304, "step": 18259 }, { "epoch": 0.9983188223555622, "grad_norm": 1.5933791399002075, "learning_rate": 1.5026518367045052e-10, "loss": 1.6428, "step": 18260 }, { "epoch": 0.9983734947992838, "grad_norm": 1.346360445022583, "learning_rate": 1.404144891270498e-10, "loss": 1.498, "step": 18261 }, { "epoch": 0.9984281672430053, "grad_norm": 1.7390758991241455, "learning_rate": 1.3089771336072256e-10, "loss": 1.4634, "step": 18262 }, { "epoch": 0.9984828396867269, "grad_norm": 1.4951883554458618, "learning_rate": 1.2171485669010275e-10, "loss": 1.4669, "step": 18263 }, { "epoch": 0.9985375121304485, "grad_norm": 1.591810941696167, "learning_rate": 1.1286591941939151e-10, "loss": 1.3325, "step": 18264 }, { "epoch": 0.99859218457417, "grad_norm": 1.9350759983062744, "learning_rate": 1.0435090184723884e-10, "loss": 1.368, "step": 18265 }, { "epoch": 0.9986468570178916, "grad_norm": 1.3852277994155884, "learning_rate": 9.616980425453116e-11, "loss": 1.5973, "step": 18266 }, { "epoch": 0.9987015294616131, "grad_norm": 1.4843392372131348, "learning_rate": 8.832262691771398e-11, "loss": 1.2445, "step": 18267 }, { "epoch": 0.9987562019053347, "grad_norm": 1.5321238040924072, "learning_rate": 8.080937009657952e-11, "loss": 1.4893, "step": 18268 }, { "epoch": 0.9988108743490562, "grad_norm": 1.4337668418884277, "learning_rate": 7.363003404314839e-11, "loss": 1.3439, "step": 18269 }, { "epoch": 0.9988655467927777, "grad_norm": 1.270403265953064, "learning_rate": 6.678461899611854e-11, "loss": 1.3931, "step": 18270 }, { "epoch": 0.9989202192364993, "grad_norm": 1.6032652854919434, "learning_rate": 6.027312518530614e-11, "loss": 1.4169, "step": 18271 }, { "epoch": 0.9989748916802209, "grad_norm": 1.4040756225585938, "learning_rate": 5.409555282720469e-11, "loss": 1.4312, "step": 18272 }, { "epoch": 0.9990295641239424, "grad_norm": 1.5738003253936768, "learning_rate": 4.825190212831565e-11, "loss": 1.4962, "step": 18273 }, { "epoch": 0.999084236567664, "grad_norm": 1.7599151134490967, "learning_rate": 4.274217328514851e-11, "loss": 1.1941, "step": 18274 }, { "epoch": 0.9991389090113856, "grad_norm": 1.5715889930725098, "learning_rate": 3.756636647866962e-11, "loss": 1.2988, "step": 18275 }, { "epoch": 0.9991935814551071, "grad_norm": 1.2795343399047852, "learning_rate": 3.272448188429422e-11, "loss": 1.5152, "step": 18276 }, { "epoch": 0.9992482538988287, "grad_norm": 1.358577847480774, "learning_rate": 2.821651966300465e-11, "loss": 1.3602, "step": 18277 }, { "epoch": 0.9993029263425502, "grad_norm": 1.5314719676971436, "learning_rate": 2.4042479965791234e-11, "loss": 1.4568, "step": 18278 }, { "epoch": 0.9993575987862717, "grad_norm": 1.5921199321746826, "learning_rate": 2.020236293143185e-11, "loss": 1.2695, "step": 18279 }, { "epoch": 0.9994122712299933, "grad_norm": 1.5932323932647705, "learning_rate": 1.6696168687602155e-11, "loss": 1.5039, "step": 18280 }, { "epoch": 0.9994669436737148, "grad_norm": 1.5867269039154053, "learning_rate": 1.3523897351985783e-11, "loss": 1.2935, "step": 18281 }, { "epoch": 0.9995216161174364, "grad_norm": 1.5455549955368042, "learning_rate": 1.0685549030053921e-11, "loss": 1.5167, "step": 18282 }, { "epoch": 0.999576288561158, "grad_norm": 1.328511357307434, "learning_rate": 8.181123817285752e-12, "loss": 1.5436, "step": 18283 }, { "epoch": 0.9996309610048795, "grad_norm": 1.640791654586792, "learning_rate": 6.010621798058225e-12, "loss": 1.1925, "step": 18284 }, { "epoch": 0.9996856334486011, "grad_norm": 1.4612890481948853, "learning_rate": 4.174043043425613e-12, "loss": 1.4254, "step": 18285 }, { "epoch": 0.9997403058923227, "grad_norm": 1.5805788040161133, "learning_rate": 2.6713876144501827e-12, "loss": 1.2036, "step": 18286 }, { "epoch": 0.9997949783360441, "grad_norm": 1.3750700950622559, "learning_rate": 1.5026555622021932e-12, "loss": 1.4302, "step": 18287 }, { "epoch": 0.9998496507797657, "grad_norm": 1.3186200857162476, "learning_rate": 6.678469255394504e-13, "loss": 1.2997, "step": 18288 }, { "epoch": 0.9999043232234873, "grad_norm": 1.6287105083465576, "learning_rate": 1.6696173332775289e-13, "loss": 1.4892, "step": 18289 }, { "epoch": 0.9999589956672088, "grad_norm": 1.4226773977279663, "learning_rate": 0.0, "loss": 1.391, "step": 18290 }, { "epoch": 0.9999589956672088, "step": 18290, "total_flos": 1.6170385225104415e+18, "train_loss": 1.452843578387115, "train_runtime": 32577.0278, "train_samples_per_second": 17.967, "train_steps_per_second": 0.561 } ], "logging_steps": 1.0, "max_steps": 18290, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1.6170385225104415e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }