diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,148652 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.999952897953416, - "eval_steps": 500, - "global_step": 21230, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 9.420409316784815e-05, - "grad_norm": 11.339726448059082, - "learning_rate": 4.705882352941176e-08, - "loss": 1.4014, - "step": 1 - }, - { - "epoch": 0.0001884081863356963, - "grad_norm": 11.068122863769531, - "learning_rate": 9.411764705882353e-08, - "loss": 1.4854, - "step": 2 - }, - { - "epoch": 0.00028261227950354445, - "grad_norm": 12.699030876159668, - "learning_rate": 1.4117647058823532e-07, - "loss": 1.4708, - "step": 3 - }, - { - "epoch": 0.0003768163726713926, - "grad_norm": 11.916887283325195, - "learning_rate": 1.8823529411764705e-07, - "loss": 1.4012, - "step": 4 - }, - { - "epoch": 0.0004710204658392407, - "grad_norm": 11.894647598266602, - "learning_rate": 2.3529411764705883e-07, - "loss": 1.4301, - "step": 5 - }, - { - "epoch": 0.0005652245590070889, - "grad_norm": 11.18047046661377, - "learning_rate": 2.8235294117647064e-07, - "loss": 1.3958, - "step": 6 - }, - { - "epoch": 0.000659428652174937, - "grad_norm": 11.70823860168457, - "learning_rate": 3.294117647058824e-07, - "loss": 1.4063, - "step": 7 - }, - { - "epoch": 0.0007536327453427852, - "grad_norm": 10.865617752075195, - "learning_rate": 3.764705882352941e-07, - "loss": 1.3843, - "step": 8 - }, - { - "epoch": 0.0008478368385106332, - "grad_norm": 12.764007568359375, - "learning_rate": 4.235294117647059e-07, - "loss": 1.4925, - "step": 9 - }, - { - "epoch": 0.0009420409316784814, - "grad_norm": 11.885594367980957, - "learning_rate": 4.7058823529411767e-07, - "loss": 1.4427, - "step": 10 - }, - { - "epoch": 0.0010362450248463295, - "grad_norm": 10.534202575683594, - "learning_rate": 5.176470588235294e-07, - "loss": 1.4676, - "step": 11 - }, - { - "epoch": 0.0011304491180141778, - "grad_norm": 9.053123474121094, - "learning_rate": 5.647058823529413e-07, - "loss": 1.4124, - "step": 12 - }, - { - "epoch": 0.0012246532111820259, - "grad_norm": 9.09373664855957, - "learning_rate": 6.11764705882353e-07, - "loss": 1.2907, - "step": 13 - }, - { - "epoch": 0.001318857304349874, - "grad_norm": 10.74179744720459, - "learning_rate": 6.588235294117648e-07, - "loss": 1.4167, - "step": 14 - }, - { - "epoch": 0.001413061397517722, - "grad_norm": 8.731218338012695, - "learning_rate": 7.058823529411766e-07, - "loss": 1.3377, - "step": 15 - }, - { - "epoch": 0.0015072654906855703, - "grad_norm": 8.675665855407715, - "learning_rate": 7.529411764705882e-07, - "loss": 1.3483, - "step": 16 - }, - { - "epoch": 0.0016014695838534184, - "grad_norm": 8.480453491210938, - "learning_rate": 8.000000000000001e-07, - "loss": 1.3519, - "step": 17 - }, - { - "epoch": 0.0016956736770212665, - "grad_norm": 8.892087936401367, - "learning_rate": 8.470588235294118e-07, - "loss": 1.415, - "step": 18 - }, - { - "epoch": 0.0017898777701891148, - "grad_norm": 7.941059112548828, - "learning_rate": 8.941176470588237e-07, - "loss": 1.4556, - "step": 19 - }, - { - "epoch": 0.0018840818633569629, - "grad_norm": 7.159634590148926, - "learning_rate": 9.411764705882353e-07, - "loss": 1.1699, - "step": 20 - }, - { - "epoch": 0.001978285956524811, - "grad_norm": 7.125668048858643, - "learning_rate": 9.88235294117647e-07, - "loss": 1.224, - "step": 21 - }, - { - 
"epoch": 0.002072490049692659, - "grad_norm": 7.0029497146606445, - "learning_rate": 1.0352941176470589e-06, - "loss": 1.2028, - "step": 22 - }, - { - "epoch": 0.002166694142860507, - "grad_norm": 6.187039375305176, - "learning_rate": 1.0823529411764707e-06, - "loss": 1.1646, - "step": 23 - }, - { - "epoch": 0.0022608982360283556, - "grad_norm": 6.738177299499512, - "learning_rate": 1.1294117647058826e-06, - "loss": 1.1445, - "step": 24 - }, - { - "epoch": 0.0023551023291962037, - "grad_norm": 6.802069664001465, - "learning_rate": 1.1764705882352942e-06, - "loss": 1.1857, - "step": 25 - }, - { - "epoch": 0.0024493064223640518, - "grad_norm": 6.856109142303467, - "learning_rate": 1.223529411764706e-06, - "loss": 1.2034, - "step": 26 - }, - { - "epoch": 0.0025435105155319, - "grad_norm": 5.405038833618164, - "learning_rate": 1.2705882352941175e-06, - "loss": 1.0185, - "step": 27 - }, - { - "epoch": 0.002637714608699748, - "grad_norm": 4.919756889343262, - "learning_rate": 1.3176470588235296e-06, - "loss": 1.0102, - "step": 28 - }, - { - "epoch": 0.002731918701867596, - "grad_norm": 4.2810773849487305, - "learning_rate": 1.3647058823529413e-06, - "loss": 0.9412, - "step": 29 - }, - { - "epoch": 0.002826122795035444, - "grad_norm": 4.479569911956787, - "learning_rate": 1.4117647058823531e-06, - "loss": 0.9872, - "step": 30 - }, - { - "epoch": 0.0029203268882032926, - "grad_norm": 4.528472900390625, - "learning_rate": 1.4588235294117648e-06, - "loss": 0.9284, - "step": 31 - }, - { - "epoch": 0.0030145309813711407, - "grad_norm": 3.8819658756256104, - "learning_rate": 1.5058823529411764e-06, - "loss": 0.9209, - "step": 32 - }, - { - "epoch": 0.0031087350745389888, - "grad_norm": 4.198963642120361, - "learning_rate": 1.5529411764705885e-06, - "loss": 0.9574, - "step": 33 - }, - { - "epoch": 0.003202939167706837, - "grad_norm": 3.9804775714874268, - "learning_rate": 1.6000000000000001e-06, - "loss": 0.9235, - "step": 34 - }, - { - "epoch": 0.003297143260874685, - "grad_norm": 4.031086444854736, - "learning_rate": 1.6470588235294118e-06, - "loss": 0.8726, - "step": 35 - }, - { - "epoch": 0.003391347354042533, - "grad_norm": 3.4946067333221436, - "learning_rate": 1.6941176470588237e-06, - "loss": 0.8327, - "step": 36 - }, - { - "epoch": 0.0034855514472103815, - "grad_norm": 3.3714377880096436, - "learning_rate": 1.7411764705882353e-06, - "loss": 0.9078, - "step": 37 - }, - { - "epoch": 0.0035797555403782296, - "grad_norm": 3.398184299468994, - "learning_rate": 1.7882352941176474e-06, - "loss": 0.8618, - "step": 38 - }, - { - "epoch": 0.0036739596335460777, - "grad_norm": 3.3403360843658447, - "learning_rate": 1.835294117647059e-06, - "loss": 0.8712, - "step": 39 - }, - { - "epoch": 0.0037681637267139257, - "grad_norm": 2.760078191757202, - "learning_rate": 1.8823529411764707e-06, - "loss": 0.8793, - "step": 40 - }, - { - "epoch": 0.003862367819881774, - "grad_norm": 2.781355619430542, - "learning_rate": 1.9294117647058825e-06, - "loss": 0.8844, - "step": 41 - }, - { - "epoch": 0.003956571913049622, - "grad_norm": 2.2941536903381348, - "learning_rate": 1.976470588235294e-06, - "loss": 0.7767, - "step": 42 - }, - { - "epoch": 0.00405077600621747, - "grad_norm": 2.1851463317871094, - "learning_rate": 2.0235294117647063e-06, - "loss": 0.7546, - "step": 43 - }, - { - "epoch": 0.004144980099385318, - "grad_norm": 2.916654109954834, - "learning_rate": 2.0705882352941177e-06, - "loss": 0.8267, - "step": 44 - }, - { - "epoch": 0.004239184192553166, - "grad_norm": 2.311211585998535, - "learning_rate": 
2.1176470588235296e-06, - "loss": 0.7443, - "step": 45 - }, - { - "epoch": 0.004333388285721014, - "grad_norm": 2.1399497985839844, - "learning_rate": 2.1647058823529414e-06, - "loss": 0.7353, - "step": 46 - }, - { - "epoch": 0.004427592378888863, - "grad_norm": 2.1802375316619873, - "learning_rate": 2.2117647058823533e-06, - "loss": 0.7627, - "step": 47 - }, - { - "epoch": 0.004521796472056711, - "grad_norm": 2.017062187194824, - "learning_rate": 2.258823529411765e-06, - "loss": 0.7589, - "step": 48 - }, - { - "epoch": 0.004616000565224559, - "grad_norm": 2.568002700805664, - "learning_rate": 2.3058823529411766e-06, - "loss": 0.7944, - "step": 49 - }, - { - "epoch": 0.004710204658392407, - "grad_norm": 2.188521146774292, - "learning_rate": 2.3529411764705885e-06, - "loss": 0.8467, - "step": 50 - }, - { - "epoch": 0.0048044087515602555, - "grad_norm": 1.8111073970794678, - "learning_rate": 2.4000000000000003e-06, - "loss": 0.752, - "step": 51 - }, - { - "epoch": 0.0048986128447281035, - "grad_norm": 1.927584171295166, - "learning_rate": 2.447058823529412e-06, - "loss": 0.7251, - "step": 52 - }, - { - "epoch": 0.004992816937895952, - "grad_norm": 1.8764548301696777, - "learning_rate": 2.4941176470588236e-06, - "loss": 0.7833, - "step": 53 - }, - { - "epoch": 0.0050870210310638, - "grad_norm": 1.7211430072784424, - "learning_rate": 2.541176470588235e-06, - "loss": 0.7421, - "step": 54 - }, - { - "epoch": 0.005181225124231648, - "grad_norm": 1.9138911962509155, - "learning_rate": 2.5882352941176473e-06, - "loss": 0.7632, - "step": 55 - }, - { - "epoch": 0.005275429217399496, - "grad_norm": 1.6788952350616455, - "learning_rate": 2.635294117647059e-06, - "loss": 0.7039, - "step": 56 - }, - { - "epoch": 0.005369633310567344, - "grad_norm": 1.7721251249313354, - "learning_rate": 2.682352941176471e-06, - "loss": 0.7089, - "step": 57 - }, - { - "epoch": 0.005463837403735192, - "grad_norm": 1.8875666856765747, - "learning_rate": 2.7294117647058825e-06, - "loss": 0.6769, - "step": 58 - }, - { - "epoch": 0.00555804149690304, - "grad_norm": 1.7260370254516602, - "learning_rate": 2.7764705882352944e-06, - "loss": 0.697, - "step": 59 - }, - { - "epoch": 0.005652245590070888, - "grad_norm": 1.7355884313583374, - "learning_rate": 2.8235294117647062e-06, - "loss": 0.6625, - "step": 60 - }, - { - "epoch": 0.005746449683238737, - "grad_norm": 1.7094358205795288, - "learning_rate": 2.8705882352941177e-06, - "loss": 0.6575, - "step": 61 - }, - { - "epoch": 0.005840653776406585, - "grad_norm": 1.5695163011550903, - "learning_rate": 2.9176470588235295e-06, - "loss": 0.6273, - "step": 62 - }, - { - "epoch": 0.005934857869574433, - "grad_norm": 2.2251622676849365, - "learning_rate": 2.9647058823529414e-06, - "loss": 0.7796, - "step": 63 - }, - { - "epoch": 0.006029061962742281, - "grad_norm": 1.889656901359558, - "learning_rate": 3.011764705882353e-06, - "loss": 0.6638, - "step": 64 - }, - { - "epoch": 0.006123266055910129, - "grad_norm": 1.8280856609344482, - "learning_rate": 3.058823529411765e-06, - "loss": 0.7595, - "step": 65 - }, - { - "epoch": 0.0062174701490779775, - "grad_norm": 1.7405308485031128, - "learning_rate": 3.105882352941177e-06, - "loss": 0.6778, - "step": 66 - }, - { - "epoch": 0.006311674242245826, - "grad_norm": 1.758604645729065, - "learning_rate": 3.1529411764705884e-06, - "loss": 0.6882, - "step": 67 - }, - { - "epoch": 0.006405878335413674, - "grad_norm": 1.6205955743789673, - "learning_rate": 3.2000000000000003e-06, - "loss": 0.6489, - "step": 68 - }, - { - "epoch": 0.006500082428581522, 
- "grad_norm": 1.5852848291397095, - "learning_rate": 3.247058823529412e-06, - "loss": 0.698, - "step": 69 - }, - { - "epoch": 0.00659428652174937, - "grad_norm": 1.7549113035202026, - "learning_rate": 3.2941176470588236e-06, - "loss": 0.6203, - "step": 70 - }, - { - "epoch": 0.006688490614917218, - "grad_norm": 1.6429790258407593, - "learning_rate": 3.3411764705882354e-06, - "loss": 0.6237, - "step": 71 - }, - { - "epoch": 0.006782694708085066, - "grad_norm": 1.7007336616516113, - "learning_rate": 3.3882352941176473e-06, - "loss": 0.7162, - "step": 72 - }, - { - "epoch": 0.006876898801252914, - "grad_norm": 1.6153029203414917, - "learning_rate": 3.4352941176470587e-06, - "loss": 0.6938, - "step": 73 - }, - { - "epoch": 0.006971102894420763, - "grad_norm": 1.8349709510803223, - "learning_rate": 3.4823529411764706e-06, - "loss": 0.6437, - "step": 74 - }, - { - "epoch": 0.007065306987588611, - "grad_norm": 1.6667909622192383, - "learning_rate": 3.529411764705883e-06, - "loss": 0.6905, - "step": 75 - }, - { - "epoch": 0.007159511080756459, - "grad_norm": 1.966651201248169, - "learning_rate": 3.5764705882352948e-06, - "loss": 0.6857, - "step": 76 - }, - { - "epoch": 0.007253715173924307, - "grad_norm": 1.5965569019317627, - "learning_rate": 3.623529411764706e-06, - "loss": 0.5966, - "step": 77 - }, - { - "epoch": 0.007347919267092155, - "grad_norm": 1.6134767532348633, - "learning_rate": 3.670588235294118e-06, - "loss": 0.6519, - "step": 78 - }, - { - "epoch": 0.007442123360260003, - "grad_norm": 1.6604300737380981, - "learning_rate": 3.71764705882353e-06, - "loss": 0.6105, - "step": 79 - }, - { - "epoch": 0.0075363274534278515, - "grad_norm": 1.5630711317062378, - "learning_rate": 3.7647058823529414e-06, - "loss": 0.5989, - "step": 80 - }, - { - "epoch": 0.0076305315465956996, - "grad_norm": 1.7707780599594116, - "learning_rate": 3.8117647058823532e-06, - "loss": 0.705, - "step": 81 - }, - { - "epoch": 0.007724735639763548, - "grad_norm": 1.6089433431625366, - "learning_rate": 3.858823529411765e-06, - "loss": 0.6568, - "step": 82 - }, - { - "epoch": 0.007818939732931397, - "grad_norm": 1.6167658567428589, - "learning_rate": 3.905882352941177e-06, - "loss": 0.6381, - "step": 83 - }, - { - "epoch": 0.007913143826099244, - "grad_norm": 1.7541074752807617, - "learning_rate": 3.952941176470588e-06, - "loss": 0.7424, - "step": 84 - }, - { - "epoch": 0.008007347919267093, - "grad_norm": 1.5577178001403809, - "learning_rate": 4.000000000000001e-06, - "loss": 0.6776, - "step": 85 - }, - { - "epoch": 0.00810155201243494, - "grad_norm": 1.5912531614303589, - "learning_rate": 4.0470588235294125e-06, - "loss": 0.5949, - "step": 86 - }, - { - "epoch": 0.008195756105602789, - "grad_norm": 1.5673514604568481, - "learning_rate": 4.094117647058824e-06, - "loss": 0.7048, - "step": 87 - }, - { - "epoch": 0.008289960198770636, - "grad_norm": 1.7601391077041626, - "learning_rate": 4.141176470588235e-06, - "loss": 0.6333, - "step": 88 - }, - { - "epoch": 0.008384164291938485, - "grad_norm": 1.598659634590149, - "learning_rate": 4.188235294117647e-06, - "loss": 0.6692, - "step": 89 - }, - { - "epoch": 0.008478368385106332, - "grad_norm": 1.6280843019485474, - "learning_rate": 4.235294117647059e-06, - "loss": 0.6205, - "step": 90 - }, - { - "epoch": 0.008572572478274181, - "grad_norm": 1.5283360481262207, - "learning_rate": 4.282352941176471e-06, - "loss": 0.6305, - "step": 91 - }, - { - "epoch": 0.008666776571442028, - "grad_norm": 1.6552764177322388, - "learning_rate": 4.329411764705883e-06, - "loss": 0.6371, - 
"step": 92 - }, - { - "epoch": 0.008760980664609877, - "grad_norm": 1.6012983322143555, - "learning_rate": 4.376470588235294e-06, - "loss": 0.6803, - "step": 93 - }, - { - "epoch": 0.008855184757777726, - "grad_norm": 1.5913658142089844, - "learning_rate": 4.423529411764707e-06, - "loss": 0.6094, - "step": 94 - }, - { - "epoch": 0.008949388850945574, - "grad_norm": 1.5518425703048706, - "learning_rate": 4.4705882352941184e-06, - "loss": 0.6185, - "step": 95 - }, - { - "epoch": 0.009043592944113422, - "grad_norm": 1.5862501859664917, - "learning_rate": 4.51764705882353e-06, - "loss": 0.6281, - "step": 96 - }, - { - "epoch": 0.00913779703728127, - "grad_norm": 1.6757488250732422, - "learning_rate": 4.564705882352941e-06, - "loss": 0.6599, - "step": 97 - }, - { - "epoch": 0.009232001130449119, - "grad_norm": 1.4505435228347778, - "learning_rate": 4.611764705882353e-06, - "loss": 0.6064, - "step": 98 - }, - { - "epoch": 0.009326205223616966, - "grad_norm": 1.4430831670761108, - "learning_rate": 4.658823529411765e-06, - "loss": 0.5856, - "step": 99 - }, - { - "epoch": 0.009420409316784815, - "grad_norm": 1.755677580833435, - "learning_rate": 4.705882352941177e-06, - "loss": 0.7195, - "step": 100 - }, - { - "epoch": 0.009514613409952662, - "grad_norm": 1.4079543352127075, - "learning_rate": 4.752941176470589e-06, - "loss": 0.5292, - "step": 101 - }, - { - "epoch": 0.009608817503120511, - "grad_norm": 1.7317286729812622, - "learning_rate": 4.800000000000001e-06, - "loss": 0.6123, - "step": 102 - }, - { - "epoch": 0.009703021596288358, - "grad_norm": 1.6857632398605347, - "learning_rate": 4.847058823529412e-06, - "loss": 0.6198, - "step": 103 - }, - { - "epoch": 0.009797225689456207, - "grad_norm": 1.7745858430862427, - "learning_rate": 4.894117647058824e-06, - "loss": 0.5912, - "step": 104 - }, - { - "epoch": 0.009891429782624054, - "grad_norm": 1.6427191495895386, - "learning_rate": 4.941176470588236e-06, - "loss": 0.5242, - "step": 105 - }, - { - "epoch": 0.009985633875791903, - "grad_norm": 1.5532233715057373, - "learning_rate": 4.988235294117647e-06, - "loss": 0.6075, - "step": 106 - }, - { - "epoch": 0.01007983796895975, - "grad_norm": 1.5551834106445312, - "learning_rate": 5.035294117647059e-06, - "loss": 0.588, - "step": 107 - }, - { - "epoch": 0.0101740420621276, - "grad_norm": 1.4823707342147827, - "learning_rate": 5.08235294117647e-06, - "loss": 0.6076, - "step": 108 - }, - { - "epoch": 0.010268246155295448, - "grad_norm": 1.7012345790863037, - "learning_rate": 5.129411764705883e-06, - "loss": 0.6516, - "step": 109 - }, - { - "epoch": 0.010362450248463296, - "grad_norm": 1.598178744316101, - "learning_rate": 5.176470588235295e-06, - "loss": 0.5925, - "step": 110 - }, - { - "epoch": 0.010456654341631145, - "grad_norm": 1.4713191986083984, - "learning_rate": 5.2235294117647065e-06, - "loss": 0.5498, - "step": 111 - }, - { - "epoch": 0.010550858434798992, - "grad_norm": 1.4787997007369995, - "learning_rate": 5.270588235294118e-06, - "loss": 0.5936, - "step": 112 - }, - { - "epoch": 0.01064506252796684, - "grad_norm": 1.4660656452178955, - "learning_rate": 5.317647058823529e-06, - "loss": 0.563, - "step": 113 - }, - { - "epoch": 0.010739266621134688, - "grad_norm": 1.5136562585830688, - "learning_rate": 5.364705882352942e-06, - "loss": 0.5717, - "step": 114 - }, - { - "epoch": 0.010833470714302537, - "grad_norm": 1.4835213422775269, - "learning_rate": 5.411764705882353e-06, - "loss": 0.5547, - "step": 115 - }, - { - "epoch": 0.010927674807470384, - "grad_norm": 1.4667916297912598, - 
"learning_rate": 5.458823529411765e-06, - "loss": 0.5683, - "step": 116 - }, - { - "epoch": 0.011021878900638233, - "grad_norm": 1.5353646278381348, - "learning_rate": 5.505882352941177e-06, - "loss": 0.5581, - "step": 117 - }, - { - "epoch": 0.01111608299380608, - "grad_norm": 1.4183745384216309, - "learning_rate": 5.552941176470589e-06, - "loss": 0.5556, - "step": 118 - }, - { - "epoch": 0.011210287086973929, - "grad_norm": 1.6031017303466797, - "learning_rate": 5.600000000000001e-06, - "loss": 0.6011, - "step": 119 - }, - { - "epoch": 0.011304491180141776, - "grad_norm": 1.57628333568573, - "learning_rate": 5.6470588235294125e-06, - "loss": 0.5728, - "step": 120 - }, - { - "epoch": 0.011398695273309625, - "grad_norm": 1.5039172172546387, - "learning_rate": 5.694117647058824e-06, - "loss": 0.6005, - "step": 121 - }, - { - "epoch": 0.011492899366477474, - "grad_norm": 1.4251632690429688, - "learning_rate": 5.741176470588235e-06, - "loss": 0.5717, - "step": 122 - }, - { - "epoch": 0.011587103459645321, - "grad_norm": 1.5075788497924805, - "learning_rate": 5.788235294117648e-06, - "loss": 0.6307, - "step": 123 - }, - { - "epoch": 0.01168130755281317, - "grad_norm": 1.4773801565170288, - "learning_rate": 5.835294117647059e-06, - "loss": 0.5513, - "step": 124 - }, - { - "epoch": 0.011775511645981018, - "grad_norm": 1.4166651964187622, - "learning_rate": 5.882352941176471e-06, - "loss": 0.6316, - "step": 125 - }, - { - "epoch": 0.011869715739148867, - "grad_norm": 1.4896390438079834, - "learning_rate": 5.929411764705883e-06, - "loss": 0.5854, - "step": 126 - }, - { - "epoch": 0.011963919832316714, - "grad_norm": 1.5579148530960083, - "learning_rate": 5.976470588235295e-06, - "loss": 0.6574, - "step": 127 - }, - { - "epoch": 0.012058123925484563, - "grad_norm": 1.5000208616256714, - "learning_rate": 6.023529411764706e-06, - "loss": 0.605, - "step": 128 - }, - { - "epoch": 0.01215232801865241, - "grad_norm": 1.848293423652649, - "learning_rate": 6.070588235294118e-06, - "loss": 0.5875, - "step": 129 - }, - { - "epoch": 0.012246532111820259, - "grad_norm": 1.6110161542892456, - "learning_rate": 6.11764705882353e-06, - "loss": 0.6874, - "step": 130 - }, - { - "epoch": 0.012340736204988106, - "grad_norm": 1.4359551668167114, - "learning_rate": 6.164705882352941e-06, - "loss": 0.5507, - "step": 131 - }, - { - "epoch": 0.012434940298155955, - "grad_norm": 1.3417483568191528, - "learning_rate": 6.211764705882354e-06, - "loss": 0.5285, - "step": 132 - }, - { - "epoch": 0.012529144391323802, - "grad_norm": 1.3211323022842407, - "learning_rate": 6.258823529411765e-06, - "loss": 0.57, - "step": 133 - }, - { - "epoch": 0.012623348484491651, - "grad_norm": 1.3868370056152344, - "learning_rate": 6.305882352941177e-06, - "loss": 0.5669, - "step": 134 - }, - { - "epoch": 0.0127175525776595, - "grad_norm": 1.4950891733169556, - "learning_rate": 6.352941176470589e-06, - "loss": 0.63, - "step": 135 - }, - { - "epoch": 0.012811756670827347, - "grad_norm": 1.4748525619506836, - "learning_rate": 6.4000000000000006e-06, - "loss": 0.6375, - "step": 136 - }, - { - "epoch": 0.012905960763995196, - "grad_norm": 1.4417543411254883, - "learning_rate": 6.4470588235294116e-06, - "loss": 0.567, - "step": 137 - }, - { - "epoch": 0.013000164857163043, - "grad_norm": 1.3991072177886963, - "learning_rate": 6.494117647058824e-06, - "loss": 0.5565, - "step": 138 - }, - { - "epoch": 0.013094368950330892, - "grad_norm": 1.2583264112472534, - "learning_rate": 6.541176470588236e-06, - "loss": 0.5257, - "step": 139 - }, - { - "epoch": 
0.01318857304349874, - "grad_norm": 1.4247316122055054, - "learning_rate": 6.588235294117647e-06, - "loss": 0.5586, - "step": 140 - }, - { - "epoch": 0.013282777136666589, - "grad_norm": 1.4726084470748901, - "learning_rate": 6.63529411764706e-06, - "loss": 0.5374, - "step": 141 - }, - { - "epoch": 0.013376981229834436, - "grad_norm": 1.5030871629714966, - "learning_rate": 6.682352941176471e-06, - "loss": 0.581, - "step": 142 - }, - { - "epoch": 0.013471185323002285, - "grad_norm": 1.4666228294372559, - "learning_rate": 6.729411764705884e-06, - "loss": 0.6157, - "step": 143 - }, - { - "epoch": 0.013565389416170132, - "grad_norm": 1.3930963277816772, - "learning_rate": 6.776470588235295e-06, - "loss": 0.5412, - "step": 144 - }, - { - "epoch": 0.013659593509337981, - "grad_norm": 1.4461039304733276, - "learning_rate": 6.8235294117647065e-06, - "loss": 0.5156, - "step": 145 - }, - { - "epoch": 0.013753797602505828, - "grad_norm": 1.3485381603240967, - "learning_rate": 6.8705882352941175e-06, - "loss": 0.6053, - "step": 146 - }, - { - "epoch": 0.013848001695673677, - "grad_norm": 1.4633151292800903, - "learning_rate": 6.91764705882353e-06, - "loss": 0.5936, - "step": 147 - }, - { - "epoch": 0.013942205788841526, - "grad_norm": 1.366125464439392, - "learning_rate": 6.964705882352941e-06, - "loss": 0.5454, - "step": 148 - }, - { - "epoch": 0.014036409882009373, - "grad_norm": 1.674149751663208, - "learning_rate": 7.011764705882353e-06, - "loss": 0.5619, - "step": 149 - }, - { - "epoch": 0.014130613975177222, - "grad_norm": 1.3823537826538086, - "learning_rate": 7.058823529411766e-06, - "loss": 0.5343, - "step": 150 - }, - { - "epoch": 0.01422481806834507, - "grad_norm": 1.4399135112762451, - "learning_rate": 7.105882352941177e-06, - "loss": 0.5469, - "step": 151 - }, - { - "epoch": 0.014319022161512918, - "grad_norm": 1.4885296821594238, - "learning_rate": 7.1529411764705895e-06, - "loss": 0.5506, - "step": 152 - }, - { - "epoch": 0.014413226254680766, - "grad_norm": 1.4054043292999268, - "learning_rate": 7.2000000000000005e-06, - "loss": 0.5174, - "step": 153 - }, - { - "epoch": 0.014507430347848614, - "grad_norm": 1.6000251770019531, - "learning_rate": 7.247058823529412e-06, - "loss": 0.6446, - "step": 154 - }, - { - "epoch": 0.014601634441016462, - "grad_norm": 1.3519340753555298, - "learning_rate": 7.294117647058823e-06, - "loss": 0.4738, - "step": 155 - }, - { - "epoch": 0.01469583853418431, - "grad_norm": 1.4328761100769043, - "learning_rate": 7.341176470588236e-06, - "loss": 0.5681, - "step": 156 - }, - { - "epoch": 0.014790042627352158, - "grad_norm": 1.3303059339523315, - "learning_rate": 7.388235294117647e-06, - "loss": 0.5603, - "step": 157 - }, - { - "epoch": 0.014884246720520007, - "grad_norm": 1.3826634883880615, - "learning_rate": 7.43529411764706e-06, - "loss": 0.5284, - "step": 158 - }, - { - "epoch": 0.014978450813687854, - "grad_norm": 1.2612242698669434, - "learning_rate": 7.482352941176472e-06, - "loss": 0.5077, - "step": 159 - }, - { - "epoch": 0.015072654906855703, - "grad_norm": 1.3535127639770508, - "learning_rate": 7.529411764705883e-06, - "loss": 0.5265, - "step": 160 - }, - { - "epoch": 0.015166859000023552, - "grad_norm": 1.3785712718963623, - "learning_rate": 7.576470588235295e-06, - "loss": 0.4815, - "step": 161 - }, - { - "epoch": 0.015261063093191399, - "grad_norm": 1.4843330383300781, - "learning_rate": 7.6235294117647064e-06, - "loss": 0.5591, - "step": 162 - }, - { - "epoch": 0.015355267186359248, - "grad_norm": 1.5826313495635986, - "learning_rate": 
7.670588235294119e-06, - "loss": 0.5464, - "step": 163 - }, - { - "epoch": 0.015449471279527095, - "grad_norm": 1.3580889701843262, - "learning_rate": 7.71764705882353e-06, - "loss": 0.5522, - "step": 164 - }, - { - "epoch": 0.015543675372694944, - "grad_norm": 1.3601776361465454, - "learning_rate": 7.764705882352941e-06, - "loss": 0.5444, - "step": 165 - }, - { - "epoch": 0.015637879465862793, - "grad_norm": 1.6120527982711792, - "learning_rate": 7.811764705882354e-06, - "loss": 0.568, - "step": 166 - }, - { - "epoch": 0.01573208355903064, - "grad_norm": 1.3571330308914185, - "learning_rate": 7.858823529411765e-06, - "loss": 0.581, - "step": 167 - }, - { - "epoch": 0.015826287652198488, - "grad_norm": 1.3575953245162964, - "learning_rate": 7.905882352941176e-06, - "loss": 0.4842, - "step": 168 - }, - { - "epoch": 0.015920491745366337, - "grad_norm": 1.4324496984481812, - "learning_rate": 7.952941176470589e-06, - "loss": 0.5788, - "step": 169 - }, - { - "epoch": 0.016014695838534185, - "grad_norm": 1.3828068971633911, - "learning_rate": 8.000000000000001e-06, - "loss": 0.5675, - "step": 170 - }, - { - "epoch": 0.01610889993170203, - "grad_norm": 1.3581621646881104, - "learning_rate": 8.047058823529412e-06, - "loss": 0.5055, - "step": 171 - }, - { - "epoch": 0.01620310402486988, - "grad_norm": 1.3914538621902466, - "learning_rate": 8.094117647058825e-06, - "loss": 0.5261, - "step": 172 - }, - { - "epoch": 0.01629730811803773, - "grad_norm": 1.4400177001953125, - "learning_rate": 8.141176470588236e-06, - "loss": 0.5764, - "step": 173 - }, - { - "epoch": 0.016391512211205578, - "grad_norm": 1.4791513681411743, - "learning_rate": 8.188235294117649e-06, - "loss": 0.5258, - "step": 174 - }, - { - "epoch": 0.016485716304373427, - "grad_norm": 1.4356977939605713, - "learning_rate": 8.23529411764706e-06, - "loss": 0.584, - "step": 175 - }, - { - "epoch": 0.016579920397541272, - "grad_norm": 1.3307058811187744, - "learning_rate": 8.28235294117647e-06, - "loss": 0.5009, - "step": 176 - }, - { - "epoch": 0.01667412449070912, - "grad_norm": 1.3605982065200806, - "learning_rate": 8.329411764705882e-06, - "loss": 0.5552, - "step": 177 - }, - { - "epoch": 0.01676832858387697, - "grad_norm": 1.4693851470947266, - "learning_rate": 8.376470588235295e-06, - "loss": 0.5796, - "step": 178 - }, - { - "epoch": 0.01686253267704482, - "grad_norm": 1.4685381650924683, - "learning_rate": 8.423529411764707e-06, - "loss": 0.6125, - "step": 179 - }, - { - "epoch": 0.016956736770212665, - "grad_norm": 1.2329943180084229, - "learning_rate": 8.470588235294118e-06, - "loss": 0.4956, - "step": 180 - }, - { - "epoch": 0.017050940863380513, - "grad_norm": 1.5707588195800781, - "learning_rate": 8.517647058823531e-06, - "loss": 0.5738, - "step": 181 - }, - { - "epoch": 0.017145144956548362, - "grad_norm": 1.6525170803070068, - "learning_rate": 8.564705882352942e-06, - "loss": 0.5336, - "step": 182 - }, - { - "epoch": 0.01723934904971621, - "grad_norm": 1.3461308479309082, - "learning_rate": 8.611764705882355e-06, - "loss": 0.5926, - "step": 183 - }, - { - "epoch": 0.017333553142884057, - "grad_norm": 1.3954113721847534, - "learning_rate": 8.658823529411766e-06, - "loss": 0.4952, - "step": 184 - }, - { - "epoch": 0.017427757236051906, - "grad_norm": 1.3195507526397705, - "learning_rate": 8.705882352941177e-06, - "loss": 0.5116, - "step": 185 - }, - { - "epoch": 0.017521961329219755, - "grad_norm": 1.3457469940185547, - "learning_rate": 8.752941176470588e-06, - "loss": 0.5605, - "step": 186 - }, - { - "epoch": 
0.017616165422387604, - "grad_norm": 1.3406563997268677, - "learning_rate": 8.8e-06, - "loss": 0.5213, - "step": 187 - }, - { - "epoch": 0.017710369515555453, - "grad_norm": 1.3711072206497192, - "learning_rate": 8.847058823529413e-06, - "loss": 0.5083, - "step": 188 - }, - { - "epoch": 0.017804573608723298, - "grad_norm": 1.4641438722610474, - "learning_rate": 8.894117647058824e-06, - "loss": 0.4863, - "step": 189 - }, - { - "epoch": 0.017898777701891147, - "grad_norm": 1.3969779014587402, - "learning_rate": 8.941176470588237e-06, - "loss": 0.5855, - "step": 190 - }, - { - "epoch": 0.017992981795058996, - "grad_norm": 1.3376325368881226, - "learning_rate": 8.988235294117648e-06, - "loss": 0.5644, - "step": 191 - }, - { - "epoch": 0.018087185888226845, - "grad_norm": 1.3779734373092651, - "learning_rate": 9.03529411764706e-06, - "loss": 0.5194, - "step": 192 - }, - { - "epoch": 0.01818138998139469, - "grad_norm": 1.4868650436401367, - "learning_rate": 9.082352941176472e-06, - "loss": 0.5184, - "step": 193 - }, - { - "epoch": 0.01827559407456254, - "grad_norm": 1.3568449020385742, - "learning_rate": 9.129411764705883e-06, - "loss": 0.553, - "step": 194 - }, - { - "epoch": 0.01836979816773039, - "grad_norm": 1.3707945346832275, - "learning_rate": 9.176470588235294e-06, - "loss": 0.5774, - "step": 195 - }, - { - "epoch": 0.018464002260898237, - "grad_norm": 1.3641828298568726, - "learning_rate": 9.223529411764706e-06, - "loss": 0.5567, - "step": 196 - }, - { - "epoch": 0.018558206354066083, - "grad_norm": 1.3307080268859863, - "learning_rate": 9.270588235294117e-06, - "loss": 0.4996, - "step": 197 - }, - { - "epoch": 0.01865241044723393, - "grad_norm": 1.4373600482940674, - "learning_rate": 9.31764705882353e-06, - "loss": 0.4957, - "step": 198 - }, - { - "epoch": 0.01874661454040178, - "grad_norm": 1.361508846282959, - "learning_rate": 9.364705882352943e-06, - "loss": 0.5227, - "step": 199 - }, - { - "epoch": 0.01884081863356963, - "grad_norm": 1.3544015884399414, - "learning_rate": 9.411764705882354e-06, - "loss": 0.5122, - "step": 200 - }, - { - "epoch": 0.018935022726737475, - "grad_norm": 1.2734888792037964, - "learning_rate": 9.458823529411767e-06, - "loss": 0.5555, - "step": 201 - }, - { - "epoch": 0.019029226819905324, - "grad_norm": 1.4170773029327393, - "learning_rate": 9.505882352941178e-06, - "loss": 0.5838, - "step": 202 - }, - { - "epoch": 0.019123430913073173, - "grad_norm": 1.3663331270217896, - "learning_rate": 9.552941176470589e-06, - "loss": 0.5348, - "step": 203 - }, - { - "epoch": 0.019217635006241022, - "grad_norm": 1.4586267471313477, - "learning_rate": 9.600000000000001e-06, - "loss": 0.5567, - "step": 204 - }, - { - "epoch": 0.01931183909940887, - "grad_norm": 1.3278580904006958, - "learning_rate": 9.647058823529412e-06, - "loss": 0.5193, - "step": 205 - }, - { - "epoch": 0.019406043192576716, - "grad_norm": 1.3905582427978516, - "learning_rate": 9.694117647058823e-06, - "loss": 0.5318, - "step": 206 - }, - { - "epoch": 0.019500247285744565, - "grad_norm": 1.427916407585144, - "learning_rate": 9.741176470588236e-06, - "loss": 0.4598, - "step": 207 - }, - { - "epoch": 0.019594451378912414, - "grad_norm": 1.3491191864013672, - "learning_rate": 9.788235294117649e-06, - "loss": 0.505, - "step": 208 - }, - { - "epoch": 0.019688655472080263, - "grad_norm": 1.3706151247024536, - "learning_rate": 9.83529411764706e-06, - "loss": 0.5623, - "step": 209 - }, - { - "epoch": 0.01978285956524811, - "grad_norm": 1.4672727584838867, - "learning_rate": 9.882352941176472e-06, - "loss": 
0.556, - "step": 210 - }, - { - "epoch": 0.019877063658415958, - "grad_norm": 1.283095121383667, - "learning_rate": 9.929411764705883e-06, - "loss": 0.5038, - "step": 211 - }, - { - "epoch": 0.019971267751583806, - "grad_norm": 1.3932231664657593, - "learning_rate": 9.976470588235294e-06, - "loss": 0.5366, - "step": 212 - }, - { - "epoch": 0.020065471844751655, - "grad_norm": 1.2726454734802246, - "learning_rate": 1.0023529411764707e-05, - "loss": 0.523, - "step": 213 - }, - { - "epoch": 0.0201596759379195, - "grad_norm": 1.360388994216919, - "learning_rate": 1.0070588235294118e-05, - "loss": 0.5856, - "step": 214 - }, - { - "epoch": 0.02025388003108735, - "grad_norm": 1.2967466115951538, - "learning_rate": 1.011764705882353e-05, - "loss": 0.499, - "step": 215 - }, - { - "epoch": 0.0203480841242552, - "grad_norm": 1.2784472703933716, - "learning_rate": 1.016470588235294e-05, - "loss": 0.5509, - "step": 216 - }, - { - "epoch": 0.020442288217423048, - "grad_norm": 1.389125108718872, - "learning_rate": 1.0211764705882355e-05, - "loss": 0.498, - "step": 217 - }, - { - "epoch": 0.020536492310590897, - "grad_norm": 1.286584734916687, - "learning_rate": 1.0258823529411766e-05, - "loss": 0.4908, - "step": 218 - }, - { - "epoch": 0.020630696403758742, - "grad_norm": 1.4463655948638916, - "learning_rate": 1.0305882352941177e-05, - "loss": 0.5232, - "step": 219 - }, - { - "epoch": 0.02072490049692659, - "grad_norm": 1.384580373764038, - "learning_rate": 1.035294117647059e-05, - "loss": 0.5515, - "step": 220 - }, - { - "epoch": 0.02081910459009444, - "grad_norm": 1.3646312952041626, - "learning_rate": 1.04e-05, - "loss": 0.5048, - "step": 221 - }, - { - "epoch": 0.02091330868326229, - "grad_norm": 1.398513674736023, - "learning_rate": 1.0447058823529413e-05, - "loss": 0.5485, - "step": 222 - }, - { - "epoch": 0.021007512776430134, - "grad_norm": 1.4606225490570068, - "learning_rate": 1.0494117647058824e-05, - "loss": 0.5682, - "step": 223 - }, - { - "epoch": 0.021101716869597983, - "grad_norm": 1.404390811920166, - "learning_rate": 1.0541176470588237e-05, - "loss": 0.5791, - "step": 224 - }, - { - "epoch": 0.021195920962765832, - "grad_norm": 1.3128252029418945, - "learning_rate": 1.0588235294117648e-05, - "loss": 0.5609, - "step": 225 - }, - { - "epoch": 0.02129012505593368, - "grad_norm": 1.289519190788269, - "learning_rate": 1.0635294117647059e-05, - "loss": 0.5223, - "step": 226 - }, - { - "epoch": 0.021384329149101527, - "grad_norm": 1.1863501071929932, - "learning_rate": 1.068235294117647e-05, - "loss": 0.4881, - "step": 227 - }, - { - "epoch": 0.021478533242269376, - "grad_norm": 1.232800006866455, - "learning_rate": 1.0729411764705884e-05, - "loss": 0.4874, - "step": 228 - }, - { - "epoch": 0.021572737335437225, - "grad_norm": 1.4398059844970703, - "learning_rate": 1.0776470588235295e-05, - "loss": 0.5385, - "step": 229 - }, - { - "epoch": 0.021666941428605074, - "grad_norm": 1.2320069074630737, - "learning_rate": 1.0823529411764706e-05, - "loss": 0.4659, - "step": 230 - }, - { - "epoch": 0.021761145521772923, - "grad_norm": 1.3715555667877197, - "learning_rate": 1.0870588235294119e-05, - "loss": 0.51, - "step": 231 - }, - { - "epoch": 0.021855349614940768, - "grad_norm": 1.4365489482879639, - "learning_rate": 1.091764705882353e-05, - "loss": 0.5078, - "step": 232 - }, - { - "epoch": 0.021949553708108617, - "grad_norm": 1.2546861171722412, - "learning_rate": 1.0964705882352941e-05, - "loss": 0.5661, - "step": 233 - }, - { - "epoch": 0.022043757801276466, - "grad_norm": 1.481594443321228, - 
"learning_rate": 1.1011764705882354e-05, - "loss": 0.6318, - "step": 234 - }, - { - "epoch": 0.022137961894444315, - "grad_norm": 1.4932091236114502, - "learning_rate": 1.1058823529411766e-05, - "loss": 0.5458, - "step": 235 - }, - { - "epoch": 0.02223216598761216, - "grad_norm": 1.3674851655960083, - "learning_rate": 1.1105882352941177e-05, - "loss": 0.4709, - "step": 236 - }, - { - "epoch": 0.02232637008078001, - "grad_norm": 1.3787115812301636, - "learning_rate": 1.1152941176470588e-05, - "loss": 0.5804, - "step": 237 - }, - { - "epoch": 0.022420574173947858, - "grad_norm": 1.3375911712646484, - "learning_rate": 1.1200000000000001e-05, - "loss": 0.4798, - "step": 238 - }, - { - "epoch": 0.022514778267115707, - "grad_norm": 1.4716728925704956, - "learning_rate": 1.1247058823529414e-05, - "loss": 0.5825, - "step": 239 - }, - { - "epoch": 0.022608982360283553, - "grad_norm": 1.3478437662124634, - "learning_rate": 1.1294117647058825e-05, - "loss": 0.5459, - "step": 240 - }, - { - "epoch": 0.0227031864534514, - "grad_norm": 1.2855398654937744, - "learning_rate": 1.1341176470588236e-05, - "loss": 0.5437, - "step": 241 - }, - { - "epoch": 0.02279739054661925, - "grad_norm": 1.4099336862564087, - "learning_rate": 1.1388235294117649e-05, - "loss": 0.5369, - "step": 242 - }, - { - "epoch": 0.0228915946397871, - "grad_norm": 1.4721664190292358, - "learning_rate": 1.143529411764706e-05, - "loss": 0.5232, - "step": 243 - }, - { - "epoch": 0.02298579873295495, - "grad_norm": 1.4028209447860718, - "learning_rate": 1.148235294117647e-05, - "loss": 0.5668, - "step": 244 - }, - { - "epoch": 0.023080002826122794, - "grad_norm": 1.2915841341018677, - "learning_rate": 1.1529411764705882e-05, - "loss": 0.5403, - "step": 245 - }, - { - "epoch": 0.023174206919290643, - "grad_norm": 1.385123372077942, - "learning_rate": 1.1576470588235296e-05, - "loss": 0.4851, - "step": 246 - }, - { - "epoch": 0.023268411012458492, - "grad_norm": 1.4114811420440674, - "learning_rate": 1.1623529411764707e-05, - "loss": 0.638, - "step": 247 - }, - { - "epoch": 0.02336261510562634, - "grad_norm": 1.2462477684020996, - "learning_rate": 1.1670588235294118e-05, - "loss": 0.4535, - "step": 248 - }, - { - "epoch": 0.023456819198794186, - "grad_norm": 1.4437800645828247, - "learning_rate": 1.171764705882353e-05, - "loss": 0.5457, - "step": 249 - }, - { - "epoch": 0.023551023291962035, - "grad_norm": 1.3583869934082031, - "learning_rate": 1.1764705882352942e-05, - "loss": 0.5556, - "step": 250 - }, - { - "epoch": 0.023645227385129884, - "grad_norm": 1.3165775537490845, - "learning_rate": 1.1811764705882353e-05, - "loss": 0.4812, - "step": 251 - }, - { - "epoch": 0.023739431478297733, - "grad_norm": 1.3515589237213135, - "learning_rate": 1.1858823529411766e-05, - "loss": 0.5272, - "step": 252 - }, - { - "epoch": 0.02383363557146558, - "grad_norm": 1.3042768239974976, - "learning_rate": 1.1905882352941178e-05, - "loss": 0.5391, - "step": 253 - }, - { - "epoch": 0.023927839664633428, - "grad_norm": 1.318557620048523, - "learning_rate": 1.195294117647059e-05, - "loss": 0.5077, - "step": 254 - }, - { - "epoch": 0.024022043757801276, - "grad_norm": 1.2530382871627808, - "learning_rate": 1.2e-05, - "loss": 0.5204, - "step": 255 - }, - { - "epoch": 0.024116247850969125, - "grad_norm": 1.3513764142990112, - "learning_rate": 1.2047058823529411e-05, - "loss": 0.5431, - "step": 256 - }, - { - "epoch": 0.024210451944136974, - "grad_norm": 1.1965149641036987, - "learning_rate": 1.2094117647058826e-05, - "loss": 0.4621, - "step": 257 - }, - { - 
"epoch": 0.02430465603730482, - "grad_norm": 1.5096133947372437, - "learning_rate": 1.2141176470588237e-05, - "loss": 0.5475, - "step": 258 - }, - { - "epoch": 0.02439886013047267, - "grad_norm": 1.3592801094055176, - "learning_rate": 1.2188235294117648e-05, - "loss": 0.4901, - "step": 259 - }, - { - "epoch": 0.024493064223640518, - "grad_norm": 1.318343162536621, - "learning_rate": 1.223529411764706e-05, - "loss": 0.5354, - "step": 260 - }, - { - "epoch": 0.024587268316808367, - "grad_norm": 1.2713096141815186, - "learning_rate": 1.2282352941176471e-05, - "loss": 0.5345, - "step": 261 - }, - { - "epoch": 0.024681472409976212, - "grad_norm": 1.2716429233551025, - "learning_rate": 1.2329411764705882e-05, - "loss": 0.4736, - "step": 262 - }, - { - "epoch": 0.02477567650314406, - "grad_norm": 1.2657898664474487, - "learning_rate": 1.2376470588235294e-05, - "loss": 0.522, - "step": 263 - }, - { - "epoch": 0.02486988059631191, - "grad_norm": 1.1999789476394653, - "learning_rate": 1.2423529411764708e-05, - "loss": 0.503, - "step": 264 - }, - { - "epoch": 0.02496408468947976, - "grad_norm": 1.2083820104599, - "learning_rate": 1.2470588235294119e-05, - "loss": 0.5141, - "step": 265 - }, - { - "epoch": 0.025058288782647604, - "grad_norm": 1.289919376373291, - "learning_rate": 1.251764705882353e-05, - "loss": 0.4781, - "step": 266 - }, - { - "epoch": 0.025152492875815453, - "grad_norm": 1.260485053062439, - "learning_rate": 1.2564705882352943e-05, - "loss": 0.5109, - "step": 267 - }, - { - "epoch": 0.025246696968983302, - "grad_norm": 1.3441667556762695, - "learning_rate": 1.2611764705882354e-05, - "loss": 0.523, - "step": 268 - }, - { - "epoch": 0.02534090106215115, - "grad_norm": 1.3340215682983398, - "learning_rate": 1.2658823529411766e-05, - "loss": 0.5996, - "step": 269 - }, - { - "epoch": 0.025435105155319, - "grad_norm": 1.1880944967269897, - "learning_rate": 1.2705882352941177e-05, - "loss": 0.5058, - "step": 270 - }, - { - "epoch": 0.025529309248486846, - "grad_norm": 1.326841950416565, - "learning_rate": 1.275294117647059e-05, - "loss": 0.5749, - "step": 271 - }, - { - "epoch": 0.025623513341654695, - "grad_norm": 1.1489343643188477, - "learning_rate": 1.2800000000000001e-05, - "loss": 0.4905, - "step": 272 - }, - { - "epoch": 0.025717717434822544, - "grad_norm": 1.2286577224731445, - "learning_rate": 1.2847058823529412e-05, - "loss": 0.4753, - "step": 273 - }, - { - "epoch": 0.025811921527990393, - "grad_norm": 1.2917677164077759, - "learning_rate": 1.2894117647058823e-05, - "loss": 0.4893, - "step": 274 - }, - { - "epoch": 0.025906125621158238, - "grad_norm": 1.211621642112732, - "learning_rate": 1.2941176470588238e-05, - "loss": 0.4962, - "step": 275 - }, - { - "epoch": 0.026000329714326087, - "grad_norm": 1.2378382682800293, - "learning_rate": 1.2988235294117649e-05, - "loss": 0.5133, - "step": 276 - }, - { - "epoch": 0.026094533807493936, - "grad_norm": 1.3860008716583252, - "learning_rate": 1.303529411764706e-05, - "loss": 0.5281, - "step": 277 - }, - { - "epoch": 0.026188737900661785, - "grad_norm": 1.2689138650894165, - "learning_rate": 1.3082352941176472e-05, - "loss": 0.4678, - "step": 278 - }, - { - "epoch": 0.02628294199382963, - "grad_norm": 1.2363132238388062, - "learning_rate": 1.3129411764705883e-05, - "loss": 0.4456, - "step": 279 - }, - { - "epoch": 0.02637714608699748, - "grad_norm": 1.2797064781188965, - "learning_rate": 1.3176470588235294e-05, - "loss": 0.4297, - "step": 280 - }, - { - "epoch": 0.026471350180165328, - "grad_norm": 1.382623314857483, - "learning_rate": 
1.3223529411764705e-05, - "loss": 0.5486, - "step": 281 - }, - { - "epoch": 0.026565554273333177, - "grad_norm": 1.1852174997329712, - "learning_rate": 1.327058823529412e-05, - "loss": 0.4982, - "step": 282 - }, - { - "epoch": 0.026659758366501026, - "grad_norm": 1.3323570489883423, - "learning_rate": 1.331764705882353e-05, - "loss": 0.5272, - "step": 283 - }, - { - "epoch": 0.02675396245966887, - "grad_norm": 1.3760366439819336, - "learning_rate": 1.3364705882352942e-05, - "loss": 0.5075, - "step": 284 - }, - { - "epoch": 0.02684816655283672, - "grad_norm": 1.3552740812301636, - "learning_rate": 1.3411764705882353e-05, - "loss": 0.5689, - "step": 285 - }, - { - "epoch": 0.02694237064600457, - "grad_norm": 1.250741720199585, - "learning_rate": 1.3458823529411767e-05, - "loss": 0.4848, - "step": 286 - }, - { - "epoch": 0.02703657473917242, - "grad_norm": 1.3446913957595825, - "learning_rate": 1.3505882352941178e-05, - "loss": 0.4934, - "step": 287 - }, - { - "epoch": 0.027130778832340264, - "grad_norm": 1.26736319065094, - "learning_rate": 1.355294117647059e-05, - "loss": 0.4598, - "step": 288 - }, - { - "epoch": 0.027224982925508113, - "grad_norm": 1.2434927225112915, - "learning_rate": 1.3600000000000002e-05, - "loss": 0.5369, - "step": 289 - }, - { - "epoch": 0.027319187018675962, - "grad_norm": 1.2333719730377197, - "learning_rate": 1.3647058823529413e-05, - "loss": 0.5317, - "step": 290 - }, - { - "epoch": 0.02741339111184381, - "grad_norm": 1.21355140209198, - "learning_rate": 1.3694117647058824e-05, - "loss": 0.4773, - "step": 291 - }, - { - "epoch": 0.027507595205011656, - "grad_norm": 1.2929593324661255, - "learning_rate": 1.3741176470588235e-05, - "loss": 0.4926, - "step": 292 - }, - { - "epoch": 0.027601799298179505, - "grad_norm": 1.2802207469940186, - "learning_rate": 1.378823529411765e-05, - "loss": 0.4911, - "step": 293 - }, - { - "epoch": 0.027696003391347354, - "grad_norm": 1.466518759727478, - "learning_rate": 1.383529411764706e-05, - "loss": 0.5255, - "step": 294 - }, - { - "epoch": 0.027790207484515203, - "grad_norm": 1.3376758098602295, - "learning_rate": 1.3882352941176471e-05, - "loss": 0.5025, - "step": 295 - }, - { - "epoch": 0.027884411577683052, - "grad_norm": 1.6920759677886963, - "learning_rate": 1.3929411764705882e-05, - "loss": 0.5366, - "step": 296 - }, - { - "epoch": 0.027978615670850897, - "grad_norm": 1.1678169965744019, - "learning_rate": 1.3976470588235295e-05, - "loss": 0.4639, - "step": 297 - }, - { - "epoch": 0.028072819764018746, - "grad_norm": 1.1555671691894531, - "learning_rate": 1.4023529411764706e-05, - "loss": 0.4613, - "step": 298 - }, - { - "epoch": 0.028167023857186595, - "grad_norm": 1.3847774267196655, - "learning_rate": 1.4070588235294119e-05, - "loss": 0.5177, - "step": 299 - }, - { - "epoch": 0.028261227950354444, - "grad_norm": 1.3086796998977661, - "learning_rate": 1.4117647058823532e-05, - "loss": 0.5475, - "step": 300 - }, - { - "epoch": 0.02835543204352229, - "grad_norm": 1.4727612733840942, - "learning_rate": 1.4164705882352943e-05, - "loss": 0.5696, - "step": 301 - }, - { - "epoch": 0.02844963613669014, - "grad_norm": 1.166216492652893, - "learning_rate": 1.4211764705882354e-05, - "loss": 0.4991, - "step": 302 - }, - { - "epoch": 0.028543840229857988, - "grad_norm": 1.2508559226989746, - "learning_rate": 1.4258823529411765e-05, - "loss": 0.472, - "step": 303 - }, - { - "epoch": 0.028638044323025837, - "grad_norm": 1.341174602508545, - "learning_rate": 1.4305882352941179e-05, - "loss": 0.5393, - "step": 304 - }, - { - "epoch": 
0.028732248416193682, - "grad_norm": 1.3411362171173096, - "learning_rate": 1.435294117647059e-05, - "loss": 0.49, - "step": 305 - }, - { - "epoch": 0.02882645250936153, - "grad_norm": 1.2923569679260254, - "learning_rate": 1.4400000000000001e-05, - "loss": 0.529, - "step": 306 - }, - { - "epoch": 0.02892065660252938, - "grad_norm": 1.364295482635498, - "learning_rate": 1.4447058823529414e-05, - "loss": 0.527, - "step": 307 - }, - { - "epoch": 0.02901486069569723, - "grad_norm": 1.2051934003829956, - "learning_rate": 1.4494117647058825e-05, - "loss": 0.4571, - "step": 308 - }, - { - "epoch": 0.029109064788865078, - "grad_norm": 1.301098346710205, - "learning_rate": 1.4541176470588236e-05, - "loss": 0.5431, - "step": 309 - }, - { - "epoch": 0.029203268882032923, - "grad_norm": 1.2850219011306763, - "learning_rate": 1.4588235294117647e-05, - "loss": 0.4506, - "step": 310 - }, - { - "epoch": 0.029297472975200772, - "grad_norm": 1.2272703647613525, - "learning_rate": 1.4635294117647061e-05, - "loss": 0.4411, - "step": 311 - }, - { - "epoch": 0.02939167706836862, - "grad_norm": 1.2605528831481934, - "learning_rate": 1.4682352941176472e-05, - "loss": 0.4766, - "step": 312 - }, - { - "epoch": 0.02948588116153647, - "grad_norm": 1.2444404363632202, - "learning_rate": 1.4729411764705883e-05, - "loss": 0.4705, - "step": 313 - }, - { - "epoch": 0.029580085254704316, - "grad_norm": 1.1735955476760864, - "learning_rate": 1.4776470588235294e-05, - "loss": 0.4668, - "step": 314 - }, - { - "epoch": 0.029674289347872165, - "grad_norm": 1.2663644552230835, - "learning_rate": 1.4823529411764707e-05, - "loss": 0.5078, - "step": 315 - }, - { - "epoch": 0.029768493441040014, - "grad_norm": 1.2020328044891357, - "learning_rate": 1.487058823529412e-05, - "loss": 0.5079, - "step": 316 - }, - { - "epoch": 0.029862697534207863, - "grad_norm": 1.1874061822891235, - "learning_rate": 1.491764705882353e-05, - "loss": 0.4708, - "step": 317 - }, - { - "epoch": 0.029956901627375708, - "grad_norm": 1.1729247570037842, - "learning_rate": 1.4964705882352943e-05, - "loss": 0.4968, - "step": 318 - }, - { - "epoch": 0.030051105720543557, - "grad_norm": 1.2035945653915405, - "learning_rate": 1.5011764705882354e-05, - "loss": 0.4761, - "step": 319 - }, - { - "epoch": 0.030145309813711406, - "grad_norm": 1.1717956066131592, - "learning_rate": 1.5058823529411765e-05, - "loss": 0.504, - "step": 320 - }, - { - "epoch": 0.030239513906879255, - "grad_norm": 1.1463146209716797, - "learning_rate": 1.5105882352941176e-05, - "loss": 0.4883, - "step": 321 - }, - { - "epoch": 0.030333718000047104, - "grad_norm": 1.2568846940994263, - "learning_rate": 1.515294117647059e-05, - "loss": 0.4818, - "step": 322 - }, - { - "epoch": 0.03042792209321495, - "grad_norm": 1.1344101428985596, - "learning_rate": 1.5200000000000002e-05, - "loss": 0.4452, - "step": 323 - }, - { - "epoch": 0.030522126186382798, - "grad_norm": 1.2352666854858398, - "learning_rate": 1.5247058823529413e-05, - "loss": 0.4619, - "step": 324 - }, - { - "epoch": 0.030616330279550647, - "grad_norm": 1.1882596015930176, - "learning_rate": 1.5294117647058822e-05, - "loss": 0.508, - "step": 325 - }, - { - "epoch": 0.030710534372718496, - "grad_norm": 1.341575264930725, - "learning_rate": 1.5341176470588238e-05, - "loss": 0.5217, - "step": 326 - }, - { - "epoch": 0.03080473846588634, - "grad_norm": 1.3158843517303467, - "learning_rate": 1.5388235294117648e-05, - "loss": 0.5428, - "step": 327 - }, - { - "epoch": 0.03089894255905419, - "grad_norm": 1.2857859134674072, - "learning_rate": 
1.543529411764706e-05, - "loss": 0.5611, - "step": 328 - }, - { - "epoch": 0.03099314665222204, - "grad_norm": 1.1938344240188599, - "learning_rate": 1.5482352941176473e-05, - "loss": 0.4873, - "step": 329 - }, - { - "epoch": 0.03108735074538989, - "grad_norm": 1.1202720403671265, - "learning_rate": 1.5529411764705882e-05, - "loss": 0.4402, - "step": 330 - }, - { - "epoch": 0.031181554838557734, - "grad_norm": 1.1185312271118164, - "learning_rate": 1.5576470588235295e-05, - "loss": 0.5004, - "step": 331 - }, - { - "epoch": 0.031275758931725586, - "grad_norm": 1.2877079248428345, - "learning_rate": 1.5623529411764708e-05, - "loss": 0.4842, - "step": 332 - }, - { - "epoch": 0.03136996302489343, - "grad_norm": 1.2797932624816895, - "learning_rate": 1.567058823529412e-05, - "loss": 0.4949, - "step": 333 - }, - { - "epoch": 0.03146416711806128, - "grad_norm": 1.0597726106643677, - "learning_rate": 1.571764705882353e-05, - "loss": 0.4292, - "step": 334 - }, - { - "epoch": 0.031558371211229126, - "grad_norm": 1.1882489919662476, - "learning_rate": 1.5764705882352943e-05, - "loss": 0.4819, - "step": 335 - }, - { - "epoch": 0.031652575304396975, - "grad_norm": 1.305355429649353, - "learning_rate": 1.5811764705882352e-05, - "loss": 0.4368, - "step": 336 - }, - { - "epoch": 0.031746779397564824, - "grad_norm": 1.1395972967147827, - "learning_rate": 1.5858823529411768e-05, - "loss": 0.4541, - "step": 337 - }, - { - "epoch": 0.03184098349073267, - "grad_norm": 1.4120413064956665, - "learning_rate": 1.5905882352941177e-05, - "loss": 0.4797, - "step": 338 - }, - { - "epoch": 0.03193518758390052, - "grad_norm": 1.2574928998947144, - "learning_rate": 1.595294117647059e-05, - "loss": 0.4914, - "step": 339 - }, - { - "epoch": 0.03202939167706837, - "grad_norm": 1.1765894889831543, - "learning_rate": 1.6000000000000003e-05, - "loss": 0.4123, - "step": 340 - }, - { - "epoch": 0.03212359577023622, - "grad_norm": 1.1035923957824707, - "learning_rate": 1.6047058823529412e-05, - "loss": 0.4594, - "step": 341 - }, - { - "epoch": 0.03221779986340406, - "grad_norm": 1.3358131647109985, - "learning_rate": 1.6094117647058825e-05, - "loss": 0.5147, - "step": 342 - }, - { - "epoch": 0.03231200395657191, - "grad_norm": 1.2328746318817139, - "learning_rate": 1.6141176470588234e-05, - "loss": 0.4826, - "step": 343 - }, - { - "epoch": 0.03240620804973976, - "grad_norm": 1.3128273487091064, - "learning_rate": 1.618823529411765e-05, - "loss": 0.4934, - "step": 344 - }, - { - "epoch": 0.03250041214290761, - "grad_norm": 1.1448085308074951, - "learning_rate": 1.623529411764706e-05, - "loss": 0.4152, - "step": 345 - }, - { - "epoch": 0.03259461623607546, - "grad_norm": 1.2045378684997559, - "learning_rate": 1.6282352941176472e-05, - "loss": 0.4433, - "step": 346 - }, - { - "epoch": 0.03268882032924331, - "grad_norm": 1.3228363990783691, - "learning_rate": 1.6329411764705885e-05, - "loss": 0.5286, - "step": 347 - }, - { - "epoch": 0.032783024422411156, - "grad_norm": 1.3525841236114502, - "learning_rate": 1.6376470588235298e-05, - "loss": 0.5279, - "step": 348 - }, - { - "epoch": 0.032877228515579004, - "grad_norm": 1.5093525648117065, - "learning_rate": 1.6423529411764707e-05, - "loss": 0.5161, - "step": 349 - }, - { - "epoch": 0.03297143260874685, - "grad_norm": 1.2169058322906494, - "learning_rate": 1.647058823529412e-05, - "loss": 0.4849, - "step": 350 - }, - { - "epoch": 0.033065636701914695, - "grad_norm": 1.2074980735778809, - "learning_rate": 1.6517647058823532e-05, - "loss": 0.4961, - "step": 351 - }, - { - "epoch": 
0.033159840795082544, - "grad_norm": 1.128026008605957, - "learning_rate": 1.656470588235294e-05, - "loss": 0.4738, - "step": 352 - }, - { - "epoch": 0.03325404488825039, - "grad_norm": 1.2816585302352905, - "learning_rate": 1.6611764705882354e-05, - "loss": 0.4957, - "step": 353 - }, - { - "epoch": 0.03334824898141824, - "grad_norm": 1.2620776891708374, - "learning_rate": 1.6658823529411764e-05, - "loss": 0.549, - "step": 354 - }, - { - "epoch": 0.03344245307458609, - "grad_norm": 1.2972233295440674, - "learning_rate": 1.670588235294118e-05, - "loss": 0.5037, - "step": 355 - }, - { - "epoch": 0.03353665716775394, - "grad_norm": 1.1903115510940552, - "learning_rate": 1.675294117647059e-05, - "loss": 0.5025, - "step": 356 - }, - { - "epoch": 0.03363086126092179, - "grad_norm": 1.0767686367034912, - "learning_rate": 1.6800000000000002e-05, - "loss": 0.4487, - "step": 357 - }, - { - "epoch": 0.03372506535408964, - "grad_norm": 1.1257960796356201, - "learning_rate": 1.6847058823529414e-05, - "loss": 0.4863, - "step": 358 - }, - { - "epoch": 0.03381926944725748, - "grad_norm": 1.2453886270523071, - "learning_rate": 1.6894117647058824e-05, - "loss": 0.4671, - "step": 359 - }, - { - "epoch": 0.03391347354042533, - "grad_norm": 1.2498379945755005, - "learning_rate": 1.6941176470588237e-05, - "loss": 0.5045, - "step": 360 - }, - { - "epoch": 0.03400767763359318, - "grad_norm": 1.1781715154647827, - "learning_rate": 1.698823529411765e-05, - "loss": 0.4638, - "step": 361 - }, - { - "epoch": 0.03410188172676103, - "grad_norm": 1.3448647260665894, - "learning_rate": 1.7035294117647062e-05, - "loss": 0.5319, - "step": 362 - }, - { - "epoch": 0.034196085819928876, - "grad_norm": 1.0600744485855103, - "learning_rate": 1.708235294117647e-05, - "loss": 0.4685, - "step": 363 - }, - { - "epoch": 0.034290289913096725, - "grad_norm": 1.1628053188323975, - "learning_rate": 1.7129411764705884e-05, - "loss": 0.466, - "step": 364 - }, - { - "epoch": 0.034384494006264574, - "grad_norm": 1.249374270439148, - "learning_rate": 1.7176470588235293e-05, - "loss": 0.4751, - "step": 365 - }, - { - "epoch": 0.03447869809943242, - "grad_norm": 1.2262009382247925, - "learning_rate": 1.722352941176471e-05, - "loss": 0.5231, - "step": 366 - }, - { - "epoch": 0.03457290219260027, - "grad_norm": 1.08585786819458, - "learning_rate": 1.727058823529412e-05, - "loss": 0.4549, - "step": 367 - }, - { - "epoch": 0.034667106285768114, - "grad_norm": 1.250612497329712, - "learning_rate": 1.731764705882353e-05, - "loss": 0.5023, - "step": 368 - }, - { - "epoch": 0.03476131037893596, - "grad_norm": 1.2346527576446533, - "learning_rate": 1.7364705882352944e-05, - "loss": 0.4137, - "step": 369 - }, - { - "epoch": 0.03485551447210381, - "grad_norm": 1.1174893379211426, - "learning_rate": 1.7411764705882353e-05, - "loss": 0.4631, - "step": 370 - }, - { - "epoch": 0.03494971856527166, - "grad_norm": 1.193302869796753, - "learning_rate": 1.7458823529411766e-05, - "loss": 0.4284, - "step": 371 - }, - { - "epoch": 0.03504392265843951, - "grad_norm": 1.3517142534255981, - "learning_rate": 1.7505882352941175e-05, - "loss": 0.4327, - "step": 372 - }, - { - "epoch": 0.03513812675160736, - "grad_norm": 1.2797682285308838, - "learning_rate": 1.755294117647059e-05, - "loss": 0.5303, - "step": 373 - }, - { - "epoch": 0.03523233084477521, - "grad_norm": 1.0743054151535034, - "learning_rate": 1.76e-05, - "loss": 0.385, - "step": 374 - }, - { - "epoch": 0.035326534937943056, - "grad_norm": 1.3105340003967285, - "learning_rate": 1.7647058823529414e-05, - 
"loss": 0.4938, - "step": 375 - }, - { - "epoch": 0.035420739031110905, - "grad_norm": 1.1594735383987427, - "learning_rate": 1.7694117647058826e-05, - "loss": 0.4668, - "step": 376 - }, - { - "epoch": 0.03551494312427875, - "grad_norm": 1.1395872831344604, - "learning_rate": 1.7741176470588236e-05, - "loss": 0.4816, - "step": 377 - }, - { - "epoch": 0.035609147217446596, - "grad_norm": 1.2569265365600586, - "learning_rate": 1.778823529411765e-05, - "loss": 0.5287, - "step": 378 - }, - { - "epoch": 0.035703351310614445, - "grad_norm": 1.1969325542449951, - "learning_rate": 1.783529411764706e-05, - "loss": 0.4791, - "step": 379 - }, - { - "epoch": 0.035797555403782294, - "grad_norm": 1.286061406135559, - "learning_rate": 1.7882352941176474e-05, - "loss": 0.4755, - "step": 380 - }, - { - "epoch": 0.03589175949695014, - "grad_norm": 1.2607399225234985, - "learning_rate": 1.7929411764705883e-05, - "loss": 0.513, - "step": 381 - }, - { - "epoch": 0.03598596359011799, - "grad_norm": 1.1393650770187378, - "learning_rate": 1.7976470588235296e-05, - "loss": 0.5092, - "step": 382 - }, - { - "epoch": 0.03608016768328584, - "grad_norm": 1.2013413906097412, - "learning_rate": 1.8023529411764705e-05, - "loss": 0.4752, - "step": 383 - }, - { - "epoch": 0.03617437177645369, - "grad_norm": 1.0431921482086182, - "learning_rate": 1.807058823529412e-05, - "loss": 0.4307, - "step": 384 - }, - { - "epoch": 0.03626857586962153, - "grad_norm": 1.1883023977279663, - "learning_rate": 1.811764705882353e-05, - "loss": 0.4929, - "step": 385 - }, - { - "epoch": 0.03636277996278938, - "grad_norm": 1.215178370475769, - "learning_rate": 1.8164705882352943e-05, - "loss": 0.4872, - "step": 386 - }, - { - "epoch": 0.03645698405595723, - "grad_norm": 1.2737501859664917, - "learning_rate": 1.8211764705882356e-05, - "loss": 0.4534, - "step": 387 - }, - { - "epoch": 0.03655118814912508, - "grad_norm": 1.1616137027740479, - "learning_rate": 1.8258823529411765e-05, - "loss": 0.4926, - "step": 388 - }, - { - "epoch": 0.03664539224229293, - "grad_norm": 1.0881837606430054, - "learning_rate": 1.8305882352941178e-05, - "loss": 0.432, - "step": 389 - }, - { - "epoch": 0.03673959633546078, - "grad_norm": 1.3107651472091675, - "learning_rate": 1.8352941176470587e-05, - "loss": 0.5049, - "step": 390 - }, - { - "epoch": 0.036833800428628626, - "grad_norm": 1.1299723386764526, - "learning_rate": 1.8400000000000003e-05, - "loss": 0.4903, - "step": 391 - }, - { - "epoch": 0.036928004521796474, - "grad_norm": 1.2426947355270386, - "learning_rate": 1.8447058823529413e-05, - "loss": 0.5486, - "step": 392 - }, - { - "epoch": 0.03702220861496432, - "grad_norm": 1.1760404109954834, - "learning_rate": 1.8494117647058825e-05, - "loss": 0.558, - "step": 393 - }, - { - "epoch": 0.037116412708132165, - "grad_norm": 1.048372745513916, - "learning_rate": 1.8541176470588235e-05, - "loss": 0.4148, - "step": 394 - }, - { - "epoch": 0.037210616801300014, - "grad_norm": 1.1490461826324463, - "learning_rate": 1.8588235294117647e-05, - "loss": 0.4577, - "step": 395 - }, - { - "epoch": 0.03730482089446786, - "grad_norm": 1.167213797569275, - "learning_rate": 1.863529411764706e-05, - "loss": 0.5022, - "step": 396 - }, - { - "epoch": 0.03739902498763571, - "grad_norm": 1.1789714097976685, - "learning_rate": 1.8682352941176473e-05, - "loss": 0.4654, - "step": 397 - }, - { - "epoch": 0.03749322908080356, - "grad_norm": 1.1918264627456665, - "learning_rate": 1.8729411764705886e-05, - "loss": 0.5157, - "step": 398 - }, - { - "epoch": 0.03758743317397141, - "grad_norm": 
1.4549074172973633, - "learning_rate": 1.8776470588235295e-05, - "loss": 0.5104, - "step": 399 - }, - { - "epoch": 0.03768163726713926, - "grad_norm": 1.2526028156280518, - "learning_rate": 1.8823529411764708e-05, - "loss": 0.5051, - "step": 400 - }, - { - "epoch": 0.03777584136030711, - "grad_norm": 1.1502976417541504, - "learning_rate": 1.8870588235294117e-05, - "loss": 0.5033, - "step": 401 - }, - { - "epoch": 0.03787004545347495, - "grad_norm": 1.1888201236724854, - "learning_rate": 1.8917647058823533e-05, - "loss": 0.4586, - "step": 402 - }, - { - "epoch": 0.0379642495466428, - "grad_norm": 1.2222927808761597, - "learning_rate": 1.8964705882352942e-05, - "loss": 0.5019, - "step": 403 - }, - { - "epoch": 0.03805845363981065, - "grad_norm": 1.211268663406372, - "learning_rate": 1.9011764705882355e-05, - "loss": 0.4384, - "step": 404 - }, - { - "epoch": 0.0381526577329785, - "grad_norm": 1.079023838043213, - "learning_rate": 1.9058823529411764e-05, - "loss": 0.4863, - "step": 405 - }, - { - "epoch": 0.038246861826146346, - "grad_norm": 1.2167657613754272, - "learning_rate": 1.9105882352941177e-05, - "loss": 0.4722, - "step": 406 - }, - { - "epoch": 0.038341065919314195, - "grad_norm": 1.325701117515564, - "learning_rate": 1.915294117647059e-05, - "loss": 0.4828, - "step": 407 - }, - { - "epoch": 0.038435270012482044, - "grad_norm": 1.3329956531524658, - "learning_rate": 1.9200000000000003e-05, - "loss": 0.4434, - "step": 408 - }, - { - "epoch": 0.03852947410564989, - "grad_norm": 1.2114311456680298, - "learning_rate": 1.9247058823529415e-05, - "loss": 0.5059, - "step": 409 - }, - { - "epoch": 0.03862367819881774, - "grad_norm": 1.188414454460144, - "learning_rate": 1.9294117647058825e-05, - "loss": 0.4929, - "step": 410 - }, - { - "epoch": 0.038717882291985584, - "grad_norm": 1.505661129951477, - "learning_rate": 1.9341176470588237e-05, - "loss": 0.4896, - "step": 411 - }, - { - "epoch": 0.03881208638515343, - "grad_norm": 1.4131497144699097, - "learning_rate": 1.9388235294117647e-05, - "loss": 0.506, - "step": 412 - }, - { - "epoch": 0.03890629047832128, - "grad_norm": 1.2669509649276733, - "learning_rate": 1.9435294117647063e-05, - "loss": 0.5385, - "step": 413 - }, - { - "epoch": 0.03900049457148913, - "grad_norm": 1.2937191724777222, - "learning_rate": 1.9482352941176472e-05, - "loss": 0.4741, - "step": 414 - }, - { - "epoch": 0.03909469866465698, - "grad_norm": 1.3381024599075317, - "learning_rate": 1.9529411764705885e-05, - "loss": 0.4952, - "step": 415 - }, - { - "epoch": 0.03918890275782483, - "grad_norm": 1.2062170505523682, - "learning_rate": 1.9576470588235297e-05, - "loss": 0.4666, - "step": 416 - }, - { - "epoch": 0.03928310685099268, - "grad_norm": 1.2051784992218018, - "learning_rate": 1.9623529411764707e-05, - "loss": 0.5123, - "step": 417 - }, - { - "epoch": 0.039377310944160526, - "grad_norm": 1.2372658252716064, - "learning_rate": 1.967058823529412e-05, - "loss": 0.5203, - "step": 418 - }, - { - "epoch": 0.039471515037328375, - "grad_norm": 1.284125804901123, - "learning_rate": 1.971764705882353e-05, - "loss": 0.5003, - "step": 419 - }, - { - "epoch": 0.03956571913049622, - "grad_norm": 1.290552020072937, - "learning_rate": 1.9764705882352945e-05, - "loss": 0.477, - "step": 420 - }, - { - "epoch": 0.039659923223664066, - "grad_norm": 1.0635818243026733, - "learning_rate": 1.9811764705882354e-05, - "loss": 0.4348, - "step": 421 - }, - { - "epoch": 0.039754127316831915, - "grad_norm": 1.3787859678268433, - "learning_rate": 1.9858823529411767e-05, - "loss": 0.4778, - 
"step": 422 - }, - { - "epoch": 0.039848331409999764, - "grad_norm": 1.476486325263977, - "learning_rate": 1.9905882352941176e-05, - "loss": 0.5057, - "step": 423 - }, - { - "epoch": 0.03994253550316761, - "grad_norm": 1.2540762424468994, - "learning_rate": 1.995294117647059e-05, - "loss": 0.4737, - "step": 424 - }, - { - "epoch": 0.04003673959633546, - "grad_norm": 1.1084176301956177, - "learning_rate": 2e-05, - "loss": 0.4288, - "step": 425 - }, - { - "epoch": 0.04013094368950331, - "grad_norm": 1.2009215354919434, - "learning_rate": 1.999999988599227e-05, - "loss": 0.4826, - "step": 426 - }, - { - "epoch": 0.04022514778267116, - "grad_norm": 1.214808464050293, - "learning_rate": 1.9999999543969074e-05, - "loss": 0.4561, - "step": 427 - }, - { - "epoch": 0.040319351875839, - "grad_norm": 1.221494436264038, - "learning_rate": 1.9999998973930422e-05, - "loss": 0.5272, - "step": 428 - }, - { - "epoch": 0.04041355596900685, - "grad_norm": 1.1077792644500732, - "learning_rate": 1.999999817587633e-05, - "loss": 0.455, - "step": 429 - }, - { - "epoch": 0.0405077600621747, - "grad_norm": 1.235834002494812, - "learning_rate": 1.9999997149806816e-05, - "loss": 0.4743, - "step": 430 - }, - { - "epoch": 0.04060196415534255, - "grad_norm": 1.2237988710403442, - "learning_rate": 1.9999995895721897e-05, - "loss": 0.4966, - "step": 431 - }, - { - "epoch": 0.0406961682485104, - "grad_norm": 1.158136248588562, - "learning_rate": 1.999999441362161e-05, - "loss": 0.5112, - "step": 432 - }, - { - "epoch": 0.04079037234167825, - "grad_norm": 1.1336339712142944, - "learning_rate": 1.9999992703505986e-05, - "loss": 0.5015, - "step": 433 - }, - { - "epoch": 0.040884576434846095, - "grad_norm": 1.130056381225586, - "learning_rate": 1.9999990765375062e-05, - "loss": 0.4514, - "step": 434 - }, - { - "epoch": 0.040978780528013944, - "grad_norm": 1.162482738494873, - "learning_rate": 1.999998859922888e-05, - "loss": 0.469, - "step": 435 - }, - { - "epoch": 0.04107298462118179, - "grad_norm": 1.1694494485855103, - "learning_rate": 1.9999986205067496e-05, - "loss": 0.4799, - "step": 436 - }, - { - "epoch": 0.041167188714349635, - "grad_norm": 0.9968825578689575, - "learning_rate": 1.999998358289096e-05, - "loss": 0.4409, - "step": 437 - }, - { - "epoch": 0.041261392807517484, - "grad_norm": 1.028335452079773, - "learning_rate": 1.9999980732699336e-05, - "loss": 0.4539, - "step": 438 - }, - { - "epoch": 0.04135559690068533, - "grad_norm": 1.2932831048965454, - "learning_rate": 1.999997765449268e-05, - "loss": 0.492, - "step": 439 - }, - { - "epoch": 0.04144980099385318, - "grad_norm": 1.2190347909927368, - "learning_rate": 1.9999974348271075e-05, - "loss": 0.4841, - "step": 440 - }, - { - "epoch": 0.04154400508702103, - "grad_norm": 1.1261992454528809, - "learning_rate": 1.9999970814034583e-05, - "loss": 0.4339, - "step": 441 - }, - { - "epoch": 0.04163820918018888, - "grad_norm": 1.215971827507019, - "learning_rate": 1.9999967051783296e-05, - "loss": 0.5165, - "step": 442 - }, - { - "epoch": 0.04173241327335673, - "grad_norm": 1.1952204704284668, - "learning_rate": 1.9999963061517293e-05, - "loss": 0.5068, - "step": 443 - }, - { - "epoch": 0.04182661736652458, - "grad_norm": 1.1849550008773804, - "learning_rate": 1.9999958843236668e-05, - "loss": 0.4797, - "step": 444 - }, - { - "epoch": 0.04192082145969243, - "grad_norm": 1.0811119079589844, - "learning_rate": 1.9999954396941515e-05, - "loss": 0.4631, - "step": 445 - }, - { - "epoch": 0.04201502555286027, - "grad_norm": 1.1640421152114868, - "learning_rate": 
1.9999949722631938e-05, - "loss": 0.473, - "step": 446 - }, - { - "epoch": 0.04210922964602812, - "grad_norm": 1.1983455419540405, - "learning_rate": 1.9999944820308043e-05, - "loss": 0.5546, - "step": 447 - }, - { - "epoch": 0.04220343373919597, - "grad_norm": 1.105420708656311, - "learning_rate": 1.9999939689969937e-05, - "loss": 0.4665, - "step": 448 - }, - { - "epoch": 0.042297637832363816, - "grad_norm": 1.109999418258667, - "learning_rate": 1.9999934331617747e-05, - "loss": 0.481, - "step": 449 - }, - { - "epoch": 0.042391841925531665, - "grad_norm": 1.1446915864944458, - "learning_rate": 1.999992874525158e-05, - "loss": 0.5105, - "step": 450 - }, - { - "epoch": 0.042486046018699514, - "grad_norm": 1.1875211000442505, - "learning_rate": 1.999992293087158e-05, - "loss": 0.5124, - "step": 451 - }, - { - "epoch": 0.04258025011186736, - "grad_norm": 1.2004013061523438, - "learning_rate": 1.9999916888477868e-05, - "loss": 0.4798, - "step": 452 - }, - { - "epoch": 0.04267445420503521, - "grad_norm": 1.2664998769760132, - "learning_rate": 1.999991061807059e-05, - "loss": 0.4603, - "step": 453 - }, - { - "epoch": 0.042768658298203054, - "grad_norm": 1.1395792961120605, - "learning_rate": 1.999990411964988e-05, - "loss": 0.5195, - "step": 454 - }, - { - "epoch": 0.0428628623913709, - "grad_norm": 1.1454837322235107, - "learning_rate": 1.9999897393215893e-05, - "loss": 0.512, - "step": 455 - }, - { - "epoch": 0.04295706648453875, - "grad_norm": 1.1613349914550781, - "learning_rate": 1.999989043876878e-05, - "loss": 0.5386, - "step": 456 - }, - { - "epoch": 0.0430512705777066, - "grad_norm": 1.0763133764266968, - "learning_rate": 1.9999883256308702e-05, - "loss": 0.437, - "step": 457 - }, - { - "epoch": 0.04314547467087445, - "grad_norm": 1.0761719942092896, - "learning_rate": 1.999987584583582e-05, - "loss": 0.4538, - "step": 458 - }, - { - "epoch": 0.0432396787640423, - "grad_norm": 1.1660627126693726, - "learning_rate": 1.9999868207350298e-05, - "loss": 0.5102, - "step": 459 - }, - { - "epoch": 0.04333388285721015, - "grad_norm": 1.187119483947754, - "learning_rate": 1.9999860340852318e-05, - "loss": 0.5497, - "step": 460 - }, - { - "epoch": 0.043428086950377996, - "grad_norm": 1.0927040576934814, - "learning_rate": 1.9999852246342064e-05, - "loss": 0.4906, - "step": 461 - }, - { - "epoch": 0.043522291043545845, - "grad_norm": 1.19398832321167, - "learning_rate": 1.9999843923819705e-05, - "loss": 0.461, - "step": 462 - }, - { - "epoch": 0.04361649513671369, - "grad_norm": 1.0197585821151733, - "learning_rate": 1.9999835373285445e-05, - "loss": 0.4999, - "step": 463 - }, - { - "epoch": 0.043710699229881536, - "grad_norm": 1.040513277053833, - "learning_rate": 1.9999826594739473e-05, - "loss": 0.4405, - "step": 464 - }, - { - "epoch": 0.043804903323049385, - "grad_norm": 1.1803407669067383, - "learning_rate": 1.9999817588181987e-05, - "loss": 0.4917, - "step": 465 - }, - { - "epoch": 0.043899107416217234, - "grad_norm": 1.0444204807281494, - "learning_rate": 1.9999808353613196e-05, - "loss": 0.4894, - "step": 466 - }, - { - "epoch": 0.04399331150938508, - "grad_norm": 1.1150667667388916, - "learning_rate": 1.999979889103331e-05, - "loss": 0.4987, - "step": 467 - }, - { - "epoch": 0.04408751560255293, - "grad_norm": 1.2186847925186157, - "learning_rate": 1.9999789200442545e-05, - "loss": 0.4796, - "step": 468 - }, - { - "epoch": 0.04418171969572078, - "grad_norm": 1.144182562828064, - "learning_rate": 1.999977928184112e-05, - "loss": 0.5063, - "step": 469 - }, - { - "epoch": 
0.04427592378888863, - "grad_norm": 1.0240905284881592, - "learning_rate": 1.9999769135229267e-05, - "loss": 0.5001, - "step": 470 - }, - { - "epoch": 0.04437012788205648, - "grad_norm": 1.119954228401184, - "learning_rate": 1.9999758760607207e-05, - "loss": 0.4732, - "step": 471 - }, - { - "epoch": 0.04446433197522432, - "grad_norm": 1.1916732788085938, - "learning_rate": 1.9999748157975184e-05, - "loss": 0.5002, - "step": 472 - }, - { - "epoch": 0.04455853606839217, - "grad_norm": 1.103894829750061, - "learning_rate": 1.999973732733344e-05, - "loss": 0.4516, - "step": 473 - }, - { - "epoch": 0.04465274016156002, - "grad_norm": 1.1423009634017944, - "learning_rate": 1.9999726268682217e-05, - "loss": 0.4931, - "step": 474 - }, - { - "epoch": 0.04474694425472787, - "grad_norm": 1.1311986446380615, - "learning_rate": 1.999971498202177e-05, - "loss": 0.4436, - "step": 475 - }, - { - "epoch": 0.044841148347895717, - "grad_norm": 1.1499707698822021, - "learning_rate": 1.9999703467352356e-05, - "loss": 0.4663, - "step": 476 - }, - { - "epoch": 0.044935352441063565, - "grad_norm": 1.1432238817214966, - "learning_rate": 1.999969172467424e-05, - "loss": 0.4648, - "step": 477 - }, - { - "epoch": 0.045029556534231414, - "grad_norm": 1.2627131938934326, - "learning_rate": 1.999967975398769e-05, - "loss": 0.5371, - "step": 478 - }, - { - "epoch": 0.04512376062739926, - "grad_norm": 1.1618503332138062, - "learning_rate": 1.9999667555292975e-05, - "loss": 0.4155, - "step": 479 - }, - { - "epoch": 0.045217964720567105, - "grad_norm": 1.1504175662994385, - "learning_rate": 1.9999655128590373e-05, - "loss": 0.4705, - "step": 480 - }, - { - "epoch": 0.045312168813734954, - "grad_norm": 1.2788769006729126, - "learning_rate": 1.999964247388017e-05, - "loss": 0.5294, - "step": 481 - }, - { - "epoch": 0.0454063729069028, - "grad_norm": 1.2049305438995361, - "learning_rate": 1.9999629591162658e-05, - "loss": 0.5037, - "step": 482 - }, - { - "epoch": 0.04550057700007065, - "grad_norm": 1.1303664445877075, - "learning_rate": 1.9999616480438122e-05, - "loss": 0.48, - "step": 483 - }, - { - "epoch": 0.0455947810932385, - "grad_norm": 1.032772183418274, - "learning_rate": 1.9999603141706866e-05, - "loss": 0.4712, - "step": 484 - }, - { - "epoch": 0.04568898518640635, - "grad_norm": 1.1010346412658691, - "learning_rate": 1.9999589574969198e-05, - "loss": 0.452, - "step": 485 - }, - { - "epoch": 0.0457831892795742, - "grad_norm": 1.2685179710388184, - "learning_rate": 1.9999575780225418e-05, - "loss": 0.4567, - "step": 486 - }, - { - "epoch": 0.04587739337274205, - "grad_norm": 1.0697119235992432, - "learning_rate": 1.9999561757475846e-05, - "loss": 0.4574, - "step": 487 - }, - { - "epoch": 0.0459715974659099, - "grad_norm": 1.0218472480773926, - "learning_rate": 1.9999547506720804e-05, - "loss": 0.4656, - "step": 488 - }, - { - "epoch": 0.04606580155907774, - "grad_norm": 1.01448655128479, - "learning_rate": 1.9999533027960613e-05, - "loss": 0.4836, - "step": 489 - }, - { - "epoch": 0.04616000565224559, - "grad_norm": 1.0936620235443115, - "learning_rate": 1.9999518321195605e-05, - "loss": 0.4991, - "step": 490 - }, - { - "epoch": 0.04625420974541344, - "grad_norm": 1.1408958435058594, - "learning_rate": 1.9999503386426113e-05, - "loss": 0.5001, - "step": 491 - }, - { - "epoch": 0.046348413838581286, - "grad_norm": 0.9541551470756531, - "learning_rate": 1.9999488223652478e-05, - "loss": 0.4713, - "step": 492 - }, - { - "epoch": 0.046442617931749135, - "grad_norm": 1.0181952714920044, - "learning_rate": 
1.999947283287505e-05, - "loss": 0.4461, - "step": 493 - }, - { - "epoch": 0.046536822024916984, - "grad_norm": 0.9244551658630371, - "learning_rate": 1.9999457214094177e-05, - "loss": 0.3794, - "step": 494 - }, - { - "epoch": 0.04663102611808483, - "grad_norm": 1.0513863563537598, - "learning_rate": 1.9999441367310216e-05, - "loss": 0.4641, - "step": 495 - }, - { - "epoch": 0.04672523021125268, - "grad_norm": 1.0445611476898193, - "learning_rate": 1.999942529252352e-05, - "loss": 0.4759, - "step": 496 - }, - { - "epoch": 0.04681943430442053, - "grad_norm": 1.1633409261703491, - "learning_rate": 1.9999408989734474e-05, - "loss": 0.4455, - "step": 497 - }, - { - "epoch": 0.04691363839758837, - "grad_norm": 1.0402541160583496, - "learning_rate": 1.9999392458943432e-05, - "loss": 0.4627, - "step": 498 - }, - { - "epoch": 0.04700784249075622, - "grad_norm": 1.0239955186843872, - "learning_rate": 1.999937570015078e-05, - "loss": 0.4638, - "step": 499 - }, - { - "epoch": 0.04710204658392407, - "grad_norm": 1.2518709897994995, - "learning_rate": 1.9999358713356893e-05, - "loss": 0.5401, - "step": 500 - }, - { - "epoch": 0.04719625067709192, - "grad_norm": 1.000349998474121, - "learning_rate": 1.999934149856217e-05, - "loss": 0.4629, - "step": 501 - }, - { - "epoch": 0.04729045477025977, - "grad_norm": 1.187333345413208, - "learning_rate": 1.999932405576699e-05, - "loss": 0.5195, - "step": 502 - }, - { - "epoch": 0.04738465886342762, - "grad_norm": 1.1107083559036255, - "learning_rate": 1.999930638497176e-05, - "loss": 0.4893, - "step": 503 - }, - { - "epoch": 0.047478862956595466, - "grad_norm": 0.9216952323913574, - "learning_rate": 1.9999288486176882e-05, - "loss": 0.4263, - "step": 504 - }, - { - "epoch": 0.047573067049763315, - "grad_norm": 0.9864377379417419, - "learning_rate": 1.9999270359382762e-05, - "loss": 0.4538, - "step": 505 - }, - { - "epoch": 0.04766727114293116, - "grad_norm": 1.0480002164840698, - "learning_rate": 1.999925200458981e-05, - "loss": 0.4523, - "step": 506 - }, - { - "epoch": 0.047761475236099006, - "grad_norm": 1.1025638580322266, - "learning_rate": 1.999923342179845e-05, - "loss": 0.4599, - "step": 507 - }, - { - "epoch": 0.047855679329266855, - "grad_norm": 1.1831530332565308, - "learning_rate": 1.9999214611009105e-05, - "loss": 0.4961, - "step": 508 - }, - { - "epoch": 0.047949883422434704, - "grad_norm": 1.1201529502868652, - "learning_rate": 1.9999195572222204e-05, - "loss": 0.4841, - "step": 509 - }, - { - "epoch": 0.04804408751560255, - "grad_norm": 1.1097553968429565, - "learning_rate": 1.9999176305438178e-05, - "loss": 0.4252, - "step": 510 - }, - { - "epoch": 0.0481382916087704, - "grad_norm": 1.1436023712158203, - "learning_rate": 1.9999156810657466e-05, - "loss": 0.5302, - "step": 511 - }, - { - "epoch": 0.04823249570193825, - "grad_norm": 1.158348560333252, - "learning_rate": 1.999913708788052e-05, - "loss": 0.5032, - "step": 512 - }, - { - "epoch": 0.0483266997951061, - "grad_norm": 0.9677159190177917, - "learning_rate": 1.9999117137107783e-05, - "loss": 0.4697, - "step": 513 - }, - { - "epoch": 0.04842090388827395, - "grad_norm": 1.1214277744293213, - "learning_rate": 1.999909695833971e-05, - "loss": 0.4967, - "step": 514 - }, - { - "epoch": 0.04851510798144179, - "grad_norm": 1.142343282699585, - "learning_rate": 1.999907655157676e-05, - "loss": 0.473, - "step": 515 - }, - { - "epoch": 0.04860931207460964, - "grad_norm": 0.9969754219055176, - "learning_rate": 1.9999055916819402e-05, - "loss": 0.4535, - "step": 516 - }, - { - "epoch": 
0.04870351616777749, - "grad_norm": 1.237532377243042, - "learning_rate": 1.9999035054068107e-05, - "loss": 0.5169, - "step": 517 - }, - { - "epoch": 0.04879772026094534, - "grad_norm": 1.1392680406570435, - "learning_rate": 1.999901396332335e-05, - "loss": 0.4856, - "step": 518 - }, - { - "epoch": 0.048891924354113186, - "grad_norm": 1.0848746299743652, - "learning_rate": 1.9998992644585606e-05, - "loss": 0.4951, - "step": 519 - }, - { - "epoch": 0.048986128447281035, - "grad_norm": 1.013777256011963, - "learning_rate": 1.9998971097855372e-05, - "loss": 0.4695, - "step": 520 - }, - { - "epoch": 0.049080332540448884, - "grad_norm": 1.1787139177322388, - "learning_rate": 1.999894932313313e-05, - "loss": 0.5033, - "step": 521 - }, - { - "epoch": 0.04917453663361673, - "grad_norm": 0.9743266701698303, - "learning_rate": 1.999892732041938e-05, - "loss": 0.3751, - "step": 522 - }, - { - "epoch": 0.049268740726784575, - "grad_norm": 1.13385009765625, - "learning_rate": 1.9998905089714622e-05, - "loss": 0.4557, - "step": 523 - }, - { - "epoch": 0.049362944819952424, - "grad_norm": 1.087335467338562, - "learning_rate": 1.9998882631019366e-05, - "loss": 0.4222, - "step": 524 - }, - { - "epoch": 0.04945714891312027, - "grad_norm": 1.0518101453781128, - "learning_rate": 1.9998859944334123e-05, - "loss": 0.4415, - "step": 525 - }, - { - "epoch": 0.04955135300628812, - "grad_norm": 1.0453583002090454, - "learning_rate": 1.9998837029659408e-05, - "loss": 0.4615, - "step": 526 - }, - { - "epoch": 0.04964555709945597, - "grad_norm": 1.1417620182037354, - "learning_rate": 1.9998813886995746e-05, - "loss": 0.4675, - "step": 527 - }, - { - "epoch": 0.04973976119262382, - "grad_norm": 1.0332422256469727, - "learning_rate": 1.9998790516343666e-05, - "loss": 0.4849, - "step": 528 - }, - { - "epoch": 0.04983396528579167, - "grad_norm": 1.053110122680664, - "learning_rate": 1.9998766917703697e-05, - "loss": 0.5184, - "step": 529 - }, - { - "epoch": 0.04992816937895952, - "grad_norm": 1.0806505680084229, - "learning_rate": 1.999874309107638e-05, - "loss": 0.4514, - "step": 530 - }, - { - "epoch": 0.05002237347212737, - "grad_norm": 1.0935556888580322, - "learning_rate": 1.9998719036462255e-05, - "loss": 0.5339, - "step": 531 - }, - { - "epoch": 0.05011657756529521, - "grad_norm": 0.9729276895523071, - "learning_rate": 1.9998694753861873e-05, - "loss": 0.445, - "step": 532 - }, - { - "epoch": 0.05021078165846306, - "grad_norm": 1.0580323934555054, - "learning_rate": 1.9998670243275787e-05, - "loss": 0.5206, - "step": 533 - }, - { - "epoch": 0.05030498575163091, - "grad_norm": 1.11548912525177, - "learning_rate": 1.999864550470456e-05, - "loss": 0.5121, - "step": 534 - }, - { - "epoch": 0.050399189844798756, - "grad_norm": 1.0575588941574097, - "learning_rate": 1.999862053814875e-05, - "loss": 0.4532, - "step": 535 - }, - { - "epoch": 0.050493393937966605, - "grad_norm": 1.0175671577453613, - "learning_rate": 1.999859534360893e-05, - "loss": 0.4673, - "step": 536 - }, - { - "epoch": 0.050587598031134454, - "grad_norm": 1.1149822473526, - "learning_rate": 1.9998569921085667e-05, - "loss": 0.519, - "step": 537 - }, - { - "epoch": 0.0506818021243023, - "grad_norm": 1.0861210823059082, - "learning_rate": 1.999854427057955e-05, - "loss": 0.4713, - "step": 538 - }, - { - "epoch": 0.05077600621747015, - "grad_norm": 1.114902377128601, - "learning_rate": 1.9998518392091163e-05, - "loss": 0.5145, - "step": 539 - }, - { - "epoch": 0.050870210310638, - "grad_norm": 0.9747793674468994, - "learning_rate": 1.9998492285621092e-05, 
- "loss": 0.461, - "step": 540 - }, - { - "epoch": 0.05096441440380584, - "grad_norm": 1.0336741209030151, - "learning_rate": 1.9998465951169935e-05, - "loss": 0.5079, - "step": 541 - }, - { - "epoch": 0.05105861849697369, - "grad_norm": 1.056095838546753, - "learning_rate": 1.999843938873829e-05, - "loss": 0.4316, - "step": 542 - }, - { - "epoch": 0.05115282259014154, - "grad_norm": 1.0102380514144897, - "learning_rate": 1.9998412598326765e-05, - "loss": 0.5051, - "step": 543 - }, - { - "epoch": 0.05124702668330939, - "grad_norm": 1.142443299293518, - "learning_rate": 1.9998385579935968e-05, - "loss": 0.4933, - "step": 544 - }, - { - "epoch": 0.05134123077647724, - "grad_norm": 1.115990161895752, - "learning_rate": 1.9998358333566518e-05, - "loss": 0.5089, - "step": 545 - }, - { - "epoch": 0.05143543486964509, - "grad_norm": 1.1316367387771606, - "learning_rate": 1.9998330859219037e-05, - "loss": 0.4447, - "step": 546 - }, - { - "epoch": 0.051529638962812936, - "grad_norm": 1.1623283624649048, - "learning_rate": 1.999830315689415e-05, - "loss": 0.4728, - "step": 547 - }, - { - "epoch": 0.051623843055980785, - "grad_norm": 1.0471458435058594, - "learning_rate": 1.9998275226592487e-05, - "loss": 0.4345, - "step": 548 - }, - { - "epoch": 0.05171804714914863, - "grad_norm": 0.9686200022697449, - "learning_rate": 1.9998247068314684e-05, - "loss": 0.4044, - "step": 549 - }, - { - "epoch": 0.051812251242316476, - "grad_norm": 1.1127114295959473, - "learning_rate": 1.999821868206139e-05, - "loss": 0.448, - "step": 550 - }, - { - "epoch": 0.051906455335484325, - "grad_norm": 0.9898568987846375, - "learning_rate": 1.999819006783324e-05, - "loss": 0.425, - "step": 551 - }, - { - "epoch": 0.052000659428652174, - "grad_norm": 1.0763282775878906, - "learning_rate": 1.99981612256309e-05, - "loss": 0.4433, - "step": 552 - }, - { - "epoch": 0.05209486352182002, - "grad_norm": 1.0753988027572632, - "learning_rate": 1.999813215545502e-05, - "loss": 0.4463, - "step": 553 - }, - { - "epoch": 0.05218906761498787, - "grad_norm": 1.0426037311553955, - "learning_rate": 1.9998102857306264e-05, - "loss": 0.394, - "step": 554 - }, - { - "epoch": 0.05228327170815572, - "grad_norm": 0.9977891445159912, - "learning_rate": 1.9998073331185305e-05, - "loss": 0.4555, - "step": 555 - }, - { - "epoch": 0.05237747580132357, - "grad_norm": 1.111856460571289, - "learning_rate": 1.999804357709281e-05, - "loss": 0.4504, - "step": 556 - }, - { - "epoch": 0.05247167989449142, - "grad_norm": 1.0742316246032715, - "learning_rate": 1.9998013595029454e-05, - "loss": 0.4027, - "step": 557 - }, - { - "epoch": 0.05256588398765926, - "grad_norm": 1.355398178100586, - "learning_rate": 1.9997983384995927e-05, - "loss": 0.5059, - "step": 558 - }, - { - "epoch": 0.05266008808082711, - "grad_norm": 1.0624873638153076, - "learning_rate": 1.999795294699292e-05, - "loss": 0.4277, - "step": 559 - }, - { - "epoch": 0.05275429217399496, - "grad_norm": 1.124698281288147, - "learning_rate": 1.999792228102112e-05, - "loss": 0.434, - "step": 560 - }, - { - "epoch": 0.05284849626716281, - "grad_norm": 0.9644001722335815, - "learning_rate": 1.9997891387081235e-05, - "loss": 0.3996, - "step": 561 - }, - { - "epoch": 0.052942700360330656, - "grad_norm": 1.0603997707366943, - "learning_rate": 1.999786026517396e-05, - "loss": 0.4611, - "step": 562 - }, - { - "epoch": 0.053036904453498505, - "grad_norm": 1.1700316667556763, - "learning_rate": 1.999782891530001e-05, - "loss": 0.4712, - "step": 563 - }, - { - "epoch": 0.053131108546666354, - "grad_norm": 
1.2461744546890259, - "learning_rate": 1.99977973374601e-05, - "loss": 0.4314, - "step": 564 - }, - { - "epoch": 0.0532253126398342, - "grad_norm": 1.1271206140518188, - "learning_rate": 1.9997765531654945e-05, - "loss": 0.4393, - "step": 565 - }, - { - "epoch": 0.05331951673300205, - "grad_norm": 1.1461455821990967, - "learning_rate": 1.999773349788528e-05, - "loss": 0.4805, - "step": 566 - }, - { - "epoch": 0.053413720826169894, - "grad_norm": 0.9542377591133118, - "learning_rate": 1.9997701236151826e-05, - "loss": 0.4144, - "step": 567 - }, - { - "epoch": 0.05350792491933774, - "grad_norm": 1.0481843948364258, - "learning_rate": 1.9997668746455322e-05, - "loss": 0.44, - "step": 568 - }, - { - "epoch": 0.05360212901250559, - "grad_norm": 1.0989118814468384, - "learning_rate": 1.999763602879651e-05, - "loss": 0.469, - "step": 569 - }, - { - "epoch": 0.05369633310567344, - "grad_norm": 0.987362802028656, - "learning_rate": 1.9997603083176136e-05, - "loss": 0.4572, - "step": 570 - }, - { - "epoch": 0.05379053719884129, - "grad_norm": 0.9620579481124878, - "learning_rate": 1.9997569909594948e-05, - "loss": 0.4203, - "step": 571 - }, - { - "epoch": 0.05388474129200914, - "grad_norm": 1.0838489532470703, - "learning_rate": 1.9997536508053704e-05, - "loss": 0.4737, - "step": 572 - }, - { - "epoch": 0.05397894538517699, - "grad_norm": 0.9939470291137695, - "learning_rate": 1.999750287855317e-05, - "loss": 0.4097, - "step": 573 - }, - { - "epoch": 0.05407314947834484, - "grad_norm": 0.970329999923706, - "learning_rate": 1.9997469021094103e-05, - "loss": 0.422, - "step": 574 - }, - { - "epoch": 0.05416735357151268, - "grad_norm": 1.0482878684997559, - "learning_rate": 1.9997434935677285e-05, - "loss": 0.4607, - "step": 575 - }, - { - "epoch": 0.05426155766468053, - "grad_norm": 1.0672898292541504, - "learning_rate": 1.9997400622303488e-05, - "loss": 0.4587, - "step": 576 - }, - { - "epoch": 0.05435576175784838, - "grad_norm": 1.1420453786849976, - "learning_rate": 1.9997366080973493e-05, - "loss": 0.5164, - "step": 577 - }, - { - "epoch": 0.054449965851016226, - "grad_norm": 1.1007829904556274, - "learning_rate": 1.9997331311688095e-05, - "loss": 0.5201, - "step": 578 - }, - { - "epoch": 0.054544169944184075, - "grad_norm": 0.9502580761909485, - "learning_rate": 1.999729631444808e-05, - "loss": 0.4637, - "step": 579 - }, - { - "epoch": 0.054638374037351924, - "grad_norm": 1.0256754159927368, - "learning_rate": 1.9997261089254246e-05, - "loss": 0.431, - "step": 580 - }, - { - "epoch": 0.05473257813051977, - "grad_norm": 1.1105985641479492, - "learning_rate": 1.9997225636107402e-05, - "loss": 0.479, - "step": 581 - }, - { - "epoch": 0.05482678222368762, - "grad_norm": 1.2762510776519775, - "learning_rate": 1.999718995500835e-05, - "loss": 0.5189, - "step": 582 - }, - { - "epoch": 0.05492098631685547, - "grad_norm": 1.0495827198028564, - "learning_rate": 1.9997154045957903e-05, - "loss": 0.4846, - "step": 583 - }, - { - "epoch": 0.05501519041002331, - "grad_norm": 1.1202806234359741, - "learning_rate": 1.999711790895689e-05, - "loss": 0.4902, - "step": 584 - }, - { - "epoch": 0.05510939450319116, - "grad_norm": 1.0130805969238281, - "learning_rate": 1.9997081544006124e-05, - "loss": 0.4476, - "step": 585 - }, - { - "epoch": 0.05520359859635901, - "grad_norm": 1.071753740310669, - "learning_rate": 1.9997044951106438e-05, - "loss": 0.473, - "step": 586 - }, - { - "epoch": 0.05529780268952686, - "grad_norm": 1.1382759809494019, - "learning_rate": 1.9997008130258665e-05, - "loss": 0.4777, - "step": 587 - 
}, - { - "epoch": 0.05539200678269471, - "grad_norm": 1.0548356771469116, - "learning_rate": 1.9996971081463647e-05, - "loss": 0.445, - "step": 588 - }, - { - "epoch": 0.05548621087586256, - "grad_norm": 1.115378975868225, - "learning_rate": 1.999693380472223e-05, - "loss": 0.4967, - "step": 589 - }, - { - "epoch": 0.055580414969030406, - "grad_norm": 1.1774591207504272, - "learning_rate": 1.999689630003526e-05, - "loss": 0.4609, - "step": 590 - }, - { - "epoch": 0.055674619062198255, - "grad_norm": 1.0086830854415894, - "learning_rate": 1.9996858567403593e-05, - "loss": 0.4331, - "step": 591 - }, - { - "epoch": 0.055768823155366104, - "grad_norm": 0.9901698231697083, - "learning_rate": 1.999682060682809e-05, - "loss": 0.4519, - "step": 592 - }, - { - "epoch": 0.055863027248533946, - "grad_norm": 1.1610777378082275, - "learning_rate": 1.999678241830962e-05, - "loss": 0.4579, - "step": 593 - }, - { - "epoch": 0.055957231341701795, - "grad_norm": 1.0401917695999146, - "learning_rate": 1.9996744001849047e-05, - "loss": 0.4461, - "step": 594 - }, - { - "epoch": 0.056051435434869644, - "grad_norm": 1.081620216369629, - "learning_rate": 1.999670535744725e-05, - "loss": 0.4835, - "step": 595 - }, - { - "epoch": 0.05614563952803749, - "grad_norm": 1.0446057319641113, - "learning_rate": 1.9996666485105115e-05, - "loss": 0.456, - "step": 596 - }, - { - "epoch": 0.05623984362120534, - "grad_norm": 1.1298969984054565, - "learning_rate": 1.9996627384823522e-05, - "loss": 0.4787, - "step": 597 - }, - { - "epoch": 0.05633404771437319, - "grad_norm": 1.162839651107788, - "learning_rate": 1.999658805660336e-05, - "loss": 0.3753, - "step": 598 - }, - { - "epoch": 0.05642825180754104, - "grad_norm": 1.0797040462493896, - "learning_rate": 1.9996548500445536e-05, - "loss": 0.4798, - "step": 599 - }, - { - "epoch": 0.05652245590070889, - "grad_norm": 0.9650039076805115, - "learning_rate": 1.9996508716350945e-05, - "loss": 0.498, - "step": 600 - }, - { - "epoch": 0.05661665999387673, - "grad_norm": 1.0434175729751587, - "learning_rate": 1.9996468704320496e-05, - "loss": 0.4426, - "step": 601 - }, - { - "epoch": 0.05671086408704458, - "grad_norm": 1.0903749465942383, - "learning_rate": 1.9996428464355097e-05, - "loss": 0.4484, - "step": 602 - }, - { - "epoch": 0.05680506818021243, - "grad_norm": 1.168329119682312, - "learning_rate": 1.9996387996455673e-05, - "loss": 0.4613, - "step": 603 - }, - { - "epoch": 0.05689927227338028, - "grad_norm": 1.1196221113204956, - "learning_rate": 1.9996347300623142e-05, - "loss": 0.4536, - "step": 604 - }, - { - "epoch": 0.056993476366548126, - "grad_norm": 1.053094506263733, - "learning_rate": 1.9996306376858433e-05, - "loss": 0.4514, - "step": 605 - }, - { - "epoch": 0.057087680459715975, - "grad_norm": 0.9870381951332092, - "learning_rate": 1.999626522516248e-05, - "loss": 0.4702, - "step": 606 - }, - { - "epoch": 0.057181884552883824, - "grad_norm": 1.0807932615280151, - "learning_rate": 1.9996223845536216e-05, - "loss": 0.4557, - "step": 607 - }, - { - "epoch": 0.05727608864605167, - "grad_norm": 1.026733160018921, - "learning_rate": 1.9996182237980592e-05, - "loss": 0.4836, - "step": 608 - }, - { - "epoch": 0.05737029273921952, - "grad_norm": 1.0263826847076416, - "learning_rate": 1.9996140402496554e-05, - "loss": 0.4547, - "step": 609 - }, - { - "epoch": 0.057464496832387364, - "grad_norm": 1.1398074626922607, - "learning_rate": 1.9996098339085054e-05, - "loss": 0.4958, - "step": 610 - }, - { - "epoch": 0.05755870092555521, - "grad_norm": 1.3231003284454346, - 
"learning_rate": 1.9996056047747054e-05, - "loss": 0.4774, - "step": 611 - }, - { - "epoch": 0.05765290501872306, - "grad_norm": 0.9639459848403931, - "learning_rate": 1.999601352848352e-05, - "loss": 0.3985, - "step": 612 - }, - { - "epoch": 0.05774710911189091, - "grad_norm": 1.007828950881958, - "learning_rate": 1.9995970781295412e-05, - "loss": 0.3997, - "step": 613 - }, - { - "epoch": 0.05784131320505876, - "grad_norm": 1.2313796281814575, - "learning_rate": 1.9995927806183713e-05, - "loss": 0.4611, - "step": 614 - }, - { - "epoch": 0.05793551729822661, - "grad_norm": 1.1767758131027222, - "learning_rate": 1.9995884603149403e-05, - "loss": 0.4646, - "step": 615 - }, - { - "epoch": 0.05802972139139446, - "grad_norm": 1.0385993719100952, - "learning_rate": 1.9995841172193465e-05, - "loss": 0.481, - "step": 616 - }, - { - "epoch": 0.05812392548456231, - "grad_norm": 1.0699119567871094, - "learning_rate": 1.999579751331689e-05, - "loss": 0.4517, - "step": 617 - }, - { - "epoch": 0.058218129577730156, - "grad_norm": 1.3174458742141724, - "learning_rate": 1.999575362652067e-05, - "loss": 0.5169, - "step": 618 - }, - { - "epoch": 0.058312333670898, - "grad_norm": 1.0863970518112183, - "learning_rate": 1.999570951180581e-05, - "loss": 0.4659, - "step": 619 - }, - { - "epoch": 0.05840653776406585, - "grad_norm": 1.1128531694412231, - "learning_rate": 1.9995665169173313e-05, - "loss": 0.4469, - "step": 620 - }, - { - "epoch": 0.058500741857233696, - "grad_norm": 1.2424368858337402, - "learning_rate": 1.999562059862419e-05, - "loss": 0.4681, - "step": 621 - }, - { - "epoch": 0.058594945950401545, - "grad_norm": 0.9890236854553223, - "learning_rate": 1.9995575800159462e-05, - "loss": 0.3875, - "step": 622 - }, - { - "epoch": 0.058689150043569394, - "grad_norm": 1.1729191541671753, - "learning_rate": 1.999553077378015e-05, - "loss": 0.4408, - "step": 623 - }, - { - "epoch": 0.05878335413673724, - "grad_norm": 1.2006512880325317, - "learning_rate": 1.9995485519487276e-05, - "loss": 0.4509, - "step": 624 - }, - { - "epoch": 0.05887755822990509, - "grad_norm": 0.9693123698234558, - "learning_rate": 1.9995440037281872e-05, - "loss": 0.3543, - "step": 625 - }, - { - "epoch": 0.05897176232307294, - "grad_norm": 1.1058598756790161, - "learning_rate": 1.999539432716498e-05, - "loss": 0.4959, - "step": 626 - }, - { - "epoch": 0.05906596641624078, - "grad_norm": 1.1529923677444458, - "learning_rate": 1.9995348389137635e-05, - "loss": 0.4325, - "step": 627 - }, - { - "epoch": 0.05916017050940863, - "grad_norm": 1.079768180847168, - "learning_rate": 1.9995302223200893e-05, - "loss": 0.4196, - "step": 628 - }, - { - "epoch": 0.05925437460257648, - "grad_norm": 1.0184563398361206, - "learning_rate": 1.9995255829355798e-05, - "loss": 0.4032, - "step": 629 - }, - { - "epoch": 0.05934857869574433, - "grad_norm": 1.1753013134002686, - "learning_rate": 1.9995209207603414e-05, - "loss": 0.4522, - "step": 630 - }, - { - "epoch": 0.05944278278891218, - "grad_norm": 1.1616015434265137, - "learning_rate": 1.99951623579448e-05, - "loss": 0.4814, - "step": 631 - }, - { - "epoch": 0.05953698688208003, - "grad_norm": 1.0196958780288696, - "learning_rate": 1.999511528038103e-05, - "loss": 0.3906, - "step": 632 - }, - { - "epoch": 0.059631190975247876, - "grad_norm": 1.0535567998886108, - "learning_rate": 1.9995067974913175e-05, - "loss": 0.474, - "step": 633 - }, - { - "epoch": 0.059725395068415725, - "grad_norm": 1.2080250978469849, - "learning_rate": 1.999502044154231e-05, - "loss": 0.499, - "step": 634 - }, - { - "epoch": 
0.059819599161583574, - "grad_norm": 1.0950868129730225, - "learning_rate": 1.9994972680269518e-05, - "loss": 0.4774, - "step": 635 - }, - { - "epoch": 0.059913803254751416, - "grad_norm": 0.9520348310470581, - "learning_rate": 1.9994924691095896e-05, - "loss": 0.4219, - "step": 636 - }, - { - "epoch": 0.060008007347919265, - "grad_norm": 1.1531012058258057, - "learning_rate": 1.9994876474022533e-05, - "loss": 0.5586, - "step": 637 - }, - { - "epoch": 0.060102211441087114, - "grad_norm": 1.3068886995315552, - "learning_rate": 1.9994828029050527e-05, - "loss": 0.5327, - "step": 638 - }, - { - "epoch": 0.06019641553425496, - "grad_norm": 0.9474340677261353, - "learning_rate": 1.9994779356180986e-05, - "loss": 0.3797, - "step": 639 - }, - { - "epoch": 0.06029061962742281, - "grad_norm": 1.1141210794448853, - "learning_rate": 1.999473045541502e-05, - "loss": 0.429, - "step": 640 - }, - { - "epoch": 0.06038482372059066, - "grad_norm": 0.9722087383270264, - "learning_rate": 1.999468132675374e-05, - "loss": 0.4279, - "step": 641 - }, - { - "epoch": 0.06047902781375851, - "grad_norm": 0.8949450254440308, - "learning_rate": 1.9994631970198268e-05, - "loss": 0.3797, - "step": 642 - }, - { - "epoch": 0.06057323190692636, - "grad_norm": 1.0000451803207397, - "learning_rate": 1.9994582385749735e-05, - "loss": 0.4243, - "step": 643 - }, - { - "epoch": 0.06066743600009421, - "grad_norm": 0.9662757515907288, - "learning_rate": 1.999453257340926e-05, - "loss": 0.4269, - "step": 644 - }, - { - "epoch": 0.06076164009326205, - "grad_norm": 1.0271151065826416, - "learning_rate": 1.999448253317799e-05, - "loss": 0.4215, - "step": 645 - }, - { - "epoch": 0.0608558441864299, - "grad_norm": 1.0444293022155762, - "learning_rate": 1.9994432265057064e-05, - "loss": 0.4495, - "step": 646 - }, - { - "epoch": 0.06095004827959775, - "grad_norm": 0.9824338555335999, - "learning_rate": 1.999438176904762e-05, - "loss": 0.442, - "step": 647 - }, - { - "epoch": 0.061044252372765596, - "grad_norm": 1.0066092014312744, - "learning_rate": 1.9994331045150814e-05, - "loss": 0.4283, - "step": 648 - }, - { - "epoch": 0.061138456465933445, - "grad_norm": 1.1279113292694092, - "learning_rate": 1.999428009336781e-05, - "loss": 0.5208, - "step": 649 - }, - { - "epoch": 0.061232660559101294, - "grad_norm": 1.0053372383117676, - "learning_rate": 1.999422891369976e-05, - "loss": 0.4212, - "step": 650 - }, - { - "epoch": 0.06132686465226914, - "grad_norm": 1.0733355283737183, - "learning_rate": 1.9994177506147834e-05, - "loss": 0.461, - "step": 651 - }, - { - "epoch": 0.06142106874543699, - "grad_norm": 1.1090284585952759, - "learning_rate": 1.9994125870713207e-05, - "loss": 0.4763, - "step": 652 - }, - { - "epoch": 0.061515272838604834, - "grad_norm": 1.0557955503463745, - "learning_rate": 1.999407400739705e-05, - "loss": 0.4282, - "step": 653 - }, - { - "epoch": 0.06160947693177268, - "grad_norm": 0.9294447898864746, - "learning_rate": 1.999402191620055e-05, - "loss": 0.4095, - "step": 654 - }, - { - "epoch": 0.06170368102494053, - "grad_norm": 1.004593014717102, - "learning_rate": 1.9993969597124896e-05, - "loss": 0.4279, - "step": 655 - }, - { - "epoch": 0.06179788511810838, - "grad_norm": 1.2182408571243286, - "learning_rate": 1.999391705017128e-05, - "loss": 0.4886, - "step": 656 - }, - { - "epoch": 0.06189208921127623, - "grad_norm": 1.0181046724319458, - "learning_rate": 1.99938642753409e-05, - "loss": 0.4573, - "step": 657 - }, - { - "epoch": 0.06198629330444408, - "grad_norm": 1.008346676826477, - "learning_rate": 
1.9993811272634954e-05, - "loss": 0.4597, - "step": 658 - }, - { - "epoch": 0.06208049739761193, - "grad_norm": 1.1677072048187256, - "learning_rate": 1.999375804205466e-05, - "loss": 0.4695, - "step": 659 - }, - { - "epoch": 0.06217470149077978, - "grad_norm": 0.9776001572608948, - "learning_rate": 1.9993704583601224e-05, - "loss": 0.4482, - "step": 660 - }, - { - "epoch": 0.062268905583947626, - "grad_norm": 0.99480140209198, - "learning_rate": 1.999365089727587e-05, - "loss": 0.4439, - "step": 661 - }, - { - "epoch": 0.06236310967711547, - "grad_norm": 0.997078537940979, - "learning_rate": 1.999359698307982e-05, - "loss": 0.4439, - "step": 662 - }, - { - "epoch": 0.06245731377028332, - "grad_norm": 1.0121581554412842, - "learning_rate": 1.9993542841014303e-05, - "loss": 0.4432, - "step": 663 - }, - { - "epoch": 0.06255151786345117, - "grad_norm": 1.0929957628250122, - "learning_rate": 1.9993488471080553e-05, - "loss": 0.4819, - "step": 664 - }, - { - "epoch": 0.06264572195661902, - "grad_norm": 1.1690179109573364, - "learning_rate": 1.999343387327981e-05, - "loss": 0.4252, - "step": 665 - }, - { - "epoch": 0.06273992604978686, - "grad_norm": 0.9591436386108398, - "learning_rate": 1.999337904761332e-05, - "loss": 0.438, - "step": 666 - }, - { - "epoch": 0.0628341301429547, - "grad_norm": 0.9770983457565308, - "learning_rate": 1.9993323994082336e-05, - "loss": 0.469, - "step": 667 - }, - { - "epoch": 0.06292833423612255, - "grad_norm": 1.049344539642334, - "learning_rate": 1.9993268712688104e-05, - "loss": 0.4646, - "step": 668 - }, - { - "epoch": 0.0630225383292904, - "grad_norm": 1.0194511413574219, - "learning_rate": 1.9993213203431895e-05, - "loss": 0.4682, - "step": 669 - }, - { - "epoch": 0.06311674242245825, - "grad_norm": 0.9602471590042114, - "learning_rate": 1.999315746631497e-05, - "loss": 0.4507, - "step": 670 - }, - { - "epoch": 0.0632109465156261, - "grad_norm": 1.0650478601455688, - "learning_rate": 1.99931015013386e-05, - "loss": 0.5069, - "step": 671 - }, - { - "epoch": 0.06330515060879395, - "grad_norm": 0.924329936504364, - "learning_rate": 1.999304530850406e-05, - "loss": 0.4437, - "step": 672 - }, - { - "epoch": 0.0633993547019618, - "grad_norm": 1.0018926858901978, - "learning_rate": 1.999298888781263e-05, - "loss": 0.45, - "step": 673 - }, - { - "epoch": 0.06349355879512965, - "grad_norm": 0.9459987878799438, - "learning_rate": 1.9992932239265602e-05, - "loss": 0.4066, - "step": 674 - }, - { - "epoch": 0.0635877628882975, - "grad_norm": 0.9754473567008972, - "learning_rate": 1.9992875362864267e-05, - "loss": 0.4299, - "step": 675 - }, - { - "epoch": 0.06368196698146535, - "grad_norm": 1.0223201513290405, - "learning_rate": 1.9992818258609915e-05, - "loss": 0.4062, - "step": 676 - }, - { - "epoch": 0.0637761710746332, - "grad_norm": 0.9518370628356934, - "learning_rate": 1.9992760926503855e-05, - "loss": 0.4693, - "step": 677 - }, - { - "epoch": 0.06387037516780104, - "grad_norm": 1.127358078956604, - "learning_rate": 1.999270336654739e-05, - "loss": 0.4657, - "step": 678 - }, - { - "epoch": 0.06396457926096889, - "grad_norm": 0.9908960461616516, - "learning_rate": 1.9992645578741836e-05, - "loss": 0.4431, - "step": 679 - }, - { - "epoch": 0.06405878335413674, - "grad_norm": 0.9333703517913818, - "learning_rate": 1.999258756308851e-05, - "loss": 0.4195, - "step": 680 - }, - { - "epoch": 0.06415298744730459, - "grad_norm": 1.0626039505004883, - "learning_rate": 1.999252931958873e-05, - "loss": 0.5016, - "step": 681 - }, - { - "epoch": 0.06424719154047244, - 
"grad_norm": 1.1983187198638916, - "learning_rate": 1.999247084824383e-05, - "loss": 0.5111, - "step": 682 - }, - { - "epoch": 0.06434139563364027, - "grad_norm": 1.1836464405059814, - "learning_rate": 1.999241214905514e-05, - "loss": 0.4823, - "step": 683 - }, - { - "epoch": 0.06443559972680812, - "grad_norm": 1.056818962097168, - "learning_rate": 1.9992353222023998e-05, - "loss": 0.4278, - "step": 684 - }, - { - "epoch": 0.06452980381997597, - "grad_norm": 0.8685744404792786, - "learning_rate": 1.999229406715175e-05, - "loss": 0.4094, - "step": 685 - }, - { - "epoch": 0.06462400791314382, - "grad_norm": 1.0641740560531616, - "learning_rate": 1.9992234684439746e-05, - "loss": 0.4378, - "step": 686 - }, - { - "epoch": 0.06471821200631167, - "grad_norm": 0.9946682453155518, - "learning_rate": 1.999217507388934e-05, - "loss": 0.4648, - "step": 687 - }, - { - "epoch": 0.06481241609947952, - "grad_norm": 1.04335355758667, - "learning_rate": 1.9992115235501884e-05, - "loss": 0.4465, - "step": 688 - }, - { - "epoch": 0.06490662019264737, - "grad_norm": 0.8747390508651733, - "learning_rate": 1.9992055169278747e-05, - "loss": 0.4335, - "step": 689 - }, - { - "epoch": 0.06500082428581522, - "grad_norm": 0.9814894795417786, - "learning_rate": 1.9991994875221304e-05, - "loss": 0.4501, - "step": 690 - }, - { - "epoch": 0.06509502837898307, - "grad_norm": 1.0981602668762207, - "learning_rate": 1.9991934353330924e-05, - "loss": 0.5084, - "step": 691 - }, - { - "epoch": 0.06518923247215092, - "grad_norm": 0.9652166962623596, - "learning_rate": 1.9991873603608984e-05, - "loss": 0.4426, - "step": 692 - }, - { - "epoch": 0.06528343656531876, - "grad_norm": 0.9243811964988708, - "learning_rate": 1.9991812626056878e-05, - "loss": 0.4351, - "step": 693 - }, - { - "epoch": 0.06537764065848661, - "grad_norm": 1.0646644830703735, - "learning_rate": 1.999175142067599e-05, - "loss": 0.4961, - "step": 694 - }, - { - "epoch": 0.06547184475165446, - "grad_norm": 0.9408712387084961, - "learning_rate": 1.9991689987467714e-05, - "loss": 0.4422, - "step": 695 - }, - { - "epoch": 0.06556604884482231, - "grad_norm": 0.9736141562461853, - "learning_rate": 1.9991628326433457e-05, - "loss": 0.4093, - "step": 696 - }, - { - "epoch": 0.06566025293799016, - "grad_norm": 1.0736565589904785, - "learning_rate": 1.999156643757462e-05, - "loss": 0.4323, - "step": 697 - }, - { - "epoch": 0.06575445703115801, - "grad_norm": 1.196263313293457, - "learning_rate": 1.9991504320892616e-05, - "loss": 0.3956, - "step": 698 - }, - { - "epoch": 0.06584866112432586, - "grad_norm": 0.8757830858230591, - "learning_rate": 1.999144197638886e-05, - "loss": 0.388, - "step": 699 - }, - { - "epoch": 0.0659428652174937, - "grad_norm": 0.9341623187065125, - "learning_rate": 1.9991379404064778e-05, - "loss": 0.3855, - "step": 700 - }, - { - "epoch": 0.06603706931066154, - "grad_norm": 0.9324865341186523, - "learning_rate": 1.9991316603921793e-05, - "loss": 0.3909, - "step": 701 - }, - { - "epoch": 0.06613127340382939, - "grad_norm": 1.286978840827942, - "learning_rate": 1.9991253575961338e-05, - "loss": 0.4446, - "step": 702 - }, - { - "epoch": 0.06622547749699724, - "grad_norm": 1.165391445159912, - "learning_rate": 1.999119032018485e-05, - "loss": 0.4551, - "step": 703 - }, - { - "epoch": 0.06631968159016509, - "grad_norm": 1.060238003730774, - "learning_rate": 1.9991126836593768e-05, - "loss": 0.497, - "step": 704 - }, - { - "epoch": 0.06641388568333294, - "grad_norm": 1.016146183013916, - "learning_rate": 1.9991063125189546e-05, - "loss": 0.4318, - 
"step": 705 - }, - { - "epoch": 0.06650808977650079, - "grad_norm": 1.0532314777374268, - "learning_rate": 1.999099918597363e-05, - "loss": 0.4218, - "step": 706 - }, - { - "epoch": 0.06660229386966864, - "grad_norm": 1.1155513525009155, - "learning_rate": 1.9990935018947484e-05, - "loss": 0.4937, - "step": 707 - }, - { - "epoch": 0.06669649796283648, - "grad_norm": 1.0174504518508911, - "learning_rate": 1.9990870624112564e-05, - "loss": 0.4627, - "step": 708 - }, - { - "epoch": 0.06679070205600433, - "grad_norm": 0.975045919418335, - "learning_rate": 1.9990806001470346e-05, - "loss": 0.4542, - "step": 709 - }, - { - "epoch": 0.06688490614917218, - "grad_norm": 0.984332799911499, - "learning_rate": 1.9990741151022302e-05, - "loss": 0.429, - "step": 710 - }, - { - "epoch": 0.06697911024234003, - "grad_norm": 1.034583330154419, - "learning_rate": 1.9990676072769904e-05, - "loss": 0.4532, - "step": 711 - }, - { - "epoch": 0.06707331433550788, - "grad_norm": 1.0927097797393799, - "learning_rate": 1.9990610766714646e-05, - "loss": 0.4634, - "step": 712 - }, - { - "epoch": 0.06716751842867573, - "grad_norm": 1.004805088043213, - "learning_rate": 1.9990545232858008e-05, - "loss": 0.492, - "step": 713 - }, - { - "epoch": 0.06726172252184358, - "grad_norm": 1.055342674255371, - "learning_rate": 1.9990479471201488e-05, - "loss": 0.4621, - "step": 714 - }, - { - "epoch": 0.06735592661501143, - "grad_norm": 1.1024476289749146, - "learning_rate": 1.9990413481746587e-05, - "loss": 0.4413, - "step": 715 - }, - { - "epoch": 0.06745013070817928, - "grad_norm": 0.9659948945045471, - "learning_rate": 1.9990347264494806e-05, - "loss": 0.425, - "step": 716 - }, - { - "epoch": 0.06754433480134713, - "grad_norm": 0.9982300400733948, - "learning_rate": 1.9990280819447662e-05, - "loss": 0.4722, - "step": 717 - }, - { - "epoch": 0.06763853889451496, - "grad_norm": 1.2878872156143188, - "learning_rate": 1.999021414660666e-05, - "loss": 0.4372, - "step": 718 - }, - { - "epoch": 0.06773274298768281, - "grad_norm": 1.1230915784835815, - "learning_rate": 1.999014724597333e-05, - "loss": 0.5018, - "step": 719 - }, - { - "epoch": 0.06782694708085066, - "grad_norm": 1.0017739534378052, - "learning_rate": 1.9990080117549188e-05, - "loss": 0.4873, - "step": 720 - }, - { - "epoch": 0.06792115117401851, - "grad_norm": 1.1177877187728882, - "learning_rate": 1.999001276133577e-05, - "loss": 0.5517, - "step": 721 - }, - { - "epoch": 0.06801535526718636, - "grad_norm": 1.0998467206954956, - "learning_rate": 1.9989945177334614e-05, - "loss": 0.5142, - "step": 722 - }, - { - "epoch": 0.0681095593603542, - "grad_norm": 0.9907771944999695, - "learning_rate": 1.9989877365547253e-05, - "loss": 0.4245, - "step": 723 - }, - { - "epoch": 0.06820376345352205, - "grad_norm": 1.0315520763397217, - "learning_rate": 1.9989809325975244e-05, - "loss": 0.4571, - "step": 724 - }, - { - "epoch": 0.0682979675466899, - "grad_norm": 0.9746618270874023, - "learning_rate": 1.998974105862013e-05, - "loss": 0.4492, - "step": 725 - }, - { - "epoch": 0.06839217163985775, - "grad_norm": 0.9446602463722229, - "learning_rate": 1.998967256348347e-05, - "loss": 0.3837, - "step": 726 - }, - { - "epoch": 0.0684863757330256, - "grad_norm": 1.0078535079956055, - "learning_rate": 1.998960384056683e-05, - "loss": 0.39, - "step": 727 - }, - { - "epoch": 0.06858057982619345, - "grad_norm": 1.0134851932525635, - "learning_rate": 1.998953488987177e-05, - "loss": 0.4689, - "step": 728 - }, - { - "epoch": 0.0686747839193613, - "grad_norm": 1.0376334190368652, - 
"learning_rate": 1.9989465711399865e-05, - "loss": 0.4566, - "step": 729 - }, - { - "epoch": 0.06876898801252915, - "grad_norm": 1.0177689790725708, - "learning_rate": 1.9989396305152694e-05, - "loss": 0.4607, - "step": 730 - }, - { - "epoch": 0.068863192105697, - "grad_norm": 1.0617927312850952, - "learning_rate": 1.9989326671131837e-05, - "loss": 0.4368, - "step": 731 - }, - { - "epoch": 0.06895739619886485, - "grad_norm": 1.0075178146362305, - "learning_rate": 1.9989256809338885e-05, - "loss": 0.4401, - "step": 732 - }, - { - "epoch": 0.0690516002920327, - "grad_norm": 1.0294811725616455, - "learning_rate": 1.998918671977543e-05, - "loss": 0.4661, - "step": 733 - }, - { - "epoch": 0.06914580438520054, - "grad_norm": 1.0549850463867188, - "learning_rate": 1.9989116402443068e-05, - "loss": 0.4234, - "step": 734 - }, - { - "epoch": 0.06924000847836838, - "grad_norm": 1.123194694519043, - "learning_rate": 1.9989045857343403e-05, - "loss": 0.4594, - "step": 735 - }, - { - "epoch": 0.06933421257153623, - "grad_norm": 1.0410248041152954, - "learning_rate": 1.9988975084478044e-05, - "loss": 0.3936, - "step": 736 - }, - { - "epoch": 0.06942841666470408, - "grad_norm": 0.995914101600647, - "learning_rate": 1.9988904083848603e-05, - "loss": 0.4776, - "step": 737 - }, - { - "epoch": 0.06952262075787193, - "grad_norm": 1.1929165124893188, - "learning_rate": 1.9988832855456705e-05, - "loss": 0.4914, - "step": 738 - }, - { - "epoch": 0.06961682485103977, - "grad_norm": 1.121276617050171, - "learning_rate": 1.9988761399303966e-05, - "loss": 0.502, - "step": 739 - }, - { - "epoch": 0.06971102894420762, - "grad_norm": 0.9790893793106079, - "learning_rate": 1.998868971539202e-05, - "loss": 0.4299, - "step": 740 - }, - { - "epoch": 0.06980523303737547, - "grad_norm": 0.9773557782173157, - "learning_rate": 1.9988617803722503e-05, - "loss": 0.4416, - "step": 741 - }, - { - "epoch": 0.06989943713054332, - "grad_norm": 0.9014050364494324, - "learning_rate": 1.998854566429705e-05, - "loss": 0.4372, - "step": 742 - }, - { - "epoch": 0.06999364122371117, - "grad_norm": 1.0881351232528687, - "learning_rate": 1.998847329711731e-05, - "loss": 0.4556, - "step": 743 - }, - { - "epoch": 0.07008784531687902, - "grad_norm": 1.1250412464141846, - "learning_rate": 1.998840070218493e-05, - "loss": 0.46, - "step": 744 - }, - { - "epoch": 0.07018204941004687, - "grad_norm": 0.9465177059173584, - "learning_rate": 1.9988327879501567e-05, - "loss": 0.4083, - "step": 745 - }, - { - "epoch": 0.07027625350321472, - "grad_norm": 0.93609219789505, - "learning_rate": 1.998825482906888e-05, - "loss": 0.4355, - "step": 746 - }, - { - "epoch": 0.07037045759638257, - "grad_norm": 1.0855962038040161, - "learning_rate": 1.998818155088854e-05, - "loss": 0.4833, - "step": 747 - }, - { - "epoch": 0.07046466168955041, - "grad_norm": 1.063692569732666, - "learning_rate": 1.9988108044962207e-05, - "loss": 0.4584, - "step": 748 - }, - { - "epoch": 0.07055886578271826, - "grad_norm": 1.0598742961883545, - "learning_rate": 1.998803431129157e-05, - "loss": 0.4934, - "step": 749 - }, - { - "epoch": 0.07065306987588611, - "grad_norm": 0.9979044795036316, - "learning_rate": 1.9987960349878302e-05, - "loss": 0.4834, - "step": 750 - }, - { - "epoch": 0.07074727396905396, - "grad_norm": 0.9993002414703369, - "learning_rate": 1.998788616072409e-05, - "loss": 0.3989, - "step": 751 - }, - { - "epoch": 0.07084147806222181, - "grad_norm": 1.0453739166259766, - "learning_rate": 1.9987811743830624e-05, - "loss": 0.4515, - "step": 752 - }, - { - "epoch": 
0.07093568215538965, - "grad_norm": 1.0170127153396606, - "learning_rate": 1.998773709919961e-05, - "loss": 0.4276, - "step": 753 - }, - { - "epoch": 0.0710298862485575, - "grad_norm": 0.9998757243156433, - "learning_rate": 1.998766222683274e-05, - "loss": 0.4545, - "step": 754 - }, - { - "epoch": 0.07112409034172534, - "grad_norm": 1.049187183380127, - "learning_rate": 1.9987587126731727e-05, - "loss": 0.4716, - "step": 755 - }, - { - "epoch": 0.07121829443489319, - "grad_norm": 1.0449811220169067, - "learning_rate": 1.998751179889828e-05, - "loss": 0.4916, - "step": 756 - }, - { - "epoch": 0.07131249852806104, - "grad_norm": 0.9422582983970642, - "learning_rate": 1.998743624333412e-05, - "loss": 0.378, - "step": 757 - }, - { - "epoch": 0.07140670262122889, - "grad_norm": 0.9875731468200684, - "learning_rate": 1.9987360460040963e-05, - "loss": 0.3882, - "step": 758 - }, - { - "epoch": 0.07150090671439674, - "grad_norm": 1.00863778591156, - "learning_rate": 1.9987284449020548e-05, - "loss": 0.4507, - "step": 759 - }, - { - "epoch": 0.07159511080756459, - "grad_norm": 1.0163146257400513, - "learning_rate": 1.9987208210274597e-05, - "loss": 0.433, - "step": 760 - }, - { - "epoch": 0.07168931490073244, - "grad_norm": 0.9799811244010925, - "learning_rate": 1.9987131743804858e-05, - "loss": 0.452, - "step": 761 - }, - { - "epoch": 0.07178351899390029, - "grad_norm": 1.0506083965301514, - "learning_rate": 1.9987055049613065e-05, - "loss": 0.4551, - "step": 762 - }, - { - "epoch": 0.07187772308706813, - "grad_norm": 1.1049585342407227, - "learning_rate": 1.9986978127700974e-05, - "loss": 0.4713, - "step": 763 - }, - { - "epoch": 0.07197192718023598, - "grad_norm": 0.8913968801498413, - "learning_rate": 1.9986900978070337e-05, - "loss": 0.3691, - "step": 764 - }, - { - "epoch": 0.07206613127340383, - "grad_norm": 1.0600327253341675, - "learning_rate": 1.998682360072291e-05, - "loss": 0.5035, - "step": 765 - }, - { - "epoch": 0.07216033536657168, - "grad_norm": 0.9857485294342041, - "learning_rate": 1.9986745995660463e-05, - "loss": 0.4404, - "step": 766 - }, - { - "epoch": 0.07225453945973953, - "grad_norm": 1.1809180974960327, - "learning_rate": 1.9986668162884763e-05, - "loss": 0.4649, - "step": 767 - }, - { - "epoch": 0.07234874355290738, - "grad_norm": 0.9589232206344604, - "learning_rate": 1.998659010239758e-05, - "loss": 0.4272, - "step": 768 - }, - { - "epoch": 0.07244294764607523, - "grad_norm": 0.9995101690292358, - "learning_rate": 1.99865118142007e-05, - "loss": 0.4669, - "step": 769 - }, - { - "epoch": 0.07253715173924306, - "grad_norm": 0.9336907863616943, - "learning_rate": 1.998643329829591e-05, - "loss": 0.433, - "step": 770 - }, - { - "epoch": 0.07263135583241091, - "grad_norm": 1.0039317607879639, - "learning_rate": 1.9986354554684994e-05, - "loss": 0.4497, - "step": 771 - }, - { - "epoch": 0.07272555992557876, - "grad_norm": 1.0792837142944336, - "learning_rate": 1.9986275583369745e-05, - "loss": 0.4448, - "step": 772 - }, - { - "epoch": 0.07281976401874661, - "grad_norm": 0.9362404346466064, - "learning_rate": 1.9986196384351975e-05, - "loss": 0.3992, - "step": 773 - }, - { - "epoch": 0.07291396811191446, - "grad_norm": 1.0516244173049927, - "learning_rate": 1.998611695763348e-05, - "loss": 0.4449, - "step": 774 - }, - { - "epoch": 0.07300817220508231, - "grad_norm": 1.085710883140564, - "learning_rate": 1.9986037303216076e-05, - "loss": 0.4198, - "step": 775 - }, - { - "epoch": 0.07310237629825016, - "grad_norm": 1.0311400890350342, - "learning_rate": 1.998595742110158e-05, - 
"loss": 0.431, - "step": 776 - }, - { - "epoch": 0.073196580391418, - "grad_norm": 1.0214941501617432, - "learning_rate": 1.998587731129181e-05, - "loss": 0.402, - "step": 777 - }, - { - "epoch": 0.07329078448458586, - "grad_norm": 1.0891741514205933, - "learning_rate": 1.9985796973788592e-05, - "loss": 0.4059, - "step": 778 - }, - { - "epoch": 0.0733849885777537, - "grad_norm": 1.0049872398376465, - "learning_rate": 1.998571640859376e-05, - "loss": 0.401, - "step": 779 - }, - { - "epoch": 0.07347919267092155, - "grad_norm": 1.0176184177398682, - "learning_rate": 1.998563561570915e-05, - "loss": 0.3997, - "step": 780 - }, - { - "epoch": 0.0735733967640894, - "grad_norm": 1.0183604955673218, - "learning_rate": 1.9985554595136606e-05, - "loss": 0.4529, - "step": 781 - }, - { - "epoch": 0.07366760085725725, - "grad_norm": 1.0522005558013916, - "learning_rate": 1.9985473346877976e-05, - "loss": 0.452, - "step": 782 - }, - { - "epoch": 0.0737618049504251, - "grad_norm": 0.9236129522323608, - "learning_rate": 1.9985391870935108e-05, - "loss": 0.4518, - "step": 783 - }, - { - "epoch": 0.07385600904359295, - "grad_norm": 1.044931411743164, - "learning_rate": 1.9985310167309865e-05, - "loss": 0.4967, - "step": 784 - }, - { - "epoch": 0.0739502131367608, - "grad_norm": 1.0424778461456299, - "learning_rate": 1.9985228236004107e-05, - "loss": 0.4418, - "step": 785 - }, - { - "epoch": 0.07404441722992865, - "grad_norm": 1.0401757955551147, - "learning_rate": 1.9985146077019698e-05, - "loss": 0.4709, - "step": 786 - }, - { - "epoch": 0.07413862132309648, - "grad_norm": 0.90964674949646, - "learning_rate": 1.998506369035852e-05, - "loss": 0.3972, - "step": 787 - }, - { - "epoch": 0.07423282541626433, - "grad_norm": 0.9477034211158752, - "learning_rate": 1.998498107602245e-05, - "loss": 0.3349, - "step": 788 - }, - { - "epoch": 0.07432702950943218, - "grad_norm": 1.0175570249557495, - "learning_rate": 1.9984898234013367e-05, - "loss": 0.4212, - "step": 789 - }, - { - "epoch": 0.07442123360260003, - "grad_norm": 0.9965648651123047, - "learning_rate": 1.9984815164333163e-05, - "loss": 0.4262, - "step": 790 - }, - { - "epoch": 0.07451543769576788, - "grad_norm": 1.0520919561386108, - "learning_rate": 1.998473186698373e-05, - "loss": 0.4884, - "step": 791 - }, - { - "epoch": 0.07460964178893573, - "grad_norm": 0.9975264072418213, - "learning_rate": 1.9984648341966974e-05, - "loss": 0.4992, - "step": 792 - }, - { - "epoch": 0.07470384588210358, - "grad_norm": 1.0246936082839966, - "learning_rate": 1.998456458928479e-05, - "loss": 0.4453, - "step": 793 - }, - { - "epoch": 0.07479804997527142, - "grad_norm": 1.0155010223388672, - "learning_rate": 1.998448060893909e-05, - "loss": 0.4551, - "step": 794 - }, - { - "epoch": 0.07489225406843927, - "grad_norm": 0.9694147109985352, - "learning_rate": 1.9984396400931794e-05, - "loss": 0.4338, - "step": 795 - }, - { - "epoch": 0.07498645816160712, - "grad_norm": 1.0100959539413452, - "learning_rate": 1.9984311965264816e-05, - "loss": 0.4345, - "step": 796 - }, - { - "epoch": 0.07508066225477497, - "grad_norm": 1.0546627044677734, - "learning_rate": 1.9984227301940088e-05, - "loss": 0.5144, - "step": 797 - }, - { - "epoch": 0.07517486634794282, - "grad_norm": 1.2081952095031738, - "learning_rate": 1.9984142410959534e-05, - "loss": 0.3909, - "step": 798 - }, - { - "epoch": 0.07526907044111067, - "grad_norm": 1.051641583442688, - "learning_rate": 1.9984057292325093e-05, - "loss": 0.4785, - "step": 799 - }, - { - "epoch": 0.07536327453427852, - "grad_norm": 
1.1095900535583496, - "learning_rate": 1.9983971946038703e-05, - "loss": 0.4382, - "step": 800 - }, - { - "epoch": 0.07545747862744637, - "grad_norm": 1.0389223098754883, - "learning_rate": 1.9983886372102314e-05, - "loss": 0.4288, - "step": 801 - }, - { - "epoch": 0.07555168272061422, - "grad_norm": 0.965965747833252, - "learning_rate": 1.9983800570517876e-05, - "loss": 0.417, - "step": 802 - }, - { - "epoch": 0.07564588681378207, - "grad_norm": 0.9707470536231995, - "learning_rate": 1.9983714541287343e-05, - "loss": 0.3798, - "step": 803 - }, - { - "epoch": 0.0757400909069499, - "grad_norm": 0.9705069065093994, - "learning_rate": 1.998362828441268e-05, - "loss": 0.4492, - "step": 804 - }, - { - "epoch": 0.07583429500011775, - "grad_norm": 1.0146031379699707, - "learning_rate": 1.998354179989585e-05, - "loss": 0.4154, - "step": 805 - }, - { - "epoch": 0.0759284990932856, - "grad_norm": 1.011963963508606, - "learning_rate": 1.9983455087738833e-05, - "loss": 0.4724, - "step": 806 - }, - { - "epoch": 0.07602270318645345, - "grad_norm": 0.9977352023124695, - "learning_rate": 1.9983368147943593e-05, - "loss": 0.4739, - "step": 807 - }, - { - "epoch": 0.0761169072796213, - "grad_norm": 0.9591209888458252, - "learning_rate": 1.9983280980512127e-05, - "loss": 0.397, - "step": 808 - }, - { - "epoch": 0.07621111137278914, - "grad_norm": 1.0379873514175415, - "learning_rate": 1.9983193585446408e-05, - "loss": 0.4637, - "step": 809 - }, - { - "epoch": 0.076305315465957, - "grad_norm": 0.9633691906929016, - "learning_rate": 1.9983105962748438e-05, - "loss": 0.3821, - "step": 810 - }, - { - "epoch": 0.07639951955912484, - "grad_norm": 1.0823371410369873, - "learning_rate": 1.9983018112420213e-05, - "loss": 0.4669, - "step": 811 - }, - { - "epoch": 0.07649372365229269, - "grad_norm": 1.0636144876480103, - "learning_rate": 1.9982930034463738e-05, - "loss": 0.4933, - "step": 812 - }, - { - "epoch": 0.07658792774546054, - "grad_norm": 1.081931233406067, - "learning_rate": 1.9982841728881016e-05, - "loss": 0.4548, - "step": 813 - }, - { - "epoch": 0.07668213183862839, - "grad_norm": 1.0330334901809692, - "learning_rate": 1.998275319567407e-05, - "loss": 0.4312, - "step": 814 - }, - { - "epoch": 0.07677633593179624, - "grad_norm": 1.0809640884399414, - "learning_rate": 1.9982664434844908e-05, - "loss": 0.4582, - "step": 815 - }, - { - "epoch": 0.07687054002496409, - "grad_norm": 0.9635642766952515, - "learning_rate": 1.9982575446395554e-05, - "loss": 0.4516, - "step": 816 - }, - { - "epoch": 0.07696474411813194, - "grad_norm": 0.9754523038864136, - "learning_rate": 1.9982486230328047e-05, - "loss": 0.4632, - "step": 817 - }, - { - "epoch": 0.07705894821129979, - "grad_norm": 1.0695689916610718, - "learning_rate": 1.9982396786644417e-05, - "loss": 0.4215, - "step": 818 - }, - { - "epoch": 0.07715315230446763, - "grad_norm": 0.9689046740531921, - "learning_rate": 1.99823071153467e-05, - "loss": 0.4484, - "step": 819 - }, - { - "epoch": 0.07724735639763548, - "grad_norm": 1.0402729511260986, - "learning_rate": 1.998221721643694e-05, - "loss": 0.5115, - "step": 820 - }, - { - "epoch": 0.07734156049080333, - "grad_norm": 1.0786657333374023, - "learning_rate": 1.9982127089917196e-05, - "loss": 0.5123, - "step": 821 - }, - { - "epoch": 0.07743576458397117, - "grad_norm": 0.9543944001197815, - "learning_rate": 1.9982036735789513e-05, - "loss": 0.4345, - "step": 822 - }, - { - "epoch": 0.07752996867713902, - "grad_norm": 0.9859232902526855, - "learning_rate": 1.9981946154055955e-05, - "loss": 0.4569, - "step": 823 - 
}, - { - "epoch": 0.07762417277030687, - "grad_norm": 0.9364791512489319, - "learning_rate": 1.9981855344718587e-05, - "loss": 0.4085, - "step": 824 - }, - { - "epoch": 0.07771837686347471, - "grad_norm": 0.960197925567627, - "learning_rate": 1.998176430777948e-05, - "loss": 0.3889, - "step": 825 - }, - { - "epoch": 0.07781258095664256, - "grad_norm": 0.8794482350349426, - "learning_rate": 1.9981673043240712e-05, - "loss": 0.3551, - "step": 826 - }, - { - "epoch": 0.07790678504981041, - "grad_norm": 1.0110254287719727, - "learning_rate": 1.998158155110436e-05, - "loss": 0.4543, - "step": 827 - }, - { - "epoch": 0.07800098914297826, - "grad_norm": 1.0137994289398193, - "learning_rate": 1.9981489831372512e-05, - "loss": 0.4326, - "step": 828 - }, - { - "epoch": 0.07809519323614611, - "grad_norm": 0.9920204281806946, - "learning_rate": 1.9981397884047257e-05, - "loss": 0.4465, - "step": 829 - }, - { - "epoch": 0.07818939732931396, - "grad_norm": 1.0545607805252075, - "learning_rate": 1.9981305709130696e-05, - "loss": 0.4999, - "step": 830 - }, - { - "epoch": 0.07828360142248181, - "grad_norm": 1.0289400815963745, - "learning_rate": 1.998121330662493e-05, - "loss": 0.4756, - "step": 831 - }, - { - "epoch": 0.07837780551564966, - "grad_norm": 1.1255810260772705, - "learning_rate": 1.998112067653206e-05, - "loss": 0.5224, - "step": 832 - }, - { - "epoch": 0.0784720096088175, - "grad_norm": 0.9349611401557922, - "learning_rate": 1.9981027818854205e-05, - "loss": 0.4198, - "step": 833 - }, - { - "epoch": 0.07856621370198535, - "grad_norm": 1.0145128965377808, - "learning_rate": 1.998093473359348e-05, - "loss": 0.404, - "step": 834 - }, - { - "epoch": 0.0786604177951532, - "grad_norm": 1.003151297569275, - "learning_rate": 1.9980841420752008e-05, - "loss": 0.4209, - "step": 835 - }, - { - "epoch": 0.07875462188832105, - "grad_norm": 1.01313316822052, - "learning_rate": 1.998074788033191e-05, - "loss": 0.4449, - "step": 836 - }, - { - "epoch": 0.0788488259814889, - "grad_norm": 1.096969723701477, - "learning_rate": 1.9980654112335333e-05, - "loss": 0.5094, - "step": 837 - }, - { - "epoch": 0.07894303007465675, - "grad_norm": 1.038501501083374, - "learning_rate": 1.9980560116764404e-05, - "loss": 0.4482, - "step": 838 - }, - { - "epoch": 0.07903723416782459, - "grad_norm": 0.9726386070251465, - "learning_rate": 1.9980465893621268e-05, - "loss": 0.407, - "step": 839 - }, - { - "epoch": 0.07913143826099243, - "grad_norm": 0.9721311330795288, - "learning_rate": 1.9980371442908077e-05, - "loss": 0.4037, - "step": 840 - }, - { - "epoch": 0.07922564235416028, - "grad_norm": 1.0406038761138916, - "learning_rate": 1.998027676462698e-05, - "loss": 0.4435, - "step": 841 - }, - { - "epoch": 0.07931984644732813, - "grad_norm": 0.9423885941505432, - "learning_rate": 1.9980181858780136e-05, - "loss": 0.4365, - "step": 842 - }, - { - "epoch": 0.07941405054049598, - "grad_norm": 1.131834864616394, - "learning_rate": 1.9980086725369712e-05, - "loss": 0.471, - "step": 843 - }, - { - "epoch": 0.07950825463366383, - "grad_norm": 0.9440787434577942, - "learning_rate": 1.997999136439788e-05, - "loss": 0.4208, - "step": 844 - }, - { - "epoch": 0.07960245872683168, - "grad_norm": 1.0020167827606201, - "learning_rate": 1.997989577586681e-05, - "loss": 0.4368, - "step": 845 - }, - { - "epoch": 0.07969666281999953, - "grad_norm": 0.9412116408348083, - "learning_rate": 1.997979995977868e-05, - "loss": 0.4033, - "step": 846 - }, - { - "epoch": 0.07979086691316738, - "grad_norm": 0.9708133339881897, - "learning_rate": 
1.9979703916135677e-05, - "loss": 0.4364, - "step": 847 - }, - { - "epoch": 0.07988507100633523, - "grad_norm": 0.9239591360092163, - "learning_rate": 1.997960764493999e-05, - "loss": 0.4404, - "step": 848 - }, - { - "epoch": 0.07997927509950307, - "grad_norm": 1.0096538066864014, - "learning_rate": 1.997951114619381e-05, - "loss": 0.4062, - "step": 849 - }, - { - "epoch": 0.08007347919267092, - "grad_norm": 0.858192503452301, - "learning_rate": 1.997941441989935e-05, - "loss": 0.3975, - "step": 850 - }, - { - "epoch": 0.08016768328583877, - "grad_norm": 1.05099356174469, - "learning_rate": 1.997931746605881e-05, - "loss": 0.4651, - "step": 851 - }, - { - "epoch": 0.08026188737900662, - "grad_norm": 1.252518653869629, - "learning_rate": 1.9979220284674392e-05, - "loss": 0.4671, - "step": 852 - }, - { - "epoch": 0.08035609147217447, - "grad_norm": 0.9368758201599121, - "learning_rate": 1.997912287574832e-05, - "loss": 0.4046, - "step": 853 - }, - { - "epoch": 0.08045029556534232, - "grad_norm": 0.9597492814064026, - "learning_rate": 1.997902523928281e-05, - "loss": 0.4595, - "step": 854 - }, - { - "epoch": 0.08054449965851017, - "grad_norm": 0.9732673168182373, - "learning_rate": 1.99789273752801e-05, - "loss": 0.4022, - "step": 855 - }, - { - "epoch": 0.080638703751678, - "grad_norm": 1.036406397819519, - "learning_rate": 1.9978829283742405e-05, - "loss": 0.4246, - "step": 856 - }, - { - "epoch": 0.08073290784484585, - "grad_norm": 0.9169813990592957, - "learning_rate": 1.9978730964671977e-05, - "loss": 0.4408, - "step": 857 - }, - { - "epoch": 0.0808271119380137, - "grad_norm": 1.088754415512085, - "learning_rate": 1.9978632418071044e-05, - "loss": 0.4319, - "step": 858 - }, - { - "epoch": 0.08092131603118155, - "grad_norm": 0.9790443778038025, - "learning_rate": 1.9978533643941865e-05, - "loss": 0.4223, - "step": 859 - }, - { - "epoch": 0.0810155201243494, - "grad_norm": 0.9643515944480896, - "learning_rate": 1.9978434642286684e-05, - "loss": 0.4287, - "step": 860 - }, - { - "epoch": 0.08110972421751725, - "grad_norm": 0.8812493681907654, - "learning_rate": 1.9978335413107764e-05, - "loss": 0.3915, - "step": 861 - }, - { - "epoch": 0.0812039283106851, - "grad_norm": 0.9891970753669739, - "learning_rate": 1.9978235956407358e-05, - "loss": 0.4435, - "step": 862 - }, - { - "epoch": 0.08129813240385295, - "grad_norm": 0.9829233884811401, - "learning_rate": 1.9978136272187745e-05, - "loss": 0.4052, - "step": 863 - }, - { - "epoch": 0.0813923364970208, - "grad_norm": 0.9916782975196838, - "learning_rate": 1.9978036360451197e-05, - "loss": 0.4325, - "step": 864 - }, - { - "epoch": 0.08148654059018864, - "grad_norm": 1.0053255558013916, - "learning_rate": 1.9977936221199983e-05, - "loss": 0.4686, - "step": 865 - }, - { - "epoch": 0.0815807446833565, - "grad_norm": 0.9676094651222229, - "learning_rate": 1.9977835854436398e-05, - "loss": 0.4224, - "step": 866 - }, - { - "epoch": 0.08167494877652434, - "grad_norm": 1.0051491260528564, - "learning_rate": 1.997773526016272e-05, - "loss": 0.4731, - "step": 867 - }, - { - "epoch": 0.08176915286969219, - "grad_norm": 0.9441819787025452, - "learning_rate": 1.9977634438381248e-05, - "loss": 0.4386, - "step": 868 - }, - { - "epoch": 0.08186335696286004, - "grad_norm": 1.0373190641403198, - "learning_rate": 1.9977533389094278e-05, - "loss": 0.4276, - "step": 869 - }, - { - "epoch": 0.08195756105602789, - "grad_norm": 0.9964975118637085, - "learning_rate": 1.9977432112304118e-05, - "loss": 0.4686, - "step": 870 - }, - { - "epoch": 0.08205176514919574, - 
"grad_norm": 0.9844152331352234, - "learning_rate": 1.9977330608013075e-05, - "loss": 0.4184, - "step": 871 - }, - { - "epoch": 0.08214596924236359, - "grad_norm": 0.9310718178749084, - "learning_rate": 1.9977228876223463e-05, - "loss": 0.382, - "step": 872 - }, - { - "epoch": 0.08224017333553144, - "grad_norm": 0.8855440020561218, - "learning_rate": 1.9977126916937607e-05, - "loss": 0.387, - "step": 873 - }, - { - "epoch": 0.08233437742869927, - "grad_norm": 0.9412824511528015, - "learning_rate": 1.9977024730157824e-05, - "loss": 0.4016, - "step": 874 - }, - { - "epoch": 0.08242858152186712, - "grad_norm": 1.117919683456421, - "learning_rate": 1.9976922315886445e-05, - "loss": 0.4676, - "step": 875 - }, - { - "epoch": 0.08252278561503497, - "grad_norm": 0.9602299332618713, - "learning_rate": 1.9976819674125815e-05, - "loss": 0.4452, - "step": 876 - }, - { - "epoch": 0.08261698970820282, - "grad_norm": 0.8984887003898621, - "learning_rate": 1.997671680487826e-05, - "loss": 0.4055, - "step": 877 - }, - { - "epoch": 0.08271119380137067, - "grad_norm": 1.0107239484786987, - "learning_rate": 1.9976613708146134e-05, - "loss": 0.4739, - "step": 878 - }, - { - "epoch": 0.08280539789453852, - "grad_norm": 0.9731579422950745, - "learning_rate": 1.997651038393179e-05, - "loss": 0.375, - "step": 879 - }, - { - "epoch": 0.08289960198770636, - "grad_norm": 1.032165765762329, - "learning_rate": 1.9976406832237576e-05, - "loss": 0.4491, - "step": 880 - }, - { - "epoch": 0.08299380608087421, - "grad_norm": 0.9235690236091614, - "learning_rate": 1.997630305306586e-05, - "loss": 0.4361, - "step": 881 - }, - { - "epoch": 0.08308801017404206, - "grad_norm": 0.9779563546180725, - "learning_rate": 1.9976199046419006e-05, - "loss": 0.5025, - "step": 882 - }, - { - "epoch": 0.08318221426720991, - "grad_norm": 0.8626536130905151, - "learning_rate": 1.997609481229938e-05, - "loss": 0.386, - "step": 883 - }, - { - "epoch": 0.08327641836037776, - "grad_norm": 1.018281102180481, - "learning_rate": 1.997599035070937e-05, - "loss": 0.4295, - "step": 884 - }, - { - "epoch": 0.08337062245354561, - "grad_norm": 1.0163942575454712, - "learning_rate": 1.997588566165135e-05, - "loss": 0.4134, - "step": 885 - }, - { - "epoch": 0.08346482654671346, - "grad_norm": 0.8940512537956238, - "learning_rate": 1.9975780745127706e-05, - "loss": 0.4126, - "step": 886 - }, - { - "epoch": 0.08355903063988131, - "grad_norm": 0.978265106678009, - "learning_rate": 1.997567560114084e-05, - "loss": 0.468, - "step": 887 - }, - { - "epoch": 0.08365323473304916, - "grad_norm": 1.018894076347351, - "learning_rate": 1.9975570229693137e-05, - "loss": 0.443, - "step": 888 - }, - { - "epoch": 0.083747438826217, - "grad_norm": 0.9922093152999878, - "learning_rate": 1.9975464630787008e-05, - "loss": 0.4468, - "step": 889 - }, - { - "epoch": 0.08384164291938485, - "grad_norm": 0.8547562956809998, - "learning_rate": 1.9975358804424853e-05, - "loss": 0.3887, - "step": 890 - }, - { - "epoch": 0.08393584701255269, - "grad_norm": 0.9433829188346863, - "learning_rate": 1.9975252750609095e-05, - "loss": 0.4038, - "step": 891 - }, - { - "epoch": 0.08403005110572054, - "grad_norm": 0.9846193194389343, - "learning_rate": 1.9975146469342146e-05, - "loss": 0.4322, - "step": 892 - }, - { - "epoch": 0.08412425519888839, - "grad_norm": 0.9687615633010864, - "learning_rate": 1.9975039960626433e-05, - "loss": 0.4211, - "step": 893 - }, - { - "epoch": 0.08421845929205624, - "grad_norm": 1.0523276329040527, - "learning_rate": 1.9974933224464376e-05, - "loss": 0.396, - 
"step": 894 - }, - { - "epoch": 0.08431266338522408, - "grad_norm": 0.9216560125350952, - "learning_rate": 1.997482626085842e-05, - "loss": 0.4138, - "step": 895 - }, - { - "epoch": 0.08440686747839193, - "grad_norm": 1.0367227792739868, - "learning_rate": 1.9974719069810998e-05, - "loss": 0.485, - "step": 896 - }, - { - "epoch": 0.08450107157155978, - "grad_norm": 0.9510256052017212, - "learning_rate": 1.9974611651324555e-05, - "loss": 0.3629, - "step": 897 - }, - { - "epoch": 0.08459527566472763, - "grad_norm": 0.9600812196731567, - "learning_rate": 1.997450400540154e-05, - "loss": 0.4321, - "step": 898 - }, - { - "epoch": 0.08468947975789548, - "grad_norm": 0.9327564239501953, - "learning_rate": 1.9974396132044405e-05, - "loss": 0.3761, - "step": 899 - }, - { - "epoch": 0.08478368385106333, - "grad_norm": 1.0322734117507935, - "learning_rate": 1.997428803125562e-05, - "loss": 0.4314, - "step": 900 - }, - { - "epoch": 0.08487788794423118, - "grad_norm": 0.9740006923675537, - "learning_rate": 1.9974179703037636e-05, - "loss": 0.4769, - "step": 901 - }, - { - "epoch": 0.08497209203739903, - "grad_norm": 0.9764218926429749, - "learning_rate": 1.997407114739293e-05, - "loss": 0.4944, - "step": 902 - }, - { - "epoch": 0.08506629613056688, - "grad_norm": 0.9930822849273682, - "learning_rate": 1.997396236432398e-05, - "loss": 0.428, - "step": 903 - }, - { - "epoch": 0.08516050022373473, - "grad_norm": 0.9698862433433533, - "learning_rate": 1.9973853353833262e-05, - "loss": 0.4313, - "step": 904 - }, - { - "epoch": 0.08525470431690257, - "grad_norm": 0.9097919464111328, - "learning_rate": 1.997374411592326e-05, - "loss": 0.3919, - "step": 905 - }, - { - "epoch": 0.08534890841007042, - "grad_norm": 0.9172612428665161, - "learning_rate": 1.997363465059647e-05, - "loss": 0.4214, - "step": 906 - }, - { - "epoch": 0.08544311250323827, - "grad_norm": 0.9216505885124207, - "learning_rate": 1.9973524957855384e-05, - "loss": 0.4232, - "step": 907 - }, - { - "epoch": 0.08553731659640611, - "grad_norm": 1.072882056236267, - "learning_rate": 1.9973415037702502e-05, - "loss": 0.4435, - "step": 908 - }, - { - "epoch": 0.08563152068957396, - "grad_norm": 0.9569485783576965, - "learning_rate": 1.9973304890140336e-05, - "loss": 0.435, - "step": 909 - }, - { - "epoch": 0.0857257247827418, - "grad_norm": 0.9770811796188354, - "learning_rate": 1.9973194515171396e-05, - "loss": 0.4541, - "step": 910 - }, - { - "epoch": 0.08581992887590965, - "grad_norm": 0.9245586395263672, - "learning_rate": 1.99730839127982e-05, - "loss": 0.4335, - "step": 911 - }, - { - "epoch": 0.0859141329690775, - "grad_norm": 0.8607360124588013, - "learning_rate": 1.997297308302326e-05, - "loss": 0.4122, - "step": 912 - }, - { - "epoch": 0.08600833706224535, - "grad_norm": 1.036927342414856, - "learning_rate": 1.997286202584911e-05, - "loss": 0.404, - "step": 913 - }, - { - "epoch": 0.0861025411554132, - "grad_norm": 0.9474027752876282, - "learning_rate": 1.9972750741278285e-05, - "loss": 0.4708, - "step": 914 - }, - { - "epoch": 0.08619674524858105, - "grad_norm": 1.0663697719573975, - "learning_rate": 1.9972639229313322e-05, - "loss": 0.4629, - "step": 915 - }, - { - "epoch": 0.0862909493417489, - "grad_norm": 0.9513779878616333, - "learning_rate": 1.9972527489956762e-05, - "loss": 0.4099, - "step": 916 - }, - { - "epoch": 0.08638515343491675, - "grad_norm": 1.0742497444152832, - "learning_rate": 1.997241552321115e-05, - "loss": 0.397, - "step": 917 - }, - { - "epoch": 0.0864793575280846, - "grad_norm": 1.0032830238342285, - 
"learning_rate": 1.9972303329079042e-05, - "loss": 0.4249, - "step": 918 - }, - { - "epoch": 0.08657356162125245, - "grad_norm": 1.023820400238037, - "learning_rate": 1.9972190907562993e-05, - "loss": 0.446, - "step": 919 - }, - { - "epoch": 0.0866677657144203, - "grad_norm": 0.9125425815582275, - "learning_rate": 1.9972078258665574e-05, - "loss": 0.4342, - "step": 920 - }, - { - "epoch": 0.08676196980758814, - "grad_norm": 0.8609833717346191, - "learning_rate": 1.9971965382389347e-05, - "loss": 0.3637, - "step": 921 - }, - { - "epoch": 0.08685617390075599, - "grad_norm": 0.9849636554718018, - "learning_rate": 1.9971852278736886e-05, - "loss": 0.4722, - "step": 922 - }, - { - "epoch": 0.08695037799392384, - "grad_norm": 0.9862493872642517, - "learning_rate": 1.997173894771077e-05, - "loss": 0.3878, - "step": 923 - }, - { - "epoch": 0.08704458208709169, - "grad_norm": 1.003157138824463, - "learning_rate": 1.9971625389313587e-05, - "loss": 0.4448, - "step": 924 - }, - { - "epoch": 0.08713878618025953, - "grad_norm": 0.9130213856697083, - "learning_rate": 1.9971511603547923e-05, - "loss": 0.4192, - "step": 925 - }, - { - "epoch": 0.08723299027342737, - "grad_norm": 0.972983717918396, - "learning_rate": 1.9971397590416372e-05, - "loss": 0.4194, - "step": 926 - }, - { - "epoch": 0.08732719436659522, - "grad_norm": 1.125463604927063, - "learning_rate": 1.9971283349921538e-05, - "loss": 0.4699, - "step": 927 - }, - { - "epoch": 0.08742139845976307, - "grad_norm": 0.8822147250175476, - "learning_rate": 1.997116888206602e-05, - "loss": 0.3593, - "step": 928 - }, - { - "epoch": 0.08751560255293092, - "grad_norm": 0.9502570033073425, - "learning_rate": 1.997105418685243e-05, - "loss": 0.433, - "step": 929 - }, - { - "epoch": 0.08760980664609877, - "grad_norm": 0.9492387175559998, - "learning_rate": 1.9970939264283386e-05, - "loss": 0.447, - "step": 930 - }, - { - "epoch": 0.08770401073926662, - "grad_norm": 0.9000892043113708, - "learning_rate": 1.9970824114361507e-05, - "loss": 0.4475, - "step": 931 - }, - { - "epoch": 0.08779821483243447, - "grad_norm": 0.9955626726150513, - "learning_rate": 1.9970708737089416e-05, - "loss": 0.4828, - "step": 932 - }, - { - "epoch": 0.08789241892560232, - "grad_norm": 0.8077518939971924, - "learning_rate": 1.9970593132469748e-05, - "loss": 0.397, - "step": 933 - }, - { - "epoch": 0.08798662301877017, - "grad_norm": 0.8611214756965637, - "learning_rate": 1.9970477300505133e-05, - "loss": 0.4351, - "step": 934 - }, - { - "epoch": 0.08808082711193801, - "grad_norm": 1.0117332935333252, - "learning_rate": 1.997036124119822e-05, - "loss": 0.46, - "step": 935 - }, - { - "epoch": 0.08817503120510586, - "grad_norm": 1.0115711688995361, - "learning_rate": 1.9970244954551648e-05, - "loss": 0.4724, - "step": 936 - }, - { - "epoch": 0.08826923529827371, - "grad_norm": 0.8961659669876099, - "learning_rate": 1.9970128440568074e-05, - "loss": 0.4178, - "step": 937 - }, - { - "epoch": 0.08836343939144156, - "grad_norm": 0.9423846006393433, - "learning_rate": 1.997001169925015e-05, - "loss": 0.4318, - "step": 938 - }, - { - "epoch": 0.08845764348460941, - "grad_norm": 1.0483758449554443, - "learning_rate": 1.9969894730600544e-05, - "loss": 0.3863, - "step": 939 - }, - { - "epoch": 0.08855184757777726, - "grad_norm": 0.9910538792610168, - "learning_rate": 1.9969777534621918e-05, - "loss": 0.4692, - "step": 940 - }, - { - "epoch": 0.08864605167094511, - "grad_norm": 0.8945648074150085, - "learning_rate": 1.9969660111316945e-05, - "loss": 0.4039, - "step": 941 - }, - { - "epoch": 
0.08874025576411296, - "grad_norm": 0.867096483707428, - "learning_rate": 1.9969542460688305e-05, - "loss": 0.4212, - "step": 942 - }, - { - "epoch": 0.08883445985728079, - "grad_norm": 0.9797136187553406, - "learning_rate": 1.9969424582738676e-05, - "loss": 0.4278, - "step": 943 - }, - { - "epoch": 0.08892866395044864, - "grad_norm": 0.9553640484809875, - "learning_rate": 1.996930647747075e-05, - "loss": 0.4082, - "step": 944 - }, - { - "epoch": 0.08902286804361649, - "grad_norm": 0.9790606498718262, - "learning_rate": 1.9969188144887217e-05, - "loss": 0.4145, - "step": 945 - }, - { - "epoch": 0.08911707213678434, - "grad_norm": 0.8860597014427185, - "learning_rate": 1.9969069584990776e-05, - "loss": 0.3856, - "step": 946 - }, - { - "epoch": 0.08921127622995219, - "grad_norm": 1.0481891632080078, - "learning_rate": 1.9968950797784136e-05, - "loss": 0.4086, - "step": 947 - }, - { - "epoch": 0.08930548032312004, - "grad_norm": 1.065038800239563, - "learning_rate": 1.9968831783269997e-05, - "loss": 0.4608, - "step": 948 - }, - { - "epoch": 0.08939968441628789, - "grad_norm": 1.0611155033111572, - "learning_rate": 1.9968712541451073e-05, - "loss": 0.4528, - "step": 949 - }, - { - "epoch": 0.08949388850945574, - "grad_norm": 0.8630752563476562, - "learning_rate": 1.9968593072330093e-05, - "loss": 0.4143, - "step": 950 - }, - { - "epoch": 0.08958809260262358, - "grad_norm": 0.9093382954597473, - "learning_rate": 1.996847337590977e-05, - "loss": 0.4257, - "step": 951 - }, - { - "epoch": 0.08968229669579143, - "grad_norm": 0.9018155336380005, - "learning_rate": 1.996835345219284e-05, - "loss": 0.3865, - "step": 952 - }, - { - "epoch": 0.08977650078895928, - "grad_norm": 1.040109395980835, - "learning_rate": 1.9968233301182033e-05, - "loss": 0.3863, - "step": 953 - }, - { - "epoch": 0.08987070488212713, - "grad_norm": 1.0499516725540161, - "learning_rate": 1.9968112922880088e-05, - "loss": 0.4221, - "step": 954 - }, - { - "epoch": 0.08996490897529498, - "grad_norm": 0.9960455894470215, - "learning_rate": 1.9967992317289754e-05, - "loss": 0.4535, - "step": 955 - }, - { - "epoch": 0.09005911306846283, - "grad_norm": 0.992347002029419, - "learning_rate": 1.9967871484413782e-05, - "loss": 0.4323, - "step": 956 - }, - { - "epoch": 0.09015331716163068, - "grad_norm": 1.0884090662002563, - "learning_rate": 1.9967750424254922e-05, - "loss": 0.4741, - "step": 957 - }, - { - "epoch": 0.09024752125479853, - "grad_norm": 0.9774399399757385, - "learning_rate": 1.996762913681594e-05, - "loss": 0.419, - "step": 958 - }, - { - "epoch": 0.09034172534796638, - "grad_norm": 1.0485435724258423, - "learning_rate": 1.9967507622099595e-05, - "loss": 0.4387, - "step": 959 - }, - { - "epoch": 0.09043592944113421, - "grad_norm": 1.0433429479599, - "learning_rate": 1.9967385880108663e-05, - "loss": 0.4861, - "step": 960 - }, - { - "epoch": 0.09053013353430206, - "grad_norm": 0.8936190605163574, - "learning_rate": 1.996726391084592e-05, - "loss": 0.4286, - "step": 961 - }, - { - "epoch": 0.09062433762746991, - "grad_norm": 1.089770793914795, - "learning_rate": 1.996714171431414e-05, - "loss": 0.5094, - "step": 962 - }, - { - "epoch": 0.09071854172063776, - "grad_norm": 0.8957109451293945, - "learning_rate": 1.9967019290516115e-05, - "loss": 0.4038, - "step": 963 - }, - { - "epoch": 0.0908127458138056, - "grad_norm": 1.009662389755249, - "learning_rate": 1.996689663945464e-05, - "loss": 0.493, - "step": 964 - }, - { - "epoch": 0.09090694990697346, - "grad_norm": 0.9156802892684937, - "learning_rate": 1.9966773761132506e-05, - 
"loss": 0.3813, - "step": 965 - }, - { - "epoch": 0.0910011540001413, - "grad_norm": 0.8849820494651794, - "learning_rate": 1.9966650655552516e-05, - "loss": 0.3967, - "step": 966 - }, - { - "epoch": 0.09109535809330915, - "grad_norm": 0.9695305228233337, - "learning_rate": 1.996652732271748e-05, - "loss": 0.4175, - "step": 967 - }, - { - "epoch": 0.091189562186477, - "grad_norm": 1.0623234510421753, - "learning_rate": 1.99664037626302e-05, - "loss": 0.435, - "step": 968 - }, - { - "epoch": 0.09128376627964485, - "grad_norm": 1.0353437662124634, - "learning_rate": 1.996627997529351e-05, - "loss": 0.4506, - "step": 969 - }, - { - "epoch": 0.0913779703728127, - "grad_norm": 0.906459629535675, - "learning_rate": 1.996615596071022e-05, - "loss": 0.4007, - "step": 970 - }, - { - "epoch": 0.09147217446598055, - "grad_norm": 0.9056885242462158, - "learning_rate": 1.9966031718883157e-05, - "loss": 0.392, - "step": 971 - }, - { - "epoch": 0.0915663785591484, - "grad_norm": 0.906808078289032, - "learning_rate": 1.9965907249815163e-05, - "loss": 0.4303, - "step": 972 - }, - { - "epoch": 0.09166058265231625, - "grad_norm": 0.8993813991546631, - "learning_rate": 1.9965782553509067e-05, - "loss": 0.4355, - "step": 973 - }, - { - "epoch": 0.0917547867454841, - "grad_norm": 0.9064143300056458, - "learning_rate": 1.996565762996772e-05, - "loss": 0.3948, - "step": 974 - }, - { - "epoch": 0.09184899083865194, - "grad_norm": 0.9879948496818542, - "learning_rate": 1.9965532479193967e-05, - "loss": 0.4619, - "step": 975 - }, - { - "epoch": 0.0919431949318198, - "grad_norm": 1.0842314958572388, - "learning_rate": 1.996540710119066e-05, - "loss": 0.4527, - "step": 976 - }, - { - "epoch": 0.09203739902498763, - "grad_norm": 1.0031143426895142, - "learning_rate": 1.996528149596066e-05, - "loss": 0.4272, - "step": 977 - }, - { - "epoch": 0.09213160311815548, - "grad_norm": 0.9392336010932922, - "learning_rate": 1.996515566350683e-05, - "loss": 0.4007, - "step": 978 - }, - { - "epoch": 0.09222580721132333, - "grad_norm": 1.0424503087997437, - "learning_rate": 1.9965029603832036e-05, - "loss": 0.4618, - "step": 979 - }, - { - "epoch": 0.09232001130449118, - "grad_norm": 0.9735755324363708, - "learning_rate": 1.996490331693916e-05, - "loss": 0.4586, - "step": 980 - }, - { - "epoch": 0.09241421539765902, - "grad_norm": 0.9512478113174438, - "learning_rate": 1.996477680283108e-05, - "loss": 0.4286, - "step": 981 - }, - { - "epoch": 0.09250841949082687, - "grad_norm": 0.8885504603385925, - "learning_rate": 1.996465006151067e-05, - "loss": 0.4222, - "step": 982 - }, - { - "epoch": 0.09260262358399472, - "grad_norm": 0.9333905577659607, - "learning_rate": 1.9964523092980834e-05, - "loss": 0.4208, - "step": 983 - }, - { - "epoch": 0.09269682767716257, - "grad_norm": 0.8582695126533508, - "learning_rate": 1.996439589724446e-05, - "loss": 0.3992, - "step": 984 - }, - { - "epoch": 0.09279103177033042, - "grad_norm": 0.9202725291252136, - "learning_rate": 1.9964268474304448e-05, - "loss": 0.3822, - "step": 985 - }, - { - "epoch": 0.09288523586349827, - "grad_norm": 0.9933639168739319, - "learning_rate": 1.9964140824163705e-05, - "loss": 0.4417, - "step": 986 - }, - { - "epoch": 0.09297943995666612, - "grad_norm": 0.9218569397926331, - "learning_rate": 1.996401294682514e-05, - "loss": 0.3977, - "step": 987 - }, - { - "epoch": 0.09307364404983397, - "grad_norm": 1.0288808345794678, - "learning_rate": 1.9963884842291677e-05, - "loss": 0.5045, - "step": 988 - }, - { - "epoch": 0.09316784814300182, - "grad_norm": 0.980198860168457, - 
"learning_rate": 1.9963756510566222e-05, - "loss": 0.467, - "step": 989 - }, - { - "epoch": 0.09326205223616967, - "grad_norm": 1.0243239402770996, - "learning_rate": 1.9963627951651715e-05, - "loss": 0.4765, - "step": 990 - }, - { - "epoch": 0.09335625632933751, - "grad_norm": 0.9720578193664551, - "learning_rate": 1.996349916555108e-05, - "loss": 0.432, - "step": 991 - }, - { - "epoch": 0.09345046042250536, - "grad_norm": 0.922471284866333, - "learning_rate": 1.996337015226725e-05, - "loss": 0.4131, - "step": 992 - }, - { - "epoch": 0.09354466451567321, - "grad_norm": 0.9350531697273254, - "learning_rate": 1.996324091180318e-05, - "loss": 0.4721, - "step": 993 - }, - { - "epoch": 0.09363886860884106, - "grad_norm": 0.9498339295387268, - "learning_rate": 1.9963111444161806e-05, - "loss": 0.4494, - "step": 994 - }, - { - "epoch": 0.0937330727020089, - "grad_norm": 1.0895516872406006, - "learning_rate": 1.996298174934608e-05, - "loss": 0.4975, - "step": 995 - }, - { - "epoch": 0.09382727679517675, - "grad_norm": 0.939751386642456, - "learning_rate": 1.996285182735896e-05, - "loss": 0.4139, - "step": 996 - }, - { - "epoch": 0.0939214808883446, - "grad_norm": 1.0181207656860352, - "learning_rate": 1.9962721678203416e-05, - "loss": 0.4607, - "step": 997 - }, - { - "epoch": 0.09401568498151244, - "grad_norm": 0.9602494835853577, - "learning_rate": 1.996259130188241e-05, - "loss": 0.3947, - "step": 998 - }, - { - "epoch": 0.09410988907468029, - "grad_norm": 0.902991771697998, - "learning_rate": 1.9962460698398914e-05, - "loss": 0.4085, - "step": 999 - }, - { - "epoch": 0.09420409316784814, - "grad_norm": 0.9918155074119568, - "learning_rate": 1.9962329867755906e-05, - "loss": 0.4562, - "step": 1000 - }, - { - "epoch": 0.09429829726101599, - "grad_norm": 1.0148743391036987, - "learning_rate": 1.996219880995637e-05, - "loss": 0.4875, - "step": 1001 - }, - { - "epoch": 0.09439250135418384, - "grad_norm": 0.9490169286727905, - "learning_rate": 1.9962067525003295e-05, - "loss": 0.4258, - "step": 1002 - }, - { - "epoch": 0.09448670544735169, - "grad_norm": 1.0962616205215454, - "learning_rate": 1.9961936012899673e-05, - "loss": 0.4985, - "step": 1003 - }, - { - "epoch": 0.09458090954051954, - "grad_norm": 1.0087071657180786, - "learning_rate": 1.9961804273648502e-05, - "loss": 0.4723, - "step": 1004 - }, - { - "epoch": 0.09467511363368739, - "grad_norm": 0.9063972234725952, - "learning_rate": 1.996167230725279e-05, - "loss": 0.3771, - "step": 1005 - }, - { - "epoch": 0.09476931772685523, - "grad_norm": 1.0970993041992188, - "learning_rate": 1.9961540113715543e-05, - "loss": 0.4679, - "step": 1006 - }, - { - "epoch": 0.09486352182002308, - "grad_norm": 0.9199796319007874, - "learning_rate": 1.9961407693039777e-05, - "loss": 0.4201, - "step": 1007 - }, - { - "epoch": 0.09495772591319093, - "grad_norm": 0.9776504635810852, - "learning_rate": 1.9961275045228506e-05, - "loss": 0.4448, - "step": 1008 - }, - { - "epoch": 0.09505193000635878, - "grad_norm": 0.9351235628128052, - "learning_rate": 1.9961142170284762e-05, - "loss": 0.3965, - "step": 1009 - }, - { - "epoch": 0.09514613409952663, - "grad_norm": 0.9779771566390991, - "learning_rate": 1.996100906821157e-05, - "loss": 0.3929, - "step": 1010 - }, - { - "epoch": 0.09524033819269448, - "grad_norm": 0.9306912422180176, - "learning_rate": 1.9960875739011966e-05, - "loss": 0.4565, - "step": 1011 - }, - { - "epoch": 0.09533454228586231, - "grad_norm": 1.103110909461975, - "learning_rate": 1.996074218268899e-05, - "loss": 0.4952, - "step": 1012 - }, - { - 
"epoch": 0.09542874637903016, - "grad_norm": 0.8756340146064758, - "learning_rate": 1.9960608399245688e-05, - "loss": 0.4193, - "step": 1013 - }, - { - "epoch": 0.09552295047219801, - "grad_norm": 0.9869973659515381, - "learning_rate": 1.996047438868511e-05, - "loss": 0.4303, - "step": 1014 - }, - { - "epoch": 0.09561715456536586, - "grad_norm": 0.9271620512008667, - "learning_rate": 1.996034015101031e-05, - "loss": 0.43, - "step": 1015 - }, - { - "epoch": 0.09571135865853371, - "grad_norm": 0.9635888338088989, - "learning_rate": 1.9960205686224355e-05, - "loss": 0.394, - "step": 1016 - }, - { - "epoch": 0.09580556275170156, - "grad_norm": 1.020485281944275, - "learning_rate": 1.9960070994330307e-05, - "loss": 0.4472, - "step": 1017 - }, - { - "epoch": 0.09589976684486941, - "grad_norm": 1.0038342475891113, - "learning_rate": 1.995993607533123e-05, - "loss": 0.4227, - "step": 1018 - }, - { - "epoch": 0.09599397093803726, - "grad_norm": 0.8755595088005066, - "learning_rate": 1.9959800929230212e-05, - "loss": 0.3637, - "step": 1019 - }, - { - "epoch": 0.0960881750312051, - "grad_norm": 0.9497532248497009, - "learning_rate": 1.995966555603033e-05, - "loss": 0.4353, - "step": 1020 - }, - { - "epoch": 0.09618237912437295, - "grad_norm": 0.9116272926330566, - "learning_rate": 1.9959529955734668e-05, - "loss": 0.4417, - "step": 1021 - }, - { - "epoch": 0.0962765832175408, - "grad_norm": 0.9214377999305725, - "learning_rate": 1.9959394128346323e-05, - "loss": 0.4261, - "step": 1022 - }, - { - "epoch": 0.09637078731070865, - "grad_norm": 0.8842349648475647, - "learning_rate": 1.9959258073868387e-05, - "loss": 0.4008, - "step": 1023 - }, - { - "epoch": 0.0964649914038765, - "grad_norm": 0.983285665512085, - "learning_rate": 1.9959121792303967e-05, - "loss": 0.384, - "step": 1024 - }, - { - "epoch": 0.09655919549704435, - "grad_norm": 1.0836387872695923, - "learning_rate": 1.9958985283656164e-05, - "loss": 0.4158, - "step": 1025 - }, - { - "epoch": 0.0966533995902122, - "grad_norm": 0.8068239092826843, - "learning_rate": 1.9958848547928098e-05, - "loss": 0.363, - "step": 1026 - }, - { - "epoch": 0.09674760368338005, - "grad_norm": 0.9302456974983215, - "learning_rate": 1.995871158512288e-05, - "loss": 0.4719, - "step": 1027 - }, - { - "epoch": 0.0968418077765479, - "grad_norm": 0.9990631341934204, - "learning_rate": 1.9958574395243643e-05, - "loss": 0.4119, - "step": 1028 - }, - { - "epoch": 0.09693601186971573, - "grad_norm": 0.9791685938835144, - "learning_rate": 1.9958436978293503e-05, - "loss": 0.4358, - "step": 1029 - }, - { - "epoch": 0.09703021596288358, - "grad_norm": 0.9006029367446899, - "learning_rate": 1.9958299334275602e-05, - "loss": 0.376, - "step": 1030 - }, - { - "epoch": 0.09712442005605143, - "grad_norm": 1.055158019065857, - "learning_rate": 1.9958161463193074e-05, - "loss": 0.4614, - "step": 1031 - }, - { - "epoch": 0.09721862414921928, - "grad_norm": 1.053442358970642, - "learning_rate": 1.9958023365049063e-05, - "loss": 0.4223, - "step": 1032 - }, - { - "epoch": 0.09731282824238713, - "grad_norm": 0.9021908044815063, - "learning_rate": 1.995788503984672e-05, - "loss": 0.3829, - "step": 1033 - }, - { - "epoch": 0.09740703233555498, - "grad_norm": 0.9552052617073059, - "learning_rate": 1.99577464875892e-05, - "loss": 0.4522, - "step": 1034 - }, - { - "epoch": 0.09750123642872283, - "grad_norm": 0.9647678732872009, - "learning_rate": 1.9957607708279656e-05, - "loss": 0.3969, - "step": 1035 - }, - { - "epoch": 0.09759544052189068, - "grad_norm": 0.9882518649101257, - 
"learning_rate": 1.9957468701921257e-05, - "loss": 0.4226, - "step": 1036 - }, - { - "epoch": 0.09768964461505852, - "grad_norm": 1.0046566724777222, - "learning_rate": 1.9957329468517175e-05, - "loss": 0.4545, - "step": 1037 - }, - { - "epoch": 0.09778384870822637, - "grad_norm": 0.9556362628936768, - "learning_rate": 1.995719000807058e-05, - "loss": 0.4151, - "step": 1038 - }, - { - "epoch": 0.09787805280139422, - "grad_norm": 0.9986971020698547, - "learning_rate": 1.9957050320584653e-05, - "loss": 0.3908, - "step": 1039 - }, - { - "epoch": 0.09797225689456207, - "grad_norm": 0.975786566734314, - "learning_rate": 1.9956910406062583e-05, - "loss": 0.4215, - "step": 1040 - }, - { - "epoch": 0.09806646098772992, - "grad_norm": 0.9671193361282349, - "learning_rate": 1.9956770264507555e-05, - "loss": 0.447, - "step": 1041 - }, - { - "epoch": 0.09816066508089777, - "grad_norm": 0.8227758407592773, - "learning_rate": 1.9956629895922765e-05, - "loss": 0.397, - "step": 1042 - }, - { - "epoch": 0.09825486917406562, - "grad_norm": 0.9373804926872253, - "learning_rate": 1.9956489300311416e-05, - "loss": 0.4022, - "step": 1043 - }, - { - "epoch": 0.09834907326723347, - "grad_norm": 0.9801133275032043, - "learning_rate": 1.9956348477676714e-05, - "loss": 0.4443, - "step": 1044 - }, - { - "epoch": 0.09844327736040132, - "grad_norm": 0.975909411907196, - "learning_rate": 1.995620742802187e-05, - "loss": 0.4358, - "step": 1045 - }, - { - "epoch": 0.09853748145356915, - "grad_norm": 1.0563409328460693, - "learning_rate": 1.9956066151350097e-05, - "loss": 0.4765, - "step": 1046 - }, - { - "epoch": 0.098631685546737, - "grad_norm": 1.0944769382476807, - "learning_rate": 1.995592464766462e-05, - "loss": 0.4237, - "step": 1047 - }, - { - "epoch": 0.09872588963990485, - "grad_norm": 1.036834478378296, - "learning_rate": 1.9955782916968663e-05, - "loss": 0.3851, - "step": 1048 - }, - { - "epoch": 0.0988200937330727, - "grad_norm": 0.9747660160064697, - "learning_rate": 1.995564095926546e-05, - "loss": 0.4205, - "step": 1049 - }, - { - "epoch": 0.09891429782624055, - "grad_norm": 0.9076653122901917, - "learning_rate": 1.995549877455824e-05, - "loss": 0.4159, - "step": 1050 - }, - { - "epoch": 0.0990085019194084, - "grad_norm": 1.1281238794326782, - "learning_rate": 1.995535636285026e-05, - "loss": 0.5221, - "step": 1051 - }, - { - "epoch": 0.09910270601257624, - "grad_norm": 0.964013934135437, - "learning_rate": 1.9955213724144754e-05, - "loss": 0.3878, - "step": 1052 - }, - { - "epoch": 0.0991969101057441, - "grad_norm": 1.1707106828689575, - "learning_rate": 1.995507085844498e-05, - "loss": 0.4516, - "step": 1053 - }, - { - "epoch": 0.09929111419891194, - "grad_norm": 1.0508333444595337, - "learning_rate": 1.9954927765754195e-05, - "loss": 0.4217, - "step": 1054 - }, - { - "epoch": 0.09938531829207979, - "grad_norm": 0.9513484239578247, - "learning_rate": 1.995478444607566e-05, - "loss": 0.3977, - "step": 1055 - }, - { - "epoch": 0.09947952238524764, - "grad_norm": 1.0911073684692383, - "learning_rate": 1.9954640899412645e-05, - "loss": 0.4627, - "step": 1056 - }, - { - "epoch": 0.09957372647841549, - "grad_norm": 1.0177876949310303, - "learning_rate": 1.9954497125768423e-05, - "loss": 0.4408, - "step": 1057 - }, - { - "epoch": 0.09966793057158334, - "grad_norm": 0.9709005951881409, - "learning_rate": 1.9954353125146273e-05, - "loss": 0.453, - "step": 1058 - }, - { - "epoch": 0.09976213466475119, - "grad_norm": 0.9255178570747375, - "learning_rate": 1.9954208897549475e-05, - "loss": 0.3988, - "step": 1059 - }, 
- { - "epoch": 0.09985633875791904, - "grad_norm": 0.8608093857765198, - "learning_rate": 1.995406444298132e-05, - "loss": 0.3715, - "step": 1060 - }, - { - "epoch": 0.09995054285108688, - "grad_norm": 1.0226445198059082, - "learning_rate": 1.99539197614451e-05, - "loss": 0.3748, - "step": 1061 - }, - { - "epoch": 0.10004474694425473, - "grad_norm": 1.0217959880828857, - "learning_rate": 1.995377485294412e-05, - "loss": 0.4088, - "step": 1062 - }, - { - "epoch": 0.10013895103742258, - "grad_norm": 1.042251467704773, - "learning_rate": 1.9953629717481675e-05, - "loss": 0.4356, - "step": 1063 - }, - { - "epoch": 0.10023315513059042, - "grad_norm": 0.9148966670036316, - "learning_rate": 1.9953484355061078e-05, - "loss": 0.358, - "step": 1064 - }, - { - "epoch": 0.10032735922375827, - "grad_norm": 0.8217335343360901, - "learning_rate": 1.995333876568565e-05, - "loss": 0.3864, - "step": 1065 - }, - { - "epoch": 0.10042156331692612, - "grad_norm": 1.1984763145446777, - "learning_rate": 1.99531929493587e-05, - "loss": 0.4146, - "step": 1066 - }, - { - "epoch": 0.10051576741009396, - "grad_norm": 1.2049845457077026, - "learning_rate": 1.995304690608356e-05, - "loss": 0.4345, - "step": 1067 - }, - { - "epoch": 0.10060997150326181, - "grad_norm": 1.0960497856140137, - "learning_rate": 1.9952900635863558e-05, - "loss": 0.4823, - "step": 1068 - }, - { - "epoch": 0.10070417559642966, - "grad_norm": 0.9735412001609802, - "learning_rate": 1.9952754138702025e-05, - "loss": 0.4458, - "step": 1069 - }, - { - "epoch": 0.10079837968959751, - "grad_norm": 1.1528902053833008, - "learning_rate": 1.995260741460231e-05, - "loss": 0.4443, - "step": 1070 - }, - { - "epoch": 0.10089258378276536, - "grad_norm": 0.9167768359184265, - "learning_rate": 1.9952460463567752e-05, - "loss": 0.379, - "step": 1071 - }, - { - "epoch": 0.10098678787593321, - "grad_norm": 1.1124496459960938, - "learning_rate": 1.9952313285601706e-05, - "loss": 0.4275, - "step": 1072 - }, - { - "epoch": 0.10108099196910106, - "grad_norm": 0.8633842468261719, - "learning_rate": 1.9952165880707524e-05, - "loss": 0.3647, - "step": 1073 - }, - { - "epoch": 0.10117519606226891, - "grad_norm": 0.9196866750717163, - "learning_rate": 1.9952018248888567e-05, - "loss": 0.3929, - "step": 1074 - }, - { - "epoch": 0.10126940015543676, - "grad_norm": 0.9396743774414062, - "learning_rate": 1.9951870390148206e-05, - "loss": 0.4321, - "step": 1075 - }, - { - "epoch": 0.1013636042486046, - "grad_norm": 1.0641164779663086, - "learning_rate": 1.9951722304489806e-05, - "loss": 0.4499, - "step": 1076 - }, - { - "epoch": 0.10145780834177245, - "grad_norm": 0.9184996485710144, - "learning_rate": 1.9951573991916747e-05, - "loss": 0.4231, - "step": 1077 - }, - { - "epoch": 0.1015520124349403, - "grad_norm": 1.0018250942230225, - "learning_rate": 1.9951425452432415e-05, - "loss": 0.393, - "step": 1078 - }, - { - "epoch": 0.10164621652810815, - "grad_norm": 1.0571403503417969, - "learning_rate": 1.9951276686040188e-05, - "loss": 0.402, - "step": 1079 - }, - { - "epoch": 0.101740420621276, - "grad_norm": 0.9825863242149353, - "learning_rate": 1.9951127692743463e-05, - "loss": 0.4146, - "step": 1080 - }, - { - "epoch": 0.10183462471444384, - "grad_norm": 0.8631836771965027, - "learning_rate": 1.995097847254564e-05, - "loss": 0.3609, - "step": 1081 - }, - { - "epoch": 0.10192882880761168, - "grad_norm": 0.9502719044685364, - "learning_rate": 1.9950829025450116e-05, - "loss": 0.4387, - "step": 1082 - }, - { - "epoch": 0.10202303290077953, - "grad_norm": 1.0838639736175537, - 
"learning_rate": 1.9950679351460304e-05, - "loss": 0.4284, - "step": 1083 - }, - { - "epoch": 0.10211723699394738, - "grad_norm": 1.0188603401184082, - "learning_rate": 1.9950529450579607e-05, - "loss": 0.466, - "step": 1084 - }, - { - "epoch": 0.10221144108711523, - "grad_norm": 0.9595416188240051, - "learning_rate": 1.9950379322811456e-05, - "loss": 0.4428, - "step": 1085 - }, - { - "epoch": 0.10230564518028308, - "grad_norm": 0.9125534296035767, - "learning_rate": 1.9950228968159263e-05, - "loss": 0.3809, - "step": 1086 - }, - { - "epoch": 0.10239984927345093, - "grad_norm": 1.0977569818496704, - "learning_rate": 1.9950078386626465e-05, - "loss": 0.4142, - "step": 1087 - }, - { - "epoch": 0.10249405336661878, - "grad_norm": 1.0646089315414429, - "learning_rate": 1.994992757821649e-05, - "loss": 0.3855, - "step": 1088 - }, - { - "epoch": 0.10258825745978663, - "grad_norm": 1.055989146232605, - "learning_rate": 1.994977654293278e-05, - "loss": 0.4116, - "step": 1089 - }, - { - "epoch": 0.10268246155295448, - "grad_norm": 1.0687748193740845, - "learning_rate": 1.994962528077878e-05, - "loss": 0.4367, - "step": 1090 - }, - { - "epoch": 0.10277666564612233, - "grad_norm": 0.9160301089286804, - "learning_rate": 1.994947379175793e-05, - "loss": 0.4101, - "step": 1091 - }, - { - "epoch": 0.10287086973929017, - "grad_norm": 0.9010409116744995, - "learning_rate": 1.994932207587369e-05, - "loss": 0.4078, - "step": 1092 - }, - { - "epoch": 0.10296507383245802, - "grad_norm": 1.0180050134658813, - "learning_rate": 1.9949170133129524e-05, - "loss": 0.4309, - "step": 1093 - }, - { - "epoch": 0.10305927792562587, - "grad_norm": 1.0494129657745361, - "learning_rate": 1.9949017963528893e-05, - "loss": 0.419, - "step": 1094 - }, - { - "epoch": 0.10315348201879372, - "grad_norm": 0.9077717661857605, - "learning_rate": 1.9948865567075262e-05, - "loss": 0.4143, - "step": 1095 - }, - { - "epoch": 0.10324768611196157, - "grad_norm": 1.0535287857055664, - "learning_rate": 1.994871294377211e-05, - "loss": 0.436, - "step": 1096 - }, - { - "epoch": 0.10334189020512942, - "grad_norm": 0.9463797807693481, - "learning_rate": 1.994856009362292e-05, - "loss": 0.4422, - "step": 1097 - }, - { - "epoch": 0.10343609429829725, - "grad_norm": 0.9908978343009949, - "learning_rate": 1.994840701663117e-05, - "loss": 0.4177, - "step": 1098 - }, - { - "epoch": 0.1035302983914651, - "grad_norm": 0.9611839652061462, - "learning_rate": 1.9948253712800358e-05, - "loss": 0.3773, - "step": 1099 - }, - { - "epoch": 0.10362450248463295, - "grad_norm": 1.0818508863449097, - "learning_rate": 1.9948100182133977e-05, - "loss": 0.4429, - "step": 1100 - }, - { - "epoch": 0.1037187065778008, - "grad_norm": 0.9528419375419617, - "learning_rate": 1.9947946424635524e-05, - "loss": 0.3939, - "step": 1101 - }, - { - "epoch": 0.10381291067096865, - "grad_norm": 1.0397058725357056, - "learning_rate": 1.994779244030851e-05, - "loss": 0.4457, - "step": 1102 - }, - { - "epoch": 0.1039071147641365, - "grad_norm": 0.9053099155426025, - "learning_rate": 1.9947638229156442e-05, - "loss": 0.4193, - "step": 1103 - }, - { - "epoch": 0.10400131885730435, - "grad_norm": 0.8966038823127747, - "learning_rate": 1.994748379118284e-05, - "loss": 0.41, - "step": 1104 - }, - { - "epoch": 0.1040955229504722, - "grad_norm": 1.0802412033081055, - "learning_rate": 1.9947329126391218e-05, - "loss": 0.442, - "step": 1105 - }, - { - "epoch": 0.10418972704364005, - "grad_norm": 0.9492148756980896, - "learning_rate": 1.9947174234785115e-05, - "loss": 0.393, - "step": 1106 - }, - 
{ - "epoch": 0.1042839311368079, - "grad_norm": 0.9896593689918518, - "learning_rate": 1.9947019116368052e-05, - "loss": 0.4653, - "step": 1107 - }, - { - "epoch": 0.10437813522997574, - "grad_norm": 1.1006875038146973, - "learning_rate": 1.9946863771143568e-05, - "loss": 0.3914, - "step": 1108 - }, - { - "epoch": 0.10447233932314359, - "grad_norm": 0.8899286389350891, - "learning_rate": 1.994670819911521e-05, - "loss": 0.3807, - "step": 1109 - }, - { - "epoch": 0.10456654341631144, - "grad_norm": 0.9169561266899109, - "learning_rate": 1.9946552400286526e-05, - "loss": 0.3909, - "step": 1110 - }, - { - "epoch": 0.10466074750947929, - "grad_norm": 0.9260783791542053, - "learning_rate": 1.994639637466106e-05, - "loss": 0.4816, - "step": 1111 - }, - { - "epoch": 0.10475495160264714, - "grad_norm": 0.8580223321914673, - "learning_rate": 1.9946240122242374e-05, - "loss": 0.3875, - "step": 1112 - }, - { - "epoch": 0.10484915569581499, - "grad_norm": 0.9351666569709778, - "learning_rate": 1.9946083643034032e-05, - "loss": 0.4119, - "step": 1113 - }, - { - "epoch": 0.10494335978898284, - "grad_norm": 0.96866774559021, - "learning_rate": 1.9945926937039603e-05, - "loss": 0.4657, - "step": 1114 - }, - { - "epoch": 0.10503756388215069, - "grad_norm": 0.9898170232772827, - "learning_rate": 1.9945770004262655e-05, - "loss": 0.4932, - "step": 1115 - }, - { - "epoch": 0.10513176797531852, - "grad_norm": 0.8722140789031982, - "learning_rate": 1.9945612844706768e-05, - "loss": 0.3971, - "step": 1116 - }, - { - "epoch": 0.10522597206848637, - "grad_norm": 0.9951531887054443, - "learning_rate": 1.9945455458375533e-05, - "loss": 0.4647, - "step": 1117 - }, - { - "epoch": 0.10532017616165422, - "grad_norm": 0.9879446625709534, - "learning_rate": 1.9945297845272527e-05, - "loss": 0.4687, - "step": 1118 - }, - { - "epoch": 0.10541438025482207, - "grad_norm": 0.9516083598136902, - "learning_rate": 1.9945140005401352e-05, - "loss": 0.4285, - "step": 1119 - }, - { - "epoch": 0.10550858434798992, - "grad_norm": 1.0435234308242798, - "learning_rate": 1.9944981938765603e-05, - "loss": 0.4443, - "step": 1120 - }, - { - "epoch": 0.10560278844115777, - "grad_norm": 0.9984058141708374, - "learning_rate": 1.9944823645368886e-05, - "loss": 0.3867, - "step": 1121 - }, - { - "epoch": 0.10569699253432562, - "grad_norm": 0.9314647912979126, - "learning_rate": 1.994466512521481e-05, - "loss": 0.382, - "step": 1122 - }, - { - "epoch": 0.10579119662749346, - "grad_norm": 1.0670300722122192, - "learning_rate": 1.9944506378306993e-05, - "loss": 0.4376, - "step": 1123 - }, - { - "epoch": 0.10588540072066131, - "grad_norm": 1.037182092666626, - "learning_rate": 1.9944347404649045e-05, - "loss": 0.4423, - "step": 1124 - }, - { - "epoch": 0.10597960481382916, - "grad_norm": 0.9391227960586548, - "learning_rate": 1.9944188204244602e-05, - "loss": 0.3961, - "step": 1125 - }, - { - "epoch": 0.10607380890699701, - "grad_norm": 0.9991692900657654, - "learning_rate": 1.9944028777097286e-05, - "loss": 0.4452, - "step": 1126 - }, - { - "epoch": 0.10616801300016486, - "grad_norm": 1.015335202217102, - "learning_rate": 1.9943869123210736e-05, - "loss": 0.4103, - "step": 1127 - }, - { - "epoch": 0.10626221709333271, - "grad_norm": 0.8520253300666809, - "learning_rate": 1.9943709242588588e-05, - "loss": 0.3964, - "step": 1128 - }, - { - "epoch": 0.10635642118650056, - "grad_norm": 0.991342306137085, - "learning_rate": 1.9943549135234496e-05, - "loss": 0.389, - "step": 1129 - }, - { - "epoch": 0.1064506252796684, - "grad_norm": 1.0495084524154663, - 
"learning_rate": 1.9943388801152107e-05, - "loss": 0.3862, - "step": 1130 - }, - { - "epoch": 0.10654482937283626, - "grad_norm": 0.965828001499176, - "learning_rate": 1.994322824034507e-05, - "loss": 0.449, - "step": 1131 - }, - { - "epoch": 0.1066390334660041, - "grad_norm": 0.9767307639122009, - "learning_rate": 1.9943067452817056e-05, - "loss": 0.4644, - "step": 1132 - }, - { - "epoch": 0.10673323755917194, - "grad_norm": 1.0747474431991577, - "learning_rate": 1.9942906438571727e-05, - "loss": 0.4394, - "step": 1133 - }, - { - "epoch": 0.10682744165233979, - "grad_norm": 0.8223479390144348, - "learning_rate": 1.994274519761275e-05, - "loss": 0.3783, - "step": 1134 - }, - { - "epoch": 0.10692164574550764, - "grad_norm": 0.959511399269104, - "learning_rate": 1.9942583729943806e-05, - "loss": 0.4156, - "step": 1135 - }, - { - "epoch": 0.10701584983867549, - "grad_norm": 1.038598895072937, - "learning_rate": 1.994242203556858e-05, - "loss": 0.4593, - "step": 1136 - }, - { - "epoch": 0.10711005393184334, - "grad_norm": 0.9544926285743713, - "learning_rate": 1.9942260114490754e-05, - "loss": 0.4334, - "step": 1137 - }, - { - "epoch": 0.10720425802501118, - "grad_norm": 0.9143279194831848, - "learning_rate": 1.9942097966714022e-05, - "loss": 0.392, - "step": 1138 - }, - { - "epoch": 0.10729846211817903, - "grad_norm": 0.9807520508766174, - "learning_rate": 1.994193559224208e-05, - "loss": 0.4353, - "step": 1139 - }, - { - "epoch": 0.10739266621134688, - "grad_norm": 0.9121593832969666, - "learning_rate": 1.994177299107863e-05, - "loss": 0.4215, - "step": 1140 - }, - { - "epoch": 0.10748687030451473, - "grad_norm": 0.9819267988204956, - "learning_rate": 1.9941610163227382e-05, - "loss": 0.436, - "step": 1141 - }, - { - "epoch": 0.10758107439768258, - "grad_norm": 0.9428817629814148, - "learning_rate": 1.9941447108692047e-05, - "loss": 0.4061, - "step": 1142 - }, - { - "epoch": 0.10767527849085043, - "grad_norm": 1.1399716138839722, - "learning_rate": 1.9941283827476344e-05, - "loss": 0.4847, - "step": 1143 - }, - { - "epoch": 0.10776948258401828, - "grad_norm": 0.9605879187583923, - "learning_rate": 1.9941120319583995e-05, - "loss": 0.4232, - "step": 1144 - }, - { - "epoch": 0.10786368667718613, - "grad_norm": 0.9342689514160156, - "learning_rate": 1.994095658501873e-05, - "loss": 0.3978, - "step": 1145 - }, - { - "epoch": 0.10795789077035398, - "grad_norm": 0.903514564037323, - "learning_rate": 1.9940792623784277e-05, - "loss": 0.3758, - "step": 1146 - }, - { - "epoch": 0.10805209486352182, - "grad_norm": 0.8668076395988464, - "learning_rate": 1.9940628435884378e-05, - "loss": 0.3828, - "step": 1147 - }, - { - "epoch": 0.10814629895668967, - "grad_norm": 0.8557491898536682, - "learning_rate": 1.994046402132278e-05, - "loss": 0.4151, - "step": 1148 - }, - { - "epoch": 0.10824050304985752, - "grad_norm": 0.991773784160614, - "learning_rate": 1.9940299380103226e-05, - "loss": 0.4073, - "step": 1149 - }, - { - "epoch": 0.10833470714302536, - "grad_norm": 0.9056696891784668, - "learning_rate": 1.994013451222948e-05, - "loss": 0.3922, - "step": 1150 - }, - { - "epoch": 0.1084289112361932, - "grad_norm": 0.8937355279922485, - "learning_rate": 1.9939969417705286e-05, - "loss": 0.452, - "step": 1151 - }, - { - "epoch": 0.10852311532936106, - "grad_norm": 0.9577783942222595, - "learning_rate": 1.993980409653442e-05, - "loss": 0.446, - "step": 1152 - }, - { - "epoch": 0.1086173194225289, - "grad_norm": 0.8804211616516113, - "learning_rate": 1.993963854872065e-05, - "loss": 0.3511, - "step": 1153 - }, - 
{ - "epoch": 0.10871152351569675, - "grad_norm": 0.9269177913665771, - "learning_rate": 1.993947277426775e-05, - "loss": 0.448, - "step": 1154 - }, - { - "epoch": 0.1088057276088646, - "grad_norm": 0.934552788734436, - "learning_rate": 1.9939306773179498e-05, - "loss": 0.4598, - "step": 1155 - }, - { - "epoch": 0.10889993170203245, - "grad_norm": 0.8715839385986328, - "learning_rate": 1.9939140545459677e-05, - "loss": 0.4361, - "step": 1156 - }, - { - "epoch": 0.1089941357952003, - "grad_norm": 0.9728548526763916, - "learning_rate": 1.9938974091112084e-05, - "loss": 0.468, - "step": 1157 - }, - { - "epoch": 0.10908833988836815, - "grad_norm": 1.1369495391845703, - "learning_rate": 1.993880741014051e-05, - "loss": 0.4311, - "step": 1158 - }, - { - "epoch": 0.109182543981536, - "grad_norm": 0.9297184944152832, - "learning_rate": 1.9938640502548753e-05, - "loss": 0.417, - "step": 1159 - }, - { - "epoch": 0.10927674807470385, - "grad_norm": 1.0212570428848267, - "learning_rate": 1.9938473368340627e-05, - "loss": 0.4747, - "step": 1160 - }, - { - "epoch": 0.1093709521678717, - "grad_norm": 0.9136896133422852, - "learning_rate": 1.9938306007519936e-05, - "loss": 0.4211, - "step": 1161 - }, - { - "epoch": 0.10946515626103955, - "grad_norm": 1.049403429031372, - "learning_rate": 1.9938138420090502e-05, - "loss": 0.4049, - "step": 1162 - }, - { - "epoch": 0.1095593603542074, - "grad_norm": 0.8913701772689819, - "learning_rate": 1.9937970606056135e-05, - "loss": 0.3839, - "step": 1163 - }, - { - "epoch": 0.10965356444737524, - "grad_norm": 0.8904287219047546, - "learning_rate": 1.9937802565420675e-05, - "loss": 0.4361, - "step": 1164 - }, - { - "epoch": 0.10974776854054309, - "grad_norm": 0.9403328895568848, - "learning_rate": 1.9937634298187944e-05, - "loss": 0.4217, - "step": 1165 - }, - { - "epoch": 0.10984197263371094, - "grad_norm": 0.924608051776886, - "learning_rate": 1.9937465804361783e-05, - "loss": 0.3998, - "step": 1166 - }, - { - "epoch": 0.10993617672687878, - "grad_norm": 0.9521637558937073, - "learning_rate": 1.9937297083946032e-05, - "loss": 0.4222, - "step": 1167 - }, - { - "epoch": 0.11003038082004662, - "grad_norm": 0.9766367077827454, - "learning_rate": 1.9937128136944542e-05, - "loss": 0.4144, - "step": 1168 - }, - { - "epoch": 0.11012458491321447, - "grad_norm": 0.804704487323761, - "learning_rate": 1.993695896336116e-05, - "loss": 0.4075, - "step": 1169 - }, - { - "epoch": 0.11021878900638232, - "grad_norm": 0.9180735349655151, - "learning_rate": 1.9936789563199747e-05, - "loss": 0.422, - "step": 1170 - }, - { - "epoch": 0.11031299309955017, - "grad_norm": 1.005083441734314, - "learning_rate": 1.9936619936464163e-05, - "loss": 0.4462, - "step": 1171 - }, - { - "epoch": 0.11040719719271802, - "grad_norm": 0.8922913074493408, - "learning_rate": 1.9936450083158277e-05, - "loss": 0.3895, - "step": 1172 - }, - { - "epoch": 0.11050140128588587, - "grad_norm": 0.9868902564048767, - "learning_rate": 1.9936280003285966e-05, - "loss": 0.4011, - "step": 1173 - }, - { - "epoch": 0.11059560537905372, - "grad_norm": 1.131201148033142, - "learning_rate": 1.99361096968511e-05, - "loss": 0.4347, - "step": 1174 - }, - { - "epoch": 0.11068980947222157, - "grad_norm": 0.8656303882598877, - "learning_rate": 1.9935939163857568e-05, - "loss": 0.3485, - "step": 1175 - }, - { - "epoch": 0.11078401356538942, - "grad_norm": 0.9164494276046753, - "learning_rate": 1.993576840430926e-05, - "loss": 0.3497, - "step": 1176 - }, - { - "epoch": 0.11087821765855727, - "grad_norm": 0.8910269737243652, - 
"learning_rate": 1.993559741821006e-05, - "loss": 0.3892, - "step": 1177 - }, - { - "epoch": 0.11097242175172511, - "grad_norm": 1.0199134349822998, - "learning_rate": 1.9935426205563878e-05, - "loss": 0.4074, - "step": 1178 - }, - { - "epoch": 0.11106662584489296, - "grad_norm": 0.9422849416732788, - "learning_rate": 1.993525476637461e-05, - "loss": 0.4027, - "step": 1179 - }, - { - "epoch": 0.11116082993806081, - "grad_norm": 0.8377765417098999, - "learning_rate": 1.993508310064617e-05, - "loss": 0.3536, - "step": 1180 - }, - { - "epoch": 0.11125503403122866, - "grad_norm": 0.998676598072052, - "learning_rate": 1.993491120838247e-05, - "loss": 0.3833, - "step": 1181 - }, - { - "epoch": 0.11134923812439651, - "grad_norm": 1.2441054582595825, - "learning_rate": 1.993473908958743e-05, - "loss": 0.3946, - "step": 1182 - }, - { - "epoch": 0.11144344221756436, - "grad_norm": 1.2046563625335693, - "learning_rate": 1.9934566744264975e-05, - "loss": 0.4182, - "step": 1183 - }, - { - "epoch": 0.11153764631073221, - "grad_norm": 0.8576118350028992, - "learning_rate": 1.9934394172419032e-05, - "loss": 0.382, - "step": 1184 - }, - { - "epoch": 0.11163185040390004, - "grad_norm": 0.9637244343757629, - "learning_rate": 1.9934221374053538e-05, - "loss": 0.4097, - "step": 1185 - }, - { - "epoch": 0.11172605449706789, - "grad_norm": 1.0179036855697632, - "learning_rate": 1.9934048349172433e-05, - "loss": 0.4023, - "step": 1186 - }, - { - "epoch": 0.11182025859023574, - "grad_norm": 1.0819107294082642, - "learning_rate": 1.9933875097779665e-05, - "loss": 0.4271, - "step": 1187 - }, - { - "epoch": 0.11191446268340359, - "grad_norm": 0.960874617099762, - "learning_rate": 1.9933701619879183e-05, - "loss": 0.3348, - "step": 1188 - }, - { - "epoch": 0.11200866677657144, - "grad_norm": 0.9459748268127441, - "learning_rate": 1.9933527915474936e-05, - "loss": 0.4009, - "step": 1189 - }, - { - "epoch": 0.11210287086973929, - "grad_norm": 0.941529393196106, - "learning_rate": 1.9933353984570894e-05, - "loss": 0.3894, - "step": 1190 - }, - { - "epoch": 0.11219707496290714, - "grad_norm": 0.9069638252258301, - "learning_rate": 1.9933179827171017e-05, - "loss": 0.4032, - "step": 1191 - }, - { - "epoch": 0.11229127905607499, - "grad_norm": 0.8663362264633179, - "learning_rate": 1.9933005443279278e-05, - "loss": 0.3971, - "step": 1192 - }, - { - "epoch": 0.11238548314924283, - "grad_norm": 1.1562236547470093, - "learning_rate": 1.9932830832899656e-05, - "loss": 0.4631, - "step": 1193 - }, - { - "epoch": 0.11247968724241068, - "grad_norm": 0.8705164790153503, - "learning_rate": 1.993265599603613e-05, - "loss": 0.4337, - "step": 1194 - }, - { - "epoch": 0.11257389133557853, - "grad_norm": 1.0312925577163696, - "learning_rate": 1.9932480932692682e-05, - "loss": 0.4298, - "step": 1195 - }, - { - "epoch": 0.11266809542874638, - "grad_norm": 0.9345729947090149, - "learning_rate": 1.993230564287331e-05, - "loss": 0.3955, - "step": 1196 - }, - { - "epoch": 0.11276229952191423, - "grad_norm": 0.9218775629997253, - "learning_rate": 1.9932130126582007e-05, - "loss": 0.4478, - "step": 1197 - }, - { - "epoch": 0.11285650361508208, - "grad_norm": 0.8155993223190308, - "learning_rate": 1.9931954383822777e-05, - "loss": 0.358, - "step": 1198 - }, - { - "epoch": 0.11295070770824993, - "grad_norm": 0.9408160448074341, - "learning_rate": 1.993177841459963e-05, - "loss": 0.4146, - "step": 1199 - }, - { - "epoch": 0.11304491180141778, - "grad_norm": 0.8760764598846436, - "learning_rate": 1.9931602218916573e-05, - "loss": 0.4375, - "step": 
1200 - }, - { - "epoch": 0.11313911589458563, - "grad_norm": 0.8879175782203674, - "learning_rate": 1.9931425796777627e-05, - "loss": 0.4248, - "step": 1201 - }, - { - "epoch": 0.11323331998775346, - "grad_norm": 1.0035958290100098, - "learning_rate": 1.9931249148186812e-05, - "loss": 0.4354, - "step": 1202 - }, - { - "epoch": 0.11332752408092131, - "grad_norm": 1.1458284854888916, - "learning_rate": 1.993107227314816e-05, - "loss": 0.4596, - "step": 1203 - }, - { - "epoch": 0.11342172817408916, - "grad_norm": 0.9176868796348572, - "learning_rate": 1.9930895171665696e-05, - "loss": 0.425, - "step": 1204 - }, - { - "epoch": 0.11351593226725701, - "grad_norm": 1.0198174715042114, - "learning_rate": 1.993071784374347e-05, - "loss": 0.4721, - "step": 1205 - }, - { - "epoch": 0.11361013636042486, - "grad_norm": 0.8922477960586548, - "learning_rate": 1.9930540289385518e-05, - "loss": 0.405, - "step": 1206 - }, - { - "epoch": 0.1137043404535927, - "grad_norm": 0.8916482329368591, - "learning_rate": 1.9930362508595886e-05, - "loss": 0.3866, - "step": 1207 - }, - { - "epoch": 0.11379854454676055, - "grad_norm": 0.8731459379196167, - "learning_rate": 1.9930184501378633e-05, - "loss": 0.4242, - "step": 1208 - }, - { - "epoch": 0.1138927486399284, - "grad_norm": 1.0246661901474, - "learning_rate": 1.9930006267737815e-05, - "loss": 0.4246, - "step": 1209 - }, - { - "epoch": 0.11398695273309625, - "grad_norm": 0.9761861562728882, - "learning_rate": 1.99298278076775e-05, - "loss": 0.4086, - "step": 1210 - }, - { - "epoch": 0.1140811568262641, - "grad_norm": 0.8512738347053528, - "learning_rate": 1.9929649121201752e-05, - "loss": 0.3949, - "step": 1211 - }, - { - "epoch": 0.11417536091943195, - "grad_norm": 0.998048722743988, - "learning_rate": 1.992947020831465e-05, - "loss": 0.422, - "step": 1212 - }, - { - "epoch": 0.1142695650125998, - "grad_norm": 0.9329831004142761, - "learning_rate": 1.9929291069020267e-05, - "loss": 0.3685, - "step": 1213 - }, - { - "epoch": 0.11436376910576765, - "grad_norm": 0.9052364230155945, - "learning_rate": 1.9929111703322693e-05, - "loss": 0.3486, - "step": 1214 - }, - { - "epoch": 0.1144579731989355, - "grad_norm": 1.0009113550186157, - "learning_rate": 1.992893211122602e-05, - "loss": 0.4671, - "step": 1215 - }, - { - "epoch": 0.11455217729210335, - "grad_norm": 0.9874580502510071, - "learning_rate": 1.9928752292734336e-05, - "loss": 0.4029, - "step": 1216 - }, - { - "epoch": 0.1146463813852712, - "grad_norm": 0.9014765024185181, - "learning_rate": 1.9928572247851745e-05, - "loss": 0.4142, - "step": 1217 - }, - { - "epoch": 0.11474058547843904, - "grad_norm": 0.9365810751914978, - "learning_rate": 1.992839197658235e-05, - "loss": 0.446, - "step": 1218 - }, - { - "epoch": 0.11483478957160688, - "grad_norm": 0.863187849521637, - "learning_rate": 1.9928211478930267e-05, - "loss": 0.3845, - "step": 1219 - }, - { - "epoch": 0.11492899366477473, - "grad_norm": 0.8800891637802124, - "learning_rate": 1.9928030754899607e-05, - "loss": 0.3981, - "step": 1220 - }, - { - "epoch": 0.11502319775794258, - "grad_norm": 0.8876266479492188, - "learning_rate": 1.9927849804494492e-05, - "loss": 0.3657, - "step": 1221 - }, - { - "epoch": 0.11511740185111043, - "grad_norm": 0.8542340397834778, - "learning_rate": 1.992766862771905e-05, - "loss": 0.3308, - "step": 1222 - }, - { - "epoch": 0.11521160594427828, - "grad_norm": 1.0105453729629517, - "learning_rate": 1.9927487224577402e-05, - "loss": 0.4347, - "step": 1223 - }, - { - "epoch": 0.11530581003744612, - "grad_norm": 0.885009765625, - 
"learning_rate": 1.99273055950737e-05, - "loss": 0.4144, - "step": 1224 - }, - { - "epoch": 0.11540001413061397, - "grad_norm": 0.9243080615997314, - "learning_rate": 1.9927123739212074e-05, - "loss": 0.3689, - "step": 1225 - }, - { - "epoch": 0.11549421822378182, - "grad_norm": 0.9319286346435547, - "learning_rate": 1.9926941656996673e-05, - "loss": 0.3817, - "step": 1226 - }, - { - "epoch": 0.11558842231694967, - "grad_norm": 0.8860958218574524, - "learning_rate": 1.9926759348431653e-05, - "loss": 0.4033, - "step": 1227 - }, - { - "epoch": 0.11568262641011752, - "grad_norm": 0.8765958547592163, - "learning_rate": 1.9926576813521167e-05, - "loss": 0.389, - "step": 1228 - }, - { - "epoch": 0.11577683050328537, - "grad_norm": 0.9463694095611572, - "learning_rate": 1.9926394052269376e-05, - "loss": 0.4109, - "step": 1229 - }, - { - "epoch": 0.11587103459645322, - "grad_norm": 0.9759585857391357, - "learning_rate": 1.992621106468045e-05, - "loss": 0.4117, - "step": 1230 - }, - { - "epoch": 0.11596523868962107, - "grad_norm": 0.9568681716918945, - "learning_rate": 1.9926027850758563e-05, - "loss": 0.4413, - "step": 1231 - }, - { - "epoch": 0.11605944278278892, - "grad_norm": 0.9808611273765564, - "learning_rate": 1.992584441050789e-05, - "loss": 0.4286, - "step": 1232 - }, - { - "epoch": 0.11615364687595676, - "grad_norm": 0.9273642301559448, - "learning_rate": 1.992566074393261e-05, - "loss": 0.4147, - "step": 1233 - }, - { - "epoch": 0.11624785096912461, - "grad_norm": 0.9518542289733887, - "learning_rate": 1.9925476851036918e-05, - "loss": 0.4341, - "step": 1234 - }, - { - "epoch": 0.11634205506229246, - "grad_norm": 0.9704899191856384, - "learning_rate": 1.9925292731825e-05, - "loss": 0.485, - "step": 1235 - }, - { - "epoch": 0.11643625915546031, - "grad_norm": 0.8993477821350098, - "learning_rate": 1.9925108386301063e-05, - "loss": 0.4007, - "step": 1236 - }, - { - "epoch": 0.11653046324862815, - "grad_norm": 1.0151928663253784, - "learning_rate": 1.99249238144693e-05, - "loss": 0.4211, - "step": 1237 - }, - { - "epoch": 0.116624667341796, - "grad_norm": 1.005807876586914, - "learning_rate": 1.992473901633393e-05, - "loss": 0.4874, - "step": 1238 - }, - { - "epoch": 0.11671887143496384, - "grad_norm": 1.0199459791183472, - "learning_rate": 1.992455399189916e-05, - "loss": 0.4673, - "step": 1239 - }, - { - "epoch": 0.1168130755281317, - "grad_norm": 0.9317260980606079, - "learning_rate": 1.992436874116921e-05, - "loss": 0.4715, - "step": 1240 - }, - { - "epoch": 0.11690727962129954, - "grad_norm": 0.9165276885032654, - "learning_rate": 1.9924183264148304e-05, - "loss": 0.3305, - "step": 1241 - }, - { - "epoch": 0.11700148371446739, - "grad_norm": 0.8516448140144348, - "learning_rate": 1.992399756084067e-05, - "loss": 0.3999, - "step": 1242 - }, - { - "epoch": 0.11709568780763524, - "grad_norm": 0.888482391834259, - "learning_rate": 1.9923811631250546e-05, - "loss": 0.4148, - "step": 1243 - }, - { - "epoch": 0.11718989190080309, - "grad_norm": 1.3115497827529907, - "learning_rate": 1.9923625475382166e-05, - "loss": 0.4154, - "step": 1244 - }, - { - "epoch": 0.11728409599397094, - "grad_norm": 0.8743381500244141, - "learning_rate": 1.9923439093239784e-05, - "loss": 0.3939, - "step": 1245 - }, - { - "epoch": 0.11737830008713879, - "grad_norm": 0.9198361039161682, - "learning_rate": 1.992325248482764e-05, - "loss": 0.4033, - "step": 1246 - }, - { - "epoch": 0.11747250418030664, - "grad_norm": 1.0421661138534546, - "learning_rate": 1.9923065650149995e-05, - "loss": 0.4425, - "step": 1247 - }, - 
{ - "epoch": 0.11756670827347449, - "grad_norm": 0.9926596879959106, - "learning_rate": 1.9922878589211102e-05, - "loss": 0.4102, - "step": 1248 - }, - { - "epoch": 0.11766091236664233, - "grad_norm": 0.9322739243507385, - "learning_rate": 1.9922691302015232e-05, - "loss": 0.4311, - "step": 1249 - }, - { - "epoch": 0.11775511645981018, - "grad_norm": 0.9065204858779907, - "learning_rate": 1.992250378856666e-05, - "loss": 0.43, - "step": 1250 - }, - { - "epoch": 0.11784932055297803, - "grad_norm": 0.8456649780273438, - "learning_rate": 1.9922316048869652e-05, - "loss": 0.3513, - "step": 1251 - }, - { - "epoch": 0.11794352464614588, - "grad_norm": 0.891179621219635, - "learning_rate": 1.9922128082928497e-05, - "loss": 0.3792, - "step": 1252 - }, - { - "epoch": 0.11803772873931373, - "grad_norm": 0.9345036745071411, - "learning_rate": 1.992193989074747e-05, - "loss": 0.4186, - "step": 1253 - }, - { - "epoch": 0.11813193283248156, - "grad_norm": 0.9004818201065063, - "learning_rate": 1.9921751472330873e-05, - "loss": 0.433, - "step": 1254 - }, - { - "epoch": 0.11822613692564941, - "grad_norm": 0.91129070520401, - "learning_rate": 1.9921562827683e-05, - "loss": 0.3933, - "step": 1255 - }, - { - "epoch": 0.11832034101881726, - "grad_norm": 0.9426426887512207, - "learning_rate": 1.9921373956808144e-05, - "loss": 0.4362, - "step": 1256 - }, - { - "epoch": 0.11841454511198511, - "grad_norm": 1.0422171354293823, - "learning_rate": 1.9921184859710626e-05, - "loss": 0.4173, - "step": 1257 - }, - { - "epoch": 0.11850874920515296, - "grad_norm": 1.0042086839675903, - "learning_rate": 1.9920995536394745e-05, - "loss": 0.4088, - "step": 1258 - }, - { - "epoch": 0.11860295329832081, - "grad_norm": 1.1132287979125977, - "learning_rate": 1.9920805986864823e-05, - "loss": 0.3819, - "step": 1259 - }, - { - "epoch": 0.11869715739148866, - "grad_norm": 1.0502781867980957, - "learning_rate": 1.9920616211125185e-05, - "loss": 0.4934, - "step": 1260 - }, - { - "epoch": 0.11879136148465651, - "grad_norm": 0.9304486513137817, - "learning_rate": 1.992042620918015e-05, - "loss": 0.3992, - "step": 1261 - }, - { - "epoch": 0.11888556557782436, - "grad_norm": 0.871278703212738, - "learning_rate": 1.9920235981034056e-05, - "loss": 0.3653, - "step": 1262 - }, - { - "epoch": 0.1189797696709922, - "grad_norm": 0.9891937375068665, - "learning_rate": 1.9920045526691245e-05, - "loss": 0.4249, - "step": 1263 - }, - { - "epoch": 0.11907397376416005, - "grad_norm": 0.8685119152069092, - "learning_rate": 1.9919854846156048e-05, - "loss": 0.3826, - "step": 1264 - }, - { - "epoch": 0.1191681778573279, - "grad_norm": 0.8814318180084229, - "learning_rate": 1.9919663939432824e-05, - "loss": 0.3882, - "step": 1265 - }, - { - "epoch": 0.11926238195049575, - "grad_norm": 0.7924103140830994, - "learning_rate": 1.9919472806525915e-05, - "loss": 0.3739, - "step": 1266 - }, - { - "epoch": 0.1193565860436636, - "grad_norm": 1.0052324533462524, - "learning_rate": 1.991928144743969e-05, - "loss": 0.4225, - "step": 1267 - }, - { - "epoch": 0.11945079013683145, - "grad_norm": 0.9484582543373108, - "learning_rate": 1.991908986217851e-05, - "loss": 0.4116, - "step": 1268 - }, - { - "epoch": 0.1195449942299993, - "grad_norm": 1.009057879447937, - "learning_rate": 1.9918898050746738e-05, - "loss": 0.4072, - "step": 1269 - }, - { - "epoch": 0.11963919832316715, - "grad_norm": 1.0325987339019775, - "learning_rate": 1.991870601314875e-05, - "loss": 0.4395, - "step": 1270 - }, - { - "epoch": 0.11973340241633498, - "grad_norm": 0.7867721915245056, - 
"learning_rate": 1.9918513749388925e-05, - "loss": 0.3657, - "step": 1271 - }, - { - "epoch": 0.11982760650950283, - "grad_norm": 0.9199879169464111, - "learning_rate": 1.991832125947165e-05, - "loss": 0.4368, - "step": 1272 - }, - { - "epoch": 0.11992181060267068, - "grad_norm": 0.9976329803466797, - "learning_rate": 1.9918128543401307e-05, - "loss": 0.4094, - "step": 1273 - }, - { - "epoch": 0.12001601469583853, - "grad_norm": 0.886244535446167, - "learning_rate": 1.9917935601182295e-05, - "loss": 0.3622, - "step": 1274 - }, - { - "epoch": 0.12011021878900638, - "grad_norm": 1.0019792318344116, - "learning_rate": 1.9917742432819015e-05, - "loss": 0.4494, - "step": 1275 - }, - { - "epoch": 0.12020442288217423, - "grad_norm": 1.1114577054977417, - "learning_rate": 1.991754903831587e-05, - "loss": 0.524, - "step": 1276 - }, - { - "epoch": 0.12029862697534208, - "grad_norm": 0.8961803913116455, - "learning_rate": 1.9917355417677266e-05, - "loss": 0.3931, - "step": 1277 - }, - { - "epoch": 0.12039283106850993, - "grad_norm": 1.096078634262085, - "learning_rate": 1.9917161570907626e-05, - "loss": 0.4423, - "step": 1278 - }, - { - "epoch": 0.12048703516167777, - "grad_norm": 0.958937406539917, - "learning_rate": 1.991696749801136e-05, - "loss": 0.4306, - "step": 1279 - }, - { - "epoch": 0.12058123925484562, - "grad_norm": 0.8562394380569458, - "learning_rate": 1.99167731989929e-05, - "loss": 0.3854, - "step": 1280 - }, - { - "epoch": 0.12067544334801347, - "grad_norm": 0.9827834367752075, - "learning_rate": 1.9916578673856676e-05, - "loss": 0.3733, - "step": 1281 - }, - { - "epoch": 0.12076964744118132, - "grad_norm": 0.9177426695823669, - "learning_rate": 1.9916383922607122e-05, - "loss": 0.3531, - "step": 1282 - }, - { - "epoch": 0.12086385153434917, - "grad_norm": 1.0187934637069702, - "learning_rate": 1.9916188945248675e-05, - "loss": 0.4429, - "step": 1283 - }, - { - "epoch": 0.12095805562751702, - "grad_norm": 0.9321007132530212, - "learning_rate": 1.9915993741785788e-05, - "loss": 0.3826, - "step": 1284 - }, - { - "epoch": 0.12105225972068487, - "grad_norm": 1.0457597970962524, - "learning_rate": 1.991579831222291e-05, - "loss": 0.4454, - "step": 1285 - }, - { - "epoch": 0.12114646381385272, - "grad_norm": 0.8614075183868408, - "learning_rate": 1.991560265656449e-05, - "loss": 0.3726, - "step": 1286 - }, - { - "epoch": 0.12124066790702057, - "grad_norm": 0.9820986390113831, - "learning_rate": 1.9915406774814995e-05, - "loss": 0.4007, - "step": 1287 - }, - { - "epoch": 0.12133487200018842, - "grad_norm": 0.7843329310417175, - "learning_rate": 1.9915210666978896e-05, - "loss": 0.3486, - "step": 1288 - }, - { - "epoch": 0.12142907609335625, - "grad_norm": 0.8918038010597229, - "learning_rate": 1.9915014333060653e-05, - "loss": 0.4224, - "step": 1289 - }, - { - "epoch": 0.1215232801865241, - "grad_norm": 0.9657085537910461, - "learning_rate": 1.9914817773064756e-05, - "loss": 0.4088, - "step": 1290 - }, - { - "epoch": 0.12161748427969195, - "grad_norm": 0.9171475172042847, - "learning_rate": 1.9914620986995677e-05, - "loss": 0.422, - "step": 1291 - }, - { - "epoch": 0.1217116883728598, - "grad_norm": 0.9751061797142029, - "learning_rate": 1.9914423974857907e-05, - "loss": 0.4084, - "step": 1292 - }, - { - "epoch": 0.12180589246602765, - "grad_norm": 0.8386781811714172, - "learning_rate": 1.9914226736655936e-05, - "loss": 0.368, - "step": 1293 - }, - { - "epoch": 0.1219000965591955, - "grad_norm": 0.8160566091537476, - "learning_rate": 1.9914029272394265e-05, - "loss": 0.4005, - "step": 1294 
- }, - { - "epoch": 0.12199430065236334, - "grad_norm": 0.8406798839569092, - "learning_rate": 1.9913831582077393e-05, - "loss": 0.4013, - "step": 1295 - }, - { - "epoch": 0.12208850474553119, - "grad_norm": 0.9921380877494812, - "learning_rate": 1.991363366570983e-05, - "loss": 0.4594, - "step": 1296 - }, - { - "epoch": 0.12218270883869904, - "grad_norm": 0.8436982035636902, - "learning_rate": 1.9913435523296085e-05, - "loss": 0.3873, - "step": 1297 - }, - { - "epoch": 0.12227691293186689, - "grad_norm": 0.9785180687904358, - "learning_rate": 1.991323715484068e-05, - "loss": 0.4451, - "step": 1298 - }, - { - "epoch": 0.12237111702503474, - "grad_norm": 0.8816764950752258, - "learning_rate": 1.9913038560348135e-05, - "loss": 0.3974, - "step": 1299 - }, - { - "epoch": 0.12246532111820259, - "grad_norm": 0.9945760369300842, - "learning_rate": 1.991283973982298e-05, - "loss": 0.433, - "step": 1300 - }, - { - "epoch": 0.12255952521137044, - "grad_norm": 1.4750216007232666, - "learning_rate": 1.9912640693269754e-05, - "loss": 0.4598, - "step": 1301 - }, - { - "epoch": 0.12265372930453829, - "grad_norm": 0.9504375457763672, - "learning_rate": 1.9912441420692986e-05, - "loss": 0.4003, - "step": 1302 - }, - { - "epoch": 0.12274793339770614, - "grad_norm": 1.042178988456726, - "learning_rate": 1.9912241922097225e-05, - "loss": 0.3946, - "step": 1303 - }, - { - "epoch": 0.12284213749087398, - "grad_norm": 1.08685302734375, - "learning_rate": 1.991204219748702e-05, - "loss": 0.4636, - "step": 1304 - }, - { - "epoch": 0.12293634158404183, - "grad_norm": 0.8524930477142334, - "learning_rate": 1.991184224686692e-05, - "loss": 0.3845, - "step": 1305 - }, - { - "epoch": 0.12303054567720967, - "grad_norm": 0.9859117269515991, - "learning_rate": 1.9911642070241487e-05, - "loss": 0.4568, - "step": 1306 - }, - { - "epoch": 0.12312474977037752, - "grad_norm": 0.8629952669143677, - "learning_rate": 1.991144166761529e-05, - "loss": 0.3693, - "step": 1307 - }, - { - "epoch": 0.12321895386354537, - "grad_norm": 0.9870200157165527, - "learning_rate": 1.9911241038992893e-05, - "loss": 0.4597, - "step": 1308 - }, - { - "epoch": 0.12331315795671322, - "grad_norm": 0.8797679543495178, - "learning_rate": 1.991104018437887e-05, - "loss": 0.3981, - "step": 1309 - }, - { - "epoch": 0.12340736204988106, - "grad_norm": 0.9802530407905579, - "learning_rate": 1.9910839103777805e-05, - "loss": 0.3633, - "step": 1310 - }, - { - "epoch": 0.12350156614304891, - "grad_norm": 0.8832137584686279, - "learning_rate": 1.9910637797194284e-05, - "loss": 0.4296, - "step": 1311 - }, - { - "epoch": 0.12359577023621676, - "grad_norm": 0.992841899394989, - "learning_rate": 1.991043626463289e-05, - "loss": 0.4085, - "step": 1312 - }, - { - "epoch": 0.12368997432938461, - "grad_norm": 1.0332484245300293, - "learning_rate": 1.9910234506098223e-05, - "loss": 0.3891, - "step": 1313 - }, - { - "epoch": 0.12378417842255246, - "grad_norm": 0.883171558380127, - "learning_rate": 1.9910032521594884e-05, - "loss": 0.3858, - "step": 1314 - }, - { - "epoch": 0.12387838251572031, - "grad_norm": 0.92570960521698, - "learning_rate": 1.9909830311127476e-05, - "loss": 0.3472, - "step": 1315 - }, - { - "epoch": 0.12397258660888816, - "grad_norm": 0.9907909035682678, - "learning_rate": 1.9909627874700615e-05, - "loss": 0.4656, - "step": 1316 - }, - { - "epoch": 0.124066790702056, - "grad_norm": 0.9065894484519958, - "learning_rate": 1.990942521231891e-05, - "loss": 0.4012, - "step": 1317 - }, - { - "epoch": 0.12416099479522386, - "grad_norm": 0.8376958966255188, - 
"learning_rate": 1.9909222323986984e-05, - "loss": 0.4036, - "step": 1318 - }, - { - "epoch": 0.1242551988883917, - "grad_norm": 0.966644287109375, - "learning_rate": 1.9909019209709465e-05, - "loss": 0.4196, - "step": 1319 - }, - { - "epoch": 0.12434940298155955, - "grad_norm": 0.819900393486023, - "learning_rate": 1.990881586949098e-05, - "loss": 0.3805, - "step": 1320 - }, - { - "epoch": 0.1244436070747274, - "grad_norm": 1.0244163274765015, - "learning_rate": 1.9908612303336174e-05, - "loss": 0.475, - "step": 1321 - }, - { - "epoch": 0.12453781116789525, - "grad_norm": 0.8884019255638123, - "learning_rate": 1.9908408511249682e-05, - "loss": 0.3703, - "step": 1322 - }, - { - "epoch": 0.12463201526106309, - "grad_norm": 0.9183163642883301, - "learning_rate": 1.9908204493236153e-05, - "loss": 0.4034, - "step": 1323 - }, - { - "epoch": 0.12472621935423094, - "grad_norm": 0.8690988421440125, - "learning_rate": 1.9908000249300238e-05, - "loss": 0.3972, - "step": 1324 - }, - { - "epoch": 0.12482042344739878, - "grad_norm": 0.9924200177192688, - "learning_rate": 1.990779577944659e-05, - "loss": 0.4268, - "step": 1325 - }, - { - "epoch": 0.12491462754056663, - "grad_norm": 0.9585636854171753, - "learning_rate": 1.9907591083679883e-05, - "loss": 0.3632, - "step": 1326 - }, - { - "epoch": 0.1250088316337345, - "grad_norm": 0.8694257736206055, - "learning_rate": 1.9907386162004775e-05, - "loss": 0.3931, - "step": 1327 - }, - { - "epoch": 0.12510303572690235, - "grad_norm": 0.9315399527549744, - "learning_rate": 1.9907181014425936e-05, - "loss": 0.41, - "step": 1328 - }, - { - "epoch": 0.1251972398200702, - "grad_norm": 0.9038522243499756, - "learning_rate": 1.990697564094805e-05, - "loss": 0.415, - "step": 1329 - }, - { - "epoch": 0.12529144391323804, - "grad_norm": 0.974467933177948, - "learning_rate": 1.99067700415758e-05, - "loss": 0.3798, - "step": 1330 - }, - { - "epoch": 0.1253856480064059, - "grad_norm": 0.9763962030410767, - "learning_rate": 1.990656421631387e-05, - "loss": 0.4399, - "step": 1331 - }, - { - "epoch": 0.1254798520995737, - "grad_norm": 0.8021870851516724, - "learning_rate": 1.9906358165166954e-05, - "loss": 0.3525, - "step": 1332 - }, - { - "epoch": 0.12557405619274156, - "grad_norm": 0.9852421879768372, - "learning_rate": 1.9906151888139753e-05, - "loss": 0.4017, - "step": 1333 - }, - { - "epoch": 0.1256682602859094, - "grad_norm": 0.8635556697845459, - "learning_rate": 1.990594538523697e-05, - "loss": 0.3696, - "step": 1334 - }, - { - "epoch": 0.12576246437907726, - "grad_norm": 0.9297789931297302, - "learning_rate": 1.9905738656463313e-05, - "loss": 0.3839, - "step": 1335 - }, - { - "epoch": 0.1258566684722451, - "grad_norm": 0.8907140493392944, - "learning_rate": 1.990553170182349e-05, - "loss": 0.4279, - "step": 1336 - }, - { - "epoch": 0.12595087256541296, - "grad_norm": 0.9584831595420837, - "learning_rate": 1.990532452132223e-05, - "loss": 0.3897, - "step": 1337 - }, - { - "epoch": 0.1260450766585808, - "grad_norm": 0.9200226664543152, - "learning_rate": 1.990511711496425e-05, - "loss": 0.3503, - "step": 1338 - }, - { - "epoch": 0.12613928075174866, - "grad_norm": 1.0291590690612793, - "learning_rate": 1.9904909482754283e-05, - "loss": 0.4569, - "step": 1339 - }, - { - "epoch": 0.1262334848449165, - "grad_norm": 0.8867980241775513, - "learning_rate": 1.990470162469706e-05, - "loss": 0.3825, - "step": 1340 - }, - { - "epoch": 0.12632768893808435, - "grad_norm": 0.9662911295890808, - "learning_rate": 1.990449354079732e-05, - "loss": 0.4518, - "step": 1341 - }, - { - 
"epoch": 0.1264218930312522, - "grad_norm": 0.9172998666763306, - "learning_rate": 1.990428523105981e-05, - "loss": 0.4395, - "step": 1342 - }, - { - "epoch": 0.12651609712442005, - "grad_norm": 1.0754560232162476, - "learning_rate": 1.9904076695489282e-05, - "loss": 0.4464, - "step": 1343 - }, - { - "epoch": 0.1266103012175879, - "grad_norm": 0.8089383244514465, - "learning_rate": 1.990386793409049e-05, - "loss": 0.3422, - "step": 1344 - }, - { - "epoch": 0.12670450531075575, - "grad_norm": 0.9732319116592407, - "learning_rate": 1.990365894686819e-05, - "loss": 0.4273, - "step": 1345 - }, - { - "epoch": 0.1267987094039236, - "grad_norm": 0.862170398235321, - "learning_rate": 1.990344973382715e-05, - "loss": 0.3798, - "step": 1346 - }, - { - "epoch": 0.12689291349709145, - "grad_norm": 0.8946678638458252, - "learning_rate": 1.9903240294972138e-05, - "loss": 0.4353, - "step": 1347 - }, - { - "epoch": 0.1269871175902593, - "grad_norm": 1.014513373374939, - "learning_rate": 1.9903030630307937e-05, - "loss": 0.4471, - "step": 1348 - }, - { - "epoch": 0.12708132168342715, - "grad_norm": 1.0029696226119995, - "learning_rate": 1.990282073983932e-05, - "loss": 0.4232, - "step": 1349 - }, - { - "epoch": 0.127175525776595, - "grad_norm": 0.8123729825019836, - "learning_rate": 1.990261062357107e-05, - "loss": 0.3467, - "step": 1350 - }, - { - "epoch": 0.12726972986976284, - "grad_norm": 0.9378588795661926, - "learning_rate": 1.9902400281507986e-05, - "loss": 0.3825, - "step": 1351 - }, - { - "epoch": 0.1273639339629307, - "grad_norm": 0.8797309398651123, - "learning_rate": 1.9902189713654864e-05, - "loss": 0.3793, - "step": 1352 - }, - { - "epoch": 0.12745813805609854, - "grad_norm": 0.9510934948921204, - "learning_rate": 1.99019789200165e-05, - "loss": 0.4268, - "step": 1353 - }, - { - "epoch": 0.1275523421492664, - "grad_norm": 1.120252013206482, - "learning_rate": 1.9901767900597704e-05, - "loss": 0.395, - "step": 1354 - }, - { - "epoch": 0.12764654624243424, - "grad_norm": 0.8084336519241333, - "learning_rate": 1.9901556655403285e-05, - "loss": 0.3601, - "step": 1355 - }, - { - "epoch": 0.1277407503356021, - "grad_norm": 0.8959628939628601, - "learning_rate": 1.9901345184438065e-05, - "loss": 0.4102, - "step": 1356 - }, - { - "epoch": 0.12783495442876994, - "grad_norm": 0.9366346001625061, - "learning_rate": 1.9901133487706858e-05, - "loss": 0.3752, - "step": 1357 - }, - { - "epoch": 0.12792915852193779, - "grad_norm": 1.0087013244628906, - "learning_rate": 1.9900921565214496e-05, - "loss": 0.4343, - "step": 1358 - }, - { - "epoch": 0.12802336261510563, - "grad_norm": 0.9205380082130432, - "learning_rate": 1.990070941696581e-05, - "loss": 0.3817, - "step": 1359 - }, - { - "epoch": 0.12811756670827348, - "grad_norm": 0.9430442452430725, - "learning_rate": 1.990049704296564e-05, - "loss": 0.4224, - "step": 1360 - }, - { - "epoch": 0.12821177080144133, - "grad_norm": 0.9510387778282166, - "learning_rate": 1.9900284443218825e-05, - "loss": 0.3808, - "step": 1361 - }, - { - "epoch": 0.12830597489460918, - "grad_norm": 0.7826544046401978, - "learning_rate": 1.9900071617730212e-05, - "loss": 0.3318, - "step": 1362 - }, - { - "epoch": 0.12840017898777703, - "grad_norm": 0.8948149681091309, - "learning_rate": 1.989985856650466e-05, - "loss": 0.3918, - "step": 1363 - }, - { - "epoch": 0.12849438308094488, - "grad_norm": 0.8969107866287231, - "learning_rate": 1.9899645289547017e-05, - "loss": 0.4408, - "step": 1364 - }, - { - "epoch": 0.12858858717411273, - "grad_norm": 0.9934526085853577, - 
"learning_rate": 1.9899431786862152e-05, - "loss": 0.4391, - "step": 1365 - }, - { - "epoch": 0.12868279126728055, - "grad_norm": 1.0301306247711182, - "learning_rate": 1.9899218058454933e-05, - "loss": 0.4187, - "step": 1366 - }, - { - "epoch": 0.1287769953604484, - "grad_norm": 0.9641363620758057, - "learning_rate": 1.9899004104330232e-05, - "loss": 0.4363, - "step": 1367 - }, - { - "epoch": 0.12887119945361625, - "grad_norm": 0.9025760293006897, - "learning_rate": 1.989878992449293e-05, - "loss": 0.389, - "step": 1368 - }, - { - "epoch": 0.1289654035467841, - "grad_norm": 0.8414632678031921, - "learning_rate": 1.989857551894791e-05, - "loss": 0.3644, - "step": 1369 - }, - { - "epoch": 0.12905960763995195, - "grad_norm": 0.9714823365211487, - "learning_rate": 1.9898360887700056e-05, - "loss": 0.4002, - "step": 1370 - }, - { - "epoch": 0.1291538117331198, - "grad_norm": 0.9647928476333618, - "learning_rate": 1.989814603075427e-05, - "loss": 0.3831, - "step": 1371 - }, - { - "epoch": 0.12924801582628764, - "grad_norm": 0.9595774412155151, - "learning_rate": 1.9897930948115444e-05, - "loss": 0.4624, - "step": 1372 - }, - { - "epoch": 0.1293422199194555, - "grad_norm": 0.9072070717811584, - "learning_rate": 1.9897715639788483e-05, - "loss": 0.4342, - "step": 1373 - }, - { - "epoch": 0.12943642401262334, - "grad_norm": 0.9162281155586243, - "learning_rate": 1.98975001057783e-05, - "loss": 0.4131, - "step": 1374 - }, - { - "epoch": 0.1295306281057912, - "grad_norm": 0.9252980947494507, - "learning_rate": 1.989728434608981e-05, - "loss": 0.3638, - "step": 1375 - }, - { - "epoch": 0.12962483219895904, - "grad_norm": 1.0374664068222046, - "learning_rate": 1.9897068360727933e-05, - "loss": 0.4535, - "step": 1376 - }, - { - "epoch": 0.1297190362921269, - "grad_norm": 0.9338465929031372, - "learning_rate": 1.9896852149697584e-05, - "loss": 0.3735, - "step": 1377 - }, - { - "epoch": 0.12981324038529474, - "grad_norm": 0.9684034585952759, - "learning_rate": 1.9896635713003706e-05, - "loss": 0.3973, - "step": 1378 - }, - { - "epoch": 0.12990744447846259, - "grad_norm": 0.8919799327850342, - "learning_rate": 1.9896419050651222e-05, - "loss": 0.4, - "step": 1379 - }, - { - "epoch": 0.13000164857163043, - "grad_norm": 0.9550321102142334, - "learning_rate": 1.9896202162645088e-05, - "loss": 0.4329, - "step": 1380 - }, - { - "epoch": 0.13009585266479828, - "grad_norm": 0.9291896224021912, - "learning_rate": 1.989598504899023e-05, - "loss": 0.4063, - "step": 1381 - }, - { - "epoch": 0.13019005675796613, - "grad_norm": 1.0140936374664307, - "learning_rate": 1.9895767709691617e-05, - "loss": 0.4293, - "step": 1382 - }, - { - "epoch": 0.13028426085113398, - "grad_norm": 0.8429473042488098, - "learning_rate": 1.989555014475419e-05, - "loss": 0.3547, - "step": 1383 - }, - { - "epoch": 0.13037846494430183, - "grad_norm": 0.9772353768348694, - "learning_rate": 1.9895332354182917e-05, - "loss": 0.3938, - "step": 1384 - }, - { - "epoch": 0.13047266903746968, - "grad_norm": 0.877116322517395, - "learning_rate": 1.9895114337982765e-05, - "loss": 0.3986, - "step": 1385 - }, - { - "epoch": 0.13056687313063753, - "grad_norm": 0.929966151714325, - "learning_rate": 1.98948960961587e-05, - "loss": 0.4453, - "step": 1386 - }, - { - "epoch": 0.13066107722380538, - "grad_norm": 0.831558108329773, - "learning_rate": 1.9894677628715706e-05, - "loss": 0.3872, - "step": 1387 - }, - { - "epoch": 0.13075528131697323, - "grad_norm": 0.9165502190589905, - "learning_rate": 1.9894458935658752e-05, - "loss": 0.3369, - "step": 1388 - }, - 
{ - "epoch": 0.13084948541014108, - "grad_norm": 0.9749155640602112, - "learning_rate": 1.989424001699284e-05, - "loss": 0.4507, - "step": 1389 - }, - { - "epoch": 0.13094368950330892, - "grad_norm": 0.9584722518920898, - "learning_rate": 1.989402087272295e-05, - "loss": 0.415, - "step": 1390 - }, - { - "epoch": 0.13103789359647677, - "grad_norm": 0.9711076617240906, - "learning_rate": 1.9893801502854084e-05, - "loss": 0.4496, - "step": 1391 - }, - { - "epoch": 0.13113209768964462, - "grad_norm": 0.9657142758369446, - "learning_rate": 1.989358190739124e-05, - "loss": 0.4149, - "step": 1392 - }, - { - "epoch": 0.13122630178281247, - "grad_norm": 0.9022928476333618, - "learning_rate": 1.9893362086339428e-05, - "loss": 0.4107, - "step": 1393 - }, - { - "epoch": 0.13132050587598032, - "grad_norm": 0.9422235488891602, - "learning_rate": 1.9893142039703662e-05, - "loss": 0.4029, - "step": 1394 - }, - { - "epoch": 0.13141470996914817, - "grad_norm": 0.9564428329467773, - "learning_rate": 1.989292176748896e-05, - "loss": 0.3828, - "step": 1395 - }, - { - "epoch": 0.13150891406231602, - "grad_norm": 0.9131461381912231, - "learning_rate": 1.989270126970034e-05, - "loss": 0.4382, - "step": 1396 - }, - { - "epoch": 0.13160311815548387, - "grad_norm": 0.9364377856254578, - "learning_rate": 1.989248054634283e-05, - "loss": 0.3859, - "step": 1397 - }, - { - "epoch": 0.13169732224865172, - "grad_norm": 1.1112204790115356, - "learning_rate": 1.9892259597421466e-05, - "loss": 0.4799, - "step": 1398 - }, - { - "epoch": 0.13179152634181956, - "grad_norm": 0.9870941042900085, - "learning_rate": 1.9892038422941283e-05, - "loss": 0.3575, - "step": 1399 - }, - { - "epoch": 0.1318857304349874, - "grad_norm": 1.0119566917419434, - "learning_rate": 1.9891817022907326e-05, - "loss": 0.4426, - "step": 1400 - }, - { - "epoch": 0.13197993452815523, - "grad_norm": 0.8759822845458984, - "learning_rate": 1.9891595397324647e-05, - "loss": 0.3548, - "step": 1401 - }, - { - "epoch": 0.13207413862132308, - "grad_norm": 0.8948006629943848, - "learning_rate": 1.9891373546198293e-05, - "loss": 0.3958, - "step": 1402 - }, - { - "epoch": 0.13216834271449093, - "grad_norm": 1.0464363098144531, - "learning_rate": 1.9891151469533324e-05, - "loss": 0.4386, - "step": 1403 - }, - { - "epoch": 0.13226254680765878, - "grad_norm": 0.8876453638076782, - "learning_rate": 1.9890929167334803e-05, - "loss": 0.4215, - "step": 1404 - }, - { - "epoch": 0.13235675090082663, - "grad_norm": 0.9584650993347168, - "learning_rate": 1.98907066396078e-05, - "loss": 0.4056, - "step": 1405 - }, - { - "epoch": 0.13245095499399448, - "grad_norm": 0.952660083770752, - "learning_rate": 1.9890483886357393e-05, - "loss": 0.4232, - "step": 1406 - }, - { - "epoch": 0.13254515908716233, - "grad_norm": 0.9979923963546753, - "learning_rate": 1.9890260907588653e-05, - "loss": 0.4204, - "step": 1407 - }, - { - "epoch": 0.13263936318033018, - "grad_norm": 0.8783887028694153, - "learning_rate": 1.989003770330667e-05, - "loss": 0.3743, - "step": 1408 - }, - { - "epoch": 0.13273356727349803, - "grad_norm": 0.8242788910865784, - "learning_rate": 1.9889814273516536e-05, - "loss": 0.3699, - "step": 1409 - }, - { - "epoch": 0.13282777136666588, - "grad_norm": 0.8861004710197449, - "learning_rate": 1.988959061822334e-05, - "loss": 0.4144, - "step": 1410 - }, - { - "epoch": 0.13292197545983372, - "grad_norm": 0.8475175499916077, - "learning_rate": 1.988936673743218e-05, - "loss": 0.3934, - "step": 1411 - }, - { - "epoch": 0.13301617955300157, - "grad_norm": 0.9742621183395386, - 
"learning_rate": 1.988914263114817e-05, - "loss": 0.4338, - "step": 1412 - }, - { - "epoch": 0.13311038364616942, - "grad_norm": 0.8204099535942078, - "learning_rate": 1.9888918299376407e-05, - "loss": 0.3724, - "step": 1413 - }, - { - "epoch": 0.13320458773933727, - "grad_norm": 0.8778209090232849, - "learning_rate": 1.9888693742122017e-05, - "loss": 0.4136, - "step": 1414 - }, - { - "epoch": 0.13329879183250512, - "grad_norm": 0.9400804042816162, - "learning_rate": 1.9888468959390116e-05, - "loss": 0.4077, - "step": 1415 - }, - { - "epoch": 0.13339299592567297, - "grad_norm": 0.9618299603462219, - "learning_rate": 1.9888243951185834e-05, - "loss": 0.4257, - "step": 1416 - }, - { - "epoch": 0.13348720001884082, - "grad_norm": 0.9355471134185791, - "learning_rate": 1.9888018717514294e-05, - "loss": 0.3974, - "step": 1417 - }, - { - "epoch": 0.13358140411200867, - "grad_norm": 0.8644047379493713, - "learning_rate": 1.9887793258380635e-05, - "loss": 0.397, - "step": 1418 - }, - { - "epoch": 0.13367560820517652, - "grad_norm": 0.8545727729797363, - "learning_rate": 1.9887567573789997e-05, - "loss": 0.3949, - "step": 1419 - }, - { - "epoch": 0.13376981229834436, - "grad_norm": 0.9894800186157227, - "learning_rate": 1.9887341663747527e-05, - "loss": 0.3858, - "step": 1420 - }, - { - "epoch": 0.1338640163915122, - "grad_norm": 0.9499911665916443, - "learning_rate": 1.9887115528258375e-05, - "loss": 0.429, - "step": 1421 - }, - { - "epoch": 0.13395822048468006, - "grad_norm": 0.8847710490226746, - "learning_rate": 1.98868891673277e-05, - "loss": 0.4077, - "step": 1422 - }, - { - "epoch": 0.1340524245778479, - "grad_norm": 0.9346426129341125, - "learning_rate": 1.9886662580960664e-05, - "loss": 0.4167, - "step": 1423 - }, - { - "epoch": 0.13414662867101576, - "grad_norm": 0.8743436336517334, - "learning_rate": 1.988643576916243e-05, - "loss": 0.3943, - "step": 1424 - }, - { - "epoch": 0.1342408327641836, - "grad_norm": 0.8709068894386292, - "learning_rate": 1.988620873193817e-05, - "loss": 0.3863, - "step": 1425 - }, - { - "epoch": 0.13433503685735146, - "grad_norm": 0.876372754573822, - "learning_rate": 1.988598146929306e-05, - "loss": 0.4137, - "step": 1426 - }, - { - "epoch": 0.1344292409505193, - "grad_norm": 0.8295745253562927, - "learning_rate": 1.9885753981232284e-05, - "loss": 0.4034, - "step": 1427 - }, - { - "epoch": 0.13452344504368716, - "grad_norm": 0.9980677962303162, - "learning_rate": 1.9885526267761032e-05, - "loss": 0.4629, - "step": 1428 - }, - { - "epoch": 0.134617649136855, - "grad_norm": 0.8880258798599243, - "learning_rate": 1.9885298328884488e-05, - "loss": 0.3946, - "step": 1429 - }, - { - "epoch": 0.13471185323002285, - "grad_norm": 0.8771301507949829, - "learning_rate": 1.9885070164607855e-05, - "loss": 0.4202, - "step": 1430 - }, - { - "epoch": 0.1348060573231907, - "grad_norm": 0.91556316614151, - "learning_rate": 1.9884841774936337e-05, - "loss": 0.4194, - "step": 1431 - }, - { - "epoch": 0.13490026141635855, - "grad_norm": 0.8650923371315002, - "learning_rate": 1.988461315987514e-05, - "loss": 0.3744, - "step": 1432 - }, - { - "epoch": 0.1349944655095264, - "grad_norm": 0.9072087407112122, - "learning_rate": 1.9884384319429472e-05, - "loss": 0.3709, - "step": 1433 - }, - { - "epoch": 0.13508866960269425, - "grad_norm": 0.9184080958366394, - "learning_rate": 1.988415525360456e-05, - "loss": 0.3984, - "step": 1434 - }, - { - "epoch": 0.13518287369586207, - "grad_norm": 0.8926185369491577, - "learning_rate": 1.988392596240562e-05, - "loss": 0.367, - "step": 1435 - }, - 
{ - "epoch": 0.13527707778902992, - "grad_norm": 0.9695875644683838, - "learning_rate": 1.988369644583788e-05, - "loss": 0.3913, - "step": 1436 - }, - { - "epoch": 0.13537128188219777, - "grad_norm": 0.9078540205955505, - "learning_rate": 1.988346670390658e-05, - "loss": 0.3695, - "step": 1437 - }, - { - "epoch": 0.13546548597536562, - "grad_norm": 0.922349750995636, - "learning_rate": 1.988323673661695e-05, - "loss": 0.3906, - "step": 1438 - }, - { - "epoch": 0.13555969006853347, - "grad_norm": 0.9601535797119141, - "learning_rate": 1.9883006543974238e-05, - "loss": 0.4213, - "step": 1439 - }, - { - "epoch": 0.13565389416170132, - "grad_norm": 0.9403246641159058, - "learning_rate": 1.9882776125983696e-05, - "loss": 0.4118, - "step": 1440 - }, - { - "epoch": 0.13574809825486917, - "grad_norm": 0.9881746768951416, - "learning_rate": 1.988254548265057e-05, - "loss": 0.4009, - "step": 1441 - }, - { - "epoch": 0.13584230234803701, - "grad_norm": 0.9344496130943298, - "learning_rate": 1.988231461398013e-05, - "loss": 0.4319, - "step": 1442 - }, - { - "epoch": 0.13593650644120486, - "grad_norm": 0.8761599063873291, - "learning_rate": 1.9882083519977623e-05, - "loss": 0.3881, - "step": 1443 - }, - { - "epoch": 0.1360307105343727, - "grad_norm": 0.921055793762207, - "learning_rate": 1.9881852200648338e-05, - "loss": 0.4169, - "step": 1444 - }, - { - "epoch": 0.13612491462754056, - "grad_norm": 0.8214818239212036, - "learning_rate": 1.9881620655997535e-05, - "loss": 0.3779, - "step": 1445 - }, - { - "epoch": 0.1362191187207084, - "grad_norm": 0.8684057593345642, - "learning_rate": 1.9881388886030503e-05, - "loss": 0.4157, - "step": 1446 - }, - { - "epoch": 0.13631332281387626, - "grad_norm": 0.9184609651565552, - "learning_rate": 1.9881156890752517e-05, - "loss": 0.3879, - "step": 1447 - }, - { - "epoch": 0.1364075269070441, - "grad_norm": 1.0127325057983398, - "learning_rate": 1.9880924670168877e-05, - "loss": 0.4588, - "step": 1448 - }, - { - "epoch": 0.13650173100021196, - "grad_norm": 0.8852851986885071, - "learning_rate": 1.988069222428487e-05, - "loss": 0.3958, - "step": 1449 - }, - { - "epoch": 0.1365959350933798, - "grad_norm": 0.9741384387016296, - "learning_rate": 1.9880459553105804e-05, - "loss": 0.4393, - "step": 1450 - }, - { - "epoch": 0.13669013918654765, - "grad_norm": 0.8674829602241516, - "learning_rate": 1.9880226656636977e-05, - "loss": 0.3795, - "step": 1451 - }, - { - "epoch": 0.1367843432797155, - "grad_norm": 0.8313913345336914, - "learning_rate": 1.9879993534883702e-05, - "loss": 0.337, - "step": 1452 - }, - { - "epoch": 0.13687854737288335, - "grad_norm": 1.0854746103286743, - "learning_rate": 1.9879760187851297e-05, - "loss": 0.411, - "step": 1453 - }, - { - "epoch": 0.1369727514660512, - "grad_norm": 0.9327601194381714, - "learning_rate": 1.9879526615545076e-05, - "loss": 0.4311, - "step": 1454 - }, - { - "epoch": 0.13706695555921905, - "grad_norm": 0.8348062038421631, - "learning_rate": 1.9879292817970372e-05, - "loss": 0.3995, - "step": 1455 - }, - { - "epoch": 0.1371611596523869, - "grad_norm": 0.8668566942214966, - "learning_rate": 1.9879058795132514e-05, - "loss": 0.3749, - "step": 1456 - }, - { - "epoch": 0.13725536374555475, - "grad_norm": 0.9548278450965881, - "learning_rate": 1.9878824547036838e-05, - "loss": 0.3863, - "step": 1457 - }, - { - "epoch": 0.1373495678387226, - "grad_norm": 1.0754507780075073, - "learning_rate": 1.987859007368868e-05, - "loss": 0.3791, - "step": 1458 - }, - { - "epoch": 0.13744377193189045, - "grad_norm": 0.8589168787002563, - 
"learning_rate": 1.9878355375093395e-05, - "loss": 0.4323, - "step": 1459 - }, - { - "epoch": 0.1375379760250583, - "grad_norm": 0.7914716005325317, - "learning_rate": 1.9878120451256325e-05, - "loss": 0.3678, - "step": 1460 - }, - { - "epoch": 0.13763218011822614, - "grad_norm": 0.9526810050010681, - "learning_rate": 1.9877885302182836e-05, - "loss": 0.3896, - "step": 1461 - }, - { - "epoch": 0.137726384211394, - "grad_norm": 0.8691291213035583, - "learning_rate": 1.987764992787829e-05, - "loss": 0.3984, - "step": 1462 - }, - { - "epoch": 0.13782058830456184, - "grad_norm": 1.0639795064926147, - "learning_rate": 1.9877414328348045e-05, - "loss": 0.3786, - "step": 1463 - }, - { - "epoch": 0.1379147923977297, - "grad_norm": 0.9380475878715515, - "learning_rate": 1.9877178503597476e-05, - "loss": 0.4111, - "step": 1464 - }, - { - "epoch": 0.13800899649089754, - "grad_norm": 0.9449976086616516, - "learning_rate": 1.9876942453631962e-05, - "loss": 0.3952, - "step": 1465 - }, - { - "epoch": 0.1381032005840654, - "grad_norm": 0.9195828437805176, - "learning_rate": 1.9876706178456884e-05, - "loss": 0.4033, - "step": 1466 - }, - { - "epoch": 0.13819740467723324, - "grad_norm": 0.807725727558136, - "learning_rate": 1.9876469678077634e-05, - "loss": 0.3879, - "step": 1467 - }, - { - "epoch": 0.1382916087704011, - "grad_norm": 0.9038071632385254, - "learning_rate": 1.98762329524996e-05, - "loss": 0.4214, - "step": 1468 - }, - { - "epoch": 0.13838581286356894, - "grad_norm": 0.9691506028175354, - "learning_rate": 1.987599600172818e-05, - "loss": 0.3749, - "step": 1469 - }, - { - "epoch": 0.13848001695673676, - "grad_norm": 0.8957289457321167, - "learning_rate": 1.987575882576878e-05, - "loss": 0.4419, - "step": 1470 - }, - { - "epoch": 0.1385742210499046, - "grad_norm": 1.027971863746643, - "learning_rate": 1.9875521424626802e-05, - "loss": 0.4452, - "step": 1471 - }, - { - "epoch": 0.13866842514307245, - "grad_norm": 1.001281499862671, - "learning_rate": 1.9875283798307664e-05, - "loss": 0.469, - "step": 1472 - }, - { - "epoch": 0.1387626292362403, - "grad_norm": 0.8806446194648743, - "learning_rate": 1.9875045946816784e-05, - "loss": 0.3987, - "step": 1473 - }, - { - "epoch": 0.13885683332940815, - "grad_norm": 0.9220328330993652, - "learning_rate": 1.9874807870159583e-05, - "loss": 0.3969, - "step": 1474 - }, - { - "epoch": 0.138951037422576, - "grad_norm": 0.9270832538604736, - "learning_rate": 1.9874569568341492e-05, - "loss": 0.3844, - "step": 1475 - }, - { - "epoch": 0.13904524151574385, - "grad_norm": 0.8893280029296875, - "learning_rate": 1.9874331041367946e-05, - "loss": 0.4448, - "step": 1476 - }, - { - "epoch": 0.1391394456089117, - "grad_norm": 0.8257045149803162, - "learning_rate": 1.987409228924438e-05, - "loss": 0.3667, - "step": 1477 - }, - { - "epoch": 0.13923364970207955, - "grad_norm": 0.930780291557312, - "learning_rate": 1.9873853311976235e-05, - "loss": 0.3743, - "step": 1478 - }, - { - "epoch": 0.1393278537952474, - "grad_norm": 0.9231829643249512, - "learning_rate": 1.9873614109568967e-05, - "loss": 0.3672, - "step": 1479 - }, - { - "epoch": 0.13942205788841525, - "grad_norm": 0.9927639961242676, - "learning_rate": 1.987337468202803e-05, - "loss": 0.388, - "step": 1480 - }, - { - "epoch": 0.1395162619815831, - "grad_norm": 0.8849180936813354, - "learning_rate": 1.9873135029358877e-05, - "loss": 0.4029, - "step": 1481 - }, - { - "epoch": 0.13961046607475094, - "grad_norm": 1.0194668769836426, - "learning_rate": 1.9872895151566975e-05, - "loss": 0.4712, - "step": 1482 - }, - { - 
"epoch": 0.1397046701679188, - "grad_norm": 0.9328537583351135, - "learning_rate": 1.9872655048657798e-05, - "loss": 0.4131, - "step": 1483 - }, - { - "epoch": 0.13979887426108664, - "grad_norm": 0.8719202280044556, - "learning_rate": 1.9872414720636815e-05, - "loss": 0.4055, - "step": 1484 - }, - { - "epoch": 0.1398930783542545, - "grad_norm": 0.817643940448761, - "learning_rate": 1.9872174167509515e-05, - "loss": 0.3533, - "step": 1485 - }, - { - "epoch": 0.13998728244742234, - "grad_norm": 0.9272326231002808, - "learning_rate": 1.987193338928137e-05, - "loss": 0.4082, - "step": 1486 - }, - { - "epoch": 0.1400814865405902, - "grad_norm": 0.9619669914245605, - "learning_rate": 1.987169238595788e-05, - "loss": 0.4678, - "step": 1487 - }, - { - "epoch": 0.14017569063375804, - "grad_norm": 0.9963866472244263, - "learning_rate": 1.9871451157544534e-05, - "loss": 0.4112, - "step": 1488 - }, - { - "epoch": 0.1402698947269259, - "grad_norm": 0.9173933863639832, - "learning_rate": 1.9871209704046835e-05, - "loss": 0.4102, - "step": 1489 - }, - { - "epoch": 0.14036409882009374, - "grad_norm": 0.9025501608848572, - "learning_rate": 1.9870968025470293e-05, - "loss": 0.4033, - "step": 1490 - }, - { - "epoch": 0.14045830291326158, - "grad_norm": 0.838789165019989, - "learning_rate": 1.9870726121820408e-05, - "loss": 0.3815, - "step": 1491 - }, - { - "epoch": 0.14055250700642943, - "grad_norm": 0.7602670788764954, - "learning_rate": 1.9870483993102704e-05, - "loss": 0.3658, - "step": 1492 - }, - { - "epoch": 0.14064671109959728, - "grad_norm": 0.8975557684898376, - "learning_rate": 1.98702416393227e-05, - "loss": 0.379, - "step": 1493 - }, - { - "epoch": 0.14074091519276513, - "grad_norm": 0.9210837483406067, - "learning_rate": 1.9869999060485927e-05, - "loss": 0.3882, - "step": 1494 - }, - { - "epoch": 0.14083511928593298, - "grad_norm": 0.8833448886871338, - "learning_rate": 1.9869756256597905e-05, - "loss": 0.3674, - "step": 1495 - }, - { - "epoch": 0.14092932337910083, - "grad_norm": 0.7752265334129333, - "learning_rate": 1.986951322766418e-05, - "loss": 0.3241, - "step": 1496 - }, - { - "epoch": 0.14102352747226868, - "grad_norm": 1.0192210674285889, - "learning_rate": 1.9869269973690287e-05, - "loss": 0.4657, - "step": 1497 - }, - { - "epoch": 0.14111773156543653, - "grad_norm": 1.0888237953186035, - "learning_rate": 1.9869026494681776e-05, - "loss": 0.3868, - "step": 1498 - }, - { - "epoch": 0.14121193565860438, - "grad_norm": 0.8453440070152283, - "learning_rate": 1.98687827906442e-05, - "loss": 0.3684, - "step": 1499 - }, - { - "epoch": 0.14130613975177223, - "grad_norm": 0.9561695456504822, - "learning_rate": 1.9868538861583112e-05, - "loss": 0.4303, - "step": 1500 - }, - { - "epoch": 0.14140034384494007, - "grad_norm": 0.8988255858421326, - "learning_rate": 1.9868294707504077e-05, - "loss": 0.3689, - "step": 1501 - }, - { - "epoch": 0.14149454793810792, - "grad_norm": 0.8484389781951904, - "learning_rate": 1.986805032841266e-05, - "loss": 0.3691, - "step": 1502 - }, - { - "epoch": 0.14158875203127577, - "grad_norm": 0.9466550946235657, - "learning_rate": 1.9867805724314438e-05, - "loss": 0.4252, - "step": 1503 - }, - { - "epoch": 0.14168295612444362, - "grad_norm": 0.9282593727111816, - "learning_rate": 1.986756089521498e-05, - "loss": 0.4692, - "step": 1504 - }, - { - "epoch": 0.14177716021761144, - "grad_norm": 0.8028031587600708, - "learning_rate": 1.9867315841119878e-05, - "loss": 0.3777, - "step": 1505 - }, - { - "epoch": 0.1418713643107793, - "grad_norm": 0.8507390022277832, - 
"learning_rate": 1.9867070562034712e-05, - "loss": 0.3991, - "step": 1506 - }, - { - "epoch": 0.14196556840394714, - "grad_norm": 0.9004268646240234, - "learning_rate": 1.986682505796508e-05, - "loss": 0.3723, - "step": 1507 - }, - { - "epoch": 0.142059772497115, - "grad_norm": 0.9132339954376221, - "learning_rate": 1.986657932891657e-05, - "loss": 0.4161, - "step": 1508 - }, - { - "epoch": 0.14215397659028284, - "grad_norm": 0.9097040891647339, - "learning_rate": 1.98663333748948e-05, - "loss": 0.3664, - "step": 1509 - }, - { - "epoch": 0.1422481806834507, - "grad_norm": 0.8887312412261963, - "learning_rate": 1.9866087195905365e-05, - "loss": 0.4225, - "step": 1510 - }, - { - "epoch": 0.14234238477661854, - "grad_norm": 0.8952493071556091, - "learning_rate": 1.9865840791953886e-05, - "loss": 0.406, - "step": 1511 - }, - { - "epoch": 0.14243658886978638, - "grad_norm": 0.9774636626243591, - "learning_rate": 1.9865594163045984e-05, - "loss": 0.4049, - "step": 1512 - }, - { - "epoch": 0.14253079296295423, - "grad_norm": 0.9388694167137146, - "learning_rate": 1.986534730918727e-05, - "loss": 0.3929, - "step": 1513 - }, - { - "epoch": 0.14262499705612208, - "grad_norm": 0.841415524482727, - "learning_rate": 1.9865100230383384e-05, - "loss": 0.3623, - "step": 1514 - }, - { - "epoch": 0.14271920114928993, - "grad_norm": 0.9300689697265625, - "learning_rate": 1.9864852926639955e-05, - "loss": 0.4353, - "step": 1515 - }, - { - "epoch": 0.14281340524245778, - "grad_norm": 0.9600360989570618, - "learning_rate": 1.9864605397962624e-05, - "loss": 0.4265, - "step": 1516 - }, - { - "epoch": 0.14290760933562563, - "grad_norm": 0.8746339678764343, - "learning_rate": 1.9864357644357036e-05, - "loss": 0.3741, - "step": 1517 - }, - { - "epoch": 0.14300181342879348, - "grad_norm": 1.224995493888855, - "learning_rate": 1.9864109665828835e-05, - "loss": 0.3732, - "step": 1518 - }, - { - "epoch": 0.14309601752196133, - "grad_norm": 0.9189375638961792, - "learning_rate": 1.986386146238368e-05, - "loss": 0.4001, - "step": 1519 - }, - { - "epoch": 0.14319022161512918, - "grad_norm": 0.8859357237815857, - "learning_rate": 1.9863613034027224e-05, - "loss": 0.4363, - "step": 1520 - }, - { - "epoch": 0.14328442570829703, - "grad_norm": 0.9655970931053162, - "learning_rate": 1.9863364380765144e-05, - "loss": 0.4381, - "step": 1521 - }, - { - "epoch": 0.14337862980146487, - "grad_norm": 0.9548221826553345, - "learning_rate": 1.9863115502603097e-05, - "loss": 0.3759, - "step": 1522 - }, - { - "epoch": 0.14347283389463272, - "grad_norm": 0.9607380032539368, - "learning_rate": 1.9862866399546762e-05, - "loss": 0.422, - "step": 1523 - }, - { - "epoch": 0.14356703798780057, - "grad_norm": 0.8558836579322815, - "learning_rate": 1.9862617071601825e-05, - "loss": 0.3665, - "step": 1524 - }, - { - "epoch": 0.14366124208096842, - "grad_norm": 0.9772205948829651, - "learning_rate": 1.9862367518773963e-05, - "loss": 0.4648, - "step": 1525 - }, - { - "epoch": 0.14375544617413627, - "grad_norm": 1.015708565711975, - "learning_rate": 1.986211774106887e-05, - "loss": 0.3942, - "step": 1526 - }, - { - "epoch": 0.14384965026730412, - "grad_norm": 1.1574227809906006, - "learning_rate": 1.986186773849224e-05, - "loss": 0.3912, - "step": 1527 - }, - { - "epoch": 0.14394385436047197, - "grad_norm": 0.8729410171508789, - "learning_rate": 1.9861617511049773e-05, - "loss": 0.4035, - "step": 1528 - }, - { - "epoch": 0.14403805845363982, - "grad_norm": 0.9859470725059509, - "learning_rate": 1.9861367058747175e-05, - "loss": 0.4218, - "step": 1529 
- }, - { - "epoch": 0.14413226254680767, - "grad_norm": 0.9877438545227051, - "learning_rate": 1.986111638159016e-05, - "loss": 0.4801, - "step": 1530 - }, - { - "epoch": 0.14422646663997551, - "grad_norm": 0.8367306590080261, - "learning_rate": 1.986086547958444e-05, - "loss": 0.3492, - "step": 1531 - }, - { - "epoch": 0.14432067073314336, - "grad_norm": 0.8954234719276428, - "learning_rate": 1.9860614352735737e-05, - "loss": 0.421, - "step": 1532 - }, - { - "epoch": 0.1444148748263112, - "grad_norm": 1.0337097644805908, - "learning_rate": 1.9860363001049775e-05, - "loss": 0.4151, - "step": 1533 - }, - { - "epoch": 0.14450907891947906, - "grad_norm": 0.8945963382720947, - "learning_rate": 1.986011142453229e-05, - "loss": 0.3675, - "step": 1534 - }, - { - "epoch": 0.1446032830126469, - "grad_norm": 0.9584569931030273, - "learning_rate": 1.9859859623189015e-05, - "loss": 0.3832, - "step": 1535 - }, - { - "epoch": 0.14469748710581476, - "grad_norm": 0.9371083974838257, - "learning_rate": 1.985960759702569e-05, - "loss": 0.4199, - "step": 1536 - }, - { - "epoch": 0.1447916911989826, - "grad_norm": 0.967965841293335, - "learning_rate": 1.9859355346048065e-05, - "loss": 0.3832, - "step": 1537 - }, - { - "epoch": 0.14488589529215046, - "grad_norm": 0.859764814376831, - "learning_rate": 1.9859102870261887e-05, - "loss": 0.3217, - "step": 1538 - }, - { - "epoch": 0.14498009938531828, - "grad_norm": 0.9350664615631104, - "learning_rate": 1.9858850169672924e-05, - "loss": 0.4131, - "step": 1539 - }, - { - "epoch": 0.14507430347848613, - "grad_norm": 0.9000515341758728, - "learning_rate": 1.9858597244286923e-05, - "loss": 0.4084, - "step": 1540 - }, - { - "epoch": 0.14516850757165398, - "grad_norm": 0.9772770404815674, - "learning_rate": 1.985834409410966e-05, - "loss": 0.4033, - "step": 1541 - }, - { - "epoch": 0.14526271166482183, - "grad_norm": 0.8754479289054871, - "learning_rate": 1.9858090719146908e-05, - "loss": 0.4101, - "step": 1542 - }, - { - "epoch": 0.14535691575798967, - "grad_norm": 0.8851545453071594, - "learning_rate": 1.9857837119404438e-05, - "loss": 0.4137, - "step": 1543 - }, - { - "epoch": 0.14545111985115752, - "grad_norm": 1.01539945602417, - "learning_rate": 1.985758329488804e-05, - "loss": 0.4516, - "step": 1544 - }, - { - "epoch": 0.14554532394432537, - "grad_norm": 0.85793536901474, - "learning_rate": 1.9857329245603495e-05, - "loss": 0.3974, - "step": 1545 - }, - { - "epoch": 0.14563952803749322, - "grad_norm": 0.9479626417160034, - "learning_rate": 1.98570749715566e-05, - "loss": 0.4112, - "step": 1546 - }, - { - "epoch": 0.14573373213066107, - "grad_norm": 1.0737402439117432, - "learning_rate": 1.985682047275315e-05, - "loss": 0.3961, - "step": 1547 - }, - { - "epoch": 0.14582793622382892, - "grad_norm": 0.9221171140670776, - "learning_rate": 1.985656574919895e-05, - "loss": 0.3951, - "step": 1548 - }, - { - "epoch": 0.14592214031699677, - "grad_norm": 0.9689660668373108, - "learning_rate": 1.985631080089981e-05, - "loss": 0.3869, - "step": 1549 - }, - { - "epoch": 0.14601634441016462, - "grad_norm": 0.93543940782547, - "learning_rate": 1.9856055627861537e-05, - "loss": 0.3848, - "step": 1550 - }, - { - "epoch": 0.14611054850333247, - "grad_norm": 0.9185600876808167, - "learning_rate": 1.9855800230089955e-05, - "loss": 0.4093, - "step": 1551 - }, - { - "epoch": 0.14620475259650031, - "grad_norm": 0.8836493492126465, - "learning_rate": 1.9855544607590886e-05, - "loss": 0.3893, - "step": 1552 - }, - { - "epoch": 0.14629895668966816, - "grad_norm": 0.8601776361465454, - 
"learning_rate": 1.9855288760370154e-05, - "loss": 0.3919, - "step": 1553 - }, - { - "epoch": 0.146393160782836, - "grad_norm": 0.9663978219032288, - "learning_rate": 1.9855032688433603e-05, - "loss": 0.4357, - "step": 1554 - }, - { - "epoch": 0.14648736487600386, - "grad_norm": 0.8969776630401611, - "learning_rate": 1.985477639178706e-05, - "loss": 0.4259, - "step": 1555 - }, - { - "epoch": 0.1465815689691717, - "grad_norm": 0.9066229462623596, - "learning_rate": 1.9854519870436376e-05, - "loss": 0.3964, - "step": 1556 - }, - { - "epoch": 0.14667577306233956, - "grad_norm": 0.8684629797935486, - "learning_rate": 1.98542631243874e-05, - "loss": 0.3706, - "step": 1557 - }, - { - "epoch": 0.1467699771555074, - "grad_norm": 0.9415647387504578, - "learning_rate": 1.9854006153645983e-05, - "loss": 0.3839, - "step": 1558 - }, - { - "epoch": 0.14686418124867526, - "grad_norm": 0.8227822184562683, - "learning_rate": 1.985374895821799e-05, - "loss": 0.3556, - "step": 1559 - }, - { - "epoch": 0.1469583853418431, - "grad_norm": 0.9608584642410278, - "learning_rate": 1.985349153810928e-05, - "loss": 0.4359, - "step": 1560 - }, - { - "epoch": 0.14705258943501096, - "grad_norm": 0.9011355042457581, - "learning_rate": 1.9853233893325722e-05, - "loss": 0.4004, - "step": 1561 - }, - { - "epoch": 0.1471467935281788, - "grad_norm": 0.8800787925720215, - "learning_rate": 1.9852976023873196e-05, - "loss": 0.4057, - "step": 1562 - }, - { - "epoch": 0.14724099762134665, - "grad_norm": 1.0086630582809448, - "learning_rate": 1.9852717929757573e-05, - "loss": 0.452, - "step": 1563 - }, - { - "epoch": 0.1473352017145145, - "grad_norm": 0.927713930606842, - "learning_rate": 1.985245961098475e-05, - "loss": 0.395, - "step": 1564 - }, - { - "epoch": 0.14742940580768235, - "grad_norm": 0.7828457355499268, - "learning_rate": 1.9852201067560607e-05, - "loss": 0.3728, - "step": 1565 - }, - { - "epoch": 0.1475236099008502, - "grad_norm": 0.8762266635894775, - "learning_rate": 1.9851942299491043e-05, - "loss": 0.3737, - "step": 1566 - }, - { - "epoch": 0.14761781399401805, - "grad_norm": 0.967050313949585, - "learning_rate": 1.9851683306781962e-05, - "loss": 0.424, - "step": 1567 - }, - { - "epoch": 0.1477120180871859, - "grad_norm": 1.0238690376281738, - "learning_rate": 1.9851424089439263e-05, - "loss": 0.4298, - "step": 1568 - }, - { - "epoch": 0.14780622218035375, - "grad_norm": 0.8824076652526855, - "learning_rate": 1.985116464746886e-05, - "loss": 0.389, - "step": 1569 - }, - { - "epoch": 0.1479004262735216, - "grad_norm": 0.900592565536499, - "learning_rate": 1.985090498087667e-05, - "loss": 0.3649, - "step": 1570 - }, - { - "epoch": 0.14799463036668944, - "grad_norm": 0.9643316268920898, - "learning_rate": 1.9850645089668608e-05, - "loss": 0.4403, - "step": 1571 - }, - { - "epoch": 0.1480888344598573, - "grad_norm": 0.9956216216087341, - "learning_rate": 1.9850384973850603e-05, - "loss": 0.3818, - "step": 1572 - }, - { - "epoch": 0.14818303855302514, - "grad_norm": 1.0450785160064697, - "learning_rate": 1.985012463342859e-05, - "loss": 0.3986, - "step": 1573 - }, - { - "epoch": 0.14827724264619296, - "grad_norm": 0.8111163973808289, - "learning_rate": 1.98498640684085e-05, - "loss": 0.367, - "step": 1574 - }, - { - "epoch": 0.1483714467393608, - "grad_norm": 1.066404104232788, - "learning_rate": 1.9849603278796275e-05, - "loss": 0.4107, - "step": 1575 - }, - { - "epoch": 0.14846565083252866, - "grad_norm": 0.8669083714485168, - "learning_rate": 1.9849342264597864e-05, - "loss": 0.3758, - "step": 1576 - }, - { - 
"epoch": 0.1485598549256965, - "grad_norm": 0.8758968114852905, - "learning_rate": 1.984908102581922e-05, - "loss": 0.3903, - "step": 1577 - }, - { - "epoch": 0.14865405901886436, - "grad_norm": 0.8721821904182434, - "learning_rate": 1.9848819562466293e-05, - "loss": 0.3796, - "step": 1578 - }, - { - "epoch": 0.1487482631120322, - "grad_norm": 0.9264764785766602, - "learning_rate": 1.984855787454505e-05, - "loss": 0.4283, - "step": 1579 - }, - { - "epoch": 0.14884246720520006, - "grad_norm": 0.8868725299835205, - "learning_rate": 1.9848295962061455e-05, - "loss": 0.4002, - "step": 1580 - }, - { - "epoch": 0.1489366712983679, - "grad_norm": 0.897774338722229, - "learning_rate": 1.9848033825021482e-05, - "loss": 0.3563, - "step": 1581 - }, - { - "epoch": 0.14903087539153576, - "grad_norm": 0.9838619828224182, - "learning_rate": 1.9847771463431106e-05, - "loss": 0.4587, - "step": 1582 - }, - { - "epoch": 0.1491250794847036, - "grad_norm": 1.0027905702590942, - "learning_rate": 1.9847508877296314e-05, - "loss": 0.4377, - "step": 1583 - }, - { - "epoch": 0.14921928357787145, - "grad_norm": 0.868817150592804, - "learning_rate": 1.9847246066623086e-05, - "loss": 0.3713, - "step": 1584 - }, - { - "epoch": 0.1493134876710393, - "grad_norm": 0.870667576789856, - "learning_rate": 1.9846983031417423e-05, - "loss": 0.4033, - "step": 1585 - }, - { - "epoch": 0.14940769176420715, - "grad_norm": 0.9443947076797485, - "learning_rate": 1.9846719771685317e-05, - "loss": 0.3993, - "step": 1586 - }, - { - "epoch": 0.149501895857375, - "grad_norm": 1.0219776630401611, - "learning_rate": 1.984645628743277e-05, - "loss": 0.416, - "step": 1587 - }, - { - "epoch": 0.14959609995054285, - "grad_norm": 0.8650853633880615, - "learning_rate": 1.9846192578665792e-05, - "loss": 0.4026, - "step": 1588 - }, - { - "epoch": 0.1496903040437107, - "grad_norm": 0.9072234034538269, - "learning_rate": 1.9845928645390397e-05, - "loss": 0.3898, - "step": 1589 - }, - { - "epoch": 0.14978450813687855, - "grad_norm": 0.8843415975570679, - "learning_rate": 1.9845664487612602e-05, - "loss": 0.3917, - "step": 1590 - }, - { - "epoch": 0.1498787122300464, - "grad_norm": 1.0103051662445068, - "learning_rate": 1.984540010533843e-05, - "loss": 0.3474, - "step": 1591 - }, - { - "epoch": 0.14997291632321424, - "grad_norm": 0.7909182906150818, - "learning_rate": 1.984513549857391e-05, - "loss": 0.3542, - "step": 1592 - }, - { - "epoch": 0.1500671204163821, - "grad_norm": 0.9175459742546082, - "learning_rate": 1.9844870667325073e-05, - "loss": 0.4226, - "step": 1593 - }, - { - "epoch": 0.15016132450954994, - "grad_norm": 0.9356094002723694, - "learning_rate": 1.984460561159796e-05, - "loss": 0.4256, - "step": 1594 - }, - { - "epoch": 0.1502555286027178, - "grad_norm": 0.9353117346763611, - "learning_rate": 1.9844340331398613e-05, - "loss": 0.4015, - "step": 1595 - }, - { - "epoch": 0.15034973269588564, - "grad_norm": 0.834352433681488, - "learning_rate": 1.9844074826733085e-05, - "loss": 0.3815, - "step": 1596 - }, - { - "epoch": 0.1504439367890535, - "grad_norm": 0.9325944185256958, - "learning_rate": 1.9843809097607428e-05, - "loss": 0.4054, - "step": 1597 - }, - { - "epoch": 0.15053814088222134, - "grad_norm": 0.9160271286964417, - "learning_rate": 1.9843543144027695e-05, - "loss": 0.3888, - "step": 1598 - }, - { - "epoch": 0.1506323449753892, - "grad_norm": 0.8725243806838989, - "learning_rate": 1.9843276965999956e-05, - "loss": 0.3732, - "step": 1599 - }, - { - "epoch": 0.15072654906855704, - "grad_norm": 0.745169997215271, - "learning_rate": 
1.984301056353028e-05, - "loss": 0.3401, - "step": 1600 - }, - { - "epoch": 0.15082075316172489, - "grad_norm": 0.823818027973175, - "learning_rate": 1.9842743936624743e-05, - "loss": 0.3884, - "step": 1601 - }, - { - "epoch": 0.15091495725489273, - "grad_norm": 0.8428900241851807, - "learning_rate": 1.9842477085289417e-05, - "loss": 0.3873, - "step": 1602 - }, - { - "epoch": 0.15100916134806058, - "grad_norm": 0.8186632394790649, - "learning_rate": 1.98422100095304e-05, - "loss": 0.3721, - "step": 1603 - }, - { - "epoch": 0.15110336544122843, - "grad_norm": 0.9110180139541626, - "learning_rate": 1.9841942709353765e-05, - "loss": 0.387, - "step": 1604 - }, - { - "epoch": 0.15119756953439628, - "grad_norm": 0.8393080234527588, - "learning_rate": 1.984167518476562e-05, - "loss": 0.3652, - "step": 1605 - }, - { - "epoch": 0.15129177362756413, - "grad_norm": 0.9034487009048462, - "learning_rate": 1.9841407435772056e-05, - "loss": 0.4181, - "step": 1606 - }, - { - "epoch": 0.15138597772073198, - "grad_norm": 0.8732578158378601, - "learning_rate": 1.9841139462379188e-05, - "loss": 0.4112, - "step": 1607 - }, - { - "epoch": 0.1514801818138998, - "grad_norm": 0.8301663398742676, - "learning_rate": 1.984087126459312e-05, - "loss": 0.3851, - "step": 1608 - }, - { - "epoch": 0.15157438590706765, - "grad_norm": 0.7910391688346863, - "learning_rate": 1.984060284241996e-05, - "loss": 0.3436, - "step": 1609 - }, - { - "epoch": 0.1516685900002355, - "grad_norm": 0.9603357315063477, - "learning_rate": 1.9840334195865846e-05, - "loss": 0.4142, - "step": 1610 - }, - { - "epoch": 0.15176279409340335, - "grad_norm": 0.8982900977134705, - "learning_rate": 1.984006532493689e-05, - "loss": 0.401, - "step": 1611 - }, - { - "epoch": 0.1518569981865712, - "grad_norm": 0.8096650838851929, - "learning_rate": 1.9839796229639226e-05, - "loss": 0.4028, - "step": 1612 - }, - { - "epoch": 0.15195120227973904, - "grad_norm": 0.8454784750938416, - "learning_rate": 1.9839526909978994e-05, - "loss": 0.3682, - "step": 1613 - }, - { - "epoch": 0.1520454063729069, - "grad_norm": 0.847681999206543, - "learning_rate": 1.9839257365962327e-05, - "loss": 0.3267, - "step": 1614 - }, - { - "epoch": 0.15213961046607474, - "grad_norm": 0.8493847250938416, - "learning_rate": 1.9838987597595377e-05, - "loss": 0.3922, - "step": 1615 - }, - { - "epoch": 0.1522338145592426, - "grad_norm": 0.8904460072517395, - "learning_rate": 1.9838717604884293e-05, - "loss": 0.3803, - "step": 1616 - }, - { - "epoch": 0.15232801865241044, - "grad_norm": 0.8912856578826904, - "learning_rate": 1.9838447387835233e-05, - "loss": 0.3799, - "step": 1617 - }, - { - "epoch": 0.1524222227455783, - "grad_norm": 0.8856900334358215, - "learning_rate": 1.9838176946454358e-05, - "loss": 0.3947, - "step": 1618 - }, - { - "epoch": 0.15251642683874614, - "grad_norm": 0.9876874685287476, - "learning_rate": 1.9837906280747832e-05, - "loss": 0.4121, - "step": 1619 - }, - { - "epoch": 0.152610630931914, - "grad_norm": 1.1396756172180176, - "learning_rate": 1.9837635390721828e-05, - "loss": 0.3945, - "step": 1620 - }, - { - "epoch": 0.15270483502508184, - "grad_norm": 1.0195603370666504, - "learning_rate": 1.9837364276382523e-05, - "loss": 0.4291, - "step": 1621 - }, - { - "epoch": 0.15279903911824969, - "grad_norm": 0.8799949884414673, - "learning_rate": 1.98370929377361e-05, - "loss": 0.4075, - "step": 1622 - }, - { - "epoch": 0.15289324321141753, - "grad_norm": 0.9101582765579224, - "learning_rate": 1.9836821374788742e-05, - "loss": 0.4404, - "step": 1623 - }, - { - "epoch": 
0.15298744730458538, - "grad_norm": 0.9574260711669922, - "learning_rate": 1.9836549587546646e-05, - "loss": 0.4033, - "step": 1624 - }, - { - "epoch": 0.15308165139775323, - "grad_norm": 0.8660268783569336, - "learning_rate": 1.9836277576016006e-05, - "loss": 0.3577, - "step": 1625 - }, - { - "epoch": 0.15317585549092108, - "grad_norm": 1.075222373008728, - "learning_rate": 1.9836005340203026e-05, - "loss": 0.4407, - "step": 1626 - }, - { - "epoch": 0.15327005958408893, - "grad_norm": 0.9746554493904114, - "learning_rate": 1.9835732880113912e-05, - "loss": 0.4519, - "step": 1627 - }, - { - "epoch": 0.15336426367725678, - "grad_norm": 0.9732099771499634, - "learning_rate": 1.9835460195754878e-05, - "loss": 0.4228, - "step": 1628 - }, - { - "epoch": 0.15345846777042463, - "grad_norm": 0.9029169082641602, - "learning_rate": 1.983518728713214e-05, - "loss": 0.3808, - "step": 1629 - }, - { - "epoch": 0.15355267186359248, - "grad_norm": 0.8966045379638672, - "learning_rate": 1.983491415425192e-05, - "loss": 0.4243, - "step": 1630 - }, - { - "epoch": 0.15364687595676033, - "grad_norm": 0.9925733208656311, - "learning_rate": 1.9834640797120448e-05, - "loss": 0.4396, - "step": 1631 - }, - { - "epoch": 0.15374108004992817, - "grad_norm": 0.9034045934677124, - "learning_rate": 1.9834367215743958e-05, - "loss": 0.4096, - "step": 1632 - }, - { - "epoch": 0.15383528414309602, - "grad_norm": 0.8905357122421265, - "learning_rate": 1.9834093410128682e-05, - "loss": 0.3721, - "step": 1633 - }, - { - "epoch": 0.15392948823626387, - "grad_norm": 0.9021356701850891, - "learning_rate": 1.9833819380280875e-05, - "loss": 0.3734, - "step": 1634 - }, - { - "epoch": 0.15402369232943172, - "grad_norm": 0.8837315440177917, - "learning_rate": 1.983354512620677e-05, - "loss": 0.3884, - "step": 1635 - }, - { - "epoch": 0.15411789642259957, - "grad_norm": 0.9213358163833618, - "learning_rate": 1.983327064791263e-05, - "loss": 0.4331, - "step": 1636 - }, - { - "epoch": 0.15421210051576742, - "grad_norm": 0.8524973392486572, - "learning_rate": 1.9832995945404715e-05, - "loss": 0.342, - "step": 1637 - }, - { - "epoch": 0.15430630460893527, - "grad_norm": 0.8630796074867249, - "learning_rate": 1.9832721018689285e-05, - "loss": 0.3689, - "step": 1638 - }, - { - "epoch": 0.15440050870210312, - "grad_norm": 1.0397515296936035, - "learning_rate": 1.9832445867772606e-05, - "loss": 0.4243, - "step": 1639 - }, - { - "epoch": 0.15449471279527097, - "grad_norm": 0.9372056722640991, - "learning_rate": 1.983217049266096e-05, - "loss": 0.422, - "step": 1640 - }, - { - "epoch": 0.15458891688843882, - "grad_norm": 0.8932582139968872, - "learning_rate": 1.9831894893360617e-05, - "loss": 0.3874, - "step": 1641 - }, - { - "epoch": 0.15468312098160666, - "grad_norm": 0.9698359966278076, - "learning_rate": 1.9831619069877867e-05, - "loss": 0.3943, - "step": 1642 - }, - { - "epoch": 0.15477732507477449, - "grad_norm": 0.7887962460517883, - "learning_rate": 1.9831343022218998e-05, - "loss": 0.327, - "step": 1643 - }, - { - "epoch": 0.15487152916794233, - "grad_norm": 0.9296134114265442, - "learning_rate": 1.98310667503903e-05, - "loss": 0.3722, - "step": 1644 - }, - { - "epoch": 0.15496573326111018, - "grad_norm": 0.9402286410331726, - "learning_rate": 1.9830790254398078e-05, - "loss": 0.4177, - "step": 1645 - }, - { - "epoch": 0.15505993735427803, - "grad_norm": 0.8214709162712097, - "learning_rate": 1.9830513534248635e-05, - "loss": 0.3799, - "step": 1646 - }, - { - "epoch": 0.15515414144744588, - "grad_norm": 0.8556320071220398, - 
"learning_rate": 1.983023658994828e-05, - "loss": 0.3611, - "step": 1647 - }, - { - "epoch": 0.15524834554061373, - "grad_norm": 0.8597833514213562, - "learning_rate": 1.982995942150333e-05, - "loss": 0.3714, - "step": 1648 - }, - { - "epoch": 0.15534254963378158, - "grad_norm": 0.9170204401016235, - "learning_rate": 1.9829682028920102e-05, - "loss": 0.4182, - "step": 1649 - }, - { - "epoch": 0.15543675372694943, - "grad_norm": 0.840418815612793, - "learning_rate": 1.982940441220492e-05, - "loss": 0.3977, - "step": 1650 - }, - { - "epoch": 0.15553095782011728, - "grad_norm": 1.011001467704773, - "learning_rate": 1.9829126571364114e-05, - "loss": 0.4434, - "step": 1651 - }, - { - "epoch": 0.15562516191328513, - "grad_norm": 0.782615065574646, - "learning_rate": 1.9828848506404025e-05, - "loss": 0.3729, - "step": 1652 - }, - { - "epoch": 0.15571936600645297, - "grad_norm": 0.8115490674972534, - "learning_rate": 1.982857021733099e-05, - "loss": 0.3722, - "step": 1653 - }, - { - "epoch": 0.15581357009962082, - "grad_norm": 0.9715111255645752, - "learning_rate": 1.982829170415135e-05, - "loss": 0.3962, - "step": 1654 - }, - { - "epoch": 0.15590777419278867, - "grad_norm": 1.00404691696167, - "learning_rate": 1.9828012966871463e-05, - "loss": 0.4373, - "step": 1655 - }, - { - "epoch": 0.15600197828595652, - "grad_norm": 0.8605353832244873, - "learning_rate": 1.9827734005497677e-05, - "loss": 0.401, - "step": 1656 - }, - { - "epoch": 0.15609618237912437, - "grad_norm": 0.8684647679328918, - "learning_rate": 1.982745482003636e-05, - "loss": 0.3755, - "step": 1657 - }, - { - "epoch": 0.15619038647229222, - "grad_norm": 1.017825961112976, - "learning_rate": 1.9827175410493874e-05, - "loss": 0.3973, - "step": 1658 - }, - { - "epoch": 0.15628459056546007, - "grad_norm": 0.8150186538696289, - "learning_rate": 1.9826895776876594e-05, - "loss": 0.3319, - "step": 1659 - }, - { - "epoch": 0.15637879465862792, - "grad_norm": 0.8719780445098877, - "learning_rate": 1.9826615919190886e-05, - "loss": 0.3668, - "step": 1660 - }, - { - "epoch": 0.15647299875179577, - "grad_norm": 1.0488592386245728, - "learning_rate": 1.982633583744314e-05, - "loss": 0.414, - "step": 1661 - }, - { - "epoch": 0.15656720284496362, - "grad_norm": 0.8532881736755371, - "learning_rate": 1.982605553163974e-05, - "loss": 0.3757, - "step": 1662 - }, - { - "epoch": 0.15666140693813146, - "grad_norm": 0.9295089840888977, - "learning_rate": 1.9825775001787084e-05, - "loss": 0.3953, - "step": 1663 - }, - { - "epoch": 0.1567556110312993, - "grad_norm": 1.0750173330307007, - "learning_rate": 1.9825494247891557e-05, - "loss": 0.4015, - "step": 1664 - }, - { - "epoch": 0.15684981512446716, - "grad_norm": 0.8728357553482056, - "learning_rate": 1.9825213269959565e-05, - "loss": 0.3892, - "step": 1665 - }, - { - "epoch": 0.156944019217635, - "grad_norm": 0.9398319125175476, - "learning_rate": 1.9824932067997516e-05, - "loss": 0.4319, - "step": 1666 - }, - { - "epoch": 0.15703822331080286, - "grad_norm": 0.9120649099349976, - "learning_rate": 1.982465064201182e-05, - "loss": 0.4217, - "step": 1667 - }, - { - "epoch": 0.1571324274039707, - "grad_norm": 0.934065580368042, - "learning_rate": 1.98243689920089e-05, - "loss": 0.3464, - "step": 1668 - }, - { - "epoch": 0.15722663149713856, - "grad_norm": 0.8361982107162476, - "learning_rate": 1.982408711799517e-05, - "loss": 0.3657, - "step": 1669 - }, - { - "epoch": 0.1573208355903064, - "grad_norm": 0.9220681190490723, - "learning_rate": 1.982380501997706e-05, - "loss": 0.3386, - "step": 1670 - }, - { - 
"epoch": 0.15741503968347426, - "grad_norm": 0.8694973587989807, - "learning_rate": 1.9823522697961004e-05, - "loss": 0.3802, - "step": 1671 - }, - { - "epoch": 0.1575092437766421, - "grad_norm": 0.797346830368042, - "learning_rate": 1.9823240151953435e-05, - "loss": 0.3777, - "step": 1672 - }, - { - "epoch": 0.15760344786980995, - "grad_norm": 1.0393630266189575, - "learning_rate": 1.9822957381960802e-05, - "loss": 0.4052, - "step": 1673 - }, - { - "epoch": 0.1576976519629778, - "grad_norm": 1.0109260082244873, - "learning_rate": 1.9822674387989548e-05, - "loss": 0.4404, - "step": 1674 - }, - { - "epoch": 0.15779185605614565, - "grad_norm": 1.0433164834976196, - "learning_rate": 1.9822391170046127e-05, - "loss": 0.3708, - "step": 1675 - }, - { - "epoch": 0.1578860601493135, - "grad_norm": 0.8799563646316528, - "learning_rate": 1.9822107728137e-05, - "loss": 0.3827, - "step": 1676 - }, - { - "epoch": 0.15798026424248135, - "grad_norm": 0.9168888926506042, - "learning_rate": 1.982182406226862e-05, - "loss": 0.3589, - "step": 1677 - }, - { - "epoch": 0.15807446833564917, - "grad_norm": 0.9474049210548401, - "learning_rate": 1.9821540172447468e-05, - "loss": 0.3881, - "step": 1678 - }, - { - "epoch": 0.15816867242881702, - "grad_norm": 0.820391833782196, - "learning_rate": 1.982125605868001e-05, - "loss": 0.4008, - "step": 1679 - }, - { - "epoch": 0.15826287652198487, - "grad_norm": 0.857425332069397, - "learning_rate": 1.9820971720972723e-05, - "loss": 0.3829, - "step": 1680 - }, - { - "epoch": 0.15835708061515272, - "grad_norm": 0.8522971868515015, - "learning_rate": 1.9820687159332087e-05, - "loss": 0.4139, - "step": 1681 - }, - { - "epoch": 0.15845128470832057, - "grad_norm": 0.9846909642219543, - "learning_rate": 1.9820402373764604e-05, - "loss": 0.3765, - "step": 1682 - }, - { - "epoch": 0.15854548880148842, - "grad_norm": 0.9963827729225159, - "learning_rate": 1.9820117364276755e-05, - "loss": 0.4687, - "step": 1683 - }, - { - "epoch": 0.15863969289465626, - "grad_norm": 0.8635364770889282, - "learning_rate": 1.9819832130875044e-05, - "loss": 0.377, - "step": 1684 - }, - { - "epoch": 0.1587338969878241, - "grad_norm": 0.9445222616195679, - "learning_rate": 1.9819546673565975e-05, - "loss": 0.3951, - "step": 1685 - }, - { - "epoch": 0.15882810108099196, - "grad_norm": 0.8922547101974487, - "learning_rate": 1.9819260992356055e-05, - "loss": 0.3762, - "step": 1686 - }, - { - "epoch": 0.1589223051741598, - "grad_norm": 0.8504922389984131, - "learning_rate": 1.98189750872518e-05, - "loss": 0.404, - "step": 1687 - }, - { - "epoch": 0.15901650926732766, - "grad_norm": 0.9565543532371521, - "learning_rate": 1.9818688958259724e-05, - "loss": 0.4007, - "step": 1688 - }, - { - "epoch": 0.1591107133604955, - "grad_norm": 0.9205008149147034, - "learning_rate": 1.981840260538636e-05, - "loss": 0.3954, - "step": 1689 - }, - { - "epoch": 0.15920491745366336, - "grad_norm": 0.8571425080299377, - "learning_rate": 1.9818116028638224e-05, - "loss": 0.3808, - "step": 1690 - }, - { - "epoch": 0.1592991215468312, - "grad_norm": 1.0957655906677246, - "learning_rate": 1.9817829228021867e-05, - "loss": 0.4422, - "step": 1691 - }, - { - "epoch": 0.15939332563999906, - "grad_norm": 0.917987048625946, - "learning_rate": 1.9817542203543816e-05, - "loss": 0.3664, - "step": 1692 - }, - { - "epoch": 0.1594875297331669, - "grad_norm": 0.8339284658432007, - "learning_rate": 1.981725495521062e-05, - "loss": 0.3904, - "step": 1693 - }, - { - "epoch": 0.15958173382633475, - "grad_norm": 0.9408522248268127, - 
"learning_rate": 1.981696748302883e-05, - "loss": 0.4107, - "step": 1694 - }, - { - "epoch": 0.1596759379195026, - "grad_norm": 0.914353609085083, - "learning_rate": 1.9816679787005e-05, - "loss": 0.3887, - "step": 1695 - }, - { - "epoch": 0.15977014201267045, - "grad_norm": 0.859802782535553, - "learning_rate": 1.9816391867145685e-05, - "loss": 0.3477, - "step": 1696 - }, - { - "epoch": 0.1598643461058383, - "grad_norm": 0.9680219888687134, - "learning_rate": 1.9816103723457454e-05, - "loss": 0.3448, - "step": 1697 - }, - { - "epoch": 0.15995855019900615, - "grad_norm": 1.1162104606628418, - "learning_rate": 1.981581535594688e-05, - "loss": 0.3909, - "step": 1698 - }, - { - "epoch": 0.160052754292174, - "grad_norm": 1.0936061143875122, - "learning_rate": 1.9815526764620532e-05, - "loss": 0.4501, - "step": 1699 - }, - { - "epoch": 0.16014695838534185, - "grad_norm": 0.8204500079154968, - "learning_rate": 1.9815237949484998e-05, - "loss": 0.4177, - "step": 1700 - }, - { - "epoch": 0.1602411624785097, - "grad_norm": 0.8426515460014343, - "learning_rate": 1.981494891054686e-05, - "loss": 0.4003, - "step": 1701 - }, - { - "epoch": 0.16033536657167755, - "grad_norm": 0.8222750425338745, - "learning_rate": 1.9814659647812702e-05, - "loss": 0.3544, - "step": 1702 - }, - { - "epoch": 0.1604295706648454, - "grad_norm": 1.068662405014038, - "learning_rate": 1.981437016128913e-05, - "loss": 0.4428, - "step": 1703 - }, - { - "epoch": 0.16052377475801324, - "grad_norm": 0.8300691246986389, - "learning_rate": 1.981408045098274e-05, - "loss": 0.3453, - "step": 1704 - }, - { - "epoch": 0.1606179788511811, - "grad_norm": 0.835858941078186, - "learning_rate": 1.9813790516900134e-05, - "loss": 0.3334, - "step": 1705 - }, - { - "epoch": 0.16071218294434894, - "grad_norm": 0.8602931499481201, - "learning_rate": 1.981350035904793e-05, - "loss": 0.3839, - "step": 1706 - }, - { - "epoch": 0.1608063870375168, - "grad_norm": 0.9044490456581116, - "learning_rate": 1.981320997743274e-05, - "loss": 0.4032, - "step": 1707 - }, - { - "epoch": 0.16090059113068464, - "grad_norm": 0.8686575293540955, - "learning_rate": 1.9812919372061187e-05, - "loss": 0.398, - "step": 1708 - }, - { - "epoch": 0.1609947952238525, - "grad_norm": 0.7816247344017029, - "learning_rate": 1.9812628542939897e-05, - "loss": 0.3386, - "step": 1709 - }, - { - "epoch": 0.16108899931702034, - "grad_norm": 0.9940059781074524, - "learning_rate": 1.98123374900755e-05, - "loss": 0.3753, - "step": 1710 - }, - { - "epoch": 0.16118320341018819, - "grad_norm": 0.9607025980949402, - "learning_rate": 1.9812046213474632e-05, - "loss": 0.4175, - "step": 1711 - }, - { - "epoch": 0.161277407503356, - "grad_norm": 0.993575930595398, - "learning_rate": 1.9811754713143936e-05, - "loss": 0.4561, - "step": 1712 - }, - { - "epoch": 0.16137161159652386, - "grad_norm": 0.966627299785614, - "learning_rate": 1.981146298909006e-05, - "loss": 0.3753, - "step": 1713 - }, - { - "epoch": 0.1614658156896917, - "grad_norm": 0.9251071810722351, - "learning_rate": 1.981117104131965e-05, - "loss": 0.369, - "step": 1714 - }, - { - "epoch": 0.16156001978285955, - "grad_norm": 1.022159457206726, - "learning_rate": 1.981087886983937e-05, - "loss": 0.4351, - "step": 1715 - }, - { - "epoch": 0.1616542238760274, - "grad_norm": 0.9397159814834595, - "learning_rate": 1.981058647465588e-05, - "loss": 0.4274, - "step": 1716 - }, - { - "epoch": 0.16174842796919525, - "grad_norm": 0.9194698333740234, - "learning_rate": 1.9810293855775845e-05, - "loss": 0.416, - "step": 1717 - }, - { - "epoch": 
0.1618426320623631, - "grad_norm": 0.848105251789093, - "learning_rate": 1.9810001013205936e-05, - "loss": 0.3991, - "step": 1718 - }, - { - "epoch": 0.16193683615553095, - "grad_norm": 0.9086849689483643, - "learning_rate": 1.9809707946952837e-05, - "loss": 0.3805, - "step": 1719 - }, - { - "epoch": 0.1620310402486988, - "grad_norm": 0.9052044153213501, - "learning_rate": 1.9809414657023222e-05, - "loss": 0.3928, - "step": 1720 - }, - { - "epoch": 0.16212524434186665, - "grad_norm": 0.8989402055740356, - "learning_rate": 1.9809121143423783e-05, - "loss": 0.3895, - "step": 1721 - }, - { - "epoch": 0.1622194484350345, - "grad_norm": 0.909193217754364, - "learning_rate": 1.9808827406161215e-05, - "loss": 0.3809, - "step": 1722 - }, - { - "epoch": 0.16231365252820235, - "grad_norm": 0.886569082736969, - "learning_rate": 1.980853344524221e-05, - "loss": 0.4072, - "step": 1723 - }, - { - "epoch": 0.1624078566213702, - "grad_norm": 0.8686087727546692, - "learning_rate": 1.9808239260673473e-05, - "loss": 0.356, - "step": 1724 - }, - { - "epoch": 0.16250206071453804, - "grad_norm": 0.8799514174461365, - "learning_rate": 1.9807944852461714e-05, - "loss": 0.4252, - "step": 1725 - }, - { - "epoch": 0.1625962648077059, - "grad_norm": 0.9682909250259399, - "learning_rate": 1.980765022061364e-05, - "loss": 0.3522, - "step": 1726 - }, - { - "epoch": 0.16269046890087374, - "grad_norm": 0.9706382155418396, - "learning_rate": 1.9807355365135978e-05, - "loss": 0.3659, - "step": 1727 - }, - { - "epoch": 0.1627846729940416, - "grad_norm": 1.0148420333862305, - "learning_rate": 1.9807060286035443e-05, - "loss": 0.4255, - "step": 1728 - }, - { - "epoch": 0.16287887708720944, - "grad_norm": 0.8712765574455261, - "learning_rate": 1.9806764983318766e-05, - "loss": 0.3685, - "step": 1729 - }, - { - "epoch": 0.1629730811803773, - "grad_norm": 0.9349738955497742, - "learning_rate": 1.9806469456992682e-05, - "loss": 0.4037, - "step": 1730 - }, - { - "epoch": 0.16306728527354514, - "grad_norm": 0.9326770901679993, - "learning_rate": 1.980617370706393e-05, - "loss": 0.409, - "step": 1731 - }, - { - "epoch": 0.163161489366713, - "grad_norm": 0.8601573705673218, - "learning_rate": 1.980587773353925e-05, - "loss": 0.3334, - "step": 1732 - }, - { - "epoch": 0.16325569345988084, - "grad_norm": 0.8509078025817871, - "learning_rate": 1.9805581536425393e-05, - "loss": 0.3676, - "step": 1733 - }, - { - "epoch": 0.16334989755304868, - "grad_norm": 0.934744656085968, - "learning_rate": 1.9805285115729113e-05, - "loss": 0.3749, - "step": 1734 - }, - { - "epoch": 0.16344410164621653, - "grad_norm": 0.875711977481842, - "learning_rate": 1.9804988471457168e-05, - "loss": 0.3843, - "step": 1735 - }, - { - "epoch": 0.16353830573938438, - "grad_norm": 0.8410339951515198, - "learning_rate": 1.9804691603616324e-05, - "loss": 0.3807, - "step": 1736 - }, - { - "epoch": 0.16363250983255223, - "grad_norm": 0.8422248363494873, - "learning_rate": 1.9804394512213342e-05, - "loss": 0.3945, - "step": 1737 - }, - { - "epoch": 0.16372671392572008, - "grad_norm": 0.8934512734413147, - "learning_rate": 1.980409719725501e-05, - "loss": 0.3858, - "step": 1738 - }, - { - "epoch": 0.16382091801888793, - "grad_norm": 0.8517009615898132, - "learning_rate": 1.9803799658748096e-05, - "loss": 0.3946, - "step": 1739 - }, - { - "epoch": 0.16391512211205578, - "grad_norm": 0.988314151763916, - "learning_rate": 1.9803501896699385e-05, - "loss": 0.417, - "step": 1740 - }, - { - "epoch": 0.16400932620522363, - "grad_norm": 0.994186282157898, - "learning_rate": 
1.9803203911115677e-05, - "loss": 0.4031, - "step": 1741 - }, - { - "epoch": 0.16410353029839148, - "grad_norm": 0.8861984014511108, - "learning_rate": 1.9802905702003753e-05, - "loss": 0.4167, - "step": 1742 - }, - { - "epoch": 0.16419773439155932, - "grad_norm": 0.9142113327980042, - "learning_rate": 1.9802607269370418e-05, - "loss": 0.4387, - "step": 1743 - }, - { - "epoch": 0.16429193848472717, - "grad_norm": 0.8992695212364197, - "learning_rate": 1.980230861322248e-05, - "loss": 0.3591, - "step": 1744 - }, - { - "epoch": 0.16438614257789502, - "grad_norm": 0.9266336560249329, - "learning_rate": 1.9802009733566744e-05, - "loss": 0.4222, - "step": 1745 - }, - { - "epoch": 0.16448034667106287, - "grad_norm": 0.8919268250465393, - "learning_rate": 1.980171063041003e-05, - "loss": 0.3914, - "step": 1746 - }, - { - "epoch": 0.1645745507642307, - "grad_norm": 0.796708345413208, - "learning_rate": 1.9801411303759154e-05, - "loss": 0.3747, - "step": 1747 - }, - { - "epoch": 0.16466875485739854, - "grad_norm": 0.9004980325698853, - "learning_rate": 1.980111175362094e-05, - "loss": 0.386, - "step": 1748 - }, - { - "epoch": 0.1647629589505664, - "grad_norm": 0.9157950282096863, - "learning_rate": 1.9800811980002218e-05, - "loss": 0.3937, - "step": 1749 - }, - { - "epoch": 0.16485716304373424, - "grad_norm": 0.9503116011619568, - "learning_rate": 1.980051198290983e-05, - "loss": 0.4312, - "step": 1750 - }, - { - "epoch": 0.1649513671369021, - "grad_norm": 0.7941295504570007, - "learning_rate": 1.9800211762350612e-05, - "loss": 0.3391, - "step": 1751 - }, - { - "epoch": 0.16504557123006994, - "grad_norm": 1.0035288333892822, - "learning_rate": 1.9799911318331407e-05, - "loss": 0.4451, - "step": 1752 - }, - { - "epoch": 0.1651397753232378, - "grad_norm": 0.8477427959442139, - "learning_rate": 1.979961065085907e-05, - "loss": 0.3396, - "step": 1753 - }, - { - "epoch": 0.16523397941640564, - "grad_norm": 0.8529664874076843, - "learning_rate": 1.9799309759940457e-05, - "loss": 0.4083, - "step": 1754 - }, - { - "epoch": 0.16532818350957348, - "grad_norm": 0.9405017495155334, - "learning_rate": 1.9799008645582424e-05, - "loss": 0.4651, - "step": 1755 - }, - { - "epoch": 0.16542238760274133, - "grad_norm": 0.8225582242012024, - "learning_rate": 1.9798707307791837e-05, - "loss": 0.3528, - "step": 1756 - }, - { - "epoch": 0.16551659169590918, - "grad_norm": 0.8793336153030396, - "learning_rate": 1.9798405746575572e-05, - "loss": 0.3746, - "step": 1757 - }, - { - "epoch": 0.16561079578907703, - "grad_norm": 0.9157631397247314, - "learning_rate": 1.9798103961940503e-05, - "loss": 0.3768, - "step": 1758 - }, - { - "epoch": 0.16570499988224488, - "grad_norm": 0.8963596224784851, - "learning_rate": 1.979780195389351e-05, - "loss": 0.3725, - "step": 1759 - }, - { - "epoch": 0.16579920397541273, - "grad_norm": 1.0575075149536133, - "learning_rate": 1.979749972244148e-05, - "loss": 0.4207, - "step": 1760 - }, - { - "epoch": 0.16589340806858058, - "grad_norm": 0.8952525854110718, - "learning_rate": 1.9797197267591304e-05, - "loss": 0.3782, - "step": 1761 - }, - { - "epoch": 0.16598761216174843, - "grad_norm": 0.9012593030929565, - "learning_rate": 1.979689458934988e-05, - "loss": 0.3595, - "step": 1762 - }, - { - "epoch": 0.16608181625491628, - "grad_norm": 0.7629408240318298, - "learning_rate": 1.9796591687724103e-05, - "loss": 0.3153, - "step": 1763 - }, - { - "epoch": 0.16617602034808412, - "grad_norm": 1.0397006273269653, - "learning_rate": 1.979628856272089e-05, - "loss": 0.4345, - "step": 1764 - }, - { - 
"epoch": 0.16627022444125197, - "grad_norm": 0.9040034413337708, - "learning_rate": 1.9795985214347146e-05, - "loss": 0.3802, - "step": 1765 - }, - { - "epoch": 0.16636442853441982, - "grad_norm": 0.8815705180168152, - "learning_rate": 1.979568164260979e-05, - "loss": 0.3452, - "step": 1766 - }, - { - "epoch": 0.16645863262758767, - "grad_norm": 0.9503600001335144, - "learning_rate": 1.9795377847515743e-05, - "loss": 0.3307, - "step": 1767 - }, - { - "epoch": 0.16655283672075552, - "grad_norm": 0.9405672550201416, - "learning_rate": 1.979507382907193e-05, - "loss": 0.4515, - "step": 1768 - }, - { - "epoch": 0.16664704081392337, - "grad_norm": 1.1875604391098022, - "learning_rate": 1.9794769587285287e-05, - "loss": 0.5184, - "step": 1769 - }, - { - "epoch": 0.16674124490709122, - "grad_norm": 0.998553991317749, - "learning_rate": 1.979446512216275e-05, - "loss": 0.41, - "step": 1770 - }, - { - "epoch": 0.16683544900025907, - "grad_norm": 0.8785516023635864, - "learning_rate": 1.979416043371126e-05, - "loss": 0.3665, - "step": 1771 - }, - { - "epoch": 0.16692965309342692, - "grad_norm": 0.8816999197006226, - "learning_rate": 1.9793855521937766e-05, - "loss": 0.3452, - "step": 1772 - }, - { - "epoch": 0.16702385718659477, - "grad_norm": 0.8487377762794495, - "learning_rate": 1.979355038684922e-05, - "loss": 0.4154, - "step": 1773 - }, - { - "epoch": 0.16711806127976261, - "grad_norm": 0.9029588103294373, - "learning_rate": 1.9793245028452577e-05, - "loss": 0.3713, - "step": 1774 - }, - { - "epoch": 0.16721226537293046, - "grad_norm": 0.8425957560539246, - "learning_rate": 1.9792939446754804e-05, - "loss": 0.3727, - "step": 1775 - }, - { - "epoch": 0.1673064694660983, - "grad_norm": 0.83709317445755, - "learning_rate": 1.9792633641762865e-05, - "loss": 0.3617, - "step": 1776 - }, - { - "epoch": 0.16740067355926616, - "grad_norm": 0.8353833556175232, - "learning_rate": 1.9792327613483735e-05, - "loss": 0.3563, - "step": 1777 - }, - { - "epoch": 0.167494877652434, - "grad_norm": 0.9072156548500061, - "learning_rate": 1.9792021361924392e-05, - "loss": 0.3671, - "step": 1778 - }, - { - "epoch": 0.16758908174560186, - "grad_norm": 1.0261179208755493, - "learning_rate": 1.9791714887091816e-05, - "loss": 0.3652, - "step": 1779 - }, - { - "epoch": 0.1676832858387697, - "grad_norm": 0.9820303320884705, - "learning_rate": 1.9791408188993003e-05, - "loss": 0.4634, - "step": 1780 - }, - { - "epoch": 0.16777748993193753, - "grad_norm": 0.8858616352081299, - "learning_rate": 1.9791101267634937e-05, - "loss": 0.3631, - "step": 1781 - }, - { - "epoch": 0.16787169402510538, - "grad_norm": 0.8245965838432312, - "learning_rate": 1.9790794123024618e-05, - "loss": 0.3997, - "step": 1782 - }, - { - "epoch": 0.16796589811827323, - "grad_norm": 0.9756718873977661, - "learning_rate": 1.979048675516905e-05, - "loss": 0.467, - "step": 1783 - }, - { - "epoch": 0.16806010221144108, - "grad_norm": 0.9283931851387024, - "learning_rate": 1.9790179164075247e-05, - "loss": 0.4251, - "step": 1784 - }, - { - "epoch": 0.16815430630460892, - "grad_norm": 1.0229156017303467, - "learning_rate": 1.9789871349750216e-05, - "loss": 0.3911, - "step": 1785 - }, - { - "epoch": 0.16824851039777677, - "grad_norm": 0.9102269411087036, - "learning_rate": 1.978956331220098e-05, - "loss": 0.4388, - "step": 1786 - }, - { - "epoch": 0.16834271449094462, - "grad_norm": 0.8671599626541138, - "learning_rate": 1.978925505143456e-05, - "loss": 0.3785, - "step": 1787 - }, - { - "epoch": 0.16843691858411247, - "grad_norm": 0.7886928915977478, - 
"learning_rate": 1.9788946567457982e-05, - "loss": 0.3627, - "step": 1788 - }, - { - "epoch": 0.16853112267728032, - "grad_norm": 0.8896774053573608, - "learning_rate": 1.978863786027829e-05, - "loss": 0.3877, - "step": 1789 - }, - { - "epoch": 0.16862532677044817, - "grad_norm": 0.873624861240387, - "learning_rate": 1.978832892990251e-05, - "loss": 0.4248, - "step": 1790 - }, - { - "epoch": 0.16871953086361602, - "grad_norm": 0.9539051055908203, - "learning_rate": 1.9788019776337693e-05, - "loss": 0.4488, - "step": 1791 - }, - { - "epoch": 0.16881373495678387, - "grad_norm": 0.7920552492141724, - "learning_rate": 1.978771039959089e-05, - "loss": 0.3748, - "step": 1792 - }, - { - "epoch": 0.16890793904995172, - "grad_norm": 0.8468660712242126, - "learning_rate": 1.9787400799669155e-05, - "loss": 0.3353, - "step": 1793 - }, - { - "epoch": 0.16900214314311957, - "grad_norm": 1.2447292804718018, - "learning_rate": 1.978709097657954e-05, - "loss": 0.4093, - "step": 1794 - }, - { - "epoch": 0.16909634723628741, - "grad_norm": 0.9498482346534729, - "learning_rate": 1.978678093032912e-05, - "loss": 0.4097, - "step": 1795 - }, - { - "epoch": 0.16919055132945526, - "grad_norm": 0.827974259853363, - "learning_rate": 1.9786470660924958e-05, - "loss": 0.3704, - "step": 1796 - }, - { - "epoch": 0.1692847554226231, - "grad_norm": 0.8106819987297058, - "learning_rate": 1.9786160168374125e-05, - "loss": 0.3429, - "step": 1797 - }, - { - "epoch": 0.16937895951579096, - "grad_norm": 0.8402098417282104, - "learning_rate": 1.978584945268371e-05, - "loss": 0.3797, - "step": 1798 - }, - { - "epoch": 0.1694731636089588, - "grad_norm": 0.8330712914466858, - "learning_rate": 1.9785538513860794e-05, - "loss": 0.3266, - "step": 1799 - }, - { - "epoch": 0.16956736770212666, - "grad_norm": 0.9236308336257935, - "learning_rate": 1.978522735191246e-05, - "loss": 0.3974, - "step": 1800 - }, - { - "epoch": 0.1696615717952945, - "grad_norm": 1.0042564868927002, - "learning_rate": 1.9784915966845817e-05, - "loss": 0.3724, - "step": 1801 - }, - { - "epoch": 0.16975577588846236, - "grad_norm": 0.9548354148864746, - "learning_rate": 1.9784604358667954e-05, - "loss": 0.3869, - "step": 1802 - }, - { - "epoch": 0.1698499799816302, - "grad_norm": 0.8802642226219177, - "learning_rate": 1.978429252738598e-05, - "loss": 0.4395, - "step": 1803 - }, - { - "epoch": 0.16994418407479805, - "grad_norm": 0.8193650245666504, - "learning_rate": 1.9783980473007004e-05, - "loss": 0.3548, - "step": 1804 - }, - { - "epoch": 0.1700383881679659, - "grad_norm": 0.9388848543167114, - "learning_rate": 1.9783668195538143e-05, - "loss": 0.3968, - "step": 1805 - }, - { - "epoch": 0.17013259226113375, - "grad_norm": 0.8756192922592163, - "learning_rate": 1.9783355694986516e-05, - "loss": 0.4299, - "step": 1806 - }, - { - "epoch": 0.1702267963543016, - "grad_norm": 0.9549397230148315, - "learning_rate": 1.978304297135925e-05, - "loss": 0.4035, - "step": 1807 - }, - { - "epoch": 0.17032100044746945, - "grad_norm": 0.8104063272476196, - "learning_rate": 1.978273002466347e-05, - "loss": 0.3794, - "step": 1808 - }, - { - "epoch": 0.1704152045406373, - "grad_norm": 0.84712815284729, - "learning_rate": 1.978241685490632e-05, - "loss": 0.3785, - "step": 1809 - }, - { - "epoch": 0.17050940863380515, - "grad_norm": 0.9174196720123291, - "learning_rate": 1.9782103462094935e-05, - "loss": 0.4004, - "step": 1810 - }, - { - "epoch": 0.170603612726973, - "grad_norm": 0.8831159472465515, - "learning_rate": 1.9781789846236466e-05, - "loss": 0.3719, - "step": 1811 - }, - { 
- "epoch": 0.17069781682014085, - "grad_norm": 0.8972384333610535, - "learning_rate": 1.9781476007338058e-05, - "loss": 0.3865, - "step": 1812 - }, - { - "epoch": 0.1707920209133087, - "grad_norm": 0.947032630443573, - "learning_rate": 1.978116194540687e-05, - "loss": 0.3844, - "step": 1813 - }, - { - "epoch": 0.17088622500647654, - "grad_norm": 0.9138824343681335, - "learning_rate": 1.9780847660450062e-05, - "loss": 0.3505, - "step": 1814 - }, - { - "epoch": 0.1709804290996444, - "grad_norm": 0.9780575633049011, - "learning_rate": 1.9780533152474802e-05, - "loss": 0.3467, - "step": 1815 - }, - { - "epoch": 0.17107463319281221, - "grad_norm": 0.9022666215896606, - "learning_rate": 1.9780218421488263e-05, - "loss": 0.3679, - "step": 1816 - }, - { - "epoch": 0.17116883728598006, - "grad_norm": 0.9314306974411011, - "learning_rate": 1.9779903467497614e-05, - "loss": 0.4166, - "step": 1817 - }, - { - "epoch": 0.1712630413791479, - "grad_norm": 0.9205523133277893, - "learning_rate": 1.9779588290510044e-05, - "loss": 0.3931, - "step": 1818 - }, - { - "epoch": 0.17135724547231576, - "grad_norm": 1.0727566480636597, - "learning_rate": 1.9779272890532733e-05, - "loss": 0.4415, - "step": 1819 - }, - { - "epoch": 0.1714514495654836, - "grad_norm": 1.0124160051345825, - "learning_rate": 1.977895726757288e-05, - "loss": 0.4145, - "step": 1820 - }, - { - "epoch": 0.17154565365865146, - "grad_norm": 1.020991563796997, - "learning_rate": 1.977864142163768e-05, - "loss": 0.4205, - "step": 1821 - }, - { - "epoch": 0.1716398577518193, - "grad_norm": 0.8599303364753723, - "learning_rate": 1.9778325352734326e-05, - "loss": 0.3583, - "step": 1822 - }, - { - "epoch": 0.17173406184498716, - "grad_norm": 0.9480980038642883, - "learning_rate": 1.9778009060870035e-05, - "loss": 0.4539, - "step": 1823 - }, - { - "epoch": 0.171828265938155, - "grad_norm": 0.9444804191589355, - "learning_rate": 1.9777692546052014e-05, - "loss": 0.4561, - "step": 1824 - }, - { - "epoch": 0.17192247003132285, - "grad_norm": 1.0277150869369507, - "learning_rate": 1.977737580828748e-05, - "loss": 0.4486, - "step": 1825 - }, - { - "epoch": 0.1720166741244907, - "grad_norm": 0.7958475351333618, - "learning_rate": 1.977705884758366e-05, - "loss": 0.3596, - "step": 1826 - }, - { - "epoch": 0.17211087821765855, - "grad_norm": 0.8301064968109131, - "learning_rate": 1.977674166394778e-05, - "loss": 0.3664, - "step": 1827 - }, - { - "epoch": 0.1722050823108264, - "grad_norm": 1.009836196899414, - "learning_rate": 1.9776424257387066e-05, - "loss": 0.418, - "step": 1828 - }, - { - "epoch": 0.17229928640399425, - "grad_norm": 0.8978395462036133, - "learning_rate": 1.977610662790876e-05, - "loss": 0.3601, - "step": 1829 - }, - { - "epoch": 0.1723934904971621, - "grad_norm": 0.9540097713470459, - "learning_rate": 1.9775788775520105e-05, - "loss": 0.4594, - "step": 1830 - }, - { - "epoch": 0.17248769459032995, - "grad_norm": 0.8026547431945801, - "learning_rate": 1.9775470700228346e-05, - "loss": 0.3616, - "step": 1831 - }, - { - "epoch": 0.1725818986834978, - "grad_norm": 0.9324086904525757, - "learning_rate": 1.9775152402040735e-05, - "loss": 0.4165, - "step": 1832 - }, - { - "epoch": 0.17267610277666565, - "grad_norm": 0.9284736514091492, - "learning_rate": 1.9774833880964538e-05, - "loss": 0.4116, - "step": 1833 - }, - { - "epoch": 0.1727703068698335, - "grad_norm": 0.8991754651069641, - "learning_rate": 1.9774515137007006e-05, - "loss": 0.3435, - "step": 1834 - }, - { - "epoch": 0.17286451096300134, - "grad_norm": 1.0078505277633667, - 
"learning_rate": 1.9774196170175414e-05, - "loss": 0.4604, - "step": 1835 - }, - { - "epoch": 0.1729587150561692, - "grad_norm": 0.9617123007774353, - "learning_rate": 1.9773876980477033e-05, - "loss": 0.3642, - "step": 1836 - }, - { - "epoch": 0.17305291914933704, - "grad_norm": 0.9242467880249023, - "learning_rate": 1.977355756791914e-05, - "loss": 0.3946, - "step": 1837 - }, - { - "epoch": 0.1731471232425049, - "grad_norm": 0.7867587804794312, - "learning_rate": 1.977323793250902e-05, - "loss": 0.3754, - "step": 1838 - }, - { - "epoch": 0.17324132733567274, - "grad_norm": 0.8909556865692139, - "learning_rate": 1.9772918074253965e-05, - "loss": 0.4012, - "step": 1839 - }, - { - "epoch": 0.1733355314288406, - "grad_norm": 0.9703721404075623, - "learning_rate": 1.977259799316126e-05, - "loss": 0.3578, - "step": 1840 - }, - { - "epoch": 0.17342973552200844, - "grad_norm": 0.8696607351303101, - "learning_rate": 1.9772277689238205e-05, - "loss": 0.3712, - "step": 1841 - }, - { - "epoch": 0.1735239396151763, - "grad_norm": 0.9312201142311096, - "learning_rate": 1.977195716249211e-05, - "loss": 0.4205, - "step": 1842 - }, - { - "epoch": 0.17361814370834414, - "grad_norm": 0.951821506023407, - "learning_rate": 1.9771636412930274e-05, - "loss": 0.4594, - "step": 1843 - }, - { - "epoch": 0.17371234780151198, - "grad_norm": 0.900589644908905, - "learning_rate": 1.977131544056002e-05, - "loss": 0.3755, - "step": 1844 - }, - { - "epoch": 0.17380655189467983, - "grad_norm": 0.8959382772445679, - "learning_rate": 1.9770994245388658e-05, - "loss": 0.3854, - "step": 1845 - }, - { - "epoch": 0.17390075598784768, - "grad_norm": 0.84087073802948, - "learning_rate": 1.977067282742352e-05, - "loss": 0.41, - "step": 1846 - }, - { - "epoch": 0.17399496008101553, - "grad_norm": 0.7898133993148804, - "learning_rate": 1.977035118667193e-05, - "loss": 0.393, - "step": 1847 - }, - { - "epoch": 0.17408916417418338, - "grad_norm": 0.8709238171577454, - "learning_rate": 1.9770029323141224e-05, - "loss": 0.4058, - "step": 1848 - }, - { - "epoch": 0.17418336826735123, - "grad_norm": 0.931500256061554, - "learning_rate": 1.9769707236838737e-05, - "loss": 0.4019, - "step": 1849 - }, - { - "epoch": 0.17427757236051905, - "grad_norm": 0.7755554914474487, - "learning_rate": 1.976938492777182e-05, - "loss": 0.3452, - "step": 1850 - }, - { - "epoch": 0.1743717764536869, - "grad_norm": 0.8617866635322571, - "learning_rate": 1.9769062395947814e-05, - "loss": 0.4383, - "step": 1851 - }, - { - "epoch": 0.17446598054685475, - "grad_norm": 0.865623414516449, - "learning_rate": 1.976873964137408e-05, - "loss": 0.4186, - "step": 1852 - }, - { - "epoch": 0.1745601846400226, - "grad_norm": 1.092434287071228, - "learning_rate": 1.9768416664057973e-05, - "loss": 0.4365, - "step": 1853 - }, - { - "epoch": 0.17465438873319045, - "grad_norm": 0.874930739402771, - "learning_rate": 1.9768093464006856e-05, - "loss": 0.3717, - "step": 1854 - }, - { - "epoch": 0.1747485928263583, - "grad_norm": 0.8707634210586548, - "learning_rate": 1.9767770041228104e-05, - "loss": 0.4036, - "step": 1855 - }, - { - "epoch": 0.17484279691952614, - "grad_norm": 0.9189536571502686, - "learning_rate": 1.976744639572909e-05, - "loss": 0.4176, - "step": 1856 - }, - { - "epoch": 0.174937001012694, - "grad_norm": 0.8922774791717529, - "learning_rate": 1.9767122527517194e-05, - "loss": 0.4004, - "step": 1857 - }, - { - "epoch": 0.17503120510586184, - "grad_norm": 0.8719164729118347, - "learning_rate": 1.9766798436599795e-05, - "loss": 0.3678, - "step": 1858 - }, - { - 
"epoch": 0.1751254091990297, - "grad_norm": 0.9028275012969971, - "learning_rate": 1.9766474122984288e-05, - "loss": 0.4334, - "step": 1859 - }, - { - "epoch": 0.17521961329219754, - "grad_norm": 0.8829405903816223, - "learning_rate": 1.976614958667807e-05, - "loss": 0.4126, - "step": 1860 - }, - { - "epoch": 0.1753138173853654, - "grad_norm": 1.028039813041687, - "learning_rate": 1.9765824827688536e-05, - "loss": 0.4269, - "step": 1861 - }, - { - "epoch": 0.17540802147853324, - "grad_norm": 0.89798504114151, - "learning_rate": 1.976549984602309e-05, - "loss": 0.406, - "step": 1862 - }, - { - "epoch": 0.1755022255717011, - "grad_norm": 0.9848604202270508, - "learning_rate": 1.976517464168915e-05, - "loss": 0.3874, - "step": 1863 - }, - { - "epoch": 0.17559642966486894, - "grad_norm": 0.9031936526298523, - "learning_rate": 1.9764849214694122e-05, - "loss": 0.4119, - "step": 1864 - }, - { - "epoch": 0.17569063375803678, - "grad_norm": 1.0898340940475464, - "learning_rate": 1.976452356504543e-05, - "loss": 0.3884, - "step": 1865 - }, - { - "epoch": 0.17578483785120463, - "grad_norm": 0.8496800661087036, - "learning_rate": 1.9764197692750502e-05, - "loss": 0.3472, - "step": 1866 - }, - { - "epoch": 0.17587904194437248, - "grad_norm": 0.9799139499664307, - "learning_rate": 1.9763871597816765e-05, - "loss": 0.4278, - "step": 1867 - }, - { - "epoch": 0.17597324603754033, - "grad_norm": 0.9434148073196411, - "learning_rate": 1.9763545280251657e-05, - "loss": 0.3745, - "step": 1868 - }, - { - "epoch": 0.17606745013070818, - "grad_norm": 0.9892056584358215, - "learning_rate": 1.9763218740062613e-05, - "loss": 0.4429, - "step": 1869 - }, - { - "epoch": 0.17616165422387603, - "grad_norm": 0.8674705624580383, - "learning_rate": 1.976289197725709e-05, - "loss": 0.3697, - "step": 1870 - }, - { - "epoch": 0.17625585831704388, - "grad_norm": 0.8550935983657837, - "learning_rate": 1.9762564991842524e-05, - "loss": 0.4158, - "step": 1871 - }, - { - "epoch": 0.17635006241021173, - "grad_norm": 0.9619081020355225, - "learning_rate": 1.9762237783826383e-05, - "loss": 0.4016, - "step": 1872 - }, - { - "epoch": 0.17644426650337958, - "grad_norm": 0.9321436882019043, - "learning_rate": 1.9761910353216116e-05, - "loss": 0.363, - "step": 1873 - }, - { - "epoch": 0.17653847059654743, - "grad_norm": 0.965522289276123, - "learning_rate": 1.9761582700019203e-05, - "loss": 0.4403, - "step": 1874 - }, - { - "epoch": 0.17663267468971527, - "grad_norm": 0.8548703193664551, - "learning_rate": 1.9761254824243107e-05, - "loss": 0.3806, - "step": 1875 - }, - { - "epoch": 0.17672687878288312, - "grad_norm": 0.9804152846336365, - "learning_rate": 1.97609267258953e-05, - "loss": 0.3895, - "step": 1876 - }, - { - "epoch": 0.17682108287605097, - "grad_norm": 1.0053352117538452, - "learning_rate": 1.9760598404983274e-05, - "loss": 0.414, - "step": 1877 - }, - { - "epoch": 0.17691528696921882, - "grad_norm": 0.9046112895011902, - "learning_rate": 1.9760269861514507e-05, - "loss": 0.4142, - "step": 1878 - }, - { - "epoch": 0.17700949106238667, - "grad_norm": 0.7706135511398315, - "learning_rate": 1.9759941095496493e-05, - "loss": 0.3311, - "step": 1879 - }, - { - "epoch": 0.17710369515555452, - "grad_norm": 0.9193723797798157, - "learning_rate": 1.9759612106936723e-05, - "loss": 0.3733, - "step": 1880 - }, - { - "epoch": 0.17719789924872237, - "grad_norm": 0.8780800104141235, - "learning_rate": 1.975928289584271e-05, - "loss": 0.379, - "step": 1881 - }, - { - "epoch": 0.17729210334189022, - "grad_norm": 0.8904425501823425, - 
"learning_rate": 1.975895346222195e-05, - "loss": 0.4162, - "step": 1882 - }, - { - "epoch": 0.17738630743505807, - "grad_norm": 0.8909766674041748, - "learning_rate": 1.975862380608196e-05, - "loss": 0.3837, - "step": 1883 - }, - { - "epoch": 0.17748051152822591, - "grad_norm": 1.136483073234558, - "learning_rate": 1.9758293927430253e-05, - "loss": 0.3665, - "step": 1884 - }, - { - "epoch": 0.17757471562139374, - "grad_norm": 0.8756337761878967, - "learning_rate": 1.9757963826274357e-05, - "loss": 0.3776, - "step": 1885 - }, - { - "epoch": 0.17766891971456159, - "grad_norm": 1.091659665107727, - "learning_rate": 1.9757633502621794e-05, - "loss": 0.4115, - "step": 1886 - }, - { - "epoch": 0.17776312380772943, - "grad_norm": 0.9444653391838074, - "learning_rate": 1.9757302956480094e-05, - "loss": 0.3877, - "step": 1887 - }, - { - "epoch": 0.17785732790089728, - "grad_norm": 3.0061488151550293, - "learning_rate": 1.97569721878568e-05, - "loss": 0.3707, - "step": 1888 - }, - { - "epoch": 0.17795153199406513, - "grad_norm": 0.8458983302116394, - "learning_rate": 1.975664119675945e-05, - "loss": 0.406, - "step": 1889 - }, - { - "epoch": 0.17804573608723298, - "grad_norm": 0.9263994693756104, - "learning_rate": 1.9756309983195588e-05, - "loss": 0.4169, - "step": 1890 - }, - { - "epoch": 0.17813994018040083, - "grad_norm": 0.7784213423728943, - "learning_rate": 1.9755978547172776e-05, - "loss": 0.3559, - "step": 1891 - }, - { - "epoch": 0.17823414427356868, - "grad_norm": 0.9472523927688599, - "learning_rate": 1.9755646888698563e-05, - "loss": 0.3956, - "step": 1892 - }, - { - "epoch": 0.17832834836673653, - "grad_norm": 1.0705050230026245, - "learning_rate": 1.9755315007780507e-05, - "loss": 0.4067, - "step": 1893 - }, - { - "epoch": 0.17842255245990438, - "grad_norm": 0.9809260964393616, - "learning_rate": 1.975498290442619e-05, - "loss": 0.3695, - "step": 1894 - }, - { - "epoch": 0.17851675655307223, - "grad_norm": 0.8297300338745117, - "learning_rate": 1.9754650578643174e-05, - "loss": 0.3607, - "step": 1895 - }, - { - "epoch": 0.17861096064624007, - "grad_norm": 0.9693791270256042, - "learning_rate": 1.9754318030439038e-05, - "loss": 0.4735, - "step": 1896 - }, - { - "epoch": 0.17870516473940792, - "grad_norm": 0.8396703600883484, - "learning_rate": 1.975398525982137e-05, - "loss": 0.372, - "step": 1897 - }, - { - "epoch": 0.17879936883257577, - "grad_norm": 0.9750856161117554, - "learning_rate": 1.9753652266797746e-05, - "loss": 0.383, - "step": 1898 - }, - { - "epoch": 0.17889357292574362, - "grad_norm": 0.942491888999939, - "learning_rate": 1.9753319051375772e-05, - "loss": 0.3777, - "step": 1899 - }, - { - "epoch": 0.17898777701891147, - "grad_norm": 0.9724046587944031, - "learning_rate": 1.9752985613563038e-05, - "loss": 0.4327, - "step": 1900 - }, - { - "epoch": 0.17908198111207932, - "grad_norm": 1.0240174531936646, - "learning_rate": 1.9752651953367147e-05, - "loss": 0.4255, - "step": 1901 - }, - { - "epoch": 0.17917618520524717, - "grad_norm": 0.8948530554771423, - "learning_rate": 1.975231807079571e-05, - "loss": 0.3802, - "step": 1902 - }, - { - "epoch": 0.17927038929841502, - "grad_norm": 1.09205162525177, - "learning_rate": 1.9751983965856342e-05, - "loss": 0.4074, - "step": 1903 - }, - { - "epoch": 0.17936459339158287, - "grad_norm": 1.0225067138671875, - "learning_rate": 1.975164963855665e-05, - "loss": 0.3375, - "step": 1904 - }, - { - "epoch": 0.17945879748475072, - "grad_norm": 0.8763222694396973, - "learning_rate": 1.975131508890427e-05, - "loss": 0.3723, - "step": 1905 - 
}, - { - "epoch": 0.17955300157791856, - "grad_norm": 0.9270104169845581, - "learning_rate": 1.9750980316906826e-05, - "loss": 0.3886, - "step": 1906 - }, - { - "epoch": 0.1796472056710864, - "grad_norm": 1.0478739738464355, - "learning_rate": 1.9750645322571952e-05, - "loss": 0.348, - "step": 1907 - }, - { - "epoch": 0.17974140976425426, - "grad_norm": 1.2006361484527588, - "learning_rate": 1.975031010590728e-05, - "loss": 0.3926, - "step": 1908 - }, - { - "epoch": 0.1798356138574221, - "grad_norm": 1.0440820455551147, - "learning_rate": 1.9749974666920462e-05, - "loss": 0.4468, - "step": 1909 - }, - { - "epoch": 0.17992981795058996, - "grad_norm": 0.8804754614830017, - "learning_rate": 1.974963900561914e-05, - "loss": 0.4056, - "step": 1910 - }, - { - "epoch": 0.1800240220437578, - "grad_norm": 0.8772451281547546, - "learning_rate": 1.974930312201097e-05, - "loss": 0.3966, - "step": 1911 - }, - { - "epoch": 0.18011822613692566, - "grad_norm": 0.98329758644104, - "learning_rate": 1.9748967016103617e-05, - "loss": 0.4242, - "step": 1912 - }, - { - "epoch": 0.1802124302300935, - "grad_norm": 0.9673380255699158, - "learning_rate": 1.9748630687904735e-05, - "loss": 0.4327, - "step": 1913 - }, - { - "epoch": 0.18030663432326136, - "grad_norm": 0.9209880828857422, - "learning_rate": 1.9748294137421997e-05, - "loss": 0.3492, - "step": 1914 - }, - { - "epoch": 0.1804008384164292, - "grad_norm": 0.8868668675422668, - "learning_rate": 1.9747957364663076e-05, - "loss": 0.3949, - "step": 1915 - }, - { - "epoch": 0.18049504250959705, - "grad_norm": 0.834490954875946, - "learning_rate": 1.9747620369635653e-05, - "loss": 0.3812, - "step": 1916 - }, - { - "epoch": 0.1805892466027649, - "grad_norm": 1.0661643743515015, - "learning_rate": 1.974728315234741e-05, - "loss": 0.4484, - "step": 1917 - }, - { - "epoch": 0.18068345069593275, - "grad_norm": 0.9173630475997925, - "learning_rate": 1.9746945712806037e-05, - "loss": 0.3215, - "step": 1918 - }, - { - "epoch": 0.1807776547891006, - "grad_norm": 1.13764226436615, - "learning_rate": 1.9746608051019227e-05, - "loss": 0.3646, - "step": 1919 - }, - { - "epoch": 0.18087185888226842, - "grad_norm": 0.9189788103103638, - "learning_rate": 1.9746270166994682e-05, - "loss": 0.4053, - "step": 1920 - }, - { - "epoch": 0.18096606297543627, - "grad_norm": 0.7904914617538452, - "learning_rate": 1.9745932060740103e-05, - "loss": 0.3388, - "step": 1921 - }, - { - "epoch": 0.18106026706860412, - "grad_norm": 0.9093592166900635, - "learning_rate": 1.97455937322632e-05, - "loss": 0.3893, - "step": 1922 - }, - { - "epoch": 0.18115447116177197, - "grad_norm": 0.9486411809921265, - "learning_rate": 1.9745255181571686e-05, - "loss": 0.3878, - "step": 1923 - }, - { - "epoch": 0.18124867525493982, - "grad_norm": 1.0105358362197876, - "learning_rate": 1.9744916408673287e-05, - "loss": 0.4407, - "step": 1924 - }, - { - "epoch": 0.18134287934810767, - "grad_norm": 0.8826894164085388, - "learning_rate": 1.9744577413575723e-05, - "loss": 0.3491, - "step": 1925 - }, - { - "epoch": 0.18143708344127552, - "grad_norm": 0.9980819821357727, - "learning_rate": 1.974423819628672e-05, - "loss": 0.3908, - "step": 1926 - }, - { - "epoch": 0.18153128753444336, - "grad_norm": 1.1008509397506714, - "learning_rate": 1.9743898756814018e-05, - "loss": 0.41, - "step": 1927 - }, - { - "epoch": 0.1816254916276112, - "grad_norm": 0.8422188758850098, - "learning_rate": 1.9743559095165357e-05, - "loss": 0.3257, - "step": 1928 - }, - { - "epoch": 0.18171969572077906, - "grad_norm": 0.898560643196106, - 
"learning_rate": 1.974321921134848e-05, - "loss": 0.3473, - "step": 1929 - }, - { - "epoch": 0.1818138998139469, - "grad_norm": 0.9736857414245605, - "learning_rate": 1.9742879105371132e-05, - "loss": 0.4067, - "step": 1930 - }, - { - "epoch": 0.18190810390711476, - "grad_norm": 0.9870139956474304, - "learning_rate": 1.9742538777241078e-05, - "loss": 0.4176, - "step": 1931 - }, - { - "epoch": 0.1820023080002826, - "grad_norm": 1.0926724672317505, - "learning_rate": 1.9742198226966073e-05, - "loss": 0.415, - "step": 1932 - }, - { - "epoch": 0.18209651209345046, - "grad_norm": 0.9447426795959473, - "learning_rate": 1.9741857454553876e-05, - "loss": 0.3997, - "step": 1933 - }, - { - "epoch": 0.1821907161866183, - "grad_norm": 1.0695010423660278, - "learning_rate": 1.9741516460012268e-05, - "loss": 0.3581, - "step": 1934 - }, - { - "epoch": 0.18228492027978616, - "grad_norm": 0.9521197080612183, - "learning_rate": 1.9741175243349017e-05, - "loss": 0.3883, - "step": 1935 - }, - { - "epoch": 0.182379124372954, - "grad_norm": 0.780623197555542, - "learning_rate": 1.9740833804571907e-05, - "loss": 0.3433, - "step": 1936 - }, - { - "epoch": 0.18247332846612185, - "grad_norm": 0.8583131432533264, - "learning_rate": 1.974049214368872e-05, - "loss": 0.3645, - "step": 1937 - }, - { - "epoch": 0.1825675325592897, - "grad_norm": 0.8302966356277466, - "learning_rate": 1.974015026070725e-05, - "loss": 0.367, - "step": 1938 - }, - { - "epoch": 0.18266173665245755, - "grad_norm": 0.8479434251785278, - "learning_rate": 1.973980815563529e-05, - "loss": 0.327, - "step": 1939 - }, - { - "epoch": 0.1827559407456254, - "grad_norm": 0.9212160110473633, - "learning_rate": 1.973946582848064e-05, - "loss": 0.3893, - "step": 1940 - }, - { - "epoch": 0.18285014483879325, - "grad_norm": 0.8256272077560425, - "learning_rate": 1.9739123279251108e-05, - "loss": 0.3961, - "step": 1941 - }, - { - "epoch": 0.1829443489319611, - "grad_norm": 0.8914819955825806, - "learning_rate": 1.97387805079545e-05, - "loss": 0.3936, - "step": 1942 - }, - { - "epoch": 0.18303855302512895, - "grad_norm": 0.9249416589736938, - "learning_rate": 1.973843751459864e-05, - "loss": 0.3438, - "step": 1943 - }, - { - "epoch": 0.1831327571182968, - "grad_norm": 1.0605783462524414, - "learning_rate": 1.973809429919134e-05, - "loss": 0.4249, - "step": 1944 - }, - { - "epoch": 0.18322696121146465, - "grad_norm": 0.9442580342292786, - "learning_rate": 1.9737750861740434e-05, - "loss": 0.4023, - "step": 1945 - }, - { - "epoch": 0.1833211653046325, - "grad_norm": 0.825469970703125, - "learning_rate": 1.9737407202253745e-05, - "loss": 0.3649, - "step": 1946 - }, - { - "epoch": 0.18341536939780034, - "grad_norm": 0.9349367618560791, - "learning_rate": 1.9737063320739115e-05, - "loss": 0.3942, - "step": 1947 - }, - { - "epoch": 0.1835095734909682, - "grad_norm": 0.941352128982544, - "learning_rate": 1.973671921720438e-05, - "loss": 0.4213, - "step": 1948 - }, - { - "epoch": 0.18360377758413604, - "grad_norm": 0.8577041029930115, - "learning_rate": 1.973637489165739e-05, - "loss": 0.3857, - "step": 1949 - }, - { - "epoch": 0.1836979816773039, - "grad_norm": 0.933680534362793, - "learning_rate": 1.9736030344105997e-05, - "loss": 0.4139, - "step": 1950 - }, - { - "epoch": 0.18379218577047174, - "grad_norm": 0.924106240272522, - "learning_rate": 1.973568557455805e-05, - "loss": 0.4217, - "step": 1951 - }, - { - "epoch": 0.1838863898636396, - "grad_norm": 0.9497937560081482, - "learning_rate": 1.9735340583021417e-05, - "loss": 0.3893, - "step": 1952 - }, - { - 
"epoch": 0.18398059395680744, - "grad_norm": 0.8727536201477051, - "learning_rate": 1.9734995369503967e-05, - "loss": 0.3175, - "step": 1953 - }, - { - "epoch": 0.18407479804997526, - "grad_norm": 0.9945586323738098, - "learning_rate": 1.9734649934013564e-05, - "loss": 0.4017, - "step": 1954 - }, - { - "epoch": 0.1841690021431431, - "grad_norm": 0.8452147245407104, - "learning_rate": 1.9734304276558086e-05, - "loss": 0.3552, - "step": 1955 - }, - { - "epoch": 0.18426320623631096, - "grad_norm": 0.8479359745979309, - "learning_rate": 1.973395839714542e-05, - "loss": 0.3854, - "step": 1956 - }, - { - "epoch": 0.1843574103294788, - "grad_norm": 0.779893696308136, - "learning_rate": 1.9733612295783448e-05, - "loss": 0.3676, - "step": 1957 - }, - { - "epoch": 0.18445161442264665, - "grad_norm": 0.8586198687553406, - "learning_rate": 1.973326597248006e-05, - "loss": 0.4006, - "step": 1958 - }, - { - "epoch": 0.1845458185158145, - "grad_norm": 0.8984857201576233, - "learning_rate": 1.9732919427243155e-05, - "loss": 0.3877, - "step": 1959 - }, - { - "epoch": 0.18464002260898235, - "grad_norm": 1.2245627641677856, - "learning_rate": 1.9732572660080634e-05, - "loss": 0.4117, - "step": 1960 - }, - { - "epoch": 0.1847342267021502, - "grad_norm": 0.7990955114364624, - "learning_rate": 1.9732225671000408e-05, - "loss": 0.3674, - "step": 1961 - }, - { - "epoch": 0.18482843079531805, - "grad_norm": 0.9033900499343872, - "learning_rate": 1.9731878460010386e-05, - "loss": 0.4033, - "step": 1962 - }, - { - "epoch": 0.1849226348884859, - "grad_norm": 1.060862421989441, - "learning_rate": 1.9731531027118482e-05, - "loss": 0.4059, - "step": 1963 - }, - { - "epoch": 0.18501683898165375, - "grad_norm": 0.9788298010826111, - "learning_rate": 1.973118337233262e-05, - "loss": 0.4468, - "step": 1964 - }, - { - "epoch": 0.1851110430748216, - "grad_norm": 0.8543902635574341, - "learning_rate": 1.973083549566073e-05, - "loss": 0.3672, - "step": 1965 - }, - { - "epoch": 0.18520524716798945, - "grad_norm": 0.8796471953392029, - "learning_rate": 1.973048739711074e-05, - "loss": 0.3988, - "step": 1966 - }, - { - "epoch": 0.1852994512611573, - "grad_norm": 0.8813859820365906, - "learning_rate": 1.973013907669059e-05, - "loss": 0.3839, - "step": 1967 - }, - { - "epoch": 0.18539365535432514, - "grad_norm": 0.8523325324058533, - "learning_rate": 1.9729790534408216e-05, - "loss": 0.3964, - "step": 1968 - }, - { - "epoch": 0.185487859447493, - "grad_norm": 0.7992582321166992, - "learning_rate": 1.972944177027158e-05, - "loss": 0.3735, - "step": 1969 - }, - { - "epoch": 0.18558206354066084, - "grad_norm": 0.7629032731056213, - "learning_rate": 1.9729092784288618e-05, - "loss": 0.339, - "step": 1970 - }, - { - "epoch": 0.1856762676338287, - "grad_norm": 0.9305042624473572, - "learning_rate": 1.9728743576467294e-05, - "loss": 0.3744, - "step": 1971 - }, - { - "epoch": 0.18577047172699654, - "grad_norm": 0.8268446922302246, - "learning_rate": 1.9728394146815573e-05, - "loss": 0.3602, - "step": 1972 - }, - { - "epoch": 0.1858646758201644, - "grad_norm": 0.8215991258621216, - "learning_rate": 1.972804449534142e-05, - "loss": 0.3533, - "step": 1973 - }, - { - "epoch": 0.18595887991333224, - "grad_norm": 0.9372973442077637, - "learning_rate": 1.9727694622052805e-05, - "loss": 0.4443, - "step": 1974 - }, - { - "epoch": 0.18605308400650009, - "grad_norm": 0.8777764439582825, - "learning_rate": 1.9727344526957713e-05, - "loss": 0.3648, - "step": 1975 - }, - { - "epoch": 0.18614728809966793, - "grad_norm": 0.9062953591346741, - 
"learning_rate": 1.972699421006412e-05, - "loss": 0.3801, - "step": 1976 - }, - { - "epoch": 0.18624149219283578, - "grad_norm": 0.8242834210395813, - "learning_rate": 1.9726643671380014e-05, - "loss": 0.3477, - "step": 1977 - }, - { - "epoch": 0.18633569628600363, - "grad_norm": 0.9338749051094055, - "learning_rate": 1.9726292910913393e-05, - "loss": 0.3845, - "step": 1978 - }, - { - "epoch": 0.18642990037917148, - "grad_norm": 0.8638278841972351, - "learning_rate": 1.972594192867225e-05, - "loss": 0.3864, - "step": 1979 - }, - { - "epoch": 0.18652410447233933, - "grad_norm": 0.9354991316795349, - "learning_rate": 1.9725590724664587e-05, - "loss": 0.3864, - "step": 1980 - }, - { - "epoch": 0.18661830856550718, - "grad_norm": 0.7907112836837769, - "learning_rate": 1.972523929889842e-05, - "loss": 0.3381, - "step": 1981 - }, - { - "epoch": 0.18671251265867503, - "grad_norm": 0.8652738928794861, - "learning_rate": 1.9724887651381756e-05, - "loss": 0.3674, - "step": 1982 - }, - { - "epoch": 0.18680671675184288, - "grad_norm": 0.9682350754737854, - "learning_rate": 1.972453578212261e-05, - "loss": 0.3665, - "step": 1983 - }, - { - "epoch": 0.18690092084501073, - "grad_norm": 0.9161863327026367, - "learning_rate": 1.972418369112901e-05, - "loss": 0.397, - "step": 1984 - }, - { - "epoch": 0.18699512493817858, - "grad_norm": 0.8780052661895752, - "learning_rate": 1.972383137840898e-05, - "loss": 0.4238, - "step": 1985 - }, - { - "epoch": 0.18708932903134642, - "grad_norm": 0.752722978591919, - "learning_rate": 1.9723478843970562e-05, - "loss": 0.3206, - "step": 1986 - }, - { - "epoch": 0.18718353312451427, - "grad_norm": 0.8473148345947266, - "learning_rate": 1.9723126087821788e-05, - "loss": 0.3449, - "step": 1987 - }, - { - "epoch": 0.18727773721768212, - "grad_norm": 0.9261614084243774, - "learning_rate": 1.97227731099707e-05, - "loss": 0.4142, - "step": 1988 - }, - { - "epoch": 0.18737194131084994, - "grad_norm": 1.017713189125061, - "learning_rate": 1.972241991042535e-05, - "loss": 0.4077, - "step": 1989 - }, - { - "epoch": 0.1874661454040178, - "grad_norm": 0.8296705484390259, - "learning_rate": 1.9722066489193788e-05, - "loss": 0.4005, - "step": 1990 - }, - { - "epoch": 0.18756034949718564, - "grad_norm": 0.8227131366729736, - "learning_rate": 1.9721712846284076e-05, - "loss": 0.4067, - "step": 1991 - }, - { - "epoch": 0.1876545535903535, - "grad_norm": 0.7832923531532288, - "learning_rate": 1.9721358981704276e-05, - "loss": 0.3092, - "step": 1992 - }, - { - "epoch": 0.18774875768352134, - "grad_norm": 0.8305931091308594, - "learning_rate": 1.9721004895462457e-05, - "loss": 0.3452, - "step": 1993 - }, - { - "epoch": 0.1878429617766892, - "grad_norm": 0.8937137722969055, - "learning_rate": 1.9720650587566693e-05, - "loss": 0.3766, - "step": 1994 - }, - { - "epoch": 0.18793716586985704, - "grad_norm": 0.855707049369812, - "learning_rate": 1.972029605802506e-05, - "loss": 0.3421, - "step": 1995 - }, - { - "epoch": 0.18803136996302489, - "grad_norm": 0.8995879292488098, - "learning_rate": 1.9719941306845647e-05, - "loss": 0.3777, - "step": 1996 - }, - { - "epoch": 0.18812557405619273, - "grad_norm": 0.7978242039680481, - "learning_rate": 1.971958633403654e-05, - "loss": 0.3401, - "step": 1997 - }, - { - "epoch": 0.18821977814936058, - "grad_norm": 0.8712006211280823, - "learning_rate": 1.9719231139605833e-05, - "loss": 0.4002, - "step": 1998 - }, - { - "epoch": 0.18831398224252843, - "grad_norm": 0.7887129187583923, - "learning_rate": 1.9718875723561622e-05, - "loss": 0.3447, - "step": 1999 - 
}, - { - "epoch": 0.18840818633569628, - "grad_norm": 0.7492456436157227, - "learning_rate": 1.9718520085912017e-05, - "loss": 0.3674, - "step": 2000 - }, - { - "epoch": 0.18850239042886413, - "grad_norm": 0.8848216533660889, - "learning_rate": 1.971816422666512e-05, - "loss": 0.4458, - "step": 2001 - }, - { - "epoch": 0.18859659452203198, - "grad_norm": 1.1236287355422974, - "learning_rate": 1.9717808145829056e-05, - "loss": 0.3481, - "step": 2002 - }, - { - "epoch": 0.18869079861519983, - "grad_norm": 0.8653777241706848, - "learning_rate": 1.9717451843411934e-05, - "loss": 0.3864, - "step": 2003 - }, - { - "epoch": 0.18878500270836768, - "grad_norm": 0.887043833732605, - "learning_rate": 1.971709531942188e-05, - "loss": 0.3613, - "step": 2004 - }, - { - "epoch": 0.18887920680153553, - "grad_norm": 0.816224217414856, - "learning_rate": 1.9716738573867025e-05, - "loss": 0.3732, - "step": 2005 - }, - { - "epoch": 0.18897341089470338, - "grad_norm": 0.8825797438621521, - "learning_rate": 1.9716381606755502e-05, - "loss": 0.3687, - "step": 2006 - }, - { - "epoch": 0.18906761498787122, - "grad_norm": 0.8316991925239563, - "learning_rate": 1.9716024418095457e-05, - "loss": 0.4058, - "step": 2007 - }, - { - "epoch": 0.18916181908103907, - "grad_norm": 0.9464976787567139, - "learning_rate": 1.9715667007895026e-05, - "loss": 0.4656, - "step": 2008 - }, - { - "epoch": 0.18925602317420692, - "grad_norm": 0.8076721429824829, - "learning_rate": 1.971530937616236e-05, - "loss": 0.3652, - "step": 2009 - }, - { - "epoch": 0.18935022726737477, - "grad_norm": 0.9603601098060608, - "learning_rate": 1.9714951522905618e-05, - "loss": 0.4232, - "step": 2010 - }, - { - "epoch": 0.18944443136054262, - "grad_norm": 0.8963274359703064, - "learning_rate": 1.9714593448132955e-05, - "loss": 0.3481, - "step": 2011 - }, - { - "epoch": 0.18953863545371047, - "grad_norm": 0.7713266611099243, - "learning_rate": 1.9714235151852537e-05, - "loss": 0.3707, - "step": 2012 - }, - { - "epoch": 0.18963283954687832, - "grad_norm": 0.9094200134277344, - "learning_rate": 1.971387663407254e-05, - "loss": 0.4499, - "step": 2013 - }, - { - "epoch": 0.18972704364004617, - "grad_norm": 0.9872961044311523, - "learning_rate": 1.971351789480113e-05, - "loss": 0.4114, - "step": 2014 - }, - { - "epoch": 0.18982124773321402, - "grad_norm": 0.9223184585571289, - "learning_rate": 1.9713158934046485e-05, - "loss": 0.4124, - "step": 2015 - }, - { - "epoch": 0.18991545182638186, - "grad_norm": 0.8363121151924133, - "learning_rate": 1.9712799751816797e-05, - "loss": 0.3427, - "step": 2016 - }, - { - "epoch": 0.1900096559195497, - "grad_norm": 0.9477868676185608, - "learning_rate": 1.9712440348120256e-05, - "loss": 0.444, - "step": 2017 - }, - { - "epoch": 0.19010386001271756, - "grad_norm": 0.892019510269165, - "learning_rate": 1.9712080722965052e-05, - "loss": 0.359, - "step": 2018 - }, - { - "epoch": 0.1901980641058854, - "grad_norm": 0.857984721660614, - "learning_rate": 1.9711720876359387e-05, - "loss": 0.3233, - "step": 2019 - }, - { - "epoch": 0.19029226819905326, - "grad_norm": 0.9077023863792419, - "learning_rate": 1.971136080831147e-05, - "loss": 0.4238, - "step": 2020 - }, - { - "epoch": 0.1903864722922211, - "grad_norm": 0.9694737792015076, - "learning_rate": 1.9711000518829505e-05, - "loss": 0.4655, - "step": 2021 - }, - { - "epoch": 0.19048067638538896, - "grad_norm": 1.1711987257003784, - "learning_rate": 1.971064000792171e-05, - "loss": 0.3534, - "step": 2022 - }, - { - "epoch": 0.19057488047855678, - "grad_norm": 0.8994882106781006, 
- "learning_rate": 1.9710279275596307e-05, - "loss": 0.4086, - "step": 2023 - }, - { - "epoch": 0.19066908457172463, - "grad_norm": 0.9420601725578308, - "learning_rate": 1.9709918321861517e-05, - "loss": 0.349, - "step": 2024 - }, - { - "epoch": 0.19076328866489248, - "grad_norm": 0.8988878726959229, - "learning_rate": 1.9709557146725572e-05, - "loss": 0.3777, - "step": 2025 - }, - { - "epoch": 0.19085749275806033, - "grad_norm": 1.0279639959335327, - "learning_rate": 1.970919575019671e-05, - "loss": 0.44, - "step": 2026 - }, - { - "epoch": 0.19095169685122818, - "grad_norm": 0.9482018947601318, - "learning_rate": 1.970883413228317e-05, - "loss": 0.3635, - "step": 2027 - }, - { - "epoch": 0.19104590094439602, - "grad_norm": 0.8100656867027283, - "learning_rate": 1.9708472292993195e-05, - "loss": 0.3353, - "step": 2028 - }, - { - "epoch": 0.19114010503756387, - "grad_norm": 0.8380219340324402, - "learning_rate": 1.9708110232335035e-05, - "loss": 0.3713, - "step": 2029 - }, - { - "epoch": 0.19123430913073172, - "grad_norm": 0.8911309838294983, - "learning_rate": 1.9707747950316953e-05, - "loss": 0.4015, - "step": 2030 - }, - { - "epoch": 0.19132851322389957, - "grad_norm": 0.8748728036880493, - "learning_rate": 1.97073854469472e-05, - "loss": 0.4052, - "step": 2031 - }, - { - "epoch": 0.19142271731706742, - "grad_norm": 0.8939458727836609, - "learning_rate": 1.970702272223405e-05, - "loss": 0.3748, - "step": 2032 - }, - { - "epoch": 0.19151692141023527, - "grad_norm": 0.9449240565299988, - "learning_rate": 1.9706659776185767e-05, - "loss": 0.3506, - "step": 2033 - }, - { - "epoch": 0.19161112550340312, - "grad_norm": 0.7787311673164368, - "learning_rate": 1.970629660881063e-05, - "loss": 0.337, - "step": 2034 - }, - { - "epoch": 0.19170532959657097, - "grad_norm": 0.9287323951721191, - "learning_rate": 1.9705933220116918e-05, - "loss": 0.4118, - "step": 2035 - }, - { - "epoch": 0.19179953368973882, - "grad_norm": 1.080183744430542, - "learning_rate": 1.970556961011292e-05, - "loss": 0.451, - "step": 2036 - }, - { - "epoch": 0.19189373778290666, - "grad_norm": 0.8874815106391907, - "learning_rate": 1.9705205778806926e-05, - "loss": 0.3886, - "step": 2037 - }, - { - "epoch": 0.1919879418760745, - "grad_norm": 0.9046630263328552, - "learning_rate": 1.9704841726207228e-05, - "loss": 0.3824, - "step": 2038 - }, - { - "epoch": 0.19208214596924236, - "grad_norm": 0.8540805578231812, - "learning_rate": 1.970447745232213e-05, - "loss": 0.4134, - "step": 2039 - }, - { - "epoch": 0.1921763500624102, - "grad_norm": 0.7967076897621155, - "learning_rate": 1.970411295715994e-05, - "loss": 0.3744, - "step": 2040 - }, - { - "epoch": 0.19227055415557806, - "grad_norm": 1.020021677017212, - "learning_rate": 1.970374824072897e-05, - "loss": 0.4387, - "step": 2041 - }, - { - "epoch": 0.1923647582487459, - "grad_norm": 0.954599142074585, - "learning_rate": 1.9703383303037525e-05, - "loss": 0.3685, - "step": 2042 - }, - { - "epoch": 0.19245896234191376, - "grad_norm": 0.913081169128418, - "learning_rate": 1.970301814409394e-05, - "loss": 0.3236, - "step": 2043 - }, - { - "epoch": 0.1925531664350816, - "grad_norm": 1.0290967226028442, - "learning_rate": 1.9702652763906532e-05, - "loss": 0.3609, - "step": 2044 - }, - { - "epoch": 0.19264737052824946, - "grad_norm": 0.9929534196853638, - "learning_rate": 1.9702287162483634e-05, - "loss": 0.4037, - "step": 2045 - }, - { - "epoch": 0.1927415746214173, - "grad_norm": 0.9307869076728821, - "learning_rate": 1.970192133983359e-05, - "loss": 0.39, - "step": 2046 - }, - { - 
"epoch": 0.19283577871458515, - "grad_norm": 0.7979583740234375, - "learning_rate": 1.970155529596473e-05, - "loss": 0.33, - "step": 2047 - }, - { - "epoch": 0.192929982807753, - "grad_norm": 0.9032715559005737, - "learning_rate": 1.9701189030885407e-05, - "loss": 0.4202, - "step": 2048 - }, - { - "epoch": 0.19302418690092085, - "grad_norm": 0.927097737789154, - "learning_rate": 1.970082254460397e-05, - "loss": 0.3686, - "step": 2049 - }, - { - "epoch": 0.1931183909940887, - "grad_norm": 0.9695465564727783, - "learning_rate": 1.970045583712878e-05, - "loss": 0.3907, - "step": 2050 - }, - { - "epoch": 0.19321259508725655, - "grad_norm": 0.9572463035583496, - "learning_rate": 1.970008890846819e-05, - "loss": 0.384, - "step": 2051 - }, - { - "epoch": 0.1933067991804244, - "grad_norm": 0.8238348364830017, - "learning_rate": 1.9699721758630573e-05, - "loss": 0.3952, - "step": 2052 - }, - { - "epoch": 0.19340100327359225, - "grad_norm": 0.9012622237205505, - "learning_rate": 1.96993543876243e-05, - "loss": 0.3955, - "step": 2053 - }, - { - "epoch": 0.1934952073667601, - "grad_norm": 0.9931719899177551, - "learning_rate": 1.969898679545775e-05, - "loss": 0.3954, - "step": 2054 - }, - { - "epoch": 0.19358941145992795, - "grad_norm": 0.8941454887390137, - "learning_rate": 1.9698618982139294e-05, - "loss": 0.3701, - "step": 2055 - }, - { - "epoch": 0.1936836155530958, - "grad_norm": 0.9120371341705322, - "learning_rate": 1.969825094767733e-05, - "loss": 0.4075, - "step": 2056 - }, - { - "epoch": 0.19377781964626364, - "grad_norm": 0.8839127421379089, - "learning_rate": 1.9697882692080247e-05, - "loss": 0.3868, - "step": 2057 - }, - { - "epoch": 0.19387202373943146, - "grad_norm": 0.9981152415275574, - "learning_rate": 1.969751421535644e-05, - "loss": 0.3779, - "step": 2058 - }, - { - "epoch": 0.19396622783259931, - "grad_norm": 0.931559681892395, - "learning_rate": 1.969714551751431e-05, - "loss": 0.3703, - "step": 2059 - }, - { - "epoch": 0.19406043192576716, - "grad_norm": 0.9200083017349243, - "learning_rate": 1.9696776598562265e-05, - "loss": 0.3865, - "step": 2060 - }, - { - "epoch": 0.194154636018935, - "grad_norm": 0.8785022497177124, - "learning_rate": 1.969640745850872e-05, - "loss": 0.3803, - "step": 2061 - }, - { - "epoch": 0.19424884011210286, - "grad_norm": 0.874745786190033, - "learning_rate": 1.9696038097362083e-05, - "loss": 0.3634, - "step": 2062 - }, - { - "epoch": 0.1943430442052707, - "grad_norm": 0.8995983600616455, - "learning_rate": 1.969566851513079e-05, - "loss": 0.3825, - "step": 2063 - }, - { - "epoch": 0.19443724829843856, - "grad_norm": 0.8460052013397217, - "learning_rate": 1.9695298711823255e-05, - "loss": 0.3687, - "step": 2064 - }, - { - "epoch": 0.1945314523916064, - "grad_norm": 0.862362265586853, - "learning_rate": 1.9694928687447915e-05, - "loss": 0.4034, - "step": 2065 - }, - { - "epoch": 0.19462565648477426, - "grad_norm": 0.8720821142196655, - "learning_rate": 1.969455844201321e-05, - "loss": 0.3996, - "step": 2066 - }, - { - "epoch": 0.1947198605779421, - "grad_norm": 0.7716377973556519, - "learning_rate": 1.9694187975527577e-05, - "loss": 0.396, - "step": 2067 - }, - { - "epoch": 0.19481406467110995, - "grad_norm": 0.8582832217216492, - "learning_rate": 1.969381728799947e-05, - "loss": 0.3707, - "step": 2068 - }, - { - "epoch": 0.1949082687642778, - "grad_norm": 0.8442918062210083, - "learning_rate": 1.9693446379437336e-05, - "loss": 0.3937, - "step": 2069 - }, - { - "epoch": 0.19500247285744565, - "grad_norm": 0.8860918879508972, - "learning_rate": 
1.9693075249849628e-05, - "loss": 0.3463, - "step": 2070 - }, - { - "epoch": 0.1950966769506135, - "grad_norm": 0.9189453721046448, - "learning_rate": 1.969270389924482e-05, - "loss": 0.3964, - "step": 2071 - }, - { - "epoch": 0.19519088104378135, - "grad_norm": 0.8853801488876343, - "learning_rate": 1.969233232763137e-05, - "loss": 0.3894, - "step": 2072 - }, - { - "epoch": 0.1952850851369492, - "grad_norm": 0.8096766471862793, - "learning_rate": 1.9691960535017754e-05, - "loss": 0.329, - "step": 2073 - }, - { - "epoch": 0.19537928923011705, - "grad_norm": 0.9416137337684631, - "learning_rate": 1.969158852141245e-05, - "loss": 0.4208, - "step": 2074 - }, - { - "epoch": 0.1954734933232849, - "grad_norm": 0.9120296239852905, - "learning_rate": 1.969121628682394e-05, - "loss": 0.3593, - "step": 2075 - }, - { - "epoch": 0.19556769741645275, - "grad_norm": 0.9446659684181213, - "learning_rate": 1.9690843831260705e-05, - "loss": 0.4148, - "step": 2076 - }, - { - "epoch": 0.1956619015096206, - "grad_norm": 0.8542370796203613, - "learning_rate": 1.969047115473125e-05, - "loss": 0.3482, - "step": 2077 - }, - { - "epoch": 0.19575610560278844, - "grad_norm": 0.9640230536460876, - "learning_rate": 1.9690098257244063e-05, - "loss": 0.3716, - "step": 2078 - }, - { - "epoch": 0.1958503096959563, - "grad_norm": 0.9649361371994019, - "learning_rate": 1.968972513880765e-05, - "loss": 0.4225, - "step": 2079 - }, - { - "epoch": 0.19594451378912414, - "grad_norm": 0.8834923505783081, - "learning_rate": 1.968935179943052e-05, - "loss": 0.3627, - "step": 2080 - }, - { - "epoch": 0.196038717882292, - "grad_norm": 0.9990054368972778, - "learning_rate": 1.9688978239121183e-05, - "loss": 0.4168, - "step": 2081 - }, - { - "epoch": 0.19613292197545984, - "grad_norm": 0.8540651202201843, - "learning_rate": 1.9688604457888157e-05, - "loss": 0.3607, - "step": 2082 - }, - { - "epoch": 0.1962271260686277, - "grad_norm": 0.8560302257537842, - "learning_rate": 1.9688230455739966e-05, - "loss": 0.391, - "step": 2083 - }, - { - "epoch": 0.19632133016179554, - "grad_norm": 0.8272385597229004, - "learning_rate": 1.968785623268514e-05, - "loss": 0.3915, - "step": 2084 - }, - { - "epoch": 0.1964155342549634, - "grad_norm": 0.9685487151145935, - "learning_rate": 1.9687481788732207e-05, - "loss": 0.3943, - "step": 2085 - }, - { - "epoch": 0.19650973834813124, - "grad_norm": 0.8381835222244263, - "learning_rate": 1.9687107123889708e-05, - "loss": 0.3361, - "step": 2086 - }, - { - "epoch": 0.19660394244129908, - "grad_norm": 0.8928012847900391, - "learning_rate": 1.9686732238166183e-05, - "loss": 0.3813, - "step": 2087 - }, - { - "epoch": 0.19669814653446693, - "grad_norm": 0.9864948987960815, - "learning_rate": 1.9686357131570184e-05, - "loss": 0.3955, - "step": 2088 - }, - { - "epoch": 0.19679235062763478, - "grad_norm": 0.81718909740448, - "learning_rate": 1.9685981804110263e-05, - "loss": 0.3256, - "step": 2089 - }, - { - "epoch": 0.19688655472080263, - "grad_norm": 0.8038667440414429, - "learning_rate": 1.9685606255794978e-05, - "loss": 0.3451, - "step": 2090 - }, - { - "epoch": 0.19698075881397048, - "grad_norm": 0.8458711504936218, - "learning_rate": 1.9685230486632888e-05, - "loss": 0.3539, - "step": 2091 - }, - { - "epoch": 0.1970749629071383, - "grad_norm": 0.733620822429657, - "learning_rate": 1.9684854496632567e-05, - "loss": 0.3123, - "step": 2092 - }, - { - "epoch": 0.19716916700030615, - "grad_norm": 0.8875676989555359, - "learning_rate": 1.9684478285802585e-05, - "loss": 0.3824, - "step": 2093 - }, - { - "epoch": 
0.197263371093474, - "grad_norm": 0.895675778388977, - "learning_rate": 1.9684101854151517e-05, - "loss": 0.3813, - "step": 2094 - }, - { - "epoch": 0.19735757518664185, - "grad_norm": 0.7959586381912231, - "learning_rate": 1.9683725201687955e-05, - "loss": 0.38, - "step": 2095 - }, - { - "epoch": 0.1974517792798097, - "grad_norm": 0.9296727180480957, - "learning_rate": 1.968334832842048e-05, - "loss": 0.3732, - "step": 2096 - }, - { - "epoch": 0.19754598337297755, - "grad_norm": 1.003315806388855, - "learning_rate": 1.9682971234357688e-05, - "loss": 0.4405, - "step": 2097 - }, - { - "epoch": 0.1976401874661454, - "grad_norm": 0.919394314289093, - "learning_rate": 1.9682593919508177e-05, - "loss": 0.3746, - "step": 2098 - }, - { - "epoch": 0.19773439155931324, - "grad_norm": 0.9472411870956421, - "learning_rate": 1.968221638388055e-05, - "loss": 0.3974, - "step": 2099 - }, - { - "epoch": 0.1978285956524811, - "grad_norm": 0.9083194136619568, - "learning_rate": 1.9681838627483416e-05, - "loss": 0.3683, - "step": 2100 - }, - { - "epoch": 0.19792279974564894, - "grad_norm": 0.9355015158653259, - "learning_rate": 1.9681460650325387e-05, - "loss": 0.4127, - "step": 2101 - }, - { - "epoch": 0.1980170038388168, - "grad_norm": 0.9019935131072998, - "learning_rate": 1.9681082452415084e-05, - "loss": 0.4031, - "step": 2102 - }, - { - "epoch": 0.19811120793198464, - "grad_norm": 0.8776118755340576, - "learning_rate": 1.9680704033761128e-05, - "loss": 0.4174, - "step": 2103 - }, - { - "epoch": 0.1982054120251525, - "grad_norm": 0.881554901599884, - "learning_rate": 1.968032539437215e-05, - "loss": 0.3819, - "step": 2104 - }, - { - "epoch": 0.19829961611832034, - "grad_norm": 0.9217504262924194, - "learning_rate": 1.967994653425678e-05, - "loss": 0.4157, - "step": 2105 - }, - { - "epoch": 0.1983938202114882, - "grad_norm": 0.9133352041244507, - "learning_rate": 1.967956745342366e-05, - "loss": 0.3631, - "step": 2106 - }, - { - "epoch": 0.19848802430465604, - "grad_norm": 0.8852596879005432, - "learning_rate": 1.967918815188143e-05, - "loss": 0.4074, - "step": 2107 - }, - { - "epoch": 0.19858222839782388, - "grad_norm": 0.8754292130470276, - "learning_rate": 1.9678808629638744e-05, - "loss": 0.3917, - "step": 2108 - }, - { - "epoch": 0.19867643249099173, - "grad_norm": 1.0540508031845093, - "learning_rate": 1.967842888670425e-05, - "loss": 0.4664, - "step": 2109 - }, - { - "epoch": 0.19877063658415958, - "grad_norm": 0.8133193254470825, - "learning_rate": 1.9678048923086614e-05, - "loss": 0.3705, - "step": 2110 - }, - { - "epoch": 0.19886484067732743, - "grad_norm": 0.9278148412704468, - "learning_rate": 1.9677668738794492e-05, - "loss": 0.3982, - "step": 2111 - }, - { - "epoch": 0.19895904477049528, - "grad_norm": 0.9273229241371155, - "learning_rate": 1.9677288333836555e-05, - "loss": 0.3641, - "step": 2112 - }, - { - "epoch": 0.19905324886366313, - "grad_norm": 0.8346200585365295, - "learning_rate": 1.9676907708221476e-05, - "loss": 0.329, - "step": 2113 - }, - { - "epoch": 0.19914745295683098, - "grad_norm": 0.8903515934944153, - "learning_rate": 1.9676526861957944e-05, - "loss": 0.3383, - "step": 2114 - }, - { - "epoch": 0.19924165704999883, - "grad_norm": 0.9228113293647766, - "learning_rate": 1.9676145795054627e-05, - "loss": 0.3719, - "step": 2115 - }, - { - "epoch": 0.19933586114316668, - "grad_norm": 0.9408248066902161, - "learning_rate": 1.9675764507520225e-05, - "loss": 0.3956, - "step": 2116 - }, - { - "epoch": 0.19943006523633452, - "grad_norm": 0.8687821626663208, - "learning_rate": 
1.9675382999363422e-05, - "loss": 0.3872, - "step": 2117 - }, - { - "epoch": 0.19952426932950237, - "grad_norm": 1.0548046827316284, - "learning_rate": 1.9675001270592932e-05, - "loss": 0.3836, - "step": 2118 - }, - { - "epoch": 0.19961847342267022, - "grad_norm": 0.743798017501831, - "learning_rate": 1.9674619321217447e-05, - "loss": 0.3141, - "step": 2119 - }, - { - "epoch": 0.19971267751583807, - "grad_norm": 0.8419427275657654, - "learning_rate": 1.9674237151245678e-05, - "loss": 0.3377, - "step": 2120 - }, - { - "epoch": 0.19980688160900592, - "grad_norm": 0.9172351360321045, - "learning_rate": 1.9673854760686343e-05, - "loss": 0.4094, - "step": 2121 - }, - { - "epoch": 0.19990108570217377, - "grad_norm": 0.8844809532165527, - "learning_rate": 1.9673472149548153e-05, - "loss": 0.3762, - "step": 2122 - }, - { - "epoch": 0.19999528979534162, - "grad_norm": 0.9465714693069458, - "learning_rate": 1.9673089317839843e-05, - "loss": 0.3829, - "step": 2123 - }, - { - "epoch": 0.20008949388850947, - "grad_norm": 0.7922621369361877, - "learning_rate": 1.9672706265570137e-05, - "loss": 0.3652, - "step": 2124 - }, - { - "epoch": 0.20018369798167732, - "grad_norm": 0.8763195872306824, - "learning_rate": 1.9672322992747766e-05, - "loss": 0.4096, - "step": 2125 - }, - { - "epoch": 0.20027790207484517, - "grad_norm": 0.8922156095504761, - "learning_rate": 1.9671939499381475e-05, - "loss": 0.4103, - "step": 2126 - }, - { - "epoch": 0.200372106168013, - "grad_norm": 0.8136999607086182, - "learning_rate": 1.967155578548e-05, - "loss": 0.3619, - "step": 2127 - }, - { - "epoch": 0.20046631026118084, - "grad_norm": 0.8069179654121399, - "learning_rate": 1.96711718510521e-05, - "loss": 0.3899, - "step": 2128 - }, - { - "epoch": 0.20056051435434868, - "grad_norm": 0.8518467545509338, - "learning_rate": 1.9670787696106525e-05, - "loss": 0.3772, - "step": 2129 - }, - { - "epoch": 0.20065471844751653, - "grad_norm": 0.8809990286827087, - "learning_rate": 1.967040332065204e-05, - "loss": 0.4185, - "step": 2130 - }, - { - "epoch": 0.20074892254068438, - "grad_norm": 0.6934998631477356, - "learning_rate": 1.9670018724697394e-05, - "loss": 0.3029, - "step": 2131 - }, - { - "epoch": 0.20084312663385223, - "grad_norm": 0.7762834429740906, - "learning_rate": 1.9669633908251372e-05, - "loss": 0.3634, - "step": 2132 - }, - { - "epoch": 0.20093733072702008, - "grad_norm": 0.9136695265769958, - "learning_rate": 1.9669248871322738e-05, - "loss": 0.395, - "step": 2133 - }, - { - "epoch": 0.20103153482018793, - "grad_norm": 1.4183223247528076, - "learning_rate": 1.966886361392028e-05, - "loss": 0.3731, - "step": 2134 - }, - { - "epoch": 0.20112573891335578, - "grad_norm": 1.0961859226226807, - "learning_rate": 1.9668478136052776e-05, - "loss": 0.3798, - "step": 2135 - }, - { - "epoch": 0.20121994300652363, - "grad_norm": 0.8356830477714539, - "learning_rate": 1.966809243772902e-05, - "loss": 0.3971, - "step": 2136 - }, - { - "epoch": 0.20131414709969148, - "grad_norm": 0.8490124940872192, - "learning_rate": 1.9667706518957803e-05, - "loss": 0.3835, - "step": 2137 - }, - { - "epoch": 0.20140835119285933, - "grad_norm": 0.8329681158065796, - "learning_rate": 1.966732037974793e-05, - "loss": 0.3249, - "step": 2138 - }, - { - "epoch": 0.20150255528602717, - "grad_norm": 0.8552191257476807, - "learning_rate": 1.9666934020108192e-05, - "loss": 0.3649, - "step": 2139 - }, - { - "epoch": 0.20159675937919502, - "grad_norm": 0.9588862061500549, - "learning_rate": 1.9666547440047417e-05, - "loss": 0.33, - "step": 2140 - }, - { - 
"epoch": 0.20169096347236287, - "grad_norm": 0.8517509698867798, - "learning_rate": 1.9666160639574404e-05, - "loss": 0.3459, - "step": 2141 - }, - { - "epoch": 0.20178516756553072, - "grad_norm": 0.8158048987388611, - "learning_rate": 1.9665773618697988e-05, - "loss": 0.338, - "step": 2142 - }, - { - "epoch": 0.20187937165869857, - "grad_norm": 0.8685369491577148, - "learning_rate": 1.966538637742698e-05, - "loss": 0.3572, - "step": 2143 - }, - { - "epoch": 0.20197357575186642, - "grad_norm": 1.0335828065872192, - "learning_rate": 1.9664998915770213e-05, - "loss": 0.3803, - "step": 2144 - }, - { - "epoch": 0.20206777984503427, - "grad_norm": 0.8579703569412231, - "learning_rate": 1.9664611233736527e-05, - "loss": 0.3764, - "step": 2145 - }, - { - "epoch": 0.20216198393820212, - "grad_norm": 0.8927084803581238, - "learning_rate": 1.966422333133476e-05, - "loss": 0.3703, - "step": 2146 - }, - { - "epoch": 0.20225618803136997, - "grad_norm": 1.055751919746399, - "learning_rate": 1.9663835208573747e-05, - "loss": 0.3258, - "step": 2147 - }, - { - "epoch": 0.20235039212453781, - "grad_norm": 1.0749523639678955, - "learning_rate": 1.966344686546235e-05, - "loss": 0.4551, - "step": 2148 - }, - { - "epoch": 0.20244459621770566, - "grad_norm": 0.8448135256767273, - "learning_rate": 1.966305830200942e-05, - "loss": 0.356, - "step": 2149 - }, - { - "epoch": 0.2025388003108735, - "grad_norm": 1.0148167610168457, - "learning_rate": 1.966266951822382e-05, - "loss": 0.3934, - "step": 2150 - }, - { - "epoch": 0.20263300440404136, - "grad_norm": 0.9345589280128479, - "learning_rate": 1.9662280514114408e-05, - "loss": 0.4071, - "step": 2151 - }, - { - "epoch": 0.2027272084972092, - "grad_norm": 0.7750939130783081, - "learning_rate": 1.9661891289690056e-05, - "loss": 0.3413, - "step": 2152 - }, - { - "epoch": 0.20282141259037706, - "grad_norm": 0.9436439275741577, - "learning_rate": 1.9661501844959642e-05, - "loss": 0.3759, - "step": 2153 - }, - { - "epoch": 0.2029156166835449, - "grad_norm": 0.8349786400794983, - "learning_rate": 1.966111217993204e-05, - "loss": 0.38, - "step": 2154 - }, - { - "epoch": 0.20300982077671276, - "grad_norm": 0.8802725672721863, - "learning_rate": 1.9660722294616148e-05, - "loss": 0.3607, - "step": 2155 - }, - { - "epoch": 0.2031040248698806, - "grad_norm": 0.8655793070793152, - "learning_rate": 1.966033218902084e-05, - "loss": 0.3965, - "step": 2156 - }, - { - "epoch": 0.20319822896304846, - "grad_norm": 0.9193881750106812, - "learning_rate": 1.965994186315502e-05, - "loss": 0.3983, - "step": 2157 - }, - { - "epoch": 0.2032924330562163, - "grad_norm": 0.9747195839881897, - "learning_rate": 1.9659551317027586e-05, - "loss": 0.4385, - "step": 2158 - }, - { - "epoch": 0.20338663714938415, - "grad_norm": 0.8492395281791687, - "learning_rate": 1.9659160550647446e-05, - "loss": 0.3729, - "step": 2159 - }, - { - "epoch": 0.203480841242552, - "grad_norm": 0.8659266829490662, - "learning_rate": 1.9658769564023502e-05, - "loss": 0.4089, - "step": 2160 - }, - { - "epoch": 0.20357504533571985, - "grad_norm": 0.8363007307052612, - "learning_rate": 1.9658378357164677e-05, - "loss": 0.4087, - "step": 2161 - }, - { - "epoch": 0.20366924942888767, - "grad_norm": 0.9424063563346863, - "learning_rate": 1.9657986930079888e-05, - "loss": 0.3999, - "step": 2162 - }, - { - "epoch": 0.20376345352205552, - "grad_norm": 0.8132457137107849, - "learning_rate": 1.9657595282778063e-05, - "loss": 0.3573, - "step": 2163 - }, - { - "epoch": 0.20385765761522337, - "grad_norm": 0.9497710466384888, - 
"learning_rate": 1.9657203415268128e-05, - "loss": 0.4127, - "step": 2164 - }, - { - "epoch": 0.20395186170839122, - "grad_norm": 0.8072819709777832, - "learning_rate": 1.965681132755902e-05, - "loss": 0.3615, - "step": 2165 - }, - { - "epoch": 0.20404606580155907, - "grad_norm": 0.9138504862785339, - "learning_rate": 1.965641901965968e-05, - "loss": 0.3613, - "step": 2166 - }, - { - "epoch": 0.20414026989472692, - "grad_norm": 1.0799670219421387, - "learning_rate": 1.965602649157905e-05, - "loss": 0.3837, - "step": 2167 - }, - { - "epoch": 0.20423447398789477, - "grad_norm": 0.8778116106987, - "learning_rate": 1.9655633743326084e-05, - "loss": 0.3871, - "step": 2168 - }, - { - "epoch": 0.20432867808106261, - "grad_norm": 0.9556635022163391, - "learning_rate": 1.9655240774909736e-05, - "loss": 0.3954, - "step": 2169 - }, - { - "epoch": 0.20442288217423046, - "grad_norm": 0.8863227963447571, - "learning_rate": 1.9654847586338966e-05, - "loss": 0.4123, - "step": 2170 - }, - { - "epoch": 0.2045170862673983, - "grad_norm": 0.805164098739624, - "learning_rate": 1.9654454177622742e-05, - "loss": 0.3697, - "step": 2171 - }, - { - "epoch": 0.20461129036056616, - "grad_norm": 0.9202094674110413, - "learning_rate": 1.965406054877003e-05, - "loss": 0.4277, - "step": 2172 - }, - { - "epoch": 0.204705494453734, - "grad_norm": 0.8749397397041321, - "learning_rate": 1.9653666699789807e-05, - "loss": 0.3708, - "step": 2173 - }, - { - "epoch": 0.20479969854690186, - "grad_norm": 0.8218052387237549, - "learning_rate": 1.9653272630691053e-05, - "loss": 0.3917, - "step": 2174 - }, - { - "epoch": 0.2048939026400697, - "grad_norm": 0.9710571765899658, - "learning_rate": 1.9652878341482755e-05, - "loss": 0.4102, - "step": 2175 - }, - { - "epoch": 0.20498810673323756, - "grad_norm": 0.915249228477478, - "learning_rate": 1.96524838321739e-05, - "loss": 0.395, - "step": 2176 - }, - { - "epoch": 0.2050823108264054, - "grad_norm": 1.1843119859695435, - "learning_rate": 1.9652089102773487e-05, - "loss": 0.3645, - "step": 2177 - }, - { - "epoch": 0.20517651491957326, - "grad_norm": 1.005880355834961, - "learning_rate": 1.9651694153290518e-05, - "loss": 0.4164, - "step": 2178 - }, - { - "epoch": 0.2052707190127411, - "grad_norm": 1.2592356204986572, - "learning_rate": 1.9651298983733993e-05, - "loss": 0.4019, - "step": 2179 - }, - { - "epoch": 0.20536492310590895, - "grad_norm": 0.9137241840362549, - "learning_rate": 1.9650903594112924e-05, - "loss": 0.3834, - "step": 2180 - }, - { - "epoch": 0.2054591271990768, - "grad_norm": 0.989930272102356, - "learning_rate": 1.9650507984436328e-05, - "loss": 0.3791, - "step": 2181 - }, - { - "epoch": 0.20555333129224465, - "grad_norm": 0.9751969575881958, - "learning_rate": 1.9650112154713227e-05, - "loss": 0.3326, - "step": 2182 - }, - { - "epoch": 0.2056475353854125, - "grad_norm": 0.9052295088768005, - "learning_rate": 1.9649716104952644e-05, - "loss": 0.4243, - "step": 2183 - }, - { - "epoch": 0.20574173947858035, - "grad_norm": 0.8224236965179443, - "learning_rate": 1.9649319835163614e-05, - "loss": 0.3847, - "step": 2184 - }, - { - "epoch": 0.2058359435717482, - "grad_norm": 0.9257561564445496, - "learning_rate": 1.964892334535516e-05, - "loss": 0.391, - "step": 2185 - }, - { - "epoch": 0.20593014766491605, - "grad_norm": 0.9604368209838867, - "learning_rate": 1.964852663553634e-05, - "loss": 0.4409, - "step": 2186 - }, - { - "epoch": 0.2060243517580839, - "grad_norm": 1.0182547569274902, - "learning_rate": 1.9648129705716188e-05, - "loss": 0.4224, - "step": 2187 - }, - { - 
"epoch": 0.20611855585125174, - "grad_norm": 0.7713980078697205, - "learning_rate": 1.9647732555903758e-05, - "loss": 0.3666, - "step": 2188 - }, - { - "epoch": 0.2062127599444196, - "grad_norm": 0.8546023368835449, - "learning_rate": 1.9647335186108104e-05, - "loss": 0.3935, - "step": 2189 - }, - { - "epoch": 0.20630696403758744, - "grad_norm": 1.0893975496292114, - "learning_rate": 1.9646937596338287e-05, - "loss": 0.4027, - "step": 2190 - }, - { - "epoch": 0.2064011681307553, - "grad_norm": 0.9164141416549683, - "learning_rate": 1.9646539786603376e-05, - "loss": 0.3996, - "step": 2191 - }, - { - "epoch": 0.20649537222392314, - "grad_norm": 1.1015797853469849, - "learning_rate": 1.9646141756912437e-05, - "loss": 0.3765, - "step": 2192 - }, - { - "epoch": 0.206589576317091, - "grad_norm": 1.004239797592163, - "learning_rate": 1.964574350727455e-05, - "loss": 0.3567, - "step": 2193 - }, - { - "epoch": 0.20668378041025884, - "grad_norm": 0.940284788608551, - "learning_rate": 1.964534503769879e-05, - "loss": 0.4211, - "step": 2194 - }, - { - "epoch": 0.2067779845034267, - "grad_norm": 1.0183979272842407, - "learning_rate": 1.964494634819425e-05, - "loss": 0.4444, - "step": 2195 - }, - { - "epoch": 0.2068721885965945, - "grad_norm": 0.8018262982368469, - "learning_rate": 1.9644547438770016e-05, - "loss": 0.3636, - "step": 2196 - }, - { - "epoch": 0.20696639268976236, - "grad_norm": 0.8908106088638306, - "learning_rate": 1.964414830943518e-05, - "loss": 0.3659, - "step": 2197 - }, - { - "epoch": 0.2070605967829302, - "grad_norm": 0.9282899498939514, - "learning_rate": 1.9643748960198857e-05, - "loss": 0.4189, - "step": 2198 - }, - { - "epoch": 0.20715480087609806, - "grad_norm": 1.0559660196304321, - "learning_rate": 1.9643349391070137e-05, - "loss": 0.3771, - "step": 2199 - }, - { - "epoch": 0.2072490049692659, - "grad_norm": 0.8157504200935364, - "learning_rate": 1.964294960205814e-05, - "loss": 0.369, - "step": 2200 - }, - { - "epoch": 0.20734320906243375, - "grad_norm": 0.779656171798706, - "learning_rate": 1.9642549593171977e-05, - "loss": 0.3366, - "step": 2201 - }, - { - "epoch": 0.2074374131556016, - "grad_norm": 0.8910467028617859, - "learning_rate": 1.964214936442077e-05, - "loss": 0.393, - "step": 2202 - }, - { - "epoch": 0.20753161724876945, - "grad_norm": 0.8166049718856812, - "learning_rate": 1.964174891581365e-05, - "loss": 0.3564, - "step": 2203 - }, - { - "epoch": 0.2076258213419373, - "grad_norm": 0.9754587411880493, - "learning_rate": 1.964134824735974e-05, - "loss": 0.4071, - "step": 2204 - }, - { - "epoch": 0.20772002543510515, - "grad_norm": 0.8878598809242249, - "learning_rate": 1.964094735906818e-05, - "loss": 0.3691, - "step": 2205 - }, - { - "epoch": 0.207814229528273, - "grad_norm": 0.8549135327339172, - "learning_rate": 1.964054625094811e-05, - "loss": 0.3834, - "step": 2206 - }, - { - "epoch": 0.20790843362144085, - "grad_norm": 0.9318049550056458, - "learning_rate": 1.9640144923008674e-05, - "loss": 0.3769, - "step": 2207 - }, - { - "epoch": 0.2080026377146087, - "grad_norm": 0.8580342531204224, - "learning_rate": 1.963974337525903e-05, - "loss": 0.3583, - "step": 2208 - }, - { - "epoch": 0.20809684180777654, - "grad_norm": 0.7783285975456238, - "learning_rate": 1.9639341607708324e-05, - "loss": 0.3304, - "step": 2209 - }, - { - "epoch": 0.2081910459009444, - "grad_norm": 0.8820192813873291, - "learning_rate": 1.9638939620365724e-05, - "loss": 0.3695, - "step": 2210 - }, - { - "epoch": 0.20828524999411224, - "grad_norm": 0.960730791091919, - "learning_rate": 
1.9638537413240395e-05, - "loss": 0.4213, - "step": 2211 - }, - { - "epoch": 0.2083794540872801, - "grad_norm": 1.086413860321045, - "learning_rate": 1.9638134986341502e-05, - "loss": 0.3326, - "step": 2212 - }, - { - "epoch": 0.20847365818044794, - "grad_norm": 0.9690501689910889, - "learning_rate": 1.9637732339678234e-05, - "loss": 0.36, - "step": 2213 - }, - { - "epoch": 0.2085678622736158, - "grad_norm": 0.7819095253944397, - "learning_rate": 1.9637329473259752e-05, - "loss": 0.3757, - "step": 2214 - }, - { - "epoch": 0.20866206636678364, - "grad_norm": 0.8166189193725586, - "learning_rate": 1.9636926387095262e-05, - "loss": 0.3661, - "step": 2215 - }, - { - "epoch": 0.2087562704599515, - "grad_norm": 0.9223487377166748, - "learning_rate": 1.9636523081193946e-05, - "loss": 0.3384, - "step": 2216 - }, - { - "epoch": 0.20885047455311934, - "grad_norm": 0.8755592107772827, - "learning_rate": 1.9636119555565002e-05, - "loss": 0.3726, - "step": 2217 - }, - { - "epoch": 0.20894467864628719, - "grad_norm": 1.0222097635269165, - "learning_rate": 1.9635715810217623e-05, - "loss": 0.3686, - "step": 2218 - }, - { - "epoch": 0.20903888273945503, - "grad_norm": 0.8661210536956787, - "learning_rate": 1.9635311845161026e-05, - "loss": 0.3841, - "step": 2219 - }, - { - "epoch": 0.20913308683262288, - "grad_norm": 1.0370057821273804, - "learning_rate": 1.963490766040442e-05, - "loss": 0.4071, - "step": 2220 - }, - { - "epoch": 0.20922729092579073, - "grad_norm": 0.8432251214981079, - "learning_rate": 1.9634503255957014e-05, - "loss": 0.3458, - "step": 2221 - }, - { - "epoch": 0.20932149501895858, - "grad_norm": 1.3770086765289307, - "learning_rate": 1.9634098631828035e-05, - "loss": 0.3868, - "step": 2222 - }, - { - "epoch": 0.20941569911212643, - "grad_norm": 0.7160378694534302, - "learning_rate": 1.963369378802671e-05, - "loss": 0.302, - "step": 2223 - }, - { - "epoch": 0.20950990320529428, - "grad_norm": 1.0085020065307617, - "learning_rate": 1.9633288724562264e-05, - "loss": 0.354, - "step": 2224 - }, - { - "epoch": 0.20960410729846213, - "grad_norm": 0.8698961138725281, - "learning_rate": 1.963288344144394e-05, - "loss": 0.3967, - "step": 2225 - }, - { - "epoch": 0.20969831139162998, - "grad_norm": 0.8173272609710693, - "learning_rate": 1.9632477938680976e-05, - "loss": 0.3707, - "step": 2226 - }, - { - "epoch": 0.20979251548479783, - "grad_norm": 0.9659057855606079, - "learning_rate": 1.9632072216282617e-05, - "loss": 0.3718, - "step": 2227 - }, - { - "epoch": 0.20988671957796567, - "grad_norm": 0.9052210450172424, - "learning_rate": 1.9631666274258115e-05, - "loss": 0.3932, - "step": 2228 - }, - { - "epoch": 0.20998092367113352, - "grad_norm": 0.7987163066864014, - "learning_rate": 1.9631260112616727e-05, - "loss": 0.3623, - "step": 2229 - }, - { - "epoch": 0.21007512776430137, - "grad_norm": 0.8844078183174133, - "learning_rate": 1.9630853731367715e-05, - "loss": 0.4013, - "step": 2230 - }, - { - "epoch": 0.2101693318574692, - "grad_norm": 1.1359078884124756, - "learning_rate": 1.963044713052034e-05, - "loss": 0.3896, - "step": 2231 - }, - { - "epoch": 0.21026353595063704, - "grad_norm": 0.7967735528945923, - "learning_rate": 1.9630040310083877e-05, - "loss": 0.37, - "step": 2232 - }, - { - "epoch": 0.2103577400438049, - "grad_norm": 1.0041298866271973, - "learning_rate": 1.96296332700676e-05, - "loss": 0.4169, - "step": 2233 - }, - { - "epoch": 0.21045194413697274, - "grad_norm": 0.8157963752746582, - "learning_rate": 1.96292260104808e-05, - "loss": 0.384, - "step": 2234 - }, - { - "epoch": 
0.2105461482301406, - "grad_norm": 0.8149320483207703, - "learning_rate": 1.9628818531332746e-05, - "loss": 0.3826, - "step": 2235 - }, - { - "epoch": 0.21064035232330844, - "grad_norm": 0.8771508932113647, - "learning_rate": 1.9628410832632744e-05, - "loss": 0.3834, - "step": 2236 - }, - { - "epoch": 0.2107345564164763, - "grad_norm": 0.849044919013977, - "learning_rate": 1.9628002914390083e-05, - "loss": 0.4438, - "step": 2237 - }, - { - "epoch": 0.21082876050964414, - "grad_norm": 0.8716703057289124, - "learning_rate": 1.9627594776614065e-05, - "loss": 0.4003, - "step": 2238 - }, - { - "epoch": 0.21092296460281199, - "grad_norm": 0.8728709816932678, - "learning_rate": 1.9627186419313997e-05, - "loss": 0.3574, - "step": 2239 - }, - { - "epoch": 0.21101716869597983, - "grad_norm": 0.781938374042511, - "learning_rate": 1.962677784249919e-05, - "loss": 0.337, - "step": 2240 - }, - { - "epoch": 0.21111137278914768, - "grad_norm": 0.8320380449295044, - "learning_rate": 1.962636904617896e-05, - "loss": 0.3323, - "step": 2241 - }, - { - "epoch": 0.21120557688231553, - "grad_norm": 0.8517211675643921, - "learning_rate": 1.962596003036263e-05, - "loss": 0.3537, - "step": 2242 - }, - { - "epoch": 0.21129978097548338, - "grad_norm": 0.9021350741386414, - "learning_rate": 1.962555079505952e-05, - "loss": 0.3711, - "step": 2243 - }, - { - "epoch": 0.21139398506865123, - "grad_norm": 0.8758695721626282, - "learning_rate": 1.962514134027897e-05, - "loss": 0.3948, - "step": 2244 - }, - { - "epoch": 0.21148818916181908, - "grad_norm": 0.9356995820999146, - "learning_rate": 1.9624731666030307e-05, - "loss": 0.4062, - "step": 2245 - }, - { - "epoch": 0.21158239325498693, - "grad_norm": 0.8839170336723328, - "learning_rate": 1.962432177232288e-05, - "loss": 0.3863, - "step": 2246 - }, - { - "epoch": 0.21167659734815478, - "grad_norm": 0.8100844621658325, - "learning_rate": 1.9623911659166034e-05, - "loss": 0.3789, - "step": 2247 - }, - { - "epoch": 0.21177080144132263, - "grad_norm": 0.8421288728713989, - "learning_rate": 1.9623501326569117e-05, - "loss": 0.3697, - "step": 2248 - }, - { - "epoch": 0.21186500553449047, - "grad_norm": 0.898219883441925, - "learning_rate": 1.962309077454149e-05, - "loss": 0.3782, - "step": 2249 - }, - { - "epoch": 0.21195920962765832, - "grad_norm": 0.8538417816162109, - "learning_rate": 1.9622680003092503e-05, - "loss": 0.3519, - "step": 2250 - }, - { - "epoch": 0.21205341372082617, - "grad_norm": 0.905387282371521, - "learning_rate": 1.9622269012231537e-05, - "loss": 0.394, - "step": 2251 - }, - { - "epoch": 0.21214761781399402, - "grad_norm": 0.9117529392242432, - "learning_rate": 1.9621857801967957e-05, - "loss": 0.3622, - "step": 2252 - }, - { - "epoch": 0.21224182190716187, - "grad_norm": 0.9556719064712524, - "learning_rate": 1.9621446372311134e-05, - "loss": 0.4214, - "step": 2253 - }, - { - "epoch": 0.21233602600032972, - "grad_norm": 0.8761550784111023, - "learning_rate": 1.9621034723270456e-05, - "loss": 0.3746, - "step": 2254 - }, - { - "epoch": 0.21243023009349757, - "grad_norm": 0.9756516814231873, - "learning_rate": 1.9620622854855307e-05, - "loss": 0.3567, - "step": 2255 - }, - { - "epoch": 0.21252443418666542, - "grad_norm": 0.8791412115097046, - "learning_rate": 1.9620210767075076e-05, - "loss": 0.3617, - "step": 2256 - }, - { - "epoch": 0.21261863827983327, - "grad_norm": 0.8773496747016907, - "learning_rate": 1.9619798459939164e-05, - "loss": 0.4002, - "step": 2257 - }, - { - "epoch": 0.21271284237300112, - "grad_norm": 0.8872103095054626, - 
"learning_rate": 1.9619385933456972e-05, - "loss": 0.4032, - "step": 2258 - }, - { - "epoch": 0.21280704646616896, - "grad_norm": 0.9003176093101501, - "learning_rate": 1.9618973187637902e-05, - "loss": 0.386, - "step": 2259 - }, - { - "epoch": 0.2129012505593368, - "grad_norm": 0.8247572183609009, - "learning_rate": 1.9618560222491367e-05, - "loss": 0.3983, - "step": 2260 - }, - { - "epoch": 0.21299545465250466, - "grad_norm": 0.8347664475440979, - "learning_rate": 1.961814703802678e-05, - "loss": 0.379, - "step": 2261 - }, - { - "epoch": 0.2130896587456725, - "grad_norm": 1.040482997894287, - "learning_rate": 1.9617733634253572e-05, - "loss": 0.3926, - "step": 2262 - }, - { - "epoch": 0.21318386283884036, - "grad_norm": 0.7695727348327637, - "learning_rate": 1.961732001118116e-05, - "loss": 0.3633, - "step": 2263 - }, - { - "epoch": 0.2132780669320082, - "grad_norm": 0.9341619610786438, - "learning_rate": 1.9616906168818977e-05, - "loss": 0.3716, - "step": 2264 - }, - { - "epoch": 0.21337227102517603, - "grad_norm": 0.8412464261054993, - "learning_rate": 1.9616492107176464e-05, - "loss": 0.3665, - "step": 2265 - }, - { - "epoch": 0.21346647511834388, - "grad_norm": 0.8501750826835632, - "learning_rate": 1.9616077826263056e-05, - "loss": 0.3644, - "step": 2266 - }, - { - "epoch": 0.21356067921151173, - "grad_norm": 0.8177802562713623, - "learning_rate": 1.9615663326088204e-05, - "loss": 0.358, - "step": 2267 - }, - { - "epoch": 0.21365488330467958, - "grad_norm": 1.0078129768371582, - "learning_rate": 1.9615248606661358e-05, - "loss": 0.4614, - "step": 2268 - }, - { - "epoch": 0.21374908739784743, - "grad_norm": 0.8450458645820618, - "learning_rate": 1.961483366799197e-05, - "loss": 0.3841, - "step": 2269 - }, - { - "epoch": 0.21384329149101527, - "grad_norm": 1.0928857326507568, - "learning_rate": 1.9614418510089504e-05, - "loss": 0.3636, - "step": 2270 - }, - { - "epoch": 0.21393749558418312, - "grad_norm": 0.7948547005653381, - "learning_rate": 1.961400313296343e-05, - "loss": 0.3818, - "step": 2271 - }, - { - "epoch": 0.21403169967735097, - "grad_norm": 0.9856407046318054, - "learning_rate": 1.9613587536623218e-05, - "loss": 0.414, - "step": 2272 - }, - { - "epoch": 0.21412590377051882, - "grad_norm": 0.9461509585380554, - "learning_rate": 1.9613171721078336e-05, - "loss": 0.3476, - "step": 2273 - }, - { - "epoch": 0.21422010786368667, - "grad_norm": 0.8441481590270996, - "learning_rate": 1.9612755686338278e-05, - "loss": 0.4129, - "step": 2274 - }, - { - "epoch": 0.21431431195685452, - "grad_norm": 0.9468504190444946, - "learning_rate": 1.961233943241252e-05, - "loss": 0.3796, - "step": 2275 - }, - { - "epoch": 0.21440851605002237, - "grad_norm": 0.8112934231758118, - "learning_rate": 1.961192295931056e-05, - "loss": 0.3238, - "step": 2276 - }, - { - "epoch": 0.21450272014319022, - "grad_norm": 0.8712162971496582, - "learning_rate": 1.9611506267041892e-05, - "loss": 0.3982, - "step": 2277 - }, - { - "epoch": 0.21459692423635807, - "grad_norm": 0.8355295062065125, - "learning_rate": 1.9611089355616015e-05, - "loss": 0.3834, - "step": 2278 - }, - { - "epoch": 0.21469112832952592, - "grad_norm": 0.8796846270561218, - "learning_rate": 1.9610672225042436e-05, - "loss": 0.3615, - "step": 2279 - }, - { - "epoch": 0.21478533242269376, - "grad_norm": 0.8133159875869751, - "learning_rate": 1.9610254875330666e-05, - "loss": 0.3449, - "step": 2280 - }, - { - "epoch": 0.2148795365158616, - "grad_norm": 0.9933817386627197, - "learning_rate": 1.9609837306490223e-05, - "loss": 0.3976, - "step": 2281 
- }, - { - "epoch": 0.21497374060902946, - "grad_norm": 0.8980244398117065, - "learning_rate": 1.9609419518530634e-05, - "loss": 0.3596, - "step": 2282 - }, - { - "epoch": 0.2150679447021973, - "grad_norm": 0.8660174608230591, - "learning_rate": 1.9609001511461412e-05, - "loss": 0.4017, - "step": 2283 - }, - { - "epoch": 0.21516214879536516, - "grad_norm": 0.8546890616416931, - "learning_rate": 1.9608583285292092e-05, - "loss": 0.3473, - "step": 2284 - }, - { - "epoch": 0.215256352888533, - "grad_norm": 0.8344088196754456, - "learning_rate": 1.960816484003222e-05, - "loss": 0.4092, - "step": 2285 - }, - { - "epoch": 0.21535055698170086, - "grad_norm": 0.7839269638061523, - "learning_rate": 1.9607746175691328e-05, - "loss": 0.3648, - "step": 2286 - }, - { - "epoch": 0.2154447610748687, - "grad_norm": 0.8078948855400085, - "learning_rate": 1.9607327292278966e-05, - "loss": 0.3477, - "step": 2287 - }, - { - "epoch": 0.21553896516803656, - "grad_norm": 0.9352668523788452, - "learning_rate": 1.960690818980468e-05, - "loss": 0.399, - "step": 2288 - }, - { - "epoch": 0.2156331692612044, - "grad_norm": 1.1535351276397705, - "learning_rate": 1.9606488868278034e-05, - "loss": 0.3995, - "step": 2289 - }, - { - "epoch": 0.21572737335437225, - "grad_norm": 0.8281784057617188, - "learning_rate": 1.960606932770858e-05, - "loss": 0.4028, - "step": 2290 - }, - { - "epoch": 0.2158215774475401, - "grad_norm": 1.015548825263977, - "learning_rate": 1.9605649568105894e-05, - "loss": 0.4305, - "step": 2291 - }, - { - "epoch": 0.21591578154070795, - "grad_norm": 1.4392837285995483, - "learning_rate": 1.960522958947954e-05, - "loss": 0.3371, - "step": 2292 - }, - { - "epoch": 0.2160099856338758, - "grad_norm": 0.7565015554428101, - "learning_rate": 1.96048093918391e-05, - "loss": 0.3277, - "step": 2293 - }, - { - "epoch": 0.21610418972704365, - "grad_norm": 0.9951907992362976, - "learning_rate": 1.9604388975194147e-05, - "loss": 0.4148, - "step": 2294 - }, - { - "epoch": 0.2161983938202115, - "grad_norm": 0.9554421305656433, - "learning_rate": 1.9603968339554275e-05, - "loss": 0.4156, - "step": 2295 - }, - { - "epoch": 0.21629259791337935, - "grad_norm": 0.9876624345779419, - "learning_rate": 1.960354748492907e-05, - "loss": 0.4328, - "step": 2296 - }, - { - "epoch": 0.2163868020065472, - "grad_norm": 0.9269981980323792, - "learning_rate": 1.960312641132813e-05, - "loss": 0.4169, - "step": 2297 - }, - { - "epoch": 0.21648100609971505, - "grad_norm": 1.119171142578125, - "learning_rate": 1.9602705118761055e-05, - "loss": 0.3629, - "step": 2298 - }, - { - "epoch": 0.2165752101928829, - "grad_norm": 0.882019579410553, - "learning_rate": 1.9602283607237456e-05, - "loss": 0.3466, - "step": 2299 - }, - { - "epoch": 0.21666941428605072, - "grad_norm": 0.9692645072937012, - "learning_rate": 1.960186187676694e-05, - "loss": 0.4182, - "step": 2300 - }, - { - "epoch": 0.21676361837921856, - "grad_norm": 0.9262769818305969, - "learning_rate": 1.9601439927359123e-05, - "loss": 0.3786, - "step": 2301 - }, - { - "epoch": 0.2168578224723864, - "grad_norm": 0.8466152548789978, - "learning_rate": 1.960101775902363e-05, - "loss": 0.3428, - "step": 2302 - }, - { - "epoch": 0.21695202656555426, - "grad_norm": 0.8468576073646545, - "learning_rate": 1.960059537177008e-05, - "loss": 0.3199, - "step": 2303 - }, - { - "epoch": 0.2170462306587221, - "grad_norm": 0.8485195636749268, - "learning_rate": 1.9600172765608106e-05, - "loss": 0.3474, - "step": 2304 - }, - { - "epoch": 0.21714043475188996, - "grad_norm": 0.9259114861488342, - 
"learning_rate": 1.959974994054735e-05, - "loss": 0.3489, - "step": 2305 - }, - { - "epoch": 0.2172346388450578, - "grad_norm": 0.882205605506897, - "learning_rate": 1.9599326896597448e-05, - "loss": 0.3951, - "step": 2306 - }, - { - "epoch": 0.21732884293822566, - "grad_norm": 0.8148806095123291, - "learning_rate": 1.9598903633768043e-05, - "loss": 0.3793, - "step": 2307 - }, - { - "epoch": 0.2174230470313935, - "grad_norm": 0.8608700037002563, - "learning_rate": 1.9598480152068795e-05, - "loss": 0.3707, - "step": 2308 - }, - { - "epoch": 0.21751725112456136, - "grad_norm": 0.9020053148269653, - "learning_rate": 1.9598056451509355e-05, - "loss": 0.3805, - "step": 2309 - }, - { - "epoch": 0.2176114552177292, - "grad_norm": 0.9641295075416565, - "learning_rate": 1.9597632532099383e-05, - "loss": 0.4584, - "step": 2310 - }, - { - "epoch": 0.21770565931089705, - "grad_norm": 0.7809056639671326, - "learning_rate": 1.9597208393848546e-05, - "loss": 0.3412, - "step": 2311 - }, - { - "epoch": 0.2177998634040649, - "grad_norm": 0.8260183930397034, - "learning_rate": 1.9596784036766517e-05, - "loss": 0.3251, - "step": 2312 - }, - { - "epoch": 0.21789406749723275, - "grad_norm": 1.05546236038208, - "learning_rate": 1.959635946086297e-05, - "loss": 0.3328, - "step": 2313 - }, - { - "epoch": 0.2179882715904006, - "grad_norm": 1.0210241079330444, - "learning_rate": 1.9595934666147584e-05, - "loss": 0.4393, - "step": 2314 - }, - { - "epoch": 0.21808247568356845, - "grad_norm": 0.8821229338645935, - "learning_rate": 1.959550965263005e-05, - "loss": 0.3898, - "step": 2315 - }, - { - "epoch": 0.2181766797767363, - "grad_norm": 1.152212381362915, - "learning_rate": 1.9595084420320054e-05, - "loss": 0.3787, - "step": 2316 - }, - { - "epoch": 0.21827088386990415, - "grad_norm": 0.8634756207466125, - "learning_rate": 1.9594658969227295e-05, - "loss": 0.4109, - "step": 2317 - }, - { - "epoch": 0.218365087963072, - "grad_norm": 1.010636568069458, - "learning_rate": 1.959423329936147e-05, - "loss": 0.4071, - "step": 2318 - }, - { - "epoch": 0.21845929205623985, - "grad_norm": 0.9700372815132141, - "learning_rate": 1.959380741073229e-05, - "loss": 0.3638, - "step": 2319 - }, - { - "epoch": 0.2185534961494077, - "grad_norm": 0.8460371494293213, - "learning_rate": 1.9593381303349467e-05, - "loss": 0.324, - "step": 2320 - }, - { - "epoch": 0.21864770024257554, - "grad_norm": 0.9131547212600708, - "learning_rate": 1.9592954977222712e-05, - "loss": 0.3414, - "step": 2321 - }, - { - "epoch": 0.2187419043357434, - "grad_norm": 1.0337926149368286, - "learning_rate": 1.959252843236175e-05, - "loss": 0.4075, - "step": 2322 - }, - { - "epoch": 0.21883610842891124, - "grad_norm": 0.7619706988334656, - "learning_rate": 1.95921016687763e-05, - "loss": 0.3393, - "step": 2323 - }, - { - "epoch": 0.2189303125220791, - "grad_norm": 0.8029950857162476, - "learning_rate": 1.95916746864761e-05, - "loss": 0.419, - "step": 2324 - }, - { - "epoch": 0.21902451661524694, - "grad_norm": 1.0024975538253784, - "learning_rate": 1.959124748547088e-05, - "loss": 0.427, - "step": 2325 - }, - { - "epoch": 0.2191187207084148, - "grad_norm": 1.0406956672668457, - "learning_rate": 1.9590820065770387e-05, - "loss": 0.3005, - "step": 2326 - }, - { - "epoch": 0.21921292480158264, - "grad_norm": 0.8180007934570312, - "learning_rate": 1.9590392427384364e-05, - "loss": 0.3335, - "step": 2327 - }, - { - "epoch": 0.21930712889475049, - "grad_norm": 0.9022271037101746, - "learning_rate": 1.9589964570322562e-05, - "loss": 0.402, - "step": 2328 - }, - { - 
"epoch": 0.21940133298791833, - "grad_norm": 0.8517004251480103, - "learning_rate": 1.958953649459474e-05, - "loss": 0.4099, - "step": 2329 - }, - { - "epoch": 0.21949553708108618, - "grad_norm": 0.8223914504051208, - "learning_rate": 1.958910820021065e-05, - "loss": 0.3771, - "step": 2330 - }, - { - "epoch": 0.21958974117425403, - "grad_norm": 0.9121590256690979, - "learning_rate": 1.9588679687180066e-05, - "loss": 0.3847, - "step": 2331 - }, - { - "epoch": 0.21968394526742188, - "grad_norm": 0.9172884225845337, - "learning_rate": 1.9588250955512755e-05, - "loss": 0.3553, - "step": 2332 - }, - { - "epoch": 0.21977814936058973, - "grad_norm": 1.0046167373657227, - "learning_rate": 1.9587822005218495e-05, - "loss": 0.4066, - "step": 2333 - }, - { - "epoch": 0.21987235345375755, - "grad_norm": 0.9230954051017761, - "learning_rate": 1.9587392836307064e-05, - "loss": 0.3668, - "step": 2334 - }, - { - "epoch": 0.2199665575469254, - "grad_norm": 1.1170899868011475, - "learning_rate": 1.9586963448788247e-05, - "loss": 0.3554, - "step": 2335 - }, - { - "epoch": 0.22006076164009325, - "grad_norm": 0.8206403255462646, - "learning_rate": 1.958653384267184e-05, - "loss": 0.3599, - "step": 2336 - }, - { - "epoch": 0.2201549657332611, - "grad_norm": 0.7312343716621399, - "learning_rate": 1.9586104017967632e-05, - "loss": 0.3199, - "step": 2337 - }, - { - "epoch": 0.22024916982642895, - "grad_norm": 0.8107249736785889, - "learning_rate": 1.958567397468543e-05, - "loss": 0.3333, - "step": 2338 - }, - { - "epoch": 0.2203433739195968, - "grad_norm": 0.8425604701042175, - "learning_rate": 1.958524371283504e-05, - "loss": 0.411, - "step": 2339 - }, - { - "epoch": 0.22043757801276465, - "grad_norm": 0.8197388052940369, - "learning_rate": 1.9584813232426266e-05, - "loss": 0.3963, - "step": 2340 - }, - { - "epoch": 0.2205317821059325, - "grad_norm": 0.9427674412727356, - "learning_rate": 1.9584382533468925e-05, - "loss": 0.3883, - "step": 2341 - }, - { - "epoch": 0.22062598619910034, - "grad_norm": 1.010420560836792, - "learning_rate": 1.9583951615972842e-05, - "loss": 0.4528, - "step": 2342 - }, - { - "epoch": 0.2207201902922682, - "grad_norm": 0.8485791683197021, - "learning_rate": 1.958352047994784e-05, - "loss": 0.3837, - "step": 2343 - }, - { - "epoch": 0.22081439438543604, - "grad_norm": 0.8851433992385864, - "learning_rate": 1.9583089125403752e-05, - "loss": 0.4084, - "step": 2344 - }, - { - "epoch": 0.2209085984786039, - "grad_norm": 0.7958040833473206, - "learning_rate": 1.958265755235041e-05, - "loss": 0.3346, - "step": 2345 - }, - { - "epoch": 0.22100280257177174, - "grad_norm": 0.8419966101646423, - "learning_rate": 1.9582225760797657e-05, - "loss": 0.3399, - "step": 2346 - }, - { - "epoch": 0.2210970066649396, - "grad_norm": 0.8404862284660339, - "learning_rate": 1.9581793750755334e-05, - "loss": 0.356, - "step": 2347 - }, - { - "epoch": 0.22119121075810744, - "grad_norm": 0.9682923555374146, - "learning_rate": 1.9581361522233297e-05, - "loss": 0.3622, - "step": 2348 - }, - { - "epoch": 0.22128541485127529, - "grad_norm": 0.8320052027702332, - "learning_rate": 1.9580929075241398e-05, - "loss": 0.3926, - "step": 2349 - }, - { - "epoch": 0.22137961894444314, - "grad_norm": 0.9958815574645996, - "learning_rate": 1.95804964097895e-05, - "loss": 0.4735, - "step": 2350 - }, - { - "epoch": 0.22147382303761098, - "grad_norm": 0.8752058744430542, - "learning_rate": 1.958006352588747e-05, - "loss": 0.3749, - "step": 2351 - }, - { - "epoch": 0.22156802713077883, - "grad_norm": 1.0377271175384521, - 
"learning_rate": 1.957963042354517e-05, - "loss": 0.3795, - "step": 2352 - }, - { - "epoch": 0.22166223122394668, - "grad_norm": 0.8700814843177795, - "learning_rate": 1.957919710277249e-05, - "loss": 0.3786, - "step": 2353 - }, - { - "epoch": 0.22175643531711453, - "grad_norm": 0.9008076190948486, - "learning_rate": 1.9578763563579292e-05, - "loss": 0.4087, - "step": 2354 - }, - { - "epoch": 0.22185063941028238, - "grad_norm": 0.8733604550361633, - "learning_rate": 1.9578329805975476e-05, - "loss": 0.3679, - "step": 2355 - }, - { - "epoch": 0.22194484350345023, - "grad_norm": 0.82168048620224, - "learning_rate": 1.9577895829970926e-05, - "loss": 0.3823, - "step": 2356 - }, - { - "epoch": 0.22203904759661808, - "grad_norm": 1.019991159439087, - "learning_rate": 1.9577461635575536e-05, - "loss": 0.3931, - "step": 2357 - }, - { - "epoch": 0.22213325168978593, - "grad_norm": 0.8327137231826782, - "learning_rate": 1.9577027222799213e-05, - "loss": 0.3533, - "step": 2358 - }, - { - "epoch": 0.22222745578295378, - "grad_norm": 0.8154199123382568, - "learning_rate": 1.9576592591651856e-05, - "loss": 0.3744, - "step": 2359 - }, - { - "epoch": 0.22232165987612162, - "grad_norm": 0.7728456258773804, - "learning_rate": 1.9576157742143377e-05, - "loss": 0.3196, - "step": 2360 - }, - { - "epoch": 0.22241586396928947, - "grad_norm": 0.903174102306366, - "learning_rate": 1.9575722674283695e-05, - "loss": 0.4026, - "step": 2361 - }, - { - "epoch": 0.22251006806245732, - "grad_norm": 0.8509536981582642, - "learning_rate": 1.9575287388082724e-05, - "loss": 0.3459, - "step": 2362 - }, - { - "epoch": 0.22260427215562517, - "grad_norm": 1.0349715948104858, - "learning_rate": 1.9574851883550395e-05, - "loss": 0.418, - "step": 2363 - }, - { - "epoch": 0.22269847624879302, - "grad_norm": 0.9048078656196594, - "learning_rate": 1.957441616069663e-05, - "loss": 0.3945, - "step": 2364 - }, - { - "epoch": 0.22279268034196087, - "grad_norm": 0.8549168705940247, - "learning_rate": 1.9573980219531375e-05, - "loss": 0.362, - "step": 2365 - }, - { - "epoch": 0.22288688443512872, - "grad_norm": 0.7860109806060791, - "learning_rate": 1.9573544060064562e-05, - "loss": 0.3395, - "step": 2366 - }, - { - "epoch": 0.22298108852829657, - "grad_norm": 1.1166032552719116, - "learning_rate": 1.9573107682306137e-05, - "loss": 0.3713, - "step": 2367 - }, - { - "epoch": 0.22307529262146442, - "grad_norm": 0.803830623626709, - "learning_rate": 1.9572671086266054e-05, - "loss": 0.3673, - "step": 2368 - }, - { - "epoch": 0.22316949671463224, - "grad_norm": 0.8809511661529541, - "learning_rate": 1.9572234271954268e-05, - "loss": 0.3921, - "step": 2369 - }, - { - "epoch": 0.2232637008078001, - "grad_norm": 0.8397429585456848, - "learning_rate": 1.9571797239380733e-05, - "loss": 0.359, - "step": 2370 - }, - { - "epoch": 0.22335790490096794, - "grad_norm": 1.1121681928634644, - "learning_rate": 1.9571359988555417e-05, - "loss": 0.4321, - "step": 2371 - }, - { - "epoch": 0.22345210899413578, - "grad_norm": 0.8415611386299133, - "learning_rate": 1.9570922519488294e-05, - "loss": 0.3582, - "step": 2372 - }, - { - "epoch": 0.22354631308730363, - "grad_norm": 0.8948222398757935, - "learning_rate": 1.9570484832189333e-05, - "loss": 0.3981, - "step": 2373 - }, - { - "epoch": 0.22364051718047148, - "grad_norm": 0.9170864224433899, - "learning_rate": 1.957004692666852e-05, - "loss": 0.4195, - "step": 2374 - }, - { - "epoch": 0.22373472127363933, - "grad_norm": 0.8808520436286926, - "learning_rate": 1.9569608802935834e-05, - "loss": 0.3551, - "step": 
2375 - }, - { - "epoch": 0.22382892536680718, - "grad_norm": 0.8277885317802429, - "learning_rate": 1.956917046100127e-05, - "loss": 0.3425, - "step": 2376 - }, - { - "epoch": 0.22392312945997503, - "grad_norm": 0.787257730960846, - "learning_rate": 1.956873190087482e-05, - "loss": 0.3633, - "step": 2377 - }, - { - "epoch": 0.22401733355314288, - "grad_norm": 0.9543466567993164, - "learning_rate": 1.9568293122566485e-05, - "loss": 0.3769, - "step": 2378 - }, - { - "epoch": 0.22411153764631073, - "grad_norm": 0.8029462695121765, - "learning_rate": 1.9567854126086265e-05, - "loss": 0.3813, - "step": 2379 - }, - { - "epoch": 0.22420574173947858, - "grad_norm": 0.8587418794631958, - "learning_rate": 1.956741491144418e-05, - "loss": 0.413, - "step": 2380 - }, - { - "epoch": 0.22429994583264642, - "grad_norm": 0.8977055549621582, - "learning_rate": 1.9566975478650234e-05, - "loss": 0.3904, - "step": 2381 - }, - { - "epoch": 0.22439414992581427, - "grad_norm": 0.8071858882904053, - "learning_rate": 1.9566535827714452e-05, - "loss": 0.3345, - "step": 2382 - }, - { - "epoch": 0.22448835401898212, - "grad_norm": 0.8893081545829773, - "learning_rate": 1.956609595864686e-05, - "loss": 0.3728, - "step": 2383 - }, - { - "epoch": 0.22458255811214997, - "grad_norm": 1.0501766204833984, - "learning_rate": 1.9565655871457486e-05, - "loss": 0.327, - "step": 2384 - }, - { - "epoch": 0.22467676220531782, - "grad_norm": 0.9032719731330872, - "learning_rate": 1.9565215566156363e-05, - "loss": 0.4021, - "step": 2385 - }, - { - "epoch": 0.22477096629848567, - "grad_norm": 0.9261976480484009, - "learning_rate": 1.956477504275353e-05, - "loss": 0.4339, - "step": 2386 - }, - { - "epoch": 0.22486517039165352, - "grad_norm": 0.8292469382286072, - "learning_rate": 1.956433430125904e-05, - "loss": 0.3286, - "step": 2387 - }, - { - "epoch": 0.22495937448482137, - "grad_norm": 0.8598193526268005, - "learning_rate": 1.9563893341682935e-05, - "loss": 0.4096, - "step": 2388 - }, - { - "epoch": 0.22505357857798922, - "grad_norm": 0.8294149041175842, - "learning_rate": 1.9563452164035268e-05, - "loss": 0.3673, - "step": 2389 - }, - { - "epoch": 0.22514778267115707, - "grad_norm": 0.8138467669487, - "learning_rate": 1.95630107683261e-05, - "loss": 0.3452, - "step": 2390 - }, - { - "epoch": 0.22524198676432491, - "grad_norm": 0.9328336715698242, - "learning_rate": 1.95625691545655e-05, - "loss": 0.4335, - "step": 2391 - }, - { - "epoch": 0.22533619085749276, - "grad_norm": 0.8273537755012512, - "learning_rate": 1.956212732276353e-05, - "loss": 0.4029, - "step": 2392 - }, - { - "epoch": 0.2254303949506606, - "grad_norm": 0.9863900542259216, - "learning_rate": 1.9561685272930274e-05, - "loss": 0.4171, - "step": 2393 - }, - { - "epoch": 0.22552459904382846, - "grad_norm": 0.8728018403053284, - "learning_rate": 1.9561243005075807e-05, - "loss": 0.3981, - "step": 2394 - }, - { - "epoch": 0.2256188031369963, - "grad_norm": 0.9171895384788513, - "learning_rate": 1.956080051921021e-05, - "loss": 0.3948, - "step": 2395 - }, - { - "epoch": 0.22571300723016416, - "grad_norm": 0.7338156700134277, - "learning_rate": 1.9560357815343577e-05, - "loss": 0.3817, - "step": 2396 - }, - { - "epoch": 0.225807211323332, - "grad_norm": 0.8601246476173401, - "learning_rate": 1.9559914893486e-05, - "loss": 0.368, - "step": 2397 - }, - { - "epoch": 0.22590141541649986, - "grad_norm": 0.9178417921066284, - "learning_rate": 1.9559471753647577e-05, - "loss": 0.396, - "step": 2398 - }, - { - "epoch": 0.2259956195096677, - "grad_norm": 0.9245316386222839, - 
"learning_rate": 1.955902839583842e-05, - "loss": 0.4357, - "step": 2399 - }, - { - "epoch": 0.22608982360283555, - "grad_norm": 0.8712064027786255, - "learning_rate": 1.9558584820068625e-05, - "loss": 0.3908, - "step": 2400 - }, - { - "epoch": 0.2261840276960034, - "grad_norm": 0.8233629465103149, - "learning_rate": 1.9558141026348316e-05, - "loss": 0.3785, - "step": 2401 - }, - { - "epoch": 0.22627823178917125, - "grad_norm": 1.1006391048431396, - "learning_rate": 1.9557697014687608e-05, - "loss": 0.4707, - "step": 2402 - }, - { - "epoch": 0.2263724358823391, - "grad_norm": 0.7773765921592712, - "learning_rate": 1.955725278509663e-05, - "loss": 0.3321, - "step": 2403 - }, - { - "epoch": 0.22646663997550692, - "grad_norm": 0.8021467328071594, - "learning_rate": 1.9556808337585504e-05, - "loss": 0.3743, - "step": 2404 - }, - { - "epoch": 0.22656084406867477, - "grad_norm": 0.8988116979598999, - "learning_rate": 1.955636367216437e-05, - "loss": 0.3615, - "step": 2405 - }, - { - "epoch": 0.22665504816184262, - "grad_norm": 0.9378819465637207, - "learning_rate": 1.9555918788843366e-05, - "loss": 0.3699, - "step": 2406 - }, - { - "epoch": 0.22674925225501047, - "grad_norm": 0.9987820386886597, - "learning_rate": 1.9555473687632638e-05, - "loss": 0.4506, - "step": 2407 - }, - { - "epoch": 0.22684345634817832, - "grad_norm": 0.7900689244270325, - "learning_rate": 1.955502836854233e-05, - "loss": 0.3586, - "step": 2408 - }, - { - "epoch": 0.22693766044134617, - "grad_norm": 0.8035356998443604, - "learning_rate": 1.9554582831582594e-05, - "loss": 0.3441, - "step": 2409 - }, - { - "epoch": 0.22703186453451402, - "grad_norm": 0.9192503690719604, - "learning_rate": 1.9554137076763595e-05, - "loss": 0.4067, - "step": 2410 - }, - { - "epoch": 0.22712606862768187, - "grad_norm": 0.8710134029388428, - "learning_rate": 1.9553691104095495e-05, - "loss": 0.4135, - "step": 2411 - }, - { - "epoch": 0.22722027272084971, - "grad_norm": 0.8533443808555603, - "learning_rate": 1.9553244913588464e-05, - "loss": 0.3806, - "step": 2412 - }, - { - "epoch": 0.22731447681401756, - "grad_norm": 0.9453597664833069, - "learning_rate": 1.9552798505252674e-05, - "loss": 0.4078, - "step": 2413 - }, - { - "epoch": 0.2274086809071854, - "grad_norm": 0.8405705094337463, - "learning_rate": 1.9552351879098306e-05, - "loss": 0.4063, - "step": 2414 - }, - { - "epoch": 0.22750288500035326, - "grad_norm": 0.9767290353775024, - "learning_rate": 1.9551905035135542e-05, - "loss": 0.4363, - "step": 2415 - }, - { - "epoch": 0.2275970890935211, - "grad_norm": 0.9663323163986206, - "learning_rate": 1.9551457973374565e-05, - "loss": 0.4438, - "step": 2416 - }, - { - "epoch": 0.22769129318668896, - "grad_norm": 0.7720850110054016, - "learning_rate": 1.9551010693825582e-05, - "loss": 0.3287, - "step": 2417 - }, - { - "epoch": 0.2277854972798568, - "grad_norm": 0.8272558450698853, - "learning_rate": 1.955056319649878e-05, - "loss": 0.3713, - "step": 2418 - }, - { - "epoch": 0.22787970137302466, - "grad_norm": 0.8968222737312317, - "learning_rate": 1.9550115481404368e-05, - "loss": 0.4241, - "step": 2419 - }, - { - "epoch": 0.2279739054661925, - "grad_norm": 0.9261634349822998, - "learning_rate": 1.9549667548552557e-05, - "loss": 0.389, - "step": 2420 - }, - { - "epoch": 0.22806810955936035, - "grad_norm": 0.9221539497375488, - "learning_rate": 1.9549219397953552e-05, - "loss": 0.3776, - "step": 2421 - }, - { - "epoch": 0.2281623136525282, - "grad_norm": 0.8815743327140808, - "learning_rate": 1.9548771029617577e-05, - "loss": 0.3661, - "step": 
2422 - }, - { - "epoch": 0.22825651774569605, - "grad_norm": 0.842648983001709, - "learning_rate": 1.9548322443554855e-05, - "loss": 0.3549, - "step": 2423 - }, - { - "epoch": 0.2283507218388639, - "grad_norm": 0.8210880160331726, - "learning_rate": 1.9547873639775616e-05, - "loss": 0.3738, - "step": 2424 - }, - { - "epoch": 0.22844492593203175, - "grad_norm": 0.8543195724487305, - "learning_rate": 1.9547424618290095e-05, - "loss": 0.3679, - "step": 2425 - }, - { - "epoch": 0.2285391300251996, - "grad_norm": 1.115783452987671, - "learning_rate": 1.954697537910852e-05, - "loss": 0.3838, - "step": 2426 - }, - { - "epoch": 0.22863333411836745, - "grad_norm": 0.866740345954895, - "learning_rate": 1.9546525922241147e-05, - "loss": 0.3688, - "step": 2427 - }, - { - "epoch": 0.2287275382115353, - "grad_norm": 0.7821134328842163, - "learning_rate": 1.9546076247698218e-05, - "loss": 0.3072, - "step": 2428 - }, - { - "epoch": 0.22882174230470315, - "grad_norm": 0.9816327095031738, - "learning_rate": 1.9545626355489986e-05, - "loss": 0.386, - "step": 2429 - }, - { - "epoch": 0.228915946397871, - "grad_norm": 0.8954646587371826, - "learning_rate": 1.9545176245626715e-05, - "loss": 0.3639, - "step": 2430 - }, - { - "epoch": 0.22901015049103884, - "grad_norm": 0.8879236578941345, - "learning_rate": 1.954472591811866e-05, - "loss": 0.385, - "step": 2431 - }, - { - "epoch": 0.2291043545842067, - "grad_norm": 0.8517841696739197, - "learning_rate": 1.954427537297609e-05, - "loss": 0.3821, - "step": 2432 - }, - { - "epoch": 0.22919855867737454, - "grad_norm": 0.8577247262001038, - "learning_rate": 1.9543824610209284e-05, - "loss": 0.4077, - "step": 2433 - }, - { - "epoch": 0.2292927627705424, - "grad_norm": 1.0095113515853882, - "learning_rate": 1.954337362982852e-05, - "loss": 0.4262, - "step": 2434 - }, - { - "epoch": 0.22938696686371024, - "grad_norm": 0.8891860246658325, - "learning_rate": 1.9542922431844074e-05, - "loss": 0.3888, - "step": 2435 - }, - { - "epoch": 0.2294811709568781, - "grad_norm": 0.7986401319503784, - "learning_rate": 1.9542471016266236e-05, - "loss": 0.3206, - "step": 2436 - }, - { - "epoch": 0.22957537505004594, - "grad_norm": 0.8828191161155701, - "learning_rate": 1.9542019383105306e-05, - "loss": 0.3792, - "step": 2437 - }, - { - "epoch": 0.22966957914321376, - "grad_norm": 0.873891294002533, - "learning_rate": 1.9541567532371577e-05, - "loss": 0.393, - "step": 2438 - }, - { - "epoch": 0.2297637832363816, - "grad_norm": 0.7492256760597229, - "learning_rate": 1.954111546407535e-05, - "loss": 0.3247, - "step": 2439 - }, - { - "epoch": 0.22985798732954946, - "grad_norm": 0.8440161943435669, - "learning_rate": 1.9540663178226936e-05, - "loss": 0.3894, - "step": 2440 - }, - { - "epoch": 0.2299521914227173, - "grad_norm": 0.828023374080658, - "learning_rate": 1.9540210674836645e-05, - "loss": 0.3587, - "step": 2441 - }, - { - "epoch": 0.23004639551588515, - "grad_norm": 0.8087053298950195, - "learning_rate": 1.9539757953914798e-05, - "loss": 0.3629, - "step": 2442 - }, - { - "epoch": 0.230140599609053, - "grad_norm": 0.8228766918182373, - "learning_rate": 1.9539305015471716e-05, - "loss": 0.3271, - "step": 2443 - }, - { - "epoch": 0.23023480370222085, - "grad_norm": 0.9928690791130066, - "learning_rate": 1.9538851859517728e-05, - "loss": 0.4028, - "step": 2444 - }, - { - "epoch": 0.2303290077953887, - "grad_norm": 0.8397461175918579, - "learning_rate": 1.9538398486063162e-05, - "loss": 0.3758, - "step": 2445 - }, - { - "epoch": 0.23042321188855655, - "grad_norm": 0.9226630926132202, - 
"learning_rate": 1.9537944895118362e-05, - "loss": 0.3873, - "step": 2446 - }, - { - "epoch": 0.2305174159817244, - "grad_norm": 0.864921510219574, - "learning_rate": 1.953749108669367e-05, - "loss": 0.3904, - "step": 2447 - }, - { - "epoch": 0.23061162007489225, - "grad_norm": 0.9132973551750183, - "learning_rate": 1.9537037060799423e-05, - "loss": 0.4133, - "step": 2448 - }, - { - "epoch": 0.2307058241680601, - "grad_norm": 0.8780419230461121, - "learning_rate": 1.9536582817445988e-05, - "loss": 0.4172, - "step": 2449 - }, - { - "epoch": 0.23080002826122795, - "grad_norm": 0.7681748270988464, - "learning_rate": 1.9536128356643715e-05, - "loss": 0.3133, - "step": 2450 - }, - { - "epoch": 0.2308942323543958, - "grad_norm": 0.7827266454696655, - "learning_rate": 1.9535673678402968e-05, - "loss": 0.3512, - "step": 2451 - }, - { - "epoch": 0.23098843644756364, - "grad_norm": 0.8806381225585938, - "learning_rate": 1.9535218782734112e-05, - "loss": 0.3926, - "step": 2452 - }, - { - "epoch": 0.2310826405407315, - "grad_norm": 0.8704474568367004, - "learning_rate": 1.9534763669647524e-05, - "loss": 0.3953, - "step": 2453 - }, - { - "epoch": 0.23117684463389934, - "grad_norm": 0.9033093452453613, - "learning_rate": 1.9534308339153573e-05, - "loss": 0.3794, - "step": 2454 - }, - { - "epoch": 0.2312710487270672, - "grad_norm": 0.8191359639167786, - "learning_rate": 1.9533852791262653e-05, - "loss": 0.4014, - "step": 2455 - }, - { - "epoch": 0.23136525282023504, - "grad_norm": 0.8146197199821472, - "learning_rate": 1.953339702598514e-05, - "loss": 0.3552, - "step": 2456 - }, - { - "epoch": 0.2314594569134029, - "grad_norm": 1.0001959800720215, - "learning_rate": 1.9532941043331437e-05, - "loss": 0.3923, - "step": 2457 - }, - { - "epoch": 0.23155366100657074, - "grad_norm": 0.9717074632644653, - "learning_rate": 1.953248484331193e-05, - "loss": 0.4005, - "step": 2458 - }, - { - "epoch": 0.2316478650997386, - "grad_norm": 0.9192183017730713, - "learning_rate": 1.9532028425937028e-05, - "loss": 0.4112, - "step": 2459 - }, - { - "epoch": 0.23174206919290644, - "grad_norm": 0.9282395243644714, - "learning_rate": 1.9531571791217137e-05, - "loss": 0.2936, - "step": 2460 - }, - { - "epoch": 0.23183627328607428, - "grad_norm": 0.8893698453903198, - "learning_rate": 1.953111493916267e-05, - "loss": 0.3518, - "step": 2461 - }, - { - "epoch": 0.23193047737924213, - "grad_norm": 0.9180042147636414, - "learning_rate": 1.953065786978404e-05, - "loss": 0.398, - "step": 2462 - }, - { - "epoch": 0.23202468147240998, - "grad_norm": 0.9521582722663879, - "learning_rate": 1.953020058309167e-05, - "loss": 0.407, - "step": 2463 - }, - { - "epoch": 0.23211888556557783, - "grad_norm": 0.835546612739563, - "learning_rate": 1.9529743079095992e-05, - "loss": 0.3468, - "step": 2464 - }, - { - "epoch": 0.23221308965874568, - "grad_norm": 0.8486538529396057, - "learning_rate": 1.9529285357807427e-05, - "loss": 0.3822, - "step": 2465 - }, - { - "epoch": 0.23230729375191353, - "grad_norm": 0.9088033437728882, - "learning_rate": 1.9528827419236425e-05, - "loss": 0.3657, - "step": 2466 - }, - { - "epoch": 0.23240149784508138, - "grad_norm": 0.9178118109703064, - "learning_rate": 1.952836926339342e-05, - "loss": 0.3661, - "step": 2467 - }, - { - "epoch": 0.23249570193824923, - "grad_norm": 0.7571852803230286, - "learning_rate": 1.9527910890288857e-05, - "loss": 0.3395, - "step": 2468 - }, - { - "epoch": 0.23258990603141708, - "grad_norm": 0.8834770917892456, - "learning_rate": 1.9527452299933192e-05, - "loss": 0.3964, - "step": 2469 - 
}, - { - "epoch": 0.23268411012458493, - "grad_norm": 0.9025447964668274, - "learning_rate": 1.952699349233688e-05, - "loss": 0.3745, - "step": 2470 - }, - { - "epoch": 0.23277831421775277, - "grad_norm": 0.8389468789100647, - "learning_rate": 1.9526534467510382e-05, - "loss": 0.3361, - "step": 2471 - }, - { - "epoch": 0.23287251831092062, - "grad_norm": 0.9149181246757507, - "learning_rate": 1.9526075225464166e-05, - "loss": 0.3805, - "step": 2472 - }, - { - "epoch": 0.23296672240408844, - "grad_norm": 0.9167463779449463, - "learning_rate": 1.9525615766208704e-05, - "loss": 0.3896, - "step": 2473 - }, - { - "epoch": 0.2330609264972563, - "grad_norm": 0.9016802906990051, - "learning_rate": 1.9525156089754468e-05, - "loss": 0.3993, - "step": 2474 - }, - { - "epoch": 0.23315513059042414, - "grad_norm": 0.8447569608688354, - "learning_rate": 1.9524696196111944e-05, - "loss": 0.364, - "step": 2475 - }, - { - "epoch": 0.233249334683592, - "grad_norm": 0.8921357989311218, - "learning_rate": 1.9524236085291614e-05, - "loss": 0.4126, - "step": 2476 - }, - { - "epoch": 0.23334353877675984, - "grad_norm": 0.9665382504463196, - "learning_rate": 1.9523775757303975e-05, - "loss": 0.4028, - "step": 2477 - }, - { - "epoch": 0.2334377428699277, - "grad_norm": 0.8849314451217651, - "learning_rate": 1.9523315212159518e-05, - "loss": 0.3623, - "step": 2478 - }, - { - "epoch": 0.23353194696309554, - "grad_norm": 0.9236788749694824, - "learning_rate": 1.952285444986875e-05, - "loss": 0.4164, - "step": 2479 - }, - { - "epoch": 0.2336261510562634, - "grad_norm": 0.9704916477203369, - "learning_rate": 1.9522393470442165e-05, - "loss": 0.3789, - "step": 2480 - }, - { - "epoch": 0.23372035514943124, - "grad_norm": 0.9800971746444702, - "learning_rate": 1.952193227389029e-05, - "loss": 0.4098, - "step": 2481 - }, - { - "epoch": 0.23381455924259908, - "grad_norm": 0.8776868581771851, - "learning_rate": 1.9521470860223633e-05, - "loss": 0.3835, - "step": 2482 - }, - { - "epoch": 0.23390876333576693, - "grad_norm": 0.7817308902740479, - "learning_rate": 1.952100922945271e-05, - "loss": 0.3545, - "step": 2483 - }, - { - "epoch": 0.23400296742893478, - "grad_norm": 0.8747748732566833, - "learning_rate": 1.9520547381588054e-05, - "loss": 0.3784, - "step": 2484 - }, - { - "epoch": 0.23409717152210263, - "grad_norm": 0.8949740529060364, - "learning_rate": 1.9520085316640196e-05, - "loss": 0.3799, - "step": 2485 - }, - { - "epoch": 0.23419137561527048, - "grad_norm": 0.8536345362663269, - "learning_rate": 1.9519623034619668e-05, - "loss": 0.4348, - "step": 2486 - }, - { - "epoch": 0.23428557970843833, - "grad_norm": 0.8350498080253601, - "learning_rate": 1.9519160535537013e-05, - "loss": 0.4019, - "step": 2487 - }, - { - "epoch": 0.23437978380160618, - "grad_norm": 0.7977070212364197, - "learning_rate": 1.9518697819402775e-05, - "loss": 0.3593, - "step": 2488 - }, - { - "epoch": 0.23447398789477403, - "grad_norm": 0.8070759177207947, - "learning_rate": 1.9518234886227505e-05, - "loss": 0.3255, - "step": 2489 - }, - { - "epoch": 0.23456819198794188, - "grad_norm": 0.9064916968345642, - "learning_rate": 1.9517771736021763e-05, - "loss": 0.357, - "step": 2490 - }, - { - "epoch": 0.23466239608110973, - "grad_norm": 0.9692730903625488, - "learning_rate": 1.95173083687961e-05, - "loss": 0.4252, - "step": 2491 - }, - { - "epoch": 0.23475660017427757, - "grad_norm": 0.8616840839385986, - "learning_rate": 1.9516844784561092e-05, - "loss": 0.3795, - "step": 2492 - }, - { - "epoch": 0.23485080426744542, - "grad_norm": 
0.8935884833335876, - "learning_rate": 1.9516380983327305e-05, - "loss": 0.3543, - "step": 2493 - }, - { - "epoch": 0.23494500836061327, - "grad_norm": 0.8609200119972229, - "learning_rate": 1.9515916965105313e-05, - "loss": 0.3687, - "step": 2494 - }, - { - "epoch": 0.23503921245378112, - "grad_norm": 0.8925521969795227, - "learning_rate": 1.9515452729905697e-05, - "loss": 0.3509, - "step": 2495 - }, - { - "epoch": 0.23513341654694897, - "grad_norm": 1.0318409204483032, - "learning_rate": 1.9514988277739046e-05, - "loss": 0.429, - "step": 2496 - }, - { - "epoch": 0.23522762064011682, - "grad_norm": 0.8946929574012756, - "learning_rate": 1.951452360861594e-05, - "loss": 0.3891, - "step": 2497 - }, - { - "epoch": 0.23532182473328467, - "grad_norm": 0.9271129965782166, - "learning_rate": 1.951405872254699e-05, - "loss": 0.4159, - "step": 2498 - }, - { - "epoch": 0.23541602882645252, - "grad_norm": 0.8822172284126282, - "learning_rate": 1.9513593619542784e-05, - "loss": 0.3604, - "step": 2499 - }, - { - "epoch": 0.23551023291962037, - "grad_norm": 0.7919142246246338, - "learning_rate": 1.951312829961393e-05, - "loss": 0.3904, - "step": 2500 - }, - { - "epoch": 0.23560443701278821, - "grad_norm": 0.8769456744194031, - "learning_rate": 1.9512662762771034e-05, - "loss": 0.3692, - "step": 2501 - }, - { - "epoch": 0.23569864110595606, - "grad_norm": 0.8468378186225891, - "learning_rate": 1.9512197009024724e-05, - "loss": 0.3793, - "step": 2502 - }, - { - "epoch": 0.2357928451991239, - "grad_norm": 0.7992005944252014, - "learning_rate": 1.9511731038385605e-05, - "loss": 0.3163, - "step": 2503 - }, - { - "epoch": 0.23588704929229176, - "grad_norm": 1.0220085382461548, - "learning_rate": 1.951126485086431e-05, - "loss": 0.4069, - "step": 2504 - }, - { - "epoch": 0.2359812533854596, - "grad_norm": 0.8660708069801331, - "learning_rate": 1.9510798446471467e-05, - "loss": 0.3913, - "step": 2505 - }, - { - "epoch": 0.23607545747862746, - "grad_norm": 1.070881962776184, - "learning_rate": 1.951033182521771e-05, - "loss": 0.4357, - "step": 2506 - }, - { - "epoch": 0.23616966157179528, - "grad_norm": 0.8952769637107849, - "learning_rate": 1.950986498711368e-05, - "loss": 0.399, - "step": 2507 - }, - { - "epoch": 0.23626386566496313, - "grad_norm": 1.0205018520355225, - "learning_rate": 1.950939793217002e-05, - "loss": 0.375, - "step": 2508 - }, - { - "epoch": 0.23635806975813098, - "grad_norm": 1.0109773874282837, - "learning_rate": 1.950893066039738e-05, - "loss": 0.3929, - "step": 2509 - }, - { - "epoch": 0.23645227385129883, - "grad_norm": 1.0607317686080933, - "learning_rate": 1.9508463171806417e-05, - "loss": 0.3655, - "step": 2510 - }, - { - "epoch": 0.23654647794446668, - "grad_norm": 0.8195115923881531, - "learning_rate": 1.950799546640779e-05, - "loss": 0.3105, - "step": 2511 - }, - { - "epoch": 0.23664068203763453, - "grad_norm": 0.960180401802063, - "learning_rate": 1.950752754421216e-05, - "loss": 0.3719, - "step": 2512 - }, - { - "epoch": 0.23673488613080237, - "grad_norm": 0.8067786693572998, - "learning_rate": 1.9507059405230197e-05, - "loss": 0.3586, - "step": 2513 - }, - { - "epoch": 0.23682909022397022, - "grad_norm": 0.8305049538612366, - "learning_rate": 1.950659104947258e-05, - "loss": 0.3484, - "step": 2514 - }, - { - "epoch": 0.23692329431713807, - "grad_norm": 0.9226266741752625, - "learning_rate": 1.950612247694998e-05, - "loss": 0.3696, - "step": 2515 - }, - { - "epoch": 0.23701749841030592, - "grad_norm": 0.9060239195823669, - "learning_rate": 1.950565368767309e-05, - "loss": 
0.3201, - "step": 2516 - }, - { - "epoch": 0.23711170250347377, - "grad_norm": 0.9163451790809631, - "learning_rate": 1.9505184681652594e-05, - "loss": 0.3689, - "step": 2517 - }, - { - "epoch": 0.23720590659664162, - "grad_norm": 0.9099129438400269, - "learning_rate": 1.950471545889919e-05, - "loss": 0.3589, - "step": 2518 - }, - { - "epoch": 0.23730011068980947, - "grad_norm": 0.9115674495697021, - "learning_rate": 1.9504246019423568e-05, - "loss": 0.3723, - "step": 2519 - }, - { - "epoch": 0.23739431478297732, - "grad_norm": 0.848437488079071, - "learning_rate": 1.950377636323644e-05, - "loss": 0.351, - "step": 2520 - }, - { - "epoch": 0.23748851887614517, - "grad_norm": 0.8350842595100403, - "learning_rate": 1.9503306490348518e-05, - "loss": 0.3698, - "step": 2521 - }, - { - "epoch": 0.23758272296931301, - "grad_norm": 0.8197678923606873, - "learning_rate": 1.950283640077051e-05, - "loss": 0.3891, - "step": 2522 - }, - { - "epoch": 0.23767692706248086, - "grad_norm": 0.8316652774810791, - "learning_rate": 1.950236609451313e-05, - "loss": 0.3894, - "step": 2523 - }, - { - "epoch": 0.2377711311556487, - "grad_norm": 0.8374845385551453, - "learning_rate": 1.9501895571587113e-05, - "loss": 0.3821, - "step": 2524 - }, - { - "epoch": 0.23786533524881656, - "grad_norm": 0.9662516713142395, - "learning_rate": 1.9501424832003176e-05, - "loss": 0.4459, - "step": 2525 - }, - { - "epoch": 0.2379595393419844, - "grad_norm": 0.7826052308082581, - "learning_rate": 1.9500953875772064e-05, - "loss": 0.3517, - "step": 2526 - }, - { - "epoch": 0.23805374343515226, - "grad_norm": 0.8361783027648926, - "learning_rate": 1.950048270290451e-05, - "loss": 0.4001, - "step": 2527 - }, - { - "epoch": 0.2381479475283201, - "grad_norm": 1.0288957357406616, - "learning_rate": 1.9500011313411253e-05, - "loss": 0.4515, - "step": 2528 - }, - { - "epoch": 0.23824215162148796, - "grad_norm": 0.7369556427001953, - "learning_rate": 1.949953970730305e-05, - "loss": 0.3296, - "step": 2529 - }, - { - "epoch": 0.2383363557146558, - "grad_norm": 0.8121489882469177, - "learning_rate": 1.9499067884590646e-05, - "loss": 0.4298, - "step": 2530 - }, - { - "epoch": 0.23843055980782366, - "grad_norm": 0.8475870490074158, - "learning_rate": 1.949859584528481e-05, - "loss": 0.3204, - "step": 2531 - }, - { - "epoch": 0.2385247639009915, - "grad_norm": 0.890520453453064, - "learning_rate": 1.9498123589396294e-05, - "loss": 0.3712, - "step": 2532 - }, - { - "epoch": 0.23861896799415935, - "grad_norm": 1.6862674951553345, - "learning_rate": 1.9497651116935874e-05, - "loss": 0.3774, - "step": 2533 - }, - { - "epoch": 0.2387131720873272, - "grad_norm": 0.9394170641899109, - "learning_rate": 1.949717842791432e-05, - "loss": 0.392, - "step": 2534 - }, - { - "epoch": 0.23880737618049505, - "grad_norm": 0.9652701020240784, - "learning_rate": 1.949670552234241e-05, - "loss": 0.3633, - "step": 2535 - }, - { - "epoch": 0.2389015802736629, - "grad_norm": 1.1054222583770752, - "learning_rate": 1.9496232400230927e-05, - "loss": 0.3531, - "step": 2536 - }, - { - "epoch": 0.23899578436683075, - "grad_norm": 1.029251217842102, - "learning_rate": 1.9495759061590654e-05, - "loss": 0.3779, - "step": 2537 - }, - { - "epoch": 0.2390899884599986, - "grad_norm": 0.9493234157562256, - "learning_rate": 1.9495285506432395e-05, - "loss": 0.4521, - "step": 2538 - }, - { - "epoch": 0.23918419255316645, - "grad_norm": 0.8804742097854614, - "learning_rate": 1.949481173476694e-05, - "loss": 0.3991, - "step": 2539 - }, - { - "epoch": 0.2392783966463343, - "grad_norm": 
0.8648819327354431, - "learning_rate": 1.949433774660509e-05, - "loss": 0.3673, - "step": 2540 - }, - { - "epoch": 0.23937260073950214, - "grad_norm": 0.9381065964698792, - "learning_rate": 1.9493863541957662e-05, - "loss": 0.3908, - "step": 2541 - }, - { - "epoch": 0.23946680483266997, - "grad_norm": 0.8563313484191895, - "learning_rate": 1.9493389120835462e-05, - "loss": 0.3734, - "step": 2542 - }, - { - "epoch": 0.23956100892583782, - "grad_norm": 0.8014841675758362, - "learning_rate": 1.9492914483249304e-05, - "loss": 0.3001, - "step": 2543 - }, - { - "epoch": 0.23965521301900566, - "grad_norm": 0.9085146188735962, - "learning_rate": 1.949243962921002e-05, - "loss": 0.391, - "step": 2544 - }, - { - "epoch": 0.2397494171121735, - "grad_norm": 0.9402478933334351, - "learning_rate": 1.9491964558728428e-05, - "loss": 0.3498, - "step": 2545 - }, - { - "epoch": 0.23984362120534136, - "grad_norm": 0.8987466096878052, - "learning_rate": 1.9491489271815364e-05, - "loss": 0.4015, - "step": 2546 - }, - { - "epoch": 0.2399378252985092, - "grad_norm": 0.8225127458572388, - "learning_rate": 1.949101376848167e-05, - "loss": 0.4109, - "step": 2547 - }, - { - "epoch": 0.24003202939167706, - "grad_norm": 1.0403307676315308, - "learning_rate": 1.949053804873818e-05, - "loss": 0.3515, - "step": 2548 - }, - { - "epoch": 0.2401262334848449, - "grad_norm": 0.8384838104248047, - "learning_rate": 1.9490062112595745e-05, - "loss": 0.3844, - "step": 2549 - }, - { - "epoch": 0.24022043757801276, - "grad_norm": 1.0988948345184326, - "learning_rate": 1.9489585960065218e-05, - "loss": 0.3582, - "step": 2550 - }, - { - "epoch": 0.2403146416711806, - "grad_norm": 0.8224526643753052, - "learning_rate": 1.9489109591157458e-05, - "loss": 0.3549, - "step": 2551 - }, - { - "epoch": 0.24040884576434846, - "grad_norm": 0.8611648082733154, - "learning_rate": 1.948863300588332e-05, - "loss": 0.3626, - "step": 2552 - }, - { - "epoch": 0.2405030498575163, - "grad_norm": 0.8884101510047913, - "learning_rate": 1.9488156204253678e-05, - "loss": 0.3723, - "step": 2553 - }, - { - "epoch": 0.24059725395068415, - "grad_norm": 0.7818182110786438, - "learning_rate": 1.94876791862794e-05, - "loss": 0.3329, - "step": 2554 - }, - { - "epoch": 0.240691458043852, - "grad_norm": 0.8338054418563843, - "learning_rate": 1.9487201951971363e-05, - "loss": 0.3674, - "step": 2555 - }, - { - "epoch": 0.24078566213701985, - "grad_norm": 0.9098851680755615, - "learning_rate": 1.948672450134045e-05, - "loss": 0.3787, - "step": 2556 - }, - { - "epoch": 0.2408798662301877, - "grad_norm": 0.8057156205177307, - "learning_rate": 1.9486246834397546e-05, - "loss": 0.3679, - "step": 2557 - }, - { - "epoch": 0.24097407032335555, - "grad_norm": 0.9018388390541077, - "learning_rate": 1.9485768951153545e-05, - "loss": 0.3701, - "step": 2558 - }, - { - "epoch": 0.2410682744165234, - "grad_norm": 0.7368927001953125, - "learning_rate": 1.948529085161934e-05, - "loss": 0.3477, - "step": 2559 - }, - { - "epoch": 0.24116247850969125, - "grad_norm": 0.9526506662368774, - "learning_rate": 1.9484812535805835e-05, - "loss": 0.3872, - "step": 2560 - }, - { - "epoch": 0.2412566826028591, - "grad_norm": 0.8121480941772461, - "learning_rate": 1.948433400372394e-05, - "loss": 0.3629, - "step": 2561 - }, - { - "epoch": 0.24135088669602694, - "grad_norm": 0.9268913269042969, - "learning_rate": 1.9483855255384555e-05, - "loss": 0.4345, - "step": 2562 - }, - { - "epoch": 0.2414450907891948, - "grad_norm": 0.8510921001434326, - "learning_rate": 1.9483376290798603e-05, - "loss": 0.3706, 
- "step": 2563 - }, - { - "epoch": 0.24153929488236264, - "grad_norm": 0.9793437123298645, - "learning_rate": 1.9482897109977007e-05, - "loss": 0.3579, - "step": 2564 - }, - { - "epoch": 0.2416334989755305, - "grad_norm": 0.798780620098114, - "learning_rate": 1.9482417712930696e-05, - "loss": 0.3521, - "step": 2565 - }, - { - "epoch": 0.24172770306869834, - "grad_norm": 0.8711868524551392, - "learning_rate": 1.9481938099670592e-05, - "loss": 0.3817, - "step": 2566 - }, - { - "epoch": 0.2418219071618662, - "grad_norm": 0.7080528736114502, - "learning_rate": 1.9481458270207635e-05, - "loss": 0.328, - "step": 2567 - }, - { - "epoch": 0.24191611125503404, - "grad_norm": 1.154837965965271, - "learning_rate": 1.9480978224552766e-05, - "loss": 0.4186, - "step": 2568 - }, - { - "epoch": 0.2420103153482019, - "grad_norm": 0.9485461115837097, - "learning_rate": 1.9480497962716932e-05, - "loss": 0.3723, - "step": 2569 - }, - { - "epoch": 0.24210451944136974, - "grad_norm": 0.9751293063163757, - "learning_rate": 1.9480017484711083e-05, - "loss": 0.4036, - "step": 2570 - }, - { - "epoch": 0.24219872353453759, - "grad_norm": 0.9526638984680176, - "learning_rate": 1.947953679054617e-05, - "loss": 0.3897, - "step": 2571 - }, - { - "epoch": 0.24229292762770543, - "grad_norm": 0.9223150610923767, - "learning_rate": 1.9479055880233164e-05, - "loss": 0.4004, - "step": 2572 - }, - { - "epoch": 0.24238713172087328, - "grad_norm": 1.0746058225631714, - "learning_rate": 1.947857475378302e-05, - "loss": 0.4136, - "step": 2573 - }, - { - "epoch": 0.24248133581404113, - "grad_norm": 1.041205644607544, - "learning_rate": 1.9478093411206717e-05, - "loss": 0.41, - "step": 2574 - }, - { - "epoch": 0.24257553990720898, - "grad_norm": 0.8298726677894592, - "learning_rate": 1.947761185251522e-05, - "loss": 0.3915, - "step": 2575 - }, - { - "epoch": 0.24266974400037683, - "grad_norm": 0.8218982219696045, - "learning_rate": 1.947713007771952e-05, - "loss": 0.3271, - "step": 2576 - }, - { - "epoch": 0.24276394809354465, - "grad_norm": 0.8517094850540161, - "learning_rate": 1.9476648086830598e-05, - "loss": 0.3578, - "step": 2577 - }, - { - "epoch": 0.2428581521867125, - "grad_norm": 1.017295241355896, - "learning_rate": 1.947616587985944e-05, - "loss": 0.3737, - "step": 2578 - }, - { - "epoch": 0.24295235627988035, - "grad_norm": 0.9262570142745972, - "learning_rate": 1.947568345681705e-05, - "loss": 0.3378, - "step": 2579 - }, - { - "epoch": 0.2430465603730482, - "grad_norm": 0.9176540970802307, - "learning_rate": 1.9475200817714416e-05, - "loss": 0.4121, - "step": 2580 - }, - { - "epoch": 0.24314076446621605, - "grad_norm": 0.9431273341178894, - "learning_rate": 1.9474717962562554e-05, - "loss": 0.3315, - "step": 2581 - }, - { - "epoch": 0.2432349685593839, - "grad_norm": 0.9327338933944702, - "learning_rate": 1.947423489137247e-05, - "loss": 0.3689, - "step": 2582 - }, - { - "epoch": 0.24332917265255175, - "grad_norm": 0.7591254115104675, - "learning_rate": 1.9473751604155176e-05, - "loss": 0.326, - "step": 2583 - }, - { - "epoch": 0.2434233767457196, - "grad_norm": 0.8648123741149902, - "learning_rate": 1.9473268100921697e-05, - "loss": 0.3635, - "step": 2584 - }, - { - "epoch": 0.24351758083888744, - "grad_norm": 0.8905833959579468, - "learning_rate": 1.9472784381683052e-05, - "loss": 0.3974, - "step": 2585 - }, - { - "epoch": 0.2436117849320553, - "grad_norm": 1.354017734527588, - "learning_rate": 1.9472300446450273e-05, - "loss": 0.4085, - "step": 2586 - }, - { - "epoch": 0.24370598902522314, - "grad_norm": 
0.8489964008331299, - "learning_rate": 1.9471816295234397e-05, - "loss": 0.3394, - "step": 2587 - }, - { - "epoch": 0.243800193118391, - "grad_norm": 0.832228422164917, - "learning_rate": 1.9471331928046457e-05, - "loss": 0.3554, - "step": 2588 - }, - { - "epoch": 0.24389439721155884, - "grad_norm": 0.9334375262260437, - "learning_rate": 1.9470847344897507e-05, - "loss": 0.3665, - "step": 2589 - }, - { - "epoch": 0.2439886013047267, - "grad_norm": 0.8970177173614502, - "learning_rate": 1.9470362545798583e-05, - "loss": 0.3646, - "step": 2590 - }, - { - "epoch": 0.24408280539789454, - "grad_norm": 0.8576048016548157, - "learning_rate": 1.9469877530760753e-05, - "loss": 0.4117, - "step": 2591 - }, - { - "epoch": 0.24417700949106239, - "grad_norm": 0.789254367351532, - "learning_rate": 1.946939229979507e-05, - "loss": 0.3471, - "step": 2592 - }, - { - "epoch": 0.24427121358423023, - "grad_norm": 0.8189065456390381, - "learning_rate": 1.9468906852912595e-05, - "loss": 0.3545, - "step": 2593 - }, - { - "epoch": 0.24436541767739808, - "grad_norm": 1.0103284120559692, - "learning_rate": 1.94684211901244e-05, - "loss": 0.3514, - "step": 2594 - }, - { - "epoch": 0.24445962177056593, - "grad_norm": 0.7853721380233765, - "learning_rate": 1.946793531144156e-05, - "loss": 0.3254, - "step": 2595 - }, - { - "epoch": 0.24455382586373378, - "grad_norm": 1.181939959526062, - "learning_rate": 1.9467449216875153e-05, - "loss": 0.4127, - "step": 2596 - }, - { - "epoch": 0.24464802995690163, - "grad_norm": 0.938714325428009, - "learning_rate": 1.946696290643626e-05, - "loss": 0.3301, - "step": 2597 - }, - { - "epoch": 0.24474223405006948, - "grad_norm": 0.8742538690567017, - "learning_rate": 1.946647638013597e-05, - "loss": 0.3783, - "step": 2598 - }, - { - "epoch": 0.24483643814323733, - "grad_norm": 0.8467581272125244, - "learning_rate": 1.946598963798538e-05, - "loss": 0.3545, - "step": 2599 - }, - { - "epoch": 0.24493064223640518, - "grad_norm": 0.8565823435783386, - "learning_rate": 1.946550267999559e-05, - "loss": 0.3615, - "step": 2600 - }, - { - "epoch": 0.24502484632957303, - "grad_norm": 0.9403883218765259, - "learning_rate": 1.94650155061777e-05, - "loss": 0.3955, - "step": 2601 - }, - { - "epoch": 0.24511905042274088, - "grad_norm": 0.907534122467041, - "learning_rate": 1.9464528116542816e-05, - "loss": 0.4037, - "step": 2602 - }, - { - "epoch": 0.24521325451590872, - "grad_norm": 0.9900031685829163, - "learning_rate": 1.9464040511102054e-05, - "loss": 0.39, - "step": 2603 - }, - { - "epoch": 0.24530745860907657, - "grad_norm": 0.8356846570968628, - "learning_rate": 1.9463552689866533e-05, - "loss": 0.3728, - "step": 2604 - }, - { - "epoch": 0.24540166270224442, - "grad_norm": 0.8656098246574402, - "learning_rate": 1.9463064652847373e-05, - "loss": 0.3525, - "step": 2605 - }, - { - "epoch": 0.24549586679541227, - "grad_norm": 0.7707147002220154, - "learning_rate": 1.9462576400055707e-05, - "loss": 0.3464, - "step": 2606 - }, - { - "epoch": 0.24559007088858012, - "grad_norm": 0.8406088352203369, - "learning_rate": 1.9462087931502663e-05, - "loss": 0.3718, - "step": 2607 - }, - { - "epoch": 0.24568427498174797, - "grad_norm": 0.9418051838874817, - "learning_rate": 1.946159924719938e-05, - "loss": 0.3888, - "step": 2608 - }, - { - "epoch": 0.24577847907491582, - "grad_norm": 0.9811563491821289, - "learning_rate": 1.9461110347157005e-05, - "loss": 0.4128, - "step": 2609 - }, - { - "epoch": 0.24587268316808367, - "grad_norm": 0.9344661235809326, - "learning_rate": 1.946062123138668e-05, - "loss": 0.3683, 
- "step": 2610 - }, - { - "epoch": 0.2459668872612515, - "grad_norm": 0.8481505513191223, - "learning_rate": 1.946013189989956e-05, - "loss": 0.332, - "step": 2611 - }, - { - "epoch": 0.24606109135441934, - "grad_norm": 0.8536441922187805, - "learning_rate": 1.94596423527068e-05, - "loss": 0.3681, - "step": 2612 - }, - { - "epoch": 0.24615529544758719, - "grad_norm": 0.8502405285835266, - "learning_rate": 1.945915258981957e-05, - "loss": 0.3773, - "step": 2613 - }, - { - "epoch": 0.24624949954075503, - "grad_norm": 0.8009442687034607, - "learning_rate": 1.9458662611249024e-05, - "loss": 0.3458, - "step": 2614 - }, - { - "epoch": 0.24634370363392288, - "grad_norm": 0.9556559920310974, - "learning_rate": 1.9458172417006347e-05, - "loss": 0.3471, - "step": 2615 - }, - { - "epoch": 0.24643790772709073, - "grad_norm": 0.8855275511741638, - "learning_rate": 1.9457682007102713e-05, - "loss": 0.3865, - "step": 2616 - }, - { - "epoch": 0.24653211182025858, - "grad_norm": 0.8879374265670776, - "learning_rate": 1.9457191381549304e-05, - "loss": 0.3541, - "step": 2617 - }, - { - "epoch": 0.24662631591342643, - "grad_norm": 0.7845891118049622, - "learning_rate": 1.94567005403573e-05, - "loss": 0.3418, - "step": 2618 - }, - { - "epoch": 0.24672052000659428, - "grad_norm": 0.8479444980621338, - "learning_rate": 1.9456209483537902e-05, - "loss": 0.3749, - "step": 2619 - }, - { - "epoch": 0.24681472409976213, - "grad_norm": 0.8177931904792786, - "learning_rate": 1.9455718211102305e-05, - "loss": 0.3965, - "step": 2620 - }, - { - "epoch": 0.24690892819292998, - "grad_norm": 0.9803379774093628, - "learning_rate": 1.9455226723061704e-05, - "loss": 0.4162, - "step": 2621 - }, - { - "epoch": 0.24700313228609783, - "grad_norm": 0.7587687969207764, - "learning_rate": 1.9454735019427315e-05, - "loss": 0.3643, - "step": 2622 - }, - { - "epoch": 0.24709733637926568, - "grad_norm": 0.8936195373535156, - "learning_rate": 1.9454243100210345e-05, - "loss": 0.3969, - "step": 2623 - }, - { - "epoch": 0.24719154047243352, - "grad_norm": 0.9062477350234985, - "learning_rate": 1.945375096542201e-05, - "loss": 0.4177, - "step": 2624 - }, - { - "epoch": 0.24728574456560137, - "grad_norm": 0.8809071183204651, - "learning_rate": 1.9453258615073534e-05, - "loss": 0.3703, - "step": 2625 - }, - { - "epoch": 0.24737994865876922, - "grad_norm": 1.2047926187515259, - "learning_rate": 1.9452766049176143e-05, - "loss": 0.4173, - "step": 2626 - }, - { - "epoch": 0.24747415275193707, - "grad_norm": 1.0378501415252686, - "learning_rate": 1.9452273267741063e-05, - "loss": 0.4247, - "step": 2627 - }, - { - "epoch": 0.24756835684510492, - "grad_norm": 0.9909701943397522, - "learning_rate": 1.9451780270779538e-05, - "loss": 0.4418, - "step": 2628 - }, - { - "epoch": 0.24766256093827277, - "grad_norm": 0.8759151697158813, - "learning_rate": 1.94512870583028e-05, - "loss": 0.368, - "step": 2629 - }, - { - "epoch": 0.24775676503144062, - "grad_norm": 0.7846768498420715, - "learning_rate": 1.9450793630322106e-05, - "loss": 0.3262, - "step": 2630 - }, - { - "epoch": 0.24785096912460847, - "grad_norm": 0.9084791541099548, - "learning_rate": 1.94502999868487e-05, - "loss": 0.3998, - "step": 2631 - }, - { - "epoch": 0.24794517321777632, - "grad_norm": 0.8984910249710083, - "learning_rate": 1.944980612789384e-05, - "loss": 0.3733, - "step": 2632 - }, - { - "epoch": 0.24803937731094416, - "grad_norm": 0.8418054580688477, - "learning_rate": 1.9449312053468782e-05, - "loss": 0.3929, - "step": 2633 - }, - { - "epoch": 0.248133581404112, - "grad_norm": 
0.7799889445304871, - "learning_rate": 1.94488177635848e-05, - "loss": 0.3721, - "step": 2634 - }, - { - "epoch": 0.24822778549727986, - "grad_norm": 1.1770952939987183, - "learning_rate": 1.9448323258253157e-05, - "loss": 0.3851, - "step": 2635 - }, - { - "epoch": 0.2483219895904477, - "grad_norm": 0.7383617162704468, - "learning_rate": 1.9447828537485133e-05, - "loss": 0.3525, - "step": 2636 - }, - { - "epoch": 0.24841619368361556, - "grad_norm": 0.9092996120452881, - "learning_rate": 1.9447333601292006e-05, - "loss": 0.3579, - "step": 2637 - }, - { - "epoch": 0.2485103977767834, - "grad_norm": 0.8483371138572693, - "learning_rate": 1.9446838449685064e-05, - "loss": 0.3626, - "step": 2638 - }, - { - "epoch": 0.24860460186995126, - "grad_norm": 0.8367277383804321, - "learning_rate": 1.9446343082675594e-05, - "loss": 0.3537, - "step": 2639 - }, - { - "epoch": 0.2486988059631191, - "grad_norm": 0.7318528890609741, - "learning_rate": 1.9445847500274895e-05, - "loss": 0.3382, - "step": 2640 - }, - { - "epoch": 0.24879301005628696, - "grad_norm": 0.8454012274742126, - "learning_rate": 1.944535170249426e-05, - "loss": 0.3134, - "step": 2641 - }, - { - "epoch": 0.2488872141494548, - "grad_norm": 0.8470341563224792, - "learning_rate": 1.9444855689345002e-05, - "loss": 0.3589, - "step": 2642 - }, - { - "epoch": 0.24898141824262265, - "grad_norm": 0.8552345633506775, - "learning_rate": 1.944435946083843e-05, - "loss": 0.36, - "step": 2643 - }, - { - "epoch": 0.2490756223357905, - "grad_norm": 0.7981129288673401, - "learning_rate": 1.944386301698585e-05, - "loss": 0.3207, - "step": 2644 - }, - { - "epoch": 0.24916982642895835, - "grad_norm": 0.8820785284042358, - "learning_rate": 1.9443366357798594e-05, - "loss": 0.3725, - "step": 2645 - }, - { - "epoch": 0.24926403052212617, - "grad_norm": 0.8401349782943726, - "learning_rate": 1.944286948328798e-05, - "loss": 0.3567, - "step": 2646 - }, - { - "epoch": 0.24935823461529402, - "grad_norm": 0.8499570488929749, - "learning_rate": 1.9442372393465332e-05, - "loss": 0.3798, - "step": 2647 - }, - { - "epoch": 0.24945243870846187, - "grad_norm": 0.8055731058120728, - "learning_rate": 1.9441875088342e-05, - "loss": 0.3517, - "step": 2648 - }, - { - "epoch": 0.24954664280162972, - "grad_norm": 0.8906497359275818, - "learning_rate": 1.9441377567929306e-05, - "loss": 0.3686, - "step": 2649 - }, - { - "epoch": 0.24964084689479757, - "grad_norm": 0.8505920767784119, - "learning_rate": 1.9440879832238603e-05, - "loss": 0.4328, - "step": 2650 - }, - { - "epoch": 0.24973505098796542, - "grad_norm": 0.9310007691383362, - "learning_rate": 1.944038188128124e-05, - "loss": 0.4144, - "step": 2651 - }, - { - "epoch": 0.24982925508113327, - "grad_norm": 0.8184910416603088, - "learning_rate": 1.9439883715068572e-05, - "loss": 0.3753, - "step": 2652 - }, - { - "epoch": 0.24992345917430112, - "grad_norm": 0.7765772938728333, - "learning_rate": 1.9439385333611954e-05, - "loss": 0.3676, - "step": 2653 - }, - { - "epoch": 0.250017663267469, - "grad_norm": 0.9684597253799438, - "learning_rate": 1.9438886736922757e-05, - "loss": 0.4205, - "step": 2654 - }, - { - "epoch": 0.25011186736063684, - "grad_norm": 0.7522620558738708, - "learning_rate": 1.943838792501234e-05, - "loss": 0.3473, - "step": 2655 - }, - { - "epoch": 0.2502060714538047, - "grad_norm": 1.0273616313934326, - "learning_rate": 1.9437888897892085e-05, - "loss": 0.3991, - "step": 2656 - }, - { - "epoch": 0.25030027554697254, - "grad_norm": 0.8650879859924316, - "learning_rate": 1.943738965557336e-05, - "loss": 0.3243, 
- "step": 2657 - }, - { - "epoch": 0.2503944796401404, - "grad_norm": 0.7960026860237122, - "learning_rate": 1.9436890198067565e-05, - "loss": 0.3458, - "step": 2658 - }, - { - "epoch": 0.25048868373330824, - "grad_norm": 0.8975011110305786, - "learning_rate": 1.9436390525386072e-05, - "loss": 0.3925, - "step": 2659 - }, - { - "epoch": 0.2505828878264761, - "grad_norm": 0.8295131325721741, - "learning_rate": 1.9435890637540284e-05, - "loss": 0.3433, - "step": 2660 - }, - { - "epoch": 0.25067709191964394, - "grad_norm": 0.9529341459274292, - "learning_rate": 1.9435390534541598e-05, - "loss": 0.3631, - "step": 2661 - }, - { - "epoch": 0.2507712960128118, - "grad_norm": 0.7790176868438721, - "learning_rate": 1.9434890216401416e-05, - "loss": 0.367, - "step": 2662 - }, - { - "epoch": 0.2508655001059796, - "grad_norm": 0.9206838011741638, - "learning_rate": 1.943438968313114e-05, - "loss": 0.3433, - "step": 2663 - }, - { - "epoch": 0.2509597041991474, - "grad_norm": 0.8748577237129211, - "learning_rate": 1.9433888934742193e-05, - "loss": 0.3746, - "step": 2664 - }, - { - "epoch": 0.2510539082923153, - "grad_norm": 0.8435645699501038, - "learning_rate": 1.943338797124599e-05, - "loss": 0.3615, - "step": 2665 - }, - { - "epoch": 0.2511481123854831, - "grad_norm": 1.0106738805770874, - "learning_rate": 1.943288679265395e-05, - "loss": 0.4068, - "step": 2666 - }, - { - "epoch": 0.251242316478651, - "grad_norm": 0.911796510219574, - "learning_rate": 1.94323853989775e-05, - "loss": 0.4102, - "step": 2667 - }, - { - "epoch": 0.2513365205718188, - "grad_norm": 0.9706193804740906, - "learning_rate": 1.943188379022808e-05, - "loss": 0.366, - "step": 2668 - }, - { - "epoch": 0.25143072466498667, - "grad_norm": 0.8246974945068359, - "learning_rate": 1.9431381966417125e-05, - "loss": 0.3813, - "step": 2669 - }, - { - "epoch": 0.2515249287581545, - "grad_norm": 0.7846889495849609, - "learning_rate": 1.943087992755607e-05, - "loss": 0.3331, - "step": 2670 - }, - { - "epoch": 0.25161913285132237, - "grad_norm": 0.7623711824417114, - "learning_rate": 1.9430377673656372e-05, - "loss": 0.3241, - "step": 2671 - }, - { - "epoch": 0.2517133369444902, - "grad_norm": 0.7921915650367737, - "learning_rate": 1.9429875204729476e-05, - "loss": 0.3245, - "step": 2672 - }, - { - "epoch": 0.25180754103765807, - "grad_norm": 0.7707971334457397, - "learning_rate": 1.942937252078684e-05, - "loss": 0.3439, - "step": 2673 - }, - { - "epoch": 0.2519017451308259, - "grad_norm": 0.8475367426872253, - "learning_rate": 1.942886962183993e-05, - "loss": 0.3564, - "step": 2674 - }, - { - "epoch": 0.25199594922399376, - "grad_norm": 0.9145054817199707, - "learning_rate": 1.942836650790021e-05, - "loss": 0.4058, - "step": 2675 - }, - { - "epoch": 0.2520901533171616, - "grad_norm": 0.9165285229682922, - "learning_rate": 1.9427863178979152e-05, - "loss": 0.3632, - "step": 2676 - }, - { - "epoch": 0.25218435741032946, - "grad_norm": 0.9520749449729919, - "learning_rate": 1.9427359635088235e-05, - "loss": 0.3624, - "step": 2677 - }, - { - "epoch": 0.2522785615034973, - "grad_norm": 0.9126151204109192, - "learning_rate": 1.9426855876238937e-05, - "loss": 0.3721, - "step": 2678 - }, - { - "epoch": 0.25237276559666516, - "grad_norm": 0.8739978671073914, - "learning_rate": 1.9426351902442746e-05, - "loss": 0.3466, - "step": 2679 - }, - { - "epoch": 0.252466969689833, - "grad_norm": 0.8436156511306763, - "learning_rate": 1.9425847713711155e-05, - "loss": 0.3804, - "step": 2680 - }, - { - "epoch": 0.25256117378300086, - "grad_norm": 
0.9655857682228088, - "learning_rate": 1.9425343310055654e-05, - "loss": 0.3739, - "step": 2681 - }, - { - "epoch": 0.2526553778761687, - "grad_norm": 0.8512799143791199, - "learning_rate": 1.9424838691487755e-05, - "loss": 0.3698, - "step": 2682 - }, - { - "epoch": 0.25274958196933656, - "grad_norm": 0.8704470992088318, - "learning_rate": 1.9424333858018954e-05, - "loss": 0.3619, - "step": 2683 - }, - { - "epoch": 0.2528437860625044, - "grad_norm": 0.8650441765785217, - "learning_rate": 1.942382880966077e-05, - "loss": 0.3811, - "step": 2684 - }, - { - "epoch": 0.25293799015567225, - "grad_norm": 0.8505967259407043, - "learning_rate": 1.9423323546424712e-05, - "loss": 0.3659, - "step": 2685 - }, - { - "epoch": 0.2530321942488401, - "grad_norm": 0.8026577234268188, - "learning_rate": 1.9422818068322303e-05, - "loss": 0.3506, - "step": 2686 - }, - { - "epoch": 0.25312639834200795, - "grad_norm": 0.98709636926651, - "learning_rate": 1.942231237536507e-05, - "loss": 0.333, - "step": 2687 - }, - { - "epoch": 0.2532206024351758, - "grad_norm": 0.8574108481407166, - "learning_rate": 1.9421806467564546e-05, - "loss": 0.378, - "step": 2688 - }, - { - "epoch": 0.25331480652834365, - "grad_norm": 0.8704533576965332, - "learning_rate": 1.942130034493226e-05, - "loss": 0.3253, - "step": 2689 - }, - { - "epoch": 0.2534090106215115, - "grad_norm": 0.8911964893341064, - "learning_rate": 1.9420794007479757e-05, - "loss": 0.3907, - "step": 2690 - }, - { - "epoch": 0.25350321471467935, - "grad_norm": 0.811689019203186, - "learning_rate": 1.942028745521858e-05, - "loss": 0.3441, - "step": 2691 - }, - { - "epoch": 0.2535974188078472, - "grad_norm": 0.9497051239013672, - "learning_rate": 1.9419780688160285e-05, - "loss": 0.4407, - "step": 2692 - }, - { - "epoch": 0.25369162290101505, - "grad_norm": 0.8960627317428589, - "learning_rate": 1.9419273706316416e-05, - "loss": 0.3707, - "step": 2693 - }, - { - "epoch": 0.2537858269941829, - "grad_norm": 0.8265642523765564, - "learning_rate": 1.9418766509698544e-05, - "loss": 0.3648, - "step": 2694 - }, - { - "epoch": 0.25388003108735074, - "grad_norm": 0.805389404296875, - "learning_rate": 1.9418259098318226e-05, - "loss": 0.3685, - "step": 2695 - }, - { - "epoch": 0.2539742351805186, - "grad_norm": 1.0355446338653564, - "learning_rate": 1.9417751472187032e-05, - "loss": 0.3896, - "step": 2696 - }, - { - "epoch": 0.25406843927368644, - "grad_norm": 0.9594706892967224, - "learning_rate": 1.9417243631316548e-05, - "loss": 0.3926, - "step": 2697 - }, - { - "epoch": 0.2541626433668543, - "grad_norm": 0.8476678729057312, - "learning_rate": 1.941673557571834e-05, - "loss": 0.3579, - "step": 2698 - }, - { - "epoch": 0.25425684746002214, - "grad_norm": 1.066336989402771, - "learning_rate": 1.9416227305404e-05, - "loss": 0.3944, - "step": 2699 - }, - { - "epoch": 0.25435105155319, - "grad_norm": 0.8706140518188477, - "learning_rate": 1.941571882038511e-05, - "loss": 0.3879, - "step": 2700 - }, - { - "epoch": 0.25444525564635784, - "grad_norm": 0.8646376132965088, - "learning_rate": 1.9415210120673275e-05, - "loss": 0.3663, - "step": 2701 - }, - { - "epoch": 0.2545394597395257, - "grad_norm": 0.9006446003913879, - "learning_rate": 1.9414701206280083e-05, - "loss": 0.3779, - "step": 2702 - }, - { - "epoch": 0.25463366383269354, - "grad_norm": 0.831724226474762, - "learning_rate": 1.941419207721715e-05, - "loss": 0.3611, - "step": 2703 - }, - { - "epoch": 0.2547278679258614, - "grad_norm": 0.8399516940116882, - "learning_rate": 1.9413682733496073e-05, - "loss": 0.3824, - "step": 
2704 - }, - { - "epoch": 0.25482207201902923, - "grad_norm": 0.8994430303573608, - "learning_rate": 1.9413173175128472e-05, - "loss": 0.3605, - "step": 2705 - }, - { - "epoch": 0.2549162761121971, - "grad_norm": 0.7757525444030762, - "learning_rate": 1.9412663402125968e-05, - "loss": 0.349, - "step": 2706 - }, - { - "epoch": 0.25501048020536493, - "grad_norm": 0.8783275485038757, - "learning_rate": 1.9412153414500176e-05, - "loss": 0.3803, - "step": 2707 - }, - { - "epoch": 0.2551046842985328, - "grad_norm": 0.9689286947250366, - "learning_rate": 1.9411643212262733e-05, - "loss": 0.3418, - "step": 2708 - }, - { - "epoch": 0.25519888839170063, - "grad_norm": 0.70932537317276, - "learning_rate": 1.941113279542527e-05, - "loss": 0.2944, - "step": 2709 - }, - { - "epoch": 0.2552930924848685, - "grad_norm": 0.9320895671844482, - "learning_rate": 1.9410622163999423e-05, - "loss": 0.4067, - "step": 2710 - }, - { - "epoch": 0.2553872965780363, - "grad_norm": 0.8390423059463501, - "learning_rate": 1.941011131799684e-05, - "loss": 0.3477, - "step": 2711 - }, - { - "epoch": 0.2554815006712042, - "grad_norm": 0.876505434513092, - "learning_rate": 1.9409600257429164e-05, - "loss": 0.388, - "step": 2712 - }, - { - "epoch": 0.255575704764372, - "grad_norm": 0.9468458890914917, - "learning_rate": 1.940908898230805e-05, - "loss": 0.3464, - "step": 2713 - }, - { - "epoch": 0.2556699088575399, - "grad_norm": 0.9577358961105347, - "learning_rate": 1.9408577492645153e-05, - "loss": 0.4196, - "step": 2714 - }, - { - "epoch": 0.2557641129507077, - "grad_norm": 0.7596282362937927, - "learning_rate": 1.940806578845214e-05, - "loss": 0.3717, - "step": 2715 - }, - { - "epoch": 0.25585831704387557, - "grad_norm": 0.8836774826049805, - "learning_rate": 1.940755386974068e-05, - "loss": 0.362, - "step": 2716 - }, - { - "epoch": 0.2559525211370434, - "grad_norm": 0.8619797825813293, - "learning_rate": 1.940704173652244e-05, - "loss": 0.3593, - "step": 2717 - }, - { - "epoch": 0.25604672523021127, - "grad_norm": 0.7850464582443237, - "learning_rate": 1.9406529388809103e-05, - "loss": 0.3108, - "step": 2718 - }, - { - "epoch": 0.2561409293233791, - "grad_norm": 0.8899826407432556, - "learning_rate": 1.9406016826612346e-05, - "loss": 0.4021, - "step": 2719 - }, - { - "epoch": 0.25623513341654697, - "grad_norm": 0.9437614679336548, - "learning_rate": 1.9405504049943858e-05, - "loss": 0.4167, - "step": 2720 - }, - { - "epoch": 0.2563293375097148, - "grad_norm": 0.860201358795166, - "learning_rate": 1.9404991058815335e-05, - "loss": 0.3854, - "step": 2721 - }, - { - "epoch": 0.25642354160288267, - "grad_norm": 0.8130446672439575, - "learning_rate": 1.9404477853238467e-05, - "loss": 0.3256, - "step": 2722 - }, - { - "epoch": 0.2565177456960505, - "grad_norm": 0.834983766078949, - "learning_rate": 1.9403964433224963e-05, - "loss": 0.3715, - "step": 2723 - }, - { - "epoch": 0.25661194978921836, - "grad_norm": 0.9256806969642639, - "learning_rate": 1.9403450798786525e-05, - "loss": 0.3505, - "step": 2724 - }, - { - "epoch": 0.2567061538823862, - "grad_norm": 1.429145097732544, - "learning_rate": 1.9402936949934865e-05, - "loss": 0.3304, - "step": 2725 - }, - { - "epoch": 0.25680035797555406, - "grad_norm": 0.8746187090873718, - "learning_rate": 1.94024228866817e-05, - "loss": 0.3588, - "step": 2726 - }, - { - "epoch": 0.2568945620687219, - "grad_norm": 0.9026733636856079, - "learning_rate": 1.9401908609038752e-05, - "loss": 0.3895, - "step": 2727 - }, - { - "epoch": 0.25698876616188976, - "grad_norm": 0.8675488829612732, - 
"learning_rate": 1.940139411701775e-05, - "loss": 0.3645, - "step": 2728 - }, - { - "epoch": 0.2570829702550576, - "grad_norm": 0.9900367259979248, - "learning_rate": 1.940087941063042e-05, - "loss": 0.3669, - "step": 2729 - }, - { - "epoch": 0.25717717434822546, - "grad_norm": 0.8831049799919128, - "learning_rate": 1.9400364489888505e-05, - "loss": 0.3588, - "step": 2730 - }, - { - "epoch": 0.2572713784413933, - "grad_norm": 0.7980867624282837, - "learning_rate": 1.9399849354803735e-05, - "loss": 0.3983, - "step": 2731 - }, - { - "epoch": 0.2573655825345611, - "grad_norm": 1.0000221729278564, - "learning_rate": 1.939933400538787e-05, - "loss": 0.4212, - "step": 2732 - }, - { - "epoch": 0.25745978662772895, - "grad_norm": 0.8112549185752869, - "learning_rate": 1.939881844165265e-05, - "loss": 0.3665, - "step": 2733 - }, - { - "epoch": 0.2575539907208968, - "grad_norm": 0.7917676568031311, - "learning_rate": 1.9398302663609835e-05, - "loss": 0.3281, - "step": 2734 - }, - { - "epoch": 0.25764819481406465, - "grad_norm": 0.9711036682128906, - "learning_rate": 1.9397786671271182e-05, - "loss": 0.375, - "step": 2735 - }, - { - "epoch": 0.2577423989072325, - "grad_norm": 0.938240110874176, - "learning_rate": 1.9397270464648457e-05, - "loss": 0.4084, - "step": 2736 - }, - { - "epoch": 0.25783660300040034, - "grad_norm": 0.7782493829727173, - "learning_rate": 1.9396754043753437e-05, - "loss": 0.3454, - "step": 2737 - }, - { - "epoch": 0.2579308070935682, - "grad_norm": 0.7555148005485535, - "learning_rate": 1.9396237408597893e-05, - "loss": 0.3702, - "step": 2738 - }, - { - "epoch": 0.25802501118673604, - "grad_norm": 0.8061802387237549, - "learning_rate": 1.93957205591936e-05, - "loss": 0.371, - "step": 2739 - }, - { - "epoch": 0.2581192152799039, - "grad_norm": 0.8084131479263306, - "learning_rate": 1.9395203495552352e-05, - "loss": 0.3319, - "step": 2740 - }, - { - "epoch": 0.25821341937307174, - "grad_norm": 0.8582677841186523, - "learning_rate": 1.9394686217685933e-05, - "loss": 0.3741, - "step": 2741 - }, - { - "epoch": 0.2583076234662396, - "grad_norm": 0.8153006434440613, - "learning_rate": 1.9394168725606136e-05, - "loss": 0.3701, - "step": 2742 - }, - { - "epoch": 0.25840182755940744, - "grad_norm": 0.8366333246231079, - "learning_rate": 1.939365101932477e-05, - "loss": 0.4172, - "step": 2743 - }, - { - "epoch": 0.2584960316525753, - "grad_norm": 0.7946856021881104, - "learning_rate": 1.9393133098853627e-05, - "loss": 0.3562, - "step": 2744 - }, - { - "epoch": 0.25859023574574314, - "grad_norm": 0.7716690301895142, - "learning_rate": 1.9392614964204526e-05, - "loss": 0.3434, - "step": 2745 - }, - { - "epoch": 0.258684439838911, - "grad_norm": 0.8663679957389832, - "learning_rate": 1.9392096615389273e-05, - "loss": 0.3262, - "step": 2746 - }, - { - "epoch": 0.25877864393207883, - "grad_norm": 0.8803080916404724, - "learning_rate": 1.9391578052419697e-05, - "loss": 0.3629, - "step": 2747 - }, - { - "epoch": 0.2588728480252467, - "grad_norm": 0.9695155024528503, - "learning_rate": 1.9391059275307615e-05, - "loss": 0.4092, - "step": 2748 - }, - { - "epoch": 0.25896705211841453, - "grad_norm": 0.8796296119689941, - "learning_rate": 1.9390540284064858e-05, - "loss": 0.3786, - "step": 2749 - }, - { - "epoch": 0.2590612562115824, - "grad_norm": 0.7485645413398743, - "learning_rate": 1.9390021078703262e-05, - "loss": 0.3524, - "step": 2750 - }, - { - "epoch": 0.25915546030475023, - "grad_norm": 1.0117424726486206, - "learning_rate": 1.938950165923466e-05, - "loss": 0.3258, - "step": 2751 - }, - { 
- "epoch": 0.2592496643979181, - "grad_norm": 0.7757304906845093, - "learning_rate": 1.9388982025670903e-05, - "loss": 0.3172, - "step": 2752 - }, - { - "epoch": 0.2593438684910859, - "grad_norm": 0.8561500310897827, - "learning_rate": 1.9388462178023834e-05, - "loss": 0.3562, - "step": 2753 - }, - { - "epoch": 0.2594380725842538, - "grad_norm": 0.7588501572608948, - "learning_rate": 1.9387942116305307e-05, - "loss": 0.3297, - "step": 2754 - }, - { - "epoch": 0.2595322766774216, - "grad_norm": 0.8772037625312805, - "learning_rate": 1.938742184052718e-05, - "loss": 0.4068, - "step": 2755 - }, - { - "epoch": 0.2596264807705895, - "grad_norm": 0.8602582812309265, - "learning_rate": 1.938690135070132e-05, - "loss": 0.3845, - "step": 2756 - }, - { - "epoch": 0.2597206848637573, - "grad_norm": 0.88422691822052, - "learning_rate": 1.938638064683959e-05, - "loss": 0.3799, - "step": 2757 - }, - { - "epoch": 0.25981488895692517, - "grad_norm": 0.8500288724899292, - "learning_rate": 1.9385859728953866e-05, - "loss": 0.3806, - "step": 2758 - }, - { - "epoch": 0.259909093050093, - "grad_norm": 0.837928831577301, - "learning_rate": 1.938533859705602e-05, - "loss": 0.3755, - "step": 2759 - }, - { - "epoch": 0.26000329714326087, - "grad_norm": 0.880570113658905, - "learning_rate": 1.9384817251157945e-05, - "loss": 0.4118, - "step": 2760 - }, - { - "epoch": 0.2600975012364287, - "grad_norm": 0.9217453598976135, - "learning_rate": 1.9384295691271523e-05, - "loss": 0.4329, - "step": 2761 - }, - { - "epoch": 0.26019170532959657, - "grad_norm": 0.8721567392349243, - "learning_rate": 1.9383773917408644e-05, - "loss": 0.3772, - "step": 2762 - }, - { - "epoch": 0.2602859094227644, - "grad_norm": 0.9112967848777771, - "learning_rate": 1.9383251929581208e-05, - "loss": 0.3699, - "step": 2763 - }, - { - "epoch": 0.26038011351593227, - "grad_norm": 0.8288027048110962, - "learning_rate": 1.9382729727801116e-05, - "loss": 0.3522, - "step": 2764 - }, - { - "epoch": 0.2604743176091001, - "grad_norm": 0.7674428224563599, - "learning_rate": 1.9382207312080275e-05, - "loss": 0.3699, - "step": 2765 - }, - { - "epoch": 0.26056852170226796, - "grad_norm": 0.8218756318092346, - "learning_rate": 1.9381684682430597e-05, - "loss": 0.4002, - "step": 2766 - }, - { - "epoch": 0.2606627257954358, - "grad_norm": 0.8563632965087891, - "learning_rate": 1.9381161838864e-05, - "loss": 0.3556, - "step": 2767 - }, - { - "epoch": 0.26075692988860366, - "grad_norm": 0.7240821123123169, - "learning_rate": 1.9380638781392406e-05, - "loss": 0.3441, - "step": 2768 - }, - { - "epoch": 0.2608511339817715, - "grad_norm": 0.9017511010169983, - "learning_rate": 1.9380115510027742e-05, - "loss": 0.3579, - "step": 2769 - }, - { - "epoch": 0.26094533807493936, - "grad_norm": 0.8759740591049194, - "learning_rate": 1.9379592024781932e-05, - "loss": 0.3554, - "step": 2770 - }, - { - "epoch": 0.2610395421681072, - "grad_norm": 0.848834753036499, - "learning_rate": 1.937906832566692e-05, - "loss": 0.3683, - "step": 2771 - }, - { - "epoch": 0.26113374626127506, - "grad_norm": 0.8701202273368835, - "learning_rate": 1.937854441269465e-05, - "loss": 0.4041, - "step": 2772 - }, - { - "epoch": 0.2612279503544429, - "grad_norm": 0.8468180298805237, - "learning_rate": 1.9378020285877056e-05, - "loss": 0.3281, - "step": 2773 - }, - { - "epoch": 0.26132215444761075, - "grad_norm": 0.7769880890846252, - "learning_rate": 1.93774959452261e-05, - "loss": 0.3327, - "step": 2774 - }, - { - "epoch": 0.2614163585407786, - "grad_norm": 0.8081656694412231, - "learning_rate": 
1.9376971390753736e-05, - "loss": 0.3383, - "step": 2775 - }, - { - "epoch": 0.26151056263394645, - "grad_norm": 0.7840548157691956, - "learning_rate": 1.937644662247192e-05, - "loss": 0.364, - "step": 2776 - }, - { - "epoch": 0.2616047667271143, - "grad_norm": 0.7989134192466736, - "learning_rate": 1.937592164039262e-05, - "loss": 0.342, - "step": 2777 - }, - { - "epoch": 0.26169897082028215, - "grad_norm": 0.8289729356765747, - "learning_rate": 1.9375396444527807e-05, - "loss": 0.4254, - "step": 2778 - }, - { - "epoch": 0.26179317491345, - "grad_norm": 0.7800250053405762, - "learning_rate": 1.9374871034889457e-05, - "loss": 0.337, - "step": 2779 - }, - { - "epoch": 0.26188737900661785, - "grad_norm": 0.866980254650116, - "learning_rate": 1.9374345411489546e-05, - "loss": 0.3631, - "step": 2780 - }, - { - "epoch": 0.2619815830997857, - "grad_norm": 0.8509105443954468, - "learning_rate": 1.9373819574340064e-05, - "loss": 0.3452, - "step": 2781 - }, - { - "epoch": 0.26207578719295355, - "grad_norm": 0.771759033203125, - "learning_rate": 1.9373293523452996e-05, - "loss": 0.3488, - "step": 2782 - }, - { - "epoch": 0.2621699912861214, - "grad_norm": 0.9045215249061584, - "learning_rate": 1.937276725884034e-05, - "loss": 0.4015, - "step": 2783 - }, - { - "epoch": 0.26226419537928924, - "grad_norm": 0.9882833957672119, - "learning_rate": 1.93722407805141e-05, - "loss": 0.4366, - "step": 2784 - }, - { - "epoch": 0.2623583994724571, - "grad_norm": 0.7805891036987305, - "learning_rate": 1.937171408848627e-05, - "loss": 0.3357, - "step": 2785 - }, - { - "epoch": 0.26245260356562494, - "grad_norm": 0.8759221434593201, - "learning_rate": 1.937118718276887e-05, - "loss": 0.3699, - "step": 2786 - }, - { - "epoch": 0.2625468076587928, - "grad_norm": 0.8115664124488831, - "learning_rate": 1.9370660063373905e-05, - "loss": 0.3987, - "step": 2787 - }, - { - "epoch": 0.26264101175196064, - "grad_norm": 0.8264844417572021, - "learning_rate": 1.9370132730313403e-05, - "loss": 0.3385, - "step": 2788 - }, - { - "epoch": 0.2627352158451285, - "grad_norm": 0.8643694519996643, - "learning_rate": 1.9369605183599377e-05, - "loss": 0.38, - "step": 2789 - }, - { - "epoch": 0.26282941993829634, - "grad_norm": 0.7324055433273315, - "learning_rate": 1.936907742324387e-05, - "loss": 0.3248, - "step": 2790 - }, - { - "epoch": 0.2629236240314642, - "grad_norm": 0.928814172744751, - "learning_rate": 1.9368549449258903e-05, - "loss": 0.3749, - "step": 2791 - }, - { - "epoch": 0.26301782812463204, - "grad_norm": 0.8646256327629089, - "learning_rate": 1.9368021261656523e-05, - "loss": 0.3552, - "step": 2792 - }, - { - "epoch": 0.2631120322177999, - "grad_norm": 0.9403034448623657, - "learning_rate": 1.936749286044877e-05, - "loss": 0.3957, - "step": 2793 - }, - { - "epoch": 0.26320623631096773, - "grad_norm": 0.8048885464668274, - "learning_rate": 1.936696424564769e-05, - "loss": 0.3386, - "step": 2794 - }, - { - "epoch": 0.2633004404041356, - "grad_norm": 0.8479284644126892, - "learning_rate": 1.936643541726534e-05, - "loss": 0.3765, - "step": 2795 - }, - { - "epoch": 0.26339464449730343, - "grad_norm": 0.895535409450531, - "learning_rate": 1.936590637531378e-05, - "loss": 0.3849, - "step": 2796 - }, - { - "epoch": 0.2634888485904713, - "grad_norm": 0.7173005938529968, - "learning_rate": 1.9365377119805068e-05, - "loss": 0.3217, - "step": 2797 - }, - { - "epoch": 0.26358305268363913, - "grad_norm": 0.8052489161491394, - "learning_rate": 1.936484765075127e-05, - "loss": 0.3446, - "step": 2798 - }, - { - "epoch": 
0.263677256776807, - "grad_norm": 0.8405939340591431, - "learning_rate": 1.9364317968164466e-05, - "loss": 0.3687, - "step": 2799 - }, - { - "epoch": 0.2637714608699748, - "grad_norm": 1.122497797012329, - "learning_rate": 1.936378807205673e-05, - "loss": 0.4123, - "step": 2800 - }, - { - "epoch": 0.2638656649631426, - "grad_norm": 0.8345810174942017, - "learning_rate": 1.9363257962440147e-05, - "loss": 0.4561, - "step": 2801 - }, - { - "epoch": 0.26395986905631047, - "grad_norm": 0.7875660061836243, - "learning_rate": 1.9362727639326798e-05, - "loss": 0.3285, - "step": 2802 - }, - { - "epoch": 0.2640540731494783, - "grad_norm": 0.7944983243942261, - "learning_rate": 1.936219710272878e-05, - "loss": 0.3591, - "step": 2803 - }, - { - "epoch": 0.26414827724264617, - "grad_norm": 0.899957537651062, - "learning_rate": 1.936166635265819e-05, - "loss": 0.4162, - "step": 2804 - }, - { - "epoch": 0.264242481335814, - "grad_norm": 0.8116116523742676, - "learning_rate": 1.936113538912713e-05, - "loss": 0.3344, - "step": 2805 - }, - { - "epoch": 0.26433668542898187, - "grad_norm": 0.8326045870780945, - "learning_rate": 1.9360604212147706e-05, - "loss": 0.3389, - "step": 2806 - }, - { - "epoch": 0.2644308895221497, - "grad_norm": 0.845786452293396, - "learning_rate": 1.9360072821732027e-05, - "loss": 0.422, - "step": 2807 - }, - { - "epoch": 0.26452509361531756, - "grad_norm": 0.7908915877342224, - "learning_rate": 1.935954121789221e-05, - "loss": 0.3638, - "step": 2808 - }, - { - "epoch": 0.2646192977084854, - "grad_norm": 0.8267839550971985, - "learning_rate": 1.9359009400640384e-05, - "loss": 0.3936, - "step": 2809 - }, - { - "epoch": 0.26471350180165326, - "grad_norm": 0.8255811333656311, - "learning_rate": 1.935847736998867e-05, - "loss": 0.3582, - "step": 2810 - }, - { - "epoch": 0.2648077058948211, - "grad_norm": 0.8436806797981262, - "learning_rate": 1.9357945125949194e-05, - "loss": 0.4031, - "step": 2811 - }, - { - "epoch": 0.26490190998798896, - "grad_norm": 0.8261404037475586, - "learning_rate": 1.9357412668534098e-05, - "loss": 0.3373, - "step": 2812 - }, - { - "epoch": 0.2649961140811568, - "grad_norm": 0.8110752105712891, - "learning_rate": 1.9356879997755525e-05, - "loss": 0.3454, - "step": 2813 - }, - { - "epoch": 0.26509031817432466, - "grad_norm": 0.7850409746170044, - "learning_rate": 1.9356347113625612e-05, - "loss": 0.3491, - "step": 2814 - }, - { - "epoch": 0.2651845222674925, - "grad_norm": 0.8864262700080872, - "learning_rate": 1.935581401615652e-05, - "loss": 0.398, - "step": 2815 - }, - { - "epoch": 0.26527872636066036, - "grad_norm": 0.94769686460495, - "learning_rate": 1.9355280705360395e-05, - "loss": 0.3932, - "step": 2816 - }, - { - "epoch": 0.2653729304538282, - "grad_norm": 0.9494755864143372, - "learning_rate": 1.9354747181249406e-05, - "loss": 0.3352, - "step": 2817 - }, - { - "epoch": 0.26546713454699605, - "grad_norm": 0.7967216968536377, - "learning_rate": 1.935421344383571e-05, - "loss": 0.356, - "step": 2818 - }, - { - "epoch": 0.2655613386401639, - "grad_norm": 0.8378462791442871, - "learning_rate": 1.9353679493131486e-05, - "loss": 0.3712, - "step": 2819 - }, - { - "epoch": 0.26565554273333175, - "grad_norm": 0.8824589252471924, - "learning_rate": 1.9353145329148898e-05, - "loss": 0.425, - "step": 2820 - }, - { - "epoch": 0.2657497468264996, - "grad_norm": 0.76907879114151, - "learning_rate": 1.9352610951900135e-05, - "loss": 0.3427, - "step": 2821 - }, - { - "epoch": 0.26584395091966745, - "grad_norm": 0.9133584499359131, - "learning_rate": 
1.9352076361397376e-05, - "loss": 0.3723, - "step": 2822 - }, - { - "epoch": 0.2659381550128353, - "grad_norm": 0.8533929586410522, - "learning_rate": 1.9351541557652816e-05, - "loss": 0.3978, - "step": 2823 - }, - { - "epoch": 0.26603235910600315, - "grad_norm": 0.8981824517250061, - "learning_rate": 1.935100654067864e-05, - "loss": 0.3694, - "step": 2824 - }, - { - "epoch": 0.266126563199171, - "grad_norm": 0.9650942087173462, - "learning_rate": 1.9350471310487063e-05, - "loss": 0.409, - "step": 2825 - }, - { - "epoch": 0.26622076729233884, - "grad_norm": 0.8894109129905701, - "learning_rate": 1.9349935867090272e-05, - "loss": 0.3412, - "step": 2826 - }, - { - "epoch": 0.2663149713855067, - "grad_norm": 0.8993169665336609, - "learning_rate": 1.9349400210500482e-05, - "loss": 0.3701, - "step": 2827 - }, - { - "epoch": 0.26640917547867454, - "grad_norm": 0.9565675258636475, - "learning_rate": 1.9348864340729915e-05, - "loss": 0.4271, - "step": 2828 - }, - { - "epoch": 0.2665033795718424, - "grad_norm": 0.8573203682899475, - "learning_rate": 1.9348328257790777e-05, - "loss": 0.3429, - "step": 2829 - }, - { - "epoch": 0.26659758366501024, - "grad_norm": 0.8001130223274231, - "learning_rate": 1.93477919616953e-05, - "loss": 0.3578, - "step": 2830 - }, - { - "epoch": 0.2666917877581781, - "grad_norm": 1.0578571557998657, - "learning_rate": 1.9347255452455707e-05, - "loss": 0.4134, - "step": 2831 - }, - { - "epoch": 0.26678599185134594, - "grad_norm": 1.0042641162872314, - "learning_rate": 1.9346718730084238e-05, - "loss": 0.436, - "step": 2832 - }, - { - "epoch": 0.2668801959445138, - "grad_norm": 0.8931630253791809, - "learning_rate": 1.9346181794593123e-05, - "loss": 0.3758, - "step": 2833 - }, - { - "epoch": 0.26697440003768164, - "grad_norm": 0.885483980178833, - "learning_rate": 1.934564464599461e-05, - "loss": 0.3731, - "step": 2834 - }, - { - "epoch": 0.2670686041308495, - "grad_norm": 0.8488326668739319, - "learning_rate": 1.9345107284300945e-05, - "loss": 0.3707, - "step": 2835 - }, - { - "epoch": 0.26716280822401733, - "grad_norm": 0.7997558116912842, - "learning_rate": 1.9344569709524385e-05, - "loss": 0.3458, - "step": 2836 - }, - { - "epoch": 0.2672570123171852, - "grad_norm": 0.8410530090332031, - "learning_rate": 1.934403192167718e-05, - "loss": 0.3859, - "step": 2837 - }, - { - "epoch": 0.26735121641035303, - "grad_norm": 0.8481351137161255, - "learning_rate": 1.93434939207716e-05, - "loss": 0.3582, - "step": 2838 - }, - { - "epoch": 0.2674454205035209, - "grad_norm": 0.8808439373970032, - "learning_rate": 1.9342955706819905e-05, - "loss": 0.3709, - "step": 2839 - }, - { - "epoch": 0.26753962459668873, - "grad_norm": 0.8731141686439514, - "learning_rate": 1.9342417279834373e-05, - "loss": 0.3665, - "step": 2840 - }, - { - "epoch": 0.2676338286898566, - "grad_norm": 1.0484288930892944, - "learning_rate": 1.9341878639827277e-05, - "loss": 0.4142, - "step": 2841 - }, - { - "epoch": 0.2677280327830244, - "grad_norm": 0.8605663180351257, - "learning_rate": 1.93413397868109e-05, - "loss": 0.3377, - "step": 2842 - }, - { - "epoch": 0.2678222368761923, - "grad_norm": 0.8253269195556641, - "learning_rate": 1.9340800720797533e-05, - "loss": 0.381, - "step": 2843 - }, - { - "epoch": 0.2679164409693601, - "grad_norm": 0.8432108163833618, - "learning_rate": 1.934026144179946e-05, - "loss": 0.3714, - "step": 2844 - }, - { - "epoch": 0.268010645062528, - "grad_norm": 1.0049387216567993, - "learning_rate": 1.9339721949828982e-05, - "loss": 0.4052, - "step": 2845 - }, - { - "epoch": 
0.2681048491556958, - "grad_norm": 0.911248505115509, - "learning_rate": 1.93391822448984e-05, - "loss": 0.4099, - "step": 2846 - }, - { - "epoch": 0.2681990532488637, - "grad_norm": 0.7077834010124207, - "learning_rate": 1.933864232702002e-05, - "loss": 0.3201, - "step": 2847 - }, - { - "epoch": 0.2682932573420315, - "grad_norm": 0.9433247447013855, - "learning_rate": 1.9338102196206155e-05, - "loss": 0.3602, - "step": 2848 - }, - { - "epoch": 0.26838746143519937, - "grad_norm": 0.7861822247505188, - "learning_rate": 1.9337561852469113e-05, - "loss": 0.3539, - "step": 2849 - }, - { - "epoch": 0.2684816655283672, - "grad_norm": 0.8620867729187012, - "learning_rate": 1.9337021295821224e-05, - "loss": 0.3702, - "step": 2850 - }, - { - "epoch": 0.26857586962153507, - "grad_norm": 0.7886176705360413, - "learning_rate": 1.9336480526274806e-05, - "loss": 0.3428, - "step": 2851 - }, - { - "epoch": 0.2686700737147029, - "grad_norm": 0.8866832852363586, - "learning_rate": 1.9335939543842195e-05, - "loss": 0.3175, - "step": 2852 - }, - { - "epoch": 0.26876427780787077, - "grad_norm": 0.9585645794868469, - "learning_rate": 1.9335398348535724e-05, - "loss": 0.4021, - "step": 2853 - }, - { - "epoch": 0.2688584819010386, - "grad_norm": 0.8415154218673706, - "learning_rate": 1.933485694036773e-05, - "loss": 0.3992, - "step": 2854 - }, - { - "epoch": 0.26895268599420646, - "grad_norm": 0.8004138469696045, - "learning_rate": 1.9334315319350567e-05, - "loss": 0.3325, - "step": 2855 - }, - { - "epoch": 0.2690468900873743, - "grad_norm": 0.8173907399177551, - "learning_rate": 1.9333773485496575e-05, - "loss": 0.3323, - "step": 2856 - }, - { - "epoch": 0.26914109418054216, - "grad_norm": 0.8906183242797852, - "learning_rate": 1.933323143881811e-05, - "loss": 0.3828, - "step": 2857 - }, - { - "epoch": 0.26923529827371, - "grad_norm": 0.8708525896072388, - "learning_rate": 1.933268917932754e-05, - "loss": 0.3806, - "step": 2858 - }, - { - "epoch": 0.26932950236687786, - "grad_norm": 0.8405883312225342, - "learning_rate": 1.9332146707037222e-05, - "loss": 0.3543, - "step": 2859 - }, - { - "epoch": 0.2694237064600457, - "grad_norm": 0.7336653470993042, - "learning_rate": 1.9331604021959526e-05, - "loss": 0.3285, - "step": 2860 - }, - { - "epoch": 0.26951791055321356, - "grad_norm": 0.8797584176063538, - "learning_rate": 1.9331061124106826e-05, - "loss": 0.3395, - "step": 2861 - }, - { - "epoch": 0.2696121146463814, - "grad_norm": 0.9453029632568359, - "learning_rate": 1.93305180134915e-05, - "loss": 0.4, - "step": 2862 - }, - { - "epoch": 0.26970631873954926, - "grad_norm": 1.0486997365951538, - "learning_rate": 1.9329974690125937e-05, - "loss": 0.3158, - "step": 2863 - }, - { - "epoch": 0.2698005228327171, - "grad_norm": 0.8365334272384644, - "learning_rate": 1.932943115402252e-05, - "loss": 0.3438, - "step": 2864 - }, - { - "epoch": 0.26989472692588495, - "grad_norm": 0.9265006184577942, - "learning_rate": 1.9328887405193645e-05, - "loss": 0.3936, - "step": 2865 - }, - { - "epoch": 0.2699889310190528, - "grad_norm": 0.8348186016082764, - "learning_rate": 1.932834344365171e-05, - "loss": 0.3479, - "step": 2866 - }, - { - "epoch": 0.27008313511222065, - "grad_norm": 0.8081981539726257, - "learning_rate": 1.9327799269409114e-05, - "loss": 0.3326, - "step": 2867 - }, - { - "epoch": 0.2701773392053885, - "grad_norm": 0.8048989176750183, - "learning_rate": 1.9327254882478272e-05, - "loss": 0.3186, - "step": 2868 - }, - { - "epoch": 0.27027154329855635, - "grad_norm": 0.8223410248756409, - "learning_rate": 
1.9326710282871596e-05, - "loss": 0.3445, - "step": 2869 - }, - { - "epoch": 0.27036574739172414, - "grad_norm": 0.9238550066947937, - "learning_rate": 1.93261654706015e-05, - "loss": 0.3568, - "step": 2870 - }, - { - "epoch": 0.270459951484892, - "grad_norm": 1.181865930557251, - "learning_rate": 1.932562044568041e-05, - "loss": 0.3, - "step": 2871 - }, - { - "epoch": 0.27055415557805984, - "grad_norm": 0.8466827273368835, - "learning_rate": 1.9325075208120746e-05, - "loss": 0.367, - "step": 2872 - }, - { - "epoch": 0.2706483596712277, - "grad_norm": 0.9028671383857727, - "learning_rate": 1.9324529757934948e-05, - "loss": 0.3833, - "step": 2873 - }, - { - "epoch": 0.27074256376439554, - "grad_norm": 0.8522211313247681, - "learning_rate": 1.9323984095135454e-05, - "loss": 0.343, - "step": 2874 - }, - { - "epoch": 0.2708367678575634, - "grad_norm": 0.9431736469268799, - "learning_rate": 1.9323438219734698e-05, - "loss": 0.3687, - "step": 2875 - }, - { - "epoch": 0.27093097195073124, - "grad_norm": 0.8010151982307434, - "learning_rate": 1.9322892131745135e-05, - "loss": 0.3344, - "step": 2876 - }, - { - "epoch": 0.2710251760438991, - "grad_norm": 0.8623033761978149, - "learning_rate": 1.9322345831179214e-05, - "loss": 0.37, - "step": 2877 - }, - { - "epoch": 0.27111938013706693, - "grad_norm": 0.7385076880455017, - "learning_rate": 1.932179931804939e-05, - "loss": 0.3575, - "step": 2878 - }, - { - "epoch": 0.2712135842302348, - "grad_norm": 0.8643718957901001, - "learning_rate": 1.9321252592368124e-05, - "loss": 0.3725, - "step": 2879 - }, - { - "epoch": 0.27130778832340263, - "grad_norm": 0.8411838412284851, - "learning_rate": 1.9320705654147882e-05, - "loss": 0.3526, - "step": 2880 - }, - { - "epoch": 0.2714019924165705, - "grad_norm": 0.9364979863166809, - "learning_rate": 1.9320158503401137e-05, - "loss": 0.3791, - "step": 2881 - }, - { - "epoch": 0.27149619650973833, - "grad_norm": 0.8383809924125671, - "learning_rate": 1.9319611140140365e-05, - "loss": 0.3696, - "step": 2882 - }, - { - "epoch": 0.2715904006029062, - "grad_norm": 0.8176441788673401, - "learning_rate": 1.9319063564378048e-05, - "loss": 0.3678, - "step": 2883 - }, - { - "epoch": 0.27168460469607403, - "grad_norm": 0.858260452747345, - "learning_rate": 1.9318515776126666e-05, - "loss": 0.35, - "step": 2884 - }, - { - "epoch": 0.2717788087892419, - "grad_norm": 0.8059695363044739, - "learning_rate": 1.9317967775398717e-05, - "loss": 0.4062, - "step": 2885 - }, - { - "epoch": 0.2718730128824097, - "grad_norm": 0.884614884853363, - "learning_rate": 1.9317419562206688e-05, - "loss": 0.4595, - "step": 2886 - }, - { - "epoch": 0.2719672169755776, - "grad_norm": 0.9380375146865845, - "learning_rate": 1.9316871136563085e-05, - "loss": 0.3154, - "step": 2887 - }, - { - "epoch": 0.2720614210687454, - "grad_norm": 0.7669954299926758, - "learning_rate": 1.931632249848041e-05, - "loss": 0.3645, - "step": 2888 - }, - { - "epoch": 0.2721556251619133, - "grad_norm": 0.7636342644691467, - "learning_rate": 1.9315773647971177e-05, - "loss": 0.3302, - "step": 2889 - }, - { - "epoch": 0.2722498292550811, - "grad_norm": 0.966870903968811, - "learning_rate": 1.9315224585047896e-05, - "loss": 0.3283, - "step": 2890 - }, - { - "epoch": 0.27234403334824897, - "grad_norm": 0.9514498710632324, - "learning_rate": 1.931467530972309e-05, - "loss": 0.4385, - "step": 2891 - }, - { - "epoch": 0.2724382374414168, - "grad_norm": 0.8300036787986755, - "learning_rate": 1.931412582200928e-05, - "loss": 0.3483, - "step": 2892 - }, - { - "epoch": 
0.27253244153458467, - "grad_norm": 0.9490203857421875, - "learning_rate": 1.9313576121918994e-05, - "loss": 0.2998, - "step": 2893 - }, - { - "epoch": 0.2726266456277525, - "grad_norm": 0.8159230947494507, - "learning_rate": 1.931302620946477e-05, - "loss": 0.3244, - "step": 2894 - }, - { - "epoch": 0.27272084972092037, - "grad_norm": 0.8888617157936096, - "learning_rate": 1.931247608465915e-05, - "loss": 0.3554, - "step": 2895 - }, - { - "epoch": 0.2728150538140882, - "grad_norm": 0.8638678789138794, - "learning_rate": 1.931192574751467e-05, - "loss": 0.3664, - "step": 2896 - }, - { - "epoch": 0.27290925790725606, - "grad_norm": 0.9164793491363525, - "learning_rate": 1.931137519804388e-05, - "loss": 0.3798, - "step": 2897 - }, - { - "epoch": 0.2730034620004239, - "grad_norm": 0.9217027425765991, - "learning_rate": 1.931082443625934e-05, - "loss": 0.3687, - "step": 2898 - }, - { - "epoch": 0.27309766609359176, - "grad_norm": 0.9147371053695679, - "learning_rate": 1.9310273462173597e-05, - "loss": 0.3423, - "step": 2899 - }, - { - "epoch": 0.2731918701867596, - "grad_norm": 0.9040961265563965, - "learning_rate": 1.9309722275799226e-05, - "loss": 0.3966, - "step": 2900 - }, - { - "epoch": 0.27328607427992746, - "grad_norm": 0.969359815120697, - "learning_rate": 1.9309170877148786e-05, - "loss": 0.3762, - "step": 2901 - }, - { - "epoch": 0.2733802783730953, - "grad_norm": 0.9515261650085449, - "learning_rate": 1.9308619266234855e-05, - "loss": 0.3824, - "step": 2902 - }, - { - "epoch": 0.27347448246626316, - "grad_norm": 0.7625579237937927, - "learning_rate": 1.9308067443070008e-05, - "loss": 0.315, - "step": 2903 - }, - { - "epoch": 0.273568686559431, - "grad_norm": 0.8695298433303833, - "learning_rate": 1.9307515407666826e-05, - "loss": 0.3496, - "step": 2904 - }, - { - "epoch": 0.27366289065259886, - "grad_norm": 0.7960308790206909, - "learning_rate": 1.9306963160037902e-05, - "loss": 0.3458, - "step": 2905 - }, - { - "epoch": 0.2737570947457667, - "grad_norm": 0.9033235907554626, - "learning_rate": 1.9306410700195824e-05, - "loss": 0.4401, - "step": 2906 - }, - { - "epoch": 0.27385129883893455, - "grad_norm": 2.428537368774414, - "learning_rate": 1.9305858028153186e-05, - "loss": 0.4223, - "step": 2907 - }, - { - "epoch": 0.2739455029321024, - "grad_norm": 0.7999145984649658, - "learning_rate": 1.9305305143922597e-05, - "loss": 0.3605, - "step": 2908 - }, - { - "epoch": 0.27403970702527025, - "grad_norm": 0.9468194842338562, - "learning_rate": 1.930475204751666e-05, - "loss": 0.3739, - "step": 2909 - }, - { - "epoch": 0.2741339111184381, - "grad_norm": 0.8207023739814758, - "learning_rate": 1.930419873894798e-05, - "loss": 0.3578, - "step": 2910 - }, - { - "epoch": 0.27422811521160595, - "grad_norm": 0.9839059710502625, - "learning_rate": 1.9303645218229185e-05, - "loss": 0.398, - "step": 2911 - }, - { - "epoch": 0.2743223193047738, - "grad_norm": 0.786546528339386, - "learning_rate": 1.930309148537289e-05, - "loss": 0.3149, - "step": 2912 - }, - { - "epoch": 0.27441652339794165, - "grad_norm": 0.9546533823013306, - "learning_rate": 1.930253754039172e-05, - "loss": 0.3486, - "step": 2913 - }, - { - "epoch": 0.2745107274911095, - "grad_norm": 0.8896610736846924, - "learning_rate": 1.9301983383298312e-05, - "loss": 0.3759, - "step": 2914 - }, - { - "epoch": 0.27460493158427735, - "grad_norm": 0.7661377191543579, - "learning_rate": 1.930142901410529e-05, - "loss": 0.3208, - "step": 2915 - }, - { - "epoch": 0.2746991356774452, - "grad_norm": 0.7883954644203186, - "learning_rate": 
1.9300874432825307e-05, - "loss": 0.3395, - "step": 2916 - }, - { - "epoch": 0.27479333977061304, - "grad_norm": 0.8070877194404602, - "learning_rate": 1.9300319639471e-05, - "loss": 0.3499, - "step": 2917 - }, - { - "epoch": 0.2748875438637809, - "grad_norm": 0.7798469066619873, - "learning_rate": 1.9299764634055026e-05, - "loss": 0.3389, - "step": 2918 - }, - { - "epoch": 0.27498174795694874, - "grad_norm": 1.0146276950836182, - "learning_rate": 1.929920941659003e-05, - "loss": 0.3387, - "step": 2919 - }, - { - "epoch": 0.2750759520501166, - "grad_norm": 0.813177764415741, - "learning_rate": 1.9298653987088683e-05, - "loss": 0.3374, - "step": 2920 - }, - { - "epoch": 0.27517015614328444, - "grad_norm": 0.921242356300354, - "learning_rate": 1.9298098345563644e-05, - "loss": 0.3477, - "step": 2921 - }, - { - "epoch": 0.2752643602364523, - "grad_norm": 0.9590490460395813, - "learning_rate": 1.929754249202758e-05, - "loss": 0.3914, - "step": 2922 - }, - { - "epoch": 0.27535856432962014, - "grad_norm": 0.881549060344696, - "learning_rate": 1.9296986426493168e-05, - "loss": 0.3492, - "step": 2923 - }, - { - "epoch": 0.275452768422788, - "grad_norm": 0.8650107979774475, - "learning_rate": 1.9296430148973088e-05, - "loss": 0.3648, - "step": 2924 - }, - { - "epoch": 0.27554697251595583, - "grad_norm": 0.8315461874008179, - "learning_rate": 1.9295873659480024e-05, - "loss": 0.367, - "step": 2925 - }, - { - "epoch": 0.2756411766091237, - "grad_norm": 0.8754969835281372, - "learning_rate": 1.9295316958026666e-05, - "loss": 0.3452, - "step": 2926 - }, - { - "epoch": 0.27573538070229153, - "grad_norm": 0.8329753875732422, - "learning_rate": 1.9294760044625705e-05, - "loss": 0.3355, - "step": 2927 - }, - { - "epoch": 0.2758295847954594, - "grad_norm": 0.815252423286438, - "learning_rate": 1.929420291928984e-05, - "loss": 0.3803, - "step": 2928 - }, - { - "epoch": 0.27592378888862723, - "grad_norm": 0.7977908849716187, - "learning_rate": 1.9293645582031775e-05, - "loss": 0.3465, - "step": 2929 - }, - { - "epoch": 0.2760179929817951, - "grad_norm": 0.9232553839683533, - "learning_rate": 1.9293088032864218e-05, - "loss": 0.3313, - "step": 2930 - }, - { - "epoch": 0.27611219707496293, - "grad_norm": 0.9138956069946289, - "learning_rate": 1.929253027179988e-05, - "loss": 0.3818, - "step": 2931 - }, - { - "epoch": 0.2762064011681308, - "grad_norm": 0.8025838732719421, - "learning_rate": 1.9291972298851483e-05, - "loss": 0.3597, - "step": 2932 - }, - { - "epoch": 0.2763006052612986, - "grad_norm": 0.8352190852165222, - "learning_rate": 1.9291414114031744e-05, - "loss": 0.3556, - "step": 2933 - }, - { - "epoch": 0.2763948093544665, - "grad_norm": 0.9157130122184753, - "learning_rate": 1.9290855717353394e-05, - "loss": 0.3827, - "step": 2934 - }, - { - "epoch": 0.2764890134476343, - "grad_norm": 0.8056769371032715, - "learning_rate": 1.929029710882917e-05, - "loss": 0.3371, - "step": 2935 - }, - { - "epoch": 0.2765832175408022, - "grad_norm": 1.0625853538513184, - "learning_rate": 1.92897382884718e-05, - "loss": 0.3627, - "step": 2936 - }, - { - "epoch": 0.27667742163397, - "grad_norm": 0.9039706587791443, - "learning_rate": 1.9289179256294034e-05, - "loss": 0.3833, - "step": 2937 - }, - { - "epoch": 0.27677162572713787, - "grad_norm": 0.972366988658905, - "learning_rate": 1.928862001230861e-05, - "loss": 0.403, - "step": 2938 - }, - { - "epoch": 0.2768658298203057, - "grad_norm": 0.8596249222755432, - "learning_rate": 1.9288060556528287e-05, - "loss": 0.3219, - "step": 2939 - }, - { - "epoch": 
0.2769600339134735, - "grad_norm": 0.8336518406867981, - "learning_rate": 1.928750088896582e-05, - "loss": 0.3962, - "step": 2940 - }, - { - "epoch": 0.27705423800664136, - "grad_norm": 0.8468518853187561, - "learning_rate": 1.9286941009633965e-05, - "loss": 0.3824, - "step": 2941 - }, - { - "epoch": 0.2771484420998092, - "grad_norm": 0.8352329730987549, - "learning_rate": 1.9286380918545497e-05, - "loss": 0.3195, - "step": 2942 - }, - { - "epoch": 0.27724264619297706, - "grad_norm": 0.8433838486671448, - "learning_rate": 1.928582061571318e-05, - "loss": 0.3407, - "step": 2943 - }, - { - "epoch": 0.2773368502861449, - "grad_norm": 0.8705379366874695, - "learning_rate": 1.9285260101149795e-05, - "loss": 0.38, - "step": 2944 - }, - { - "epoch": 0.27743105437931276, - "grad_norm": 0.9752731323242188, - "learning_rate": 1.9284699374868118e-05, - "loss": 0.3647, - "step": 2945 - }, - { - "epoch": 0.2775252584724806, - "grad_norm": 0.9700624346733093, - "learning_rate": 1.9284138436880934e-05, - "loss": 0.3757, - "step": 2946 - }, - { - "epoch": 0.27761946256564846, - "grad_norm": 0.9997284412384033, - "learning_rate": 1.928357728720104e-05, - "loss": 0.3503, - "step": 2947 - }, - { - "epoch": 0.2777136666588163, - "grad_norm": 0.8637526035308838, - "learning_rate": 1.928301592584122e-05, - "loss": 0.3199, - "step": 2948 - }, - { - "epoch": 0.27780787075198415, - "grad_norm": 0.8112731575965881, - "learning_rate": 1.928245435281429e-05, - "loss": 0.3754, - "step": 2949 - }, - { - "epoch": 0.277902074845152, - "grad_norm": 0.7939879298210144, - "learning_rate": 1.928189256813304e-05, - "loss": 0.3626, - "step": 2950 - }, - { - "epoch": 0.27799627893831985, - "grad_norm": 0.8063762187957764, - "learning_rate": 1.9281330571810282e-05, - "loss": 0.4303, - "step": 2951 - }, - { - "epoch": 0.2780904830314877, - "grad_norm": 0.795852541923523, - "learning_rate": 1.9280768363858834e-05, - "loss": 0.3721, - "step": 2952 - }, - { - "epoch": 0.27818468712465555, - "grad_norm": 0.9110891819000244, - "learning_rate": 1.9280205944291516e-05, - "loss": 0.3393, - "step": 2953 - }, - { - "epoch": 0.2782788912178234, - "grad_norm": 0.8853954076766968, - "learning_rate": 1.927964331312115e-05, - "loss": 0.3613, - "step": 2954 - }, - { - "epoch": 0.27837309531099125, - "grad_norm": 4.366802215576172, - "learning_rate": 1.9279080470360565e-05, - "loss": 0.3407, - "step": 2955 - }, - { - "epoch": 0.2784672994041591, - "grad_norm": 0.79060959815979, - "learning_rate": 1.92785174160226e-05, - "loss": 0.3472, - "step": 2956 - }, - { - "epoch": 0.27856150349732695, - "grad_norm": 0.9146413803100586, - "learning_rate": 1.927795415012008e-05, - "loss": 0.3651, - "step": 2957 - }, - { - "epoch": 0.2786557075904948, - "grad_norm": 0.9158467054367065, - "learning_rate": 1.927739067266586e-05, - "loss": 0.4311, - "step": 2958 - }, - { - "epoch": 0.27874991168366264, - "grad_norm": 0.8566685318946838, - "learning_rate": 1.9276826983672788e-05, - "loss": 0.3735, - "step": 2959 - }, - { - "epoch": 0.2788441157768305, - "grad_norm": 0.9517194032669067, - "learning_rate": 1.9276263083153708e-05, - "loss": 0.371, - "step": 2960 - }, - { - "epoch": 0.27893831986999834, - "grad_norm": 1.017311453819275, - "learning_rate": 1.927569897112149e-05, - "loss": 0.386, - "step": 2961 - }, - { - "epoch": 0.2790325239631662, - "grad_norm": 0.8881323337554932, - "learning_rate": 1.9275134647588985e-05, - "loss": 0.3573, - "step": 2962 - }, - { - "epoch": 0.27912672805633404, - "grad_norm": 0.768549919128418, - "learning_rate": 
1.9274570112569067e-05, - "loss": 0.3186, - "step": 2963 - }, - { - "epoch": 0.2792209321495019, - "grad_norm": 0.883156955242157, - "learning_rate": 1.9274005366074608e-05, - "loss": 0.3517, - "step": 2964 - }, - { - "epoch": 0.27931513624266974, - "grad_norm": 0.833206832408905, - "learning_rate": 1.9273440408118486e-05, - "loss": 0.36, - "step": 2965 - }, - { - "epoch": 0.2794093403358376, - "grad_norm": 0.8506464958190918, - "learning_rate": 1.9272875238713578e-05, - "loss": 0.3828, - "step": 2966 - }, - { - "epoch": 0.27950354442900543, - "grad_norm": 0.8978786468505859, - "learning_rate": 1.9272309857872777e-05, - "loss": 0.3208, - "step": 2967 - }, - { - "epoch": 0.2795977485221733, - "grad_norm": 0.9665808081626892, - "learning_rate": 1.927174426560897e-05, - "loss": 0.3457, - "step": 2968 - }, - { - "epoch": 0.27969195261534113, - "grad_norm": 0.8978529572486877, - "learning_rate": 1.927117846193505e-05, - "loss": 0.359, - "step": 2969 - }, - { - "epoch": 0.279786156708509, - "grad_norm": 0.8304259777069092, - "learning_rate": 1.927061244686393e-05, - "loss": 0.3223, - "step": 2970 - }, - { - "epoch": 0.27988036080167683, - "grad_norm": 0.879393994808197, - "learning_rate": 1.9270046220408506e-05, - "loss": 0.314, - "step": 2971 - }, - { - "epoch": 0.2799745648948447, - "grad_norm": 0.7944085597991943, - "learning_rate": 1.926947978258169e-05, - "loss": 0.3264, - "step": 2972 - }, - { - "epoch": 0.28006876898801253, - "grad_norm": 0.7613434791564941, - "learning_rate": 1.92689131333964e-05, - "loss": 0.3351, - "step": 2973 - }, - { - "epoch": 0.2801629730811804, - "grad_norm": 1.0007861852645874, - "learning_rate": 1.9268346272865558e-05, - "loss": 0.364, - "step": 2974 - }, - { - "epoch": 0.2802571771743482, - "grad_norm": 1.1431987285614014, - "learning_rate": 1.9267779201002085e-05, - "loss": 0.4041, - "step": 2975 - }, - { - "epoch": 0.2803513812675161, - "grad_norm": 1.0769010782241821, - "learning_rate": 1.9267211917818916e-05, - "loss": 0.4118, - "step": 2976 - }, - { - "epoch": 0.2804455853606839, - "grad_norm": 0.7843855023384094, - "learning_rate": 1.926664442332898e-05, - "loss": 0.3406, - "step": 2977 - }, - { - "epoch": 0.2805397894538518, - "grad_norm": 2.05409574508667, - "learning_rate": 1.9266076717545224e-05, - "loss": 0.3654, - "step": 2978 - }, - { - "epoch": 0.2806339935470196, - "grad_norm": 1.0190244913101196, - "learning_rate": 1.9265508800480588e-05, - "loss": 0.3892, - "step": 2979 - }, - { - "epoch": 0.28072819764018747, - "grad_norm": 0.9311212301254272, - "learning_rate": 1.9264940672148018e-05, - "loss": 0.3977, - "step": 2980 - }, - { - "epoch": 0.2808224017333553, - "grad_norm": 1.1254518032073975, - "learning_rate": 1.9264372332560475e-05, - "loss": 0.3527, - "step": 2981 - }, - { - "epoch": 0.28091660582652317, - "grad_norm": 0.9334866404533386, - "learning_rate": 1.9263803781730917e-05, - "loss": 0.3648, - "step": 2982 - }, - { - "epoch": 0.281010809919691, - "grad_norm": 0.928142786026001, - "learning_rate": 1.92632350196723e-05, - "loss": 0.4029, - "step": 2983 - }, - { - "epoch": 0.28110501401285887, - "grad_norm": 0.8772962689399719, - "learning_rate": 1.9262666046397603e-05, - "loss": 0.3563, - "step": 2984 - }, - { - "epoch": 0.2811992181060267, - "grad_norm": 0.959918737411499, - "learning_rate": 1.9262096861919797e-05, - "loss": 0.3952, - "step": 2985 - }, - { - "epoch": 0.28129342219919456, - "grad_norm": 0.8838319778442383, - "learning_rate": 1.9261527466251856e-05, - "loss": 0.3789, - "step": 2986 - }, - { - "epoch": 
0.2813876262923624, - "grad_norm": 0.8237535357475281, - "learning_rate": 1.9260957859406763e-05, - "loss": 0.3433, - "step": 2987 - }, - { - "epoch": 0.28148183038553026, - "grad_norm": 0.7726234793663025, - "learning_rate": 1.9260388041397512e-05, - "loss": 0.3504, - "step": 2988 - }, - { - "epoch": 0.2815760344786981, - "grad_norm": 1.0417954921722412, - "learning_rate": 1.9259818012237092e-05, - "loss": 0.388, - "step": 2989 - }, - { - "epoch": 0.28167023857186596, - "grad_norm": 0.7924198508262634, - "learning_rate": 1.92592477719385e-05, - "loss": 0.3674, - "step": 2990 - }, - { - "epoch": 0.2817644426650338, - "grad_norm": 0.8858199119567871, - "learning_rate": 1.925867732051474e-05, - "loss": 0.3828, - "step": 2991 - }, - { - "epoch": 0.28185864675820166, - "grad_norm": 0.9502096176147461, - "learning_rate": 1.9258106657978816e-05, - "loss": 0.3831, - "step": 2992 - }, - { - "epoch": 0.2819528508513695, - "grad_norm": 0.7754489779472351, - "learning_rate": 1.9257535784343743e-05, - "loss": 0.3135, - "step": 2993 - }, - { - "epoch": 0.28204705494453736, - "grad_norm": 1.1764442920684814, - "learning_rate": 1.925696469962254e-05, - "loss": 0.3802, - "step": 2994 - }, - { - "epoch": 0.2821412590377052, - "grad_norm": 1.030421495437622, - "learning_rate": 1.925639340382822e-05, - "loss": 0.4167, - "step": 2995 - }, - { - "epoch": 0.28223546313087305, - "grad_norm": 1.0301690101623535, - "learning_rate": 1.925582189697382e-05, - "loss": 0.3447, - "step": 2996 - }, - { - "epoch": 0.2823296672240409, - "grad_norm": 0.8555263876914978, - "learning_rate": 1.9255250179072365e-05, - "loss": 0.3645, - "step": 2997 - }, - { - "epoch": 0.28242387131720875, - "grad_norm": 1.2720202207565308, - "learning_rate": 1.9254678250136893e-05, - "loss": 0.4207, - "step": 2998 - }, - { - "epoch": 0.2825180754103766, - "grad_norm": 0.7826864123344421, - "learning_rate": 1.9254106110180442e-05, - "loss": 0.3546, - "step": 2999 - }, - { - "epoch": 0.28261227950354445, - "grad_norm": 0.9047046899795532, - "learning_rate": 1.9253533759216063e-05, - "loss": 0.3881, - "step": 3000 - }, - { - "epoch": 0.2827064835967123, - "grad_norm": 0.9287173748016357, - "learning_rate": 1.9252961197256802e-05, - "loss": 0.4003, - "step": 3001 - }, - { - "epoch": 0.28280068768988015, - "grad_norm": 0.8424138426780701, - "learning_rate": 1.9252388424315716e-05, - "loss": 0.3601, - "step": 3002 - }, - { - "epoch": 0.282894891783048, - "grad_norm": 0.8370364308357239, - "learning_rate": 1.9251815440405862e-05, - "loss": 0.354, - "step": 3003 - }, - { - "epoch": 0.28298909587621585, - "grad_norm": 1.424588680267334, - "learning_rate": 1.925124224554031e-05, - "loss": 0.3957, - "step": 3004 - }, - { - "epoch": 0.2830832999693837, - "grad_norm": 0.8218227028846741, - "learning_rate": 1.9250668839732127e-05, - "loss": 0.3777, - "step": 3005 - }, - { - "epoch": 0.28317750406255154, - "grad_norm": 0.9550889730453491, - "learning_rate": 1.925009522299439e-05, - "loss": 0.372, - "step": 3006 - }, - { - "epoch": 0.2832717081557194, - "grad_norm": 1.0440438985824585, - "learning_rate": 1.9249521395340177e-05, - "loss": 0.3646, - "step": 3007 - }, - { - "epoch": 0.28336591224888724, - "grad_norm": 0.9570475220680237, - "learning_rate": 1.924894735678257e-05, - "loss": 0.368, - "step": 3008 - }, - { - "epoch": 0.28346011634205504, - "grad_norm": 0.8169949650764465, - "learning_rate": 1.9248373107334656e-05, - "loss": 0.3582, - "step": 3009 - }, - { - "epoch": 0.2835543204352229, - "grad_norm": 1.0188626050949097, - "learning_rate": 
1.9247798647009536e-05, - "loss": 0.3905, - "step": 3010 - }, - { - "epoch": 0.28364852452839073, - "grad_norm": 0.7640846371650696, - "learning_rate": 1.9247223975820303e-05, - "loss": 0.3535, - "step": 3011 - }, - { - "epoch": 0.2837427286215586, - "grad_norm": 0.8944517970085144, - "learning_rate": 1.9246649093780063e-05, - "loss": 0.341, - "step": 3012 - }, - { - "epoch": 0.28383693271472643, - "grad_norm": 0.8176144361495972, - "learning_rate": 1.9246074000901925e-05, - "loss": 0.3574, - "step": 3013 - }, - { - "epoch": 0.2839311368078943, - "grad_norm": 1.0871111154556274, - "learning_rate": 1.9245498697198997e-05, - "loss": 0.4363, - "step": 3014 - }, - { - "epoch": 0.28402534090106213, - "grad_norm": 0.8974987864494324, - "learning_rate": 1.9244923182684406e-05, - "loss": 0.3993, - "step": 3015 - }, - { - "epoch": 0.28411954499423, - "grad_norm": 0.8624328374862671, - "learning_rate": 1.9244347457371266e-05, - "loss": 0.3661, - "step": 3016 - }, - { - "epoch": 0.2842137490873978, - "grad_norm": 0.8629999160766602, - "learning_rate": 1.9243771521272706e-05, - "loss": 0.4342, - "step": 3017 - }, - { - "epoch": 0.2843079531805657, - "grad_norm": 0.9005641341209412, - "learning_rate": 1.924319537440186e-05, - "loss": 0.4474, - "step": 3018 - }, - { - "epoch": 0.2844021572737335, - "grad_norm": 0.9175708293914795, - "learning_rate": 1.9242619016771865e-05, - "loss": 0.3955, - "step": 3019 - }, - { - "epoch": 0.2844963613669014, - "grad_norm": 0.8099044561386108, - "learning_rate": 1.9242042448395862e-05, - "loss": 0.3283, - "step": 3020 - }, - { - "epoch": 0.2845905654600692, - "grad_norm": 0.947959303855896, - "learning_rate": 1.9241465669286996e-05, - "loss": 0.4325, - "step": 3021 - }, - { - "epoch": 0.28468476955323707, - "grad_norm": 0.8129771947860718, - "learning_rate": 1.9240888679458424e-05, - "loss": 0.3597, - "step": 3022 - }, - { - "epoch": 0.2847789736464049, - "grad_norm": 0.9085898399353027, - "learning_rate": 1.92403114789233e-05, - "loss": 0.3587, - "step": 3023 - }, - { - "epoch": 0.28487317773957277, - "grad_norm": 0.9046205282211304, - "learning_rate": 1.923973406769478e-05, - "loss": 0.4014, - "step": 3024 - }, - { - "epoch": 0.2849673818327406, - "grad_norm": 0.8786502480506897, - "learning_rate": 1.9239156445786037e-05, - "loss": 0.3593, - "step": 3025 - }, - { - "epoch": 0.28506158592590847, - "grad_norm": 0.8941583037376404, - "learning_rate": 1.9238578613210236e-05, - "loss": 0.371, - "step": 3026 - }, - { - "epoch": 0.2851557900190763, - "grad_norm": 0.9228273034095764, - "learning_rate": 1.9238000569980553e-05, - "loss": 0.4221, - "step": 3027 - }, - { - "epoch": 0.28524999411224417, - "grad_norm": 0.8081850409507751, - "learning_rate": 1.9237422316110175e-05, - "loss": 0.3951, - "step": 3028 - }, - { - "epoch": 0.285344198205412, - "grad_norm": 0.8693435192108154, - "learning_rate": 1.9236843851612278e-05, - "loss": 0.3937, - "step": 3029 - }, - { - "epoch": 0.28543840229857986, - "grad_norm": 1.069024920463562, - "learning_rate": 1.923626517650006e-05, - "loss": 0.3762, - "step": 3030 - }, - { - "epoch": 0.2855326063917477, - "grad_norm": 0.9611799120903015, - "learning_rate": 1.923568629078671e-05, - "loss": 0.3533, - "step": 3031 - }, - { - "epoch": 0.28562681048491556, - "grad_norm": 0.8414198756217957, - "learning_rate": 1.923510719448543e-05, - "loss": 0.3685, - "step": 3032 - }, - { - "epoch": 0.2857210145780834, - "grad_norm": 0.8800758719444275, - "learning_rate": 1.923452788760942e-05, - "loss": 0.3708, - "step": 3033 - }, - { - "epoch": 
0.28581521867125126, - "grad_norm": 0.760816752910614, - "learning_rate": 1.9233948370171894e-05, - "loss": 0.3609, - "step": 3034 - }, - { - "epoch": 0.2859094227644191, - "grad_norm": 0.9222679734230042, - "learning_rate": 1.923336864218607e-05, - "loss": 0.3904, - "step": 3035 - }, - { - "epoch": 0.28600362685758696, - "grad_norm": 0.8487080931663513, - "learning_rate": 1.9232788703665157e-05, - "loss": 0.3236, - "step": 3036 - }, - { - "epoch": 0.2860978309507548, - "grad_norm": 0.9020145535469055, - "learning_rate": 1.9232208554622382e-05, - "loss": 0.4112, - "step": 3037 - }, - { - "epoch": 0.28619203504392265, - "grad_norm": 0.8178302049636841, - "learning_rate": 1.9231628195070973e-05, - "loss": 0.3394, - "step": 3038 - }, - { - "epoch": 0.2862862391370905, - "grad_norm": 0.7846415042877197, - "learning_rate": 1.923104762502417e-05, - "loss": 0.3647, - "step": 3039 - }, - { - "epoch": 0.28638044323025835, - "grad_norm": 0.8704529404640198, - "learning_rate": 1.92304668444952e-05, - "loss": 0.3605, - "step": 3040 - }, - { - "epoch": 0.2864746473234262, - "grad_norm": 0.9248875379562378, - "learning_rate": 1.9229885853497312e-05, - "loss": 0.3989, - "step": 3041 - }, - { - "epoch": 0.28656885141659405, - "grad_norm": 0.8046138882637024, - "learning_rate": 1.9229304652043754e-05, - "loss": 0.402, - "step": 3042 - }, - { - "epoch": 0.2866630555097619, - "grad_norm": 0.869045615196228, - "learning_rate": 1.9228723240147773e-05, - "loss": 0.3706, - "step": 3043 - }, - { - "epoch": 0.28675725960292975, - "grad_norm": 0.7658939957618713, - "learning_rate": 1.9228141617822632e-05, - "loss": 0.338, - "step": 3044 - }, - { - "epoch": 0.2868514636960976, - "grad_norm": 0.7998064160346985, - "learning_rate": 1.922755978508159e-05, - "loss": 0.3526, - "step": 3045 - }, - { - "epoch": 0.28694566778926545, - "grad_norm": 0.8407435417175293, - "learning_rate": 1.9226977741937915e-05, - "loss": 0.3784, - "step": 3046 - }, - { - "epoch": 0.2870398718824333, - "grad_norm": 2.0962464809417725, - "learning_rate": 1.9226395488404875e-05, - "loss": 0.3773, - "step": 3047 - }, - { - "epoch": 0.28713407597560114, - "grad_norm": 0.9090295433998108, - "learning_rate": 1.9225813024495753e-05, - "loss": 0.3856, - "step": 3048 - }, - { - "epoch": 0.287228280068769, - "grad_norm": 0.8656986355781555, - "learning_rate": 1.9225230350223826e-05, - "loss": 0.3295, - "step": 3049 - }, - { - "epoch": 0.28732248416193684, - "grad_norm": 0.7657508850097656, - "learning_rate": 1.9224647465602374e-05, - "loss": 0.3147, - "step": 3050 - }, - { - "epoch": 0.2874166882551047, - "grad_norm": 0.8553867340087891, - "learning_rate": 1.92240643706447e-05, - "loss": 0.3496, - "step": 3051 - }, - { - "epoch": 0.28751089234827254, - "grad_norm": 0.8059185743331909, - "learning_rate": 1.922348106536409e-05, - "loss": 0.315, - "step": 3052 - }, - { - "epoch": 0.2876050964414404, - "grad_norm": 0.7943899035453796, - "learning_rate": 1.922289754977385e-05, - "loss": 0.3599, - "step": 3053 - }, - { - "epoch": 0.28769930053460824, - "grad_norm": 0.9065893888473511, - "learning_rate": 1.922231382388728e-05, - "loss": 0.3816, - "step": 3054 - }, - { - "epoch": 0.2877935046277761, - "grad_norm": 0.7682626843452454, - "learning_rate": 1.9221729887717693e-05, - "loss": 0.3591, - "step": 3055 - }, - { - "epoch": 0.28788770872094394, - "grad_norm": 0.8986775875091553, - "learning_rate": 1.92211457412784e-05, - "loss": 0.3394, - "step": 3056 - }, - { - "epoch": 0.2879819128141118, - "grad_norm": 0.8122639656066895, - "learning_rate": 
1.9220561384582726e-05, - "loss": 0.3704, - "step": 3057 - }, - { - "epoch": 0.28807611690727963, - "grad_norm": 0.9597777724266052, - "learning_rate": 1.9219976817643994e-05, - "loss": 0.3579, - "step": 3058 - }, - { - "epoch": 0.2881703210004475, - "grad_norm": 1.2639124393463135, - "learning_rate": 1.921939204047553e-05, - "loss": 0.3733, - "step": 3059 - }, - { - "epoch": 0.28826452509361533, - "grad_norm": 0.875393807888031, - "learning_rate": 1.921880705309067e-05, - "loss": 0.349, - "step": 3060 - }, - { - "epoch": 0.2883587291867832, - "grad_norm": 0.817523181438446, - "learning_rate": 1.921822185550275e-05, - "loss": 0.336, - "step": 3061 - }, - { - "epoch": 0.28845293327995103, - "grad_norm": 0.9193385243415833, - "learning_rate": 1.9217636447725118e-05, - "loss": 0.3918, - "step": 3062 - }, - { - "epoch": 0.2885471373731189, - "grad_norm": 0.8694098591804504, - "learning_rate": 1.9217050829771116e-05, - "loss": 0.282, - "step": 3063 - }, - { - "epoch": 0.2886413414662867, - "grad_norm": 0.7830197811126709, - "learning_rate": 1.9216465001654105e-05, - "loss": 0.3407, - "step": 3064 - }, - { - "epoch": 0.2887355455594546, - "grad_norm": 0.8466004133224487, - "learning_rate": 1.9215878963387434e-05, - "loss": 0.3772, - "step": 3065 - }, - { - "epoch": 0.2888297496526224, - "grad_norm": 0.9068024754524231, - "learning_rate": 1.9215292714984475e-05, - "loss": 0.3431, - "step": 3066 - }, - { - "epoch": 0.2889239537457903, - "grad_norm": 0.9880037307739258, - "learning_rate": 1.9214706256458585e-05, - "loss": 0.3957, - "step": 3067 - }, - { - "epoch": 0.2890181578389581, - "grad_norm": 0.9142576456069946, - "learning_rate": 1.9214119587823147e-05, - "loss": 0.3879, - "step": 3068 - }, - { - "epoch": 0.28911236193212597, - "grad_norm": 0.8890142440795898, - "learning_rate": 1.921353270909153e-05, - "loss": 0.3919, - "step": 3069 - }, - { - "epoch": 0.2892065660252938, - "grad_norm": 0.7431747913360596, - "learning_rate": 1.9212945620277118e-05, - "loss": 0.3461, - "step": 3070 - }, - { - "epoch": 0.28930077011846167, - "grad_norm": 0.8724109530448914, - "learning_rate": 1.9212358321393297e-05, - "loss": 0.3134, - "step": 3071 - }, - { - "epoch": 0.2893949742116295, - "grad_norm": 0.8182178139686584, - "learning_rate": 1.921177081245346e-05, - "loss": 0.3252, - "step": 3072 - }, - { - "epoch": 0.28948917830479737, - "grad_norm": 0.7662839293479919, - "learning_rate": 1.9211183093471004e-05, - "loss": 0.3684, - "step": 3073 - }, - { - "epoch": 0.2895833823979652, - "grad_norm": 0.8630828857421875, - "learning_rate": 1.9210595164459326e-05, - "loss": 0.3703, - "step": 3074 - }, - { - "epoch": 0.28967758649113307, - "grad_norm": 0.8949058651924133, - "learning_rate": 1.9210007025431835e-05, - "loss": 0.3974, - "step": 3075 - }, - { - "epoch": 0.2897717905843009, - "grad_norm": 1.089571475982666, - "learning_rate": 1.9209418676401936e-05, - "loss": 0.3937, - "step": 3076 - }, - { - "epoch": 0.28986599467746876, - "grad_norm": 0.8037537336349487, - "learning_rate": 1.9208830117383056e-05, - "loss": 0.3499, - "step": 3077 - }, - { - "epoch": 0.28996019877063656, - "grad_norm": 0.7318845391273499, - "learning_rate": 1.92082413483886e-05, - "loss": 0.3134, - "step": 3078 - }, - { - "epoch": 0.2900544028638044, - "grad_norm": 0.8375080823898315, - "learning_rate": 1.9207652369432005e-05, - "loss": 0.3803, - "step": 3079 - }, - { - "epoch": 0.29014860695697225, - "grad_norm": 0.812122642993927, - "learning_rate": 1.9207063180526695e-05, - "loss": 0.4008, - "step": 3080 - }, - { - "epoch": 
0.2902428110501401, - "grad_norm": 0.7605108618736267, - "learning_rate": 1.9206473781686106e-05, - "loss": 0.3537, - "step": 3081 - }, - { - "epoch": 0.29033701514330795, - "grad_norm": 0.7535540461540222, - "learning_rate": 1.9205884172923675e-05, - "loss": 0.3237, - "step": 3082 - }, - { - "epoch": 0.2904312192364758, - "grad_norm": 0.8617050647735596, - "learning_rate": 1.920529435425285e-05, - "loss": 0.3524, - "step": 3083 - }, - { - "epoch": 0.29052542332964365, - "grad_norm": 0.7852376699447632, - "learning_rate": 1.9204704325687075e-05, - "loss": 0.3551, - "step": 3084 - }, - { - "epoch": 0.2906196274228115, - "grad_norm": 0.8138406276702881, - "learning_rate": 1.9204114087239806e-05, - "loss": 0.347, - "step": 3085 - }, - { - "epoch": 0.29071383151597935, - "grad_norm": 0.8943343162536621, - "learning_rate": 1.9203523638924504e-05, - "loss": 0.3637, - "step": 3086 - }, - { - "epoch": 0.2908080356091472, - "grad_norm": 0.792445182800293, - "learning_rate": 1.9202932980754628e-05, - "loss": 0.3698, - "step": 3087 - }, - { - "epoch": 0.29090223970231505, - "grad_norm": 0.8244633078575134, - "learning_rate": 1.920234211274365e-05, - "loss": 0.3698, - "step": 3088 - }, - { - "epoch": 0.2909964437954829, - "grad_norm": 0.8255532383918762, - "learning_rate": 1.9201751034905037e-05, - "loss": 0.3679, - "step": 3089 - }, - { - "epoch": 0.29109064788865074, - "grad_norm": 1.2693150043487549, - "learning_rate": 1.920115974725227e-05, - "loss": 0.3796, - "step": 3090 - }, - { - "epoch": 0.2911848519818186, - "grad_norm": 0.8762882947921753, - "learning_rate": 1.920056824979883e-05, - "loss": 0.3515, - "step": 3091 - }, - { - "epoch": 0.29127905607498644, - "grad_norm": 0.9370526075363159, - "learning_rate": 1.9199976542558206e-05, - "loss": 0.3211, - "step": 3092 - }, - { - "epoch": 0.2913732601681543, - "grad_norm": 0.8304166793823242, - "learning_rate": 1.919938462554389e-05, - "loss": 0.347, - "step": 3093 - }, - { - "epoch": 0.29146746426132214, - "grad_norm": 0.8803110718727112, - "learning_rate": 1.919879249876938e-05, - "loss": 0.3431, - "step": 3094 - }, - { - "epoch": 0.29156166835449, - "grad_norm": 0.8673834800720215, - "learning_rate": 1.9198200162248167e-05, - "loss": 0.3735, - "step": 3095 - }, - { - "epoch": 0.29165587244765784, - "grad_norm": 0.7874166369438171, - "learning_rate": 1.919760761599377e-05, - "loss": 0.3228, - "step": 3096 - }, - { - "epoch": 0.2917500765408257, - "grad_norm": 0.8854159712791443, - "learning_rate": 1.9197014860019695e-05, - "loss": 0.3854, - "step": 3097 - }, - { - "epoch": 0.29184428063399354, - "grad_norm": 0.8951593637466431, - "learning_rate": 1.9196421894339455e-05, - "loss": 0.3805, - "step": 3098 - }, - { - "epoch": 0.2919384847271614, - "grad_norm": 0.8391319513320923, - "learning_rate": 1.9195828718966577e-05, - "loss": 0.3672, - "step": 3099 - }, - { - "epoch": 0.29203268882032923, - "grad_norm": 0.7788271903991699, - "learning_rate": 1.9195235333914582e-05, - "loss": 0.3448, - "step": 3100 - }, - { - "epoch": 0.2921268929134971, - "grad_norm": 0.7561749219894409, - "learning_rate": 1.9194641739196996e-05, - "loss": 0.3348, - "step": 3101 - }, - { - "epoch": 0.29222109700666493, - "grad_norm": 0.7474419474601746, - "learning_rate": 1.9194047934827365e-05, - "loss": 0.3525, - "step": 3102 - }, - { - "epoch": 0.2923153010998328, - "grad_norm": 0.8443570733070374, - "learning_rate": 1.919345392081922e-05, - "loss": 0.3959, - "step": 3103 - }, - { - "epoch": 0.29240950519300063, - "grad_norm": 0.9475430846214294, - "learning_rate": 
1.9192859697186105e-05, - "loss": 0.3922, - "step": 3104 - }, - { - "epoch": 0.2925037092861685, - "grad_norm": 0.8380106091499329, - "learning_rate": 1.9192265263941575e-05, - "loss": 0.3514, - "step": 3105 - }, - { - "epoch": 0.2925979133793363, - "grad_norm": 0.8615056872367859, - "learning_rate": 1.919167062109918e-05, - "loss": 0.3334, - "step": 3106 - }, - { - "epoch": 0.2926921174725042, - "grad_norm": 0.7354727983474731, - "learning_rate": 1.9191075768672477e-05, - "loss": 0.312, - "step": 3107 - }, - { - "epoch": 0.292786321565672, - "grad_norm": 0.9131101369857788, - "learning_rate": 1.9190480706675035e-05, - "loss": 0.4067, - "step": 3108 - }, - { - "epoch": 0.2928805256588399, - "grad_norm": 0.961203932762146, - "learning_rate": 1.918988543512042e-05, - "loss": 0.4389, - "step": 3109 - }, - { - "epoch": 0.2929747297520077, - "grad_norm": 0.7584219574928284, - "learning_rate": 1.9189289954022207e-05, - "loss": 0.3338, - "step": 3110 - }, - { - "epoch": 0.29306893384517557, - "grad_norm": 0.8488348722457886, - "learning_rate": 1.9188694263393967e-05, - "loss": 0.3306, - "step": 3111 - }, - { - "epoch": 0.2931631379383434, - "grad_norm": 0.8354858756065369, - "learning_rate": 1.9188098363249288e-05, - "loss": 0.3299, - "step": 3112 - }, - { - "epoch": 0.29325734203151127, - "grad_norm": 0.9114026427268982, - "learning_rate": 1.9187502253601757e-05, - "loss": 0.3528, - "step": 3113 - }, - { - "epoch": 0.2933515461246791, - "grad_norm": 0.9232490658760071, - "learning_rate": 1.9186905934464967e-05, - "loss": 0.3506, - "step": 3114 - }, - { - "epoch": 0.29344575021784697, - "grad_norm": 0.9491410255432129, - "learning_rate": 1.918630940585251e-05, - "loss": 0.3936, - "step": 3115 - }, - { - "epoch": 0.2935399543110148, - "grad_norm": 0.8008426427841187, - "learning_rate": 1.9185712667777995e-05, - "loss": 0.3512, - "step": 3116 - }, - { - "epoch": 0.29363415840418267, - "grad_norm": 0.8293586373329163, - "learning_rate": 1.9185115720255027e-05, - "loss": 0.4341, - "step": 3117 - }, - { - "epoch": 0.2937283624973505, - "grad_norm": 0.8840357065200806, - "learning_rate": 1.9184518563297213e-05, - "loss": 0.3662, - "step": 3118 - }, - { - "epoch": 0.29382256659051836, - "grad_norm": 0.8755851984024048, - "learning_rate": 1.918392119691817e-05, - "loss": 0.3425, - "step": 3119 - }, - { - "epoch": 0.2939167706836862, - "grad_norm": 0.7893072366714478, - "learning_rate": 1.9183323621131523e-05, - "loss": 0.3207, - "step": 3120 - }, - { - "epoch": 0.29401097477685406, - "grad_norm": 0.905914843082428, - "learning_rate": 1.9182725835950894e-05, - "loss": 0.3187, - "step": 3121 - }, - { - "epoch": 0.2941051788700219, - "grad_norm": 0.8867089748382568, - "learning_rate": 1.9182127841389917e-05, - "loss": 0.3474, - "step": 3122 - }, - { - "epoch": 0.29419938296318976, - "grad_norm": 0.9891049265861511, - "learning_rate": 1.9181529637462222e-05, - "loss": 0.3434, - "step": 3123 - }, - { - "epoch": 0.2942935870563576, - "grad_norm": 0.8790674209594727, - "learning_rate": 1.9180931224181455e-05, - "loss": 0.3652, - "step": 3124 - }, - { - "epoch": 0.29438779114952546, - "grad_norm": 0.7837187051773071, - "learning_rate": 1.9180332601561255e-05, - "loss": 0.3344, - "step": 3125 - }, - { - "epoch": 0.2944819952426933, - "grad_norm": 0.8639039993286133, - "learning_rate": 1.9179733769615273e-05, - "loss": 0.3487, - "step": 3126 - }, - { - "epoch": 0.29457619933586116, - "grad_norm": 0.7621277570724487, - "learning_rate": 1.9179134728357164e-05, - "loss": 0.3546, - "step": 3127 - }, - { - "epoch": 
0.294670403429029, - "grad_norm": 0.9621046781539917, - "learning_rate": 1.9178535477800588e-05, - "loss": 0.4256, - "step": 3128 - }, - { - "epoch": 0.29476460752219685, - "grad_norm": 0.9978613257408142, - "learning_rate": 1.9177936017959213e-05, - "loss": 0.3559, - "step": 3129 - }, - { - "epoch": 0.2948588116153647, - "grad_norm": 0.7238567471504211, - "learning_rate": 1.9177336348846696e-05, - "loss": 0.3594, - "step": 3130 - }, - { - "epoch": 0.29495301570853255, - "grad_norm": 0.8040878176689148, - "learning_rate": 1.9176736470476723e-05, - "loss": 0.3409, - "step": 3131 - }, - { - "epoch": 0.2950472198017004, - "grad_norm": 0.7095673084259033, - "learning_rate": 1.917613638286296e-05, - "loss": 0.3179, - "step": 3132 - }, - { - "epoch": 0.29514142389486825, - "grad_norm": 0.7767865657806396, - "learning_rate": 1.91755360860191e-05, - "loss": 0.3149, - "step": 3133 - }, - { - "epoch": 0.2952356279880361, - "grad_norm": 0.8106109499931335, - "learning_rate": 1.9174935579958828e-05, - "loss": 0.353, - "step": 3134 - }, - { - "epoch": 0.29532983208120395, - "grad_norm": 0.8412537574768066, - "learning_rate": 1.9174334864695834e-05, - "loss": 0.3444, - "step": 3135 - }, - { - "epoch": 0.2954240361743718, - "grad_norm": 1.0176568031311035, - "learning_rate": 1.9173733940243817e-05, - "loss": 0.4045, - "step": 3136 - }, - { - "epoch": 0.29551824026753964, - "grad_norm": 0.8890383243560791, - "learning_rate": 1.9173132806616477e-05, - "loss": 0.3678, - "step": 3137 - }, - { - "epoch": 0.2956124443607075, - "grad_norm": 0.7867995500564575, - "learning_rate": 1.917253146382753e-05, - "loss": 0.3349, - "step": 3138 - }, - { - "epoch": 0.29570664845387534, - "grad_norm": 1.007822036743164, - "learning_rate": 1.917192991189067e-05, - "loss": 0.3936, - "step": 3139 - }, - { - "epoch": 0.2958008525470432, - "grad_norm": 0.8795698881149292, - "learning_rate": 1.917132815081963e-05, - "loss": 0.4265, - "step": 3140 - }, - { - "epoch": 0.29589505664021104, - "grad_norm": 0.8458032011985779, - "learning_rate": 1.917072618062812e-05, - "loss": 0.376, - "step": 3141 - }, - { - "epoch": 0.2959892607333789, - "grad_norm": 0.9046775698661804, - "learning_rate": 1.9170124001329873e-05, - "loss": 0.3758, - "step": 3142 - }, - { - "epoch": 0.29608346482654674, - "grad_norm": 0.7450736165046692, - "learning_rate": 1.916952161293862e-05, - "loss": 0.3558, - "step": 3143 - }, - { - "epoch": 0.2961776689197146, - "grad_norm": 0.8185693621635437, - "learning_rate": 1.9168919015468086e-05, - "loss": 0.35, - "step": 3144 - }, - { - "epoch": 0.29627187301288244, - "grad_norm": 0.8537670373916626, - "learning_rate": 1.916831620893202e-05, - "loss": 0.3881, - "step": 3145 - }, - { - "epoch": 0.2963660771060503, - "grad_norm": 0.8352190256118774, - "learning_rate": 1.916771319334417e-05, - "loss": 0.3928, - "step": 3146 - }, - { - "epoch": 0.2964602811992181, - "grad_norm": 0.8838286399841309, - "learning_rate": 1.916710996871828e-05, - "loss": 0.3924, - "step": 3147 - }, - { - "epoch": 0.2965544852923859, - "grad_norm": 1.0096112489700317, - "learning_rate": 1.91665065350681e-05, - "loss": 0.292, - "step": 3148 - }, - { - "epoch": 0.2966486893855538, - "grad_norm": 0.8836523294448853, - "learning_rate": 1.9165902892407402e-05, - "loss": 0.3494, - "step": 3149 - }, - { - "epoch": 0.2967428934787216, - "grad_norm": 0.8463241457939148, - "learning_rate": 1.9165299040749937e-05, - "loss": 0.3971, - "step": 3150 - }, - { - "epoch": 0.2968370975718895, - "grad_norm": 0.8585659861564636, - "learning_rate": 
1.9164694980109484e-05, - "loss": 0.3701, - "step": 3151 - }, - { - "epoch": 0.2969313016650573, - "grad_norm": 0.822482705116272, - "learning_rate": 1.916409071049981e-05, - "loss": 0.3571, - "step": 3152 - }, - { - "epoch": 0.2970255057582252, - "grad_norm": 0.9102155566215515, - "learning_rate": 1.9163486231934693e-05, - "loss": 0.3845, - "step": 3153 - }, - { - "epoch": 0.297119709851393, - "grad_norm": 0.9315751194953918, - "learning_rate": 1.916288154442792e-05, - "loss": 0.3398, - "step": 3154 - }, - { - "epoch": 0.29721391394456087, - "grad_norm": 0.8502662777900696, - "learning_rate": 1.9162276647993278e-05, - "loss": 0.3402, - "step": 3155 - }, - { - "epoch": 0.2973081180377287, - "grad_norm": 0.7931502461433411, - "learning_rate": 1.9161671542644557e-05, - "loss": 0.3376, - "step": 3156 - }, - { - "epoch": 0.29740232213089657, - "grad_norm": 0.9208962917327881, - "learning_rate": 1.916106622839556e-05, - "loss": 0.3598, - "step": 3157 - }, - { - "epoch": 0.2974965262240644, - "grad_norm": 0.8193071484565735, - "learning_rate": 1.916046070526008e-05, - "loss": 0.3931, - "step": 3158 - }, - { - "epoch": 0.29759073031723227, - "grad_norm": 0.8380117416381836, - "learning_rate": 1.9159854973251932e-05, - "loss": 0.3781, - "step": 3159 - }, - { - "epoch": 0.2976849344104001, - "grad_norm": 0.9172781109809875, - "learning_rate": 1.9159249032384924e-05, - "loss": 0.3828, - "step": 3160 - }, - { - "epoch": 0.29777913850356796, - "grad_norm": 0.8817245364189148, - "learning_rate": 1.9158642882672873e-05, - "loss": 0.39, - "step": 3161 - }, - { - "epoch": 0.2978733425967358, - "grad_norm": 1.0869908332824707, - "learning_rate": 1.91580365241296e-05, - "loss": 0.4045, - "step": 3162 - }, - { - "epoch": 0.29796754668990366, - "grad_norm": 1.013283610343933, - "learning_rate": 1.9157429956768932e-05, - "loss": 0.3322, - "step": 3163 - }, - { - "epoch": 0.2980617507830715, - "grad_norm": 0.9602604508399963, - "learning_rate": 1.91568231806047e-05, - "loss": 0.3396, - "step": 3164 - }, - { - "epoch": 0.29815595487623936, - "grad_norm": 0.8908482193946838, - "learning_rate": 1.9156216195650735e-05, - "loss": 0.36, - "step": 3165 - }, - { - "epoch": 0.2982501589694072, - "grad_norm": 0.893203854560852, - "learning_rate": 1.915560900192088e-05, - "loss": 0.3369, - "step": 3166 - }, - { - "epoch": 0.29834436306257506, - "grad_norm": 0.824182391166687, - "learning_rate": 1.9155001599428982e-05, - "loss": 0.3034, - "step": 3167 - }, - { - "epoch": 0.2984385671557429, - "grad_norm": 0.8576279282569885, - "learning_rate": 1.915439398818889e-05, - "loss": 0.3643, - "step": 3168 - }, - { - "epoch": 0.29853277124891076, - "grad_norm": 0.8324764966964722, - "learning_rate": 1.9153786168214456e-05, - "loss": 0.377, - "step": 3169 - }, - { - "epoch": 0.2986269753420786, - "grad_norm": 0.9648815989494324, - "learning_rate": 1.9153178139519538e-05, - "loss": 0.3594, - "step": 3170 - }, - { - "epoch": 0.29872117943524645, - "grad_norm": 0.8513934016227722, - "learning_rate": 1.915256990211801e-05, - "loss": 0.3396, - "step": 3171 - }, - { - "epoch": 0.2988153835284143, - "grad_norm": 0.8295388221740723, - "learning_rate": 1.915196145602373e-05, - "loss": 0.3466, - "step": 3172 - }, - { - "epoch": 0.29890958762158215, - "grad_norm": 0.8665640950202942, - "learning_rate": 1.9151352801250572e-05, - "loss": 0.4002, - "step": 3173 - }, - { - "epoch": 0.29900379171475, - "grad_norm": 0.7799671292304993, - "learning_rate": 1.9150743937812423e-05, - "loss": 0.3576, - "step": 3174 - }, - { - "epoch": 
0.29909799580791785, - "grad_norm": 0.8510997295379639, - "learning_rate": 1.9150134865723154e-05, - "loss": 0.3358, - "step": 3175 - }, - { - "epoch": 0.2991921999010857, - "grad_norm": 0.9179261326789856, - "learning_rate": 1.9149525584996663e-05, - "loss": 0.3845, - "step": 3176 - }, - { - "epoch": 0.29928640399425355, - "grad_norm": 0.8507550358772278, - "learning_rate": 1.9148916095646838e-05, - "loss": 0.3519, - "step": 3177 - }, - { - "epoch": 0.2993806080874214, - "grad_norm": 0.8420320749282837, - "learning_rate": 1.9148306397687573e-05, - "loss": 0.3551, - "step": 3178 - }, - { - "epoch": 0.29947481218058924, - "grad_norm": 0.809543788433075, - "learning_rate": 1.914769649113278e-05, - "loss": 0.3684, - "step": 3179 - }, - { - "epoch": 0.2995690162737571, - "grad_norm": 2.2630064487457275, - "learning_rate": 1.914708637599636e-05, - "loss": 0.3646, - "step": 3180 - }, - { - "epoch": 0.29966322036692494, - "grad_norm": 0.9354215860366821, - "learning_rate": 1.914647605229222e-05, - "loss": 0.4167, - "step": 3181 - }, - { - "epoch": 0.2997574244600928, - "grad_norm": 0.8339396715164185, - "learning_rate": 1.9145865520034282e-05, - "loss": 0.3651, - "step": 3182 - }, - { - "epoch": 0.29985162855326064, - "grad_norm": 0.7765888571739197, - "learning_rate": 1.914525477923647e-05, - "loss": 0.3686, - "step": 3183 - }, - { - "epoch": 0.2999458326464285, - "grad_norm": 0.8726272583007812, - "learning_rate": 1.9144643829912698e-05, - "loss": 0.3873, - "step": 3184 - }, - { - "epoch": 0.30004003673959634, - "grad_norm": 0.8301263451576233, - "learning_rate": 1.914403267207691e-05, - "loss": 0.3717, - "step": 3185 - }, - { - "epoch": 0.3001342408327642, - "grad_norm": 0.8069661855697632, - "learning_rate": 1.9143421305743035e-05, - "loss": 0.3722, - "step": 3186 - }, - { - "epoch": 0.30022844492593204, - "grad_norm": 0.7408289313316345, - "learning_rate": 1.9142809730925012e-05, - "loss": 0.3821, - "step": 3187 - }, - { - "epoch": 0.3003226490190999, - "grad_norm": 0.8679337501525879, - "learning_rate": 1.9142197947636788e-05, - "loss": 0.3881, - "step": 3188 - }, - { - "epoch": 0.30041685311226773, - "grad_norm": 0.8776544332504272, - "learning_rate": 1.914158595589231e-05, - "loss": 0.4097, - "step": 3189 - }, - { - "epoch": 0.3005110572054356, - "grad_norm": 0.8305463790893555, - "learning_rate": 1.9140973755705538e-05, - "loss": 0.3972, - "step": 3190 - }, - { - "epoch": 0.30060526129860343, - "grad_norm": 0.9195913672447205, - "learning_rate": 1.9140361347090426e-05, - "loss": 0.3575, - "step": 3191 - }, - { - "epoch": 0.3006994653917713, - "grad_norm": 0.7249822020530701, - "learning_rate": 1.913974873006094e-05, - "loss": 0.3394, - "step": 3192 - }, - { - "epoch": 0.30079366948493913, - "grad_norm": 0.8495468497276306, - "learning_rate": 1.913913590463105e-05, - "loss": 0.3949, - "step": 3193 - }, - { - "epoch": 0.300887873578107, - "grad_norm": 0.7819181084632874, - "learning_rate": 1.9138522870814725e-05, - "loss": 0.3607, - "step": 3194 - }, - { - "epoch": 0.30098207767127483, - "grad_norm": 0.8274915814399719, - "learning_rate": 1.9137909628625942e-05, - "loss": 0.3376, - "step": 3195 - }, - { - "epoch": 0.3010762817644427, - "grad_norm": 0.7591769099235535, - "learning_rate": 1.9137296178078692e-05, - "loss": 0.3113, - "step": 3196 - }, - { - "epoch": 0.3011704858576105, - "grad_norm": 0.8821949362754822, - "learning_rate": 1.9136682519186958e-05, - "loss": 0.4159, - "step": 3197 - }, - { - "epoch": 0.3012646899507784, - "grad_norm": 0.8360042572021484, - "learning_rate": 
1.913606865196473e-05, - "loss": 0.3614, - "step": 3198 - }, - { - "epoch": 0.3013588940439462, - "grad_norm": 0.8298535346984863, - "learning_rate": 1.913545457642601e-05, - "loss": 0.3452, - "step": 3199 - }, - { - "epoch": 0.3014530981371141, - "grad_norm": 0.8901185989379883, - "learning_rate": 1.91348402925848e-05, - "loss": 0.3411, - "step": 3200 - }, - { - "epoch": 0.3015473022302819, - "grad_norm": 0.8033450245857239, - "learning_rate": 1.91342258004551e-05, - "loss": 0.2998, - "step": 3201 - }, - { - "epoch": 0.30164150632344977, - "grad_norm": 0.9016701579093933, - "learning_rate": 1.9133611100050925e-05, - "loss": 0.3856, - "step": 3202 - }, - { - "epoch": 0.3017357104166176, - "grad_norm": 0.7769457101821899, - "learning_rate": 1.9132996191386292e-05, - "loss": 0.356, - "step": 3203 - }, - { - "epoch": 0.30182991450978547, - "grad_norm": 0.9773686528205872, - "learning_rate": 1.913238107447522e-05, - "loss": 0.3866, - "step": 3204 - }, - { - "epoch": 0.3019241186029533, - "grad_norm": 0.7966118454933167, - "learning_rate": 1.913176574933174e-05, - "loss": 0.3699, - "step": 3205 - }, - { - "epoch": 0.30201832269612117, - "grad_norm": 0.7131487131118774, - "learning_rate": 1.9131150215969875e-05, - "loss": 0.3102, - "step": 3206 - }, - { - "epoch": 0.302112526789289, - "grad_norm": 0.8672724366188049, - "learning_rate": 1.9130534474403664e-05, - "loss": 0.3329, - "step": 3207 - }, - { - "epoch": 0.30220673088245686, - "grad_norm": 0.8312980532646179, - "learning_rate": 1.912991852464715e-05, - "loss": 0.3629, - "step": 3208 - }, - { - "epoch": 0.3023009349756247, - "grad_norm": 0.8550496697425842, - "learning_rate": 1.912930236671437e-05, - "loss": 0.3939, - "step": 3209 - }, - { - "epoch": 0.30239513906879256, - "grad_norm": 0.7986893653869629, - "learning_rate": 1.9128686000619378e-05, - "loss": 0.3419, - "step": 3210 - }, - { - "epoch": 0.3024893431619604, - "grad_norm": 0.9180712103843689, - "learning_rate": 1.9128069426376225e-05, - "loss": 0.4041, - "step": 3211 - }, - { - "epoch": 0.30258354725512826, - "grad_norm": 0.8762322068214417, - "learning_rate": 1.9127452643998975e-05, - "loss": 0.312, - "step": 3212 - }, - { - "epoch": 0.3026777513482961, - "grad_norm": 0.9074586629867554, - "learning_rate": 1.912683565350169e-05, - "loss": 0.3554, - "step": 3213 - }, - { - "epoch": 0.30277195544146396, - "grad_norm": 0.8098133206367493, - "learning_rate": 1.9126218454898437e-05, - "loss": 0.3743, - "step": 3214 - }, - { - "epoch": 0.3028661595346318, - "grad_norm": 0.7473288774490356, - "learning_rate": 1.9125601048203288e-05, - "loss": 0.3636, - "step": 3215 - }, - { - "epoch": 0.3029603636277996, - "grad_norm": 0.754591703414917, - "learning_rate": 1.912498343343032e-05, - "loss": 0.3602, - "step": 3216 - }, - { - "epoch": 0.30305456772096745, - "grad_norm": 0.7766704559326172, - "learning_rate": 1.9124365610593624e-05, - "loss": 0.3099, - "step": 3217 - }, - { - "epoch": 0.3031487718141353, - "grad_norm": 0.7497815489768982, - "learning_rate": 1.9123747579707275e-05, - "loss": 0.3477, - "step": 3218 - }, - { - "epoch": 0.30324297590730315, - "grad_norm": 0.9382441639900208, - "learning_rate": 1.9123129340785372e-05, - "loss": 0.3614, - "step": 3219 - }, - { - "epoch": 0.303337180000471, - "grad_norm": 0.7490923404693604, - "learning_rate": 1.9122510893842013e-05, - "loss": 0.3511, - "step": 3220 - }, - { - "epoch": 0.30343138409363885, - "grad_norm": 0.756651759147644, - "learning_rate": 1.9121892238891296e-05, - "loss": 0.3333, - "step": 3221 - }, - { - "epoch": 
0.3035255881868067, - "grad_norm": 0.8930402398109436, - "learning_rate": 1.9121273375947326e-05, - "loss": 0.384, - "step": 3222 - }, - { - "epoch": 0.30361979227997454, - "grad_norm": 0.907922625541687, - "learning_rate": 1.9120654305024224e-05, - "loss": 0.4312, - "step": 3223 - }, - { - "epoch": 0.3037139963731424, - "grad_norm": 0.7934293746948242, - "learning_rate": 1.912003502613609e-05, - "loss": 0.3264, - "step": 3224 - }, - { - "epoch": 0.30380820046631024, - "grad_norm": 0.8397114872932434, - "learning_rate": 1.9119415539297058e-05, - "loss": 0.3524, - "step": 3225 - }, - { - "epoch": 0.3039024045594781, - "grad_norm": 0.7385463714599609, - "learning_rate": 1.911879584452125e-05, - "loss": 0.371, - "step": 3226 - }, - { - "epoch": 0.30399660865264594, - "grad_norm": 0.8333579301834106, - "learning_rate": 1.911817594182279e-05, - "loss": 0.3722, - "step": 3227 - }, - { - "epoch": 0.3040908127458138, - "grad_norm": 0.818589985370636, - "learning_rate": 1.9117555831215818e-05, - "loss": 0.4054, - "step": 3228 - }, - { - "epoch": 0.30418501683898164, - "grad_norm": 0.8527333736419678, - "learning_rate": 1.9116935512714473e-05, - "loss": 0.3594, - "step": 3229 - }, - { - "epoch": 0.3042792209321495, - "grad_norm": 0.840151846408844, - "learning_rate": 1.91163149863329e-05, - "loss": 0.3739, - "step": 3230 - }, - { - "epoch": 0.30437342502531733, - "grad_norm": 0.733751118183136, - "learning_rate": 1.9115694252085246e-05, - "loss": 0.3377, - "step": 3231 - }, - { - "epoch": 0.3044676291184852, - "grad_norm": 0.9031135439872742, - "learning_rate": 1.9115073309985665e-05, - "loss": 0.3651, - "step": 3232 - }, - { - "epoch": 0.30456183321165303, - "grad_norm": 0.7640702724456787, - "learning_rate": 1.9114452160048315e-05, - "loss": 0.2965, - "step": 3233 - }, - { - "epoch": 0.3046560373048209, - "grad_norm": 0.959571361541748, - "learning_rate": 1.911383080228736e-05, - "loss": 0.3829, - "step": 3234 - }, - { - "epoch": 0.30475024139798873, - "grad_norm": 0.799164891242981, - "learning_rate": 1.911320923671697e-05, - "loss": 0.3541, - "step": 3235 - }, - { - "epoch": 0.3048444454911566, - "grad_norm": 0.8224729299545288, - "learning_rate": 1.9112587463351313e-05, - "loss": 0.3418, - "step": 3236 - }, - { - "epoch": 0.30493864958432443, - "grad_norm": 0.7661332488059998, - "learning_rate": 1.9111965482204568e-05, - "loss": 0.3226, - "step": 3237 - }, - { - "epoch": 0.3050328536774923, - "grad_norm": 0.8191931843757629, - "learning_rate": 1.9111343293290923e-05, - "loss": 0.3706, - "step": 3238 - }, - { - "epoch": 0.3051270577706601, - "grad_norm": 0.8266605734825134, - "learning_rate": 1.9110720896624556e-05, - "loss": 0.3394, - "step": 3239 - }, - { - "epoch": 0.305221261863828, - "grad_norm": 1.0821118354797363, - "learning_rate": 1.9110098292219665e-05, - "loss": 0.3564, - "step": 3240 - }, - { - "epoch": 0.3053154659569958, - "grad_norm": 0.7629269361495972, - "learning_rate": 1.910947548009044e-05, - "loss": 0.3565, - "step": 3241 - }, - { - "epoch": 0.3054096700501637, - "grad_norm": 0.8535257577896118, - "learning_rate": 1.9108852460251088e-05, - "loss": 0.2997, - "step": 3242 - }, - { - "epoch": 0.3055038741433315, - "grad_norm": 0.8011213541030884, - "learning_rate": 1.9108229232715814e-05, - "loss": 0.3979, - "step": 3243 - }, - { - "epoch": 0.30559807823649937, - "grad_norm": 0.7791256308555603, - "learning_rate": 1.9107605797498827e-05, - "loss": 0.3114, - "step": 3244 - }, - { - "epoch": 0.3056922823296672, - "grad_norm": 0.8558793663978577, - "learning_rate": 
1.910698215461434e-05, - "loss": 0.3712, - "step": 3245 - }, - { - "epoch": 0.30578648642283507, - "grad_norm": 0.8836783170700073, - "learning_rate": 1.910635830407658e-05, - "loss": 0.3945, - "step": 3246 - }, - { - "epoch": 0.3058806905160029, - "grad_norm": 0.7929200530052185, - "learning_rate": 1.9105734245899765e-05, - "loss": 0.3458, - "step": 3247 - }, - { - "epoch": 0.30597489460917077, - "grad_norm": 0.8550586104393005, - "learning_rate": 1.9105109980098126e-05, - "loss": 0.3745, - "step": 3248 - }, - { - "epoch": 0.3060690987023386, - "grad_norm": 0.9599501490592957, - "learning_rate": 1.9104485506685902e-05, - "loss": 0.3805, - "step": 3249 - }, - { - "epoch": 0.30616330279550646, - "grad_norm": 0.8369544148445129, - "learning_rate": 1.910386082567732e-05, - "loss": 0.3691, - "step": 3250 - }, - { - "epoch": 0.3062575068886743, - "grad_norm": 0.8377211689949036, - "learning_rate": 1.910323593708664e-05, - "loss": 0.3911, - "step": 3251 - }, - { - "epoch": 0.30635171098184216, - "grad_norm": 0.866549551486969, - "learning_rate": 1.91026108409281e-05, - "loss": 0.3498, - "step": 3252 - }, - { - "epoch": 0.30644591507501, - "grad_norm": 0.933917760848999, - "learning_rate": 1.9101985537215956e-05, - "loss": 0.3928, - "step": 3253 - }, - { - "epoch": 0.30654011916817786, - "grad_norm": 0.9976698756217957, - "learning_rate": 1.9101360025964464e-05, - "loss": 0.3369, - "step": 3254 - }, - { - "epoch": 0.3066343232613457, - "grad_norm": 0.9265713691711426, - "learning_rate": 1.9100734307187887e-05, - "loss": 0.3807, - "step": 3255 - }, - { - "epoch": 0.30672852735451356, - "grad_norm": 0.8622773885726929, - "learning_rate": 1.9100108380900493e-05, - "loss": 0.3674, - "step": 3256 - }, - { - "epoch": 0.3068227314476814, - "grad_norm": 0.7766557931900024, - "learning_rate": 1.9099482247116556e-05, - "loss": 0.3355, - "step": 3257 - }, - { - "epoch": 0.30691693554084926, - "grad_norm": 0.9115668535232544, - "learning_rate": 1.909885590585035e-05, - "loss": 0.3418, - "step": 3258 - }, - { - "epoch": 0.3070111396340171, - "grad_norm": 0.7861838936805725, - "learning_rate": 1.909822935711616e-05, - "loss": 0.3652, - "step": 3259 - }, - { - "epoch": 0.30710534372718495, - "grad_norm": 0.8084872961044312, - "learning_rate": 1.909760260092827e-05, - "loss": 0.3643, - "step": 3260 - }, - { - "epoch": 0.3071995478203528, - "grad_norm": 0.7769855260848999, - "learning_rate": 1.9096975637300968e-05, - "loss": 0.3532, - "step": 3261 - }, - { - "epoch": 0.30729375191352065, - "grad_norm": 0.847029983997345, - "learning_rate": 1.9096348466248554e-05, - "loss": 0.3546, - "step": 3262 - }, - { - "epoch": 0.3073879560066885, - "grad_norm": 0.8707786798477173, - "learning_rate": 1.9095721087785327e-05, - "loss": 0.372, - "step": 3263 - }, - { - "epoch": 0.30748216009985635, - "grad_norm": 0.8614506125450134, - "learning_rate": 1.909509350192559e-05, - "loss": 0.35, - "step": 3264 - }, - { - "epoch": 0.3075763641930242, - "grad_norm": 0.9336690902709961, - "learning_rate": 1.909446570868366e-05, - "loss": 0.422, - "step": 3265 - }, - { - "epoch": 0.30767056828619205, - "grad_norm": 0.9768769145011902, - "learning_rate": 1.9093837708073843e-05, - "loss": 0.3865, - "step": 3266 - }, - { - "epoch": 0.3077647723793599, - "grad_norm": 0.8792558312416077, - "learning_rate": 1.9093209500110465e-05, - "loss": 0.3646, - "step": 3267 - }, - { - "epoch": 0.30785897647252775, - "grad_norm": 0.8601863980293274, - "learning_rate": 1.9092581084807848e-05, - "loss": 0.3196, - "step": 3268 - }, - { - "epoch": 
0.3079531805656956, - "grad_norm": 0.8642717003822327, - "learning_rate": 1.9091952462180317e-05, - "loss": 0.3951, - "step": 3269 - }, - { - "epoch": 0.30804738465886344, - "grad_norm": 0.8343533873558044, - "learning_rate": 1.909132363224221e-05, - "loss": 0.3492, - "step": 3270 - }, - { - "epoch": 0.3081415887520313, - "grad_norm": 0.8218293190002441, - "learning_rate": 1.9090694595007866e-05, - "loss": 0.3518, - "step": 3271 - }, - { - "epoch": 0.30823579284519914, - "grad_norm": 0.7444469332695007, - "learning_rate": 1.909006535049163e-05, - "loss": 0.3121, - "step": 3272 - }, - { - "epoch": 0.308329996938367, - "grad_norm": 0.832141101360321, - "learning_rate": 1.908943589870784e-05, - "loss": 0.3668, - "step": 3273 - }, - { - "epoch": 0.30842420103153484, - "grad_norm": 0.8032460808753967, - "learning_rate": 1.9088806239670855e-05, - "loss": 0.3408, - "step": 3274 - }, - { - "epoch": 0.3085184051247027, - "grad_norm": 0.8065330982208252, - "learning_rate": 1.908817637339503e-05, - "loss": 0.3422, - "step": 3275 - }, - { - "epoch": 0.30861260921787054, - "grad_norm": 0.7814967036247253, - "learning_rate": 1.908754629989473e-05, - "loss": 0.3494, - "step": 3276 - }, - { - "epoch": 0.3087068133110384, - "grad_norm": 0.7664377093315125, - "learning_rate": 1.9086916019184323e-05, - "loss": 0.3393, - "step": 3277 - }, - { - "epoch": 0.30880101740420624, - "grad_norm": 0.926455020904541, - "learning_rate": 1.9086285531278176e-05, - "loss": 0.3679, - "step": 3278 - }, - { - "epoch": 0.3088952214973741, - "grad_norm": 0.833058774471283, - "learning_rate": 1.9085654836190665e-05, - "loss": 0.3592, - "step": 3279 - }, - { - "epoch": 0.30898942559054193, - "grad_norm": 0.8743307590484619, - "learning_rate": 1.9085023933936174e-05, - "loss": 0.3721, - "step": 3280 - }, - { - "epoch": 0.3090836296837098, - "grad_norm": 1.2178056240081787, - "learning_rate": 1.908439282452909e-05, - "loss": 0.3882, - "step": 3281 - }, - { - "epoch": 0.30917783377687763, - "grad_norm": 0.7862185835838318, - "learning_rate": 1.9083761507983794e-05, - "loss": 0.3243, - "step": 3282 - }, - { - "epoch": 0.3092720378700455, - "grad_norm": 1.1203469038009644, - "learning_rate": 1.9083129984314694e-05, - "loss": 0.4072, - "step": 3283 - }, - { - "epoch": 0.30936624196321333, - "grad_norm": 0.7928134799003601, - "learning_rate": 1.9082498253536175e-05, - "loss": 0.3249, - "step": 3284 - }, - { - "epoch": 0.3094604460563811, - "grad_norm": 0.8092864155769348, - "learning_rate": 1.9081866315662655e-05, - "loss": 0.3542, - "step": 3285 - }, - { - "epoch": 0.30955465014954897, - "grad_norm": 0.8150007724761963, - "learning_rate": 1.908123417070854e-05, - "loss": 0.3374, - "step": 3286 - }, - { - "epoch": 0.3096488542427168, - "grad_norm": 0.8889190554618835, - "learning_rate": 1.9080601818688234e-05, - "loss": 0.3871, - "step": 3287 - }, - { - "epoch": 0.30974305833588467, - "grad_norm": 0.7795815467834473, - "learning_rate": 1.907996925961617e-05, - "loss": 0.3329, - "step": 3288 - }, - { - "epoch": 0.3098372624290525, - "grad_norm": 0.892247200012207, - "learning_rate": 1.907933649350676e-05, - "loss": 0.4365, - "step": 3289 - }, - { - "epoch": 0.30993146652222037, - "grad_norm": 0.8202358484268188, - "learning_rate": 1.9078703520374436e-05, - "loss": 0.3698, - "step": 3290 - }, - { - "epoch": 0.3100256706153882, - "grad_norm": 0.7944460511207581, - "learning_rate": 1.9078070340233637e-05, - "loss": 0.3733, - "step": 3291 - }, - { - "epoch": 0.31011987470855606, - "grad_norm": 0.8122773766517639, - "learning_rate": 
1.9077436953098792e-05, - "loss": 0.3989, - "step": 3292 - }, - { - "epoch": 0.3102140788017239, - "grad_norm": 0.8976459503173828, - "learning_rate": 1.9076803358984345e-05, - "loss": 0.364, - "step": 3293 - }, - { - "epoch": 0.31030828289489176, - "grad_norm": 0.7487342953681946, - "learning_rate": 1.9076169557904743e-05, - "loss": 0.3332, - "step": 3294 - }, - { - "epoch": 0.3104024869880596, - "grad_norm": 0.9525068402290344, - "learning_rate": 1.9075535549874436e-05, - "loss": 0.3647, - "step": 3295 - }, - { - "epoch": 0.31049669108122746, - "grad_norm": 0.9670138359069824, - "learning_rate": 1.9074901334907888e-05, - "loss": 0.3442, - "step": 3296 - }, - { - "epoch": 0.3105908951743953, - "grad_norm": 0.8507341742515564, - "learning_rate": 1.9074266913019553e-05, - "loss": 0.357, - "step": 3297 - }, - { - "epoch": 0.31068509926756316, - "grad_norm": 0.7282482981681824, - "learning_rate": 1.9073632284223902e-05, - "loss": 0.339, - "step": 3298 - }, - { - "epoch": 0.310779303360731, - "grad_norm": 0.8985245227813721, - "learning_rate": 1.9072997448535398e-05, - "loss": 0.3847, - "step": 3299 - }, - { - "epoch": 0.31087350745389886, - "grad_norm": 0.833671510219574, - "learning_rate": 1.9072362405968522e-05, - "loss": 0.3849, - "step": 3300 - }, - { - "epoch": 0.3109677115470667, - "grad_norm": 0.762282133102417, - "learning_rate": 1.9071727156537753e-05, - "loss": 0.3498, - "step": 3301 - }, - { - "epoch": 0.31106191564023455, - "grad_norm": 0.7250514030456543, - "learning_rate": 1.9071091700257574e-05, - "loss": 0.3302, - "step": 3302 - }, - { - "epoch": 0.3111561197334024, - "grad_norm": 0.7673451900482178, - "learning_rate": 1.9070456037142475e-05, - "loss": 0.3275, - "step": 3303 - }, - { - "epoch": 0.31125032382657025, - "grad_norm": 0.8764957189559937, - "learning_rate": 1.9069820167206953e-05, - "loss": 0.4017, - "step": 3304 - }, - { - "epoch": 0.3113445279197381, - "grad_norm": 0.8258739113807678, - "learning_rate": 1.9069184090465504e-05, - "loss": 0.3298, - "step": 3305 - }, - { - "epoch": 0.31143873201290595, - "grad_norm": 0.7824748754501343, - "learning_rate": 1.906854780693263e-05, - "loss": 0.3577, - "step": 3306 - }, - { - "epoch": 0.3115329361060738, - "grad_norm": 0.8174601793289185, - "learning_rate": 1.906791131662284e-05, - "loss": 0.3411, - "step": 3307 - }, - { - "epoch": 0.31162714019924165, - "grad_norm": 0.7796163558959961, - "learning_rate": 1.906727461955065e-05, - "loss": 0.3098, - "step": 3308 - }, - { - "epoch": 0.3117213442924095, - "grad_norm": 0.7932508587837219, - "learning_rate": 1.9066637715730578e-05, - "loss": 0.3796, - "step": 3309 - }, - { - "epoch": 0.31181554838557735, - "grad_norm": 0.8183960318565369, - "learning_rate": 1.9066000605177143e-05, - "loss": 0.3799, - "step": 3310 - }, - { - "epoch": 0.3119097524787452, - "grad_norm": 0.7983317971229553, - "learning_rate": 1.9065363287904873e-05, - "loss": 0.3644, - "step": 3311 - }, - { - "epoch": 0.31200395657191304, - "grad_norm": 0.8192973136901855, - "learning_rate": 1.90647257639283e-05, - "loss": 0.3484, - "step": 3312 - }, - { - "epoch": 0.3120981606650809, - "grad_norm": 0.8482248783111572, - "learning_rate": 1.906408803326196e-05, - "loss": 0.3503, - "step": 3313 - }, - { - "epoch": 0.31219236475824874, - "grad_norm": 0.8947495818138123, - "learning_rate": 1.9063450095920397e-05, - "loss": 0.3688, - "step": 3314 - }, - { - "epoch": 0.3122865688514166, - "grad_norm": 0.8172609806060791, - "learning_rate": 1.906281195191815e-05, - "loss": 0.3556, - "step": 3315 - }, - { - "epoch": 
0.31238077294458444, - "grad_norm": 0.8087257742881775, - "learning_rate": 1.906217360126978e-05, - "loss": 0.3451, - "step": 3316 - }, - { - "epoch": 0.3124749770377523, - "grad_norm": 0.9458954930305481, - "learning_rate": 1.906153504398984e-05, - "loss": 0.3794, - "step": 3317 - }, - { - "epoch": 0.31256918113092014, - "grad_norm": 0.7924895286560059, - "learning_rate": 1.906089628009288e-05, - "loss": 0.3259, - "step": 3318 - }, - { - "epoch": 0.312663385224088, - "grad_norm": 0.8065479397773743, - "learning_rate": 1.9060257309593473e-05, - "loss": 0.368, - "step": 3319 - }, - { - "epoch": 0.31275758931725584, - "grad_norm": 0.9134812355041504, - "learning_rate": 1.905961813250619e-05, - "loss": 0.4007, - "step": 3320 - }, - { - "epoch": 0.3128517934104237, - "grad_norm": 1.23716402053833, - "learning_rate": 1.90589787488456e-05, - "loss": 0.3557, - "step": 3321 - }, - { - "epoch": 0.31294599750359153, - "grad_norm": 0.9040558934211731, - "learning_rate": 1.9058339158626286e-05, - "loss": 0.3511, - "step": 3322 - }, - { - "epoch": 0.3130402015967594, - "grad_norm": 0.8373039960861206, - "learning_rate": 1.9057699361862832e-05, - "loss": 0.3723, - "step": 3323 - }, - { - "epoch": 0.31313440568992723, - "grad_norm": 0.825303852558136, - "learning_rate": 1.905705935856982e-05, - "loss": 0.397, - "step": 3324 - }, - { - "epoch": 0.3132286097830951, - "grad_norm": 0.8393895626068115, - "learning_rate": 1.905641914876185e-05, - "loss": 0.3596, - "step": 3325 - }, - { - "epoch": 0.31332281387626293, - "grad_norm": 0.8771679401397705, - "learning_rate": 1.9055778732453518e-05, - "loss": 0.3707, - "step": 3326 - }, - { - "epoch": 0.3134170179694308, - "grad_norm": 0.833461582660675, - "learning_rate": 1.9055138109659427e-05, - "loss": 0.3577, - "step": 3327 - }, - { - "epoch": 0.3135112220625986, - "grad_norm": 0.8579936623573303, - "learning_rate": 1.905449728039418e-05, - "loss": 0.4035, - "step": 3328 - }, - { - "epoch": 0.3136054261557665, - "grad_norm": 0.8387287259101868, - "learning_rate": 1.9053856244672392e-05, - "loss": 0.3426, - "step": 3329 - }, - { - "epoch": 0.3136996302489343, - "grad_norm": 0.784853994846344, - "learning_rate": 1.905321500250868e-05, - "loss": 0.3615, - "step": 3330 - }, - { - "epoch": 0.3137938343421022, - "grad_norm": 1.0262951850891113, - "learning_rate": 1.9052573553917667e-05, - "loss": 0.3515, - "step": 3331 - }, - { - "epoch": 0.31388803843527, - "grad_norm": 0.7893538475036621, - "learning_rate": 1.9051931898913977e-05, - "loss": 0.3542, - "step": 3332 - }, - { - "epoch": 0.31398224252843787, - "grad_norm": 0.8260703682899475, - "learning_rate": 1.905129003751224e-05, - "loss": 0.3426, - "step": 3333 - }, - { - "epoch": 0.3140764466216057, - "grad_norm": 0.9318860173225403, - "learning_rate": 1.9050647969727088e-05, - "loss": 0.3481, - "step": 3334 - }, - { - "epoch": 0.31417065071477357, - "grad_norm": 0.8694115281105042, - "learning_rate": 1.905000569557317e-05, - "loss": 0.3807, - "step": 3335 - }, - { - "epoch": 0.3142648548079414, - "grad_norm": 0.7466548681259155, - "learning_rate": 1.9049363215065124e-05, - "loss": 0.3221, - "step": 3336 - }, - { - "epoch": 0.31435905890110927, - "grad_norm": 0.8590157628059387, - "learning_rate": 1.9048720528217604e-05, - "loss": 0.3359, - "step": 3337 - }, - { - "epoch": 0.3144532629942771, - "grad_norm": 0.855535089969635, - "learning_rate": 1.9048077635045258e-05, - "loss": 0.4009, - "step": 3338 - }, - { - "epoch": 0.31454746708744497, - "grad_norm": 0.7641542553901672, - "learning_rate": 
1.904743453556275e-05, - "loss": 0.3455, - "step": 3339 - }, - { - "epoch": 0.3146416711806128, - "grad_norm": 0.764505922794342, - "learning_rate": 1.9046791229784745e-05, - "loss": 0.343, - "step": 3340 - }, - { - "epoch": 0.31473587527378066, - "grad_norm": 0.8607409596443176, - "learning_rate": 1.904614771772591e-05, - "loss": 0.3363, - "step": 3341 - }, - { - "epoch": 0.3148300793669485, - "grad_norm": 0.7796515822410583, - "learning_rate": 1.904550399940091e-05, - "loss": 0.3583, - "step": 3342 - }, - { - "epoch": 0.31492428346011636, - "grad_norm": 0.7588558793067932, - "learning_rate": 1.9044860074824436e-05, - "loss": 0.3031, - "step": 3343 - }, - { - "epoch": 0.3150184875532842, - "grad_norm": 0.8214284181594849, - "learning_rate": 1.9044215944011163e-05, - "loss": 0.3443, - "step": 3344 - }, - { - "epoch": 0.31511269164645206, - "grad_norm": 0.8406785130500793, - "learning_rate": 1.9043571606975776e-05, - "loss": 0.3502, - "step": 3345 - }, - { - "epoch": 0.3152068957396199, - "grad_norm": 0.8515416979789734, - "learning_rate": 1.9042927063732972e-05, - "loss": 0.384, - "step": 3346 - }, - { - "epoch": 0.31530109983278776, - "grad_norm": 0.8178034424781799, - "learning_rate": 1.9042282314297446e-05, - "loss": 0.3452, - "step": 3347 - }, - { - "epoch": 0.3153953039259556, - "grad_norm": 0.7956291437149048, - "learning_rate": 1.90416373586839e-05, - "loss": 0.3206, - "step": 3348 - }, - { - "epoch": 0.31548950801912345, - "grad_norm": 0.8119598031044006, - "learning_rate": 1.904099219690704e-05, - "loss": 0.3607, - "step": 3349 - }, - { - "epoch": 0.3155837121122913, - "grad_norm": 0.7631762623786926, - "learning_rate": 1.9040346828981574e-05, - "loss": 0.3011, - "step": 3350 - }, - { - "epoch": 0.31567791620545915, - "grad_norm": 0.7502119541168213, - "learning_rate": 1.903970125492222e-05, - "loss": 0.3215, - "step": 3351 - }, - { - "epoch": 0.315772120298627, - "grad_norm": 0.7395917177200317, - "learning_rate": 1.9039055474743694e-05, - "loss": 0.2909, - "step": 3352 - }, - { - "epoch": 0.31586632439179485, - "grad_norm": 0.8743318915367126, - "learning_rate": 1.9038409488460728e-05, - "loss": 0.3779, - "step": 3353 - }, - { - "epoch": 0.3159605284849627, - "grad_norm": 0.7067281007766724, - "learning_rate": 1.9037763296088046e-05, - "loss": 0.3173, - "step": 3354 - }, - { - "epoch": 0.3160547325781305, - "grad_norm": 0.8748904466629028, - "learning_rate": 1.9037116897640386e-05, - "loss": 0.4102, - "step": 3355 - }, - { - "epoch": 0.31614893667129834, - "grad_norm": 0.8747310638427734, - "learning_rate": 1.903647029313248e-05, - "loss": 0.3563, - "step": 3356 - }, - { - "epoch": 0.3162431407644662, - "grad_norm": 0.7853719592094421, - "learning_rate": 1.9035823482579076e-05, - "loss": 0.3073, - "step": 3357 - }, - { - "epoch": 0.31633734485763404, - "grad_norm": 0.8870383501052856, - "learning_rate": 1.9035176465994927e-05, - "loss": 0.3611, - "step": 3358 - }, - { - "epoch": 0.3164315489508019, - "grad_norm": 0.8669750094413757, - "learning_rate": 1.9034529243394777e-05, - "loss": 0.3426, - "step": 3359 - }, - { - "epoch": 0.31652575304396974, - "grad_norm": 0.7884622812271118, - "learning_rate": 1.903388181479339e-05, - "loss": 0.3437, - "step": 3360 - }, - { - "epoch": 0.3166199571371376, - "grad_norm": 0.7719337344169617, - "learning_rate": 1.903323418020553e-05, - "loss": 0.3602, - "step": 3361 - }, - { - "epoch": 0.31671416123030544, - "grad_norm": 1.0963482856750488, - "learning_rate": 1.9032586339645954e-05, - "loss": 0.3569, - "step": 3362 - }, - { - "epoch": 
0.3168083653234733, - "grad_norm": 0.9027066826820374, - "learning_rate": 1.9031938293129443e-05, - "loss": 0.377, - "step": 3363 - }, - { - "epoch": 0.31690256941664113, - "grad_norm": 0.8001086711883545, - "learning_rate": 1.9031290040670773e-05, - "loss": 0.3745, - "step": 3364 - }, - { - "epoch": 0.316996773509809, - "grad_norm": 0.8479501605033875, - "learning_rate": 1.903064158228472e-05, - "loss": 0.3432, - "step": 3365 - }, - { - "epoch": 0.31709097760297683, - "grad_norm": 0.7776916027069092, - "learning_rate": 1.9029992917986075e-05, - "loss": 0.3247, - "step": 3366 - }, - { - "epoch": 0.3171851816961447, - "grad_norm": 0.8749487996101379, - "learning_rate": 1.9029344047789627e-05, - "loss": 0.3525, - "step": 3367 - }, - { - "epoch": 0.31727938578931253, - "grad_norm": 0.7998982071876526, - "learning_rate": 1.9028694971710166e-05, - "loss": 0.3642, - "step": 3368 - }, - { - "epoch": 0.3173735898824804, - "grad_norm": 0.8136427402496338, - "learning_rate": 1.90280456897625e-05, - "loss": 0.3358, - "step": 3369 - }, - { - "epoch": 0.3174677939756482, - "grad_norm": 0.9419760704040527, - "learning_rate": 1.902739620196143e-05, - "loss": 0.3391, - "step": 3370 - }, - { - "epoch": 0.3175619980688161, - "grad_norm": 0.8789461255073547, - "learning_rate": 1.9026746508321766e-05, - "loss": 0.3801, - "step": 3371 - }, - { - "epoch": 0.3176562021619839, - "grad_norm": 0.7949413061141968, - "learning_rate": 1.902609660885832e-05, - "loss": 0.3409, - "step": 3372 - }, - { - "epoch": 0.3177504062551518, - "grad_norm": 0.8431394696235657, - "learning_rate": 1.902544650358591e-05, - "loss": 0.3271, - "step": 3373 - }, - { - "epoch": 0.3178446103483196, - "grad_norm": 0.905490517616272, - "learning_rate": 1.9024796192519364e-05, - "loss": 0.3346, - "step": 3374 - }, - { - "epoch": 0.31793881444148747, - "grad_norm": 0.9226703643798828, - "learning_rate": 1.902414567567351e-05, - "loss": 0.3449, - "step": 3375 - }, - { - "epoch": 0.3180330185346553, - "grad_norm": 0.845367431640625, - "learning_rate": 1.9023494953063175e-05, - "loss": 0.3573, - "step": 3376 - }, - { - "epoch": 0.31812722262782317, - "grad_norm": 0.7797483801841736, - "learning_rate": 1.90228440247032e-05, - "loss": 0.3392, - "step": 3377 - }, - { - "epoch": 0.318221426720991, - "grad_norm": 0.8446933031082153, - "learning_rate": 1.9022192890608432e-05, - "loss": 0.3661, - "step": 3378 - }, - { - "epoch": 0.31831563081415887, - "grad_norm": 0.828841507434845, - "learning_rate": 1.9021541550793707e-05, - "loss": 0.3936, - "step": 3379 - }, - { - "epoch": 0.3184098349073267, - "grad_norm": 0.8184050917625427, - "learning_rate": 1.9020890005273887e-05, - "loss": 0.3671, - "step": 3380 - }, - { - "epoch": 0.31850403900049457, - "grad_norm": 0.8316391706466675, - "learning_rate": 1.902023825406382e-05, - "loss": 0.3382, - "step": 3381 - }, - { - "epoch": 0.3185982430936624, - "grad_norm": 0.894083559513092, - "learning_rate": 1.9019586297178374e-05, - "loss": 0.3867, - "step": 3382 - }, - { - "epoch": 0.31869244718683026, - "grad_norm": 0.9373259544372559, - "learning_rate": 1.901893413463241e-05, - "loss": 0.4018, - "step": 3383 - }, - { - "epoch": 0.3187866512799981, - "grad_norm": 0.7572992444038391, - "learning_rate": 1.90182817664408e-05, - "loss": 0.385, - "step": 3384 - }, - { - "epoch": 0.31888085537316596, - "grad_norm": 0.7976850271224976, - "learning_rate": 1.9017629192618418e-05, - "loss": 0.3523, - "step": 3385 - }, - { - "epoch": 0.3189750594663338, - "grad_norm": 0.9599197506904602, - "learning_rate": 
1.9016976413180143e-05, - "loss": 0.3978, - "step": 3386 - }, - { - "epoch": 0.31906926355950166, - "grad_norm": 0.724345326423645, - "learning_rate": 1.9016323428140863e-05, - "loss": 0.3274, - "step": 3387 - }, - { - "epoch": 0.3191634676526695, - "grad_norm": 0.7421485781669617, - "learning_rate": 1.9015670237515465e-05, - "loss": 0.3711, - "step": 3388 - }, - { - "epoch": 0.31925767174583736, - "grad_norm": 0.7720860838890076, - "learning_rate": 1.9015016841318843e-05, - "loss": 0.3822, - "step": 3389 - }, - { - "epoch": 0.3193518758390052, - "grad_norm": 0.7846570014953613, - "learning_rate": 1.9014363239565892e-05, - "loss": 0.3715, - "step": 3390 - }, - { - "epoch": 0.31944607993217305, - "grad_norm": 0.7919772863388062, - "learning_rate": 1.901370943227152e-05, - "loss": 0.3364, - "step": 3391 - }, - { - "epoch": 0.3195402840253409, - "grad_norm": 0.8712970614433289, - "learning_rate": 1.9013055419450634e-05, - "loss": 0.3768, - "step": 3392 - }, - { - "epoch": 0.31963448811850875, - "grad_norm": 0.9658766984939575, - "learning_rate": 1.9012401201118145e-05, - "loss": 0.337, - "step": 3393 - }, - { - "epoch": 0.3197286922116766, - "grad_norm": 0.8209637403488159, - "learning_rate": 1.9011746777288968e-05, - "loss": 0.3875, - "step": 3394 - }, - { - "epoch": 0.31982289630484445, - "grad_norm": 0.9107644557952881, - "learning_rate": 1.901109214797803e-05, - "loss": 0.3724, - "step": 3395 - }, - { - "epoch": 0.3199171003980123, - "grad_norm": 0.8732518553733826, - "learning_rate": 1.9010437313200256e-05, - "loss": 0.3586, - "step": 3396 - }, - { - "epoch": 0.32001130449118015, - "grad_norm": 0.9876217842102051, - "learning_rate": 1.9009782272970577e-05, - "loss": 0.3495, - "step": 3397 - }, - { - "epoch": 0.320105508584348, - "grad_norm": 0.7326115369796753, - "learning_rate": 1.9009127027303925e-05, - "loss": 0.3234, - "step": 3398 - }, - { - "epoch": 0.32019971267751585, - "grad_norm": 0.8744220733642578, - "learning_rate": 1.9008471576215247e-05, - "loss": 0.3624, - "step": 3399 - }, - { - "epoch": 0.3202939167706837, - "grad_norm": 0.8153958320617676, - "learning_rate": 1.9007815919719483e-05, - "loss": 0.3374, - "step": 3400 - }, - { - "epoch": 0.32038812086385154, - "grad_norm": 0.7342997789382935, - "learning_rate": 1.9007160057831585e-05, - "loss": 0.3099, - "step": 3401 - }, - { - "epoch": 0.3204823249570194, - "grad_norm": 0.9112691879272461, - "learning_rate": 1.900650399056651e-05, - "loss": 0.3833, - "step": 3402 - }, - { - "epoch": 0.32057652905018724, - "grad_norm": 0.8247086405754089, - "learning_rate": 1.9005847717939213e-05, - "loss": 0.3497, - "step": 3403 - }, - { - "epoch": 0.3206707331433551, - "grad_norm": 0.9501208662986755, - "learning_rate": 1.900519123996466e-05, - "loss": 0.4221, - "step": 3404 - }, - { - "epoch": 0.32076493723652294, - "grad_norm": 0.9292295575141907, - "learning_rate": 1.900453455665782e-05, - "loss": 0.3616, - "step": 3405 - }, - { - "epoch": 0.3208591413296908, - "grad_norm": 0.7539984583854675, - "learning_rate": 1.900387766803367e-05, - "loss": 0.3055, - "step": 3406 - }, - { - "epoch": 0.32095334542285864, - "grad_norm": 0.7906505465507507, - "learning_rate": 1.9003220574107178e-05, - "loss": 0.3281, - "step": 3407 - }, - { - "epoch": 0.3210475495160265, - "grad_norm": 0.8261638283729553, - "learning_rate": 1.9002563274893337e-05, - "loss": 0.357, - "step": 3408 - }, - { - "epoch": 0.32114175360919434, - "grad_norm": 0.7509334683418274, - "learning_rate": 1.900190577040713e-05, - "loss": 0.3546, - "step": 3409 - }, - { - "epoch": 
0.3212359577023622, - "grad_norm": 0.9319678544998169, - "learning_rate": 1.900124806066355e-05, - "loss": 0.3577, - "step": 3410 - }, - { - "epoch": 0.32133016179553003, - "grad_norm": 0.7860541343688965, - "learning_rate": 1.9000590145677593e-05, - "loss": 0.3785, - "step": 3411 - }, - { - "epoch": 0.3214243658886979, - "grad_norm": 0.8138558268547058, - "learning_rate": 1.8999932025464262e-05, - "loss": 0.3311, - "step": 3412 - }, - { - "epoch": 0.32151856998186573, - "grad_norm": 0.779015064239502, - "learning_rate": 1.8999273700038564e-05, - "loss": 0.3458, - "step": 3413 - }, - { - "epoch": 0.3216127740750336, - "grad_norm": 0.7839806079864502, - "learning_rate": 1.8998615169415506e-05, - "loss": 0.3003, - "step": 3414 - }, - { - "epoch": 0.32170697816820143, - "grad_norm": 0.788892924785614, - "learning_rate": 1.8997956433610104e-05, - "loss": 0.3594, - "step": 3415 - }, - { - "epoch": 0.3218011822613693, - "grad_norm": 0.7324049472808838, - "learning_rate": 1.899729749263738e-05, - "loss": 0.2969, - "step": 3416 - }, - { - "epoch": 0.3218953863545371, - "grad_norm": 0.8111429810523987, - "learning_rate": 1.899663834651236e-05, - "loss": 0.3821, - "step": 3417 - }, - { - "epoch": 0.321989590447705, - "grad_norm": 0.7825393676757812, - "learning_rate": 1.899597899525007e-05, - "loss": 0.3719, - "step": 3418 - }, - { - "epoch": 0.3220837945408728, - "grad_norm": 0.7664816975593567, - "learning_rate": 1.899531943886555e-05, - "loss": 0.3688, - "step": 3419 - }, - { - "epoch": 0.3221779986340407, - "grad_norm": 0.8949109315872192, - "learning_rate": 1.8994659677373834e-05, - "loss": 0.3789, - "step": 3420 - }, - { - "epoch": 0.3222722027272085, - "grad_norm": 0.9079577922821045, - "learning_rate": 1.8993999710789966e-05, - "loss": 0.3246, - "step": 3421 - }, - { - "epoch": 0.32236640682037637, - "grad_norm": 0.7905574440956116, - "learning_rate": 1.8993339539128993e-05, - "loss": 0.3632, - "step": 3422 - }, - { - "epoch": 0.3224606109135442, - "grad_norm": 0.753884494304657, - "learning_rate": 1.8992679162405975e-05, - "loss": 0.3497, - "step": 3423 - }, - { - "epoch": 0.322554815006712, - "grad_norm": 0.7039985656738281, - "learning_rate": 1.899201858063596e-05, - "loss": 0.3318, - "step": 3424 - }, - { - "epoch": 0.32264901909987986, - "grad_norm": 0.8347176313400269, - "learning_rate": 1.8991357793834018e-05, - "loss": 0.3366, - "step": 3425 - }, - { - "epoch": 0.3227432231930477, - "grad_norm": 0.8024595379829407, - "learning_rate": 1.8990696802015213e-05, - "loss": 0.3639, - "step": 3426 - }, - { - "epoch": 0.32283742728621556, - "grad_norm": 0.9033552408218384, - "learning_rate": 1.8990035605194614e-05, - "loss": 0.3906, - "step": 3427 - }, - { - "epoch": 0.3229316313793834, - "grad_norm": 0.8519493341445923, - "learning_rate": 1.8989374203387302e-05, - "loss": 0.3633, - "step": 3428 - }, - { - "epoch": 0.32302583547255126, - "grad_norm": 0.7858714461326599, - "learning_rate": 1.8988712596608354e-05, - "loss": 0.3604, - "step": 3429 - }, - { - "epoch": 0.3231200395657191, - "grad_norm": 0.7719192504882812, - "learning_rate": 1.898805078487286e-05, - "loss": 0.3646, - "step": 3430 - }, - { - "epoch": 0.32321424365888696, - "grad_norm": 0.8226203322410583, - "learning_rate": 1.8987388768195906e-05, - "loss": 0.4058, - "step": 3431 - }, - { - "epoch": 0.3233084477520548, - "grad_norm": 1.0411338806152344, - "learning_rate": 1.898672654659259e-05, - "loss": 0.374, - "step": 3432 - }, - { - "epoch": 0.32340265184522266, - "grad_norm": 0.8630709052085876, - "learning_rate": 
1.898606412007801e-05, - "loss": 0.3801, - "step": 3433 - }, - { - "epoch": 0.3234968559383905, - "grad_norm": 0.7369588613510132, - "learning_rate": 1.898540148866727e-05, - "loss": 0.3255, - "step": 3434 - }, - { - "epoch": 0.32359106003155835, - "grad_norm": 0.7781053781509399, - "learning_rate": 1.898473865237548e-05, - "loss": 0.3523, - "step": 3435 - }, - { - "epoch": 0.3236852641247262, - "grad_norm": 0.8066985607147217, - "learning_rate": 1.898407561121775e-05, - "loss": 0.381, - "step": 3436 - }, - { - "epoch": 0.32377946821789405, - "grad_norm": 0.8260643482208252, - "learning_rate": 1.8983412365209207e-05, - "loss": 0.3586, - "step": 3437 - }, - { - "epoch": 0.3238736723110619, - "grad_norm": 0.8808850049972534, - "learning_rate": 1.898274891436497e-05, - "loss": 0.3441, - "step": 3438 - }, - { - "epoch": 0.32396787640422975, - "grad_norm": 0.8420130014419556, - "learning_rate": 1.8982085258700164e-05, - "loss": 0.4047, - "step": 3439 - }, - { - "epoch": 0.3240620804973976, - "grad_norm": 0.7730211019515991, - "learning_rate": 1.898142139822992e-05, - "loss": 0.3499, - "step": 3440 - }, - { - "epoch": 0.32415628459056545, - "grad_norm": 0.8381409645080566, - "learning_rate": 1.8980757332969382e-05, - "loss": 0.3538, - "step": 3441 - }, - { - "epoch": 0.3242504886837333, - "grad_norm": 0.8234378099441528, - "learning_rate": 1.8980093062933686e-05, - "loss": 0.3609, - "step": 3442 - }, - { - "epoch": 0.32434469277690114, - "grad_norm": 0.807602047920227, - "learning_rate": 1.897942858813798e-05, - "loss": 0.3586, - "step": 3443 - }, - { - "epoch": 0.324438896870069, - "grad_norm": 0.8544538021087646, - "learning_rate": 1.8978763908597417e-05, - "loss": 0.3772, - "step": 3444 - }, - { - "epoch": 0.32453310096323684, - "grad_norm": 0.7575989365577698, - "learning_rate": 1.897809902432715e-05, - "loss": 0.3696, - "step": 3445 - }, - { - "epoch": 0.3246273050564047, - "grad_norm": 0.8299974203109741, - "learning_rate": 1.8977433935342338e-05, - "loss": 0.3541, - "step": 3446 - }, - { - "epoch": 0.32472150914957254, - "grad_norm": 0.7398593425750732, - "learning_rate": 1.897676864165815e-05, - "loss": 0.3588, - "step": 3447 - }, - { - "epoch": 0.3248157132427404, - "grad_norm": 0.8261170983314514, - "learning_rate": 1.8976103143289756e-05, - "loss": 0.3756, - "step": 3448 - }, - { - "epoch": 0.32490991733590824, - "grad_norm": 0.7719841599464417, - "learning_rate": 1.897543744025233e-05, - "loss": 0.3359, - "step": 3449 - }, - { - "epoch": 0.3250041214290761, - "grad_norm": 0.853029727935791, - "learning_rate": 1.8974771532561046e-05, - "loss": 0.3741, - "step": 3450 - }, - { - "epoch": 0.32509832552224394, - "grad_norm": 0.777460515499115, - "learning_rate": 1.897410542023109e-05, - "loss": 0.3836, - "step": 3451 - }, - { - "epoch": 0.3251925296154118, - "grad_norm": 0.8113864660263062, - "learning_rate": 1.8973439103277657e-05, - "loss": 0.3281, - "step": 3452 - }, - { - "epoch": 0.32528673370857963, - "grad_norm": 0.7618050575256348, - "learning_rate": 1.8972772581715932e-05, - "loss": 0.3352, - "step": 3453 - }, - { - "epoch": 0.3253809378017475, - "grad_norm": 0.7826343774795532, - "learning_rate": 1.897210585556111e-05, - "loss": 0.3173, - "step": 3454 - }, - { - "epoch": 0.32547514189491533, - "grad_norm": 0.8260231614112854, - "learning_rate": 1.8971438924828404e-05, - "loss": 0.3396, - "step": 3455 - }, - { - "epoch": 0.3255693459880832, - "grad_norm": 0.8214619159698486, - "learning_rate": 1.897077178953302e-05, - "loss": 0.347, - "step": 3456 - }, - { - "epoch": 
0.32566355008125103, - "grad_norm": 0.8712473511695862, - "learning_rate": 1.897010444969016e-05, - "loss": 0.4287, - "step": 3457 - }, - { - "epoch": 0.3257577541744189, - "grad_norm": 0.8250601291656494, - "learning_rate": 1.8969436905315045e-05, - "loss": 0.3639, - "step": 3458 - }, - { - "epoch": 0.3258519582675867, - "grad_norm": 0.8448606133460999, - "learning_rate": 1.89687691564229e-05, - "loss": 0.3821, - "step": 3459 - }, - { - "epoch": 0.3259461623607546, - "grad_norm": 0.7553685307502747, - "learning_rate": 1.8968101203028947e-05, - "loss": 0.3507, - "step": 3460 - }, - { - "epoch": 0.3260403664539224, - "grad_norm": 0.6598140597343445, - "learning_rate": 1.896743304514842e-05, - "loss": 0.2978, - "step": 3461 - }, - { - "epoch": 0.3261345705470903, - "grad_norm": 0.8599854111671448, - "learning_rate": 1.8966764682796546e-05, - "loss": 0.3804, - "step": 3462 - }, - { - "epoch": 0.3262287746402581, - "grad_norm": 0.8068474531173706, - "learning_rate": 1.8966096115988572e-05, - "loss": 0.3663, - "step": 3463 - }, - { - "epoch": 0.326322978733426, - "grad_norm": 0.7263758182525635, - "learning_rate": 1.8965427344739743e-05, - "loss": 0.278, - "step": 3464 - }, - { - "epoch": 0.3264171828265938, - "grad_norm": 0.7682604789733887, - "learning_rate": 1.8964758369065303e-05, - "loss": 0.3688, - "step": 3465 - }, - { - "epoch": 0.32651138691976167, - "grad_norm": 0.7840115427970886, - "learning_rate": 1.8964089188980508e-05, - "loss": 0.3633, - "step": 3466 - }, - { - "epoch": 0.3266055910129295, - "grad_norm": 0.9188367128372192, - "learning_rate": 1.8963419804500615e-05, - "loss": 0.4079, - "step": 3467 - }, - { - "epoch": 0.32669979510609737, - "grad_norm": 0.8452111482620239, - "learning_rate": 1.896275021564089e-05, - "loss": 0.3865, - "step": 3468 - }, - { - "epoch": 0.3267939991992652, - "grad_norm": 0.9212802648544312, - "learning_rate": 1.89620804224166e-05, - "loss": 0.3861, - "step": 3469 - }, - { - "epoch": 0.32688820329243307, - "grad_norm": 0.8427731394767761, - "learning_rate": 1.896141042484301e-05, - "loss": 0.3486, - "step": 3470 - }, - { - "epoch": 0.3269824073856009, - "grad_norm": 0.6711952686309814, - "learning_rate": 1.896074022293541e-05, - "loss": 0.3076, - "step": 3471 - }, - { - "epoch": 0.32707661147876876, - "grad_norm": 0.7252352237701416, - "learning_rate": 1.8960069816709073e-05, - "loss": 0.3372, - "step": 3472 - }, - { - "epoch": 0.3271708155719366, - "grad_norm": 0.9933614730834961, - "learning_rate": 1.8959399206179286e-05, - "loss": 0.3728, - "step": 3473 - }, - { - "epoch": 0.32726501966510446, - "grad_norm": 0.7768873572349548, - "learning_rate": 1.8958728391361343e-05, - "loss": 0.3163, - "step": 3474 - }, - { - "epoch": 0.3273592237582723, - "grad_norm": 0.8002369999885559, - "learning_rate": 1.8958057372270536e-05, - "loss": 0.3197, - "step": 3475 - }, - { - "epoch": 0.32745342785144016, - "grad_norm": 0.8852412700653076, - "learning_rate": 1.8957386148922166e-05, - "loss": 0.329, - "step": 3476 - }, - { - "epoch": 0.327547631944608, - "grad_norm": 0.7686527371406555, - "learning_rate": 1.8956714721331543e-05, - "loss": 0.2952, - "step": 3477 - }, - { - "epoch": 0.32764183603777586, - "grad_norm": 0.7055858373641968, - "learning_rate": 1.8956043089513968e-05, - "loss": 0.3069, - "step": 3478 - }, - { - "epoch": 0.3277360401309437, - "grad_norm": 0.8490488529205322, - "learning_rate": 1.8955371253484762e-05, - "loss": 0.3633, - "step": 3479 - }, - { - "epoch": 0.32783024422411156, - "grad_norm": 0.858548641204834, - "learning_rate": 
1.895469921325924e-05, - "loss": 0.3721, - "step": 3480 - }, - { - "epoch": 0.3279244483172794, - "grad_norm": 0.7843692898750305, - "learning_rate": 1.895402696885273e-05, - "loss": 0.3758, - "step": 3481 - }, - { - "epoch": 0.32801865241044725, - "grad_norm": 0.8354014754295349, - "learning_rate": 1.8953354520280557e-05, - "loss": 0.3795, - "step": 3482 - }, - { - "epoch": 0.3281128565036151, - "grad_norm": 0.9440718293190002, - "learning_rate": 1.8952681867558053e-05, - "loss": 0.4087, - "step": 3483 - }, - { - "epoch": 0.32820706059678295, - "grad_norm": 0.8207374811172485, - "learning_rate": 1.8952009010700556e-05, - "loss": 0.3432, - "step": 3484 - }, - { - "epoch": 0.3283012646899508, - "grad_norm": 0.9222376942634583, - "learning_rate": 1.8951335949723412e-05, - "loss": 0.4125, - "step": 3485 - }, - { - "epoch": 0.32839546878311865, - "grad_norm": 0.8984072208404541, - "learning_rate": 1.8950662684641963e-05, - "loss": 0.3678, - "step": 3486 - }, - { - "epoch": 0.3284896728762865, - "grad_norm": 0.7753778696060181, - "learning_rate": 1.894998921547156e-05, - "loss": 0.3421, - "step": 3487 - }, - { - "epoch": 0.32858387696945435, - "grad_norm": 0.8408216834068298, - "learning_rate": 1.8949315542227568e-05, - "loss": 0.3931, - "step": 3488 - }, - { - "epoch": 0.3286780810626222, - "grad_norm": 0.8501113653182983, - "learning_rate": 1.8948641664925335e-05, - "loss": 0.4026, - "step": 3489 - }, - { - "epoch": 0.32877228515579004, - "grad_norm": 0.9063554406166077, - "learning_rate": 1.8947967583580234e-05, - "loss": 0.3686, - "step": 3490 - }, - { - "epoch": 0.3288664892489579, - "grad_norm": 1.043784260749817, - "learning_rate": 1.8947293298207637e-05, - "loss": 0.4105, - "step": 3491 - }, - { - "epoch": 0.32896069334212574, - "grad_norm": 0.9878965616226196, - "learning_rate": 1.8946618808822913e-05, - "loss": 0.3663, - "step": 3492 - }, - { - "epoch": 0.32905489743529354, - "grad_norm": 0.8769758343696594, - "learning_rate": 1.8945944115441444e-05, - "loss": 0.3666, - "step": 3493 - }, - { - "epoch": 0.3291491015284614, - "grad_norm": 0.8564929962158203, - "learning_rate": 1.8945269218078615e-05, - "loss": 0.3991, - "step": 3494 - }, - { - "epoch": 0.32924330562162923, - "grad_norm": 0.91790771484375, - "learning_rate": 1.8944594116749812e-05, - "loss": 0.3838, - "step": 3495 - }, - { - "epoch": 0.3293375097147971, - "grad_norm": 0.7588348388671875, - "learning_rate": 1.894391881147043e-05, - "loss": 0.3331, - "step": 3496 - }, - { - "epoch": 0.32943171380796493, - "grad_norm": 0.7754861116409302, - "learning_rate": 1.894324330225587e-05, - "loss": 0.316, - "step": 3497 - }, - { - "epoch": 0.3295259179011328, - "grad_norm": 0.8175535798072815, - "learning_rate": 1.894256758912153e-05, - "loss": 0.3432, - "step": 3498 - }, - { - "epoch": 0.32962012199430063, - "grad_norm": 0.8538705706596375, - "learning_rate": 1.8941891672082816e-05, - "loss": 0.351, - "step": 3499 - }, - { - "epoch": 0.3297143260874685, - "grad_norm": 0.7761415839195251, - "learning_rate": 1.894121555115515e-05, - "loss": 0.3113, - "step": 3500 - }, - { - "epoch": 0.32980853018063633, - "grad_norm": 0.9469418525695801, - "learning_rate": 1.8940539226353935e-05, - "loss": 0.4011, - "step": 3501 - }, - { - "epoch": 0.3299027342738042, - "grad_norm": 0.8777265548706055, - "learning_rate": 1.8939862697694602e-05, - "loss": 0.3625, - "step": 3502 - }, - { - "epoch": 0.329996938366972, - "grad_norm": 0.8174872398376465, - "learning_rate": 1.8939185965192572e-05, - "loss": 0.3662, - "step": 3503 - }, - { - "epoch": 
0.3300911424601399, - "grad_norm": 0.9018071293830872, - "learning_rate": 1.8938509028863278e-05, - "loss": 0.3792, - "step": 3504 - }, - { - "epoch": 0.3301853465533077, - "grad_norm": 0.9734531044960022, - "learning_rate": 1.8937831888722152e-05, - "loss": 0.3997, - "step": 3505 - }, - { - "epoch": 0.3302795506464756, - "grad_norm": 0.9966946840286255, - "learning_rate": 1.8937154544784642e-05, - "loss": 0.3543, - "step": 3506 - }, - { - "epoch": 0.3303737547396434, - "grad_norm": 0.8334128260612488, - "learning_rate": 1.893647699706618e-05, - "loss": 0.3797, - "step": 3507 - }, - { - "epoch": 0.33046795883281127, - "grad_norm": 0.8594630360603333, - "learning_rate": 1.893579924558223e-05, - "loss": 0.3735, - "step": 3508 - }, - { - "epoch": 0.3305621629259791, - "grad_norm": 0.7860228419303894, - "learning_rate": 1.8935121290348232e-05, - "loss": 0.3597, - "step": 3509 - }, - { - "epoch": 0.33065636701914697, - "grad_norm": 0.9093959927558899, - "learning_rate": 1.8934443131379652e-05, - "loss": 0.4012, - "step": 3510 - }, - { - "epoch": 0.3307505711123148, - "grad_norm": 0.7881484031677246, - "learning_rate": 1.893376476869195e-05, - "loss": 0.3599, - "step": 3511 - }, - { - "epoch": 0.33084477520548267, - "grad_norm": 0.851662278175354, - "learning_rate": 1.8933086202300597e-05, - "loss": 0.3637, - "step": 3512 - }, - { - "epoch": 0.3309389792986505, - "grad_norm": 1.1916617155075073, - "learning_rate": 1.893240743222106e-05, - "loss": 0.3572, - "step": 3513 - }, - { - "epoch": 0.33103318339181836, - "grad_norm": 0.8736041188240051, - "learning_rate": 1.8931728458468824e-05, - "loss": 0.3805, - "step": 3514 - }, - { - "epoch": 0.3311273874849862, - "grad_norm": 0.9154252409934998, - "learning_rate": 1.8931049281059362e-05, - "loss": 0.4029, - "step": 3515 - }, - { - "epoch": 0.33122159157815406, - "grad_norm": 0.7905598282814026, - "learning_rate": 1.8930369900008166e-05, - "loss": 0.3388, - "step": 3516 - }, - { - "epoch": 0.3313157956713219, - "grad_norm": 0.782569169998169, - "learning_rate": 1.8929690315330724e-05, - "loss": 0.315, - "step": 3517 - }, - { - "epoch": 0.33140999976448976, - "grad_norm": 0.8409260511398315, - "learning_rate": 1.8929010527042533e-05, - "loss": 0.3778, - "step": 3518 - }, - { - "epoch": 0.3315042038576576, - "grad_norm": 0.8331377506256104, - "learning_rate": 1.8928330535159094e-05, - "loss": 0.3402, - "step": 3519 - }, - { - "epoch": 0.33159840795082546, - "grad_norm": 0.7925302982330322, - "learning_rate": 1.892765033969591e-05, - "loss": 0.3557, - "step": 3520 - }, - { - "epoch": 0.3316926120439933, - "grad_norm": 0.772601842880249, - "learning_rate": 1.8926969940668495e-05, - "loss": 0.3582, - "step": 3521 - }, - { - "epoch": 0.33178681613716116, - "grad_norm": 0.9124496579170227, - "learning_rate": 1.8926289338092352e-05, - "loss": 0.4083, - "step": 3522 - }, - { - "epoch": 0.331881020230329, - "grad_norm": 1.2293285131454468, - "learning_rate": 1.8925608531983012e-05, - "loss": 0.37, - "step": 3523 - }, - { - "epoch": 0.33197522432349685, - "grad_norm": 0.8313436508178711, - "learning_rate": 1.8924927522355996e-05, - "loss": 0.3469, - "step": 3524 - }, - { - "epoch": 0.3320694284166647, - "grad_norm": 0.8663201928138733, - "learning_rate": 1.8924246309226826e-05, - "loss": 0.3485, - "step": 3525 - }, - { - "epoch": 0.33216363250983255, - "grad_norm": 0.8084797859191895, - "learning_rate": 1.892356489261104e-05, - "loss": 0.318, - "step": 3526 - }, - { - "epoch": 0.3322578366030004, - "grad_norm": 0.8676424026489258, - "learning_rate": 
1.8922883272524172e-05, - "loss": 0.3519, - "step": 3527 - }, - { - "epoch": 0.33235204069616825, - "grad_norm": 0.8464802503585815, - "learning_rate": 1.8922201448981766e-05, - "loss": 0.3346, - "step": 3528 - }, - { - "epoch": 0.3324462447893361, - "grad_norm": 0.7341082692146301, - "learning_rate": 1.8921519421999374e-05, - "loss": 0.3206, - "step": 3529 - }, - { - "epoch": 0.33254044888250395, - "grad_norm": 0.797134518623352, - "learning_rate": 1.8920837191592535e-05, - "loss": 0.3441, - "step": 3530 - }, - { - "epoch": 0.3326346529756718, - "grad_norm": 0.847991406917572, - "learning_rate": 1.8920154757776816e-05, - "loss": 0.3483, - "step": 3531 - }, - { - "epoch": 0.33272885706883965, - "grad_norm": 0.8134728670120239, - "learning_rate": 1.891947212056777e-05, - "loss": 0.3152, - "step": 3532 - }, - { - "epoch": 0.3328230611620075, - "grad_norm": 0.9314228296279907, - "learning_rate": 1.891878927998097e-05, - "loss": 0.36, - "step": 3533 - }, - { - "epoch": 0.33291726525517534, - "grad_norm": 0.7768193483352661, - "learning_rate": 1.8918106236031976e-05, - "loss": 0.3594, - "step": 3534 - }, - { - "epoch": 0.3330114693483432, - "grad_norm": 0.8359523415565491, - "learning_rate": 1.8917422988736372e-05, - "loss": 0.3816, - "step": 3535 - }, - { - "epoch": 0.33310567344151104, - "grad_norm": 0.7809811234474182, - "learning_rate": 1.891673953810973e-05, - "loss": 0.3489, - "step": 3536 - }, - { - "epoch": 0.3331998775346789, - "grad_norm": 0.8549154996871948, - "learning_rate": 1.8916055884167638e-05, - "loss": 0.3765, - "step": 3537 - }, - { - "epoch": 0.33329408162784674, - "grad_norm": 0.8402571082115173, - "learning_rate": 1.8915372026925682e-05, - "loss": 0.3258, - "step": 3538 - }, - { - "epoch": 0.3333882857210146, - "grad_norm": 0.7873833775520325, - "learning_rate": 1.8914687966399457e-05, - "loss": 0.3103, - "step": 3539 - }, - { - "epoch": 0.33348248981418244, - "grad_norm": 0.7410790324211121, - "learning_rate": 1.8914003702604557e-05, - "loss": 0.3348, - "step": 3540 - }, - { - "epoch": 0.3335766939073503, - "grad_norm": 0.7940850853919983, - "learning_rate": 1.891331923555659e-05, - "loss": 0.3524, - "step": 3541 - }, - { - "epoch": 0.33367089800051813, - "grad_norm": 0.733606219291687, - "learning_rate": 1.8912634565271156e-05, - "loss": 0.3557, - "step": 3542 - }, - { - "epoch": 0.333765102093686, - "grad_norm": 0.914272129535675, - "learning_rate": 1.8911949691763875e-05, - "loss": 0.3898, - "step": 3543 - }, - { - "epoch": 0.33385930618685383, - "grad_norm": 0.7942153811454773, - "learning_rate": 1.8911264615050357e-05, - "loss": 0.3441, - "step": 3544 - }, - { - "epoch": 0.3339535102800217, - "grad_norm": 0.7900113463401794, - "learning_rate": 1.891057933514622e-05, - "loss": 0.3518, - "step": 3545 - }, - { - "epoch": 0.33404771437318953, - "grad_norm": 0.7690131068229675, - "learning_rate": 1.89098938520671e-05, - "loss": 0.3316, - "step": 3546 - }, - { - "epoch": 0.3341419184663574, - "grad_norm": 0.7978188991546631, - "learning_rate": 1.8909208165828618e-05, - "loss": 0.3002, - "step": 3547 - }, - { - "epoch": 0.33423612255952523, - "grad_norm": 0.857190728187561, - "learning_rate": 1.8908522276446408e-05, - "loss": 0.4009, - "step": 3548 - }, - { - "epoch": 0.3343303266526931, - "grad_norm": 0.877989649772644, - "learning_rate": 1.890783618393612e-05, - "loss": 0.326, - "step": 3549 - }, - { - "epoch": 0.3344245307458609, - "grad_norm": 0.8402450084686279, - "learning_rate": 1.8907149888313384e-05, - "loss": 0.3893, - "step": 3550 - }, - { - "epoch": 
0.3345187348390288, - "grad_norm": 0.9059008955955505, - "learning_rate": 1.890646338959386e-05, - "loss": 0.3549, - "step": 3551 - }, - { - "epoch": 0.3346129389321966, - "grad_norm": 0.8231593370437622, - "learning_rate": 1.890577668779319e-05, - "loss": 0.3332, - "step": 3552 - }, - { - "epoch": 0.3347071430253645, - "grad_norm": 0.8435704708099365, - "learning_rate": 1.8905089782927047e-05, - "loss": 0.3588, - "step": 3553 - }, - { - "epoch": 0.3348013471185323, - "grad_norm": 0.8788042068481445, - "learning_rate": 1.890440267501108e-05, - "loss": 0.3823, - "step": 3554 - }, - { - "epoch": 0.33489555121170017, - "grad_norm": 0.8267970085144043, - "learning_rate": 1.8903715364060962e-05, - "loss": 0.3574, - "step": 3555 - }, - { - "epoch": 0.334989755304868, - "grad_norm": 0.8716968894004822, - "learning_rate": 1.8903027850092365e-05, - "loss": 0.3856, - "step": 3556 - }, - { - "epoch": 0.33508395939803587, - "grad_norm": 0.8291651010513306, - "learning_rate": 1.8902340133120965e-05, - "loss": 0.351, - "step": 3557 - }, - { - "epoch": 0.3351781634912037, - "grad_norm": 0.8509311079978943, - "learning_rate": 1.890165221316244e-05, - "loss": 0.3379, - "step": 3558 - }, - { - "epoch": 0.33527236758437157, - "grad_norm": 0.7940100431442261, - "learning_rate": 1.8900964090232477e-05, - "loss": 0.3704, - "step": 3559 - }, - { - "epoch": 0.3353665716775394, - "grad_norm": 0.966296911239624, - "learning_rate": 1.890027576434677e-05, - "loss": 0.368, - "step": 3560 - }, - { - "epoch": 0.33546077577070726, - "grad_norm": 0.7433187365531921, - "learning_rate": 1.8899587235521006e-05, - "loss": 0.3277, - "step": 3561 - }, - { - "epoch": 0.33555497986387506, - "grad_norm": 0.8418706059455872, - "learning_rate": 1.8898898503770895e-05, - "loss": 0.3634, - "step": 3562 - }, - { - "epoch": 0.3356491839570429, - "grad_norm": 0.8187812566757202, - "learning_rate": 1.8898209569112133e-05, - "loss": 0.3589, - "step": 3563 - }, - { - "epoch": 0.33574338805021076, - "grad_norm": 0.8903088569641113, - "learning_rate": 1.8897520431560435e-05, - "loss": 0.3559, - "step": 3564 - }, - { - "epoch": 0.3358375921433786, - "grad_norm": 0.8367480039596558, - "learning_rate": 1.8896831091131506e-05, - "loss": 0.3474, - "step": 3565 - }, - { - "epoch": 0.33593179623654645, - "grad_norm": 0.7861426472663879, - "learning_rate": 1.8896141547841072e-05, - "loss": 0.3794, - "step": 3566 - }, - { - "epoch": 0.3360260003297143, - "grad_norm": 0.9281355738639832, - "learning_rate": 1.8895451801704848e-05, - "loss": 0.3604, - "step": 3567 - }, - { - "epoch": 0.33612020442288215, - "grad_norm": 0.9803590774536133, - "learning_rate": 1.8894761852738572e-05, - "loss": 0.3384, - "step": 3568 - }, - { - "epoch": 0.33621440851605, - "grad_norm": 0.8485681414604187, - "learning_rate": 1.889407170095797e-05, - "loss": 0.3576, - "step": 3569 - }, - { - "epoch": 0.33630861260921785, - "grad_norm": 0.8599330186843872, - "learning_rate": 1.8893381346378775e-05, - "loss": 0.3728, - "step": 3570 - }, - { - "epoch": 0.3364028167023857, - "grad_norm": 0.9006670713424683, - "learning_rate": 1.889269078901673e-05, - "loss": 0.3922, - "step": 3571 - }, - { - "epoch": 0.33649702079555355, - "grad_norm": 0.8296506404876709, - "learning_rate": 1.8892000028887586e-05, - "loss": 0.3499, - "step": 3572 - }, - { - "epoch": 0.3365912248887214, - "grad_norm": 0.9027323126792908, - "learning_rate": 1.8891309066007087e-05, - "loss": 0.4125, - "step": 3573 - }, - { - "epoch": 0.33668542898188925, - "grad_norm": 0.8506341576576233, - "learning_rate": 
1.889061790039099e-05, - "loss": 0.3719, - "step": 3574 - }, - { - "epoch": 0.3367796330750571, - "grad_norm": 0.8128954768180847, - "learning_rate": 1.8889926532055057e-05, - "loss": 0.3563, - "step": 3575 - }, - { - "epoch": 0.33687383716822494, - "grad_norm": 0.8806983828544617, - "learning_rate": 1.888923496101505e-05, - "loss": 0.3788, - "step": 3576 - }, - { - "epoch": 0.3369680412613928, - "grad_norm": 0.8672088980674744, - "learning_rate": 1.8888543187286738e-05, - "loss": 0.3162, - "step": 3577 - }, - { - "epoch": 0.33706224535456064, - "grad_norm": 0.8352708220481873, - "learning_rate": 1.8887851210885893e-05, - "loss": 0.3146, - "step": 3578 - }, - { - "epoch": 0.3371564494477285, - "grad_norm": 1.2389875650405884, - "learning_rate": 1.88871590318283e-05, - "loss": 0.3541, - "step": 3579 - }, - { - "epoch": 0.33725065354089634, - "grad_norm": 0.7673290371894836, - "learning_rate": 1.8886466650129734e-05, - "loss": 0.3417, - "step": 3580 - }, - { - "epoch": 0.3373448576340642, - "grad_norm": 0.8438095450401306, - "learning_rate": 1.8885774065805987e-05, - "loss": 0.413, - "step": 3581 - }, - { - "epoch": 0.33743906172723204, - "grad_norm": 1.0049430131912231, - "learning_rate": 1.8885081278872846e-05, - "loss": 0.4122, - "step": 3582 - }, - { - "epoch": 0.3375332658203999, - "grad_norm": 0.8492429256439209, - "learning_rate": 1.8884388289346115e-05, - "loss": 0.3387, - "step": 3583 - }, - { - "epoch": 0.33762746991356773, - "grad_norm": 1.0927600860595703, - "learning_rate": 1.8883695097241584e-05, - "loss": 0.4039, - "step": 3584 - }, - { - "epoch": 0.3377216740067356, - "grad_norm": 0.7235574126243591, - "learning_rate": 1.8883001702575074e-05, - "loss": 0.3715, - "step": 3585 - }, - { - "epoch": 0.33781587809990343, - "grad_norm": 1.0561778545379639, - "learning_rate": 1.8882308105362383e-05, - "loss": 0.3545, - "step": 3586 - }, - { - "epoch": 0.3379100821930713, - "grad_norm": 0.7849492430686951, - "learning_rate": 1.888161430561933e-05, - "loss": 0.3578, - "step": 3587 - }, - { - "epoch": 0.33800428628623913, - "grad_norm": 0.868797242641449, - "learning_rate": 1.888092030336174e-05, - "loss": 0.4318, - "step": 3588 - }, - { - "epoch": 0.338098490379407, - "grad_norm": 0.7665191292762756, - "learning_rate": 1.8880226098605427e-05, - "loss": 0.3273, - "step": 3589 - }, - { - "epoch": 0.33819269447257483, - "grad_norm": 0.8948681354522705, - "learning_rate": 1.8879531691366228e-05, - "loss": 0.4198, - "step": 3590 - }, - { - "epoch": 0.3382868985657427, - "grad_norm": 0.7987225651741028, - "learning_rate": 1.887883708165997e-05, - "loss": 0.3509, - "step": 3591 - }, - { - "epoch": 0.3383811026589105, - "grad_norm": 0.8362273573875427, - "learning_rate": 1.88781422695025e-05, - "loss": 0.3478, - "step": 3592 - }, - { - "epoch": 0.3384753067520784, - "grad_norm": 0.815270185470581, - "learning_rate": 1.8877447254909654e-05, - "loss": 0.3879, - "step": 3593 - }, - { - "epoch": 0.3385695108452462, - "grad_norm": 0.8665373921394348, - "learning_rate": 1.8876752037897285e-05, - "loss": 0.3813, - "step": 3594 - }, - { - "epoch": 0.3386637149384141, - "grad_norm": 0.766981303691864, - "learning_rate": 1.887605661848124e-05, - "loss": 0.3206, - "step": 3595 - }, - { - "epoch": 0.3387579190315819, - "grad_norm": 0.7618220448493958, - "learning_rate": 1.8875360996677373e-05, - "loss": 0.3532, - "step": 3596 - }, - { - "epoch": 0.33885212312474977, - "grad_norm": 0.9736241102218628, - "learning_rate": 1.8874665172501548e-05, - "loss": 0.3866, - "step": 3597 - }, - { - "epoch": 
0.3389463272179176, - "grad_norm": 0.7357736825942993, - "learning_rate": 1.8873969145969636e-05, - "loss": 0.3321, - "step": 3598 - }, - { - "epoch": 0.33904053131108547, - "grad_norm": 1.4544962644577026, - "learning_rate": 1.8873272917097505e-05, - "loss": 0.3433, - "step": 3599 - }, - { - "epoch": 0.3391347354042533, - "grad_norm": 0.8322678804397583, - "learning_rate": 1.8872576485901027e-05, - "loss": 0.364, - "step": 3600 - }, - { - "epoch": 0.33922893949742117, - "grad_norm": 0.8751059770584106, - "learning_rate": 1.8871879852396083e-05, - "loss": 0.3336, - "step": 3601 - }, - { - "epoch": 0.339323143590589, - "grad_norm": 0.8897792100906372, - "learning_rate": 1.887118301659856e-05, - "loss": 0.377, - "step": 3602 - }, - { - "epoch": 0.33941734768375686, - "grad_norm": 0.8926268219947815, - "learning_rate": 1.887048597852434e-05, - "loss": 0.3551, - "step": 3603 - }, - { - "epoch": 0.3395115517769247, - "grad_norm": 0.9420211911201477, - "learning_rate": 1.8869788738189326e-05, - "loss": 0.3914, - "step": 3604 - }, - { - "epoch": 0.33960575587009256, - "grad_norm": 0.8444533348083496, - "learning_rate": 1.8869091295609407e-05, - "loss": 0.3364, - "step": 3605 - }, - { - "epoch": 0.3396999599632604, - "grad_norm": 0.7704145312309265, - "learning_rate": 1.886839365080049e-05, - "loss": 0.3386, - "step": 3606 - }, - { - "epoch": 0.33979416405642826, - "grad_norm": 0.803094208240509, - "learning_rate": 1.8867695803778487e-05, - "loss": 0.3435, - "step": 3607 - }, - { - "epoch": 0.3398883681495961, - "grad_norm": 0.761256754398346, - "learning_rate": 1.8866997754559304e-05, - "loss": 0.3099, - "step": 3608 - }, - { - "epoch": 0.33998257224276396, - "grad_norm": 0.8315357565879822, - "learning_rate": 1.8866299503158858e-05, - "loss": 0.3901, - "step": 3609 - }, - { - "epoch": 0.3400767763359318, - "grad_norm": 0.7468592524528503, - "learning_rate": 1.8865601049593075e-05, - "loss": 0.3118, - "step": 3610 - }, - { - "epoch": 0.34017098042909966, - "grad_norm": 0.807157039642334, - "learning_rate": 1.8864902393877874e-05, - "loss": 0.3802, - "step": 3611 - }, - { - "epoch": 0.3402651845222675, - "grad_norm": 0.9533775448799133, - "learning_rate": 1.8864203536029188e-05, - "loss": 0.4266, - "step": 3612 - }, - { - "epoch": 0.34035938861543535, - "grad_norm": 0.8372734785079956, - "learning_rate": 1.886350447606295e-05, - "loss": 0.3423, - "step": 3613 - }, - { - "epoch": 0.3404535927086032, - "grad_norm": 0.796859085559845, - "learning_rate": 1.8862805213995107e-05, - "loss": 0.3512, - "step": 3614 - }, - { - "epoch": 0.34054779680177105, - "grad_norm": 0.7418161630630493, - "learning_rate": 1.8862105749841596e-05, - "loss": 0.3033, - "step": 3615 - }, - { - "epoch": 0.3406420008949389, - "grad_norm": 0.7803031802177429, - "learning_rate": 1.8861406083618373e-05, - "loss": 0.3306, - "step": 3616 - }, - { - "epoch": 0.34073620498810675, - "grad_norm": 0.8984958529472351, - "learning_rate": 1.8860706215341383e-05, - "loss": 0.4, - "step": 3617 - }, - { - "epoch": 0.3408304090812746, - "grad_norm": 0.8020842671394348, - "learning_rate": 1.8860006145026585e-05, - "loss": 0.3364, - "step": 3618 - }, - { - "epoch": 0.34092461317444245, - "grad_norm": 0.8880037069320679, - "learning_rate": 1.885930587268995e-05, - "loss": 0.3755, - "step": 3619 - }, - { - "epoch": 0.3410188172676103, - "grad_norm": 0.8976454734802246, - "learning_rate": 1.8858605398347438e-05, - "loss": 0.4055, - "step": 3620 - }, - { - "epoch": 0.34111302136077815, - "grad_norm": 0.7939981818199158, - "learning_rate": 
1.885790472201502e-05, - "loss": 0.3137, - "step": 3621 - }, - { - "epoch": 0.341207225453946, - "grad_norm": 0.8750141263008118, - "learning_rate": 1.885720384370868e-05, - "loss": 0.3721, - "step": 3622 - }, - { - "epoch": 0.34130142954711384, - "grad_norm": 0.8971021771430969, - "learning_rate": 1.8856502763444392e-05, - "loss": 0.3571, - "step": 3623 - }, - { - "epoch": 0.3413956336402817, - "grad_norm": 0.7954822182655334, - "learning_rate": 1.8855801481238146e-05, - "loss": 0.3529, - "step": 3624 - }, - { - "epoch": 0.34148983773344954, - "grad_norm": 0.813042402267456, - "learning_rate": 1.8855099997105927e-05, - "loss": 0.3477, - "step": 3625 - }, - { - "epoch": 0.3415840418266174, - "grad_norm": 0.8554645776748657, - "learning_rate": 1.8854398311063734e-05, - "loss": 0.3717, - "step": 3626 - }, - { - "epoch": 0.34167824591978524, - "grad_norm": 0.9292104244232178, - "learning_rate": 1.885369642312757e-05, - "loss": 0.4091, - "step": 3627 - }, - { - "epoch": 0.3417724500129531, - "grad_norm": 1.1380723714828491, - "learning_rate": 1.8852994333313432e-05, - "loss": 0.3633, - "step": 3628 - }, - { - "epoch": 0.34186665410612094, - "grad_norm": 0.9120110273361206, - "learning_rate": 1.885229204163733e-05, - "loss": 0.3871, - "step": 3629 - }, - { - "epoch": 0.3419608581992888, - "grad_norm": 0.8591219186782837, - "learning_rate": 1.8851589548115282e-05, - "loss": 0.4018, - "step": 3630 - }, - { - "epoch": 0.3420550622924566, - "grad_norm": 0.8268954157829285, - "learning_rate": 1.8850886852763302e-05, - "loss": 0.3343, - "step": 3631 - }, - { - "epoch": 0.34214926638562443, - "grad_norm": 0.9206968545913696, - "learning_rate": 1.8850183955597416e-05, - "loss": 0.3305, - "step": 3632 - }, - { - "epoch": 0.3422434704787923, - "grad_norm": 0.7509943246841431, - "learning_rate": 1.8849480856633647e-05, - "loss": 0.3255, - "step": 3633 - }, - { - "epoch": 0.3423376745719601, - "grad_norm": 0.7434682250022888, - "learning_rate": 1.8848777555888027e-05, - "loss": 0.311, - "step": 3634 - }, - { - "epoch": 0.342431878665128, - "grad_norm": 0.8783538341522217, - "learning_rate": 1.8848074053376597e-05, - "loss": 0.4488, - "step": 3635 - }, - { - "epoch": 0.3425260827582958, - "grad_norm": 1.094637393951416, - "learning_rate": 1.884737034911539e-05, - "loss": 0.375, - "step": 3636 - }, - { - "epoch": 0.3426202868514637, - "grad_norm": 0.7740301489830017, - "learning_rate": 1.884666644312046e-05, - "loss": 0.364, - "step": 3637 - }, - { - "epoch": 0.3427144909446315, - "grad_norm": 0.841786801815033, - "learning_rate": 1.8845962335407854e-05, - "loss": 0.3757, - "step": 3638 - }, - { - "epoch": 0.34280869503779937, - "grad_norm": 1.4861465692520142, - "learning_rate": 1.884525802599362e-05, - "loss": 0.364, - "step": 3639 - }, - { - "epoch": 0.3429028991309672, - "grad_norm": 0.7735800743103027, - "learning_rate": 1.884455351489383e-05, - "loss": 0.3298, - "step": 3640 - }, - { - "epoch": 0.34299710322413507, - "grad_norm": 0.87845379114151, - "learning_rate": 1.884384880212454e-05, - "loss": 0.3609, - "step": 3641 - }, - { - "epoch": 0.3430913073173029, - "grad_norm": 0.9437776207923889, - "learning_rate": 1.884314388770182e-05, - "loss": 0.3503, - "step": 3642 - }, - { - "epoch": 0.34318551141047077, - "grad_norm": 0.8418173789978027, - "learning_rate": 1.8842438771641745e-05, - "loss": 0.3316, - "step": 3643 - }, - { - "epoch": 0.3432797155036386, - "grad_norm": 0.8820387721061707, - "learning_rate": 1.884173345396039e-05, - "loss": 0.3608, - "step": 3644 - }, - { - "epoch": 
0.34337391959680647, - "grad_norm": 0.8833423852920532, - "learning_rate": 1.8841027934673835e-05, - "loss": 0.3796, - "step": 3645 - }, - { - "epoch": 0.3434681236899743, - "grad_norm": 0.8288242816925049, - "learning_rate": 1.8840322213798173e-05, - "loss": 0.3669, - "step": 3646 - }, - { - "epoch": 0.34356232778314216, - "grad_norm": 0.7158920168876648, - "learning_rate": 1.8839616291349493e-05, - "loss": 0.2771, - "step": 3647 - }, - { - "epoch": 0.34365653187631, - "grad_norm": 0.9035677909851074, - "learning_rate": 1.883891016734389e-05, - "loss": 0.3581, - "step": 3648 - }, - { - "epoch": 0.34375073596947786, - "grad_norm": 0.7641481161117554, - "learning_rate": 1.8838203841797464e-05, - "loss": 0.3837, - "step": 3649 - }, - { - "epoch": 0.3438449400626457, - "grad_norm": 0.8371859192848206, - "learning_rate": 1.8837497314726322e-05, - "loss": 0.3365, - "step": 3650 - }, - { - "epoch": 0.34393914415581356, - "grad_norm": 2.0889203548431396, - "learning_rate": 1.8836790586146578e-05, - "loss": 0.3551, - "step": 3651 - }, - { - "epoch": 0.3440333482489814, - "grad_norm": 0.684039294719696, - "learning_rate": 1.883608365607434e-05, - "loss": 0.3247, - "step": 3652 - }, - { - "epoch": 0.34412755234214926, - "grad_norm": 0.8166343569755554, - "learning_rate": 1.8835376524525725e-05, - "loss": 0.3166, - "step": 3653 - }, - { - "epoch": 0.3442217564353171, - "grad_norm": 0.8467241525650024, - "learning_rate": 1.8834669191516866e-05, - "loss": 0.3941, - "step": 3654 - }, - { - "epoch": 0.34431596052848495, - "grad_norm": 0.8928960561752319, - "learning_rate": 1.8833961657063887e-05, - "loss": 0.3683, - "step": 3655 - }, - { - "epoch": 0.3444101646216528, - "grad_norm": 0.9479843974113464, - "learning_rate": 1.8833253921182916e-05, - "loss": 0.3575, - "step": 3656 - }, - { - "epoch": 0.34450436871482065, - "grad_norm": 0.9287818670272827, - "learning_rate": 1.88325459838901e-05, - "loss": 0.3589, - "step": 3657 - }, - { - "epoch": 0.3445985728079885, - "grad_norm": 0.7292333841323853, - "learning_rate": 1.8831837845201573e-05, - "loss": 0.3028, - "step": 3658 - }, - { - "epoch": 0.34469277690115635, - "grad_norm": 0.7780672907829285, - "learning_rate": 1.8831129505133485e-05, - "loss": 0.3375, - "step": 3659 - }, - { - "epoch": 0.3447869809943242, - "grad_norm": 0.9152745604515076, - "learning_rate": 1.8830420963701985e-05, - "loss": 0.3537, - "step": 3660 - }, - { - "epoch": 0.34488118508749205, - "grad_norm": 0.7500927448272705, - "learning_rate": 1.8829712220923228e-05, - "loss": 0.3127, - "step": 3661 - }, - { - "epoch": 0.3449753891806599, - "grad_norm": 0.9070961475372314, - "learning_rate": 1.8829003276813383e-05, - "loss": 0.3357, - "step": 3662 - }, - { - "epoch": 0.34506959327382775, - "grad_norm": 0.9231308698654175, - "learning_rate": 1.8828294131388607e-05, - "loss": 0.3461, - "step": 3663 - }, - { - "epoch": 0.3451637973669956, - "grad_norm": 0.7690940499305725, - "learning_rate": 1.882758478466507e-05, - "loss": 0.3342, - "step": 3664 - }, - { - "epoch": 0.34525800146016344, - "grad_norm": 0.902086079120636, - "learning_rate": 1.882687523665895e-05, - "loss": 0.3846, - "step": 3665 - }, - { - "epoch": 0.3453522055533313, - "grad_norm": 0.8080546259880066, - "learning_rate": 1.882616548738642e-05, - "loss": 0.3162, - "step": 3666 - }, - { - "epoch": 0.34544640964649914, - "grad_norm": 0.7301914095878601, - "learning_rate": 1.882545553686367e-05, - "loss": 0.3039, - "step": 3667 - }, - { - "epoch": 0.345540613739667, - "grad_norm": 0.9094197750091553, - "learning_rate": 
1.8824745385106883e-05, - "loss": 0.3576, - "step": 3668 - }, - { - "epoch": 0.34563481783283484, - "grad_norm": 0.9327391982078552, - "learning_rate": 1.8824035032132256e-05, - "loss": 0.3676, - "step": 3669 - }, - { - "epoch": 0.3457290219260027, - "grad_norm": 0.9570345282554626, - "learning_rate": 1.8823324477955985e-05, - "loss": 0.4053, - "step": 3670 - }, - { - "epoch": 0.34582322601917054, - "grad_norm": 0.8123584985733032, - "learning_rate": 1.8822613722594264e-05, - "loss": 0.3242, - "step": 3671 - }, - { - "epoch": 0.3459174301123384, - "grad_norm": 0.8803601264953613, - "learning_rate": 1.8821902766063312e-05, - "loss": 0.3964, - "step": 3672 - }, - { - "epoch": 0.34601163420550624, - "grad_norm": 0.8959583044052124, - "learning_rate": 1.882119160837933e-05, - "loss": 0.386, - "step": 3673 - }, - { - "epoch": 0.3461058382986741, - "grad_norm": 0.827250063419342, - "learning_rate": 1.8820480249558538e-05, - "loss": 0.3226, - "step": 3674 - }, - { - "epoch": 0.34620004239184193, - "grad_norm": 0.7794238924980164, - "learning_rate": 1.8819768689617154e-05, - "loss": 0.3718, - "step": 3675 - }, - { - "epoch": 0.3462942464850098, - "grad_norm": 1.088750958442688, - "learning_rate": 1.8819056928571405e-05, - "loss": 0.4164, - "step": 3676 - }, - { - "epoch": 0.34638845057817763, - "grad_norm": 0.7334954738616943, - "learning_rate": 1.881834496643752e-05, - "loss": 0.315, - "step": 3677 - }, - { - "epoch": 0.3464826546713455, - "grad_norm": 0.9737700819969177, - "learning_rate": 1.881763280323173e-05, - "loss": 0.3383, - "step": 3678 - }, - { - "epoch": 0.34657685876451333, - "grad_norm": 0.845251202583313, - "learning_rate": 1.881692043897028e-05, - "loss": 0.3463, - "step": 3679 - }, - { - "epoch": 0.3466710628576812, - "grad_norm": 0.7490604519844055, - "learning_rate": 1.8816207873669406e-05, - "loss": 0.3522, - "step": 3680 - }, - { - "epoch": 0.346765266950849, - "grad_norm": 0.9043464064598083, - "learning_rate": 1.8815495107345355e-05, - "loss": 0.3892, - "step": 3681 - }, - { - "epoch": 0.3468594710440169, - "grad_norm": 0.8788156509399414, - "learning_rate": 1.8814782140014386e-05, - "loss": 0.3462, - "step": 3682 - }, - { - "epoch": 0.3469536751371847, - "grad_norm": 1.0707277059555054, - "learning_rate": 1.8814068971692747e-05, - "loss": 0.3827, - "step": 3683 - }, - { - "epoch": 0.3470478792303526, - "grad_norm": 0.8980178236961365, - "learning_rate": 1.881335560239671e-05, - "loss": 0.3636, - "step": 3684 - }, - { - "epoch": 0.3471420833235204, - "grad_norm": 0.8110436201095581, - "learning_rate": 1.881264203214253e-05, - "loss": 0.3325, - "step": 3685 - }, - { - "epoch": 0.34723628741668827, - "grad_norm": 0.871622622013092, - "learning_rate": 1.8811928260946482e-05, - "loss": 0.3942, - "step": 3686 - }, - { - "epoch": 0.3473304915098561, - "grad_norm": 0.8642867803573608, - "learning_rate": 1.8811214288824847e-05, - "loss": 0.3338, - "step": 3687 - }, - { - "epoch": 0.34742469560302397, - "grad_norm": 0.8314307928085327, - "learning_rate": 1.8810500115793895e-05, - "loss": 0.2986, - "step": 3688 - }, - { - "epoch": 0.3475188996961918, - "grad_norm": 0.7452064156532288, - "learning_rate": 1.880978574186992e-05, - "loss": 0.3237, - "step": 3689 - }, - { - "epoch": 0.34761310378935967, - "grad_norm": 0.8303627371788025, - "learning_rate": 1.8809071167069202e-05, - "loss": 0.3095, - "step": 3690 - }, - { - "epoch": 0.3477073078825275, - "grad_norm": 0.8369124531745911, - "learning_rate": 1.8808356391408037e-05, - "loss": 0.345, - "step": 3691 - }, - { - "epoch": 
0.34780151197569537, - "grad_norm": 0.7832201719284058, - "learning_rate": 1.8807641414902726e-05, - "loss": 0.3605, - "step": 3692 - }, - { - "epoch": 0.3478957160688632, - "grad_norm": 0.7621707320213318, - "learning_rate": 1.8806926237569566e-05, - "loss": 0.3313, - "step": 3693 - }, - { - "epoch": 0.34798992016203106, - "grad_norm": 0.8039724230766296, - "learning_rate": 1.880621085942487e-05, - "loss": 0.3815, - "step": 3694 - }, - { - "epoch": 0.3480841242551989, - "grad_norm": 0.9030138850212097, - "learning_rate": 1.880549528048495e-05, - "loss": 0.3179, - "step": 3695 - }, - { - "epoch": 0.34817832834836676, - "grad_norm": 0.7929380536079407, - "learning_rate": 1.8804779500766118e-05, - "loss": 0.3375, - "step": 3696 - }, - { - "epoch": 0.3482725324415346, - "grad_norm": 0.8301117420196533, - "learning_rate": 1.8804063520284693e-05, - "loss": 0.3539, - "step": 3697 - }, - { - "epoch": 0.34836673653470246, - "grad_norm": 0.8487005233764648, - "learning_rate": 1.8803347339057008e-05, - "loss": 0.3629, - "step": 3698 - }, - { - "epoch": 0.3484609406278703, - "grad_norm": 0.8481366634368896, - "learning_rate": 1.8802630957099387e-05, - "loss": 0.3991, - "step": 3699 - }, - { - "epoch": 0.3485551447210381, - "grad_norm": 0.8308368921279907, - "learning_rate": 1.880191437442817e-05, - "loss": 0.3254, - "step": 3700 - }, - { - "epoch": 0.34864934881420595, - "grad_norm": 1.0492184162139893, - "learning_rate": 1.8801197591059686e-05, - "loss": 0.3924, - "step": 3701 - }, - { - "epoch": 0.3487435529073738, - "grad_norm": 0.9140027761459351, - "learning_rate": 1.880048060701029e-05, - "loss": 0.3779, - "step": 3702 - }, - { - "epoch": 0.34883775700054165, - "grad_norm": 0.9277212023735046, - "learning_rate": 1.8799763422296327e-05, - "loss": 0.4054, - "step": 3703 - }, - { - "epoch": 0.3489319610937095, - "grad_norm": 0.7970870137214661, - "learning_rate": 1.8799046036934147e-05, - "loss": 0.3884, - "step": 3704 - }, - { - "epoch": 0.34902616518687735, - "grad_norm": 0.8798114657402039, - "learning_rate": 1.879832845094011e-05, - "loss": 0.3645, - "step": 3705 - }, - { - "epoch": 0.3491203692800452, - "grad_norm": 0.7972387075424194, - "learning_rate": 1.8797610664330578e-05, - "loss": 0.3876, - "step": 3706 - }, - { - "epoch": 0.34921457337321304, - "grad_norm": 0.8588185906410217, - "learning_rate": 1.8796892677121917e-05, - "loss": 0.3539, - "step": 3707 - }, - { - "epoch": 0.3493087774663809, - "grad_norm": 0.8626374006271362, - "learning_rate": 1.8796174489330497e-05, - "loss": 0.4001, - "step": 3708 - }, - { - "epoch": 0.34940298155954874, - "grad_norm": 0.8604520559310913, - "learning_rate": 1.8795456100972697e-05, - "loss": 0.3593, - "step": 3709 - }, - { - "epoch": 0.3494971856527166, - "grad_norm": 0.8322687745094299, - "learning_rate": 1.879473751206489e-05, - "loss": 0.3562, - "step": 3710 - }, - { - "epoch": 0.34959138974588444, - "grad_norm": 0.817677915096283, - "learning_rate": 1.879401872262347e-05, - "loss": 0.3697, - "step": 3711 - }, - { - "epoch": 0.3496855938390523, - "grad_norm": 0.7093011736869812, - "learning_rate": 1.8793299732664826e-05, - "loss": 0.3279, - "step": 3712 - }, - { - "epoch": 0.34977979793222014, - "grad_norm": 0.9430680274963379, - "learning_rate": 1.8792580542205348e-05, - "loss": 0.3841, - "step": 3713 - }, - { - "epoch": 0.349874002025388, - "grad_norm": 0.8343164920806885, - "learning_rate": 1.879186115126143e-05, - "loss": 0.3902, - "step": 3714 - }, - { - "epoch": 0.34996820611855584, - "grad_norm": 0.821296751499176, - "learning_rate": 
1.879114155984949e-05, - "loss": 0.4154, - "step": 3715 - }, - { - "epoch": 0.3500624102117237, - "grad_norm": 0.7688080668449402, - "learning_rate": 1.879042176798592e-05, - "loss": 0.3671, - "step": 3716 - }, - { - "epoch": 0.35015661430489153, - "grad_norm": 0.6696940064430237, - "learning_rate": 1.8789701775687142e-05, - "loss": 0.3031, - "step": 3717 - }, - { - "epoch": 0.3502508183980594, - "grad_norm": 0.8230246305465698, - "learning_rate": 1.878898158296957e-05, - "loss": 0.3715, - "step": 3718 - }, - { - "epoch": 0.35034502249122723, - "grad_norm": 0.8132197260856628, - "learning_rate": 1.8788261189849622e-05, - "loss": 0.3505, - "step": 3719 - }, - { - "epoch": 0.3504392265843951, - "grad_norm": 0.8106905221939087, - "learning_rate": 1.878754059634373e-05, - "loss": 0.347, - "step": 3720 - }, - { - "epoch": 0.35053343067756293, - "grad_norm": 0.9174231886863708, - "learning_rate": 1.878681980246832e-05, - "loss": 0.371, - "step": 3721 - }, - { - "epoch": 0.3506276347707308, - "grad_norm": 0.9030629396438599, - "learning_rate": 1.8786098808239832e-05, - "loss": 0.3696, - "step": 3722 - }, - { - "epoch": 0.3507218388638986, - "grad_norm": 0.9923394918441772, - "learning_rate": 1.8785377613674705e-05, - "loss": 0.4036, - "step": 3723 - }, - { - "epoch": 0.3508160429570665, - "grad_norm": 1.898908257484436, - "learning_rate": 1.878465621878938e-05, - "loss": 0.3972, - "step": 3724 - }, - { - "epoch": 0.3509102470502343, - "grad_norm": 1.493820309638977, - "learning_rate": 1.8783934623600307e-05, - "loss": 0.3234, - "step": 3725 - }, - { - "epoch": 0.3510044511434022, - "grad_norm": 0.7513868808746338, - "learning_rate": 1.878321282812394e-05, - "loss": 0.347, - "step": 3726 - }, - { - "epoch": 0.35109865523657, - "grad_norm": 0.8554733395576477, - "learning_rate": 1.8782490832376737e-05, - "loss": 0.3507, - "step": 3727 - }, - { - "epoch": 0.35119285932973787, - "grad_norm": 0.8039769530296326, - "learning_rate": 1.8781768636375158e-05, - "loss": 0.3337, - "step": 3728 - }, - { - "epoch": 0.3512870634229057, - "grad_norm": 0.7684638500213623, - "learning_rate": 1.8781046240135678e-05, - "loss": 0.3307, - "step": 3729 - }, - { - "epoch": 0.35138126751607357, - "grad_norm": 0.8677926659584045, - "learning_rate": 1.878032364367476e-05, - "loss": 0.3922, - "step": 3730 - }, - { - "epoch": 0.3514754716092414, - "grad_norm": 0.8814738392829895, - "learning_rate": 1.8779600847008884e-05, - "loss": 0.3616, - "step": 3731 - }, - { - "epoch": 0.35156967570240927, - "grad_norm": 0.7777526378631592, - "learning_rate": 1.8778877850154532e-05, - "loss": 0.3802, - "step": 3732 - }, - { - "epoch": 0.3516638797955771, - "grad_norm": 0.8300305008888245, - "learning_rate": 1.8778154653128186e-05, - "loss": 0.3725, - "step": 3733 - }, - { - "epoch": 0.35175808388874497, - "grad_norm": 0.8085060715675354, - "learning_rate": 1.8777431255946342e-05, - "loss": 0.3436, - "step": 3734 - }, - { - "epoch": 0.3518522879819128, - "grad_norm": 0.8895578980445862, - "learning_rate": 1.877670765862549e-05, - "loss": 0.354, - "step": 3735 - }, - { - "epoch": 0.35194649207508066, - "grad_norm": 0.7110785245895386, - "learning_rate": 1.8775983861182126e-05, - "loss": 0.318, - "step": 3736 - }, - { - "epoch": 0.3520406961682485, - "grad_norm": 0.8436791896820068, - "learning_rate": 1.877525986363276e-05, - "loss": 0.3857, - "step": 3737 - }, - { - "epoch": 0.35213490026141636, - "grad_norm": 0.7469220757484436, - "learning_rate": 1.87745356659939e-05, - "loss": 0.374, - "step": 3738 - }, - { - "epoch": 
0.3522291043545842, - "grad_norm": 0.8427312970161438, - "learning_rate": 1.8773811268282053e-05, - "loss": 0.3268, - "step": 3739 - }, - { - "epoch": 0.35232330844775206, - "grad_norm": 0.7238284945487976, - "learning_rate": 1.877308667051374e-05, - "loss": 0.296, - "step": 3740 - }, - { - "epoch": 0.3524175125409199, - "grad_norm": 0.8895593285560608, - "learning_rate": 1.8772361872705486e-05, - "loss": 0.3594, - "step": 3741 - }, - { - "epoch": 0.35251171663408776, - "grad_norm": 0.8994559645652771, - "learning_rate": 1.8771636874873812e-05, - "loss": 0.3459, - "step": 3742 - }, - { - "epoch": 0.3526059207272556, - "grad_norm": 0.9097210764884949, - "learning_rate": 1.8770911677035254e-05, - "loss": 0.4051, - "step": 3743 - }, - { - "epoch": 0.35270012482042346, - "grad_norm": 0.7662896513938904, - "learning_rate": 1.877018627920635e-05, - "loss": 0.3662, - "step": 3744 - }, - { - "epoch": 0.3527943289135913, - "grad_norm": 0.916433572769165, - "learning_rate": 1.8769460681403628e-05, - "loss": 0.3969, - "step": 3745 - }, - { - "epoch": 0.35288853300675915, - "grad_norm": 0.8712926506996155, - "learning_rate": 1.876873488364364e-05, - "loss": 0.3913, - "step": 3746 - }, - { - "epoch": 0.352982737099927, - "grad_norm": 0.9781820774078369, - "learning_rate": 1.876800888594294e-05, - "loss": 0.3743, - "step": 3747 - }, - { - "epoch": 0.35307694119309485, - "grad_norm": 0.7476435303688049, - "learning_rate": 1.8767282688318075e-05, - "loss": 0.3283, - "step": 3748 - }, - { - "epoch": 0.3531711452862627, - "grad_norm": 0.7212189435958862, - "learning_rate": 1.8766556290785605e-05, - "loss": 0.32, - "step": 3749 - }, - { - "epoch": 0.35326534937943055, - "grad_norm": 0.9467104077339172, - "learning_rate": 1.8765829693362097e-05, - "loss": 0.3758, - "step": 3750 - }, - { - "epoch": 0.3533595534725984, - "grad_norm": 0.8207167983055115, - "learning_rate": 1.876510289606411e-05, - "loss": 0.3419, - "step": 3751 - }, - { - "epoch": 0.35345375756576625, - "grad_norm": 0.8008447289466858, - "learning_rate": 1.8764375898908228e-05, - "loss": 0.3645, - "step": 3752 - }, - { - "epoch": 0.3535479616589341, - "grad_norm": 0.7681457996368408, - "learning_rate": 1.876364870191102e-05, - "loss": 0.3415, - "step": 3753 - }, - { - "epoch": 0.35364216575210194, - "grad_norm": 0.8857048153877258, - "learning_rate": 1.8762921305089064e-05, - "loss": 0.3442, - "step": 3754 - }, - { - "epoch": 0.3537363698452698, - "grad_norm": 0.8540251851081848, - "learning_rate": 1.8762193708458952e-05, - "loss": 0.3576, - "step": 3755 - }, - { - "epoch": 0.35383057393843764, - "grad_norm": 0.9321168065071106, - "learning_rate": 1.8761465912037272e-05, - "loss": 0.4115, - "step": 3756 - }, - { - "epoch": 0.3539247780316055, - "grad_norm": 0.9076574444770813, - "learning_rate": 1.876073791584062e-05, - "loss": 0.3748, - "step": 3757 - }, - { - "epoch": 0.35401898212477334, - "grad_norm": 0.8139601945877075, - "learning_rate": 1.8760009719885595e-05, - "loss": 0.3246, - "step": 3758 - }, - { - "epoch": 0.3541131862179412, - "grad_norm": 0.9335690140724182, - "learning_rate": 1.87592813241888e-05, - "loss": 0.3616, - "step": 3759 - }, - { - "epoch": 0.35420739031110904, - "grad_norm": 0.7616863250732422, - "learning_rate": 1.8758552728766845e-05, - "loss": 0.3594, - "step": 3760 - }, - { - "epoch": 0.3543015944042769, - "grad_norm": 0.7629071474075317, - "learning_rate": 1.875782393363634e-05, - "loss": 0.3384, - "step": 3761 - }, - { - "epoch": 0.35439579849744474, - "grad_norm": 0.912653386592865, - "learning_rate": 
1.875709493881391e-05, - "loss": 0.3394, - "step": 3762 - }, - { - "epoch": 0.3544900025906126, - "grad_norm": 0.9390417337417603, - "learning_rate": 1.8756365744316172e-05, - "loss": 0.3263, - "step": 3763 - }, - { - "epoch": 0.35458420668378043, - "grad_norm": 0.8299874663352966, - "learning_rate": 1.875563635015975e-05, - "loss": 0.338, - "step": 3764 - }, - { - "epoch": 0.3546784107769483, - "grad_norm": 0.865662157535553, - "learning_rate": 1.875490675636128e-05, - "loss": 0.395, - "step": 3765 - }, - { - "epoch": 0.35477261487011613, - "grad_norm": 0.7513821125030518, - "learning_rate": 1.8754176962937397e-05, - "loss": 0.3485, - "step": 3766 - }, - { - "epoch": 0.354866818963284, - "grad_norm": 0.8353309035301208, - "learning_rate": 1.8753446969904737e-05, - "loss": 0.3605, - "step": 3767 - }, - { - "epoch": 0.35496102305645183, - "grad_norm": 0.8011833429336548, - "learning_rate": 1.8752716777279955e-05, - "loss": 0.357, - "step": 3768 - }, - { - "epoch": 0.3550552271496196, - "grad_norm": 0.7678676247596741, - "learning_rate": 1.8751986385079685e-05, - "loss": 0.3138, - "step": 3769 - }, - { - "epoch": 0.35514943124278747, - "grad_norm": 0.75164395570755, - "learning_rate": 1.8751255793320597e-05, - "loss": 0.3262, - "step": 3770 - }, - { - "epoch": 0.3552436353359553, - "grad_norm": 0.7251051068305969, - "learning_rate": 1.875052500201934e-05, - "loss": 0.3536, - "step": 3771 - }, - { - "epoch": 0.35533783942912317, - "grad_norm": 1.1679542064666748, - "learning_rate": 1.874979401119258e-05, - "loss": 0.3832, - "step": 3772 - }, - { - "epoch": 0.355432043522291, - "grad_norm": 0.910009503364563, - "learning_rate": 1.8749062820856986e-05, - "loss": 0.3567, - "step": 3773 - }, - { - "epoch": 0.35552624761545887, - "grad_norm": 0.8140091300010681, - "learning_rate": 1.8748331431029232e-05, - "loss": 0.3485, - "step": 3774 - }, - { - "epoch": 0.3556204517086267, - "grad_norm": 0.8223723769187927, - "learning_rate": 1.8747599841725988e-05, - "loss": 0.3367, - "step": 3775 - }, - { - "epoch": 0.35571465580179457, - "grad_norm": 0.8308326005935669, - "learning_rate": 1.8746868052963938e-05, - "loss": 0.3205, - "step": 3776 - }, - { - "epoch": 0.3558088598949624, - "grad_norm": 0.8465903401374817, - "learning_rate": 1.8746136064759774e-05, - "loss": 0.3395, - "step": 3777 - }, - { - "epoch": 0.35590306398813026, - "grad_norm": 0.7565946578979492, - "learning_rate": 1.8745403877130176e-05, - "loss": 0.2984, - "step": 3778 - }, - { - "epoch": 0.3559972680812981, - "grad_norm": 0.9386110901832581, - "learning_rate": 1.8744671490091848e-05, - "loss": 0.4277, - "step": 3779 - }, - { - "epoch": 0.35609147217446596, - "grad_norm": 0.7682352662086487, - "learning_rate": 1.8743938903661487e-05, - "loss": 0.3544, - "step": 3780 - }, - { - "epoch": 0.3561856762676338, - "grad_norm": 0.8476884961128235, - "learning_rate": 1.8743206117855792e-05, - "loss": 0.3586, - "step": 3781 - }, - { - "epoch": 0.35627988036080166, - "grad_norm": 0.8152371048927307, - "learning_rate": 1.874247313269148e-05, - "loss": 0.3158, - "step": 3782 - }, - { - "epoch": 0.3563740844539695, - "grad_norm": 0.6988675594329834, - "learning_rate": 1.8741739948185256e-05, - "loss": 0.3067, - "step": 3783 - }, - { - "epoch": 0.35646828854713736, - "grad_norm": 0.844517171382904, - "learning_rate": 1.8741006564353847e-05, - "loss": 0.3534, - "step": 3784 - }, - { - "epoch": 0.3565624926403052, - "grad_norm": 0.768746554851532, - "learning_rate": 1.8740272981213965e-05, - "loss": 0.333, - "step": 3785 - }, - { - "epoch": 
0.35665669673347306, - "grad_norm": 0.9396389722824097, - "learning_rate": 1.8739539198782345e-05, - "loss": 0.362, - "step": 3786 - }, - { - "epoch": 0.3567509008266409, - "grad_norm": 0.8951597809791565, - "learning_rate": 1.8738805217075714e-05, - "loss": 0.3854, - "step": 3787 - }, - { - "epoch": 0.35684510491980875, - "grad_norm": 0.8619693517684937, - "learning_rate": 1.873807103611081e-05, - "loss": 0.397, - "step": 3788 - }, - { - "epoch": 0.3569393090129766, - "grad_norm": 0.8142619132995605, - "learning_rate": 1.8737336655904374e-05, - "loss": 0.3517, - "step": 3789 - }, - { - "epoch": 0.35703351310614445, - "grad_norm": 0.7423619627952576, - "learning_rate": 1.8736602076473148e-05, - "loss": 0.334, - "step": 3790 - }, - { - "epoch": 0.3571277171993123, - "grad_norm": 0.8979628682136536, - "learning_rate": 1.8735867297833882e-05, - "loss": 0.354, - "step": 3791 - }, - { - "epoch": 0.35722192129248015, - "grad_norm": 0.9283548593521118, - "learning_rate": 1.8735132320003332e-05, - "loss": 0.4025, - "step": 3792 - }, - { - "epoch": 0.357316125385648, - "grad_norm": 0.7603616714477539, - "learning_rate": 1.8734397142998257e-05, - "loss": 0.3707, - "step": 3793 - }, - { - "epoch": 0.35741032947881585, - "grad_norm": 0.8556933403015137, - "learning_rate": 1.8733661766835417e-05, - "loss": 0.3974, - "step": 3794 - }, - { - "epoch": 0.3575045335719837, - "grad_norm": 0.872689962387085, - "learning_rate": 1.8732926191531584e-05, - "loss": 0.3476, - "step": 3795 - }, - { - "epoch": 0.35759873766515154, - "grad_norm": 0.827884316444397, - "learning_rate": 1.873219041710353e-05, - "loss": 0.3828, - "step": 3796 - }, - { - "epoch": 0.3576929417583194, - "grad_norm": 0.771144449710846, - "learning_rate": 1.8731454443568026e-05, - "loss": 0.3358, - "step": 3797 - }, - { - "epoch": 0.35778714585148724, - "grad_norm": 0.8942975401878357, - "learning_rate": 1.873071827094186e-05, - "loss": 0.357, - "step": 3798 - }, - { - "epoch": 0.3578813499446551, - "grad_norm": 0.8680719137191772, - "learning_rate": 1.8729981899241815e-05, - "loss": 0.3558, - "step": 3799 - }, - { - "epoch": 0.35797555403782294, - "grad_norm": 0.820220947265625, - "learning_rate": 1.872924532848468e-05, - "loss": 0.3441, - "step": 3800 - }, - { - "epoch": 0.3580697581309908, - "grad_norm": 0.8636904358863831, - "learning_rate": 1.872850855868725e-05, - "loss": 0.3445, - "step": 3801 - }, - { - "epoch": 0.35816396222415864, - "grad_norm": 0.816992998123169, - "learning_rate": 1.872777158986633e-05, - "loss": 0.3766, - "step": 3802 - }, - { - "epoch": 0.3582581663173265, - "grad_norm": 0.9569509029388428, - "learning_rate": 1.8727034422038718e-05, - "loss": 0.3857, - "step": 3803 - }, - { - "epoch": 0.35835237041049434, - "grad_norm": 0.9457529187202454, - "learning_rate": 1.8726297055221224e-05, - "loss": 0.37, - "step": 3804 - }, - { - "epoch": 0.3584465745036622, - "grad_norm": 0.7957568168640137, - "learning_rate": 1.8725559489430664e-05, - "loss": 0.3436, - "step": 3805 - }, - { - "epoch": 0.35854077859683003, - "grad_norm": 0.8334015011787415, - "learning_rate": 1.8724821724683847e-05, - "loss": 0.3364, - "step": 3806 - }, - { - "epoch": 0.3586349826899979, - "grad_norm": 0.8348851799964905, - "learning_rate": 1.8724083760997608e-05, - "loss": 0.319, - "step": 3807 - }, - { - "epoch": 0.35872918678316573, - "grad_norm": 0.840887725353241, - "learning_rate": 1.8723345598388764e-05, - "loss": 0.372, - "step": 3808 - }, - { - "epoch": 0.3588233908763336, - "grad_norm": 0.9339392185211182, - "learning_rate": 
1.8722607236874155e-05, - "loss": 0.3722, - "step": 3809 - }, - { - "epoch": 0.35891759496950143, - "grad_norm": 0.7736247181892395, - "learning_rate": 1.8721868676470602e-05, - "loss": 0.3575, - "step": 3810 - }, - { - "epoch": 0.3590117990626693, - "grad_norm": 0.8696429133415222, - "learning_rate": 1.8721129917194962e-05, - "loss": 0.3061, - "step": 3811 - }, - { - "epoch": 0.35910600315583713, - "grad_norm": 0.7258344888687134, - "learning_rate": 1.872039095906407e-05, - "loss": 0.3227, - "step": 3812 - }, - { - "epoch": 0.359200207249005, - "grad_norm": 0.7984716296195984, - "learning_rate": 1.8719651802094775e-05, - "loss": 0.3121, - "step": 3813 - }, - { - "epoch": 0.3592944113421728, - "grad_norm": 0.8016639351844788, - "learning_rate": 1.8718912446303938e-05, - "loss": 0.3661, - "step": 3814 - }, - { - "epoch": 0.3593886154353407, - "grad_norm": 0.858106255531311, - "learning_rate": 1.8718172891708412e-05, - "loss": 0.3631, - "step": 3815 - }, - { - "epoch": 0.3594828195285085, - "grad_norm": 0.7816081047058105, - "learning_rate": 1.871743313832506e-05, - "loss": 0.354, - "step": 3816 - }, - { - "epoch": 0.3595770236216764, - "grad_norm": 0.8900384306907654, - "learning_rate": 1.8716693186170748e-05, - "loss": 0.377, - "step": 3817 - }, - { - "epoch": 0.3596712277148442, - "grad_norm": 0.8501194715499878, - "learning_rate": 1.8715953035262354e-05, - "loss": 0.3507, - "step": 3818 - }, - { - "epoch": 0.35976543180801207, - "grad_norm": 0.9237880706787109, - "learning_rate": 1.871521268561675e-05, - "loss": 0.3818, - "step": 3819 - }, - { - "epoch": 0.3598596359011799, - "grad_norm": 0.8632811307907104, - "learning_rate": 1.8714472137250822e-05, - "loss": 0.3362, - "step": 3820 - }, - { - "epoch": 0.35995383999434777, - "grad_norm": 0.8760498762130737, - "learning_rate": 1.871373139018145e-05, - "loss": 0.3772, - "step": 3821 - }, - { - "epoch": 0.3600480440875156, - "grad_norm": 0.7853875160217285, - "learning_rate": 1.8712990444425527e-05, - "loss": 0.3438, - "step": 3822 - }, - { - "epoch": 0.36014224818068347, - "grad_norm": 0.9706776142120361, - "learning_rate": 1.8712249299999948e-05, - "loss": 0.3318, - "step": 3823 - }, - { - "epoch": 0.3602364522738513, - "grad_norm": 1.0395607948303223, - "learning_rate": 1.871150795692161e-05, - "loss": 0.3257, - "step": 3824 - }, - { - "epoch": 0.36033065636701916, - "grad_norm": 0.817263662815094, - "learning_rate": 1.8710766415207417e-05, - "loss": 0.3363, - "step": 3825 - }, - { - "epoch": 0.360424860460187, - "grad_norm": 0.7719822525978088, - "learning_rate": 1.8710024674874278e-05, - "loss": 0.3349, - "step": 3826 - }, - { - "epoch": 0.36051906455335486, - "grad_norm": 0.7931634187698364, - "learning_rate": 1.8709282735939106e-05, - "loss": 0.3832, - "step": 3827 - }, - { - "epoch": 0.3606132686465227, - "grad_norm": 0.7968805432319641, - "learning_rate": 1.8708540598418818e-05, - "loss": 0.3211, - "step": 3828 - }, - { - "epoch": 0.36070747273969056, - "grad_norm": 0.9289378523826599, - "learning_rate": 1.8707798262330337e-05, - "loss": 0.4267, - "step": 3829 - }, - { - "epoch": 0.3608016768328584, - "grad_norm": 0.7175437808036804, - "learning_rate": 1.8707055727690592e-05, - "loss": 0.2979, - "step": 3830 - }, - { - "epoch": 0.36089588092602626, - "grad_norm": 0.7979193329811096, - "learning_rate": 1.8706312994516508e-05, - "loss": 0.3269, - "step": 3831 - }, - { - "epoch": 0.3609900850191941, - "grad_norm": 0.7682902812957764, - "learning_rate": 1.870557006282502e-05, - "loss": 0.3194, - "step": 3832 - }, - { - "epoch": 
0.36108428911236196, - "grad_norm": 0.7294326424598694, - "learning_rate": 1.8704826932633072e-05, - "loss": 0.3405, - "step": 3833 - }, - { - "epoch": 0.3611784932055298, - "grad_norm": 0.8687989711761475, - "learning_rate": 1.870408360395761e-05, - "loss": 0.3971, - "step": 3834 - }, - { - "epoch": 0.36127269729869765, - "grad_norm": 0.8161478042602539, - "learning_rate": 1.8703340076815585e-05, - "loss": 0.3438, - "step": 3835 - }, - { - "epoch": 0.3613669013918655, - "grad_norm": 0.8905363082885742, - "learning_rate": 1.8702596351223938e-05, - "loss": 0.388, - "step": 3836 - }, - { - "epoch": 0.36146110548503335, - "grad_norm": 0.796614408493042, - "learning_rate": 1.870185242719964e-05, - "loss": 0.2826, - "step": 3837 - }, - { - "epoch": 0.3615553095782012, - "grad_norm": 0.8235340714454651, - "learning_rate": 1.870110830475965e-05, - "loss": 0.3215, - "step": 3838 - }, - { - "epoch": 0.361649513671369, - "grad_norm": 0.8155031204223633, - "learning_rate": 1.870036398392093e-05, - "loss": 0.3472, - "step": 3839 - }, - { - "epoch": 0.36174371776453684, - "grad_norm": 0.8262405395507812, - "learning_rate": 1.869961946470046e-05, - "loss": 0.3223, - "step": 3840 - }, - { - "epoch": 0.3618379218577047, - "grad_norm": 0.762555718421936, - "learning_rate": 1.8698874747115206e-05, - "loss": 0.3327, - "step": 3841 - }, - { - "epoch": 0.36193212595087254, - "grad_norm": 0.8004673719406128, - "learning_rate": 1.8698129831182165e-05, - "loss": 0.3517, - "step": 3842 - }, - { - "epoch": 0.3620263300440404, - "grad_norm": 0.9113042950630188, - "learning_rate": 1.86973847169183e-05, - "loss": 0.3716, - "step": 3843 - }, - { - "epoch": 0.36212053413720824, - "grad_norm": 0.9958996772766113, - "learning_rate": 1.869663940434062e-05, - "loss": 0.3498, - "step": 3844 - }, - { - "epoch": 0.3622147382303761, - "grad_norm": 0.8684587478637695, - "learning_rate": 1.869589389346611e-05, - "loss": 0.3915, - "step": 3845 - }, - { - "epoch": 0.36230894232354394, - "grad_norm": 0.7987110614776611, - "learning_rate": 1.8695148184311772e-05, - "loss": 0.3386, - "step": 3846 - }, - { - "epoch": 0.3624031464167118, - "grad_norm": 0.87180495262146, - "learning_rate": 1.8694402276894607e-05, - "loss": 0.3813, - "step": 3847 - }, - { - "epoch": 0.36249735050987963, - "grad_norm": 1.040747046470642, - "learning_rate": 1.8693656171231623e-05, - "loss": 0.3869, - "step": 3848 - }, - { - "epoch": 0.3625915546030475, - "grad_norm": 0.8927814960479736, - "learning_rate": 1.8692909867339834e-05, - "loss": 0.3794, - "step": 3849 - }, - { - "epoch": 0.36268575869621533, - "grad_norm": 0.8000523447990417, - "learning_rate": 1.869216336523626e-05, - "loss": 0.3241, - "step": 3850 - }, - { - "epoch": 0.3627799627893832, - "grad_norm": 1.3600900173187256, - "learning_rate": 1.8691416664937915e-05, - "loss": 0.3616, - "step": 3851 - }, - { - "epoch": 0.36287416688255103, - "grad_norm": 0.79920494556427, - "learning_rate": 1.8690669766461827e-05, - "loss": 0.3241, - "step": 3852 - }, - { - "epoch": 0.3629683709757189, - "grad_norm": 0.8300445079803467, - "learning_rate": 1.868992266982503e-05, - "loss": 0.3844, - "step": 3853 - }, - { - "epoch": 0.36306257506888673, - "grad_norm": 0.8212257027626038, - "learning_rate": 1.8689175375044558e-05, - "loss": 0.3618, - "step": 3854 - }, - { - "epoch": 0.3631567791620546, - "grad_norm": 0.7338040471076965, - "learning_rate": 1.8688427882137448e-05, - "loss": 0.3252, - "step": 3855 - }, - { - "epoch": 0.3632509832552224, - "grad_norm": 0.8911314010620117, - "learning_rate": 
1.8687680191120746e-05, - "loss": 0.373, - "step": 3856 - }, - { - "epoch": 0.3633451873483903, - "grad_norm": 0.8007513880729675, - "learning_rate": 1.8686932302011498e-05, - "loss": 0.324, - "step": 3857 - }, - { - "epoch": 0.3634393914415581, - "grad_norm": 0.8686206936836243, - "learning_rate": 1.868618421482676e-05, - "loss": 0.346, - "step": 3858 - }, - { - "epoch": 0.363533595534726, - "grad_norm": 0.9042792320251465, - "learning_rate": 1.8685435929583587e-05, - "loss": 0.3793, - "step": 3859 - }, - { - "epoch": 0.3636277996278938, - "grad_norm": 0.8040450811386108, - "learning_rate": 1.8684687446299046e-05, - "loss": 0.3419, - "step": 3860 - }, - { - "epoch": 0.36372200372106167, - "grad_norm": 0.9083951115608215, - "learning_rate": 1.8683938764990196e-05, - "loss": 0.3588, - "step": 3861 - }, - { - "epoch": 0.3638162078142295, - "grad_norm": 0.8532748222351074, - "learning_rate": 1.8683189885674117e-05, - "loss": 0.3637, - "step": 3862 - }, - { - "epoch": 0.36391041190739737, - "grad_norm": 1.0092157125473022, - "learning_rate": 1.8682440808367874e-05, - "loss": 0.4086, - "step": 3863 - }, - { - "epoch": 0.3640046160005652, - "grad_norm": 0.7411501407623291, - "learning_rate": 1.868169153308856e-05, - "loss": 0.3316, - "step": 3864 - }, - { - "epoch": 0.36409882009373307, - "grad_norm": 0.8762557506561279, - "learning_rate": 1.8680942059853246e-05, - "loss": 0.3776, - "step": 3865 - }, - { - "epoch": 0.3641930241869009, - "grad_norm": 0.755689799785614, - "learning_rate": 1.868019238867903e-05, - "loss": 0.3312, - "step": 3866 - }, - { - "epoch": 0.36428722828006876, - "grad_norm": 0.786252498626709, - "learning_rate": 1.8679442519583004e-05, - "loss": 0.3425, - "step": 3867 - }, - { - "epoch": 0.3643814323732366, - "grad_norm": 0.833122193813324, - "learning_rate": 1.8678692452582266e-05, - "loss": 0.3558, - "step": 3868 - }, - { - "epoch": 0.36447563646640446, - "grad_norm": 0.8415796756744385, - "learning_rate": 1.8677942187693917e-05, - "loss": 0.3323, - "step": 3869 - }, - { - "epoch": 0.3645698405595723, - "grad_norm": 0.8360006213188171, - "learning_rate": 1.8677191724935066e-05, - "loss": 0.3747, - "step": 3870 - }, - { - "epoch": 0.36466404465274016, - "grad_norm": 0.8955270051956177, - "learning_rate": 1.8676441064322827e-05, - "loss": 0.3843, - "step": 3871 - }, - { - "epoch": 0.364758248745908, - "grad_norm": 0.8831353187561035, - "learning_rate": 1.8675690205874306e-05, - "loss": 0.3441, - "step": 3872 - }, - { - "epoch": 0.36485245283907586, - "grad_norm": 0.7926917672157288, - "learning_rate": 1.867493914960664e-05, - "loss": 0.3264, - "step": 3873 - }, - { - "epoch": 0.3649466569322437, - "grad_norm": 0.8308305740356445, - "learning_rate": 1.867418789553694e-05, - "loss": 0.3619, - "step": 3874 - }, - { - "epoch": 0.36504086102541156, - "grad_norm": 0.8902815580368042, - "learning_rate": 1.867343644368234e-05, - "loss": 0.377, - "step": 3875 - }, - { - "epoch": 0.3651350651185794, - "grad_norm": 0.8571057319641113, - "learning_rate": 1.867268479405998e-05, - "loss": 0.348, - "step": 3876 - }, - { - "epoch": 0.36522926921174725, - "grad_norm": 1.425744652748108, - "learning_rate": 1.8671932946686988e-05, - "loss": 0.3631, - "step": 3877 - }, - { - "epoch": 0.3653234733049151, - "grad_norm": 0.7357031106948853, - "learning_rate": 1.8671180901580518e-05, - "loss": 0.3295, - "step": 3878 - }, - { - "epoch": 0.36541767739808295, - "grad_norm": 0.892597496509552, - "learning_rate": 1.867042865875771e-05, - "loss": 0.3744, - "step": 3879 - }, - { - "epoch": 
0.3655118814912508, - "grad_norm": 0.8237472176551819, - "learning_rate": 1.866967621823572e-05, - "loss": 0.3421, - "step": 3880 - }, - { - "epoch": 0.36560608558441865, - "grad_norm": 0.8594658374786377, - "learning_rate": 1.866892358003171e-05, - "loss": 0.3364, - "step": 3881 - }, - { - "epoch": 0.3657002896775865, - "grad_norm": 0.9222422242164612, - "learning_rate": 1.866817074416283e-05, - "loss": 0.3701, - "step": 3882 - }, - { - "epoch": 0.36579449377075435, - "grad_norm": 0.8643504977226257, - "learning_rate": 1.8667417710646253e-05, - "loss": 0.3472, - "step": 3883 - }, - { - "epoch": 0.3658886978639222, - "grad_norm": 0.9093027114868164, - "learning_rate": 1.8666664479499148e-05, - "loss": 0.3786, - "step": 3884 - }, - { - "epoch": 0.36598290195709005, - "grad_norm": 0.8560269474983215, - "learning_rate": 1.866591105073869e-05, - "loss": 0.3382, - "step": 3885 - }, - { - "epoch": 0.3660771060502579, - "grad_norm": 0.8399043679237366, - "learning_rate": 1.8665157424382058e-05, - "loss": 0.3606, - "step": 3886 - }, - { - "epoch": 0.36617131014342574, - "grad_norm": 0.9316064119338989, - "learning_rate": 1.8664403600446435e-05, - "loss": 0.4022, - "step": 3887 - }, - { - "epoch": 0.3662655142365936, - "grad_norm": 0.8026871681213379, - "learning_rate": 1.8663649578949008e-05, - "loss": 0.345, - "step": 3888 - }, - { - "epoch": 0.36635971832976144, - "grad_norm": 0.763960599899292, - "learning_rate": 1.8662895359906973e-05, - "loss": 0.346, - "step": 3889 - }, - { - "epoch": 0.3664539224229293, - "grad_norm": 0.9255404472351074, - "learning_rate": 1.866214094333753e-05, - "loss": 0.3639, - "step": 3890 - }, - { - "epoch": 0.36654812651609714, - "grad_norm": 0.7488731145858765, - "learning_rate": 1.8661386329257874e-05, - "loss": 0.3219, - "step": 3891 - }, - { - "epoch": 0.366642330609265, - "grad_norm": 0.949048638343811, - "learning_rate": 1.8660631517685216e-05, - "loss": 0.4642, - "step": 3892 - }, - { - "epoch": 0.36673653470243284, - "grad_norm": 0.9563046097755432, - "learning_rate": 1.8659876508636764e-05, - "loss": 0.3703, - "step": 3893 - }, - { - "epoch": 0.3668307387956007, - "grad_norm": 0.8219785690307617, - "learning_rate": 1.8659121302129737e-05, - "loss": 0.3185, - "step": 3894 - }, - { - "epoch": 0.36692494288876853, - "grad_norm": 0.8395075798034668, - "learning_rate": 1.8658365898181352e-05, - "loss": 0.3503, - "step": 3895 - }, - { - "epoch": 0.3670191469819364, - "grad_norm": 0.8385910987854004, - "learning_rate": 1.8657610296808832e-05, - "loss": 0.3574, - "step": 3896 - }, - { - "epoch": 0.36711335107510423, - "grad_norm": 0.7934730052947998, - "learning_rate": 1.8656854498029408e-05, - "loss": 0.3486, - "step": 3897 - }, - { - "epoch": 0.3672075551682721, - "grad_norm": 0.7972601652145386, - "learning_rate": 1.8656098501860316e-05, - "loss": 0.3216, - "step": 3898 - }, - { - "epoch": 0.36730175926143993, - "grad_norm": 0.8734428882598877, - "learning_rate": 1.8655342308318792e-05, - "loss": 0.3677, - "step": 3899 - }, - { - "epoch": 0.3673959633546078, - "grad_norm": 0.8547226190567017, - "learning_rate": 1.8654585917422075e-05, - "loss": 0.363, - "step": 3900 - }, - { - "epoch": 0.36749016744777563, - "grad_norm": 0.8113787770271301, - "learning_rate": 1.8653829329187415e-05, - "loss": 0.3459, - "step": 3901 - }, - { - "epoch": 0.3675843715409435, - "grad_norm": 0.775188148021698, - "learning_rate": 1.8653072543632064e-05, - "loss": 0.3819, - "step": 3902 - }, - { - "epoch": 0.3676785756341113, - "grad_norm": 0.9267304539680481, - "learning_rate": 
1.8652315560773276e-05, - "loss": 0.3697, - "step": 3903 - }, - { - "epoch": 0.3677727797272792, - "grad_norm": 0.808070182800293, - "learning_rate": 1.865155838062831e-05, - "loss": 0.2989, - "step": 3904 - }, - { - "epoch": 0.367866983820447, - "grad_norm": 0.7583027482032776, - "learning_rate": 1.8650801003214436e-05, - "loss": 0.3611, - "step": 3905 - }, - { - "epoch": 0.3679611879136149, - "grad_norm": 0.7289133667945862, - "learning_rate": 1.865004342854892e-05, - "loss": 0.3533, - "step": 3906 - }, - { - "epoch": 0.3680553920067827, - "grad_norm": 0.7687907814979553, - "learning_rate": 1.8649285656649035e-05, - "loss": 0.3498, - "step": 3907 - }, - { - "epoch": 0.3681495960999505, - "grad_norm": 0.8461649417877197, - "learning_rate": 1.8648527687532062e-05, - "loss": 0.348, - "step": 3908 - }, - { - "epoch": 0.36824380019311836, - "grad_norm": 1.2314167022705078, - "learning_rate": 1.8647769521215283e-05, - "loss": 0.3559, - "step": 3909 - }, - { - "epoch": 0.3683380042862862, - "grad_norm": 0.8200388550758362, - "learning_rate": 1.8647011157715983e-05, - "loss": 0.3453, - "step": 3910 - }, - { - "epoch": 0.36843220837945406, - "grad_norm": 0.9525389075279236, - "learning_rate": 1.8646252597051458e-05, - "loss": 0.3999, - "step": 3911 - }, - { - "epoch": 0.3685264124726219, - "grad_norm": 0.9427107572555542, - "learning_rate": 1.8645493839238998e-05, - "loss": 0.4111, - "step": 3912 - }, - { - "epoch": 0.36862061656578976, - "grad_norm": 0.8151512742042542, - "learning_rate": 1.8644734884295913e-05, - "loss": 0.4052, - "step": 3913 - }, - { - "epoch": 0.3687148206589576, - "grad_norm": 1.0325891971588135, - "learning_rate": 1.86439757322395e-05, - "loss": 0.3589, - "step": 3914 - }, - { - "epoch": 0.36880902475212546, - "grad_norm": 0.8066545724868774, - "learning_rate": 1.864321638308707e-05, - "loss": 0.3739, - "step": 3915 - }, - { - "epoch": 0.3689032288452933, - "grad_norm": 1.0493665933609009, - "learning_rate": 1.8642456836855944e-05, - "loss": 0.3612, - "step": 3916 - }, - { - "epoch": 0.36899743293846116, - "grad_norm": 0.8509879112243652, - "learning_rate": 1.8641697093563435e-05, - "loss": 0.3753, - "step": 3917 - }, - { - "epoch": 0.369091637031629, - "grad_norm": 0.8060911297798157, - "learning_rate": 1.864093715322687e-05, - "loss": 0.3234, - "step": 3918 - }, - { - "epoch": 0.36918584112479685, - "grad_norm": 0.8596218824386597, - "learning_rate": 1.864017701586357e-05, - "loss": 0.3767, - "step": 3919 - }, - { - "epoch": 0.3692800452179647, - "grad_norm": 0.8724544644355774, - "learning_rate": 1.8639416681490875e-05, - "loss": 0.3412, - "step": 3920 - }, - { - "epoch": 0.36937424931113255, - "grad_norm": 0.7228909730911255, - "learning_rate": 1.863865615012612e-05, - "loss": 0.3406, - "step": 3921 - }, - { - "epoch": 0.3694684534043004, - "grad_norm": 0.8851790428161621, - "learning_rate": 1.863789542178664e-05, - "loss": 0.3716, - "step": 3922 - }, - { - "epoch": 0.36956265749746825, - "grad_norm": 0.7576696872711182, - "learning_rate": 1.8637134496489786e-05, - "loss": 0.3367, - "step": 3923 - }, - { - "epoch": 0.3696568615906361, - "grad_norm": 0.7606495022773743, - "learning_rate": 1.8636373374252908e-05, - "loss": 0.3288, - "step": 3924 - }, - { - "epoch": 0.36975106568380395, - "grad_norm": 0.766771137714386, - "learning_rate": 1.8635612055093365e-05, - "loss": 0.3599, - "step": 3925 - }, - { - "epoch": 0.3698452697769718, - "grad_norm": 0.8141925930976868, - "learning_rate": 1.8634850539028508e-05, - "loss": 0.3571, - "step": 3926 - }, - { - "epoch": 
0.36993947387013965, - "grad_norm": 0.7698119282722473, - "learning_rate": 1.8634088826075704e-05, - "loss": 0.2954, - "step": 3927 - }, - { - "epoch": 0.3700336779633075, - "grad_norm": 0.9796344041824341, - "learning_rate": 1.8633326916252325e-05, - "loss": 0.3692, - "step": 3928 - }, - { - "epoch": 0.37012788205647534, - "grad_norm": 1.10334050655365, - "learning_rate": 1.863256480957574e-05, - "loss": 0.4254, - "step": 3929 - }, - { - "epoch": 0.3702220861496432, - "grad_norm": 0.7263920903205872, - "learning_rate": 1.8631802506063326e-05, - "loss": 0.307, - "step": 3930 - }, - { - "epoch": 0.37031629024281104, - "grad_norm": 0.943158745765686, - "learning_rate": 1.8631040005732466e-05, - "loss": 0.3579, - "step": 3931 - }, - { - "epoch": 0.3704104943359789, - "grad_norm": 0.9819375276565552, - "learning_rate": 1.8630277308600548e-05, - "loss": 0.436, - "step": 3932 - }, - { - "epoch": 0.37050469842914674, - "grad_norm": 0.8973391056060791, - "learning_rate": 1.8629514414684956e-05, - "loss": 0.3431, - "step": 3933 - }, - { - "epoch": 0.3705989025223146, - "grad_norm": 0.7966498136520386, - "learning_rate": 1.862875132400309e-05, - "loss": 0.3537, - "step": 3934 - }, - { - "epoch": 0.37069310661548244, - "grad_norm": 0.8658461570739746, - "learning_rate": 1.862798803657235e-05, - "loss": 0.3343, - "step": 3935 - }, - { - "epoch": 0.3707873107086503, - "grad_norm": 0.9335054755210876, - "learning_rate": 1.862722455241014e-05, - "loss": 0.3932, - "step": 3936 - }, - { - "epoch": 0.37088151480181814, - "grad_norm": 0.7528518438339233, - "learning_rate": 1.8626460871533866e-05, - "loss": 0.3043, - "step": 3937 - }, - { - "epoch": 0.370975718894986, - "grad_norm": 0.8395419716835022, - "learning_rate": 1.8625696993960945e-05, - "loss": 0.3763, - "step": 3938 - }, - { - "epoch": 0.37106992298815383, - "grad_norm": 0.9247756004333496, - "learning_rate": 1.8624932919708794e-05, - "loss": 0.4132, - "step": 3939 - }, - { - "epoch": 0.3711641270813217, - "grad_norm": 1.244017243385315, - "learning_rate": 1.8624168648794833e-05, - "loss": 0.3528, - "step": 3940 - }, - { - "epoch": 0.37125833117448953, - "grad_norm": 1.0481994152069092, - "learning_rate": 1.8623404181236488e-05, - "loss": 0.3886, - "step": 3941 - }, - { - "epoch": 0.3713525352676574, - "grad_norm": 0.8091402649879456, - "learning_rate": 1.862263951705119e-05, - "loss": 0.3523, - "step": 3942 - }, - { - "epoch": 0.37144673936082523, - "grad_norm": 0.7829480767250061, - "learning_rate": 1.8621874656256378e-05, - "loss": 0.3771, - "step": 3943 - }, - { - "epoch": 0.3715409434539931, - "grad_norm": 0.7555829882621765, - "learning_rate": 1.862110959886949e-05, - "loss": 0.3509, - "step": 3944 - }, - { - "epoch": 0.3716351475471609, - "grad_norm": 0.7845138311386108, - "learning_rate": 1.8620344344907973e-05, - "loss": 0.3753, - "step": 3945 - }, - { - "epoch": 0.3717293516403288, - "grad_norm": 0.8772324919700623, - "learning_rate": 1.861957889438927e-05, - "loss": 0.381, - "step": 3946 - }, - { - "epoch": 0.3718235557334966, - "grad_norm": 0.8137223720550537, - "learning_rate": 1.8618813247330836e-05, - "loss": 0.3497, - "step": 3947 - }, - { - "epoch": 0.3719177598266645, - "grad_norm": 0.7370177507400513, - "learning_rate": 1.8618047403750132e-05, - "loss": 0.2666, - "step": 3948 - }, - { - "epoch": 0.3720119639198323, - "grad_norm": 0.7705456614494324, - "learning_rate": 1.861728136366462e-05, - "loss": 0.3215, - "step": 3949 - }, - { - "epoch": 0.37210616801300017, - "grad_norm": 0.9182578921318054, - "learning_rate": 
1.861651512709177e-05, - "loss": 0.3461, - "step": 3950 - }, - { - "epoch": 0.372200372106168, - "grad_norm": 0.8463134765625, - "learning_rate": 1.861574869404904e-05, - "loss": 0.4012, - "step": 3951 - }, - { - "epoch": 0.37229457619933587, - "grad_norm": 0.7903316020965576, - "learning_rate": 1.861498206455392e-05, - "loss": 0.3163, - "step": 3952 - }, - { - "epoch": 0.3723887802925037, - "grad_norm": 0.8026413917541504, - "learning_rate": 1.861421523862389e-05, - "loss": 0.4018, - "step": 3953 - }, - { - "epoch": 0.37248298438567157, - "grad_norm": 0.7673872113227844, - "learning_rate": 1.8613448216276424e-05, - "loss": 0.3272, - "step": 3954 - }, - { - "epoch": 0.3725771884788394, - "grad_norm": 0.9731943011283875, - "learning_rate": 1.861268099752902e-05, - "loss": 0.3321, - "step": 3955 - }, - { - "epoch": 0.37267139257200727, - "grad_norm": 0.8283352255821228, - "learning_rate": 1.861191358239917e-05, - "loss": 0.2921, - "step": 3956 - }, - { - "epoch": 0.3727655966651751, - "grad_norm": 0.734096348285675, - "learning_rate": 1.8611145970904372e-05, - "loss": 0.3303, - "step": 3957 - }, - { - "epoch": 0.37285980075834296, - "grad_norm": 0.8195230960845947, - "learning_rate": 1.8610378163062127e-05, - "loss": 0.3395, - "step": 3958 - }, - { - "epoch": 0.3729540048515108, - "grad_norm": 0.6890751719474792, - "learning_rate": 1.8609610158889943e-05, - "loss": 0.2908, - "step": 3959 - }, - { - "epoch": 0.37304820894467866, - "grad_norm": 0.8741453886032104, - "learning_rate": 1.8608841958405338e-05, - "loss": 0.3464, - "step": 3960 - }, - { - "epoch": 0.3731424130378465, - "grad_norm": 0.83934485912323, - "learning_rate": 1.8608073561625817e-05, - "loss": 0.3746, - "step": 3961 - }, - { - "epoch": 0.37323661713101436, - "grad_norm": 0.9127764105796814, - "learning_rate": 1.8607304968568905e-05, - "loss": 0.3526, - "step": 3962 - }, - { - "epoch": 0.3733308212241822, - "grad_norm": 0.7462518215179443, - "learning_rate": 1.8606536179252132e-05, - "loss": 0.3223, - "step": 3963 - }, - { - "epoch": 0.37342502531735006, - "grad_norm": 0.75594562292099, - "learning_rate": 1.8605767193693023e-05, - "loss": 0.3547, - "step": 3964 - }, - { - "epoch": 0.3735192294105179, - "grad_norm": 0.7274197936058044, - "learning_rate": 1.8604998011909114e-05, - "loss": 0.2963, - "step": 3965 - }, - { - "epoch": 0.37361343350368575, - "grad_norm": 0.779574990272522, - "learning_rate": 1.860422863391794e-05, - "loss": 0.3382, - "step": 3966 - }, - { - "epoch": 0.3737076375968536, - "grad_norm": 0.9977636933326721, - "learning_rate": 1.8603459059737046e-05, - "loss": 0.4237, - "step": 3967 - }, - { - "epoch": 0.37380184169002145, - "grad_norm": 0.8320953845977783, - "learning_rate": 1.8602689289383982e-05, - "loss": 0.3311, - "step": 3968 - }, - { - "epoch": 0.3738960457831893, - "grad_norm": 0.8946393132209778, - "learning_rate": 1.86019193228763e-05, - "loss": 0.4028, - "step": 3969 - }, - { - "epoch": 0.37399024987635715, - "grad_norm": 0.8022248148918152, - "learning_rate": 1.860114916023155e-05, - "loss": 0.3935, - "step": 3970 - }, - { - "epoch": 0.374084453969525, - "grad_norm": 0.8089770078659058, - "learning_rate": 1.8600378801467297e-05, - "loss": 0.357, - "step": 3971 - }, - { - "epoch": 0.37417865806269285, - "grad_norm": 0.95083087682724, - "learning_rate": 1.8599608246601112e-05, - "loss": 0.3627, - "step": 3972 - }, - { - "epoch": 0.3742728621558607, - "grad_norm": 0.8489338159561157, - "learning_rate": 1.8598837495650555e-05, - "loss": 0.3766, - "step": 3973 - }, - { - "epoch": 
0.37436706624902855, - "grad_norm": 0.7435100078582764, - "learning_rate": 1.8598066548633205e-05, - "loss": 0.3194, - "step": 3974 - }, - { - "epoch": 0.3744612703421964, - "grad_norm": 0.776519238948822, - "learning_rate": 1.859729540556664e-05, - "loss": 0.3767, - "step": 3975 - }, - { - "epoch": 0.37455547443536424, - "grad_norm": 1.5168535709381104, - "learning_rate": 1.8596524066468444e-05, - "loss": 0.3531, - "step": 3976 - }, - { - "epoch": 0.37464967852853204, - "grad_norm": 0.7984569072723389, - "learning_rate": 1.8595752531356207e-05, - "loss": 0.3643, - "step": 3977 - }, - { - "epoch": 0.3747438826216999, - "grad_norm": 0.8715662956237793, - "learning_rate": 1.859498080024752e-05, - "loss": 0.3749, - "step": 3978 - }, - { - "epoch": 0.37483808671486774, - "grad_norm": 0.8876890540122986, - "learning_rate": 1.8594208873159974e-05, - "loss": 0.3909, - "step": 3979 - }, - { - "epoch": 0.3749322908080356, - "grad_norm": 0.7428289651870728, - "learning_rate": 1.8593436750111174e-05, - "loss": 0.3519, - "step": 3980 - }, - { - "epoch": 0.37502649490120343, - "grad_norm": 0.8159217238426208, - "learning_rate": 1.859266443111873e-05, - "loss": 0.3498, - "step": 3981 - }, - { - "epoch": 0.3751206989943713, - "grad_norm": 0.9959158897399902, - "learning_rate": 1.8591891916200248e-05, - "loss": 0.3643, - "step": 3982 - }, - { - "epoch": 0.37521490308753913, - "grad_norm": 0.6949526071548462, - "learning_rate": 1.8591119205373337e-05, - "loss": 0.3142, - "step": 3983 - }, - { - "epoch": 0.375309107180707, - "grad_norm": 0.9546235799789429, - "learning_rate": 1.8590346298655624e-05, - "loss": 0.3963, - "step": 3984 - }, - { - "epoch": 0.37540331127387483, - "grad_norm": 0.7107335329055786, - "learning_rate": 1.8589573196064734e-05, - "loss": 0.2952, - "step": 3985 - }, - { - "epoch": 0.3754975153670427, - "grad_norm": 1.7656023502349854, - "learning_rate": 1.858879989761829e-05, - "loss": 0.3878, - "step": 3986 - }, - { - "epoch": 0.3755917194602105, - "grad_norm": 0.8152875304222107, - "learning_rate": 1.858802640333392e-05, - "loss": 0.3453, - "step": 3987 - }, - { - "epoch": 0.3756859235533784, - "grad_norm": 0.8879491686820984, - "learning_rate": 1.8587252713229276e-05, - "loss": 0.3229, - "step": 3988 - }, - { - "epoch": 0.3757801276465462, - "grad_norm": 0.857681155204773, - "learning_rate": 1.8586478827321985e-05, - "loss": 0.3701, - "step": 3989 - }, - { - "epoch": 0.3758743317397141, - "grad_norm": 0.970120370388031, - "learning_rate": 1.8585704745629695e-05, - "loss": 0.3036, - "step": 3990 - }, - { - "epoch": 0.3759685358328819, - "grad_norm": 0.8371943831443787, - "learning_rate": 1.858493046817006e-05, - "loss": 0.3575, - "step": 3991 - }, - { - "epoch": 0.37606273992604977, - "grad_norm": 0.8610896468162537, - "learning_rate": 1.8584155994960734e-05, - "loss": 0.3305, - "step": 3992 - }, - { - "epoch": 0.3761569440192176, - "grad_norm": 0.7553221583366394, - "learning_rate": 1.8583381326019377e-05, - "loss": 0.361, - "step": 3993 - }, - { - "epoch": 0.37625114811238547, - "grad_norm": 0.7915468811988831, - "learning_rate": 1.858260646136365e-05, - "loss": 0.3958, - "step": 3994 - }, - { - "epoch": 0.3763453522055533, - "grad_norm": 0.8172153830528259, - "learning_rate": 1.8581831401011222e-05, - "loss": 0.3379, - "step": 3995 - }, - { - "epoch": 0.37643955629872117, - "grad_norm": 0.7263503074645996, - "learning_rate": 1.858105614497977e-05, - "loss": 0.338, - "step": 3996 - }, - { - "epoch": 0.376533760391889, - "grad_norm": 0.9005851149559021, - "learning_rate": 
1.8580280693286958e-05, - "loss": 0.366, - "step": 3997 - }, - { - "epoch": 0.37662796448505687, - "grad_norm": 0.8273485898971558, - "learning_rate": 1.8579505045950485e-05, - "loss": 0.3189, - "step": 3998 - }, - { - "epoch": 0.3767221685782247, - "grad_norm": 0.7768470048904419, - "learning_rate": 1.8578729202988025e-05, - "loss": 0.3504, - "step": 3999 - }, - { - "epoch": 0.37681637267139256, - "grad_norm": 0.7757167220115662, - "learning_rate": 1.857795316441727e-05, - "loss": 0.3889, - "step": 4000 - }, - { - "epoch": 0.3769105767645604, - "grad_norm": 0.7570431232452393, - "learning_rate": 1.8577176930255923e-05, - "loss": 0.377, - "step": 4001 - }, - { - "epoch": 0.37700478085772826, - "grad_norm": 0.8670721054077148, - "learning_rate": 1.8576400500521673e-05, - "loss": 0.3578, - "step": 4002 - }, - { - "epoch": 0.3770989849508961, - "grad_norm": 0.7168530225753784, - "learning_rate": 1.8575623875232228e-05, - "loss": 0.3552, - "step": 4003 - }, - { - "epoch": 0.37719318904406396, - "grad_norm": 0.8752408623695374, - "learning_rate": 1.8574847054405294e-05, - "loss": 0.3424, - "step": 4004 - }, - { - "epoch": 0.3772873931372318, - "grad_norm": 0.9238356947898865, - "learning_rate": 1.857407003805859e-05, - "loss": 0.387, - "step": 4005 - }, - { - "epoch": 0.37738159723039966, - "grad_norm": 0.9070408344268799, - "learning_rate": 1.8573292826209827e-05, - "loss": 0.3439, - "step": 4006 - }, - { - "epoch": 0.3774758013235675, - "grad_norm": 0.794664740562439, - "learning_rate": 1.857251541887673e-05, - "loss": 0.3377, - "step": 4007 - }, - { - "epoch": 0.37757000541673535, - "grad_norm": 0.7839815616607666, - "learning_rate": 1.8571737816077022e-05, - "loss": 0.3117, - "step": 4008 - }, - { - "epoch": 0.3776642095099032, - "grad_norm": 0.843227207660675, - "learning_rate": 1.8570960017828437e-05, - "loss": 0.3365, - "step": 4009 - }, - { - "epoch": 0.37775841360307105, - "grad_norm": 0.7713897824287415, - "learning_rate": 1.8570182024148707e-05, - "loss": 0.3071, - "step": 4010 - }, - { - "epoch": 0.3778526176962389, - "grad_norm": 0.8083482384681702, - "learning_rate": 1.8569403835055576e-05, - "loss": 0.3039, - "step": 4011 - }, - { - "epoch": 0.37794682178940675, - "grad_norm": 0.8170844316482544, - "learning_rate": 1.856862545056678e-05, - "loss": 0.3383, - "step": 4012 - }, - { - "epoch": 0.3780410258825746, - "grad_norm": 0.9065813422203064, - "learning_rate": 1.8567846870700076e-05, - "loss": 0.3459, - "step": 4013 - }, - { - "epoch": 0.37813522997574245, - "grad_norm": 0.8003597259521484, - "learning_rate": 1.856706809547321e-05, - "loss": 0.3747, - "step": 4014 - }, - { - "epoch": 0.3782294340689103, - "grad_norm": 0.7988014221191406, - "learning_rate": 1.8566289124903945e-05, - "loss": 0.3275, - "step": 4015 - }, - { - "epoch": 0.37832363816207815, - "grad_norm": 0.8614307641983032, - "learning_rate": 1.8565509959010037e-05, - "loss": 0.3558, - "step": 4016 - }, - { - "epoch": 0.378417842255246, - "grad_norm": 0.8755281567573547, - "learning_rate": 1.8564730597809258e-05, - "loss": 0.3993, - "step": 4017 - }, - { - "epoch": 0.37851204634841384, - "grad_norm": 0.9249305725097656, - "learning_rate": 1.8563951041319375e-05, - "loss": 0.3839, - "step": 4018 - }, - { - "epoch": 0.3786062504415817, - "grad_norm": 0.9633373618125916, - "learning_rate": 1.856317128955816e-05, - "loss": 0.377, - "step": 4019 - }, - { - "epoch": 0.37870045453474954, - "grad_norm": 0.8036518096923828, - "learning_rate": 1.8562391342543403e-05, - "loss": 0.3539, - "step": 4020 - }, - { - "epoch": 
0.3787946586279174, - "grad_norm": 0.6962668895721436, - "learning_rate": 1.856161120029288e-05, - "loss": 0.3327, - "step": 4021 - }, - { - "epoch": 0.37888886272108524, - "grad_norm": 0.8387727737426758, - "learning_rate": 1.8560830862824375e-05, - "loss": 0.3585, - "step": 4022 - }, - { - "epoch": 0.3789830668142531, - "grad_norm": 0.8466220498085022, - "learning_rate": 1.856005033015569e-05, - "loss": 0.3429, - "step": 4023 - }, - { - "epoch": 0.37907727090742094, - "grad_norm": 0.9862387180328369, - "learning_rate": 1.8559269602304623e-05, - "loss": 0.3319, - "step": 4024 - }, - { - "epoch": 0.3791714750005888, - "grad_norm": 0.8368757963180542, - "learning_rate": 1.8558488679288967e-05, - "loss": 0.3342, - "step": 4025 - }, - { - "epoch": 0.37926567909375664, - "grad_norm": 0.8175406455993652, - "learning_rate": 1.8557707561126533e-05, - "loss": 0.3807, - "step": 4026 - }, - { - "epoch": 0.3793598831869245, - "grad_norm": 0.8657997846603394, - "learning_rate": 1.8556926247835135e-05, - "loss": 0.3695, - "step": 4027 - }, - { - "epoch": 0.37945408728009233, - "grad_norm": 0.9448007345199585, - "learning_rate": 1.8556144739432584e-05, - "loss": 0.3488, - "step": 4028 - }, - { - "epoch": 0.3795482913732602, - "grad_norm": 0.7154188752174377, - "learning_rate": 1.8555363035936698e-05, - "loss": 0.3244, - "step": 4029 - }, - { - "epoch": 0.37964249546642803, - "grad_norm": 0.7990254163742065, - "learning_rate": 1.8554581137365307e-05, - "loss": 0.3418, - "step": 4030 - }, - { - "epoch": 0.3797366995595959, - "grad_norm": 0.781462550163269, - "learning_rate": 1.8553799043736234e-05, - "loss": 0.3869, - "step": 4031 - }, - { - "epoch": 0.37983090365276373, - "grad_norm": 0.8279241323471069, - "learning_rate": 1.8553016755067315e-05, - "loss": 0.3443, - "step": 4032 - }, - { - "epoch": 0.3799251077459316, - "grad_norm": 0.8141920566558838, - "learning_rate": 1.8552234271376388e-05, - "loss": 0.3859, - "step": 4033 - }, - { - "epoch": 0.3800193118390994, - "grad_norm": 0.865777313709259, - "learning_rate": 1.8551451592681292e-05, - "loss": 0.3951, - "step": 4034 - }, - { - "epoch": 0.3801135159322673, - "grad_norm": 0.8812288045883179, - "learning_rate": 1.8550668718999873e-05, - "loss": 0.353, - "step": 4035 - }, - { - "epoch": 0.3802077200254351, - "grad_norm": 1.1636347770690918, - "learning_rate": 1.8549885650349985e-05, - "loss": 0.3255, - "step": 4036 - }, - { - "epoch": 0.380301924118603, - "grad_norm": 0.7793205380439758, - "learning_rate": 1.854910238674948e-05, - "loss": 0.3367, - "step": 4037 - }, - { - "epoch": 0.3803961282117708, - "grad_norm": 0.9390274882316589, - "learning_rate": 1.8548318928216223e-05, - "loss": 0.3541, - "step": 4038 - }, - { - "epoch": 0.38049033230493867, - "grad_norm": 0.8669273853302002, - "learning_rate": 1.8547535274768073e-05, - "loss": 0.3415, - "step": 4039 - }, - { - "epoch": 0.3805845363981065, - "grad_norm": 0.8564456701278687, - "learning_rate": 1.8546751426422894e-05, - "loss": 0.3406, - "step": 4040 - }, - { - "epoch": 0.38067874049127437, - "grad_norm": 0.8560154438018799, - "learning_rate": 1.854596738319857e-05, - "loss": 0.3543, - "step": 4041 - }, - { - "epoch": 0.3807729445844422, - "grad_norm": 2.012359619140625, - "learning_rate": 1.8545183145112973e-05, - "loss": 0.3288, - "step": 4042 - }, - { - "epoch": 0.38086714867761007, - "grad_norm": 0.8635702133178711, - "learning_rate": 1.8544398712183987e-05, - "loss": 0.377, - "step": 4043 - }, - { - "epoch": 0.3809613527707779, - "grad_norm": 0.7307999730110168, - "learning_rate": 
1.854361408442949e-05, - "loss": 0.3146, - "step": 4044 - }, - { - "epoch": 0.38105555686394577, - "grad_norm": 0.8456552624702454, - "learning_rate": 1.8542829261867382e-05, - "loss": 0.3656, - "step": 4045 - }, - { - "epoch": 0.38114976095711356, - "grad_norm": 0.8214126229286194, - "learning_rate": 1.8542044244515556e-05, - "loss": 0.3461, - "step": 4046 - }, - { - "epoch": 0.3812439650502814, - "grad_norm": 0.8272832632064819, - "learning_rate": 1.8541259032391908e-05, - "loss": 0.3478, - "step": 4047 - }, - { - "epoch": 0.38133816914344926, - "grad_norm": 0.8121527433395386, - "learning_rate": 1.854047362551435e-05, - "loss": 0.332, - "step": 4048 - }, - { - "epoch": 0.3814323732366171, - "grad_norm": 0.7890356183052063, - "learning_rate": 1.853968802390078e-05, - "loss": 0.338, - "step": 4049 - }, - { - "epoch": 0.38152657732978495, - "grad_norm": 0.7535851001739502, - "learning_rate": 1.8538902227569118e-05, - "loss": 0.3473, - "step": 4050 - }, - { - "epoch": 0.3816207814229528, - "grad_norm": 0.8237473368644714, - "learning_rate": 1.8538116236537283e-05, - "loss": 0.3648, - "step": 4051 - }, - { - "epoch": 0.38171498551612065, - "grad_norm": 0.8340440392494202, - "learning_rate": 1.8537330050823188e-05, - "loss": 0.3689, - "step": 4052 - }, - { - "epoch": 0.3818091896092885, - "grad_norm": 0.8527438044548035, - "learning_rate": 1.8536543670444767e-05, - "loss": 0.3525, - "step": 4053 - }, - { - "epoch": 0.38190339370245635, - "grad_norm": 0.7588123679161072, - "learning_rate": 1.8535757095419947e-05, - "loss": 0.3513, - "step": 4054 - }, - { - "epoch": 0.3819975977956242, - "grad_norm": 0.7642732858657837, - "learning_rate": 1.853497032576667e-05, - "loss": 0.3674, - "step": 4055 - }, - { - "epoch": 0.38209180188879205, - "grad_norm": 0.804860532283783, - "learning_rate": 1.8534183361502864e-05, - "loss": 0.3909, - "step": 4056 - }, - { - "epoch": 0.3821860059819599, - "grad_norm": 0.7832939028739929, - "learning_rate": 1.853339620264648e-05, - "loss": 0.3187, - "step": 4057 - }, - { - "epoch": 0.38228021007512775, - "grad_norm": 0.7937828898429871, - "learning_rate": 1.8532608849215464e-05, - "loss": 0.3717, - "step": 4058 - }, - { - "epoch": 0.3823744141682956, - "grad_norm": 0.6621168851852417, - "learning_rate": 1.8531821301227774e-05, - "loss": 0.3237, - "step": 4059 - }, - { - "epoch": 0.38246861826146344, - "grad_norm": 0.8530427813529968, - "learning_rate": 1.8531033558701363e-05, - "loss": 0.3541, - "step": 4060 - }, - { - "epoch": 0.3825628223546313, - "grad_norm": 0.8988747000694275, - "learning_rate": 1.8530245621654188e-05, - "loss": 0.3537, - "step": 4061 - }, - { - "epoch": 0.38265702644779914, - "grad_norm": 0.9490792155265808, - "learning_rate": 1.8529457490104226e-05, - "loss": 0.318, - "step": 4062 - }, - { - "epoch": 0.382751230540967, - "grad_norm": 0.881201446056366, - "learning_rate": 1.8528669164069438e-05, - "loss": 0.3128, - "step": 4063 - }, - { - "epoch": 0.38284543463413484, - "grad_norm": 0.7725705504417419, - "learning_rate": 1.8527880643567808e-05, - "loss": 0.3612, - "step": 4064 - }, - { - "epoch": 0.3829396387273027, - "grad_norm": 0.6691721677780151, - "learning_rate": 1.852709192861731e-05, - "loss": 0.2916, - "step": 4065 - }, - { - "epoch": 0.38303384282047054, - "grad_norm": 0.8485406637191772, - "learning_rate": 1.8526303019235926e-05, - "loss": 0.3415, - "step": 4066 - }, - { - "epoch": 0.3831280469136384, - "grad_norm": 0.9098604321479797, - "learning_rate": 1.852551391544165e-05, - "loss": 0.363, - "step": 4067 - }, - { - "epoch": 
0.38322225100680624, - "grad_norm": 0.8491668701171875, - "learning_rate": 1.852472461725247e-05, - "loss": 0.3404, - "step": 4068 - }, - { - "epoch": 0.3833164550999741, - "grad_norm": 0.7346593141555786, - "learning_rate": 1.8523935124686384e-05, - "loss": 0.3099, - "step": 4069 - }, - { - "epoch": 0.38341065919314193, - "grad_norm": 0.9048673510551453, - "learning_rate": 1.8523145437761395e-05, - "loss": 0.4132, - "step": 4070 - }, - { - "epoch": 0.3835048632863098, - "grad_norm": 0.7384666204452515, - "learning_rate": 1.852235555649551e-05, - "loss": 0.302, - "step": 4071 - }, - { - "epoch": 0.38359906737947763, - "grad_norm": 0.6942598223686218, - "learning_rate": 1.8521565480906737e-05, - "loss": 0.3003, - "step": 4072 - }, - { - "epoch": 0.3836932714726455, - "grad_norm": 0.7419342398643494, - "learning_rate": 1.8520775211013094e-05, - "loss": 0.3233, - "step": 4073 - }, - { - "epoch": 0.38378747556581333, - "grad_norm": 0.7706865072250366, - "learning_rate": 1.8519984746832597e-05, - "loss": 0.3459, - "step": 4074 - }, - { - "epoch": 0.3838816796589812, - "grad_norm": 0.8181882500648499, - "learning_rate": 1.851919408838327e-05, - "loss": 0.3547, - "step": 4075 - }, - { - "epoch": 0.383975883752149, - "grad_norm": 0.7399076819419861, - "learning_rate": 1.8518403235683147e-05, - "loss": 0.3457, - "step": 4076 - }, - { - "epoch": 0.3840700878453169, - "grad_norm": 0.8902973532676697, - "learning_rate": 1.8517612188750254e-05, - "loss": 0.3458, - "step": 4077 - }, - { - "epoch": 0.3841642919384847, - "grad_norm": 0.8323981761932373, - "learning_rate": 1.851682094760263e-05, - "loss": 0.3395, - "step": 4078 - }, - { - "epoch": 0.3842584960316526, - "grad_norm": 0.8129898309707642, - "learning_rate": 1.851602951225832e-05, - "loss": 0.3506, - "step": 4079 - }, - { - "epoch": 0.3843527001248204, - "grad_norm": 0.7929735779762268, - "learning_rate": 1.851523788273536e-05, - "loss": 0.3539, - "step": 4080 - }, - { - "epoch": 0.3844469042179883, - "grad_norm": 0.733232855796814, - "learning_rate": 1.8514446059051813e-05, - "loss": 0.3007, - "step": 4081 - }, - { - "epoch": 0.3845411083111561, - "grad_norm": 0.7781715989112854, - "learning_rate": 1.8513654041225723e-05, - "loss": 0.3691, - "step": 4082 - }, - { - "epoch": 0.38463531240432397, - "grad_norm": 0.7716662883758545, - "learning_rate": 1.851286182927516e-05, - "loss": 0.395, - "step": 4083 - }, - { - "epoch": 0.3847295164974918, - "grad_norm": 0.8850762844085693, - "learning_rate": 1.8512069423218176e-05, - "loss": 0.3031, - "step": 4084 - }, - { - "epoch": 0.38482372059065967, - "grad_norm": 0.7346330285072327, - "learning_rate": 1.851127682307285e-05, - "loss": 0.3451, - "step": 4085 - }, - { - "epoch": 0.3849179246838275, - "grad_norm": 0.8006929159164429, - "learning_rate": 1.8510484028857245e-05, - "loss": 0.3467, - "step": 4086 - }, - { - "epoch": 0.38501212877699537, - "grad_norm": 0.7510959506034851, - "learning_rate": 1.8509691040589448e-05, - "loss": 0.3029, - "step": 4087 - }, - { - "epoch": 0.3851063328701632, - "grad_norm": 0.7754647731781006, - "learning_rate": 1.8508897858287528e-05, - "loss": 0.3684, - "step": 4088 - }, - { - "epoch": 0.38520053696333106, - "grad_norm": 1.0007089376449585, - "learning_rate": 1.8508104481969585e-05, - "loss": 0.3311, - "step": 4089 - }, - { - "epoch": 0.3852947410564989, - "grad_norm": 0.7495034337043762, - "learning_rate": 1.8507310911653696e-05, - "loss": 0.3827, - "step": 4090 - }, - { - "epoch": 0.38538894514966676, - "grad_norm": 0.7500461339950562, - "learning_rate": 
1.8506517147357966e-05, - "loss": 0.3252, - "step": 4091 - }, - { - "epoch": 0.3854831492428346, - "grad_norm": 0.840074360370636, - "learning_rate": 1.8505723189100483e-05, - "loss": 0.3599, - "step": 4092 - }, - { - "epoch": 0.38557735333600246, - "grad_norm": 0.7502328753471375, - "learning_rate": 1.8504929036899363e-05, - "loss": 0.3037, - "step": 4093 - }, - { - "epoch": 0.3856715574291703, - "grad_norm": 0.8640468120574951, - "learning_rate": 1.8504134690772706e-05, - "loss": 0.4063, - "step": 4094 - }, - { - "epoch": 0.38576576152233816, - "grad_norm": 0.9477185606956482, - "learning_rate": 1.8503340150738626e-05, - "loss": 0.3895, - "step": 4095 - }, - { - "epoch": 0.385859965615506, - "grad_norm": 0.810876190662384, - "learning_rate": 1.8502545416815238e-05, - "loss": 0.3492, - "step": 4096 - }, - { - "epoch": 0.38595416970867386, - "grad_norm": 0.8612748980522156, - "learning_rate": 1.850175048902067e-05, - "loss": 0.3368, - "step": 4097 - }, - { - "epoch": 0.3860483738018417, - "grad_norm": 0.7046990990638733, - "learning_rate": 1.8500955367373038e-05, - "loss": 0.3161, - "step": 4098 - }, - { - "epoch": 0.38614257789500955, - "grad_norm": 0.7437293529510498, - "learning_rate": 1.8500160051890475e-05, - "loss": 0.3364, - "step": 4099 - }, - { - "epoch": 0.3862367819881774, - "grad_norm": 0.9109039306640625, - "learning_rate": 1.849936454259112e-05, - "loss": 0.32, - "step": 4100 - }, - { - "epoch": 0.38633098608134525, - "grad_norm": 0.8287127017974854, - "learning_rate": 1.8498568839493112e-05, - "loss": 0.4096, - "step": 4101 - }, - { - "epoch": 0.3864251901745131, - "grad_norm": 0.8941023945808411, - "learning_rate": 1.849777294261459e-05, - "loss": 0.3508, - "step": 4102 - }, - { - "epoch": 0.38651939426768095, - "grad_norm": 1.0079180002212524, - "learning_rate": 1.84969768519737e-05, - "loss": 0.3376, - "step": 4103 - }, - { - "epoch": 0.3866135983608488, - "grad_norm": 0.9072720408439636, - "learning_rate": 1.84961805675886e-05, - "loss": 0.3379, - "step": 4104 - }, - { - "epoch": 0.38670780245401665, - "grad_norm": 0.8411976099014282, - "learning_rate": 1.849538408947744e-05, - "loss": 0.3411, - "step": 4105 - }, - { - "epoch": 0.3868020065471845, - "grad_norm": 0.9072670340538025, - "learning_rate": 1.8494587417658385e-05, - "loss": 0.3534, - "step": 4106 - }, - { - "epoch": 0.38689621064035234, - "grad_norm": 0.8730388879776001, - "learning_rate": 1.8493790552149602e-05, - "loss": 0.3367, - "step": 4107 - }, - { - "epoch": 0.3869904147335202, - "grad_norm": 0.8113542199134827, - "learning_rate": 1.8492993492969257e-05, - "loss": 0.3655, - "step": 4108 - }, - { - "epoch": 0.38708461882668804, - "grad_norm": 0.6983941793441772, - "learning_rate": 1.8492196240135526e-05, - "loss": 0.3288, - "step": 4109 - }, - { - "epoch": 0.3871788229198559, - "grad_norm": 0.7515357732772827, - "learning_rate": 1.8491398793666587e-05, - "loss": 0.3499, - "step": 4110 - }, - { - "epoch": 0.38727302701302374, - "grad_norm": 0.7293741106987, - "learning_rate": 1.849060115358062e-05, - "loss": 0.3394, - "step": 4111 - }, - { - "epoch": 0.3873672311061916, - "grad_norm": 0.7499194145202637, - "learning_rate": 1.848980331989582e-05, - "loss": 0.328, - "step": 4112 - }, - { - "epoch": 0.38746143519935944, - "grad_norm": 0.8803209662437439, - "learning_rate": 1.8489005292630377e-05, - "loss": 0.3931, - "step": 4113 - }, - { - "epoch": 0.3875556392925273, - "grad_norm": 0.8406884670257568, - "learning_rate": 1.848820707180248e-05, - "loss": 0.3564, - "step": 4114 - }, - { - "epoch": 
0.3876498433856951, - "grad_norm": 0.8496602177619934, - "learning_rate": 1.8487408657430338e-05, - "loss": 0.369, - "step": 4115 - }, - { - "epoch": 0.38774404747886293, - "grad_norm": 0.7715287208557129, - "learning_rate": 1.8486610049532146e-05, - "loss": 0.3525, - "step": 4116 - }, - { - "epoch": 0.3878382515720308, - "grad_norm": 0.8887027502059937, - "learning_rate": 1.8485811248126127e-05, - "loss": 0.4193, - "step": 4117 - }, - { - "epoch": 0.38793245566519863, - "grad_norm": 0.7482026815414429, - "learning_rate": 1.8485012253230484e-05, - "loss": 0.3492, - "step": 4118 - }, - { - "epoch": 0.3880266597583665, - "grad_norm": 0.7965260148048401, - "learning_rate": 1.8484213064863442e-05, - "loss": 0.3143, - "step": 4119 - }, - { - "epoch": 0.3881208638515343, - "grad_norm": 0.8715170621871948, - "learning_rate": 1.8483413683043224e-05, - "loss": 0.3614, - "step": 4120 - }, - { - "epoch": 0.3882150679447022, - "grad_norm": 0.8241899013519287, - "learning_rate": 1.848261410778805e-05, - "loss": 0.3076, - "step": 4121 - }, - { - "epoch": 0.38830927203787, - "grad_norm": 0.8795972466468811, - "learning_rate": 1.8481814339116154e-05, - "loss": 0.3467, - "step": 4122 - }, - { - "epoch": 0.3884034761310379, - "grad_norm": 0.7732073664665222, - "learning_rate": 1.8481014377045773e-05, - "loss": 0.3241, - "step": 4123 - }, - { - "epoch": 0.3884976802242057, - "grad_norm": 0.8015792965888977, - "learning_rate": 1.848021422159515e-05, - "loss": 0.3074, - "step": 4124 - }, - { - "epoch": 0.38859188431737357, - "grad_norm": 0.9260653257369995, - "learning_rate": 1.8479413872782532e-05, - "loss": 0.3879, - "step": 4125 - }, - { - "epoch": 0.3886860884105414, - "grad_norm": 0.7994526624679565, - "learning_rate": 1.8478613330626157e-05, - "loss": 0.3578, - "step": 4126 - }, - { - "epoch": 0.38878029250370927, - "grad_norm": 1.007300615310669, - "learning_rate": 1.8477812595144292e-05, - "loss": 0.3817, - "step": 4127 - }, - { - "epoch": 0.3888744965968771, - "grad_norm": 0.9153285622596741, - "learning_rate": 1.8477011666355188e-05, - "loss": 0.3673, - "step": 4128 - }, - { - "epoch": 0.38896870069004497, - "grad_norm": 0.7606241703033447, - "learning_rate": 1.8476210544277105e-05, - "loss": 0.3249, - "step": 4129 - }, - { - "epoch": 0.3890629047832128, - "grad_norm": 0.7886267304420471, - "learning_rate": 1.8475409228928314e-05, - "loss": 0.3611, - "step": 4130 - }, - { - "epoch": 0.38915710887638066, - "grad_norm": 1.02424156665802, - "learning_rate": 1.8474607720327084e-05, - "loss": 0.4415, - "step": 4131 - }, - { - "epoch": 0.3892513129695485, - "grad_norm": 0.9800648093223572, - "learning_rate": 1.847380601849169e-05, - "loss": 0.3471, - "step": 4132 - }, - { - "epoch": 0.38934551706271636, - "grad_norm": 0.8154638409614563, - "learning_rate": 1.847300412344042e-05, - "loss": 0.3675, - "step": 4133 - }, - { - "epoch": 0.3894397211558842, - "grad_norm": 0.7895480394363403, - "learning_rate": 1.8472202035191547e-05, - "loss": 0.3231, - "step": 4134 - }, - { - "epoch": 0.38953392524905206, - "grad_norm": 0.8950668573379517, - "learning_rate": 1.8471399753763366e-05, - "loss": 0.3685, - "step": 4135 - }, - { - "epoch": 0.3896281293422199, - "grad_norm": 0.8978734612464905, - "learning_rate": 1.8470597279174173e-05, - "loss": 0.3351, - "step": 4136 - }, - { - "epoch": 0.38972233343538776, - "grad_norm": 0.8425732851028442, - "learning_rate": 1.8469794611442257e-05, - "loss": 0.3242, - "step": 4137 - }, - { - "epoch": 0.3898165375285556, - "grad_norm": 0.8049556612968445, - "learning_rate": 
1.8468991750585928e-05, - "loss": 0.3835, - "step": 4138 - }, - { - "epoch": 0.38991074162172346, - "grad_norm": 0.7788106799125671, - "learning_rate": 1.846818869662349e-05, - "loss": 0.3088, - "step": 4139 - }, - { - "epoch": 0.3900049457148913, - "grad_norm": 0.897233784198761, - "learning_rate": 1.846738544957325e-05, - "loss": 0.3917, - "step": 4140 - }, - { - "epoch": 0.39009914980805915, - "grad_norm": 0.747938871383667, - "learning_rate": 1.846658200945353e-05, - "loss": 0.3766, - "step": 4141 - }, - { - "epoch": 0.390193353901227, - "grad_norm": 0.8425939679145813, - "learning_rate": 1.8465778376282645e-05, - "loss": 0.3325, - "step": 4142 - }, - { - "epoch": 0.39028755799439485, - "grad_norm": 0.7967554926872253, - "learning_rate": 1.846497455007892e-05, - "loss": 0.3271, - "step": 4143 - }, - { - "epoch": 0.3903817620875627, - "grad_norm": 0.7986201643943787, - "learning_rate": 1.8464170530860684e-05, - "loss": 0.3492, - "step": 4144 - }, - { - "epoch": 0.39047596618073055, - "grad_norm": 0.7835004925727844, - "learning_rate": 1.8463366318646274e-05, - "loss": 0.3253, - "step": 4145 - }, - { - "epoch": 0.3905701702738984, - "grad_norm": 0.8994381427764893, - "learning_rate": 1.8462561913454017e-05, - "loss": 0.3736, - "step": 4146 - }, - { - "epoch": 0.39066437436706625, - "grad_norm": 0.7640780806541443, - "learning_rate": 1.8461757315302264e-05, - "loss": 0.3578, - "step": 4147 - }, - { - "epoch": 0.3907585784602341, - "grad_norm": 0.836300253868103, - "learning_rate": 1.8460952524209355e-05, - "loss": 0.3283, - "step": 4148 - }, - { - "epoch": 0.39085278255340195, - "grad_norm": 0.7115257382392883, - "learning_rate": 1.8460147540193648e-05, - "loss": 0.3347, - "step": 4149 - }, - { - "epoch": 0.3909469866465698, - "grad_norm": 0.7805563807487488, - "learning_rate": 1.8459342363273488e-05, - "loss": 0.3487, - "step": 4150 - }, - { - "epoch": 0.39104119073973764, - "grad_norm": 0.8107002377510071, - "learning_rate": 1.8458536993467242e-05, - "loss": 0.3396, - "step": 4151 - }, - { - "epoch": 0.3911353948329055, - "grad_norm": 0.8486902117729187, - "learning_rate": 1.8457731430793272e-05, - "loss": 0.3908, - "step": 4152 - }, - { - "epoch": 0.39122959892607334, - "grad_norm": 0.8985098600387573, - "learning_rate": 1.8456925675269944e-05, - "loss": 0.3819, - "step": 4153 - }, - { - "epoch": 0.3913238030192412, - "grad_norm": 0.7981569170951843, - "learning_rate": 1.8456119726915635e-05, - "loss": 0.4176, - "step": 4154 - }, - { - "epoch": 0.39141800711240904, - "grad_norm": 0.8377177715301514, - "learning_rate": 1.8455313585748716e-05, - "loss": 0.3641, - "step": 4155 - }, - { - "epoch": 0.3915122112055769, - "grad_norm": 1.0901809930801392, - "learning_rate": 1.8454507251787567e-05, - "loss": 0.3688, - "step": 4156 - }, - { - "epoch": 0.39160641529874474, - "grad_norm": 0.7740063667297363, - "learning_rate": 1.8453700725050583e-05, - "loss": 0.3487, - "step": 4157 - }, - { - "epoch": 0.3917006193919126, - "grad_norm": 0.816830575466156, - "learning_rate": 1.8452894005556148e-05, - "loss": 0.3351, - "step": 4158 - }, - { - "epoch": 0.39179482348508043, - "grad_norm": 1.1118569374084473, - "learning_rate": 1.8452087093322655e-05, - "loss": 0.3811, - "step": 4159 - }, - { - "epoch": 0.3918890275782483, - "grad_norm": 0.7970894575119019, - "learning_rate": 1.8451279988368506e-05, - "loss": 0.3356, - "step": 4160 - }, - { - "epoch": 0.39198323167141613, - "grad_norm": 0.8318902254104614, - "learning_rate": 1.84504726907121e-05, - "loss": 0.3474, - "step": 4161 - }, - { - "epoch": 
0.392077435764584, - "grad_norm": 0.7451596856117249, - "learning_rate": 1.8449665200371852e-05, - "loss": 0.3627, - "step": 4162 - }, - { - "epoch": 0.39217163985775183, - "grad_norm": 0.8282124996185303, - "learning_rate": 1.8448857517366167e-05, - "loss": 0.3563, - "step": 4163 - }, - { - "epoch": 0.3922658439509197, - "grad_norm": 0.8604304790496826, - "learning_rate": 1.844804964171347e-05, - "loss": 0.3767, - "step": 4164 - }, - { - "epoch": 0.39236004804408753, - "grad_norm": 0.8274044394493103, - "learning_rate": 1.8447241573432168e-05, - "loss": 0.3427, - "step": 4165 - }, - { - "epoch": 0.3924542521372554, - "grad_norm": 0.9021215438842773, - "learning_rate": 1.8446433312540697e-05, - "loss": 0.4002, - "step": 4166 - }, - { - "epoch": 0.3925484562304232, - "grad_norm": 0.829105019569397, - "learning_rate": 1.8445624859057485e-05, - "loss": 0.3666, - "step": 4167 - }, - { - "epoch": 0.3926426603235911, - "grad_norm": 0.7571730017662048, - "learning_rate": 1.8444816213000963e-05, - "loss": 0.3433, - "step": 4168 - }, - { - "epoch": 0.3927368644167589, - "grad_norm": 0.8027662038803101, - "learning_rate": 1.8444007374389573e-05, - "loss": 0.3743, - "step": 4169 - }, - { - "epoch": 0.3928310685099268, - "grad_norm": 0.8503544926643372, - "learning_rate": 1.8443198343241756e-05, - "loss": 0.3864, - "step": 4170 - }, - { - "epoch": 0.3929252726030946, - "grad_norm": 0.8933030962944031, - "learning_rate": 1.844238911957596e-05, - "loss": 0.3639, - "step": 4171 - }, - { - "epoch": 0.39301947669626247, - "grad_norm": 0.7828903198242188, - "learning_rate": 1.8441579703410633e-05, - "loss": 0.378, - "step": 4172 - }, - { - "epoch": 0.3931136807894303, - "grad_norm": 0.8003404140472412, - "learning_rate": 1.8440770094764236e-05, - "loss": 0.3207, - "step": 4173 - }, - { - "epoch": 0.39320788488259817, - "grad_norm": 0.842401921749115, - "learning_rate": 1.8439960293655227e-05, - "loss": 0.3411, - "step": 4174 - }, - { - "epoch": 0.393302088975766, - "grad_norm": 0.8401558995246887, - "learning_rate": 1.843915030010207e-05, - "loss": 0.349, - "step": 4175 - }, - { - "epoch": 0.39339629306893387, - "grad_norm": 0.8366354703903198, - "learning_rate": 1.843834011412323e-05, - "loss": 0.3827, - "step": 4176 - }, - { - "epoch": 0.3934904971621017, - "grad_norm": 0.7191557288169861, - "learning_rate": 1.8437529735737192e-05, - "loss": 0.3263, - "step": 4177 - }, - { - "epoch": 0.39358470125526956, - "grad_norm": 0.9180736541748047, - "learning_rate": 1.8436719164962426e-05, - "loss": 0.356, - "step": 4178 - }, - { - "epoch": 0.3936789053484374, - "grad_norm": 1.4437901973724365, - "learning_rate": 1.8435908401817413e-05, - "loss": 0.2938, - "step": 4179 - }, - { - "epoch": 0.39377310944160526, - "grad_norm": 0.8307088613510132, - "learning_rate": 1.843509744632064e-05, - "loss": 0.3617, - "step": 4180 - }, - { - "epoch": 0.3938673135347731, - "grad_norm": 0.9016256928443909, - "learning_rate": 1.8434286298490605e-05, - "loss": 0.3712, - "step": 4181 - }, - { - "epoch": 0.39396151762794096, - "grad_norm": 0.8308224081993103, - "learning_rate": 1.8433474958345797e-05, - "loss": 0.3318, - "step": 4182 - }, - { - "epoch": 0.3940557217211088, - "grad_norm": 0.7834711670875549, - "learning_rate": 1.8432663425904716e-05, - "loss": 0.3122, - "step": 4183 - }, - { - "epoch": 0.3941499258142766, - "grad_norm": 0.7277787327766418, - "learning_rate": 1.843185170118587e-05, - "loss": 0.3105, - "step": 4184 - }, - { - "epoch": 0.39424412990744445, - "grad_norm": 0.8183156251907349, - "learning_rate": 
1.843103978420776e-05, - "loss": 0.3576, - "step": 4185 - }, - { - "epoch": 0.3943383340006123, - "grad_norm": 0.8041008114814758, - "learning_rate": 1.8430227674988913e-05, - "loss": 0.399, - "step": 4186 - }, - { - "epoch": 0.39443253809378015, - "grad_norm": 0.7788959741592407, - "learning_rate": 1.842941537354783e-05, - "loss": 0.3623, - "step": 4187 - }, - { - "epoch": 0.394526742186948, - "grad_norm": 0.8078553676605225, - "learning_rate": 1.842860287990304e-05, - "loss": 0.3368, - "step": 4188 - }, - { - "epoch": 0.39462094628011585, - "grad_norm": 0.8752886652946472, - "learning_rate": 1.8427790194073072e-05, - "loss": 0.3592, - "step": 4189 - }, - { - "epoch": 0.3947151503732837, - "grad_norm": 0.9009623527526855, - "learning_rate": 1.8426977316076455e-05, - "loss": 0.4217, - "step": 4190 - }, - { - "epoch": 0.39480935446645155, - "grad_norm": 0.8680552840232849, - "learning_rate": 1.842616424593172e-05, - "loss": 0.3975, - "step": 4191 - }, - { - "epoch": 0.3949035585596194, - "grad_norm": 0.881288468837738, - "learning_rate": 1.842535098365741e-05, - "loss": 0.4005, - "step": 4192 - }, - { - "epoch": 0.39499776265278724, - "grad_norm": 0.7390192747116089, - "learning_rate": 1.8424537529272068e-05, - "loss": 0.3649, - "step": 4193 - }, - { - "epoch": 0.3950919667459551, - "grad_norm": 0.7918478846549988, - "learning_rate": 1.842372388279424e-05, - "loss": 0.3361, - "step": 4194 - }, - { - "epoch": 0.39518617083912294, - "grad_norm": 0.7925350069999695, - "learning_rate": 1.8422910044242483e-05, - "loss": 0.3365, - "step": 4195 - }, - { - "epoch": 0.3952803749322908, - "grad_norm": 0.8270667195320129, - "learning_rate": 1.8422096013635347e-05, - "loss": 0.3418, - "step": 4196 - }, - { - "epoch": 0.39537457902545864, - "grad_norm": 0.8141732811927795, - "learning_rate": 1.8421281790991398e-05, - "loss": 0.3368, - "step": 4197 - }, - { - "epoch": 0.3954687831186265, - "grad_norm": 0.7719465494155884, - "learning_rate": 1.84204673763292e-05, - "loss": 0.393, - "step": 4198 - }, - { - "epoch": 0.39556298721179434, - "grad_norm": 0.7901314496994019, - "learning_rate": 1.8419652769667324e-05, - "loss": 0.3711, - "step": 4199 - }, - { - "epoch": 0.3956571913049622, - "grad_norm": 0.7895154356956482, - "learning_rate": 1.8418837971024346e-05, - "loss": 0.3691, - "step": 4200 - }, - { - "epoch": 0.39575139539813003, - "grad_norm": 0.7976158857345581, - "learning_rate": 1.8418022980418836e-05, - "loss": 0.3522, - "step": 4201 - }, - { - "epoch": 0.3958455994912979, - "grad_norm": 0.863312304019928, - "learning_rate": 1.841720779786939e-05, - "loss": 0.4068, - "step": 4202 - }, - { - "epoch": 0.39593980358446573, - "grad_norm": 0.7937854528427124, - "learning_rate": 1.8416392423394586e-05, - "loss": 0.3119, - "step": 4203 - }, - { - "epoch": 0.3960340076776336, - "grad_norm": 0.8557348251342773, - "learning_rate": 1.841557685701302e-05, - "loss": 0.3506, - "step": 4204 - }, - { - "epoch": 0.39612821177080143, - "grad_norm": 0.8589996099472046, - "learning_rate": 1.8414761098743285e-05, - "loss": 0.3221, - "step": 4205 - }, - { - "epoch": 0.3962224158639693, - "grad_norm": 0.8741294741630554, - "learning_rate": 1.8413945148603982e-05, - "loss": 0.3906, - "step": 4206 - }, - { - "epoch": 0.39631661995713713, - "grad_norm": 0.8319645524024963, - "learning_rate": 1.841312900661372e-05, - "loss": 0.3474, - "step": 4207 - }, - { - "epoch": 0.396410824050305, - "grad_norm": 0.8147428631782532, - "learning_rate": 1.84123126727911e-05, - "loss": 0.3565, - "step": 4208 - }, - { - "epoch": 
0.3965050281434728, - "grad_norm": 0.7853788137435913, - "learning_rate": 1.841149614715475e-05, - "loss": 0.3682, - "step": 4209 - }, - { - "epoch": 0.3965992322366407, - "grad_norm": 0.7525576949119568, - "learning_rate": 1.8410679429723273e-05, - "loss": 0.3215, - "step": 4210 - }, - { - "epoch": 0.3966934363298085, - "grad_norm": 1.671125888824463, - "learning_rate": 1.84098625205153e-05, - "loss": 0.3343, - "step": 4211 - }, - { - "epoch": 0.3967876404229764, - "grad_norm": 0.7521957755088806, - "learning_rate": 1.8409045419549455e-05, - "loss": 0.3478, - "step": 4212 - }, - { - "epoch": 0.3968818445161442, - "grad_norm": 0.8269384503364563, - "learning_rate": 1.840822812684437e-05, - "loss": 0.3967, - "step": 4213 - }, - { - "epoch": 0.39697604860931207, - "grad_norm": 0.7471799850463867, - "learning_rate": 1.8407410642418678e-05, - "loss": 0.328, - "step": 4214 - }, - { - "epoch": 0.3970702527024799, - "grad_norm": 0.7251665592193604, - "learning_rate": 1.840659296629102e-05, - "loss": 0.3575, - "step": 4215 - }, - { - "epoch": 0.39716445679564777, - "grad_norm": 0.7672768831253052, - "learning_rate": 1.8405775098480046e-05, - "loss": 0.3423, - "step": 4216 - }, - { - "epoch": 0.3972586608888156, - "grad_norm": 0.7551266551017761, - "learning_rate": 1.8404957039004397e-05, - "loss": 0.3342, - "step": 4217 - }, - { - "epoch": 0.39735286498198347, - "grad_norm": 0.6800012588500977, - "learning_rate": 1.840413878788273e-05, - "loss": 0.2809, - "step": 4218 - }, - { - "epoch": 0.3974470690751513, - "grad_norm": 0.679454505443573, - "learning_rate": 1.8403320345133703e-05, - "loss": 0.2885, - "step": 4219 - }, - { - "epoch": 0.39754127316831916, - "grad_norm": 0.8225396275520325, - "learning_rate": 1.8402501710775973e-05, - "loss": 0.302, - "step": 4220 - }, - { - "epoch": 0.397635477261487, - "grad_norm": 0.8708465695381165, - "learning_rate": 1.8401682884828212e-05, - "loss": 0.3729, - "step": 4221 - }, - { - "epoch": 0.39772968135465486, - "grad_norm": 0.7675060629844666, - "learning_rate": 1.8400863867309084e-05, - "loss": 0.3545, - "step": 4222 - }, - { - "epoch": 0.3978238854478227, - "grad_norm": 0.7647223472595215, - "learning_rate": 1.840004465823727e-05, - "loss": 0.3975, - "step": 4223 - }, - { - "epoch": 0.39791808954099056, - "grad_norm": 0.8100164532661438, - "learning_rate": 1.8399225257631447e-05, - "loss": 0.3389, - "step": 4224 - }, - { - "epoch": 0.3980122936341584, - "grad_norm": 0.7076160311698914, - "learning_rate": 1.83984056655103e-05, - "loss": 0.3224, - "step": 4225 - }, - { - "epoch": 0.39810649772732626, - "grad_norm": 0.7941327095031738, - "learning_rate": 1.839758588189251e-05, - "loss": 0.3261, - "step": 4226 - }, - { - "epoch": 0.3982007018204941, - "grad_norm": 0.8033230304718018, - "learning_rate": 1.839676590679678e-05, - "loss": 0.3312, - "step": 4227 - }, - { - "epoch": 0.39829490591366196, - "grad_norm": 0.8265009522438049, - "learning_rate": 1.8395945740241802e-05, - "loss": 0.3568, - "step": 4228 - }, - { - "epoch": 0.3983891100068298, - "grad_norm": 0.8011536598205566, - "learning_rate": 1.839512538224627e-05, - "loss": 0.3686, - "step": 4229 - }, - { - "epoch": 0.39848331409999765, - "grad_norm": 0.8207288980484009, - "learning_rate": 1.8394304832828905e-05, - "loss": 0.3327, - "step": 4230 - }, - { - "epoch": 0.3985775181931655, - "grad_norm": 0.771256685256958, - "learning_rate": 1.8393484092008404e-05, - "loss": 0.3302, - "step": 4231 - }, - { - "epoch": 0.39867172228633335, - "grad_norm": 0.6601364016532898, - "learning_rate": 
1.8392663159803482e-05, - "loss": 0.3005, - "step": 4232 - }, - { - "epoch": 0.3987659263795012, - "grad_norm": 0.8057689666748047, - "learning_rate": 1.839184203623286e-05, - "loss": 0.3253, - "step": 4233 - }, - { - "epoch": 0.39886013047266905, - "grad_norm": 0.7493592500686646, - "learning_rate": 1.8391020721315266e-05, - "loss": 0.3405, - "step": 4234 - }, - { - "epoch": 0.3989543345658369, - "grad_norm": 0.8171237707138062, - "learning_rate": 1.839019921506942e-05, - "loss": 0.3457, - "step": 4235 - }, - { - "epoch": 0.39904853865900475, - "grad_norm": 0.7632202506065369, - "learning_rate": 1.838937751751406e-05, - "loss": 0.35, - "step": 4236 - }, - { - "epoch": 0.3991427427521726, - "grad_norm": 0.8660060167312622, - "learning_rate": 1.8388555628667913e-05, - "loss": 0.364, - "step": 4237 - }, - { - "epoch": 0.39923694684534045, - "grad_norm": 0.7073898911476135, - "learning_rate": 1.8387733548549728e-05, - "loss": 0.3051, - "step": 4238 - }, - { - "epoch": 0.3993311509385083, - "grad_norm": 0.7002679705619812, - "learning_rate": 1.8386911277178242e-05, - "loss": 0.3133, - "step": 4239 - }, - { - "epoch": 0.39942535503167614, - "grad_norm": 0.777336061000824, - "learning_rate": 1.8386088814572213e-05, - "loss": 0.3331, - "step": 4240 - }, - { - "epoch": 0.399519559124844, - "grad_norm": 0.8492711782455444, - "learning_rate": 1.8385266160750386e-05, - "loss": 0.3352, - "step": 4241 - }, - { - "epoch": 0.39961376321801184, - "grad_norm": 0.7613757252693176, - "learning_rate": 1.8384443315731525e-05, - "loss": 0.3109, - "step": 4242 - }, - { - "epoch": 0.3997079673111797, - "grad_norm": 0.9914324879646301, - "learning_rate": 1.8383620279534387e-05, - "loss": 0.3336, - "step": 4243 - }, - { - "epoch": 0.39980217140434754, - "grad_norm": 0.8649870157241821, - "learning_rate": 1.8382797052177746e-05, - "loss": 0.3809, - "step": 4244 - }, - { - "epoch": 0.3998963754975154, - "grad_norm": 0.7795773148536682, - "learning_rate": 1.8381973633680365e-05, - "loss": 0.3073, - "step": 4245 - }, - { - "epoch": 0.39999057959068324, - "grad_norm": 0.7418776154518127, - "learning_rate": 1.838115002406102e-05, - "loss": 0.3172, - "step": 4246 - }, - { - "epoch": 0.4000847836838511, - "grad_norm": 0.7328126430511475, - "learning_rate": 1.8380326223338497e-05, - "loss": 0.3487, - "step": 4247 - }, - { - "epoch": 0.40017898777701894, - "grad_norm": 1.4264343976974487, - "learning_rate": 1.8379502231531572e-05, - "loss": 0.3381, - "step": 4248 - }, - { - "epoch": 0.4002731918701868, - "grad_norm": 0.7884756326675415, - "learning_rate": 1.8378678048659038e-05, - "loss": 0.3705, - "step": 4249 - }, - { - "epoch": 0.40036739596335463, - "grad_norm": 0.7564305663108826, - "learning_rate": 1.8377853674739687e-05, - "loss": 0.3195, - "step": 4250 - }, - { - "epoch": 0.4004616000565225, - "grad_norm": 0.8775674700737, - "learning_rate": 1.8377029109792315e-05, - "loss": 0.3315, - "step": 4251 - }, - { - "epoch": 0.40055580414969033, - "grad_norm": 0.8128756880760193, - "learning_rate": 1.8376204353835725e-05, - "loss": 0.3092, - "step": 4252 - }, - { - "epoch": 0.4006500082428582, - "grad_norm": 0.7862222790718079, - "learning_rate": 1.8375379406888722e-05, - "loss": 0.3446, - "step": 4253 - }, - { - "epoch": 0.400744212336026, - "grad_norm": 0.8245273232460022, - "learning_rate": 1.8374554268970114e-05, - "loss": 0.386, - "step": 4254 - }, - { - "epoch": 0.4008384164291938, - "grad_norm": 0.9503718018531799, - "learning_rate": 1.8373728940098717e-05, - "loss": 0.3433, - "step": 4255 - }, - { - "epoch": 
0.40093262052236167, - "grad_norm": 0.9467640519142151, - "learning_rate": 1.8372903420293354e-05, - "loss": 0.4113, - "step": 4256 - }, - { - "epoch": 0.4010268246155295, - "grad_norm": 0.8559558391571045, - "learning_rate": 1.8372077709572843e-05, - "loss": 0.3089, - "step": 4257 - }, - { - "epoch": 0.40112102870869737, - "grad_norm": 0.7818161845207214, - "learning_rate": 1.8371251807956008e-05, - "loss": 0.3298, - "step": 4258 - }, - { - "epoch": 0.4012152328018652, - "grad_norm": 0.8104586005210876, - "learning_rate": 1.8370425715461688e-05, - "loss": 0.3446, - "step": 4259 - }, - { - "epoch": 0.40130943689503307, - "grad_norm": 0.8099079728126526, - "learning_rate": 1.8369599432108717e-05, - "loss": 0.3472, - "step": 4260 - }, - { - "epoch": 0.4014036409882009, - "grad_norm": 0.7487495541572571, - "learning_rate": 1.8368772957915936e-05, - "loss": 0.3429, - "step": 4261 - }, - { - "epoch": 0.40149784508136876, - "grad_norm": 1.418947458267212, - "learning_rate": 1.8367946292902187e-05, - "loss": 0.3338, - "step": 4262 - }, - { - "epoch": 0.4015920491745366, - "grad_norm": 0.8419050574302673, - "learning_rate": 1.8367119437086322e-05, - "loss": 0.3404, - "step": 4263 - }, - { - "epoch": 0.40168625326770446, - "grad_norm": 0.7401953339576721, - "learning_rate": 1.8366292390487195e-05, - "loss": 0.2862, - "step": 4264 - }, - { - "epoch": 0.4017804573608723, - "grad_norm": 0.8642869591712952, - "learning_rate": 1.836546515312366e-05, - "loss": 0.3216, - "step": 4265 - }, - { - "epoch": 0.40187466145404016, - "grad_norm": 0.7470360994338989, - "learning_rate": 1.8364637725014583e-05, - "loss": 0.3298, - "step": 4266 - }, - { - "epoch": 0.401968865547208, - "grad_norm": 0.8600407838821411, - "learning_rate": 1.8363810106178832e-05, - "loss": 0.3149, - "step": 4267 - }, - { - "epoch": 0.40206306964037586, - "grad_norm": 0.900352418422699, - "learning_rate": 1.8362982296635276e-05, - "loss": 0.3499, - "step": 4268 - }, - { - "epoch": 0.4021572737335437, - "grad_norm": 0.7902324199676514, - "learning_rate": 1.8362154296402787e-05, - "loss": 0.323, - "step": 4269 - }, - { - "epoch": 0.40225147782671156, - "grad_norm": 0.8110232353210449, - "learning_rate": 1.8361326105500252e-05, - "loss": 0.3486, - "step": 4270 - }, - { - "epoch": 0.4023456819198794, - "grad_norm": 0.7503052949905396, - "learning_rate": 1.8360497723946544e-05, - "loss": 0.3286, - "step": 4271 - }, - { - "epoch": 0.40243988601304725, - "grad_norm": 0.9421465396881104, - "learning_rate": 1.835966915176056e-05, - "loss": 0.3038, - "step": 4272 - }, - { - "epoch": 0.4025340901062151, - "grad_norm": 0.816394567489624, - "learning_rate": 1.8358840388961196e-05, - "loss": 0.3973, - "step": 4273 - }, - { - "epoch": 0.40262829419938295, - "grad_norm": 0.7936789989471436, - "learning_rate": 1.835801143556734e-05, - "loss": 0.3043, - "step": 4274 - }, - { - "epoch": 0.4027224982925508, - "grad_norm": 0.9342827796936035, - "learning_rate": 1.8357182291597897e-05, - "loss": 0.386, - "step": 4275 - }, - { - "epoch": 0.40281670238571865, - "grad_norm": 1.0587079524993896, - "learning_rate": 1.8356352957071775e-05, - "loss": 0.3793, - "step": 4276 - }, - { - "epoch": 0.4029109064788865, - "grad_norm": 1.2501111030578613, - "learning_rate": 1.835552343200788e-05, - "loss": 0.3369, - "step": 4277 - }, - { - "epoch": 0.40300511057205435, - "grad_norm": 0.8193202018737793, - "learning_rate": 1.8354693716425132e-05, - "loss": 0.3885, - "step": 4278 - }, - { - "epoch": 0.4030993146652222, - "grad_norm": 0.7024616003036499, - "learning_rate": 
1.8353863810342444e-05, - "loss": 0.3362, - "step": 4279 - }, - { - "epoch": 0.40319351875839005, - "grad_norm": 0.9073119759559631, - "learning_rate": 1.835303371377874e-05, - "loss": 0.3905, - "step": 4280 - }, - { - "epoch": 0.4032877228515579, - "grad_norm": 0.7984116077423096, - "learning_rate": 1.835220342675295e-05, - "loss": 0.3215, - "step": 4281 - }, - { - "epoch": 0.40338192694472574, - "grad_norm": 0.8524281978607178, - "learning_rate": 1.835137294928401e-05, - "loss": 0.359, - "step": 4282 - }, - { - "epoch": 0.4034761310378936, - "grad_norm": 0.8253399729728699, - "learning_rate": 1.8350542281390845e-05, - "loss": 0.397, - "step": 4283 - }, - { - "epoch": 0.40357033513106144, - "grad_norm": 0.769839346408844, - "learning_rate": 1.8349711423092403e-05, - "loss": 0.3201, - "step": 4284 - }, - { - "epoch": 0.4036645392242293, - "grad_norm": 0.7591856718063354, - "learning_rate": 1.834888037440763e-05, - "loss": 0.3798, - "step": 4285 - }, - { - "epoch": 0.40375874331739714, - "grad_norm": 0.8179476857185364, - "learning_rate": 1.834804913535547e-05, - "loss": 0.3247, - "step": 4286 - }, - { - "epoch": 0.403852947410565, - "grad_norm": 0.7844939827919006, - "learning_rate": 1.834721770595488e-05, - "loss": 0.3277, - "step": 4287 - }, - { - "epoch": 0.40394715150373284, - "grad_norm": 0.7527651786804199, - "learning_rate": 1.834638608622482e-05, - "loss": 0.3579, - "step": 4288 - }, - { - "epoch": 0.4040413555969007, - "grad_norm": 0.8115012645721436, - "learning_rate": 1.8345554276184247e-05, - "loss": 0.3622, - "step": 4289 - }, - { - "epoch": 0.40413555969006854, - "grad_norm": 0.7577059864997864, - "learning_rate": 1.834472227585213e-05, - "loss": 0.3217, - "step": 4290 - }, - { - "epoch": 0.4042297637832364, - "grad_norm": 0.7256118655204773, - "learning_rate": 1.8343890085247437e-05, - "loss": 0.3539, - "step": 4291 - }, - { - "epoch": 0.40432396787640423, - "grad_norm": 0.8135298490524292, - "learning_rate": 1.834305770438915e-05, - "loss": 0.334, - "step": 4292 - }, - { - "epoch": 0.4044181719695721, - "grad_norm": 0.6899747252464294, - "learning_rate": 1.8342225133296244e-05, - "loss": 0.318, - "step": 4293 - }, - { - "epoch": 0.40451237606273993, - "grad_norm": 0.8328760266304016, - "learning_rate": 1.83413923719877e-05, - "loss": 0.3621, - "step": 4294 - }, - { - "epoch": 0.4046065801559078, - "grad_norm": 0.7262910604476929, - "learning_rate": 1.8340559420482513e-05, - "loss": 0.3362, - "step": 4295 - }, - { - "epoch": 0.40470078424907563, - "grad_norm": 0.8466453552246094, - "learning_rate": 1.833972627879967e-05, - "loss": 0.3469, - "step": 4296 - }, - { - "epoch": 0.4047949883422435, - "grad_norm": 0.9842650890350342, - "learning_rate": 1.8338892946958172e-05, - "loss": 0.3442, - "step": 4297 - }, - { - "epoch": 0.4048891924354113, - "grad_norm": 0.7387780547142029, - "learning_rate": 1.8338059424977017e-05, - "loss": 0.3354, - "step": 4298 - }, - { - "epoch": 0.4049833965285792, - "grad_norm": 0.7268993854522705, - "learning_rate": 1.8337225712875213e-05, - "loss": 0.3373, - "step": 4299 - }, - { - "epoch": 0.405077600621747, - "grad_norm": 0.7663114666938782, - "learning_rate": 1.8336391810671773e-05, - "loss": 0.3274, - "step": 4300 - }, - { - "epoch": 0.4051718047149149, - "grad_norm": 0.809139609336853, - "learning_rate": 1.8335557718385702e-05, - "loss": 0.3712, - "step": 4301 - }, - { - "epoch": 0.4052660088080827, - "grad_norm": 0.8831266760826111, - "learning_rate": 1.8334723436036022e-05, - "loss": 0.3433, - "step": 4302 - }, - { - "epoch": 
0.40536021290125057, - "grad_norm": 0.7973636984825134, - "learning_rate": 1.8333888963641762e-05, - "loss": 0.3761, - "step": 4303 - }, - { - "epoch": 0.4054544169944184, - "grad_norm": 0.8654884099960327, - "learning_rate": 1.8333054301221945e-05, - "loss": 0.3722, - "step": 4304 - }, - { - "epoch": 0.40554862108758627, - "grad_norm": 0.6960375308990479, - "learning_rate": 1.8332219448795602e-05, - "loss": 0.3297, - "step": 4305 - }, - { - "epoch": 0.4056428251807541, - "grad_norm": 0.9065430164337158, - "learning_rate": 1.8331384406381768e-05, - "loss": 0.355, - "step": 4306 - }, - { - "epoch": 0.40573702927392197, - "grad_norm": 0.8746337890625, - "learning_rate": 1.8330549173999484e-05, - "loss": 0.3384, - "step": 4307 - }, - { - "epoch": 0.4058312333670898, - "grad_norm": 0.8848806023597717, - "learning_rate": 1.83297137516678e-05, - "loss": 0.3688, - "step": 4308 - }, - { - "epoch": 0.40592543746025767, - "grad_norm": 0.7663493752479553, - "learning_rate": 1.8328878139405756e-05, - "loss": 0.2883, - "step": 4309 - }, - { - "epoch": 0.4060196415534255, - "grad_norm": 0.8225603699684143, - "learning_rate": 1.8328042337232412e-05, - "loss": 0.3586, - "step": 4310 - }, - { - "epoch": 0.40611384564659336, - "grad_norm": 0.7770514488220215, - "learning_rate": 1.832720634516682e-05, - "loss": 0.3499, - "step": 4311 - }, - { - "epoch": 0.4062080497397612, - "grad_norm": 0.7962725162506104, - "learning_rate": 1.832637016322805e-05, - "loss": 0.3759, - "step": 4312 - }, - { - "epoch": 0.40630225383292906, - "grad_norm": 0.8327206373214722, - "learning_rate": 1.832553379143516e-05, - "loss": 0.3704, - "step": 4313 - }, - { - "epoch": 0.4063964579260969, - "grad_norm": 0.7537351250648499, - "learning_rate": 1.832469722980722e-05, - "loss": 0.3408, - "step": 4314 - }, - { - "epoch": 0.40649066201926476, - "grad_norm": 0.6225210428237915, - "learning_rate": 1.8323860478363317e-05, - "loss": 0.2987, - "step": 4315 - }, - { - "epoch": 0.4065848661124326, - "grad_norm": 0.8770931363105774, - "learning_rate": 1.8323023537122518e-05, - "loss": 0.3177, - "step": 4316 - }, - { - "epoch": 0.40667907020560046, - "grad_norm": 0.8675561547279358, - "learning_rate": 1.8322186406103913e-05, - "loss": 0.3088, - "step": 4317 - }, - { - "epoch": 0.4067732742987683, - "grad_norm": 0.7490965723991394, - "learning_rate": 1.8321349085326583e-05, - "loss": 0.3347, - "step": 4318 - }, - { - "epoch": 0.40686747839193615, - "grad_norm": 0.8933178782463074, - "learning_rate": 1.8320511574809625e-05, - "loss": 0.3216, - "step": 4319 - }, - { - "epoch": 0.406961682485104, - "grad_norm": 0.9704069495201111, - "learning_rate": 1.831967387457214e-05, - "loss": 0.407, - "step": 4320 - }, - { - "epoch": 0.40705588657827185, - "grad_norm": 0.7570332884788513, - "learning_rate": 1.831883598463322e-05, - "loss": 0.3584, - "step": 4321 - }, - { - "epoch": 0.4071500906714397, - "grad_norm": 0.7434055805206299, - "learning_rate": 1.8317997905011975e-05, - "loss": 0.3329, - "step": 4322 - }, - { - "epoch": 0.4072442947646075, - "grad_norm": 0.8250865340232849, - "learning_rate": 1.8317159635727517e-05, - "loss": 0.3339, - "step": 4323 - }, - { - "epoch": 0.40733849885777534, - "grad_norm": 0.9020285606384277, - "learning_rate": 1.831632117679895e-05, - "loss": 0.3378, - "step": 4324 - }, - { - "epoch": 0.4074327029509432, - "grad_norm": 0.8903120160102844, - "learning_rate": 1.8315482528245404e-05, - "loss": 0.4144, - "step": 4325 - }, - { - "epoch": 0.40752690704411104, - "grad_norm": 0.8243562579154968, - "learning_rate": 
1.8314643690085992e-05, - "loss": 0.3239, - "step": 4326 - }, - { - "epoch": 0.4076211111372789, - "grad_norm": 0.7726811170578003, - "learning_rate": 1.8313804662339847e-05, - "loss": 0.3523, - "step": 4327 - }, - { - "epoch": 0.40771531523044674, - "grad_norm": 0.7769786715507507, - "learning_rate": 1.83129654450261e-05, - "loss": 0.3454, - "step": 4328 - }, - { - "epoch": 0.4078095193236146, - "grad_norm": 0.8535809516906738, - "learning_rate": 1.8312126038163884e-05, - "loss": 0.4068, - "step": 4329 - }, - { - "epoch": 0.40790372341678244, - "grad_norm": 0.6568573713302612, - "learning_rate": 1.831128644177234e-05, - "loss": 0.2698, - "step": 4330 - }, - { - "epoch": 0.4079979275099503, - "grad_norm": 0.7760324478149414, - "learning_rate": 1.8310446655870607e-05, - "loss": 0.3258, - "step": 4331 - }, - { - "epoch": 0.40809213160311814, - "grad_norm": 0.8847746253013611, - "learning_rate": 1.830960668047784e-05, - "loss": 0.384, - "step": 4332 - }, - { - "epoch": 0.408186335696286, - "grad_norm": 0.7380432486534119, - "learning_rate": 1.830876651561319e-05, - "loss": 0.352, - "step": 4333 - }, - { - "epoch": 0.40828053978945383, - "grad_norm": 0.8275204300880432, - "learning_rate": 1.8307926161295818e-05, - "loss": 0.3694, - "step": 4334 - }, - { - "epoch": 0.4083747438826217, - "grad_norm": 0.8669615387916565, - "learning_rate": 1.8307085617544876e-05, - "loss": 0.3764, - "step": 4335 - }, - { - "epoch": 0.40846894797578953, - "grad_norm": 1.0183978080749512, - "learning_rate": 1.8306244884379536e-05, - "loss": 0.3497, - "step": 4336 - }, - { - "epoch": 0.4085631520689574, - "grad_norm": 0.7218812704086304, - "learning_rate": 1.8305403961818967e-05, - "loss": 0.3439, - "step": 4337 - }, - { - "epoch": 0.40865735616212523, - "grad_norm": 0.6889415383338928, - "learning_rate": 1.8304562849882343e-05, - "loss": 0.3266, - "step": 4338 - }, - { - "epoch": 0.4087515602552931, - "grad_norm": 0.7743616104125977, - "learning_rate": 1.8303721548588843e-05, - "loss": 0.327, - "step": 4339 - }, - { - "epoch": 0.4088457643484609, - "grad_norm": 0.7835550904273987, - "learning_rate": 1.8302880057957653e-05, - "loss": 0.3708, - "step": 4340 - }, - { - "epoch": 0.4089399684416288, - "grad_norm": 0.8127569556236267, - "learning_rate": 1.830203837800795e-05, - "loss": 0.3436, - "step": 4341 - }, - { - "epoch": 0.4090341725347966, - "grad_norm": 0.7509648203849792, - "learning_rate": 1.8301196508758935e-05, - "loss": 0.3353, - "step": 4342 - }, - { - "epoch": 0.4091283766279645, - "grad_norm": 0.7613946199417114, - "learning_rate": 1.8300354450229805e-05, - "loss": 0.3009, - "step": 4343 - }, - { - "epoch": 0.4092225807211323, - "grad_norm": 0.9445093870162964, - "learning_rate": 1.8299512202439756e-05, - "loss": 0.4089, - "step": 4344 - }, - { - "epoch": 0.40931678481430017, - "grad_norm": 0.7827582359313965, - "learning_rate": 1.829866976540799e-05, - "loss": 0.3774, - "step": 4345 - }, - { - "epoch": 0.409410988907468, - "grad_norm": 0.7520980834960938, - "learning_rate": 1.829782713915372e-05, - "loss": 0.3072, - "step": 4346 - }, - { - "epoch": 0.40950519300063587, - "grad_norm": 0.8516082763671875, - "learning_rate": 1.8296984323696162e-05, - "loss": 0.3864, - "step": 4347 - }, - { - "epoch": 0.4095993970938037, - "grad_norm": 0.7789568901062012, - "learning_rate": 1.8296141319054527e-05, - "loss": 0.3514, - "step": 4348 - }, - { - "epoch": 0.40969360118697157, - "grad_norm": 0.838067352771759, - "learning_rate": 1.829529812524804e-05, - "loss": 0.3672, - "step": 4349 - }, - { - "epoch": 
0.4097878052801394, - "grad_norm": 0.9141841530799866, - "learning_rate": 1.8294454742295927e-05, - "loss": 0.3696, - "step": 4350 - }, - { - "epoch": 0.40988200937330727, - "grad_norm": 0.7358622550964355, - "learning_rate": 1.8293611170217417e-05, - "loss": 0.3099, - "step": 4351 - }, - { - "epoch": 0.4099762134664751, - "grad_norm": 0.734024703502655, - "learning_rate": 1.8292767409031748e-05, - "loss": 0.3305, - "step": 4352 - }, - { - "epoch": 0.41007041755964296, - "grad_norm": 0.7921773791313171, - "learning_rate": 1.8291923458758157e-05, - "loss": 0.2937, - "step": 4353 - }, - { - "epoch": 0.4101646216528108, - "grad_norm": 0.7627155780792236, - "learning_rate": 1.8291079319415888e-05, - "loss": 0.3908, - "step": 4354 - }, - { - "epoch": 0.41025882574597866, - "grad_norm": 0.6983011960983276, - "learning_rate": 1.8290234991024184e-05, - "loss": 0.3196, - "step": 4355 - }, - { - "epoch": 0.4103530298391465, - "grad_norm": 0.8267301321029663, - "learning_rate": 1.8289390473602305e-05, - "loss": 0.3707, - "step": 4356 - }, - { - "epoch": 0.41044723393231436, - "grad_norm": 0.7846118807792664, - "learning_rate": 1.8288545767169503e-05, - "loss": 0.3199, - "step": 4357 - }, - { - "epoch": 0.4105414380254822, - "grad_norm": 0.749038815498352, - "learning_rate": 1.8287700871745036e-05, - "loss": 0.3106, - "step": 4358 - }, - { - "epoch": 0.41063564211865006, - "grad_norm": 0.7653847336769104, - "learning_rate": 1.8286855787348176e-05, - "loss": 0.357, - "step": 4359 - }, - { - "epoch": 0.4107298462118179, - "grad_norm": 0.860435962677002, - "learning_rate": 1.8286010513998188e-05, - "loss": 0.3663, - "step": 4360 - }, - { - "epoch": 0.41082405030498576, - "grad_norm": 0.728864848613739, - "learning_rate": 1.8285165051714343e-05, - "loss": 0.3327, - "step": 4361 - }, - { - "epoch": 0.4109182543981536, - "grad_norm": 0.9315623641014099, - "learning_rate": 1.8284319400515923e-05, - "loss": 0.3747, - "step": 4362 - }, - { - "epoch": 0.41101245849132145, - "grad_norm": 0.8237623572349548, - "learning_rate": 1.828347356042221e-05, - "loss": 0.3875, - "step": 4363 - }, - { - "epoch": 0.4111066625844893, - "grad_norm": 0.8893877267837524, - "learning_rate": 1.8282627531452487e-05, - "loss": 0.3513, - "step": 4364 - }, - { - "epoch": 0.41120086667765715, - "grad_norm": 0.7763910889625549, - "learning_rate": 1.8281781313626047e-05, - "loss": 0.3341, - "step": 4365 - }, - { - "epoch": 0.411295070770825, - "grad_norm": 0.7412164807319641, - "learning_rate": 1.8280934906962184e-05, - "loss": 0.3232, - "step": 4366 - }, - { - "epoch": 0.41138927486399285, - "grad_norm": 0.7896450161933899, - "learning_rate": 1.8280088311480203e-05, - "loss": 0.368, - "step": 4367 - }, - { - "epoch": 0.4114834789571607, - "grad_norm": 0.7353463768959045, - "learning_rate": 1.82792415271994e-05, - "loss": 0.3218, - "step": 4368 - }, - { - "epoch": 0.41157768305032855, - "grad_norm": 0.8171700239181519, - "learning_rate": 1.8278394554139086e-05, - "loss": 0.3807, - "step": 4369 - }, - { - "epoch": 0.4116718871434964, - "grad_norm": 0.7039395570755005, - "learning_rate": 1.8277547392318574e-05, - "loss": 0.2872, - "step": 4370 - }, - { - "epoch": 0.41176609123666424, - "grad_norm": 0.8705812096595764, - "learning_rate": 1.8276700041757177e-05, - "loss": 0.3588, - "step": 4371 - }, - { - "epoch": 0.4118602953298321, - "grad_norm": 0.8330321907997131, - "learning_rate": 1.8275852502474223e-05, - "loss": 0.3837, - "step": 4372 - }, - { - "epoch": 0.41195449942299994, - "grad_norm": 0.7887098789215088, - "learning_rate": 
1.8275004774489032e-05, - "loss": 0.37, - "step": 4373 - }, - { - "epoch": 0.4120487035161678, - "grad_norm": 0.8045909404754639, - "learning_rate": 1.8274156857820933e-05, - "loss": 0.3297, - "step": 4374 - }, - { - "epoch": 0.41214290760933564, - "grad_norm": 0.724745512008667, - "learning_rate": 1.8273308752489263e-05, - "loss": 0.3494, - "step": 4375 - }, - { - "epoch": 0.4122371117025035, - "grad_norm": 0.7332324385643005, - "learning_rate": 1.827246045851336e-05, - "loss": 0.3508, - "step": 4376 - }, - { - "epoch": 0.41233131579567134, - "grad_norm": 0.7815578579902649, - "learning_rate": 1.827161197591256e-05, - "loss": 0.338, - "step": 4377 - }, - { - "epoch": 0.4124255198888392, - "grad_norm": 0.8719455599784851, - "learning_rate": 1.827076330470622e-05, - "loss": 0.3601, - "step": 4378 - }, - { - "epoch": 0.41251972398200704, - "grad_norm": 0.8573183417320251, - "learning_rate": 1.8269914444913682e-05, - "loss": 0.3847, - "step": 4379 - }, - { - "epoch": 0.4126139280751749, - "grad_norm": 0.7446406483650208, - "learning_rate": 1.826906539655431e-05, - "loss": 0.344, - "step": 4380 - }, - { - "epoch": 0.41270813216834273, - "grad_norm": 0.9083143472671509, - "learning_rate": 1.8268216159647455e-05, - "loss": 0.3434, - "step": 4381 - }, - { - "epoch": 0.4128023362615106, - "grad_norm": 0.7628920078277588, - "learning_rate": 1.8267366734212483e-05, - "loss": 0.354, - "step": 4382 - }, - { - "epoch": 0.41289654035467843, - "grad_norm": 0.748205304145813, - "learning_rate": 1.8266517120268763e-05, - "loss": 0.3186, - "step": 4383 - }, - { - "epoch": 0.4129907444478463, - "grad_norm": 0.7351827621459961, - "learning_rate": 1.8265667317835673e-05, - "loss": 0.3536, - "step": 4384 - }, - { - "epoch": 0.41308494854101413, - "grad_norm": 0.8449647426605225, - "learning_rate": 1.826481732693258e-05, - "loss": 0.2795, - "step": 4385 - }, - { - "epoch": 0.413179152634182, - "grad_norm": 0.8797194361686707, - "learning_rate": 1.8263967147578875e-05, - "loss": 0.3641, - "step": 4386 - }, - { - "epoch": 0.4132733567273498, - "grad_norm": 0.7823154926300049, - "learning_rate": 1.8263116779793936e-05, - "loss": 0.3688, - "step": 4387 - }, - { - "epoch": 0.4133675608205177, - "grad_norm": 0.7594738602638245, - "learning_rate": 1.8262266223597155e-05, - "loss": 0.3202, - "step": 4388 - }, - { - "epoch": 0.4134617649136855, - "grad_norm": 0.7818530201911926, - "learning_rate": 1.826141547900793e-05, - "loss": 0.3268, - "step": 4389 - }, - { - "epoch": 0.4135559690068534, - "grad_norm": 0.7931559085845947, - "learning_rate": 1.826056454604565e-05, - "loss": 0.3095, - "step": 4390 - }, - { - "epoch": 0.4136501731000212, - "grad_norm": 0.7975097298622131, - "learning_rate": 1.8259713424729723e-05, - "loss": 0.3448, - "step": 4391 - }, - { - "epoch": 0.413744377193189, - "grad_norm": 0.7636379599571228, - "learning_rate": 1.825886211507956e-05, - "loss": 0.3324, - "step": 4392 - }, - { - "epoch": 0.41383858128635687, - "grad_norm": 0.7058627605438232, - "learning_rate": 1.825801061711457e-05, - "loss": 0.3167, - "step": 4393 - }, - { - "epoch": 0.4139327853795247, - "grad_norm": 0.8211748003959656, - "learning_rate": 1.8257158930854163e-05, - "loss": 0.39, - "step": 4394 - }, - { - "epoch": 0.41402698947269256, - "grad_norm": 0.7415569424629211, - "learning_rate": 1.8256307056317763e-05, - "loss": 0.3314, - "step": 4395 - }, - { - "epoch": 0.4141211935658604, - "grad_norm": 1.0090950727462769, - "learning_rate": 1.825545499352479e-05, - "loss": 0.3832, - "step": 4396 - }, - { - "epoch": 
0.41421539765902826, - "grad_norm": 0.7703089118003845, - "learning_rate": 1.8254602742494677e-05, - "loss": 0.3447, - "step": 4397 - }, - { - "epoch": 0.4143096017521961, - "grad_norm": 0.7498367428779602, - "learning_rate": 1.825375030324686e-05, - "loss": 0.3216, - "step": 4398 - }, - { - "epoch": 0.41440380584536396, - "grad_norm": 0.7507278919219971, - "learning_rate": 1.825289767580077e-05, - "loss": 0.3267, - "step": 4399 - }, - { - "epoch": 0.4144980099385318, - "grad_norm": 0.7983956336975098, - "learning_rate": 1.8252044860175847e-05, - "loss": 0.3676, - "step": 4400 - }, - { - "epoch": 0.41459221403169966, - "grad_norm": 0.9007934927940369, - "learning_rate": 1.825119185639154e-05, - "loss": 0.3902, - "step": 4401 - }, - { - "epoch": 0.4146864181248675, - "grad_norm": 0.8932568430900574, - "learning_rate": 1.82503386644673e-05, - "loss": 0.3512, - "step": 4402 - }, - { - "epoch": 0.41478062221803536, - "grad_norm": 0.8058125376701355, - "learning_rate": 1.824948528442258e-05, - "loss": 0.3646, - "step": 4403 - }, - { - "epoch": 0.4148748263112032, - "grad_norm": 0.8728594779968262, - "learning_rate": 1.8248631716276835e-05, - "loss": 0.3521, - "step": 4404 - }, - { - "epoch": 0.41496903040437105, - "grad_norm": 0.7817156910896301, - "learning_rate": 1.824777796004953e-05, - "loss": 0.3019, - "step": 4405 - }, - { - "epoch": 0.4150632344975389, - "grad_norm": 0.6912437081336975, - "learning_rate": 1.824692401576013e-05, - "loss": 0.3391, - "step": 4406 - }, - { - "epoch": 0.41515743859070675, - "grad_norm": 0.7800760865211487, - "learning_rate": 1.8246069883428113e-05, - "loss": 0.2977, - "step": 4407 - }, - { - "epoch": 0.4152516426838746, - "grad_norm": 0.8556133508682251, - "learning_rate": 1.8245215563072948e-05, - "loss": 0.3919, - "step": 4408 - }, - { - "epoch": 0.41534584677704245, - "grad_norm": 0.7732865810394287, - "learning_rate": 1.8244361054714118e-05, - "loss": 0.3286, - "step": 4409 - }, - { - "epoch": 0.4154400508702103, - "grad_norm": 0.7321618795394897, - "learning_rate": 1.82435063583711e-05, - "loss": 0.2987, - "step": 4410 - }, - { - "epoch": 0.41553425496337815, - "grad_norm": 0.7288196086883545, - "learning_rate": 1.8242651474063392e-05, - "loss": 0.3123, - "step": 4411 - }, - { - "epoch": 0.415628459056546, - "grad_norm": 0.7751367092132568, - "learning_rate": 1.8241796401810486e-05, - "loss": 0.3809, - "step": 4412 - }, - { - "epoch": 0.41572266314971384, - "grad_norm": 0.7491422891616821, - "learning_rate": 1.8240941141631873e-05, - "loss": 0.3318, - "step": 4413 - }, - { - "epoch": 0.4158168672428817, - "grad_norm": 1.0179657936096191, - "learning_rate": 1.8240085693547058e-05, - "loss": 0.3986, - "step": 4414 - }, - { - "epoch": 0.41591107133604954, - "grad_norm": 0.7423537969589233, - "learning_rate": 1.8239230057575542e-05, - "loss": 0.332, - "step": 4415 - }, - { - "epoch": 0.4160052754292174, - "grad_norm": 0.7375214695930481, - "learning_rate": 1.8238374233736845e-05, - "loss": 0.3516, - "step": 4416 - }, - { - "epoch": 0.41609947952238524, - "grad_norm": 0.7761600017547607, - "learning_rate": 1.823751822205047e-05, - "loss": 0.3683, - "step": 4417 - }, - { - "epoch": 0.4161936836155531, - "grad_norm": 0.8303682208061218, - "learning_rate": 1.823666202253594e-05, - "loss": 0.4009, - "step": 4418 - }, - { - "epoch": 0.41628788770872094, - "grad_norm": 0.8851097226142883, - "learning_rate": 1.8235805635212778e-05, - "loss": 0.3395, - "step": 4419 - }, - { - "epoch": 0.4163820918018888, - "grad_norm": 0.833597719669342, - "learning_rate": 
1.8234949060100513e-05, - "loss": 0.3487, - "step": 4420 - }, - { - "epoch": 0.41647629589505664, - "grad_norm": 0.9360535740852356, - "learning_rate": 1.823409229721867e-05, - "loss": 0.335, - "step": 4421 - }, - { - "epoch": 0.4165704999882245, - "grad_norm": 0.8861821293830872, - "learning_rate": 1.823323534658679e-05, - "loss": 0.3483, - "step": 4422 - }, - { - "epoch": 0.41666470408139233, - "grad_norm": 0.9428109526634216, - "learning_rate": 1.8232378208224414e-05, - "loss": 0.3561, - "step": 4423 - }, - { - "epoch": 0.4167589081745602, - "grad_norm": 0.8216004967689514, - "learning_rate": 1.823152088215108e-05, - "loss": 0.3437, - "step": 4424 - }, - { - "epoch": 0.41685311226772803, - "grad_norm": 0.7535174489021301, - "learning_rate": 1.823066336838634e-05, - "loss": 0.334, - "step": 4425 - }, - { - "epoch": 0.4169473163608959, - "grad_norm": 0.7837510108947754, - "learning_rate": 1.8229805666949745e-05, - "loss": 0.3033, - "step": 4426 - }, - { - "epoch": 0.41704152045406373, - "grad_norm": 0.7305752038955688, - "learning_rate": 1.8228947777860858e-05, - "loss": 0.3192, - "step": 4427 - }, - { - "epoch": 0.4171357245472316, - "grad_norm": 0.7885417342185974, - "learning_rate": 1.822808970113923e-05, - "loss": 0.3216, - "step": 4428 - }, - { - "epoch": 0.41722992864039943, - "grad_norm": 0.7974655628204346, - "learning_rate": 1.8227231436804434e-05, - "loss": 0.3781, - "step": 4429 - }, - { - "epoch": 0.4173241327335673, - "grad_norm": 0.8113394975662231, - "learning_rate": 1.822637298487604e-05, - "loss": 0.3168, - "step": 4430 - }, - { - "epoch": 0.4174183368267351, - "grad_norm": 0.8304500579833984, - "learning_rate": 1.8225514345373617e-05, - "loss": 0.3733, - "step": 4431 - }, - { - "epoch": 0.417512540919903, - "grad_norm": 0.7840065956115723, - "learning_rate": 1.8224655518316745e-05, - "loss": 0.3255, - "step": 4432 - }, - { - "epoch": 0.4176067450130708, - "grad_norm": 0.6819175481796265, - "learning_rate": 1.8223796503725007e-05, - "loss": 0.3034, - "step": 4433 - }, - { - "epoch": 0.4177009491062387, - "grad_norm": 0.7774067521095276, - "learning_rate": 1.8222937301617993e-05, - "loss": 0.2968, - "step": 4434 - }, - { - "epoch": 0.4177951531994065, - "grad_norm": 1.0673409700393677, - "learning_rate": 1.822207791201529e-05, - "loss": 0.3672, - "step": 4435 - }, - { - "epoch": 0.41788935729257437, - "grad_norm": 0.7810318470001221, - "learning_rate": 1.8221218334936496e-05, - "loss": 0.3414, - "step": 4436 - }, - { - "epoch": 0.4179835613857422, - "grad_norm": 0.8518936038017273, - "learning_rate": 1.822035857040121e-05, - "loss": 0.3521, - "step": 4437 - }, - { - "epoch": 0.41807776547891007, - "grad_norm": 0.8019551038742065, - "learning_rate": 1.8219498618429033e-05, - "loss": 0.3763, - "step": 4438 - }, - { - "epoch": 0.4181719695720779, - "grad_norm": 0.7329900860786438, - "learning_rate": 1.8218638479039577e-05, - "loss": 0.3561, - "step": 4439 - }, - { - "epoch": 0.41826617366524577, - "grad_norm": 0.7428778409957886, - "learning_rate": 1.821777815225245e-05, - "loss": 0.3027, - "step": 4440 - }, - { - "epoch": 0.4183603777584136, - "grad_norm": 0.8585033416748047, - "learning_rate": 1.8216917638087278e-05, - "loss": 0.3644, - "step": 4441 - }, - { - "epoch": 0.41845458185158146, - "grad_norm": 0.8240845799446106, - "learning_rate": 1.8216056936563675e-05, - "loss": 0.3943, - "step": 4442 - }, - { - "epoch": 0.4185487859447493, - "grad_norm": 0.7246481776237488, - "learning_rate": 1.8215196047701264e-05, - "loss": 0.3578, - "step": 4443 - }, - { - "epoch": 
0.41864299003791716, - "grad_norm": 0.6832184195518494, - "learning_rate": 1.821433497151968e-05, - "loss": 0.3556, - "step": 4444 - }, - { - "epoch": 0.418737194131085, - "grad_norm": 0.8402509093284607, - "learning_rate": 1.8213473708038558e-05, - "loss": 0.3425, - "step": 4445 - }, - { - "epoch": 0.41883139822425286, - "grad_norm": 0.8606646656990051, - "learning_rate": 1.821261225727753e-05, - "loss": 0.4296, - "step": 4446 - }, - { - "epoch": 0.4189256023174207, - "grad_norm": 0.78255695104599, - "learning_rate": 1.821175061925624e-05, - "loss": 0.3629, - "step": 4447 - }, - { - "epoch": 0.41901980641058856, - "grad_norm": 0.8021367192268372, - "learning_rate": 1.821088879399434e-05, - "loss": 0.3207, - "step": 4448 - }, - { - "epoch": 0.4191140105037564, - "grad_norm": 0.8378644585609436, - "learning_rate": 1.8210026781511474e-05, - "loss": 0.3379, - "step": 4449 - }, - { - "epoch": 0.41920821459692426, - "grad_norm": 0.7594597935676575, - "learning_rate": 1.8209164581827304e-05, - "loss": 0.3409, - "step": 4450 - }, - { - "epoch": 0.4193024186900921, - "grad_norm": 0.7872465252876282, - "learning_rate": 1.8208302194961484e-05, - "loss": 0.3833, - "step": 4451 - }, - { - "epoch": 0.41939662278325995, - "grad_norm": 0.6902915239334106, - "learning_rate": 1.8207439620933675e-05, - "loss": 0.2875, - "step": 4452 - }, - { - "epoch": 0.4194908268764278, - "grad_norm": 0.7065645456314087, - "learning_rate": 1.8206576859763555e-05, - "loss": 0.3433, - "step": 4453 - }, - { - "epoch": 0.41958503096959565, - "grad_norm": 0.7474706768989563, - "learning_rate": 1.820571391147079e-05, - "loss": 0.3279, - "step": 4454 - }, - { - "epoch": 0.4196792350627635, - "grad_norm": 0.7695397734642029, - "learning_rate": 1.8204850776075055e-05, - "loss": 0.4055, - "step": 4455 - }, - { - "epoch": 0.41977343915593135, - "grad_norm": 0.782455563545227, - "learning_rate": 1.8203987453596035e-05, - "loss": 0.3039, - "step": 4456 - }, - { - "epoch": 0.4198676432490992, - "grad_norm": 0.8355002403259277, - "learning_rate": 1.8203123944053414e-05, - "loss": 0.364, - "step": 4457 - }, - { - "epoch": 0.41996184734226705, - "grad_norm": 0.670702338218689, - "learning_rate": 1.8202260247466883e-05, - "loss": 0.312, - "step": 4458 - }, - { - "epoch": 0.4200560514354349, - "grad_norm": 0.7533585429191589, - "learning_rate": 1.820139636385613e-05, - "loss": 0.3209, - "step": 4459 - }, - { - "epoch": 0.42015025552860275, - "grad_norm": 0.9109524488449097, - "learning_rate": 1.8200532293240855e-05, - "loss": 0.3519, - "step": 4460 - }, - { - "epoch": 0.42024445962177054, - "grad_norm": 1.1653105020523071, - "learning_rate": 1.819966803564076e-05, - "loss": 0.3252, - "step": 4461 - }, - { - "epoch": 0.4203386637149384, - "grad_norm": 0.7316860556602478, - "learning_rate": 1.8198803591075556e-05, - "loss": 0.3401, - "step": 4462 - }, - { - "epoch": 0.42043286780810624, - "grad_norm": 0.8423137664794922, - "learning_rate": 1.8197938959564952e-05, - "loss": 0.3575, - "step": 4463 - }, - { - "epoch": 0.4205270719012741, - "grad_norm": 0.7673403024673462, - "learning_rate": 1.819707414112866e-05, - "loss": 0.3411, - "step": 4464 - }, - { - "epoch": 0.42062127599444193, - "grad_norm": 0.8541059494018555, - "learning_rate": 1.81962091357864e-05, - "loss": 0.3374, - "step": 4465 - }, - { - "epoch": 0.4207154800876098, - "grad_norm": 0.8002111315727234, - "learning_rate": 1.8195343943557894e-05, - "loss": 0.3106, - "step": 4466 - }, - { - "epoch": 0.42080968418077763, - "grad_norm": 0.7413492202758789, - "learning_rate": 
1.8194478564462878e-05, - "loss": 0.3338, - "step": 4467 - }, - { - "epoch": 0.4209038882739455, - "grad_norm": 0.7816640138626099, - "learning_rate": 1.819361299852107e-05, - "loss": 0.3172, - "step": 4468 - }, - { - "epoch": 0.42099809236711333, - "grad_norm": 0.8512669205665588, - "learning_rate": 1.8192747245752218e-05, - "loss": 0.3707, - "step": 4469 - }, - { - "epoch": 0.4210922964602812, - "grad_norm": 0.7964914441108704, - "learning_rate": 1.819188130617606e-05, - "loss": 0.328, - "step": 4470 - }, - { - "epoch": 0.42118650055344903, - "grad_norm": 0.816870927810669, - "learning_rate": 1.819101517981234e-05, - "loss": 0.3275, - "step": 4471 - }, - { - "epoch": 0.4212807046466169, - "grad_norm": 0.7393790483474731, - "learning_rate": 1.81901488666808e-05, - "loss": 0.3253, - "step": 4472 - }, - { - "epoch": 0.4213749087397847, - "grad_norm": 0.6618467569351196, - "learning_rate": 1.8189282366801204e-05, - "loss": 0.2977, - "step": 4473 - }, - { - "epoch": 0.4214691128329526, - "grad_norm": 0.9202789068222046, - "learning_rate": 1.8188415680193303e-05, - "loss": 0.3779, - "step": 4474 - }, - { - "epoch": 0.4215633169261204, - "grad_norm": 0.8780861496925354, - "learning_rate": 1.8187548806876863e-05, - "loss": 0.3342, - "step": 4475 - }, - { - "epoch": 0.4216575210192883, - "grad_norm": 0.7859781384468079, - "learning_rate": 1.8186681746871645e-05, - "loss": 0.3419, - "step": 4476 - }, - { - "epoch": 0.4217517251124561, - "grad_norm": 0.9070796370506287, - "learning_rate": 1.8185814500197424e-05, - "loss": 0.3564, - "step": 4477 - }, - { - "epoch": 0.42184592920562397, - "grad_norm": 1.0571869611740112, - "learning_rate": 1.8184947066873974e-05, - "loss": 0.3621, - "step": 4478 - }, - { - "epoch": 0.4219401332987918, - "grad_norm": 0.7637447118759155, - "learning_rate": 1.818407944692107e-05, - "loss": 0.367, - "step": 4479 - }, - { - "epoch": 0.42203433739195967, - "grad_norm": 0.6667627096176147, - "learning_rate": 1.81832116403585e-05, - "loss": 0.3101, - "step": 4480 - }, - { - "epoch": 0.4221285414851275, - "grad_norm": 0.6804084777832031, - "learning_rate": 1.818234364720605e-05, - "loss": 0.2992, - "step": 4481 - }, - { - "epoch": 0.42222274557829537, - "grad_norm": 0.7190942764282227, - "learning_rate": 1.8181475467483508e-05, - "loss": 0.3627, - "step": 4482 - }, - { - "epoch": 0.4223169496714632, - "grad_norm": 0.7158463001251221, - "learning_rate": 1.8180607101210675e-05, - "loss": 0.3421, - "step": 4483 - }, - { - "epoch": 0.42241115376463106, - "grad_norm": 0.7563744783401489, - "learning_rate": 1.8179738548407347e-05, - "loss": 0.3024, - "step": 4484 - }, - { - "epoch": 0.4225053578577989, - "grad_norm": 0.8165837526321411, - "learning_rate": 1.8178869809093327e-05, - "loss": 0.3404, - "step": 4485 - }, - { - "epoch": 0.42259956195096676, - "grad_norm": 0.6919609904289246, - "learning_rate": 1.8178000883288432e-05, - "loss": 0.3356, - "step": 4486 - }, - { - "epoch": 0.4226937660441346, - "grad_norm": 0.778649091720581, - "learning_rate": 1.8177131771012463e-05, - "loss": 0.3981, - "step": 4487 - }, - { - "epoch": 0.42278797013730246, - "grad_norm": 1.1520659923553467, - "learning_rate": 1.817626247228525e-05, - "loss": 0.3886, - "step": 4488 - }, - { - "epoch": 0.4228821742304703, - "grad_norm": 0.8374298214912415, - "learning_rate": 1.8175392987126603e-05, - "loss": 0.3575, - "step": 4489 - }, - { - "epoch": 0.42297637832363816, - "grad_norm": 0.9034845232963562, - "learning_rate": 1.8174523315556354e-05, - "loss": 0.3631, - "step": 4490 - }, - { - "epoch": 
0.423070582416806, - "grad_norm": 0.7980120778083801, - "learning_rate": 1.817365345759433e-05, - "loss": 0.3782, - "step": 4491 - }, - { - "epoch": 0.42316478650997386, - "grad_norm": 0.7888476252555847, - "learning_rate": 1.817278341326037e-05, - "loss": 0.3385, - "step": 4492 - }, - { - "epoch": 0.4232589906031417, - "grad_norm": 0.9399864673614502, - "learning_rate": 1.8171913182574306e-05, - "loss": 0.3892, - "step": 4493 - }, - { - "epoch": 0.42335319469630955, - "grad_norm": 0.8143534064292908, - "learning_rate": 1.8171042765555985e-05, - "loss": 0.4158, - "step": 4494 - }, - { - "epoch": 0.4234473987894774, - "grad_norm": 0.8164240717887878, - "learning_rate": 1.817017216222525e-05, - "loss": 0.3458, - "step": 4495 - }, - { - "epoch": 0.42354160288264525, - "grad_norm": 0.8037379384040833, - "learning_rate": 1.8169301372601955e-05, - "loss": 0.3435, - "step": 4496 - }, - { - "epoch": 0.4236358069758131, - "grad_norm": 0.801935076713562, - "learning_rate": 1.8168430396705956e-05, - "loss": 0.3151, - "step": 4497 - }, - { - "epoch": 0.42373001106898095, - "grad_norm": 0.899776816368103, - "learning_rate": 1.8167559234557108e-05, - "loss": 0.3883, - "step": 4498 - }, - { - "epoch": 0.4238242151621488, - "grad_norm": 0.8704493641853333, - "learning_rate": 1.8166687886175283e-05, - "loss": 0.4019, - "step": 4499 - }, - { - "epoch": 0.42391841925531665, - "grad_norm": 0.7331259250640869, - "learning_rate": 1.8165816351580342e-05, - "loss": 0.3274, - "step": 4500 - }, - { - "epoch": 0.4240126233484845, - "grad_norm": 0.7820464372634888, - "learning_rate": 1.8164944630792158e-05, - "loss": 0.3836, - "step": 4501 - }, - { - "epoch": 0.42410682744165235, - "grad_norm": 0.8430935740470886, - "learning_rate": 1.816407272383061e-05, - "loss": 0.3191, - "step": 4502 - }, - { - "epoch": 0.4242010315348202, - "grad_norm": 1.1456849575042725, - "learning_rate": 1.8163200630715575e-05, - "loss": 0.3613, - "step": 4503 - }, - { - "epoch": 0.42429523562798804, - "grad_norm": 0.8349385857582092, - "learning_rate": 1.8162328351466947e-05, - "loss": 0.336, - "step": 4504 - }, - { - "epoch": 0.4243894397211559, - "grad_norm": 0.7342354655265808, - "learning_rate": 1.8161455886104608e-05, - "loss": 0.3007, - "step": 4505 - }, - { - "epoch": 0.42448364381432374, - "grad_norm": 0.7335234880447388, - "learning_rate": 1.816058323464845e-05, - "loss": 0.3106, - "step": 4506 - }, - { - "epoch": 0.4245778479074916, - "grad_norm": 0.8044220209121704, - "learning_rate": 1.8159710397118372e-05, - "loss": 0.3767, - "step": 4507 - }, - { - "epoch": 0.42467205200065944, - "grad_norm": 0.7103661298751831, - "learning_rate": 1.815883737353428e-05, - "loss": 0.3192, - "step": 4508 - }, - { - "epoch": 0.4247662560938273, - "grad_norm": 0.7537291646003723, - "learning_rate": 1.815796416391608e-05, - "loss": 0.377, - "step": 4509 - }, - { - "epoch": 0.42486046018699514, - "grad_norm": 0.7830828428268433, - "learning_rate": 1.815709076828368e-05, - "loss": 0.3996, - "step": 4510 - }, - { - "epoch": 0.424954664280163, - "grad_norm": 0.832629919052124, - "learning_rate": 1.8156217186656992e-05, - "loss": 0.3847, - "step": 4511 - }, - { - "epoch": 0.42504886837333083, - "grad_norm": 0.7425336241722107, - "learning_rate": 1.815534341905594e-05, - "loss": 0.3632, - "step": 4512 - }, - { - "epoch": 0.4251430724664987, - "grad_norm": 0.8146251440048218, - "learning_rate": 1.8154469465500447e-05, - "loss": 0.3255, - "step": 4513 - }, - { - "epoch": 0.42523727655966653, - "grad_norm": 0.6974042654037476, - "learning_rate": 
1.815359532601044e-05, - "loss": 0.3256, - "step": 4514 - }, - { - "epoch": 0.4253314806528344, - "grad_norm": 0.6622070670127869, - "learning_rate": 1.815272100060585e-05, - "loss": 0.3304, - "step": 4515 - }, - { - "epoch": 0.42542568474600223, - "grad_norm": 0.7694042921066284, - "learning_rate": 1.8151846489306607e-05, - "loss": 0.3269, - "step": 4516 - }, - { - "epoch": 0.4255198888391701, - "grad_norm": 0.7804112434387207, - "learning_rate": 1.8150971792132663e-05, - "loss": 0.3764, - "step": 4517 - }, - { - "epoch": 0.42561409293233793, - "grad_norm": 0.7945122718811035, - "learning_rate": 1.8150096909103955e-05, - "loss": 0.3396, - "step": 4518 - }, - { - "epoch": 0.4257082970255058, - "grad_norm": 0.6925581097602844, - "learning_rate": 1.814922184024043e-05, - "loss": 0.3447, - "step": 4519 - }, - { - "epoch": 0.4258025011186736, - "grad_norm": 0.8137907385826111, - "learning_rate": 1.8148346585562048e-05, - "loss": 0.3574, - "step": 4520 - }, - { - "epoch": 0.4258967052118415, - "grad_norm": 1.1398564577102661, - "learning_rate": 1.814747114508876e-05, - "loss": 0.319, - "step": 4521 - }, - { - "epoch": 0.4259909093050093, - "grad_norm": 0.8205663561820984, - "learning_rate": 1.814659551884053e-05, - "loss": 0.3725, - "step": 4522 - }, - { - "epoch": 0.4260851133981772, - "grad_norm": 1.0114045143127441, - "learning_rate": 1.8145719706837322e-05, - "loss": 0.3379, - "step": 4523 - }, - { - "epoch": 0.426179317491345, - "grad_norm": 0.7457922697067261, - "learning_rate": 1.814484370909911e-05, - "loss": 0.3423, - "step": 4524 - }, - { - "epoch": 0.42627352158451287, - "grad_norm": 0.8835598230361938, - "learning_rate": 1.8143967525645863e-05, - "loss": 0.3538, - "step": 4525 - }, - { - "epoch": 0.4263677256776807, - "grad_norm": 0.748531699180603, - "learning_rate": 1.8143091156497565e-05, - "loss": 0.3278, - "step": 4526 - }, - { - "epoch": 0.42646192977084857, - "grad_norm": 0.7603402137756348, - "learning_rate": 1.814221460167419e-05, - "loss": 0.3628, - "step": 4527 - }, - { - "epoch": 0.4265561338640164, - "grad_norm": 0.8315427303314209, - "learning_rate": 1.814133786119573e-05, - "loss": 0.376, - "step": 4528 - }, - { - "epoch": 0.42665033795718427, - "grad_norm": 0.8794881105422974, - "learning_rate": 1.814046093508218e-05, - "loss": 0.3615, - "step": 4529 - }, - { - "epoch": 0.42674454205035206, - "grad_norm": 0.8055482506752014, - "learning_rate": 1.813958382335353e-05, - "loss": 0.3395, - "step": 4530 - }, - { - "epoch": 0.4268387461435199, - "grad_norm": 0.7016241550445557, - "learning_rate": 1.813870652602978e-05, - "loss": 0.3322, - "step": 4531 - }, - { - "epoch": 0.42693295023668776, - "grad_norm": 0.8527817726135254, - "learning_rate": 1.813782904313093e-05, - "loss": 0.3761, - "step": 4532 - }, - { - "epoch": 0.4270271543298556, - "grad_norm": 0.8396844863891602, - "learning_rate": 1.8136951374677e-05, - "loss": 0.3072, - "step": 4533 - }, - { - "epoch": 0.42712135842302346, - "grad_norm": 0.8562626838684082, - "learning_rate": 1.8136073520687992e-05, - "loss": 0.3916, - "step": 4534 - }, - { - "epoch": 0.4272155625161913, - "grad_norm": 0.7464911341667175, - "learning_rate": 1.8135195481183925e-05, - "loss": 0.3768, - "step": 4535 - }, - { - "epoch": 0.42730976660935915, - "grad_norm": 1.3565607070922852, - "learning_rate": 1.813431725618482e-05, - "loss": 0.3447, - "step": 4536 - }, - { - "epoch": 0.427403970702527, - "grad_norm": 0.7935023903846741, - "learning_rate": 1.81334388457107e-05, - "loss": 0.3382, - "step": 4537 - }, - { - "epoch": 
0.42749817479569485, - "grad_norm": 0.7430272102355957, - "learning_rate": 1.8132560249781597e-05, - "loss": 0.3167, - "step": 4538 - }, - { - "epoch": 0.4275923788888627, - "grad_norm": 0.7452899813652039, - "learning_rate": 1.813168146841754e-05, - "loss": 0.3518, - "step": 4539 - }, - { - "epoch": 0.42768658298203055, - "grad_norm": 0.7788861393928528, - "learning_rate": 1.8130802501638575e-05, - "loss": 0.3556, - "step": 4540 - }, - { - "epoch": 0.4277807870751984, - "grad_norm": 0.7843717336654663, - "learning_rate": 1.8129923349464734e-05, - "loss": 0.3903, - "step": 4541 - }, - { - "epoch": 0.42787499116836625, - "grad_norm": 0.8972139954566956, - "learning_rate": 1.812904401191607e-05, - "loss": 0.3327, - "step": 4542 - }, - { - "epoch": 0.4279691952615341, - "grad_norm": 0.7734087705612183, - "learning_rate": 1.812816448901263e-05, - "loss": 0.3129, - "step": 4543 - }, - { - "epoch": 0.42806339935470195, - "grad_norm": 0.7029120922088623, - "learning_rate": 1.812728478077447e-05, - "loss": 0.3349, - "step": 4544 - }, - { - "epoch": 0.4281576034478698, - "grad_norm": 0.7353888750076294, - "learning_rate": 1.8126404887221646e-05, - "loss": 0.3223, - "step": 4545 - }, - { - "epoch": 0.42825180754103764, - "grad_norm": 0.7380049824714661, - "learning_rate": 1.812552480837422e-05, - "loss": 0.3266, - "step": 4546 - }, - { - "epoch": 0.4283460116342055, - "grad_norm": 0.7154632210731506, - "learning_rate": 1.812464454425227e-05, - "loss": 0.3189, - "step": 4547 - }, - { - "epoch": 0.42844021572737334, - "grad_norm": 0.6878069043159485, - "learning_rate": 1.8123764094875855e-05, - "loss": 0.3043, - "step": 4548 - }, - { - "epoch": 0.4285344198205412, - "grad_norm": 0.8377850651741028, - "learning_rate": 1.8122883460265055e-05, - "loss": 0.355, - "step": 4549 - }, - { - "epoch": 0.42862862391370904, - "grad_norm": 0.7448782920837402, - "learning_rate": 1.812200264043995e-05, - "loss": 0.3119, - "step": 4550 - }, - { - "epoch": 0.4287228280068769, - "grad_norm": 0.8376103043556213, - "learning_rate": 1.8121121635420623e-05, - "loss": 0.3454, - "step": 4551 - }, - { - "epoch": 0.42881703210004474, - "grad_norm": 0.7931928038597107, - "learning_rate": 1.8120240445227164e-05, - "loss": 0.2978, - "step": 4552 - }, - { - "epoch": 0.4289112361932126, - "grad_norm": 0.8019053339958191, - "learning_rate": 1.8119359069879665e-05, - "loss": 0.3702, - "step": 4553 - }, - { - "epoch": 0.42900544028638044, - "grad_norm": 0.9101332426071167, - "learning_rate": 1.811847750939822e-05, - "loss": 0.3298, - "step": 4554 - }, - { - "epoch": 0.4290996443795483, - "grad_norm": 0.7293367385864258, - "learning_rate": 1.8117595763802938e-05, - "loss": 0.3044, - "step": 4555 - }, - { - "epoch": 0.42919384847271613, - "grad_norm": 0.8479684591293335, - "learning_rate": 1.8116713833113913e-05, - "loss": 0.3822, - "step": 4556 - }, - { - "epoch": 0.429288052565884, - "grad_norm": 0.7806784510612488, - "learning_rate": 1.8115831717351263e-05, - "loss": 0.3257, - "step": 4557 - }, - { - "epoch": 0.42938225665905183, - "grad_norm": 0.8044211864471436, - "learning_rate": 1.81149494165351e-05, - "loss": 0.3169, - "step": 4558 - }, - { - "epoch": 0.4294764607522197, - "grad_norm": 0.825407087802887, - "learning_rate": 1.8114066930685535e-05, - "loss": 0.3354, - "step": 4559 - }, - { - "epoch": 0.42957066484538753, - "grad_norm": 0.8185461163520813, - "learning_rate": 1.8113184259822695e-05, - "loss": 0.2993, - "step": 4560 - }, - { - "epoch": 0.4296648689385554, - "grad_norm": 0.8297215104103088, - "learning_rate": 
1.811230140396671e-05, - "loss": 0.3611, - "step": 4561 - }, - { - "epoch": 0.4297590730317232, - "grad_norm": 0.8848738074302673, - "learning_rate": 1.811141836313771e-05, - "loss": 0.3444, - "step": 4562 - }, - { - "epoch": 0.4298532771248911, - "grad_norm": 0.8999459743499756, - "learning_rate": 1.811053513735582e-05, - "loss": 0.3424, - "step": 4563 - }, - { - "epoch": 0.4299474812180589, - "grad_norm": 0.726396918296814, - "learning_rate": 1.810965172664119e-05, - "loss": 0.3749, - "step": 4564 - }, - { - "epoch": 0.4300416853112268, - "grad_norm": 0.8606882691383362, - "learning_rate": 1.8108768131013958e-05, - "loss": 0.3863, - "step": 4565 - }, - { - "epoch": 0.4301358894043946, - "grad_norm": 0.8336482644081116, - "learning_rate": 1.8107884350494274e-05, - "loss": 0.3232, - "step": 4566 - }, - { - "epoch": 0.43023009349756247, - "grad_norm": 0.8066686987876892, - "learning_rate": 1.8107000385102284e-05, - "loss": 0.34, - "step": 4567 - }, - { - "epoch": 0.4303242975907303, - "grad_norm": 0.7661321759223938, - "learning_rate": 1.810611623485815e-05, - "loss": 0.3749, - "step": 4568 - }, - { - "epoch": 0.43041850168389817, - "grad_norm": 0.8105167150497437, - "learning_rate": 1.810523189978203e-05, - "loss": 0.3508, - "step": 4569 - }, - { - "epoch": 0.430512705777066, - "grad_norm": 0.8608072400093079, - "learning_rate": 1.8104347379894084e-05, - "loss": 0.3066, - "step": 4570 - }, - { - "epoch": 0.43060690987023387, - "grad_norm": 0.7740591764450073, - "learning_rate": 1.8103462675214485e-05, - "loss": 0.3455, - "step": 4571 - }, - { - "epoch": 0.4307011139634017, - "grad_norm": 0.8389507532119751, - "learning_rate": 1.8102577785763407e-05, - "loss": 0.3804, - "step": 4572 - }, - { - "epoch": 0.43079531805656957, - "grad_norm": 0.9266282320022583, - "learning_rate": 1.8101692711561027e-05, - "loss": 0.3919, - "step": 4573 - }, - { - "epoch": 0.4308895221497374, - "grad_norm": 0.7471932768821716, - "learning_rate": 1.810080745262752e-05, - "loss": 0.3466, - "step": 4574 - }, - { - "epoch": 0.43098372624290526, - "grad_norm": 0.7252283692359924, - "learning_rate": 1.8099922008983075e-05, - "loss": 0.3256, - "step": 4575 - }, - { - "epoch": 0.4310779303360731, - "grad_norm": 0.8164095878601074, - "learning_rate": 1.809903638064788e-05, - "loss": 0.3707, - "step": 4576 - }, - { - "epoch": 0.43117213442924096, - "grad_norm": 0.8254788517951965, - "learning_rate": 1.8098150567642134e-05, - "loss": 0.3351, - "step": 4577 - }, - { - "epoch": 0.4312663385224088, - "grad_norm": 0.8885326385498047, - "learning_rate": 1.809726456998603e-05, - "loss": 0.322, - "step": 4578 - }, - { - "epoch": 0.43136054261557666, - "grad_norm": 0.7449198961257935, - "learning_rate": 1.809637838769977e-05, - "loss": 0.3487, - "step": 4579 - }, - { - "epoch": 0.4314547467087445, - "grad_norm": 0.812122106552124, - "learning_rate": 1.809549202080356e-05, - "loss": 0.3639, - "step": 4580 - }, - { - "epoch": 0.43154895080191236, - "grad_norm": 0.8443834781646729, - "learning_rate": 1.8094605469317613e-05, - "loss": 0.3817, - "step": 4581 - }, - { - "epoch": 0.4316431548950802, - "grad_norm": 0.8098493814468384, - "learning_rate": 1.8093718733262143e-05, - "loss": 0.3586, - "step": 4582 - }, - { - "epoch": 0.43173735898824805, - "grad_norm": 0.8115017414093018, - "learning_rate": 1.809283181265737e-05, - "loss": 0.3549, - "step": 4583 - }, - { - "epoch": 0.4318315630814159, - "grad_norm": 0.873196542263031, - "learning_rate": 1.8091944707523516e-05, - "loss": 0.3747, - "step": 4584 - }, - { - "epoch": 
0.43192576717458375, - "grad_norm": 0.7385686635971069, - "learning_rate": 1.8091057417880807e-05, - "loss": 0.3292, - "step": 4585 - }, - { - "epoch": 0.4320199712677516, - "grad_norm": 0.806738555431366, - "learning_rate": 1.8090169943749477e-05, - "loss": 0.3408, - "step": 4586 - }, - { - "epoch": 0.43211417536091945, - "grad_norm": 0.8190243244171143, - "learning_rate": 1.808928228514976e-05, - "loss": 0.3532, - "step": 4587 - }, - { - "epoch": 0.4322083794540873, - "grad_norm": 0.8552412390708923, - "learning_rate": 1.8088394442101896e-05, - "loss": 0.364, - "step": 4588 - }, - { - "epoch": 0.43230258354725515, - "grad_norm": 0.7642033100128174, - "learning_rate": 1.8087506414626127e-05, - "loss": 0.3511, - "step": 4589 - }, - { - "epoch": 0.432396787640423, - "grad_norm": 0.7361543774604797, - "learning_rate": 1.808661820274271e-05, - "loss": 0.3543, - "step": 4590 - }, - { - "epoch": 0.43249099173359085, - "grad_norm": 0.726957380771637, - "learning_rate": 1.8085729806471888e-05, - "loss": 0.3334, - "step": 4591 - }, - { - "epoch": 0.4325851958267587, - "grad_norm": 0.7136059403419495, - "learning_rate": 1.808484122583392e-05, - "loss": 0.3368, - "step": 4592 - }, - { - "epoch": 0.43267939991992654, - "grad_norm": 1.0841796398162842, - "learning_rate": 1.808395246084907e-05, - "loss": 0.3448, - "step": 4593 - }, - { - "epoch": 0.4327736040130944, - "grad_norm": 0.899951696395874, - "learning_rate": 1.8083063511537605e-05, - "loss": 0.3709, - "step": 4594 - }, - { - "epoch": 0.43286780810626224, - "grad_norm": 0.7780426740646362, - "learning_rate": 1.8082174377919792e-05, - "loss": 0.3625, - "step": 4595 - }, - { - "epoch": 0.4329620121994301, - "grad_norm": 0.8089221715927124, - "learning_rate": 1.80812850600159e-05, - "loss": 0.3615, - "step": 4596 - }, - { - "epoch": 0.43305621629259794, - "grad_norm": 0.7787632942199707, - "learning_rate": 1.8080395557846213e-05, - "loss": 0.3034, - "step": 4597 - }, - { - "epoch": 0.4331504203857658, - "grad_norm": 0.9258681535720825, - "learning_rate": 1.807950587143101e-05, - "loss": 0.3316, - "step": 4598 - }, - { - "epoch": 0.4332446244789336, - "grad_norm": 0.7238567471504211, - "learning_rate": 1.8078616000790577e-05, - "loss": 0.3164, - "step": 4599 - }, - { - "epoch": 0.43333882857210143, - "grad_norm": 0.8651083707809448, - "learning_rate": 1.8077725945945203e-05, - "loss": 0.3395, - "step": 4600 - }, - { - "epoch": 0.4334330326652693, - "grad_norm": 0.850307047367096, - "learning_rate": 1.807683570691519e-05, - "loss": 0.3185, - "step": 4601 - }, - { - "epoch": 0.43352723675843713, - "grad_norm": 0.7419808506965637, - "learning_rate": 1.8075945283720832e-05, - "loss": 0.3684, - "step": 4602 - }, - { - "epoch": 0.433621440851605, - "grad_norm": 0.8960341811180115, - "learning_rate": 1.8075054676382426e-05, - "loss": 0.3858, - "step": 4603 - }, - { - "epoch": 0.4337156449447728, - "grad_norm": 0.7304967641830444, - "learning_rate": 1.807416388492029e-05, - "loss": 0.3001, - "step": 4604 - }, - { - "epoch": 0.4338098490379407, - "grad_norm": 0.7991971969604492, - "learning_rate": 1.8073272909354727e-05, - "loss": 0.3751, - "step": 4605 - }, - { - "epoch": 0.4339040531311085, - "grad_norm": 0.785723865032196, - "learning_rate": 1.8072381749706058e-05, - "loss": 0.3603, - "step": 4606 - }, - { - "epoch": 0.4339982572242764, - "grad_norm": 0.7629894614219666, - "learning_rate": 1.80714904059946e-05, - "loss": 0.3187, - "step": 4607 - }, - { - "epoch": 0.4340924613174442, - "grad_norm": 0.8983370065689087, - "learning_rate": 
1.807059887824068e-05, - "loss": 0.3929, - "step": 4608 - }, - { - "epoch": 0.43418666541061207, - "grad_norm": 0.8052110075950623, - "learning_rate": 1.8069707166464624e-05, - "loss": 0.3542, - "step": 4609 - }, - { - "epoch": 0.4342808695037799, - "grad_norm": 0.7605381608009338, - "learning_rate": 1.8068815270686763e-05, - "loss": 0.3391, - "step": 4610 - }, - { - "epoch": 0.43437507359694777, - "grad_norm": 0.7355569005012512, - "learning_rate": 1.8067923190927437e-05, - "loss": 0.3396, - "step": 4611 - }, - { - "epoch": 0.4344692776901156, - "grad_norm": 0.7521963715553284, - "learning_rate": 1.8067030927206984e-05, - "loss": 0.3223, - "step": 4612 - }, - { - "epoch": 0.43456348178328347, - "grad_norm": 0.9807536602020264, - "learning_rate": 1.806613847954575e-05, - "loss": 0.4031, - "step": 4613 - }, - { - "epoch": 0.4346576858764513, - "grad_norm": 0.7357286810874939, - "learning_rate": 1.8065245847964085e-05, - "loss": 0.3321, - "step": 4614 - }, - { - "epoch": 0.43475188996961917, - "grad_norm": 0.8147128820419312, - "learning_rate": 1.806435303248234e-05, - "loss": 0.3851, - "step": 4615 - }, - { - "epoch": 0.434846094062787, - "grad_norm": 0.8538789749145508, - "learning_rate": 1.8063460033120873e-05, - "loss": 0.3206, - "step": 4616 - }, - { - "epoch": 0.43494029815595486, - "grad_norm": 0.9050279855728149, - "learning_rate": 1.8062566849900048e-05, - "loss": 0.3643, - "step": 4617 - }, - { - "epoch": 0.4350345022491227, - "grad_norm": 0.7920427918434143, - "learning_rate": 1.8061673482840228e-05, - "loss": 0.3764, - "step": 4618 - }, - { - "epoch": 0.43512870634229056, - "grad_norm": 0.7693020105361938, - "learning_rate": 1.806077993196179e-05, - "loss": 0.4008, - "step": 4619 - }, - { - "epoch": 0.4352229104354584, - "grad_norm": 0.7530403733253479, - "learning_rate": 1.80598861972851e-05, - "loss": 0.3302, - "step": 4620 - }, - { - "epoch": 0.43531711452862626, - "grad_norm": 0.8060566186904907, - "learning_rate": 1.805899227883054e-05, - "loss": 0.3652, - "step": 4621 - }, - { - "epoch": 0.4354113186217941, - "grad_norm": 0.8145161867141724, - "learning_rate": 1.8058098176618496e-05, - "loss": 0.3356, - "step": 4622 - }, - { - "epoch": 0.43550552271496196, - "grad_norm": 0.8926414847373962, - "learning_rate": 1.8057203890669346e-05, - "loss": 0.3025, - "step": 4623 - }, - { - "epoch": 0.4355997268081298, - "grad_norm": 0.9973188638687134, - "learning_rate": 1.805630942100349e-05, - "loss": 0.3763, - "step": 4624 - }, - { - "epoch": 0.43569393090129765, - "grad_norm": 1.064535140991211, - "learning_rate": 1.8055414767641316e-05, - "loss": 0.2956, - "step": 4625 - }, - { - "epoch": 0.4357881349944655, - "grad_norm": 0.7880619168281555, - "learning_rate": 1.805451993060323e-05, - "loss": 0.3397, - "step": 4626 - }, - { - "epoch": 0.43588233908763335, - "grad_norm": 0.7307702898979187, - "learning_rate": 1.8053624909909634e-05, - "loss": 0.3222, - "step": 4627 - }, - { - "epoch": 0.4359765431808012, - "grad_norm": 0.6933474540710449, - "learning_rate": 1.8052729705580935e-05, - "loss": 0.311, - "step": 4628 - }, - { - "epoch": 0.43607074727396905, - "grad_norm": 0.8088884949684143, - "learning_rate": 1.8051834317637547e-05, - "loss": 0.3392, - "step": 4629 - }, - { - "epoch": 0.4361649513671369, - "grad_norm": 0.793786883354187, - "learning_rate": 1.805093874609988e-05, - "loss": 0.3733, - "step": 4630 - }, - { - "epoch": 0.43625915546030475, - "grad_norm": 0.8003841042518616, - "learning_rate": 1.8050042990988358e-05, - "loss": 0.3027, - "step": 4631 - }, - { - "epoch": 
0.4363533595534726, - "grad_norm": 0.8275167346000671, - "learning_rate": 1.804914705232341e-05, - "loss": 0.3455, - "step": 4632 - }, - { - "epoch": 0.43644756364664045, - "grad_norm": 0.7653775215148926, - "learning_rate": 1.804825093012546e-05, - "loss": 0.3003, - "step": 4633 - }, - { - "epoch": 0.4365417677398083, - "grad_norm": 0.8436541557312012, - "learning_rate": 1.804735462441494e-05, - "loss": 0.3288, - "step": 4634 - }, - { - "epoch": 0.43663597183297614, - "grad_norm": 0.7628139853477478, - "learning_rate": 1.804645813521229e-05, - "loss": 0.2999, - "step": 4635 - }, - { - "epoch": 0.436730175926144, - "grad_norm": 0.8498921394348145, - "learning_rate": 1.804556146253795e-05, - "loss": 0.3578, - "step": 4636 - }, - { - "epoch": 0.43682438001931184, - "grad_norm": 0.9100565314292908, - "learning_rate": 1.8044664606412366e-05, - "loss": 0.3661, - "step": 4637 - }, - { - "epoch": 0.4369185841124797, - "grad_norm": 0.7419862747192383, - "learning_rate": 1.8043767566855988e-05, - "loss": 0.3399, - "step": 4638 - }, - { - "epoch": 0.43701278820564754, - "grad_norm": 0.8369824290275574, - "learning_rate": 1.804287034388927e-05, - "loss": 0.3425, - "step": 4639 - }, - { - "epoch": 0.4371069922988154, - "grad_norm": 0.7686251401901245, - "learning_rate": 1.804197293753267e-05, - "loss": 0.3035, - "step": 4640 - }, - { - "epoch": 0.43720119639198324, - "grad_norm": 0.7537437081336975, - "learning_rate": 1.8041075347806647e-05, - "loss": 0.3458, - "step": 4641 - }, - { - "epoch": 0.4372954004851511, - "grad_norm": 0.8594371676445007, - "learning_rate": 1.8040177574731673e-05, - "loss": 0.3297, - "step": 4642 - }, - { - "epoch": 0.43738960457831894, - "grad_norm": 0.6765527129173279, - "learning_rate": 1.8039279618328215e-05, - "loss": 0.2741, - "step": 4643 - }, - { - "epoch": 0.4374838086714868, - "grad_norm": 0.8351267576217651, - "learning_rate": 1.8038381478616747e-05, - "loss": 0.3206, - "step": 4644 - }, - { - "epoch": 0.43757801276465463, - "grad_norm": 0.8332872986793518, - "learning_rate": 1.8037483155617755e-05, - "loss": 0.36, - "step": 4645 - }, - { - "epoch": 0.4376722168578225, - "grad_norm": 0.9106321334838867, - "learning_rate": 1.8036584649351713e-05, - "loss": 0.3788, - "step": 4646 - }, - { - "epoch": 0.43776642095099033, - "grad_norm": 0.6762860417366028, - "learning_rate": 1.803568595983911e-05, - "loss": 0.3089, - "step": 4647 - }, - { - "epoch": 0.4378606250441582, - "grad_norm": 0.7984324097633362, - "learning_rate": 1.8034787087100442e-05, - "loss": 0.3536, - "step": 4648 - }, - { - "epoch": 0.43795482913732603, - "grad_norm": 0.7609680891036987, - "learning_rate": 1.8033888031156204e-05, - "loss": 0.3261, - "step": 4649 - }, - { - "epoch": 0.4380490332304939, - "grad_norm": 0.8033218383789062, - "learning_rate": 1.8032988792026894e-05, - "loss": 0.2957, - "step": 4650 - }, - { - "epoch": 0.4381432373236617, - "grad_norm": 0.8091560006141663, - "learning_rate": 1.8032089369733015e-05, - "loss": 0.3087, - "step": 4651 - }, - { - "epoch": 0.4382374414168296, - "grad_norm": 0.7749262452125549, - "learning_rate": 1.8031189764295075e-05, - "loss": 0.3672, - "step": 4652 - }, - { - "epoch": 0.4383316455099974, - "grad_norm": 0.6797177791595459, - "learning_rate": 1.8030289975733592e-05, - "loss": 0.3116, - "step": 4653 - }, - { - "epoch": 0.4384258496031653, - "grad_norm": 0.7387074828147888, - "learning_rate": 1.8029390004069075e-05, - "loss": 0.352, - "step": 4654 - }, - { - "epoch": 0.4385200536963331, - "grad_norm": 0.747401237487793, - "learning_rate": 
1.8028489849322052e-05, - "loss": 0.3357, - "step": 4655 - }, - { - "epoch": 0.43861425778950097, - "grad_norm": 0.7775043249130249, - "learning_rate": 1.802758951151304e-05, - "loss": 0.3648, - "step": 4656 - }, - { - "epoch": 0.4387084618826688, - "grad_norm": 0.7466880679130554, - "learning_rate": 1.8026688990662574e-05, - "loss": 0.3228, - "step": 4657 - }, - { - "epoch": 0.43880266597583667, - "grad_norm": 0.7472065687179565, - "learning_rate": 1.8025788286791183e-05, - "loss": 0.3074, - "step": 4658 - }, - { - "epoch": 0.4388968700690045, - "grad_norm": 0.8351142406463623, - "learning_rate": 1.802488739991941e-05, - "loss": 0.2955, - "step": 4659 - }, - { - "epoch": 0.43899107416217237, - "grad_norm": 0.9496912956237793, - "learning_rate": 1.8023986330067795e-05, - "loss": 0.3861, - "step": 4660 - }, - { - "epoch": 0.4390852782553402, - "grad_norm": 0.9483456015586853, - "learning_rate": 1.8023085077256878e-05, - "loss": 0.3534, - "step": 4661 - }, - { - "epoch": 0.43917948234850807, - "grad_norm": 0.6802927851676941, - "learning_rate": 1.8022183641507215e-05, - "loss": 0.3231, - "step": 4662 - }, - { - "epoch": 0.4392736864416759, - "grad_norm": 0.952570378780365, - "learning_rate": 1.8021282022839363e-05, - "loss": 0.4003, - "step": 4663 - }, - { - "epoch": 0.43936789053484376, - "grad_norm": 0.8370252847671509, - "learning_rate": 1.802038022127387e-05, - "loss": 0.3964, - "step": 4664 - }, - { - "epoch": 0.4394620946280116, - "grad_norm": 1.0444804430007935, - "learning_rate": 1.8019478236831306e-05, - "loss": 0.3511, - "step": 4665 - }, - { - "epoch": 0.43955629872117946, - "grad_norm": 0.7669371962547302, - "learning_rate": 1.8018576069532235e-05, - "loss": 0.334, - "step": 4666 - }, - { - "epoch": 0.4396505028143473, - "grad_norm": 0.8720434904098511, - "learning_rate": 1.801767371939723e-05, - "loss": 0.313, - "step": 4667 - }, - { - "epoch": 0.4397447069075151, - "grad_norm": 0.7665590047836304, - "learning_rate": 1.8016771186446864e-05, - "loss": 0.3384, - "step": 4668 - }, - { - "epoch": 0.43983891100068295, - "grad_norm": 0.7445639967918396, - "learning_rate": 1.8015868470701715e-05, - "loss": 0.3087, - "step": 4669 - }, - { - "epoch": 0.4399331150938508, - "grad_norm": 1.0419539213180542, - "learning_rate": 1.801496557218237e-05, - "loss": 0.3779, - "step": 4670 - }, - { - "epoch": 0.44002731918701865, - "grad_norm": 0.6913175582885742, - "learning_rate": 1.8014062490909414e-05, - "loss": 0.3212, - "step": 4671 - }, - { - "epoch": 0.4401215232801865, - "grad_norm": 0.7021580934524536, - "learning_rate": 1.801315922690344e-05, - "loss": 0.3479, - "step": 4672 - }, - { - "epoch": 0.44021572737335435, - "grad_norm": 0.8642510771751404, - "learning_rate": 1.8012255780185043e-05, - "loss": 0.3567, - "step": 4673 - }, - { - "epoch": 0.4403099314665222, - "grad_norm": 1.0100830793380737, - "learning_rate": 1.8011352150774823e-05, - "loss": 0.3944, - "step": 4674 - }, - { - "epoch": 0.44040413555969005, - "grad_norm": 0.7227535247802734, - "learning_rate": 1.8010448338693382e-05, - "loss": 0.3056, - "step": 4675 - }, - { - "epoch": 0.4404983396528579, - "grad_norm": 0.7552227973937988, - "learning_rate": 1.8009544343961335e-05, - "loss": 0.2957, - "step": 4676 - }, - { - "epoch": 0.44059254374602574, - "grad_norm": 0.8633525371551514, - "learning_rate": 1.8008640166599283e-05, - "loss": 0.3687, - "step": 4677 - }, - { - "epoch": 0.4406867478391936, - "grad_norm": 0.9835649728775024, - "learning_rate": 1.8007735806627856e-05, - "loss": 0.3463, - "step": 4678 - }, - { - "epoch": 
0.44078095193236144, - "grad_norm": 0.7401981949806213, - "learning_rate": 1.8006831264067668e-05, - "loss": 0.3413, - "step": 4679 - }, - { - "epoch": 0.4408751560255293, - "grad_norm": 0.8326581120491028, - "learning_rate": 1.8005926538939344e-05, - "loss": 0.4091, - "step": 4680 - }, - { - "epoch": 0.44096936011869714, - "grad_norm": 0.9332050085067749, - "learning_rate": 1.800502163126351e-05, - "loss": 0.3743, - "step": 4681 - }, - { - "epoch": 0.441063564211865, - "grad_norm": 0.8416963815689087, - "learning_rate": 1.8004116541060804e-05, - "loss": 0.3267, - "step": 4682 - }, - { - "epoch": 0.44115776830503284, - "grad_norm": 1.0526691675186157, - "learning_rate": 1.8003211268351863e-05, - "loss": 0.3646, - "step": 4683 - }, - { - "epoch": 0.4412519723982007, - "grad_norm": 0.7330875992774963, - "learning_rate": 1.8002305813157327e-05, - "loss": 0.3313, - "step": 4684 - }, - { - "epoch": 0.44134617649136854, - "grad_norm": 0.677757740020752, - "learning_rate": 1.8001400175497844e-05, - "loss": 0.3143, - "step": 4685 - }, - { - "epoch": 0.4414403805845364, - "grad_norm": 0.7628189921379089, - "learning_rate": 1.8000494355394064e-05, - "loss": 0.3374, - "step": 4686 - }, - { - "epoch": 0.44153458467770423, - "grad_norm": 0.8393889665603638, - "learning_rate": 1.7999588352866638e-05, - "loss": 0.3582, - "step": 4687 - }, - { - "epoch": 0.4416287887708721, - "grad_norm": 0.7568100690841675, - "learning_rate": 1.7998682167936227e-05, - "loss": 0.3194, - "step": 4688 - }, - { - "epoch": 0.44172299286403993, - "grad_norm": 1.173956036567688, - "learning_rate": 1.799777580062349e-05, - "loss": 0.3112, - "step": 4689 - }, - { - "epoch": 0.4418171969572078, - "grad_norm": 0.738646388053894, - "learning_rate": 1.7996869250949095e-05, - "loss": 0.3075, - "step": 4690 - }, - { - "epoch": 0.44191140105037563, - "grad_norm": 0.6442466378211975, - "learning_rate": 1.799596251893372e-05, - "loss": 0.2764, - "step": 4691 - }, - { - "epoch": 0.4420056051435435, - "grad_norm": 0.7446337342262268, - "learning_rate": 1.7995055604598027e-05, - "loss": 0.3342, - "step": 4692 - }, - { - "epoch": 0.4420998092367113, - "grad_norm": 0.8343163728713989, - "learning_rate": 1.7994148507962706e-05, - "loss": 0.3139, - "step": 4693 - }, - { - "epoch": 0.4421940133298792, - "grad_norm": 0.8424333930015564, - "learning_rate": 1.7993241229048434e-05, - "loss": 0.3566, - "step": 4694 - }, - { - "epoch": 0.442288217423047, - "grad_norm": 0.8876257538795471, - "learning_rate": 1.79923337678759e-05, - "loss": 0.3852, - "step": 4695 - }, - { - "epoch": 0.4423824215162149, - "grad_norm": 0.7722113728523254, - "learning_rate": 1.7991426124465792e-05, - "loss": 0.3317, - "step": 4696 - }, - { - "epoch": 0.4424766256093827, - "grad_norm": 0.8378174901008606, - "learning_rate": 1.7990518298838817e-05, - "loss": 0.3857, - "step": 4697 - }, - { - "epoch": 0.44257082970255057, - "grad_norm": 0.9277419447898865, - "learning_rate": 1.798961029101566e-05, - "loss": 0.3287, - "step": 4698 - }, - { - "epoch": 0.4426650337957184, - "grad_norm": 0.7418336272239685, - "learning_rate": 1.7988702101017036e-05, - "loss": 0.3008, - "step": 4699 - }, - { - "epoch": 0.44275923788888627, - "grad_norm": 0.9991570711135864, - "learning_rate": 1.798779372886365e-05, - "loss": 0.3649, - "step": 4700 - }, - { - "epoch": 0.4428534419820541, - "grad_norm": 0.858697772026062, - "learning_rate": 1.7986885174576212e-05, - "loss": 0.3509, - "step": 4701 - }, - { - "epoch": 0.44294764607522197, - "grad_norm": 0.7897425889968872, - "learning_rate": 
1.7985976438175444e-05, - "loss": 0.3493, - "step": 4702 - }, - { - "epoch": 0.4430418501683898, - "grad_norm": 0.6957587599754333, - "learning_rate": 1.7985067519682057e-05, - "loss": 0.2817, - "step": 4703 - }, - { - "epoch": 0.44313605426155767, - "grad_norm": 0.8322944045066833, - "learning_rate": 1.7984158419116783e-05, - "loss": 0.3411, - "step": 4704 - }, - { - "epoch": 0.4432302583547255, - "grad_norm": 0.924765408039093, - "learning_rate": 1.798324913650035e-05, - "loss": 0.41, - "step": 4705 - }, - { - "epoch": 0.44332446244789336, - "grad_norm": 0.743596613407135, - "learning_rate": 1.7982339671853492e-05, - "loss": 0.3418, - "step": 4706 - }, - { - "epoch": 0.4434186665410612, - "grad_norm": 0.7587670683860779, - "learning_rate": 1.7981430025196946e-05, - "loss": 0.3186, - "step": 4707 - }, - { - "epoch": 0.44351287063422906, - "grad_norm": 0.7345204949378967, - "learning_rate": 1.7980520196551444e-05, - "loss": 0.3578, - "step": 4708 - }, - { - "epoch": 0.4436070747273969, - "grad_norm": 0.8454651236534119, - "learning_rate": 1.7979610185937746e-05, - "loss": 0.3555, - "step": 4709 - }, - { - "epoch": 0.44370127882056476, - "grad_norm": 0.7994370460510254, - "learning_rate": 1.7978699993376593e-05, - "loss": 0.357, - "step": 4710 - }, - { - "epoch": 0.4437954829137326, - "grad_norm": 0.7655448913574219, - "learning_rate": 1.7977789618888742e-05, - "loss": 0.3652, - "step": 4711 - }, - { - "epoch": 0.44388968700690046, - "grad_norm": 0.8610358238220215, - "learning_rate": 1.797687906249495e-05, - "loss": 0.3611, - "step": 4712 - }, - { - "epoch": 0.4439838911000683, - "grad_norm": 0.7728049159049988, - "learning_rate": 1.7975968324215977e-05, - "loss": 0.2849, - "step": 4713 - }, - { - "epoch": 0.44407809519323616, - "grad_norm": 0.7278761267662048, - "learning_rate": 1.797505740407259e-05, - "loss": 0.3296, - "step": 4714 - }, - { - "epoch": 0.444172299286404, - "grad_norm": 0.8426651358604431, - "learning_rate": 1.7974146302085563e-05, - "loss": 0.3408, - "step": 4715 - }, - { - "epoch": 0.44426650337957185, - "grad_norm": 0.8193193674087524, - "learning_rate": 1.797323501827567e-05, - "loss": 0.3588, - "step": 4716 - }, - { - "epoch": 0.4443607074727397, - "grad_norm": 0.8741720914840698, - "learning_rate": 1.7972323552663686e-05, - "loss": 0.4083, - "step": 4717 - }, - { - "epoch": 0.44445491156590755, - "grad_norm": 0.806844174861908, - "learning_rate": 1.797141190527039e-05, - "loss": 0.3395, - "step": 4718 - }, - { - "epoch": 0.4445491156590754, - "grad_norm": 0.8051599264144897, - "learning_rate": 1.7970500076116583e-05, - "loss": 0.3676, - "step": 4719 - }, - { - "epoch": 0.44464331975224325, - "grad_norm": 0.8780580163002014, - "learning_rate": 1.7969588065223043e-05, - "loss": 0.3785, - "step": 4720 - }, - { - "epoch": 0.4447375238454111, - "grad_norm": 0.7754378914833069, - "learning_rate": 1.7968675872610572e-05, - "loss": 0.3094, - "step": 4721 - }, - { - "epoch": 0.44483172793857895, - "grad_norm": 0.730026364326477, - "learning_rate": 1.7967763498299965e-05, - "loss": 0.3207, - "step": 4722 - }, - { - "epoch": 0.4449259320317468, - "grad_norm": 0.6212615966796875, - "learning_rate": 1.796685094231203e-05, - "loss": 0.2724, - "step": 4723 - }, - { - "epoch": 0.44502013612491464, - "grad_norm": 0.8475649356842041, - "learning_rate": 1.796593820466757e-05, - "loss": 0.3158, - "step": 4724 - }, - { - "epoch": 0.4451143402180825, - "grad_norm": 0.8592053055763245, - "learning_rate": 1.7965025285387402e-05, - "loss": 0.3722, - "step": 4725 - }, - { - "epoch": 
0.44520854431125034, - "grad_norm": 0.7791186571121216, - "learning_rate": 1.796411218449234e-05, - "loss": 0.3344, - "step": 4726 - }, - { - "epoch": 0.4453027484044182, - "grad_norm": 0.8951060175895691, - "learning_rate": 1.7963198902003202e-05, - "loss": 0.3531, - "step": 4727 - }, - { - "epoch": 0.44539695249758604, - "grad_norm": 0.8267094492912292, - "learning_rate": 1.7962285437940813e-05, - "loss": 0.3449, - "step": 4728 - }, - { - "epoch": 0.4454911565907539, - "grad_norm": 0.7618321776390076, - "learning_rate": 1.7961371792326004e-05, - "loss": 0.3041, - "step": 4729 - }, - { - "epoch": 0.44558536068392174, - "grad_norm": 0.7991135716438293, - "learning_rate": 1.7960457965179604e-05, - "loss": 0.3587, - "step": 4730 - }, - { - "epoch": 0.4456795647770896, - "grad_norm": 0.8610298037528992, - "learning_rate": 1.795954395652245e-05, - "loss": 0.3344, - "step": 4731 - }, - { - "epoch": 0.44577376887025744, - "grad_norm": 0.7768810987472534, - "learning_rate": 1.7958629766375387e-05, - "loss": 0.3357, - "step": 4732 - }, - { - "epoch": 0.4458679729634253, - "grad_norm": 0.8742057681083679, - "learning_rate": 1.7957715394759258e-05, - "loss": 0.3445, - "step": 4733 - }, - { - "epoch": 0.44596217705659313, - "grad_norm": 0.8278061747550964, - "learning_rate": 1.7956800841694906e-05, - "loss": 0.3595, - "step": 4734 - }, - { - "epoch": 0.446056381149761, - "grad_norm": 0.7438100576400757, - "learning_rate": 1.7955886107203194e-05, - "loss": 0.3369, - "step": 4735 - }, - { - "epoch": 0.44615058524292883, - "grad_norm": 1.0333058834075928, - "learning_rate": 1.795497119130497e-05, - "loss": 0.3849, - "step": 4736 - }, - { - "epoch": 0.4462447893360967, - "grad_norm": 0.8227161169052124, - "learning_rate": 1.7954056094021105e-05, - "loss": 0.3973, - "step": 4737 - }, - { - "epoch": 0.4463389934292645, - "grad_norm": 0.7209968566894531, - "learning_rate": 1.7953140815372457e-05, - "loss": 0.3257, - "step": 4738 - }, - { - "epoch": 0.4464331975224323, - "grad_norm": 1.1025093793869019, - "learning_rate": 1.79522253553799e-05, - "loss": 0.3451, - "step": 4739 - }, - { - "epoch": 0.4465274016156002, - "grad_norm": 0.969059944152832, - "learning_rate": 1.795130971406431e-05, - "loss": 0.4156, - "step": 4740 - }, - { - "epoch": 0.446621605708768, - "grad_norm": 0.8435564041137695, - "learning_rate": 1.7950393891446555e-05, - "loss": 0.353, - "step": 4741 - }, - { - "epoch": 0.44671580980193587, - "grad_norm": 0.8512372970581055, - "learning_rate": 1.7949477887547528e-05, - "loss": 0.3407, - "step": 4742 - }, - { - "epoch": 0.4468100138951037, - "grad_norm": 0.7670828700065613, - "learning_rate": 1.794856170238811e-05, - "loss": 0.3653, - "step": 4743 - }, - { - "epoch": 0.44690421798827157, - "grad_norm": 0.6930330395698547, - "learning_rate": 1.7947645335989192e-05, - "loss": 0.3251, - "step": 4744 - }, - { - "epoch": 0.4469984220814394, - "grad_norm": 0.7446362972259521, - "learning_rate": 1.794672878837167e-05, - "loss": 0.3119, - "step": 4745 - }, - { - "epoch": 0.44709262617460727, - "grad_norm": 0.7399316430091858, - "learning_rate": 1.794581205955644e-05, - "loss": 0.3362, - "step": 4746 - }, - { - "epoch": 0.4471868302677751, - "grad_norm": 0.8266837000846863, - "learning_rate": 1.7944895149564407e-05, - "loss": 0.338, - "step": 4747 - }, - { - "epoch": 0.44728103436094296, - "grad_norm": 0.855751097202301, - "learning_rate": 1.7943978058416477e-05, - "loss": 0.3183, - "step": 4748 - }, - { - "epoch": 0.4473752384541108, - "grad_norm": 0.8574679493904114, - "learning_rate": 
1.7943060786133567e-05, - "loss": 0.3526, - "step": 4749 - }, - { - "epoch": 0.44746944254727866, - "grad_norm": 0.7882106304168701, - "learning_rate": 1.794214333273658e-05, - "loss": 0.3346, - "step": 4750 - }, - { - "epoch": 0.4475636466404465, - "grad_norm": 0.7670796513557434, - "learning_rate": 1.7941225698246445e-05, - "loss": 0.3449, - "step": 4751 - }, - { - "epoch": 0.44765785073361436, - "grad_norm": 0.7950530052185059, - "learning_rate": 1.7940307882684084e-05, - "loss": 0.2871, - "step": 4752 - }, - { - "epoch": 0.4477520548267822, - "grad_norm": 0.7690070867538452, - "learning_rate": 1.7939389886070422e-05, - "loss": 0.3564, - "step": 4753 - }, - { - "epoch": 0.44784625891995006, - "grad_norm": 0.8102042078971863, - "learning_rate": 1.7938471708426392e-05, - "loss": 0.3656, - "step": 4754 - }, - { - "epoch": 0.4479404630131179, - "grad_norm": 0.803938627243042, - "learning_rate": 1.7937553349772932e-05, - "loss": 0.3311, - "step": 4755 - }, - { - "epoch": 0.44803466710628576, - "grad_norm": 0.730629026889801, - "learning_rate": 1.7936634810130974e-05, - "loss": 0.2843, - "step": 4756 - }, - { - "epoch": 0.4481288711994536, - "grad_norm": 0.8434792757034302, - "learning_rate": 1.7935716089521474e-05, - "loss": 0.3195, - "step": 4757 - }, - { - "epoch": 0.44822307529262145, - "grad_norm": 0.946655809879303, - "learning_rate": 1.7934797187965374e-05, - "loss": 0.4033, - "step": 4758 - }, - { - "epoch": 0.4483172793857893, - "grad_norm": 0.7012725472450256, - "learning_rate": 1.7933878105483623e-05, - "loss": 0.2827, - "step": 4759 - }, - { - "epoch": 0.44841148347895715, - "grad_norm": 0.7796767354011536, - "learning_rate": 1.7932958842097186e-05, - "loss": 0.367, - "step": 4760 - }, - { - "epoch": 0.448505687572125, - "grad_norm": 0.7940994501113892, - "learning_rate": 1.793203939782702e-05, - "loss": 0.335, - "step": 4761 - }, - { - "epoch": 0.44859989166529285, - "grad_norm": 0.7789350748062134, - "learning_rate": 1.793111977269408e-05, - "loss": 0.3073, - "step": 4762 - }, - { - "epoch": 0.4486940957584607, - "grad_norm": 0.7863308191299438, - "learning_rate": 1.793019996671935e-05, - "loss": 0.3153, - "step": 4763 - }, - { - "epoch": 0.44878829985162855, - "grad_norm": 0.7704945206642151, - "learning_rate": 1.7929279979923794e-05, - "loss": 0.3236, - "step": 4764 - }, - { - "epoch": 0.4488825039447964, - "grad_norm": 0.9169216752052307, - "learning_rate": 1.7928359812328392e-05, - "loss": 0.3433, - "step": 4765 - }, - { - "epoch": 0.44897670803796424, - "grad_norm": 0.8632755875587463, - "learning_rate": 1.7927439463954125e-05, - "loss": 0.316, - "step": 4766 - }, - { - "epoch": 0.4490709121311321, - "grad_norm": 0.7209241390228271, - "learning_rate": 1.792651893482198e-05, - "loss": 0.3052, - "step": 4767 - }, - { - "epoch": 0.44916511622429994, - "grad_norm": 0.7108308672904968, - "learning_rate": 1.7925598224952945e-05, - "loss": 0.2939, - "step": 4768 - }, - { - "epoch": 0.4492593203174678, - "grad_norm": 0.9988152980804443, - "learning_rate": 1.792467733436801e-05, - "loss": 0.3072, - "step": 4769 - }, - { - "epoch": 0.44935352441063564, - "grad_norm": 0.7612809538841248, - "learning_rate": 1.792375626308818e-05, - "loss": 0.368, - "step": 4770 - }, - { - "epoch": 0.4494477285038035, - "grad_norm": 0.6917704939842224, - "learning_rate": 1.792283501113445e-05, - "loss": 0.3016, - "step": 4771 - }, - { - "epoch": 0.44954193259697134, - "grad_norm": 0.8588003516197205, - "learning_rate": 1.7921913578527827e-05, - "loss": 0.3324, - "step": 4772 - }, - { - "epoch": 
0.4496361366901392, - "grad_norm": 0.9099318385124207, - "learning_rate": 1.7920991965289327e-05, - "loss": 0.3872, - "step": 4773 - }, - { - "epoch": 0.44973034078330704, - "grad_norm": 0.9846810698509216, - "learning_rate": 1.7920070171439956e-05, - "loss": 0.3522, - "step": 4774 - }, - { - "epoch": 0.4498245448764749, - "grad_norm": 0.843734860420227, - "learning_rate": 1.7919148197000738e-05, - "loss": 0.37, - "step": 4775 - }, - { - "epoch": 0.44991874896964273, - "grad_norm": 0.7469123005867004, - "learning_rate": 1.7918226041992697e-05, - "loss": 0.2989, - "step": 4776 - }, - { - "epoch": 0.4500129530628106, - "grad_norm": 0.870191752910614, - "learning_rate": 1.7917303706436856e-05, - "loss": 0.3206, - "step": 4777 - }, - { - "epoch": 0.45010715715597843, - "grad_norm": 0.8193684816360474, - "learning_rate": 1.791638119035424e-05, - "loss": 0.3457, - "step": 4778 - }, - { - "epoch": 0.4502013612491463, - "grad_norm": 0.8482380509376526, - "learning_rate": 1.79154584937659e-05, - "loss": 0.38, - "step": 4779 - }, - { - "epoch": 0.45029556534231413, - "grad_norm": 0.8874141573905945, - "learning_rate": 1.7914535616692856e-05, - "loss": 0.4028, - "step": 4780 - }, - { - "epoch": 0.450389769435482, - "grad_norm": 0.8776803016662598, - "learning_rate": 1.7913612559156163e-05, - "loss": 0.4122, - "step": 4781 - }, - { - "epoch": 0.45048397352864983, - "grad_norm": 0.8267475962638855, - "learning_rate": 1.7912689321176867e-05, - "loss": 0.368, - "step": 4782 - }, - { - "epoch": 0.4505781776218177, - "grad_norm": 1.0508227348327637, - "learning_rate": 1.7911765902776014e-05, - "loss": 0.3739, - "step": 4783 - }, - { - "epoch": 0.4506723817149855, - "grad_norm": 0.7327133417129517, - "learning_rate": 1.7910842303974666e-05, - "loss": 0.3236, - "step": 4784 - }, - { - "epoch": 0.4507665858081534, - "grad_norm": 0.6878085732460022, - "learning_rate": 1.7909918524793878e-05, - "loss": 0.2869, - "step": 4785 - }, - { - "epoch": 0.4508607899013212, - "grad_norm": 2.2699522972106934, - "learning_rate": 1.7908994565254713e-05, - "loss": 0.3113, - "step": 4786 - }, - { - "epoch": 0.4509549939944891, - "grad_norm": 0.8151803016662598, - "learning_rate": 1.7908070425378242e-05, - "loss": 0.323, - "step": 4787 - }, - { - "epoch": 0.4510491980876569, - "grad_norm": 0.8966958522796631, - "learning_rate": 1.790714610518553e-05, - "loss": 0.4524, - "step": 4788 - }, - { - "epoch": 0.45114340218082477, - "grad_norm": 0.8446221947669983, - "learning_rate": 1.7906221604697664e-05, - "loss": 0.3257, - "step": 4789 - }, - { - "epoch": 0.4512376062739926, - "grad_norm": 0.9763929843902588, - "learning_rate": 1.7905296923935718e-05, - "loss": 0.3832, - "step": 4790 - }, - { - "epoch": 0.45133181036716047, - "grad_norm": 0.7935441136360168, - "learning_rate": 1.7904372062920776e-05, - "loss": 0.3746, - "step": 4791 - }, - { - "epoch": 0.4514260144603283, - "grad_norm": 0.7477930188179016, - "learning_rate": 1.7903447021673924e-05, - "loss": 0.3361, - "step": 4792 - }, - { - "epoch": 0.45152021855349617, - "grad_norm": 0.7294282913208008, - "learning_rate": 1.7902521800216258e-05, - "loss": 0.3036, - "step": 4793 - }, - { - "epoch": 0.451614422646664, - "grad_norm": 0.7948665618896484, - "learning_rate": 1.7901596398568877e-05, - "loss": 0.3452, - "step": 4794 - }, - { - "epoch": 0.45170862673983186, - "grad_norm": 0.7534676790237427, - "learning_rate": 1.7900670816752875e-05, - "loss": 0.3087, - "step": 4795 - }, - { - "epoch": 0.4518028308329997, - "grad_norm": 0.776970386505127, - "learning_rate": 
1.7899745054789358e-05, - "loss": 0.3258, - "step": 4796 - }, - { - "epoch": 0.45189703492616756, - "grad_norm": 0.7340169548988342, - "learning_rate": 1.789881911269944e-05, - "loss": 0.3417, - "step": 4797 - }, - { - "epoch": 0.4519912390193354, - "grad_norm": 0.7311638593673706, - "learning_rate": 1.789789299050423e-05, - "loss": 0.336, - "step": 4798 - }, - { - "epoch": 0.45208544311250326, - "grad_norm": 0.8584436178207397, - "learning_rate": 1.7896966688224843e-05, - "loss": 0.3535, - "step": 4799 - }, - { - "epoch": 0.4521796472056711, - "grad_norm": 0.7118861675262451, - "learning_rate": 1.7896040205882405e-05, - "loss": 0.3351, - "step": 4800 - }, - { - "epoch": 0.45227385129883896, - "grad_norm": 0.7925348281860352, - "learning_rate": 1.789511354349804e-05, - "loss": 0.3641, - "step": 4801 - }, - { - "epoch": 0.4523680553920068, - "grad_norm": 0.7541933059692383, - "learning_rate": 1.7894186701092874e-05, - "loss": 0.3218, - "step": 4802 - }, - { - "epoch": 0.45246225948517466, - "grad_norm": 0.762876570224762, - "learning_rate": 1.789325967868804e-05, - "loss": 0.3001, - "step": 4803 - }, - { - "epoch": 0.4525564635783425, - "grad_norm": 0.7414087057113647, - "learning_rate": 1.7892332476304684e-05, - "loss": 0.3026, - "step": 4804 - }, - { - "epoch": 0.45265066767151035, - "grad_norm": 1.387542963027954, - "learning_rate": 1.789140509396394e-05, - "loss": 0.3305, - "step": 4805 - }, - { - "epoch": 0.4527448717646782, - "grad_norm": 0.8495650291442871, - "learning_rate": 1.7890477531686953e-05, - "loss": 0.3466, - "step": 4806 - }, - { - "epoch": 0.452839075857846, - "grad_norm": 0.8461723327636719, - "learning_rate": 1.7889549789494876e-05, - "loss": 0.354, - "step": 4807 - }, - { - "epoch": 0.45293327995101385, - "grad_norm": 0.7747946381568909, - "learning_rate": 1.7888621867408864e-05, - "loss": 0.3115, - "step": 4808 - }, - { - "epoch": 0.4530274840441817, - "grad_norm": 0.7994644045829773, - "learning_rate": 1.788769376545007e-05, - "loss": 0.3099, - "step": 4809 - }, - { - "epoch": 0.45312168813734954, - "grad_norm": 0.7755493521690369, - "learning_rate": 1.7886765483639666e-05, - "loss": 0.3465, - "step": 4810 - }, - { - "epoch": 0.4532158922305174, - "grad_norm": 0.784080445766449, - "learning_rate": 1.7885837021998806e-05, - "loss": 0.3391, - "step": 4811 - }, - { - "epoch": 0.45331009632368524, - "grad_norm": 0.9579580426216125, - "learning_rate": 1.7884908380548668e-05, - "loss": 0.3874, - "step": 4812 - }, - { - "epoch": 0.4534043004168531, - "grad_norm": 0.8352931141853333, - "learning_rate": 1.7883979559310427e-05, - "loss": 0.3628, - "step": 4813 - }, - { - "epoch": 0.45349850451002094, - "grad_norm": 0.8074339032173157, - "learning_rate": 1.7883050558305255e-05, - "loss": 0.3758, - "step": 4814 - }, - { - "epoch": 0.4535927086031888, - "grad_norm": 0.7530412077903748, - "learning_rate": 1.7882121377554342e-05, - "loss": 0.3245, - "step": 4815 - }, - { - "epoch": 0.45368691269635664, - "grad_norm": 0.7738369703292847, - "learning_rate": 1.788119201707887e-05, - "loss": 0.321, - "step": 4816 - }, - { - "epoch": 0.4537811167895245, - "grad_norm": 0.9809846878051758, - "learning_rate": 1.7880262476900032e-05, - "loss": 0.3473, - "step": 4817 - }, - { - "epoch": 0.45387532088269233, - "grad_norm": 0.7883719205856323, - "learning_rate": 1.787933275703902e-05, - "loss": 0.3199, - "step": 4818 - }, - { - "epoch": 0.4539695249758602, - "grad_norm": 0.8401819467544556, - "learning_rate": 1.7878402857517044e-05, - "loss": 0.384, - "step": 4819 - }, - { - "epoch": 
0.45406372906902803, - "grad_norm": 0.7190160751342773, - "learning_rate": 1.7877472778355292e-05, - "loss": 0.3125, - "step": 4820 - }, - { - "epoch": 0.4541579331621959, - "grad_norm": 0.8060850501060486, - "learning_rate": 1.787654251957498e-05, - "loss": 0.3674, - "step": 4821 - }, - { - "epoch": 0.45425213725536373, - "grad_norm": 0.7536591291427612, - "learning_rate": 1.7875612081197315e-05, - "loss": 0.3106, - "step": 4822 - }, - { - "epoch": 0.4543463413485316, - "grad_norm": 0.8911111950874329, - "learning_rate": 1.7874681463243516e-05, - "loss": 0.3664, - "step": 4823 - }, - { - "epoch": 0.45444054544169943, - "grad_norm": 0.8450865745544434, - "learning_rate": 1.7873750665734803e-05, - "loss": 0.33, - "step": 4824 - }, - { - "epoch": 0.4545347495348673, - "grad_norm": 0.7691038846969604, - "learning_rate": 1.7872819688692394e-05, - "loss": 0.3602, - "step": 4825 - }, - { - "epoch": 0.4546289536280351, - "grad_norm": 0.9228634238243103, - "learning_rate": 1.7871888532137524e-05, - "loss": 0.3903, - "step": 4826 - }, - { - "epoch": 0.454723157721203, - "grad_norm": 0.7325657606124878, - "learning_rate": 1.7870957196091415e-05, - "loss": 0.2993, - "step": 4827 - }, - { - "epoch": 0.4548173618143708, - "grad_norm": 0.7746484875679016, - "learning_rate": 1.7870025680575315e-05, - "loss": 0.3491, - "step": 4828 - }, - { - "epoch": 0.4549115659075387, - "grad_norm": 0.8059161901473999, - "learning_rate": 1.7869093985610457e-05, - "loss": 0.3414, - "step": 4829 - }, - { - "epoch": 0.4550057700007065, - "grad_norm": 0.7639622688293457, - "learning_rate": 1.786816211121809e-05, - "loss": 0.3296, - "step": 4830 - }, - { - "epoch": 0.45509997409387437, - "grad_norm": 0.7499001622200012, - "learning_rate": 1.7867230057419455e-05, - "loss": 0.3494, - "step": 4831 - }, - { - "epoch": 0.4551941781870422, - "grad_norm": 0.8941529393196106, - "learning_rate": 1.7866297824235808e-05, - "loss": 0.3568, - "step": 4832 - }, - { - "epoch": 0.45528838228021007, - "grad_norm": 0.8757671117782593, - "learning_rate": 1.7865365411688405e-05, - "loss": 0.3385, - "step": 4833 - }, - { - "epoch": 0.4553825863733779, - "grad_norm": 0.9858109354972839, - "learning_rate": 1.7864432819798506e-05, - "loss": 0.33, - "step": 4834 - }, - { - "epoch": 0.45547679046654577, - "grad_norm": 0.778272271156311, - "learning_rate": 1.7863500048587378e-05, - "loss": 0.3484, - "step": 4835 - }, - { - "epoch": 0.4555709945597136, - "grad_norm": 0.7088613510131836, - "learning_rate": 1.7862567098076285e-05, - "loss": 0.3089, - "step": 4836 - }, - { - "epoch": 0.45566519865288146, - "grad_norm": 0.7945699095726013, - "learning_rate": 1.7861633968286502e-05, - "loss": 0.3196, - "step": 4837 - }, - { - "epoch": 0.4557594027460493, - "grad_norm": 0.9303908944129944, - "learning_rate": 1.786070065923931e-05, - "loss": 0.3346, - "step": 4838 - }, - { - "epoch": 0.45585360683921716, - "grad_norm": 0.6546573042869568, - "learning_rate": 1.7859767170955983e-05, - "loss": 0.3137, - "step": 4839 - }, - { - "epoch": 0.455947810932385, - "grad_norm": 0.7632937431335449, - "learning_rate": 1.785883350345781e-05, - "loss": 0.351, - "step": 4840 - }, - { - "epoch": 0.45604201502555286, - "grad_norm": 0.7434275150299072, - "learning_rate": 1.785789965676608e-05, - "loss": 0.3104, - "step": 4841 - }, - { - "epoch": 0.4561362191187207, - "grad_norm": 0.8117392063140869, - "learning_rate": 1.7856965630902085e-05, - "loss": 0.3675, - "step": 4842 - }, - { - "epoch": 0.45623042321188856, - "grad_norm": 0.8150480389595032, - "learning_rate": 
1.7856031425887127e-05, - "loss": 0.3365, - "step": 4843 - }, - { - "epoch": 0.4563246273050564, - "grad_norm": 0.7850632667541504, - "learning_rate": 1.7855097041742495e-05, - "loss": 0.3739, - "step": 4844 - }, - { - "epoch": 0.45641883139822426, - "grad_norm": 0.7791478037834167, - "learning_rate": 1.7854162478489507e-05, - "loss": 0.3229, - "step": 4845 - }, - { - "epoch": 0.4565130354913921, - "grad_norm": 0.8242077827453613, - "learning_rate": 1.7853227736149468e-05, - "loss": 0.3356, - "step": 4846 - }, - { - "epoch": 0.45660723958455995, - "grad_norm": 0.777420163154602, - "learning_rate": 1.785229281474369e-05, - "loss": 0.3696, - "step": 4847 - }, - { - "epoch": 0.4567014436777278, - "grad_norm": 0.8795902729034424, - "learning_rate": 1.7851357714293488e-05, - "loss": 0.3829, - "step": 4848 - }, - { - "epoch": 0.45679564777089565, - "grad_norm": 0.9282544255256653, - "learning_rate": 1.7850422434820194e-05, - "loss": 0.4562, - "step": 4849 - }, - { - "epoch": 0.4568898518640635, - "grad_norm": 0.775903046131134, - "learning_rate": 1.7849486976345125e-05, - "loss": 0.3489, - "step": 4850 - }, - { - "epoch": 0.45698405595723135, - "grad_norm": 1.5967013835906982, - "learning_rate": 1.7848551338889615e-05, - "loss": 0.3815, - "step": 4851 - }, - { - "epoch": 0.4570782600503992, - "grad_norm": 0.7737221121788025, - "learning_rate": 1.7847615522474993e-05, - "loss": 0.2724, - "step": 4852 - }, - { - "epoch": 0.45717246414356705, - "grad_norm": 0.7455960512161255, - "learning_rate": 1.7846679527122605e-05, - "loss": 0.3158, - "step": 4853 - }, - { - "epoch": 0.4572666682367349, - "grad_norm": 0.8008338809013367, - "learning_rate": 1.784574335285379e-05, - "loss": 0.3704, - "step": 4854 - }, - { - "epoch": 0.45736087232990275, - "grad_norm": 0.8408627510070801, - "learning_rate": 1.7844806999689886e-05, - "loss": 0.331, - "step": 4855 - }, - { - "epoch": 0.4574550764230706, - "grad_norm": 0.8857258558273315, - "learning_rate": 1.7843870467652252e-05, - "loss": 0.3796, - "step": 4856 - }, - { - "epoch": 0.45754928051623844, - "grad_norm": 0.6914681196212769, - "learning_rate": 1.7842933756762246e-05, - "loss": 0.3093, - "step": 4857 - }, - { - "epoch": 0.4576434846094063, - "grad_norm": 0.6860592365264893, - "learning_rate": 1.7841996867041213e-05, - "loss": 0.2916, - "step": 4858 - }, - { - "epoch": 0.45773768870257414, - "grad_norm": 0.7824400663375854, - "learning_rate": 1.784105979851053e-05, - "loss": 0.3269, - "step": 4859 - }, - { - "epoch": 0.457831892795742, - "grad_norm": 0.7608857154846191, - "learning_rate": 1.7840122551191555e-05, - "loss": 0.3494, - "step": 4860 - }, - { - "epoch": 0.45792609688890984, - "grad_norm": 0.8246377110481262, - "learning_rate": 1.783918512510566e-05, - "loss": 0.3698, - "step": 4861 - }, - { - "epoch": 0.4580203009820777, - "grad_norm": 0.8004178404808044, - "learning_rate": 1.7838247520274223e-05, - "loss": 0.3597, - "step": 4862 - }, - { - "epoch": 0.45811450507524554, - "grad_norm": 1.0507524013519287, - "learning_rate": 1.7837309736718616e-05, - "loss": 0.3652, - "step": 4863 - }, - { - "epoch": 0.4582087091684134, - "grad_norm": 0.8246618509292603, - "learning_rate": 1.783637177446023e-05, - "loss": 0.3411, - "step": 4864 - }, - { - "epoch": 0.45830291326158124, - "grad_norm": 0.843111515045166, - "learning_rate": 1.783543363352045e-05, - "loss": 0.3652, - "step": 4865 - }, - { - "epoch": 0.4583971173547491, - "grad_norm": 0.9570274949073792, - "learning_rate": 1.7834495313920662e-05, - "loss": 0.3723, - "step": 4866 - }, - { - "epoch": 
0.45849132144791693, - "grad_norm": 0.9009582996368408, - "learning_rate": 1.7833556815682263e-05, - "loss": 0.3898, - "step": 4867 - }, - { - "epoch": 0.4585855255410848, - "grad_norm": 0.7815099954605103, - "learning_rate": 1.783261813882666e-05, - "loss": 0.3088, - "step": 4868 - }, - { - "epoch": 0.45867972963425263, - "grad_norm": 0.7526468634605408, - "learning_rate": 1.7831679283375245e-05, - "loss": 0.2947, - "step": 4869 - }, - { - "epoch": 0.4587739337274205, - "grad_norm": 0.7108429074287415, - "learning_rate": 1.783074024934943e-05, - "loss": 0.3357, - "step": 4870 - }, - { - "epoch": 0.45886813782058833, - "grad_norm": 0.8212223052978516, - "learning_rate": 1.7829801036770628e-05, - "loss": 0.3615, - "step": 4871 - }, - { - "epoch": 0.4589623419137562, - "grad_norm": 0.9447070360183716, - "learning_rate": 1.7828861645660257e-05, - "loss": 0.3824, - "step": 4872 - }, - { - "epoch": 0.459056546006924, - "grad_norm": 0.7781452536582947, - "learning_rate": 1.782792207603973e-05, - "loss": 0.3244, - "step": 4873 - }, - { - "epoch": 0.4591507501000919, - "grad_norm": 0.8141428232192993, - "learning_rate": 1.7826982327930474e-05, - "loss": 0.3668, - "step": 4874 - }, - { - "epoch": 0.4592449541932597, - "grad_norm": 0.748768150806427, - "learning_rate": 1.7826042401353914e-05, - "loss": 0.3, - "step": 4875 - }, - { - "epoch": 0.4593391582864275, - "grad_norm": 0.7869177460670471, - "learning_rate": 1.7825102296331483e-05, - "loss": 0.3523, - "step": 4876 - }, - { - "epoch": 0.45943336237959537, - "grad_norm": 0.8053706884384155, - "learning_rate": 1.7824162012884625e-05, - "loss": 0.3656, - "step": 4877 - }, - { - "epoch": 0.4595275664727632, - "grad_norm": 0.7964096665382385, - "learning_rate": 1.7823221551034766e-05, - "loss": 0.3686, - "step": 4878 - }, - { - "epoch": 0.45962177056593106, - "grad_norm": 0.89954674243927, - "learning_rate": 1.782228091080336e-05, - "loss": 0.4377, - "step": 4879 - }, - { - "epoch": 0.4597159746590989, - "grad_norm": 0.7223961353302002, - "learning_rate": 1.7821340092211853e-05, - "loss": 0.3245, - "step": 4880 - }, - { - "epoch": 0.45981017875226676, - "grad_norm": 0.965713620185852, - "learning_rate": 1.782039909528169e-05, - "loss": 0.3642, - "step": 4881 - }, - { - "epoch": 0.4599043828454346, - "grad_norm": 1.8892385959625244, - "learning_rate": 1.781945792003434e-05, - "loss": 0.3652, - "step": 4882 - }, - { - "epoch": 0.45999858693860246, - "grad_norm": 0.7769013047218323, - "learning_rate": 1.7818516566491254e-05, - "loss": 0.347, - "step": 4883 - }, - { - "epoch": 0.4600927910317703, - "grad_norm": 0.9221687912940979, - "learning_rate": 1.78175750346739e-05, - "loss": 0.3585, - "step": 4884 - }, - { - "epoch": 0.46018699512493816, - "grad_norm": 0.772746205329895, - "learning_rate": 1.781663332460374e-05, - "loss": 0.3258, - "step": 4885 - }, - { - "epoch": 0.460281199218106, - "grad_norm": 0.8882587552070618, - "learning_rate": 1.7815691436302255e-05, - "loss": 0.341, - "step": 4886 - }, - { - "epoch": 0.46037540331127386, - "grad_norm": 1.0558087825775146, - "learning_rate": 1.781474936979092e-05, - "loss": 0.3485, - "step": 4887 - }, - { - "epoch": 0.4604696074044417, - "grad_norm": 0.8857042193412781, - "learning_rate": 1.7813807125091213e-05, - "loss": 0.3225, - "step": 4888 - }, - { - "epoch": 0.46056381149760955, - "grad_norm": 0.8105474710464478, - "learning_rate": 1.7812864702224616e-05, - "loss": 0.3585, - "step": 4889 - }, - { - "epoch": 0.4606580155907774, - "grad_norm": 0.9532637000083923, - "learning_rate": 
1.7811922101212622e-05, - "loss": 0.2935, - "step": 4890 - }, - { - "epoch": 0.46075221968394525, - "grad_norm": 0.8896369934082031, - "learning_rate": 1.7810979322076725e-05, - "loss": 0.3823, - "step": 4891 - }, - { - "epoch": 0.4608464237771131, - "grad_norm": 0.86832195520401, - "learning_rate": 1.7810036364838418e-05, - "loss": 0.4171, - "step": 4892 - }, - { - "epoch": 0.46094062787028095, - "grad_norm": 1.2206555604934692, - "learning_rate": 1.7809093229519203e-05, - "loss": 0.3373, - "step": 4893 - }, - { - "epoch": 0.4610348319634488, - "grad_norm": 0.8856639266014099, - "learning_rate": 1.7808149916140584e-05, - "loss": 0.3417, - "step": 4894 - }, - { - "epoch": 0.46112903605661665, - "grad_norm": 0.7485201954841614, - "learning_rate": 1.7807206424724076e-05, - "loss": 0.333, - "step": 4895 - }, - { - "epoch": 0.4612232401497845, - "grad_norm": 2.2062063217163086, - "learning_rate": 1.780626275529118e-05, - "loss": 0.3084, - "step": 4896 - }, - { - "epoch": 0.46131744424295235, - "grad_norm": 0.7655016779899597, - "learning_rate": 1.7805318907863428e-05, - "loss": 0.3406, - "step": 4897 - }, - { - "epoch": 0.4614116483361202, - "grad_norm": 0.9309150576591492, - "learning_rate": 1.780437488246233e-05, - "loss": 0.4256, - "step": 4898 - }, - { - "epoch": 0.46150585242928804, - "grad_norm": 0.7266963124275208, - "learning_rate": 1.7803430679109416e-05, - "loss": 0.3245, - "step": 4899 - }, - { - "epoch": 0.4616000565224559, - "grad_norm": 0.7196773290634155, - "learning_rate": 1.7802486297826216e-05, - "loss": 0.3361, - "step": 4900 - }, - { - "epoch": 0.46169426061562374, - "grad_norm": 0.7565684914588928, - "learning_rate": 1.780154173863426e-05, - "loss": 0.3536, - "step": 4901 - }, - { - "epoch": 0.4617884647087916, - "grad_norm": 0.8094671368598938, - "learning_rate": 1.7800597001555087e-05, - "loss": 0.3419, - "step": 4902 - }, - { - "epoch": 0.46188266880195944, - "grad_norm": 0.7917925119400024, - "learning_rate": 1.779965208661024e-05, - "loss": 0.3173, - "step": 4903 - }, - { - "epoch": 0.4619768728951273, - "grad_norm": 0.7965604662895203, - "learning_rate": 1.779870699382126e-05, - "loss": 0.3294, - "step": 4904 - }, - { - "epoch": 0.46207107698829514, - "grad_norm": 0.86468106508255, - "learning_rate": 1.77977617232097e-05, - "loss": 0.2982, - "step": 4905 - }, - { - "epoch": 0.462165281081463, - "grad_norm": 0.8347722291946411, - "learning_rate": 1.7796816274797115e-05, - "loss": 0.3868, - "step": 4906 - }, - { - "epoch": 0.46225948517463084, - "grad_norm": 0.8633783459663391, - "learning_rate": 1.779587064860506e-05, - "loss": 0.3621, - "step": 4907 - }, - { - "epoch": 0.4623536892677987, - "grad_norm": 0.7564566135406494, - "learning_rate": 1.77949248446551e-05, - "loss": 0.3439, - "step": 4908 - }, - { - "epoch": 0.46244789336096653, - "grad_norm": 1.1352871656417847, - "learning_rate": 1.7793978862968794e-05, - "loss": 0.3524, - "step": 4909 - }, - { - "epoch": 0.4625420974541344, - "grad_norm": 0.7950761914253235, - "learning_rate": 1.779303270356772e-05, - "loss": 0.3256, - "step": 4910 - }, - { - "epoch": 0.46263630154730223, - "grad_norm": 0.7687955498695374, - "learning_rate": 1.7792086366473447e-05, - "loss": 0.3191, - "step": 4911 - }, - { - "epoch": 0.4627305056404701, - "grad_norm": 0.7171542644500732, - "learning_rate": 1.7791139851707552e-05, - "loss": 0.3123, - "step": 4912 - }, - { - "epoch": 0.46282470973363793, - "grad_norm": 0.8845537900924683, - "learning_rate": 1.7790193159291622e-05, - "loss": 0.3339, - "step": 4913 - }, - { - "epoch": 
0.4629189138268058, - "grad_norm": 0.7290259599685669, - "learning_rate": 1.7789246289247238e-05, - "loss": 0.2956, - "step": 4914 - }, - { - "epoch": 0.4630131179199736, - "grad_norm": 1.083981990814209, - "learning_rate": 1.7788299241595997e-05, - "loss": 0.3068, - "step": 4915 - }, - { - "epoch": 0.4631073220131415, - "grad_norm": 0.6418946385383606, - "learning_rate": 1.7787352016359483e-05, - "loss": 0.2986, - "step": 4916 - }, - { - "epoch": 0.4632015261063093, - "grad_norm": 0.7161775231361389, - "learning_rate": 1.7786404613559304e-05, - "loss": 0.3123, - "step": 4917 - }, - { - "epoch": 0.4632957301994772, - "grad_norm": 0.9228521585464478, - "learning_rate": 1.7785457033217054e-05, - "loss": 0.3632, - "step": 4918 - }, - { - "epoch": 0.463389934292645, - "grad_norm": 0.8408086895942688, - "learning_rate": 1.7784509275354348e-05, - "loss": 0.3463, - "step": 4919 - }, - { - "epoch": 0.46348413838581287, - "grad_norm": 0.7921558022499084, - "learning_rate": 1.7783561339992784e-05, - "loss": 0.398, - "step": 4920 - }, - { - "epoch": 0.4635783424789807, - "grad_norm": 0.8069859743118286, - "learning_rate": 1.778261322715399e-05, - "loss": 0.3098, - "step": 4921 - }, - { - "epoch": 0.46367254657214857, - "grad_norm": 1.0308336019515991, - "learning_rate": 1.7781664936859576e-05, - "loss": 0.3692, - "step": 4922 - }, - { - "epoch": 0.4637667506653164, - "grad_norm": 0.7749543190002441, - "learning_rate": 1.7780716469131167e-05, - "loss": 0.3501, - "step": 4923 - }, - { - "epoch": 0.46386095475848427, - "grad_norm": 0.8365309834480286, - "learning_rate": 1.7779767823990394e-05, - "loss": 0.3125, - "step": 4924 - }, - { - "epoch": 0.4639551588516521, - "grad_norm": 0.8372287750244141, - "learning_rate": 1.7778819001458877e-05, - "loss": 0.3244, - "step": 4925 - }, - { - "epoch": 0.46404936294481997, - "grad_norm": 0.8181421160697937, - "learning_rate": 1.777787000155826e-05, - "loss": 0.3302, - "step": 4926 - }, - { - "epoch": 0.4641435670379878, - "grad_norm": 0.8703104853630066, - "learning_rate": 1.7776920824310174e-05, - "loss": 0.3419, - "step": 4927 - }, - { - "epoch": 0.46423777113115566, - "grad_norm": 0.8228803277015686, - "learning_rate": 1.777597146973627e-05, - "loss": 0.3443, - "step": 4928 - }, - { - "epoch": 0.4643319752243235, - "grad_norm": 0.7369617819786072, - "learning_rate": 1.777502193785819e-05, - "loss": 0.3136, - "step": 4929 - }, - { - "epoch": 0.46442617931749136, - "grad_norm": 0.8577859997749329, - "learning_rate": 1.7774072228697584e-05, - "loss": 0.3719, - "step": 4930 - }, - { - "epoch": 0.4645203834106592, - "grad_norm": 0.8309158086776733, - "learning_rate": 1.777312234227611e-05, - "loss": 0.3265, - "step": 4931 - }, - { - "epoch": 0.46461458750382706, - "grad_norm": 0.8043569326400757, - "learning_rate": 1.7772172278615424e-05, - "loss": 0.3451, - "step": 4932 - }, - { - "epoch": 0.4647087915969949, - "grad_norm": 0.7470012307167053, - "learning_rate": 1.7771222037737192e-05, - "loss": 0.3274, - "step": 4933 - }, - { - "epoch": 0.46480299569016276, - "grad_norm": 0.725937008857727, - "learning_rate": 1.777027161966308e-05, - "loss": 0.3113, - "step": 4934 - }, - { - "epoch": 0.4648971997833306, - "grad_norm": 0.8681197762489319, - "learning_rate": 1.7769321024414753e-05, - "loss": 0.3397, - "step": 4935 - }, - { - "epoch": 0.46499140387649845, - "grad_norm": 0.8625389933586121, - "learning_rate": 1.7768370252013897e-05, - "loss": 0.3492, - "step": 4936 - }, - { - "epoch": 0.4650856079696663, - "grad_norm": 0.7602217197418213, - "learning_rate": 
1.776741930248218e-05, - "loss": 0.3682, - "step": 4937 - }, - { - "epoch": 0.46517981206283415, - "grad_norm": 0.9192726016044617, - "learning_rate": 1.7766468175841295e-05, - "loss": 0.3581, - "step": 4938 - }, - { - "epoch": 0.465274016156002, - "grad_norm": 0.8578839898109436, - "learning_rate": 1.776551687211292e-05, - "loss": 0.3345, - "step": 4939 - }, - { - "epoch": 0.46536822024916985, - "grad_norm": 1.0106184482574463, - "learning_rate": 1.7764565391318753e-05, - "loss": 0.3459, - "step": 4940 - }, - { - "epoch": 0.4654624243423377, - "grad_norm": 0.7562296390533447, - "learning_rate": 1.7763613733480486e-05, - "loss": 0.3194, - "step": 4941 - }, - { - "epoch": 0.46555662843550555, - "grad_norm": 0.8383955359458923, - "learning_rate": 1.776266189861982e-05, - "loss": 0.3424, - "step": 4942 - }, - { - "epoch": 0.4656508325286734, - "grad_norm": 0.800328254699707, - "learning_rate": 1.7761709886758458e-05, - "loss": 0.3477, - "step": 4943 - }, - { - "epoch": 0.46574503662184125, - "grad_norm": 0.8032830357551575, - "learning_rate": 1.776075769791811e-05, - "loss": 0.336, - "step": 4944 - }, - { - "epoch": 0.46583924071500904, - "grad_norm": 0.7230134010314941, - "learning_rate": 1.775980533212048e-05, - "loss": 0.339, - "step": 4945 - }, - { - "epoch": 0.4659334448081769, - "grad_norm": 0.7199127674102783, - "learning_rate": 1.775885278938729e-05, - "loss": 0.2564, - "step": 4946 - }, - { - "epoch": 0.46602764890134474, - "grad_norm": 0.8952180743217468, - "learning_rate": 1.7757900069740253e-05, - "loss": 0.3629, - "step": 4947 - }, - { - "epoch": 0.4661218529945126, - "grad_norm": 0.8440223336219788, - "learning_rate": 1.77569471732011e-05, - "loss": 0.3758, - "step": 4948 - }, - { - "epoch": 0.46621605708768044, - "grad_norm": 0.8343638777732849, - "learning_rate": 1.7755994099791555e-05, - "loss": 0.341, - "step": 4949 - }, - { - "epoch": 0.4663102611808483, - "grad_norm": 0.8626476526260376, - "learning_rate": 1.775504084953335e-05, - "loss": 0.3534, - "step": 4950 - }, - { - "epoch": 0.46640446527401613, - "grad_norm": 0.7678931951522827, - "learning_rate": 1.7754087422448217e-05, - "loss": 0.3387, - "step": 4951 - }, - { - "epoch": 0.466498669367184, - "grad_norm": 0.6777465343475342, - "learning_rate": 1.77531338185579e-05, - "loss": 0.2817, - "step": 4952 - }, - { - "epoch": 0.46659287346035183, - "grad_norm": 0.8011043071746826, - "learning_rate": 1.7752180037884143e-05, - "loss": 0.3121, - "step": 4953 - }, - { - "epoch": 0.4666870775535197, - "grad_norm": 0.8017278909683228, - "learning_rate": 1.7751226080448694e-05, - "loss": 0.3342, - "step": 4954 - }, - { - "epoch": 0.46678128164668753, - "grad_norm": 0.805519700050354, - "learning_rate": 1.77502719462733e-05, - "loss": 0.3789, - "step": 4955 - }, - { - "epoch": 0.4668754857398554, - "grad_norm": 0.8002102375030518, - "learning_rate": 1.7749317635379718e-05, - "loss": 0.3464, - "step": 4956 - }, - { - "epoch": 0.4669696898330232, - "grad_norm": 0.8926291465759277, - "learning_rate": 1.7748363147789712e-05, - "loss": 0.3619, - "step": 4957 - }, - { - "epoch": 0.4670638939261911, - "grad_norm": 0.7907378673553467, - "learning_rate": 1.7747408483525045e-05, - "loss": 0.3027, - "step": 4958 - }, - { - "epoch": 0.4671580980193589, - "grad_norm": 0.8785030245780945, - "learning_rate": 1.774645364260748e-05, - "loss": 0.3662, - "step": 4959 - }, - { - "epoch": 0.4672523021125268, - "grad_norm": 1.0228649377822876, - "learning_rate": 1.774549862505879e-05, - "loss": 0.367, - "step": 4960 - }, - { - "epoch": 
0.4673465062056946, - "grad_norm": 0.7833585739135742, - "learning_rate": 1.7744543430900755e-05, - "loss": 0.35, - "step": 4961 - }, - { - "epoch": 0.46744071029886247, - "grad_norm": 0.7718712091445923, - "learning_rate": 1.7743588060155153e-05, - "loss": 0.3274, - "step": 4962 - }, - { - "epoch": 0.4675349143920303, - "grad_norm": 0.7573497891426086, - "learning_rate": 1.7742632512843768e-05, - "loss": 0.3189, - "step": 4963 - }, - { - "epoch": 0.46762911848519817, - "grad_norm": 0.857114851474762, - "learning_rate": 1.7741676788988386e-05, - "loss": 0.3441, - "step": 4964 - }, - { - "epoch": 0.467723322578366, - "grad_norm": 0.7921775579452515, - "learning_rate": 1.7740720888610802e-05, - "loss": 0.361, - "step": 4965 - }, - { - "epoch": 0.46781752667153387, - "grad_norm": 0.6085286140441895, - "learning_rate": 1.773976481173281e-05, - "loss": 0.2574, - "step": 4966 - }, - { - "epoch": 0.4679117307647017, - "grad_norm": 0.7605984807014465, - "learning_rate": 1.773880855837621e-05, - "loss": 0.3201, - "step": 4967 - }, - { - "epoch": 0.46800593485786957, - "grad_norm": 0.8221992254257202, - "learning_rate": 1.7737852128562807e-05, - "loss": 0.3248, - "step": 4968 - }, - { - "epoch": 0.4681001389510374, - "grad_norm": 0.7544741034507751, - "learning_rate": 1.773689552231441e-05, - "loss": 0.3248, - "step": 4969 - }, - { - "epoch": 0.46819434304420526, - "grad_norm": 1.3404844999313354, - "learning_rate": 1.7735938739652827e-05, - "loss": 0.3196, - "step": 4970 - }, - { - "epoch": 0.4682885471373731, - "grad_norm": 0.8288660049438477, - "learning_rate": 1.773498178059988e-05, - "loss": 0.3616, - "step": 4971 - }, - { - "epoch": 0.46838275123054096, - "grad_norm": 1.1371781826019287, - "learning_rate": 1.7734024645177382e-05, - "loss": 0.372, - "step": 4972 - }, - { - "epoch": 0.4684769553237088, - "grad_norm": 0.8881229162216187, - "learning_rate": 1.773306733340716e-05, - "loss": 0.372, - "step": 4973 - }, - { - "epoch": 0.46857115941687666, - "grad_norm": 0.775094747543335, - "learning_rate": 1.773210984531105e-05, - "loss": 0.3763, - "step": 4974 - }, - { - "epoch": 0.4686653635100445, - "grad_norm": 0.8069360256195068, - "learning_rate": 1.773115218091087e-05, - "loss": 0.318, - "step": 4975 - }, - { - "epoch": 0.46875956760321236, - "grad_norm": 0.8223565816879272, - "learning_rate": 1.773019434022847e-05, - "loss": 0.329, - "step": 4976 - }, - { - "epoch": 0.4688537716963802, - "grad_norm": 0.7707805037498474, - "learning_rate": 1.7729236323285684e-05, - "loss": 0.2907, - "step": 4977 - }, - { - "epoch": 0.46894797578954805, - "grad_norm": 0.8500064611434937, - "learning_rate": 1.7728278130104356e-05, - "loss": 0.3522, - "step": 4978 - }, - { - "epoch": 0.4690421798827159, - "grad_norm": 0.8865066766738892, - "learning_rate": 1.772731976070633e-05, - "loss": 0.3546, - "step": 4979 - }, - { - "epoch": 0.46913638397588375, - "grad_norm": 0.7410879135131836, - "learning_rate": 1.7726361215113467e-05, - "loss": 0.3377, - "step": 4980 - }, - { - "epoch": 0.4692305880690516, - "grad_norm": 0.8143529891967773, - "learning_rate": 1.7725402493347618e-05, - "loss": 0.3295, - "step": 4981 - }, - { - "epoch": 0.46932479216221945, - "grad_norm": 0.8637810945510864, - "learning_rate": 1.7724443595430643e-05, - "loss": 0.3686, - "step": 4982 - }, - { - "epoch": 0.4694189962553873, - "grad_norm": 0.772445797920227, - "learning_rate": 1.7723484521384412e-05, - "loss": 0.3033, - "step": 4983 - }, - { - "epoch": 0.46951320034855515, - "grad_norm": 0.9185500741004944, - "learning_rate": 
1.772252527123079e-05, - "loss": 0.3474, - "step": 4984 - }, - { - "epoch": 0.469607404441723, - "grad_norm": 0.8710028529167175, - "learning_rate": 1.7721565844991643e-05, - "loss": 0.3813, - "step": 4985 - }, - { - "epoch": 0.46970160853489085, - "grad_norm": 0.7688071131706238, - "learning_rate": 1.772060624268886e-05, - "loss": 0.3218, - "step": 4986 - }, - { - "epoch": 0.4697958126280587, - "grad_norm": 0.7208905816078186, - "learning_rate": 1.7719646464344307e-05, - "loss": 0.3245, - "step": 4987 - }, - { - "epoch": 0.46989001672122654, - "grad_norm": 0.788725733757019, - "learning_rate": 1.771868650997988e-05, - "loss": 0.3381, - "step": 4988 - }, - { - "epoch": 0.4699842208143944, - "grad_norm": 0.8555259704589844, - "learning_rate": 1.7717726379617462e-05, - "loss": 0.3384, - "step": 4989 - }, - { - "epoch": 0.47007842490756224, - "grad_norm": 0.8116970658302307, - "learning_rate": 1.771676607327895e-05, - "loss": 0.3377, - "step": 4990 - }, - { - "epoch": 0.4701726290007301, - "grad_norm": 0.817533552646637, - "learning_rate": 1.7715805590986232e-05, - "loss": 0.3524, - "step": 4991 - }, - { - "epoch": 0.47026683309389794, - "grad_norm": 1.2434886693954468, - "learning_rate": 1.771484493276122e-05, - "loss": 0.3988, - "step": 4992 - }, - { - "epoch": 0.4703610371870658, - "grad_norm": 0.8891094923019409, - "learning_rate": 1.771388409862581e-05, - "loss": 0.3913, - "step": 4993 - }, - { - "epoch": 0.47045524128023364, - "grad_norm": 0.7836058139801025, - "learning_rate": 1.771292308860191e-05, - "loss": 0.3209, - "step": 4994 - }, - { - "epoch": 0.4705494453734015, - "grad_norm": 0.7556778192520142, - "learning_rate": 1.7711961902711437e-05, - "loss": 0.323, - "step": 4995 - }, - { - "epoch": 0.47064364946656934, - "grad_norm": 1.004414439201355, - "learning_rate": 1.7711000540976305e-05, - "loss": 0.3219, - "step": 4996 - }, - { - "epoch": 0.4707378535597372, - "grad_norm": 0.7572587132453918, - "learning_rate": 1.7710039003418437e-05, - "loss": 0.3538, - "step": 4997 - }, - { - "epoch": 0.47083205765290503, - "grad_norm": 0.7744436264038086, - "learning_rate": 1.7709077290059755e-05, - "loss": 0.2975, - "step": 4998 - }, - { - "epoch": 0.4709262617460729, - "grad_norm": 0.7824291586875916, - "learning_rate": 1.7708115400922187e-05, - "loss": 0.3389, - "step": 4999 - }, - { - "epoch": 0.47102046583924073, - "grad_norm": 0.7729814648628235, - "learning_rate": 1.770715333602767e-05, - "loss": 0.3669, - "step": 5000 - }, - { - "epoch": 0.4711146699324086, - "grad_norm": 0.8225262761116028, - "learning_rate": 1.7706191095398138e-05, - "loss": 0.3689, - "step": 5001 - }, - { - "epoch": 0.47120887402557643, - "grad_norm": 0.7559041380882263, - "learning_rate": 1.770522867905553e-05, - "loss": 0.3428, - "step": 5002 - }, - { - "epoch": 0.4713030781187443, - "grad_norm": 0.7640758752822876, - "learning_rate": 1.770426608702179e-05, - "loss": 0.3299, - "step": 5003 - }, - { - "epoch": 0.4713972822119121, - "grad_norm": 0.747041642665863, - "learning_rate": 1.770330331931887e-05, - "loss": 0.3372, - "step": 5004 - }, - { - "epoch": 0.47149148630508, - "grad_norm": 1.1163392066955566, - "learning_rate": 1.7702340375968724e-05, - "loss": 0.3193, - "step": 5005 - }, - { - "epoch": 0.4715856903982478, - "grad_norm": 0.6798670887947083, - "learning_rate": 1.7701377256993296e-05, - "loss": 0.3568, - "step": 5006 - }, - { - "epoch": 0.4716798944914157, - "grad_norm": 0.778786838054657, - "learning_rate": 1.7700413962414565e-05, - "loss": 0.3651, - "step": 5007 - }, - { - "epoch": 
0.4717740985845835, - "grad_norm": 0.8317043781280518, - "learning_rate": 1.7699450492254484e-05, - "loss": 0.3241, - "step": 5008 - }, - { - "epoch": 0.4718683026777514, - "grad_norm": 0.7771879434585571, - "learning_rate": 1.7698486846535026e-05, - "loss": 0.3363, - "step": 5009 - }, - { - "epoch": 0.4719625067709192, - "grad_norm": 1.0484275817871094, - "learning_rate": 1.7697523025278158e-05, - "loss": 0.4066, - "step": 5010 - }, - { - "epoch": 0.47205671086408707, - "grad_norm": 0.7146066427230835, - "learning_rate": 1.7696559028505864e-05, - "loss": 0.3468, - "step": 5011 - }, - { - "epoch": 0.4721509149572549, - "grad_norm": 0.9016075134277344, - "learning_rate": 1.7695594856240118e-05, - "loss": 0.3458, - "step": 5012 - }, - { - "epoch": 0.47224511905042277, - "grad_norm": 0.8408075571060181, - "learning_rate": 1.769463050850291e-05, - "loss": 0.3434, - "step": 5013 - }, - { - "epoch": 0.47233932314359056, - "grad_norm": 0.8501318693161011, - "learning_rate": 1.769366598531623e-05, - "loss": 0.372, - "step": 5014 - }, - { - "epoch": 0.4724335272367584, - "grad_norm": 0.7973361611366272, - "learning_rate": 1.7692701286702062e-05, - "loss": 0.3209, - "step": 5015 - }, - { - "epoch": 0.47252773132992626, - "grad_norm": 0.8058852553367615, - "learning_rate": 1.7691736412682408e-05, - "loss": 0.3414, - "step": 5016 - }, - { - "epoch": 0.4726219354230941, - "grad_norm": 0.9156191349029541, - "learning_rate": 1.7690771363279272e-05, - "loss": 0.3019, - "step": 5017 - }, - { - "epoch": 0.47271613951626196, - "grad_norm": 0.8219974040985107, - "learning_rate": 1.768980613851465e-05, - "loss": 0.3451, - "step": 5018 - }, - { - "epoch": 0.4728103436094298, - "grad_norm": 0.805870532989502, - "learning_rate": 1.768884073841056e-05, - "loss": 0.2751, - "step": 5019 - }, - { - "epoch": 0.47290454770259766, - "grad_norm": 0.8654406666755676, - "learning_rate": 1.768787516298901e-05, - "loss": 0.3512, - "step": 5020 - }, - { - "epoch": 0.4729987517957655, - "grad_norm": 0.7221662998199463, - "learning_rate": 1.768690941227201e-05, - "loss": 0.3044, - "step": 5021 - }, - { - "epoch": 0.47309295588893335, - "grad_norm": 0.8214747309684753, - "learning_rate": 1.7685943486281595e-05, - "loss": 0.323, - "step": 5022 - }, - { - "epoch": 0.4731871599821012, - "grad_norm": 0.8377925753593445, - "learning_rate": 1.7684977385039776e-05, - "loss": 0.3466, - "step": 5023 - }, - { - "epoch": 0.47328136407526905, - "grad_norm": 0.9490770101547241, - "learning_rate": 1.7684011108568593e-05, - "loss": 0.378, - "step": 5024 - }, - { - "epoch": 0.4733755681684369, - "grad_norm": 0.7713491320610046, - "learning_rate": 1.768304465689007e-05, - "loss": 0.3266, - "step": 5025 - }, - { - "epoch": 0.47346977226160475, - "grad_norm": 0.8269317150115967, - "learning_rate": 1.7682078030026245e-05, - "loss": 0.374, - "step": 5026 - }, - { - "epoch": 0.4735639763547726, - "grad_norm": 0.7009604573249817, - "learning_rate": 1.7681111227999164e-05, - "loss": 0.2888, - "step": 5027 - }, - { - "epoch": 0.47365818044794045, - "grad_norm": 0.8689761161804199, - "learning_rate": 1.7680144250830868e-05, - "loss": 0.3569, - "step": 5028 - }, - { - "epoch": 0.4737523845411083, - "grad_norm": 0.7176832556724548, - "learning_rate": 1.7679177098543405e-05, - "loss": 0.3106, - "step": 5029 - }, - { - "epoch": 0.47384658863427614, - "grad_norm": 0.9018487334251404, - "learning_rate": 1.7678209771158827e-05, - "loss": 0.3323, - "step": 5030 - }, - { - "epoch": 0.473940792727444, - "grad_norm": 0.7709859609603882, - "learning_rate": 
1.7677242268699192e-05, - "loss": 0.3564, - "step": 5031 - }, - { - "epoch": 0.47403499682061184, - "grad_norm": 0.7048654556274414, - "learning_rate": 1.7676274591186562e-05, - "loss": 0.3244, - "step": 5032 - }, - { - "epoch": 0.4741292009137797, - "grad_norm": 0.7146427631378174, - "learning_rate": 1.7675306738642996e-05, - "loss": 0.3219, - "step": 5033 - }, - { - "epoch": 0.47422340500694754, - "grad_norm": 0.7720121145248413, - "learning_rate": 1.7674338711090568e-05, - "loss": 0.3171, - "step": 5034 - }, - { - "epoch": 0.4743176091001154, - "grad_norm": 0.862352192401886, - "learning_rate": 1.767337050855135e-05, - "loss": 0.3547, - "step": 5035 - }, - { - "epoch": 0.47441181319328324, - "grad_norm": 0.8553547263145447, - "learning_rate": 1.7672402131047414e-05, - "loss": 0.3598, - "step": 5036 - }, - { - "epoch": 0.4745060172864511, - "grad_norm": 0.7929760217666626, - "learning_rate": 1.7671433578600846e-05, - "loss": 0.3262, - "step": 5037 - }, - { - "epoch": 0.47460022137961894, - "grad_norm": 0.699970543384552, - "learning_rate": 1.767046485123373e-05, - "loss": 0.3004, - "step": 5038 - }, - { - "epoch": 0.4746944254727868, - "grad_norm": 0.7730126976966858, - "learning_rate": 1.7669495948968152e-05, - "loss": 0.3368, - "step": 5039 - }, - { - "epoch": 0.47478862956595463, - "grad_norm": 0.8011327385902405, - "learning_rate": 1.7668526871826204e-05, - "loss": 0.3613, - "step": 5040 - }, - { - "epoch": 0.4748828336591225, - "grad_norm": 0.7555213570594788, - "learning_rate": 1.7667557619829985e-05, - "loss": 0.328, - "step": 5041 - }, - { - "epoch": 0.47497703775229033, - "grad_norm": 0.7480307221412659, - "learning_rate": 1.7666588193001595e-05, - "loss": 0.337, - "step": 5042 - }, - { - "epoch": 0.4750712418454582, - "grad_norm": 0.750718891620636, - "learning_rate": 1.7665618591363135e-05, - "loss": 0.3007, - "step": 5043 - }, - { - "epoch": 0.47516544593862603, - "grad_norm": 0.8020790815353394, - "learning_rate": 1.7664648814936716e-05, - "loss": 0.3826, - "step": 5044 - }, - { - "epoch": 0.4752596500317939, - "grad_norm": 0.8922072052955627, - "learning_rate": 1.7663678863744455e-05, - "loss": 0.442, - "step": 5045 - }, - { - "epoch": 0.4753538541249617, - "grad_norm": 0.856925904750824, - "learning_rate": 1.7662708737808457e-05, - "loss": 0.3588, - "step": 5046 - }, - { - "epoch": 0.4754480582181296, - "grad_norm": 0.6952528953552246, - "learning_rate": 1.7661738437150853e-05, - "loss": 0.2948, - "step": 5047 - }, - { - "epoch": 0.4755422623112974, - "grad_norm": 0.8187353610992432, - "learning_rate": 1.7660767961793764e-05, - "loss": 0.3601, - "step": 5048 - }, - { - "epoch": 0.4756364664044653, - "grad_norm": 0.8927239179611206, - "learning_rate": 1.7659797311759314e-05, - "loss": 0.3659, - "step": 5049 - }, - { - "epoch": 0.4757306704976331, - "grad_norm": 0.7681635618209839, - "learning_rate": 1.7658826487069642e-05, - "loss": 0.3202, - "step": 5050 - }, - { - "epoch": 0.475824874590801, - "grad_norm": 0.7328925132751465, - "learning_rate": 1.7657855487746883e-05, - "loss": 0.3278, - "step": 5051 - }, - { - "epoch": 0.4759190786839688, - "grad_norm": 0.8765425682067871, - "learning_rate": 1.7656884313813174e-05, - "loss": 0.3556, - "step": 5052 - }, - { - "epoch": 0.47601328277713667, - "grad_norm": 0.7214697003364563, - "learning_rate": 1.765591296529066e-05, - "loss": 0.3369, - "step": 5053 - }, - { - "epoch": 0.4761074868703045, - "grad_norm": 0.7442885041236877, - "learning_rate": 1.7654941442201493e-05, - "loss": 0.3216, - "step": 5054 - }, - { - "epoch": 
0.47620169096347237, - "grad_norm": 0.8426844477653503, - "learning_rate": 1.765396974456782e-05, - "loss": 0.3394, - "step": 5055 - }, - { - "epoch": 0.4762958950566402, - "grad_norm": 0.7568658590316772, - "learning_rate": 1.76529978724118e-05, - "loss": 0.3103, - "step": 5056 - }, - { - "epoch": 0.47639009914980807, - "grad_norm": 0.7060257792472839, - "learning_rate": 1.7652025825755593e-05, - "loss": 0.299, - "step": 5057 - }, - { - "epoch": 0.4764843032429759, - "grad_norm": 0.8324049711227417, - "learning_rate": 1.7651053604621367e-05, - "loss": 0.3264, - "step": 5058 - }, - { - "epoch": 0.47657850733614376, - "grad_norm": 0.6237125992774963, - "learning_rate": 1.7650081209031282e-05, - "loss": 0.2504, - "step": 5059 - }, - { - "epoch": 0.4766727114293116, - "grad_norm": 0.8777263760566711, - "learning_rate": 1.7649108639007516e-05, - "loss": 0.3544, - "step": 5060 - }, - { - "epoch": 0.47676691552247946, - "grad_norm": 0.7348818778991699, - "learning_rate": 1.764813589457224e-05, - "loss": 0.2901, - "step": 5061 - }, - { - "epoch": 0.4768611196156473, - "grad_norm": 0.7848557829856873, - "learning_rate": 1.7647162975747646e-05, - "loss": 0.3155, - "step": 5062 - }, - { - "epoch": 0.47695532370881516, - "grad_norm": 0.7364405393600464, - "learning_rate": 1.76461898825559e-05, - "loss": 0.3081, - "step": 5063 - }, - { - "epoch": 0.477049527801983, - "grad_norm": 0.8653562068939209, - "learning_rate": 1.7645216615019204e-05, - "loss": 0.3288, - "step": 5064 - }, - { - "epoch": 0.47714373189515086, - "grad_norm": 0.9205647110939026, - "learning_rate": 1.7644243173159746e-05, - "loss": 0.3057, - "step": 5065 - }, - { - "epoch": 0.4772379359883187, - "grad_norm": 0.8097381591796875, - "learning_rate": 1.764326955699972e-05, - "loss": 0.3551, - "step": 5066 - }, - { - "epoch": 0.47733214008148656, - "grad_norm": 0.8962020874023438, - "learning_rate": 1.764229576656133e-05, - "loss": 0.39, - "step": 5067 - }, - { - "epoch": 0.4774263441746544, - "grad_norm": 0.901494562625885, - "learning_rate": 1.7641321801866776e-05, - "loss": 0.2975, - "step": 5068 - }, - { - "epoch": 0.47752054826782225, - "grad_norm": 0.64666748046875, - "learning_rate": 1.7640347662938268e-05, - "loss": 0.3007, - "step": 5069 - }, - { - "epoch": 0.4776147523609901, - "grad_norm": 0.6876158714294434, - "learning_rate": 1.7639373349798016e-05, - "loss": 0.28, - "step": 5070 - }, - { - "epoch": 0.47770895645415795, - "grad_norm": 0.992737889289856, - "learning_rate": 1.7638398862468237e-05, - "loss": 0.2968, - "step": 5071 - }, - { - "epoch": 0.4778031605473258, - "grad_norm": 0.7393843531608582, - "learning_rate": 1.763742420097115e-05, - "loss": 0.347, - "step": 5072 - }, - { - "epoch": 0.47789736464049365, - "grad_norm": 0.7149779796600342, - "learning_rate": 1.7636449365328983e-05, - "loss": 0.3087, - "step": 5073 - }, - { - "epoch": 0.4779915687336615, - "grad_norm": 0.7476661205291748, - "learning_rate": 1.763547435556396e-05, - "loss": 0.2981, - "step": 5074 - }, - { - "epoch": 0.47808577282682935, - "grad_norm": 0.8229698538780212, - "learning_rate": 1.763449917169831e-05, - "loss": 0.3592, - "step": 5075 - }, - { - "epoch": 0.4781799769199972, - "grad_norm": 0.7402316927909851, - "learning_rate": 1.7633523813754276e-05, - "loss": 0.3137, - "step": 5076 - }, - { - "epoch": 0.47827418101316505, - "grad_norm": 0.876528799533844, - "learning_rate": 1.763254828175409e-05, - "loss": 0.4053, - "step": 5077 - }, - { - "epoch": 0.4783683851063329, - "grad_norm": 0.8130778670310974, - "learning_rate": 1.763157257572e-05, 
- "loss": 0.362, - "step": 5078 - }, - { - "epoch": 0.47846258919950074, - "grad_norm": 0.6440185308456421, - "learning_rate": 1.7630596695674254e-05, - "loss": 0.2755, - "step": 5079 - }, - { - "epoch": 0.4785567932926686, - "grad_norm": 0.7436035871505737, - "learning_rate": 1.7629620641639102e-05, - "loss": 0.2935, - "step": 5080 - }, - { - "epoch": 0.47865099738583644, - "grad_norm": 0.7836533188819885, - "learning_rate": 1.76286444136368e-05, - "loss": 0.3261, - "step": 5081 - }, - { - "epoch": 0.4787452014790043, - "grad_norm": 0.7561307549476624, - "learning_rate": 1.7627668011689607e-05, - "loss": 0.3232, - "step": 5082 - }, - { - "epoch": 0.4788394055721721, - "grad_norm": 0.8446324467658997, - "learning_rate": 1.7626691435819787e-05, - "loss": 0.3189, - "step": 5083 - }, - { - "epoch": 0.47893360966533993, - "grad_norm": 0.9219985008239746, - "learning_rate": 1.7625714686049607e-05, - "loss": 0.3258, - "step": 5084 - }, - { - "epoch": 0.4790278137585078, - "grad_norm": 0.7031102776527405, - "learning_rate": 1.762473776240134e-05, - "loss": 0.3253, - "step": 5085 - }, - { - "epoch": 0.47912201785167563, - "grad_norm": 0.894633412361145, - "learning_rate": 1.762376066489726e-05, - "loss": 0.3357, - "step": 5086 - }, - { - "epoch": 0.4792162219448435, - "grad_norm": 0.8764644265174866, - "learning_rate": 1.7622783393559646e-05, - "loss": 0.3381, - "step": 5087 - }, - { - "epoch": 0.47931042603801133, - "grad_norm": 0.8285947442054749, - "learning_rate": 1.7621805948410782e-05, - "loss": 0.3491, - "step": 5088 - }, - { - "epoch": 0.4794046301311792, - "grad_norm": 0.8000497817993164, - "learning_rate": 1.7620828329472952e-05, - "loss": 0.3458, - "step": 5089 - }, - { - "epoch": 0.479498834224347, - "grad_norm": 0.8902774453163147, - "learning_rate": 1.7619850536768455e-05, - "loss": 0.3236, - "step": 5090 - }, - { - "epoch": 0.4795930383175149, - "grad_norm": 0.6526225209236145, - "learning_rate": 1.7618872570319577e-05, - "loss": 0.2875, - "step": 5091 - }, - { - "epoch": 0.4796872424106827, - "grad_norm": 0.7107720375061035, - "learning_rate": 1.7617894430148626e-05, - "loss": 0.2934, - "step": 5092 - }, - { - "epoch": 0.4797814465038506, - "grad_norm": 0.812839150428772, - "learning_rate": 1.76169161162779e-05, - "loss": 0.3133, - "step": 5093 - }, - { - "epoch": 0.4798756505970184, - "grad_norm": 0.7085913419723511, - "learning_rate": 1.7615937628729704e-05, - "loss": 0.3124, - "step": 5094 - }, - { - "epoch": 0.47996985469018627, - "grad_norm": 0.8842501640319824, - "learning_rate": 1.7614958967526354e-05, - "loss": 0.3006, - "step": 5095 - }, - { - "epoch": 0.4800640587833541, - "grad_norm": 0.8363269567489624, - "learning_rate": 1.761398013269016e-05, - "loss": 0.3572, - "step": 5096 - }, - { - "epoch": 0.48015826287652197, - "grad_norm": 0.9766021370887756, - "learning_rate": 1.7613001124243448e-05, - "loss": 0.3211, - "step": 5097 - }, - { - "epoch": 0.4802524669696898, - "grad_norm": 0.8415078520774841, - "learning_rate": 1.761202194220853e-05, - "loss": 0.3477, - "step": 5098 - }, - { - "epoch": 0.48034667106285767, - "grad_norm": 0.7183197140693665, - "learning_rate": 1.7611042586607748e-05, - "loss": 0.3358, - "step": 5099 - }, - { - "epoch": 0.4804408751560255, - "grad_norm": 0.7389125823974609, - "learning_rate": 1.761006305746342e-05, - "loss": 0.3196, - "step": 5100 - }, - { - "epoch": 0.48053507924919336, - "grad_norm": 0.7939767837524414, - "learning_rate": 1.7609083354797884e-05, - "loss": 0.3451, - "step": 5101 - }, - { - "epoch": 0.4806292833423612, - 
"grad_norm": 0.8271040916442871, - "learning_rate": 1.7608103478633483e-05, - "loss": 0.2931, - "step": 5102 - }, - { - "epoch": 0.48072348743552906, - "grad_norm": 0.8910988569259644, - "learning_rate": 1.7607123428992554e-05, - "loss": 0.3108, - "step": 5103 - }, - { - "epoch": 0.4808176915286969, - "grad_norm": 0.777472198009491, - "learning_rate": 1.7606143205897445e-05, - "loss": 0.349, - "step": 5104 - }, - { - "epoch": 0.48091189562186476, - "grad_norm": 0.8044363856315613, - "learning_rate": 1.760516280937051e-05, - "loss": 0.3259, - "step": 5105 - }, - { - "epoch": 0.4810060997150326, - "grad_norm": 0.8904417157173157, - "learning_rate": 1.76041822394341e-05, - "loss": 0.3641, - "step": 5106 - }, - { - "epoch": 0.48110030380820046, - "grad_norm": 0.7185295820236206, - "learning_rate": 1.760320149611058e-05, - "loss": 0.2872, - "step": 5107 - }, - { - "epoch": 0.4811945079013683, - "grad_norm": 0.8121291399002075, - "learning_rate": 1.76022205794223e-05, - "loss": 0.3497, - "step": 5108 - }, - { - "epoch": 0.48128871199453616, - "grad_norm": 0.8890112638473511, - "learning_rate": 1.760123948939164e-05, - "loss": 0.3693, - "step": 5109 - }, - { - "epoch": 0.481382916087704, - "grad_norm": 0.7963833212852478, - "learning_rate": 1.7600258226040957e-05, - "loss": 0.349, - "step": 5110 - }, - { - "epoch": 0.48147712018087185, - "grad_norm": 0.8878795504570007, - "learning_rate": 1.759927678939264e-05, - "loss": 0.3739, - "step": 5111 - }, - { - "epoch": 0.4815713242740397, - "grad_norm": 0.8152940273284912, - "learning_rate": 1.7598295179469053e-05, - "loss": 0.2985, - "step": 5112 - }, - { - "epoch": 0.48166552836720755, - "grad_norm": 0.7406737208366394, - "learning_rate": 1.759731339629259e-05, - "loss": 0.3328, - "step": 5113 - }, - { - "epoch": 0.4817597324603754, - "grad_norm": 0.77753084897995, - "learning_rate": 1.759633143988563e-05, - "loss": 0.3112, - "step": 5114 - }, - { - "epoch": 0.48185393655354325, - "grad_norm": 0.7563633918762207, - "learning_rate": 1.7595349310270565e-05, - "loss": 0.3617, - "step": 5115 - }, - { - "epoch": 0.4819481406467111, - "grad_norm": 0.8029349446296692, - "learning_rate": 1.759436700746979e-05, - "loss": 0.3483, - "step": 5116 - }, - { - "epoch": 0.48204234473987895, - "grad_norm": 0.6659033894538879, - "learning_rate": 1.7593384531505703e-05, - "loss": 0.3155, - "step": 5117 - }, - { - "epoch": 0.4821365488330468, - "grad_norm": 0.9070823788642883, - "learning_rate": 1.7592401882400704e-05, - "loss": 0.3096, - "step": 5118 - }, - { - "epoch": 0.48223075292621465, - "grad_norm": 0.8133385181427002, - "learning_rate": 1.75914190601772e-05, - "loss": 0.3642, - "step": 5119 - }, - { - "epoch": 0.4823249570193825, - "grad_norm": 0.9977114200592041, - "learning_rate": 1.7590436064857603e-05, - "loss": 0.3252, - "step": 5120 - }, - { - "epoch": 0.48241916111255034, - "grad_norm": 0.6693086624145508, - "learning_rate": 1.758945289646432e-05, - "loss": 0.2655, - "step": 5121 - }, - { - "epoch": 0.4825133652057182, - "grad_norm": 0.8078154921531677, - "learning_rate": 1.7588469555019777e-05, - "loss": 0.3635, - "step": 5122 - }, - { - "epoch": 0.48260756929888604, - "grad_norm": 0.7313911318778992, - "learning_rate": 1.7587486040546392e-05, - "loss": 0.273, - "step": 5123 - }, - { - "epoch": 0.4827017733920539, - "grad_norm": 0.8575907349586487, - "learning_rate": 1.7586502353066593e-05, - "loss": 0.3633, - "step": 5124 - }, - { - "epoch": 0.48279597748522174, - "grad_norm": 0.7386175394058228, - "learning_rate": 1.7585518492602807e-05, - "loss": 
0.3086, - "step": 5125 - }, - { - "epoch": 0.4828901815783896, - "grad_norm": 0.7343104481697083, - "learning_rate": 1.7584534459177466e-05, - "loss": 0.3236, - "step": 5126 - }, - { - "epoch": 0.48298438567155744, - "grad_norm": 0.9238309860229492, - "learning_rate": 1.758355025281301e-05, - "loss": 0.3199, - "step": 5127 - }, - { - "epoch": 0.4830785897647253, - "grad_norm": 0.7808854579925537, - "learning_rate": 1.758256587353188e-05, - "loss": 0.3399, - "step": 5128 - }, - { - "epoch": 0.48317279385789313, - "grad_norm": 0.7596392035484314, - "learning_rate": 1.7581581321356524e-05, - "loss": 0.3142, - "step": 5129 - }, - { - "epoch": 0.483266997951061, - "grad_norm": 0.8428837060928345, - "learning_rate": 1.7580596596309387e-05, - "loss": 0.3767, - "step": 5130 - }, - { - "epoch": 0.48336120204422883, - "grad_norm": 1.090317964553833, - "learning_rate": 1.7579611698412923e-05, - "loss": 0.3112, - "step": 5131 - }, - { - "epoch": 0.4834554061373967, - "grad_norm": 0.8067708611488342, - "learning_rate": 1.7578626627689594e-05, - "loss": 0.363, - "step": 5132 - }, - { - "epoch": 0.48354961023056453, - "grad_norm": 0.7280093431472778, - "learning_rate": 1.757764138416185e-05, - "loss": 0.3154, - "step": 5133 - }, - { - "epoch": 0.4836438143237324, - "grad_norm": 0.9062605500221252, - "learning_rate": 1.757665596785217e-05, - "loss": 0.3212, - "step": 5134 - }, - { - "epoch": 0.48373801841690023, - "grad_norm": 0.7609298825263977, - "learning_rate": 1.7575670378783013e-05, - "loss": 0.3096, - "step": 5135 - }, - { - "epoch": 0.4838322225100681, - "grad_norm": 0.7838581204414368, - "learning_rate": 1.7574684616976857e-05, - "loss": 0.3518, - "step": 5136 - }, - { - "epoch": 0.4839264266032359, - "grad_norm": 0.7546284198760986, - "learning_rate": 1.7573698682456176e-05, - "loss": 0.3158, - "step": 5137 - }, - { - "epoch": 0.4840206306964038, - "grad_norm": 0.7096731066703796, - "learning_rate": 1.7572712575243454e-05, - "loss": 0.3411, - "step": 5138 - }, - { - "epoch": 0.4841148347895716, - "grad_norm": 0.8472162485122681, - "learning_rate": 1.7571726295361172e-05, - "loss": 0.2935, - "step": 5139 - }, - { - "epoch": 0.4842090388827395, - "grad_norm": 0.8366554379463196, - "learning_rate": 1.7570739842831824e-05, - "loss": 0.3189, - "step": 5140 - }, - { - "epoch": 0.4843032429759073, - "grad_norm": 0.8048580884933472, - "learning_rate": 1.7569753217677893e-05, - "loss": 0.2775, - "step": 5141 - }, - { - "epoch": 0.48439744706907517, - "grad_norm": 0.7406998872756958, - "learning_rate": 1.7568766419921887e-05, - "loss": 0.3351, - "step": 5142 - }, - { - "epoch": 0.484491651162243, - "grad_norm": 0.7719348073005676, - "learning_rate": 1.75677794495863e-05, - "loss": 0.3258, - "step": 5143 - }, - { - "epoch": 0.48458585525541087, - "grad_norm": 0.8818472027778625, - "learning_rate": 1.7566792306693636e-05, - "loss": 0.3767, - "step": 5144 - }, - { - "epoch": 0.4846800593485787, - "grad_norm": 0.9801118969917297, - "learning_rate": 1.7565804991266407e-05, - "loss": 0.3647, - "step": 5145 - }, - { - "epoch": 0.48477426344174657, - "grad_norm": 0.7627350091934204, - "learning_rate": 1.7564817503327125e-05, - "loss": 0.3265, - "step": 5146 - }, - { - "epoch": 0.4848684675349144, - "grad_norm": 0.7406090497970581, - "learning_rate": 1.75638298428983e-05, - "loss": 0.3264, - "step": 5147 - }, - { - "epoch": 0.48496267162808226, - "grad_norm": 0.8357557654380798, - "learning_rate": 1.756284201000246e-05, - "loss": 0.3622, - "step": 5148 - }, - { - "epoch": 0.4850568757212501, - "grad_norm": 
0.6987957954406738, - "learning_rate": 1.7561854004662126e-05, - "loss": 0.2845, - "step": 5149 - }, - { - "epoch": 0.48515107981441796, - "grad_norm": 0.8057776689529419, - "learning_rate": 1.7560865826899825e-05, - "loss": 0.383, - "step": 5150 - }, - { - "epoch": 0.4852452839075858, - "grad_norm": 0.7629268169403076, - "learning_rate": 1.755987747673809e-05, - "loss": 0.3477, - "step": 5151 - }, - { - "epoch": 0.48533948800075366, - "grad_norm": 0.7710081934928894, - "learning_rate": 1.755888895419946e-05, - "loss": 0.3138, - "step": 5152 - }, - { - "epoch": 0.48543369209392145, - "grad_norm": 0.7008806467056274, - "learning_rate": 1.755790025930647e-05, - "loss": 0.3631, - "step": 5153 - }, - { - "epoch": 0.4855278961870893, - "grad_norm": 0.8604103326797485, - "learning_rate": 1.7556911392081663e-05, - "loss": 0.3868, - "step": 5154 - }, - { - "epoch": 0.48562210028025715, - "grad_norm": 0.7947174906730652, - "learning_rate": 1.755592235254759e-05, - "loss": 0.3356, - "step": 5155 - }, - { - "epoch": 0.485716304373425, - "grad_norm": 0.840647280216217, - "learning_rate": 1.7554933140726803e-05, - "loss": 0.3162, - "step": 5156 - }, - { - "epoch": 0.48581050846659285, - "grad_norm": 0.8356987237930298, - "learning_rate": 1.755394375664186e-05, - "loss": 0.3557, - "step": 5157 - }, - { - "epoch": 0.4859047125597607, - "grad_norm": 0.893537163734436, - "learning_rate": 1.7552954200315313e-05, - "loss": 0.3298, - "step": 5158 - }, - { - "epoch": 0.48599891665292855, - "grad_norm": 0.8929669857025146, - "learning_rate": 1.755196447176973e-05, - "loss": 0.3518, - "step": 5159 - }, - { - "epoch": 0.4860931207460964, - "grad_norm": 1.0336229801177979, - "learning_rate": 1.7550974571027675e-05, - "loss": 0.3418, - "step": 5160 - }, - { - "epoch": 0.48618732483926425, - "grad_norm": 0.7523311376571655, - "learning_rate": 1.7549984498111724e-05, - "loss": 0.3341, - "step": 5161 - }, - { - "epoch": 0.4862815289324321, - "grad_norm": 0.6885283589363098, - "learning_rate": 1.7548994253044453e-05, - "loss": 0.3446, - "step": 5162 - }, - { - "epoch": 0.48637573302559994, - "grad_norm": 0.8484752178192139, - "learning_rate": 1.7548003835848436e-05, - "loss": 0.3315, - "step": 5163 - }, - { - "epoch": 0.4864699371187678, - "grad_norm": 0.8555371761322021, - "learning_rate": 1.7547013246546258e-05, - "loss": 0.3676, - "step": 5164 - }, - { - "epoch": 0.48656414121193564, - "grad_norm": 0.6904305815696716, - "learning_rate": 1.7546022485160507e-05, - "loss": 0.2942, - "step": 5165 - }, - { - "epoch": 0.4866583453051035, - "grad_norm": 0.8539214730262756, - "learning_rate": 1.7545031551713772e-05, - "loss": 0.3705, - "step": 5166 - }, - { - "epoch": 0.48675254939827134, - "grad_norm": 0.7262186408042908, - "learning_rate": 1.7544040446228652e-05, - "loss": 0.3406, - "step": 5167 - }, - { - "epoch": 0.4868467534914392, - "grad_norm": 0.7756384015083313, - "learning_rate": 1.7543049168727742e-05, - "loss": 0.3184, - "step": 5168 - }, - { - "epoch": 0.48694095758460704, - "grad_norm": 0.817236065864563, - "learning_rate": 1.754205771923364e-05, - "loss": 0.34, - "step": 5169 - }, - { - "epoch": 0.4870351616777749, - "grad_norm": 0.7116366624832153, - "learning_rate": 1.7541066097768965e-05, - "loss": 0.2996, - "step": 5170 - }, - { - "epoch": 0.48712936577094273, - "grad_norm": 0.7133092880249023, - "learning_rate": 1.7540074304356316e-05, - "loss": 0.3175, - "step": 5171 - }, - { - "epoch": 0.4872235698641106, - "grad_norm": 0.7212796807289124, - "learning_rate": 1.7539082339018314e-05, - "loss": 0.3484, - 
"step": 5172 - }, - { - "epoch": 0.48731777395727843, - "grad_norm": 0.7220133543014526, - "learning_rate": 1.7538090201777573e-05, - "loss": 0.3472, - "step": 5173 - }, - { - "epoch": 0.4874119780504463, - "grad_norm": 0.9291368126869202, - "learning_rate": 1.7537097892656724e-05, - "loss": 0.3562, - "step": 5174 - }, - { - "epoch": 0.48750618214361413, - "grad_norm": 0.7621473670005798, - "learning_rate": 1.753610541167838e-05, - "loss": 0.2822, - "step": 5175 - }, - { - "epoch": 0.487600386236782, - "grad_norm": 0.8579990267753601, - "learning_rate": 1.753511275886518e-05, - "loss": 0.3662, - "step": 5176 - }, - { - "epoch": 0.48769459032994983, - "grad_norm": 0.8141975998878479, - "learning_rate": 1.7534119934239754e-05, - "loss": 0.3588, - "step": 5177 - }, - { - "epoch": 0.4877887944231177, - "grad_norm": 0.7674275636672974, - "learning_rate": 1.7533126937824746e-05, - "loss": 0.3731, - "step": 5178 - }, - { - "epoch": 0.4878829985162855, - "grad_norm": 0.8437183499336243, - "learning_rate": 1.753213376964279e-05, - "loss": 0.333, - "step": 5179 - }, - { - "epoch": 0.4879772026094534, - "grad_norm": 0.7552438378334045, - "learning_rate": 1.7531140429716536e-05, - "loss": 0.3402, - "step": 5180 - }, - { - "epoch": 0.4880714067026212, - "grad_norm": 0.7128962278366089, - "learning_rate": 1.7530146918068633e-05, - "loss": 0.315, - "step": 5181 - }, - { - "epoch": 0.4881656107957891, - "grad_norm": 0.7599475979804993, - "learning_rate": 1.7529153234721733e-05, - "loss": 0.3402, - "step": 5182 - }, - { - "epoch": 0.4882598148889569, - "grad_norm": 0.916378378868103, - "learning_rate": 1.75281593796985e-05, - "loss": 0.383, - "step": 5183 - }, - { - "epoch": 0.48835401898212477, - "grad_norm": 0.6470703482627869, - "learning_rate": 1.7527165353021585e-05, - "loss": 0.2833, - "step": 5184 - }, - { - "epoch": 0.4884482230752926, - "grad_norm": 0.7961269021034241, - "learning_rate": 1.7526171154713664e-05, - "loss": 0.3685, - "step": 5185 - }, - { - "epoch": 0.48854242716846047, - "grad_norm": 0.9246634244918823, - "learning_rate": 1.75251767847974e-05, - "loss": 0.4044, - "step": 5186 - }, - { - "epoch": 0.4886366312616283, - "grad_norm": 0.8082994222640991, - "learning_rate": 1.7524182243295464e-05, - "loss": 0.382, - "step": 5187 - }, - { - "epoch": 0.48873083535479617, - "grad_norm": 0.7272515296936035, - "learning_rate": 1.7523187530230543e-05, - "loss": 0.3306, - "step": 5188 - }, - { - "epoch": 0.488825039447964, - "grad_norm": 0.7833527326583862, - "learning_rate": 1.7522192645625305e-05, - "loss": 0.3351, - "step": 5189 - }, - { - "epoch": 0.48891924354113186, - "grad_norm": 0.8743553161621094, - "learning_rate": 1.7521197589502442e-05, - "loss": 0.3526, - "step": 5190 - }, - { - "epoch": 0.4890134476342997, - "grad_norm": 0.720193088054657, - "learning_rate": 1.7520202361884643e-05, - "loss": 0.3225, - "step": 5191 - }, - { - "epoch": 0.48910765172746756, - "grad_norm": 0.6840072870254517, - "learning_rate": 1.7519206962794605e-05, - "loss": 0.2961, - "step": 5192 - }, - { - "epoch": 0.4892018558206354, - "grad_norm": 0.7914387583732605, - "learning_rate": 1.7518211392255013e-05, - "loss": 0.3699, - "step": 5193 - }, - { - "epoch": 0.48929605991380326, - "grad_norm": 0.6713058352470398, - "learning_rate": 1.7517215650288576e-05, - "loss": 0.2956, - "step": 5194 - }, - { - "epoch": 0.4893902640069711, - "grad_norm": 0.8198142647743225, - "learning_rate": 1.7516219736918e-05, - "loss": 0.374, - "step": 5195 - }, - { - "epoch": 0.48948446810013896, - "grad_norm": 0.869257926940918, - 
"learning_rate": 1.7515223652165985e-05, - "loss": 0.3474, - "step": 5196 - }, - { - "epoch": 0.4895786721933068, - "grad_norm": 0.791228711605072, - "learning_rate": 1.7514227396055252e-05, - "loss": 0.3528, - "step": 5197 - }, - { - "epoch": 0.48967287628647466, - "grad_norm": 0.7481163144111633, - "learning_rate": 1.7513230968608514e-05, - "loss": 0.3095, - "step": 5198 - }, - { - "epoch": 0.4897670803796425, - "grad_norm": 0.9064458608627319, - "learning_rate": 1.7512234369848484e-05, - "loss": 0.3704, - "step": 5199 - }, - { - "epoch": 0.48986128447281035, - "grad_norm": 0.8498217463493347, - "learning_rate": 1.7511237599797898e-05, - "loss": 0.3492, - "step": 5200 - }, - { - "epoch": 0.4899554885659782, - "grad_norm": 0.8195668458938599, - "learning_rate": 1.751024065847948e-05, - "loss": 0.3572, - "step": 5201 - }, - { - "epoch": 0.49004969265914605, - "grad_norm": 0.7476743459701538, - "learning_rate": 1.7509243545915953e-05, - "loss": 0.332, - "step": 5202 - }, - { - "epoch": 0.4901438967523139, - "grad_norm": 0.8016562461853027, - "learning_rate": 1.7508246262130065e-05, - "loss": 0.3863, - "step": 5203 - }, - { - "epoch": 0.49023810084548175, - "grad_norm": 0.6951786875724792, - "learning_rate": 1.750724880714455e-05, - "loss": 0.327, - "step": 5204 - }, - { - "epoch": 0.4903323049386496, - "grad_norm": 0.7730926275253296, - "learning_rate": 1.7506251180982153e-05, - "loss": 0.3423, - "step": 5205 - }, - { - "epoch": 0.49042650903181745, - "grad_norm": 0.6924357414245605, - "learning_rate": 1.7505253383665618e-05, - "loss": 0.3064, - "step": 5206 - }, - { - "epoch": 0.4905207131249853, - "grad_norm": 0.861555278301239, - "learning_rate": 1.75042554152177e-05, - "loss": 0.3739, - "step": 5207 - }, - { - "epoch": 0.49061491721815315, - "grad_norm": 0.8362349271774292, - "learning_rate": 1.750325727566115e-05, - "loss": 0.3512, - "step": 5208 - }, - { - "epoch": 0.490709121311321, - "grad_norm": 0.7900026440620422, - "learning_rate": 1.7502258965018737e-05, - "loss": 0.3095, - "step": 5209 - }, - { - "epoch": 0.49080332540448884, - "grad_norm": 0.8224048614501953, - "learning_rate": 1.750126048331321e-05, - "loss": 0.3615, - "step": 5210 - }, - { - "epoch": 0.4908975294976567, - "grad_norm": 0.9498628377914429, - "learning_rate": 1.7500261830567348e-05, - "loss": 0.3035, - "step": 5211 - }, - { - "epoch": 0.49099173359082454, - "grad_norm": 0.9057954549789429, - "learning_rate": 1.7499263006803913e-05, - "loss": 0.3277, - "step": 5212 - }, - { - "epoch": 0.4910859376839924, - "grad_norm": 0.8104428648948669, - "learning_rate": 1.7498264012045686e-05, - "loss": 0.3926, - "step": 5213 - }, - { - "epoch": 0.49118014177716024, - "grad_norm": 0.8333841562271118, - "learning_rate": 1.7497264846315443e-05, - "loss": 0.3278, - "step": 5214 - }, - { - "epoch": 0.4912743458703281, - "grad_norm": 0.8042159080505371, - "learning_rate": 1.7496265509635968e-05, - "loss": 0.3766, - "step": 5215 - }, - { - "epoch": 0.49136854996349594, - "grad_norm": 0.725310742855072, - "learning_rate": 1.7495266002030045e-05, - "loss": 0.3361, - "step": 5216 - }, - { - "epoch": 0.4914627540566638, - "grad_norm": 0.7242028117179871, - "learning_rate": 1.7494266323520466e-05, - "loss": 0.3016, - "step": 5217 - }, - { - "epoch": 0.49155695814983164, - "grad_norm": 0.8324516415596008, - "learning_rate": 1.7493266474130024e-05, - "loss": 0.3514, - "step": 5218 - }, - { - "epoch": 0.4916511622429995, - "grad_norm": 0.7419780492782593, - "learning_rate": 1.7492266453881516e-05, - "loss": 0.364, - "step": 5219 - }, - { 
- "epoch": 0.49174536633616733, - "grad_norm": 0.8550251126289368, - "learning_rate": 1.749126626279775e-05, - "loss": 0.3315, - "step": 5220 - }, - { - "epoch": 0.4918395704293352, - "grad_norm": 0.767841637134552, - "learning_rate": 1.749026590090152e-05, - "loss": 0.351, - "step": 5221 - }, - { - "epoch": 0.491933774522503, - "grad_norm": 0.7968747019767761, - "learning_rate": 1.748926536821565e-05, - "loss": 0.3368, - "step": 5222 - }, - { - "epoch": 0.4920279786156708, - "grad_norm": 0.9009972810745239, - "learning_rate": 1.7488264664762948e-05, - "loss": 0.3702, - "step": 5223 - }, - { - "epoch": 0.4921221827088387, - "grad_norm": 0.7290366291999817, - "learning_rate": 1.748726379056623e-05, - "loss": 0.3331, - "step": 5224 - }, - { - "epoch": 0.4922163868020065, - "grad_norm": 0.9687650203704834, - "learning_rate": 1.7486262745648316e-05, - "loss": 0.3235, - "step": 5225 - }, - { - "epoch": 0.49231059089517437, - "grad_norm": 0.7802406549453735, - "learning_rate": 1.7485261530032036e-05, - "loss": 0.3025, - "step": 5226 - }, - { - "epoch": 0.4924047949883422, - "grad_norm": 0.8119762539863586, - "learning_rate": 1.7484260143740216e-05, - "loss": 0.3105, - "step": 5227 - }, - { - "epoch": 0.49249899908151007, - "grad_norm": 0.8451618552207947, - "learning_rate": 1.7483258586795692e-05, - "loss": 0.3509, - "step": 5228 - }, - { - "epoch": 0.4925932031746779, - "grad_norm": 0.7818936109542847, - "learning_rate": 1.7482256859221297e-05, - "loss": 0.3317, - "step": 5229 - }, - { - "epoch": 0.49268740726784577, - "grad_norm": 0.8083449602127075, - "learning_rate": 1.7481254961039874e-05, - "loss": 0.3321, - "step": 5230 - }, - { - "epoch": 0.4927816113610136, - "grad_norm": 0.7966959476470947, - "learning_rate": 1.748025289227427e-05, - "loss": 0.3557, - "step": 5231 - }, - { - "epoch": 0.49287581545418147, - "grad_norm": 0.6925898790359497, - "learning_rate": 1.7479250652947325e-05, - "loss": 0.2877, - "step": 5232 - }, - { - "epoch": 0.4929700195473493, - "grad_norm": 0.8530373573303223, - "learning_rate": 1.7478248243081904e-05, - "loss": 0.367, - "step": 5233 - }, - { - "epoch": 0.49306422364051716, - "grad_norm": 0.7606818675994873, - "learning_rate": 1.7477245662700857e-05, - "loss": 0.3346, - "step": 5234 - }, - { - "epoch": 0.493158427733685, - "grad_norm": 0.7215161919593811, - "learning_rate": 1.7476242911827043e-05, - "loss": 0.3004, - "step": 5235 - }, - { - "epoch": 0.49325263182685286, - "grad_norm": 0.8876181840896606, - "learning_rate": 1.7475239990483332e-05, - "loss": 0.3717, - "step": 5236 - }, - { - "epoch": 0.4933468359200207, - "grad_norm": 0.8085825443267822, - "learning_rate": 1.7474236898692587e-05, - "loss": 0.3312, - "step": 5237 - }, - { - "epoch": 0.49344104001318856, - "grad_norm": 0.879197359085083, - "learning_rate": 1.747323363647768e-05, - "loss": 0.3265, - "step": 5238 - }, - { - "epoch": 0.4935352441063564, - "grad_norm": 0.7793401479721069, - "learning_rate": 1.747223020386149e-05, - "loss": 0.3641, - "step": 5239 - }, - { - "epoch": 0.49362944819952426, - "grad_norm": 0.7720771431922913, - "learning_rate": 1.7471226600866895e-05, - "loss": 0.3185, - "step": 5240 - }, - { - "epoch": 0.4937236522926921, - "grad_norm": 0.6867986917495728, - "learning_rate": 1.747022282751678e-05, - "loss": 0.3019, - "step": 5241 - }, - { - "epoch": 0.49381785638585995, - "grad_norm": 0.7740907073020935, - "learning_rate": 1.7469218883834033e-05, - "loss": 0.3386, - "step": 5242 - }, - { - "epoch": 0.4939120604790278, - "grad_norm": 0.9793553352355957, - "learning_rate": 
1.7468214769841542e-05, - "loss": 0.3707, - "step": 5243 - }, - { - "epoch": 0.49400626457219565, - "grad_norm": 0.9979997277259827, - "learning_rate": 1.74672104855622e-05, - "loss": 0.3702, - "step": 5244 - }, - { - "epoch": 0.4941004686653635, - "grad_norm": 0.8075796365737915, - "learning_rate": 1.7466206031018918e-05, - "loss": 0.3085, - "step": 5245 - }, - { - "epoch": 0.49419467275853135, - "grad_norm": 0.865083634853363, - "learning_rate": 1.7465201406234585e-05, - "loss": 0.3501, - "step": 5246 - }, - { - "epoch": 0.4942888768516992, - "grad_norm": 0.8638558387756348, - "learning_rate": 1.746419661123212e-05, - "loss": 0.3572, - "step": 5247 - }, - { - "epoch": 0.49438308094486705, - "grad_norm": 0.684909999370575, - "learning_rate": 1.746319164603443e-05, - "loss": 0.3082, - "step": 5248 - }, - { - "epoch": 0.4944772850380349, - "grad_norm": 0.6876487135887146, - "learning_rate": 1.7462186510664426e-05, - "loss": 0.3275, - "step": 5249 - }, - { - "epoch": 0.49457148913120275, - "grad_norm": 0.6835941672325134, - "learning_rate": 1.7461181205145027e-05, - "loss": 0.2478, - "step": 5250 - }, - { - "epoch": 0.4946656932243706, - "grad_norm": 0.8077230453491211, - "learning_rate": 1.7460175729499158e-05, - "loss": 0.3566, - "step": 5251 - }, - { - "epoch": 0.49475989731753844, - "grad_norm": 0.7662020921707153, - "learning_rate": 1.7459170083749747e-05, - "loss": 0.368, - "step": 5252 - }, - { - "epoch": 0.4948541014107063, - "grad_norm": 0.7811310887336731, - "learning_rate": 1.7458164267919723e-05, - "loss": 0.3218, - "step": 5253 - }, - { - "epoch": 0.49494830550387414, - "grad_norm": 0.8241111636161804, - "learning_rate": 1.7457158282032018e-05, - "loss": 0.3658, - "step": 5254 - }, - { - "epoch": 0.495042509597042, - "grad_norm": 0.695144772529602, - "learning_rate": 1.745615212610957e-05, - "loss": 0.3463, - "step": 5255 - }, - { - "epoch": 0.49513671369020984, - "grad_norm": 0.8194564580917358, - "learning_rate": 1.7455145800175323e-05, - "loss": 0.3482, - "step": 5256 - }, - { - "epoch": 0.4952309177833777, - "grad_norm": 0.6636349558830261, - "learning_rate": 1.7454139304252225e-05, - "loss": 0.3177, - "step": 5257 - }, - { - "epoch": 0.49532512187654554, - "grad_norm": 0.8018602728843689, - "learning_rate": 1.745313263836322e-05, - "loss": 0.3435, - "step": 5258 - }, - { - "epoch": 0.4954193259697134, - "grad_norm": 0.648948609828949, - "learning_rate": 1.7452125802531266e-05, - "loss": 0.2837, - "step": 5259 - }, - { - "epoch": 0.49551353006288124, - "grad_norm": 0.7257091999053955, - "learning_rate": 1.7451118796779315e-05, - "loss": 0.3613, - "step": 5260 - }, - { - "epoch": 0.4956077341560491, - "grad_norm": 0.7232349514961243, - "learning_rate": 1.7450111621130335e-05, - "loss": 0.2871, - "step": 5261 - }, - { - "epoch": 0.49570193824921693, - "grad_norm": 0.8945488333702087, - "learning_rate": 1.7449104275607292e-05, - "loss": 0.3041, - "step": 5262 - }, - { - "epoch": 0.4957961423423848, - "grad_norm": 0.7877565026283264, - "learning_rate": 1.7448096760233143e-05, - "loss": 0.3282, - "step": 5263 - }, - { - "epoch": 0.49589034643555263, - "grad_norm": 1.1505717039108276, - "learning_rate": 1.7447089075030877e-05, - "loss": 0.3351, - "step": 5264 - }, - { - "epoch": 0.4959845505287205, - "grad_norm": 0.8410128951072693, - "learning_rate": 1.744608122002346e-05, - "loss": 0.3485, - "step": 5265 - }, - { - "epoch": 0.49607875462188833, - "grad_norm": 0.717402458190918, - "learning_rate": 1.7445073195233874e-05, - "loss": 0.3289, - "step": 5266 - }, - { - "epoch": 
0.4961729587150562, - "grad_norm": 0.6788548827171326, - "learning_rate": 1.7444065000685103e-05, - "loss": 0.353, - "step": 5267 - }, - { - "epoch": 0.496267162808224, - "grad_norm": 0.6685397028923035, - "learning_rate": 1.7443056636400145e-05, - "loss": 0.3076, - "step": 5268 - }, - { - "epoch": 0.4963613669013919, - "grad_norm": 0.7368515729904175, - "learning_rate": 1.7442048102401982e-05, - "loss": 0.2994, - "step": 5269 - }, - { - "epoch": 0.4964555709945597, - "grad_norm": 0.7723667025566101, - "learning_rate": 1.744103939871361e-05, - "loss": 0.3074, - "step": 5270 - }, - { - "epoch": 0.4965497750877276, - "grad_norm": 0.885190486907959, - "learning_rate": 1.744003052535803e-05, - "loss": 0.3465, - "step": 5271 - }, - { - "epoch": 0.4966439791808954, - "grad_norm": 0.7844976782798767, - "learning_rate": 1.7439021482358254e-05, - "loss": 0.38, - "step": 5272 - }, - { - "epoch": 0.49673818327406327, - "grad_norm": 0.7608745098114014, - "learning_rate": 1.743801226973728e-05, - "loss": 0.3054, - "step": 5273 - }, - { - "epoch": 0.4968323873672311, - "grad_norm": 0.9637671113014221, - "learning_rate": 1.7437002887518125e-05, - "loss": 0.3801, - "step": 5274 - }, - { - "epoch": 0.49692659146039897, - "grad_norm": 0.8396672010421753, - "learning_rate": 1.74359933357238e-05, - "loss": 0.3398, - "step": 5275 - }, - { - "epoch": 0.4970207955535668, - "grad_norm": 0.7731151580810547, - "learning_rate": 1.743498361437733e-05, - "loss": 0.28, - "step": 5276 - }, - { - "epoch": 0.49711499964673467, - "grad_norm": 0.7089502215385437, - "learning_rate": 1.7433973723501732e-05, - "loss": 0.3192, - "step": 5277 - }, - { - "epoch": 0.4972092037399025, - "grad_norm": 0.7073197364807129, - "learning_rate": 1.7432963663120037e-05, - "loss": 0.3351, - "step": 5278 - }, - { - "epoch": 0.49730340783307037, - "grad_norm": 0.7932494282722473, - "learning_rate": 1.743195343325528e-05, - "loss": 0.3659, - "step": 5279 - }, - { - "epoch": 0.4973976119262382, - "grad_norm": 0.7352001070976257, - "learning_rate": 1.7430943033930483e-05, - "loss": 0.3492, - "step": 5280 - }, - { - "epoch": 0.49749181601940606, - "grad_norm": 0.8383285403251648, - "learning_rate": 1.7429932465168693e-05, - "loss": 0.3538, - "step": 5281 - }, - { - "epoch": 0.4975860201125739, - "grad_norm": 0.7251850962638855, - "learning_rate": 1.742892172699296e-05, - "loss": 0.3453, - "step": 5282 - }, - { - "epoch": 0.49768022420574176, - "grad_norm": 0.8373103737831116, - "learning_rate": 1.7427910819426318e-05, - "loss": 0.3231, - "step": 5283 - }, - { - "epoch": 0.4977744282989096, - "grad_norm": 0.7442533373832703, - "learning_rate": 1.7426899742491824e-05, - "loss": 0.31, - "step": 5284 - }, - { - "epoch": 0.49786863239207746, - "grad_norm": 0.7947465777397156, - "learning_rate": 1.7425888496212527e-05, - "loss": 0.3506, - "step": 5285 - }, - { - "epoch": 0.4979628364852453, - "grad_norm": 0.9055950045585632, - "learning_rate": 1.742487708061149e-05, - "loss": 0.3811, - "step": 5286 - }, - { - "epoch": 0.49805704057841316, - "grad_norm": 0.7091962695121765, - "learning_rate": 1.742386549571177e-05, - "loss": 0.327, - "step": 5287 - }, - { - "epoch": 0.498151244671581, - "grad_norm": 0.803471565246582, - "learning_rate": 1.7422853741536437e-05, - "loss": 0.3663, - "step": 5288 - }, - { - "epoch": 0.49824544876474886, - "grad_norm": 0.809049129486084, - "learning_rate": 1.742184181810856e-05, - "loss": 0.3592, - "step": 5289 - }, - { - "epoch": 0.4983396528579167, - "grad_norm": 0.7729027271270752, - "learning_rate": 1.742082972545121e-05, 
- "loss": 0.3076, - "step": 5290 - }, - { - "epoch": 0.4984338569510845, - "grad_norm": 0.8046419620513916, - "learning_rate": 1.7419817463587466e-05, - "loss": 0.3497, - "step": 5291 - }, - { - "epoch": 0.49852806104425235, - "grad_norm": 0.7646002173423767, - "learning_rate": 1.741880503254041e-05, - "loss": 0.3245, - "step": 5292 - }, - { - "epoch": 0.4986222651374202, - "grad_norm": 0.7153975963592529, - "learning_rate": 1.7417792432333124e-05, - "loss": 0.314, - "step": 5293 - }, - { - "epoch": 0.49871646923058804, - "grad_norm": 0.7249170541763306, - "learning_rate": 1.7416779662988702e-05, - "loss": 0.3337, - "step": 5294 - }, - { - "epoch": 0.4988106733237559, - "grad_norm": 1.015845775604248, - "learning_rate": 1.7415766724530232e-05, - "loss": 0.3111, - "step": 5295 - }, - { - "epoch": 0.49890487741692374, - "grad_norm": 0.8393731713294983, - "learning_rate": 1.741475361698081e-05, - "loss": 0.3366, - "step": 5296 - }, - { - "epoch": 0.4989990815100916, - "grad_norm": 0.9029054641723633, - "learning_rate": 1.7413740340363542e-05, - "loss": 0.3352, - "step": 5297 - }, - { - "epoch": 0.49909328560325944, - "grad_norm": 0.7641615867614746, - "learning_rate": 1.7412726894701527e-05, - "loss": 0.3471, - "step": 5298 - }, - { - "epoch": 0.4991874896964273, - "grad_norm": 0.8021811842918396, - "learning_rate": 1.741171328001787e-05, - "loss": 0.3387, - "step": 5299 - }, - { - "epoch": 0.49928169378959514, - "grad_norm": 0.9175771474838257, - "learning_rate": 1.7410699496335693e-05, - "loss": 0.3636, - "step": 5300 - }, - { - "epoch": 0.499375897882763, - "grad_norm": 0.8105636239051819, - "learning_rate": 1.7409685543678105e-05, - "loss": 0.3546, - "step": 5301 - }, - { - "epoch": 0.49947010197593084, - "grad_norm": 0.8636082410812378, - "learning_rate": 1.740867142206823e-05, - "loss": 0.3379, - "step": 5302 - }, - { - "epoch": 0.4995643060690987, - "grad_norm": 0.7709434628486633, - "learning_rate": 1.740765713152919e-05, - "loss": 0.3692, - "step": 5303 - }, - { - "epoch": 0.49965851016226653, - "grad_norm": 0.6728577017784119, - "learning_rate": 1.7406642672084105e-05, - "loss": 0.3277, - "step": 5304 - }, - { - "epoch": 0.4997527142554344, - "grad_norm": 0.7524570822715759, - "learning_rate": 1.7405628043756114e-05, - "loss": 0.3128, - "step": 5305 - }, - { - "epoch": 0.49984691834860223, - "grad_norm": 0.8218162059783936, - "learning_rate": 1.740461324656835e-05, - "loss": 0.3535, - "step": 5306 - }, - { - "epoch": 0.4999411224417701, - "grad_norm": 0.7679923176765442, - "learning_rate": 1.7403598280543955e-05, - "loss": 0.3312, - "step": 5307 - }, - { - "epoch": 0.500035326534938, - "grad_norm": 0.8444740176200867, - "learning_rate": 1.740258314570607e-05, - "loss": 0.3357, - "step": 5308 - }, - { - "epoch": 0.5001295306281058, - "grad_norm": 0.7953502535820007, - "learning_rate": 1.740156784207784e-05, - "loss": 0.3661, - "step": 5309 - }, - { - "epoch": 0.5002237347212737, - "grad_norm": 0.8228588104248047, - "learning_rate": 1.7400552369682417e-05, - "loss": 0.3662, - "step": 5310 - }, - { - "epoch": 0.5003179388144415, - "grad_norm": 0.9413067698478699, - "learning_rate": 1.7399536728542955e-05, - "loss": 0.3763, - "step": 5311 - }, - { - "epoch": 0.5004121429076094, - "grad_norm": 0.9656098484992981, - "learning_rate": 1.739852091868261e-05, - "loss": 0.3424, - "step": 5312 - }, - { - "epoch": 0.5005063470007772, - "grad_norm": 0.8443370461463928, - "learning_rate": 1.7397504940124546e-05, - "loss": 0.3378, - "step": 5313 - }, - { - "epoch": 0.5006005510939451, - 
"grad_norm": 0.7331113219261169, - "learning_rate": 1.7396488792891932e-05, - "loss": 0.3346, - "step": 5314 - }, - { - "epoch": 0.5006947551871129, - "grad_norm": 0.7605288624763489, - "learning_rate": 1.7395472477007932e-05, - "loss": 0.314, - "step": 5315 - }, - { - "epoch": 0.5007889592802808, - "grad_norm": 0.8171616792678833, - "learning_rate": 1.7394455992495722e-05, - "loss": 0.389, - "step": 5316 - }, - { - "epoch": 0.5008831633734486, - "grad_norm": 0.7240248918533325, - "learning_rate": 1.739343933937848e-05, - "loss": 0.2431, - "step": 5317 - }, - { - "epoch": 0.5009773674666165, - "grad_norm": 0.7218488454818726, - "learning_rate": 1.739242251767939e-05, - "loss": 0.3376, - "step": 5318 - }, - { - "epoch": 0.5010715715597843, - "grad_norm": 0.7530635595321655, - "learning_rate": 1.739140552742163e-05, - "loss": 0.3486, - "step": 5319 - }, - { - "epoch": 0.5011657756529522, - "grad_norm": 0.704440712928772, - "learning_rate": 1.7390388368628396e-05, - "loss": 0.3059, - "step": 5320 - }, - { - "epoch": 0.50125997974612, - "grad_norm": 0.8903000354766846, - "learning_rate": 1.7389371041322872e-05, - "loss": 0.3019, - "step": 5321 - }, - { - "epoch": 0.5013541838392879, - "grad_norm": 0.9734460115432739, - "learning_rate": 1.7388353545528265e-05, - "loss": 0.373, - "step": 5322 - }, - { - "epoch": 0.5014483879324557, - "grad_norm": 0.6763521432876587, - "learning_rate": 1.7387335881267774e-05, - "loss": 0.3321, - "step": 5323 - }, - { - "epoch": 0.5015425920256236, - "grad_norm": 0.7226347923278809, - "learning_rate": 1.7386318048564596e-05, - "loss": 0.2939, - "step": 5324 - }, - { - "epoch": 0.5016367961187913, - "grad_norm": 0.8059448599815369, - "learning_rate": 1.7385300047441944e-05, - "loss": 0.3213, - "step": 5325 - }, - { - "epoch": 0.5017310002119592, - "grad_norm": 0.9217113852500916, - "learning_rate": 1.738428187792303e-05, - "loss": 0.4016, - "step": 5326 - }, - { - "epoch": 0.501825204305127, - "grad_norm": 0.7564734816551208, - "learning_rate": 1.738326354003107e-05, - "loss": 0.3242, - "step": 5327 - }, - { - "epoch": 0.5019194083982949, - "grad_norm": 0.9484173059463501, - "learning_rate": 1.7382245033789277e-05, - "loss": 0.3468, - "step": 5328 - }, - { - "epoch": 0.5020136124914627, - "grad_norm": 0.8150784373283386, - "learning_rate": 1.7381226359220886e-05, - "loss": 0.3703, - "step": 5329 - }, - { - "epoch": 0.5021078165846306, - "grad_norm": 0.878149688243866, - "learning_rate": 1.7380207516349115e-05, - "loss": 0.3925, - "step": 5330 - }, - { - "epoch": 0.5022020206777984, - "grad_norm": 0.7702428102493286, - "learning_rate": 1.73791885051972e-05, - "loss": 0.3257, - "step": 5331 - }, - { - "epoch": 0.5022962247709662, - "grad_norm": 0.799381673336029, - "learning_rate": 1.7378169325788378e-05, - "loss": 0.3442, - "step": 5332 - }, - { - "epoch": 0.5023904288641341, - "grad_norm": 0.7093852162361145, - "learning_rate": 1.7377149978145883e-05, - "loss": 0.3711, - "step": 5333 - }, - { - "epoch": 0.502484632957302, - "grad_norm": 0.8848658800125122, - "learning_rate": 1.737613046229296e-05, - "loss": 0.3682, - "step": 5334 - }, - { - "epoch": 0.5025788370504698, - "grad_norm": 0.7343833446502686, - "learning_rate": 1.7375110778252855e-05, - "loss": 0.3377, - "step": 5335 - }, - { - "epoch": 0.5026730411436376, - "grad_norm": 0.8974533677101135, - "learning_rate": 1.7374090926048815e-05, - "loss": 0.3093, - "step": 5336 - }, - { - "epoch": 0.5027672452368055, - "grad_norm": 0.6666314601898193, - "learning_rate": 1.7373070905704103e-05, - "loss": 0.2988, - 
"step": 5337 - }, - { - "epoch": 0.5028614493299733, - "grad_norm": 0.8002578616142273, - "learning_rate": 1.737205071724197e-05, - "loss": 0.3454, - "step": 5338 - }, - { - "epoch": 0.5029556534231412, - "grad_norm": 0.9683690071105957, - "learning_rate": 1.7371030360685676e-05, - "loss": 0.3133, - "step": 5339 - }, - { - "epoch": 0.503049857516309, - "grad_norm": 0.908248782157898, - "learning_rate": 1.7370009836058493e-05, - "loss": 0.3499, - "step": 5340 - }, - { - "epoch": 0.5031440616094769, - "grad_norm": 0.7834539413452148, - "learning_rate": 1.736898914338369e-05, - "loss": 0.2971, - "step": 5341 - }, - { - "epoch": 0.5032382657026447, - "grad_norm": 0.6673378944396973, - "learning_rate": 1.7367968282684537e-05, - "loss": 0.2834, - "step": 5342 - }, - { - "epoch": 0.5033324697958126, - "grad_norm": 0.7688459753990173, - "learning_rate": 1.7366947253984313e-05, - "loss": 0.3274, - "step": 5343 - }, - { - "epoch": 0.5034266738889804, - "grad_norm": 3.2844011783599854, - "learning_rate": 1.7365926057306292e-05, - "loss": 0.3038, - "step": 5344 - }, - { - "epoch": 0.5035208779821483, - "grad_norm": 0.7614874839782715, - "learning_rate": 1.736490469267377e-05, - "loss": 0.3166, - "step": 5345 - }, - { - "epoch": 0.5036150820753161, - "grad_norm": 0.8794525265693665, - "learning_rate": 1.7363883160110032e-05, - "loss": 0.3714, - "step": 5346 - }, - { - "epoch": 0.503709286168484, - "grad_norm": 0.7511628270149231, - "learning_rate": 1.736286145963837e-05, - "loss": 0.3322, - "step": 5347 - }, - { - "epoch": 0.5038034902616518, - "grad_norm": 0.7849157452583313, - "learning_rate": 1.7361839591282076e-05, - "loss": 0.337, - "step": 5348 - }, - { - "epoch": 0.5038976943548197, - "grad_norm": 0.6939377784729004, - "learning_rate": 1.736081755506446e-05, - "loss": 0.3224, - "step": 5349 - }, - { - "epoch": 0.5039918984479875, - "grad_norm": 0.8040423393249512, - "learning_rate": 1.7359795351008816e-05, - "loss": 0.3452, - "step": 5350 - }, - { - "epoch": 0.5040861025411554, - "grad_norm": 0.6809116005897522, - "learning_rate": 1.7358772979138453e-05, - "loss": 0.3081, - "step": 5351 - }, - { - "epoch": 0.5041803066343232, - "grad_norm": 0.7361429929733276, - "learning_rate": 1.735775043947669e-05, - "loss": 0.3203, - "step": 5352 - }, - { - "epoch": 0.5042745107274911, - "grad_norm": 0.7542217969894409, - "learning_rate": 1.7356727732046835e-05, - "loss": 0.3544, - "step": 5353 - }, - { - "epoch": 0.5043687148206589, - "grad_norm": 0.7274656891822815, - "learning_rate": 1.7355704856872212e-05, - "loss": 0.3476, - "step": 5354 - }, - { - "epoch": 0.5044629189138268, - "grad_norm": 0.861713707447052, - "learning_rate": 1.7354681813976145e-05, - "loss": 0.3313, - "step": 5355 - }, - { - "epoch": 0.5045571230069946, - "grad_norm": 0.8149729371070862, - "learning_rate": 1.7353658603381956e-05, - "loss": 0.2991, - "step": 5356 - }, - { - "epoch": 0.5046513271001625, - "grad_norm": 0.7789759039878845, - "learning_rate": 1.7352635225112978e-05, - "loss": 0.3453, - "step": 5357 - }, - { - "epoch": 0.5047455311933303, - "grad_norm": 1.1818147897720337, - "learning_rate": 1.7351611679192547e-05, - "loss": 0.2856, - "step": 5358 - }, - { - "epoch": 0.5048397352864982, - "grad_norm": 0.8158087134361267, - "learning_rate": 1.7350587965643998e-05, - "loss": 0.3409, - "step": 5359 - }, - { - "epoch": 0.504933939379666, - "grad_norm": 1.0676349401474, - "learning_rate": 1.7349564084490678e-05, - "loss": 0.3229, - "step": 5360 - }, - { - "epoch": 0.5050281434728339, - "grad_norm": 0.8740731477737427, - 
"learning_rate": 1.7348540035755928e-05, - "loss": 0.3742, - "step": 5361 - }, - { - "epoch": 0.5051223475660017, - "grad_norm": 0.6558460593223572, - "learning_rate": 1.73475158194631e-05, - "loss": 0.2965, - "step": 5362 - }, - { - "epoch": 0.5052165516591696, - "grad_norm": 0.7562674880027771, - "learning_rate": 1.734649143563555e-05, - "loss": 0.3686, - "step": 5363 - }, - { - "epoch": 0.5053107557523374, - "grad_norm": 0.7583739757537842, - "learning_rate": 1.7345466884296636e-05, - "loss": 0.3373, - "step": 5364 - }, - { - "epoch": 0.5054049598455053, - "grad_norm": 0.965645432472229, - "learning_rate": 1.7344442165469714e-05, - "loss": 0.3581, - "step": 5365 - }, - { - "epoch": 0.5054991639386731, - "grad_norm": 0.7194229960441589, - "learning_rate": 1.7343417279178153e-05, - "loss": 0.2868, - "step": 5366 - }, - { - "epoch": 0.505593368031841, - "grad_norm": 0.812960147857666, - "learning_rate": 1.734239222544532e-05, - "loss": 0.3705, - "step": 5367 - }, - { - "epoch": 0.5056875721250088, - "grad_norm": 0.7716863751411438, - "learning_rate": 1.734136700429459e-05, - "loss": 0.3119, - "step": 5368 - }, - { - "epoch": 0.5057817762181767, - "grad_norm": 0.8303197026252747, - "learning_rate": 1.734034161574934e-05, - "loss": 0.3224, - "step": 5369 - }, - { - "epoch": 0.5058759803113445, - "grad_norm": 0.7828728556632996, - "learning_rate": 1.7339316059832946e-05, - "loss": 0.3984, - "step": 5370 - }, - { - "epoch": 0.5059701844045124, - "grad_norm": 0.8528280258178711, - "learning_rate": 1.7338290336568798e-05, - "loss": 0.3455, - "step": 5371 - }, - { - "epoch": 0.5060643884976802, - "grad_norm": 0.8985176086425781, - "learning_rate": 1.733726444598028e-05, - "loss": 0.311, - "step": 5372 - }, - { - "epoch": 0.506158592590848, - "grad_norm": 0.6802164316177368, - "learning_rate": 1.7336238388090787e-05, - "loss": 0.29, - "step": 5373 - }, - { - "epoch": 0.5062527966840159, - "grad_norm": 0.8211531639099121, - "learning_rate": 1.733521216292371e-05, - "loss": 0.3426, - "step": 5374 - }, - { - "epoch": 0.5063470007771838, - "grad_norm": 0.8165231347084045, - "learning_rate": 1.7334185770502453e-05, - "loss": 0.3192, - "step": 5375 - }, - { - "epoch": 0.5064412048703516, - "grad_norm": 0.8941564559936523, - "learning_rate": 1.7333159210850417e-05, - "loss": 0.3064, - "step": 5376 - }, - { - "epoch": 0.5065354089635195, - "grad_norm": 0.8007015585899353, - "learning_rate": 1.7332132483991015e-05, - "loss": 0.3443, - "step": 5377 - }, - { - "epoch": 0.5066296130566873, - "grad_norm": 0.7286374568939209, - "learning_rate": 1.7331105589947648e-05, - "loss": 0.3131, - "step": 5378 - }, - { - "epoch": 0.5067238171498551, - "grad_norm": 0.6885233521461487, - "learning_rate": 1.7330078528743738e-05, - "loss": 0.3361, - "step": 5379 - }, - { - "epoch": 0.506818021243023, - "grad_norm": 0.7876170873641968, - "learning_rate": 1.73290513004027e-05, - "loss": 0.3917, - "step": 5380 - }, - { - "epoch": 0.5069122253361908, - "grad_norm": 0.6517916917800903, - "learning_rate": 1.7328023904947958e-05, - "loss": 0.3179, - "step": 5381 - }, - { - "epoch": 0.5070064294293587, - "grad_norm": 0.7782155871391296, - "learning_rate": 1.732699634240294e-05, - "loss": 0.2861, - "step": 5382 - }, - { - "epoch": 0.5071006335225265, - "grad_norm": 0.7896504998207092, - "learning_rate": 1.7325968612791074e-05, - "loss": 0.3452, - "step": 5383 - }, - { - "epoch": 0.5071948376156944, - "grad_norm": 0.890192985534668, - "learning_rate": 1.732494071613579e-05, - "loss": 0.3359, - "step": 5384 - }, - { - "epoch": 
0.5072890417088622, - "grad_norm": 0.7129732966423035, - "learning_rate": 1.732391265246053e-05, - "loss": 0.3197, - "step": 5385 - }, - { - "epoch": 0.5073832458020301, - "grad_norm": 0.7338051795959473, - "learning_rate": 1.7322884421788736e-05, - "loss": 0.316, - "step": 5386 - }, - { - "epoch": 0.5074774498951979, - "grad_norm": 0.7098434567451477, - "learning_rate": 1.7321856024143855e-05, - "loss": 0.3262, - "step": 5387 - }, - { - "epoch": 0.5075716539883658, - "grad_norm": 0.7533916234970093, - "learning_rate": 1.732082745954933e-05, - "loss": 0.2947, - "step": 5388 - }, - { - "epoch": 0.5076658580815336, - "grad_norm": 1.1245689392089844, - "learning_rate": 1.7319798728028617e-05, - "loss": 0.3883, - "step": 5389 - }, - { - "epoch": 0.5077600621747015, - "grad_norm": 0.8866914510726929, - "learning_rate": 1.7318769829605176e-05, - "loss": 0.3529, - "step": 5390 - }, - { - "epoch": 0.5078542662678693, - "grad_norm": 0.7427348494529724, - "learning_rate": 1.731774076430246e-05, - "loss": 0.3152, - "step": 5391 - }, - { - "epoch": 0.5079484703610372, - "grad_norm": 0.8100770711898804, - "learning_rate": 1.731671153214394e-05, - "loss": 0.3355, - "step": 5392 - }, - { - "epoch": 0.508042674454205, - "grad_norm": 0.6134782433509827, - "learning_rate": 1.731568213315308e-05, - "loss": 0.2811, - "step": 5393 - }, - { - "epoch": 0.5081368785473729, - "grad_norm": 0.7467697262763977, - "learning_rate": 1.7314652567353355e-05, - "loss": 0.3613, - "step": 5394 - }, - { - "epoch": 0.5082310826405407, - "grad_norm": 0.8124426007270813, - "learning_rate": 1.731362283476824e-05, - "loss": 0.3457, - "step": 5395 - }, - { - "epoch": 0.5083252867337086, - "grad_norm": 0.9458872079849243, - "learning_rate": 1.731259293542121e-05, - "loss": 0.318, - "step": 5396 - }, - { - "epoch": 0.5084194908268764, - "grad_norm": 0.8066940903663635, - "learning_rate": 1.7311562869335753e-05, - "loss": 0.3041, - "step": 5397 - }, - { - "epoch": 0.5085136949200443, - "grad_norm": 0.8185582756996155, - "learning_rate": 1.7310532636535357e-05, - "loss": 0.407, - "step": 5398 - }, - { - "epoch": 0.5086078990132121, - "grad_norm": 0.823703944683075, - "learning_rate": 1.7309502237043508e-05, - "loss": 0.3189, - "step": 5399 - }, - { - "epoch": 0.50870210310638, - "grad_norm": 0.7584295868873596, - "learning_rate": 1.7308471670883707e-05, - "loss": 0.3335, - "step": 5400 - }, - { - "epoch": 0.5087963071995478, - "grad_norm": 0.7898347973823547, - "learning_rate": 1.7307440938079447e-05, - "loss": 0.3086, - "step": 5401 - }, - { - "epoch": 0.5088905112927157, - "grad_norm": 0.6594393849372864, - "learning_rate": 1.730641003865423e-05, - "loss": 0.3196, - "step": 5402 - }, - { - "epoch": 0.5089847153858835, - "grad_norm": 0.850841760635376, - "learning_rate": 1.730537897263157e-05, - "loss": 0.3599, - "step": 5403 - }, - { - "epoch": 0.5090789194790514, - "grad_norm": 0.6819915771484375, - "learning_rate": 1.7304347740034968e-05, - "loss": 0.3202, - "step": 5404 - }, - { - "epoch": 0.5091731235722192, - "grad_norm": 0.7515741586685181, - "learning_rate": 1.7303316340887943e-05, - "loss": 0.3524, - "step": 5405 - }, - { - "epoch": 0.5092673276653871, - "grad_norm": 0.8181531429290771, - "learning_rate": 1.730228477521401e-05, - "loss": 0.3905, - "step": 5406 - }, - { - "epoch": 0.5093615317585549, - "grad_norm": 0.7893922924995422, - "learning_rate": 1.730125304303669e-05, - "loss": 0.3236, - "step": 5407 - }, - { - "epoch": 0.5094557358517228, - "grad_norm": 0.698948085308075, - "learning_rate": 1.730022114437951e-05, - 
"loss": 0.3337, - "step": 5408 - }, - { - "epoch": 0.5095499399448906, - "grad_norm": 0.830158531665802, - "learning_rate": 1.7299189079266e-05, - "loss": 0.3207, - "step": 5409 - }, - { - "epoch": 0.5096441440380585, - "grad_norm": 0.8753250241279602, - "learning_rate": 1.7298156847719687e-05, - "loss": 0.3427, - "step": 5410 - }, - { - "epoch": 0.5097383481312263, - "grad_norm": 0.9487413167953491, - "learning_rate": 1.729712444976411e-05, - "loss": 0.3504, - "step": 5411 - }, - { - "epoch": 0.5098325522243942, - "grad_norm": 0.788690447807312, - "learning_rate": 1.7296091885422816e-05, - "loss": 0.3139, - "step": 5412 - }, - { - "epoch": 0.509926756317562, - "grad_norm": 0.9058326482772827, - "learning_rate": 1.7295059154719337e-05, - "loss": 0.327, - "step": 5413 - }, - { - "epoch": 0.5100209604107299, - "grad_norm": 0.810093104839325, - "learning_rate": 1.7294026257677233e-05, - "loss": 0.3691, - "step": 5414 - }, - { - "epoch": 0.5101151645038977, - "grad_norm": 0.7243669033050537, - "learning_rate": 1.7292993194320047e-05, - "loss": 0.3132, - "step": 5415 - }, - { - "epoch": 0.5102093685970656, - "grad_norm": 0.7211150527000427, - "learning_rate": 1.7291959964671337e-05, - "loss": 0.333, - "step": 5416 - }, - { - "epoch": 0.5103035726902334, - "grad_norm": 0.7501480579376221, - "learning_rate": 1.7290926568754664e-05, - "loss": 0.3353, - "step": 5417 - }, - { - "epoch": 0.5103977767834013, - "grad_norm": 0.7837489247322083, - "learning_rate": 1.728989300659359e-05, - "loss": 0.3502, - "step": 5418 - }, - { - "epoch": 0.5104919808765691, - "grad_norm": 0.7330644726753235, - "learning_rate": 1.7288859278211676e-05, - "loss": 0.3383, - "step": 5419 - }, - { - "epoch": 0.510586184969737, - "grad_norm": 0.6996157765388489, - "learning_rate": 1.7287825383632503e-05, - "loss": 0.3377, - "step": 5420 - }, - { - "epoch": 0.5106803890629048, - "grad_norm": 0.7708149552345276, - "learning_rate": 1.728679132287964e-05, - "loss": 0.33, - "step": 5421 - }, - { - "epoch": 0.5107745931560727, - "grad_norm": 0.8168370127677917, - "learning_rate": 1.728575709597666e-05, - "loss": 0.3027, - "step": 5422 - }, - { - "epoch": 0.5108687972492405, - "grad_norm": 0.7973650097846985, - "learning_rate": 1.7284722702947162e-05, - "loss": 0.3504, - "step": 5423 - }, - { - "epoch": 0.5109630013424084, - "grad_norm": 0.8431777358055115, - "learning_rate": 1.728368814381471e-05, - "loss": 0.3333, - "step": 5424 - }, - { - "epoch": 0.5110572054355762, - "grad_norm": 0.6388592720031738, - "learning_rate": 1.7282653418602908e-05, - "loss": 0.2916, - "step": 5425 - }, - { - "epoch": 0.511151409528744, - "grad_norm": 0.6659934520721436, - "learning_rate": 1.7281618527335347e-05, - "loss": 0.31, - "step": 5426 - }, - { - "epoch": 0.5112456136219119, - "grad_norm": 0.755641520023346, - "learning_rate": 1.728058347003562e-05, - "loss": 0.3206, - "step": 5427 - }, - { - "epoch": 0.5113398177150797, - "grad_norm": 0.7816777229309082, - "learning_rate": 1.727954824672733e-05, - "loss": 0.347, - "step": 5428 - }, - { - "epoch": 0.5114340218082476, - "grad_norm": 0.6937658190727234, - "learning_rate": 1.7278512857434085e-05, - "loss": 0.2998, - "step": 5429 - }, - { - "epoch": 0.5115282259014154, - "grad_norm": 0.7459749579429626, - "learning_rate": 1.7277477302179487e-05, - "loss": 0.3347, - "step": 5430 - }, - { - "epoch": 0.5116224299945833, - "grad_norm": 0.8485836982727051, - "learning_rate": 1.727644158098715e-05, - "loss": 0.3318, - "step": 5431 - }, - { - "epoch": 0.5117166340877511, - "grad_norm": 0.7895025014877319, 
- "learning_rate": 1.7275405693880697e-05, - "loss": 0.3334, - "step": 5432 - }, - { - "epoch": 0.511810838180919, - "grad_norm": 0.7498315572738647, - "learning_rate": 1.727436964088374e-05, - "loss": 0.288, - "step": 5433 - }, - { - "epoch": 0.5119050422740868, - "grad_norm": 0.7922171950340271, - "learning_rate": 1.7273333422019905e-05, - "loss": 0.3679, - "step": 5434 - }, - { - "epoch": 0.5119992463672547, - "grad_norm": 0.7560186386108398, - "learning_rate": 1.7272297037312817e-05, - "loss": 0.268, - "step": 5435 - }, - { - "epoch": 0.5120934504604225, - "grad_norm": 0.7202543020248413, - "learning_rate": 1.7271260486786114e-05, - "loss": 0.3706, - "step": 5436 - }, - { - "epoch": 0.5121876545535904, - "grad_norm": 0.9651054739952087, - "learning_rate": 1.7270223770463424e-05, - "loss": 0.2877, - "step": 5437 - }, - { - "epoch": 0.5122818586467582, - "grad_norm": 0.7424997091293335, - "learning_rate": 1.726918688836839e-05, - "loss": 0.3793, - "step": 5438 - }, - { - "epoch": 0.5123760627399261, - "grad_norm": 0.7279515862464905, - "learning_rate": 1.7268149840524653e-05, - "loss": 0.3479, - "step": 5439 - }, - { - "epoch": 0.5124702668330939, - "grad_norm": 0.6700050830841064, - "learning_rate": 1.7267112626955856e-05, - "loss": 0.2837, - "step": 5440 - }, - { - "epoch": 0.5125644709262618, - "grad_norm": 0.7173581123352051, - "learning_rate": 1.7266075247685656e-05, - "loss": 0.2837, - "step": 5441 - }, - { - "epoch": 0.5126586750194296, - "grad_norm": 0.8028951287269592, - "learning_rate": 1.7265037702737703e-05, - "loss": 0.3404, - "step": 5442 - }, - { - "epoch": 0.5127528791125975, - "grad_norm": 0.8250238299369812, - "learning_rate": 1.7263999992135654e-05, - "loss": 0.3008, - "step": 5443 - }, - { - "epoch": 0.5128470832057653, - "grad_norm": 0.7309693694114685, - "learning_rate": 1.726296211590317e-05, - "loss": 0.3226, - "step": 5444 - }, - { - "epoch": 0.5129412872989332, - "grad_norm": 0.71340411901474, - "learning_rate": 1.726192407406392e-05, - "loss": 0.3574, - "step": 5445 - }, - { - "epoch": 0.513035491392101, - "grad_norm": 0.7260133624076843, - "learning_rate": 1.726088586664157e-05, - "loss": 0.3388, - "step": 5446 - }, - { - "epoch": 0.5131296954852689, - "grad_norm": 0.9490150213241577, - "learning_rate": 1.7259847493659793e-05, - "loss": 0.3615, - "step": 5447 - }, - { - "epoch": 0.5132238995784367, - "grad_norm": 0.754058837890625, - "learning_rate": 1.725880895514226e-05, - "loss": 0.3652, - "step": 5448 - }, - { - "epoch": 0.5133181036716046, - "grad_norm": 0.787288248538971, - "learning_rate": 1.7257770251112662e-05, - "loss": 0.3023, - "step": 5449 - }, - { - "epoch": 0.5134123077647724, - "grad_norm": 1.0981327295303345, - "learning_rate": 1.7256731381594677e-05, - "loss": 0.3503, - "step": 5450 - }, - { - "epoch": 0.5135065118579403, - "grad_norm": 0.7456867694854736, - "learning_rate": 1.7255692346611994e-05, - "loss": 0.3643, - "step": 5451 - }, - { - "epoch": 0.5136007159511081, - "grad_norm": 0.766739010810852, - "learning_rate": 1.7254653146188306e-05, - "loss": 0.3477, - "step": 5452 - }, - { - "epoch": 0.513694920044276, - "grad_norm": 0.7890262603759766, - "learning_rate": 1.7253613780347303e-05, - "loss": 0.3745, - "step": 5453 - }, - { - "epoch": 0.5137891241374438, - "grad_norm": 0.9555128216743469, - "learning_rate": 1.725257424911269e-05, - "loss": 0.3556, - "step": 5454 - }, - { - "epoch": 0.5138833282306117, - "grad_norm": 0.7513731122016907, - "learning_rate": 1.725153455250817e-05, - "loss": 0.3147, - "step": 5455 - }, - { - "epoch": 
0.5139775323237795, - "grad_norm": 1.0011706352233887, - "learning_rate": 1.7250494690557445e-05, - "loss": 0.3655, - "step": 5456 - }, - { - "epoch": 0.5140717364169474, - "grad_norm": 0.7627401947975159, - "learning_rate": 1.7249454663284225e-05, - "loss": 0.3403, - "step": 5457 - }, - { - "epoch": 0.5141659405101152, - "grad_norm": 0.7304606437683105, - "learning_rate": 1.7248414470712232e-05, - "loss": 0.3463, - "step": 5458 - }, - { - "epoch": 0.5142601446032831, - "grad_norm": 0.8432670831680298, - "learning_rate": 1.7247374112865178e-05, - "loss": 0.3694, - "step": 5459 - }, - { - "epoch": 0.5143543486964509, - "grad_norm": 0.8038775324821472, - "learning_rate": 1.7246333589766786e-05, - "loss": 0.3167, - "step": 5460 - }, - { - "epoch": 0.5144485527896188, - "grad_norm": 0.7647266983985901, - "learning_rate": 1.724529290144078e-05, - "loss": 0.3057, - "step": 5461 - }, - { - "epoch": 0.5145427568827866, - "grad_norm": 0.820859968662262, - "learning_rate": 1.7244252047910893e-05, - "loss": 0.3457, - "step": 5462 - }, - { - "epoch": 0.5146369609759545, - "grad_norm": 0.8634036779403687, - "learning_rate": 1.7243211029200852e-05, - "loss": 0.3016, - "step": 5463 - }, - { - "epoch": 0.5147311650691222, - "grad_norm": 0.8282648921012878, - "learning_rate": 1.72421698453344e-05, - "loss": 0.3581, - "step": 5464 - }, - { - "epoch": 0.51482536916229, - "grad_norm": 0.7817854285240173, - "learning_rate": 1.7241128496335276e-05, - "loss": 0.353, - "step": 5465 - }, - { - "epoch": 0.5149195732554579, - "grad_norm": 0.7150627374649048, - "learning_rate": 1.7240086982227225e-05, - "loss": 0.3542, - "step": 5466 - }, - { - "epoch": 0.5150137773486257, - "grad_norm": 0.6410994529724121, - "learning_rate": 1.723904530303399e-05, - "loss": 0.3014, - "step": 5467 - }, - { - "epoch": 0.5151079814417936, - "grad_norm": 0.8309205770492554, - "learning_rate": 1.7238003458779327e-05, - "loss": 0.3426, - "step": 5468 - }, - { - "epoch": 0.5152021855349614, - "grad_norm": 0.8303483724594116, - "learning_rate": 1.7236961449486996e-05, - "loss": 0.3431, - "step": 5469 - }, - { - "epoch": 0.5152963896281293, - "grad_norm": 0.7229982018470764, - "learning_rate": 1.7235919275180748e-05, - "loss": 0.3116, - "step": 5470 - }, - { - "epoch": 0.5153905937212971, - "grad_norm": 0.7184120416641235, - "learning_rate": 1.723487693588435e-05, - "loss": 0.3366, - "step": 5471 - }, - { - "epoch": 0.515484797814465, - "grad_norm": 0.8860011100769043, - "learning_rate": 1.723383443162157e-05, - "loss": 0.299, - "step": 5472 - }, - { - "epoch": 0.5155790019076328, - "grad_norm": 0.7865309119224548, - "learning_rate": 1.7232791762416176e-05, - "loss": 0.3552, - "step": 5473 - }, - { - "epoch": 0.5156732060008007, - "grad_norm": 0.7601330876350403, - "learning_rate": 1.7231748928291946e-05, - "loss": 0.2869, - "step": 5474 - }, - { - "epoch": 0.5157674100939685, - "grad_norm": 0.6787477731704712, - "learning_rate": 1.7230705929272655e-05, - "loss": 0.2832, - "step": 5475 - }, - { - "epoch": 0.5158616141871364, - "grad_norm": 0.7316486239433289, - "learning_rate": 1.722966276538209e-05, - "loss": 0.3329, - "step": 5476 - }, - { - "epoch": 0.5159558182803042, - "grad_norm": 0.9282322525978088, - "learning_rate": 1.7228619436644026e-05, - "loss": 0.3011, - "step": 5477 - }, - { - "epoch": 0.5160500223734721, - "grad_norm": 0.6631028652191162, - "learning_rate": 1.7227575943082268e-05, - "loss": 0.2993, - "step": 5478 - }, - { - "epoch": 0.5161442264666399, - "grad_norm": 0.7901523113250732, - "learning_rate": 
1.7226532284720594e-05, - "loss": 0.3189, - "step": 5479 - }, - { - "epoch": 0.5162384305598078, - "grad_norm": 0.8018639087677002, - "learning_rate": 1.7225488461582812e-05, - "loss": 0.3547, - "step": 5480 - }, - { - "epoch": 0.5163326346529756, - "grad_norm": 0.6779786348342896, - "learning_rate": 1.7224444473692718e-05, - "loss": 0.3308, - "step": 5481 - }, - { - "epoch": 0.5164268387461435, - "grad_norm": 0.9510851502418518, - "learning_rate": 1.7223400321074115e-05, - "loss": 0.3882, - "step": 5482 - }, - { - "epoch": 0.5165210428393113, - "grad_norm": 2.2835378646850586, - "learning_rate": 1.7222356003750814e-05, - "loss": 0.3359, - "step": 5483 - }, - { - "epoch": 0.5166152469324792, - "grad_norm": 0.8252720832824707, - "learning_rate": 1.7221311521746628e-05, - "loss": 0.3025, - "step": 5484 - }, - { - "epoch": 0.516709451025647, - "grad_norm": 0.9316931962966919, - "learning_rate": 1.722026687508537e-05, - "loss": 0.3915, - "step": 5485 - }, - { - "epoch": 0.5168036551188149, - "grad_norm": 0.8290546536445618, - "learning_rate": 1.721922206379086e-05, - "loss": 0.387, - "step": 5486 - }, - { - "epoch": 0.5168978592119827, - "grad_norm": 0.7082507610321045, - "learning_rate": 1.7218177087886923e-05, - "loss": 0.3085, - "step": 5487 - }, - { - "epoch": 0.5169920633051506, - "grad_norm": 0.7358608245849609, - "learning_rate": 1.7217131947397386e-05, - "loss": 0.3285, - "step": 5488 - }, - { - "epoch": 0.5170862673983184, - "grad_norm": 0.7969195246696472, - "learning_rate": 1.721608664234608e-05, - "loss": 0.3485, - "step": 5489 - }, - { - "epoch": 0.5171804714914863, - "grad_norm": 0.7457312345504761, - "learning_rate": 1.7215041172756838e-05, - "loss": 0.3061, - "step": 5490 - }, - { - "epoch": 0.5172746755846541, - "grad_norm": 0.7926058173179626, - "learning_rate": 1.7213995538653497e-05, - "loss": 0.3561, - "step": 5491 - }, - { - "epoch": 0.517368879677822, - "grad_norm": 0.7441315650939941, - "learning_rate": 1.7212949740059903e-05, - "loss": 0.3754, - "step": 5492 - }, - { - "epoch": 0.5174630837709898, - "grad_norm": 0.7602240443229675, - "learning_rate": 1.7211903776999903e-05, - "loss": 0.3352, - "step": 5493 - }, - { - "epoch": 0.5175572878641577, - "grad_norm": 0.8453928828239441, - "learning_rate": 1.721085764949734e-05, - "loss": 0.2914, - "step": 5494 - }, - { - "epoch": 0.5176514919573255, - "grad_norm": 0.739108145236969, - "learning_rate": 1.7209811357576066e-05, - "loss": 0.2925, - "step": 5495 - }, - { - "epoch": 0.5177456960504934, - "grad_norm": 0.7192702889442444, - "learning_rate": 1.720876490125995e-05, - "loss": 0.289, - "step": 5496 - }, - { - "epoch": 0.5178399001436612, - "grad_norm": 0.8548442125320435, - "learning_rate": 1.7207718280572844e-05, - "loss": 0.3342, - "step": 5497 - }, - { - "epoch": 0.5179341042368291, - "grad_norm": 0.6716485619544983, - "learning_rate": 1.720667149553861e-05, - "loss": 0.3124, - "step": 5498 - }, - { - "epoch": 0.5180283083299969, - "grad_norm": 0.66901695728302, - "learning_rate": 1.7205624546181128e-05, - "loss": 0.3067, - "step": 5499 - }, - { - "epoch": 0.5181225124231648, - "grad_norm": 0.7227648496627808, - "learning_rate": 1.7204577432524257e-05, - "loss": 0.3009, - "step": 5500 - }, - { - "epoch": 0.5182167165163326, - "grad_norm": 0.8730160593986511, - "learning_rate": 1.7203530154591883e-05, - "loss": 0.3605, - "step": 5501 - }, - { - "epoch": 0.5183109206095005, - "grad_norm": 0.7889743447303772, - "learning_rate": 1.7202482712407876e-05, - "loss": 0.3175, - "step": 5502 - }, - { - "epoch": 
0.5184051247026683, - "grad_norm": 0.7347235083580017, - "learning_rate": 1.7201435105996128e-05, - "loss": 0.3111, - "step": 5503 - }, - { - "epoch": 0.5184993287958362, - "grad_norm": 0.7456490993499756, - "learning_rate": 1.720038733538052e-05, - "loss": 0.3193, - "step": 5504 - }, - { - "epoch": 0.518593532889004, - "grad_norm": 0.9792962074279785, - "learning_rate": 1.7199339400584944e-05, - "loss": 0.3732, - "step": 5505 - }, - { - "epoch": 0.5186877369821719, - "grad_norm": 0.8372748494148254, - "learning_rate": 1.7198291301633298e-05, - "loss": 0.3591, - "step": 5506 - }, - { - "epoch": 0.5187819410753397, - "grad_norm": 0.9081077575683594, - "learning_rate": 1.719724303854948e-05, - "loss": 0.4306, - "step": 5507 - }, - { - "epoch": 0.5188761451685076, - "grad_norm": 0.7463353872299194, - "learning_rate": 1.719619461135739e-05, - "loss": 0.3307, - "step": 5508 - }, - { - "epoch": 0.5189703492616754, - "grad_norm": 0.7470934391021729, - "learning_rate": 1.719514602008093e-05, - "loss": 0.315, - "step": 5509 - }, - { - "epoch": 0.5190645533548432, - "grad_norm": 0.7186209559440613, - "learning_rate": 1.7194097264744014e-05, - "loss": 0.3001, - "step": 5510 - }, - { - "epoch": 0.5191587574480111, - "grad_norm": 0.8188936114311218, - "learning_rate": 1.7193048345370553e-05, - "loss": 0.3803, - "step": 5511 - }, - { - "epoch": 0.519252961541179, - "grad_norm": 0.7234528064727783, - "learning_rate": 1.7191999261984466e-05, - "loss": 0.3001, - "step": 5512 - }, - { - "epoch": 0.5193471656343468, - "grad_norm": 0.847127377986908, - "learning_rate": 1.7190950014609677e-05, - "loss": 0.3308, - "step": 5513 - }, - { - "epoch": 0.5194413697275146, - "grad_norm": 0.8823010921478271, - "learning_rate": 1.7189900603270105e-05, - "loss": 0.3513, - "step": 5514 - }, - { - "epoch": 0.5195355738206825, - "grad_norm": 0.7539229989051819, - "learning_rate": 1.718885102798968e-05, - "loss": 0.2975, - "step": 5515 - }, - { - "epoch": 0.5196297779138503, - "grad_norm": 0.8143484592437744, - "learning_rate": 1.718780128879233e-05, - "loss": 0.3359, - "step": 5516 - }, - { - "epoch": 0.5197239820070182, - "grad_norm": 0.8117477893829346, - "learning_rate": 1.7186751385701998e-05, - "loss": 0.3133, - "step": 5517 - }, - { - "epoch": 0.519818186100186, - "grad_norm": 0.8710623383522034, - "learning_rate": 1.718570131874262e-05, - "loss": 0.3412, - "step": 5518 - }, - { - "epoch": 0.5199123901933539, - "grad_norm": 0.9749717116355896, - "learning_rate": 1.7184651087938138e-05, - "loss": 0.3676, - "step": 5519 - }, - { - "epoch": 0.5200065942865217, - "grad_norm": 0.7328717708587646, - "learning_rate": 1.7183600693312503e-05, - "loss": 0.3066, - "step": 5520 - }, - { - "epoch": 0.5201007983796896, - "grad_norm": 0.7747542858123779, - "learning_rate": 1.718255013488966e-05, - "loss": 0.3091, - "step": 5521 - }, - { - "epoch": 0.5201950024728574, - "grad_norm": 0.757550835609436, - "learning_rate": 1.7181499412693563e-05, - "loss": 0.3609, - "step": 5522 - }, - { - "epoch": 0.5202892065660253, - "grad_norm": 0.7175095081329346, - "learning_rate": 1.7180448526748177e-05, - "loss": 0.3076, - "step": 5523 - }, - { - "epoch": 0.5203834106591931, - "grad_norm": 0.830827534198761, - "learning_rate": 1.717939747707746e-05, - "loss": 0.3386, - "step": 5524 - }, - { - "epoch": 0.520477614752361, - "grad_norm": 0.8881201148033142, - "learning_rate": 1.7178346263705372e-05, - "loss": 0.3547, - "step": 5525 - }, - { - "epoch": 0.5205718188455288, - "grad_norm": 0.7616137862205505, - "learning_rate": 1.7177294886655894e-05, 
- "loss": 0.3291, - "step": 5526 - }, - { - "epoch": 0.5206660229386967, - "grad_norm": 0.8208198547363281, - "learning_rate": 1.717624334595299e-05, - "loss": 0.3096, - "step": 5527 - }, - { - "epoch": 0.5207602270318645, - "grad_norm": 0.7254412770271301, - "learning_rate": 1.7175191641620637e-05, - "loss": 0.3449, - "step": 5528 - }, - { - "epoch": 0.5208544311250324, - "grad_norm": 0.7519254088401794, - "learning_rate": 1.7174139773682824e-05, - "loss": 0.3091, - "step": 5529 - }, - { - "epoch": 0.5209486352182002, - "grad_norm": 0.853268563747406, - "learning_rate": 1.7173087742163527e-05, - "loss": 0.3382, - "step": 5530 - }, - { - "epoch": 0.5210428393113681, - "grad_norm": 0.7265686988830566, - "learning_rate": 1.7172035547086732e-05, - "loss": 0.3032, - "step": 5531 - }, - { - "epoch": 0.5211370434045359, - "grad_norm": 0.674207329750061, - "learning_rate": 1.7170983188476437e-05, - "loss": 0.2717, - "step": 5532 - }, - { - "epoch": 0.5212312474977038, - "grad_norm": 0.874945878982544, - "learning_rate": 1.7169930666356637e-05, - "loss": 0.3788, - "step": 5533 - }, - { - "epoch": 0.5213254515908716, - "grad_norm": 0.8548377752304077, - "learning_rate": 1.716887798075133e-05, - "loss": 0.3248, - "step": 5534 - }, - { - "epoch": 0.5214196556840395, - "grad_norm": 0.7522386312484741, - "learning_rate": 1.7167825131684516e-05, - "loss": 0.3738, - "step": 5535 - }, - { - "epoch": 0.5215138597772073, - "grad_norm": 0.8124439120292664, - "learning_rate": 1.7166772119180202e-05, - "loss": 0.3562, - "step": 5536 - }, - { - "epoch": 0.5216080638703752, - "grad_norm": 0.7803117036819458, - "learning_rate": 1.7165718943262402e-05, - "loss": 0.3614, - "step": 5537 - }, - { - "epoch": 0.521702267963543, - "grad_norm": 0.6467984318733215, - "learning_rate": 1.7164665603955128e-05, - "loss": 0.2744, - "step": 5538 - }, - { - "epoch": 0.5217964720567109, - "grad_norm": 0.7423121929168701, - "learning_rate": 1.7163612101282398e-05, - "loss": 0.297, - "step": 5539 - }, - { - "epoch": 0.5218906761498787, - "grad_norm": 0.8348680734634399, - "learning_rate": 1.7162558435268235e-05, - "loss": 0.3428, - "step": 5540 - }, - { - "epoch": 0.5219848802430466, - "grad_norm": 0.7723286151885986, - "learning_rate": 1.716150460593666e-05, - "loss": 0.3472, - "step": 5541 - }, - { - "epoch": 0.5220790843362144, - "grad_norm": 0.868765652179718, - "learning_rate": 1.7160450613311704e-05, - "loss": 0.3718, - "step": 5542 - }, - { - "epoch": 0.5221732884293823, - "grad_norm": 0.7805017232894897, - "learning_rate": 1.7159396457417405e-05, - "loss": 0.3947, - "step": 5543 - }, - { - "epoch": 0.5222674925225501, - "grad_norm": 0.9394553899765015, - "learning_rate": 1.715834213827779e-05, - "loss": 0.3416, - "step": 5544 - }, - { - "epoch": 0.522361696615718, - "grad_norm": 0.7767390012741089, - "learning_rate": 1.7157287655916904e-05, - "loss": 0.3545, - "step": 5545 - }, - { - "epoch": 0.5224559007088858, - "grad_norm": 0.7393049001693726, - "learning_rate": 1.715623301035879e-05, - "loss": 0.3298, - "step": 5546 - }, - { - "epoch": 0.5225501048020537, - "grad_norm": 0.780881404876709, - "learning_rate": 1.7155178201627497e-05, - "loss": 0.3271, - "step": 5547 - }, - { - "epoch": 0.5226443088952215, - "grad_norm": 0.7244885563850403, - "learning_rate": 1.7154123229747077e-05, - "loss": 0.3464, - "step": 5548 - }, - { - "epoch": 0.5227385129883894, - "grad_norm": 0.7590210437774658, - "learning_rate": 1.715306809474158e-05, - "loss": 0.2862, - "step": 5549 - }, - { - "epoch": 0.5228327170815572, - "grad_norm": 
0.8282087445259094, - "learning_rate": 1.715201279663507e-05, - "loss": 0.3589, - "step": 5550 - }, - { - "epoch": 0.5229269211747251, - "grad_norm": 0.7800232172012329, - "learning_rate": 1.715095733545161e-05, - "loss": 0.3254, - "step": 5551 - }, - { - "epoch": 0.5230211252678929, - "grad_norm": 0.8241845369338989, - "learning_rate": 1.714990171121526e-05, - "loss": 0.316, - "step": 5552 - }, - { - "epoch": 0.5231153293610608, - "grad_norm": 1.1471837759017944, - "learning_rate": 1.7148845923950092e-05, - "loss": 0.3573, - "step": 5553 - }, - { - "epoch": 0.5232095334542286, - "grad_norm": 0.7228818535804749, - "learning_rate": 1.7147789973680184e-05, - "loss": 0.3171, - "step": 5554 - }, - { - "epoch": 0.5233037375473965, - "grad_norm": 0.9623462557792664, - "learning_rate": 1.7146733860429614e-05, - "loss": 0.3724, - "step": 5555 - }, - { - "epoch": 0.5233979416405643, - "grad_norm": 0.7974388599395752, - "learning_rate": 1.7145677584222454e-05, - "loss": 0.3478, - "step": 5556 - }, - { - "epoch": 0.5234921457337322, - "grad_norm": 0.9783768057823181, - "learning_rate": 1.7144621145082794e-05, - "loss": 0.3258, - "step": 5557 - }, - { - "epoch": 0.5235863498269, - "grad_norm": 0.7557503581047058, - "learning_rate": 1.7143564543034724e-05, - "loss": 0.3821, - "step": 5558 - }, - { - "epoch": 0.5236805539200678, - "grad_norm": 1.0619620084762573, - "learning_rate": 1.7142507778102334e-05, - "loss": 0.3297, - "step": 5559 - }, - { - "epoch": 0.5237747580132357, - "grad_norm": 0.8709781169891357, - "learning_rate": 1.714145085030972e-05, - "loss": 0.3376, - "step": 5560 - }, - { - "epoch": 0.5238689621064035, - "grad_norm": 0.8476454019546509, - "learning_rate": 1.714039375968098e-05, - "loss": 0.3563, - "step": 5561 - }, - { - "epoch": 0.5239631661995714, - "grad_norm": 0.720432698726654, - "learning_rate": 1.7139336506240227e-05, - "loss": 0.2886, - "step": 5562 - }, - { - "epoch": 0.5240573702927392, - "grad_norm": 0.9466307759284973, - "learning_rate": 1.7138279090011556e-05, - "loss": 0.3622, - "step": 5563 - }, - { - "epoch": 0.5241515743859071, - "grad_norm": 0.6946443915367126, - "learning_rate": 1.7137221511019083e-05, - "loss": 0.3279, - "step": 5564 - }, - { - "epoch": 0.5242457784790749, - "grad_norm": 0.7914426326751709, - "learning_rate": 1.713616376928692e-05, - "loss": 0.3326, - "step": 5565 - }, - { - "epoch": 0.5243399825722428, - "grad_norm": 0.7700616121292114, - "learning_rate": 1.7135105864839187e-05, - "loss": 0.2737, - "step": 5566 - }, - { - "epoch": 0.5244341866654106, - "grad_norm": 0.8591805696487427, - "learning_rate": 1.7134047797700004e-05, - "loss": 0.334, - "step": 5567 - }, - { - "epoch": 0.5245283907585785, - "grad_norm": 0.9132749438285828, - "learning_rate": 1.71329895678935e-05, - "loss": 0.3397, - "step": 5568 - }, - { - "epoch": 0.5246225948517463, - "grad_norm": 0.7847031950950623, - "learning_rate": 1.7131931175443806e-05, - "loss": 0.3108, - "step": 5569 - }, - { - "epoch": 0.5247167989449142, - "grad_norm": 0.927769124507904, - "learning_rate": 1.7130872620375048e-05, - "loss": 0.3748, - "step": 5570 - }, - { - "epoch": 0.524811003038082, - "grad_norm": 0.9591184854507446, - "learning_rate": 1.7129813902711366e-05, - "loss": 0.3548, - "step": 5571 - }, - { - "epoch": 0.5249052071312499, - "grad_norm": 0.797385573387146, - "learning_rate": 1.71287550224769e-05, - "loss": 0.3013, - "step": 5572 - }, - { - "epoch": 0.5249994112244177, - "grad_norm": 0.7265573143959045, - "learning_rate": 1.7127695979695795e-05, - "loss": 0.3252, - "step": 5573 - }, 
- { - "epoch": 0.5250936153175856, - "grad_norm": 0.9222989082336426, - "learning_rate": 1.71266367743922e-05, - "loss": 0.3214, - "step": 5574 - }, - { - "epoch": 0.5251878194107534, - "grad_norm": 0.906359851360321, - "learning_rate": 1.7125577406590266e-05, - "loss": 0.3792, - "step": 5575 - }, - { - "epoch": 0.5252820235039213, - "grad_norm": 0.8564597964286804, - "learning_rate": 1.7124517876314143e-05, - "loss": 0.3431, - "step": 5576 - }, - { - "epoch": 0.5253762275970891, - "grad_norm": 0.8298361301422119, - "learning_rate": 1.7123458183587996e-05, - "loss": 0.3019, - "step": 5577 - }, - { - "epoch": 0.525470431690257, - "grad_norm": 0.8447403311729431, - "learning_rate": 1.712239832843599e-05, - "loss": 0.3527, - "step": 5578 - }, - { - "epoch": 0.5255646357834248, - "grad_norm": 0.7736579775810242, - "learning_rate": 1.7121338310882283e-05, - "loss": 0.3332, - "step": 5579 - }, - { - "epoch": 0.5256588398765927, - "grad_norm": 0.8325954675674438, - "learning_rate": 1.7120278130951046e-05, - "loss": 0.3545, - "step": 5580 - }, - { - "epoch": 0.5257530439697605, - "grad_norm": 0.8582270741462708, - "learning_rate": 1.7119217788666462e-05, - "loss": 0.3623, - "step": 5581 - }, - { - "epoch": 0.5258472480629284, - "grad_norm": 0.7773718237876892, - "learning_rate": 1.71181572840527e-05, - "loss": 0.352, - "step": 5582 - }, - { - "epoch": 0.5259414521560962, - "grad_norm": 0.8369136452674866, - "learning_rate": 1.7117096617133943e-05, - "loss": 0.368, - "step": 5583 - }, - { - "epoch": 0.5260356562492641, - "grad_norm": 0.8247154951095581, - "learning_rate": 1.7116035787934377e-05, - "loss": 0.3153, - "step": 5584 - }, - { - "epoch": 0.5261298603424319, - "grad_norm": 0.7184444069862366, - "learning_rate": 1.711497479647819e-05, - "loss": 0.3331, - "step": 5585 - }, - { - "epoch": 0.5262240644355998, - "grad_norm": 0.9262751340866089, - "learning_rate": 1.711391364278957e-05, - "loss": 0.3529, - "step": 5586 - }, - { - "epoch": 0.5263182685287676, - "grad_norm": 0.7359481453895569, - "learning_rate": 1.711285232689272e-05, - "loss": 0.3376, - "step": 5587 - }, - { - "epoch": 0.5264124726219355, - "grad_norm": 0.8670516610145569, - "learning_rate": 1.711179084881184e-05, - "loss": 0.3538, - "step": 5588 - }, - { - "epoch": 0.5265066767151033, - "grad_norm": 0.8430225253105164, - "learning_rate": 1.7110729208571128e-05, - "loss": 0.377, - "step": 5589 - }, - { - "epoch": 0.5266008808082712, - "grad_norm": 0.8114784955978394, - "learning_rate": 1.7109667406194792e-05, - "loss": 0.2815, - "step": 5590 - }, - { - "epoch": 0.526695084901439, - "grad_norm": 0.7337326407432556, - "learning_rate": 1.7108605441707046e-05, - "loss": 0.3201, - "step": 5591 - }, - { - "epoch": 0.5267892889946069, - "grad_norm": 1.7015444040298462, - "learning_rate": 1.71075433151321e-05, - "loss": 0.3603, - "step": 5592 - }, - { - "epoch": 0.5268834930877747, - "grad_norm": 0.8098084926605225, - "learning_rate": 1.7106481026494175e-05, - "loss": 0.3353, - "step": 5593 - }, - { - "epoch": 0.5269776971809426, - "grad_norm": 0.772486686706543, - "learning_rate": 1.710541857581749e-05, - "loss": 0.3192, - "step": 5594 - }, - { - "epoch": 0.5270719012741104, - "grad_norm": 0.7702192068099976, - "learning_rate": 1.7104355963126275e-05, - "loss": 0.378, - "step": 5595 - }, - { - "epoch": 0.5271661053672783, - "grad_norm": 0.893916130065918, - "learning_rate": 1.7103293188444756e-05, - "loss": 0.3371, - "step": 5596 - }, - { - "epoch": 0.5272603094604461, - "grad_norm": 0.9102244973182678, - "learning_rate": 
1.7102230251797168e-05, - "loss": 0.3498, - "step": 5597 - }, - { - "epoch": 0.527354513553614, - "grad_norm": 0.9307056665420532, - "learning_rate": 1.7101167153207746e-05, - "loss": 0.3123, - "step": 5598 - }, - { - "epoch": 0.5274487176467818, - "grad_norm": 0.765352189540863, - "learning_rate": 1.7100103892700733e-05, - "loss": 0.3247, - "step": 5599 - }, - { - "epoch": 0.5275429217399497, - "grad_norm": 0.7557453513145447, - "learning_rate": 1.7099040470300366e-05, - "loss": 0.363, - "step": 5600 - }, - { - "epoch": 0.5276371258331175, - "grad_norm": 0.8222182393074036, - "learning_rate": 1.7097976886030902e-05, - "loss": 0.3337, - "step": 5601 - }, - { - "epoch": 0.5277313299262852, - "grad_norm": 0.9086328148841858, - "learning_rate": 1.7096913139916583e-05, - "loss": 0.3495, - "step": 5602 - }, - { - "epoch": 0.5278255340194531, - "grad_norm": 0.6892519593238831, - "learning_rate": 1.7095849231981674e-05, - "loss": 0.2742, - "step": 5603 - }, - { - "epoch": 0.5279197381126209, - "grad_norm": 0.875949501991272, - "learning_rate": 1.7094785162250428e-05, - "loss": 0.3633, - "step": 5604 - }, - { - "epoch": 0.5280139422057888, - "grad_norm": 0.784872829914093, - "learning_rate": 1.7093720930747104e-05, - "loss": 0.3159, - "step": 5605 - }, - { - "epoch": 0.5281081462989566, - "grad_norm": 0.8378578424453735, - "learning_rate": 1.7092656537495974e-05, - "loss": 0.3402, - "step": 5606 - }, - { - "epoch": 0.5282023503921245, - "grad_norm": 0.7835084199905396, - "learning_rate": 1.7091591982521305e-05, - "loss": 0.3068, - "step": 5607 - }, - { - "epoch": 0.5282965544852923, - "grad_norm": 1.1599599123001099, - "learning_rate": 1.7090527265847375e-05, - "loss": 0.3309, - "step": 5608 - }, - { - "epoch": 0.5283907585784602, - "grad_norm": 0.8036758303642273, - "learning_rate": 1.7089462387498453e-05, - "loss": 0.3676, - "step": 5609 - }, - { - "epoch": 0.528484962671628, - "grad_norm": 0.8531378507614136, - "learning_rate": 1.708839734749883e-05, - "loss": 0.346, - "step": 5610 - }, - { - "epoch": 0.5285791667647959, - "grad_norm": 0.9988169074058533, - "learning_rate": 1.7087332145872778e-05, - "loss": 0.3574, - "step": 5611 - }, - { - "epoch": 0.5286733708579637, - "grad_norm": 0.8267210125923157, - "learning_rate": 1.70862667826446e-05, - "loss": 0.3186, - "step": 5612 - }, - { - "epoch": 0.5287675749511316, - "grad_norm": 0.9520545601844788, - "learning_rate": 1.7085201257838574e-05, - "loss": 0.3687, - "step": 5613 - }, - { - "epoch": 0.5288617790442994, - "grad_norm": 0.6689392328262329, - "learning_rate": 1.7084135571479005e-05, - "loss": 0.3086, - "step": 5614 - }, - { - "epoch": 0.5289559831374673, - "grad_norm": 0.7221395969390869, - "learning_rate": 1.708306972359019e-05, - "loss": 0.318, - "step": 5615 - }, - { - "epoch": 0.5290501872306351, - "grad_norm": 0.7734705805778503, - "learning_rate": 1.7082003714196428e-05, - "loss": 0.3415, - "step": 5616 - }, - { - "epoch": 0.529144391323803, - "grad_norm": 1.0123803615570068, - "learning_rate": 1.708093754332203e-05, - "loss": 0.3626, - "step": 5617 - }, - { - "epoch": 0.5292385954169708, - "grad_norm": 1.1305488348007202, - "learning_rate": 1.7079871210991306e-05, - "loss": 0.363, - "step": 5618 - }, - { - "epoch": 0.5293327995101387, - "grad_norm": 0.8126332759857178, - "learning_rate": 1.7078804717228568e-05, - "loss": 0.3115, - "step": 5619 - }, - { - "epoch": 0.5294270036033065, - "grad_norm": 0.7479857206344604, - "learning_rate": 1.7077738062058135e-05, - "loss": 0.3584, - "step": 5620 - }, - { - "epoch": 0.5295212076964744, 
- "grad_norm": 0.86054927110672, - "learning_rate": 1.707667124550433e-05, - "loss": 0.383, - "step": 5621 - }, - { - "epoch": 0.5296154117896422, - "grad_norm": 0.7326262593269348, - "learning_rate": 1.7075604267591475e-05, - "loss": 0.3778, - "step": 5622 - }, - { - "epoch": 0.5297096158828101, - "grad_norm": 0.6459468007087708, - "learning_rate": 1.70745371283439e-05, - "loss": 0.2673, - "step": 5623 - }, - { - "epoch": 0.5298038199759779, - "grad_norm": 0.8367050290107727, - "learning_rate": 1.7073469827785936e-05, - "loss": 0.3682, - "step": 5624 - }, - { - "epoch": 0.5298980240691458, - "grad_norm": 0.9269501566886902, - "learning_rate": 1.7072402365941925e-05, - "loss": 0.3366, - "step": 5625 - }, - { - "epoch": 0.5299922281623136, - "grad_norm": 0.75841224193573, - "learning_rate": 1.70713347428362e-05, - "loss": 0.3442, - "step": 5626 - }, - { - "epoch": 0.5300864322554815, - "grad_norm": 0.7608478665351868, - "learning_rate": 1.7070266958493103e-05, - "loss": 0.3226, - "step": 5627 - }, - { - "epoch": 0.5301806363486493, - "grad_norm": 0.7829641103744507, - "learning_rate": 1.706919901293699e-05, - "loss": 0.3231, - "step": 5628 - }, - { - "epoch": 0.5302748404418172, - "grad_norm": 0.828075110912323, - "learning_rate": 1.7068130906192207e-05, - "loss": 0.3378, - "step": 5629 - }, - { - "epoch": 0.530369044534985, - "grad_norm": 0.7459672689437866, - "learning_rate": 1.7067062638283104e-05, - "loss": 0.341, - "step": 5630 - }, - { - "epoch": 0.5304632486281529, - "grad_norm": 0.8326666951179504, - "learning_rate": 1.7065994209234044e-05, - "loss": 0.3374, - "step": 5631 - }, - { - "epoch": 0.5305574527213207, - "grad_norm": 0.8724443316459656, - "learning_rate": 1.7064925619069393e-05, - "loss": 0.3682, - "step": 5632 - }, - { - "epoch": 0.5306516568144886, - "grad_norm": 0.8615604639053345, - "learning_rate": 1.7063856867813505e-05, - "loss": 0.3717, - "step": 5633 - }, - { - "epoch": 0.5307458609076564, - "grad_norm": 0.7260650992393494, - "learning_rate": 1.7062787955490762e-05, - "loss": 0.3283, - "step": 5634 - }, - { - "epoch": 0.5308400650008243, - "grad_norm": 0.7473740577697754, - "learning_rate": 1.7061718882125528e-05, - "loss": 0.3357, - "step": 5635 - }, - { - "epoch": 0.5309342690939921, - "grad_norm": 0.7821106910705566, - "learning_rate": 1.7060649647742183e-05, - "loss": 0.3039, - "step": 5636 - }, - { - "epoch": 0.53102847318716, - "grad_norm": 0.778623640537262, - "learning_rate": 1.705958025236511e-05, - "loss": 0.3356, - "step": 5637 - }, - { - "epoch": 0.5311226772803278, - "grad_norm": 0.7781974673271179, - "learning_rate": 1.7058510696018686e-05, - "loss": 0.3379, - "step": 5638 - }, - { - "epoch": 0.5312168813734957, - "grad_norm": 0.7608444690704346, - "learning_rate": 1.70574409787273e-05, - "loss": 0.3, - "step": 5639 - }, - { - "epoch": 0.5313110854666635, - "grad_norm": 1.0025386810302734, - "learning_rate": 1.7056371100515345e-05, - "loss": 0.3566, - "step": 5640 - }, - { - "epoch": 0.5314052895598314, - "grad_norm": 0.7889797687530518, - "learning_rate": 1.7055301061407217e-05, - "loss": 0.282, - "step": 5641 - }, - { - "epoch": 0.5314994936529992, - "grad_norm": 0.7105996012687683, - "learning_rate": 1.7054230861427316e-05, - "loss": 0.3223, - "step": 5642 - }, - { - "epoch": 0.531593697746167, - "grad_norm": 0.7551090717315674, - "learning_rate": 1.705316050060004e-05, - "loss": 0.3308, - "step": 5643 - }, - { - "epoch": 0.5316879018393349, - "grad_norm": 0.8662528395652771, - "learning_rate": 1.7052089978949796e-05, - "loss": 0.3789, - "step": 
5644 - }, - { - "epoch": 0.5317821059325027, - "grad_norm": 0.7586779594421387, - "learning_rate": 1.7051019296501e-05, - "loss": 0.3023, - "step": 5645 - }, - { - "epoch": 0.5318763100256706, - "grad_norm": 0.7028634548187256, - "learning_rate": 1.7049948453278052e-05, - "loss": 0.3238, - "step": 5646 - }, - { - "epoch": 0.5319705141188384, - "grad_norm": 0.7961217164993286, - "learning_rate": 1.704887744930538e-05, - "loss": 0.3831, - "step": 5647 - }, - { - "epoch": 0.5320647182120063, - "grad_norm": 0.7676875591278076, - "learning_rate": 1.70478062846074e-05, - "loss": 0.3099, - "step": 5648 - }, - { - "epoch": 0.5321589223051741, - "grad_norm": 0.7232643365859985, - "learning_rate": 1.7046734959208536e-05, - "loss": 0.3167, - "step": 5649 - }, - { - "epoch": 0.532253126398342, - "grad_norm": 0.7947850823402405, - "learning_rate": 1.7045663473133215e-05, - "loss": 0.3208, - "step": 5650 - }, - { - "epoch": 0.5323473304915098, - "grad_norm": 0.7122358083724976, - "learning_rate": 1.7044591826405877e-05, - "loss": 0.3155, - "step": 5651 - }, - { - "epoch": 0.5324415345846777, - "grad_norm": 0.9151607751846313, - "learning_rate": 1.7043520019050945e-05, - "loss": 0.3632, - "step": 5652 - }, - { - "epoch": 0.5325357386778455, - "grad_norm": 0.8084571957588196, - "learning_rate": 1.7042448051092867e-05, - "loss": 0.3433, - "step": 5653 - }, - { - "epoch": 0.5326299427710134, - "grad_norm": 0.8168090581893921, - "learning_rate": 1.704137592255608e-05, - "loss": 0.3339, - "step": 5654 - }, - { - "epoch": 0.5327241468641812, - "grad_norm": 0.723678469657898, - "learning_rate": 1.7040303633465033e-05, - "loss": 0.3043, - "step": 5655 - }, - { - "epoch": 0.5328183509573491, - "grad_norm": 0.7119664549827576, - "learning_rate": 1.7039231183844174e-05, - "loss": 0.2625, - "step": 5656 - }, - { - "epoch": 0.5329125550505169, - "grad_norm": 0.686281144618988, - "learning_rate": 1.703815857371796e-05, - "loss": 0.2849, - "step": 5657 - }, - { - "epoch": 0.5330067591436848, - "grad_norm": 0.9660475850105286, - "learning_rate": 1.7037085803110845e-05, - "loss": 0.3211, - "step": 5658 - }, - { - "epoch": 0.5331009632368526, - "grad_norm": 0.7467395067214966, - "learning_rate": 1.703601287204729e-05, - "loss": 0.3489, - "step": 5659 - }, - { - "epoch": 0.5331951673300205, - "grad_norm": 1.1019151210784912, - "learning_rate": 1.703493978055176e-05, - "loss": 0.3479, - "step": 5660 - }, - { - "epoch": 0.5332893714231883, - "grad_norm": 0.768088698387146, - "learning_rate": 1.7033866528648722e-05, - "loss": 0.2789, - "step": 5661 - }, - { - "epoch": 0.5333835755163562, - "grad_norm": 0.971484899520874, - "learning_rate": 1.703279311636265e-05, - "loss": 0.3748, - "step": 5662 - }, - { - "epoch": 0.533477779609524, - "grad_norm": 0.7638828754425049, - "learning_rate": 1.7031719543718018e-05, - "loss": 0.3528, - "step": 5663 - }, - { - "epoch": 0.5335719837026919, - "grad_norm": 0.817284345626831, - "learning_rate": 1.703064581073931e-05, - "loss": 0.3509, - "step": 5664 - }, - { - "epoch": 0.5336661877958597, - "grad_norm": 0.8197344541549683, - "learning_rate": 1.7029571917451e-05, - "loss": 0.3453, - "step": 5665 - }, - { - "epoch": 0.5337603918890276, - "grad_norm": 0.7332201600074768, - "learning_rate": 1.7028497863877576e-05, - "loss": 0.3008, - "step": 5666 - }, - { - "epoch": 0.5338545959821954, - "grad_norm": 0.876661479473114, - "learning_rate": 1.7027423650043538e-05, - "loss": 0.3491, - "step": 5667 - }, - { - "epoch": 0.5339488000753633, - "grad_norm": 0.7736461758613586, - "learning_rate": 
1.702634927597337e-05, - "loss": 0.3967, - "step": 5668 - }, - { - "epoch": 0.5340430041685311, - "grad_norm": 1.092462182044983, - "learning_rate": 1.702527474169157e-05, - "loss": 0.3447, - "step": 5669 - }, - { - "epoch": 0.534137208261699, - "grad_norm": 0.7720444798469543, - "learning_rate": 1.7024200047222645e-05, - "loss": 0.3777, - "step": 5670 - }, - { - "epoch": 0.5342314123548668, - "grad_norm": 0.8187174797058105, - "learning_rate": 1.7023125192591092e-05, - "loss": 0.3194, - "step": 5671 - }, - { - "epoch": 0.5343256164480347, - "grad_norm": 0.6948956251144409, - "learning_rate": 1.7022050177821425e-05, - "loss": 0.3183, - "step": 5672 - }, - { - "epoch": 0.5344198205412025, - "grad_norm": 0.8639745116233826, - "learning_rate": 1.702097500293815e-05, - "loss": 0.3483, - "step": 5673 - }, - { - "epoch": 0.5345140246343704, - "grad_norm": 0.7871007919311523, - "learning_rate": 1.7019899667965795e-05, - "loss": 0.3817, - "step": 5674 - }, - { - "epoch": 0.5346082287275382, - "grad_norm": 0.7117692232131958, - "learning_rate": 1.7018824172928864e-05, - "loss": 0.3135, - "step": 5675 - }, - { - "epoch": 0.5347024328207061, - "grad_norm": 0.6765762567520142, - "learning_rate": 1.701774851785189e-05, - "loss": 0.351, - "step": 5676 - }, - { - "epoch": 0.5347966369138739, - "grad_norm": 0.7431235313415527, - "learning_rate": 1.7016672702759397e-05, - "loss": 0.2992, - "step": 5677 - }, - { - "epoch": 0.5348908410070418, - "grad_norm": 0.7420750856399536, - "learning_rate": 1.7015596727675914e-05, - "loss": 0.3344, - "step": 5678 - }, - { - "epoch": 0.5349850451002096, - "grad_norm": 0.6724424958229065, - "learning_rate": 1.7014520592625977e-05, - "loss": 0.2744, - "step": 5679 - }, - { - "epoch": 0.5350792491933775, - "grad_norm": 0.7238250970840454, - "learning_rate": 1.7013444297634122e-05, - "loss": 0.3036, - "step": 5680 - }, - { - "epoch": 0.5351734532865453, - "grad_norm": 0.8361940383911133, - "learning_rate": 1.7012367842724887e-05, - "loss": 0.3544, - "step": 5681 - }, - { - "epoch": 0.5352676573797132, - "grad_norm": 0.7910535335540771, - "learning_rate": 1.7011291227922827e-05, - "loss": 0.3195, - "step": 5682 - }, - { - "epoch": 0.535361861472881, - "grad_norm": 0.9023531675338745, - "learning_rate": 1.7010214453252477e-05, - "loss": 0.3374, - "step": 5683 - }, - { - "epoch": 0.5354560655660489, - "grad_norm": 0.7297065258026123, - "learning_rate": 1.7009137518738397e-05, - "loss": 0.3461, - "step": 5684 - }, - { - "epoch": 0.5355502696592167, - "grad_norm": 0.9916013479232788, - "learning_rate": 1.7008060424405145e-05, - "loss": 0.3538, - "step": 5685 - }, - { - "epoch": 0.5356444737523846, - "grad_norm": 0.8602205514907837, - "learning_rate": 1.7006983170277277e-05, - "loss": 0.3424, - "step": 5686 - }, - { - "epoch": 0.5357386778455524, - "grad_norm": 0.7788354754447937, - "learning_rate": 1.7005905756379354e-05, - "loss": 0.2895, - "step": 5687 - }, - { - "epoch": 0.5358328819387203, - "grad_norm": 0.9086410999298096, - "learning_rate": 1.7004828182735947e-05, - "loss": 0.4563, - "step": 5688 - }, - { - "epoch": 0.5359270860318881, - "grad_norm": 0.7011946439743042, - "learning_rate": 1.7003750449371624e-05, - "loss": 0.3276, - "step": 5689 - }, - { - "epoch": 0.536021290125056, - "grad_norm": 0.7426707148551941, - "learning_rate": 1.7002672556310957e-05, - "loss": 0.3452, - "step": 5690 - }, - { - "epoch": 0.5361154942182238, - "grad_norm": 0.7261567711830139, - "learning_rate": 1.7001594503578526e-05, - "loss": 0.2979, - "step": 5691 - }, - { - "epoch": 
0.5362096983113916, - "grad_norm": 0.760969340801239, - "learning_rate": 1.7000516291198914e-05, - "loss": 0.3198, - "step": 5692 - }, - { - "epoch": 0.5363039024045595, - "grad_norm": 0.7361472845077515, - "learning_rate": 1.6999437919196705e-05, - "loss": 0.3361, - "step": 5693 - }, - { - "epoch": 0.5363981064977273, - "grad_norm": 0.818294107913971, - "learning_rate": 1.6998359387596484e-05, - "loss": 0.3582, - "step": 5694 - }, - { - "epoch": 0.5364923105908952, - "grad_norm": 0.8254144191741943, - "learning_rate": 1.699728069642285e-05, - "loss": 0.3582, - "step": 5695 - }, - { - "epoch": 0.536586514684063, - "grad_norm": 0.7636622190475464, - "learning_rate": 1.699620184570039e-05, - "loss": 0.337, - "step": 5696 - }, - { - "epoch": 0.5366807187772309, - "grad_norm": 0.7546584606170654, - "learning_rate": 1.6995122835453708e-05, - "loss": 0.3052, - "step": 5697 - }, - { - "epoch": 0.5367749228703987, - "grad_norm": 0.8165667057037354, - "learning_rate": 1.699404366570741e-05, - "loss": 0.2908, - "step": 5698 - }, - { - "epoch": 0.5368691269635666, - "grad_norm": 0.7652526497840881, - "learning_rate": 1.6992964336486094e-05, - "loss": 0.3535, - "step": 5699 - }, - { - "epoch": 0.5369633310567344, - "grad_norm": 0.786133348941803, - "learning_rate": 1.6991884847814385e-05, - "loss": 0.3139, - "step": 5700 - }, - { - "epoch": 0.5370575351499023, - "grad_norm": 0.7707028985023499, - "learning_rate": 1.6990805199716885e-05, - "loss": 0.3701, - "step": 5701 - }, - { - "epoch": 0.5371517392430701, - "grad_norm": 0.7296610474586487, - "learning_rate": 1.6989725392218213e-05, - "loss": 0.308, - "step": 5702 - }, - { - "epoch": 0.537245943336238, - "grad_norm": 0.6958910226821899, - "learning_rate": 1.6988645425342993e-05, - "loss": 0.3136, - "step": 5703 - }, - { - "epoch": 0.5373401474294058, - "grad_norm": 0.8359389901161194, - "learning_rate": 1.698756529911585e-05, - "loss": 0.3752, - "step": 5704 - }, - { - "epoch": 0.5374343515225737, - "grad_norm": 0.7149270176887512, - "learning_rate": 1.698648501356141e-05, - "loss": 0.3216, - "step": 5705 - }, - { - "epoch": 0.5375285556157415, - "grad_norm": 0.6960687041282654, - "learning_rate": 1.698540456870431e-05, - "loss": 0.295, - "step": 5706 - }, - { - "epoch": 0.5376227597089094, - "grad_norm": 0.7871416807174683, - "learning_rate": 1.698432396456918e-05, - "loss": 0.3146, - "step": 5707 - }, - { - "epoch": 0.5377169638020772, - "grad_norm": 0.7495483160018921, - "learning_rate": 1.6983243201180663e-05, - "loss": 0.3428, - "step": 5708 - }, - { - "epoch": 0.5378111678952451, - "grad_norm": 0.7924083471298218, - "learning_rate": 1.69821622785634e-05, - "loss": 0.4065, - "step": 5709 - }, - { - "epoch": 0.5379053719884129, - "grad_norm": 0.6857657432556152, - "learning_rate": 1.698108119674204e-05, - "loss": 0.3177, - "step": 5710 - }, - { - "epoch": 0.5379995760815808, - "grad_norm": 0.6931462287902832, - "learning_rate": 1.6979999955741234e-05, - "loss": 0.2973, - "step": 5711 - }, - { - "epoch": 0.5380937801747486, - "grad_norm": 0.8428118824958801, - "learning_rate": 1.6978918555585634e-05, - "loss": 0.3352, - "step": 5712 - }, - { - "epoch": 0.5381879842679165, - "grad_norm": 0.7515289187431335, - "learning_rate": 1.6977836996299896e-05, - "loss": 0.3374, - "step": 5713 - }, - { - "epoch": 0.5382821883610843, - "grad_norm": 0.7781171798706055, - "learning_rate": 1.6976755277908684e-05, - "loss": 0.3353, - "step": 5714 - }, - { - "epoch": 0.5383763924542522, - "grad_norm": 0.8441618084907532, - "learning_rate": 1.6975673400436662e-05, - 
"loss": 0.3592, - "step": 5715 - }, - { - "epoch": 0.53847059654742, - "grad_norm": 0.7104495763778687, - "learning_rate": 1.6974591363908496e-05, - "loss": 0.3033, - "step": 5716 - }, - { - "epoch": 0.5385648006405879, - "grad_norm": 0.83321213722229, - "learning_rate": 1.6973509168348863e-05, - "loss": 0.3641, - "step": 5717 - }, - { - "epoch": 0.5386590047337557, - "grad_norm": 0.8065574169158936, - "learning_rate": 1.6972426813782433e-05, - "loss": 0.3526, - "step": 5718 - }, - { - "epoch": 0.5387532088269236, - "grad_norm": 0.7498657703399658, - "learning_rate": 1.6971344300233893e-05, - "loss": 0.312, - "step": 5719 - }, - { - "epoch": 0.5388474129200914, - "grad_norm": 0.7255940437316895, - "learning_rate": 1.697026162772792e-05, - "loss": 0.2893, - "step": 5720 - }, - { - "epoch": 0.5389416170132593, - "grad_norm": 0.7836167812347412, - "learning_rate": 1.6969178796289202e-05, - "loss": 0.2813, - "step": 5721 - }, - { - "epoch": 0.5390358211064271, - "grad_norm": 0.9158719182014465, - "learning_rate": 1.696809580594243e-05, - "loss": 0.3561, - "step": 5722 - }, - { - "epoch": 0.539130025199595, - "grad_norm": 0.8073228001594543, - "learning_rate": 1.6967012656712296e-05, - "loss": 0.3363, - "step": 5723 - }, - { - "epoch": 0.5392242292927628, - "grad_norm": 0.7554263472557068, - "learning_rate": 1.6965929348623497e-05, - "loss": 0.2851, - "step": 5724 - }, - { - "epoch": 0.5393184333859307, - "grad_norm": 0.760604739189148, - "learning_rate": 1.696484588170074e-05, - "loss": 0.3638, - "step": 5725 - }, - { - "epoch": 0.5394126374790985, - "grad_norm": 0.9967789649963379, - "learning_rate": 1.6963762255968723e-05, - "loss": 0.3422, - "step": 5726 - }, - { - "epoch": 0.5395068415722664, - "grad_norm": 0.7668309807777405, - "learning_rate": 1.6962678471452158e-05, - "loss": 0.3403, - "step": 5727 - }, - { - "epoch": 0.5396010456654342, - "grad_norm": 0.7941800355911255, - "learning_rate": 1.6961594528175757e-05, - "loss": 0.3789, - "step": 5728 - }, - { - "epoch": 0.5396952497586021, - "grad_norm": 0.6772311925888062, - "learning_rate": 1.6960510426164233e-05, - "loss": 0.2775, - "step": 5729 - }, - { - "epoch": 0.5397894538517699, - "grad_norm": 0.7815453410148621, - "learning_rate": 1.6959426165442306e-05, - "loss": 0.3235, - "step": 5730 - }, - { - "epoch": 0.5398836579449378, - "grad_norm": 0.8438002467155457, - "learning_rate": 1.69583417460347e-05, - "loss": 0.3244, - "step": 5731 - }, - { - "epoch": 0.5399778620381056, - "grad_norm": 0.8616735339164734, - "learning_rate": 1.6957257167966142e-05, - "loss": 0.3283, - "step": 5732 - }, - { - "epoch": 0.5400720661312735, - "grad_norm": 0.8114157915115356, - "learning_rate": 1.695617243126136e-05, - "loss": 0.3469, - "step": 5733 - }, - { - "epoch": 0.5401662702244413, - "grad_norm": 0.7440274953842163, - "learning_rate": 1.6955087535945085e-05, - "loss": 0.3141, - "step": 5734 - }, - { - "epoch": 0.5402604743176092, - "grad_norm": 0.7023537755012512, - "learning_rate": 1.6954002482042065e-05, - "loss": 0.3036, - "step": 5735 - }, - { - "epoch": 0.540354678410777, - "grad_norm": 0.6811996102333069, - "learning_rate": 1.6952917269577026e-05, - "loss": 0.3162, - "step": 5736 - }, - { - "epoch": 0.5404488825039448, - "grad_norm": 0.8686399459838867, - "learning_rate": 1.6951831898574727e-05, - "loss": 0.381, - "step": 5737 - }, - { - "epoch": 0.5405430865971127, - "grad_norm": 0.8593584895133972, - "learning_rate": 1.6950746369059908e-05, - "loss": 0.378, - "step": 5738 - }, - { - "epoch": 0.5406372906902805, - "grad_norm": 
0.8086289763450623, - "learning_rate": 1.694966068105732e-05, - "loss": 0.3461, - "step": 5739 - }, - { - "epoch": 0.5407314947834483, - "grad_norm": 0.952411413192749, - "learning_rate": 1.6948574834591722e-05, - "loss": 0.3862, - "step": 5740 - }, - { - "epoch": 0.5408256988766161, - "grad_norm": 0.7227944731712341, - "learning_rate": 1.694748882968787e-05, - "loss": 0.3257, - "step": 5741 - }, - { - "epoch": 0.540919902969784, - "grad_norm": 0.6877641081809998, - "learning_rate": 1.6946402666370533e-05, - "loss": 0.2851, - "step": 5742 - }, - { - "epoch": 0.5410141070629518, - "grad_norm": 0.8375235795974731, - "learning_rate": 1.6945316344664468e-05, - "loss": 0.3337, - "step": 5743 - }, - { - "epoch": 0.5411083111561197, - "grad_norm": 0.7709132432937622, - "learning_rate": 1.694422986459445e-05, - "loss": 0.3152, - "step": 5744 - }, - { - "epoch": 0.5412025152492875, - "grad_norm": 0.7222093343734741, - "learning_rate": 1.6943143226185252e-05, - "loss": 0.3001, - "step": 5745 - }, - { - "epoch": 0.5412967193424554, - "grad_norm": 2.080857276916504, - "learning_rate": 1.694205642946165e-05, - "loss": 0.2963, - "step": 5746 - }, - { - "epoch": 0.5413909234356232, - "grad_norm": 0.847663938999176, - "learning_rate": 1.6940969474448427e-05, - "loss": 0.3205, - "step": 5747 - }, - { - "epoch": 0.5414851275287911, - "grad_norm": 0.8542855381965637, - "learning_rate": 1.6939882361170364e-05, - "loss": 0.3426, - "step": 5748 - }, - { - "epoch": 0.5415793316219589, - "grad_norm": 0.8413586616516113, - "learning_rate": 1.693879508965225e-05, - "loss": 0.3558, - "step": 5749 - }, - { - "epoch": 0.5416735357151268, - "grad_norm": 0.7056402564048767, - "learning_rate": 1.693770765991888e-05, - "loss": 0.3113, - "step": 5750 - }, - { - "epoch": 0.5417677398082946, - "grad_norm": 0.7774807214736938, - "learning_rate": 1.6936620071995044e-05, - "loss": 0.3115, - "step": 5751 - }, - { - "epoch": 0.5418619439014625, - "grad_norm": 0.8446057438850403, - "learning_rate": 1.693553232590554e-05, - "loss": 0.4087, - "step": 5752 - }, - { - "epoch": 0.5419561479946303, - "grad_norm": 0.7298682928085327, - "learning_rate": 1.693444442167518e-05, - "loss": 0.3067, - "step": 5753 - }, - { - "epoch": 0.5420503520877982, - "grad_norm": 0.7178559899330139, - "learning_rate": 1.6933356359328756e-05, - "loss": 0.327, - "step": 5754 - }, - { - "epoch": 0.542144556180966, - "grad_norm": 0.7992531061172485, - "learning_rate": 1.693226813889109e-05, - "loss": 0.333, - "step": 5755 - }, - { - "epoch": 0.5422387602741339, - "grad_norm": 0.6976920962333679, - "learning_rate": 1.6931179760386983e-05, - "loss": 0.2899, - "step": 5756 - }, - { - "epoch": 0.5423329643673017, - "grad_norm": 0.778207540512085, - "learning_rate": 1.693009122384126e-05, - "loss": 0.3128, - "step": 5757 - }, - { - "epoch": 0.5424271684604696, - "grad_norm": 0.812389612197876, - "learning_rate": 1.692900252927874e-05, - "loss": 0.3304, - "step": 5758 - }, - { - "epoch": 0.5425213725536374, - "grad_norm": 0.6788195371627808, - "learning_rate": 1.6927913676724247e-05, - "loss": 0.3077, - "step": 5759 - }, - { - "epoch": 0.5426155766468053, - "grad_norm": 0.7889478206634521, - "learning_rate": 1.6926824666202612e-05, - "loss": 0.3277, - "step": 5760 - }, - { - "epoch": 0.5427097807399731, - "grad_norm": 0.8202923536300659, - "learning_rate": 1.6925735497738656e-05, - "loss": 0.3129, - "step": 5761 - }, - { - "epoch": 0.542803984833141, - "grad_norm": 0.6905041337013245, - "learning_rate": 1.6924646171357225e-05, - "loss": 0.3115, - "step": 5762 - }, - 
{ - "epoch": 0.5428981889263088, - "grad_norm": 0.7418622374534607, - "learning_rate": 1.6923556687083147e-05, - "loss": 0.319, - "step": 5763 - }, - { - "epoch": 0.5429923930194767, - "grad_norm": 0.7273638844490051, - "learning_rate": 1.692246704494127e-05, - "loss": 0.3173, - "step": 5764 - }, - { - "epoch": 0.5430865971126445, - "grad_norm": 0.7781940698623657, - "learning_rate": 1.6921377244956444e-05, - "loss": 0.3177, - "step": 5765 - }, - { - "epoch": 0.5431808012058124, - "grad_norm": 0.6877148747444153, - "learning_rate": 1.6920287287153506e-05, - "loss": 0.2994, - "step": 5766 - }, - { - "epoch": 0.5432750052989802, - "grad_norm": 0.7039399743080139, - "learning_rate": 1.691919717155732e-05, - "loss": 0.3828, - "step": 5767 - }, - { - "epoch": 0.5433692093921481, - "grad_norm": 0.825885534286499, - "learning_rate": 1.6918106898192734e-05, - "loss": 0.3283, - "step": 5768 - }, - { - "epoch": 0.5434634134853159, - "grad_norm": 0.7322880029678345, - "learning_rate": 1.6917016467084614e-05, - "loss": 0.3095, - "step": 5769 - }, - { - "epoch": 0.5435576175784838, - "grad_norm": 0.7304609417915344, - "learning_rate": 1.691592587825782e-05, - "loss": 0.3661, - "step": 5770 - }, - { - "epoch": 0.5436518216716516, - "grad_norm": 0.9463625550270081, - "learning_rate": 1.691483513173722e-05, - "loss": 0.3022, - "step": 5771 - }, - { - "epoch": 0.5437460257648195, - "grad_norm": 0.7674868106842041, - "learning_rate": 1.6913744227547687e-05, - "loss": 0.341, - "step": 5772 - }, - { - "epoch": 0.5438402298579873, - "grad_norm": 0.6974307894706726, - "learning_rate": 1.691265316571409e-05, - "loss": 0.3063, - "step": 5773 - }, - { - "epoch": 0.5439344339511551, - "grad_norm": 0.8258277177810669, - "learning_rate": 1.691156194626131e-05, - "loss": 0.3272, - "step": 5774 - }, - { - "epoch": 0.544028638044323, - "grad_norm": 0.8583522439002991, - "learning_rate": 1.6910470569214236e-05, - "loss": 0.3066, - "step": 5775 - }, - { - "epoch": 0.5441228421374908, - "grad_norm": 0.7144218683242798, - "learning_rate": 1.6909379034597742e-05, - "loss": 0.3272, - "step": 5776 - }, - { - "epoch": 0.5442170462306587, - "grad_norm": 0.8724987506866455, - "learning_rate": 1.6908287342436718e-05, - "loss": 0.3839, - "step": 5777 - }, - { - "epoch": 0.5443112503238265, - "grad_norm": 0.7545903325080872, - "learning_rate": 1.690719549275606e-05, - "loss": 0.3492, - "step": 5778 - }, - { - "epoch": 0.5444054544169944, - "grad_norm": 0.8440935611724854, - "learning_rate": 1.690610348558066e-05, - "loss": 0.3367, - "step": 5779 - }, - { - "epoch": 0.5444996585101622, - "grad_norm": 0.7396623492240906, - "learning_rate": 1.6905011320935425e-05, - "loss": 0.3481, - "step": 5780 - }, - { - "epoch": 0.5445938626033301, - "grad_norm": 0.7483841776847839, - "learning_rate": 1.690391899884525e-05, - "loss": 0.3532, - "step": 5781 - }, - { - "epoch": 0.5446880666964979, - "grad_norm": 0.7436134815216064, - "learning_rate": 1.6902826519335048e-05, - "loss": 0.3338, - "step": 5782 - }, - { - "epoch": 0.5447822707896658, - "grad_norm": 0.7555732727050781, - "learning_rate": 1.690173388242972e-05, - "loss": 0.3299, - "step": 5783 - }, - { - "epoch": 0.5448764748828336, - "grad_norm": 0.9253072142601013, - "learning_rate": 1.690064108815419e-05, - "loss": 0.3687, - "step": 5784 - }, - { - "epoch": 0.5449706789760015, - "grad_norm": 0.7201090455055237, - "learning_rate": 1.689954813653337e-05, - "loss": 0.3182, - "step": 5785 - }, - { - "epoch": 0.5450648830691693, - "grad_norm": 0.7178362011909485, - "learning_rate": 
1.6898455027592184e-05, - "loss": 0.3253, - "step": 5786 - }, - { - "epoch": 0.5451590871623372, - "grad_norm": 0.7304385304450989, - "learning_rate": 1.689736176135555e-05, - "loss": 0.3179, - "step": 5787 - }, - { - "epoch": 0.545253291255505, - "grad_norm": 0.7304025292396545, - "learning_rate": 1.6896268337848404e-05, - "loss": 0.3228, - "step": 5788 - }, - { - "epoch": 0.5453474953486729, - "grad_norm": 0.7466516494750977, - "learning_rate": 1.6895174757095676e-05, - "loss": 0.3581, - "step": 5789 - }, - { - "epoch": 0.5454416994418407, - "grad_norm": 0.7444651126861572, - "learning_rate": 1.68940810191223e-05, - "loss": 0.3225, - "step": 5790 - }, - { - "epoch": 0.5455359035350086, - "grad_norm": 0.9712061882019043, - "learning_rate": 1.689298712395321e-05, - "loss": 0.2931, - "step": 5791 - }, - { - "epoch": 0.5456301076281764, - "grad_norm": 0.7565516829490662, - "learning_rate": 1.689189307161336e-05, - "loss": 0.3122, - "step": 5792 - }, - { - "epoch": 0.5457243117213443, - "grad_norm": 0.7904927134513855, - "learning_rate": 1.6890798862127683e-05, - "loss": 0.3025, - "step": 5793 - }, - { - "epoch": 0.5458185158145121, - "grad_norm": 0.8687581419944763, - "learning_rate": 1.688970449552114e-05, - "loss": 0.3277, - "step": 5794 - }, - { - "epoch": 0.54591271990768, - "grad_norm": 0.7989944219589233, - "learning_rate": 1.688860997181868e-05, - "loss": 0.3416, - "step": 5795 - }, - { - "epoch": 0.5460069240008478, - "grad_norm": 0.8734157681465149, - "learning_rate": 1.6887515291045255e-05, - "loss": 0.3433, - "step": 5796 - }, - { - "epoch": 0.5461011280940157, - "grad_norm": 0.8354502320289612, - "learning_rate": 1.6886420453225832e-05, - "loss": 0.3027, - "step": 5797 - }, - { - "epoch": 0.5461953321871835, - "grad_norm": 0.8106501698493958, - "learning_rate": 1.6885325458385372e-05, - "loss": 0.3838, - "step": 5798 - }, - { - "epoch": 0.5462895362803514, - "grad_norm": 0.9645668864250183, - "learning_rate": 1.6884230306548842e-05, - "loss": 0.3242, - "step": 5799 - }, - { - "epoch": 0.5463837403735192, - "grad_norm": 1.0720152854919434, - "learning_rate": 1.6883134997741217e-05, - "loss": 0.3666, - "step": 5800 - }, - { - "epoch": 0.5464779444666871, - "grad_norm": 0.7827078104019165, - "learning_rate": 1.6882039531987467e-05, - "loss": 0.3435, - "step": 5801 - }, - { - "epoch": 0.5465721485598549, - "grad_norm": 0.7407355308532715, - "learning_rate": 1.6880943909312573e-05, - "loss": 0.3001, - "step": 5802 - }, - { - "epoch": 0.5466663526530228, - "grad_norm": 0.8197450637817383, - "learning_rate": 1.6879848129741516e-05, - "loss": 0.3273, - "step": 5803 - }, - { - "epoch": 0.5467605567461906, - "grad_norm": 0.7303133606910706, - "learning_rate": 1.6878752193299282e-05, - "loss": 0.3215, - "step": 5804 - }, - { - "epoch": 0.5468547608393585, - "grad_norm": 0.7092004418373108, - "learning_rate": 1.687765610001086e-05, - "loss": 0.3097, - "step": 5805 - }, - { - "epoch": 0.5469489649325263, - "grad_norm": 0.7629405856132507, - "learning_rate": 1.6876559849901243e-05, - "loss": 0.3387, - "step": 5806 - }, - { - "epoch": 0.5470431690256942, - "grad_norm": 0.9411080479621887, - "learning_rate": 1.6875463442995426e-05, - "loss": 0.3637, - "step": 5807 - }, - { - "epoch": 0.547137373118862, - "grad_norm": 0.7682428359985352, - "learning_rate": 1.687436687931841e-05, - "loss": 0.2983, - "step": 5808 - }, - { - "epoch": 0.5472315772120299, - "grad_norm": 0.8321592807769775, - "learning_rate": 1.6873270158895196e-05, - "loss": 0.3313, - "step": 5809 - }, - { - "epoch": 
0.5473257813051977, - "grad_norm": 0.7408233880996704, - "learning_rate": 1.6872173281750796e-05, - "loss": 0.3124, - "step": 5810 - }, - { - "epoch": 0.5474199853983656, - "grad_norm": 0.7069203853607178, - "learning_rate": 1.6871076247910216e-05, - "loss": 0.2999, - "step": 5811 - }, - { - "epoch": 0.5475141894915334, - "grad_norm": 0.7806495428085327, - "learning_rate": 1.6869979057398468e-05, - "loss": 0.3431, - "step": 5812 - }, - { - "epoch": 0.5476083935847013, - "grad_norm": 0.8627522587776184, - "learning_rate": 1.6868881710240574e-05, - "loss": 0.3756, - "step": 5813 - }, - { - "epoch": 0.5477025976778691, - "grad_norm": 0.6727634072303772, - "learning_rate": 1.6867784206461554e-05, - "loss": 0.2804, - "step": 5814 - }, - { - "epoch": 0.547796801771037, - "grad_norm": 0.822658360004425, - "learning_rate": 1.6866686546086435e-05, - "loss": 0.3237, - "step": 5815 - }, - { - "epoch": 0.5478910058642048, - "grad_norm": 0.7285251021385193, - "learning_rate": 1.6865588729140242e-05, - "loss": 0.3172, - "step": 5816 - }, - { - "epoch": 0.5479852099573727, - "grad_norm": 0.671876072883606, - "learning_rate": 1.686449075564801e-05, - "loss": 0.2954, - "step": 5817 - }, - { - "epoch": 0.5480794140505405, - "grad_norm": 0.8374348878860474, - "learning_rate": 1.686339262563477e-05, - "loss": 0.3504, - "step": 5818 - }, - { - "epoch": 0.5481736181437084, - "grad_norm": 0.7035629153251648, - "learning_rate": 1.686229433912556e-05, - "loss": 0.3206, - "step": 5819 - }, - { - "epoch": 0.5482678222368762, - "grad_norm": 0.8646016716957092, - "learning_rate": 1.6861195896145436e-05, - "loss": 0.3803, - "step": 5820 - }, - { - "epoch": 0.548362026330044, - "grad_norm": 0.7874694466590881, - "learning_rate": 1.686009729671943e-05, - "loss": 0.3362, - "step": 5821 - }, - { - "epoch": 0.5484562304232119, - "grad_norm": 0.7751681208610535, - "learning_rate": 1.6858998540872594e-05, - "loss": 0.324, - "step": 5822 - }, - { - "epoch": 0.5485504345163797, - "grad_norm": 0.7489470839500427, - "learning_rate": 1.6857899628629985e-05, - "loss": 0.3287, - "step": 5823 - }, - { - "epoch": 0.5486446386095476, - "grad_norm": 0.7112763524055481, - "learning_rate": 1.6856800560016657e-05, - "loss": 0.3521, - "step": 5824 - }, - { - "epoch": 0.5487388427027154, - "grad_norm": 0.8134369850158691, - "learning_rate": 1.6855701335057677e-05, - "loss": 0.3763, - "step": 5825 - }, - { - "epoch": 0.5488330467958833, - "grad_norm": 0.6973388195037842, - "learning_rate": 1.68546019537781e-05, - "loss": 0.332, - "step": 5826 - }, - { - "epoch": 0.5489272508890511, - "grad_norm": 0.7977001667022705, - "learning_rate": 1.6853502416203e-05, - "loss": 0.3845, - "step": 5827 - }, - { - "epoch": 0.549021454982219, - "grad_norm": 0.8567266464233398, - "learning_rate": 1.6852402722357443e-05, - "loss": 0.3562, - "step": 5828 - }, - { - "epoch": 0.5491156590753868, - "grad_norm": 0.7990422248840332, - "learning_rate": 1.6851302872266507e-05, - "loss": 0.318, - "step": 5829 - }, - { - "epoch": 0.5492098631685547, - "grad_norm": 0.701423168182373, - "learning_rate": 1.6850202865955272e-05, - "loss": 0.3355, - "step": 5830 - }, - { - "epoch": 0.5493040672617225, - "grad_norm": 0.7400321960449219, - "learning_rate": 1.6849102703448818e-05, - "loss": 0.3452, - "step": 5831 - }, - { - "epoch": 0.5493982713548904, - "grad_norm": 0.7852572202682495, - "learning_rate": 1.6848002384772225e-05, - "loss": 0.3286, - "step": 5832 - }, - { - "epoch": 0.5494924754480582, - "grad_norm": 0.6592714786529541, - "learning_rate": 1.6846901909950594e-05, - 
"loss": 0.2842, - "step": 5833 - }, - { - "epoch": 0.5495866795412261, - "grad_norm": 0.7866077423095703, - "learning_rate": 1.6845801279009e-05, - "loss": 0.3133, - "step": 5834 - }, - { - "epoch": 0.5496808836343939, - "grad_norm": 0.730317234992981, - "learning_rate": 1.684470049197256e-05, - "loss": 0.3178, - "step": 5835 - }, - { - "epoch": 0.5497750877275618, - "grad_norm": 1.0602613687515259, - "learning_rate": 1.684359954886636e-05, - "loss": 0.3482, - "step": 5836 - }, - { - "epoch": 0.5498692918207296, - "grad_norm": 0.8464245796203613, - "learning_rate": 1.6842498449715506e-05, - "loss": 0.3677, - "step": 5837 - }, - { - "epoch": 0.5499634959138975, - "grad_norm": 0.8427851796150208, - "learning_rate": 1.6841397194545104e-05, - "loss": 0.368, - "step": 5838 - }, - { - "epoch": 0.5500577000070653, - "grad_norm": 0.7537657618522644, - "learning_rate": 1.6840295783380266e-05, - "loss": 0.3785, - "step": 5839 - }, - { - "epoch": 0.5501519041002332, - "grad_norm": 1.0740464925765991, - "learning_rate": 1.683919421624611e-05, - "loss": 0.3479, - "step": 5840 - }, - { - "epoch": 0.550246108193401, - "grad_norm": 0.6912543773651123, - "learning_rate": 1.683809249316774e-05, - "loss": 0.317, - "step": 5841 - }, - { - "epoch": 0.5503403122865689, - "grad_norm": 0.8974886536598206, - "learning_rate": 1.6836990614170296e-05, - "loss": 0.3481, - "step": 5842 - }, - { - "epoch": 0.5504345163797367, - "grad_norm": 0.6748872995376587, - "learning_rate": 1.6835888579278887e-05, - "loss": 0.2973, - "step": 5843 - }, - { - "epoch": 0.5505287204729046, - "grad_norm": 0.8794151544570923, - "learning_rate": 1.6834786388518646e-05, - "loss": 0.3499, - "step": 5844 - }, - { - "epoch": 0.5506229245660724, - "grad_norm": 0.8693121075630188, - "learning_rate": 1.6833684041914704e-05, - "loss": 0.3676, - "step": 5845 - }, - { - "epoch": 0.5507171286592403, - "grad_norm": 0.7209320664405823, - "learning_rate": 1.6832581539492198e-05, - "loss": 0.3318, - "step": 5846 - }, - { - "epoch": 0.5508113327524081, - "grad_norm": 0.6598643064498901, - "learning_rate": 1.683147888127627e-05, - "loss": 0.3241, - "step": 5847 - }, - { - "epoch": 0.550905536845576, - "grad_norm": 1.2043993473052979, - "learning_rate": 1.6830376067292053e-05, - "loss": 0.3987, - "step": 5848 - }, - { - "epoch": 0.5509997409387438, - "grad_norm": 0.7692755460739136, - "learning_rate": 1.6829273097564702e-05, - "loss": 0.3262, - "step": 5849 - }, - { - "epoch": 0.5510939450319117, - "grad_norm": 0.7148430943489075, - "learning_rate": 1.6828169972119362e-05, - "loss": 0.3101, - "step": 5850 - }, - { - "epoch": 0.5511881491250795, - "grad_norm": 0.7019156217575073, - "learning_rate": 1.6827066690981188e-05, - "loss": 0.3423, - "step": 5851 - }, - { - "epoch": 0.5512823532182474, - "grad_norm": 0.8840952515602112, - "learning_rate": 1.6825963254175333e-05, - "loss": 0.3792, - "step": 5852 - }, - { - "epoch": 0.5513765573114152, - "grad_norm": 0.8111600875854492, - "learning_rate": 1.682485966172696e-05, - "loss": 0.3665, - "step": 5853 - }, - { - "epoch": 0.5514707614045831, - "grad_norm": 0.6971681714057922, - "learning_rate": 1.682375591366123e-05, - "loss": 0.3451, - "step": 5854 - }, - { - "epoch": 0.5515649654977509, - "grad_norm": 0.7528238892555237, - "learning_rate": 1.6822652010003316e-05, - "loss": 0.3123, - "step": 5855 - }, - { - "epoch": 0.5516591695909188, - "grad_norm": 0.7064241170883179, - "learning_rate": 1.682154795077838e-05, - "loss": 0.3132, - "step": 5856 - }, - { - "epoch": 0.5517533736840866, - "grad_norm": 
0.8221530914306641, - "learning_rate": 1.6820443736011604e-05, - "loss": 0.3341, - "step": 5857 - }, - { - "epoch": 0.5518475777772545, - "grad_norm": 0.6869242787361145, - "learning_rate": 1.6819339365728162e-05, - "loss": 0.3087, - "step": 5858 - }, - { - "epoch": 0.5519417818704223, - "grad_norm": 0.6711872816085815, - "learning_rate": 1.6818234839953237e-05, - "loss": 0.301, - "step": 5859 - }, - { - "epoch": 0.5520359859635902, - "grad_norm": 1.7455143928527832, - "learning_rate": 1.6817130158712013e-05, - "loss": 0.3433, - "step": 5860 - }, - { - "epoch": 0.552130190056758, - "grad_norm": 0.6875800490379333, - "learning_rate": 1.6816025322029676e-05, - "loss": 0.3371, - "step": 5861 - }, - { - "epoch": 0.5522243941499259, - "grad_norm": 0.7491611242294312, - "learning_rate": 1.6814920329931427e-05, - "loss": 0.3523, - "step": 5862 - }, - { - "epoch": 0.5523185982430937, - "grad_norm": 0.7028124332427979, - "learning_rate": 1.681381518244245e-05, - "loss": 0.3379, - "step": 5863 - }, - { - "epoch": 0.5524128023362616, - "grad_norm": 0.718915581703186, - "learning_rate": 1.6812709879587944e-05, - "loss": 0.3384, - "step": 5864 - }, - { - "epoch": 0.5525070064294294, - "grad_norm": 0.8772738575935364, - "learning_rate": 1.6811604421393126e-05, - "loss": 0.317, - "step": 5865 - }, - { - "epoch": 0.5526012105225973, - "grad_norm": 0.7316091656684875, - "learning_rate": 1.6810498807883185e-05, - "loss": 0.3346, - "step": 5866 - }, - { - "epoch": 0.5526954146157651, - "grad_norm": 0.7291149497032166, - "learning_rate": 1.6809393039083347e-05, - "loss": 0.2907, - "step": 5867 - }, - { - "epoch": 0.552789618708933, - "grad_norm": 1.0712668895721436, - "learning_rate": 1.680828711501881e-05, - "loss": 0.3861, - "step": 5868 - }, - { - "epoch": 0.5528838228021008, - "grad_norm": 0.7991876006126404, - "learning_rate": 1.68071810357148e-05, - "loss": 0.3588, - "step": 5869 - }, - { - "epoch": 0.5529780268952686, - "grad_norm": 0.7969545125961304, - "learning_rate": 1.6806074801196536e-05, - "loss": 0.3586, - "step": 5870 - }, - { - "epoch": 0.5530722309884365, - "grad_norm": 0.7247651815414429, - "learning_rate": 1.6804968411489238e-05, - "loss": 0.3448, - "step": 5871 - }, - { - "epoch": 0.5531664350816043, - "grad_norm": 0.7463715672492981, - "learning_rate": 1.680386186661814e-05, - "loss": 0.2937, - "step": 5872 - }, - { - "epoch": 0.5532606391747722, - "grad_norm": 0.9406419992446899, - "learning_rate": 1.6802755166608465e-05, - "loss": 0.3404, - "step": 5873 - }, - { - "epoch": 0.55335484326794, - "grad_norm": 0.7245320677757263, - "learning_rate": 1.6801648311485453e-05, - "loss": 0.2765, - "step": 5874 - }, - { - "epoch": 0.5534490473611079, - "grad_norm": 0.8365300893783569, - "learning_rate": 1.6800541301274344e-05, - "loss": 0.3799, - "step": 5875 - }, - { - "epoch": 0.5535432514542757, - "grad_norm": 0.7576985359191895, - "learning_rate": 1.679943413600037e-05, - "loss": 0.3251, - "step": 5876 - }, - { - "epoch": 0.5536374555474436, - "grad_norm": 0.8103680610656738, - "learning_rate": 1.6798326815688787e-05, - "loss": 0.3515, - "step": 5877 - }, - { - "epoch": 0.5537316596406114, - "grad_norm": 0.7867361307144165, - "learning_rate": 1.6797219340364836e-05, - "loss": 0.3497, - "step": 5878 - }, - { - "epoch": 0.5538258637337792, - "grad_norm": 0.8220361471176147, - "learning_rate": 1.679611171005377e-05, - "loss": 0.3254, - "step": 5879 - }, - { - "epoch": 0.553920067826947, - "grad_norm": 0.7523766756057739, - "learning_rate": 1.6795003924780854e-05, - "loss": 0.3603, - "step": 5880 
- }, - { - "epoch": 0.5540142719201149, - "grad_norm": 0.8207775950431824, - "learning_rate": 1.6793895984571333e-05, - "loss": 0.3432, - "step": 5881 - }, - { - "epoch": 0.5541084760132827, - "grad_norm": 0.7494213581085205, - "learning_rate": 1.679278788945048e-05, - "loss": 0.3571, - "step": 5882 - }, - { - "epoch": 0.5542026801064506, - "grad_norm": 0.7716641426086426, - "learning_rate": 1.6791679639443556e-05, - "loss": 0.3705, - "step": 5883 - }, - { - "epoch": 0.5542968841996184, - "grad_norm": 0.6826990842819214, - "learning_rate": 1.6790571234575833e-05, - "loss": 0.3504, - "step": 5884 - }, - { - "epoch": 0.5543910882927863, - "grad_norm": 0.7369707822799683, - "learning_rate": 1.6789462674872584e-05, - "loss": 0.3452, - "step": 5885 - }, - { - "epoch": 0.5544852923859541, - "grad_norm": 0.8421262502670288, - "learning_rate": 1.6788353960359086e-05, - "loss": 0.3487, - "step": 5886 - }, - { - "epoch": 0.554579496479122, - "grad_norm": 0.8324693441390991, - "learning_rate": 1.678724509106062e-05, - "loss": 0.3402, - "step": 5887 - }, - { - "epoch": 0.5546737005722898, - "grad_norm": 0.9260829091072083, - "learning_rate": 1.678613606700247e-05, - "loss": 0.3887, - "step": 5888 - }, - { - "epoch": 0.5547679046654577, - "grad_norm": 0.756175696849823, - "learning_rate": 1.678502688820992e-05, - "loss": 0.3463, - "step": 5889 - }, - { - "epoch": 0.5548621087586255, - "grad_norm": 0.7201794385910034, - "learning_rate": 1.6783917554708264e-05, - "loss": 0.3544, - "step": 5890 - }, - { - "epoch": 0.5549563128517934, - "grad_norm": 0.7131332159042358, - "learning_rate": 1.6782808066522796e-05, - "loss": 0.3211, - "step": 5891 - }, - { - "epoch": 0.5550505169449612, - "grad_norm": 0.8617600798606873, - "learning_rate": 1.678169842367882e-05, - "loss": 0.3358, - "step": 5892 - }, - { - "epoch": 0.5551447210381291, - "grad_norm": 0.7968863844871521, - "learning_rate": 1.6780588626201626e-05, - "loss": 0.3499, - "step": 5893 - }, - { - "epoch": 0.5552389251312969, - "grad_norm": 0.856134831905365, - "learning_rate": 1.677947867411653e-05, - "loss": 0.2971, - "step": 5894 - }, - { - "epoch": 0.5553331292244648, - "grad_norm": 1.0501809120178223, - "learning_rate": 1.677836856744883e-05, - "loss": 0.3298, - "step": 5895 - }, - { - "epoch": 0.5554273333176326, - "grad_norm": 0.7026547789573669, - "learning_rate": 1.6777258306223845e-05, - "loss": 0.3443, - "step": 5896 - }, - { - "epoch": 0.5555215374108005, - "grad_norm": 0.7354648113250732, - "learning_rate": 1.677614789046689e-05, - "loss": 0.3458, - "step": 5897 - }, - { - "epoch": 0.5556157415039683, - "grad_norm": 0.8148699402809143, - "learning_rate": 1.6775037320203285e-05, - "loss": 0.3603, - "step": 5898 - }, - { - "epoch": 0.5557099455971362, - "grad_norm": 0.810670793056488, - "learning_rate": 1.677392659545835e-05, - "loss": 0.3365, - "step": 5899 - }, - { - "epoch": 0.555804149690304, - "grad_norm": 0.7497707009315491, - "learning_rate": 1.6772815716257414e-05, - "loss": 0.3075, - "step": 5900 - }, - { - "epoch": 0.5558983537834719, - "grad_norm": 1.0751029253005981, - "learning_rate": 1.6771704682625802e-05, - "loss": 0.3328, - "step": 5901 - }, - { - "epoch": 0.5559925578766397, - "grad_norm": 0.7456806898117065, - "learning_rate": 1.6770593494588853e-05, - "loss": 0.3326, - "step": 5902 - }, - { - "epoch": 0.5560867619698076, - "grad_norm": 0.9161959886550903, - "learning_rate": 1.6769482152171902e-05, - "loss": 0.3603, - "step": 5903 - }, - { - "epoch": 0.5561809660629754, - "grad_norm": 0.7455811500549316, - "learning_rate": 
1.6768370655400286e-05, - "loss": 0.3131, - "step": 5904 - }, - { - "epoch": 0.5562751701561433, - "grad_norm": 0.8076489567756653, - "learning_rate": 1.6767259004299355e-05, - "loss": 0.3364, - "step": 5905 - }, - { - "epoch": 0.5563693742493111, - "grad_norm": 0.8564948439598083, - "learning_rate": 1.676614719889445e-05, - "loss": 0.3658, - "step": 5906 - }, - { - "epoch": 0.556463578342479, - "grad_norm": 1.1845258474349976, - "learning_rate": 1.6765035239210926e-05, - "loss": 0.3103, - "step": 5907 - }, - { - "epoch": 0.5565577824356468, - "grad_norm": 0.9748694896697998, - "learning_rate": 1.6763923125274137e-05, - "loss": 0.3656, - "step": 5908 - }, - { - "epoch": 0.5566519865288146, - "grad_norm": 0.804319441318512, - "learning_rate": 1.6762810857109436e-05, - "loss": 0.3332, - "step": 5909 - }, - { - "epoch": 0.5567461906219825, - "grad_norm": 0.6985915899276733, - "learning_rate": 1.6761698434742195e-05, - "loss": 0.3004, - "step": 5910 - }, - { - "epoch": 0.5568403947151503, - "grad_norm": 1.3096137046813965, - "learning_rate": 1.676058585819777e-05, - "loss": 0.3829, - "step": 5911 - }, - { - "epoch": 0.5569345988083182, - "grad_norm": 0.8578698635101318, - "learning_rate": 1.6759473127501532e-05, - "loss": 0.3393, - "step": 5912 - }, - { - "epoch": 0.557028802901486, - "grad_norm": 0.7410597801208496, - "learning_rate": 1.6758360242678852e-05, - "loss": 0.309, - "step": 5913 - }, - { - "epoch": 0.5571230069946539, - "grad_norm": 0.7900176048278809, - "learning_rate": 1.675724720375511e-05, - "loss": 0.4037, - "step": 5914 - }, - { - "epoch": 0.5572172110878217, - "grad_norm": 0.7226098775863647, - "learning_rate": 1.6756134010755675e-05, - "loss": 0.2961, - "step": 5915 - }, - { - "epoch": 0.5573114151809896, - "grad_norm": 0.7388399839401245, - "learning_rate": 1.675502066370594e-05, - "loss": 0.2991, - "step": 5916 - }, - { - "epoch": 0.5574056192741574, - "grad_norm": 0.6885120868682861, - "learning_rate": 1.6753907162631286e-05, - "loss": 0.2969, - "step": 5917 - }, - { - "epoch": 0.5574998233673253, - "grad_norm": 1.1873542070388794, - "learning_rate": 1.67527935075571e-05, - "loss": 0.3435, - "step": 5918 - }, - { - "epoch": 0.5575940274604931, - "grad_norm": 0.7631744146347046, - "learning_rate": 1.6751679698508786e-05, - "loss": 0.331, - "step": 5919 - }, - { - "epoch": 0.557688231553661, - "grad_norm": 0.7792918086051941, - "learning_rate": 1.675056573551173e-05, - "loss": 0.3342, - "step": 5920 - }, - { - "epoch": 0.5577824356468288, - "grad_norm": 0.8030377626419067, - "learning_rate": 1.6749451618591335e-05, - "loss": 0.3352, - "step": 5921 - }, - { - "epoch": 0.5578766397399967, - "grad_norm": 0.81960129737854, - "learning_rate": 1.6748337347773003e-05, - "loss": 0.384, - "step": 5922 - }, - { - "epoch": 0.5579708438331645, - "grad_norm": 0.8801048994064331, - "learning_rate": 1.674722292308215e-05, - "loss": 0.3506, - "step": 5923 - }, - { - "epoch": 0.5580650479263324, - "grad_norm": 0.7337673306465149, - "learning_rate": 1.674610834454417e-05, - "loss": 0.352, - "step": 5924 - }, - { - "epoch": 0.5581592520195002, - "grad_norm": 0.7766098976135254, - "learning_rate": 1.674499361218449e-05, - "loss": 0.3359, - "step": 5925 - }, - { - "epoch": 0.5582534561126681, - "grad_norm": 0.95281982421875, - "learning_rate": 1.6743878726028525e-05, - "loss": 0.3893, - "step": 5926 - }, - { - "epoch": 0.5583476602058359, - "grad_norm": 0.7152496576309204, - "learning_rate": 1.67427636861017e-05, - "loss": 0.3264, - "step": 5927 - }, - { - "epoch": 0.5584418642990038, - 
"grad_norm": 0.7302485108375549, - "learning_rate": 1.674164849242943e-05, - "loss": 0.3053, - "step": 5928 - }, - { - "epoch": 0.5585360683921716, - "grad_norm": 0.7903436422348022, - "learning_rate": 1.6740533145037147e-05, - "loss": 0.3568, - "step": 5929 - }, - { - "epoch": 0.5586302724853395, - "grad_norm": 0.7666296362876892, - "learning_rate": 1.6739417643950287e-05, - "loss": 0.3069, - "step": 5930 - }, - { - "epoch": 0.5587244765785073, - "grad_norm": 0.7227993011474609, - "learning_rate": 1.673830198919428e-05, - "loss": 0.322, - "step": 5931 - }, - { - "epoch": 0.5588186806716752, - "grad_norm": 0.7785037159919739, - "learning_rate": 1.6737186180794573e-05, - "loss": 0.3548, - "step": 5932 - }, - { - "epoch": 0.558912884764843, - "grad_norm": 0.8512129187583923, - "learning_rate": 1.6736070218776594e-05, - "loss": 0.3441, - "step": 5933 - }, - { - "epoch": 0.5590070888580109, - "grad_norm": 0.6713905334472656, - "learning_rate": 1.67349541031658e-05, - "loss": 0.3327, - "step": 5934 - }, - { - "epoch": 0.5591012929511787, - "grad_norm": 0.7852702140808105, - "learning_rate": 1.6733837833987634e-05, - "loss": 0.3128, - "step": 5935 - }, - { - "epoch": 0.5591954970443466, - "grad_norm": 1.1700233221054077, - "learning_rate": 1.673272141126755e-05, - "loss": 0.3093, - "step": 5936 - }, - { - "epoch": 0.5592897011375144, - "grad_norm": 0.7732242941856384, - "learning_rate": 1.673160483503101e-05, - "loss": 0.3331, - "step": 5937 - }, - { - "epoch": 0.5593839052306823, - "grad_norm": 0.7639914751052856, - "learning_rate": 1.6730488105303467e-05, - "loss": 0.3335, - "step": 5938 - }, - { - "epoch": 0.5594781093238501, - "grad_norm": 0.8488541841506958, - "learning_rate": 1.672937122211039e-05, - "loss": 0.3783, - "step": 5939 - }, - { - "epoch": 0.559572313417018, - "grad_norm": 0.7996029257774353, - "learning_rate": 1.672825418547724e-05, - "loss": 0.3387, - "step": 5940 - }, - { - "epoch": 0.5596665175101858, - "grad_norm": 0.847177267074585, - "learning_rate": 1.6727136995429484e-05, - "loss": 0.3859, - "step": 5941 - }, - { - "epoch": 0.5597607216033537, - "grad_norm": 0.8273327946662903, - "learning_rate": 1.6726019651992607e-05, - "loss": 0.3483, - "step": 5942 - }, - { - "epoch": 0.5598549256965215, - "grad_norm": 0.7557797431945801, - "learning_rate": 1.6724902155192077e-05, - "loss": 0.3221, - "step": 5943 - }, - { - "epoch": 0.5599491297896894, - "grad_norm": 0.7209107875823975, - "learning_rate": 1.6723784505053377e-05, - "loss": 0.3196, - "step": 5944 - }, - { - "epoch": 0.5600433338828572, - "grad_norm": 0.7248364686965942, - "learning_rate": 1.6722666701601997e-05, - "loss": 0.348, - "step": 5945 - }, - { - "epoch": 0.5601375379760251, - "grad_norm": 0.7239592671394348, - "learning_rate": 1.6721548744863413e-05, - "loss": 0.2932, - "step": 5946 - }, - { - "epoch": 0.5602317420691929, - "grad_norm": 0.8387351036071777, - "learning_rate": 1.6720430634863126e-05, - "loss": 0.3113, - "step": 5947 - }, - { - "epoch": 0.5603259461623608, - "grad_norm": 0.6595821976661682, - "learning_rate": 1.6719312371626623e-05, - "loss": 0.2771, - "step": 5948 - }, - { - "epoch": 0.5604201502555286, - "grad_norm": 0.8561384677886963, - "learning_rate": 1.671819395517941e-05, - "loss": 0.3539, - "step": 5949 - }, - { - "epoch": 0.5605143543486965, - "grad_norm": 0.7971121072769165, - "learning_rate": 1.6717075385546986e-05, - "loss": 0.3421, - "step": 5950 - }, - { - "epoch": 0.5606085584418643, - "grad_norm": 0.8530870079994202, - "learning_rate": 1.671595666275485e-05, - "loss": 0.3475, - 
"step": 5951 - }, - { - "epoch": 0.5607027625350322, - "grad_norm": 0.6426548361778259, - "learning_rate": 1.6714837786828525e-05, - "loss": 0.2956, - "step": 5952 - }, - { - "epoch": 0.5607969666282, - "grad_norm": 0.7764912843704224, - "learning_rate": 1.6713718757793508e-05, - "loss": 0.3498, - "step": 5953 - }, - { - "epoch": 0.5608911707213678, - "grad_norm": 0.8872030377388, - "learning_rate": 1.6712599575675318e-05, - "loss": 0.3307, - "step": 5954 - }, - { - "epoch": 0.5609853748145357, - "grad_norm": 0.6706919074058533, - "learning_rate": 1.6711480240499477e-05, - "loss": 0.2947, - "step": 5955 - }, - { - "epoch": 0.5610795789077035, - "grad_norm": 0.9219316840171814, - "learning_rate": 1.671036075229151e-05, - "loss": 0.38, - "step": 5956 - }, - { - "epoch": 0.5611737830008714, - "grad_norm": 0.7901992797851562, - "learning_rate": 1.6709241111076937e-05, - "loss": 0.299, - "step": 5957 - }, - { - "epoch": 0.5612679870940392, - "grad_norm": 0.8954774737358093, - "learning_rate": 1.6708121316881295e-05, - "loss": 0.3843, - "step": 5958 - }, - { - "epoch": 0.5613621911872071, - "grad_norm": 0.7102924585342407, - "learning_rate": 1.670700136973011e-05, - "loss": 0.3078, - "step": 5959 - }, - { - "epoch": 0.5614563952803749, - "grad_norm": 0.658418595790863, - "learning_rate": 1.670588126964892e-05, - "loss": 0.2838, - "step": 5960 - }, - { - "epoch": 0.5615505993735428, - "grad_norm": 0.8780316710472107, - "learning_rate": 1.6704761016663266e-05, - "loss": 0.3387, - "step": 5961 - }, - { - "epoch": 0.5616448034667106, - "grad_norm": 0.7748724222183228, - "learning_rate": 1.6703640610798694e-05, - "loss": 0.3662, - "step": 5962 - }, - { - "epoch": 0.5617390075598785, - "grad_norm": 0.6843838691711426, - "learning_rate": 1.670252005208075e-05, - "loss": 0.3143, - "step": 5963 - }, - { - "epoch": 0.5618332116530463, - "grad_norm": 0.9076165556907654, - "learning_rate": 1.670139934053498e-05, - "loss": 0.3444, - "step": 5964 - }, - { - "epoch": 0.5619274157462142, - "grad_norm": 0.71778404712677, - "learning_rate": 1.670027847618694e-05, - "loss": 0.301, - "step": 5965 - }, - { - "epoch": 0.562021619839382, - "grad_norm": 0.8193926215171814, - "learning_rate": 1.669915745906219e-05, - "loss": 0.3233, - "step": 5966 - }, - { - "epoch": 0.5621158239325499, - "grad_norm": 0.6978916525840759, - "learning_rate": 1.669803628918629e-05, - "loss": 0.3458, - "step": 5967 - }, - { - "epoch": 0.5622100280257177, - "grad_norm": 0.7813271880149841, - "learning_rate": 1.6696914966584805e-05, - "loss": 0.3363, - "step": 5968 - }, - { - "epoch": 0.5623042321188856, - "grad_norm": 0.7352415323257446, - "learning_rate": 1.6695793491283302e-05, - "loss": 0.3238, - "step": 5969 - }, - { - "epoch": 0.5623984362120534, - "grad_norm": 0.7068782448768616, - "learning_rate": 1.6694671863307352e-05, - "loss": 0.3334, - "step": 5970 - }, - { - "epoch": 0.5624926403052213, - "grad_norm": 0.7115263938903809, - "learning_rate": 1.6693550082682527e-05, - "loss": 0.3223, - "step": 5971 - }, - { - "epoch": 0.5625868443983891, - "grad_norm": 0.7842731475830078, - "learning_rate": 1.6692428149434413e-05, - "loss": 0.3312, - "step": 5972 - }, - { - "epoch": 0.562681048491557, - "grad_norm": 0.8012655377388, - "learning_rate": 1.6691306063588583e-05, - "loss": 0.3435, - "step": 5973 - }, - { - "epoch": 0.5627752525847248, - "grad_norm": 0.964786171913147, - "learning_rate": 1.669018382517063e-05, - "loss": 0.3674, - "step": 5974 - }, - { - "epoch": 0.5628694566778927, - "grad_norm": 0.8468067049980164, - "learning_rate": 
1.6689061434206134e-05, - "loss": 0.3292, - "step": 5975 - }, - { - "epoch": 0.5629636607710605, - "grad_norm": 0.7377479672431946, - "learning_rate": 1.6687938890720698e-05, - "loss": 0.3522, - "step": 5976 - }, - { - "epoch": 0.5630578648642284, - "grad_norm": 0.783111035823822, - "learning_rate": 1.668681619473991e-05, - "loss": 0.3473, - "step": 5977 - }, - { - "epoch": 0.5631520689573962, - "grad_norm": 0.7521863579750061, - "learning_rate": 1.6685693346289372e-05, - "loss": 0.3193, - "step": 5978 - }, - { - "epoch": 0.5632462730505641, - "grad_norm": 0.7061949968338013, - "learning_rate": 1.6684570345394683e-05, - "loss": 0.2905, - "step": 5979 - }, - { - "epoch": 0.5633404771437319, - "grad_norm": 0.8568319082260132, - "learning_rate": 1.6683447192081457e-05, - "loss": 0.3315, - "step": 5980 - }, - { - "epoch": 0.5634346812368998, - "grad_norm": 0.7928352952003479, - "learning_rate": 1.6682323886375294e-05, - "loss": 0.3603, - "step": 5981 - }, - { - "epoch": 0.5635288853300676, - "grad_norm": 0.8035825490951538, - "learning_rate": 1.6681200428301816e-05, - "loss": 0.3295, - "step": 5982 - }, - { - "epoch": 0.5636230894232355, - "grad_norm": 0.851418673992157, - "learning_rate": 1.6680076817886632e-05, - "loss": 0.3602, - "step": 5983 - }, - { - "epoch": 0.5637172935164033, - "grad_norm": 0.6932213306427002, - "learning_rate": 1.667895305515537e-05, - "loss": 0.3067, - "step": 5984 - }, - { - "epoch": 0.5638114976095712, - "grad_norm": 0.7550251483917236, - "learning_rate": 1.6677829140133647e-05, - "loss": 0.3421, - "step": 5985 - }, - { - "epoch": 0.563905701702739, - "grad_norm": 0.8431921601295471, - "learning_rate": 1.667670507284709e-05, - "loss": 0.3746, - "step": 5986 - }, - { - "epoch": 0.5639999057959069, - "grad_norm": 0.7865931987762451, - "learning_rate": 1.6675580853321335e-05, - "loss": 0.3782, - "step": 5987 - }, - { - "epoch": 0.5640941098890747, - "grad_norm": 0.7720548510551453, - "learning_rate": 1.667445648158201e-05, - "loss": 0.3459, - "step": 5988 - }, - { - "epoch": 0.5641883139822426, - "grad_norm": 0.8384525179862976, - "learning_rate": 1.6673331957654755e-05, - "loss": 0.333, - "step": 5989 - }, - { - "epoch": 0.5642825180754104, - "grad_norm": 0.7852795720100403, - "learning_rate": 1.6672207281565212e-05, - "loss": 0.3709, - "step": 5990 - }, - { - "epoch": 0.5643767221685783, - "grad_norm": 0.8175713419914246, - "learning_rate": 1.6671082453339024e-05, - "loss": 0.3286, - "step": 5991 - }, - { - "epoch": 0.5644709262617461, - "grad_norm": 0.9116515517234802, - "learning_rate": 1.666995747300184e-05, - "loss": 0.3214, - "step": 5992 - }, - { - "epoch": 0.564565130354914, - "grad_norm": 0.7680133581161499, - "learning_rate": 1.666883234057931e-05, - "loss": 0.3157, - "step": 5993 - }, - { - "epoch": 0.5646593344480818, - "grad_norm": 0.8134737610816956, - "learning_rate": 1.666770705609709e-05, - "loss": 0.3543, - "step": 5994 - }, - { - "epoch": 0.5647535385412497, - "grad_norm": 0.7875015139579773, - "learning_rate": 1.6666581619580835e-05, - "loss": 0.3644, - "step": 5995 - }, - { - "epoch": 0.5648477426344175, - "grad_norm": 0.6372561454772949, - "learning_rate": 1.666545603105621e-05, - "loss": 0.3292, - "step": 5996 - }, - { - "epoch": 0.5649419467275854, - "grad_norm": 0.71250319480896, - "learning_rate": 1.666433029054888e-05, - "loss": 0.326, - "step": 5997 - }, - { - "epoch": 0.5650361508207532, - "grad_norm": 0.8104060888290405, - "learning_rate": 1.666320439808451e-05, - "loss": 0.3636, - "step": 5998 - }, - { - "epoch": 0.565130354913921, - 
"grad_norm": 0.7650859355926514, - "learning_rate": 1.666207835368878e-05, - "loss": 0.3158, - "step": 5999 - }, - { - "epoch": 0.5652245590070889, - "grad_norm": 0.9378272294998169, - "learning_rate": 1.6660952157387355e-05, - "loss": 0.3618, - "step": 6000 - }, - { - "epoch": 0.5653187631002567, - "grad_norm": 0.7213669419288635, - "learning_rate": 1.6659825809205924e-05, - "loss": 0.3188, - "step": 6001 - }, - { - "epoch": 0.5654129671934246, - "grad_norm": 0.8028793931007385, - "learning_rate": 1.6658699309170157e-05, - "loss": 0.35, - "step": 6002 - }, - { - "epoch": 0.5655071712865924, - "grad_norm": 0.6953859329223633, - "learning_rate": 1.6657572657305758e-05, - "loss": 0.3512, - "step": 6003 - }, - { - "epoch": 0.5656013753797603, - "grad_norm": 0.7779675126075745, - "learning_rate": 1.66564458536384e-05, - "loss": 0.3353, - "step": 6004 - }, - { - "epoch": 0.5656955794729281, - "grad_norm": 0.785291314125061, - "learning_rate": 1.6655318898193784e-05, - "loss": 0.325, - "step": 6005 - }, - { - "epoch": 0.565789783566096, - "grad_norm": 0.8910825252532959, - "learning_rate": 1.6654191790997604e-05, - "loss": 0.3489, - "step": 6006 - }, - { - "epoch": 0.5658839876592638, - "grad_norm": 1.6566963195800781, - "learning_rate": 1.6653064532075563e-05, - "loss": 0.3491, - "step": 6007 - }, - { - "epoch": 0.5659781917524317, - "grad_norm": 0.6922672986984253, - "learning_rate": 1.6651937121453357e-05, - "loss": 0.3293, - "step": 6008 - }, - { - "epoch": 0.5660723958455995, - "grad_norm": 0.6288778185844421, - "learning_rate": 1.66508095591567e-05, - "loss": 0.2707, - "step": 6009 - }, - { - "epoch": 0.5661665999387674, - "grad_norm": 0.7408380508422852, - "learning_rate": 1.66496818452113e-05, - "loss": 0.3155, - "step": 6010 - }, - { - "epoch": 0.5662608040319352, - "grad_norm": 1.2888745069503784, - "learning_rate": 1.6648553979642867e-05, - "loss": 0.3063, - "step": 6011 - }, - { - "epoch": 0.5663550081251031, - "grad_norm": 0.7629866600036621, - "learning_rate": 1.6647425962477122e-05, - "loss": 0.3297, - "step": 6012 - }, - { - "epoch": 0.5664492122182709, - "grad_norm": 0.7858827114105225, - "learning_rate": 1.6646297793739784e-05, - "loss": 0.3095, - "step": 6013 - }, - { - "epoch": 0.5665434163114388, - "grad_norm": 0.6547446250915527, - "learning_rate": 1.664516947345658e-05, - "loss": 0.3263, - "step": 6014 - }, - { - "epoch": 0.5666376204046066, - "grad_norm": 0.7657425403594971, - "learning_rate": 1.6644041001653236e-05, - "loss": 0.3195, - "step": 6015 - }, - { - "epoch": 0.5667318244977745, - "grad_norm": 0.7175026535987854, - "learning_rate": 1.6642912378355478e-05, - "loss": 0.3413, - "step": 6016 - }, - { - "epoch": 0.5668260285909422, - "grad_norm": 0.810639500617981, - "learning_rate": 1.6641783603589048e-05, - "loss": 0.3336, - "step": 6017 - }, - { - "epoch": 0.5669202326841101, - "grad_norm": 0.7549232244491577, - "learning_rate": 1.6640654677379677e-05, - "loss": 0.3141, - "step": 6018 - }, - { - "epoch": 0.5670144367772779, - "grad_norm": 0.8682640790939331, - "learning_rate": 1.663952559975311e-05, - "loss": 0.3471, - "step": 6019 - }, - { - "epoch": 0.5671086408704458, - "grad_norm": 0.8188992142677307, - "learning_rate": 1.6638396370735095e-05, - "loss": 0.3525, - "step": 6020 - }, - { - "epoch": 0.5672028449636136, - "grad_norm": 0.7149308919906616, - "learning_rate": 1.663726699035137e-05, - "loss": 0.3279, - "step": 6021 - }, - { - "epoch": 0.5672970490567815, - "grad_norm": 0.7880874872207642, - "learning_rate": 1.6636137458627696e-05, - "loss": 0.3741, - 
"step": 6022 - }, - { - "epoch": 0.5673912531499493, - "grad_norm": 0.7084752321243286, - "learning_rate": 1.6635007775589826e-05, - "loss": 0.2901, - "step": 6023 - }, - { - "epoch": 0.5674854572431172, - "grad_norm": 0.7272505760192871, - "learning_rate": 1.6633877941263517e-05, - "loss": 0.3339, - "step": 6024 - }, - { - "epoch": 0.567579661336285, - "grad_norm": 0.7028395533561707, - "learning_rate": 1.663274795567453e-05, - "loss": 0.3252, - "step": 6025 - }, - { - "epoch": 0.5676738654294529, - "grad_norm": 0.8396478295326233, - "learning_rate": 1.6631617818848632e-05, - "loss": 0.3106, - "step": 6026 - }, - { - "epoch": 0.5677680695226207, - "grad_norm": 0.8230246901512146, - "learning_rate": 1.6630487530811594e-05, - "loss": 0.3202, - "step": 6027 - }, - { - "epoch": 0.5678622736157886, - "grad_norm": 0.7576961517333984, - "learning_rate": 1.6629357091589184e-05, - "loss": 0.3503, - "step": 6028 - }, - { - "epoch": 0.5679564777089564, - "grad_norm": 0.7407094836235046, - "learning_rate": 1.662822650120718e-05, - "loss": 0.3276, - "step": 6029 - }, - { - "epoch": 0.5680506818021243, - "grad_norm": 0.8200357556343079, - "learning_rate": 1.6627095759691364e-05, - "loss": 0.3449, - "step": 6030 - }, - { - "epoch": 0.5681448858952921, - "grad_norm": 0.9754223823547363, - "learning_rate": 1.6625964867067514e-05, - "loss": 0.3591, - "step": 6031 - }, - { - "epoch": 0.56823908998846, - "grad_norm": 0.8169598579406738, - "learning_rate": 1.6624833823361416e-05, - "loss": 0.3135, - "step": 6032 - }, - { - "epoch": 0.5683332940816278, - "grad_norm": 0.8093365430831909, - "learning_rate": 1.662370262859886e-05, - "loss": 0.323, - "step": 6033 - }, - { - "epoch": 0.5684274981747957, - "grad_norm": 0.8016453981399536, - "learning_rate": 1.6622571282805642e-05, - "loss": 0.3414, - "step": 6034 - }, - { - "epoch": 0.5685217022679635, - "grad_norm": 0.769794762134552, - "learning_rate": 1.6621439786007557e-05, - "loss": 0.3579, - "step": 6035 - }, - { - "epoch": 0.5686159063611314, - "grad_norm": 0.8014020919799805, - "learning_rate": 1.6620308138230406e-05, - "loss": 0.3087, - "step": 6036 - }, - { - "epoch": 0.5687101104542992, - "grad_norm": 0.8015003800392151, - "learning_rate": 1.6619176339499993e-05, - "loss": 0.3561, - "step": 6037 - }, - { - "epoch": 0.568804314547467, - "grad_norm": 0.7012295722961426, - "learning_rate": 1.6618044389842116e-05, - "loss": 0.338, - "step": 6038 - }, - { - "epoch": 0.5688985186406349, - "grad_norm": 0.7357358932495117, - "learning_rate": 1.6616912289282595e-05, - "loss": 0.3359, - "step": 6039 - }, - { - "epoch": 0.5689927227338027, - "grad_norm": 1.0131534337997437, - "learning_rate": 1.661578003784724e-05, - "loss": 0.3035, - "step": 6040 - }, - { - "epoch": 0.5690869268269706, - "grad_norm": 0.752738893032074, - "learning_rate": 1.6614647635561872e-05, - "loss": 0.3494, - "step": 6041 - }, - { - "epoch": 0.5691811309201384, - "grad_norm": 0.7525772452354431, - "learning_rate": 1.661351508245231e-05, - "loss": 0.3618, - "step": 6042 - }, - { - "epoch": 0.5692753350133063, - "grad_norm": 0.8118856549263, - "learning_rate": 1.661238237854437e-05, - "loss": 0.3678, - "step": 6043 - }, - { - "epoch": 0.5693695391064741, - "grad_norm": 0.7494533658027649, - "learning_rate": 1.6611249523863886e-05, - "loss": 0.3157, - "step": 6044 - }, - { - "epoch": 0.569463743199642, - "grad_norm": 0.7700856924057007, - "learning_rate": 1.661011651843669e-05, - "loss": 0.3459, - "step": 6045 - }, - { - "epoch": 0.5695579472928098, - "grad_norm": 0.706672191619873, - 
"learning_rate": 1.6608983362288612e-05, - "loss": 0.3149, - "step": 6046 - }, - { - "epoch": 0.5696521513859777, - "grad_norm": 0.7745957374572754, - "learning_rate": 1.6607850055445497e-05, - "loss": 0.36, - "step": 6047 - }, - { - "epoch": 0.5697463554791455, - "grad_norm": 0.7428537011146545, - "learning_rate": 1.660671659793318e-05, - "loss": 0.3265, - "step": 6048 - }, - { - "epoch": 0.5698405595723134, - "grad_norm": 0.6797862648963928, - "learning_rate": 1.6605582989777504e-05, - "loss": 0.3167, - "step": 6049 - }, - { - "epoch": 0.5699347636654812, - "grad_norm": 0.6493890881538391, - "learning_rate": 1.6604449231004323e-05, - "loss": 0.3108, - "step": 6050 - }, - { - "epoch": 0.5700289677586491, - "grad_norm": 0.8943933844566345, - "learning_rate": 1.6603315321639486e-05, - "loss": 0.3856, - "step": 6051 - }, - { - "epoch": 0.5701231718518169, - "grad_norm": 0.7026787400245667, - "learning_rate": 1.6602181261708847e-05, - "loss": 0.3084, - "step": 6052 - }, - { - "epoch": 0.5702173759449848, - "grad_norm": 0.7042235732078552, - "learning_rate": 1.6601047051238264e-05, - "loss": 0.3246, - "step": 6053 - }, - { - "epoch": 0.5703115800381526, - "grad_norm": 0.7456470131874084, - "learning_rate": 1.65999126902536e-05, - "loss": 0.3478, - "step": 6054 - }, - { - "epoch": 0.5704057841313205, - "grad_norm": 0.8093039393424988, - "learning_rate": 1.6598778178780718e-05, - "loss": 0.3588, - "step": 6055 - }, - { - "epoch": 0.5704999882244883, - "grad_norm": 0.8204628229141235, - "learning_rate": 1.659764351684549e-05, - "loss": 0.3227, - "step": 6056 - }, - { - "epoch": 0.5705941923176562, - "grad_norm": 0.9243156909942627, - "learning_rate": 1.6596508704473787e-05, - "loss": 0.3555, - "step": 6057 - }, - { - "epoch": 0.570688396410824, - "grad_norm": 0.7073413133621216, - "learning_rate": 1.659537374169148e-05, - "loss": 0.2897, - "step": 6058 - }, - { - "epoch": 0.5707826005039919, - "grad_norm": 0.7905264496803284, - "learning_rate": 1.6594238628524456e-05, - "loss": 0.3473, - "step": 6059 - }, - { - "epoch": 0.5708768045971597, - "grad_norm": 0.7482105493545532, - "learning_rate": 1.6593103364998593e-05, - "loss": 0.3458, - "step": 6060 - }, - { - "epoch": 0.5709710086903276, - "grad_norm": 0.73758864402771, - "learning_rate": 1.6591967951139777e-05, - "loss": 0.3241, - "step": 6061 - }, - { - "epoch": 0.5710652127834954, - "grad_norm": 0.8431020379066467, - "learning_rate": 1.6590832386973895e-05, - "loss": 0.3491, - "step": 6062 - }, - { - "epoch": 0.5711594168766633, - "grad_norm": 0.6449090838432312, - "learning_rate": 1.6589696672526844e-05, - "loss": 0.2917, - "step": 6063 - }, - { - "epoch": 0.5712536209698311, - "grad_norm": 0.7195934653282166, - "learning_rate": 1.6588560807824513e-05, - "loss": 0.3207, - "step": 6064 - }, - { - "epoch": 0.571347825062999, - "grad_norm": 0.8135504722595215, - "learning_rate": 1.658742479289281e-05, - "loss": 0.3832, - "step": 6065 - }, - { - "epoch": 0.5714420291561668, - "grad_norm": 0.7827112078666687, - "learning_rate": 1.6586288627757633e-05, - "loss": 0.3414, - "step": 6066 - }, - { - "epoch": 0.5715362332493347, - "grad_norm": 0.7211251854896545, - "learning_rate": 1.658515231244489e-05, - "loss": 0.3356, - "step": 6067 - }, - { - "epoch": 0.5716304373425025, - "grad_norm": 0.7144401669502258, - "learning_rate": 1.658401584698049e-05, - "loss": 0.2805, - "step": 6068 - }, - { - "epoch": 0.5717246414356704, - "grad_norm": 0.8043273091316223, - "learning_rate": 1.658287923139035e-05, - "loss": 0.3678, - "step": 6069 - }, - { - "epoch": 
0.5718188455288382, - "grad_norm": 0.8267249464988708, - "learning_rate": 1.6581742465700377e-05, - "loss": 0.3456, - "step": 6070 - }, - { - "epoch": 0.5719130496220061, - "grad_norm": 0.8214240670204163, - "learning_rate": 1.6580605549936496e-05, - "loss": 0.3052, - "step": 6071 - }, - { - "epoch": 0.5720072537151739, - "grad_norm": 0.7541815638542175, - "learning_rate": 1.6579468484124637e-05, - "loss": 0.355, - "step": 6072 - }, - { - "epoch": 0.5721014578083418, - "grad_norm": 0.7984311580657959, - "learning_rate": 1.657833126829072e-05, - "loss": 0.3192, - "step": 6073 - }, - { - "epoch": 0.5721956619015096, - "grad_norm": 0.7665155529975891, - "learning_rate": 1.6577193902460675e-05, - "loss": 0.4045, - "step": 6074 - }, - { - "epoch": 0.5722898659946775, - "grad_norm": 0.7091019153594971, - "learning_rate": 1.6576056386660435e-05, - "loss": 0.2941, - "step": 6075 - }, - { - "epoch": 0.5723840700878453, - "grad_norm": 0.7414884567260742, - "learning_rate": 1.657491872091594e-05, - "loss": 0.3314, - "step": 6076 - }, - { - "epoch": 0.5724782741810132, - "grad_norm": 0.7666023969650269, - "learning_rate": 1.6573780905253133e-05, - "loss": 0.3601, - "step": 6077 - }, - { - "epoch": 0.572572478274181, - "grad_norm": 0.848470151424408, - "learning_rate": 1.6572642939697955e-05, - "loss": 0.2971, - "step": 6078 - }, - { - "epoch": 0.5726666823673489, - "grad_norm": 0.6963145136833191, - "learning_rate": 1.657150482427635e-05, - "loss": 0.2959, - "step": 6079 - }, - { - "epoch": 0.5727608864605167, - "grad_norm": 0.7042207717895508, - "learning_rate": 1.657036655901427e-05, - "loss": 0.3387, - "step": 6080 - }, - { - "epoch": 0.5728550905536846, - "grad_norm": 0.7728890776634216, - "learning_rate": 1.6569228143937678e-05, - "loss": 0.3307, - "step": 6081 - }, - { - "epoch": 0.5729492946468524, - "grad_norm": 0.8549204468727112, - "learning_rate": 1.656808957907252e-05, - "loss": 0.3461, - "step": 6082 - }, - { - "epoch": 0.5730434987400203, - "grad_norm": 0.7708202600479126, - "learning_rate": 1.6566950864444766e-05, - "loss": 0.3259, - "step": 6083 - }, - { - "epoch": 0.5731377028331881, - "grad_norm": 0.9670395851135254, - "learning_rate": 1.6565812000080373e-05, - "loss": 0.3047, - "step": 6084 - }, - { - "epoch": 0.573231906926356, - "grad_norm": 0.6677764058113098, - "learning_rate": 1.6564672986005312e-05, - "loss": 0.2787, - "step": 6085 - }, - { - "epoch": 0.5733261110195238, - "grad_norm": 0.6614779829978943, - "learning_rate": 1.6563533822245553e-05, - "loss": 0.2757, - "step": 6086 - }, - { - "epoch": 0.5734203151126916, - "grad_norm": 0.8284443020820618, - "learning_rate": 1.6562394508827077e-05, - "loss": 0.3727, - "step": 6087 - }, - { - "epoch": 0.5735145192058595, - "grad_norm": 0.797268271446228, - "learning_rate": 1.6561255045775856e-05, - "loss": 0.3205, - "step": 6088 - }, - { - "epoch": 0.5736087232990273, - "grad_norm": 0.9774706363677979, - "learning_rate": 1.656011543311787e-05, - "loss": 0.3245, - "step": 6089 - }, - { - "epoch": 0.5737029273921952, - "grad_norm": 0.6838274598121643, - "learning_rate": 1.655897567087911e-05, - "loss": 0.2869, - "step": 6090 - }, - { - "epoch": 0.573797131485363, - "grad_norm": 0.8185810446739197, - "learning_rate": 1.6557835759085555e-05, - "loss": 0.3422, - "step": 6091 - }, - { - "epoch": 0.5738913355785309, - "grad_norm": 0.7627773880958557, - "learning_rate": 1.655669569776321e-05, - "loss": 0.3203, - "step": 6092 - }, - { - "epoch": 0.5739855396716987, - "grad_norm": 0.7528077363967896, - "learning_rate": 
1.655555548693806e-05, - "loss": 0.3591, - "step": 6093 - }, - { - "epoch": 0.5740797437648666, - "grad_norm": 0.8339890837669373, - "learning_rate": 1.6554415126636104e-05, - "loss": 0.346, - "step": 6094 - }, - { - "epoch": 0.5741739478580344, - "grad_norm": 0.7653105854988098, - "learning_rate": 1.655327461688335e-05, - "loss": 0.3586, - "step": 6095 - }, - { - "epoch": 0.5742681519512023, - "grad_norm": 0.8562196493148804, - "learning_rate": 1.65521339577058e-05, - "loss": 0.364, - "step": 6096 - }, - { - "epoch": 0.5743623560443701, - "grad_norm": 0.7822995781898499, - "learning_rate": 1.6550993149129463e-05, - "loss": 0.3116, - "step": 6097 - }, - { - "epoch": 0.574456560137538, - "grad_norm": 1.4264404773712158, - "learning_rate": 1.6549852191180348e-05, - "loss": 0.3083, - "step": 6098 - }, - { - "epoch": 0.5745507642307058, - "grad_norm": 0.7861554622650146, - "learning_rate": 1.6548711083884474e-05, - "loss": 0.3046, - "step": 6099 - }, - { - "epoch": 0.5746449683238737, - "grad_norm": 0.9693179726600647, - "learning_rate": 1.6547569827267863e-05, - "loss": 0.3648, - "step": 6100 - }, - { - "epoch": 0.5747391724170415, - "grad_norm": 0.8236418962478638, - "learning_rate": 1.6546428421356527e-05, - "loss": 0.3386, - "step": 6101 - }, - { - "epoch": 0.5748333765102094, - "grad_norm": 0.7198533415794373, - "learning_rate": 1.6545286866176506e-05, - "loss": 0.2965, - "step": 6102 - }, - { - "epoch": 0.5749275806033772, - "grad_norm": 1.6166539192199707, - "learning_rate": 1.654414516175382e-05, - "loss": 0.3522, - "step": 6103 - }, - { - "epoch": 0.5750217846965451, - "grad_norm": 0.7453978061676025, - "learning_rate": 1.65430033081145e-05, - "loss": 0.3368, - "step": 6104 - }, - { - "epoch": 0.5751159887897129, - "grad_norm": 0.757904589176178, - "learning_rate": 1.654186130528459e-05, - "loss": 0.3615, - "step": 6105 - }, - { - "epoch": 0.5752101928828808, - "grad_norm": 0.7784548401832581, - "learning_rate": 1.6540719153290125e-05, - "loss": 0.3639, - "step": 6106 - }, - { - "epoch": 0.5753043969760486, - "grad_norm": 0.7553150653839111, - "learning_rate": 1.6539576852157145e-05, - "loss": 0.3607, - "step": 6107 - }, - { - "epoch": 0.5753986010692165, - "grad_norm": 0.7249464988708496, - "learning_rate": 1.65384344019117e-05, - "loss": 0.334, - "step": 6108 - }, - { - "epoch": 0.5754928051623843, - "grad_norm": 0.8580317497253418, - "learning_rate": 1.653729180257984e-05, - "loss": 0.3098, - "step": 6109 - }, - { - "epoch": 0.5755870092555522, - "grad_norm": 0.7214152812957764, - "learning_rate": 1.6536149054187615e-05, - "loss": 0.3158, - "step": 6110 - }, - { - "epoch": 0.57568121334872, - "grad_norm": 0.7895944714546204, - "learning_rate": 1.6535006156761085e-05, - "loss": 0.341, - "step": 6111 - }, - { - "epoch": 0.5757754174418879, - "grad_norm": 0.7869628071784973, - "learning_rate": 1.6533863110326305e-05, - "loss": 0.321, - "step": 6112 - }, - { - "epoch": 0.5758696215350557, - "grad_norm": 0.7153104543685913, - "learning_rate": 1.6532719914909347e-05, - "loss": 0.3335, - "step": 6113 - }, - { - "epoch": 0.5759638256282236, - "grad_norm": 0.779529333114624, - "learning_rate": 1.6531576570536267e-05, - "loss": 0.354, - "step": 6114 - }, - { - "epoch": 0.5760580297213914, - "grad_norm": 0.9581845998764038, - "learning_rate": 1.6530433077233142e-05, - "loss": 0.3407, - "step": 6115 - }, - { - "epoch": 0.5761522338145593, - "grad_norm": 0.7201777696609497, - "learning_rate": 1.6529289435026043e-05, - "loss": 0.32, - "step": 6116 - }, - { - "epoch": 0.5762464379077271, - 
"grad_norm": 0.7921628355979919, - "learning_rate": 1.652814564394105e-05, - "loss": 0.3061, - "step": 6117 - }, - { - "epoch": 0.576340642000895, - "grad_norm": 0.7956567406654358, - "learning_rate": 1.6527001704004237e-05, - "loss": 0.3992, - "step": 6118 - }, - { - "epoch": 0.5764348460940628, - "grad_norm": 0.8144785165786743, - "learning_rate": 1.6525857615241686e-05, - "loss": 0.349, - "step": 6119 - }, - { - "epoch": 0.5765290501872307, - "grad_norm": 0.7400908470153809, - "learning_rate": 1.6524713377679496e-05, - "loss": 0.3468, - "step": 6120 - }, - { - "epoch": 0.5766232542803985, - "grad_norm": 0.816346287727356, - "learning_rate": 1.6523568991343747e-05, - "loss": 0.3759, - "step": 6121 - }, - { - "epoch": 0.5767174583735664, - "grad_norm": 0.8564147353172302, - "learning_rate": 1.6522424456260537e-05, - "loss": 0.3723, - "step": 6122 - }, - { - "epoch": 0.5768116624667342, - "grad_norm": 0.8647595047950745, - "learning_rate": 1.652127977245596e-05, - "loss": 0.3198, - "step": 6123 - }, - { - "epoch": 0.5769058665599021, - "grad_norm": 0.9383739829063416, - "learning_rate": 1.6520134939956125e-05, - "loss": 0.3527, - "step": 6124 - }, - { - "epoch": 0.5770000706530699, - "grad_norm": 0.8213065266609192, - "learning_rate": 1.6518989958787126e-05, - "loss": 0.3493, - "step": 6125 - }, - { - "epoch": 0.5770942747462378, - "grad_norm": 0.7273536920547485, - "learning_rate": 1.6517844828975076e-05, - "loss": 0.2987, - "step": 6126 - }, - { - "epoch": 0.5771884788394056, - "grad_norm": 0.69364333152771, - "learning_rate": 1.6516699550546084e-05, - "loss": 0.2913, - "step": 6127 - }, - { - "epoch": 0.5772826829325735, - "grad_norm": 0.6990786790847778, - "learning_rate": 1.651555412352626e-05, - "loss": 0.2861, - "step": 6128 - }, - { - "epoch": 0.5773768870257413, - "grad_norm": 0.7258824706077576, - "learning_rate": 1.6514408547941728e-05, - "loss": 0.3566, - "step": 6129 - }, - { - "epoch": 0.5774710911189092, - "grad_norm": 0.8860750198364258, - "learning_rate": 1.651326282381861e-05, - "loss": 0.3253, - "step": 6130 - }, - { - "epoch": 0.577565295212077, - "grad_norm": 0.7322821021080017, - "learning_rate": 1.651211695118302e-05, - "loss": 0.3348, - "step": 6131 - }, - { - "epoch": 0.5776594993052449, - "grad_norm": 0.8280245065689087, - "learning_rate": 1.6510970930061095e-05, - "loss": 0.3294, - "step": 6132 - }, - { - "epoch": 0.5777537033984127, - "grad_norm": 0.7888951301574707, - "learning_rate": 1.650982476047896e-05, - "loss": 0.317, - "step": 6133 - }, - { - "epoch": 0.5778479074915805, - "grad_norm": 0.9588354825973511, - "learning_rate": 1.650867844246276e-05, - "loss": 0.3446, - "step": 6134 - }, - { - "epoch": 0.5779421115847484, - "grad_norm": 0.707567572593689, - "learning_rate": 1.6507531976038622e-05, - "loss": 0.3042, - "step": 6135 - }, - { - "epoch": 0.5780363156779162, - "grad_norm": 0.7950550317764282, - "learning_rate": 1.6506385361232688e-05, - "loss": 0.3341, - "step": 6136 - }, - { - "epoch": 0.5781305197710841, - "grad_norm": 0.6977543830871582, - "learning_rate": 1.6505238598071112e-05, - "loss": 0.3163, - "step": 6137 - }, - { - "epoch": 0.5782247238642519, - "grad_norm": 0.743246853351593, - "learning_rate": 1.650409168658003e-05, - "loss": 0.3252, - "step": 6138 - }, - { - "epoch": 0.5783189279574198, - "grad_norm": 0.6571413278579712, - "learning_rate": 1.65029446267856e-05, - "loss": 0.265, - "step": 6139 - }, - { - "epoch": 0.5784131320505876, - "grad_norm": 0.7845455408096313, - "learning_rate": 1.650179741871398e-05, - "loss": 0.3826, - 
"step": 6140 - }, - { - "epoch": 0.5785073361437555, - "grad_norm": 0.7896680235862732, - "learning_rate": 1.6500650062391318e-05, - "loss": 0.34, - "step": 6141 - }, - { - "epoch": 0.5786015402369233, - "grad_norm": 0.7778838872909546, - "learning_rate": 1.6499502557843782e-05, - "loss": 0.3443, - "step": 6142 - }, - { - "epoch": 0.5786957443300912, - "grad_norm": 0.7396849393844604, - "learning_rate": 1.649835490509754e-05, - "loss": 0.3063, - "step": 6143 - }, - { - "epoch": 0.578789948423259, - "grad_norm": 0.7858325242996216, - "learning_rate": 1.6497207104178748e-05, - "loss": 0.3322, - "step": 6144 - }, - { - "epoch": 0.5788841525164269, - "grad_norm": 0.7330732345581055, - "learning_rate": 1.649605915511359e-05, - "loss": 0.3113, - "step": 6145 - }, - { - "epoch": 0.5789783566095947, - "grad_norm": 0.8418616652488708, - "learning_rate": 1.649491105792824e-05, - "loss": 0.3616, - "step": 6146 - }, - { - "epoch": 0.5790725607027626, - "grad_norm": 0.7793329954147339, - "learning_rate": 1.6493762812648872e-05, - "loss": 0.3404, - "step": 6147 - }, - { - "epoch": 0.5791667647959304, - "grad_norm": 0.7458708882331848, - "learning_rate": 1.649261441930167e-05, - "loss": 0.3146, - "step": 6148 - }, - { - "epoch": 0.5792609688890983, - "grad_norm": 0.8060473203659058, - "learning_rate": 1.6491465877912815e-05, - "loss": 0.3224, - "step": 6149 - }, - { - "epoch": 0.5793551729822661, - "grad_norm": 0.6975293159484863, - "learning_rate": 1.64903171885085e-05, - "loss": 0.337, - "step": 6150 - }, - { - "epoch": 0.579449377075434, - "grad_norm": 0.7504389882087708, - "learning_rate": 1.6489168351114913e-05, - "loss": 0.3023, - "step": 6151 - }, - { - "epoch": 0.5795435811686018, - "grad_norm": 1.0578118562698364, - "learning_rate": 1.6488019365758256e-05, - "loss": 0.3307, - "step": 6152 - }, - { - "epoch": 0.5796377852617697, - "grad_norm": 0.7639904618263245, - "learning_rate": 1.6486870232464724e-05, - "loss": 0.3828, - "step": 6153 - }, - { - "epoch": 0.5797319893549375, - "grad_norm": 1.1117942333221436, - "learning_rate": 1.6485720951260516e-05, - "loss": 0.3385, - "step": 6154 - }, - { - "epoch": 0.5798261934481053, - "grad_norm": 0.8805582523345947, - "learning_rate": 1.648457152217184e-05, - "loss": 0.3436, - "step": 6155 - }, - { - "epoch": 0.5799203975412731, - "grad_norm": 0.7073836922645569, - "learning_rate": 1.6483421945224906e-05, - "loss": 0.3074, - "step": 6156 - }, - { - "epoch": 0.580014601634441, - "grad_norm": 0.6927123665809631, - "learning_rate": 1.6482272220445925e-05, - "loss": 0.3361, - "step": 6157 - }, - { - "epoch": 0.5801088057276088, - "grad_norm": 1.035823106765747, - "learning_rate": 1.648112234786111e-05, - "loss": 0.3312, - "step": 6158 - }, - { - "epoch": 0.5802030098207767, - "grad_norm": 0.8590176105499268, - "learning_rate": 1.6479972327496685e-05, - "loss": 0.3081, - "step": 6159 - }, - { - "epoch": 0.5802972139139445, - "grad_norm": 0.7897781133651733, - "learning_rate": 1.647882215937887e-05, - "loss": 0.3009, - "step": 6160 - }, - { - "epoch": 0.5803914180071124, - "grad_norm": 0.751808226108551, - "learning_rate": 1.6477671843533885e-05, - "loss": 0.3499, - "step": 6161 - }, - { - "epoch": 0.5804856221002802, - "grad_norm": 0.7967620491981506, - "learning_rate": 1.647652137998797e-05, - "loss": 0.3186, - "step": 6162 - }, - { - "epoch": 0.5805798261934481, - "grad_norm": 0.9034838676452637, - "learning_rate": 1.647537076876735e-05, - "loss": 0.3432, - "step": 6163 - }, - { - "epoch": 0.5806740302866159, - "grad_norm": 0.8102205991744995, - 
"learning_rate": 1.6474220009898258e-05, - "loss": 0.3468, - "step": 6164 - }, - { - "epoch": 0.5807682343797838, - "grad_norm": 0.834168553352356, - "learning_rate": 1.6473069103406943e-05, - "loss": 0.3292, - "step": 6165 - }, - { - "epoch": 0.5808624384729516, - "grad_norm": 0.7016925811767578, - "learning_rate": 1.647191804931964e-05, - "loss": 0.3116, - "step": 6166 - }, - { - "epoch": 0.5809566425661195, - "grad_norm": 0.7217550873756409, - "learning_rate": 1.6470766847662596e-05, - "loss": 0.308, - "step": 6167 - }, - { - "epoch": 0.5810508466592873, - "grad_norm": 0.8545812368392944, - "learning_rate": 1.6469615498462057e-05, - "loss": 0.3669, - "step": 6168 - }, - { - "epoch": 0.5811450507524552, - "grad_norm": 1.0315021276474, - "learning_rate": 1.6468464001744283e-05, - "loss": 0.3695, - "step": 6169 - }, - { - "epoch": 0.581239254845623, - "grad_norm": 0.9519716501235962, - "learning_rate": 1.6467312357535524e-05, - "loss": 0.3322, - "step": 6170 - }, - { - "epoch": 0.5813334589387908, - "grad_norm": 1.0121030807495117, - "learning_rate": 1.6466160565862043e-05, - "loss": 0.323, - "step": 6171 - }, - { - "epoch": 0.5814276630319587, - "grad_norm": 0.73393315076828, - "learning_rate": 1.6465008626750104e-05, - "loss": 0.3027, - "step": 6172 - }, - { - "epoch": 0.5815218671251265, - "grad_norm": 0.7842426300048828, - "learning_rate": 1.6463856540225965e-05, - "loss": 0.3182, - "step": 6173 - }, - { - "epoch": 0.5816160712182944, - "grad_norm": 0.8784646987915039, - "learning_rate": 1.64627043063159e-05, - "loss": 0.3543, - "step": 6174 - }, - { - "epoch": 0.5817102753114622, - "grad_norm": 0.8059666752815247, - "learning_rate": 1.6461551925046184e-05, - "loss": 0.2835, - "step": 6175 - }, - { - "epoch": 0.5818044794046301, - "grad_norm": 0.9482343792915344, - "learning_rate": 1.646039939644309e-05, - "loss": 0.3429, - "step": 6176 - }, - { - "epoch": 0.5818986834977979, - "grad_norm": 0.7454880475997925, - "learning_rate": 1.6459246720532906e-05, - "loss": 0.3151, - "step": 6177 - }, - { - "epoch": 0.5819928875909658, - "grad_norm": 0.7945920825004578, - "learning_rate": 1.6458093897341898e-05, - "loss": 0.3464, - "step": 6178 - }, - { - "epoch": 0.5820870916841336, - "grad_norm": 0.9061700701713562, - "learning_rate": 1.6456940926896365e-05, - "loss": 0.3755, - "step": 6179 - }, - { - "epoch": 0.5821812957773015, - "grad_norm": 0.7213881015777588, - "learning_rate": 1.645578780922259e-05, - "loss": 0.3295, - "step": 6180 - }, - { - "epoch": 0.5822754998704693, - "grad_norm": 0.7835339307785034, - "learning_rate": 1.6454634544346872e-05, - "loss": 0.3683, - "step": 6181 - }, - { - "epoch": 0.5823697039636372, - "grad_norm": 0.7409759759902954, - "learning_rate": 1.6453481132295507e-05, - "loss": 0.3062, - "step": 6182 - }, - { - "epoch": 0.582463908056805, - "grad_norm": 0.7188201546669006, - "learning_rate": 1.6452327573094783e-05, - "loss": 0.2995, - "step": 6183 - }, - { - "epoch": 0.5825581121499729, - "grad_norm": 0.7926046848297119, - "learning_rate": 1.645117386677102e-05, - "loss": 0.3231, - "step": 6184 - }, - { - "epoch": 0.5826523162431407, - "grad_norm": 0.7814804315567017, - "learning_rate": 1.645002001335051e-05, - "loss": 0.3117, - "step": 6185 - }, - { - "epoch": 0.5827465203363086, - "grad_norm": 0.6480827331542969, - "learning_rate": 1.644886601285957e-05, - "loss": 0.3006, - "step": 6186 - }, - { - "epoch": 0.5828407244294764, - "grad_norm": 0.7361974120140076, - "learning_rate": 1.6447711865324517e-05, - "loss": 0.3355, - "step": 6187 - }, - { - "epoch": 
0.5829349285226443, - "grad_norm": 0.8072815537452698, - "learning_rate": 1.6446557570771656e-05, - "loss": 0.3641, - "step": 6188 - }, - { - "epoch": 0.5830291326158121, - "grad_norm": 0.7710379362106323, - "learning_rate": 1.6445403129227316e-05, - "loss": 0.3285, - "step": 6189 - }, - { - "epoch": 0.58312333670898, - "grad_norm": 0.7955127954483032, - "learning_rate": 1.6444248540717812e-05, - "loss": 0.3388, - "step": 6190 - }, - { - "epoch": 0.5832175408021478, - "grad_norm": 0.8016431927680969, - "learning_rate": 1.6443093805269482e-05, - "loss": 0.3347, - "step": 6191 - }, - { - "epoch": 0.5833117448953157, - "grad_norm": 0.7551401853561401, - "learning_rate": 1.6441938922908644e-05, - "loss": 0.3395, - "step": 6192 - }, - { - "epoch": 0.5834059489884835, - "grad_norm": 0.9337490797042847, - "learning_rate": 1.6440783893661637e-05, - "loss": 0.3666, - "step": 6193 - }, - { - "epoch": 0.5835001530816514, - "grad_norm": 0.7654420137405396, - "learning_rate": 1.6439628717554794e-05, - "loss": 0.3571, - "step": 6194 - }, - { - "epoch": 0.5835943571748192, - "grad_norm": 0.8538124561309814, - "learning_rate": 1.643847339461446e-05, - "loss": 0.3574, - "step": 6195 - }, - { - "epoch": 0.5836885612679871, - "grad_norm": 0.7688474059104919, - "learning_rate": 1.6437317924866977e-05, - "loss": 0.3026, - "step": 6196 - }, - { - "epoch": 0.5837827653611549, - "grad_norm": 0.8166373372077942, - "learning_rate": 1.6436162308338685e-05, - "loss": 0.3623, - "step": 6197 - }, - { - "epoch": 0.5838769694543228, - "grad_norm": 0.762607216835022, - "learning_rate": 1.643500654505594e-05, - "loss": 0.3101, - "step": 6198 - }, - { - "epoch": 0.5839711735474906, - "grad_norm": 0.7641329169273376, - "learning_rate": 1.6433850635045095e-05, - "loss": 0.3236, - "step": 6199 - }, - { - "epoch": 0.5840653776406585, - "grad_norm": 0.8614398837089539, - "learning_rate": 1.6432694578332507e-05, - "loss": 0.343, - "step": 6200 - }, - { - "epoch": 0.5841595817338263, - "grad_norm": 0.6884185075759888, - "learning_rate": 1.6431538374944532e-05, - "loss": 0.3076, - "step": 6201 - }, - { - "epoch": 0.5842537858269942, - "grad_norm": 0.7685940265655518, - "learning_rate": 1.6430382024907538e-05, - "loss": 0.3418, - "step": 6202 - }, - { - "epoch": 0.584347989920162, - "grad_norm": 0.7043229937553406, - "learning_rate": 1.6429225528247886e-05, - "loss": 0.2962, - "step": 6203 - }, - { - "epoch": 0.5844421940133299, - "grad_norm": 0.655018150806427, - "learning_rate": 1.642806888499195e-05, - "loss": 0.2674, - "step": 6204 - }, - { - "epoch": 0.5845363981064977, - "grad_norm": 0.7077436447143555, - "learning_rate": 1.64269120951661e-05, - "loss": 0.3026, - "step": 6205 - }, - { - "epoch": 0.5846306021996656, - "grad_norm": 0.6780821681022644, - "learning_rate": 1.642575515879672e-05, - "loss": 0.2931, - "step": 6206 - }, - { - "epoch": 0.5847248062928334, - "grad_norm": 0.7512264251708984, - "learning_rate": 1.642459807591018e-05, - "loss": 0.3367, - "step": 6207 - }, - { - "epoch": 0.5848190103860013, - "grad_norm": 0.7362152934074402, - "learning_rate": 1.642344084653287e-05, - "loss": 0.3548, - "step": 6208 - }, - { - "epoch": 0.5849132144791691, - "grad_norm": 0.6350093483924866, - "learning_rate": 1.6422283470691174e-05, - "loss": 0.3267, - "step": 6209 - }, - { - "epoch": 0.585007418572337, - "grad_norm": 0.8073122501373291, - "learning_rate": 1.6421125948411484e-05, - "loss": 0.3571, - "step": 6210 - }, - { - "epoch": 0.5851016226655048, - "grad_norm": 0.8017840385437012, - "learning_rate": 1.641996827972019e-05, 
- "loss": 0.3316, - "step": 6211 - }, - { - "epoch": 0.5851958267586727, - "grad_norm": 0.7363123297691345, - "learning_rate": 1.6418810464643694e-05, - "loss": 0.3095, - "step": 6212 - }, - { - "epoch": 0.5852900308518405, - "grad_norm": 0.7336936593055725, - "learning_rate": 1.641765250320839e-05, - "loss": 0.304, - "step": 6213 - }, - { - "epoch": 0.5853842349450084, - "grad_norm": 0.8079828023910522, - "learning_rate": 1.641649439544068e-05, - "loss": 0.3465, - "step": 6214 - }, - { - "epoch": 0.5854784390381762, - "grad_norm": 1.0163007974624634, - "learning_rate": 1.641533614136698e-05, - "loss": 0.3477, - "step": 6215 - }, - { - "epoch": 0.585572643131344, - "grad_norm": 0.7917940616607666, - "learning_rate": 1.6414177741013696e-05, - "loss": 0.3607, - "step": 6216 - }, - { - "epoch": 0.5856668472245119, - "grad_norm": 0.6637675762176514, - "learning_rate": 1.6413019194407237e-05, - "loss": 0.3309, - "step": 6217 - }, - { - "epoch": 0.5857610513176797, - "grad_norm": 0.7806336879730225, - "learning_rate": 1.641186050157402e-05, - "loss": 0.3381, - "step": 6218 - }, - { - "epoch": 0.5858552554108476, - "grad_norm": 0.7391020059585571, - "learning_rate": 1.641070166254047e-05, - "loss": 0.3097, - "step": 6219 - }, - { - "epoch": 0.5859494595040154, - "grad_norm": 0.8411687016487122, - "learning_rate": 1.6409542677333007e-05, - "loss": 0.3638, - "step": 6220 - }, - { - "epoch": 0.5860436635971833, - "grad_norm": 0.7389108538627625, - "learning_rate": 1.6408383545978058e-05, - "loss": 0.2995, - "step": 6221 - }, - { - "epoch": 0.5861378676903511, - "grad_norm": 0.7615322470664978, - "learning_rate": 1.6407224268502055e-05, - "loss": 0.3436, - "step": 6222 - }, - { - "epoch": 0.586232071783519, - "grad_norm": 0.7641750574111938, - "learning_rate": 1.6406064844931428e-05, - "loss": 0.3177, - "step": 6223 - }, - { - "epoch": 0.5863262758766868, - "grad_norm": 0.8718242049217224, - "learning_rate": 1.6404905275292616e-05, - "loss": 0.3555, - "step": 6224 - }, - { - "epoch": 0.5864204799698547, - "grad_norm": 0.7840854525566101, - "learning_rate": 1.6403745559612063e-05, - "loss": 0.3483, - "step": 6225 - }, - { - "epoch": 0.5865146840630225, - "grad_norm": 0.7070127725601196, - "learning_rate": 1.64025856979162e-05, - "loss": 0.3352, - "step": 6226 - }, - { - "epoch": 0.5866088881561904, - "grad_norm": 0.6949003338813782, - "learning_rate": 1.6401425690231486e-05, - "loss": 0.3256, - "step": 6227 - }, - { - "epoch": 0.5867030922493582, - "grad_norm": 0.8621259927749634, - "learning_rate": 1.6400265536584364e-05, - "loss": 0.3317, - "step": 6228 - }, - { - "epoch": 0.5867972963425261, - "grad_norm": 0.7552080154418945, - "learning_rate": 1.639910523700129e-05, - "loss": 0.3409, - "step": 6229 - }, - { - "epoch": 0.5868915004356939, - "grad_norm": 0.7715356945991516, - "learning_rate": 1.639794479150872e-05, - "loss": 0.359, - "step": 6230 - }, - { - "epoch": 0.5869857045288618, - "grad_norm": 0.7047427296638489, - "learning_rate": 1.6396784200133114e-05, - "loss": 0.3242, - "step": 6231 - }, - { - "epoch": 0.5870799086220296, - "grad_norm": 0.7249360084533691, - "learning_rate": 1.639562346290094e-05, - "loss": 0.3225, - "step": 6232 - }, - { - "epoch": 0.5871741127151975, - "grad_norm": 0.7523140907287598, - "learning_rate": 1.6394462579838653e-05, - "loss": 0.3249, - "step": 6233 - }, - { - "epoch": 0.5872683168083653, - "grad_norm": 0.7092387080192566, - "learning_rate": 1.639330155097273e-05, - "loss": 0.3372, - "step": 6234 - }, - { - "epoch": 0.5873625209015332, - "grad_norm": 
0.8043521642684937, - "learning_rate": 1.6392140376329652e-05, - "loss": 0.2957, - "step": 6235 - }, - { - "epoch": 0.587456724994701, - "grad_norm": 0.8312609791755676, - "learning_rate": 1.639097905593588e-05, - "loss": 0.3079, - "step": 6236 - }, - { - "epoch": 0.5875509290878689, - "grad_norm": 0.7470102310180664, - "learning_rate": 1.6389817589817908e-05, - "loss": 0.3012, - "step": 6237 - }, - { - "epoch": 0.5876451331810367, - "grad_norm": 0.6481717824935913, - "learning_rate": 1.638865597800221e-05, - "loss": 0.2944, - "step": 6238 - }, - { - "epoch": 0.5877393372742046, - "grad_norm": 0.7260505557060242, - "learning_rate": 1.6387494220515276e-05, - "loss": 0.297, - "step": 6239 - }, - { - "epoch": 0.5878335413673724, - "grad_norm": 0.6912792921066284, - "learning_rate": 1.6386332317383594e-05, - "loss": 0.3345, - "step": 6240 - }, - { - "epoch": 0.5879277454605403, - "grad_norm": 0.7770125865936279, - "learning_rate": 1.6385170268633658e-05, - "loss": 0.3079, - "step": 6241 - }, - { - "epoch": 0.5880219495537081, - "grad_norm": 0.8794979453086853, - "learning_rate": 1.6384008074291965e-05, - "loss": 0.3393, - "step": 6242 - }, - { - "epoch": 0.588116153646876, - "grad_norm": 0.7339308261871338, - "learning_rate": 1.6382845734385014e-05, - "loss": 0.3141, - "step": 6243 - }, - { - "epoch": 0.5882103577400438, - "grad_norm": 0.7595008611679077, - "learning_rate": 1.6381683248939313e-05, - "loss": 0.291, - "step": 6244 - }, - { - "epoch": 0.5883045618332117, - "grad_norm": 0.6484681963920593, - "learning_rate": 1.6380520617981362e-05, - "loss": 0.2828, - "step": 6245 - }, - { - "epoch": 0.5883987659263795, - "grad_norm": 0.7001732587814331, - "learning_rate": 1.6379357841537672e-05, - "loss": 0.2951, - "step": 6246 - }, - { - "epoch": 0.5884929700195474, - "grad_norm": 0.9187387824058533, - "learning_rate": 1.6378194919634762e-05, - "loss": 0.3507, - "step": 6247 - }, - { - "epoch": 0.5885871741127152, - "grad_norm": 0.7903961539268494, - "learning_rate": 1.6377031852299138e-05, - "loss": 0.2709, - "step": 6248 - }, - { - "epoch": 0.5886813782058831, - "grad_norm": 0.7197296023368835, - "learning_rate": 1.637586863955733e-05, - "loss": 0.3396, - "step": 6249 - }, - { - "epoch": 0.5887755822990509, - "grad_norm": 0.8467602133750916, - "learning_rate": 1.6374705281435853e-05, - "loss": 0.3276, - "step": 6250 - }, - { - "epoch": 0.5888697863922188, - "grad_norm": 0.8200076222419739, - "learning_rate": 1.637354177796124e-05, - "loss": 0.3467, - "step": 6251 - }, - { - "epoch": 0.5889639904853866, - "grad_norm": 0.7501189112663269, - "learning_rate": 1.6372378129160015e-05, - "loss": 0.3241, - "step": 6252 - }, - { - "epoch": 0.5890581945785545, - "grad_norm": 0.7274275422096252, - "learning_rate": 1.6371214335058714e-05, - "loss": 0.3151, - "step": 6253 - }, - { - "epoch": 0.5891523986717223, - "grad_norm": 0.8300936818122864, - "learning_rate": 1.6370050395683873e-05, - "loss": 0.3146, - "step": 6254 - }, - { - "epoch": 0.5892466027648902, - "grad_norm": 0.8656488060951233, - "learning_rate": 1.6368886311062032e-05, - "loss": 0.3497, - "step": 6255 - }, - { - "epoch": 0.589340806858058, - "grad_norm": 0.7315640449523926, - "learning_rate": 1.6367722081219736e-05, - "loss": 0.3053, - "step": 6256 - }, - { - "epoch": 0.5894350109512259, - "grad_norm": 0.7211642861366272, - "learning_rate": 1.6366557706183527e-05, - "loss": 0.3306, - "step": 6257 - }, - { - "epoch": 0.5895292150443937, - "grad_norm": 0.6625840663909912, - "learning_rate": 1.6365393185979953e-05, - "loss": 0.2982, - "step": 
6258 - }, - { - "epoch": 0.5896234191375616, - "grad_norm": 0.724456250667572, - "learning_rate": 1.6364228520635572e-05, - "loss": 0.3318, - "step": 6259 - }, - { - "epoch": 0.5897176232307294, - "grad_norm": 0.8413934111595154, - "learning_rate": 1.636306371017694e-05, - "loss": 0.3287, - "step": 6260 - }, - { - "epoch": 0.5898118273238973, - "grad_norm": 0.7873420715332031, - "learning_rate": 1.6361898754630616e-05, - "loss": 0.3801, - "step": 6261 - }, - { - "epoch": 0.5899060314170651, - "grad_norm": 0.7733575105667114, - "learning_rate": 1.636073365402316e-05, - "loss": 0.318, - "step": 6262 - }, - { - "epoch": 0.590000235510233, - "grad_norm": 0.7879183292388916, - "learning_rate": 1.6359568408381138e-05, - "loss": 0.3678, - "step": 6263 - }, - { - "epoch": 0.5900944396034008, - "grad_norm": 0.8884425759315491, - "learning_rate": 1.635840301773112e-05, - "loss": 0.3611, - "step": 6264 - }, - { - "epoch": 0.5901886436965686, - "grad_norm": 0.7099341154098511, - "learning_rate": 1.6357237482099682e-05, - "loss": 0.3, - "step": 6265 - }, - { - "epoch": 0.5902828477897365, - "grad_norm": 0.7190153002738953, - "learning_rate": 1.6356071801513395e-05, - "loss": 0.319, - "step": 6266 - }, - { - "epoch": 0.5903770518829043, - "grad_norm": 0.7086362838745117, - "learning_rate": 1.635490597599885e-05, - "loss": 0.3434, - "step": 6267 - }, - { - "epoch": 0.5904712559760722, - "grad_norm": 0.7540826201438904, - "learning_rate": 1.635374000558261e-05, - "loss": 0.3496, - "step": 6268 - }, - { - "epoch": 0.59056546006924, - "grad_norm": 0.7036347985267639, - "learning_rate": 1.6352573890291275e-05, - "loss": 0.3418, - "step": 6269 - }, - { - "epoch": 0.5906596641624079, - "grad_norm": 0.6594241857528687, - "learning_rate": 1.635140763015143e-05, - "loss": 0.2874, - "step": 6270 - }, - { - "epoch": 0.5907538682555757, - "grad_norm": 0.7100725769996643, - "learning_rate": 1.6350241225189672e-05, - "loss": 0.3356, - "step": 6271 - }, - { - "epoch": 0.5908480723487436, - "grad_norm": 0.7776488065719604, - "learning_rate": 1.634907467543259e-05, - "loss": 0.346, - "step": 6272 - }, - { - "epoch": 0.5909422764419114, - "grad_norm": 0.7593851685523987, - "learning_rate": 1.6347907980906788e-05, - "loss": 0.3152, - "step": 6273 - }, - { - "epoch": 0.5910364805350793, - "grad_norm": 0.635486364364624, - "learning_rate": 1.6346741141638863e-05, - "loss": 0.2968, - "step": 6274 - }, - { - "epoch": 0.5911306846282471, - "grad_norm": 0.6391139030456543, - "learning_rate": 1.634557415765543e-05, - "loss": 0.3186, - "step": 6275 - }, - { - "epoch": 0.591224888721415, - "grad_norm": 0.7033796310424805, - "learning_rate": 1.634440702898309e-05, - "loss": 0.3068, - "step": 6276 - }, - { - "epoch": 0.5913190928145828, - "grad_norm": 0.6473323702812195, - "learning_rate": 1.634323975564846e-05, - "loss": 0.3039, - "step": 6277 - }, - { - "epoch": 0.5914132969077507, - "grad_norm": 0.725439727306366, - "learning_rate": 1.6342072337678152e-05, - "loss": 0.3223, - "step": 6278 - }, - { - "epoch": 0.5915075010009185, - "grad_norm": 0.8260787129402161, - "learning_rate": 1.6340904775098785e-05, - "loss": 0.3401, - "step": 6279 - }, - { - "epoch": 0.5916017050940864, - "grad_norm": 0.6736300587654114, - "learning_rate": 1.6339737067936986e-05, - "loss": 0.2882, - "step": 6280 - }, - { - "epoch": 0.5916959091872542, - "grad_norm": 0.6979449987411499, - "learning_rate": 1.6338569216219375e-05, - "loss": 0.2978, - "step": 6281 - }, - { - "epoch": 0.5917901132804221, - "grad_norm": 0.8412538170814514, - "learning_rate": 
1.6337401219972583e-05, - "loss": 0.3518, - "step": 6282 - }, - { - "epoch": 0.5918843173735899, - "grad_norm": 0.8842818737030029, - "learning_rate": 1.6336233079223244e-05, - "loss": 0.3375, - "step": 6283 - }, - { - "epoch": 0.5919785214667578, - "grad_norm": 0.7733251452445984, - "learning_rate": 1.633506479399799e-05, - "loss": 0.3652, - "step": 6284 - }, - { - "epoch": 0.5920727255599256, - "grad_norm": 0.7524434924125671, - "learning_rate": 1.6333896364323462e-05, - "loss": 0.3354, - "step": 6285 - }, - { - "epoch": 0.5921669296530935, - "grad_norm": 0.7618670463562012, - "learning_rate": 1.63327277902263e-05, - "loss": 0.3328, - "step": 6286 - }, - { - "epoch": 0.5922611337462613, - "grad_norm": 0.7212899327278137, - "learning_rate": 1.6331559071733153e-05, - "loss": 0.2971, - "step": 6287 - }, - { - "epoch": 0.5923553378394292, - "grad_norm": 0.7447496652603149, - "learning_rate": 1.6330390208870666e-05, - "loss": 0.2942, - "step": 6288 - }, - { - "epoch": 0.592449541932597, - "grad_norm": 0.7702828049659729, - "learning_rate": 1.632922120166549e-05, - "loss": 0.3935, - "step": 6289 - }, - { - "epoch": 0.5925437460257649, - "grad_norm": 0.7589718699455261, - "learning_rate": 1.6328052050144285e-05, - "loss": 0.3181, - "step": 6290 - }, - { - "epoch": 0.5926379501189327, - "grad_norm": 0.7950143814086914, - "learning_rate": 1.6326882754333708e-05, - "loss": 0.3401, - "step": 6291 - }, - { - "epoch": 0.5927321542121006, - "grad_norm": 1.0885834693908691, - "learning_rate": 1.6325713314260415e-05, - "loss": 0.3906, - "step": 6292 - }, - { - "epoch": 0.5928263583052684, - "grad_norm": 1.0690568685531616, - "learning_rate": 1.632454372995108e-05, - "loss": 0.2889, - "step": 6293 - }, - { - "epoch": 0.5929205623984362, - "grad_norm": 0.7887766361236572, - "learning_rate": 1.6323374001432362e-05, - "loss": 0.3554, - "step": 6294 - }, - { - "epoch": 0.593014766491604, - "grad_norm": 0.7963334918022156, - "learning_rate": 1.6322204128730944e-05, - "loss": 0.3442, - "step": 6295 - }, - { - "epoch": 0.5931089705847719, - "grad_norm": 0.7484216690063477, - "learning_rate": 1.6321034111873487e-05, - "loss": 0.3302, - "step": 6296 - }, - { - "epoch": 0.5932031746779397, - "grad_norm": 0.8004947304725647, - "learning_rate": 1.6319863950886683e-05, - "loss": 0.2989, - "step": 6297 - }, - { - "epoch": 0.5932973787711076, - "grad_norm": 0.6810011267662048, - "learning_rate": 1.6318693645797202e-05, - "loss": 0.3102, - "step": 6298 - }, - { - "epoch": 0.5933915828642754, - "grad_norm": 0.829971432685852, - "learning_rate": 1.6317523196631737e-05, - "loss": 0.3402, - "step": 6299 - }, - { - "epoch": 0.5934857869574433, - "grad_norm": 0.6834694743156433, - "learning_rate": 1.6316352603416975e-05, - "loss": 0.3081, - "step": 6300 - }, - { - "epoch": 0.5935799910506111, - "grad_norm": 0.6294878721237183, - "learning_rate": 1.63151818661796e-05, - "loss": 0.2939, - "step": 6301 - }, - { - "epoch": 0.593674195143779, - "grad_norm": 0.7290021777153015, - "learning_rate": 1.6314010984946317e-05, - "loss": 0.3125, - "step": 6302 - }, - { - "epoch": 0.5937683992369468, - "grad_norm": 0.7154480218887329, - "learning_rate": 1.631283995974381e-05, - "loss": 0.3153, - "step": 6303 - }, - { - "epoch": 0.5938626033301146, - "grad_norm": 0.7332636117935181, - "learning_rate": 1.6311668790598795e-05, - "loss": 0.3197, - "step": 6304 - }, - { - "epoch": 0.5939568074232825, - "grad_norm": 0.7051419615745544, - "learning_rate": 1.6310497477537973e-05, - "loss": 0.3229, - "step": 6305 - }, - { - "epoch": 
0.5940510115164503, - "grad_norm": 0.6272082328796387, - "learning_rate": 1.6309326020588048e-05, - "loss": 0.2736, - "step": 6306 - }, - { - "epoch": 0.5941452156096182, - "grad_norm": 0.6881904006004333, - "learning_rate": 1.630815441977573e-05, - "loss": 0.3129, - "step": 6307 - }, - { - "epoch": 0.594239419702786, - "grad_norm": 0.7506492733955383, - "learning_rate": 1.6306982675127735e-05, - "loss": 0.3306, - "step": 6308 - }, - { - "epoch": 0.5943336237959539, - "grad_norm": 0.7264650464057922, - "learning_rate": 1.630581078667078e-05, - "loss": 0.3373, - "step": 6309 - }, - { - "epoch": 0.5944278278891217, - "grad_norm": 0.7115740776062012, - "learning_rate": 1.6304638754431592e-05, - "loss": 0.3135, - "step": 6310 - }, - { - "epoch": 0.5945220319822896, - "grad_norm": 0.7792097330093384, - "learning_rate": 1.6303466578436883e-05, - "loss": 0.3129, - "step": 6311 - }, - { - "epoch": 0.5946162360754574, - "grad_norm": 0.7447195053100586, - "learning_rate": 1.6302294258713395e-05, - "loss": 0.3125, - "step": 6312 - }, - { - "epoch": 0.5947104401686253, - "grad_norm": 0.703715443611145, - "learning_rate": 1.6301121795287846e-05, - "loss": 0.305, - "step": 6313 - }, - { - "epoch": 0.5948046442617931, - "grad_norm": 0.7331198453903198, - "learning_rate": 1.6299949188186977e-05, - "loss": 0.3225, - "step": 6314 - }, - { - "epoch": 0.594898848354961, - "grad_norm": 0.719076931476593, - "learning_rate": 1.6298776437437526e-05, - "loss": 0.2862, - "step": 6315 - }, - { - "epoch": 0.5949930524481288, - "grad_norm": 0.7023767232894897, - "learning_rate": 1.6297603543066226e-05, - "loss": 0.312, - "step": 6316 - }, - { - "epoch": 0.5950872565412967, - "grad_norm": 0.8601207733154297, - "learning_rate": 1.6296430505099828e-05, - "loss": 0.3679, - "step": 6317 - }, - { - "epoch": 0.5951814606344645, - "grad_norm": 0.7502859830856323, - "learning_rate": 1.6295257323565077e-05, - "loss": 0.3062, - "step": 6318 - }, - { - "epoch": 0.5952756647276324, - "grad_norm": 0.7650384306907654, - "learning_rate": 1.6294083998488727e-05, - "loss": 0.3732, - "step": 6319 - }, - { - "epoch": 0.5953698688208002, - "grad_norm": 0.6345619559288025, - "learning_rate": 1.6292910529897522e-05, - "loss": 0.2863, - "step": 6320 - }, - { - "epoch": 0.5954640729139681, - "grad_norm": 0.7029299736022949, - "learning_rate": 1.6291736917818227e-05, - "loss": 0.3419, - "step": 6321 - }, - { - "epoch": 0.5955582770071359, - "grad_norm": 0.7231665849685669, - "learning_rate": 1.62905631622776e-05, - "loss": 0.2789, - "step": 6322 - }, - { - "epoch": 0.5956524811003038, - "grad_norm": 0.7480236887931824, - "learning_rate": 1.6289389263302404e-05, - "loss": 0.3443, - "step": 6323 - }, - { - "epoch": 0.5957466851934716, - "grad_norm": 0.7741104960441589, - "learning_rate": 1.6288215220919405e-05, - "loss": 0.3667, - "step": 6324 - }, - { - "epoch": 0.5958408892866395, - "grad_norm": 0.846674919128418, - "learning_rate": 1.6287041035155375e-05, - "loss": 0.3913, - "step": 6325 - }, - { - "epoch": 0.5959350933798073, - "grad_norm": 0.9323556423187256, - "learning_rate": 1.628586670603709e-05, - "loss": 0.3027, - "step": 6326 - }, - { - "epoch": 0.5960292974729752, - "grad_norm": 0.7532998323440552, - "learning_rate": 1.6284692233591316e-05, - "loss": 0.3009, - "step": 6327 - }, - { - "epoch": 0.596123501566143, - "grad_norm": 0.7252593040466309, - "learning_rate": 1.6283517617844843e-05, - "loss": 0.3249, - "step": 6328 - }, - { - "epoch": 0.5962177056593109, - "grad_norm": 0.7854056358337402, - "learning_rate": 
1.6282342858824452e-05, - "loss": 0.3284, - "step": 6329 - }, - { - "epoch": 0.5963119097524787, - "grad_norm": 1.1120035648345947, - "learning_rate": 1.6281167956556925e-05, - "loss": 0.354, - "step": 6330 - }, - { - "epoch": 0.5964061138456466, - "grad_norm": 0.7881585955619812, - "learning_rate": 1.627999291106906e-05, - "loss": 0.3359, - "step": 6331 - }, - { - "epoch": 0.5965003179388144, - "grad_norm": 0.7999001741409302, - "learning_rate": 1.6278817722387638e-05, - "loss": 0.3137, - "step": 6332 - }, - { - "epoch": 0.5965945220319823, - "grad_norm": 0.7499207854270935, - "learning_rate": 1.6277642390539465e-05, - "loss": 0.3427, - "step": 6333 - }, - { - "epoch": 0.5966887261251501, - "grad_norm": 0.8090696334838867, - "learning_rate": 1.6276466915551336e-05, - "loss": 0.3378, - "step": 6334 - }, - { - "epoch": 0.596782930218318, - "grad_norm": 0.7112345099449158, - "learning_rate": 1.6275291297450055e-05, - "loss": 0.3045, - "step": 6335 - }, - { - "epoch": 0.5968771343114858, - "grad_norm": 0.8204688429832458, - "learning_rate": 1.6274115536262425e-05, - "loss": 0.3058, - "step": 6336 - }, - { - "epoch": 0.5969713384046537, - "grad_norm": 0.8297174572944641, - "learning_rate": 1.627293963201526e-05, - "loss": 0.4045, - "step": 6337 - }, - { - "epoch": 0.5970655424978215, - "grad_norm": 0.7028781771659851, - "learning_rate": 1.6271763584735373e-05, - "loss": 0.3385, - "step": 6338 - }, - { - "epoch": 0.5971597465909894, - "grad_norm": 0.8586329221725464, - "learning_rate": 1.6270587394449573e-05, - "loss": 0.3884, - "step": 6339 - }, - { - "epoch": 0.5972539506841572, - "grad_norm": 0.789535641670227, - "learning_rate": 1.6269411061184683e-05, - "loss": 0.336, - "step": 6340 - }, - { - "epoch": 0.5973481547773251, - "grad_norm": 0.7452242970466614, - "learning_rate": 1.6268234584967527e-05, - "loss": 0.336, - "step": 6341 - }, - { - "epoch": 0.5974423588704929, - "grad_norm": 0.7482655048370361, - "learning_rate": 1.6267057965824925e-05, - "loss": 0.3672, - "step": 6342 - }, - { - "epoch": 0.5975365629636608, - "grad_norm": 0.8206696510314941, - "learning_rate": 1.626588120378371e-05, - "loss": 0.3469, - "step": 6343 - }, - { - "epoch": 0.5976307670568286, - "grad_norm": 0.7177087664604187, - "learning_rate": 1.6264704298870715e-05, - "loss": 0.3186, - "step": 6344 - }, - { - "epoch": 0.5977249711499965, - "grad_norm": 0.7452403903007507, - "learning_rate": 1.6263527251112775e-05, - "loss": 0.34, - "step": 6345 - }, - { - "epoch": 0.5978191752431643, - "grad_norm": 0.7684333920478821, - "learning_rate": 1.626235006053672e-05, - "loss": 0.3334, - "step": 6346 - }, - { - "epoch": 0.5979133793363322, - "grad_norm": 0.7974861264228821, - "learning_rate": 1.6261172727169406e-05, - "loss": 0.3471, - "step": 6347 - }, - { - "epoch": 0.5980075834295, - "grad_norm": 0.8435362577438354, - "learning_rate": 1.6259995251037672e-05, - "loss": 0.3429, - "step": 6348 - }, - { - "epoch": 0.5981017875226678, - "grad_norm": 0.7124806642532349, - "learning_rate": 1.6258817632168357e-05, - "loss": 0.3262, - "step": 6349 - }, - { - "epoch": 0.5981959916158357, - "grad_norm": 0.7446151375770569, - "learning_rate": 1.6257639870588325e-05, - "loss": 0.3336, - "step": 6350 - }, - { - "epoch": 0.5982901957090035, - "grad_norm": 0.7604463696479797, - "learning_rate": 1.6256461966324425e-05, - "loss": 0.3157, - "step": 6351 - }, - { - "epoch": 0.5983843998021714, - "grad_norm": 0.7514591813087463, - "learning_rate": 1.625528391940352e-05, - "loss": 0.3376, - "step": 6352 - }, - { - "epoch": 0.5984786038953392, 
- "grad_norm": 0.765207052230835, - "learning_rate": 1.6254105729852466e-05, - "loss": 0.3526, - "step": 6353 - }, - { - "epoch": 0.5985728079885071, - "grad_norm": 0.7503435015678406, - "learning_rate": 1.6252927397698125e-05, - "loss": 0.3441, - "step": 6354 - }, - { - "epoch": 0.5986670120816749, - "grad_norm": 0.7455945611000061, - "learning_rate": 1.6251748922967374e-05, - "loss": 0.3624, - "step": 6355 - }, - { - "epoch": 0.5987612161748428, - "grad_norm": 0.6535062193870544, - "learning_rate": 1.6250570305687077e-05, - "loss": 0.2649, - "step": 6356 - }, - { - "epoch": 0.5988554202680106, - "grad_norm": 0.8072831630706787, - "learning_rate": 1.624939154588411e-05, - "loss": 0.3485, - "step": 6357 - }, - { - "epoch": 0.5989496243611785, - "grad_norm": 1.0439919233322144, - "learning_rate": 1.624821264358535e-05, - "loss": 0.3534, - "step": 6358 - }, - { - "epoch": 0.5990438284543463, - "grad_norm": 0.7842357158660889, - "learning_rate": 1.6247033598817683e-05, - "loss": 0.3284, - "step": 6359 - }, - { - "epoch": 0.5991380325475142, - "grad_norm": 0.740278959274292, - "learning_rate": 1.6245854411607987e-05, - "loss": 0.3337, - "step": 6360 - }, - { - "epoch": 0.599232236640682, - "grad_norm": 0.7467502355575562, - "learning_rate": 1.624467508198315e-05, - "loss": 0.3537, - "step": 6361 - }, - { - "epoch": 0.5993264407338499, - "grad_norm": 0.688685417175293, - "learning_rate": 1.6243495609970063e-05, - "loss": 0.2656, - "step": 6362 - }, - { - "epoch": 0.5994206448270177, - "grad_norm": 0.8290200233459473, - "learning_rate": 1.624231599559562e-05, - "loss": 0.3136, - "step": 6363 - }, - { - "epoch": 0.5995148489201856, - "grad_norm": 0.748266339302063, - "learning_rate": 1.6241136238886718e-05, - "loss": 0.3122, - "step": 6364 - }, - { - "epoch": 0.5996090530133534, - "grad_norm": 0.7195231318473816, - "learning_rate": 1.623995633987026e-05, - "loss": 0.3422, - "step": 6365 - }, - { - "epoch": 0.5997032571065213, - "grad_norm": 0.8352035284042358, - "learning_rate": 1.6238776298573146e-05, - "loss": 0.3844, - "step": 6366 - }, - { - "epoch": 0.5997974611996891, - "grad_norm": 0.7455942630767822, - "learning_rate": 1.6237596115022284e-05, - "loss": 0.3229, - "step": 6367 - }, - { - "epoch": 0.599891665292857, - "grad_norm": 0.8090448379516602, - "learning_rate": 1.6236415789244586e-05, - "loss": 0.37, - "step": 6368 - }, - { - "epoch": 0.5999858693860248, - "grad_norm": 0.8044054508209229, - "learning_rate": 1.6235235321266958e-05, - "loss": 0.3058, - "step": 6369 - }, - { - "epoch": 0.6000800734791927, - "grad_norm": 0.7277327179908752, - "learning_rate": 1.6234054711116325e-05, - "loss": 0.2925, - "step": 6370 - }, - { - "epoch": 0.6001742775723605, - "grad_norm": 0.8373342752456665, - "learning_rate": 1.6232873958819603e-05, - "loss": 0.3469, - "step": 6371 - }, - { - "epoch": 0.6002684816655284, - "grad_norm": 0.7661371827125549, - "learning_rate": 1.6231693064403716e-05, - "loss": 0.3311, - "step": 6372 - }, - { - "epoch": 0.6003626857586962, - "grad_norm": 0.6980554461479187, - "learning_rate": 1.623051202789559e-05, - "loss": 0.2768, - "step": 6373 - }, - { - "epoch": 0.6004568898518641, - "grad_norm": 0.7779518961906433, - "learning_rate": 1.6229330849322155e-05, - "loss": 0.35, - "step": 6374 - }, - { - "epoch": 0.6005510939450319, - "grad_norm": 0.8295838236808777, - "learning_rate": 1.622814952871034e-05, - "loss": 0.3401, - "step": 6375 - }, - { - "epoch": 0.6006452980381998, - "grad_norm": 0.6845333576202393, - "learning_rate": 1.6226968066087084e-05, - "loss": 0.3166, - 
"step": 6376 - }, - { - "epoch": 0.6007395021313676, - "grad_norm": 0.7462641596794128, - "learning_rate": 1.622578646147933e-05, - "loss": 0.2987, - "step": 6377 - }, - { - "epoch": 0.6008337062245355, - "grad_norm": 0.7424726486206055, - "learning_rate": 1.6224604714914013e-05, - "loss": 0.3264, - "step": 6378 - }, - { - "epoch": 0.6009279103177033, - "grad_norm": 0.7039068937301636, - "learning_rate": 1.622342282641808e-05, - "loss": 0.3031, - "step": 6379 - }, - { - "epoch": 0.6010221144108712, - "grad_norm": 0.7586017847061157, - "learning_rate": 1.6222240796018485e-05, - "loss": 0.3026, - "step": 6380 - }, - { - "epoch": 0.601116318504039, - "grad_norm": 0.6884627938270569, - "learning_rate": 1.6221058623742175e-05, - "loss": 0.298, - "step": 6381 - }, - { - "epoch": 0.6012105225972069, - "grad_norm": 0.6970832943916321, - "learning_rate": 1.621987630961611e-05, - "loss": 0.3049, - "step": 6382 - }, - { - "epoch": 0.6013047266903747, - "grad_norm": 0.7597485780715942, - "learning_rate": 1.6218693853667243e-05, - "loss": 0.3237, - "step": 6383 - }, - { - "epoch": 0.6013989307835426, - "grad_norm": 0.6965933442115784, - "learning_rate": 1.6217511255922537e-05, - "loss": 0.2817, - "step": 6384 - }, - { - "epoch": 0.6014931348767104, - "grad_norm": 0.7090418338775635, - "learning_rate": 1.6216328516408967e-05, - "loss": 0.3308, - "step": 6385 - }, - { - "epoch": 0.6015873389698783, - "grad_norm": 0.7436476349830627, - "learning_rate": 1.6215145635153486e-05, - "loss": 0.2948, - "step": 6386 - }, - { - "epoch": 0.6016815430630461, - "grad_norm": 0.824032723903656, - "learning_rate": 1.6213962612183075e-05, - "loss": 0.3688, - "step": 6387 - }, - { - "epoch": 0.601775747156214, - "grad_norm": 0.8087217807769775, - "learning_rate": 1.6212779447524705e-05, - "loss": 0.3284, - "step": 6388 - }, - { - "epoch": 0.6018699512493818, - "grad_norm": 0.9960329532623291, - "learning_rate": 1.6211596141205354e-05, - "loss": 0.3162, - "step": 6389 - }, - { - "epoch": 0.6019641553425497, - "grad_norm": 1.5578619241714478, - "learning_rate": 1.6210412693252008e-05, - "loss": 0.3386, - "step": 6390 - }, - { - "epoch": 0.6020583594357175, - "grad_norm": 0.8485972285270691, - "learning_rate": 1.6209229103691646e-05, - "loss": 0.3746, - "step": 6391 - }, - { - "epoch": 0.6021525635288854, - "grad_norm": 0.8117714524269104, - "learning_rate": 1.620804537255126e-05, - "loss": 0.3493, - "step": 6392 - }, - { - "epoch": 0.6022467676220532, - "grad_norm": 0.8204595446586609, - "learning_rate": 1.6206861499857834e-05, - "loss": 0.3076, - "step": 6393 - }, - { - "epoch": 0.602340971715221, - "grad_norm": 0.7204444408416748, - "learning_rate": 1.620567748563837e-05, - "loss": 0.3395, - "step": 6394 - }, - { - "epoch": 0.6024351758083889, - "grad_norm": 0.6890771985054016, - "learning_rate": 1.6204493329919863e-05, - "loss": 0.3302, - "step": 6395 - }, - { - "epoch": 0.6025293799015567, - "grad_norm": 0.9710570573806763, - "learning_rate": 1.620330903272931e-05, - "loss": 0.3111, - "step": 6396 - }, - { - "epoch": 0.6026235839947246, - "grad_norm": 0.7515151500701904, - "learning_rate": 1.620212459409372e-05, - "loss": 0.3214, - "step": 6397 - }, - { - "epoch": 0.6027177880878924, - "grad_norm": 0.7595040202140808, - "learning_rate": 1.62009400140401e-05, - "loss": 0.3546, - "step": 6398 - }, - { - "epoch": 0.6028119921810603, - "grad_norm": 0.7797225117683411, - "learning_rate": 1.619975529259545e-05, - "loss": 0.293, - "step": 6399 - }, - { - "epoch": 0.6029061962742281, - "grad_norm": 0.6177393198013306, - 
"learning_rate": 1.6198570429786797e-05, - "loss": 0.2894, - "step": 6400 - }, - { - "epoch": 0.603000400367396, - "grad_norm": 0.9203518033027649, - "learning_rate": 1.619738542564115e-05, - "loss": 0.3339, - "step": 6401 - }, - { - "epoch": 0.6030946044605638, - "grad_norm": 0.7182189226150513, - "learning_rate": 1.6196200280185532e-05, - "loss": 0.3188, - "step": 6402 - }, - { - "epoch": 0.6031888085537317, - "grad_norm": 0.8205165863037109, - "learning_rate": 1.619501499344696e-05, - "loss": 0.394, - "step": 6403 - }, - { - "epoch": 0.6032830126468995, - "grad_norm": 0.7397306561470032, - "learning_rate": 1.619382956545247e-05, - "loss": 0.3184, - "step": 6404 - }, - { - "epoch": 0.6033772167400674, - "grad_norm": 0.7576934695243835, - "learning_rate": 1.619264399622909e-05, - "loss": 0.3423, - "step": 6405 - }, - { - "epoch": 0.6034714208332352, - "grad_norm": 0.6832389235496521, - "learning_rate": 1.6191458285803845e-05, - "loss": 0.2759, - "step": 6406 - }, - { - "epoch": 0.6035656249264031, - "grad_norm": 0.7263265252113342, - "learning_rate": 1.6190272434203776e-05, - "loss": 0.3262, - "step": 6407 - }, - { - "epoch": 0.6036598290195709, - "grad_norm": 0.7277495265007019, - "learning_rate": 1.6189086441455922e-05, - "loss": 0.3115, - "step": 6408 - }, - { - "epoch": 0.6037540331127388, - "grad_norm": 0.7232794761657715, - "learning_rate": 1.6187900307587328e-05, - "loss": 0.2919, - "step": 6409 - }, - { - "epoch": 0.6038482372059066, - "grad_norm": 0.8191846013069153, - "learning_rate": 1.6186714032625036e-05, - "loss": 0.3251, - "step": 6410 - }, - { - "epoch": 0.6039424412990745, - "grad_norm": 0.7194938659667969, - "learning_rate": 1.6185527616596096e-05, - "loss": 0.3441, - "step": 6411 - }, - { - "epoch": 0.6040366453922423, - "grad_norm": 0.7607441544532776, - "learning_rate": 1.618434105952756e-05, - "loss": 0.3488, - "step": 6412 - }, - { - "epoch": 0.6041308494854102, - "grad_norm": 0.7107058167457581, - "learning_rate": 1.6183154361446484e-05, - "loss": 0.3302, - "step": 6413 - }, - { - "epoch": 0.604225053578578, - "grad_norm": 0.7096536755561829, - "learning_rate": 1.6181967522379925e-05, - "loss": 0.2899, - "step": 6414 - }, - { - "epoch": 0.6043192576717459, - "grad_norm": 0.881682813167572, - "learning_rate": 1.6180780542354947e-05, - "loss": 0.3484, - "step": 6415 - }, - { - "epoch": 0.6044134617649137, - "grad_norm": 0.7957301735877991, - "learning_rate": 1.6179593421398614e-05, - "loss": 0.3169, - "step": 6416 - }, - { - "epoch": 0.6045076658580816, - "grad_norm": 0.737355649471283, - "learning_rate": 1.6178406159537992e-05, - "loss": 0.3238, - "step": 6417 - }, - { - "epoch": 0.6046018699512494, - "grad_norm": 0.697355329990387, - "learning_rate": 1.6177218756800158e-05, - "loss": 0.3201, - "step": 6418 - }, - { - "epoch": 0.6046960740444173, - "grad_norm": 1.2903817892074585, - "learning_rate": 1.617603121321218e-05, - "loss": 0.3525, - "step": 6419 - }, - { - "epoch": 0.6047902781375851, - "grad_norm": 0.8625286221504211, - "learning_rate": 1.617484352880114e-05, - "loss": 0.3398, - "step": 6420 - }, - { - "epoch": 0.604884482230753, - "grad_norm": 0.7835841774940491, - "learning_rate": 1.617365570359412e-05, - "loss": 0.3522, - "step": 6421 - }, - { - "epoch": 0.6049786863239208, - "grad_norm": 0.7972666025161743, - "learning_rate": 1.61724677376182e-05, - "loss": 0.3105, - "step": 6422 - }, - { - "epoch": 0.6050728904170887, - "grad_norm": 0.9149686694145203, - "learning_rate": 1.6171279630900468e-05, - "loss": 0.3529, - "step": 6423 - }, - { - "epoch": 
0.6051670945102565, - "grad_norm": 0.8179230093955994, - "learning_rate": 1.6170091383468022e-05, - "loss": 0.3443, - "step": 6424 - }, - { - "epoch": 0.6052612986034244, - "grad_norm": 0.7353527545928955, - "learning_rate": 1.6168902995347944e-05, - "loss": 0.3243, - "step": 6425 - }, - { - "epoch": 0.6053555026965922, - "grad_norm": 0.7327826023101807, - "learning_rate": 1.616771446656734e-05, - "loss": 0.3194, - "step": 6426 - }, - { - "epoch": 0.6054497067897601, - "grad_norm": 0.7549339532852173, - "learning_rate": 1.616652579715331e-05, - "loss": 0.3609, - "step": 6427 - }, - { - "epoch": 0.6055439108829279, - "grad_norm": 0.7450674772262573, - "learning_rate": 1.616533698713295e-05, - "loss": 0.3143, - "step": 6428 - }, - { - "epoch": 0.6056381149760958, - "grad_norm": 0.7472980618476868, - "learning_rate": 1.6164148036533376e-05, - "loss": 0.3383, - "step": 6429 - }, - { - "epoch": 0.6057323190692636, - "grad_norm": 0.683591902256012, - "learning_rate": 1.616295894538169e-05, - "loss": 0.3279, - "step": 6430 - }, - { - "epoch": 0.6058265231624315, - "grad_norm": 0.699081301689148, - "learning_rate": 1.6161769713705015e-05, - "loss": 0.3008, - "step": 6431 - }, - { - "epoch": 0.6059207272555992, - "grad_norm": 0.8056870102882385, - "learning_rate": 1.6160580341530455e-05, - "loss": 0.3514, - "step": 6432 - }, - { - "epoch": 0.606014931348767, - "grad_norm": 0.7246406674385071, - "learning_rate": 1.6159390828885137e-05, - "loss": 0.2994, - "step": 6433 - }, - { - "epoch": 0.6061091354419349, - "grad_norm": 0.6836478114128113, - "learning_rate": 1.6158201175796186e-05, - "loss": 0.3015, - "step": 6434 - }, - { - "epoch": 0.6062033395351027, - "grad_norm": 0.7912698984146118, - "learning_rate": 1.615701138229072e-05, - "loss": 0.3346, - "step": 6435 - }, - { - "epoch": 0.6062975436282706, - "grad_norm": 0.7896254658699036, - "learning_rate": 1.6155821448395874e-05, - "loss": 0.3272, - "step": 6436 - }, - { - "epoch": 0.6063917477214384, - "grad_norm": 0.7898254990577698, - "learning_rate": 1.6154631374138777e-05, - "loss": 0.3083, - "step": 6437 - }, - { - "epoch": 0.6064859518146063, - "grad_norm": 0.7792987823486328, - "learning_rate": 1.615344115954657e-05, - "loss": 0.3989, - "step": 6438 - }, - { - "epoch": 0.6065801559077741, - "grad_norm": 0.7514324188232422, - "learning_rate": 1.6152250804646386e-05, - "loss": 0.3793, - "step": 6439 - }, - { - "epoch": 0.606674360000942, - "grad_norm": 1.2695530652999878, - "learning_rate": 1.615106030946537e-05, - "loss": 0.3289, - "step": 6440 - }, - { - "epoch": 0.6067685640941098, - "grad_norm": 0.8743518590927124, - "learning_rate": 1.6149869674030663e-05, - "loss": 0.3704, - "step": 6441 - }, - { - "epoch": 0.6068627681872777, - "grad_norm": 0.7111954092979431, - "learning_rate": 1.6148678898369422e-05, - "loss": 0.3217, - "step": 6442 - }, - { - "epoch": 0.6069569722804455, - "grad_norm": 0.6936813592910767, - "learning_rate": 1.6147487982508788e-05, - "loss": 0.3209, - "step": 6443 - }, - { - "epoch": 0.6070511763736134, - "grad_norm": 0.7675489187240601, - "learning_rate": 1.614629692647592e-05, - "loss": 0.3221, - "step": 6444 - }, - { - "epoch": 0.6071453804667812, - "grad_norm": 0.7326104044914246, - "learning_rate": 1.6145105730297986e-05, - "loss": 0.3387, - "step": 6445 - }, - { - "epoch": 0.6072395845599491, - "grad_norm": 0.7697705626487732, - "learning_rate": 1.6143914394002127e-05, - "loss": 0.3137, - "step": 6446 - }, - { - "epoch": 0.6073337886531169, - "grad_norm": 0.7528795599937439, - "learning_rate": 
1.6142722917615526e-05, - "loss": 0.3744, - "step": 6447 - }, - { - "epoch": 0.6074279927462848, - "grad_norm": 0.7764901518821716, - "learning_rate": 1.6141531301165335e-05, - "loss": 0.31, - "step": 6448 - }, - { - "epoch": 0.6075221968394526, - "grad_norm": 0.8144018650054932, - "learning_rate": 1.6140339544678738e-05, - "loss": 0.3191, - "step": 6449 - }, - { - "epoch": 0.6076164009326205, - "grad_norm": 0.7953997254371643, - "learning_rate": 1.61391476481829e-05, - "loss": 0.3249, - "step": 6450 - }, - { - "epoch": 0.6077106050257883, - "grad_norm": 0.7794833779335022, - "learning_rate": 1.6137955611705003e-05, - "loss": 0.3676, - "step": 6451 - }, - { - "epoch": 0.6078048091189562, - "grad_norm": 0.7058234810829163, - "learning_rate": 1.6136763435272227e-05, - "loss": 0.3165, - "step": 6452 - }, - { - "epoch": 0.607899013212124, - "grad_norm": 0.7798442244529724, - "learning_rate": 1.613557111891175e-05, - "loss": 0.29, - "step": 6453 - }, - { - "epoch": 0.6079932173052919, - "grad_norm": 0.6657100319862366, - "learning_rate": 1.6134378662650763e-05, - "loss": 0.3063, - "step": 6454 - }, - { - "epoch": 0.6080874213984597, - "grad_norm": 0.7678936123847961, - "learning_rate": 1.613318606651646e-05, - "loss": 0.3025, - "step": 6455 - }, - { - "epoch": 0.6081816254916276, - "grad_norm": 0.7438762187957764, - "learning_rate": 1.6131993330536025e-05, - "loss": 0.338, - "step": 6456 - }, - { - "epoch": 0.6082758295847954, - "grad_norm": 0.7807517647743225, - "learning_rate": 1.6130800454736663e-05, - "loss": 0.3099, - "step": 6457 - }, - { - "epoch": 0.6083700336779633, - "grad_norm": 0.7923942804336548, - "learning_rate": 1.6129607439145564e-05, - "loss": 0.338, - "step": 6458 - }, - { - "epoch": 0.6084642377711311, - "grad_norm": 0.7815907597541809, - "learning_rate": 1.6128414283789937e-05, - "loss": 0.3054, - "step": 6459 - }, - { - "epoch": 0.608558441864299, - "grad_norm": 0.7461601495742798, - "learning_rate": 1.612722098869699e-05, - "loss": 0.3402, - "step": 6460 - }, - { - "epoch": 0.6086526459574668, - "grad_norm": 0.7422223687171936, - "learning_rate": 1.6126027553893924e-05, - "loss": 0.3018, - "step": 6461 - }, - { - "epoch": 0.6087468500506347, - "grad_norm": 1.1513397693634033, - "learning_rate": 1.612483397940796e-05, - "loss": 0.3206, - "step": 6462 - }, - { - "epoch": 0.6088410541438025, - "grad_norm": 0.7039524912834167, - "learning_rate": 1.6123640265266306e-05, - "loss": 0.3185, - "step": 6463 - }, - { - "epoch": 0.6089352582369704, - "grad_norm": 0.6909993290901184, - "learning_rate": 1.6122446411496184e-05, - "loss": 0.3022, - "step": 6464 - }, - { - "epoch": 0.6090294623301382, - "grad_norm": 0.7051947116851807, - "learning_rate": 1.612125241812482e-05, - "loss": 0.2992, - "step": 6465 - }, - { - "epoch": 0.6091236664233061, - "grad_norm": 0.675311267375946, - "learning_rate": 1.612005828517943e-05, - "loss": 0.2678, - "step": 6466 - }, - { - "epoch": 0.6092178705164739, - "grad_norm": 0.7527945637702942, - "learning_rate": 1.6118864012687246e-05, - "loss": 0.345, - "step": 6467 - }, - { - "epoch": 0.6093120746096418, - "grad_norm": 0.7540647983551025, - "learning_rate": 1.61176696006755e-05, - "loss": 0.3219, - "step": 6468 - }, - { - "epoch": 0.6094062787028096, - "grad_norm": 0.9244742393493652, - "learning_rate": 1.6116475049171424e-05, - "loss": 0.3276, - "step": 6469 - }, - { - "epoch": 0.6095004827959775, - "grad_norm": 1.0800234079360962, - "learning_rate": 1.6115280358202258e-05, - "loss": 0.3412, - "step": 6470 - }, - { - "epoch": 0.6095946868891453, - 
"grad_norm": 0.7098768353462219, - "learning_rate": 1.6114085527795243e-05, - "loss": 0.3596, - "step": 6471 - }, - { - "epoch": 0.6096888909823132, - "grad_norm": 0.7159668207168579, - "learning_rate": 1.6112890557977627e-05, - "loss": 0.3013, - "step": 6472 - }, - { - "epoch": 0.609783095075481, - "grad_norm": 0.7337132096290588, - "learning_rate": 1.6111695448776646e-05, - "loss": 0.2818, - "step": 6473 - }, - { - "epoch": 0.6098772991686489, - "grad_norm": 0.9161654114723206, - "learning_rate": 1.6110500200219562e-05, - "loss": 0.351, - "step": 6474 - }, - { - "epoch": 0.6099715032618167, - "grad_norm": 0.7414738535881042, - "learning_rate": 1.610930481233362e-05, - "loss": 0.3271, - "step": 6475 - }, - { - "epoch": 0.6100657073549846, - "grad_norm": 0.7354120016098022, - "learning_rate": 1.610810928514608e-05, - "loss": 0.3817, - "step": 6476 - }, - { - "epoch": 0.6101599114481524, - "grad_norm": 0.6794965267181396, - "learning_rate": 1.6106913618684204e-05, - "loss": 0.3438, - "step": 6477 - }, - { - "epoch": 0.6102541155413203, - "grad_norm": 0.9572599530220032, - "learning_rate": 1.6105717812975254e-05, - "loss": 0.318, - "step": 6478 - }, - { - "epoch": 0.6103483196344881, - "grad_norm": 0.8186508417129517, - "learning_rate": 1.610452186804649e-05, - "loss": 0.3164, - "step": 6479 - }, - { - "epoch": 0.610442523727656, - "grad_norm": 0.7896221280097961, - "learning_rate": 1.610332578392519e-05, - "loss": 0.3159, - "step": 6480 - }, - { - "epoch": 0.6105367278208238, - "grad_norm": 0.6747741103172302, - "learning_rate": 1.610212956063862e-05, - "loss": 0.3161, - "step": 6481 - }, - { - "epoch": 0.6106309319139916, - "grad_norm": 0.728388249874115, - "learning_rate": 1.6100933198214065e-05, - "loss": 0.3035, - "step": 6482 - }, - { - "epoch": 0.6107251360071595, - "grad_norm": 0.7789629697799683, - "learning_rate": 1.6099736696678795e-05, - "loss": 0.3037, - "step": 6483 - }, - { - "epoch": 0.6108193401003273, - "grad_norm": 0.8035514950752258, - "learning_rate": 1.609854005606009e-05, - "loss": 0.3518, - "step": 6484 - }, - { - "epoch": 0.6109135441934952, - "grad_norm": 0.9325544834136963, - "learning_rate": 1.6097343276385248e-05, - "loss": 0.3471, - "step": 6485 - }, - { - "epoch": 0.611007748286663, - "grad_norm": 0.7909505367279053, - "learning_rate": 1.6096146357681545e-05, - "loss": 0.3618, - "step": 6486 - }, - { - "epoch": 0.6111019523798309, - "grad_norm": 0.7790225744247437, - "learning_rate": 1.6094949299976283e-05, - "loss": 0.3519, - "step": 6487 - }, - { - "epoch": 0.6111961564729987, - "grad_norm": 0.7841339111328125, - "learning_rate": 1.6093752103296742e-05, - "loss": 0.3627, - "step": 6488 - }, - { - "epoch": 0.6112903605661666, - "grad_norm": 0.7887298464775085, - "learning_rate": 1.6092554767670236e-05, - "loss": 0.3259, - "step": 6489 - }, - { - "epoch": 0.6113845646593344, - "grad_norm": 0.7558081746101379, - "learning_rate": 1.6091357293124055e-05, - "loss": 0.2976, - "step": 6490 - }, - { - "epoch": 0.6114787687525023, - "grad_norm": 0.7437493801116943, - "learning_rate": 1.6090159679685507e-05, - "loss": 0.3187, - "step": 6491 - }, - { - "epoch": 0.6115729728456701, - "grad_norm": 0.6939466595649719, - "learning_rate": 1.60889619273819e-05, - "loss": 0.3225, - "step": 6492 - }, - { - "epoch": 0.611667176938838, - "grad_norm": 0.7833172082901001, - "learning_rate": 1.6087764036240545e-05, - "loss": 0.3288, - "step": 6493 - }, - { - "epoch": 0.6117613810320058, - "grad_norm": 0.8887497186660767, - "learning_rate": 1.6086566006288755e-05, - "loss": 0.3882, - 
"step": 6494 - }, - { - "epoch": 0.6118555851251737, - "grad_norm": 0.776712954044342, - "learning_rate": 1.608536783755385e-05, - "loss": 0.2764, - "step": 6495 - }, - { - "epoch": 0.6119497892183415, - "grad_norm": 0.9048894643783569, - "learning_rate": 1.608416953006314e-05, - "loss": 0.3693, - "step": 6496 - }, - { - "epoch": 0.6120439933115094, - "grad_norm": 0.6698115468025208, - "learning_rate": 1.6082971083843962e-05, - "loss": 0.2707, - "step": 6497 - }, - { - "epoch": 0.6121381974046772, - "grad_norm": 0.786310613155365, - "learning_rate": 1.6081772498923635e-05, - "loss": 0.3426, - "step": 6498 - }, - { - "epoch": 0.6122324014978451, - "grad_norm": 0.9830306768417358, - "learning_rate": 1.6080573775329487e-05, - "loss": 0.3652, - "step": 6499 - }, - { - "epoch": 0.6123266055910129, - "grad_norm": 0.6997115612030029, - "learning_rate": 1.6079374913088855e-05, - "loss": 0.3163, - "step": 6500 - }, - { - "epoch": 0.6124208096841808, - "grad_norm": 0.7159838080406189, - "learning_rate": 1.607817591222907e-05, - "loss": 0.3141, - "step": 6501 - }, - { - "epoch": 0.6125150137773486, - "grad_norm": 0.6669812798500061, - "learning_rate": 1.607697677277748e-05, - "loss": 0.2776, - "step": 6502 - }, - { - "epoch": 0.6126092178705165, - "grad_norm": 0.7507301568984985, - "learning_rate": 1.6075777494761418e-05, - "loss": 0.3215, - "step": 6503 - }, - { - "epoch": 0.6127034219636843, - "grad_norm": 0.7340702414512634, - "learning_rate": 1.607457807820823e-05, - "loss": 0.3295, - "step": 6504 - }, - { - "epoch": 0.6127976260568522, - "grad_norm": 0.741966724395752, - "learning_rate": 1.6073378523145272e-05, - "loss": 0.3646, - "step": 6505 - }, - { - "epoch": 0.61289183015002, - "grad_norm": 0.9108197093009949, - "learning_rate": 1.6072178829599886e-05, - "loss": 0.3381, - "step": 6506 - }, - { - "epoch": 0.6129860342431879, - "grad_norm": 0.7761343121528625, - "learning_rate": 1.6070978997599436e-05, - "loss": 0.3381, - "step": 6507 - }, - { - "epoch": 0.6130802383363557, - "grad_norm": 0.6764536499977112, - "learning_rate": 1.6069779027171274e-05, - "loss": 0.3239, - "step": 6508 - }, - { - "epoch": 0.6131744424295236, - "grad_norm": 0.6758368611335754, - "learning_rate": 1.6068578918342764e-05, - "loss": 0.2754, - "step": 6509 - }, - { - "epoch": 0.6132686465226914, - "grad_norm": 0.7585486769676208, - "learning_rate": 1.6067378671141268e-05, - "loss": 0.3488, - "step": 6510 - }, - { - "epoch": 0.6133628506158593, - "grad_norm": 0.7682276368141174, - "learning_rate": 1.6066178285594155e-05, - "loss": 0.3452, - "step": 6511 - }, - { - "epoch": 0.6134570547090271, - "grad_norm": 0.7287978529930115, - "learning_rate": 1.6064977761728798e-05, - "loss": 0.322, - "step": 6512 - }, - { - "epoch": 0.613551258802195, - "grad_norm": 0.7907217144966125, - "learning_rate": 1.606377709957257e-05, - "loss": 0.375, - "step": 6513 - }, - { - "epoch": 0.6136454628953628, - "grad_norm": 0.7274413108825684, - "learning_rate": 1.606257629915284e-05, - "loss": 0.3072, - "step": 6514 - }, - { - "epoch": 0.6137396669885307, - "grad_norm": 0.8911492824554443, - "learning_rate": 1.6061375360497e-05, - "loss": 0.3349, - "step": 6515 - }, - { - "epoch": 0.6138338710816985, - "grad_norm": 0.711392879486084, - "learning_rate": 1.6060174283632423e-05, - "loss": 0.3151, - "step": 6516 - }, - { - "epoch": 0.6139280751748664, - "grad_norm": 0.7808783650398254, - "learning_rate": 1.6058973068586503e-05, - "loss": 0.3171, - "step": 6517 - }, - { - "epoch": 0.6140222792680342, - "grad_norm": 0.717170000076294, - 
"learning_rate": 1.6057771715386624e-05, - "loss": 0.2443, - "step": 6518 - }, - { - "epoch": 0.6141164833612021, - "grad_norm": 0.6050516366958618, - "learning_rate": 1.6056570224060184e-05, - "loss": 0.2431, - "step": 6519 - }, - { - "epoch": 0.6142106874543699, - "grad_norm": 0.7782800793647766, - "learning_rate": 1.6055368594634576e-05, - "loss": 0.3064, - "step": 6520 - }, - { - "epoch": 0.6143048915475378, - "grad_norm": 0.9019989967346191, - "learning_rate": 1.60541668271372e-05, - "loss": 0.3511, - "step": 6521 - }, - { - "epoch": 0.6143990956407056, - "grad_norm": 0.9744110703468323, - "learning_rate": 1.6052964921595457e-05, - "loss": 0.3424, - "step": 6522 - }, - { - "epoch": 0.6144932997338735, - "grad_norm": 0.7475957870483398, - "learning_rate": 1.605176287803675e-05, - "loss": 0.3177, - "step": 6523 - }, - { - "epoch": 0.6145875038270413, - "grad_norm": 0.856407880783081, - "learning_rate": 1.6050560696488493e-05, - "loss": 0.3021, - "step": 6524 - }, - { - "epoch": 0.6146817079202092, - "grad_norm": 0.7817702293395996, - "learning_rate": 1.6049358376978092e-05, - "loss": 0.3265, - "step": 6525 - }, - { - "epoch": 0.614775912013377, - "grad_norm": 1.1578670740127563, - "learning_rate": 1.6048155919532967e-05, - "loss": 0.3299, - "step": 6526 - }, - { - "epoch": 0.6148701161065449, - "grad_norm": 0.7789450883865356, - "learning_rate": 1.604695332418053e-05, - "loss": 0.3703, - "step": 6527 - }, - { - "epoch": 0.6149643201997127, - "grad_norm": 0.8720627427101135, - "learning_rate": 1.6045750590948214e-05, - "loss": 0.3612, - "step": 6528 - }, - { - "epoch": 0.6150585242928805, - "grad_norm": 0.7772655487060547, - "learning_rate": 1.6044547719863426e-05, - "loss": 0.3449, - "step": 6529 - }, - { - "epoch": 0.6151527283860484, - "grad_norm": 0.7157712578773499, - "learning_rate": 1.6043344710953604e-05, - "loss": 0.3074, - "step": 6530 - }, - { - "epoch": 0.6152469324792162, - "grad_norm": 0.6866297125816345, - "learning_rate": 1.6042141564246175e-05, - "loss": 0.3429, - "step": 6531 - }, - { - "epoch": 0.6153411365723841, - "grad_norm": 0.8729900121688843, - "learning_rate": 1.604093827976858e-05, - "loss": 0.398, - "step": 6532 - }, - { - "epoch": 0.6154353406655519, - "grad_norm": 0.872604489326477, - "learning_rate": 1.6039734857548243e-05, - "loss": 0.3161, - "step": 6533 - }, - { - "epoch": 0.6155295447587198, - "grad_norm": 0.9276250004768372, - "learning_rate": 1.6038531297612613e-05, - "loss": 0.371, - "step": 6534 - }, - { - "epoch": 0.6156237488518876, - "grad_norm": 0.739362895488739, - "learning_rate": 1.603732759998913e-05, - "loss": 0.3207, - "step": 6535 - }, - { - "epoch": 0.6157179529450555, - "grad_norm": 0.7881128787994385, - "learning_rate": 1.6036123764705245e-05, - "loss": 0.3218, - "step": 6536 - }, - { - "epoch": 0.6158121570382233, - "grad_norm": 0.7558095455169678, - "learning_rate": 1.6034919791788398e-05, - "loss": 0.3343, - "step": 6537 - }, - { - "epoch": 0.6159063611313912, - "grad_norm": 1.2704678773880005, - "learning_rate": 1.603371568126605e-05, - "loss": 0.3478, - "step": 6538 - }, - { - "epoch": 0.616000565224559, - "grad_norm": 0.8412241339683533, - "learning_rate": 1.6032511433165652e-05, - "loss": 0.3525, - "step": 6539 - }, - { - "epoch": 0.6160947693177269, - "grad_norm": 0.8343496918678284, - "learning_rate": 1.6031307047514665e-05, - "loss": 0.3353, - "step": 6540 - }, - { - "epoch": 0.6161889734108947, - "grad_norm": 0.6275550723075867, - "learning_rate": 1.6030102524340547e-05, - "loss": 0.2906, - "step": 6541 - }, - { - "epoch": 
0.6162831775040626, - "grad_norm": 0.8872958421707153, - "learning_rate": 1.6028897863670768e-05, - "loss": 0.3251, - "step": 6542 - }, - { - "epoch": 0.6163773815972304, - "grad_norm": 0.7549136877059937, - "learning_rate": 1.602769306553279e-05, - "loss": 0.3325, - "step": 6543 - }, - { - "epoch": 0.6164715856903983, - "grad_norm": 0.6889528632164001, - "learning_rate": 1.6026488129954092e-05, - "loss": 0.3275, - "step": 6544 - }, - { - "epoch": 0.6165657897835661, - "grad_norm": 0.7459689378738403, - "learning_rate": 1.6025283056962143e-05, - "loss": 0.33, - "step": 6545 - }, - { - "epoch": 0.616659993876734, - "grad_norm": 0.7205366492271423, - "learning_rate": 1.602407784658442e-05, - "loss": 0.3013, - "step": 6546 - }, - { - "epoch": 0.6167541979699018, - "grad_norm": 0.7754681706428528, - "learning_rate": 1.6022872498848408e-05, - "loss": 0.3339, - "step": 6547 - }, - { - "epoch": 0.6168484020630697, - "grad_norm": 0.899183988571167, - "learning_rate": 1.602166701378159e-05, - "loss": 0.3756, - "step": 6548 - }, - { - "epoch": 0.6169426061562375, - "grad_norm": 0.8031266331672668, - "learning_rate": 1.6020461391411452e-05, - "loss": 0.2941, - "step": 6549 - }, - { - "epoch": 0.6170368102494054, - "grad_norm": 0.711356520652771, - "learning_rate": 1.601925563176548e-05, - "loss": 0.3009, - "step": 6550 - }, - { - "epoch": 0.6171310143425732, - "grad_norm": 0.7564597129821777, - "learning_rate": 1.6018049734871174e-05, - "loss": 0.3229, - "step": 6551 - }, - { - "epoch": 0.6172252184357411, - "grad_norm": 0.8064227104187012, - "learning_rate": 1.6016843700756025e-05, - "loss": 0.3266, - "step": 6552 - }, - { - "epoch": 0.6173194225289089, - "grad_norm": 0.6288996338844299, - "learning_rate": 1.6015637529447533e-05, - "loss": 0.2794, - "step": 6553 - }, - { - "epoch": 0.6174136266220768, - "grad_norm": 0.8485068082809448, - "learning_rate": 1.6014431220973205e-05, - "loss": 0.3633, - "step": 6554 - }, - { - "epoch": 0.6175078307152446, - "grad_norm": 0.7502201199531555, - "learning_rate": 1.601322477536054e-05, - "loss": 0.3425, - "step": 6555 - }, - { - "epoch": 0.6176020348084125, - "grad_norm": 0.7736746072769165, - "learning_rate": 1.6012018192637052e-05, - "loss": 0.3416, - "step": 6556 - }, - { - "epoch": 0.6176962389015803, - "grad_norm": 0.8648883104324341, - "learning_rate": 1.6010811472830253e-05, - "loss": 0.3529, - "step": 6557 - }, - { - "epoch": 0.6177904429947482, - "grad_norm": 0.7279908657073975, - "learning_rate": 1.6009604615967656e-05, - "loss": 0.3293, - "step": 6558 - }, - { - "epoch": 0.617884647087916, - "grad_norm": 0.8421671986579895, - "learning_rate": 1.6008397622076778e-05, - "loss": 0.3428, - "step": 6559 - }, - { - "epoch": 0.6179788511810839, - "grad_norm": 0.8226191401481628, - "learning_rate": 1.6007190491185144e-05, - "loss": 0.3391, - "step": 6560 - }, - { - "epoch": 0.6180730552742517, - "grad_norm": 0.745063066482544, - "learning_rate": 1.6005983223320276e-05, - "loss": 0.3136, - "step": 6561 - }, - { - "epoch": 0.6181672593674196, - "grad_norm": 0.8741940855979919, - "learning_rate": 1.60047758185097e-05, - "loss": 0.3044, - "step": 6562 - }, - { - "epoch": 0.6182614634605874, - "grad_norm": 0.7277724146842957, - "learning_rate": 1.600356827678095e-05, - "loss": 0.2929, - "step": 6563 - }, - { - "epoch": 0.6183556675537553, - "grad_norm": 0.7360935211181641, - "learning_rate": 1.600236059816156e-05, - "loss": 0.2837, - "step": 6564 - }, - { - "epoch": 0.6184498716469231, - "grad_norm": 0.9177241325378418, - "learning_rate": 1.6001152782679067e-05, 
- "loss": 0.371, - "step": 6565 - }, - { - "epoch": 0.618544075740091, - "grad_norm": 0.7400040626525879, - "learning_rate": 1.5999944830361007e-05, - "loss": 0.3262, - "step": 6566 - }, - { - "epoch": 0.6186382798332588, - "grad_norm": 0.716580867767334, - "learning_rate": 1.5998736741234922e-05, - "loss": 0.3269, - "step": 6567 - }, - { - "epoch": 0.6187324839264267, - "grad_norm": 0.7563260793685913, - "learning_rate": 1.5997528515328367e-05, - "loss": 0.3266, - "step": 6568 - }, - { - "epoch": 0.6188266880195945, - "grad_norm": 0.8368607759475708, - "learning_rate": 1.5996320152668886e-05, - "loss": 0.3438, - "step": 6569 - }, - { - "epoch": 0.6189208921127622, - "grad_norm": 0.7384721040725708, - "learning_rate": 1.5995111653284036e-05, - "loss": 0.3393, - "step": 6570 - }, - { - "epoch": 0.6190150962059301, - "grad_norm": 0.7471886277198792, - "learning_rate": 1.5993903017201363e-05, - "loss": 0.3232, - "step": 6571 - }, - { - "epoch": 0.6191093002990979, - "grad_norm": 0.7168338298797607, - "learning_rate": 1.5992694244448434e-05, - "loss": 0.3572, - "step": 6572 - }, - { - "epoch": 0.6192035043922658, - "grad_norm": 0.8276976346969604, - "learning_rate": 1.5991485335052807e-05, - "loss": 0.3045, - "step": 6573 - }, - { - "epoch": 0.6192977084854336, - "grad_norm": 0.7061718702316284, - "learning_rate": 1.5990276289042047e-05, - "loss": 0.3401, - "step": 6574 - }, - { - "epoch": 0.6193919125786015, - "grad_norm": 0.7345423698425293, - "learning_rate": 1.5989067106443722e-05, - "loss": 0.3714, - "step": 6575 - }, - { - "epoch": 0.6194861166717693, - "grad_norm": 0.7423486113548279, - "learning_rate": 1.598785778728541e-05, - "loss": 0.345, - "step": 6576 - }, - { - "epoch": 0.6195803207649372, - "grad_norm": 0.7642102837562561, - "learning_rate": 1.5986648331594678e-05, - "loss": 0.3387, - "step": 6577 - }, - { - "epoch": 0.619674524858105, - "grad_norm": 0.686305820941925, - "learning_rate": 1.5985438739399107e-05, - "loss": 0.2932, - "step": 6578 - }, - { - "epoch": 0.6197687289512729, - "grad_norm": 0.8475738763809204, - "learning_rate": 1.598422901072627e-05, - "loss": 0.3521, - "step": 6579 - }, - { - "epoch": 0.6198629330444407, - "grad_norm": 0.8894240260124207, - "learning_rate": 1.5983019145603766e-05, - "loss": 0.3937, - "step": 6580 - }, - { - "epoch": 0.6199571371376086, - "grad_norm": 0.7335094213485718, - "learning_rate": 1.5981809144059167e-05, - "loss": 0.3217, - "step": 6581 - }, - { - "epoch": 0.6200513412307764, - "grad_norm": 0.6611246466636658, - "learning_rate": 1.598059900612007e-05, - "loss": 0.2838, - "step": 6582 - }, - { - "epoch": 0.6201455453239443, - "grad_norm": 0.7276688814163208, - "learning_rate": 1.5979388731814063e-05, - "loss": 0.3131, - "step": 6583 - }, - { - "epoch": 0.6202397494171121, - "grad_norm": 0.8888239860534668, - "learning_rate": 1.5978178321168746e-05, - "loss": 0.3382, - "step": 6584 - }, - { - "epoch": 0.62033395351028, - "grad_norm": 0.9546866416931152, - "learning_rate": 1.597696777421172e-05, - "loss": 0.285, - "step": 6585 - }, - { - "epoch": 0.6204281576034478, - "grad_norm": 0.7835800647735596, - "learning_rate": 1.5975757090970586e-05, - "loss": 0.3688, - "step": 6586 - }, - { - "epoch": 0.6205223616966157, - "grad_norm": 1.0066163539886475, - "learning_rate": 1.5974546271472952e-05, - "loss": 0.3065, - "step": 6587 - }, - { - "epoch": 0.6206165657897835, - "grad_norm": 0.7106859087944031, - "learning_rate": 1.5973335315746416e-05, - "loss": 0.3054, - "step": 6588 - }, - { - "epoch": 0.6207107698829514, - "grad_norm": 
0.8374449610710144, - "learning_rate": 1.59721242238186e-05, - "loss": 0.3169, - "step": 6589 - }, - { - "epoch": 0.6208049739761192, - "grad_norm": 0.702578067779541, - "learning_rate": 1.597091299571712e-05, - "loss": 0.3196, - "step": 6590 - }, - { - "epoch": 0.6208991780692871, - "grad_norm": 0.7177067995071411, - "learning_rate": 1.596970163146958e-05, - "loss": 0.3182, - "step": 6591 - }, - { - "epoch": 0.6209933821624549, - "grad_norm": 0.7188496589660645, - "learning_rate": 1.5968490131103618e-05, - "loss": 0.2904, - "step": 6592 - }, - { - "epoch": 0.6210875862556228, - "grad_norm": 0.7323099374771118, - "learning_rate": 1.5967278494646847e-05, - "loss": 0.3555, - "step": 6593 - }, - { - "epoch": 0.6211817903487906, - "grad_norm": 0.8324366211891174, - "learning_rate": 1.5966066722126897e-05, - "loss": 0.2937, - "step": 6594 - }, - { - "epoch": 0.6212759944419585, - "grad_norm": 0.752227246761322, - "learning_rate": 1.5964854813571403e-05, - "loss": 0.2981, - "step": 6595 - }, - { - "epoch": 0.6213701985351263, - "grad_norm": 0.7920807003974915, - "learning_rate": 1.596364276900799e-05, - "loss": 0.3655, - "step": 6596 - }, - { - "epoch": 0.6214644026282942, - "grad_norm": 0.7501734495162964, - "learning_rate": 1.5962430588464302e-05, - "loss": 0.3009, - "step": 6597 - }, - { - "epoch": 0.621558606721462, - "grad_norm": 0.8034332394599915, - "learning_rate": 1.5961218271967976e-05, - "loss": 0.3655, - "step": 6598 - }, - { - "epoch": 0.6216528108146299, - "grad_norm": 0.6022971272468567, - "learning_rate": 1.596000581954665e-05, - "loss": 0.2758, - "step": 6599 - }, - { - "epoch": 0.6217470149077977, - "grad_norm": 0.7588047385215759, - "learning_rate": 1.595879323122798e-05, - "loss": 0.3601, - "step": 6600 - }, - { - "epoch": 0.6218412190009656, - "grad_norm": 0.7950819730758667, - "learning_rate": 1.5957580507039604e-05, - "loss": 0.3183, - "step": 6601 - }, - { - "epoch": 0.6219354230941334, - "grad_norm": 0.7252808213233948, - "learning_rate": 1.5956367647009185e-05, - "loss": 0.3409, - "step": 6602 - }, - { - "epoch": 0.6220296271873013, - "grad_norm": 0.7759315371513367, - "learning_rate": 1.5955154651164368e-05, - "loss": 0.3408, - "step": 6603 - }, - { - "epoch": 0.6221238312804691, - "grad_norm": 0.788898766040802, - "learning_rate": 1.595394151953281e-05, - "loss": 0.3177, - "step": 6604 - }, - { - "epoch": 0.622218035373637, - "grad_norm": 0.7779346108436584, - "learning_rate": 1.5952728252142183e-05, - "loss": 0.3513, - "step": 6605 - }, - { - "epoch": 0.6223122394668048, - "grad_norm": 0.779161810874939, - "learning_rate": 1.5951514849020147e-05, - "loss": 0.3713, - "step": 6606 - }, - { - "epoch": 0.6224064435599727, - "grad_norm": 0.7594448924064636, - "learning_rate": 1.5950301310194366e-05, - "loss": 0.3097, - "step": 6607 - }, - { - "epoch": 0.6225006476531405, - "grad_norm": 0.7905697822570801, - "learning_rate": 1.5949087635692512e-05, - "loss": 0.3418, - "step": 6608 - }, - { - "epoch": 0.6225948517463084, - "grad_norm": 0.936529815196991, - "learning_rate": 1.594787382554226e-05, - "loss": 0.3151, - "step": 6609 - }, - { - "epoch": 0.6226890558394762, - "grad_norm": 0.7440918684005737, - "learning_rate": 1.5946659879771288e-05, - "loss": 0.3529, - "step": 6610 - }, - { - "epoch": 0.622783259932644, - "grad_norm": 0.7420088648796082, - "learning_rate": 1.5945445798407274e-05, - "loss": 0.3043, - "step": 6611 - }, - { - "epoch": 0.6228774640258119, - "grad_norm": 0.7822197675704956, - "learning_rate": 1.59442315814779e-05, - "loss": 0.3951, - "step": 6612 - }, 
- { - "epoch": 0.6229716681189797, - "grad_norm": 0.7788919806480408, - "learning_rate": 1.594301722901085e-05, - "loss": 0.3468, - "step": 6613 - }, - { - "epoch": 0.6230658722121476, - "grad_norm": 0.833311915397644, - "learning_rate": 1.594180274103382e-05, - "loss": 0.3853, - "step": 6614 - }, - { - "epoch": 0.6231600763053154, - "grad_norm": 0.6766711473464966, - "learning_rate": 1.5940588117574497e-05, - "loss": 0.2991, - "step": 6615 - }, - { - "epoch": 0.6232542803984833, - "grad_norm": 0.717984139919281, - "learning_rate": 1.593937335866058e-05, - "loss": 0.2994, - "step": 6616 - }, - { - "epoch": 0.6233484844916511, - "grad_norm": 0.8255095481872559, - "learning_rate": 1.5938158464319763e-05, - "loss": 0.3249, - "step": 6617 - }, - { - "epoch": 0.623442688584819, - "grad_norm": 0.6872636675834656, - "learning_rate": 1.5936943434579748e-05, - "loss": 0.2778, - "step": 6618 - }, - { - "epoch": 0.6235368926779868, - "grad_norm": 0.7614285945892334, - "learning_rate": 1.593572826946824e-05, - "loss": 0.3422, - "step": 6619 - }, - { - "epoch": 0.6236310967711547, - "grad_norm": 0.7388732433319092, - "learning_rate": 1.5934512969012953e-05, - "loss": 0.3095, - "step": 6620 - }, - { - "epoch": 0.6237253008643225, - "grad_norm": 0.6868925094604492, - "learning_rate": 1.593329753324159e-05, - "loss": 0.3295, - "step": 6621 - }, - { - "epoch": 0.6238195049574904, - "grad_norm": 1.0019731521606445, - "learning_rate": 1.5932081962181863e-05, - "loss": 0.3794, - "step": 6622 - }, - { - "epoch": 0.6239137090506582, - "grad_norm": 0.787661075592041, - "learning_rate": 1.5930866255861498e-05, - "loss": 0.3288, - "step": 6623 - }, - { - "epoch": 0.6240079131438261, - "grad_norm": 0.8396860957145691, - "learning_rate": 1.592965041430821e-05, - "loss": 0.3159, - "step": 6624 - }, - { - "epoch": 0.6241021172369939, - "grad_norm": 0.9020205140113831, - "learning_rate": 1.5928434437549724e-05, - "loss": 0.3365, - "step": 6625 - }, - { - "epoch": 0.6241963213301618, - "grad_norm": 0.8161234855651855, - "learning_rate": 1.5927218325613758e-05, - "loss": 0.3024, - "step": 6626 - }, - { - "epoch": 0.6242905254233296, - "grad_norm": 0.8369538187980652, - "learning_rate": 1.592600207852805e-05, - "loss": 0.3211, - "step": 6627 - }, - { - "epoch": 0.6243847295164975, - "grad_norm": 0.7108891606330872, - "learning_rate": 1.592478569632033e-05, - "loss": 0.309, - "step": 6628 - }, - { - "epoch": 0.6244789336096653, - "grad_norm": 0.7067973017692566, - "learning_rate": 1.5923569179018333e-05, - "loss": 0.3026, - "step": 6629 - }, - { - "epoch": 0.6245731377028332, - "grad_norm": 0.8192605972290039, - "learning_rate": 1.5922352526649803e-05, - "loss": 0.3017, - "step": 6630 - }, - { - "epoch": 0.624667341796001, - "grad_norm": 0.8145104050636292, - "learning_rate": 1.5921135739242473e-05, - "loss": 0.3164, - "step": 6631 - }, - { - "epoch": 0.6247615458891689, - "grad_norm": 0.731330931186676, - "learning_rate": 1.591991881682409e-05, - "loss": 0.312, - "step": 6632 - }, - { - "epoch": 0.6248557499823367, - "grad_norm": 0.7608345150947571, - "learning_rate": 1.59187017594224e-05, - "loss": 0.332, - "step": 6633 - }, - { - "epoch": 0.6249499540755046, - "grad_norm": 0.7256740927696228, - "learning_rate": 1.591748456706516e-05, - "loss": 0.3063, - "step": 6634 - }, - { - "epoch": 0.6250441581686724, - "grad_norm": 0.8325484991073608, - "learning_rate": 1.591626723978012e-05, - "loss": 0.3611, - "step": 6635 - }, - { - "epoch": 0.6251383622618403, - "grad_norm": 0.7092111706733704, - "learning_rate": 
1.5915049777595036e-05, - "loss": 0.3152, - "step": 6636 - }, - { - "epoch": 0.6252325663550081, - "grad_norm": 0.7213624119758606, - "learning_rate": 1.591383218053767e-05, - "loss": 0.3182, - "step": 6637 - }, - { - "epoch": 0.625326770448176, - "grad_norm": 0.7438649535179138, - "learning_rate": 1.5912614448635784e-05, - "loss": 0.3343, - "step": 6638 - }, - { - "epoch": 0.6254209745413438, - "grad_norm": 1.3518234491348267, - "learning_rate": 1.5911396581917144e-05, - "loss": 0.332, - "step": 6639 - }, - { - "epoch": 0.6255151786345117, - "grad_norm": 0.7216207981109619, - "learning_rate": 1.5910178580409522e-05, - "loss": 0.2506, - "step": 6640 - }, - { - "epoch": 0.6256093827276795, - "grad_norm": 0.7129900455474854, - "learning_rate": 1.5908960444140686e-05, - "loss": 0.2791, - "step": 6641 - }, - { - "epoch": 0.6257035868208474, - "grad_norm": 0.7139818072319031, - "learning_rate": 1.5907742173138415e-05, - "loss": 0.3754, - "step": 6642 - }, - { - "epoch": 0.6257977909140152, - "grad_norm": 0.730042576789856, - "learning_rate": 1.5906523767430485e-05, - "loss": 0.3263, - "step": 6643 - }, - { - "epoch": 0.6258919950071831, - "grad_norm": 0.7480024695396423, - "learning_rate": 1.590530522704468e-05, - "loss": 0.308, - "step": 6644 - }, - { - "epoch": 0.6259861991003509, - "grad_norm": 0.7542113661766052, - "learning_rate": 1.590408655200878e-05, - "loss": 0.3602, - "step": 6645 - }, - { - "epoch": 0.6260804031935188, - "grad_norm": 0.7510566711425781, - "learning_rate": 1.5902867742350578e-05, - "loss": 0.3131, - "step": 6646 - }, - { - "epoch": 0.6261746072866866, - "grad_norm": 0.8571420907974243, - "learning_rate": 1.5901648798097863e-05, - "loss": 0.3479, - "step": 6647 - }, - { - "epoch": 0.6262688113798545, - "grad_norm": 0.6914628744125366, - "learning_rate": 1.5900429719278428e-05, - "loss": 0.2876, - "step": 6648 - }, - { - "epoch": 0.6263630154730223, - "grad_norm": 0.8220464587211609, - "learning_rate": 1.5899210505920066e-05, - "loss": 0.3221, - "step": 6649 - }, - { - "epoch": 0.6264572195661902, - "grad_norm": 0.7591971755027771, - "learning_rate": 1.5897991158050586e-05, - "loss": 0.34, - "step": 6650 - }, - { - "epoch": 0.626551423659358, - "grad_norm": 0.7553174495697021, - "learning_rate": 1.5896771675697786e-05, - "loss": 0.2989, - "step": 6651 - }, - { - "epoch": 0.6266456277525259, - "grad_norm": 0.6980757117271423, - "learning_rate": 1.589555205888947e-05, - "loss": 0.293, - "step": 6652 - }, - { - "epoch": 0.6267398318456937, - "grad_norm": 0.7261850833892822, - "learning_rate": 1.589433230765345e-05, - "loss": 0.3286, - "step": 6653 - }, - { - "epoch": 0.6268340359388616, - "grad_norm": 0.6873316168785095, - "learning_rate": 1.589311242201754e-05, - "loss": 0.2959, - "step": 6654 - }, - { - "epoch": 0.6269282400320294, - "grad_norm": 0.7370021343231201, - "learning_rate": 1.589189240200955e-05, - "loss": 0.3687, - "step": 6655 - }, - { - "epoch": 0.6270224441251973, - "grad_norm": 0.8008817434310913, - "learning_rate": 1.5890672247657303e-05, - "loss": 0.3434, - "step": 6656 - }, - { - "epoch": 0.6271166482183651, - "grad_norm": 0.8398225903511047, - "learning_rate": 1.588945195898862e-05, - "loss": 0.3857, - "step": 6657 - }, - { - "epoch": 0.627210852311533, - "grad_norm": 1.046363115310669, - "learning_rate": 1.588823153603132e-05, - "loss": 0.3579, - "step": 6658 - }, - { - "epoch": 0.6273050564047008, - "grad_norm": 0.7483053803443909, - "learning_rate": 1.5887010978813235e-05, - "loss": 0.3334, - "step": 6659 - }, - { - "epoch": 0.6273992604978686, - 
"grad_norm": 0.7133119106292725, - "learning_rate": 1.58857902873622e-05, - "loss": 0.3184, - "step": 6660 - }, - { - "epoch": 0.6274934645910365, - "grad_norm": 0.7998007535934448, - "learning_rate": 1.588456946170604e-05, - "loss": 0.3422, - "step": 6661 - }, - { - "epoch": 0.6275876686842043, - "grad_norm": 0.7578026056289673, - "learning_rate": 1.5883348501872597e-05, - "loss": 0.3168, - "step": 6662 - }, - { - "epoch": 0.6276818727773722, - "grad_norm": 0.7887904644012451, - "learning_rate": 1.588212740788971e-05, - "loss": 0.3716, - "step": 6663 - }, - { - "epoch": 0.62777607687054, - "grad_norm": 0.7964813709259033, - "learning_rate": 1.5880906179785222e-05, - "loss": 0.3104, - "step": 6664 - }, - { - "epoch": 0.6278702809637079, - "grad_norm": 0.7783161997795105, - "learning_rate": 1.5879684817586974e-05, - "loss": 0.3619, - "step": 6665 - }, - { - "epoch": 0.6279644850568757, - "grad_norm": 0.9833865761756897, - "learning_rate": 1.5878463321322822e-05, - "loss": 0.3808, - "step": 6666 - }, - { - "epoch": 0.6280586891500436, - "grad_norm": 0.6703827977180481, - "learning_rate": 1.587724169102061e-05, - "loss": 0.3076, - "step": 6667 - }, - { - "epoch": 0.6281528932432114, - "grad_norm": 0.7270950675010681, - "learning_rate": 1.5876019926708207e-05, - "loss": 0.2927, - "step": 6668 - }, - { - "epoch": 0.6282470973363793, - "grad_norm": 0.8654983043670654, - "learning_rate": 1.5874798028413456e-05, - "loss": 0.3234, - "step": 6669 - }, - { - "epoch": 0.6283413014295471, - "grad_norm": 0.7640737891197205, - "learning_rate": 1.587357599616423e-05, - "loss": 0.3389, - "step": 6670 - }, - { - "epoch": 0.628435505522715, - "grad_norm": 2.5828471183776855, - "learning_rate": 1.587235382998838e-05, - "loss": 0.309, - "step": 6671 - }, - { - "epoch": 0.6285297096158828, - "grad_norm": 0.6812258362770081, - "learning_rate": 1.5871131529913782e-05, - "loss": 0.2965, - "step": 6672 - }, - { - "epoch": 0.6286239137090507, - "grad_norm": 0.7365323901176453, - "learning_rate": 1.586990909596831e-05, - "loss": 0.3237, - "step": 6673 - }, - { - "epoch": 0.6287181178022185, - "grad_norm": 0.8122865557670593, - "learning_rate": 1.586868652817983e-05, - "loss": 0.3093, - "step": 6674 - }, - { - "epoch": 0.6288123218953864, - "grad_norm": 0.7173646092414856, - "learning_rate": 1.5867463826576223e-05, - "loss": 0.3315, - "step": 6675 - }, - { - "epoch": 0.6289065259885542, - "grad_norm": 0.6234768629074097, - "learning_rate": 1.5866240991185365e-05, - "loss": 0.2994, - "step": 6676 - }, - { - "epoch": 0.6290007300817221, - "grad_norm": 0.6665681600570679, - "learning_rate": 1.586501802203514e-05, - "loss": 0.3055, - "step": 6677 - }, - { - "epoch": 0.6290949341748899, - "grad_norm": 0.6856829524040222, - "learning_rate": 1.586379491915343e-05, - "loss": 0.2913, - "step": 6678 - }, - { - "epoch": 0.6291891382680578, - "grad_norm": 0.7844224572181702, - "learning_rate": 1.5862571682568135e-05, - "loss": 0.3622, - "step": 6679 - }, - { - "epoch": 0.6292833423612256, - "grad_norm": 0.7180244326591492, - "learning_rate": 1.5861348312307138e-05, - "loss": 0.3065, - "step": 6680 - }, - { - "epoch": 0.6293775464543935, - "grad_norm": 0.7316251993179321, - "learning_rate": 1.586012480839833e-05, - "loss": 0.3329, - "step": 6681 - }, - { - "epoch": 0.6294717505475613, - "grad_norm": 0.6861616373062134, - "learning_rate": 1.5858901170869613e-05, - "loss": 0.3216, - "step": 6682 - }, - { - "epoch": 0.6295659546407292, - "grad_norm": 0.6843136548995972, - "learning_rate": 1.585767739974889e-05, - "loss": 0.3171, - 
"step": 6683 - }, - { - "epoch": 0.629660158733897, - "grad_norm": 0.7426478266716003, - "learning_rate": 1.5856453495064067e-05, - "loss": 0.3227, - "step": 6684 - }, - { - "epoch": 0.6297543628270649, - "grad_norm": 0.6927346587181091, - "learning_rate": 1.5855229456843045e-05, - "loss": 0.3399, - "step": 6685 - }, - { - "epoch": 0.6298485669202327, - "grad_norm": 0.7568566203117371, - "learning_rate": 1.5854005285113734e-05, - "loss": 0.3248, - "step": 6686 - }, - { - "epoch": 0.6299427710134006, - "grad_norm": 0.703330397605896, - "learning_rate": 1.585278097990405e-05, - "loss": 0.307, - "step": 6687 - }, - { - "epoch": 0.6300369751065684, - "grad_norm": 0.7454832792282104, - "learning_rate": 1.585155654124191e-05, - "loss": 0.305, - "step": 6688 - }, - { - "epoch": 0.6301311791997363, - "grad_norm": 0.7922065258026123, - "learning_rate": 1.5850331969155228e-05, - "loss": 0.3402, - "step": 6689 - }, - { - "epoch": 0.6302253832929041, - "grad_norm": 0.7510750889778137, - "learning_rate": 1.5849107263671934e-05, - "loss": 0.3135, - "step": 6690 - }, - { - "epoch": 0.630319587386072, - "grad_norm": 0.8461636304855347, - "learning_rate": 1.5847882424819944e-05, - "loss": 0.3373, - "step": 6691 - }, - { - "epoch": 0.6304137914792398, - "grad_norm": 0.7858095765113831, - "learning_rate": 1.584665745262719e-05, - "loss": 0.3472, - "step": 6692 - }, - { - "epoch": 0.6305079955724077, - "grad_norm": 0.8112091422080994, - "learning_rate": 1.584543234712161e-05, - "loss": 0.3363, - "step": 6693 - }, - { - "epoch": 0.6306021996655755, - "grad_norm": 0.7483133673667908, - "learning_rate": 1.5844207108331125e-05, - "loss": 0.3419, - "step": 6694 - }, - { - "epoch": 0.6306964037587434, - "grad_norm": 0.7431141138076782, - "learning_rate": 1.5842981736283686e-05, - "loss": 0.3081, - "step": 6695 - }, - { - "epoch": 0.6307906078519112, - "grad_norm": 0.7465755939483643, - "learning_rate": 1.5841756231007224e-05, - "loss": 0.3327, - "step": 6696 - }, - { - "epoch": 0.6308848119450791, - "grad_norm": 0.7578116059303284, - "learning_rate": 1.5840530592529685e-05, - "loss": 0.2924, - "step": 6697 - }, - { - "epoch": 0.6309790160382469, - "grad_norm": 0.7328236103057861, - "learning_rate": 1.5839304820879016e-05, - "loss": 0.3, - "step": 6698 - }, - { - "epoch": 0.6310732201314148, - "grad_norm": 0.6725963950157166, - "learning_rate": 1.5838078916083168e-05, - "loss": 0.3046, - "step": 6699 - }, - { - "epoch": 0.6311674242245826, - "grad_norm": 0.8018659949302673, - "learning_rate": 1.5836852878170095e-05, - "loss": 0.3182, - "step": 6700 - }, - { - "epoch": 0.6312616283177505, - "grad_norm": 0.9003369808197021, - "learning_rate": 1.5835626707167742e-05, - "loss": 0.3683, - "step": 6701 - }, - { - "epoch": 0.6313558324109183, - "grad_norm": 0.6894645094871521, - "learning_rate": 1.583440040310408e-05, - "loss": 0.2916, - "step": 6702 - }, - { - "epoch": 0.6314500365040862, - "grad_norm": 0.7459478378295898, - "learning_rate": 1.583317396600707e-05, - "loss": 0.3374, - "step": 6703 - }, - { - "epoch": 0.631544240597254, - "grad_norm": 0.8423133492469788, - "learning_rate": 1.5831947395904662e-05, - "loss": 0.3395, - "step": 6704 - }, - { - "epoch": 0.6316384446904219, - "grad_norm": 0.8305002450942993, - "learning_rate": 1.583072069282484e-05, - "loss": 0.3492, - "step": 6705 - }, - { - "epoch": 0.6317326487835897, - "grad_norm": 0.8187609314918518, - "learning_rate": 1.582949385679557e-05, - "loss": 0.3322, - "step": 6706 - }, - { - "epoch": 0.6318268528767575, - "grad_norm": 0.7323743104934692, - 
"learning_rate": 1.582826688784482e-05, - "loss": 0.2975, - "step": 6707 - }, - { - "epoch": 0.6319210569699254, - "grad_norm": 0.7921598553657532, - "learning_rate": 1.5827039786000574e-05, - "loss": 0.3277, - "step": 6708 - }, - { - "epoch": 0.6320152610630931, - "grad_norm": 0.6891117095947266, - "learning_rate": 1.582581255129081e-05, - "loss": 0.3077, - "step": 6709 - }, - { - "epoch": 0.632109465156261, - "grad_norm": 0.7351700067520142, - "learning_rate": 1.582458518374351e-05, - "loss": 0.2881, - "step": 6710 - }, - { - "epoch": 0.6322036692494288, - "grad_norm": 0.8305718302726746, - "learning_rate": 1.582335768338666e-05, - "loss": 0.3465, - "step": 6711 - }, - { - "epoch": 0.6322978733425967, - "grad_norm": 0.759661078453064, - "learning_rate": 1.582213005024825e-05, - "loss": 0.3083, - "step": 6712 - }, - { - "epoch": 0.6323920774357645, - "grad_norm": 0.716044008731842, - "learning_rate": 1.5820902284356267e-05, - "loss": 0.3346, - "step": 6713 - }, - { - "epoch": 0.6324862815289324, - "grad_norm": 0.7560633420944214, - "learning_rate": 1.5819674385738712e-05, - "loss": 0.3015, - "step": 6714 - }, - { - "epoch": 0.6325804856221002, - "grad_norm": 0.9049429893493652, - "learning_rate": 1.581844635442358e-05, - "loss": 0.3334, - "step": 6715 - }, - { - "epoch": 0.6326746897152681, - "grad_norm": 0.7073135375976562, - "learning_rate": 1.5817218190438872e-05, - "loss": 0.2907, - "step": 6716 - }, - { - "epoch": 0.6327688938084359, - "grad_norm": 0.9328410625457764, - "learning_rate": 1.5815989893812594e-05, - "loss": 0.343, - "step": 6717 - }, - { - "epoch": 0.6328630979016038, - "grad_norm": 0.8090577721595764, - "learning_rate": 1.5814761464572753e-05, - "loss": 0.3365, - "step": 6718 - }, - { - "epoch": 0.6329573019947716, - "grad_norm": 0.765785276889801, - "learning_rate": 1.5813532902747354e-05, - "loss": 0.3213, - "step": 6719 - }, - { - "epoch": 0.6330515060879395, - "grad_norm": 0.8419969081878662, - "learning_rate": 1.581230420836442e-05, - "loss": 0.3207, - "step": 6720 - }, - { - "epoch": 0.6331457101811073, - "grad_norm": 0.8804196119308472, - "learning_rate": 1.5811075381451954e-05, - "loss": 0.3305, - "step": 6721 - }, - { - "epoch": 0.6332399142742752, - "grad_norm": 0.7283568978309631, - "learning_rate": 1.5809846422037986e-05, - "loss": 0.2971, - "step": 6722 - }, - { - "epoch": 0.633334118367443, - "grad_norm": 0.7433123588562012, - "learning_rate": 1.5808617330150535e-05, - "loss": 0.3753, - "step": 6723 - }, - { - "epoch": 0.6334283224606109, - "grad_norm": 0.687572181224823, - "learning_rate": 1.5807388105817625e-05, - "loss": 0.304, - "step": 6724 - }, - { - "epoch": 0.6335225265537787, - "grad_norm": 0.8676371574401855, - "learning_rate": 1.5806158749067285e-05, - "loss": 0.3183, - "step": 6725 - }, - { - "epoch": 0.6336167306469466, - "grad_norm": 0.9256935715675354, - "learning_rate": 1.5804929259927545e-05, - "loss": 0.3332, - "step": 6726 - }, - { - "epoch": 0.6337109347401144, - "grad_norm": 0.760909378528595, - "learning_rate": 1.5803699638426442e-05, - "loss": 0.2998, - "step": 6727 - }, - { - "epoch": 0.6338051388332823, - "grad_norm": 0.725662112236023, - "learning_rate": 1.580246988459201e-05, - "loss": 0.3273, - "step": 6728 - }, - { - "epoch": 0.6338993429264501, - "grad_norm": 0.785145103931427, - "learning_rate": 1.5801239998452294e-05, - "loss": 0.2735, - "step": 6729 - }, - { - "epoch": 0.633993547019618, - "grad_norm": 0.8270518183708191, - "learning_rate": 1.5800009980035334e-05, - "loss": 0.3611, - "step": 6730 - }, - { - "epoch": 
0.6340877511127858, - "grad_norm": 0.7773357629776001, - "learning_rate": 1.5798779829369174e-05, - "loss": 0.3255, - "step": 6731 - }, - { - "epoch": 0.6341819552059537, - "grad_norm": 2.223407506942749, - "learning_rate": 1.5797549546481866e-05, - "loss": 0.3426, - "step": 6732 - }, - { - "epoch": 0.6342761592991215, - "grad_norm": 0.7606045603752136, - "learning_rate": 1.5796319131401463e-05, - "loss": 0.329, - "step": 6733 - }, - { - "epoch": 0.6343703633922894, - "grad_norm": 0.8077683448791504, - "learning_rate": 1.579508858415602e-05, - "loss": 0.3607, - "step": 6734 - }, - { - "epoch": 0.6344645674854572, - "grad_norm": 0.7511078119277954, - "learning_rate": 1.5793857904773595e-05, - "loss": 0.3202, - "step": 6735 - }, - { - "epoch": 0.6345587715786251, - "grad_norm": 0.7200794219970703, - "learning_rate": 1.5792627093282247e-05, - "loss": 0.3061, - "step": 6736 - }, - { - "epoch": 0.6346529756717929, - "grad_norm": 0.7865471243858337, - "learning_rate": 1.5791396149710046e-05, - "loss": 0.3427, - "step": 6737 - }, - { - "epoch": 0.6347471797649608, - "grad_norm": 0.830402672290802, - "learning_rate": 1.5790165074085057e-05, - "loss": 0.3343, - "step": 6738 - }, - { - "epoch": 0.6348413838581286, - "grad_norm": 0.6368622183799744, - "learning_rate": 1.5788933866435346e-05, - "loss": 0.3058, - "step": 6739 - }, - { - "epoch": 0.6349355879512965, - "grad_norm": 0.8150480389595032, - "learning_rate": 1.5787702526788994e-05, - "loss": 0.3063, - "step": 6740 - }, - { - "epoch": 0.6350297920444643, - "grad_norm": 0.7546366453170776, - "learning_rate": 1.578647105517407e-05, - "loss": 0.3172, - "step": 6741 - }, - { - "epoch": 0.6351239961376322, - "grad_norm": 0.7057252526283264, - "learning_rate": 1.5785239451618654e-05, - "loss": 0.323, - "step": 6742 - }, - { - "epoch": 0.6352182002308, - "grad_norm": 0.8907890319824219, - "learning_rate": 1.578400771615084e-05, - "loss": 0.3618, - "step": 6743 - }, - { - "epoch": 0.6353124043239678, - "grad_norm": 0.8284263014793396, - "learning_rate": 1.5782775848798698e-05, - "loss": 0.3361, - "step": 6744 - }, - { - "epoch": 0.6354066084171357, - "grad_norm": 0.7687682509422302, - "learning_rate": 1.578154384959033e-05, - "loss": 0.3625, - "step": 6745 - }, - { - "epoch": 0.6355008125103035, - "grad_norm": 0.8055520057678223, - "learning_rate": 1.5780311718553813e-05, - "loss": 0.3628, - "step": 6746 - }, - { - "epoch": 0.6355950166034714, - "grad_norm": 0.7241090536117554, - "learning_rate": 1.5779079455717253e-05, - "loss": 0.3248, - "step": 6747 - }, - { - "epoch": 0.6356892206966392, - "grad_norm": 0.7102499008178711, - "learning_rate": 1.5777847061108747e-05, - "loss": 0.3317, - "step": 6748 - }, - { - "epoch": 0.6357834247898071, - "grad_norm": 0.7378541827201843, - "learning_rate": 1.5776614534756388e-05, - "loss": 0.3458, - "step": 6749 - }, - { - "epoch": 0.6358776288829749, - "grad_norm": 0.816027820110321, - "learning_rate": 1.577538187668829e-05, - "loss": 0.3443, - "step": 6750 - }, - { - "epoch": 0.6359718329761428, - "grad_norm": 0.7290892601013184, - "learning_rate": 1.5774149086932546e-05, - "loss": 0.3363, - "step": 6751 - }, - { - "epoch": 0.6360660370693106, - "grad_norm": 0.8670916557312012, - "learning_rate": 1.5772916165517275e-05, - "loss": 0.3243, - "step": 6752 - }, - { - "epoch": 0.6361602411624785, - "grad_norm": 0.6518049240112305, - "learning_rate": 1.5771683112470587e-05, - "loss": 0.3129, - "step": 6753 - }, - { - "epoch": 0.6362544452556463, - "grad_norm": 0.750372052192688, - "learning_rate": 1.57704499278206e-05, 
- "loss": 0.3366, - "step": 6754 - }, - { - "epoch": 0.6363486493488142, - "grad_norm": 0.7233075499534607, - "learning_rate": 1.5769216611595432e-05, - "loss": 0.305, - "step": 6755 - }, - { - "epoch": 0.636442853441982, - "grad_norm": 0.7097463607788086, - "learning_rate": 1.5767983163823198e-05, - "loss": 0.3388, - "step": 6756 - }, - { - "epoch": 0.6365370575351499, - "grad_norm": 0.7607314586639404, - "learning_rate": 1.576674958453203e-05, - "loss": 0.3238, - "step": 6757 - }, - { - "epoch": 0.6366312616283177, - "grad_norm": 0.7543935179710388, - "learning_rate": 1.5765515873750055e-05, - "loss": 0.3287, - "step": 6758 - }, - { - "epoch": 0.6367254657214856, - "grad_norm": 0.8424161076545715, - "learning_rate": 1.57642820315054e-05, - "loss": 0.3453, - "step": 6759 - }, - { - "epoch": 0.6368196698146534, - "grad_norm": 0.9995113611221313, - "learning_rate": 1.57630480578262e-05, - "loss": 0.3726, - "step": 6760 - }, - { - "epoch": 0.6369138739078213, - "grad_norm": 0.7712142467498779, - "learning_rate": 1.576181395274059e-05, - "loss": 0.3068, - "step": 6761 - }, - { - "epoch": 0.6370080780009891, - "grad_norm": 0.7060720324516296, - "learning_rate": 1.5760579716276714e-05, - "loss": 0.326, - "step": 6762 - }, - { - "epoch": 0.637102282094157, - "grad_norm": 0.8832055330276489, - "learning_rate": 1.575934534846271e-05, - "loss": 0.3783, - "step": 6763 - }, - { - "epoch": 0.6371964861873248, - "grad_norm": 0.6925953030586243, - "learning_rate": 1.5758110849326724e-05, - "loss": 0.2866, - "step": 6764 - }, - { - "epoch": 0.6372906902804927, - "grad_norm": 0.7408279180526733, - "learning_rate": 1.5756876218896908e-05, - "loss": 0.3361, - "step": 6765 - }, - { - "epoch": 0.6373848943736605, - "grad_norm": 0.8043666481971741, - "learning_rate": 1.575564145720141e-05, - "loss": 0.3528, - "step": 6766 - }, - { - "epoch": 0.6374790984668284, - "grad_norm": 0.7212235331535339, - "learning_rate": 1.575440656426838e-05, - "loss": 0.3354, - "step": 6767 - }, - { - "epoch": 0.6375733025599962, - "grad_norm": 0.959936797618866, - "learning_rate": 1.5753171540125993e-05, - "loss": 0.3474, - "step": 6768 - }, - { - "epoch": 0.6376675066531641, - "grad_norm": 0.7940359115600586, - "learning_rate": 1.5751936384802388e-05, - "loss": 0.3192, - "step": 6769 - }, - { - "epoch": 0.6377617107463319, - "grad_norm": 0.7453657388687134, - "learning_rate": 1.575070109832574e-05, - "loss": 0.3333, - "step": 6770 - }, - { - "epoch": 0.6378559148394998, - "grad_norm": 0.7639998197555542, - "learning_rate": 1.5749465680724215e-05, - "loss": 0.3857, - "step": 6771 - }, - { - "epoch": 0.6379501189326676, - "grad_norm": 0.6975438594818115, - "learning_rate": 1.574823013202598e-05, - "loss": 0.2871, - "step": 6772 - }, - { - "epoch": 0.6380443230258355, - "grad_norm": 0.7081899642944336, - "learning_rate": 1.5746994452259206e-05, - "loss": 0.2871, - "step": 6773 - }, - { - "epoch": 0.6381385271190033, - "grad_norm": 0.8343057036399841, - "learning_rate": 1.5745758641452074e-05, - "loss": 0.3262, - "step": 6774 - }, - { - "epoch": 0.6382327312121712, - "grad_norm": 0.6873591542243958, - "learning_rate": 1.5744522699632757e-05, - "loss": 0.3026, - "step": 6775 - }, - { - "epoch": 0.638326935305339, - "grad_norm": 0.8080335259437561, - "learning_rate": 1.5743286626829437e-05, - "loss": 0.3114, - "step": 6776 - }, - { - "epoch": 0.6384211393985069, - "grad_norm": 0.8233070373535156, - "learning_rate": 1.57420504230703e-05, - "loss": 0.3154, - "step": 6777 - }, - { - "epoch": 0.6385153434916747, - "grad_norm": 
0.8136635422706604, - "learning_rate": 1.574081408838354e-05, - "loss": 0.3472, - "step": 6778 - }, - { - "epoch": 0.6386095475848426, - "grad_norm": 0.7526406645774841, - "learning_rate": 1.5739577622797334e-05, - "loss": 0.3168, - "step": 6779 - }, - { - "epoch": 0.6387037516780104, - "grad_norm": 0.6697912216186523, - "learning_rate": 1.5738341026339882e-05, - "loss": 0.2986, - "step": 6780 - }, - { - "epoch": 0.6387979557711783, - "grad_norm": 0.8566330671310425, - "learning_rate": 1.573710429903938e-05, - "loss": 0.3363, - "step": 6781 - }, - { - "epoch": 0.6388921598643461, - "grad_norm": 0.8247984647750854, - "learning_rate": 1.5735867440924027e-05, - "loss": 0.3355, - "step": 6782 - }, - { - "epoch": 0.638986363957514, - "grad_norm": 0.7584102153778076, - "learning_rate": 1.5734630452022028e-05, - "loss": 0.3233, - "step": 6783 - }, - { - "epoch": 0.6390805680506818, - "grad_norm": 0.8046115040779114, - "learning_rate": 1.5733393332361585e-05, - "loss": 0.3161, - "step": 6784 - }, - { - "epoch": 0.6391747721438497, - "grad_norm": 0.631939172744751, - "learning_rate": 1.573215608197091e-05, - "loss": 0.2771, - "step": 6785 - }, - { - "epoch": 0.6392689762370175, - "grad_norm": 0.819452166557312, - "learning_rate": 1.5730918700878203e-05, - "loss": 0.3283, - "step": 6786 - }, - { - "epoch": 0.6393631803301854, - "grad_norm": 0.6071840524673462, - "learning_rate": 1.5729681189111694e-05, - "loss": 0.2443, - "step": 6787 - }, - { - "epoch": 0.6394573844233532, - "grad_norm": 1.0969040393829346, - "learning_rate": 1.5728443546699592e-05, - "loss": 0.3384, - "step": 6788 - }, - { - "epoch": 0.639551588516521, - "grad_norm": 0.7380397915840149, - "learning_rate": 1.5727205773670117e-05, - "loss": 0.343, - "step": 6789 - }, - { - "epoch": 0.6396457926096889, - "grad_norm": 0.7145038843154907, - "learning_rate": 1.572596787005149e-05, - "loss": 0.3134, - "step": 6790 - }, - { - "epoch": 0.6397399967028568, - "grad_norm": 0.854451060295105, - "learning_rate": 1.5724729835871945e-05, - "loss": 0.404, - "step": 6791 - }, - { - "epoch": 0.6398342007960246, - "grad_norm": 0.7254519462585449, - "learning_rate": 1.5723491671159703e-05, - "loss": 0.3069, - "step": 6792 - }, - { - "epoch": 0.6399284048891924, - "grad_norm": 0.7702884674072266, - "learning_rate": 1.5722253375943002e-05, - "loss": 0.3559, - "step": 6793 - }, - { - "epoch": 0.6400226089823603, - "grad_norm": 0.8042786717414856, - "learning_rate": 1.5721014950250074e-05, - "loss": 0.3222, - "step": 6794 - }, - { - "epoch": 0.6401168130755281, - "grad_norm": 0.726668119430542, - "learning_rate": 1.5719776394109152e-05, - "loss": 0.2788, - "step": 6795 - }, - { - "epoch": 0.640211017168696, - "grad_norm": 0.6974692940711975, - "learning_rate": 1.5718537707548488e-05, - "loss": 0.3105, - "step": 6796 - }, - { - "epoch": 0.6403052212618638, - "grad_norm": 0.6868124008178711, - "learning_rate": 1.5717298890596317e-05, - "loss": 0.3269, - "step": 6797 - }, - { - "epoch": 0.6403994253550317, - "grad_norm": 0.7715316414833069, - "learning_rate": 1.5716059943280896e-05, - "loss": 0.359, - "step": 6798 - }, - { - "epoch": 0.6404936294481995, - "grad_norm": 0.7154345512390137, - "learning_rate": 1.5714820865630462e-05, - "loss": 0.3162, - "step": 6799 - }, - { - "epoch": 0.6405878335413674, - "grad_norm": 0.7197099328041077, - "learning_rate": 1.5713581657673276e-05, - "loss": 0.283, - "step": 6800 - }, - { - "epoch": 0.6406820376345352, - "grad_norm": 0.7262243628501892, - "learning_rate": 1.5712342319437592e-05, - "loss": 0.327, - "step": 6801 - 
}, - { - "epoch": 0.6407762417277031, - "grad_norm": 0.7279204726219177, - "learning_rate": 1.571110285095167e-05, - "loss": 0.3027, - "step": 6802 - }, - { - "epoch": 0.6408704458208709, - "grad_norm": 0.8032323718070984, - "learning_rate": 1.5709863252243768e-05, - "loss": 0.349, - "step": 6803 - }, - { - "epoch": 0.6409646499140388, - "grad_norm": 0.7235321402549744, - "learning_rate": 1.5708623523342153e-05, - "loss": 0.3158, - "step": 6804 - }, - { - "epoch": 0.6410588540072066, - "grad_norm": 0.760461151599884, - "learning_rate": 1.5707383664275094e-05, - "loss": 0.3293, - "step": 6805 - }, - { - "epoch": 0.6411530581003745, - "grad_norm": 0.7263423800468445, - "learning_rate": 1.5706143675070862e-05, - "loss": 0.3249, - "step": 6806 - }, - { - "epoch": 0.6412472621935423, - "grad_norm": 0.6820230484008789, - "learning_rate": 1.5704903555757728e-05, - "loss": 0.3317, - "step": 6807 - }, - { - "epoch": 0.6413414662867102, - "grad_norm": 0.8332756161689758, - "learning_rate": 1.5703663306363976e-05, - "loss": 0.3476, - "step": 6808 - }, - { - "epoch": 0.641435670379878, - "grad_norm": 0.7073219418525696, - "learning_rate": 1.5702422926917872e-05, - "loss": 0.321, - "step": 6809 - }, - { - "epoch": 0.6415298744730459, - "grad_norm": 0.7357454895973206, - "learning_rate": 1.570118241744771e-05, - "loss": 0.2712, - "step": 6810 - }, - { - "epoch": 0.6416240785662137, - "grad_norm": 0.7738254070281982, - "learning_rate": 1.5699941777981772e-05, - "loss": 0.3165, - "step": 6811 - }, - { - "epoch": 0.6417182826593816, - "grad_norm": 0.669426441192627, - "learning_rate": 1.5698701008548343e-05, - "loss": 0.331, - "step": 6812 - }, - { - "epoch": 0.6418124867525494, - "grad_norm": 0.7154690623283386, - "learning_rate": 1.569746010917572e-05, - "loss": 0.2842, - "step": 6813 - }, - { - "epoch": 0.6419066908457173, - "grad_norm": 0.7300938963890076, - "learning_rate": 1.5696219079892198e-05, - "loss": 0.3314, - "step": 6814 - }, - { - "epoch": 0.6420008949388851, - "grad_norm": 0.7254599332809448, - "learning_rate": 1.5694977920726066e-05, - "loss": 0.3008, - "step": 6815 - }, - { - "epoch": 0.642095099032053, - "grad_norm": 0.7386945486068726, - "learning_rate": 1.5693736631705632e-05, - "loss": 0.2717, - "step": 6816 - }, - { - "epoch": 0.6421893031252208, - "grad_norm": 0.9841634631156921, - "learning_rate": 1.56924952128592e-05, - "loss": 0.2993, - "step": 6817 - }, - { - "epoch": 0.6422835072183887, - "grad_norm": 0.7504370808601379, - "learning_rate": 1.569125366421507e-05, - "loss": 0.3875, - "step": 6818 - }, - { - "epoch": 0.6423777113115565, - "grad_norm": 0.7812278866767883, - "learning_rate": 1.569001198580156e-05, - "loss": 0.3503, - "step": 6819 - }, - { - "epoch": 0.6424719154047244, - "grad_norm": 0.8756741881370544, - "learning_rate": 1.5688770177646972e-05, - "loss": 0.3034, - "step": 6820 - }, - { - "epoch": 0.6425661194978922, - "grad_norm": 0.7655273079872131, - "learning_rate": 1.5687528239779627e-05, - "loss": 0.3183, - "step": 6821 - }, - { - "epoch": 0.6426603235910601, - "grad_norm": 0.6566729545593262, - "learning_rate": 1.5686286172227844e-05, - "loss": 0.2956, - "step": 6822 - }, - { - "epoch": 0.6427545276842279, - "grad_norm": 0.7181766033172607, - "learning_rate": 1.568504397501994e-05, - "loss": 0.3117, - "step": 6823 - }, - { - "epoch": 0.6428487317773958, - "grad_norm": 0.7318626046180725, - "learning_rate": 1.568380164818424e-05, - "loss": 0.2907, - "step": 6824 - }, - { - "epoch": 0.6429429358705636, - "grad_norm": 0.8867868185043335, - "learning_rate": 
1.5682559191749075e-05, - "loss": 0.3467, - "step": 6825 - }, - { - "epoch": 0.6430371399637315, - "grad_norm": 0.7642084360122681, - "learning_rate": 1.5681316605742773e-05, - "loss": 0.3273, - "step": 6826 - }, - { - "epoch": 0.6431313440568993, - "grad_norm": 0.7472430467605591, - "learning_rate": 1.5680073890193662e-05, - "loss": 0.3329, - "step": 6827 - }, - { - "epoch": 0.6432255481500672, - "grad_norm": 0.8168688416481018, - "learning_rate": 1.5678831045130086e-05, - "loss": 0.3099, - "step": 6828 - }, - { - "epoch": 0.643319752243235, - "grad_norm": 0.7869260311126709, - "learning_rate": 1.567758807058038e-05, - "loss": 0.2915, - "step": 6829 - }, - { - "epoch": 0.6434139563364029, - "grad_norm": 0.82233726978302, - "learning_rate": 1.5676344966572882e-05, - "loss": 0.3511, - "step": 6830 - }, - { - "epoch": 0.6435081604295707, - "grad_norm": 0.7085413336753845, - "learning_rate": 1.567510173313594e-05, - "loss": 0.2987, - "step": 6831 - }, - { - "epoch": 0.6436023645227386, - "grad_norm": 0.6723721027374268, - "learning_rate": 1.5673858370297906e-05, - "loss": 0.3143, - "step": 6832 - }, - { - "epoch": 0.6436965686159064, - "grad_norm": 0.7982920408248901, - "learning_rate": 1.5672614878087125e-05, - "loss": 0.3605, - "step": 6833 - }, - { - "epoch": 0.6437907727090743, - "grad_norm": 0.6995309591293335, - "learning_rate": 1.5671371256531952e-05, - "loss": 0.2919, - "step": 6834 - }, - { - "epoch": 0.6438849768022421, - "grad_norm": 0.7752536535263062, - "learning_rate": 1.567012750566074e-05, - "loss": 0.3161, - "step": 6835 - }, - { - "epoch": 0.64397918089541, - "grad_norm": 0.8362805843353271, - "learning_rate": 1.5668883625501856e-05, - "loss": 0.3332, - "step": 6836 - }, - { - "epoch": 0.6440733849885778, - "grad_norm": 0.8919989466667175, - "learning_rate": 1.5667639616083653e-05, - "loss": 0.3331, - "step": 6837 - }, - { - "epoch": 0.6441675890817457, - "grad_norm": 0.7835939526557922, - "learning_rate": 1.5666395477434508e-05, - "loss": 0.3112, - "step": 6838 - }, - { - "epoch": 0.6442617931749135, - "grad_norm": 0.7878260016441345, - "learning_rate": 1.566515120958278e-05, - "loss": 0.3195, - "step": 6839 - }, - { - "epoch": 0.6443559972680813, - "grad_norm": 0.7726975083351135, - "learning_rate": 1.5663906812556843e-05, - "loss": 0.3585, - "step": 6840 - }, - { - "epoch": 0.6444502013612492, - "grad_norm": 0.7149421572685242, - "learning_rate": 1.566266228638507e-05, - "loss": 0.3018, - "step": 6841 - }, - { - "epoch": 0.644544405454417, - "grad_norm": 0.7051762342453003, - "learning_rate": 1.566141763109584e-05, - "loss": 0.3098, - "step": 6842 - }, - { - "epoch": 0.6446386095475849, - "grad_norm": 0.8522613048553467, - "learning_rate": 1.5660172846717536e-05, - "loss": 0.3887, - "step": 6843 - }, - { - "epoch": 0.6447328136407527, - "grad_norm": 0.6800092458724976, - "learning_rate": 1.565892793327853e-05, - "loss": 0.3298, - "step": 6844 - }, - { - "epoch": 0.6448270177339206, - "grad_norm": 0.7485498189926147, - "learning_rate": 1.5657682890807225e-05, - "loss": 0.3623, - "step": 6845 - }, - { - "epoch": 0.6449212218270884, - "grad_norm": 0.6815838813781738, - "learning_rate": 1.5656437719331993e-05, - "loss": 0.3088, - "step": 6846 - }, - { - "epoch": 0.6450154259202562, - "grad_norm": 0.7152532935142517, - "learning_rate": 1.5655192418881235e-05, - "loss": 0.3026, - "step": 6847 - }, - { - "epoch": 0.645109630013424, - "grad_norm": 0.8499749302864075, - "learning_rate": 1.5653946989483345e-05, - "loss": 0.323, - "step": 6848 - }, - { - "epoch": 0.6452038341065919, 
- "grad_norm": 0.8016695380210876, - "learning_rate": 1.565270143116672e-05, - "loss": 0.2988, - "step": 6849 - }, - { - "epoch": 0.6452980381997597, - "grad_norm": 0.6921725869178772, - "learning_rate": 1.565145574395976e-05, - "loss": 0.2804, - "step": 6850 - }, - { - "epoch": 0.6453922422929276, - "grad_norm": 0.7418727874755859, - "learning_rate": 1.5650209927890868e-05, - "loss": 0.2879, - "step": 6851 - }, - { - "epoch": 0.6454864463860954, - "grad_norm": 0.8103009462356567, - "learning_rate": 1.564896398298845e-05, - "loss": 0.3066, - "step": 6852 - }, - { - "epoch": 0.6455806504792633, - "grad_norm": 0.7640576362609863, - "learning_rate": 1.564771790928092e-05, - "loss": 0.3625, - "step": 6853 - }, - { - "epoch": 0.6456748545724311, - "grad_norm": 0.9308742880821228, - "learning_rate": 1.5646471706796686e-05, - "loss": 0.3705, - "step": 6854 - }, - { - "epoch": 0.645769058665599, - "grad_norm": 0.7717334032058716, - "learning_rate": 1.5645225375564165e-05, - "loss": 0.3456, - "step": 6855 - }, - { - "epoch": 0.6458632627587668, - "grad_norm": 0.7640878558158875, - "learning_rate": 1.564397891561177e-05, - "loss": 0.3292, - "step": 6856 - }, - { - "epoch": 0.6459574668519347, - "grad_norm": 0.7348083853721619, - "learning_rate": 1.5642732326967934e-05, - "loss": 0.332, - "step": 6857 - }, - { - "epoch": 0.6460516709451025, - "grad_norm": 0.6975739002227783, - "learning_rate": 1.5641485609661073e-05, - "loss": 0.2983, - "step": 6858 - }, - { - "epoch": 0.6461458750382704, - "grad_norm": 0.768665611743927, - "learning_rate": 1.5640238763719614e-05, - "loss": 0.3287, - "step": 6859 - }, - { - "epoch": 0.6462400791314382, - "grad_norm": 0.8188949823379517, - "learning_rate": 1.563899178917199e-05, - "loss": 0.3534, - "step": 6860 - }, - { - "epoch": 0.6463342832246061, - "grad_norm": 0.8445432186126709, - "learning_rate": 1.563774468604663e-05, - "loss": 0.3449, - "step": 6861 - }, - { - "epoch": 0.6464284873177739, - "grad_norm": 0.8140099048614502, - "learning_rate": 1.5636497454371973e-05, - "loss": 0.3215, - "step": 6862 - }, - { - "epoch": 0.6465226914109418, - "grad_norm": 0.6855039000511169, - "learning_rate": 1.5635250094176456e-05, - "loss": 0.2879, - "step": 6863 - }, - { - "epoch": 0.6466168955041096, - "grad_norm": 0.7547051906585693, - "learning_rate": 1.5634002605488524e-05, - "loss": 0.3131, - "step": 6864 - }, - { - "epoch": 0.6467110995972775, - "grad_norm": 0.855553388595581, - "learning_rate": 1.563275498833662e-05, - "loss": 0.3354, - "step": 6865 - }, - { - "epoch": 0.6468053036904453, - "grad_norm": 0.6999885439872742, - "learning_rate": 1.5631507242749187e-05, - "loss": 0.3232, - "step": 6866 - }, - { - "epoch": 0.6468995077836132, - "grad_norm": 0.7532494068145752, - "learning_rate": 1.5630259368754682e-05, - "loss": 0.3198, - "step": 6867 - }, - { - "epoch": 0.646993711876781, - "grad_norm": 0.6313682794570923, - "learning_rate": 1.5629011366381556e-05, - "loss": 0.2997, - "step": 6868 - }, - { - "epoch": 0.6470879159699489, - "grad_norm": 0.8592627048492432, - "learning_rate": 1.5627763235658266e-05, - "loss": 0.3376, - "step": 6869 - }, - { - "epoch": 0.6471821200631167, - "grad_norm": 0.7356202006340027, - "learning_rate": 1.5626514976613273e-05, - "loss": 0.328, - "step": 6870 - }, - { - "epoch": 0.6472763241562846, - "grad_norm": 0.7478100061416626, - "learning_rate": 1.5625266589275032e-05, - "loss": 0.318, - "step": 6871 - }, - { - "epoch": 0.6473705282494524, - "grad_norm": 0.8141969442367554, - "learning_rate": 1.5624018073672016e-05, - "loss": 0.3171, 
- "step": 6872 - }, - { - "epoch": 0.6474647323426203, - "grad_norm": 0.7093797922134399, - "learning_rate": 1.5622769429832687e-05, - "loss": 0.3347, - "step": 6873 - }, - { - "epoch": 0.6475589364357881, - "grad_norm": 0.721733808517456, - "learning_rate": 1.5621520657785523e-05, - "loss": 0.3195, - "step": 6874 - }, - { - "epoch": 0.647653140528956, - "grad_norm": 0.7266020774841309, - "learning_rate": 1.5620271757558994e-05, - "loss": 0.3223, - "step": 6875 - }, - { - "epoch": 0.6477473446221238, - "grad_norm": 0.8292143940925598, - "learning_rate": 1.5619022729181575e-05, - "loss": 0.3082, - "step": 6876 - }, - { - "epoch": 0.6478415487152916, - "grad_norm": 0.7851315140724182, - "learning_rate": 1.5617773572681748e-05, - "loss": 0.3001, - "step": 6877 - }, - { - "epoch": 0.6479357528084595, - "grad_norm": 0.8517122864723206, - "learning_rate": 1.5616524288088e-05, - "loss": 0.3389, - "step": 6878 - }, - { - "epoch": 0.6480299569016273, - "grad_norm": 0.7251204252243042, - "learning_rate": 1.5615274875428807e-05, - "loss": 0.3061, - "step": 6879 - }, - { - "epoch": 0.6481241609947952, - "grad_norm": 0.9605844616889954, - "learning_rate": 1.5614025334732664e-05, - "loss": 0.3732, - "step": 6880 - }, - { - "epoch": 0.648218365087963, - "grad_norm": 0.7894840836524963, - "learning_rate": 1.561277566602806e-05, - "loss": 0.3382, - "step": 6881 - }, - { - "epoch": 0.6483125691811309, - "grad_norm": 0.7801871299743652, - "learning_rate": 1.561152586934349e-05, - "loss": 0.3377, - "step": 6882 - }, - { - "epoch": 0.6484067732742987, - "grad_norm": 0.7582337856292725, - "learning_rate": 1.5610275944707454e-05, - "loss": 0.3575, - "step": 6883 - }, - { - "epoch": 0.6485009773674666, - "grad_norm": 0.6954520344734192, - "learning_rate": 1.560902589214845e-05, - "loss": 0.2911, - "step": 6884 - }, - { - "epoch": 0.6485951814606344, - "grad_norm": 0.667131781578064, - "learning_rate": 1.560777571169498e-05, - "loss": 0.2855, - "step": 6885 - }, - { - "epoch": 0.6486893855538023, - "grad_norm": 0.7147957682609558, - "learning_rate": 1.560652540337555e-05, - "loss": 0.303, - "step": 6886 - }, - { - "epoch": 0.6487835896469701, - "grad_norm": 0.6967618465423584, - "learning_rate": 1.5605274967218672e-05, - "loss": 0.323, - "step": 6887 - }, - { - "epoch": 0.648877793740138, - "grad_norm": 0.7516948580741882, - "learning_rate": 1.5604024403252858e-05, - "loss": 0.3295, - "step": 6888 - }, - { - "epoch": 0.6489719978333058, - "grad_norm": 0.7380844354629517, - "learning_rate": 1.5602773711506617e-05, - "loss": 0.3091, - "step": 6889 - }, - { - "epoch": 0.6490662019264737, - "grad_norm": 0.6943092346191406, - "learning_rate": 1.5601522892008475e-05, - "loss": 0.3289, - "step": 6890 - }, - { - "epoch": 0.6491604060196415, - "grad_norm": 0.7739840745925903, - "learning_rate": 1.5600271944786944e-05, - "loss": 0.3311, - "step": 6891 - }, - { - "epoch": 0.6492546101128094, - "grad_norm": 0.9014347195625305, - "learning_rate": 1.559902086987055e-05, - "loss": 0.359, - "step": 6892 - }, - { - "epoch": 0.6493488142059772, - "grad_norm": 0.632686197757721, - "learning_rate": 1.5597769667287826e-05, - "loss": 0.3407, - "step": 6893 - }, - { - "epoch": 0.6494430182991451, - "grad_norm": 0.7913288474082947, - "learning_rate": 1.5596518337067293e-05, - "loss": 0.3465, - "step": 6894 - }, - { - "epoch": 0.6495372223923129, - "grad_norm": 0.6992575526237488, - "learning_rate": 1.5595266879237492e-05, - "loss": 0.3417, - "step": 6895 - }, - { - "epoch": 0.6496314264854808, - "grad_norm": 0.7911957502365112, - 
"learning_rate": 1.5594015293826945e-05, - "loss": 0.3579, - "step": 6896 - }, - { - "epoch": 0.6497256305786486, - "grad_norm": 0.6819725036621094, - "learning_rate": 1.5592763580864204e-05, - "loss": 0.3191, - "step": 6897 - }, - { - "epoch": 0.6498198346718165, - "grad_norm": 0.6952462196350098, - "learning_rate": 1.5591511740377802e-05, - "loss": 0.29, - "step": 6898 - }, - { - "epoch": 0.6499140387649843, - "grad_norm": 0.7029697895050049, - "learning_rate": 1.5590259772396287e-05, - "loss": 0.3408, - "step": 6899 - }, - { - "epoch": 0.6500082428581522, - "grad_norm": 0.7309373617172241, - "learning_rate": 1.55890076769482e-05, - "loss": 0.3427, - "step": 6900 - }, - { - "epoch": 0.65010244695132, - "grad_norm": 0.6198153495788574, - "learning_rate": 1.5587755454062095e-05, - "loss": 0.2921, - "step": 6901 - }, - { - "epoch": 0.6501966510444879, - "grad_norm": 0.722285270690918, - "learning_rate": 1.5586503103766526e-05, - "loss": 0.3019, - "step": 6902 - }, - { - "epoch": 0.6502908551376557, - "grad_norm": 0.5886370539665222, - "learning_rate": 1.5585250626090045e-05, - "loss": 0.2599, - "step": 6903 - }, - { - "epoch": 0.6503850592308236, - "grad_norm": 0.8670148849487305, - "learning_rate": 1.5583998021061213e-05, - "loss": 0.3129, - "step": 6904 - }, - { - "epoch": 0.6504792633239914, - "grad_norm": 0.7227097749710083, - "learning_rate": 1.558274528870859e-05, - "loss": 0.3129, - "step": 6905 - }, - { - "epoch": 0.6505734674171593, - "grad_norm": 0.7777684926986694, - "learning_rate": 1.558149242906074e-05, - "loss": 0.3186, - "step": 6906 - }, - { - "epoch": 0.6506676715103271, - "grad_norm": 0.7884363532066345, - "learning_rate": 1.558023944214623e-05, - "loss": 0.3167, - "step": 6907 - }, - { - "epoch": 0.650761875603495, - "grad_norm": 0.7935061454772949, - "learning_rate": 1.5578986327993633e-05, - "loss": 0.2878, - "step": 6908 - }, - { - "epoch": 0.6508560796966628, - "grad_norm": 0.6717670559883118, - "learning_rate": 1.557773308663152e-05, - "loss": 0.3035, - "step": 6909 - }, - { - "epoch": 0.6509502837898307, - "grad_norm": 0.6283624768257141, - "learning_rate": 1.5576479718088466e-05, - "loss": 0.3054, - "step": 6910 - }, - { - "epoch": 0.6510444878829985, - "grad_norm": 0.7556511759757996, - "learning_rate": 1.5575226222393048e-05, - "loss": 0.3377, - "step": 6911 - }, - { - "epoch": 0.6511386919761664, - "grad_norm": 0.7569495439529419, - "learning_rate": 1.557397259957385e-05, - "loss": 0.2915, - "step": 6912 - }, - { - "epoch": 0.6512328960693342, - "grad_norm": 0.7043121457099915, - "learning_rate": 1.5572718849659458e-05, - "loss": 0.3003, - "step": 6913 - }, - { - "epoch": 0.6513271001625021, - "grad_norm": 0.6409788727760315, - "learning_rate": 1.5571464972678457e-05, - "loss": 0.3125, - "step": 6914 - }, - { - "epoch": 0.6514213042556699, - "grad_norm": 0.796539843082428, - "learning_rate": 1.557021096865944e-05, - "loss": 0.3095, - "step": 6915 - }, - { - "epoch": 0.6515155083488378, - "grad_norm": 0.7086146473884583, - "learning_rate": 1.5568956837630996e-05, - "loss": 0.3483, - "step": 6916 - }, - { - "epoch": 0.6516097124420056, - "grad_norm": 1.0475225448608398, - "learning_rate": 1.5567702579621724e-05, - "loss": 0.3078, - "step": 6917 - }, - { - "epoch": 0.6517039165351735, - "grad_norm": 0.8035339117050171, - "learning_rate": 1.5566448194660225e-05, - "loss": 0.3203, - "step": 6918 - }, - { - "epoch": 0.6517981206283413, - "grad_norm": 0.6819237470626831, - "learning_rate": 1.5565193682775097e-05, - "loss": 0.3577, - "step": 6919 - }, - { - "epoch": 
0.6518923247215092, - "grad_norm": 0.6923936009407043, - "learning_rate": 1.5563939043994944e-05, - "loss": 0.326, - "step": 6920 - }, - { - "epoch": 0.651986528814677, - "grad_norm": 0.9624119997024536, - "learning_rate": 1.5562684278348378e-05, - "loss": 0.324, - "step": 6921 - }, - { - "epoch": 0.6520807329078449, - "grad_norm": 0.7303101420402527, - "learning_rate": 1.5561429385864005e-05, - "loss": 0.3153, - "step": 6922 - }, - { - "epoch": 0.6521749370010127, - "grad_norm": 0.7050034403800964, - "learning_rate": 1.5560174366570448e-05, - "loss": 0.2999, - "step": 6923 - }, - { - "epoch": 0.6522691410941805, - "grad_norm": 0.6949707865715027, - "learning_rate": 1.555891922049631e-05, - "loss": 0.3143, - "step": 6924 - }, - { - "epoch": 0.6523633451873484, - "grad_norm": 0.7842676043510437, - "learning_rate": 1.555766394767022e-05, - "loss": 0.3172, - "step": 6925 - }, - { - "epoch": 0.6524575492805162, - "grad_norm": 0.8317732214927673, - "learning_rate": 1.5556408548120794e-05, - "loss": 0.3373, - "step": 6926 - }, - { - "epoch": 0.6525517533736841, - "grad_norm": 0.7175168991088867, - "learning_rate": 1.555515302187666e-05, - "loss": 0.345, - "step": 6927 - }, - { - "epoch": 0.652645957466852, - "grad_norm": 0.7620663046836853, - "learning_rate": 1.555389736896645e-05, - "loss": 0.3163, - "step": 6928 - }, - { - "epoch": 0.6527401615600198, - "grad_norm": 0.6921946406364441, - "learning_rate": 1.555264158941879e-05, - "loss": 0.3488, - "step": 6929 - }, - { - "epoch": 0.6528343656531876, - "grad_norm": 0.7068236470222473, - "learning_rate": 1.555138568326231e-05, - "loss": 0.3179, - "step": 6930 - }, - { - "epoch": 0.6529285697463555, - "grad_norm": 1.0250000953674316, - "learning_rate": 1.5550129650525655e-05, - "loss": 0.3019, - "step": 6931 - }, - { - "epoch": 0.6530227738395233, - "grad_norm": 0.8708575367927551, - "learning_rate": 1.5548873491237458e-05, - "loss": 0.2934, - "step": 6932 - }, - { - "epoch": 0.6531169779326912, - "grad_norm": 0.6972542405128479, - "learning_rate": 1.5547617205426367e-05, - "loss": 0.2928, - "step": 6933 - }, - { - "epoch": 0.653211182025859, - "grad_norm": 0.7019140720367432, - "learning_rate": 1.554636079312102e-05, - "loss": 0.335, - "step": 6934 - }, - { - "epoch": 0.6533053861190269, - "grad_norm": 0.6595550179481506, - "learning_rate": 1.5545104254350074e-05, - "loss": 0.3089, - "step": 6935 - }, - { - "epoch": 0.6533995902121947, - "grad_norm": 0.893386960029602, - "learning_rate": 1.5543847589142173e-05, - "loss": 0.3344, - "step": 6936 - }, - { - "epoch": 0.6534937943053626, - "grad_norm": 0.758198082447052, - "learning_rate": 1.554259079752597e-05, - "loss": 0.3077, - "step": 6937 - }, - { - "epoch": 0.6535879983985304, - "grad_norm": 0.8025262355804443, - "learning_rate": 1.5541333879530132e-05, - "loss": 0.3256, - "step": 6938 - }, - { - "epoch": 0.6536822024916983, - "grad_norm": 0.9499841928482056, - "learning_rate": 1.5540076835183307e-05, - "loss": 0.3883, - "step": 6939 - }, - { - "epoch": 0.6537764065848661, - "grad_norm": 0.6841041445732117, - "learning_rate": 1.5538819664514165e-05, - "loss": 0.2785, - "step": 6940 - }, - { - "epoch": 0.653870610678034, - "grad_norm": 0.8491919040679932, - "learning_rate": 1.5537562367551365e-05, - "loss": 0.3484, - "step": 6941 - }, - { - "epoch": 0.6539648147712018, - "grad_norm": 0.7580857872962952, - "learning_rate": 1.553630494432358e-05, - "loss": 0.3264, - "step": 6942 - }, - { - "epoch": 0.6540590188643697, - "grad_norm": 0.7019778490066528, - "learning_rate": 1.553504739485948e-05, - 
"loss": 0.2818, - "step": 6943 - }, - { - "epoch": 0.6541532229575375, - "grad_norm": 0.7552631497383118, - "learning_rate": 1.553378971918774e-05, - "loss": 0.3703, - "step": 6944 - }, - { - "epoch": 0.6542474270507054, - "grad_norm": 0.7579901218414307, - "learning_rate": 1.553253191733704e-05, - "loss": 0.347, - "step": 6945 - }, - { - "epoch": 0.6543416311438732, - "grad_norm": 0.7338920831680298, - "learning_rate": 1.5531273989336052e-05, - "loss": 0.32, - "step": 6946 - }, - { - "epoch": 0.6544358352370411, - "grad_norm": 0.6904371976852417, - "learning_rate": 1.553001593521346e-05, - "loss": 0.3305, - "step": 6947 - }, - { - "epoch": 0.6545300393302089, - "grad_norm": 0.7427716255187988, - "learning_rate": 1.5528757754997957e-05, - "loss": 0.3337, - "step": 6948 - }, - { - "epoch": 0.6546242434233768, - "grad_norm": 0.7259464263916016, - "learning_rate": 1.5527499448718225e-05, - "loss": 0.2932, - "step": 6949 - }, - { - "epoch": 0.6547184475165446, - "grad_norm": 0.6885246634483337, - "learning_rate": 1.5526241016402962e-05, - "loss": 0.3193, - "step": 6950 - }, - { - "epoch": 0.6548126516097125, - "grad_norm": 0.8501678705215454, - "learning_rate": 1.552498245808085e-05, - "loss": 0.3212, - "step": 6951 - }, - { - "epoch": 0.6549068557028803, - "grad_norm": 0.7948251962661743, - "learning_rate": 1.5523723773780597e-05, - "loss": 0.35, - "step": 6952 - }, - { - "epoch": 0.6550010597960482, - "grad_norm": 0.7340381741523743, - "learning_rate": 1.55224649635309e-05, - "loss": 0.3224, - "step": 6953 - }, - { - "epoch": 0.655095263889216, - "grad_norm": 0.886984646320343, - "learning_rate": 1.5521206027360458e-05, - "loss": 0.3283, - "step": 6954 - }, - { - "epoch": 0.6551894679823839, - "grad_norm": 0.8832926154136658, - "learning_rate": 1.5519946965297984e-05, - "loss": 0.3545, - "step": 6955 - }, - { - "epoch": 0.6552836720755517, - "grad_norm": 0.6827700138092041, - "learning_rate": 1.551868777737218e-05, - "loss": 0.3008, - "step": 6956 - }, - { - "epoch": 0.6553778761687196, - "grad_norm": 0.8592090010643005, - "learning_rate": 1.551742846361176e-05, - "loss": 0.3598, - "step": 6957 - }, - { - "epoch": 0.6554720802618874, - "grad_norm": 0.7674247622489929, - "learning_rate": 1.5516169024045437e-05, - "loss": 0.313, - "step": 6958 - }, - { - "epoch": 0.6555662843550553, - "grad_norm": 0.9212135076522827, - "learning_rate": 1.551490945870193e-05, - "loss": 0.3673, - "step": 6959 - }, - { - "epoch": 0.6556604884482231, - "grad_norm": 0.7794392108917236, - "learning_rate": 1.5513649767609962e-05, - "loss": 0.3913, - "step": 6960 - }, - { - "epoch": 0.655754692541391, - "grad_norm": 0.690242350101471, - "learning_rate": 1.5512389950798248e-05, - "loss": 0.3117, - "step": 6961 - }, - { - "epoch": 0.6558488966345588, - "grad_norm": 0.7565612196922302, - "learning_rate": 1.551113000829552e-05, - "loss": 0.321, - "step": 6962 - }, - { - "epoch": 0.6559431007277267, - "grad_norm": 0.750796377658844, - "learning_rate": 1.55098699401305e-05, - "loss": 0.3157, - "step": 6963 - }, - { - "epoch": 0.6560373048208945, - "grad_norm": 0.7473530769348145, - "learning_rate": 1.550860974633193e-05, - "loss": 0.3469, - "step": 6964 - }, - { - "epoch": 0.6561315089140624, - "grad_norm": 0.7537606954574585, - "learning_rate": 1.550734942692854e-05, - "loss": 0.351, - "step": 6965 - }, - { - "epoch": 0.6562257130072302, - "grad_norm": 0.7087019085884094, - "learning_rate": 1.550608898194906e-05, - "loss": 0.3084, - "step": 6966 - }, - { - "epoch": 0.656319917100398, - "grad_norm": 0.6352601647377014, - 
"learning_rate": 1.5504828411422237e-05, - "loss": 0.2826, - "step": 6967 - }, - { - "epoch": 0.6564141211935659, - "grad_norm": 0.81820148229599, - "learning_rate": 1.550356771537682e-05, - "loss": 0.2954, - "step": 6968 - }, - { - "epoch": 0.6565083252867338, - "grad_norm": 0.6734491586685181, - "learning_rate": 1.550230689384154e-05, - "loss": 0.2805, - "step": 6969 - }, - { - "epoch": 0.6566025293799016, - "grad_norm": 0.6242235898971558, - "learning_rate": 1.550104594684515e-05, - "loss": 0.2695, - "step": 6970 - }, - { - "epoch": 0.6566967334730694, - "grad_norm": 0.6996893286705017, - "learning_rate": 1.549978487441641e-05, - "loss": 0.2763, - "step": 6971 - }, - { - "epoch": 0.6567909375662373, - "grad_norm": 0.766075611114502, - "learning_rate": 1.549852367658407e-05, - "loss": 0.3404, - "step": 6972 - }, - { - "epoch": 0.6568851416594051, - "grad_norm": 0.8189030289649963, - "learning_rate": 1.5497262353376888e-05, - "loss": 0.3535, - "step": 6973 - }, - { - "epoch": 0.656979345752573, - "grad_norm": 0.8577380180358887, - "learning_rate": 1.5496000904823622e-05, - "loss": 0.3258, - "step": 6974 - }, - { - "epoch": 0.6570735498457408, - "grad_norm": 0.8298494815826416, - "learning_rate": 1.5494739330953034e-05, - "loss": 0.3608, - "step": 6975 - }, - { - "epoch": 0.6571677539389087, - "grad_norm": 0.7967024445533752, - "learning_rate": 1.5493477631793893e-05, - "loss": 0.3569, - "step": 6976 - }, - { - "epoch": 0.6572619580320765, - "grad_norm": 0.7760042548179626, - "learning_rate": 1.549221580737496e-05, - "loss": 0.3331, - "step": 6977 - }, - { - "epoch": 0.6573561621252444, - "grad_norm": 0.7782050967216492, - "learning_rate": 1.5490953857725023e-05, - "loss": 0.3272, - "step": 6978 - }, - { - "epoch": 0.6574503662184122, - "grad_norm": 0.6885071396827698, - "learning_rate": 1.5489691782872838e-05, - "loss": 0.3077, - "step": 6979 - }, - { - "epoch": 0.6575445703115801, - "grad_norm": 0.815770149230957, - "learning_rate": 1.5488429582847194e-05, - "loss": 0.3228, - "step": 6980 - }, - { - "epoch": 0.6576387744047479, - "grad_norm": 0.7214744687080383, - "learning_rate": 1.5487167257676868e-05, - "loss": 0.2933, - "step": 6981 - }, - { - "epoch": 0.6577329784979158, - "grad_norm": 0.7750257849693298, - "learning_rate": 1.5485904807390638e-05, - "loss": 0.3294, - "step": 6982 - }, - { - "epoch": 0.6578271825910836, - "grad_norm": 0.6970694065093994, - "learning_rate": 1.54846422320173e-05, - "loss": 0.3188, - "step": 6983 - }, - { - "epoch": 0.6579213866842515, - "grad_norm": 0.8175873160362244, - "learning_rate": 1.5483379531585634e-05, - "loss": 0.3047, - "step": 6984 - }, - { - "epoch": 0.6580155907774192, - "grad_norm": 0.6365392804145813, - "learning_rate": 1.5482116706124435e-05, - "loss": 0.3088, - "step": 6985 - }, - { - "epoch": 0.6581097948705871, - "grad_norm": 0.8470643162727356, - "learning_rate": 1.5480853755662498e-05, - "loss": 0.3328, - "step": 6986 - }, - { - "epoch": 0.6582039989637549, - "grad_norm": 0.6874431371688843, - "learning_rate": 1.5479590680228612e-05, - "loss": 0.2949, - "step": 6987 - }, - { - "epoch": 0.6582982030569228, - "grad_norm": 0.7445035576820374, - "learning_rate": 1.5478327479851592e-05, - "loss": 0.3493, - "step": 6988 - }, - { - "epoch": 0.6583924071500906, - "grad_norm": 0.6998085975646973, - "learning_rate": 1.5477064154560232e-05, - "loss": 0.3309, - "step": 6989 - }, - { - "epoch": 0.6584866112432585, - "grad_norm": 0.7649194002151489, - "learning_rate": 1.5475800704383338e-05, - "loss": 0.3242, - "step": 6990 - }, - { - "epoch": 
0.6585808153364263, - "grad_norm": 0.687041163444519, - "learning_rate": 1.547453712934972e-05, - "loss": 0.298, - "step": 6991 - }, - { - "epoch": 0.6586750194295942, - "grad_norm": 0.8076814413070679, - "learning_rate": 1.5473273429488187e-05, - "loss": 0.329, - "step": 6992 - }, - { - "epoch": 0.658769223522762, - "grad_norm": 0.8376584649085999, - "learning_rate": 1.5472009604827557e-05, - "loss": 0.3438, - "step": 6993 - }, - { - "epoch": 0.6588634276159299, - "grad_norm": 0.9204269647598267, - "learning_rate": 1.5470745655396643e-05, - "loss": 0.3012, - "step": 6994 - }, - { - "epoch": 0.6589576317090977, - "grad_norm": 0.653154194355011, - "learning_rate": 1.5469481581224274e-05, - "loss": 0.2979, - "step": 6995 - }, - { - "epoch": 0.6590518358022656, - "grad_norm": 0.8088932633399963, - "learning_rate": 1.5468217382339256e-05, - "loss": 0.3347, - "step": 6996 - }, - { - "epoch": 0.6591460398954334, - "grad_norm": 0.8216959238052368, - "learning_rate": 1.546695305877043e-05, - "loss": 0.3401, - "step": 6997 - }, - { - "epoch": 0.6592402439886013, - "grad_norm": 0.737947404384613, - "learning_rate": 1.546568861054662e-05, - "loss": 0.3203, - "step": 6998 - }, - { - "epoch": 0.6593344480817691, - "grad_norm": 0.7853481769561768, - "learning_rate": 1.5464424037696655e-05, - "loss": 0.3525, - "step": 6999 - }, - { - "epoch": 0.659428652174937, - "grad_norm": 0.7092281579971313, - "learning_rate": 1.5463159340249377e-05, - "loss": 0.3131, - "step": 7000 - }, - { - "epoch": 0.6595228562681048, - "grad_norm": 0.7580530643463135, - "learning_rate": 1.546189451823361e-05, - "loss": 0.3227, - "step": 7001 - }, - { - "epoch": 0.6596170603612727, - "grad_norm": 0.7429535984992981, - "learning_rate": 1.5460629571678205e-05, - "loss": 0.3315, - "step": 7002 - }, - { - "epoch": 0.6597112644544405, - "grad_norm": 0.7682792544364929, - "learning_rate": 1.5459364500612e-05, - "loss": 0.3303, - "step": 7003 - }, - { - "epoch": 0.6598054685476084, - "grad_norm": 0.6755010485649109, - "learning_rate": 1.545809930506384e-05, - "loss": 0.2796, - "step": 7004 - }, - { - "epoch": 0.6598996726407762, - "grad_norm": 0.776313304901123, - "learning_rate": 1.5456833985062574e-05, - "loss": 0.3469, - "step": 7005 - }, - { - "epoch": 0.659993876733944, - "grad_norm": 0.7985623478889465, - "learning_rate": 1.5455568540637055e-05, - "loss": 0.3586, - "step": 7006 - }, - { - "epoch": 0.6600880808271119, - "grad_norm": 0.7103636264801025, - "learning_rate": 1.5454302971816138e-05, - "loss": 0.3139, - "step": 7007 - }, - { - "epoch": 0.6601822849202797, - "grad_norm": 0.7411162853240967, - "learning_rate": 1.5453037278628676e-05, - "loss": 0.3214, - "step": 7008 - }, - { - "epoch": 0.6602764890134476, - "grad_norm": 0.7773886322975159, - "learning_rate": 1.545177146110353e-05, - "loss": 0.3126, - "step": 7009 - }, - { - "epoch": 0.6603706931066154, - "grad_norm": 0.7389939427375793, - "learning_rate": 1.5450505519269568e-05, - "loss": 0.3177, - "step": 7010 - }, - { - "epoch": 0.6604648971997833, - "grad_norm": 0.7481390833854675, - "learning_rate": 1.544923945315565e-05, - "loss": 0.298, - "step": 7011 - }, - { - "epoch": 0.6605591012929511, - "grad_norm": 0.699995756149292, - "learning_rate": 1.5447973262790638e-05, - "loss": 0.3108, - "step": 7012 - }, - { - "epoch": 0.660653305386119, - "grad_norm": 0.7394351959228516, - "learning_rate": 1.5446706948203415e-05, - "loss": 0.3307, - "step": 7013 - }, - { - "epoch": 0.6607475094792868, - "grad_norm": 0.7475301623344421, - "learning_rate": 1.544544050942285e-05, - 
"loss": 0.3158, - "step": 7014 - }, - { - "epoch": 0.6608417135724547, - "grad_norm": 0.8901613354682922, - "learning_rate": 1.544417394647782e-05, - "loss": 0.3282, - "step": 7015 - }, - { - "epoch": 0.6609359176656225, - "grad_norm": 0.72362220287323, - "learning_rate": 1.5442907259397203e-05, - "loss": 0.3231, - "step": 7016 - }, - { - "epoch": 0.6610301217587904, - "grad_norm": 0.7319814562797546, - "learning_rate": 1.5441640448209884e-05, - "loss": 0.36, - "step": 7017 - }, - { - "epoch": 0.6611243258519582, - "grad_norm": 0.7211571335792542, - "learning_rate": 1.544037351294475e-05, - "loss": 0.3455, - "step": 7018 - }, - { - "epoch": 0.6612185299451261, - "grad_norm": 0.6436552405357361, - "learning_rate": 1.5439106453630683e-05, - "loss": 0.2964, - "step": 7019 - }, - { - "epoch": 0.6613127340382939, - "grad_norm": 0.6992090940475464, - "learning_rate": 1.5437839270296575e-05, - "loss": 0.3117, - "step": 7020 - }, - { - "epoch": 0.6614069381314618, - "grad_norm": 0.7446430921554565, - "learning_rate": 1.5436571962971325e-05, - "loss": 0.3103, - "step": 7021 - }, - { - "epoch": 0.6615011422246296, - "grad_norm": 0.8667694330215454, - "learning_rate": 1.5435304531683827e-05, - "loss": 0.299, - "step": 7022 - }, - { - "epoch": 0.6615953463177975, - "grad_norm": 0.8172776699066162, - "learning_rate": 1.5434036976462977e-05, - "loss": 0.3013, - "step": 7023 - }, - { - "epoch": 0.6616895504109653, - "grad_norm": 0.8228224515914917, - "learning_rate": 1.543276929733768e-05, - "loss": 0.3266, - "step": 7024 - }, - { - "epoch": 0.6617837545041332, - "grad_norm": 0.8509236574172974, - "learning_rate": 1.5431501494336843e-05, - "loss": 0.3442, - "step": 7025 - }, - { - "epoch": 0.661877958597301, - "grad_norm": 0.7069663405418396, - "learning_rate": 1.5430233567489375e-05, - "loss": 0.279, - "step": 7026 - }, - { - "epoch": 0.6619721626904689, - "grad_norm": 0.7457709908485413, - "learning_rate": 1.5428965516824178e-05, - "loss": 0.3255, - "step": 7027 - }, - { - "epoch": 0.6620663667836367, - "grad_norm": 0.8237126469612122, - "learning_rate": 1.5427697342370175e-05, - "loss": 0.3547, - "step": 7028 - }, - { - "epoch": 0.6621605708768046, - "grad_norm": 0.9309609532356262, - "learning_rate": 1.5426429044156276e-05, - "loss": 0.2992, - "step": 7029 - }, - { - "epoch": 0.6622547749699724, - "grad_norm": 0.7035329341888428, - "learning_rate": 1.5425160622211402e-05, - "loss": 0.3505, - "step": 7030 - }, - { - "epoch": 0.6623489790631403, - "grad_norm": 0.6925642490386963, - "learning_rate": 1.542389207656448e-05, - "loss": 0.3032, - "step": 7031 - }, - { - "epoch": 0.6624431831563081, - "grad_norm": 0.8220372200012207, - "learning_rate": 1.5422623407244425e-05, - "loss": 0.3311, - "step": 7032 - }, - { - "epoch": 0.662537387249476, - "grad_norm": 0.7475167512893677, - "learning_rate": 1.5421354614280174e-05, - "loss": 0.2834, - "step": 7033 - }, - { - "epoch": 0.6626315913426438, - "grad_norm": 0.7504574060440063, - "learning_rate": 1.542008569770065e-05, - "loss": 0.2699, - "step": 7034 - }, - { - "epoch": 0.6627257954358117, - "grad_norm": 0.6945213079452515, - "learning_rate": 1.5418816657534793e-05, - "loss": 0.3236, - "step": 7035 - }, - { - "epoch": 0.6628199995289795, - "grad_norm": 0.9477180242538452, - "learning_rate": 1.5417547493811533e-05, - "loss": 0.3594, - "step": 7036 - }, - { - "epoch": 0.6629142036221474, - "grad_norm": 0.7390096783638, - "learning_rate": 1.5416278206559816e-05, - "loss": 0.365, - "step": 7037 - }, - { - "epoch": 0.6630084077153152, - "grad_norm": 
0.7738800048828125, - "learning_rate": 1.5415008795808578e-05, - "loss": 0.3057, - "step": 7038 - }, - { - "epoch": 0.6631026118084831, - "grad_norm": 0.7573422193527222, - "learning_rate": 1.541373926158676e-05, - "loss": 0.3143, - "step": 7039 - }, - { - "epoch": 0.6631968159016509, - "grad_norm": 0.6920155882835388, - "learning_rate": 1.541246960392332e-05, - "loss": 0.2913, - "step": 7040 - }, - { - "epoch": 0.6632910199948188, - "grad_norm": 0.8231220245361328, - "learning_rate": 1.5411199822847202e-05, - "loss": 0.318, - "step": 7041 - }, - { - "epoch": 0.6633852240879866, - "grad_norm": 0.7004749774932861, - "learning_rate": 1.5409929918387357e-05, - "loss": 0.284, - "step": 7042 - }, - { - "epoch": 0.6634794281811545, - "grad_norm": 0.7746490240097046, - "learning_rate": 1.5408659890572746e-05, - "loss": 0.3166, - "step": 7043 - }, - { - "epoch": 0.6635736322743223, - "grad_norm": 0.8276163935661316, - "learning_rate": 1.540738973943232e-05, - "loss": 0.3065, - "step": 7044 - }, - { - "epoch": 0.6636678363674902, - "grad_norm": 0.6804275512695312, - "learning_rate": 1.540611946499505e-05, - "loss": 0.2782, - "step": 7045 - }, - { - "epoch": 0.663762040460658, - "grad_norm": 0.9253482818603516, - "learning_rate": 1.5404849067289896e-05, - "loss": 0.3574, - "step": 7046 - }, - { - "epoch": 0.6638562445538259, - "grad_norm": 0.7378069162368774, - "learning_rate": 1.540357854634582e-05, - "loss": 0.3055, - "step": 7047 - }, - { - "epoch": 0.6639504486469937, - "grad_norm": 0.7682768106460571, - "learning_rate": 1.5402307902191803e-05, - "loss": 0.3325, - "step": 7048 - }, - { - "epoch": 0.6640446527401616, - "grad_norm": 0.7268179059028625, - "learning_rate": 1.54010371348568e-05, - "loss": 0.2796, - "step": 7049 - }, - { - "epoch": 0.6641388568333294, - "grad_norm": 0.8494754433631897, - "learning_rate": 1.5399766244369806e-05, - "loss": 0.282, - "step": 7050 - }, - { - "epoch": 0.6642330609264973, - "grad_norm": 0.7781804800033569, - "learning_rate": 1.5398495230759793e-05, - "loss": 0.3255, - "step": 7051 - }, - { - "epoch": 0.6643272650196651, - "grad_norm": 0.6691144108772278, - "learning_rate": 1.5397224094055732e-05, - "loss": 0.2812, - "step": 7052 - }, - { - "epoch": 0.664421469112833, - "grad_norm": 0.784238338470459, - "learning_rate": 1.539595283428662e-05, - "loss": 0.3432, - "step": 7053 - }, - { - "epoch": 0.6645156732060008, - "grad_norm": 0.7589924335479736, - "learning_rate": 1.5394681451481437e-05, - "loss": 0.3392, - "step": 7054 - }, - { - "epoch": 0.6646098772991686, - "grad_norm": 0.6793816089630127, - "learning_rate": 1.5393409945669177e-05, - "loss": 0.3015, - "step": 7055 - }, - { - "epoch": 0.6647040813923365, - "grad_norm": 0.7202833890914917, - "learning_rate": 1.5392138316878826e-05, - "loss": 0.3121, - "step": 7056 - }, - { - "epoch": 0.6647982854855043, - "grad_norm": 0.8880021572113037, - "learning_rate": 1.539086656513938e-05, - "loss": 0.2888, - "step": 7057 - }, - { - "epoch": 0.6648924895786722, - "grad_norm": 0.7612547278404236, - "learning_rate": 1.538959469047984e-05, - "loss": 0.3234, - "step": 7058 - }, - { - "epoch": 0.66498669367184, - "grad_norm": 0.8277795314788818, - "learning_rate": 1.5388322692929207e-05, - "loss": 0.3174, - "step": 7059 - }, - { - "epoch": 0.6650808977650079, - "grad_norm": 0.8115323781967163, - "learning_rate": 1.5387050572516488e-05, - "loss": 0.2976, - "step": 7060 - }, - { - "epoch": 0.6651751018581757, - "grad_norm": 0.7044195532798767, - "learning_rate": 1.5385778329270676e-05, - "loss": 0.3068, - "step": 7061 - 
}, - { - "epoch": 0.6652693059513436, - "grad_norm": 0.9168974161148071, - "learning_rate": 1.5384505963220794e-05, - "loss": 0.3771, - "step": 7062 - }, - { - "epoch": 0.6653635100445114, - "grad_norm": 0.7239624857902527, - "learning_rate": 1.5383233474395848e-05, - "loss": 0.3074, - "step": 7063 - }, - { - "epoch": 0.6654577141376793, - "grad_norm": 0.7223944664001465, - "learning_rate": 1.5381960862824853e-05, - "loss": 0.3603, - "step": 7064 - }, - { - "epoch": 0.6655519182308471, - "grad_norm": 0.7156306505203247, - "learning_rate": 1.5380688128536827e-05, - "loss": 0.3039, - "step": 7065 - }, - { - "epoch": 0.665646122324015, - "grad_norm": 0.8616565465927124, - "learning_rate": 1.5379415271560794e-05, - "loss": 0.3457, - "step": 7066 - }, - { - "epoch": 0.6657403264171828, - "grad_norm": 0.8814892172813416, - "learning_rate": 1.5378142291925768e-05, - "loss": 0.3634, - "step": 7067 - }, - { - "epoch": 0.6658345305103507, - "grad_norm": 0.8556869626045227, - "learning_rate": 1.5376869189660784e-05, - "loss": 0.3858, - "step": 7068 - }, - { - "epoch": 0.6659287346035185, - "grad_norm": 0.758821964263916, - "learning_rate": 1.5375595964794862e-05, - "loss": 0.3542, - "step": 7069 - }, - { - "epoch": 0.6660229386966864, - "grad_norm": 0.915661096572876, - "learning_rate": 1.5374322617357046e-05, - "loss": 0.3306, - "step": 7070 - }, - { - "epoch": 0.6661171427898542, - "grad_norm": 0.7214668393135071, - "learning_rate": 1.5373049147376358e-05, - "loss": 0.3163, - "step": 7071 - }, - { - "epoch": 0.6662113468830221, - "grad_norm": 0.9709456562995911, - "learning_rate": 1.5371775554881837e-05, - "loss": 0.2997, - "step": 7072 - }, - { - "epoch": 0.6663055509761899, - "grad_norm": 0.7502962350845337, - "learning_rate": 1.5370501839902533e-05, - "loss": 0.3331, - "step": 7073 - }, - { - "epoch": 0.6663997550693578, - "grad_norm": 0.8238278031349182, - "learning_rate": 1.5369228002467477e-05, - "loss": 0.324, - "step": 7074 - }, - { - "epoch": 0.6664939591625256, - "grad_norm": 0.698924720287323, - "learning_rate": 1.536795404260572e-05, - "loss": 0.2914, - "step": 7075 - }, - { - "epoch": 0.6665881632556935, - "grad_norm": 0.7339386343955994, - "learning_rate": 1.5366679960346307e-05, - "loss": 0.3572, - "step": 7076 - }, - { - "epoch": 0.6666823673488613, - "grad_norm": 0.7864017486572266, - "learning_rate": 1.5365405755718293e-05, - "loss": 0.3427, - "step": 7077 - }, - { - "epoch": 0.6667765714420292, - "grad_norm": 0.8368102312088013, - "learning_rate": 1.536413142875073e-05, - "loss": 0.3304, - "step": 7078 - }, - { - "epoch": 0.666870775535197, - "grad_norm": 0.6254177093505859, - "learning_rate": 1.5362856979472672e-05, - "loss": 0.2907, - "step": 7079 - }, - { - "epoch": 0.6669649796283649, - "grad_norm": 0.7711076140403748, - "learning_rate": 1.5361582407913188e-05, - "loss": 0.3246, - "step": 7080 - }, - { - "epoch": 0.6670591837215327, - "grad_norm": 0.7080456018447876, - "learning_rate": 1.5360307714101326e-05, - "loss": 0.3072, - "step": 7081 - }, - { - "epoch": 0.6671533878147006, - "grad_norm": 0.7083311080932617, - "learning_rate": 1.535903289806616e-05, - "loss": 0.2914, - "step": 7082 - }, - { - "epoch": 0.6672475919078684, - "grad_norm": 0.7401914596557617, - "learning_rate": 1.535775795983676e-05, - "loss": 0.2759, - "step": 7083 - }, - { - "epoch": 0.6673417960010363, - "grad_norm": 0.8789626955986023, - "learning_rate": 1.5356482899442188e-05, - "loss": 0.3096, - "step": 7084 - }, - { - "epoch": 0.6674360000942041, - "grad_norm": 0.7936314940452576, - "learning_rate": 
1.5355207716911523e-05, - "loss": 0.3301, - "step": 7085 - }, - { - "epoch": 0.667530204187372, - "grad_norm": 0.6872920393943787, - "learning_rate": 1.535393241227384e-05, - "loss": 0.3099, - "step": 7086 - }, - { - "epoch": 0.6676244082805398, - "grad_norm": 0.7527125477790833, - "learning_rate": 1.535265698555822e-05, - "loss": 0.3669, - "step": 7087 - }, - { - "epoch": 0.6677186123737077, - "grad_norm": 0.8189874887466431, - "learning_rate": 1.535138143679374e-05, - "loss": 0.3399, - "step": 7088 - }, - { - "epoch": 0.6678128164668755, - "grad_norm": 0.8073145151138306, - "learning_rate": 1.535010576600949e-05, - "loss": 0.3524, - "step": 7089 - }, - { - "epoch": 0.6679070205600434, - "grad_norm": 0.760679304599762, - "learning_rate": 1.534882997323455e-05, - "loss": 0.3643, - "step": 7090 - }, - { - "epoch": 0.6680012246532112, - "grad_norm": 0.968233048915863, - "learning_rate": 1.534755405849802e-05, - "loss": 0.3642, - "step": 7091 - }, - { - "epoch": 0.6680954287463791, - "grad_norm": 0.7697311043739319, - "learning_rate": 1.5346278021828983e-05, - "loss": 0.3609, - "step": 7092 - }, - { - "epoch": 0.6681896328395469, - "grad_norm": 0.824440062046051, - "learning_rate": 1.534500186325654e-05, - "loss": 0.3532, - "step": 7093 - }, - { - "epoch": 0.6682838369327148, - "grad_norm": 0.7930930852890015, - "learning_rate": 1.5343725582809793e-05, - "loss": 0.3141, - "step": 7094 - }, - { - "epoch": 0.6683780410258826, - "grad_norm": 0.6535442471504211, - "learning_rate": 1.5342449180517834e-05, - "loss": 0.3, - "step": 7095 - }, - { - "epoch": 0.6684722451190505, - "grad_norm": 1.4270353317260742, - "learning_rate": 1.534117265640977e-05, - "loss": 0.329, - "step": 7096 - }, - { - "epoch": 0.6685664492122183, - "grad_norm": 0.7029897570610046, - "learning_rate": 1.533989601051471e-05, - "loss": 0.2952, - "step": 7097 - }, - { - "epoch": 0.6686606533053862, - "grad_norm": 0.8326965570449829, - "learning_rate": 1.5338619242861766e-05, - "loss": 0.323, - "step": 7098 - }, - { - "epoch": 0.668754857398554, - "grad_norm": 0.9096085429191589, - "learning_rate": 1.5337342353480044e-05, - "loss": 0.3177, - "step": 7099 - }, - { - "epoch": 0.6688490614917219, - "grad_norm": 0.9957058429718018, - "learning_rate": 1.5336065342398664e-05, - "loss": 0.3176, - "step": 7100 - }, - { - "epoch": 0.6689432655848897, - "grad_norm": 0.7245864272117615, - "learning_rate": 1.5334788209646738e-05, - "loss": 0.2801, - "step": 7101 - }, - { - "epoch": 0.6690374696780576, - "grad_norm": 0.8069775700569153, - "learning_rate": 1.5333510955253396e-05, - "loss": 0.341, - "step": 7102 - }, - { - "epoch": 0.6691316737712254, - "grad_norm": 0.7672380805015564, - "learning_rate": 1.533223357924775e-05, - "loss": 0.3182, - "step": 7103 - }, - { - "epoch": 0.6692258778643932, - "grad_norm": 0.692295253276825, - "learning_rate": 1.5330956081658932e-05, - "loss": 0.3094, - "step": 7104 - }, - { - "epoch": 0.6693200819575611, - "grad_norm": 0.757405698299408, - "learning_rate": 1.5329678462516073e-05, - "loss": 0.3011, - "step": 7105 - }, - { - "epoch": 0.669414286050729, - "grad_norm": 0.6376831531524658, - "learning_rate": 1.5328400721848305e-05, - "loss": 0.3108, - "step": 7106 - }, - { - "epoch": 0.6695084901438968, - "grad_norm": 0.6972873210906982, - "learning_rate": 1.5327122859684758e-05, - "loss": 0.3065, - "step": 7107 - }, - { - "epoch": 0.6696026942370646, - "grad_norm": 0.7769290804862976, - "learning_rate": 1.532584487605457e-05, - "loss": 0.3314, - "step": 7108 - }, - { - "epoch": 0.6696968983302325, - 
"grad_norm": 0.8072546124458313, - "learning_rate": 1.5324566770986884e-05, - "loss": 0.3751, - "step": 7109 - }, - { - "epoch": 0.6697911024234003, - "grad_norm": 0.7501782178878784, - "learning_rate": 1.532328854451084e-05, - "loss": 0.341, - "step": 7110 - }, - { - "epoch": 0.6698853065165682, - "grad_norm": 0.6741506457328796, - "learning_rate": 1.532201019665559e-05, - "loss": 0.3058, - "step": 7111 - }, - { - "epoch": 0.669979510609736, - "grad_norm": 0.71759033203125, - "learning_rate": 1.5320731727450268e-05, - "loss": 0.3151, - "step": 7112 - }, - { - "epoch": 0.6700737147029039, - "grad_norm": 0.7359454035758972, - "learning_rate": 1.5319453136924037e-05, - "loss": 0.3131, - "step": 7113 - }, - { - "epoch": 0.6701679187960717, - "grad_norm": 0.737964928150177, - "learning_rate": 1.531817442510605e-05, - "loss": 0.3149, - "step": 7114 - }, - { - "epoch": 0.6702621228892396, - "grad_norm": 0.8473628163337708, - "learning_rate": 1.5316895592025458e-05, - "loss": 0.3736, - "step": 7115 - }, - { - "epoch": 0.6703563269824074, - "grad_norm": 0.672426700592041, - "learning_rate": 1.5315616637711424e-05, - "loss": 0.3112, - "step": 7116 - }, - { - "epoch": 0.6704505310755753, - "grad_norm": 0.6875741481781006, - "learning_rate": 1.5314337562193112e-05, - "loss": 0.3068, - "step": 7117 - }, - { - "epoch": 0.6705447351687431, - "grad_norm": 0.9028030633926392, - "learning_rate": 1.5313058365499686e-05, - "loss": 0.3761, - "step": 7118 - }, - { - "epoch": 0.670638939261911, - "grad_norm": 0.7115257382392883, - "learning_rate": 1.5311779047660312e-05, - "loss": 0.3361, - "step": 7119 - }, - { - "epoch": 0.6707331433550788, - "grad_norm": 0.8416838049888611, - "learning_rate": 1.531049960870416e-05, - "loss": 0.3459, - "step": 7120 - }, - { - "epoch": 0.6708273474482467, - "grad_norm": 0.8257519006729126, - "learning_rate": 1.5309220048660403e-05, - "loss": 0.3544, - "step": 7121 - }, - { - "epoch": 0.6709215515414145, - "grad_norm": 0.6752427220344543, - "learning_rate": 1.5307940367558217e-05, - "loss": 0.303, - "step": 7122 - }, - { - "epoch": 0.6710157556345823, - "grad_norm": 0.6976400017738342, - "learning_rate": 1.530666056542679e-05, - "loss": 0.3069, - "step": 7123 - }, - { - "epoch": 0.6711099597277501, - "grad_norm": 0.7103172540664673, - "learning_rate": 1.5305380642295285e-05, - "loss": 0.3026, - "step": 7124 - }, - { - "epoch": 0.671204163820918, - "grad_norm": 0.7587864995002747, - "learning_rate": 1.53041005981929e-05, - "loss": 0.3254, - "step": 7125 - }, - { - "epoch": 0.6712983679140858, - "grad_norm": 0.7170512676239014, - "learning_rate": 1.5302820433148817e-05, - "loss": 0.3277, - "step": 7126 - }, - { - "epoch": 0.6713925720072537, - "grad_norm": 0.7051998972892761, - "learning_rate": 1.5301540147192227e-05, - "loss": 0.2846, - "step": 7127 - }, - { - "epoch": 0.6714867761004215, - "grad_norm": 0.7642733454704285, - "learning_rate": 1.5300259740352327e-05, - "loss": 0.3373, - "step": 7128 - }, - { - "epoch": 0.6715809801935894, - "grad_norm": 0.6517353653907776, - "learning_rate": 1.5298979212658304e-05, - "loss": 0.2974, - "step": 7129 - }, - { - "epoch": 0.6716751842867572, - "grad_norm": 0.7439292669296265, - "learning_rate": 1.5297698564139364e-05, - "loss": 0.3111, - "step": 7130 - }, - { - "epoch": 0.6717693883799251, - "grad_norm": 0.6721612811088562, - "learning_rate": 1.52964177948247e-05, - "loss": 0.3028, - "step": 7131 - }, - { - "epoch": 0.6718635924730929, - "grad_norm": 0.7699888944625854, - "learning_rate": 1.5295136904743518e-05, - "loss": 0.3321, - 
"step": 7132 - }, - { - "epoch": 0.6719577965662608, - "grad_norm": 0.7608176469802856, - "learning_rate": 1.529385589392503e-05, - "loss": 0.2732, - "step": 7133 - }, - { - "epoch": 0.6720520006594286, - "grad_norm": 0.7948134541511536, - "learning_rate": 1.529257476239844e-05, - "loss": 0.3372, - "step": 7134 - }, - { - "epoch": 0.6721462047525965, - "grad_norm": 0.7166592478752136, - "learning_rate": 1.5291293510192957e-05, - "loss": 0.3339, - "step": 7135 - }, - { - "epoch": 0.6722404088457643, - "grad_norm": 0.7006956338882446, - "learning_rate": 1.52900121373378e-05, - "loss": 0.3186, - "step": 7136 - }, - { - "epoch": 0.6723346129389322, - "grad_norm": 0.7558178305625916, - "learning_rate": 1.5288730643862185e-05, - "loss": 0.3647, - "step": 7137 - }, - { - "epoch": 0.6724288170321, - "grad_norm": 0.8109870553016663, - "learning_rate": 1.5287449029795335e-05, - "loss": 0.3012, - "step": 7138 - }, - { - "epoch": 0.6725230211252679, - "grad_norm": 0.8044179081916809, - "learning_rate": 1.5286167295166468e-05, - "loss": 0.3522, - "step": 7139 - }, - { - "epoch": 0.6726172252184357, - "grad_norm": 0.8509412407875061, - "learning_rate": 1.528488544000481e-05, - "loss": 0.3154, - "step": 7140 - }, - { - "epoch": 0.6727114293116035, - "grad_norm": 0.7896276116371155, - "learning_rate": 1.528360346433959e-05, - "loss": 0.3271, - "step": 7141 - }, - { - "epoch": 0.6728056334047714, - "grad_norm": 0.9728242754936218, - "learning_rate": 1.528232136820004e-05, - "loss": 0.3971, - "step": 7142 - }, - { - "epoch": 0.6728998374979392, - "grad_norm": 0.8134274482727051, - "learning_rate": 1.52810391516154e-05, - "loss": 0.3393, - "step": 7143 - }, - { - "epoch": 0.6729940415911071, - "grad_norm": 1.1658309698104858, - "learning_rate": 1.527975681461489e-05, - "loss": 0.3121, - "step": 7144 - }, - { - "epoch": 0.6730882456842749, - "grad_norm": 0.7035720944404602, - "learning_rate": 1.5278474357227765e-05, - "loss": 0.2972, - "step": 7145 - }, - { - "epoch": 0.6731824497774428, - "grad_norm": 0.7104215025901794, - "learning_rate": 1.527719177948326e-05, - "loss": 0.3073, - "step": 7146 - }, - { - "epoch": 0.6732766538706106, - "grad_norm": 0.7597449421882629, - "learning_rate": 1.5275909081410622e-05, - "loss": 0.3584, - "step": 7147 - }, - { - "epoch": 0.6733708579637785, - "grad_norm": 0.6931965947151184, - "learning_rate": 1.52746262630391e-05, - "loss": 0.3201, - "step": 7148 - }, - { - "epoch": 0.6734650620569463, - "grad_norm": 0.7502024173736572, - "learning_rate": 1.5273343324397938e-05, - "loss": 0.2716, - "step": 7149 - }, - { - "epoch": 0.6735592661501142, - "grad_norm": 0.7594082951545715, - "learning_rate": 1.5272060265516392e-05, - "loss": 0.2708, - "step": 7150 - }, - { - "epoch": 0.673653470243282, - "grad_norm": 0.8175405859947205, - "learning_rate": 1.5270777086423724e-05, - "loss": 0.3482, - "step": 7151 - }, - { - "epoch": 0.6737476743364499, - "grad_norm": 0.8257627487182617, - "learning_rate": 1.5269493787149183e-05, - "loss": 0.3398, - "step": 7152 - }, - { - "epoch": 0.6738418784296177, - "grad_norm": 0.6777796149253845, - "learning_rate": 1.5268210367722035e-05, - "loss": 0.3117, - "step": 7153 - }, - { - "epoch": 0.6739360825227856, - "grad_norm": 0.638866126537323, - "learning_rate": 1.5266926828171542e-05, - "loss": 0.3195, - "step": 7154 - }, - { - "epoch": 0.6740302866159534, - "grad_norm": 0.6855704188346863, - "learning_rate": 1.5265643168526972e-05, - "loss": 0.2833, - "step": 7155 - }, - { - "epoch": 0.6741244907091213, - "grad_norm": 0.7688138484954834, - 
"learning_rate": 1.5264359388817596e-05, - "loss": 0.3279, - "step": 7156 - }, - { - "epoch": 0.6742186948022891, - "grad_norm": 0.6711253523826599, - "learning_rate": 1.526307548907268e-05, - "loss": 0.287, - "step": 7157 - }, - { - "epoch": 0.674312898895457, - "grad_norm": 0.8004896640777588, - "learning_rate": 1.526179146932151e-05, - "loss": 0.3616, - "step": 7158 - }, - { - "epoch": 0.6744071029886248, - "grad_norm": 0.6729282736778259, - "learning_rate": 1.5260507329593354e-05, - "loss": 0.3272, - "step": 7159 - }, - { - "epoch": 0.6745013070817927, - "grad_norm": 0.7034173011779785, - "learning_rate": 1.52592230699175e-05, - "loss": 0.3348, - "step": 7160 - }, - { - "epoch": 0.6745955111749605, - "grad_norm": 0.6225616931915283, - "learning_rate": 1.5257938690323218e-05, - "loss": 0.2642, - "step": 7161 - }, - { - "epoch": 0.6746897152681284, - "grad_norm": 0.7345744967460632, - "learning_rate": 1.5256654190839806e-05, - "loss": 0.2824, - "step": 7162 - }, - { - "epoch": 0.6747839193612962, - "grad_norm": 0.6629753112792969, - "learning_rate": 1.525536957149655e-05, - "loss": 0.3058, - "step": 7163 - }, - { - "epoch": 0.6748781234544641, - "grad_norm": 0.7448536157608032, - "learning_rate": 1.5254084832322736e-05, - "loss": 0.3332, - "step": 7164 - }, - { - "epoch": 0.6749723275476319, - "grad_norm": 0.7381179332733154, - "learning_rate": 1.5252799973347667e-05, - "loss": 0.3314, - "step": 7165 - }, - { - "epoch": 0.6750665316407998, - "grad_norm": 0.7234967350959778, - "learning_rate": 1.5251514994600633e-05, - "loss": 0.3454, - "step": 7166 - }, - { - "epoch": 0.6751607357339676, - "grad_norm": 0.6849322319030762, - "learning_rate": 1.5250229896110935e-05, - "loss": 0.3421, - "step": 7167 - }, - { - "epoch": 0.6752549398271355, - "grad_norm": 0.7592267990112305, - "learning_rate": 1.5248944677907877e-05, - "loss": 0.3175, - "step": 7168 - }, - { - "epoch": 0.6753491439203033, - "grad_norm": 0.707671046257019, - "learning_rate": 1.5247659340020763e-05, - "loss": 0.3274, - "step": 7169 - }, - { - "epoch": 0.6754433480134712, - "grad_norm": 0.7229474186897278, - "learning_rate": 1.5246373882478899e-05, - "loss": 0.3219, - "step": 7170 - }, - { - "epoch": 0.675537552106639, - "grad_norm": 0.8195897936820984, - "learning_rate": 1.5245088305311599e-05, - "loss": 0.3377, - "step": 7171 - }, - { - "epoch": 0.6756317561998069, - "grad_norm": 0.720527172088623, - "learning_rate": 1.524380260854817e-05, - "loss": 0.3536, - "step": 7172 - }, - { - "epoch": 0.6757259602929747, - "grad_norm": 0.7520124912261963, - "learning_rate": 1.5242516792217936e-05, - "loss": 0.2899, - "step": 7173 - }, - { - "epoch": 0.6758201643861426, - "grad_norm": 0.8100510835647583, - "learning_rate": 1.5241230856350209e-05, - "loss": 0.3314, - "step": 7174 - }, - { - "epoch": 0.6759143684793104, - "grad_norm": 0.7205991744995117, - "learning_rate": 1.5239944800974313e-05, - "loss": 0.3313, - "step": 7175 - }, - { - "epoch": 0.6760085725724783, - "grad_norm": 0.8324742317199707, - "learning_rate": 1.5238658626119574e-05, - "loss": 0.3426, - "step": 7176 - }, - { - "epoch": 0.6761027766656461, - "grad_norm": 0.7149767279624939, - "learning_rate": 1.5237372331815312e-05, - "loss": 0.3028, - "step": 7177 - }, - { - "epoch": 0.676196980758814, - "grad_norm": 0.7355038523674011, - "learning_rate": 1.5236085918090867e-05, - "loss": 0.3039, - "step": 7178 - }, - { - "epoch": 0.6762911848519818, - "grad_norm": 0.7257593870162964, - "learning_rate": 1.5234799384975562e-05, - "loss": 0.3449, - "step": 7179 - }, - { - "epoch": 
0.6763853889451497, - "grad_norm": 0.7089222073554993, - "learning_rate": 1.5233512732498735e-05, - "loss": 0.2887, - "step": 7180 - }, - { - "epoch": 0.6764795930383175, - "grad_norm": 0.7266278266906738, - "learning_rate": 1.5232225960689722e-05, - "loss": 0.284, - "step": 7181 - }, - { - "epoch": 0.6765737971314854, - "grad_norm": 0.7619205713272095, - "learning_rate": 1.523093906957787e-05, - "loss": 0.3695, - "step": 7182 - }, - { - "epoch": 0.6766680012246532, - "grad_norm": 0.7840532660484314, - "learning_rate": 1.5229652059192515e-05, - "loss": 0.3703, - "step": 7183 - }, - { - "epoch": 0.676762205317821, - "grad_norm": 0.7276999950408936, - "learning_rate": 1.5228364929563004e-05, - "loss": 0.2923, - "step": 7184 - }, - { - "epoch": 0.6768564094109889, - "grad_norm": 0.9476488828659058, - "learning_rate": 1.522707768071869e-05, - "loss": 0.308, - "step": 7185 - }, - { - "epoch": 0.6769506135041568, - "grad_norm": 0.7192996740341187, - "learning_rate": 1.522579031268892e-05, - "loss": 0.3442, - "step": 7186 - }, - { - "epoch": 0.6770448175973246, - "grad_norm": 0.6776466369628906, - "learning_rate": 1.5224502825503045e-05, - "loss": 0.305, - "step": 7187 - }, - { - "epoch": 0.6771390216904924, - "grad_norm": 0.6640827655792236, - "learning_rate": 1.522321521919043e-05, - "loss": 0.2844, - "step": 7188 - }, - { - "epoch": 0.6772332257836603, - "grad_norm": 0.7528949975967407, - "learning_rate": 1.5221927493780428e-05, - "loss": 0.335, - "step": 7189 - }, - { - "epoch": 0.6773274298768281, - "grad_norm": 0.6359009146690369, - "learning_rate": 1.5220639649302404e-05, - "loss": 0.3238, - "step": 7190 - }, - { - "epoch": 0.677421633969996, - "grad_norm": 0.6911217570304871, - "learning_rate": 1.5219351685785724e-05, - "loss": 0.269, - "step": 7191 - }, - { - "epoch": 0.6775158380631638, - "grad_norm": 0.7504013180732727, - "learning_rate": 1.5218063603259751e-05, - "loss": 0.353, - "step": 7192 - }, - { - "epoch": 0.6776100421563317, - "grad_norm": 0.7562568187713623, - "learning_rate": 1.5216775401753859e-05, - "loss": 0.3469, - "step": 7193 - }, - { - "epoch": 0.6777042462494995, - "grad_norm": 0.7544463872909546, - "learning_rate": 1.5215487081297422e-05, - "loss": 0.3095, - "step": 7194 - }, - { - "epoch": 0.6777984503426674, - "grad_norm": 0.7292487621307373, - "learning_rate": 1.5214198641919811e-05, - "loss": 0.335, - "step": 7195 - }, - { - "epoch": 0.6778926544358352, - "grad_norm": 0.7718842625617981, - "learning_rate": 1.5212910083650404e-05, - "loss": 0.3387, - "step": 7196 - }, - { - "epoch": 0.6779868585290031, - "grad_norm": 0.6888427734375, - "learning_rate": 1.521162140651859e-05, - "loss": 0.3307, - "step": 7197 - }, - { - "epoch": 0.6780810626221709, - "grad_norm": 0.7067251205444336, - "learning_rate": 1.5210332610553749e-05, - "loss": 0.3215, - "step": 7198 - }, - { - "epoch": 0.6781752667153388, - "grad_norm": 0.6551802158355713, - "learning_rate": 1.5209043695785264e-05, - "loss": 0.2828, - "step": 7199 - }, - { - "epoch": 0.6782694708085066, - "grad_norm": 0.7964441776275635, - "learning_rate": 1.5207754662242528e-05, - "loss": 0.3374, - "step": 7200 - }, - { - "epoch": 0.6783636749016745, - "grad_norm": 0.7138524055480957, - "learning_rate": 1.520646550995493e-05, - "loss": 0.3223, - "step": 7201 - }, - { - "epoch": 0.6784578789948423, - "grad_norm": 0.7613831162452698, - "learning_rate": 1.520517623895187e-05, - "loss": 0.3431, - "step": 7202 - }, - { - "epoch": 0.6785520830880102, - "grad_norm": 0.7344279289245605, - "learning_rate": 1.5203886849262736e-05, - 
"loss": 0.3366, - "step": 7203 - }, - { - "epoch": 0.678646287181178, - "grad_norm": 0.7466939687728882, - "learning_rate": 1.5202597340916935e-05, - "loss": 0.3003, - "step": 7204 - }, - { - "epoch": 0.6787404912743459, - "grad_norm": 0.8112344145774841, - "learning_rate": 1.5201307713943875e-05, - "loss": 0.3652, - "step": 7205 - }, - { - "epoch": 0.6788346953675137, - "grad_norm": 0.8347423076629639, - "learning_rate": 1.5200017968372951e-05, - "loss": 0.3301, - "step": 7206 - }, - { - "epoch": 0.6789288994606816, - "grad_norm": 0.6893008351325989, - "learning_rate": 1.5198728104233573e-05, - "loss": 0.3412, - "step": 7207 - }, - { - "epoch": 0.6790231035538494, - "grad_norm": 0.8349971175193787, - "learning_rate": 1.5197438121555159e-05, - "loss": 0.3159, - "step": 7208 - }, - { - "epoch": 0.6791173076470173, - "grad_norm": 0.67628413438797, - "learning_rate": 1.5196148020367112e-05, - "loss": 0.334, - "step": 7209 - }, - { - "epoch": 0.6792115117401851, - "grad_norm": 0.7063706517219543, - "learning_rate": 1.519485780069886e-05, - "loss": 0.3581, - "step": 7210 - }, - { - "epoch": 0.679305715833353, - "grad_norm": 0.861177384853363, - "learning_rate": 1.5193567462579815e-05, - "loss": 0.3792, - "step": 7211 - }, - { - "epoch": 0.6793999199265208, - "grad_norm": 0.6842749714851379, - "learning_rate": 1.5192277006039395e-05, - "loss": 0.2947, - "step": 7212 - }, - { - "epoch": 0.6794941240196887, - "grad_norm": 0.717474639415741, - "learning_rate": 1.5190986431107033e-05, - "loss": 0.3059, - "step": 7213 - }, - { - "epoch": 0.6795883281128565, - "grad_norm": 1.0629812479019165, - "learning_rate": 1.5189695737812153e-05, - "loss": 0.3097, - "step": 7214 - }, - { - "epoch": 0.6796825322060244, - "grad_norm": 0.850768506526947, - "learning_rate": 1.5188404926184184e-05, - "loss": 0.3584, - "step": 7215 - }, - { - "epoch": 0.6797767362991922, - "grad_norm": 0.7694965600967407, - "learning_rate": 1.5187113996252556e-05, - "loss": 0.3311, - "step": 7216 - }, - { - "epoch": 0.6798709403923601, - "grad_norm": 0.7983599901199341, - "learning_rate": 1.518582294804671e-05, - "loss": 0.3123, - "step": 7217 - }, - { - "epoch": 0.6799651444855279, - "grad_norm": 0.7268584966659546, - "learning_rate": 1.5184531781596079e-05, - "loss": 0.318, - "step": 7218 - }, - { - "epoch": 0.6800593485786958, - "grad_norm": 0.868614137172699, - "learning_rate": 1.5183240496930105e-05, - "loss": 0.3533, - "step": 7219 - }, - { - "epoch": 0.6801535526718636, - "grad_norm": 0.7720600366592407, - "learning_rate": 1.5181949094078235e-05, - "loss": 0.307, - "step": 7220 - }, - { - "epoch": 0.6802477567650315, - "grad_norm": 0.7273695468902588, - "learning_rate": 1.5180657573069906e-05, - "loss": 0.3076, - "step": 7221 - }, - { - "epoch": 0.6803419608581993, - "grad_norm": 0.6686651110649109, - "learning_rate": 1.5179365933934577e-05, - "loss": 0.2642, - "step": 7222 - }, - { - "epoch": 0.6804361649513672, - "grad_norm": 0.7749326229095459, - "learning_rate": 1.5178074176701691e-05, - "loss": 0.3221, - "step": 7223 - }, - { - "epoch": 0.680530369044535, - "grad_norm": 0.7839756608009338, - "learning_rate": 1.5176782301400707e-05, - "loss": 0.2697, - "step": 7224 - }, - { - "epoch": 0.6806245731377029, - "grad_norm": 0.7855985760688782, - "learning_rate": 1.5175490308061085e-05, - "loss": 0.3313, - "step": 7225 - }, - { - "epoch": 0.6807187772308707, - "grad_norm": 0.692952036857605, - "learning_rate": 1.5174198196712273e-05, - "loss": 0.3358, - "step": 7226 - }, - { - "epoch": 0.6808129813240386, - "grad_norm": 
0.7354058027267456, - "learning_rate": 1.5172905967383744e-05, - "loss": 0.342, - "step": 7227 - }, - { - "epoch": 0.6809071854172064, - "grad_norm": 0.6863634586334229, - "learning_rate": 1.5171613620104961e-05, - "loss": 0.3144, - "step": 7228 - }, - { - "epoch": 0.6810013895103743, - "grad_norm": 0.7194997668266296, - "learning_rate": 1.5170321154905386e-05, - "loss": 0.294, - "step": 7229 - }, - { - "epoch": 0.6810955936035421, - "grad_norm": 0.874332070350647, - "learning_rate": 1.516902857181449e-05, - "loss": 0.4257, - "step": 7230 - }, - { - "epoch": 0.68118979769671, - "grad_norm": 0.748012363910675, - "learning_rate": 1.5167735870861756e-05, - "loss": 0.3238, - "step": 7231 - }, - { - "epoch": 0.6812840017898778, - "grad_norm": 0.7189104557037354, - "learning_rate": 1.5166443052076645e-05, - "loss": 0.3173, - "step": 7232 - }, - { - "epoch": 0.6813782058830457, - "grad_norm": 0.6643146276473999, - "learning_rate": 1.5165150115488644e-05, - "loss": 0.2946, - "step": 7233 - }, - { - "epoch": 0.6814724099762135, - "grad_norm": 0.7028785943984985, - "learning_rate": 1.5163857061127233e-05, - "loss": 0.3132, - "step": 7234 - }, - { - "epoch": 0.6815666140693813, - "grad_norm": 1.397181749343872, - "learning_rate": 1.5162563889021897e-05, - "loss": 0.3317, - "step": 7235 - }, - { - "epoch": 0.6816608181625492, - "grad_norm": 0.6844511032104492, - "learning_rate": 1.516127059920212e-05, - "loss": 0.3198, - "step": 7236 - }, - { - "epoch": 0.681755022255717, - "grad_norm": 0.7008311152458191, - "learning_rate": 1.5159977191697386e-05, - "loss": 0.2861, - "step": 7237 - }, - { - "epoch": 0.6818492263488849, - "grad_norm": 0.7851535081863403, - "learning_rate": 1.5158683666537197e-05, - "loss": 0.3309, - "step": 7238 - }, - { - "epoch": 0.6819434304420527, - "grad_norm": 0.7332495450973511, - "learning_rate": 1.5157390023751042e-05, - "loss": 0.3277, - "step": 7239 - }, - { - "epoch": 0.6820376345352206, - "grad_norm": 0.767930805683136, - "learning_rate": 1.5156096263368418e-05, - "loss": 0.3209, - "step": 7240 - }, - { - "epoch": 0.6821318386283884, - "grad_norm": 0.9117291569709778, - "learning_rate": 1.5154802385418825e-05, - "loss": 0.3244, - "step": 7241 - }, - { - "epoch": 0.6822260427215563, - "grad_norm": 0.7538485527038574, - "learning_rate": 1.5153508389931766e-05, - "loss": 0.3462, - "step": 7242 - }, - { - "epoch": 0.6823202468147241, - "grad_norm": 0.7208590507507324, - "learning_rate": 1.5152214276936743e-05, - "loss": 0.3632, - "step": 7243 - }, - { - "epoch": 0.682414450907892, - "grad_norm": 0.7393494844436646, - "learning_rate": 1.5150920046463266e-05, - "loss": 0.3261, - "step": 7244 - }, - { - "epoch": 0.6825086550010598, - "grad_norm": 1.5232760906219482, - "learning_rate": 1.5149625698540853e-05, - "loss": 0.3299, - "step": 7245 - }, - { - "epoch": 0.6826028590942277, - "grad_norm": 0.6693358421325684, - "learning_rate": 1.5148331233199002e-05, - "loss": 0.3022, - "step": 7246 - }, - { - "epoch": 0.6826970631873955, - "grad_norm": 0.8321530818939209, - "learning_rate": 1.514703665046724e-05, - "loss": 0.3431, - "step": 7247 - }, - { - "epoch": 0.6827912672805634, - "grad_norm": 0.7812127470970154, - "learning_rate": 1.5145741950375085e-05, - "loss": 0.3076, - "step": 7248 - }, - { - "epoch": 0.6828854713737312, - "grad_norm": 0.750748872756958, - "learning_rate": 1.5144447132952052e-05, - "loss": 0.3458, - "step": 7249 - }, - { - "epoch": 0.6829796754668991, - "grad_norm": 0.7384516596794128, - "learning_rate": 1.5143152198227668e-05, - "loss": 0.3273, - "step": 7250 
- }, - { - "epoch": 0.6830738795600669, - "grad_norm": 0.6384652853012085, - "learning_rate": 1.5141857146231462e-05, - "loss": 0.3068, - "step": 7251 - }, - { - "epoch": 0.6831680836532348, - "grad_norm": 1.020987629890442, - "learning_rate": 1.5140561976992959e-05, - "loss": 0.3177, - "step": 7252 - }, - { - "epoch": 0.6832622877464026, - "grad_norm": 0.778069019317627, - "learning_rate": 1.5139266690541695e-05, - "loss": 0.3769, - "step": 7253 - }, - { - "epoch": 0.6833564918395705, - "grad_norm": 0.7732622623443604, - "learning_rate": 1.5137971286907202e-05, - "loss": 0.2927, - "step": 7254 - }, - { - "epoch": 0.6834506959327383, - "grad_norm": 0.7108065485954285, - "learning_rate": 1.5136675766119017e-05, - "loss": 0.3024, - "step": 7255 - }, - { - "epoch": 0.6835449000259062, - "grad_norm": 0.745226263999939, - "learning_rate": 1.5135380128206683e-05, - "loss": 0.291, - "step": 7256 - }, - { - "epoch": 0.683639104119074, - "grad_norm": 0.9556676149368286, - "learning_rate": 1.5134084373199733e-05, - "loss": 0.2916, - "step": 7257 - }, - { - "epoch": 0.6837333082122419, - "grad_norm": 0.6682932376861572, - "learning_rate": 1.5132788501127727e-05, - "loss": 0.2971, - "step": 7258 - }, - { - "epoch": 0.6838275123054097, - "grad_norm": 0.7709806561470032, - "learning_rate": 1.51314925120202e-05, - "loss": 0.3476, - "step": 7259 - }, - { - "epoch": 0.6839217163985776, - "grad_norm": 0.9079115390777588, - "learning_rate": 1.5130196405906711e-05, - "loss": 0.3558, - "step": 7260 - }, - { - "epoch": 0.6840159204917454, - "grad_norm": 0.685491144657135, - "learning_rate": 1.5128900182816808e-05, - "loss": 0.283, - "step": 7261 - }, - { - "epoch": 0.6841101245849132, - "grad_norm": 0.6666446328163147, - "learning_rate": 1.512760384278005e-05, - "loss": 0.3051, - "step": 7262 - }, - { - "epoch": 0.684204328678081, - "grad_norm": 0.6125575304031372, - "learning_rate": 1.5126307385825993e-05, - "loss": 0.2523, - "step": 7263 - }, - { - "epoch": 0.6842985327712489, - "grad_norm": 0.7506864070892334, - "learning_rate": 1.51250108119842e-05, - "loss": 0.2981, - "step": 7264 - }, - { - "epoch": 0.6843927368644167, - "grad_norm": 0.7479058504104614, - "learning_rate": 1.512371412128424e-05, - "loss": 0.3377, - "step": 7265 - }, - { - "epoch": 0.6844869409575846, - "grad_norm": 0.6979084014892578, - "learning_rate": 1.5122417313755667e-05, - "loss": 0.2938, - "step": 7266 - }, - { - "epoch": 0.6845811450507524, - "grad_norm": 0.7295845746994019, - "learning_rate": 1.5121120389428058e-05, - "loss": 0.2793, - "step": 7267 - }, - { - "epoch": 0.6846753491439203, - "grad_norm": 0.7008985280990601, - "learning_rate": 1.5119823348330988e-05, - "loss": 0.3245, - "step": 7268 - }, - { - "epoch": 0.6847695532370881, - "grad_norm": 0.7741862535476685, - "learning_rate": 1.5118526190494022e-05, - "loss": 0.357, - "step": 7269 - }, - { - "epoch": 0.684863757330256, - "grad_norm": 0.696915864944458, - "learning_rate": 1.511722891594675e-05, - "loss": 0.3251, - "step": 7270 - }, - { - "epoch": 0.6849579614234238, - "grad_norm": 0.8568238019943237, - "learning_rate": 1.5115931524718739e-05, - "loss": 0.3236, - "step": 7271 - }, - { - "epoch": 0.6850521655165916, - "grad_norm": 0.7071682810783386, - "learning_rate": 1.5114634016839578e-05, - "loss": 0.3299, - "step": 7272 - }, - { - "epoch": 0.6851463696097595, - "grad_norm": 0.7535279393196106, - "learning_rate": 1.5113336392338853e-05, - "loss": 0.3423, - "step": 7273 - }, - { - "epoch": 0.6852405737029273, - "grad_norm": 0.7997384071350098, - "learning_rate": 
1.511203865124615e-05, - "loss": 0.3216, - "step": 7274 - }, - { - "epoch": 0.6853347777960952, - "grad_norm": 0.8931795358657837, - "learning_rate": 1.511074079359106e-05, - "loss": 0.3887, - "step": 7275 - }, - { - "epoch": 0.685428981889263, - "grad_norm": 0.7333723306655884, - "learning_rate": 1.5109442819403178e-05, - "loss": 0.3081, - "step": 7276 - }, - { - "epoch": 0.6855231859824309, - "grad_norm": 0.6808123588562012, - "learning_rate": 1.5108144728712093e-05, - "loss": 0.2971, - "step": 7277 - }, - { - "epoch": 0.6856173900755987, - "grad_norm": 0.6848199367523193, - "learning_rate": 1.5106846521547413e-05, - "loss": 0.3307, - "step": 7278 - }, - { - "epoch": 0.6857115941687666, - "grad_norm": 0.7820274233818054, - "learning_rate": 1.5105548197938733e-05, - "loss": 0.3739, - "step": 7279 - }, - { - "epoch": 0.6858057982619344, - "grad_norm": 0.7594008445739746, - "learning_rate": 1.5104249757915658e-05, - "loss": 0.3421, - "step": 7280 - }, - { - "epoch": 0.6859000023551023, - "grad_norm": 0.6797794103622437, - "learning_rate": 1.5102951201507792e-05, - "loss": 0.2652, - "step": 7281 - }, - { - "epoch": 0.6859942064482701, - "grad_norm": 0.8875043988227844, - "learning_rate": 1.510165252874475e-05, - "loss": 0.3433, - "step": 7282 - }, - { - "epoch": 0.686088410541438, - "grad_norm": 0.7918300032615662, - "learning_rate": 1.5100353739656139e-05, - "loss": 0.3706, - "step": 7283 - }, - { - "epoch": 0.6861826146346058, - "grad_norm": 0.7903196215629578, - "learning_rate": 1.5099054834271574e-05, - "loss": 0.3153, - "step": 7284 - }, - { - "epoch": 0.6862768187277737, - "grad_norm": 0.7844611406326294, - "learning_rate": 1.5097755812620679e-05, - "loss": 0.3556, - "step": 7285 - }, - { - "epoch": 0.6863710228209415, - "grad_norm": 0.7814464569091797, - "learning_rate": 1.5096456674733059e-05, - "loss": 0.3168, - "step": 7286 - }, - { - "epoch": 0.6864652269141094, - "grad_norm": 0.7347399592399597, - "learning_rate": 1.5095157420638349e-05, - "loss": 0.3406, - "step": 7287 - }, - { - "epoch": 0.6865594310072772, - "grad_norm": 0.7254319190979004, - "learning_rate": 1.509385805036617e-05, - "loss": 0.328, - "step": 7288 - }, - { - "epoch": 0.6866536351004451, - "grad_norm": 0.7397040128707886, - "learning_rate": 1.509255856394615e-05, - "loss": 0.2663, - "step": 7289 - }, - { - "epoch": 0.6867478391936129, - "grad_norm": 0.7593182325363159, - "learning_rate": 1.509125896140792e-05, - "loss": 0.3113, - "step": 7290 - }, - { - "epoch": 0.6868420432867808, - "grad_norm": 0.7546846270561218, - "learning_rate": 1.5089959242781109e-05, - "loss": 0.3033, - "step": 7291 - }, - { - "epoch": 0.6869362473799486, - "grad_norm": 0.6999562978744507, - "learning_rate": 1.5088659408095356e-05, - "loss": 0.3063, - "step": 7292 - }, - { - "epoch": 0.6870304514731165, - "grad_norm": 0.6722689867019653, - "learning_rate": 1.5087359457380299e-05, - "loss": 0.2836, - "step": 7293 - }, - { - "epoch": 0.6871246555662843, - "grad_norm": 0.7416934370994568, - "learning_rate": 1.5086059390665582e-05, - "loss": 0.3114, - "step": 7294 - }, - { - "epoch": 0.6872188596594522, - "grad_norm": 0.7684811949729919, - "learning_rate": 1.5084759207980844e-05, - "loss": 0.3342, - "step": 7295 - }, - { - "epoch": 0.68731306375262, - "grad_norm": 0.9004353880882263, - "learning_rate": 1.5083458909355728e-05, - "loss": 0.3416, - "step": 7296 - }, - { - "epoch": 0.6874072678457879, - "grad_norm": 0.7631049752235413, - "learning_rate": 1.508215849481989e-05, - "loss": 0.3257, - "step": 7297 - }, - { - "epoch": 
0.6875014719389557, - "grad_norm": 0.8043209910392761, - "learning_rate": 1.5080857964402978e-05, - "loss": 0.3137, - "step": 7298 - }, - { - "epoch": 0.6875956760321236, - "grad_norm": 0.8755385279655457, - "learning_rate": 1.507955731813465e-05, - "loss": 0.3667, - "step": 7299 - }, - { - "epoch": 0.6876898801252914, - "grad_norm": 0.6808297038078308, - "learning_rate": 1.5078256556044557e-05, - "loss": 0.301, - "step": 7300 - }, - { - "epoch": 0.6877840842184593, - "grad_norm": 0.839055061340332, - "learning_rate": 1.5076955678162359e-05, - "loss": 0.3008, - "step": 7301 - }, - { - "epoch": 0.6878782883116271, - "grad_norm": 0.7584748864173889, - "learning_rate": 1.5075654684517722e-05, - "loss": 0.304, - "step": 7302 - }, - { - "epoch": 0.687972492404795, - "grad_norm": 0.7207547426223755, - "learning_rate": 1.5074353575140307e-05, - "loss": 0.307, - "step": 7303 - }, - { - "epoch": 0.6880666964979628, - "grad_norm": 0.8173823356628418, - "learning_rate": 1.5073052350059783e-05, - "loss": 0.3126, - "step": 7304 - }, - { - "epoch": 0.6881609005911307, - "grad_norm": 1.3754568099975586, - "learning_rate": 1.5071751009305824e-05, - "loss": 0.3187, - "step": 7305 - }, - { - "epoch": 0.6882551046842985, - "grad_norm": 0.9424414038658142, - "learning_rate": 1.5070449552908093e-05, - "loss": 0.3687, - "step": 7306 - }, - { - "epoch": 0.6883493087774664, - "grad_norm": 0.8139634728431702, - "learning_rate": 1.5069147980896272e-05, - "loss": 0.3351, - "step": 7307 - }, - { - "epoch": 0.6884435128706342, - "grad_norm": 0.6309536695480347, - "learning_rate": 1.506784629330004e-05, - "loss": 0.299, - "step": 7308 - }, - { - "epoch": 0.6885377169638021, - "grad_norm": 0.7700256705284119, - "learning_rate": 1.5066544490149068e-05, - "loss": 0.3735, - "step": 7309 - }, - { - "epoch": 0.6886319210569699, - "grad_norm": 0.7104466557502747, - "learning_rate": 1.5065242571473054e-05, - "loss": 0.3123, - "step": 7310 - }, - { - "epoch": 0.6887261251501378, - "grad_norm": 0.6783331036567688, - "learning_rate": 1.5063940537301673e-05, - "loss": 0.3288, - "step": 7311 - }, - { - "epoch": 0.6888203292433056, - "grad_norm": 0.6386809945106506, - "learning_rate": 1.5062638387664614e-05, - "loss": 0.2988, - "step": 7312 - }, - { - "epoch": 0.6889145333364735, - "grad_norm": 0.6671721935272217, - "learning_rate": 1.5061336122591574e-05, - "loss": 0.3652, - "step": 7313 - }, - { - "epoch": 0.6890087374296413, - "grad_norm": 0.9433803558349609, - "learning_rate": 1.5060033742112242e-05, - "loss": 0.3975, - "step": 7314 - }, - { - "epoch": 0.6891029415228092, - "grad_norm": 0.6702458262443542, - "learning_rate": 1.5058731246256315e-05, - "loss": 0.2997, - "step": 7315 - }, - { - "epoch": 0.689197145615977, - "grad_norm": 0.6700741052627563, - "learning_rate": 1.505742863505349e-05, - "loss": 0.2819, - "step": 7316 - }, - { - "epoch": 0.6892913497091449, - "grad_norm": 0.7831470370292664, - "learning_rate": 1.5056125908533472e-05, - "loss": 0.2924, - "step": 7317 - }, - { - "epoch": 0.6893855538023127, - "grad_norm": 0.7220161557197571, - "learning_rate": 1.5054823066725966e-05, - "loss": 0.3258, - "step": 7318 - }, - { - "epoch": 0.6894797578954805, - "grad_norm": 0.6773331761360168, - "learning_rate": 1.5053520109660674e-05, - "loss": 0.3416, - "step": 7319 - }, - { - "epoch": 0.6895739619886484, - "grad_norm": 0.7731783390045166, - "learning_rate": 1.5052217037367309e-05, - "loss": 0.3591, - "step": 7320 - }, - { - "epoch": 0.6896681660818162, - "grad_norm": 0.7107616662979126, - "learning_rate": 
1.5050913849875582e-05, - "loss": 0.3189, - "step": 7321 - }, - { - "epoch": 0.6897623701749841, - "grad_norm": 0.9149519801139832, - "learning_rate": 1.5049610547215205e-05, - "loss": 0.3726, - "step": 7322 - }, - { - "epoch": 0.689856574268152, - "grad_norm": 0.7625017762184143, - "learning_rate": 1.5048307129415903e-05, - "loss": 0.3269, - "step": 7323 - }, - { - "epoch": 0.6899507783613198, - "grad_norm": 0.7767654061317444, - "learning_rate": 1.5047003596507388e-05, - "loss": 0.3448, - "step": 7324 - }, - { - "epoch": 0.6900449824544876, - "grad_norm": 0.6901801824569702, - "learning_rate": 1.5045699948519388e-05, - "loss": 0.3182, - "step": 7325 - }, - { - "epoch": 0.6901391865476555, - "grad_norm": 0.7489223480224609, - "learning_rate": 1.5044396185481622e-05, - "loss": 0.3471, - "step": 7326 - }, - { - "epoch": 0.6902333906408233, - "grad_norm": 0.7715135812759399, - "learning_rate": 1.5043092307423825e-05, - "loss": 0.3547, - "step": 7327 - }, - { - "epoch": 0.6903275947339912, - "grad_norm": 0.6974141001701355, - "learning_rate": 1.5041788314375722e-05, - "loss": 0.3095, - "step": 7328 - }, - { - "epoch": 0.690421798827159, - "grad_norm": 0.7149832248687744, - "learning_rate": 1.5040484206367049e-05, - "loss": 0.3247, - "step": 7329 - }, - { - "epoch": 0.6905160029203269, - "grad_norm": 0.8140485286712646, - "learning_rate": 1.5039179983427543e-05, - "loss": 0.3222, - "step": 7330 - }, - { - "epoch": 0.6906102070134947, - "grad_norm": 0.8052679896354675, - "learning_rate": 1.5037875645586937e-05, - "loss": 0.3354, - "step": 7331 - }, - { - "epoch": 0.6907044111066626, - "grad_norm": 0.9338856935501099, - "learning_rate": 1.5036571192874977e-05, - "loss": 0.3547, - "step": 7332 - }, - { - "epoch": 0.6907986151998304, - "grad_norm": 0.7552533149719238, - "learning_rate": 1.5035266625321403e-05, - "loss": 0.3038, - "step": 7333 - }, - { - "epoch": 0.6908928192929983, - "grad_norm": 0.9233847856521606, - "learning_rate": 1.5033961942955966e-05, - "loss": 0.3901, - "step": 7334 - }, - { - "epoch": 0.6909870233861661, - "grad_norm": 0.7393520474433899, - "learning_rate": 1.5032657145808409e-05, - "loss": 0.2992, - "step": 7335 - }, - { - "epoch": 0.691081227479334, - "grad_norm": 0.68341463804245, - "learning_rate": 1.5031352233908486e-05, - "loss": 0.3029, - "step": 7336 - }, - { - "epoch": 0.6911754315725018, - "grad_norm": 0.7289931178092957, - "learning_rate": 1.5030047207285949e-05, - "loss": 0.2857, - "step": 7337 - }, - { - "epoch": 0.6912696356656697, - "grad_norm": 0.6364055275917053, - "learning_rate": 1.502874206597056e-05, - "loss": 0.2769, - "step": 7338 - }, - { - "epoch": 0.6913638397588375, - "grad_norm": 0.7573159337043762, - "learning_rate": 1.5027436809992076e-05, - "loss": 0.3168, - "step": 7339 - }, - { - "epoch": 0.6914580438520054, - "grad_norm": 0.7118650078773499, - "learning_rate": 1.5026131439380254e-05, - "loss": 0.3248, - "step": 7340 - }, - { - "epoch": 0.6915522479451732, - "grad_norm": 0.7071848511695862, - "learning_rate": 1.5024825954164862e-05, - "loss": 0.3323, - "step": 7341 - }, - { - "epoch": 0.6916464520383411, - "grad_norm": 0.8578891754150391, - "learning_rate": 1.5023520354375669e-05, - "loss": 0.292, - "step": 7342 - }, - { - "epoch": 0.6917406561315089, - "grad_norm": 0.6838341951370239, - "learning_rate": 1.5022214640042444e-05, - "loss": 0.2881, - "step": 7343 - }, - { - "epoch": 0.6918348602246768, - "grad_norm": 0.6806995868682861, - "learning_rate": 1.5020908811194952e-05, - "loss": 0.3118, - "step": 7344 - }, - { - "epoch": 
0.6919290643178446, - "grad_norm": 0.7868961095809937, - "learning_rate": 1.5019602867862983e-05, - "loss": 0.2921, - "step": 7345 - }, - { - "epoch": 0.6920232684110125, - "grad_norm": 0.8781580924987793, - "learning_rate": 1.5018296810076299e-05, - "loss": 0.3064, - "step": 7346 - }, - { - "epoch": 0.6921174725041803, - "grad_norm": 0.7523723840713501, - "learning_rate": 1.501699063786469e-05, - "loss": 0.3128, - "step": 7347 - }, - { - "epoch": 0.6922116765973482, - "grad_norm": 0.9119831919670105, - "learning_rate": 1.5015684351257935e-05, - "loss": 0.2864, - "step": 7348 - }, - { - "epoch": 0.692305880690516, - "grad_norm": 0.6982825994491577, - "learning_rate": 1.5014377950285818e-05, - "loss": 0.3008, - "step": 7349 - }, - { - "epoch": 0.6924000847836839, - "grad_norm": 0.6579086780548096, - "learning_rate": 1.501307143497813e-05, - "loss": 0.2484, - "step": 7350 - }, - { - "epoch": 0.6924942888768517, - "grad_norm": 0.8615376353263855, - "learning_rate": 1.5011764805364663e-05, - "loss": 0.3572, - "step": 7351 - }, - { - "epoch": 0.6925884929700196, - "grad_norm": 0.7610296010971069, - "learning_rate": 1.5010458061475203e-05, - "loss": 0.33, - "step": 7352 - }, - { - "epoch": 0.6926826970631874, - "grad_norm": 0.6841700673103333, - "learning_rate": 1.5009151203339556e-05, - "loss": 0.3228, - "step": 7353 - }, - { - "epoch": 0.6927769011563553, - "grad_norm": 0.7239129543304443, - "learning_rate": 1.500784423098751e-05, - "loss": 0.3231, - "step": 7354 - }, - { - "epoch": 0.6928711052495231, - "grad_norm": 0.7602190971374512, - "learning_rate": 1.5006537144448871e-05, - "loss": 0.3059, - "step": 7355 - }, - { - "epoch": 0.692965309342691, - "grad_norm": 1.425072193145752, - "learning_rate": 1.5005229943753445e-05, - "loss": 0.3425, - "step": 7356 - }, - { - "epoch": 0.6930595134358588, - "grad_norm": 0.6205198168754578, - "learning_rate": 1.500392262893103e-05, - "loss": 0.2982, - "step": 7357 - }, - { - "epoch": 0.6931537175290267, - "grad_norm": 0.7715321779251099, - "learning_rate": 1.5002615200011447e-05, - "loss": 0.3352, - "step": 7358 - }, - { - "epoch": 0.6932479216221945, - "grad_norm": 0.7210916876792908, - "learning_rate": 1.50013076570245e-05, - "loss": 0.3328, - "step": 7359 - }, - { - "epoch": 0.6933421257153624, - "grad_norm": 0.652855634689331, - "learning_rate": 1.5000000000000002e-05, - "loss": 0.3075, - "step": 7360 - }, - { - "epoch": 0.6934363298085302, - "grad_norm": 0.7345904111862183, - "learning_rate": 1.499869222896777e-05, - "loss": 0.3021, - "step": 7361 - }, - { - "epoch": 0.693530533901698, - "grad_norm": 0.7462295293807983, - "learning_rate": 1.4997384343957626e-05, - "loss": 0.3683, - "step": 7362 - }, - { - "epoch": 0.6936247379948659, - "grad_norm": 0.7322737574577332, - "learning_rate": 1.499607634499939e-05, - "loss": 0.3329, - "step": 7363 - }, - { - "epoch": 0.6937189420880338, - "grad_norm": 0.7705998420715332, - "learning_rate": 1.4994768232122887e-05, - "loss": 0.3383, - "step": 7364 - }, - { - "epoch": 0.6938131461812016, - "grad_norm": 0.7012845873832703, - "learning_rate": 1.4993460005357948e-05, - "loss": 0.322, - "step": 7365 - }, - { - "epoch": 0.6939073502743694, - "grad_norm": 0.6502741575241089, - "learning_rate": 1.499215166473439e-05, - "loss": 0.2975, - "step": 7366 - }, - { - "epoch": 0.6940015543675373, - "grad_norm": 0.6985697150230408, - "learning_rate": 1.4990843210282058e-05, - "loss": 0.3163, - "step": 7367 - }, - { - "epoch": 0.6940957584607051, - "grad_norm": 0.7667263150215149, - "learning_rate": 1.4989534642030783e-05, 
- "loss": 0.3301, - "step": 7368 - }, - { - "epoch": 0.694189962553873, - "grad_norm": 0.7916030287742615, - "learning_rate": 1.4988225960010398e-05, - "loss": 0.3283, - "step": 7369 - }, - { - "epoch": 0.6942841666470408, - "grad_norm": 0.681128740310669, - "learning_rate": 1.4986917164250751e-05, - "loss": 0.3103, - "step": 7370 - }, - { - "epoch": 0.6943783707402087, - "grad_norm": 0.7746285200119019, - "learning_rate": 1.498560825478168e-05, - "loss": 0.3573, - "step": 7371 - }, - { - "epoch": 0.6944725748333765, - "grad_norm": 0.8283178210258484, - "learning_rate": 1.4984299231633027e-05, - "loss": 0.2894, - "step": 7372 - }, - { - "epoch": 0.6945667789265444, - "grad_norm": 0.7182471752166748, - "learning_rate": 1.4982990094834647e-05, - "loss": 0.2693, - "step": 7373 - }, - { - "epoch": 0.6946609830197122, - "grad_norm": 1.1338497400283813, - "learning_rate": 1.4981680844416384e-05, - "loss": 0.3941, - "step": 7374 - }, - { - "epoch": 0.6947551871128801, - "grad_norm": 0.7589165568351746, - "learning_rate": 1.4980371480408094e-05, - "loss": 0.3794, - "step": 7375 - }, - { - "epoch": 0.6948493912060479, - "grad_norm": 0.7595975399017334, - "learning_rate": 1.4979062002839634e-05, - "loss": 0.3811, - "step": 7376 - }, - { - "epoch": 0.6949435952992158, - "grad_norm": 0.7198370695114136, - "learning_rate": 1.4977752411740855e-05, - "loss": 0.3589, - "step": 7377 - }, - { - "epoch": 0.6950377993923836, - "grad_norm": 0.6901041865348816, - "learning_rate": 1.4976442707141625e-05, - "loss": 0.2827, - "step": 7378 - }, - { - "epoch": 0.6951320034855515, - "grad_norm": 0.7376834154129028, - "learning_rate": 1.4975132889071808e-05, - "loss": 0.3498, - "step": 7379 - }, - { - "epoch": 0.6952262075787193, - "grad_norm": 0.8776727914810181, - "learning_rate": 1.4973822957561266e-05, - "loss": 0.3504, - "step": 7380 - }, - { - "epoch": 0.6953204116718872, - "grad_norm": 0.7100030183792114, - "learning_rate": 1.4972512912639864e-05, - "loss": 0.3353, - "step": 7381 - }, - { - "epoch": 0.695414615765055, - "grad_norm": 0.8575757145881653, - "learning_rate": 1.4971202754337482e-05, - "loss": 0.3404, - "step": 7382 - }, - { - "epoch": 0.6955088198582229, - "grad_norm": 0.7626842260360718, - "learning_rate": 1.496989248268399e-05, - "loss": 0.3046, - "step": 7383 - }, - { - "epoch": 0.6956030239513907, - "grad_norm": 0.7086209058761597, - "learning_rate": 1.4968582097709259e-05, - "loss": 0.3093, - "step": 7384 - }, - { - "epoch": 0.6956972280445586, - "grad_norm": 0.6811178922653198, - "learning_rate": 1.4967271599443176e-05, - "loss": 0.2899, - "step": 7385 - }, - { - "epoch": 0.6957914321377264, - "grad_norm": 1.1253029108047485, - "learning_rate": 1.4965960987915615e-05, - "loss": 0.3812, - "step": 7386 - }, - { - "epoch": 0.6958856362308943, - "grad_norm": 0.762640655040741, - "learning_rate": 1.4964650263156466e-05, - "loss": 0.3376, - "step": 7387 - }, - { - "epoch": 0.6959798403240621, - "grad_norm": 0.7089758515357971, - "learning_rate": 1.4963339425195611e-05, - "loss": 0.3254, - "step": 7388 - }, - { - "epoch": 0.69607404441723, - "grad_norm": 0.6183210015296936, - "learning_rate": 1.4962028474062941e-05, - "loss": 0.2624, - "step": 7389 - }, - { - "epoch": 0.6961682485103978, - "grad_norm": 0.8221989274024963, - "learning_rate": 1.496071740978835e-05, - "loss": 0.331, - "step": 7390 - }, - { - "epoch": 0.6962624526035657, - "grad_norm": 0.6767950057983398, - "learning_rate": 1.495940623240173e-05, - "loss": 0.3022, - "step": 7391 - }, - { - "epoch": 0.6963566566967335, - "grad_norm": 
0.7246094346046448, - "learning_rate": 1.4958094941932977e-05, - "loss": 0.3315, - "step": 7392 - }, - { - "epoch": 0.6964508607899014, - "grad_norm": 0.8228355050086975, - "learning_rate": 1.4956783538411994e-05, - "loss": 0.3194, - "step": 7393 - }, - { - "epoch": 0.6965450648830692, - "grad_norm": 0.7368044853210449, - "learning_rate": 1.4955472021868676e-05, - "loss": 0.3127, - "step": 7394 - }, - { - "epoch": 0.6966392689762371, - "grad_norm": 0.6888823509216309, - "learning_rate": 1.4954160392332938e-05, - "loss": 0.2869, - "step": 7395 - }, - { - "epoch": 0.6967334730694049, - "grad_norm": 0.80766761302948, - "learning_rate": 1.4952848649834676e-05, - "loss": 0.3349, - "step": 7396 - }, - { - "epoch": 0.6968276771625728, - "grad_norm": 0.7352944612503052, - "learning_rate": 1.4951536794403805e-05, - "loss": 0.3244, - "step": 7397 - }, - { - "epoch": 0.6969218812557406, - "grad_norm": 0.7246667742729187, - "learning_rate": 1.495022482607024e-05, - "loss": 0.3178, - "step": 7398 - }, - { - "epoch": 0.6970160853489085, - "grad_norm": 0.601959764957428, - "learning_rate": 1.4948912744863892e-05, - "loss": 0.2314, - "step": 7399 - }, - { - "epoch": 0.6971102894420762, - "grad_norm": 0.6760597825050354, - "learning_rate": 1.4947600550814683e-05, - "loss": 0.3039, - "step": 7400 - }, - { - "epoch": 0.697204493535244, - "grad_norm": 0.7061398029327393, - "learning_rate": 1.4946288243952524e-05, - "loss": 0.3108, - "step": 7401 - }, - { - "epoch": 0.6972986976284119, - "grad_norm": 0.7306122779846191, - "learning_rate": 1.4944975824307348e-05, - "loss": 0.3273, - "step": 7402 - }, - { - "epoch": 0.6973929017215797, - "grad_norm": 0.8416723012924194, - "learning_rate": 1.4943663291909074e-05, - "loss": 0.3187, - "step": 7403 - }, - { - "epoch": 0.6974871058147476, - "grad_norm": 0.6917863488197327, - "learning_rate": 1.494235064678763e-05, - "loss": 0.3189, - "step": 7404 - }, - { - "epoch": 0.6975813099079154, - "grad_norm": 0.8066960573196411, - "learning_rate": 1.4941037888972953e-05, - "loss": 0.3375, - "step": 7405 - }, - { - "epoch": 0.6976755140010833, - "grad_norm": 0.6849872469902039, - "learning_rate": 1.4939725018494966e-05, - "loss": 0.3212, - "step": 7406 - }, - { - "epoch": 0.6977697180942511, - "grad_norm": 0.8231762647628784, - "learning_rate": 1.493841203538361e-05, - "loss": 0.3257, - "step": 7407 - }, - { - "epoch": 0.697863922187419, - "grad_norm": 0.7129079103469849, - "learning_rate": 1.4937098939668823e-05, - "loss": 0.3392, - "step": 7408 - }, - { - "epoch": 0.6979581262805868, - "grad_norm": 0.7407151460647583, - "learning_rate": 1.4935785731380543e-05, - "loss": 0.3138, - "step": 7409 - }, - { - "epoch": 0.6980523303737547, - "grad_norm": 0.7718027830123901, - "learning_rate": 1.4934472410548717e-05, - "loss": 0.2729, - "step": 7410 - }, - { - "epoch": 0.6981465344669225, - "grad_norm": 0.8110967874526978, - "learning_rate": 1.4933158977203288e-05, - "loss": 0.3182, - "step": 7411 - }, - { - "epoch": 0.6982407385600904, - "grad_norm": 0.6419220566749573, - "learning_rate": 1.4931845431374203e-05, - "loss": 0.2866, - "step": 7412 - }, - { - "epoch": 0.6983349426532582, - "grad_norm": 0.7150808572769165, - "learning_rate": 1.493053177309142e-05, - "loss": 0.3134, - "step": 7413 - }, - { - "epoch": 0.6984291467464261, - "grad_norm": 0.7292571067810059, - "learning_rate": 1.4929218002384884e-05, - "loss": 0.333, - "step": 7414 - }, - { - "epoch": 0.6985233508395939, - "grad_norm": 0.8620499968528748, - "learning_rate": 1.4927904119284552e-05, - "loss": 0.3092, - "step": 
7415 - }, - { - "epoch": 0.6986175549327618, - "grad_norm": 0.766682505607605, - "learning_rate": 1.492659012382039e-05, - "loss": 0.326, - "step": 7416 - }, - { - "epoch": 0.6987117590259296, - "grad_norm": 0.6765879392623901, - "learning_rate": 1.492527601602235e-05, - "loss": 0.2916, - "step": 7417 - }, - { - "epoch": 0.6988059631190975, - "grad_norm": 0.762783944606781, - "learning_rate": 1.4923961795920402e-05, - "loss": 0.3447, - "step": 7418 - }, - { - "epoch": 0.6989001672122653, - "grad_norm": 0.7239778637886047, - "learning_rate": 1.492264746354451e-05, - "loss": 0.3173, - "step": 7419 - }, - { - "epoch": 0.6989943713054332, - "grad_norm": 0.7437894940376282, - "learning_rate": 1.4921333018924645e-05, - "loss": 0.3443, - "step": 7420 - }, - { - "epoch": 0.699088575398601, - "grad_norm": 1.151016354560852, - "learning_rate": 1.4920018462090771e-05, - "loss": 0.2907, - "step": 7421 - }, - { - "epoch": 0.6991827794917689, - "grad_norm": 0.6117764115333557, - "learning_rate": 1.4918703793072869e-05, - "loss": 0.2701, - "step": 7422 - }, - { - "epoch": 0.6992769835849367, - "grad_norm": 0.7207898497581482, - "learning_rate": 1.4917389011900913e-05, - "loss": 0.3036, - "step": 7423 - }, - { - "epoch": 0.6993711876781046, - "grad_norm": 0.843279242515564, - "learning_rate": 1.4916074118604887e-05, - "loss": 0.3656, - "step": 7424 - }, - { - "epoch": 0.6994653917712724, - "grad_norm": 0.9610438942909241, - "learning_rate": 1.4914759113214765e-05, - "loss": 0.3661, - "step": 7425 - }, - { - "epoch": 0.6995595958644403, - "grad_norm": 0.6298871040344238, - "learning_rate": 1.4913443995760533e-05, - "loss": 0.2975, - "step": 7426 - }, - { - "epoch": 0.6996537999576081, - "grad_norm": 0.7482747435569763, - "learning_rate": 1.491212876627218e-05, - "loss": 0.3257, - "step": 7427 - }, - { - "epoch": 0.699748004050776, - "grad_norm": 1.1633214950561523, - "learning_rate": 1.4910813424779695e-05, - "loss": 0.3578, - "step": 7428 - }, - { - "epoch": 0.6998422081439438, - "grad_norm": 0.6494668126106262, - "learning_rate": 1.4909497971313069e-05, - "loss": 0.2765, - "step": 7429 - }, - { - "epoch": 0.6999364122371117, - "grad_norm": 0.7671998739242554, - "learning_rate": 1.4908182405902296e-05, - "loss": 0.33, - "step": 7430 - }, - { - "epoch": 0.7000306163302795, - "grad_norm": 0.7501470446586609, - "learning_rate": 1.4906866728577373e-05, - "loss": 0.3344, - "step": 7431 - }, - { - "epoch": 0.7001248204234474, - "grad_norm": 3.6615259647369385, - "learning_rate": 1.49055509393683e-05, - "loss": 0.3136, - "step": 7432 - }, - { - "epoch": 0.7002190245166152, - "grad_norm": 0.8734865188598633, - "learning_rate": 1.4904235038305084e-05, - "loss": 0.3371, - "step": 7433 - }, - { - "epoch": 0.7003132286097831, - "grad_norm": 0.8595309853553772, - "learning_rate": 1.4902919025417718e-05, - "loss": 0.3185, - "step": 7434 - }, - { - "epoch": 0.7004074327029509, - "grad_norm": 0.6879934072494507, - "learning_rate": 1.490160290073622e-05, - "loss": 0.2974, - "step": 7435 - }, - { - "epoch": 0.7005016367961188, - "grad_norm": 0.7653011679649353, - "learning_rate": 1.4900286664290593e-05, - "loss": 0.3238, - "step": 7436 - }, - { - "epoch": 0.7005958408892866, - "grad_norm": 0.6484694480895996, - "learning_rate": 1.4898970316110852e-05, - "loss": 0.3273, - "step": 7437 - }, - { - "epoch": 0.7006900449824545, - "grad_norm": 0.6532779335975647, - "learning_rate": 1.4897653856227012e-05, - "loss": 0.3061, - "step": 7438 - }, - { - "epoch": 0.7007842490756223, - "grad_norm": 0.910477876663208, - "learning_rate": 
1.4896337284669091e-05, - "loss": 0.2835, - "step": 7439 - }, - { - "epoch": 0.7008784531687902, - "grad_norm": 0.6949968338012695, - "learning_rate": 1.4895020601467107e-05, - "loss": 0.3215, - "step": 7440 - }, - { - "epoch": 0.700972657261958, - "grad_norm": 0.7668197154998779, - "learning_rate": 1.4893703806651081e-05, - "loss": 0.3319, - "step": 7441 - }, - { - "epoch": 0.7010668613551259, - "grad_norm": 0.6469756364822388, - "learning_rate": 1.4892386900251041e-05, - "loss": 0.3026, - "step": 7442 - }, - { - "epoch": 0.7011610654482937, - "grad_norm": 0.9849398732185364, - "learning_rate": 1.4891069882297013e-05, - "loss": 0.3075, - "step": 7443 - }, - { - "epoch": 0.7012552695414616, - "grad_norm": 0.787672221660614, - "learning_rate": 1.4889752752819029e-05, - "loss": 0.2772, - "step": 7444 - }, - { - "epoch": 0.7013494736346294, - "grad_norm": 0.859749436378479, - "learning_rate": 1.488843551184712e-05, - "loss": 0.331, - "step": 7445 - }, - { - "epoch": 0.7014436777277973, - "grad_norm": 0.747424840927124, - "learning_rate": 1.4887118159411317e-05, - "loss": 0.3107, - "step": 7446 - }, - { - "epoch": 0.7015378818209651, - "grad_norm": 0.7198090553283691, - "learning_rate": 1.4885800695541668e-05, - "loss": 0.3098, - "step": 7447 - }, - { - "epoch": 0.701632085914133, - "grad_norm": 0.7659379243850708, - "learning_rate": 1.4884483120268202e-05, - "loss": 0.3099, - "step": 7448 - }, - { - "epoch": 0.7017262900073008, - "grad_norm": 0.7211584448814392, - "learning_rate": 1.488316543362097e-05, - "loss": 0.3165, - "step": 7449 - }, - { - "epoch": 0.7018204941004687, - "grad_norm": 0.668483555316925, - "learning_rate": 1.4881847635630014e-05, - "loss": 0.2987, - "step": 7450 - }, - { - "epoch": 0.7019146981936365, - "grad_norm": 0.7471227645874023, - "learning_rate": 1.488052972632538e-05, - "loss": 0.3173, - "step": 7451 - }, - { - "epoch": 0.7020089022868043, - "grad_norm": 0.6653876900672913, - "learning_rate": 1.4879211705737121e-05, - "loss": 0.2566, - "step": 7452 - }, - { - "epoch": 0.7021031063799722, - "grad_norm": 0.6835277676582336, - "learning_rate": 1.4877893573895292e-05, - "loss": 0.2719, - "step": 7453 - }, - { - "epoch": 0.70219731047314, - "grad_norm": 0.6887438893318176, - "learning_rate": 1.4876575330829943e-05, - "loss": 0.3095, - "step": 7454 - }, - { - "epoch": 0.7022915145663079, - "grad_norm": 0.7306256890296936, - "learning_rate": 1.4875256976571135e-05, - "loss": 0.3151, - "step": 7455 - }, - { - "epoch": 0.7023857186594757, - "grad_norm": 0.7601445913314819, - "learning_rate": 1.487393851114893e-05, - "loss": 0.3356, - "step": 7456 - }, - { - "epoch": 0.7024799227526436, - "grad_norm": 0.7967188358306885, - "learning_rate": 1.4872619934593387e-05, - "loss": 0.3189, - "step": 7457 - }, - { - "epoch": 0.7025741268458114, - "grad_norm": 0.7820927500724792, - "learning_rate": 1.4871301246934574e-05, - "loss": 0.3021, - "step": 7458 - }, - { - "epoch": 0.7026683309389793, - "grad_norm": 0.7008146047592163, - "learning_rate": 1.486998244820256e-05, - "loss": 0.2988, - "step": 7459 - }, - { - "epoch": 0.7027625350321471, - "grad_norm": 0.7662867903709412, - "learning_rate": 1.4868663538427415e-05, - "loss": 0.3522, - "step": 7460 - }, - { - "epoch": 0.702856739125315, - "grad_norm": 0.9097483158111572, - "learning_rate": 1.4867344517639208e-05, - "loss": 0.3585, - "step": 7461 - }, - { - "epoch": 0.7029509432184828, - "grad_norm": 1.0366448163986206, - "learning_rate": 1.4866025385868024e-05, - "loss": 0.372, - "step": 7462 - }, - { - "epoch": 0.7030451473116507, 
- "grad_norm": 0.8925309777259827, - "learning_rate": 1.4864706143143931e-05, - "loss": 0.327, - "step": 7463 - }, - { - "epoch": 0.7031393514048185, - "grad_norm": 0.7557048201560974, - "learning_rate": 1.4863386789497019e-05, - "loss": 0.3275, - "step": 7464 - }, - { - "epoch": 0.7032335554979864, - "grad_norm": 0.7582609057426453, - "learning_rate": 1.4862067324957364e-05, - "loss": 0.3, - "step": 7465 - }, - { - "epoch": 0.7033277595911542, - "grad_norm": 0.9192906618118286, - "learning_rate": 1.4860747749555054e-05, - "loss": 0.3507, - "step": 7466 - }, - { - "epoch": 0.7034219636843221, - "grad_norm": 0.7212841510772705, - "learning_rate": 1.485942806332018e-05, - "loss": 0.3203, - "step": 7467 - }, - { - "epoch": 0.7035161677774899, - "grad_norm": 0.7245615124702454, - "learning_rate": 1.4858108266282831e-05, - "loss": 0.3122, - "step": 7468 - }, - { - "epoch": 0.7036103718706578, - "grad_norm": 0.7031428217887878, - "learning_rate": 1.4856788358473097e-05, - "loss": 0.3309, - "step": 7469 - }, - { - "epoch": 0.7037045759638256, - "grad_norm": 0.7170652747154236, - "learning_rate": 1.4855468339921082e-05, - "loss": 0.2901, - "step": 7470 - }, - { - "epoch": 0.7037987800569935, - "grad_norm": 0.7371145486831665, - "learning_rate": 1.4854148210656876e-05, - "loss": 0.3079, - "step": 7471 - }, - { - "epoch": 0.7038929841501613, - "grad_norm": 0.7147948145866394, - "learning_rate": 1.4852827970710584e-05, - "loss": 0.2954, - "step": 7472 - }, - { - "epoch": 0.7039871882433292, - "grad_norm": 0.7179796099662781, - "learning_rate": 1.4851507620112313e-05, - "loss": 0.3479, - "step": 7473 - }, - { - "epoch": 0.704081392336497, - "grad_norm": 0.7083032727241516, - "learning_rate": 1.4850187158892163e-05, - "loss": 0.3177, - "step": 7474 - }, - { - "epoch": 0.7041755964296649, - "grad_norm": 0.7225296497344971, - "learning_rate": 1.4848866587080245e-05, - "loss": 0.2992, - "step": 7475 - }, - { - "epoch": 0.7042698005228327, - "grad_norm": 0.6736642122268677, - "learning_rate": 1.4847545904706668e-05, - "loss": 0.3126, - "step": 7476 - }, - { - "epoch": 0.7043640046160006, - "grad_norm": 0.7275652885437012, - "learning_rate": 1.4846225111801546e-05, - "loss": 0.3571, - "step": 7477 - }, - { - "epoch": 0.7044582087091684, - "grad_norm": 0.8346661925315857, - "learning_rate": 1.4844904208395001e-05, - "loss": 0.3115, - "step": 7478 - }, - { - "epoch": 0.7045524128023363, - "grad_norm": 0.7387918829917908, - "learning_rate": 1.4843583194517145e-05, - "loss": 0.297, - "step": 7479 - }, - { - "epoch": 0.7046466168955041, - "grad_norm": 0.8029735088348389, - "learning_rate": 1.4842262070198104e-05, - "loss": 0.297, - "step": 7480 - }, - { - "epoch": 0.704740820988672, - "grad_norm": 0.7478859424591064, - "learning_rate": 1.4840940835467996e-05, - "loss": 0.3135, - "step": 7481 - }, - { - "epoch": 0.7048350250818398, - "grad_norm": 0.6728004217147827, - "learning_rate": 1.4839619490356953e-05, - "loss": 0.2493, - "step": 7482 - }, - { - "epoch": 0.7049292291750077, - "grad_norm": 0.7483137845993042, - "learning_rate": 1.4838298034895097e-05, - "loss": 0.3107, - "step": 7483 - }, - { - "epoch": 0.7050234332681755, - "grad_norm": 0.8089814782142639, - "learning_rate": 1.483697646911257e-05, - "loss": 0.3221, - "step": 7484 - }, - { - "epoch": 0.7051176373613434, - "grad_norm": 0.7896853685379028, - "learning_rate": 1.4835654793039493e-05, - "loss": 0.3284, - "step": 7485 - }, - { - "epoch": 0.7052118414545112, - "grad_norm": 1.2581260204315186, - "learning_rate": 1.483433300670601e-05, - "loss": 
0.3294, - "step": 7486 - }, - { - "epoch": 0.7053060455476791, - "grad_norm": 0.732612669467926, - "learning_rate": 1.4833011110142257e-05, - "loss": 0.343, - "step": 7487 - }, - { - "epoch": 0.7054002496408469, - "grad_norm": 0.819580078125, - "learning_rate": 1.4831689103378378e-05, - "loss": 0.3698, - "step": 7488 - }, - { - "epoch": 0.7054944537340148, - "grad_norm": 0.69870924949646, - "learning_rate": 1.4830366986444513e-05, - "loss": 0.292, - "step": 7489 - }, - { - "epoch": 0.7055886578271826, - "grad_norm": 0.7503359317779541, - "learning_rate": 1.4829044759370814e-05, - "loss": 0.2922, - "step": 7490 - }, - { - "epoch": 0.7056828619203505, - "grad_norm": 0.9934459924697876, - "learning_rate": 1.4827722422187424e-05, - "loss": 0.3177, - "step": 7491 - }, - { - "epoch": 0.7057770660135183, - "grad_norm": 0.8618832230567932, - "learning_rate": 1.4826399974924495e-05, - "loss": 0.3352, - "step": 7492 - }, - { - "epoch": 0.7058712701066862, - "grad_norm": 0.7464866042137146, - "learning_rate": 1.4825077417612187e-05, - "loss": 0.3037, - "step": 7493 - }, - { - "epoch": 0.705965474199854, - "grad_norm": 0.7409743070602417, - "learning_rate": 1.4823754750280646e-05, - "loss": 0.3414, - "step": 7494 - }, - { - "epoch": 0.7060596782930219, - "grad_norm": 0.7519456148147583, - "learning_rate": 1.482243197296004e-05, - "loss": 0.3243, - "step": 7495 - }, - { - "epoch": 0.7061538823861897, - "grad_norm": 0.7602671384811401, - "learning_rate": 1.4821109085680528e-05, - "loss": 0.3124, - "step": 7496 - }, - { - "epoch": 0.7062480864793576, - "grad_norm": 0.7375609278678894, - "learning_rate": 1.4819786088472268e-05, - "loss": 0.2839, - "step": 7497 - }, - { - "epoch": 0.7063422905725254, - "grad_norm": 0.6524137258529663, - "learning_rate": 1.4818462981365435e-05, - "loss": 0.2732, - "step": 7498 - }, - { - "epoch": 0.7064364946656932, - "grad_norm": 0.8175925612449646, - "learning_rate": 1.4817139764390193e-05, - "loss": 0.3312, - "step": 7499 - }, - { - "epoch": 0.7065306987588611, - "grad_norm": 1.2809901237487793, - "learning_rate": 1.4815816437576716e-05, - "loss": 0.323, - "step": 7500 - }, - { - "epoch": 0.706624902852029, - "grad_norm": 0.7297874093055725, - "learning_rate": 1.4814493000955175e-05, - "loss": 0.3283, - "step": 7501 - }, - { - "epoch": 0.7067191069451968, - "grad_norm": 0.6727867126464844, - "learning_rate": 1.481316945455575e-05, - "loss": 0.294, - "step": 7502 - }, - { - "epoch": 0.7068133110383646, - "grad_norm": 0.7023646831512451, - "learning_rate": 1.4811845798408615e-05, - "loss": 0.3109, - "step": 7503 - }, - { - "epoch": 0.7069075151315325, - "grad_norm": 0.8942722082138062, - "learning_rate": 1.4810522032543956e-05, - "loss": 0.3004, - "step": 7504 - }, - { - "epoch": 0.7070017192247003, - "grad_norm": 0.7658995389938354, - "learning_rate": 1.4809198156991954e-05, - "loss": 0.2923, - "step": 7505 - }, - { - "epoch": 0.7070959233178682, - "grad_norm": 0.8051748275756836, - "learning_rate": 1.4807874171782795e-05, - "loss": 0.3487, - "step": 7506 - }, - { - "epoch": 0.707190127411036, - "grad_norm": 0.7508425712585449, - "learning_rate": 1.4806550076946673e-05, - "loss": 0.3322, - "step": 7507 - }, - { - "epoch": 0.7072843315042039, - "grad_norm": 0.7547079920768738, - "learning_rate": 1.4805225872513774e-05, - "loss": 0.2869, - "step": 7508 - }, - { - "epoch": 0.7073785355973717, - "grad_norm": 1.0497976541519165, - "learning_rate": 1.4803901558514291e-05, - "loss": 0.3009, - "step": 7509 - }, - { - "epoch": 0.7074727396905396, - "grad_norm": 0.7662302255630493, 
- "learning_rate": 1.4802577134978429e-05, - "loss": 0.3124, - "step": 7510 - }, - { - "epoch": 0.7075669437837074, - "grad_norm": 0.7299298644065857, - "learning_rate": 1.4801252601936375e-05, - "loss": 0.3711, - "step": 7511 - }, - { - "epoch": 0.7076611478768753, - "grad_norm": 0.7905606031417847, - "learning_rate": 1.4799927959418338e-05, - "loss": 0.2966, - "step": 7512 - }, - { - "epoch": 0.7077553519700431, - "grad_norm": 0.7510884404182434, - "learning_rate": 1.4798603207454524e-05, - "loss": 0.3529, - "step": 7513 - }, - { - "epoch": 0.707849556063211, - "grad_norm": 0.754071831703186, - "learning_rate": 1.479727834607513e-05, - "loss": 0.3493, - "step": 7514 - }, - { - "epoch": 0.7079437601563788, - "grad_norm": 0.7773962616920471, - "learning_rate": 1.4795953375310375e-05, - "loss": 0.3261, - "step": 7515 - }, - { - "epoch": 0.7080379642495467, - "grad_norm": 0.6722862720489502, - "learning_rate": 1.4794628295190466e-05, - "loss": 0.2935, - "step": 7516 - }, - { - "epoch": 0.7081321683427145, - "grad_norm": 0.6952345371246338, - "learning_rate": 1.4793303105745613e-05, - "loss": 0.2584, - "step": 7517 - }, - { - "epoch": 0.7082263724358824, - "grad_norm": 0.8048527240753174, - "learning_rate": 1.479197780700604e-05, - "loss": 0.3083, - "step": 7518 - }, - { - "epoch": 0.7083205765290502, - "grad_norm": 0.8349993824958801, - "learning_rate": 1.4790652399001962e-05, - "loss": 0.3216, - "step": 7519 - }, - { - "epoch": 0.7084147806222181, - "grad_norm": 0.8492235541343689, - "learning_rate": 1.4789326881763597e-05, - "loss": 0.3474, - "step": 7520 - }, - { - "epoch": 0.7085089847153859, - "grad_norm": 0.7065709829330444, - "learning_rate": 1.4788001255321174e-05, - "loss": 0.2937, - "step": 7521 - }, - { - "epoch": 0.7086031888085538, - "grad_norm": 0.8696237802505493, - "learning_rate": 1.4786675519704919e-05, - "loss": 0.3609, - "step": 7522 - }, - { - "epoch": 0.7086973929017216, - "grad_norm": 0.7493220567703247, - "learning_rate": 1.4785349674945058e-05, - "loss": 0.343, - "step": 7523 - }, - { - "epoch": 0.7087915969948895, - "grad_norm": 0.7842844128608704, - "learning_rate": 1.4784023721071825e-05, - "loss": 0.3472, - "step": 7524 - }, - { - "epoch": 0.7088858010880573, - "grad_norm": 0.7957137823104858, - "learning_rate": 1.478269765811545e-05, - "loss": 0.3187, - "step": 7525 - }, - { - "epoch": 0.7089800051812252, - "grad_norm": 0.7527223825454712, - "learning_rate": 1.4781371486106174e-05, - "loss": 0.2964, - "step": 7526 - }, - { - "epoch": 0.709074209274393, - "grad_norm": 0.8188031315803528, - "learning_rate": 1.4780045205074231e-05, - "loss": 0.3618, - "step": 7527 - }, - { - "epoch": 0.7091684133675609, - "grad_norm": 0.7470711469650269, - "learning_rate": 1.4778718815049868e-05, - "loss": 0.3106, - "step": 7528 - }, - { - "epoch": 0.7092626174607287, - "grad_norm": 0.8296229839324951, - "learning_rate": 1.4777392316063324e-05, - "loss": 0.3047, - "step": 7529 - }, - { - "epoch": 0.7093568215538966, - "grad_norm": 0.6684839725494385, - "learning_rate": 1.4776065708144848e-05, - "loss": 0.2667, - "step": 7530 - }, - { - "epoch": 0.7094510256470644, - "grad_norm": 0.7411872744560242, - "learning_rate": 1.4774738991324686e-05, - "loss": 0.3176, - "step": 7531 - }, - { - "epoch": 0.7095452297402323, - "grad_norm": 0.689228355884552, - "learning_rate": 1.477341216563309e-05, - "loss": 0.311, - "step": 7532 - }, - { - "epoch": 0.7096394338334001, - "grad_norm": 0.7025071978569031, - "learning_rate": 1.477208523110032e-05, - "loss": 0.3029, - "step": 7533 - }, - { - 
"epoch": 0.709733637926568, - "grad_norm": 0.6913065910339355, - "learning_rate": 1.4770758187756619e-05, - "loss": 0.3309, - "step": 7534 - }, - { - "epoch": 0.7098278420197358, - "grad_norm": 0.9382787942886353, - "learning_rate": 1.4769431035632258e-05, - "loss": 0.3401, - "step": 7535 - }, - { - "epoch": 0.7099220461129037, - "grad_norm": 0.8482984304428101, - "learning_rate": 1.4768103774757491e-05, - "loss": 0.3835, - "step": 7536 - }, - { - "epoch": 0.7100162502060715, - "grad_norm": 0.9082434773445129, - "learning_rate": 1.4766776405162584e-05, - "loss": 0.3392, - "step": 7537 - }, - { - "epoch": 0.7101104542992392, - "grad_norm": 0.7156729698181152, - "learning_rate": 1.4765448926877804e-05, - "loss": 0.334, - "step": 7538 - }, - { - "epoch": 0.7102046583924071, - "grad_norm": 0.6966708302497864, - "learning_rate": 1.476412133993342e-05, - "loss": 0.2901, - "step": 7539 - }, - { - "epoch": 0.7102988624855749, - "grad_norm": 0.8167742490768433, - "learning_rate": 1.4762793644359699e-05, - "loss": 0.3767, - "step": 7540 - }, - { - "epoch": 0.7103930665787428, - "grad_norm": 0.7934980988502502, - "learning_rate": 1.4761465840186919e-05, - "loss": 0.3093, - "step": 7541 - }, - { - "epoch": 0.7104872706719106, - "grad_norm": 0.8151422142982483, - "learning_rate": 1.476013792744535e-05, - "loss": 0.3036, - "step": 7542 - }, - { - "epoch": 0.7105814747650785, - "grad_norm": 0.7666172981262207, - "learning_rate": 1.4758809906165281e-05, - "loss": 0.3427, - "step": 7543 - }, - { - "epoch": 0.7106756788582463, - "grad_norm": 0.7245919108390808, - "learning_rate": 1.4757481776376983e-05, - "loss": 0.3168, - "step": 7544 - }, - { - "epoch": 0.7107698829514142, - "grad_norm": 0.8002803921699524, - "learning_rate": 1.4756153538110742e-05, - "loss": 0.2777, - "step": 7545 - }, - { - "epoch": 0.710864087044582, - "grad_norm": 0.7824395895004272, - "learning_rate": 1.4754825191396847e-05, - "loss": 0.3185, - "step": 7546 - }, - { - "epoch": 0.7109582911377499, - "grad_norm": 0.7184972167015076, - "learning_rate": 1.4753496736265582e-05, - "loss": 0.3018, - "step": 7547 - }, - { - "epoch": 0.7110524952309177, - "grad_norm": 0.8208193778991699, - "learning_rate": 1.4752168172747242e-05, - "loss": 0.3566, - "step": 7548 - }, - { - "epoch": 0.7111466993240856, - "grad_norm": 0.8324835300445557, - "learning_rate": 1.4750839500872116e-05, - "loss": 0.29, - "step": 7549 - }, - { - "epoch": 0.7112409034172534, - "grad_norm": 0.8856258392333984, - "learning_rate": 1.4749510720670506e-05, - "loss": 0.3169, - "step": 7550 - }, - { - "epoch": 0.7113351075104213, - "grad_norm": 0.7524356245994568, - "learning_rate": 1.4748181832172703e-05, - "loss": 0.3383, - "step": 7551 - }, - { - "epoch": 0.7114293116035891, - "grad_norm": 0.6797780990600586, - "learning_rate": 1.4746852835409009e-05, - "loss": 0.2675, - "step": 7552 - }, - { - "epoch": 0.711523515696757, - "grad_norm": 0.7996805310249329, - "learning_rate": 1.4745523730409734e-05, - "loss": 0.3425, - "step": 7553 - }, - { - "epoch": 0.7116177197899248, - "grad_norm": 0.7375102639198303, - "learning_rate": 1.4744194517205177e-05, - "loss": 0.2872, - "step": 7554 - }, - { - "epoch": 0.7117119238830927, - "grad_norm": 0.7449827194213867, - "learning_rate": 1.4742865195825649e-05, - "loss": 0.2979, - "step": 7555 - }, - { - "epoch": 0.7118061279762605, - "grad_norm": 0.7717209458351135, - "learning_rate": 1.4741535766301458e-05, - "loss": 0.3293, - "step": 7556 - }, - { - "epoch": 0.7119003320694284, - "grad_norm": 0.7713416814804077, - "learning_rate": 
1.4740206228662917e-05, - "loss": 0.3874, - "step": 7557 - }, - { - "epoch": 0.7119945361625962, - "grad_norm": 0.8193777799606323, - "learning_rate": 1.4738876582940344e-05, - "loss": 0.323, - "step": 7558 - }, - { - "epoch": 0.7120887402557641, - "grad_norm": 0.7421199083328247, - "learning_rate": 1.4737546829164057e-05, - "loss": 0.3599, - "step": 7559 - }, - { - "epoch": 0.7121829443489319, - "grad_norm": 0.7332062125205994, - "learning_rate": 1.4736216967364379e-05, - "loss": 0.3498, - "step": 7560 - }, - { - "epoch": 0.7122771484420998, - "grad_norm": 0.7626232504844666, - "learning_rate": 1.4734886997571627e-05, - "loss": 0.3051, - "step": 7561 - }, - { - "epoch": 0.7123713525352676, - "grad_norm": 1.1218847036361694, - "learning_rate": 1.4733556919816126e-05, - "loss": 0.2813, - "step": 7562 - }, - { - "epoch": 0.7124655566284355, - "grad_norm": 0.7519378066062927, - "learning_rate": 1.4732226734128208e-05, - "loss": 0.3451, - "step": 7563 - }, - { - "epoch": 0.7125597607216033, - "grad_norm": 0.7056507468223572, - "learning_rate": 1.4730896440538203e-05, - "loss": 0.3118, - "step": 7564 - }, - { - "epoch": 0.7126539648147712, - "grad_norm": 0.77479487657547, - "learning_rate": 1.4729566039076444e-05, - "loss": 0.3676, - "step": 7565 - }, - { - "epoch": 0.712748168907939, - "grad_norm": 0.7520031929016113, - "learning_rate": 1.472823552977326e-05, - "loss": 0.3103, - "step": 7566 - }, - { - "epoch": 0.7128423730011069, - "grad_norm": 0.7875931859016418, - "learning_rate": 1.4726904912658997e-05, - "loss": 0.3242, - "step": 7567 - }, - { - "epoch": 0.7129365770942747, - "grad_norm": 0.772013247013092, - "learning_rate": 1.4725574187763991e-05, - "loss": 0.3427, - "step": 7568 - }, - { - "epoch": 0.7130307811874426, - "grad_norm": 0.7841171026229858, - "learning_rate": 1.4724243355118587e-05, - "loss": 0.3181, - "step": 7569 - }, - { - "epoch": 0.7131249852806104, - "grad_norm": 0.8140650987625122, - "learning_rate": 1.4722912414753127e-05, - "loss": 0.3338, - "step": 7570 - }, - { - "epoch": 0.7132191893737783, - "grad_norm": 0.7578203678131104, - "learning_rate": 1.4721581366697959e-05, - "loss": 0.3208, - "step": 7571 - }, - { - "epoch": 0.7133133934669461, - "grad_norm": 0.7792476415634155, - "learning_rate": 1.4720250210983434e-05, - "loss": 0.3746, - "step": 7572 - }, - { - "epoch": 0.713407597560114, - "grad_norm": 0.9261788129806519, - "learning_rate": 1.4718918947639907e-05, - "loss": 0.3038, - "step": 7573 - }, - { - "epoch": 0.7135018016532818, - "grad_norm": 0.8140808939933777, - "learning_rate": 1.4717587576697727e-05, - "loss": 0.3144, - "step": 7574 - }, - { - "epoch": 0.7135960057464497, - "grad_norm": 0.6864731907844543, - "learning_rate": 1.4716256098187254e-05, - "loss": 0.3152, - "step": 7575 - }, - { - "epoch": 0.7136902098396175, - "grad_norm": 0.8876187801361084, - "learning_rate": 1.471492451213885e-05, - "loss": 0.3289, - "step": 7576 - }, - { - "epoch": 0.7137844139327854, - "grad_norm": 0.7731730937957764, - "learning_rate": 1.4713592818582873e-05, - "loss": 0.3409, - "step": 7577 - }, - { - "epoch": 0.7138786180259532, - "grad_norm": 0.8989897966384888, - "learning_rate": 1.4712261017549692e-05, - "loss": 0.3412, - "step": 7578 - }, - { - "epoch": 0.713972822119121, - "grad_norm": 0.7042735815048218, - "learning_rate": 1.4710929109069674e-05, - "loss": 0.2827, - "step": 7579 - }, - { - "epoch": 0.7140670262122889, - "grad_norm": 0.7563043832778931, - "learning_rate": 1.4709597093173184e-05, - "loss": 0.3589, - "step": 7580 - }, - { - "epoch": 
0.7141612303054568, - "grad_norm": 0.7748274207115173, - "learning_rate": 1.4708264969890598e-05, - "loss": 0.3215, - "step": 7581 - }, - { - "epoch": 0.7142554343986246, - "grad_norm": 0.6974570155143738, - "learning_rate": 1.4706932739252286e-05, - "loss": 0.2845, - "step": 7582 - }, - { - "epoch": 0.7143496384917924, - "grad_norm": 0.7702315449714661, - "learning_rate": 1.4705600401288632e-05, - "loss": 0.3051, - "step": 7583 - }, - { - "epoch": 0.7144438425849603, - "grad_norm": 0.7599518299102783, - "learning_rate": 1.4704267956030011e-05, - "loss": 0.3312, - "step": 7584 - }, - { - "epoch": 0.7145380466781281, - "grad_norm": 0.7633971571922302, - "learning_rate": 1.4702935403506804e-05, - "loss": 0.3311, - "step": 7585 - }, - { - "epoch": 0.714632250771296, - "grad_norm": 0.7205565571784973, - "learning_rate": 1.4701602743749397e-05, - "loss": 0.3354, - "step": 7586 - }, - { - "epoch": 0.7147264548644638, - "grad_norm": 0.7502606511116028, - "learning_rate": 1.4700269976788175e-05, - "loss": 0.3379, - "step": 7587 - }, - { - "epoch": 0.7148206589576317, - "grad_norm": 0.7521887421607971, - "learning_rate": 1.469893710265353e-05, - "loss": 0.3116, - "step": 7588 - }, - { - "epoch": 0.7149148630507995, - "grad_norm": 0.7757728099822998, - "learning_rate": 1.469760412137585e-05, - "loss": 0.2945, - "step": 7589 - }, - { - "epoch": 0.7150090671439674, - "grad_norm": 0.8067355751991272, - "learning_rate": 1.4696271032985536e-05, - "loss": 0.3169, - "step": 7590 - }, - { - "epoch": 0.7151032712371352, - "grad_norm": 0.8102144002914429, - "learning_rate": 1.4694937837512975e-05, - "loss": 0.3197, - "step": 7591 - }, - { - "epoch": 0.7151974753303031, - "grad_norm": 0.7835685610771179, - "learning_rate": 1.469360453498857e-05, - "loss": 0.3038, - "step": 7592 - }, - { - "epoch": 0.7152916794234709, - "grad_norm": 0.7127066254615784, - "learning_rate": 1.4692271125442727e-05, - "loss": 0.2981, - "step": 7593 - }, - { - "epoch": 0.7153858835166388, - "grad_norm": 0.7197777628898621, - "learning_rate": 1.4690937608905843e-05, - "loss": 0.3397, - "step": 7594 - }, - { - "epoch": 0.7154800876098066, - "grad_norm": 0.6157107949256897, - "learning_rate": 1.4689603985408324e-05, - "loss": 0.2464, - "step": 7595 - }, - { - "epoch": 0.7155742917029745, - "grad_norm": 0.8053467869758606, - "learning_rate": 1.4688270254980588e-05, - "loss": 0.3336, - "step": 7596 - }, - { - "epoch": 0.7156684957961423, - "grad_norm": 1.9933987855911255, - "learning_rate": 1.4686936417653032e-05, - "loss": 0.3317, - "step": 7597 - }, - { - "epoch": 0.7157626998893102, - "grad_norm": 0.6925097107887268, - "learning_rate": 1.4685602473456084e-05, - "loss": 0.3272, - "step": 7598 - }, - { - "epoch": 0.715856903982478, - "grad_norm": 0.6546260118484497, - "learning_rate": 1.4684268422420149e-05, - "loss": 0.2834, - "step": 7599 - }, - { - "epoch": 0.7159511080756459, - "grad_norm": 0.7096473574638367, - "learning_rate": 1.4682934264575655e-05, - "loss": 0.3105, - "step": 7600 - }, - { - "epoch": 0.7160453121688137, - "grad_norm": 0.8337863683700562, - "learning_rate": 1.4681599999953014e-05, - "loss": 0.3549, - "step": 7601 - }, - { - "epoch": 0.7161395162619816, - "grad_norm": 0.7449379563331604, - "learning_rate": 1.4680265628582649e-05, - "loss": 0.3275, - "step": 7602 - }, - { - "epoch": 0.7162337203551494, - "grad_norm": 0.6516759395599365, - "learning_rate": 1.4678931150494992e-05, - "loss": 0.2657, - "step": 7603 - }, - { - "epoch": 0.7163279244483173, - "grad_norm": 0.7168840765953064, - "learning_rate": 
1.4677596565720472e-05, - "loss": 0.3175, - "step": 7604 - }, - { - "epoch": 0.7164221285414851, - "grad_norm": 0.7934387922286987, - "learning_rate": 1.4676261874289512e-05, - "loss": 0.3344, - "step": 7605 - }, - { - "epoch": 0.716516332634653, - "grad_norm": 0.6381176710128784, - "learning_rate": 1.4674927076232553e-05, - "loss": 0.2709, - "step": 7606 - }, - { - "epoch": 0.7166105367278208, - "grad_norm": 0.7143204808235168, - "learning_rate": 1.4673592171580025e-05, - "loss": 0.3133, - "step": 7607 - }, - { - "epoch": 0.7167047408209887, - "grad_norm": 0.7504051923751831, - "learning_rate": 1.4672257160362367e-05, - "loss": 0.3191, - "step": 7608 - }, - { - "epoch": 0.7167989449141565, - "grad_norm": 0.7678667902946472, - "learning_rate": 1.4670922042610018e-05, - "loss": 0.3171, - "step": 7609 - }, - { - "epoch": 0.7168931490073244, - "grad_norm": 0.8258387446403503, - "learning_rate": 1.4669586818353427e-05, - "loss": 0.4037, - "step": 7610 - }, - { - "epoch": 0.7169873531004922, - "grad_norm": 0.8272293210029602, - "learning_rate": 1.4668251487623033e-05, - "loss": 0.356, - "step": 7611 - }, - { - "epoch": 0.7170815571936601, - "grad_norm": 0.9082145690917969, - "learning_rate": 1.4666916050449285e-05, - "loss": 0.2972, - "step": 7612 - }, - { - "epoch": 0.7171757612868279, - "grad_norm": 0.9436091184616089, - "learning_rate": 1.4665580506862636e-05, - "loss": 0.3203, - "step": 7613 - }, - { - "epoch": 0.7172699653799958, - "grad_norm": 0.7845689058303833, - "learning_rate": 1.4664244856893532e-05, - "loss": 0.3522, - "step": 7614 - }, - { - "epoch": 0.7173641694731636, - "grad_norm": 0.7713825106620789, - "learning_rate": 1.4662909100572433e-05, - "loss": 0.298, - "step": 7615 - }, - { - "epoch": 0.7174583735663315, - "grad_norm": 0.7790085077285767, - "learning_rate": 1.4661573237929798e-05, - "loss": 0.3182, - "step": 7616 - }, - { - "epoch": 0.7175525776594993, - "grad_norm": 0.7305101752281189, - "learning_rate": 1.466023726899608e-05, - "loss": 0.3121, - "step": 7617 - }, - { - "epoch": 0.7176467817526672, - "grad_norm": 0.7902939319610596, - "learning_rate": 1.4658901193801747e-05, - "loss": 0.3211, - "step": 7618 - }, - { - "epoch": 0.717740985845835, - "grad_norm": 0.7859717011451721, - "learning_rate": 1.4657565012377263e-05, - "loss": 0.3065, - "step": 7619 - }, - { - "epoch": 0.7178351899390029, - "grad_norm": 0.8295626044273376, - "learning_rate": 1.4656228724753093e-05, - "loss": 0.2733, - "step": 7620 - }, - { - "epoch": 0.7179293940321707, - "grad_norm": 0.7178802490234375, - "learning_rate": 1.4654892330959707e-05, - "loss": 0.289, - "step": 7621 - }, - { - "epoch": 0.7180235981253386, - "grad_norm": 0.8516690135002136, - "learning_rate": 1.4653555831027575e-05, - "loss": 0.316, - "step": 7622 - }, - { - "epoch": 0.7181178022185064, - "grad_norm": 0.7444583773612976, - "learning_rate": 1.4652219224987174e-05, - "loss": 0.2749, - "step": 7623 - }, - { - "epoch": 0.7182120063116743, - "grad_norm": 0.7659174799919128, - "learning_rate": 1.4650882512868983e-05, - "loss": 0.3405, - "step": 7624 - }, - { - "epoch": 0.7183062104048421, - "grad_norm": 5.310520648956299, - "learning_rate": 1.4649545694703476e-05, - "loss": 0.3339, - "step": 7625 - }, - { - "epoch": 0.71840041449801, - "grad_norm": 0.756412148475647, - "learning_rate": 1.4648208770521134e-05, - "loss": 0.366, - "step": 7626 - }, - { - "epoch": 0.7184946185911778, - "grad_norm": 0.7343206405639648, - "learning_rate": 1.4646871740352444e-05, - "loss": 0.3252, - "step": 7627 - }, - { - "epoch": 
0.7185888226843457, - "grad_norm": 0.6697603464126587, - "learning_rate": 1.4645534604227894e-05, - "loss": 0.3041, - "step": 7628 - }, - { - "epoch": 0.7186830267775135, - "grad_norm": 0.665007472038269, - "learning_rate": 1.4644197362177967e-05, - "loss": 0.2974, - "step": 7629 - }, - { - "epoch": 0.7187772308706813, - "grad_norm": 0.6484889388084412, - "learning_rate": 1.4642860014233164e-05, - "loss": 0.2805, - "step": 7630 - }, - { - "epoch": 0.7188714349638492, - "grad_norm": 0.8215962648391724, - "learning_rate": 1.4641522560423966e-05, - "loss": 0.3275, - "step": 7631 - }, - { - "epoch": 0.718965639057017, - "grad_norm": 0.8847222328186035, - "learning_rate": 1.464018500078088e-05, - "loss": 0.3599, - "step": 7632 - }, - { - "epoch": 0.7190598431501849, - "grad_norm": 0.8452063202857971, - "learning_rate": 1.4638847335334399e-05, - "loss": 0.342, - "step": 7633 - }, - { - "epoch": 0.7191540472433527, - "grad_norm": 0.8655790686607361, - "learning_rate": 1.4637509564115022e-05, - "loss": 0.397, - "step": 7634 - }, - { - "epoch": 0.7192482513365206, - "grad_norm": 0.6889362931251526, - "learning_rate": 1.4636171687153255e-05, - "loss": 0.2995, - "step": 7635 - }, - { - "epoch": 0.7193424554296884, - "grad_norm": 1.0323363542556763, - "learning_rate": 1.4634833704479607e-05, - "loss": 0.3485, - "step": 7636 - }, - { - "epoch": 0.7194366595228563, - "grad_norm": 0.6997215747833252, - "learning_rate": 1.463349561612458e-05, - "loss": 0.2629, - "step": 7637 - }, - { - "epoch": 0.7195308636160241, - "grad_norm": 0.7511755228042603, - "learning_rate": 1.4632157422118687e-05, - "loss": 0.3361, - "step": 7638 - }, - { - "epoch": 0.719625067709192, - "grad_norm": 0.7879907488822937, - "learning_rate": 1.4630819122492444e-05, - "loss": 0.2914, - "step": 7639 - }, - { - "epoch": 0.7197192718023598, - "grad_norm": 0.837253212928772, - "learning_rate": 1.4629480717276361e-05, - "loss": 0.334, - "step": 7640 - }, - { - "epoch": 0.7198134758955277, - "grad_norm": 0.7338079810142517, - "learning_rate": 1.4628142206500959e-05, - "loss": 0.2808, - "step": 7641 - }, - { - "epoch": 0.7199076799886955, - "grad_norm": 0.7047969698905945, - "learning_rate": 1.4626803590196754e-05, - "loss": 0.3102, - "step": 7642 - }, - { - "epoch": 0.7200018840818634, - "grad_norm": 0.6892857551574707, - "learning_rate": 1.4625464868394275e-05, - "loss": 0.3119, - "step": 7643 - }, - { - "epoch": 0.7200960881750312, - "grad_norm": 0.8218495845794678, - "learning_rate": 1.4624126041124043e-05, - "loss": 0.3265, - "step": 7644 - }, - { - "epoch": 0.7201902922681991, - "grad_norm": 0.7993860244750977, - "learning_rate": 1.4622787108416585e-05, - "loss": 0.3039, - "step": 7645 - }, - { - "epoch": 0.7202844963613669, - "grad_norm": 0.7169907093048096, - "learning_rate": 1.462144807030243e-05, - "loss": 0.3169, - "step": 7646 - }, - { - "epoch": 0.7203787004545348, - "grad_norm": 0.704494833946228, - "learning_rate": 1.4620108926812115e-05, - "loss": 0.3111, - "step": 7647 - }, - { - "epoch": 0.7204729045477026, - "grad_norm": 0.7978274822235107, - "learning_rate": 1.461876967797617e-05, - "loss": 0.3151, - "step": 7648 - }, - { - "epoch": 0.7205671086408705, - "grad_norm": 0.5734168291091919, - "learning_rate": 1.4617430323825133e-05, - "loss": 0.2606, - "step": 7649 - }, - { - "epoch": 0.7206613127340383, - "grad_norm": 0.6860144734382629, - "learning_rate": 1.4616090864389545e-05, - "loss": 0.2641, - "step": 7650 - }, - { - "epoch": 0.7207555168272062, - "grad_norm": 0.6949218511581421, - "learning_rate": 
1.4614751299699946e-05, - "loss": 0.3033, - "step": 7651 - }, - { - "epoch": 0.720849720920374, - "grad_norm": 0.7663481831550598, - "learning_rate": 1.461341162978688e-05, - "loss": 0.3392, - "step": 7652 - }, - { - "epoch": 0.7209439250135419, - "grad_norm": 0.7189931273460388, - "learning_rate": 1.4612071854680893e-05, - "loss": 0.3315, - "step": 7653 - }, - { - "epoch": 0.7210381291067097, - "grad_norm": 0.7467421293258667, - "learning_rate": 1.4610731974412535e-05, - "loss": 0.2719, - "step": 7654 - }, - { - "epoch": 0.7211323331998776, - "grad_norm": 0.7532447576522827, - "learning_rate": 1.460939198901236e-05, - "loss": 0.321, - "step": 7655 - }, - { - "epoch": 0.7212265372930454, - "grad_norm": 0.8318077921867371, - "learning_rate": 1.4608051898510918e-05, - "loss": 0.324, - "step": 7656 - }, - { - "epoch": 0.7213207413862133, - "grad_norm": 0.9194123148918152, - "learning_rate": 1.4606711702938765e-05, - "loss": 0.318, - "step": 7657 - }, - { - "epoch": 0.7214149454793811, - "grad_norm": 0.8693779110908508, - "learning_rate": 1.4605371402326462e-05, - "loss": 0.3017, - "step": 7658 - }, - { - "epoch": 0.721509149572549, - "grad_norm": 1.0532331466674805, - "learning_rate": 1.4604030996704568e-05, - "loss": 0.2838, - "step": 7659 - }, - { - "epoch": 0.7216033536657168, - "grad_norm": 0.7592741847038269, - "learning_rate": 1.4602690486103648e-05, - "loss": 0.3202, - "step": 7660 - }, - { - "epoch": 0.7216975577588847, - "grad_norm": 0.907318115234375, - "learning_rate": 1.4601349870554264e-05, - "loss": 0.3529, - "step": 7661 - }, - { - "epoch": 0.7217917618520525, - "grad_norm": 0.8160556554794312, - "learning_rate": 1.460000915008699e-05, - "loss": 0.3145, - "step": 7662 - }, - { - "epoch": 0.7218859659452204, - "grad_norm": 0.8162074089050293, - "learning_rate": 1.4598668324732392e-05, - "loss": 0.3461, - "step": 7663 - }, - { - "epoch": 0.7219801700383882, - "grad_norm": 0.7328223586082458, - "learning_rate": 1.4597327394521044e-05, - "loss": 0.3273, - "step": 7664 - }, - { - "epoch": 0.7220743741315561, - "grad_norm": 0.6975407600402832, - "learning_rate": 1.4595986359483523e-05, - "loss": 0.3584, - "step": 7665 - }, - { - "epoch": 0.7221685782247239, - "grad_norm": 0.7907822132110596, - "learning_rate": 1.45946452196504e-05, - "loss": 0.3562, - "step": 7666 - }, - { - "epoch": 0.7222627823178918, - "grad_norm": 1.0598143339157104, - "learning_rate": 1.4593303975052266e-05, - "loss": 0.3115, - "step": 7667 - }, - { - "epoch": 0.7223569864110596, - "grad_norm": 0.7048850655555725, - "learning_rate": 1.4591962625719696e-05, - "loss": 0.2968, - "step": 7668 - }, - { - "epoch": 0.7224511905042275, - "grad_norm": 0.9282791614532471, - "learning_rate": 1.4590621171683275e-05, - "loss": 0.3706, - "step": 7669 - }, - { - "epoch": 0.7225453945973953, - "grad_norm": 1.0115927457809448, - "learning_rate": 1.4589279612973597e-05, - "loss": 0.375, - "step": 7670 - }, - { - "epoch": 0.7226395986905632, - "grad_norm": 0.69843590259552, - "learning_rate": 1.458793794962124e-05, - "loss": 0.3144, - "step": 7671 - }, - { - "epoch": 0.722733802783731, - "grad_norm": 0.6993424892425537, - "learning_rate": 1.4586596181656804e-05, - "loss": 0.2873, - "step": 7672 - }, - { - "epoch": 0.7228280068768989, - "grad_norm": 0.7413098216056824, - "learning_rate": 1.4585254309110886e-05, - "loss": 0.2961, - "step": 7673 - }, - { - "epoch": 0.7229222109700667, - "grad_norm": 0.7417903542518616, - "learning_rate": 1.4583912332014071e-05, - "loss": 0.2996, - "step": 7674 - }, - { - "epoch": 0.7230164150632346, 
- "grad_norm": 0.7722974419593811, - "learning_rate": 1.4582570250396972e-05, - "loss": 0.3328, - "step": 7675 - }, - { - "epoch": 0.7231106191564024, - "grad_norm": 0.8128108978271484, - "learning_rate": 1.4581228064290182e-05, - "loss": 0.2928, - "step": 7676 - }, - { - "epoch": 0.7232048232495701, - "grad_norm": 0.8373945355415344, - "learning_rate": 1.4579885773724305e-05, - "loss": 0.3435, - "step": 7677 - }, - { - "epoch": 0.723299027342738, - "grad_norm": 0.8415419459342957, - "learning_rate": 1.4578543378729954e-05, - "loss": 0.3078, - "step": 7678 - }, - { - "epoch": 0.7233932314359058, - "grad_norm": 0.8667798638343811, - "learning_rate": 1.457720087933773e-05, - "loss": 0.3105, - "step": 7679 - }, - { - "epoch": 0.7234874355290737, - "grad_norm": 0.7439168095588684, - "learning_rate": 1.4575858275578248e-05, - "loss": 0.2982, - "step": 7680 - }, - { - "epoch": 0.7235816396222415, - "grad_norm": 0.6972314119338989, - "learning_rate": 1.4574515567482122e-05, - "loss": 0.2696, - "step": 7681 - }, - { - "epoch": 0.7236758437154094, - "grad_norm": 0.8637144565582275, - "learning_rate": 1.4573172755079963e-05, - "loss": 0.3105, - "step": 7682 - }, - { - "epoch": 0.7237700478085772, - "grad_norm": 0.7399028539657593, - "learning_rate": 1.4571829838402394e-05, - "loss": 0.3514, - "step": 7683 - }, - { - "epoch": 0.7238642519017451, - "grad_norm": 0.7735146284103394, - "learning_rate": 1.4570486817480036e-05, - "loss": 0.3345, - "step": 7684 - }, - { - "epoch": 0.7239584559949129, - "grad_norm": 0.7760829329490662, - "learning_rate": 1.456914369234351e-05, - "loss": 0.3542, - "step": 7685 - }, - { - "epoch": 0.7240526600880808, - "grad_norm": 0.7597216367721558, - "learning_rate": 1.4567800463023438e-05, - "loss": 0.3513, - "step": 7686 - }, - { - "epoch": 0.7241468641812486, - "grad_norm": 0.631736695766449, - "learning_rate": 1.4566457129550453e-05, - "loss": 0.283, - "step": 7687 - }, - { - "epoch": 0.7242410682744165, - "grad_norm": 0.7289599180221558, - "learning_rate": 1.4565113691955187e-05, - "loss": 0.2733, - "step": 7688 - }, - { - "epoch": 0.7243352723675843, - "grad_norm": 0.8164040446281433, - "learning_rate": 1.4563770150268264e-05, - "loss": 0.3156, - "step": 7689 - }, - { - "epoch": 0.7244294764607522, - "grad_norm": 0.7564865350723267, - "learning_rate": 1.4562426504520326e-05, - "loss": 0.3268, - "step": 7690 - }, - { - "epoch": 0.72452368055392, - "grad_norm": 0.6564485430717468, - "learning_rate": 1.4561082754742007e-05, - "loss": 0.3047, - "step": 7691 - }, - { - "epoch": 0.7246178846470879, - "grad_norm": 0.7447567582130432, - "learning_rate": 1.4559738900963946e-05, - "loss": 0.2957, - "step": 7692 - }, - { - "epoch": 0.7247120887402557, - "grad_norm": 0.8177298307418823, - "learning_rate": 1.4558394943216789e-05, - "loss": 0.3118, - "step": 7693 - }, - { - "epoch": 0.7248062928334236, - "grad_norm": 0.6390464901924133, - "learning_rate": 1.4557050881531174e-05, - "loss": 0.2806, - "step": 7694 - }, - { - "epoch": 0.7249004969265914, - "grad_norm": 0.7485120892524719, - "learning_rate": 1.4555706715937755e-05, - "loss": 0.2938, - "step": 7695 - }, - { - "epoch": 0.7249947010197593, - "grad_norm": 0.7831445336341858, - "learning_rate": 1.4554362446467175e-05, - "loss": 0.3013, - "step": 7696 - }, - { - "epoch": 0.7250889051129271, - "grad_norm": 0.7116540670394897, - "learning_rate": 1.4553018073150088e-05, - "loss": 0.325, - "step": 7697 - }, - { - "epoch": 0.725183109206095, - "grad_norm": 0.7577566504478455, - "learning_rate": 1.455167359601715e-05, - "loss": 
0.2896, - "step": 7698 - }, - { - "epoch": 0.7252773132992628, - "grad_norm": 0.8203549981117249, - "learning_rate": 1.4550329015099012e-05, - "loss": 0.3543, - "step": 7699 - }, - { - "epoch": 0.7253715173924307, - "grad_norm": 0.9177870154380798, - "learning_rate": 1.4548984330426337e-05, - "loss": 0.3804, - "step": 7700 - }, - { - "epoch": 0.7254657214855985, - "grad_norm": 0.6643106937408447, - "learning_rate": 1.4547639542029784e-05, - "loss": 0.3008, - "step": 7701 - }, - { - "epoch": 0.7255599255787664, - "grad_norm": 1.296820044517517, - "learning_rate": 1.4546294649940014e-05, - "loss": 0.2951, - "step": 7702 - }, - { - "epoch": 0.7256541296719342, - "grad_norm": 0.7863390445709229, - "learning_rate": 1.4544949654187697e-05, - "loss": 0.2886, - "step": 7703 - }, - { - "epoch": 0.7257483337651021, - "grad_norm": 0.8272736668586731, - "learning_rate": 1.4543604554803499e-05, - "loss": 0.3391, - "step": 7704 - }, - { - "epoch": 0.7258425378582699, - "grad_norm": 0.7474284768104553, - "learning_rate": 1.4542259351818092e-05, - "loss": 0.3191, - "step": 7705 - }, - { - "epoch": 0.7259367419514378, - "grad_norm": 0.8520966172218323, - "learning_rate": 1.4540914045262142e-05, - "loss": 0.3483, - "step": 7706 - }, - { - "epoch": 0.7260309460446056, - "grad_norm": 0.83542799949646, - "learning_rate": 1.4539568635166332e-05, - "loss": 0.3682, - "step": 7707 - }, - { - "epoch": 0.7261251501377735, - "grad_norm": 0.787520170211792, - "learning_rate": 1.4538223121561338e-05, - "loss": 0.286, - "step": 7708 - }, - { - "epoch": 0.7262193542309413, - "grad_norm": 0.7447176575660706, - "learning_rate": 1.4536877504477836e-05, - "loss": 0.3206, - "step": 7709 - }, - { - "epoch": 0.7263135583241092, - "grad_norm": 0.6937974691390991, - "learning_rate": 1.4535531783946513e-05, - "loss": 0.3101, - "step": 7710 - }, - { - "epoch": 0.726407762417277, - "grad_norm": 0.6858431696891785, - "learning_rate": 1.4534185959998048e-05, - "loss": 0.3008, - "step": 7711 - }, - { - "epoch": 0.7265019665104449, - "grad_norm": 0.6721896529197693, - "learning_rate": 1.453284003266313e-05, - "loss": 0.295, - "step": 7712 - }, - { - "epoch": 0.7265961706036127, - "grad_norm": 0.8485425114631653, - "learning_rate": 1.4531494001972455e-05, - "loss": 0.2685, - "step": 7713 - }, - { - "epoch": 0.7266903746967805, - "grad_norm": 0.8477091193199158, - "learning_rate": 1.4530147867956703e-05, - "loss": 0.3375, - "step": 7714 - }, - { - "epoch": 0.7267845787899484, - "grad_norm": 0.6675555109977722, - "learning_rate": 1.4528801630646577e-05, - "loss": 0.3135, - "step": 7715 - }, - { - "epoch": 0.7268787828831162, - "grad_norm": 0.6150081157684326, - "learning_rate": 1.4527455290072767e-05, - "loss": 0.2665, - "step": 7716 - }, - { - "epoch": 0.7269729869762841, - "grad_norm": 0.7910028696060181, - "learning_rate": 1.4526108846265976e-05, - "loss": 0.3216, - "step": 7717 - }, - { - "epoch": 0.727067191069452, - "grad_norm": 0.8232051730155945, - "learning_rate": 1.4524762299256904e-05, - "loss": 0.3288, - "step": 7718 - }, - { - "epoch": 0.7271613951626198, - "grad_norm": 0.8236272931098938, - "learning_rate": 1.4523415649076253e-05, - "loss": 0.2982, - "step": 7719 - }, - { - "epoch": 0.7272555992557876, - "grad_norm": 0.8505282402038574, - "learning_rate": 1.452206889575473e-05, - "loss": 0.3502, - "step": 7720 - }, - { - "epoch": 0.7273498033489555, - "grad_norm": 0.7677298188209534, - "learning_rate": 1.4520722039323045e-05, - "loss": 0.327, - "step": 7721 - }, - { - "epoch": 0.7274440074421233, - "grad_norm": 
0.7097388505935669, - "learning_rate": 1.4519375079811902e-05, - "loss": 0.3, - "step": 7722 - }, - { - "epoch": 0.7275382115352912, - "grad_norm": 0.6577833890914917, - "learning_rate": 1.4518028017252019e-05, - "loss": 0.2954, - "step": 7723 - }, - { - "epoch": 0.727632415628459, - "grad_norm": 1.1290863752365112, - "learning_rate": 1.4516680851674112e-05, - "loss": 0.3226, - "step": 7724 - }, - { - "epoch": 0.7277266197216269, - "grad_norm": 0.8429285883903503, - "learning_rate": 1.4515333583108896e-05, - "loss": 0.3298, - "step": 7725 - }, - { - "epoch": 0.7278208238147947, - "grad_norm": 0.7749203443527222, - "learning_rate": 1.4513986211587087e-05, - "loss": 0.3388, - "step": 7726 - }, - { - "epoch": 0.7279150279079626, - "grad_norm": 0.9690777659416199, - "learning_rate": 1.4512638737139417e-05, - "loss": 0.3394, - "step": 7727 - }, - { - "epoch": 0.7280092320011304, - "grad_norm": 0.7415863275527954, - "learning_rate": 1.45112911597966e-05, - "loss": 0.3133, - "step": 7728 - }, - { - "epoch": 0.7281034360942983, - "grad_norm": 0.6323605179786682, - "learning_rate": 1.4509943479589373e-05, - "loss": 0.2861, - "step": 7729 - }, - { - "epoch": 0.7281976401874661, - "grad_norm": 0.7535349130630493, - "learning_rate": 1.4508595696548457e-05, - "loss": 0.3172, - "step": 7730 - }, - { - "epoch": 0.728291844280634, - "grad_norm": 0.7147517204284668, - "learning_rate": 1.4507247810704586e-05, - "loss": 0.3317, - "step": 7731 - }, - { - "epoch": 0.7283860483738018, - "grad_norm": 0.644232451915741, - "learning_rate": 1.4505899822088494e-05, - "loss": 0.2731, - "step": 7732 - }, - { - "epoch": 0.7284802524669697, - "grad_norm": 0.7214465737342834, - "learning_rate": 1.450455173073092e-05, - "loss": 0.3029, - "step": 7733 - }, - { - "epoch": 0.7285744565601375, - "grad_norm": 0.7252299189567566, - "learning_rate": 1.4503203536662596e-05, - "loss": 0.3139, - "step": 7734 - }, - { - "epoch": 0.7286686606533054, - "grad_norm": 0.6736034750938416, - "learning_rate": 1.450185523991427e-05, - "loss": 0.2972, - "step": 7735 - }, - { - "epoch": 0.7287628647464732, - "grad_norm": 0.741563081741333, - "learning_rate": 1.4500506840516683e-05, - "loss": 0.334, - "step": 7736 - }, - { - "epoch": 0.7288570688396411, - "grad_norm": 0.8350456953048706, - "learning_rate": 1.4499158338500578e-05, - "loss": 0.3538, - "step": 7737 - }, - { - "epoch": 0.7289512729328089, - "grad_norm": 0.6199936866760254, - "learning_rate": 1.4497809733896708e-05, - "loss": 0.2771, - "step": 7738 - }, - { - "epoch": 0.7290454770259768, - "grad_norm": 0.7002807259559631, - "learning_rate": 1.4496461026735815e-05, - "loss": 0.3028, - "step": 7739 - }, - { - "epoch": 0.7291396811191446, - "grad_norm": 0.8033571243286133, - "learning_rate": 1.449511221704866e-05, - "loss": 0.3532, - "step": 7740 - }, - { - "epoch": 0.7292338852123125, - "grad_norm": 0.9143756628036499, - "learning_rate": 1.4493763304865995e-05, - "loss": 0.3265, - "step": 7741 - }, - { - "epoch": 0.7293280893054803, - "grad_norm": 0.6929057240486145, - "learning_rate": 1.4492414290218575e-05, - "loss": 0.3382, - "step": 7742 - }, - { - "epoch": 0.7294222933986482, - "grad_norm": 0.8464141488075256, - "learning_rate": 1.4491065173137162e-05, - "loss": 0.3094, - "step": 7743 - }, - { - "epoch": 0.729516497491816, - "grad_norm": 0.7139920592308044, - "learning_rate": 1.4489715953652517e-05, - "loss": 0.3127, - "step": 7744 - }, - { - "epoch": 0.7296107015849839, - "grad_norm": 0.7467449307441711, - "learning_rate": 1.4488366631795408e-05, - "loss": 0.3303, - "step": 7745 
- }, - { - "epoch": 0.7297049056781517, - "grad_norm": 0.7776731848716736, - "learning_rate": 1.4487017207596595e-05, - "loss": 0.2987, - "step": 7746 - }, - { - "epoch": 0.7297991097713196, - "grad_norm": 0.8799088001251221, - "learning_rate": 1.448566768108685e-05, - "loss": 0.2734, - "step": 7747 - }, - { - "epoch": 0.7298933138644874, - "grad_norm": 0.835666298866272, - "learning_rate": 1.4484318052296946e-05, - "loss": 0.3238, - "step": 7748 - }, - { - "epoch": 0.7299875179576553, - "grad_norm": 1.0648915767669678, - "learning_rate": 1.4482968321257658e-05, - "loss": 0.3295, - "step": 7749 - }, - { - "epoch": 0.7300817220508231, - "grad_norm": 0.8137198090553284, - "learning_rate": 1.4481618487999755e-05, - "loss": 0.3468, - "step": 7750 - }, - { - "epoch": 0.730175926143991, - "grad_norm": 0.7838937044143677, - "learning_rate": 1.448026855255402e-05, - "loss": 0.2977, - "step": 7751 - }, - { - "epoch": 0.7302701302371588, - "grad_norm": 0.6746060848236084, - "learning_rate": 1.4478918514951235e-05, - "loss": 0.2868, - "step": 7752 - }, - { - "epoch": 0.7303643343303267, - "grad_norm": 0.8358673453330994, - "learning_rate": 1.447756837522218e-05, - "loss": 0.3153, - "step": 7753 - }, - { - "epoch": 0.7304585384234945, - "grad_norm": 0.7528150677680969, - "learning_rate": 1.447621813339764e-05, - "loss": 0.3058, - "step": 7754 - }, - { - "epoch": 0.7305527425166624, - "grad_norm": 0.6820029616355896, - "learning_rate": 1.447486778950841e-05, - "loss": 0.2789, - "step": 7755 - }, - { - "epoch": 0.7306469466098302, - "grad_norm": 0.8257085084915161, - "learning_rate": 1.4473517343585265e-05, - "loss": 0.3562, - "step": 7756 - }, - { - "epoch": 0.730741150702998, - "grad_norm": 0.710114598274231, - "learning_rate": 1.447216679565901e-05, - "loss": 0.3109, - "step": 7757 - }, - { - "epoch": 0.7308353547961659, - "grad_norm": 0.7254622578620911, - "learning_rate": 1.4470816145760437e-05, - "loss": 0.3128, - "step": 7758 - }, - { - "epoch": 0.7309295588893338, - "grad_norm": 0.7184939980506897, - "learning_rate": 1.4469465393920341e-05, - "loss": 0.2997, - "step": 7759 - }, - { - "epoch": 0.7310237629825016, - "grad_norm": 0.6432626843452454, - "learning_rate": 1.4468114540169522e-05, - "loss": 0.272, - "step": 7760 - }, - { - "epoch": 0.7311179670756695, - "grad_norm": 0.7790955305099487, - "learning_rate": 1.4466763584538783e-05, - "loss": 0.3428, - "step": 7761 - }, - { - "epoch": 0.7312121711688373, - "grad_norm": 0.7339643836021423, - "learning_rate": 1.4465412527058921e-05, - "loss": 0.3384, - "step": 7762 - }, - { - "epoch": 0.7313063752620051, - "grad_norm": 0.8110752701759338, - "learning_rate": 1.4464061367760753e-05, - "loss": 0.2799, - "step": 7763 - }, - { - "epoch": 0.731400579355173, - "grad_norm": 0.7033218145370483, - "learning_rate": 1.4462710106675079e-05, - "loss": 0.31, - "step": 7764 - }, - { - "epoch": 0.7314947834483408, - "grad_norm": 0.7438430190086365, - "learning_rate": 1.4461358743832716e-05, - "loss": 0.341, - "step": 7765 - }, - { - "epoch": 0.7315889875415087, - "grad_norm": 1.0014957189559937, - "learning_rate": 1.446000727926447e-05, - "loss": 0.3457, - "step": 7766 - }, - { - "epoch": 0.7316831916346765, - "grad_norm": 0.9046600461006165, - "learning_rate": 1.4458655713001162e-05, - "loss": 0.3285, - "step": 7767 - }, - { - "epoch": 0.7317773957278444, - "grad_norm": 0.7668585777282715, - "learning_rate": 1.445730404507361e-05, - "loss": 0.2884, - "step": 7768 - }, - { - "epoch": 0.7318715998210122, - "grad_norm": 0.6925591230392456, - "learning_rate": 
1.4455952275512632e-05, - "loss": 0.3028, - "step": 7769 - }, - { - "epoch": 0.7319658039141801, - "grad_norm": 0.8074665665626526, - "learning_rate": 1.445460040434905e-05, - "loss": 0.3269, - "step": 7770 - }, - { - "epoch": 0.7320600080073479, - "grad_norm": 0.848996639251709, - "learning_rate": 1.4453248431613687e-05, - "loss": 0.3035, - "step": 7771 - }, - { - "epoch": 0.7321542121005158, - "grad_norm": 0.7105998396873474, - "learning_rate": 1.4451896357337376e-05, - "loss": 0.3296, - "step": 7772 - }, - { - "epoch": 0.7322484161936836, - "grad_norm": 0.6984401345252991, - "learning_rate": 1.4450544181550943e-05, - "loss": 0.2914, - "step": 7773 - }, - { - "epoch": 0.7323426202868515, - "grad_norm": 0.7580311894416809, - "learning_rate": 1.4449191904285218e-05, - "loss": 0.289, - "step": 7774 - }, - { - "epoch": 0.7324368243800193, - "grad_norm": 0.7013496160507202, - "learning_rate": 1.444783952557104e-05, - "loss": 0.2993, - "step": 7775 - }, - { - "epoch": 0.7325310284731872, - "grad_norm": 0.6538723111152649, - "learning_rate": 1.4446487045439237e-05, - "loss": 0.2664, - "step": 7776 - }, - { - "epoch": 0.732625232566355, - "grad_norm": 0.7903271317481995, - "learning_rate": 1.4445134463920656e-05, - "loss": 0.3435, - "step": 7777 - }, - { - "epoch": 0.7327194366595229, - "grad_norm": 0.8297932147979736, - "learning_rate": 1.4443781781046135e-05, - "loss": 0.3158, - "step": 7778 - }, - { - "epoch": 0.7328136407526907, - "grad_norm": 0.7473205327987671, - "learning_rate": 1.4442428996846515e-05, - "loss": 0.35, - "step": 7779 - }, - { - "epoch": 0.7329078448458586, - "grad_norm": 1.1932625770568848, - "learning_rate": 1.4441076111352646e-05, - "loss": 0.3148, - "step": 7780 - }, - { - "epoch": 0.7330020489390264, - "grad_norm": 0.7813129425048828, - "learning_rate": 1.4439723124595373e-05, - "loss": 0.3275, - "step": 7781 - }, - { - "epoch": 0.7330962530321943, - "grad_norm": 0.7652055621147156, - "learning_rate": 1.4438370036605545e-05, - "loss": 0.3187, - "step": 7782 - }, - { - "epoch": 0.7331904571253621, - "grad_norm": 0.7172680497169495, - "learning_rate": 1.4437016847414017e-05, - "loss": 0.2769, - "step": 7783 - }, - { - "epoch": 0.73328466121853, - "grad_norm": 0.7182568907737732, - "learning_rate": 1.4435663557051643e-05, - "loss": 0.3502, - "step": 7784 - }, - { - "epoch": 0.7333788653116978, - "grad_norm": 0.6757748126983643, - "learning_rate": 1.4434310165549282e-05, - "loss": 0.2798, - "step": 7785 - }, - { - "epoch": 0.7334730694048657, - "grad_norm": 0.7448100447654724, - "learning_rate": 1.4432956672937785e-05, - "loss": 0.3146, - "step": 7786 - }, - { - "epoch": 0.7335672734980335, - "grad_norm": 0.762901246547699, - "learning_rate": 1.4431603079248024e-05, - "loss": 0.2983, - "step": 7787 - }, - { - "epoch": 0.7336614775912014, - "grad_norm": 0.8209456205368042, - "learning_rate": 1.443024938451086e-05, - "loss": 0.3238, - "step": 7788 - }, - { - "epoch": 0.7337556816843692, - "grad_norm": 0.6932891607284546, - "learning_rate": 1.4428895588757162e-05, - "loss": 0.3441, - "step": 7789 - }, - { - "epoch": 0.7338498857775371, - "grad_norm": 0.7887647151947021, - "learning_rate": 1.442754169201779e-05, - "loss": 0.3272, - "step": 7790 - }, - { - "epoch": 0.7339440898707049, - "grad_norm": 0.7699485421180725, - "learning_rate": 1.4426187694323619e-05, - "loss": 0.3537, - "step": 7791 - }, - { - "epoch": 0.7340382939638728, - "grad_norm": 0.8260576128959656, - "learning_rate": 1.4424833595705527e-05, - "loss": 0.338, - "step": 7792 - }, - { - "epoch": 
0.7341324980570406, - "grad_norm": 0.869855523109436, - "learning_rate": 1.4423479396194387e-05, - "loss": 0.2982, - "step": 7793 - }, - { - "epoch": 0.7342267021502085, - "grad_norm": 0.8493530750274658, - "learning_rate": 1.4422125095821068e-05, - "loss": 0.3599, - "step": 7794 - }, - { - "epoch": 0.7343209062433763, - "grad_norm": 0.7826700806617737, - "learning_rate": 1.4420770694616469e-05, - "loss": 0.3199, - "step": 7795 - }, - { - "epoch": 0.7344151103365442, - "grad_norm": 0.8466247916221619, - "learning_rate": 1.4419416192611453e-05, - "loss": 0.3228, - "step": 7796 - }, - { - "epoch": 0.734509314429712, - "grad_norm": 0.7265615463256836, - "learning_rate": 1.4418061589836914e-05, - "loss": 0.3111, - "step": 7797 - }, - { - "epoch": 0.7346035185228799, - "grad_norm": 0.6740742921829224, - "learning_rate": 1.4416706886323741e-05, - "loss": 0.2788, - "step": 7798 - }, - { - "epoch": 0.7346977226160477, - "grad_norm": 0.7846463918685913, - "learning_rate": 1.4415352082102818e-05, - "loss": 0.2909, - "step": 7799 - }, - { - "epoch": 0.7347919267092156, - "grad_norm": 0.8173394203186035, - "learning_rate": 1.4413997177205042e-05, - "loss": 0.3649, - "step": 7800 - }, - { - "epoch": 0.7348861308023834, - "grad_norm": 0.7414736747741699, - "learning_rate": 1.4412642171661301e-05, - "loss": 0.3457, - "step": 7801 - }, - { - "epoch": 0.7349803348955513, - "grad_norm": 0.8608464002609253, - "learning_rate": 1.4411287065502494e-05, - "loss": 0.3097, - "step": 7802 - }, - { - "epoch": 0.7350745389887191, - "grad_norm": 0.7352781891822815, - "learning_rate": 1.4409931858759523e-05, - "loss": 0.2752, - "step": 7803 - }, - { - "epoch": 0.735168743081887, - "grad_norm": 1.0024056434631348, - "learning_rate": 1.4408576551463283e-05, - "loss": 0.3473, - "step": 7804 - }, - { - "epoch": 0.7352629471750548, - "grad_norm": 0.8175967931747437, - "learning_rate": 1.4407221143644681e-05, - "loss": 0.3221, - "step": 7805 - }, - { - "epoch": 0.7353571512682227, - "grad_norm": 0.6681419014930725, - "learning_rate": 1.4405865635334619e-05, - "loss": 0.2863, - "step": 7806 - }, - { - "epoch": 0.7354513553613905, - "grad_norm": 0.7162423133850098, - "learning_rate": 1.4404510026564007e-05, - "loss": 0.3276, - "step": 7807 - }, - { - "epoch": 0.7355455594545584, - "grad_norm": 0.7024120688438416, - "learning_rate": 1.4403154317363757e-05, - "loss": 0.3066, - "step": 7808 - }, - { - "epoch": 0.7356397635477262, - "grad_norm": 0.8371044993400574, - "learning_rate": 1.4401798507764777e-05, - "loss": 0.3513, - "step": 7809 - }, - { - "epoch": 0.735733967640894, - "grad_norm": 0.7337832450866699, - "learning_rate": 1.4400442597797985e-05, - "loss": 0.3478, - "step": 7810 - }, - { - "epoch": 0.7358281717340619, - "grad_norm": 0.831731379032135, - "learning_rate": 1.4399086587494292e-05, - "loss": 0.3103, - "step": 7811 - }, - { - "epoch": 0.7359223758272297, - "grad_norm": 0.7630707621574402, - "learning_rate": 1.4397730476884628e-05, - "loss": 0.3501, - "step": 7812 - }, - { - "epoch": 0.7360165799203976, - "grad_norm": 0.5945831537246704, - "learning_rate": 1.4396374265999905e-05, - "loss": 0.2768, - "step": 7813 - }, - { - "epoch": 0.7361107840135654, - "grad_norm": 0.7229190468788147, - "learning_rate": 1.4395017954871046e-05, - "loss": 0.3436, - "step": 7814 - }, - { - "epoch": 0.7362049881067332, - "grad_norm": 0.802777886390686, - "learning_rate": 1.4393661543528988e-05, - "loss": 0.362, - "step": 7815 - }, - { - "epoch": 0.736299192199901, - "grad_norm": 0.6775187253952026, - "learning_rate": 
1.4392305032004648e-05, - "loss": 0.3228, - "step": 7816 - }, - { - "epoch": 0.7363933962930689, - "grad_norm": 0.8231747150421143, - "learning_rate": 1.439094842032896e-05, - "loss": 0.335, - "step": 7817 - }, - { - "epoch": 0.7364876003862367, - "grad_norm": 0.7906420826911926, - "learning_rate": 1.4389591708532858e-05, - "loss": 0.3352, - "step": 7818 - }, - { - "epoch": 0.7365818044794046, - "grad_norm": 0.7793775200843811, - "learning_rate": 1.4388234896647272e-05, - "loss": 0.3572, - "step": 7819 - }, - { - "epoch": 0.7366760085725724, - "grad_norm": 0.7064700722694397, - "learning_rate": 1.4386877984703152e-05, - "loss": 0.3197, - "step": 7820 - }, - { - "epoch": 0.7367702126657403, - "grad_norm": 0.7348716855049133, - "learning_rate": 1.4385520972731423e-05, - "loss": 0.3316, - "step": 7821 - }, - { - "epoch": 0.7368644167589081, - "grad_norm": 0.750260055065155, - "learning_rate": 1.4384163860763037e-05, - "loss": 0.3037, - "step": 7822 - }, - { - "epoch": 0.736958620852076, - "grad_norm": 0.777546763420105, - "learning_rate": 1.4382806648828934e-05, - "loss": 0.2928, - "step": 7823 - }, - { - "epoch": 0.7370528249452438, - "grad_norm": 0.6131889820098877, - "learning_rate": 1.438144933696006e-05, - "loss": 0.2752, - "step": 7824 - }, - { - "epoch": 0.7371470290384117, - "grad_norm": 0.8008215427398682, - "learning_rate": 1.4380091925187368e-05, - "loss": 0.3215, - "step": 7825 - }, - { - "epoch": 0.7372412331315795, - "grad_norm": 0.8012502193450928, - "learning_rate": 1.4378734413541802e-05, - "loss": 0.3252, - "step": 7826 - }, - { - "epoch": 0.7373354372247474, - "grad_norm": 0.75214684009552, - "learning_rate": 1.4377376802054322e-05, - "loss": 0.3336, - "step": 7827 - }, - { - "epoch": 0.7374296413179152, - "grad_norm": 0.797979474067688, - "learning_rate": 1.4376019090755882e-05, - "loss": 0.3361, - "step": 7828 - }, - { - "epoch": 0.7375238454110831, - "grad_norm": 1.650551438331604, - "learning_rate": 1.4374661279677438e-05, - "loss": 0.3175, - "step": 7829 - }, - { - "epoch": 0.7376180495042509, - "grad_norm": 1.420734167098999, - "learning_rate": 1.437330336884995e-05, - "loss": 0.3828, - "step": 7830 - }, - { - "epoch": 0.7377122535974188, - "grad_norm": 0.777060866355896, - "learning_rate": 1.4371945358304383e-05, - "loss": 0.3606, - "step": 7831 - }, - { - "epoch": 0.7378064576905866, - "grad_norm": 0.8682255744934082, - "learning_rate": 1.4370587248071698e-05, - "loss": 0.3288, - "step": 7832 - }, - { - "epoch": 0.7379006617837545, - "grad_norm": 0.6950915455818176, - "learning_rate": 1.436922903818287e-05, - "loss": 0.297, - "step": 7833 - }, - { - "epoch": 0.7379948658769223, - "grad_norm": 0.6280592679977417, - "learning_rate": 1.4367870728668858e-05, - "loss": 0.291, - "step": 7834 - }, - { - "epoch": 0.7380890699700902, - "grad_norm": 0.7080531120300293, - "learning_rate": 1.4366512319560642e-05, - "loss": 0.3251, - "step": 7835 - }, - { - "epoch": 0.738183274063258, - "grad_norm": 0.7126514315605164, - "learning_rate": 1.4365153810889188e-05, - "loss": 0.3244, - "step": 7836 - }, - { - "epoch": 0.7382774781564259, - "grad_norm": 0.6826251149177551, - "learning_rate": 1.4363795202685478e-05, - "loss": 0.309, - "step": 7837 - }, - { - "epoch": 0.7383716822495937, - "grad_norm": 0.7983651757240295, - "learning_rate": 1.436243649498049e-05, - "loss": 0.3098, - "step": 7838 - }, - { - "epoch": 0.7384658863427616, - "grad_norm": 1.0099413394927979, - "learning_rate": 1.4361077687805201e-05, - "loss": 0.3366, - "step": 7839 - }, - { - "epoch": 0.7385600904359294, - 
"grad_norm": 0.7486034631729126, - "learning_rate": 1.43597187811906e-05, - "loss": 0.3248, - "step": 7840 - }, - { - "epoch": 0.7386542945290973, - "grad_norm": 0.8024405241012573, - "learning_rate": 1.4358359775167666e-05, - "loss": 0.3657, - "step": 7841 - }, - { - "epoch": 0.7387484986222651, - "grad_norm": 0.7130158543586731, - "learning_rate": 1.4357000669767386e-05, - "loss": 0.3575, - "step": 7842 - }, - { - "epoch": 0.738842702715433, - "grad_norm": 0.8017832636833191, - "learning_rate": 1.4355641465020755e-05, - "loss": 0.3025, - "step": 7843 - }, - { - "epoch": 0.7389369068086008, - "grad_norm": 0.7155521512031555, - "learning_rate": 1.4354282160958764e-05, - "loss": 0.3482, - "step": 7844 - }, - { - "epoch": 0.7390311109017687, - "grad_norm": 0.7501134276390076, - "learning_rate": 1.4352922757612407e-05, - "loss": 0.2926, - "step": 7845 - }, - { - "epoch": 0.7391253149949365, - "grad_norm": 0.7163833379745483, - "learning_rate": 1.4351563255012674e-05, - "loss": 0.304, - "step": 7846 - }, - { - "epoch": 0.7392195190881043, - "grad_norm": 0.7612854838371277, - "learning_rate": 1.435020365319057e-05, - "loss": 0.3363, - "step": 7847 - }, - { - "epoch": 0.7393137231812722, - "grad_norm": 0.7598935961723328, - "learning_rate": 1.4348843952177098e-05, - "loss": 0.3056, - "step": 7848 - }, - { - "epoch": 0.73940792727444, - "grad_norm": 0.6794955730438232, - "learning_rate": 1.4347484152003256e-05, - "loss": 0.3472, - "step": 7849 - }, - { - "epoch": 0.7395021313676079, - "grad_norm": 0.7308415174484253, - "learning_rate": 1.4346124252700056e-05, - "loss": 0.3234, - "step": 7850 - }, - { - "epoch": 0.7395963354607757, - "grad_norm": 0.7182336449623108, - "learning_rate": 1.4344764254298495e-05, - "loss": 0.3104, - "step": 7851 - }, - { - "epoch": 0.7396905395539436, - "grad_norm": 0.6178147196769714, - "learning_rate": 1.4343404156829595e-05, - "loss": 0.2624, - "step": 7852 - }, - { - "epoch": 0.7397847436471114, - "grad_norm": 0.7868710160255432, - "learning_rate": 1.4342043960324361e-05, - "loss": 0.3175, - "step": 7853 - }, - { - "epoch": 0.7398789477402793, - "grad_norm": 0.7279520034790039, - "learning_rate": 1.4340683664813809e-05, - "loss": 0.3327, - "step": 7854 - }, - { - "epoch": 0.7399731518334471, - "grad_norm": 0.6738916039466858, - "learning_rate": 1.4339323270328957e-05, - "loss": 0.2856, - "step": 7855 - }, - { - "epoch": 0.740067355926615, - "grad_norm": 1.0068432092666626, - "learning_rate": 1.4337962776900822e-05, - "loss": 0.3266, - "step": 7856 - }, - { - "epoch": 0.7401615600197828, - "grad_norm": 0.6624978184700012, - "learning_rate": 1.4336602184560429e-05, - "loss": 0.301, - "step": 7857 - }, - { - "epoch": 0.7402557641129507, - "grad_norm": 0.9040200114250183, - "learning_rate": 1.43352414933388e-05, - "loss": 0.4003, - "step": 7858 - }, - { - "epoch": 0.7403499682061185, - "grad_norm": 0.7967157959938049, - "learning_rate": 1.4333880703266959e-05, - "loss": 0.319, - "step": 7859 - }, - { - "epoch": 0.7404441722992864, - "grad_norm": 0.7224143147468567, - "learning_rate": 1.4332519814375937e-05, - "loss": 0.3178, - "step": 7860 - }, - { - "epoch": 0.7405383763924542, - "grad_norm": 1.0972416400909424, - "learning_rate": 1.4331158826696761e-05, - "loss": 0.3998, - "step": 7861 - }, - { - "epoch": 0.7406325804856221, - "grad_norm": 0.8151473999023438, - "learning_rate": 1.4329797740260465e-05, - "loss": 0.3415, - "step": 7862 - }, - { - "epoch": 0.7407267845787899, - "grad_norm": 0.6831167936325073, - "learning_rate": 1.4328436555098083e-05, - "loss": 0.2768, 
- "step": 7863 - }, - { - "epoch": 0.7408209886719578, - "grad_norm": 0.8053088188171387, - "learning_rate": 1.432707527124066e-05, - "loss": 0.3417, - "step": 7864 - }, - { - "epoch": 0.7409151927651256, - "grad_norm": 0.6614615321159363, - "learning_rate": 1.4325713888719224e-05, - "loss": 0.2893, - "step": 7865 - }, - { - "epoch": 0.7410093968582935, - "grad_norm": 0.9234987497329712, - "learning_rate": 1.4324352407564824e-05, - "loss": 0.3428, - "step": 7866 - }, - { - "epoch": 0.7411036009514613, - "grad_norm": 0.74969881772995, - "learning_rate": 1.4322990827808499e-05, - "loss": 0.2816, - "step": 7867 - }, - { - "epoch": 0.7411978050446292, - "grad_norm": 0.7505182027816772, - "learning_rate": 1.4321629149481298e-05, - "loss": 0.3518, - "step": 7868 - }, - { - "epoch": 0.741292009137797, - "grad_norm": 0.6784083247184753, - "learning_rate": 1.432026737261427e-05, - "loss": 0.3529, - "step": 7869 - }, - { - "epoch": 0.7413862132309649, - "grad_norm": 0.8002153038978577, - "learning_rate": 1.4318905497238462e-05, - "loss": 0.3351, - "step": 7870 - }, - { - "epoch": 0.7414804173241327, - "grad_norm": 0.8567597270011902, - "learning_rate": 1.4317543523384928e-05, - "loss": 0.2924, - "step": 7871 - }, - { - "epoch": 0.7415746214173006, - "grad_norm": 0.7616846561431885, - "learning_rate": 1.431618145108473e-05, - "loss": 0.3456, - "step": 7872 - }, - { - "epoch": 0.7416688255104684, - "grad_norm": 0.849391520023346, - "learning_rate": 1.4314819280368916e-05, - "loss": 0.3267, - "step": 7873 - }, - { - "epoch": 0.7417630296036363, - "grad_norm": 0.7345016598701477, - "learning_rate": 1.4313457011268552e-05, - "loss": 0.3301, - "step": 7874 - }, - { - "epoch": 0.7418572336968041, - "grad_norm": 0.5992431044578552, - "learning_rate": 1.4312094643814698e-05, - "loss": 0.2562, - "step": 7875 - }, - { - "epoch": 0.741951437789972, - "grad_norm": 0.8267952799797058, - "learning_rate": 1.4310732178038413e-05, - "loss": 0.3047, - "step": 7876 - }, - { - "epoch": 0.7420456418831398, - "grad_norm": 0.679075300693512, - "learning_rate": 1.4309369613970767e-05, - "loss": 0.2938, - "step": 7877 - }, - { - "epoch": 0.7421398459763077, - "grad_norm": 0.758065402507782, - "learning_rate": 1.4308006951642837e-05, - "loss": 0.2984, - "step": 7878 - }, - { - "epoch": 0.7422340500694755, - "grad_norm": 0.7927003502845764, - "learning_rate": 1.430664419108568e-05, - "loss": 0.3213, - "step": 7879 - }, - { - "epoch": 0.7423282541626434, - "grad_norm": 0.7506650686264038, - "learning_rate": 1.4305281332330375e-05, - "loss": 0.2948, - "step": 7880 - }, - { - "epoch": 0.7424224582558112, - "grad_norm": 0.883560061454773, - "learning_rate": 1.4303918375407999e-05, - "loss": 0.3047, - "step": 7881 - }, - { - "epoch": 0.7425166623489791, - "grad_norm": 0.7973462343215942, - "learning_rate": 1.4302555320349627e-05, - "loss": 0.3665, - "step": 7882 - }, - { - "epoch": 0.7426108664421469, - "grad_norm": 0.6922618746757507, - "learning_rate": 1.430119216718634e-05, - "loss": 0.294, - "step": 7883 - }, - { - "epoch": 0.7427050705353148, - "grad_norm": 0.6867779493331909, - "learning_rate": 1.4299828915949221e-05, - "loss": 0.3169, - "step": 7884 - }, - { - "epoch": 0.7427992746284826, - "grad_norm": 0.7345553040504456, - "learning_rate": 1.4298465566669353e-05, - "loss": 0.3628, - "step": 7885 - }, - { - "epoch": 0.7428934787216505, - "grad_norm": 0.6013640761375427, - "learning_rate": 1.4297102119377821e-05, - "loss": 0.2683, - "step": 7886 - }, - { - "epoch": 0.7429876828148183, - "grad_norm": 0.7088180780410767, - 
"learning_rate": 1.4295738574105713e-05, - "loss": 0.3391, - "step": 7887 - }, - { - "epoch": 0.7430818869079862, - "grad_norm": 0.7746397256851196, - "learning_rate": 1.4294374930884126e-05, - "loss": 0.3309, - "step": 7888 - }, - { - "epoch": 0.743176091001154, - "grad_norm": 0.8246579766273499, - "learning_rate": 1.4293011189744146e-05, - "loss": 0.34, - "step": 7889 - }, - { - "epoch": 0.7432702950943219, - "grad_norm": 0.8670780062675476, - "learning_rate": 1.4291647350716876e-05, - "loss": 0.3149, - "step": 7890 - }, - { - "epoch": 0.7433644991874897, - "grad_norm": 0.7426936626434326, - "learning_rate": 1.4290283413833404e-05, - "loss": 0.3235, - "step": 7891 - }, - { - "epoch": 0.7434587032806576, - "grad_norm": 0.6389971971511841, - "learning_rate": 1.4288919379124837e-05, - "loss": 0.2535, - "step": 7892 - }, - { - "epoch": 0.7435529073738254, - "grad_norm": 0.6910268068313599, - "learning_rate": 1.4287555246622276e-05, - "loss": 0.3008, - "step": 7893 - }, - { - "epoch": 0.7436471114669932, - "grad_norm": 1.02792227268219, - "learning_rate": 1.4286191016356822e-05, - "loss": 0.347, - "step": 7894 - }, - { - "epoch": 0.7437413155601611, - "grad_norm": 0.6576209664344788, - "learning_rate": 1.428482668835959e-05, - "loss": 0.29, - "step": 7895 - }, - { - "epoch": 0.743835519653329, - "grad_norm": 0.7317492365837097, - "learning_rate": 1.4283462262661675e-05, - "loss": 0.3104, - "step": 7896 - }, - { - "epoch": 0.7439297237464968, - "grad_norm": 0.6787400245666504, - "learning_rate": 1.42820977392942e-05, - "loss": 0.3396, - "step": 7897 - }, - { - "epoch": 0.7440239278396646, - "grad_norm": 0.7138457894325256, - "learning_rate": 1.4280733118288277e-05, - "loss": 0.2857, - "step": 7898 - }, - { - "epoch": 0.7441181319328325, - "grad_norm": 0.7572120428085327, - "learning_rate": 1.4279368399675015e-05, - "loss": 0.3215, - "step": 7899 - }, - { - "epoch": 0.7442123360260003, - "grad_norm": 0.6458143591880798, - "learning_rate": 1.4278003583485537e-05, - "loss": 0.2809, - "step": 7900 - }, - { - "epoch": 0.7443065401191682, - "grad_norm": 0.7505171298980713, - "learning_rate": 1.427663866975096e-05, - "loss": 0.3363, - "step": 7901 - }, - { - "epoch": 0.744400744212336, - "grad_norm": 0.7094796895980835, - "learning_rate": 1.4275273658502407e-05, - "loss": 0.3119, - "step": 7902 - }, - { - "epoch": 0.7444949483055039, - "grad_norm": 0.8144106864929199, - "learning_rate": 1.4273908549771003e-05, - "loss": 0.3166, - "step": 7903 - }, - { - "epoch": 0.7445891523986717, - "grad_norm": 0.6488919258117676, - "learning_rate": 1.4272543343587875e-05, - "loss": 0.309, - "step": 7904 - }, - { - "epoch": 0.7446833564918396, - "grad_norm": 0.8431639671325684, - "learning_rate": 1.4271178039984153e-05, - "loss": 0.3034, - "step": 7905 - }, - { - "epoch": 0.7447775605850074, - "grad_norm": 0.7071564197540283, - "learning_rate": 1.4269812638990965e-05, - "loss": 0.2825, - "step": 7906 - }, - { - "epoch": 0.7448717646781753, - "grad_norm": 0.6395066380500793, - "learning_rate": 1.4268447140639444e-05, - "loss": 0.291, - "step": 7907 - }, - { - "epoch": 0.7449659687713431, - "grad_norm": 0.6347860097885132, - "learning_rate": 1.4267081544960726e-05, - "loss": 0.2977, - "step": 7908 - }, - { - "epoch": 0.745060172864511, - "grad_norm": 0.6679201722145081, - "learning_rate": 1.4265715851985951e-05, - "loss": 0.2718, - "step": 7909 - }, - { - "epoch": 0.7451543769576788, - "grad_norm": 0.7665024995803833, - "learning_rate": 1.4264350061746259e-05, - "loss": 0.3337, - "step": 7910 - }, - { - "epoch": 
0.7452485810508467, - "grad_norm": 0.7200942039489746, - "learning_rate": 1.4262984174272787e-05, - "loss": 0.3243, - "step": 7911 - }, - { - "epoch": 0.7453427851440145, - "grad_norm": 0.8476040959358215, - "learning_rate": 1.4261618189596687e-05, - "loss": 0.3242, - "step": 7912 - }, - { - "epoch": 0.7454369892371824, - "grad_norm": 0.7364603877067566, - "learning_rate": 1.42602521077491e-05, - "loss": 0.342, - "step": 7913 - }, - { - "epoch": 0.7455311933303502, - "grad_norm": 0.8330252170562744, - "learning_rate": 1.4258885928761175e-05, - "loss": 0.316, - "step": 7914 - }, - { - "epoch": 0.7456253974235181, - "grad_norm": 0.7311208844184875, - "learning_rate": 1.4257519652664069e-05, - "loss": 0.3249, - "step": 7915 - }, - { - "epoch": 0.7457196015166859, - "grad_norm": 0.7431167960166931, - "learning_rate": 1.4256153279488925e-05, - "loss": 0.3321, - "step": 7916 - }, - { - "epoch": 0.7458138056098538, - "grad_norm": 0.8051790595054626, - "learning_rate": 1.4254786809266907e-05, - "loss": 0.3327, - "step": 7917 - }, - { - "epoch": 0.7459080097030216, - "grad_norm": 0.7629981637001038, - "learning_rate": 1.4253420242029168e-05, - "loss": 0.3202, - "step": 7918 - }, - { - "epoch": 0.7460022137961895, - "grad_norm": 0.7642293572425842, - "learning_rate": 1.4252053577806867e-05, - "loss": 0.3112, - "step": 7919 - }, - { - "epoch": 0.7460964178893573, - "grad_norm": 0.6936383843421936, - "learning_rate": 1.4250686816631174e-05, - "loss": 0.3177, - "step": 7920 - }, - { - "epoch": 0.7461906219825252, - "grad_norm": 0.7215296030044556, - "learning_rate": 1.4249319958533245e-05, - "loss": 0.2934, - "step": 7921 - }, - { - "epoch": 0.746284826075693, - "grad_norm": 0.7291421294212341, - "learning_rate": 1.4247953003544248e-05, - "loss": 0.3014, - "step": 7922 - }, - { - "epoch": 0.7463790301688609, - "grad_norm": 0.7604468464851379, - "learning_rate": 1.4246585951695356e-05, - "loss": 0.3054, - "step": 7923 - }, - { - "epoch": 0.7464732342620287, - "grad_norm": 0.6813427209854126, - "learning_rate": 1.4245218803017735e-05, - "loss": 0.3009, - "step": 7924 - }, - { - "epoch": 0.7465674383551966, - "grad_norm": 0.6189952492713928, - "learning_rate": 1.4243851557542561e-05, - "loss": 0.2692, - "step": 7925 - }, - { - "epoch": 0.7466616424483644, - "grad_norm": 0.8515956997871399, - "learning_rate": 1.4242484215301009e-05, - "loss": 0.2973, - "step": 7926 - }, - { - "epoch": 0.7467558465415323, - "grad_norm": 0.7358274459838867, - "learning_rate": 1.4241116776324253e-05, - "loss": 0.2999, - "step": 7927 - }, - { - "epoch": 0.7468500506347001, - "grad_norm": 0.7132756114006042, - "learning_rate": 1.4239749240643477e-05, - "loss": 0.3015, - "step": 7928 - }, - { - "epoch": 0.746944254727868, - "grad_norm": 0.7790988087654114, - "learning_rate": 1.4238381608289862e-05, - "loss": 0.2955, - "step": 7929 - }, - { - "epoch": 0.7470384588210358, - "grad_norm": 0.6831228137016296, - "learning_rate": 1.423701387929459e-05, - "loss": 0.3137, - "step": 7930 - }, - { - "epoch": 0.7471326629142037, - "grad_norm": 0.8106114268302917, - "learning_rate": 1.4235646053688847e-05, - "loss": 0.3298, - "step": 7931 - }, - { - "epoch": 0.7472268670073715, - "grad_norm": 0.8043280243873596, - "learning_rate": 1.4234278131503829e-05, - "loss": 0.3127, - "step": 7932 - }, - { - "epoch": 0.7473210711005394, - "grad_norm": 0.7510353922843933, - "learning_rate": 1.4232910112770717e-05, - "loss": 0.3049, - "step": 7933 - }, - { - "epoch": 0.7474152751937072, - "grad_norm": 0.7884879112243652, - "learning_rate": 
1.4231541997520708e-05, - "loss": 0.3412, - "step": 7934 - }, - { - "epoch": 0.7475094792868751, - "grad_norm": 0.7577821612358093, - "learning_rate": 1.4230173785785002e-05, - "loss": 0.297, - "step": 7935 - }, - { - "epoch": 0.7476036833800429, - "grad_norm": 0.7543067336082458, - "learning_rate": 1.4228805477594788e-05, - "loss": 0.314, - "step": 7936 - }, - { - "epoch": 0.7476978874732108, - "grad_norm": 0.7481685876846313, - "learning_rate": 1.422743707298127e-05, - "loss": 0.31, - "step": 7937 - }, - { - "epoch": 0.7477920915663786, - "grad_norm": 0.7791463136672974, - "learning_rate": 1.4226068571975647e-05, - "loss": 0.3088, - "step": 7938 - }, - { - "epoch": 0.7478862956595465, - "grad_norm": 0.7431942820549011, - "learning_rate": 1.4224699974609125e-05, - "loss": 0.2799, - "step": 7939 - }, - { - "epoch": 0.7479804997527143, - "grad_norm": 0.7565886974334717, - "learning_rate": 1.4223331280912912e-05, - "loss": 0.3445, - "step": 7940 - }, - { - "epoch": 0.7480747038458821, - "grad_norm": 0.7745346426963806, - "learning_rate": 1.4221962490918214e-05, - "loss": 0.3155, - "step": 7941 - }, - { - "epoch": 0.74816890793905, - "grad_norm": 0.6992605328559875, - "learning_rate": 1.422059360465624e-05, - "loss": 0.2928, - "step": 7942 - }, - { - "epoch": 0.7482631120322178, - "grad_norm": 0.7950454354286194, - "learning_rate": 1.4219224622158204e-05, - "loss": 0.3356, - "step": 7943 - }, - { - "epoch": 0.7483573161253857, - "grad_norm": 0.8491213917732239, - "learning_rate": 1.4217855543455323e-05, - "loss": 0.3467, - "step": 7944 - }, - { - "epoch": 0.7484515202185535, - "grad_norm": 0.6545068621635437, - "learning_rate": 1.4216486368578815e-05, - "loss": 0.3091, - "step": 7945 - }, - { - "epoch": 0.7485457243117214, - "grad_norm": 0.7261474132537842, - "learning_rate": 1.4215117097559893e-05, - "loss": 0.34, - "step": 7946 - }, - { - "epoch": 0.7486399284048892, - "grad_norm": 0.7189820408821106, - "learning_rate": 1.4213747730429783e-05, - "loss": 0.3038, - "step": 7947 - }, - { - "epoch": 0.7487341324980571, - "grad_norm": 0.9007904529571533, - "learning_rate": 1.421237826721971e-05, - "loss": 0.3348, - "step": 7948 - }, - { - "epoch": 0.7488283365912249, - "grad_norm": 0.8184025883674622, - "learning_rate": 1.4211008707960897e-05, - "loss": 0.3194, - "step": 7949 - }, - { - "epoch": 0.7489225406843928, - "grad_norm": 0.7761973738670349, - "learning_rate": 1.4209639052684574e-05, - "loss": 0.317, - "step": 7950 - }, - { - "epoch": 0.7490167447775606, - "grad_norm": 0.8261517286300659, - "learning_rate": 1.4208269301421966e-05, - "loss": 0.3124, - "step": 7951 - }, - { - "epoch": 0.7491109488707285, - "grad_norm": 0.7534788250923157, - "learning_rate": 1.4206899454204315e-05, - "loss": 0.3432, - "step": 7952 - }, - { - "epoch": 0.7492051529638962, - "grad_norm": 0.7461051940917969, - "learning_rate": 1.4205529511062847e-05, - "loss": 0.3091, - "step": 7953 - }, - { - "epoch": 0.7492993570570641, - "grad_norm": 0.7263821363449097, - "learning_rate": 1.4204159472028801e-05, - "loss": 0.3569, - "step": 7954 - }, - { - "epoch": 0.7493935611502319, - "grad_norm": 0.7321131229400635, - "learning_rate": 1.4202789337133425e-05, - "loss": 0.308, - "step": 7955 - }, - { - "epoch": 0.7494877652433998, - "grad_norm": 0.7263550162315369, - "learning_rate": 1.4201419106407946e-05, - "loss": 0.3249, - "step": 7956 - }, - { - "epoch": 0.7495819693365676, - "grad_norm": 0.7975026369094849, - "learning_rate": 1.4200048779883613e-05, - "loss": 0.2992, - "step": 7957 - }, - { - "epoch": 
0.7496761734297355, - "grad_norm": 0.7149685025215149, - "learning_rate": 1.4198678357591676e-05, - "loss": 0.3195, - "step": 7958 - }, - { - "epoch": 0.7497703775229033, - "grad_norm": 1.5166430473327637, - "learning_rate": 1.4197307839563375e-05, - "loss": 0.3252, - "step": 7959 - }, - { - "epoch": 0.7498645816160712, - "grad_norm": 0.9258027076721191, - "learning_rate": 1.4195937225829965e-05, - "loss": 0.3329, - "step": 7960 - }, - { - "epoch": 0.749958785709239, - "grad_norm": 0.7562490105628967, - "learning_rate": 1.4194566516422698e-05, - "loss": 0.3796, - "step": 7961 - }, - { - "epoch": 0.7500529898024069, - "grad_norm": 0.6452712416648865, - "learning_rate": 1.4193195711372828e-05, - "loss": 0.2851, - "step": 7962 - }, - { - "epoch": 0.7501471938955747, - "grad_norm": 0.7906914949417114, - "learning_rate": 1.4191824810711615e-05, - "loss": 0.3526, - "step": 7963 - }, - { - "epoch": 0.7502413979887426, - "grad_norm": 0.7486674189567566, - "learning_rate": 1.4190453814470306e-05, - "loss": 0.2646, - "step": 7964 - }, - { - "epoch": 0.7503356020819104, - "grad_norm": 0.8381386995315552, - "learning_rate": 1.4189082722680173e-05, - "loss": 0.375, - "step": 7965 - }, - { - "epoch": 0.7504298061750783, - "grad_norm": 0.7577159404754639, - "learning_rate": 1.4187711535372475e-05, - "loss": 0.2916, - "step": 7966 - }, - { - "epoch": 0.7505240102682461, - "grad_norm": 0.6354668140411377, - "learning_rate": 1.4186340252578475e-05, - "loss": 0.2829, - "step": 7967 - }, - { - "epoch": 0.750618214361414, - "grad_norm": 0.865922212600708, - "learning_rate": 1.4184968874329446e-05, - "loss": 0.3279, - "step": 7968 - }, - { - "epoch": 0.7507124184545818, - "grad_norm": 0.7015376091003418, - "learning_rate": 1.4183597400656655e-05, - "loss": 0.2861, - "step": 7969 - }, - { - "epoch": 0.7508066225477497, - "grad_norm": 0.6503175497055054, - "learning_rate": 1.4182225831591372e-05, - "loss": 0.2547, - "step": 7970 - }, - { - "epoch": 0.7509008266409175, - "grad_norm": 0.7912094593048096, - "learning_rate": 1.418085416716487e-05, - "loss": 0.3194, - "step": 7971 - }, - { - "epoch": 0.7509950307340854, - "grad_norm": 0.7017026543617249, - "learning_rate": 1.4179482407408429e-05, - "loss": 0.2929, - "step": 7972 - }, - { - "epoch": 0.7510892348272532, - "grad_norm": 0.7565056681632996, - "learning_rate": 1.4178110552353329e-05, - "loss": 0.2862, - "step": 7973 - }, - { - "epoch": 0.751183438920421, - "grad_norm": 0.7025262713432312, - "learning_rate": 1.4176738602030842e-05, - "loss": 0.3332, - "step": 7974 - }, - { - "epoch": 0.7512776430135889, - "grad_norm": 0.6589545607566833, - "learning_rate": 1.4175366556472259e-05, - "loss": 0.2889, - "step": 7975 - }, - { - "epoch": 0.7513718471067568, - "grad_norm": 0.8647730946540833, - "learning_rate": 1.4173994415708856e-05, - "loss": 0.3268, - "step": 7976 - }, - { - "epoch": 0.7514660511999246, - "grad_norm": 0.7150059342384338, - "learning_rate": 1.4172622179771929e-05, - "loss": 0.3157, - "step": 7977 - }, - { - "epoch": 0.7515602552930924, - "grad_norm": 0.6853088140487671, - "learning_rate": 1.4171249848692762e-05, - "loss": 0.2909, - "step": 7978 - }, - { - "epoch": 0.7516544593862603, - "grad_norm": 0.6837041974067688, - "learning_rate": 1.4169877422502646e-05, - "loss": 0.3078, - "step": 7979 - }, - { - "epoch": 0.7517486634794281, - "grad_norm": 0.7358826994895935, - "learning_rate": 1.416850490123288e-05, - "loss": 0.2862, - "step": 7980 - }, - { - "epoch": 0.751842867572596, - "grad_norm": 0.6721023917198181, - "learning_rate": 
1.4167132284914752e-05, - "loss": 0.3298, - "step": 7981 - }, - { - "epoch": 0.7519370716657638, - "grad_norm": 0.747821569442749, - "learning_rate": 1.4165759573579565e-05, - "loss": 0.3282, - "step": 7982 - }, - { - "epoch": 0.7520312757589317, - "grad_norm": 0.7575122714042664, - "learning_rate": 1.416438676725862e-05, - "loss": 0.2959, - "step": 7983 - }, - { - "epoch": 0.7521254798520995, - "grad_norm": 0.6453376412391663, - "learning_rate": 1.416301386598321e-05, - "loss": 0.2999, - "step": 7984 - }, - { - "epoch": 0.7522196839452674, - "grad_norm": 0.748228907585144, - "learning_rate": 1.4161640869784651e-05, - "loss": 0.3322, - "step": 7985 - }, - { - "epoch": 0.7523138880384352, - "grad_norm": 0.8601110577583313, - "learning_rate": 1.4160267778694241e-05, - "loss": 0.3335, - "step": 7986 - }, - { - "epoch": 0.7524080921316031, - "grad_norm": 0.7255735993385315, - "learning_rate": 1.4158894592743293e-05, - "loss": 0.3292, - "step": 7987 - }, - { - "epoch": 0.7525022962247709, - "grad_norm": 0.7679718136787415, - "learning_rate": 1.4157521311963116e-05, - "loss": 0.3068, - "step": 7988 - }, - { - "epoch": 0.7525965003179388, - "grad_norm": 0.6139155626296997, - "learning_rate": 1.4156147936385023e-05, - "loss": 0.2903, - "step": 7989 - }, - { - "epoch": 0.7526907044111066, - "grad_norm": 0.7174772620201111, - "learning_rate": 1.415477446604033e-05, - "loss": 0.2985, - "step": 7990 - }, - { - "epoch": 0.7527849085042745, - "grad_norm": 0.7099175453186035, - "learning_rate": 1.4153400900960353e-05, - "loss": 0.3357, - "step": 7991 - }, - { - "epoch": 0.7528791125974423, - "grad_norm": 0.6271936893463135, - "learning_rate": 1.4152027241176414e-05, - "loss": 0.2822, - "step": 7992 - }, - { - "epoch": 0.7529733166906102, - "grad_norm": 0.7048296928405762, - "learning_rate": 1.4150653486719832e-05, - "loss": 0.2922, - "step": 7993 - }, - { - "epoch": 0.753067520783778, - "grad_norm": 0.8052453398704529, - "learning_rate": 1.414927963762193e-05, - "loss": 0.3411, - "step": 7994 - }, - { - "epoch": 0.7531617248769459, - "grad_norm": 0.763670802116394, - "learning_rate": 1.4147905693914037e-05, - "loss": 0.3061, - "step": 7995 - }, - { - "epoch": 0.7532559289701137, - "grad_norm": 0.6347185969352722, - "learning_rate": 1.4146531655627476e-05, - "loss": 0.2963, - "step": 7996 - }, - { - "epoch": 0.7533501330632816, - "grad_norm": 0.7523487210273743, - "learning_rate": 1.4145157522793584e-05, - "loss": 0.3171, - "step": 7997 - }, - { - "epoch": 0.7534443371564494, - "grad_norm": 0.7609498500823975, - "learning_rate": 1.414378329544369e-05, - "loss": 0.3208, - "step": 7998 - }, - { - "epoch": 0.7535385412496173, - "grad_norm": 0.8232473731040955, - "learning_rate": 1.4142408973609125e-05, - "loss": 0.3439, - "step": 7999 - }, - { - "epoch": 0.7536327453427851, - "grad_norm": 0.7161168456077576, - "learning_rate": 1.4141034557321232e-05, - "loss": 0.3228, - "step": 8000 - }, - { - "epoch": 0.753726949435953, - "grad_norm": 0.711948812007904, - "learning_rate": 1.4139660046611346e-05, - "loss": 0.319, - "step": 8001 - }, - { - "epoch": 0.7538211535291208, - "grad_norm": 0.8147966265678406, - "learning_rate": 1.4138285441510808e-05, - "loss": 0.3277, - "step": 8002 - }, - { - "epoch": 0.7539153576222887, - "grad_norm": 0.6981361508369446, - "learning_rate": 1.4136910742050965e-05, - "loss": 0.3249, - "step": 8003 - }, - { - "epoch": 0.7540095617154565, - "grad_norm": 0.754050612449646, - "learning_rate": 1.4135535948263155e-05, - "loss": 0.32, - "step": 8004 - }, - { - "epoch": 0.7541037658086244, 
- "grad_norm": 0.7808064818382263, - "learning_rate": 1.4134161060178732e-05, - "loss": 0.3342, - "step": 8005 - }, - { - "epoch": 0.7541979699017922, - "grad_norm": 0.7009022831916809, - "learning_rate": 1.4132786077829044e-05, - "loss": 0.3126, - "step": 8006 - }, - { - "epoch": 0.7542921739949601, - "grad_norm": 0.7169830203056335, - "learning_rate": 1.4131411001245438e-05, - "loss": 0.3044, - "step": 8007 - }, - { - "epoch": 0.7543863780881279, - "grad_norm": 0.7986016273498535, - "learning_rate": 1.4130035830459276e-05, - "loss": 0.3823, - "step": 8008 - }, - { - "epoch": 0.7544805821812958, - "grad_norm": 0.7309896349906921, - "learning_rate": 1.4128660565501911e-05, - "loss": 0.3422, - "step": 8009 - }, - { - "epoch": 0.7545747862744636, - "grad_norm": 0.7200166583061218, - "learning_rate": 1.4127285206404697e-05, - "loss": 0.3264, - "step": 8010 - }, - { - "epoch": 0.7546689903676315, - "grad_norm": 0.805809497833252, - "learning_rate": 1.4125909753198996e-05, - "loss": 0.3373, - "step": 8011 - }, - { - "epoch": 0.7547631944607993, - "grad_norm": 0.6786282658576965, - "learning_rate": 1.4124534205916174e-05, - "loss": 0.3372, - "step": 8012 - }, - { - "epoch": 0.7548573985539672, - "grad_norm": 0.6856420040130615, - "learning_rate": 1.4123158564587594e-05, - "loss": 0.3215, - "step": 8013 - }, - { - "epoch": 0.754951602647135, - "grad_norm": 0.676866888999939, - "learning_rate": 1.4121782829244618e-05, - "loss": 0.2929, - "step": 8014 - }, - { - "epoch": 0.7550458067403029, - "grad_norm": 0.7994716167449951, - "learning_rate": 1.4120406999918626e-05, - "loss": 0.3283, - "step": 8015 - }, - { - "epoch": 0.7551400108334707, - "grad_norm": 0.8341429829597473, - "learning_rate": 1.4119031076640977e-05, - "loss": 0.3011, - "step": 8016 - }, - { - "epoch": 0.7552342149266386, - "grad_norm": 0.6932470798492432, - "learning_rate": 1.4117655059443052e-05, - "loss": 0.2757, - "step": 8017 - }, - { - "epoch": 0.7553284190198064, - "grad_norm": 0.9134334921836853, - "learning_rate": 1.4116278948356222e-05, - "loss": 0.3036, - "step": 8018 - }, - { - "epoch": 0.7554226231129743, - "grad_norm": 0.7064512968063354, - "learning_rate": 1.4114902743411864e-05, - "loss": 0.3207, - "step": 8019 - }, - { - "epoch": 0.7555168272061421, - "grad_norm": 0.691676914691925, - "learning_rate": 1.4113526444641363e-05, - "loss": 0.3261, - "step": 8020 - }, - { - "epoch": 0.75561103129931, - "grad_norm": 0.6851295232772827, - "learning_rate": 1.4112150052076094e-05, - "loss": 0.3128, - "step": 8021 - }, - { - "epoch": 0.7557052353924778, - "grad_norm": 1.0884650945663452, - "learning_rate": 1.4110773565747446e-05, - "loss": 0.3101, - "step": 8022 - }, - { - "epoch": 0.7557994394856457, - "grad_norm": 0.754021167755127, - "learning_rate": 1.4109396985686808e-05, - "loss": 0.3529, - "step": 8023 - }, - { - "epoch": 0.7558936435788135, - "grad_norm": 0.6890600919723511, - "learning_rate": 1.4108020311925557e-05, - "loss": 0.2677, - "step": 8024 - }, - { - "epoch": 0.7559878476719814, - "grad_norm": 0.673433244228363, - "learning_rate": 1.4106643544495092e-05, - "loss": 0.2949, - "step": 8025 - }, - { - "epoch": 0.7560820517651492, - "grad_norm": 0.7391211986541748, - "learning_rate": 1.4105266683426804e-05, - "loss": 0.2777, - "step": 8026 - }, - { - "epoch": 0.756176255858317, - "grad_norm": 0.7063663601875305, - "learning_rate": 1.4103889728752083e-05, - "loss": 0.3192, - "step": 8027 - }, - { - "epoch": 0.7562704599514849, - "grad_norm": 0.6587368845939636, - "learning_rate": 1.4102512680502333e-05, - "loss": 
0.2801, - "step": 8028 - }, - { - "epoch": 0.7563646640446527, - "grad_norm": 0.7968171238899231, - "learning_rate": 1.4101135538708948e-05, - "loss": 0.3284, - "step": 8029 - }, - { - "epoch": 0.7564588681378206, - "grad_norm": 0.6723277568817139, - "learning_rate": 1.4099758303403333e-05, - "loss": 0.2703, - "step": 8030 - }, - { - "epoch": 0.7565530722309884, - "grad_norm": 1.5148507356643677, - "learning_rate": 1.4098380974616882e-05, - "loss": 0.3164, - "step": 8031 - }, - { - "epoch": 0.7566472763241563, - "grad_norm": 0.7872427701950073, - "learning_rate": 1.4097003552381012e-05, - "loss": 0.2998, - "step": 8032 - }, - { - "epoch": 0.7567414804173241, - "grad_norm": 0.6773476004600525, - "learning_rate": 1.4095626036727124e-05, - "loss": 0.3328, - "step": 8033 - }, - { - "epoch": 0.756835684510492, - "grad_norm": 0.7695625424385071, - "learning_rate": 1.4094248427686628e-05, - "loss": 0.3296, - "step": 8034 - }, - { - "epoch": 0.7569298886036598, - "grad_norm": 0.8502200841903687, - "learning_rate": 1.4092870725290934e-05, - "loss": 0.3237, - "step": 8035 - }, - { - "epoch": 0.7570240926968277, - "grad_norm": 0.7770158052444458, - "learning_rate": 1.4091492929571458e-05, - "loss": 0.3165, - "step": 8036 - }, - { - "epoch": 0.7571182967899955, - "grad_norm": 0.8137102127075195, - "learning_rate": 1.4090115040559617e-05, - "loss": 0.3679, - "step": 8037 - }, - { - "epoch": 0.7572125008831634, - "grad_norm": 0.7285508513450623, - "learning_rate": 1.4088737058286828e-05, - "loss": 0.3147, - "step": 8038 - }, - { - "epoch": 0.7573067049763312, - "grad_norm": 0.793168842792511, - "learning_rate": 1.4087358982784509e-05, - "loss": 0.3697, - "step": 8039 - }, - { - "epoch": 0.7574009090694991, - "grad_norm": 0.6035233736038208, - "learning_rate": 1.4085980814084086e-05, - "loss": 0.2632, - "step": 8040 - }, - { - "epoch": 0.7574951131626669, - "grad_norm": 0.6877842545509338, - "learning_rate": 1.408460255221698e-05, - "loss": 0.2853, - "step": 8041 - }, - { - "epoch": 0.7575893172558348, - "grad_norm": 0.6556431651115417, - "learning_rate": 1.4083224197214618e-05, - "loss": 0.3135, - "step": 8042 - }, - { - "epoch": 0.7576835213490026, - "grad_norm": 0.7101203203201294, - "learning_rate": 1.4081845749108433e-05, - "loss": 0.3361, - "step": 8043 - }, - { - "epoch": 0.7577777254421705, - "grad_norm": 0.8247424960136414, - "learning_rate": 1.408046720792985e-05, - "loss": 0.3474, - "step": 8044 - }, - { - "epoch": 0.7578719295353383, - "grad_norm": 0.8068971633911133, - "learning_rate": 1.4079088573710302e-05, - "loss": 0.3401, - "step": 8045 - }, - { - "epoch": 0.7579661336285062, - "grad_norm": 0.82241290807724, - "learning_rate": 1.407770984648123e-05, - "loss": 0.3199, - "step": 8046 - }, - { - "epoch": 0.758060337721674, - "grad_norm": 0.8863838911056519, - "learning_rate": 1.4076331026274063e-05, - "loss": 0.33, - "step": 8047 - }, - { - "epoch": 0.7581545418148419, - "grad_norm": 0.8461202383041382, - "learning_rate": 1.4074952113120248e-05, - "loss": 0.3091, - "step": 8048 - }, - { - "epoch": 0.7582487459080097, - "grad_norm": 0.8029640913009644, - "learning_rate": 1.4073573107051222e-05, - "loss": 0.3681, - "step": 8049 - }, - { - "epoch": 0.7583429500011776, - "grad_norm": 0.7179197072982788, - "learning_rate": 1.4072194008098427e-05, - "loss": 0.3133, - "step": 8050 - }, - { - "epoch": 0.7584371540943454, - "grad_norm": 0.688663899898529, - "learning_rate": 1.4070814816293313e-05, - "loss": 0.2863, - "step": 8051 - }, - { - "epoch": 0.7585313581875133, - "grad_norm": 
0.732281506061554, - "learning_rate": 1.4069435531667326e-05, - "loss": 0.2834, - "step": 8052 - }, - { - "epoch": 0.7586255622806811, - "grad_norm": 0.6937088370323181, - "learning_rate": 1.4068056154251914e-05, - "loss": 0.2947, - "step": 8053 - }, - { - "epoch": 0.758719766373849, - "grad_norm": 0.7696384191513062, - "learning_rate": 1.4066676684078533e-05, - "loss": 0.322, - "step": 8054 - }, - { - "epoch": 0.7588139704670168, - "grad_norm": 0.6650841236114502, - "learning_rate": 1.4065297121178631e-05, - "loss": 0.3047, - "step": 8055 - }, - { - "epoch": 0.7589081745601847, - "grad_norm": 0.7577145099639893, - "learning_rate": 1.4063917465583668e-05, - "loss": 0.3068, - "step": 8056 - }, - { - "epoch": 0.7590023786533525, - "grad_norm": 0.8542148470878601, - "learning_rate": 1.4062537717325104e-05, - "loss": 0.3257, - "step": 8057 - }, - { - "epoch": 0.7590965827465204, - "grad_norm": 0.6929702758789062, - "learning_rate": 1.4061157876434395e-05, - "loss": 0.3078, - "step": 8058 - }, - { - "epoch": 0.7591907868396882, - "grad_norm": 0.678442120552063, - "learning_rate": 1.4059777942943005e-05, - "loss": 0.2982, - "step": 8059 - }, - { - "epoch": 0.7592849909328561, - "grad_norm": 0.6912717223167419, - "learning_rate": 1.4058397916882402e-05, - "loss": 0.3069, - "step": 8060 - }, - { - "epoch": 0.7593791950260239, - "grad_norm": 0.7928740382194519, - "learning_rate": 1.4057017798284049e-05, - "loss": 0.3178, - "step": 8061 - }, - { - "epoch": 0.7594733991191918, - "grad_norm": 0.861893892288208, - "learning_rate": 1.4055637587179413e-05, - "loss": 0.3265, - "step": 8062 - }, - { - "epoch": 0.7595676032123596, - "grad_norm": 0.7354323267936707, - "learning_rate": 1.4054257283599974e-05, - "loss": 0.3189, - "step": 8063 - }, - { - "epoch": 0.7596618073055275, - "grad_norm": 0.8310542702674866, - "learning_rate": 1.4052876887577194e-05, - "loss": 0.3373, - "step": 8064 - }, - { - "epoch": 0.7597560113986953, - "grad_norm": 0.7149031162261963, - "learning_rate": 1.4051496399142557e-05, - "loss": 0.3172, - "step": 8065 - }, - { - "epoch": 0.7598502154918632, - "grad_norm": 0.8128882646560669, - "learning_rate": 1.4050115818327531e-05, - "loss": 0.2976, - "step": 8066 - }, - { - "epoch": 0.759944419585031, - "grad_norm": 0.6709223985671997, - "learning_rate": 1.4048735145163604e-05, - "loss": 0.3164, - "step": 8067 - }, - { - "epoch": 0.7600386236781989, - "grad_norm": 0.7269225716590881, - "learning_rate": 1.4047354379682254e-05, - "loss": 0.2663, - "step": 8068 - }, - { - "epoch": 0.7601328277713667, - "grad_norm": 0.7139459252357483, - "learning_rate": 1.4045973521914967e-05, - "loss": 0.2883, - "step": 8069 - }, - { - "epoch": 0.7602270318645346, - "grad_norm": 0.8169942498207092, - "learning_rate": 1.4044592571893223e-05, - "loss": 0.345, - "step": 8070 - }, - { - "epoch": 0.7603212359577024, - "grad_norm": 0.7426708340644836, - "learning_rate": 1.4043211529648512e-05, - "loss": 0.2864, - "step": 8071 - }, - { - "epoch": 0.7604154400508703, - "grad_norm": 0.8026089668273926, - "learning_rate": 1.4041830395212328e-05, - "loss": 0.3014, - "step": 8072 - }, - { - "epoch": 0.7605096441440381, - "grad_norm": 0.7458574771881104, - "learning_rate": 1.4040449168616161e-05, - "loss": 0.3015, - "step": 8073 - }, - { - "epoch": 0.760603848237206, - "grad_norm": 0.7134662866592407, - "learning_rate": 1.4039067849891503e-05, - "loss": 0.3032, - "step": 8074 - }, - { - "epoch": 0.7606980523303738, - "grad_norm": 0.6305004954338074, - "learning_rate": 1.4037686439069853e-05, - "loss": 0.2523, - 
"step": 8075 - }, - { - "epoch": 0.7607922564235416, - "grad_norm": 0.8049094080924988, - "learning_rate": 1.4036304936182705e-05, - "loss": 0.3237, - "step": 8076 - }, - { - "epoch": 0.7608864605167095, - "grad_norm": 0.7716397047042847, - "learning_rate": 1.4034923341261565e-05, - "loss": 0.342, - "step": 8077 - }, - { - "epoch": 0.7609806646098773, - "grad_norm": 0.7197418212890625, - "learning_rate": 1.403354165433793e-05, - "loss": 0.3002, - "step": 8078 - }, - { - "epoch": 0.7610748687030452, - "grad_norm": 0.77171790599823, - "learning_rate": 1.4032159875443307e-05, - "loss": 0.341, - "step": 8079 - }, - { - "epoch": 0.761169072796213, - "grad_norm": 0.6065251231193542, - "learning_rate": 1.4030778004609209e-05, - "loss": 0.2757, - "step": 8080 - }, - { - "epoch": 0.7612632768893809, - "grad_norm": 0.7310432195663452, - "learning_rate": 1.4029396041867132e-05, - "loss": 0.3066, - "step": 8081 - }, - { - "epoch": 0.7613574809825487, - "grad_norm": 0.7769371867179871, - "learning_rate": 1.4028013987248595e-05, - "loss": 0.3343, - "step": 8082 - }, - { - "epoch": 0.7614516850757166, - "grad_norm": 0.7630373239517212, - "learning_rate": 1.4026631840785112e-05, - "loss": 0.3188, - "step": 8083 - }, - { - "epoch": 0.7615458891688844, - "grad_norm": 0.794040322303772, - "learning_rate": 1.4025249602508193e-05, - "loss": 0.3359, - "step": 8084 - }, - { - "epoch": 0.7616400932620523, - "grad_norm": 0.9124943017959595, - "learning_rate": 1.4023867272449358e-05, - "loss": 0.3374, - "step": 8085 - }, - { - "epoch": 0.7617342973552201, - "grad_norm": 0.736742377281189, - "learning_rate": 1.4022484850640128e-05, - "loss": 0.3346, - "step": 8086 - }, - { - "epoch": 0.761828501448388, - "grad_norm": 0.7343044877052307, - "learning_rate": 1.402110233711202e-05, - "loss": 0.3091, - "step": 8087 - }, - { - "epoch": 0.7619227055415558, - "grad_norm": 0.6946993470191956, - "learning_rate": 1.4019719731896564e-05, - "loss": 0.3206, - "step": 8088 - }, - { - "epoch": 0.7620169096347237, - "grad_norm": 0.7589704394340515, - "learning_rate": 1.401833703502528e-05, - "loss": 0.3133, - "step": 8089 - }, - { - "epoch": 0.7621111137278915, - "grad_norm": 0.7888718843460083, - "learning_rate": 1.4016954246529697e-05, - "loss": 0.33, - "step": 8090 - }, - { - "epoch": 0.7622053178210594, - "grad_norm": 0.731242299079895, - "learning_rate": 1.4015571366441343e-05, - "loss": 0.3199, - "step": 8091 - }, - { - "epoch": 0.7622995219142271, - "grad_norm": 0.6867145299911499, - "learning_rate": 1.4014188394791753e-05, - "loss": 0.3337, - "step": 8092 - }, - { - "epoch": 0.762393726007395, - "grad_norm": 0.8416774272918701, - "learning_rate": 1.401280533161246e-05, - "loss": 0.3111, - "step": 8093 - }, - { - "epoch": 0.7624879301005628, - "grad_norm": 0.7183943390846252, - "learning_rate": 1.4011422176935e-05, - "loss": 0.3122, - "step": 8094 - }, - { - "epoch": 0.7625821341937307, - "grad_norm": 0.7674915194511414, - "learning_rate": 1.401003893079091e-05, - "loss": 0.3151, - "step": 8095 - }, - { - "epoch": 0.7626763382868985, - "grad_norm": 0.6793264746665955, - "learning_rate": 1.4008655593211729e-05, - "loss": 0.306, - "step": 8096 - }, - { - "epoch": 0.7627705423800664, - "grad_norm": 0.7976694703102112, - "learning_rate": 1.4007272164229003e-05, - "loss": 0.3329, - "step": 8097 - }, - { - "epoch": 0.7628647464732342, - "grad_norm": 0.8111255168914795, - "learning_rate": 1.4005888643874273e-05, - "loss": 0.3308, - "step": 8098 - }, - { - "epoch": 0.7629589505664021, - "grad_norm": 0.6630674600601196, - 
"learning_rate": 1.4004505032179085e-05, - "loss": 0.3016, - "step": 8099 - }, - { - "epoch": 0.7630531546595699, - "grad_norm": 0.6879538297653198, - "learning_rate": 1.4003121329174993e-05, - "loss": 0.3092, - "step": 8100 - }, - { - "epoch": 0.7631473587527378, - "grad_norm": 0.6774730086326599, - "learning_rate": 1.4001737534893542e-05, - "loss": 0.3023, - "step": 8101 - }, - { - "epoch": 0.7632415628459056, - "grad_norm": 0.7137449979782104, - "learning_rate": 1.4000353649366285e-05, - "loss": 0.2802, - "step": 8102 - }, - { - "epoch": 0.7633357669390735, - "grad_norm": 0.6669721007347107, - "learning_rate": 1.3998969672624782e-05, - "loss": 0.2965, - "step": 8103 - }, - { - "epoch": 0.7634299710322413, - "grad_norm": 0.6711835861206055, - "learning_rate": 1.3997585604700584e-05, - "loss": 0.2843, - "step": 8104 - }, - { - "epoch": 0.7635241751254092, - "grad_norm": 0.6854143738746643, - "learning_rate": 1.3996201445625251e-05, - "loss": 0.2827, - "step": 8105 - }, - { - "epoch": 0.763618379218577, - "grad_norm": 0.6946225166320801, - "learning_rate": 1.3994817195430345e-05, - "loss": 0.2896, - "step": 8106 - }, - { - "epoch": 0.7637125833117449, - "grad_norm": 0.7499455809593201, - "learning_rate": 1.3993432854147429e-05, - "loss": 0.3348, - "step": 8107 - }, - { - "epoch": 0.7638067874049127, - "grad_norm": 0.8228910565376282, - "learning_rate": 1.3992048421808066e-05, - "loss": 0.2826, - "step": 8108 - }, - { - "epoch": 0.7639009914980806, - "grad_norm": 0.6496267914772034, - "learning_rate": 1.399066389844383e-05, - "loss": 0.3002, - "step": 8109 - }, - { - "epoch": 0.7639951955912484, - "grad_norm": 0.8050598502159119, - "learning_rate": 1.3989279284086283e-05, - "loss": 0.3685, - "step": 8110 - }, - { - "epoch": 0.7640893996844162, - "grad_norm": 0.7404391169548035, - "learning_rate": 1.3987894578766997e-05, - "loss": 0.3167, - "step": 8111 - }, - { - "epoch": 0.7641836037775841, - "grad_norm": 0.7374035716056824, - "learning_rate": 1.3986509782517548e-05, - "loss": 0.3142, - "step": 8112 - }, - { - "epoch": 0.764277807870752, - "grad_norm": 0.7885564565658569, - "learning_rate": 1.3985124895369513e-05, - "loss": 0.3198, - "step": 8113 - }, - { - "epoch": 0.7643720119639198, - "grad_norm": 0.7686183452606201, - "learning_rate": 1.3983739917354466e-05, - "loss": 0.3225, - "step": 8114 - }, - { - "epoch": 0.7644662160570876, - "grad_norm": 0.7687329649925232, - "learning_rate": 1.3982354848503987e-05, - "loss": 0.3247, - "step": 8115 - }, - { - "epoch": 0.7645604201502555, - "grad_norm": 0.7419228553771973, - "learning_rate": 1.3980969688849659e-05, - "loss": 0.3078, - "step": 8116 - }, - { - "epoch": 0.7646546242434233, - "grad_norm": 0.7465457320213318, - "learning_rate": 1.3979584438423066e-05, - "loss": 0.3261, - "step": 8117 - }, - { - "epoch": 0.7647488283365912, - "grad_norm": 0.678605318069458, - "learning_rate": 1.3978199097255791e-05, - "loss": 0.3024, - "step": 8118 - }, - { - "epoch": 0.764843032429759, - "grad_norm": 0.8090999126434326, - "learning_rate": 1.3976813665379427e-05, - "loss": 0.2924, - "step": 8119 - }, - { - "epoch": 0.7649372365229269, - "grad_norm": 0.7168018817901611, - "learning_rate": 1.3975428142825562e-05, - "loss": 0.303, - "step": 8120 - }, - { - "epoch": 0.7650314406160947, - "grad_norm": 0.7721872925758362, - "learning_rate": 1.3974042529625784e-05, - "loss": 0.2712, - "step": 8121 - }, - { - "epoch": 0.7651256447092626, - "grad_norm": 0.8302431702613831, - "learning_rate": 1.3972656825811691e-05, - "loss": 0.2959, - "step": 8122 - }, - { - 
"epoch": 0.7652198488024304, - "grad_norm": 0.7172962427139282, - "learning_rate": 1.3971271031414882e-05, - "loss": 0.3422, - "step": 8123 - }, - { - "epoch": 0.7653140528955983, - "grad_norm": 0.8304644823074341, - "learning_rate": 1.3969885146466946e-05, - "loss": 0.3443, - "step": 8124 - }, - { - "epoch": 0.7654082569887661, - "grad_norm": 0.6628880500793457, - "learning_rate": 1.3968499170999495e-05, - "loss": 0.2508, - "step": 8125 - }, - { - "epoch": 0.765502461081934, - "grad_norm": 0.7547474503517151, - "learning_rate": 1.3967113105044121e-05, - "loss": 0.3166, - "step": 8126 - }, - { - "epoch": 0.7655966651751018, - "grad_norm": 2.869410991668701, - "learning_rate": 1.3965726948632434e-05, - "loss": 0.3228, - "step": 8127 - }, - { - "epoch": 0.7656908692682697, - "grad_norm": 0.715911865234375, - "learning_rate": 1.396434070179604e-05, - "loss": 0.3164, - "step": 8128 - }, - { - "epoch": 0.7657850733614375, - "grad_norm": 0.6984981298446655, - "learning_rate": 1.3962954364566547e-05, - "loss": 0.308, - "step": 8129 - }, - { - "epoch": 0.7658792774546054, - "grad_norm": 0.7362462282180786, - "learning_rate": 1.3961567936975566e-05, - "loss": 0.2996, - "step": 8130 - }, - { - "epoch": 0.7659734815477732, - "grad_norm": 0.8271350860595703, - "learning_rate": 1.3960181419054708e-05, - "loss": 0.3011, - "step": 8131 - }, - { - "epoch": 0.7660676856409411, - "grad_norm": 0.7668757438659668, - "learning_rate": 1.3958794810835592e-05, - "loss": 0.3077, - "step": 8132 - }, - { - "epoch": 0.7661618897341089, - "grad_norm": 0.916616678237915, - "learning_rate": 1.395740811234983e-05, - "loss": 0.3014, - "step": 8133 - }, - { - "epoch": 0.7662560938272768, - "grad_norm": 0.6834691762924194, - "learning_rate": 1.3956021323629043e-05, - "loss": 0.3064, - "step": 8134 - }, - { - "epoch": 0.7663502979204446, - "grad_norm": 0.7731265425682068, - "learning_rate": 1.3954634444704854e-05, - "loss": 0.3148, - "step": 8135 - }, - { - "epoch": 0.7664445020136125, - "grad_norm": 0.805877149105072, - "learning_rate": 1.395324747560888e-05, - "loss": 0.3069, - "step": 8136 - }, - { - "epoch": 0.7665387061067803, - "grad_norm": 0.6751599311828613, - "learning_rate": 1.3951860416372748e-05, - "loss": 0.3253, - "step": 8137 - }, - { - "epoch": 0.7666329101999482, - "grad_norm": 0.6992517113685608, - "learning_rate": 1.3950473267028093e-05, - "loss": 0.305, - "step": 8138 - }, - { - "epoch": 0.766727114293116, - "grad_norm": 0.7160972356796265, - "learning_rate": 1.3949086027606533e-05, - "loss": 0.2926, - "step": 8139 - }, - { - "epoch": 0.7668213183862839, - "grad_norm": 0.8054567575454712, - "learning_rate": 1.3947698698139708e-05, - "loss": 0.3314, - "step": 8140 - }, - { - "epoch": 0.7669155224794517, - "grad_norm": 0.7271167635917664, - "learning_rate": 1.3946311278659246e-05, - "loss": 0.3636, - "step": 8141 - }, - { - "epoch": 0.7670097265726196, - "grad_norm": 0.68764328956604, - "learning_rate": 1.3944923769196781e-05, - "loss": 0.3025, - "step": 8142 - }, - { - "epoch": 0.7671039306657874, - "grad_norm": 0.9515770673751831, - "learning_rate": 1.3943536169783958e-05, - "loss": 0.3, - "step": 8143 - }, - { - "epoch": 0.7671981347589553, - "grad_norm": 0.7493038773536682, - "learning_rate": 1.3942148480452407e-05, - "loss": 0.3265, - "step": 8144 - }, - { - "epoch": 0.7672923388521231, - "grad_norm": 0.8873345851898193, - "learning_rate": 1.3940760701233775e-05, - "loss": 0.3252, - "step": 8145 - }, - { - "epoch": 0.767386542945291, - "grad_norm": 0.7618383169174194, - "learning_rate": 
1.3939372832159709e-05, - "loss": 0.3099, - "step": 8146 - }, - { - "epoch": 0.7674807470384588, - "grad_norm": 0.7449729442596436, - "learning_rate": 1.3937984873261843e-05, - "loss": 0.3375, - "step": 8147 - }, - { - "epoch": 0.7675749511316267, - "grad_norm": 0.6962200403213501, - "learning_rate": 1.3936596824571838e-05, - "loss": 0.362, - "step": 8148 - }, - { - "epoch": 0.7676691552247945, - "grad_norm": 0.6698456406593323, - "learning_rate": 1.3935208686121333e-05, - "loss": 0.2944, - "step": 8149 - }, - { - "epoch": 0.7677633593179624, - "grad_norm": 0.6792502999305725, - "learning_rate": 1.3933820457941986e-05, - "loss": 0.3251, - "step": 8150 - }, - { - "epoch": 0.7678575634111302, - "grad_norm": 0.7187953591346741, - "learning_rate": 1.3932432140065451e-05, - "loss": 0.3096, - "step": 8151 - }, - { - "epoch": 0.767951767504298, - "grad_norm": 0.6905596256256104, - "learning_rate": 1.3931043732523377e-05, - "loss": 0.2816, - "step": 8152 - }, - { - "epoch": 0.7680459715974659, - "grad_norm": 0.8274813890457153, - "learning_rate": 1.3929655235347429e-05, - "loss": 0.3219, - "step": 8153 - }, - { - "epoch": 0.7681401756906338, - "grad_norm": 0.7077164053916931, - "learning_rate": 1.3928266648569264e-05, - "loss": 0.2866, - "step": 8154 - }, - { - "epoch": 0.7682343797838016, - "grad_norm": 0.7026647329330444, - "learning_rate": 1.3926877972220543e-05, - "loss": 0.2728, - "step": 8155 - }, - { - "epoch": 0.7683285838769695, - "grad_norm": 0.736984372138977, - "learning_rate": 1.3925489206332932e-05, - "loss": 0.3352, - "step": 8156 - }, - { - "epoch": 0.7684227879701373, - "grad_norm": 0.8198491334915161, - "learning_rate": 1.3924100350938097e-05, - "loss": 0.3118, - "step": 8157 - }, - { - "epoch": 0.7685169920633051, - "grad_norm": 0.6261926293373108, - "learning_rate": 1.3922711406067703e-05, - "loss": 0.2887, - "step": 8158 - }, - { - "epoch": 0.768611196156473, - "grad_norm": 0.7135415077209473, - "learning_rate": 1.3921322371753423e-05, - "loss": 0.3018, - "step": 8159 - }, - { - "epoch": 0.7687054002496408, - "grad_norm": 0.7526355385780334, - "learning_rate": 1.3919933248026933e-05, - "loss": 0.3307, - "step": 8160 - }, - { - "epoch": 0.7687996043428087, - "grad_norm": 0.7028264403343201, - "learning_rate": 1.3918544034919897e-05, - "loss": 0.3125, - "step": 8161 - }, - { - "epoch": 0.7688938084359765, - "grad_norm": 0.8565718531608582, - "learning_rate": 1.3917154732463998e-05, - "loss": 0.3268, - "step": 8162 - }, - { - "epoch": 0.7689880125291444, - "grad_norm": 0.7672988176345825, - "learning_rate": 1.3915765340690916e-05, - "loss": 0.326, - "step": 8163 - }, - { - "epoch": 0.7690822166223122, - "grad_norm": 0.8035504817962646, - "learning_rate": 1.3914375859632325e-05, - "loss": 0.3036, - "step": 8164 - }, - { - "epoch": 0.7691764207154801, - "grad_norm": 0.7635858058929443, - "learning_rate": 1.3912986289319914e-05, - "loss": 0.3715, - "step": 8165 - }, - { - "epoch": 0.7692706248086479, - "grad_norm": 0.7464389204978943, - "learning_rate": 1.3911596629785362e-05, - "loss": 0.3249, - "step": 8166 - }, - { - "epoch": 0.7693648289018158, - "grad_norm": 0.8326256275177002, - "learning_rate": 1.3910206881060355e-05, - "loss": 0.2854, - "step": 8167 - }, - { - "epoch": 0.7694590329949836, - "grad_norm": 0.7256987690925598, - "learning_rate": 1.3908817043176588e-05, - "loss": 0.3221, - "step": 8168 - }, - { - "epoch": 0.7695532370881515, - "grad_norm": 0.825480580329895, - "learning_rate": 1.3907427116165746e-05, - "loss": 0.3515, - "step": 8169 - }, - { - "epoch": 
0.7696474411813193, - "grad_norm": 0.8091462850570679, - "learning_rate": 1.3906037100059524e-05, - "loss": 0.275, - "step": 8170 - }, - { - "epoch": 0.7697416452744872, - "grad_norm": 0.9617534279823303, - "learning_rate": 1.3904646994889614e-05, - "loss": 0.3468, - "step": 8171 - }, - { - "epoch": 0.769835849367655, - "grad_norm": 0.7480612397193909, - "learning_rate": 1.3903256800687711e-05, - "loss": 0.2869, - "step": 8172 - }, - { - "epoch": 0.7699300534608229, - "grad_norm": 0.8497846722602844, - "learning_rate": 1.3901866517485521e-05, - "loss": 0.3008, - "step": 8173 - }, - { - "epoch": 0.7700242575539907, - "grad_norm": 0.7676090598106384, - "learning_rate": 1.3900476145314739e-05, - "loss": 0.3214, - "step": 8174 - }, - { - "epoch": 0.7701184616471586, - "grad_norm": 0.6978567838668823, - "learning_rate": 1.3899085684207068e-05, - "loss": 0.3002, - "step": 8175 - }, - { - "epoch": 0.7702126657403264, - "grad_norm": 0.7182883024215698, - "learning_rate": 1.389769513419421e-05, - "loss": 0.3341, - "step": 8176 - }, - { - "epoch": 0.7703068698334943, - "grad_norm": 0.7043347358703613, - "learning_rate": 1.3896304495307881e-05, - "loss": 0.2765, - "step": 8177 - }, - { - "epoch": 0.7704010739266621, - "grad_norm": 1.1452966928482056, - "learning_rate": 1.389491376757978e-05, - "loss": 0.3302, - "step": 8178 - }, - { - "epoch": 0.77049527801983, - "grad_norm": 0.8262236714363098, - "learning_rate": 1.3893522951041622e-05, - "loss": 0.2965, - "step": 8179 - }, - { - "epoch": 0.7705894821129978, - "grad_norm": 0.6995968222618103, - "learning_rate": 1.3892132045725122e-05, - "loss": 0.301, - "step": 8180 - }, - { - "epoch": 0.7706836862061657, - "grad_norm": 0.7085097432136536, - "learning_rate": 1.3890741051661989e-05, - "loss": 0.2831, - "step": 8181 - }, - { - "epoch": 0.7707778902993335, - "grad_norm": 0.7411379814147949, - "learning_rate": 1.3889349968883943e-05, - "loss": 0.3106, - "step": 8182 - }, - { - "epoch": 0.7708720943925014, - "grad_norm": 0.7735549211502075, - "learning_rate": 1.3887958797422707e-05, - "loss": 0.3126, - "step": 8183 - }, - { - "epoch": 0.7709662984856692, - "grad_norm": 0.69361811876297, - "learning_rate": 1.3886567537309992e-05, - "loss": 0.312, - "step": 8184 - }, - { - "epoch": 0.7710605025788371, - "grad_norm": 0.7850237488746643, - "learning_rate": 1.388517618857753e-05, - "loss": 0.3094, - "step": 8185 - }, - { - "epoch": 0.7711547066720049, - "grad_norm": 0.7504292130470276, - "learning_rate": 1.3883784751257042e-05, - "loss": 0.3169, - "step": 8186 - }, - { - "epoch": 0.7712489107651728, - "grad_norm": 0.7509244680404663, - "learning_rate": 1.3882393225380251e-05, - "loss": 0.2529, - "step": 8187 - }, - { - "epoch": 0.7713431148583406, - "grad_norm": 0.7746277451515198, - "learning_rate": 1.3881001610978894e-05, - "loss": 0.3651, - "step": 8188 - }, - { - "epoch": 0.7714373189515085, - "grad_norm": 0.8231454491615295, - "learning_rate": 1.3879609908084697e-05, - "loss": 0.3469, - "step": 8189 - }, - { - "epoch": 0.7715315230446763, - "grad_norm": 0.6980456709861755, - "learning_rate": 1.3878218116729397e-05, - "loss": 0.3341, - "step": 8190 - }, - { - "epoch": 0.7716257271378442, - "grad_norm": 0.750406801700592, - "learning_rate": 1.3876826236944724e-05, - "loss": 0.3357, - "step": 8191 - }, - { - "epoch": 0.771719931231012, - "grad_norm": 0.696100115776062, - "learning_rate": 1.3875434268762415e-05, - "loss": 0.3139, - "step": 8192 - }, - { - "epoch": 0.7718141353241799, - "grad_norm": 0.7752167582511902, - "learning_rate": 
1.3874042212214215e-05, - "loss": 0.3647, - "step": 8193 - }, - { - "epoch": 0.7719083394173477, - "grad_norm": 0.8316193222999573, - "learning_rate": 1.3872650067331859e-05, - "loss": 0.295, - "step": 8194 - }, - { - "epoch": 0.7720025435105156, - "grad_norm": 0.6846539378166199, - "learning_rate": 1.3871257834147094e-05, - "loss": 0.2856, - "step": 8195 - }, - { - "epoch": 0.7720967476036834, - "grad_norm": 0.8855867385864258, - "learning_rate": 1.386986551269166e-05, - "loss": 0.384, - "step": 8196 - }, - { - "epoch": 0.7721909516968513, - "grad_norm": 0.6644063591957092, - "learning_rate": 1.3868473102997311e-05, - "loss": 0.2704, - "step": 8197 - }, - { - "epoch": 0.7722851557900191, - "grad_norm": 1.0443947315216064, - "learning_rate": 1.386708060509579e-05, - "loss": 0.3466, - "step": 8198 - }, - { - "epoch": 0.772379359883187, - "grad_norm": 0.7246731519699097, - "learning_rate": 1.386568801901885e-05, - "loss": 0.3448, - "step": 8199 - }, - { - "epoch": 0.7724735639763548, - "grad_norm": 0.6464845538139343, - "learning_rate": 1.3864295344798251e-05, - "loss": 0.2988, - "step": 8200 - }, - { - "epoch": 0.7725677680695227, - "grad_norm": 0.7232339382171631, - "learning_rate": 1.3862902582465736e-05, - "loss": 0.3653, - "step": 8201 - }, - { - "epoch": 0.7726619721626905, - "grad_norm": 0.8247326016426086, - "learning_rate": 1.3861509732053067e-05, - "loss": 0.3376, - "step": 8202 - }, - { - "epoch": 0.7727561762558584, - "grad_norm": 0.7704737782478333, - "learning_rate": 1.3860116793592009e-05, - "loss": 0.3394, - "step": 8203 - }, - { - "epoch": 0.7728503803490262, - "grad_norm": 0.716284990310669, - "learning_rate": 1.3858723767114314e-05, - "loss": 0.3142, - "step": 8204 - }, - { - "epoch": 0.772944584442194, - "grad_norm": 0.7100145220756531, - "learning_rate": 1.3857330652651753e-05, - "loss": 0.3099, - "step": 8205 - }, - { - "epoch": 0.7730387885353619, - "grad_norm": 0.789027988910675, - "learning_rate": 1.3855937450236087e-05, - "loss": 0.3277, - "step": 8206 - }, - { - "epoch": 0.7731329926285297, - "grad_norm": 0.7239149808883667, - "learning_rate": 1.3854544159899081e-05, - "loss": 0.3365, - "step": 8207 - }, - { - "epoch": 0.7732271967216976, - "grad_norm": 0.6745103001594543, - "learning_rate": 1.3853150781672508e-05, - "loss": 0.3027, - "step": 8208 - }, - { - "epoch": 0.7733214008148654, - "grad_norm": 0.6679433584213257, - "learning_rate": 1.3851757315588141e-05, - "loss": 0.3138, - "step": 8209 - }, - { - "epoch": 0.7734156049080333, - "grad_norm": 0.7353999614715576, - "learning_rate": 1.385036376167775e-05, - "loss": 0.2889, - "step": 8210 - }, - { - "epoch": 0.7735098090012011, - "grad_norm": 0.7928226590156555, - "learning_rate": 1.3848970119973108e-05, - "loss": 0.3359, - "step": 8211 - }, - { - "epoch": 0.773604013094369, - "grad_norm": 0.6852613091468811, - "learning_rate": 1.3847576390505994e-05, - "loss": 0.3217, - "step": 8212 - }, - { - "epoch": 0.7736982171875368, - "grad_norm": 0.6255519986152649, - "learning_rate": 1.3846182573308191e-05, - "loss": 0.2603, - "step": 8213 - }, - { - "epoch": 0.7737924212807047, - "grad_norm": 0.7862254977226257, - "learning_rate": 1.3844788668411475e-05, - "loss": 0.3375, - "step": 8214 - }, - { - "epoch": 0.7738866253738725, - "grad_norm": 0.7256548404693604, - "learning_rate": 1.3843394675847635e-05, - "loss": 0.3236, - "step": 8215 - }, - { - "epoch": 0.7739808294670404, - "grad_norm": 0.7552547454833984, - "learning_rate": 1.3842000595648446e-05, - "loss": 0.3212, - "step": 8216 - }, - { - "epoch": 
0.7740750335602082, - "grad_norm": 0.5907566547393799, - "learning_rate": 1.3840606427845707e-05, - "loss": 0.3019, - "step": 8217 - }, - { - "epoch": 0.7741692376533761, - "grad_norm": 0.7375285029411316, - "learning_rate": 1.38392121724712e-05, - "loss": 0.3383, - "step": 8218 - }, - { - "epoch": 0.7742634417465439, - "grad_norm": 0.7448163032531738, - "learning_rate": 1.3837817829556716e-05, - "loss": 0.3322, - "step": 8219 - }, - { - "epoch": 0.7743576458397118, - "grad_norm": 0.7693195343017578, - "learning_rate": 1.3836423399134056e-05, - "loss": 0.2978, - "step": 8220 - }, - { - "epoch": 0.7744518499328796, - "grad_norm": 0.7602647542953491, - "learning_rate": 1.3835028881235001e-05, - "loss": 0.3288, - "step": 8221 - }, - { - "epoch": 0.7745460540260475, - "grad_norm": 0.7578790187835693, - "learning_rate": 1.3833634275891364e-05, - "loss": 0.3453, - "step": 8222 - }, - { - "epoch": 0.7746402581192153, - "grad_norm": 0.7210747599601746, - "learning_rate": 1.3832239583134934e-05, - "loss": 0.298, - "step": 8223 - }, - { - "epoch": 0.7747344622123832, - "grad_norm": 0.6661896705627441, - "learning_rate": 1.3830844802997514e-05, - "loss": 0.304, - "step": 8224 - }, - { - "epoch": 0.774828666305551, - "grad_norm": 0.8601593375205994, - "learning_rate": 1.3829449935510908e-05, - "loss": 0.333, - "step": 8225 - }, - { - "epoch": 0.7749228703987189, - "grad_norm": 0.6311641335487366, - "learning_rate": 1.3828054980706921e-05, - "loss": 0.2966, - "step": 8226 - }, - { - "epoch": 0.7750170744918867, - "grad_norm": 0.6895314455032349, - "learning_rate": 1.382665993861736e-05, - "loss": 0.3055, - "step": 8227 - }, - { - "epoch": 0.7751112785850546, - "grad_norm": 0.6774254441261292, - "learning_rate": 1.3825264809274037e-05, - "loss": 0.2659, - "step": 8228 - }, - { - "epoch": 0.7752054826782224, - "grad_norm": 0.7692509889602661, - "learning_rate": 1.3823869592708759e-05, - "loss": 0.3441, - "step": 8229 - }, - { - "epoch": 0.7752996867713902, - "grad_norm": 0.7428672909736633, - "learning_rate": 1.382247428895334e-05, - "loss": 0.3138, - "step": 8230 - }, - { - "epoch": 0.775393890864558, - "grad_norm": 0.7270107865333557, - "learning_rate": 1.3821078898039598e-05, - "loss": 0.3359, - "step": 8231 - }, - { - "epoch": 0.7754880949577259, - "grad_norm": 0.7511183619499207, - "learning_rate": 1.3819683419999343e-05, - "loss": 0.3063, - "step": 8232 - }, - { - "epoch": 0.7755822990508937, - "grad_norm": 0.7697188854217529, - "learning_rate": 1.3818287854864401e-05, - "loss": 0.3548, - "step": 8233 - }, - { - "epoch": 0.7756765031440616, - "grad_norm": 0.6531904935836792, - "learning_rate": 1.3816892202666591e-05, - "loss": 0.2833, - "step": 8234 - }, - { - "epoch": 0.7757707072372294, - "grad_norm": 0.6312221884727478, - "learning_rate": 1.3815496463437739e-05, - "loss": 0.2818, - "step": 8235 - }, - { - "epoch": 0.7758649113303973, - "grad_norm": 0.847802996635437, - "learning_rate": 1.3814100637209663e-05, - "loss": 0.337, - "step": 8236 - }, - { - "epoch": 0.7759591154235651, - "grad_norm": 0.7020447254180908, - "learning_rate": 1.3812704724014192e-05, - "loss": 0.31, - "step": 8237 - }, - { - "epoch": 0.776053319516733, - "grad_norm": 0.7162377834320068, - "learning_rate": 1.381130872388316e-05, - "loss": 0.2889, - "step": 8238 - }, - { - "epoch": 0.7761475236099008, - "grad_norm": 0.8712856769561768, - "learning_rate": 1.3809912636848395e-05, - "loss": 0.3241, - "step": 8239 - }, - { - "epoch": 0.7762417277030687, - "grad_norm": 0.6571957468986511, - "learning_rate": 1.380851646294173e-05, 
- "loss": 0.2901, - "step": 8240 - }, - { - "epoch": 0.7763359317962365, - "grad_norm": 0.8193198442459106, - "learning_rate": 1.3807120202195e-05, - "loss": 0.354, - "step": 8241 - }, - { - "epoch": 0.7764301358894043, - "grad_norm": 1.451332926750183, - "learning_rate": 1.3805723854640039e-05, - "loss": 0.354, - "step": 8242 - }, - { - "epoch": 0.7765243399825722, - "grad_norm": 0.7328004240989685, - "learning_rate": 1.380432742030869e-05, - "loss": 0.3538, - "step": 8243 - }, - { - "epoch": 0.77661854407574, - "grad_norm": 0.7978833913803101, - "learning_rate": 1.3802930899232791e-05, - "loss": 0.3297, - "step": 8244 - }, - { - "epoch": 0.7767127481689079, - "grad_norm": 0.7007066011428833, - "learning_rate": 1.3801534291444187e-05, - "loss": 0.2975, - "step": 8245 - }, - { - "epoch": 0.7768069522620757, - "grad_norm": 0.6987437605857849, - "learning_rate": 1.3800137596974723e-05, - "loss": 0.3003, - "step": 8246 - }, - { - "epoch": 0.7769011563552436, - "grad_norm": 0.6815653443336487, - "learning_rate": 1.3798740815856241e-05, - "loss": 0.2912, - "step": 8247 - }, - { - "epoch": 0.7769953604484114, - "grad_norm": 0.7572231292724609, - "learning_rate": 1.3797343948120599e-05, - "loss": 0.3054, - "step": 8248 - }, - { - "epoch": 0.7770895645415793, - "grad_norm": 0.7254391312599182, - "learning_rate": 1.379594699379964e-05, - "loss": 0.2941, - "step": 8249 - }, - { - "epoch": 0.7771837686347471, - "grad_norm": 0.7043079733848572, - "learning_rate": 1.3794549952925217e-05, - "loss": 0.3013, - "step": 8250 - }, - { - "epoch": 0.777277972727915, - "grad_norm": 1.0014352798461914, - "learning_rate": 1.379315282552919e-05, - "loss": 0.3082, - "step": 8251 - }, - { - "epoch": 0.7773721768210828, - "grad_norm": 0.8136102557182312, - "learning_rate": 1.3791755611643409e-05, - "loss": 0.3184, - "step": 8252 - }, - { - "epoch": 0.7774663809142507, - "grad_norm": 0.7315954566001892, - "learning_rate": 1.3790358311299739e-05, - "loss": 0.3245, - "step": 8253 - }, - { - "epoch": 0.7775605850074185, - "grad_norm": 0.7224061489105225, - "learning_rate": 1.3788960924530037e-05, - "loss": 0.3155, - "step": 8254 - }, - { - "epoch": 0.7776547891005864, - "grad_norm": 0.9970704913139343, - "learning_rate": 1.3787563451366167e-05, - "loss": 0.2995, - "step": 8255 - }, - { - "epoch": 0.7777489931937542, - "grad_norm": 0.7501780986785889, - "learning_rate": 1.3786165891839988e-05, - "loss": 0.3284, - "step": 8256 - }, - { - "epoch": 0.7778431972869221, - "grad_norm": 0.7430704236030579, - "learning_rate": 1.3784768245983377e-05, - "loss": 0.2966, - "step": 8257 - }, - { - "epoch": 0.7779374013800899, - "grad_norm": 0.759501039981842, - "learning_rate": 1.3783370513828194e-05, - "loss": 0.3284, - "step": 8258 - }, - { - "epoch": 0.7780316054732578, - "grad_norm": 0.7739242911338806, - "learning_rate": 1.378197269540631e-05, - "loss": 0.3237, - "step": 8259 - }, - { - "epoch": 0.7781258095664256, - "grad_norm": 0.8120517134666443, - "learning_rate": 1.3780574790749606e-05, - "loss": 0.2965, - "step": 8260 - }, - { - "epoch": 0.7782200136595935, - "grad_norm": 0.7086346745491028, - "learning_rate": 1.3779176799889944e-05, - "loss": 0.2946, - "step": 8261 - }, - { - "epoch": 0.7783142177527613, - "grad_norm": 0.723177433013916, - "learning_rate": 1.377777872285921e-05, - "loss": 0.3502, - "step": 8262 - }, - { - "epoch": 0.7784084218459292, - "grad_norm": 0.7041103839874268, - "learning_rate": 1.3776380559689278e-05, - "loss": 0.3152, - "step": 8263 - }, - { - "epoch": 0.778502625939097, - "grad_norm": 
0.6800200343132019, - "learning_rate": 1.3774982310412025e-05, - "loss": 0.2653, - "step": 8264 - }, - { - "epoch": 0.7785968300322649, - "grad_norm": 0.664059042930603, - "learning_rate": 1.3773583975059341e-05, - "loss": 0.3084, - "step": 8265 - }, - { - "epoch": 0.7786910341254327, - "grad_norm": 0.8002199530601501, - "learning_rate": 1.3772185553663107e-05, - "loss": 0.3038, - "step": 8266 - }, - { - "epoch": 0.7787852382186006, - "grad_norm": 0.7921991348266602, - "learning_rate": 1.3770787046255205e-05, - "loss": 0.3117, - "step": 8267 - }, - { - "epoch": 0.7788794423117684, - "grad_norm": 0.7738434672355652, - "learning_rate": 1.3769388452867532e-05, - "loss": 0.345, - "step": 8268 - }, - { - "epoch": 0.7789736464049363, - "grad_norm": 0.7458887100219727, - "learning_rate": 1.3767989773531969e-05, - "loss": 0.2633, - "step": 8269 - }, - { - "epoch": 0.7790678504981041, - "grad_norm": 0.7952979207038879, - "learning_rate": 1.3766591008280413e-05, - "loss": 0.2914, - "step": 8270 - }, - { - "epoch": 0.779162054591272, - "grad_norm": 0.7417327165603638, - "learning_rate": 1.3765192157144757e-05, - "loss": 0.3492, - "step": 8271 - }, - { - "epoch": 0.7792562586844398, - "grad_norm": 0.6867636442184448, - "learning_rate": 1.3763793220156893e-05, - "loss": 0.3074, - "step": 8272 - }, - { - "epoch": 0.7793504627776077, - "grad_norm": 0.6547208428382874, - "learning_rate": 1.3762394197348727e-05, - "loss": 0.2581, - "step": 8273 - }, - { - "epoch": 0.7794446668707755, - "grad_norm": 0.7010359764099121, - "learning_rate": 1.3760995088752153e-05, - "loss": 0.2769, - "step": 8274 - }, - { - "epoch": 0.7795388709639434, - "grad_norm": 0.7501020431518555, - "learning_rate": 1.3759595894399078e-05, - "loss": 0.3148, - "step": 8275 - }, - { - "epoch": 0.7796330750571112, - "grad_norm": 0.6372538208961487, - "learning_rate": 1.3758196614321398e-05, - "loss": 0.2668, - "step": 8276 - }, - { - "epoch": 0.7797272791502791, - "grad_norm": 0.7269129753112793, - "learning_rate": 1.3756797248551026e-05, - "loss": 0.2931, - "step": 8277 - }, - { - "epoch": 0.7798214832434469, - "grad_norm": 0.6892969012260437, - "learning_rate": 1.3755397797119864e-05, - "loss": 0.3268, - "step": 8278 - }, - { - "epoch": 0.7799156873366148, - "grad_norm": 0.6834263801574707, - "learning_rate": 1.3753998260059825e-05, - "loss": 0.3344, - "step": 8279 - }, - { - "epoch": 0.7800098914297826, - "grad_norm": 0.683588445186615, - "learning_rate": 1.3752598637402824e-05, - "loss": 0.2749, - "step": 8280 - }, - { - "epoch": 0.7801040955229505, - "grad_norm": 0.6910967230796814, - "learning_rate": 1.3751198929180765e-05, - "loss": 0.3284, - "step": 8281 - }, - { - "epoch": 0.7801982996161183, - "grad_norm": 0.8028631806373596, - "learning_rate": 1.3749799135425573e-05, - "loss": 0.3242, - "step": 8282 - }, - { - "epoch": 0.7802925037092862, - "grad_norm": 0.7768567800521851, - "learning_rate": 1.3748399256169158e-05, - "loss": 0.3309, - "step": 8283 - }, - { - "epoch": 0.780386707802454, - "grad_norm": 0.7080867886543274, - "learning_rate": 1.3746999291443444e-05, - "loss": 0.3029, - "step": 8284 - }, - { - "epoch": 0.7804809118956219, - "grad_norm": 0.8380313515663147, - "learning_rate": 1.3745599241280352e-05, - "loss": 0.3342, - "step": 8285 - }, - { - "epoch": 0.7805751159887897, - "grad_norm": 0.8456419706344604, - "learning_rate": 1.3744199105711806e-05, - "loss": 0.3106, - "step": 8286 - }, - { - "epoch": 0.7806693200819576, - "grad_norm": 0.6792260408401489, - "learning_rate": 1.3742798884769727e-05, - "loss": 0.3044, - 
"step": 8287 - }, - { - "epoch": 0.7807635241751254, - "grad_norm": 0.6739451885223389, - "learning_rate": 1.3741398578486049e-05, - "loss": 0.3054, - "step": 8288 - }, - { - "epoch": 0.7808577282682932, - "grad_norm": 0.737349271774292, - "learning_rate": 1.3739998186892694e-05, - "loss": 0.299, - "step": 8289 - }, - { - "epoch": 0.7809519323614611, - "grad_norm": 0.7201666831970215, - "learning_rate": 1.3738597710021598e-05, - "loss": 0.3134, - "step": 8290 - }, - { - "epoch": 0.781046136454629, - "grad_norm": 0.698501467704773, - "learning_rate": 1.373719714790469e-05, - "loss": 0.3177, - "step": 8291 - }, - { - "epoch": 0.7811403405477968, - "grad_norm": 0.8547974228858948, - "learning_rate": 1.373579650057391e-05, - "loss": 0.3312, - "step": 8292 - }, - { - "epoch": 0.7812345446409646, - "grad_norm": 0.7456308007240295, - "learning_rate": 1.373439576806119e-05, - "loss": 0.3104, - "step": 8293 - }, - { - "epoch": 0.7813287487341325, - "grad_norm": 0.7419720888137817, - "learning_rate": 1.3732994950398472e-05, - "loss": 0.3395, - "step": 8294 - }, - { - "epoch": 0.7814229528273003, - "grad_norm": 0.7196950912475586, - "learning_rate": 1.3731594047617698e-05, - "loss": 0.2703, - "step": 8295 - }, - { - "epoch": 0.7815171569204682, - "grad_norm": 0.7468432188034058, - "learning_rate": 1.3730193059750806e-05, - "loss": 0.3043, - "step": 8296 - }, - { - "epoch": 0.781611361013636, - "grad_norm": 0.7866313457489014, - "learning_rate": 1.3728791986829743e-05, - "loss": 0.3058, - "step": 8297 - }, - { - "epoch": 0.7817055651068039, - "grad_norm": 0.6918613910675049, - "learning_rate": 1.372739082888646e-05, - "loss": 0.319, - "step": 8298 - }, - { - "epoch": 0.7817997691999717, - "grad_norm": 0.727741003036499, - "learning_rate": 1.3725989585952897e-05, - "loss": 0.2761, - "step": 8299 - }, - { - "epoch": 0.7818939732931396, - "grad_norm": 0.7165054678916931, - "learning_rate": 1.3724588258061013e-05, - "loss": 0.3093, - "step": 8300 - }, - { - "epoch": 0.7819881773863074, - "grad_norm": 0.7405712008476257, - "learning_rate": 1.3723186845242753e-05, - "loss": 0.3038, - "step": 8301 - }, - { - "epoch": 0.7820823814794753, - "grad_norm": 0.6232830286026001, - "learning_rate": 1.3721785347530077e-05, - "loss": 0.2679, - "step": 8302 - }, - { - "epoch": 0.7821765855726431, - "grad_norm": 1.404122233390808, - "learning_rate": 1.3720383764954938e-05, - "loss": 0.3374, - "step": 8303 - }, - { - "epoch": 0.782270789665811, - "grad_norm": 0.6597297787666321, - "learning_rate": 1.3718982097549296e-05, - "loss": 0.2817, - "step": 8304 - }, - { - "epoch": 0.7823649937589788, - "grad_norm": 0.6930121779441833, - "learning_rate": 1.3717580345345112e-05, - "loss": 0.2841, - "step": 8305 - }, - { - "epoch": 0.7824591978521467, - "grad_norm": 1.024834156036377, - "learning_rate": 1.3716178508374344e-05, - "loss": 0.3172, - "step": 8306 - }, - { - "epoch": 0.7825534019453145, - "grad_norm": 0.6708495020866394, - "learning_rate": 1.371477658666896e-05, - "loss": 0.2735, - "step": 8307 - }, - { - "epoch": 0.7826476060384824, - "grad_norm": 0.7648048400878906, - "learning_rate": 1.3713374580260927e-05, - "loss": 0.309, - "step": 8308 - }, - { - "epoch": 0.7827418101316502, - "grad_norm": 0.7775411009788513, - "learning_rate": 1.3711972489182208e-05, - "loss": 0.3023, - "step": 8309 - }, - { - "epoch": 0.7828360142248181, - "grad_norm": 0.9280102849006653, - "learning_rate": 1.3710570313464778e-05, - "loss": 0.2923, - "step": 8310 - }, - { - "epoch": 0.7829302183179859, - "grad_norm": 0.9284132122993469, - 
"learning_rate": 1.3709168053140604e-05, - "loss": 0.3423, - "step": 8311 - }, - { - "epoch": 0.7830244224111538, - "grad_norm": 0.7038367986679077, - "learning_rate": 1.3707765708241663e-05, - "loss": 0.2897, - "step": 8312 - }, - { - "epoch": 0.7831186265043216, - "grad_norm": 0.7105676531791687, - "learning_rate": 1.3706363278799931e-05, - "loss": 0.2623, - "step": 8313 - }, - { - "epoch": 0.7832128305974895, - "grad_norm": 0.9595640301704407, - "learning_rate": 1.370496076484738e-05, - "loss": 0.3136, - "step": 8314 - }, - { - "epoch": 0.7833070346906573, - "grad_norm": 0.7618606686592102, - "learning_rate": 1.3703558166415998e-05, - "loss": 0.3421, - "step": 8315 - }, - { - "epoch": 0.7834012387838252, - "grad_norm": 0.666176438331604, - "learning_rate": 1.370215548353776e-05, - "loss": 0.2788, - "step": 8316 - }, - { - "epoch": 0.783495442876993, - "grad_norm": 0.6845123171806335, - "learning_rate": 1.3700752716244651e-05, - "loss": 0.312, - "step": 8317 - }, - { - "epoch": 0.7835896469701609, - "grad_norm": 0.7459732890129089, - "learning_rate": 1.369934986456866e-05, - "loss": 0.3177, - "step": 8318 - }, - { - "epoch": 0.7836838510633287, - "grad_norm": 0.7005225419998169, - "learning_rate": 1.3697946928541768e-05, - "loss": 0.2864, - "step": 8319 - }, - { - "epoch": 0.7837780551564966, - "grad_norm": 0.6945435404777527, - "learning_rate": 1.369654390819597e-05, - "loss": 0.2965, - "step": 8320 - }, - { - "epoch": 0.7838722592496644, - "grad_norm": 0.7020549178123474, - "learning_rate": 1.369514080356325e-05, - "loss": 0.3204, - "step": 8321 - }, - { - "epoch": 0.7839664633428323, - "grad_norm": 0.7289341688156128, - "learning_rate": 1.3693737614675608e-05, - "loss": 0.3118, - "step": 8322 - }, - { - "epoch": 0.7840606674360001, - "grad_norm": 0.7745868563652039, - "learning_rate": 1.3692334341565037e-05, - "loss": 0.2994, - "step": 8323 - }, - { - "epoch": 0.784154871529168, - "grad_norm": 0.7368453145027161, - "learning_rate": 1.3690930984263528e-05, - "loss": 0.3034, - "step": 8324 - }, - { - "epoch": 0.7842490756223358, - "grad_norm": 0.685530424118042, - "learning_rate": 1.3689527542803087e-05, - "loss": 0.2837, - "step": 8325 - }, - { - "epoch": 0.7843432797155037, - "grad_norm": 0.8169264793395996, - "learning_rate": 1.3688124017215714e-05, - "loss": 0.3502, - "step": 8326 - }, - { - "epoch": 0.7844374838086715, - "grad_norm": 0.9533312916755676, - "learning_rate": 1.3686720407533404e-05, - "loss": 0.35, - "step": 8327 - }, - { - "epoch": 0.7845316879018394, - "grad_norm": 0.721153199672699, - "learning_rate": 1.3685316713788174e-05, - "loss": 0.2947, - "step": 8328 - }, - { - "epoch": 0.7846258919950072, - "grad_norm": 0.7245532870292664, - "learning_rate": 1.3683912936012021e-05, - "loss": 0.3427, - "step": 8329 - }, - { - "epoch": 0.7847200960881751, - "grad_norm": 0.684908926486969, - "learning_rate": 1.3682509074236954e-05, - "loss": 0.302, - "step": 8330 - }, - { - "epoch": 0.7848143001813429, - "grad_norm": 0.7864391207695007, - "learning_rate": 1.3681105128494987e-05, - "loss": 0.3273, - "step": 8331 - }, - { - "epoch": 0.7849085042745108, - "grad_norm": 0.6541042923927307, - "learning_rate": 1.3679701098818128e-05, - "loss": 0.2963, - "step": 8332 - }, - { - "epoch": 0.7850027083676786, - "grad_norm": 0.6770807504653931, - "learning_rate": 1.3678296985238395e-05, - "loss": 0.3088, - "step": 8333 - }, - { - "epoch": 0.7850969124608465, - "grad_norm": 0.823836088180542, - "learning_rate": 1.3676892787787801e-05, - "loss": 0.2892, - "step": 8334 - }, - { - "epoch": 
0.7851911165540143, - "grad_norm": 0.6440860629081726, - "learning_rate": 1.3675488506498367e-05, - "loss": 0.2827, - "step": 8335 - }, - { - "epoch": 0.7852853206471822, - "grad_norm": 0.6993657946586609, - "learning_rate": 1.3674084141402108e-05, - "loss": 0.3128, - "step": 8336 - }, - { - "epoch": 0.78537952474035, - "grad_norm": 0.7165569067001343, - "learning_rate": 1.367267969253105e-05, - "loss": 0.3159, - "step": 8337 - }, - { - "epoch": 0.7854737288335178, - "grad_norm": 0.7132493257522583, - "learning_rate": 1.3671275159917214e-05, - "loss": 0.3267, - "step": 8338 - }, - { - "epoch": 0.7855679329266857, - "grad_norm": 0.6765950322151184, - "learning_rate": 1.3669870543592629e-05, - "loss": 0.2855, - "step": 8339 - }, - { - "epoch": 0.7856621370198535, - "grad_norm": 0.6541544795036316, - "learning_rate": 1.3668465843589318e-05, - "loss": 0.298, - "step": 8340 - }, - { - "epoch": 0.7857563411130214, - "grad_norm": 0.836005449295044, - "learning_rate": 1.3667061059939312e-05, - "loss": 0.3522, - "step": 8341 - }, - { - "epoch": 0.7858505452061892, - "grad_norm": 0.8120798468589783, - "learning_rate": 1.3665656192674645e-05, - "loss": 0.3356, - "step": 8342 - }, - { - "epoch": 0.7859447492993571, - "grad_norm": 0.6587315201759338, - "learning_rate": 1.3664251241827347e-05, - "loss": 0.2633, - "step": 8343 - }, - { - "epoch": 0.7860389533925249, - "grad_norm": 0.7285856008529663, - "learning_rate": 1.3662846207429453e-05, - "loss": 0.3178, - "step": 8344 - }, - { - "epoch": 0.7861331574856928, - "grad_norm": 0.8499427437782288, - "learning_rate": 1.3661441089512998e-05, - "loss": 0.2918, - "step": 8345 - }, - { - "epoch": 0.7862273615788606, - "grad_norm": 0.6641467809677124, - "learning_rate": 1.3660035888110027e-05, - "loss": 0.2999, - "step": 8346 - }, - { - "epoch": 0.7863215656720285, - "grad_norm": 0.8487431406974792, - "learning_rate": 1.3658630603252578e-05, - "loss": 0.3024, - "step": 8347 - }, - { - "epoch": 0.7864157697651963, - "grad_norm": 0.7532932162284851, - "learning_rate": 1.3657225234972695e-05, - "loss": 0.3307, - "step": 8348 - }, - { - "epoch": 0.7865099738583642, - "grad_norm": 0.7543956637382507, - "learning_rate": 1.3655819783302415e-05, - "loss": 0.3076, - "step": 8349 - }, - { - "epoch": 0.786604177951532, - "grad_norm": 0.7056576609611511, - "learning_rate": 1.3654414248273792e-05, - "loss": 0.2803, - "step": 8350 - }, - { - "epoch": 0.7866983820446999, - "grad_norm": 0.6470280289649963, - "learning_rate": 1.3653008629918875e-05, - "loss": 0.3051, - "step": 8351 - }, - { - "epoch": 0.7867925861378677, - "grad_norm": 0.6129191517829895, - "learning_rate": 1.365160292826971e-05, - "loss": 0.2892, - "step": 8352 - }, - { - "epoch": 0.7868867902310356, - "grad_norm": 0.7020622491836548, - "learning_rate": 1.365019714335835e-05, - "loss": 0.2817, - "step": 8353 - }, - { - "epoch": 0.7869809943242034, - "grad_norm": 0.710928201675415, - "learning_rate": 1.364879127521685e-05, - "loss": 0.3287, - "step": 8354 - }, - { - "epoch": 0.7870751984173713, - "grad_norm": 0.698966920375824, - "learning_rate": 1.3647385323877269e-05, - "loss": 0.2689, - "step": 8355 - }, - { - "epoch": 0.7871694025105391, - "grad_norm": 0.731090784072876, - "learning_rate": 1.3645979289371658e-05, - "loss": 0.2874, - "step": 8356 - }, - { - "epoch": 0.787263606603707, - "grad_norm": 0.7392199635505676, - "learning_rate": 1.3644573171732082e-05, - "loss": 0.3158, - "step": 8357 - }, - { - "epoch": 0.7873578106968748, - "grad_norm": 0.6243274211883545, - "learning_rate": 
1.3643166970990604e-05, - "loss": 0.2703, - "step": 8358 - }, - { - "epoch": 0.7874520147900427, - "grad_norm": 0.7213075757026672, - "learning_rate": 1.3641760687179281e-05, - "loss": 0.3139, - "step": 8359 - }, - { - "epoch": 0.7875462188832105, - "grad_norm": 0.8094550371170044, - "learning_rate": 1.3640354320330185e-05, - "loss": 0.3348, - "step": 8360 - }, - { - "epoch": 0.7876404229763784, - "grad_norm": 0.7065017819404602, - "learning_rate": 1.3638947870475376e-05, - "loss": 0.2956, - "step": 8361 - }, - { - "epoch": 0.7877346270695462, - "grad_norm": 0.7342098355293274, - "learning_rate": 1.3637541337646933e-05, - "loss": 0.3074, - "step": 8362 - }, - { - "epoch": 0.7878288311627141, - "grad_norm": 0.7462935447692871, - "learning_rate": 1.3636134721876922e-05, - "loss": 0.3157, - "step": 8363 - }, - { - "epoch": 0.7879230352558819, - "grad_norm": 0.7283497452735901, - "learning_rate": 1.3634728023197412e-05, - "loss": 0.3114, - "step": 8364 - }, - { - "epoch": 0.7880172393490498, - "grad_norm": 0.7347304224967957, - "learning_rate": 1.3633321241640485e-05, - "loss": 0.3235, - "step": 8365 - }, - { - "epoch": 0.7881114434422176, - "grad_norm": 0.7423493266105652, - "learning_rate": 1.3631914377238213e-05, - "loss": 0.3015, - "step": 8366 - }, - { - "epoch": 0.7882056475353855, - "grad_norm": 0.719247043132782, - "learning_rate": 1.3630507430022674e-05, - "loss": 0.3318, - "step": 8367 - }, - { - "epoch": 0.7882998516285532, - "grad_norm": 0.6581484079360962, - "learning_rate": 1.3629100400025956e-05, - "loss": 0.2722, - "step": 8368 - }, - { - "epoch": 0.788394055721721, - "grad_norm": 0.722379744052887, - "learning_rate": 1.3627693287280132e-05, - "loss": 0.2994, - "step": 8369 - }, - { - "epoch": 0.7884882598148889, - "grad_norm": 0.6473997235298157, - "learning_rate": 1.3626286091817293e-05, - "loss": 0.2885, - "step": 8370 - }, - { - "epoch": 0.7885824639080568, - "grad_norm": 0.8524295687675476, - "learning_rate": 1.3624878813669524e-05, - "loss": 0.3142, - "step": 8371 - }, - { - "epoch": 0.7886766680012246, - "grad_norm": 0.6791268587112427, - "learning_rate": 1.3623471452868908e-05, - "loss": 0.3375, - "step": 8372 - }, - { - "epoch": 0.7887708720943925, - "grad_norm": 0.805366575717926, - "learning_rate": 1.3622064009447544e-05, - "loss": 0.3437, - "step": 8373 - }, - { - "epoch": 0.7888650761875603, - "grad_norm": 0.7245519757270813, - "learning_rate": 1.3620656483437518e-05, - "loss": 0.339, - "step": 8374 - }, - { - "epoch": 0.7889592802807281, - "grad_norm": 0.7727537155151367, - "learning_rate": 1.3619248874870924e-05, - "loss": 0.3093, - "step": 8375 - }, - { - "epoch": 0.789053484373896, - "grad_norm": 0.8279337286949158, - "learning_rate": 1.3617841183779853e-05, - "loss": 0.3432, - "step": 8376 - }, - { - "epoch": 0.7891476884670638, - "grad_norm": 0.7298142910003662, - "learning_rate": 1.3616433410196414e-05, - "loss": 0.3138, - "step": 8377 - }, - { - "epoch": 0.7892418925602317, - "grad_norm": 0.6630862951278687, - "learning_rate": 1.36150255541527e-05, - "loss": 0.2768, - "step": 8378 - }, - { - "epoch": 0.7893360966533995, - "grad_norm": 0.645504891872406, - "learning_rate": 1.3613617615680812e-05, - "loss": 0.293, - "step": 8379 - }, - { - "epoch": 0.7894303007465674, - "grad_norm": 0.6487581133842468, - "learning_rate": 1.3612209594812856e-05, - "loss": 0.287, - "step": 8380 - }, - { - "epoch": 0.7895245048397352, - "grad_norm": 0.8455442786216736, - "learning_rate": 1.3610801491580929e-05, - "loss": 0.3331, - "step": 8381 - }, - { - "epoch": 
0.7896187089329031, - "grad_norm": 0.697076141834259, - "learning_rate": 1.3609393306017149e-05, - "loss": 0.3141, - "step": 8382 - }, - { - "epoch": 0.7897129130260709, - "grad_norm": 0.724602222442627, - "learning_rate": 1.3607985038153616e-05, - "loss": 0.2976, - "step": 8383 - }, - { - "epoch": 0.7898071171192388, - "grad_norm": 0.7992532849311829, - "learning_rate": 1.3606576688022446e-05, - "loss": 0.3487, - "step": 8384 - }, - { - "epoch": 0.7899013212124066, - "grad_norm": 0.6527953743934631, - "learning_rate": 1.3605168255655752e-05, - "loss": 0.3096, - "step": 8385 - }, - { - "epoch": 0.7899955253055745, - "grad_norm": 0.8105137944221497, - "learning_rate": 1.3603759741085642e-05, - "loss": 0.3776, - "step": 8386 - }, - { - "epoch": 0.7900897293987423, - "grad_norm": 0.7063913941383362, - "learning_rate": 1.360235114434424e-05, - "loss": 0.3311, - "step": 8387 - }, - { - "epoch": 0.7901839334919102, - "grad_norm": 0.7864614725112915, - "learning_rate": 1.360094246546366e-05, - "loss": 0.3316, - "step": 8388 - }, - { - "epoch": 0.790278137585078, - "grad_norm": 0.7231094837188721, - "learning_rate": 1.359953370447602e-05, - "loss": 0.3261, - "step": 8389 - }, - { - "epoch": 0.7903723416782459, - "grad_norm": 0.9381780028343201, - "learning_rate": 1.359812486141345e-05, - "loss": 0.321, - "step": 8390 - }, - { - "epoch": 0.7904665457714137, - "grad_norm": 1.0008454322814941, - "learning_rate": 1.3596715936308064e-05, - "loss": 0.3944, - "step": 8391 - }, - { - "epoch": 0.7905607498645816, - "grad_norm": 0.8007410764694214, - "learning_rate": 1.3595306929191994e-05, - "loss": 0.2822, - "step": 8392 - }, - { - "epoch": 0.7906549539577494, - "grad_norm": 0.9686805009841919, - "learning_rate": 1.3593897840097366e-05, - "loss": 0.3272, - "step": 8393 - }, - { - "epoch": 0.7907491580509173, - "grad_norm": 0.6351611018180847, - "learning_rate": 1.359248866905631e-05, - "loss": 0.3036, - "step": 8394 - }, - { - "epoch": 0.7908433621440851, - "grad_norm": 0.7274337410926819, - "learning_rate": 1.3591079416100955e-05, - "loss": 0.3087, - "step": 8395 - }, - { - "epoch": 0.790937566237253, - "grad_norm": 0.7062101364135742, - "learning_rate": 1.3589670081263432e-05, - "loss": 0.292, - "step": 8396 - }, - { - "epoch": 0.7910317703304208, - "grad_norm": 0.6788089871406555, - "learning_rate": 1.3588260664575885e-05, - "loss": 0.3035, - "step": 8397 - }, - { - "epoch": 0.7911259744235887, - "grad_norm": 0.6554300785064697, - "learning_rate": 1.3586851166070445e-05, - "loss": 0.2749, - "step": 8398 - }, - { - "epoch": 0.7912201785167565, - "grad_norm": 0.7202410697937012, - "learning_rate": 1.358544158577925e-05, - "loss": 0.3206, - "step": 8399 - }, - { - "epoch": 0.7913143826099244, - "grad_norm": 0.7300735712051392, - "learning_rate": 1.3584031923734443e-05, - "loss": 0.304, - "step": 8400 - }, - { - "epoch": 0.7914085867030922, - "grad_norm": 0.8400070667266846, - "learning_rate": 1.3582622179968161e-05, - "loss": 0.3198, - "step": 8401 - }, - { - "epoch": 0.7915027907962601, - "grad_norm": 0.7179476618766785, - "learning_rate": 1.3581212354512558e-05, - "loss": 0.2923, - "step": 8402 - }, - { - "epoch": 0.7915969948894279, - "grad_norm": 0.7091476917266846, - "learning_rate": 1.3579802447399773e-05, - "loss": 0.3092, - "step": 8403 - }, - { - "epoch": 0.7916911989825958, - "grad_norm": 0.7615411281585693, - "learning_rate": 1.3578392458661956e-05, - "loss": 0.3169, - "step": 8404 - }, - { - "epoch": 0.7917854030757636, - "grad_norm": 0.5997017621994019, - "learning_rate": 
1.3576982388331258e-05, - "loss": 0.2648, - "step": 8405 - }, - { - "epoch": 0.7918796071689315, - "grad_norm": 0.7050553560256958, - "learning_rate": 1.3575572236439828e-05, - "loss": 0.3208, - "step": 8406 - }, - { - "epoch": 0.7919738112620993, - "grad_norm": 0.8503166437149048, - "learning_rate": 1.3574162003019819e-05, - "loss": 0.3104, - "step": 8407 - }, - { - "epoch": 0.7920680153552672, - "grad_norm": 0.6925212144851685, - "learning_rate": 1.3572751688103394e-05, - "loss": 0.303, - "step": 8408 - }, - { - "epoch": 0.792162219448435, - "grad_norm": 0.7317957282066345, - "learning_rate": 1.3571341291722701e-05, - "loss": 0.3057, - "step": 8409 - }, - { - "epoch": 0.7922564235416029, - "grad_norm": 0.7509272694587708, - "learning_rate": 1.3569930813909904e-05, - "loss": 0.2666, - "step": 8410 - }, - { - "epoch": 0.7923506276347707, - "grad_norm": 0.7998015880584717, - "learning_rate": 1.3568520254697166e-05, - "loss": 0.3611, - "step": 8411 - }, - { - "epoch": 0.7924448317279386, - "grad_norm": 0.77497398853302, - "learning_rate": 1.3567109614116643e-05, - "loss": 0.2746, - "step": 8412 - }, - { - "epoch": 0.7925390358211064, - "grad_norm": 0.7679684162139893, - "learning_rate": 1.3565698892200507e-05, - "loss": 0.311, - "step": 8413 - }, - { - "epoch": 0.7926332399142743, - "grad_norm": 0.8640990853309631, - "learning_rate": 1.3564288088980923e-05, - "loss": 0.3185, - "step": 8414 - }, - { - "epoch": 0.7927274440074421, - "grad_norm": 0.6967403292655945, - "learning_rate": 1.3562877204490058e-05, - "loss": 0.2685, - "step": 8415 - }, - { - "epoch": 0.79282164810061, - "grad_norm": 0.7341375350952148, - "learning_rate": 1.356146623876008e-05, - "loss": 0.3051, - "step": 8416 - }, - { - "epoch": 0.7929158521937778, - "grad_norm": 0.706219494342804, - "learning_rate": 1.3560055191823165e-05, - "loss": 0.2888, - "step": 8417 - }, - { - "epoch": 0.7930100562869457, - "grad_norm": 0.7508575320243835, - "learning_rate": 1.3558644063711489e-05, - "loss": 0.2669, - "step": 8418 - }, - { - "epoch": 0.7931042603801135, - "grad_norm": 0.7251358032226562, - "learning_rate": 1.355723285445722e-05, - "loss": 0.3131, - "step": 8419 - }, - { - "epoch": 0.7931984644732814, - "grad_norm": 0.667508065700531, - "learning_rate": 1.3555821564092544e-05, - "loss": 0.2753, - "step": 8420 - }, - { - "epoch": 0.7932926685664492, - "grad_norm": 0.7205508351325989, - "learning_rate": 1.3554410192649634e-05, - "loss": 0.2861, - "step": 8421 - }, - { - "epoch": 0.793386872659617, - "grad_norm": 0.7302700877189636, - "learning_rate": 1.3552998740160676e-05, - "loss": 0.297, - "step": 8422 - }, - { - "epoch": 0.7934810767527849, - "grad_norm": 0.9575607180595398, - "learning_rate": 1.3551587206657855e-05, - "loss": 0.2917, - "step": 8423 - }, - { - "epoch": 0.7935752808459527, - "grad_norm": 0.7603209614753723, - "learning_rate": 1.3550175592173347e-05, - "loss": 0.2802, - "step": 8424 - }, - { - "epoch": 0.7936694849391206, - "grad_norm": 0.7659693360328674, - "learning_rate": 1.3548763896739351e-05, - "loss": 0.2844, - "step": 8425 - }, - { - "epoch": 0.7937636890322884, - "grad_norm": 0.8542771339416504, - "learning_rate": 1.3547352120388046e-05, - "loss": 0.3169, - "step": 8426 - }, - { - "epoch": 0.7938578931254563, - "grad_norm": 0.9261520504951477, - "learning_rate": 1.3545940263151627e-05, - "loss": 0.3304, - "step": 8427 - }, - { - "epoch": 0.7939520972186241, - "grad_norm": 0.885272741317749, - "learning_rate": 1.3544528325062289e-05, - "loss": 0.3193, - "step": 8428 - }, - { - "epoch": 0.794046301311792, 
- "grad_norm": 0.749832272529602, - "learning_rate": 1.354311630615222e-05, - "loss": 0.2886, - "step": 8429 - }, - { - "epoch": 0.7941405054049598, - "grad_norm": 0.8688532710075378, - "learning_rate": 1.354170420645362e-05, - "loss": 0.3752, - "step": 8430 - }, - { - "epoch": 0.7942347094981277, - "grad_norm": 0.7473974227905273, - "learning_rate": 1.354029202599869e-05, - "loss": 0.3138, - "step": 8431 - }, - { - "epoch": 0.7943289135912955, - "grad_norm": 0.6640375256538391, - "learning_rate": 1.3538879764819624e-05, - "loss": 0.3087, - "step": 8432 - }, - { - "epoch": 0.7944231176844634, - "grad_norm": 0.7127041816711426, - "learning_rate": 1.3537467422948626e-05, - "loss": 0.3381, - "step": 8433 - }, - { - "epoch": 0.7945173217776312, - "grad_norm": 0.7646892666816711, - "learning_rate": 1.3536055000417903e-05, - "loss": 0.338, - "step": 8434 - }, - { - "epoch": 0.7946115258707991, - "grad_norm": 0.6835283637046814, - "learning_rate": 1.3534642497259656e-05, - "loss": 0.3147, - "step": 8435 - }, - { - "epoch": 0.7947057299639669, - "grad_norm": 0.7504387497901917, - "learning_rate": 1.353322991350609e-05, - "loss": 0.3455, - "step": 8436 - }, - { - "epoch": 0.7947999340571348, - "grad_norm": 0.8048112392425537, - "learning_rate": 1.353181724918942e-05, - "loss": 0.3455, - "step": 8437 - }, - { - "epoch": 0.7948941381503026, - "grad_norm": 1.639927625656128, - "learning_rate": 1.3530404504341856e-05, - "loss": 0.3101, - "step": 8438 - }, - { - "epoch": 0.7949883422434705, - "grad_norm": 0.9135071635246277, - "learning_rate": 1.352899167899561e-05, - "loss": 0.3383, - "step": 8439 - }, - { - "epoch": 0.7950825463366383, - "grad_norm": 0.8039232492446899, - "learning_rate": 1.3527578773182895e-05, - "loss": 0.335, - "step": 8440 - }, - { - "epoch": 0.7951767504298062, - "grad_norm": 0.626367449760437, - "learning_rate": 1.3526165786935926e-05, - "loss": 0.2672, - "step": 8441 - }, - { - "epoch": 0.795270954522974, - "grad_norm": 0.8004292249679565, - "learning_rate": 1.3524752720286927e-05, - "loss": 0.296, - "step": 8442 - }, - { - "epoch": 0.7953651586161419, - "grad_norm": 0.7011231780052185, - "learning_rate": 1.3523339573268116e-05, - "loss": 0.3, - "step": 8443 - }, - { - "epoch": 0.7954593627093097, - "grad_norm": 0.6995598077774048, - "learning_rate": 1.352192634591171e-05, - "loss": 0.3172, - "step": 8444 - }, - { - "epoch": 0.7955535668024776, - "grad_norm": 0.897178590297699, - "learning_rate": 1.3520513038249939e-05, - "loss": 0.3036, - "step": 8445 - }, - { - "epoch": 0.7956477708956454, - "grad_norm": 0.6888857483863831, - "learning_rate": 1.3519099650315023e-05, - "loss": 0.3188, - "step": 8446 - }, - { - "epoch": 0.7957419749888133, - "grad_norm": 0.7783874273300171, - "learning_rate": 1.3517686182139193e-05, - "loss": 0.2787, - "step": 8447 - }, - { - "epoch": 0.7958361790819811, - "grad_norm": 0.7472442984580994, - "learning_rate": 1.3516272633754684e-05, - "loss": 0.3276, - "step": 8448 - }, - { - "epoch": 0.795930383175149, - "grad_norm": 0.8034288287162781, - "learning_rate": 1.3514859005193714e-05, - "loss": 0.3242, - "step": 8449 - }, - { - "epoch": 0.7960245872683168, - "grad_norm": 0.6877335906028748, - "learning_rate": 1.3513445296488525e-05, - "loss": 0.3051, - "step": 8450 - }, - { - "epoch": 0.7961187913614847, - "grad_norm": 0.7368878126144409, - "learning_rate": 1.351203150767135e-05, - "loss": 0.317, - "step": 8451 - }, - { - "epoch": 0.7962129954546525, - "grad_norm": 0.8141829967498779, - "learning_rate": 1.3510617638774424e-05, - "loss": 0.3725, - 
"step": 8452 - }, - { - "epoch": 0.7963071995478204, - "grad_norm": 0.7533358335494995, - "learning_rate": 1.3509203689829988e-05, - "loss": 0.2992, - "step": 8453 - }, - { - "epoch": 0.7964014036409882, - "grad_norm": 0.7177354693412781, - "learning_rate": 1.3507789660870282e-05, - "loss": 0.3515, - "step": 8454 - }, - { - "epoch": 0.7964956077341561, - "grad_norm": 1.0493124723434448, - "learning_rate": 1.3506375551927546e-05, - "loss": 0.3108, - "step": 8455 - }, - { - "epoch": 0.7965898118273239, - "grad_norm": 0.8071884512901306, - "learning_rate": 1.3504961363034024e-05, - "loss": 0.334, - "step": 8456 - }, - { - "epoch": 0.7966840159204918, - "grad_norm": 0.7486411333084106, - "learning_rate": 1.350354709422196e-05, - "loss": 0.3259, - "step": 8457 - }, - { - "epoch": 0.7967782200136596, - "grad_norm": 0.659052312374115, - "learning_rate": 1.3502132745523609e-05, - "loss": 0.3128, - "step": 8458 - }, - { - "epoch": 0.7968724241068275, - "grad_norm": 0.7426510453224182, - "learning_rate": 1.3500718316971213e-05, - "loss": 0.2573, - "step": 8459 - }, - { - "epoch": 0.7969666281999953, - "grad_norm": 0.7538797855377197, - "learning_rate": 1.3499303808597024e-05, - "loss": 0.3087, - "step": 8460 - }, - { - "epoch": 0.7970608322931632, - "grad_norm": 0.8201055526733398, - "learning_rate": 1.3497889220433296e-05, - "loss": 0.3069, - "step": 8461 - }, - { - "epoch": 0.797155036386331, - "grad_norm": 0.7548934817314148, - "learning_rate": 1.3496474552512286e-05, - "loss": 0.2948, - "step": 8462 - }, - { - "epoch": 0.7972492404794989, - "grad_norm": 0.7931002974510193, - "learning_rate": 1.3495059804866251e-05, - "loss": 0.3261, - "step": 8463 - }, - { - "epoch": 0.7973434445726667, - "grad_norm": 0.7292978763580322, - "learning_rate": 1.349364497752744e-05, - "loss": 0.3252, - "step": 8464 - }, - { - "epoch": 0.7974376486658346, - "grad_norm": 0.6160940527915955, - "learning_rate": 1.349223007052813e-05, - "loss": 0.3014, - "step": 8465 - }, - { - "epoch": 0.7975318527590024, - "grad_norm": 1.3209818601608276, - "learning_rate": 1.3490815083900566e-05, - "loss": 0.3094, - "step": 8466 - }, - { - "epoch": 0.7976260568521703, - "grad_norm": 0.7098581790924072, - "learning_rate": 1.3489400017677022e-05, - "loss": 0.2818, - "step": 8467 - }, - { - "epoch": 0.7977202609453381, - "grad_norm": 0.6893822550773621, - "learning_rate": 1.3487984871889763e-05, - "loss": 0.3562, - "step": 8468 - }, - { - "epoch": 0.797814465038506, - "grad_norm": 0.7777463793754578, - "learning_rate": 1.3486569646571051e-05, - "loss": 0.3225, - "step": 8469 - }, - { - "epoch": 0.7979086691316738, - "grad_norm": 0.7464541792869568, - "learning_rate": 1.3485154341753161e-05, - "loss": 0.3276, - "step": 8470 - }, - { - "epoch": 0.7980028732248416, - "grad_norm": 0.675532877445221, - "learning_rate": 1.3483738957468364e-05, - "loss": 0.2811, - "step": 8471 - }, - { - "epoch": 0.7980970773180095, - "grad_norm": 0.8053826093673706, - "learning_rate": 1.3482323493748926e-05, - "loss": 0.3372, - "step": 8472 - }, - { - "epoch": 0.7981912814111773, - "grad_norm": 0.7255682945251465, - "learning_rate": 1.348090795062713e-05, - "loss": 0.2961, - "step": 8473 - }, - { - "epoch": 0.7982854855043452, - "grad_norm": 0.9220722317695618, - "learning_rate": 1.3479492328135251e-05, - "loss": 0.2961, - "step": 8474 - }, - { - "epoch": 0.798379689597513, - "grad_norm": 0.7277863025665283, - "learning_rate": 1.3478076626305563e-05, - "loss": 0.2953, - "step": 8475 - }, - { - "epoch": 0.7984738936906809, - "grad_norm": 0.9244139790534973, - 
"learning_rate": 1.347666084517035e-05, - "loss": 0.3297, - "step": 8476 - }, - { - "epoch": 0.7985680977838487, - "grad_norm": 0.6993528008460999, - "learning_rate": 1.3475244984761893e-05, - "loss": 0.3188, - "step": 8477 - }, - { - "epoch": 0.7986623018770166, - "grad_norm": 0.7611246705055237, - "learning_rate": 1.3473829045112476e-05, - "loss": 0.2533, - "step": 8478 - }, - { - "epoch": 0.7987565059701844, - "grad_norm": 0.7787326574325562, - "learning_rate": 1.3472413026254385e-05, - "loss": 0.3388, - "step": 8479 - }, - { - "epoch": 0.7988507100633523, - "grad_norm": 0.7947128415107727, - "learning_rate": 1.3470996928219906e-05, - "loss": 0.3487, - "step": 8480 - }, - { - "epoch": 0.7989449141565201, - "grad_norm": 0.881558895111084, - "learning_rate": 1.3469580751041327e-05, - "loss": 0.356, - "step": 8481 - }, - { - "epoch": 0.799039118249688, - "grad_norm": 0.7980902791023254, - "learning_rate": 1.3468164494750944e-05, - "loss": 0.3097, - "step": 8482 - }, - { - "epoch": 0.7991333223428558, - "grad_norm": 0.6231949925422668, - "learning_rate": 1.3466748159381047e-05, - "loss": 0.2444, - "step": 8483 - }, - { - "epoch": 0.7992275264360237, - "grad_norm": 0.7653826475143433, - "learning_rate": 1.3465331744963928e-05, - "loss": 0.3333, - "step": 8484 - }, - { - "epoch": 0.7993217305291915, - "grad_norm": 0.7707959413528442, - "learning_rate": 1.346391525153189e-05, - "loss": 0.2909, - "step": 8485 - }, - { - "epoch": 0.7994159346223594, - "grad_norm": 0.8047236204147339, - "learning_rate": 1.3462498679117224e-05, - "loss": 0.3081, - "step": 8486 - }, - { - "epoch": 0.7995101387155272, - "grad_norm": 0.7369197010993958, - "learning_rate": 1.346108202775223e-05, - "loss": 0.3547, - "step": 8487 - }, - { - "epoch": 0.7996043428086951, - "grad_norm": 0.7479953169822693, - "learning_rate": 1.3459665297469221e-05, - "loss": 0.3355, - "step": 8488 - }, - { - "epoch": 0.7996985469018629, - "grad_norm": 0.8746562004089355, - "learning_rate": 1.3458248488300487e-05, - "loss": 0.3311, - "step": 8489 - }, - { - "epoch": 0.7997927509950308, - "grad_norm": 0.7730000615119934, - "learning_rate": 1.3456831600278341e-05, - "loss": 0.2851, - "step": 8490 - }, - { - "epoch": 0.7998869550881986, - "grad_norm": 0.6304970979690552, - "learning_rate": 1.3455414633435089e-05, - "loss": 0.2694, - "step": 8491 - }, - { - "epoch": 0.7999811591813665, - "grad_norm": 0.7707965970039368, - "learning_rate": 1.3453997587803039e-05, - "loss": 0.297, - "step": 8492 - }, - { - "epoch": 0.8000753632745343, - "grad_norm": 0.6389234066009521, - "learning_rate": 1.3452580463414501e-05, - "loss": 0.2608, - "step": 8493 - }, - { - "epoch": 0.8001695673677022, - "grad_norm": 0.8090130090713501, - "learning_rate": 1.3451163260301791e-05, - "loss": 0.331, - "step": 8494 - }, - { - "epoch": 0.80026377146087, - "grad_norm": 0.8407273292541504, - "learning_rate": 1.344974597849722e-05, - "loss": 0.3259, - "step": 8495 - }, - { - "epoch": 0.8003579755540379, - "grad_norm": 0.8513500690460205, - "learning_rate": 1.3448328618033109e-05, - "loss": 0.2912, - "step": 8496 - }, - { - "epoch": 0.8004521796472057, - "grad_norm": 0.7660210728645325, - "learning_rate": 1.3446911178941766e-05, - "loss": 0.3085, - "step": 8497 - }, - { - "epoch": 0.8005463837403736, - "grad_norm": 0.725410521030426, - "learning_rate": 1.3445493661255523e-05, - "loss": 0.2573, - "step": 8498 - }, - { - "epoch": 0.8006405878335414, - "grad_norm": 0.8393821120262146, - "learning_rate": 1.3444076065006692e-05, - "loss": 0.3604, - "step": 8499 - }, - { - "epoch": 
0.8007347919267093, - "grad_norm": 0.7441098093986511, - "learning_rate": 1.3442658390227604e-05, - "loss": 0.3247, - "step": 8500 - }, - { - "epoch": 0.8008289960198771, - "grad_norm": 0.7958167195320129, - "learning_rate": 1.3441240636950577e-05, - "loss": 0.356, - "step": 8501 - }, - { - "epoch": 0.800923200113045, - "grad_norm": 0.7425439357757568, - "learning_rate": 1.3439822805207942e-05, - "loss": 0.2755, - "step": 8502 - }, - { - "epoch": 0.8010174042062128, - "grad_norm": 1.0281422138214111, - "learning_rate": 1.3438404895032032e-05, - "loss": 0.3532, - "step": 8503 - }, - { - "epoch": 0.8011116082993807, - "grad_norm": 0.8411215543746948, - "learning_rate": 1.3436986906455167e-05, - "loss": 0.3284, - "step": 8504 - }, - { - "epoch": 0.8012058123925485, - "grad_norm": 0.7676801681518555, - "learning_rate": 1.343556883950969e-05, - "loss": 0.2912, - "step": 8505 - }, - { - "epoch": 0.8013000164857164, - "grad_norm": 0.7142693400382996, - "learning_rate": 1.3434150694227925e-05, - "loss": 0.3041, - "step": 8506 - }, - { - "epoch": 0.8013942205788841, - "grad_norm": 0.7693013548851013, - "learning_rate": 1.3432732470642216e-05, - "loss": 0.3037, - "step": 8507 - }, - { - "epoch": 0.801488424672052, - "grad_norm": 0.6982400417327881, - "learning_rate": 1.3431314168784899e-05, - "loss": 0.2771, - "step": 8508 - }, - { - "epoch": 0.8015826287652198, - "grad_norm": 0.8211315274238586, - "learning_rate": 1.342989578868831e-05, - "loss": 0.3382, - "step": 8509 - }, - { - "epoch": 0.8016768328583876, - "grad_norm": 0.6667990684509277, - "learning_rate": 1.3428477330384792e-05, - "loss": 0.2864, - "step": 8510 - }, - { - "epoch": 0.8017710369515555, - "grad_norm": 0.8006913065910339, - "learning_rate": 1.3427058793906693e-05, - "loss": 0.3313, - "step": 8511 - }, - { - "epoch": 0.8018652410447233, - "grad_norm": 0.7486048936843872, - "learning_rate": 1.3425640179286348e-05, - "loss": 0.3317, - "step": 8512 - }, - { - "epoch": 0.8019594451378912, - "grad_norm": 0.7044640183448792, - "learning_rate": 1.3424221486556113e-05, - "loss": 0.3234, - "step": 8513 - }, - { - "epoch": 0.802053649231059, - "grad_norm": 0.6485394835472107, - "learning_rate": 1.3422802715748331e-05, - "loss": 0.3069, - "step": 8514 - }, - { - "epoch": 0.8021478533242269, - "grad_norm": 0.575886607170105, - "learning_rate": 1.3421383866895355e-05, - "loss": 0.2625, - "step": 8515 - }, - { - "epoch": 0.8022420574173947, - "grad_norm": 0.6978061199188232, - "learning_rate": 1.3419964940029533e-05, - "loss": 0.2913, - "step": 8516 - }, - { - "epoch": 0.8023362615105626, - "grad_norm": 1.1715340614318848, - "learning_rate": 1.3418545935183222e-05, - "loss": 0.3217, - "step": 8517 - }, - { - "epoch": 0.8024304656037304, - "grad_norm": 1.9922049045562744, - "learning_rate": 1.3417126852388777e-05, - "loss": 0.295, - "step": 8518 - }, - { - "epoch": 0.8025246696968983, - "grad_norm": 0.7390188574790955, - "learning_rate": 1.3415707691678557e-05, - "loss": 0.3365, - "step": 8519 - }, - { - "epoch": 0.8026188737900661, - "grad_norm": 0.8231778740882874, - "learning_rate": 1.3414288453084918e-05, - "loss": 0.3827, - "step": 8520 - }, - { - "epoch": 0.802713077883234, - "grad_norm": 0.6996579170227051, - "learning_rate": 1.3412869136640221e-05, - "loss": 0.3223, - "step": 8521 - }, - { - "epoch": 0.8028072819764018, - "grad_norm": 1.0390639305114746, - "learning_rate": 1.341144974237683e-05, - "loss": 0.3384, - "step": 8522 - }, - { - "epoch": 0.8029014860695697, - "grad_norm": 0.6792234182357788, - "learning_rate": 
1.341003027032711e-05, - "loss": 0.2906, - "step": 8523 - }, - { - "epoch": 0.8029956901627375, - "grad_norm": 0.6705979108810425, - "learning_rate": 1.3408610720523423e-05, - "loss": 0.2696, - "step": 8524 - }, - { - "epoch": 0.8030898942559054, - "grad_norm": 0.8168100118637085, - "learning_rate": 1.3407191092998146e-05, - "loss": 0.3198, - "step": 8525 - }, - { - "epoch": 0.8031840983490732, - "grad_norm": 0.6742059588432312, - "learning_rate": 1.3405771387783637e-05, - "loss": 0.2927, - "step": 8526 - }, - { - "epoch": 0.8032783024422411, - "grad_norm": 0.6609785556793213, - "learning_rate": 1.3404351604912275e-05, - "loss": 0.2444, - "step": 8527 - }, - { - "epoch": 0.8033725065354089, - "grad_norm": 0.9732012748718262, - "learning_rate": 1.3402931744416432e-05, - "loss": 0.3582, - "step": 8528 - }, - { - "epoch": 0.8034667106285768, - "grad_norm": 0.7385396957397461, - "learning_rate": 1.3401511806328483e-05, - "loss": 0.3011, - "step": 8529 - }, - { - "epoch": 0.8035609147217446, - "grad_norm": 0.6966911554336548, - "learning_rate": 1.3400091790680802e-05, - "loss": 0.2904, - "step": 8530 - }, - { - "epoch": 0.8036551188149125, - "grad_norm": 0.6504485011100769, - "learning_rate": 1.3398671697505772e-05, - "loss": 0.2724, - "step": 8531 - }, - { - "epoch": 0.8037493229080803, - "grad_norm": 0.8288382291793823, - "learning_rate": 1.3397251526835771e-05, - "loss": 0.3318, - "step": 8532 - }, - { - "epoch": 0.8038435270012482, - "grad_norm": 0.6640185117721558, - "learning_rate": 1.339583127870318e-05, - "loss": 0.2726, - "step": 8533 - }, - { - "epoch": 0.803937731094416, - "grad_norm": 0.7489379644393921, - "learning_rate": 1.3394410953140384e-05, - "loss": 0.2984, - "step": 8534 - }, - { - "epoch": 0.8040319351875839, - "grad_norm": 0.6930230855941772, - "learning_rate": 1.3392990550179773e-05, - "loss": 0.2948, - "step": 8535 - }, - { - "epoch": 0.8041261392807517, - "grad_norm": 0.6925107836723328, - "learning_rate": 1.3391570069853725e-05, - "loss": 0.3007, - "step": 8536 - }, - { - "epoch": 0.8042203433739196, - "grad_norm": 0.6697828769683838, - "learning_rate": 1.3390149512194635e-05, - "loss": 0.2751, - "step": 8537 - }, - { - "epoch": 0.8043145474670874, - "grad_norm": 0.8221737146377563, - "learning_rate": 1.3388728877234894e-05, - "loss": 0.3089, - "step": 8538 - }, - { - "epoch": 0.8044087515602553, - "grad_norm": 0.7250677943229675, - "learning_rate": 1.3387308165006894e-05, - "loss": 0.3314, - "step": 8539 - }, - { - "epoch": 0.8045029556534231, - "grad_norm": 0.7382446527481079, - "learning_rate": 1.3385887375543029e-05, - "loss": 0.3181, - "step": 8540 - }, - { - "epoch": 0.804597159746591, - "grad_norm": 0.7235196828842163, - "learning_rate": 1.3384466508875696e-05, - "loss": 0.2801, - "step": 8541 - }, - { - "epoch": 0.8046913638397588, - "grad_norm": 0.7349414825439453, - "learning_rate": 1.3383045565037292e-05, - "loss": 0.3033, - "step": 8542 - }, - { - "epoch": 0.8047855679329267, - "grad_norm": 0.7318891882896423, - "learning_rate": 1.3381624544060219e-05, - "loss": 0.32, - "step": 8543 - }, - { - "epoch": 0.8048797720260945, - "grad_norm": 0.7649834156036377, - "learning_rate": 1.3380203445976871e-05, - "loss": 0.3422, - "step": 8544 - }, - { - "epoch": 0.8049739761192624, - "grad_norm": 0.80607008934021, - "learning_rate": 1.3378782270819663e-05, - "loss": 0.3185, - "step": 8545 - }, - { - "epoch": 0.8050681802124302, - "grad_norm": 0.7321194410324097, - "learning_rate": 1.3377361018620991e-05, - "loss": 0.2997, - "step": 8546 - }, - { - "epoch": 
0.8051623843055981, - "grad_norm": 0.7480506896972656, - "learning_rate": 1.3375939689413264e-05, - "loss": 0.3253, - "step": 8547 - }, - { - "epoch": 0.8052565883987659, - "grad_norm": 0.7666994333267212, - "learning_rate": 1.3374518283228895e-05, - "loss": 0.3142, - "step": 8548 - }, - { - "epoch": 0.8053507924919338, - "grad_norm": 0.8464433550834656, - "learning_rate": 1.3373096800100285e-05, - "loss": 0.3648, - "step": 8549 - }, - { - "epoch": 0.8054449965851016, - "grad_norm": 0.8145337700843811, - "learning_rate": 1.3371675240059853e-05, - "loss": 0.3464, - "step": 8550 - }, - { - "epoch": 0.8055392006782695, - "grad_norm": 0.7238718271255493, - "learning_rate": 1.3370253603140013e-05, - "loss": 0.2839, - "step": 8551 - }, - { - "epoch": 0.8056334047714373, - "grad_norm": 0.6761792302131653, - "learning_rate": 1.3368831889373178e-05, - "loss": 0.3055, - "step": 8552 - }, - { - "epoch": 0.8057276088646051, - "grad_norm": 0.745002031326294, - "learning_rate": 1.3367410098791764e-05, - "loss": 0.305, - "step": 8553 - }, - { - "epoch": 0.805821812957773, - "grad_norm": 0.7277778387069702, - "learning_rate": 1.3365988231428194e-05, - "loss": 0.2918, - "step": 8554 - }, - { - "epoch": 0.8059160170509408, - "grad_norm": 0.8258783221244812, - "learning_rate": 1.3364566287314888e-05, - "loss": 0.2675, - "step": 8555 - }, - { - "epoch": 0.8060102211441087, - "grad_norm": 0.6759886741638184, - "learning_rate": 1.3363144266484268e-05, - "loss": 0.2788, - "step": 8556 - }, - { - "epoch": 0.8061044252372765, - "grad_norm": 0.7083367705345154, - "learning_rate": 1.3361722168968751e-05, - "loss": 0.3005, - "step": 8557 - }, - { - "epoch": 0.8061986293304444, - "grad_norm": 0.7281975150108337, - "learning_rate": 1.3360299994800774e-05, - "loss": 0.3118, - "step": 8558 - }, - { - "epoch": 0.8062928334236122, - "grad_norm": 0.6623421907424927, - "learning_rate": 1.3358877744012762e-05, - "loss": 0.2769, - "step": 8559 - }, - { - "epoch": 0.8063870375167801, - "grad_norm": 0.6906394362449646, - "learning_rate": 1.3357455416637139e-05, - "loss": 0.2747, - "step": 8560 - }, - { - "epoch": 0.8064812416099479, - "grad_norm": 0.8033696413040161, - "learning_rate": 1.3356033012706342e-05, - "loss": 0.3488, - "step": 8561 - }, - { - "epoch": 0.8065754457031158, - "grad_norm": 0.7539247274398804, - "learning_rate": 1.3354610532252803e-05, - "loss": 0.3444, - "step": 8562 - }, - { - "epoch": 0.8066696497962836, - "grad_norm": 0.7532894611358643, - "learning_rate": 1.3353187975308954e-05, - "loss": 0.2898, - "step": 8563 - }, - { - "epoch": 0.8067638538894515, - "grad_norm": 0.7262154817581177, - "learning_rate": 1.3351765341907232e-05, - "loss": 0.3176, - "step": 8564 - }, - { - "epoch": 0.8068580579826193, - "grad_norm": 0.7816476225852966, - "learning_rate": 1.3350342632080081e-05, - "loss": 0.2781, - "step": 8565 - }, - { - "epoch": 0.8069522620757872, - "grad_norm": 0.8794928789138794, - "learning_rate": 1.3348919845859934e-05, - "loss": 0.3208, - "step": 8566 - }, - { - "epoch": 0.807046466168955, - "grad_norm": 0.7433298826217651, - "learning_rate": 1.3347496983279235e-05, - "loss": 0.2907, - "step": 8567 - }, - { - "epoch": 0.8071406702621229, - "grad_norm": 0.893782377243042, - "learning_rate": 1.334607404437043e-05, - "loss": 0.2912, - "step": 8568 - }, - { - "epoch": 0.8072348743552907, - "grad_norm": 0.7018021941184998, - "learning_rate": 1.3344651029165959e-05, - "loss": 0.2955, - "step": 8569 - }, - { - "epoch": 0.8073290784484586, - "grad_norm": 0.7630308270454407, - "learning_rate": 
1.3343227937698273e-05, - "loss": 0.307, - "step": 8570 - }, - { - "epoch": 0.8074232825416264, - "grad_norm": 0.7988154888153076, - "learning_rate": 1.334180476999982e-05, - "loss": 0.3109, - "step": 8571 - }, - { - "epoch": 0.8075174866347943, - "grad_norm": 0.7222198843955994, - "learning_rate": 1.334038152610305e-05, - "loss": 0.3285, - "step": 8572 - }, - { - "epoch": 0.8076116907279621, - "grad_norm": 0.6700325012207031, - "learning_rate": 1.3338958206040418e-05, - "loss": 0.3099, - "step": 8573 - }, - { - "epoch": 0.80770589482113, - "grad_norm": 0.7429363131523132, - "learning_rate": 1.3337534809844371e-05, - "loss": 0.3587, - "step": 8574 - }, - { - "epoch": 0.8078000989142978, - "grad_norm": 0.7255023717880249, - "learning_rate": 1.3336111337547369e-05, - "loss": 0.2952, - "step": 8575 - }, - { - "epoch": 0.8078943030074657, - "grad_norm": 0.8426859378814697, - "learning_rate": 1.333468778918187e-05, - "loss": 0.3165, - "step": 8576 - }, - { - "epoch": 0.8079885071006335, - "grad_norm": 0.6773214340209961, - "learning_rate": 1.333326416478033e-05, - "loss": 0.3051, - "step": 8577 - }, - { - "epoch": 0.8080827111938014, - "grad_norm": 0.8392713665962219, - "learning_rate": 1.3331840464375216e-05, - "loss": 0.3193, - "step": 8578 - }, - { - "epoch": 0.8081769152869692, - "grad_norm": 0.7934224605560303, - "learning_rate": 1.3330416687998987e-05, - "loss": 0.3303, - "step": 8579 - }, - { - "epoch": 0.8082711193801371, - "grad_norm": 0.7700300812721252, - "learning_rate": 1.3328992835684105e-05, - "loss": 0.3536, - "step": 8580 - }, - { - "epoch": 0.8083653234733049, - "grad_norm": 0.7183011174201965, - "learning_rate": 1.3327568907463036e-05, - "loss": 0.3046, - "step": 8581 - }, - { - "epoch": 0.8084595275664728, - "grad_norm": 0.7285114526748657, - "learning_rate": 1.3326144903368253e-05, - "loss": 0.2879, - "step": 8582 - }, - { - "epoch": 0.8085537316596406, - "grad_norm": 0.6812309622764587, - "learning_rate": 1.332472082343222e-05, - "loss": 0.2987, - "step": 8583 - }, - { - "epoch": 0.8086479357528085, - "grad_norm": 0.8201087117195129, - "learning_rate": 1.332329666768741e-05, - "loss": 0.315, - "step": 8584 - }, - { - "epoch": 0.8087421398459763, - "grad_norm": 0.768406331539154, - "learning_rate": 1.33218724361663e-05, - "loss": 0.2689, - "step": 8585 - }, - { - "epoch": 0.8088363439391442, - "grad_norm": 0.7975949048995972, - "learning_rate": 1.3320448128901357e-05, - "loss": 0.3409, - "step": 8586 - }, - { - "epoch": 0.808930548032312, - "grad_norm": 0.6615134477615356, - "learning_rate": 1.3319023745925064e-05, - "loss": 0.2684, - "step": 8587 - }, - { - "epoch": 0.8090247521254799, - "grad_norm": 0.7283914089202881, - "learning_rate": 1.3317599287269896e-05, - "loss": 0.3284, - "step": 8588 - }, - { - "epoch": 0.8091189562186477, - "grad_norm": 0.6497752070426941, - "learning_rate": 1.3316174752968331e-05, - "loss": 0.2859, - "step": 8589 - }, - { - "epoch": 0.8092131603118156, - "grad_norm": 0.8077973127365112, - "learning_rate": 1.3314750143052857e-05, - "loss": 0.2659, - "step": 8590 - }, - { - "epoch": 0.8093073644049834, - "grad_norm": 0.645475447177887, - "learning_rate": 1.3313325457555953e-05, - "loss": 0.2922, - "step": 8591 - }, - { - "epoch": 0.8094015684981513, - "grad_norm": 0.7311770915985107, - "learning_rate": 1.33119006965101e-05, - "loss": 0.3266, - "step": 8592 - }, - { - "epoch": 0.8094957725913191, - "grad_norm": 0.8049769997596741, - "learning_rate": 1.3310475859947796e-05, - "loss": 0.3088, - "step": 8593 - }, - { - "epoch": 0.809589976684487, - 
"grad_norm": 0.8716092705726624, - "learning_rate": 1.3309050947901518e-05, - "loss": 0.3068, - "step": 8594 - }, - { - "epoch": 0.8096841807776548, - "grad_norm": 0.7258637547492981, - "learning_rate": 1.3307625960403763e-05, - "loss": 0.2921, - "step": 8595 - }, - { - "epoch": 0.8097783848708227, - "grad_norm": 0.7388122081756592, - "learning_rate": 1.330620089748702e-05, - "loss": 0.2809, - "step": 8596 - }, - { - "epoch": 0.8098725889639905, - "grad_norm": 0.6835913062095642, - "learning_rate": 1.330477575918378e-05, - "loss": 0.3199, - "step": 8597 - }, - { - "epoch": 0.8099667930571584, - "grad_norm": 0.7428380250930786, - "learning_rate": 1.3303350545526545e-05, - "loss": 0.3051, - "step": 8598 - }, - { - "epoch": 0.8100609971503262, - "grad_norm": 0.7595810294151306, - "learning_rate": 1.330192525654781e-05, - "loss": 0.3717, - "step": 8599 - }, - { - "epoch": 0.810155201243494, - "grad_norm": 1.198453426361084, - "learning_rate": 1.3300499892280071e-05, - "loss": 0.3035, - "step": 8600 - }, - { - "epoch": 0.8102494053366619, - "grad_norm": 0.6743183135986328, - "learning_rate": 1.3299074452755829e-05, - "loss": 0.3328, - "step": 8601 - }, - { - "epoch": 0.8103436094298297, - "grad_norm": 0.7611492276191711, - "learning_rate": 1.329764893800759e-05, - "loss": 0.2831, - "step": 8602 - }, - { - "epoch": 0.8104378135229976, - "grad_norm": 0.7529149055480957, - "learning_rate": 1.3296223348067855e-05, - "loss": 0.2884, - "step": 8603 - }, - { - "epoch": 0.8105320176161654, - "grad_norm": 0.6950092911720276, - "learning_rate": 1.3294797682969127e-05, - "loss": 0.2806, - "step": 8604 - }, - { - "epoch": 0.8106262217093333, - "grad_norm": 0.7328523993492126, - "learning_rate": 1.329337194274392e-05, - "loss": 0.3027, - "step": 8605 - }, - { - "epoch": 0.8107204258025011, - "grad_norm": 0.7506288290023804, - "learning_rate": 1.3291946127424738e-05, - "loss": 0.3624, - "step": 8606 - }, - { - "epoch": 0.810814629895669, - "grad_norm": 0.8609302639961243, - "learning_rate": 1.3290520237044094e-05, - "loss": 0.3084, - "step": 8607 - }, - { - "epoch": 0.8109088339888368, - "grad_norm": 0.876285195350647, - "learning_rate": 1.3289094271634498e-05, - "loss": 0.3229, - "step": 8608 - }, - { - "epoch": 0.8110030380820047, - "grad_norm": 0.720151960849762, - "learning_rate": 1.3287668231228465e-05, - "loss": 0.2708, - "step": 8609 - }, - { - "epoch": 0.8110972421751725, - "grad_norm": 0.783524751663208, - "learning_rate": 1.3286242115858515e-05, - "loss": 0.3274, - "step": 8610 - }, - { - "epoch": 0.8111914462683404, - "grad_norm": 0.7985210418701172, - "learning_rate": 1.328481592555716e-05, - "loss": 0.3124, - "step": 8611 - }, - { - "epoch": 0.8112856503615082, - "grad_norm": 0.7318972945213318, - "learning_rate": 1.328338966035692e-05, - "loss": 0.3238, - "step": 8612 - }, - { - "epoch": 0.8113798544546761, - "grad_norm": 0.7627847790718079, - "learning_rate": 1.3281963320290325e-05, - "loss": 0.2968, - "step": 8613 - }, - { - "epoch": 0.8114740585478439, - "grad_norm": 0.7643986940383911, - "learning_rate": 1.3280536905389885e-05, - "loss": 0.289, - "step": 8614 - }, - { - "epoch": 0.8115682626410118, - "grad_norm": 0.6229587197303772, - "learning_rate": 1.3279110415688133e-05, - "loss": 0.2873, - "step": 8615 - }, - { - "epoch": 0.8116624667341796, - "grad_norm": 0.8171026110649109, - "learning_rate": 1.3277683851217588e-05, - "loss": 0.3554, - "step": 8616 - }, - { - "epoch": 0.8117566708273475, - "grad_norm": 0.663148045539856, - "learning_rate": 1.3276257212010784e-05, - "loss": 0.2949, - 
"step": 8617 - }, - { - "epoch": 0.8118508749205153, - "grad_norm": 0.943187415599823, - "learning_rate": 1.3274830498100251e-05, - "loss": 0.3426, - "step": 8618 - }, - { - "epoch": 0.8119450790136832, - "grad_norm": 0.8217980265617371, - "learning_rate": 1.3273403709518518e-05, - "loss": 0.2635, - "step": 8619 - }, - { - "epoch": 0.812039283106851, - "grad_norm": 0.678004801273346, - "learning_rate": 1.3271976846298116e-05, - "loss": 0.2893, - "step": 8620 - }, - { - "epoch": 0.8121334872000189, - "grad_norm": 0.7480604648590088, - "learning_rate": 1.3270549908471581e-05, - "loss": 0.3442, - "step": 8621 - }, - { - "epoch": 0.8122276912931867, - "grad_norm": 0.6902352571487427, - "learning_rate": 1.3269122896071452e-05, - "loss": 0.3241, - "step": 8622 - }, - { - "epoch": 0.8123218953863546, - "grad_norm": 0.7468249797821045, - "learning_rate": 1.3267695809130264e-05, - "loss": 0.3032, - "step": 8623 - }, - { - "epoch": 0.8124160994795224, - "grad_norm": 0.7557992935180664, - "learning_rate": 1.3266268647680558e-05, - "loss": 0.2954, - "step": 8624 - }, - { - "epoch": 0.8125103035726903, - "grad_norm": 0.6958978176116943, - "learning_rate": 1.3264841411754878e-05, - "loss": 0.3289, - "step": 8625 - }, - { - "epoch": 0.8126045076658581, - "grad_norm": 0.7431649565696716, - "learning_rate": 1.326341410138576e-05, - "loss": 0.3093, - "step": 8626 - }, - { - "epoch": 0.812698711759026, - "grad_norm": 0.6600211262702942, - "learning_rate": 1.3261986716605756e-05, - "loss": 0.295, - "step": 8627 - }, - { - "epoch": 0.8127929158521938, - "grad_norm": 0.787713348865509, - "learning_rate": 1.3260559257447412e-05, - "loss": 0.3667, - "step": 8628 - }, - { - "epoch": 0.8128871199453617, - "grad_norm": 0.7798208594322205, - "learning_rate": 1.3259131723943273e-05, - "loss": 0.3057, - "step": 8629 - }, - { - "epoch": 0.8129813240385295, - "grad_norm": 0.7302875518798828, - "learning_rate": 1.3257704116125889e-05, - "loss": 0.2969, - "step": 8630 - }, - { - "epoch": 0.8130755281316974, - "grad_norm": 0.7487370371818542, - "learning_rate": 1.3256276434027815e-05, - "loss": 0.3595, - "step": 8631 - }, - { - "epoch": 0.8131697322248652, - "grad_norm": 0.658248245716095, - "learning_rate": 1.32548486776816e-05, - "loss": 0.3326, - "step": 8632 - }, - { - "epoch": 0.8132639363180331, - "grad_norm": 0.797067403793335, - "learning_rate": 1.3253420847119804e-05, - "loss": 0.3373, - "step": 8633 - }, - { - "epoch": 0.8133581404112009, - "grad_norm": 0.6556969881057739, - "learning_rate": 1.3251992942374978e-05, - "loss": 0.2896, - "step": 8634 - }, - { - "epoch": 0.8134523445043688, - "grad_norm": 0.665752112865448, - "learning_rate": 1.3250564963479686e-05, - "loss": 0.2911, - "step": 8635 - }, - { - "epoch": 0.8135465485975366, - "grad_norm": 0.8131089210510254, - "learning_rate": 1.3249136910466487e-05, - "loss": 0.3108, - "step": 8636 - }, - { - "epoch": 0.8136407526907045, - "grad_norm": 0.7381729483604431, - "learning_rate": 1.3247708783367939e-05, - "loss": 0.3565, - "step": 8637 - }, - { - "epoch": 0.8137349567838723, - "grad_norm": 0.6386496424674988, - "learning_rate": 1.3246280582216608e-05, - "loss": 0.2983, - "step": 8638 - }, - { - "epoch": 0.8138291608770402, - "grad_norm": 0.6995457410812378, - "learning_rate": 1.3244852307045062e-05, - "loss": 0.3003, - "step": 8639 - }, - { - "epoch": 0.813923364970208, - "grad_norm": 0.7102161049842834, - "learning_rate": 1.3243423957885864e-05, - "loss": 0.3335, - "step": 8640 - }, - { - "epoch": 0.8140175690633759, - "grad_norm": 0.6372678279876709, - 
"learning_rate": 1.324199553477158e-05, - "loss": 0.303, - "step": 8641 - }, - { - "epoch": 0.8141117731565437, - "grad_norm": 0.726729154586792, - "learning_rate": 1.3240567037734789e-05, - "loss": 0.2963, - "step": 8642 - }, - { - "epoch": 0.8142059772497116, - "grad_norm": 0.8117896914482117, - "learning_rate": 1.3239138466808055e-05, - "loss": 0.3094, - "step": 8643 - }, - { - "epoch": 0.8143001813428794, - "grad_norm": 0.7660683393478394, - "learning_rate": 1.3237709822023956e-05, - "loss": 0.3232, - "step": 8644 - }, - { - "epoch": 0.8143943854360471, - "grad_norm": 0.7786475419998169, - "learning_rate": 1.3236281103415064e-05, - "loss": 0.3104, - "step": 8645 - }, - { - "epoch": 0.814488589529215, - "grad_norm": 0.6333548426628113, - "learning_rate": 1.3234852311013959e-05, - "loss": 0.3038, - "step": 8646 - }, - { - "epoch": 0.8145827936223828, - "grad_norm": 0.6839102506637573, - "learning_rate": 1.3233423444853219e-05, - "loss": 0.2659, - "step": 8647 - }, - { - "epoch": 0.8146769977155507, - "grad_norm": 0.7871906757354736, - "learning_rate": 1.3231994504965424e-05, - "loss": 0.2786, - "step": 8648 - }, - { - "epoch": 0.8147712018087185, - "grad_norm": 0.8053258061408997, - "learning_rate": 1.3230565491383153e-05, - "loss": 0.3204, - "step": 8649 - }, - { - "epoch": 0.8148654059018864, - "grad_norm": 0.9789053201675415, - "learning_rate": 1.3229136404138996e-05, - "loss": 0.3266, - "step": 8650 - }, - { - "epoch": 0.8149596099950542, - "grad_norm": 0.6746540069580078, - "learning_rate": 1.3227707243265534e-05, - "loss": 0.2996, - "step": 8651 - }, - { - "epoch": 0.8150538140882221, - "grad_norm": 0.6631419658660889, - "learning_rate": 1.3226278008795355e-05, - "loss": 0.3128, - "step": 8652 - }, - { - "epoch": 0.8151480181813899, - "grad_norm": 0.679245114326477, - "learning_rate": 1.322484870076105e-05, - "loss": 0.2672, - "step": 8653 - }, - { - "epoch": 0.8152422222745578, - "grad_norm": 0.6654112339019775, - "learning_rate": 1.3223419319195206e-05, - "loss": 0.3015, - "step": 8654 - }, - { - "epoch": 0.8153364263677256, - "grad_norm": 0.7208624482154846, - "learning_rate": 1.3221989864130414e-05, - "loss": 0.3241, - "step": 8655 - }, - { - "epoch": 0.8154306304608935, - "grad_norm": 0.6933826804161072, - "learning_rate": 1.3220560335599272e-05, - "loss": 0.3262, - "step": 8656 - }, - { - "epoch": 0.8155248345540613, - "grad_norm": 0.6548870801925659, - "learning_rate": 1.3219130733634374e-05, - "loss": 0.2805, - "step": 8657 - }, - { - "epoch": 0.8156190386472292, - "grad_norm": 0.81364905834198, - "learning_rate": 1.3217701058268315e-05, - "loss": 0.318, - "step": 8658 - }, - { - "epoch": 0.815713242740397, - "grad_norm": 0.7878438830375671, - "learning_rate": 1.32162713095337e-05, - "loss": 0.3259, - "step": 8659 - }, - { - "epoch": 0.8158074468335649, - "grad_norm": 0.8270474672317505, - "learning_rate": 1.321484148746312e-05, - "loss": 0.3321, - "step": 8660 - }, - { - "epoch": 0.8159016509267327, - "grad_norm": 0.7580147981643677, - "learning_rate": 1.3213411592089184e-05, - "loss": 0.3335, - "step": 8661 - }, - { - "epoch": 0.8159958550199006, - "grad_norm": 0.7953311204910278, - "learning_rate": 1.3211981623444494e-05, - "loss": 0.2963, - "step": 8662 - }, - { - "epoch": 0.8160900591130684, - "grad_norm": 0.6340729594230652, - "learning_rate": 1.3210551581561657e-05, - "loss": 0.2878, - "step": 8663 - }, - { - "epoch": 0.8161842632062363, - "grad_norm": 0.6745175123214722, - "learning_rate": 1.3209121466473278e-05, - "loss": 0.311, - "step": 8664 - }, - { - "epoch": 
0.8162784672994041, - "grad_norm": 1.4273256063461304, - "learning_rate": 1.3207691278211967e-05, - "loss": 0.3401, - "step": 8665 - }, - { - "epoch": 0.816372671392572, - "grad_norm": 0.7651469111442566, - "learning_rate": 1.320626101681033e-05, - "loss": 0.3327, - "step": 8666 - }, - { - "epoch": 0.8164668754857398, - "grad_norm": 0.6671456694602966, - "learning_rate": 1.3204830682300988e-05, - "loss": 0.3076, - "step": 8667 - }, - { - "epoch": 0.8165610795789077, - "grad_norm": 1.3122570514678955, - "learning_rate": 1.3203400274716549e-05, - "loss": 0.3336, - "step": 8668 - }, - { - "epoch": 0.8166552836720755, - "grad_norm": 0.6474205255508423, - "learning_rate": 1.3201969794089629e-05, - "loss": 0.3058, - "step": 8669 - }, - { - "epoch": 0.8167494877652434, - "grad_norm": 0.6456495523452759, - "learning_rate": 1.3200539240452847e-05, - "loss": 0.2962, - "step": 8670 - }, - { - "epoch": 0.8168436918584112, - "grad_norm": 0.6807658076286316, - "learning_rate": 1.3199108613838818e-05, - "loss": 0.3159, - "step": 8671 - }, - { - "epoch": 0.8169378959515791, - "grad_norm": 0.9739202260971069, - "learning_rate": 1.3197677914280166e-05, - "loss": 0.359, - "step": 8672 - }, - { - "epoch": 0.8170321000447469, - "grad_norm": 0.8745432496070862, - "learning_rate": 1.3196247141809515e-05, - "loss": 0.3103, - "step": 8673 - }, - { - "epoch": 0.8171263041379148, - "grad_norm": 0.843333899974823, - "learning_rate": 1.3194816296459483e-05, - "loss": 0.3447, - "step": 8674 - }, - { - "epoch": 0.8172205082310826, - "grad_norm": 0.8933060765266418, - "learning_rate": 1.31933853782627e-05, - "loss": 0.3144, - "step": 8675 - }, - { - "epoch": 0.8173147123242505, - "grad_norm": 0.7140727043151855, - "learning_rate": 1.319195438725179e-05, - "loss": 0.3006, - "step": 8676 - }, - { - "epoch": 0.8174089164174183, - "grad_norm": 0.8899981379508972, - "learning_rate": 1.3190523323459385e-05, - "loss": 0.3025, - "step": 8677 - }, - { - "epoch": 0.8175031205105862, - "grad_norm": 0.7381685376167297, - "learning_rate": 1.3189092186918113e-05, - "loss": 0.2817, - "step": 8678 - }, - { - "epoch": 0.817597324603754, - "grad_norm": 0.7613934874534607, - "learning_rate": 1.3187660977660608e-05, - "loss": 0.3594, - "step": 8679 - }, - { - "epoch": 0.8176915286969219, - "grad_norm": 0.7203506231307983, - "learning_rate": 1.3186229695719504e-05, - "loss": 0.3037, - "step": 8680 - }, - { - "epoch": 0.8177857327900897, - "grad_norm": 0.7194360494613647, - "learning_rate": 1.3184798341127435e-05, - "loss": 0.3096, - "step": 8681 - }, - { - "epoch": 0.8178799368832576, - "grad_norm": 0.807916522026062, - "learning_rate": 1.3183366913917036e-05, - "loss": 0.3621, - "step": 8682 - }, - { - "epoch": 0.8179741409764254, - "grad_norm": 0.6928552389144897, - "learning_rate": 1.318193541412095e-05, - "loss": 0.3188, - "step": 8683 - }, - { - "epoch": 0.8180683450695933, - "grad_norm": 0.7741749882698059, - "learning_rate": 1.3180503841771817e-05, - "loss": 0.3321, - "step": 8684 - }, - { - "epoch": 0.8181625491627611, - "grad_norm": 0.7837627530097961, - "learning_rate": 1.3179072196902274e-05, - "loss": 0.3519, - "step": 8685 - }, - { - "epoch": 0.818256753255929, - "grad_norm": 0.6680743098258972, - "learning_rate": 1.317764047954497e-05, - "loss": 0.3106, - "step": 8686 - }, - { - "epoch": 0.8183509573490968, - "grad_norm": 0.7467548847198486, - "learning_rate": 1.317620868973255e-05, - "loss": 0.3074, - "step": 8687 - }, - { - "epoch": 0.8184451614422646, - "grad_norm": 0.6809502840042114, - "learning_rate": 
1.317477682749766e-05, - "loss": 0.2725, - "step": 8688 - }, - { - "epoch": 0.8185393655354325, - "grad_norm": 0.7166069746017456, - "learning_rate": 1.3173344892872946e-05, - "loss": 0.3012, - "step": 8689 - }, - { - "epoch": 0.8186335696286003, - "grad_norm": 0.7136755585670471, - "learning_rate": 1.3171912885891063e-05, - "loss": 0.2999, - "step": 8690 - }, - { - "epoch": 0.8187277737217682, - "grad_norm": 0.7500886917114258, - "learning_rate": 1.3170480806584658e-05, - "loss": 0.2897, - "step": 8691 - }, - { - "epoch": 0.818821977814936, - "grad_norm": 0.7573025822639465, - "learning_rate": 1.3169048654986387e-05, - "loss": 0.2996, - "step": 8692 - }, - { - "epoch": 0.8189161819081039, - "grad_norm": 0.897589385509491, - "learning_rate": 1.316761643112891e-05, - "loss": 0.3356, - "step": 8693 - }, - { - "epoch": 0.8190103860012717, - "grad_norm": 0.7774412035942078, - "learning_rate": 1.3166184135044877e-05, - "loss": 0.322, - "step": 8694 - }, - { - "epoch": 0.8191045900944396, - "grad_norm": 0.7652751207351685, - "learning_rate": 1.316475176676695e-05, - "loss": 0.3587, - "step": 8695 - }, - { - "epoch": 0.8191987941876074, - "grad_norm": 0.7835428714752197, - "learning_rate": 1.3163319326327788e-05, - "loss": 0.3127, - "step": 8696 - }, - { - "epoch": 0.8192929982807753, - "grad_norm": 0.8188920021057129, - "learning_rate": 1.3161886813760052e-05, - "loss": 0.2686, - "step": 8697 - }, - { - "epoch": 0.8193872023739431, - "grad_norm": 0.7695569396018982, - "learning_rate": 1.316045422909641e-05, - "loss": 0.3348, - "step": 8698 - }, - { - "epoch": 0.819481406467111, - "grad_norm": 0.6132376790046692, - "learning_rate": 1.3159021572369522e-05, - "loss": 0.2984, - "step": 8699 - }, - { - "epoch": 0.8195756105602788, - "grad_norm": 0.7349730730056763, - "learning_rate": 1.3157588843612055e-05, - "loss": 0.3424, - "step": 8700 - }, - { - "epoch": 0.8196698146534467, - "grad_norm": 0.8512101173400879, - "learning_rate": 1.3156156042856678e-05, - "loss": 0.3698, - "step": 8701 - }, - { - "epoch": 0.8197640187466145, - "grad_norm": 0.7094159722328186, - "learning_rate": 1.3154723170136065e-05, - "loss": 0.2975, - "step": 8702 - }, - { - "epoch": 0.8198582228397824, - "grad_norm": 0.7288559079170227, - "learning_rate": 1.3153290225482884e-05, - "loss": 0.3033, - "step": 8703 - }, - { - "epoch": 0.8199524269329502, - "grad_norm": 1.3191355466842651, - "learning_rate": 1.315185720892981e-05, - "loss": 0.2933, - "step": 8704 - }, - { - "epoch": 0.8200466310261181, - "grad_norm": 0.6795741319656372, - "learning_rate": 1.3150424120509518e-05, - "loss": 0.3179, - "step": 8705 - }, - { - "epoch": 0.8201408351192859, - "grad_norm": 0.8080680966377258, - "learning_rate": 1.3148990960254683e-05, - "loss": 0.2983, - "step": 8706 - }, - { - "epoch": 0.8202350392124538, - "grad_norm": 0.6550801396369934, - "learning_rate": 1.3147557728197984e-05, - "loss": 0.2597, - "step": 8707 - }, - { - "epoch": 0.8203292433056216, - "grad_norm": 0.7870469093322754, - "learning_rate": 1.31461244243721e-05, - "loss": 0.323, - "step": 8708 - }, - { - "epoch": 0.8204234473987895, - "grad_norm": 0.9124608635902405, - "learning_rate": 1.3144691048809713e-05, - "loss": 0.3393, - "step": 8709 - }, - { - "epoch": 0.8205176514919573, - "grad_norm": 0.6336477398872375, - "learning_rate": 1.314325760154351e-05, - "loss": 0.2425, - "step": 8710 - }, - { - "epoch": 0.8206118555851252, - "grad_norm": 0.7198314070701599, - "learning_rate": 1.3141824082606167e-05, - "loss": 0.3006, - "step": 8711 - }, - { - "epoch": 
0.820706059678293, - "grad_norm": 0.7337607741355896, - "learning_rate": 1.314039049203038e-05, - "loss": 0.314, - "step": 8712 - }, - { - "epoch": 0.8208002637714609, - "grad_norm": 0.7488465309143066, - "learning_rate": 1.3138956829848834e-05, - "loss": 0.2873, - "step": 8713 - }, - { - "epoch": 0.8208944678646287, - "grad_norm": 0.6986730694770813, - "learning_rate": 1.3137523096094216e-05, - "loss": 0.3213, - "step": 8714 - }, - { - "epoch": 0.8209886719577966, - "grad_norm": 0.76261967420578, - "learning_rate": 1.3136089290799219e-05, - "loss": 0.3603, - "step": 8715 - }, - { - "epoch": 0.8210828760509644, - "grad_norm": 0.7061699628829956, - "learning_rate": 1.3134655413996538e-05, - "loss": 0.3344, - "step": 8716 - }, - { - "epoch": 0.8211770801441323, - "grad_norm": 0.6306097507476807, - "learning_rate": 1.3133221465718861e-05, - "loss": 0.295, - "step": 8717 - }, - { - "epoch": 0.8212712842373001, - "grad_norm": 0.7545305490493774, - "learning_rate": 1.3131787445998894e-05, - "loss": 0.3605, - "step": 8718 - }, - { - "epoch": 0.821365488330468, - "grad_norm": 0.6534841060638428, - "learning_rate": 1.3130353354869327e-05, - "loss": 0.3048, - "step": 8719 - }, - { - "epoch": 0.8214596924236358, - "grad_norm": 0.7317593693733215, - "learning_rate": 1.3128919192362864e-05, - "loss": 0.2834, - "step": 8720 - }, - { - "epoch": 0.8215538965168037, - "grad_norm": 0.709783673286438, - "learning_rate": 1.3127484958512202e-05, - "loss": 0.2992, - "step": 8721 - }, - { - "epoch": 0.8216481006099715, - "grad_norm": 0.782183825969696, - "learning_rate": 1.3126050653350049e-05, - "loss": 0.3334, - "step": 8722 - }, - { - "epoch": 0.8217423047031394, - "grad_norm": 0.7888424396514893, - "learning_rate": 1.3124616276909106e-05, - "loss": 0.3249, - "step": 8723 - }, - { - "epoch": 0.8218365087963072, - "grad_norm": 0.7548894286155701, - "learning_rate": 1.312318182922208e-05, - "loss": 0.3512, - "step": 8724 - }, - { - "epoch": 0.8219307128894751, - "grad_norm": 0.7767762541770935, - "learning_rate": 1.3121747310321678e-05, - "loss": 0.3204, - "step": 8725 - }, - { - "epoch": 0.8220249169826429, - "grad_norm": 0.8024447560310364, - "learning_rate": 1.3120312720240607e-05, - "loss": 0.3089, - "step": 8726 - }, - { - "epoch": 0.8221191210758108, - "grad_norm": 0.7439122796058655, - "learning_rate": 1.3118878059011583e-05, - "loss": 0.3111, - "step": 8727 - }, - { - "epoch": 0.8222133251689786, - "grad_norm": 0.7834168076515198, - "learning_rate": 1.3117443326667316e-05, - "loss": 0.2829, - "step": 8728 - }, - { - "epoch": 0.8223075292621465, - "grad_norm": 0.7352764010429382, - "learning_rate": 1.3116008523240518e-05, - "loss": 0.2919, - "step": 8729 - }, - { - "epoch": 0.8224017333553143, - "grad_norm": 0.6868788599967957, - "learning_rate": 1.311457364876391e-05, - "loss": 0.2825, - "step": 8730 - }, - { - "epoch": 0.8224959374484822, - "grad_norm": 0.6694072484970093, - "learning_rate": 1.3113138703270201e-05, - "loss": 0.3106, - "step": 8731 - }, - { - "epoch": 0.82259014154165, - "grad_norm": 0.6664419174194336, - "learning_rate": 1.3111703686792115e-05, - "loss": 0.2988, - "step": 8732 - }, - { - "epoch": 0.8226843456348178, - "grad_norm": 0.7029130458831787, - "learning_rate": 1.3110268599362378e-05, - "loss": 0.2944, - "step": 8733 - }, - { - "epoch": 0.8227785497279857, - "grad_norm": 0.7045284509658813, - "learning_rate": 1.3108833441013702e-05, - "loss": 0.3049, - "step": 8734 - }, - { - "epoch": 0.8228727538211535, - "grad_norm": 0.9626039862632751, - "learning_rate": 
1.3107398211778818e-05, - "loss": 0.3167, - "step": 8735 - }, - { - "epoch": 0.8229669579143214, - "grad_norm": 0.7633686661720276, - "learning_rate": 1.3105962911690449e-05, - "loss": 0.3412, - "step": 8736 - }, - { - "epoch": 0.8230611620074892, - "grad_norm": 0.6560431122779846, - "learning_rate": 1.3104527540781323e-05, - "loss": 0.3063, - "step": 8737 - }, - { - "epoch": 0.8231553661006571, - "grad_norm": 0.7915542721748352, - "learning_rate": 1.3103092099084166e-05, - "loss": 0.3123, - "step": 8738 - }, - { - "epoch": 0.8232495701938249, - "grad_norm": 0.8178728818893433, - "learning_rate": 1.310165658663171e-05, - "loss": 0.3104, - "step": 8739 - }, - { - "epoch": 0.8233437742869928, - "grad_norm": 0.6345885992050171, - "learning_rate": 1.3100221003456688e-05, - "loss": 0.2837, - "step": 8740 - }, - { - "epoch": 0.8234379783801606, - "grad_norm": 0.6778290271759033, - "learning_rate": 1.3098785349591832e-05, - "loss": 0.3121, - "step": 8741 - }, - { - "epoch": 0.8235321824733285, - "grad_norm": 0.7523201107978821, - "learning_rate": 1.3097349625069878e-05, - "loss": 0.2865, - "step": 8742 - }, - { - "epoch": 0.8236263865664963, - "grad_norm": 0.6583874225616455, - "learning_rate": 1.3095913829923563e-05, - "loss": 0.2946, - "step": 8743 - }, - { - "epoch": 0.8237205906596642, - "grad_norm": 0.6852019429206848, - "learning_rate": 1.3094477964185624e-05, - "loss": 0.3193, - "step": 8744 - }, - { - "epoch": 0.823814794752832, - "grad_norm": 0.758893609046936, - "learning_rate": 1.3093042027888803e-05, - "loss": 0.3296, - "step": 8745 - }, - { - "epoch": 0.8239089988459999, - "grad_norm": 0.8637267351150513, - "learning_rate": 1.3091606021065836e-05, - "loss": 0.2976, - "step": 8746 - }, - { - "epoch": 0.8240032029391677, - "grad_norm": 0.7250096797943115, - "learning_rate": 1.3090169943749475e-05, - "loss": 0.3296, - "step": 8747 - }, - { - "epoch": 0.8240974070323356, - "grad_norm": 0.6529019474983215, - "learning_rate": 1.3088733795972459e-05, - "loss": 0.3255, - "step": 8748 - }, - { - "epoch": 0.8241916111255034, - "grad_norm": 0.6772639751434326, - "learning_rate": 1.3087297577767536e-05, - "loss": 0.2874, - "step": 8749 - }, - { - "epoch": 0.8242858152186713, - "grad_norm": 0.7299693822860718, - "learning_rate": 1.3085861289167454e-05, - "loss": 0.3123, - "step": 8750 - }, - { - "epoch": 0.8243800193118391, - "grad_norm": 0.8791813254356384, - "learning_rate": 1.3084424930204959e-05, - "loss": 0.3366, - "step": 8751 - }, - { - "epoch": 0.824474223405007, - "grad_norm": 0.7638731598854065, - "learning_rate": 1.3082988500912807e-05, - "loss": 0.289, - "step": 8752 - }, - { - "epoch": 0.8245684274981748, - "grad_norm": 0.6731742024421692, - "learning_rate": 1.3081552001323754e-05, - "loss": 0.3075, - "step": 8753 - }, - { - "epoch": 0.8246626315913427, - "grad_norm": 0.7070082426071167, - "learning_rate": 1.3080115431470543e-05, - "loss": 0.2748, - "step": 8754 - }, - { - "epoch": 0.8247568356845105, - "grad_norm": 0.7427701950073242, - "learning_rate": 1.307867879138594e-05, - "loss": 0.3052, - "step": 8755 - }, - { - "epoch": 0.8248510397776784, - "grad_norm": 0.6710155010223389, - "learning_rate": 1.3077242081102699e-05, - "loss": 0.2963, - "step": 8756 - }, - { - "epoch": 0.8249452438708462, - "grad_norm": 0.7390181422233582, - "learning_rate": 1.3075805300653577e-05, - "loss": 0.2995, - "step": 8757 - }, - { - "epoch": 0.8250394479640141, - "grad_norm": 0.715648353099823, - "learning_rate": 1.3074368450071342e-05, - "loss": 0.257, - "step": 8758 - }, - { - "epoch": 
0.8251336520571819, - "grad_norm": 0.8312877416610718, - "learning_rate": 1.3072931529388748e-05, - "loss": 0.3267, - "step": 8759 - }, - { - "epoch": 0.8252278561503498, - "grad_norm": 0.8177542090415955, - "learning_rate": 1.3071494538638565e-05, - "loss": 0.3304, - "step": 8760 - }, - { - "epoch": 0.8253220602435176, - "grad_norm": 0.7219865918159485, - "learning_rate": 1.3070057477853557e-05, - "loss": 0.2874, - "step": 8761 - }, - { - "epoch": 0.8254162643366855, - "grad_norm": 0.6797481775283813, - "learning_rate": 1.3068620347066485e-05, - "loss": 0.2984, - "step": 8762 - }, - { - "epoch": 0.8255104684298533, - "grad_norm": 0.7699027061462402, - "learning_rate": 1.306718314631013e-05, - "loss": 0.3144, - "step": 8763 - }, - { - "epoch": 0.8256046725230212, - "grad_norm": 0.7195324897766113, - "learning_rate": 1.3065745875617252e-05, - "loss": 0.3282, - "step": 8764 - }, - { - "epoch": 0.825698876616189, - "grad_norm": 0.7257832884788513, - "learning_rate": 1.306430853502063e-05, - "loss": 0.3201, - "step": 8765 - }, - { - "epoch": 0.8257930807093569, - "grad_norm": 0.7184802889823914, - "learning_rate": 1.306287112455303e-05, - "loss": 0.3083, - "step": 8766 - }, - { - "epoch": 0.8258872848025247, - "grad_norm": 0.6480834484100342, - "learning_rate": 1.3061433644247234e-05, - "loss": 0.2621, - "step": 8767 - }, - { - "epoch": 0.8259814888956926, - "grad_norm": 0.977313756942749, - "learning_rate": 1.3059996094136016e-05, - "loss": 0.3171, - "step": 8768 - }, - { - "epoch": 0.8260756929888604, - "grad_norm": 0.9768969416618347, - "learning_rate": 1.3058558474252154e-05, - "loss": 0.3278, - "step": 8769 - }, - { - "epoch": 0.8261698970820283, - "grad_norm": 0.8026149868965149, - "learning_rate": 1.3057120784628432e-05, - "loss": 0.3038, - "step": 8770 - }, - { - "epoch": 0.8262641011751961, - "grad_norm": 0.7975835204124451, - "learning_rate": 1.3055683025297623e-05, - "loss": 0.3161, - "step": 8771 - }, - { - "epoch": 0.826358305268364, - "grad_norm": 1.1126796007156372, - "learning_rate": 1.3054245196292517e-05, - "loss": 0.3402, - "step": 8772 - }, - { - "epoch": 0.8264525093615318, - "grad_norm": 0.6256473064422607, - "learning_rate": 1.30528072976459e-05, - "loss": 0.3191, - "step": 8773 - }, - { - "epoch": 0.8265467134546997, - "grad_norm": 0.7434393763542175, - "learning_rate": 1.3051369329390551e-05, - "loss": 0.2777, - "step": 8774 - }, - { - "epoch": 0.8266409175478675, - "grad_norm": 0.6980218291282654, - "learning_rate": 1.3049931291559266e-05, - "loss": 0.3074, - "step": 8775 - }, - { - "epoch": 0.8267351216410354, - "grad_norm": 0.8950267434120178, - "learning_rate": 1.3048493184184829e-05, - "loss": 0.3538, - "step": 8776 - }, - { - "epoch": 0.8268293257342032, - "grad_norm": 0.7111105918884277, - "learning_rate": 1.3047055007300031e-05, - "loss": 0.2921, - "step": 8777 - }, - { - "epoch": 0.826923529827371, - "grad_norm": 0.8025537133216858, - "learning_rate": 1.3045616760937669e-05, - "loss": 0.3031, - "step": 8778 - }, - { - "epoch": 0.8270177339205389, - "grad_norm": 0.7283792495727539, - "learning_rate": 1.3044178445130535e-05, - "loss": 0.3289, - "step": 8779 - }, - { - "epoch": 0.8271119380137067, - "grad_norm": 0.7437872886657715, - "learning_rate": 1.3042740059911425e-05, - "loss": 0.3069, - "step": 8780 - }, - { - "epoch": 0.8272061421068746, - "grad_norm": 0.8354877829551697, - "learning_rate": 1.3041301605313135e-05, - "loss": 0.3501, - "step": 8781 - }, - { - "epoch": 0.8273003462000424, - "grad_norm": 1.0320316553115845, - "learning_rate": 
1.3039863081368464e-05, - "loss": 0.3382, - "step": 8782 - }, - { - "epoch": 0.8273945502932102, - "grad_norm": 0.8121195435523987, - "learning_rate": 1.3038424488110215e-05, - "loss": 0.3437, - "step": 8783 - }, - { - "epoch": 0.827488754386378, - "grad_norm": 0.6380713582038879, - "learning_rate": 1.3036985825571189e-05, - "loss": 0.275, - "step": 8784 - }, - { - "epoch": 0.8275829584795459, - "grad_norm": 0.9364820122718811, - "learning_rate": 1.3035547093784187e-05, - "loss": 0.3129, - "step": 8785 - }, - { - "epoch": 0.8276771625727137, - "grad_norm": 0.6679378747940063, - "learning_rate": 1.3034108292782017e-05, - "loss": 0.3092, - "step": 8786 - }, - { - "epoch": 0.8277713666658816, - "grad_norm": 0.8502500057220459, - "learning_rate": 1.3032669422597485e-05, - "loss": 0.3505, - "step": 8787 - }, - { - "epoch": 0.8278655707590494, - "grad_norm": 0.823781430721283, - "learning_rate": 1.3031230483263405e-05, - "loss": 0.3118, - "step": 8788 - }, - { - "epoch": 0.8279597748522173, - "grad_norm": 0.7144771218299866, - "learning_rate": 1.3029791474812576e-05, - "loss": 0.2487, - "step": 8789 - }, - { - "epoch": 0.8280539789453851, - "grad_norm": 0.8531436920166016, - "learning_rate": 1.3028352397277821e-05, - "loss": 0.2593, - "step": 8790 - }, - { - "epoch": 0.828148183038553, - "grad_norm": 0.678350031375885, - "learning_rate": 1.3026913250691943e-05, - "loss": 0.3018, - "step": 8791 - }, - { - "epoch": 0.8282423871317208, - "grad_norm": 0.749647319316864, - "learning_rate": 1.3025474035087764e-05, - "loss": 0.3294, - "step": 8792 - }, - { - "epoch": 0.8283365912248887, - "grad_norm": 0.7394024729728699, - "learning_rate": 1.30240347504981e-05, - "loss": 0.2794, - "step": 8793 - }, - { - "epoch": 0.8284307953180565, - "grad_norm": 0.7589126825332642, - "learning_rate": 1.3022595396955761e-05, - "loss": 0.3293, - "step": 8794 - }, - { - "epoch": 0.8285249994112244, - "grad_norm": 0.6819366216659546, - "learning_rate": 1.3021155974493578e-05, - "loss": 0.2904, - "step": 8795 - }, - { - "epoch": 0.8286192035043922, - "grad_norm": 0.7277462482452393, - "learning_rate": 1.3019716483144365e-05, - "loss": 0.3042, - "step": 8796 - }, - { - "epoch": 0.8287134075975601, - "grad_norm": 0.984160840511322, - "learning_rate": 1.3018276922940945e-05, - "loss": 0.3633, - "step": 8797 - }, - { - "epoch": 0.8288076116907279, - "grad_norm": 0.6747549176216125, - "learning_rate": 1.3016837293916145e-05, - "loss": 0.2695, - "step": 8798 - }, - { - "epoch": 0.8289018157838958, - "grad_norm": 0.7443104386329651, - "learning_rate": 1.3015397596102788e-05, - "loss": 0.2958, - "step": 8799 - }, - { - "epoch": 0.8289960198770636, - "grad_norm": 0.7657442092895508, - "learning_rate": 1.3013957829533702e-05, - "loss": 0.3293, - "step": 8800 - }, - { - "epoch": 0.8290902239702315, - "grad_norm": 0.7322391271591187, - "learning_rate": 1.301251799424172e-05, - "loss": 0.2865, - "step": 8801 - }, - { - "epoch": 0.8291844280633993, - "grad_norm": 0.7862289547920227, - "learning_rate": 1.3011078090259663e-05, - "loss": 0.283, - "step": 8802 - }, - { - "epoch": 0.8292786321565672, - "grad_norm": 0.7371636033058167, - "learning_rate": 1.3009638117620371e-05, - "loss": 0.3134, - "step": 8803 - }, - { - "epoch": 0.829372836249735, - "grad_norm": 0.7526691555976868, - "learning_rate": 1.3008198076356678e-05, - "loss": 0.2759, - "step": 8804 - }, - { - "epoch": 0.8294670403429029, - "grad_norm": 0.7201641201972961, - "learning_rate": 1.3006757966501415e-05, - "loss": 0.3202, - "step": 8805 - }, - { - "epoch": 
0.8295612444360707, - "grad_norm": 0.7679572701454163, - "learning_rate": 1.300531778808742e-05, - "loss": 0.3211, - "step": 8806 - }, - { - "epoch": 0.8296554485292386, - "grad_norm": 0.898105263710022, - "learning_rate": 1.3003877541147532e-05, - "loss": 0.3182, - "step": 8807 - }, - { - "epoch": 0.8297496526224064, - "grad_norm": 0.7773075103759766, - "learning_rate": 1.3002437225714588e-05, - "loss": 0.3266, - "step": 8808 - }, - { - "epoch": 0.8298438567155743, - "grad_norm": 0.7979598045349121, - "learning_rate": 1.3000996841821433e-05, - "loss": 0.2846, - "step": 8809 - }, - { - "epoch": 0.8299380608087421, - "grad_norm": 0.7142004370689392, - "learning_rate": 1.2999556389500914e-05, - "loss": 0.3376, - "step": 8810 - }, - { - "epoch": 0.83003226490191, - "grad_norm": 0.7944036722183228, - "learning_rate": 1.2998115868785864e-05, - "loss": 0.3459, - "step": 8811 - }, - { - "epoch": 0.8301264689950778, - "grad_norm": 0.6650295257568359, - "learning_rate": 1.2996675279709135e-05, - "loss": 0.3046, - "step": 8812 - }, - { - "epoch": 0.8302206730882457, - "grad_norm": 0.667598307132721, - "learning_rate": 1.299523462230358e-05, - "loss": 0.2687, - "step": 8813 - }, - { - "epoch": 0.8303148771814135, - "grad_norm": 0.7344732880592346, - "learning_rate": 1.299379389660204e-05, - "loss": 0.3375, - "step": 8814 - }, - { - "epoch": 0.8304090812745814, - "grad_norm": 0.7699588537216187, - "learning_rate": 1.2992353102637372e-05, - "loss": 0.3441, - "step": 8815 - }, - { - "epoch": 0.8305032853677492, - "grad_norm": 0.9087995886802673, - "learning_rate": 1.2990912240442424e-05, - "loss": 0.2961, - "step": 8816 - }, - { - "epoch": 0.830597489460917, - "grad_norm": 0.7107911705970764, - "learning_rate": 1.298947131005005e-05, - "loss": 0.2929, - "step": 8817 - }, - { - "epoch": 0.8306916935540849, - "grad_norm": 0.7627518773078918, - "learning_rate": 1.2988030311493107e-05, - "loss": 0.2942, - "step": 8818 - }, - { - "epoch": 0.8307858976472527, - "grad_norm": 0.9217396378517151, - "learning_rate": 1.2986589244804455e-05, - "loss": 0.3642, - "step": 8819 - }, - { - "epoch": 0.8308801017404206, - "grad_norm": 0.7422599792480469, - "learning_rate": 1.2985148110016947e-05, - "loss": 0.3382, - "step": 8820 - }, - { - "epoch": 0.8309743058335884, - "grad_norm": 0.7851346135139465, - "learning_rate": 1.2983706907163447e-05, - "loss": 0.3288, - "step": 8821 - }, - { - "epoch": 0.8310685099267563, - "grad_norm": 1.0454436540603638, - "learning_rate": 1.2982265636276812e-05, - "loss": 0.3543, - "step": 8822 - }, - { - "epoch": 0.8311627140199241, - "grad_norm": 0.6407907605171204, - "learning_rate": 1.2980824297389912e-05, - "loss": 0.2743, - "step": 8823 - }, - { - "epoch": 0.831256918113092, - "grad_norm": 0.6632653474807739, - "learning_rate": 1.2979382890535606e-05, - "loss": 0.272, - "step": 8824 - }, - { - "epoch": 0.8313511222062598, - "grad_norm": 0.7241531014442444, - "learning_rate": 1.2977941415746763e-05, - "loss": 0.3037, - "step": 8825 - }, - { - "epoch": 0.8314453262994277, - "grad_norm": 1.239408254623413, - "learning_rate": 1.2976499873056248e-05, - "loss": 0.3521, - "step": 8826 - }, - { - "epoch": 0.8315395303925955, - "grad_norm": 0.8416794538497925, - "learning_rate": 1.2975058262496936e-05, - "loss": 0.3393, - "step": 8827 - }, - { - "epoch": 0.8316337344857634, - "grad_norm": 0.6784628033638, - "learning_rate": 1.2973616584101694e-05, - "loss": 0.2875, - "step": 8828 - }, - { - "epoch": 0.8317279385789312, - "grad_norm": 0.6785228848457336, - "learning_rate": 
1.2972174837903392e-05, - "loss": 0.3017, - "step": 8829 - }, - { - "epoch": 0.8318221426720991, - "grad_norm": 0.9164506196975708, - "learning_rate": 1.2970733023934911e-05, - "loss": 0.3663, - "step": 8830 - }, - { - "epoch": 0.8319163467652669, - "grad_norm": 0.7244361042976379, - "learning_rate": 1.296929114222912e-05, - "loss": 0.3027, - "step": 8831 - }, - { - "epoch": 0.8320105508584348, - "grad_norm": 0.7114465236663818, - "learning_rate": 1.2967849192818899e-05, - "loss": 0.3005, - "step": 8832 - }, - { - "epoch": 0.8321047549516026, - "grad_norm": 0.6782700419425964, - "learning_rate": 1.296640717573713e-05, - "loss": 0.2951, - "step": 8833 - }, - { - "epoch": 0.8321989590447705, - "grad_norm": 0.8049957156181335, - "learning_rate": 1.2964965091016687e-05, - "loss": 0.2946, - "step": 8834 - }, - { - "epoch": 0.8322931631379383, - "grad_norm": 0.7354930639266968, - "learning_rate": 1.2963522938690454e-05, - "loss": 0.3217, - "step": 8835 - }, - { - "epoch": 0.8323873672311062, - "grad_norm": 0.78761887550354, - "learning_rate": 1.2962080718791316e-05, - "loss": 0.3156, - "step": 8836 - }, - { - "epoch": 0.832481571324274, - "grad_norm": 0.7231392860412598, - "learning_rate": 1.2960638431352155e-05, - "loss": 0.2999, - "step": 8837 - }, - { - "epoch": 0.8325757754174419, - "grad_norm": 0.8216593265533447, - "learning_rate": 1.295919607640586e-05, - "loss": 0.3143, - "step": 8838 - }, - { - "epoch": 0.8326699795106097, - "grad_norm": 0.7374890446662903, - "learning_rate": 1.2957753653985319e-05, - "loss": 0.3323, - "step": 8839 - }, - { - "epoch": 0.8327641836037776, - "grad_norm": 0.7227234840393066, - "learning_rate": 1.2956311164123421e-05, - "loss": 0.2892, - "step": 8840 - }, - { - "epoch": 0.8328583876969454, - "grad_norm": 0.9002649188041687, - "learning_rate": 1.2954868606853058e-05, - "loss": 0.364, - "step": 8841 - }, - { - "epoch": 0.8329525917901133, - "grad_norm": 0.6368039846420288, - "learning_rate": 1.2953425982207116e-05, - "loss": 0.2662, - "step": 8842 - }, - { - "epoch": 0.8330467958832811, - "grad_norm": 0.6957955360412598, - "learning_rate": 1.2951983290218495e-05, - "loss": 0.3073, - "step": 8843 - }, - { - "epoch": 0.833140999976449, - "grad_norm": 0.6631066799163818, - "learning_rate": 1.2950540530920092e-05, - "loss": 0.2702, - "step": 8844 - }, - { - "epoch": 0.8332352040696168, - "grad_norm": 0.6305803060531616, - "learning_rate": 1.2949097704344802e-05, - "loss": 0.3124, - "step": 8845 - }, - { - "epoch": 0.8333294081627847, - "grad_norm": 0.7423282861709595, - "learning_rate": 1.2947654810525521e-05, - "loss": 0.3166, - "step": 8846 - }, - { - "epoch": 0.8334236122559525, - "grad_norm": 0.8736042380332947, - "learning_rate": 1.2946211849495152e-05, - "loss": 0.3018, - "step": 8847 - }, - { - "epoch": 0.8335178163491204, - "grad_norm": 0.7889606952667236, - "learning_rate": 1.2944768821286597e-05, - "loss": 0.2897, - "step": 8848 - }, - { - "epoch": 0.8336120204422882, - "grad_norm": 0.6764425039291382, - "learning_rate": 1.2943325725932759e-05, - "loss": 0.2907, - "step": 8849 - }, - { - "epoch": 0.8337062245354561, - "grad_norm": 0.6257145404815674, - "learning_rate": 1.2941882563466543e-05, - "loss": 0.2429, - "step": 8850 - }, - { - "epoch": 0.8338004286286239, - "grad_norm": 0.6930012106895447, - "learning_rate": 1.2940439333920853e-05, - "loss": 0.3016, - "step": 8851 - }, - { - "epoch": 0.8338946327217918, - "grad_norm": 0.8111011981964111, - "learning_rate": 1.2938996037328601e-05, - "loss": 0.2841, - "step": 8852 - }, - { - "epoch": 
0.8339888368149596, - "grad_norm": 0.7202403545379639, - "learning_rate": 1.2937552673722695e-05, - "loss": 0.3125, - "step": 8853 - }, - { - "epoch": 0.8340830409081275, - "grad_norm": 0.7113397717475891, - "learning_rate": 1.2936109243136041e-05, - "loss": 0.2947, - "step": 8854 - }, - { - "epoch": 0.8341772450012953, - "grad_norm": 0.7299506068229675, - "learning_rate": 1.2934665745601557e-05, - "loss": 0.2489, - "step": 8855 - }, - { - "epoch": 0.8342714490944632, - "grad_norm": 0.7287810444831848, - "learning_rate": 1.2933222181152158e-05, - "loss": 0.3338, - "step": 8856 - }, - { - "epoch": 0.834365653187631, - "grad_norm": 0.7398520112037659, - "learning_rate": 1.2931778549820753e-05, - "loss": 0.2905, - "step": 8857 - }, - { - "epoch": 0.8344598572807989, - "grad_norm": 0.8753147125244141, - "learning_rate": 1.2930334851640268e-05, - "loss": 0.3433, - "step": 8858 - }, - { - "epoch": 0.8345540613739667, - "grad_norm": 0.7349756360054016, - "learning_rate": 1.2928891086643611e-05, - "loss": 0.3046, - "step": 8859 - }, - { - "epoch": 0.8346482654671346, - "grad_norm": 0.7209146618843079, - "learning_rate": 1.2927447254863712e-05, - "loss": 0.3194, - "step": 8860 - }, - { - "epoch": 0.8347424695603024, - "grad_norm": 0.7802855372428894, - "learning_rate": 1.2926003356333487e-05, - "loss": 0.3245, - "step": 8861 - }, - { - "epoch": 0.8348366736534703, - "grad_norm": 0.781139075756073, - "learning_rate": 1.2924559391085858e-05, - "loss": 0.338, - "step": 8862 - }, - { - "epoch": 0.8349308777466381, - "grad_norm": 0.7366265654563904, - "learning_rate": 1.2923115359153755e-05, - "loss": 0.3095, - "step": 8863 - }, - { - "epoch": 0.835025081839806, - "grad_norm": 0.7250460386276245, - "learning_rate": 1.2921671260570099e-05, - "loss": 0.29, - "step": 8864 - }, - { - "epoch": 0.8351192859329738, - "grad_norm": 0.6194658279418945, - "learning_rate": 1.2920227095367822e-05, - "loss": 0.2863, - "step": 8865 - }, - { - "epoch": 0.8352134900261416, - "grad_norm": 0.788934588432312, - "learning_rate": 1.2918782863579846e-05, - "loss": 0.29, - "step": 8866 - }, - { - "epoch": 0.8353076941193095, - "grad_norm": 0.8553847074508667, - "learning_rate": 1.2917338565239112e-05, - "loss": 0.3084, - "step": 8867 - }, - { - "epoch": 0.8354018982124773, - "grad_norm": 0.722250759601593, - "learning_rate": 1.2915894200378547e-05, - "loss": 0.302, - "step": 8868 - }, - { - "epoch": 0.8354961023056452, - "grad_norm": 0.6709815263748169, - "learning_rate": 1.2914449769031081e-05, - "loss": 0.2819, - "step": 8869 - }, - { - "epoch": 0.835590306398813, - "grad_norm": 0.6778731346130371, - "learning_rate": 1.2913005271229657e-05, - "loss": 0.2895, - "step": 8870 - }, - { - "epoch": 0.8356845104919809, - "grad_norm": 0.7274365425109863, - "learning_rate": 1.2911560707007204e-05, - "loss": 0.32, - "step": 8871 - }, - { - "epoch": 0.8357787145851487, - "grad_norm": 0.7306736707687378, - "learning_rate": 1.2910116076396669e-05, - "loss": 0.3174, - "step": 8872 - }, - { - "epoch": 0.8358729186783166, - "grad_norm": 0.7256240248680115, - "learning_rate": 1.2908671379430982e-05, - "loss": 0.2735, - "step": 8873 - }, - { - "epoch": 0.8359671227714844, - "grad_norm": 0.656222939491272, - "learning_rate": 1.2907226616143089e-05, - "loss": 0.3148, - "step": 8874 - }, - { - "epoch": 0.8360613268646523, - "grad_norm": 0.7254332304000854, - "learning_rate": 1.2905781786565936e-05, - "loss": 0.3441, - "step": 8875 - }, - { - "epoch": 0.8361555309578201, - "grad_norm": 0.637316107749939, - "learning_rate": 1.2904336890732462e-05, 
- "loss": 0.2629, - "step": 8876 - }, - { - "epoch": 0.836249735050988, - "grad_norm": 0.7101510763168335, - "learning_rate": 1.2902891928675616e-05, - "loss": 0.3213, - "step": 8877 - }, - { - "epoch": 0.8363439391441558, - "grad_norm": 0.6917797327041626, - "learning_rate": 1.290144690042835e-05, - "loss": 0.3181, - "step": 8878 - }, - { - "epoch": 0.8364381432373237, - "grad_norm": 0.7629390358924866, - "learning_rate": 1.29000018060236e-05, - "loss": 0.286, - "step": 8879 - }, - { - "epoch": 0.8365323473304915, - "grad_norm": 0.663982093334198, - "learning_rate": 1.2898556645494327e-05, - "loss": 0.2563, - "step": 8880 - }, - { - "epoch": 0.8366265514236594, - "grad_norm": 0.7203670144081116, - "learning_rate": 1.289711141887348e-05, - "loss": 0.3328, - "step": 8881 - }, - { - "epoch": 0.8367207555168272, - "grad_norm": 0.8644741773605347, - "learning_rate": 1.2895666126194009e-05, - "loss": 0.3524, - "step": 8882 - }, - { - "epoch": 0.8368149596099951, - "grad_norm": 0.7667208909988403, - "learning_rate": 1.2894220767488877e-05, - "loss": 0.3135, - "step": 8883 - }, - { - "epoch": 0.8369091637031629, - "grad_norm": 0.7790444493293762, - "learning_rate": 1.2892775342791033e-05, - "loss": 0.2839, - "step": 8884 - }, - { - "epoch": 0.8370033677963308, - "grad_norm": 0.6937950849533081, - "learning_rate": 1.2891329852133438e-05, - "loss": 0.2866, - "step": 8885 - }, - { - "epoch": 0.8370975718894986, - "grad_norm": 0.6426435708999634, - "learning_rate": 1.288988429554905e-05, - "loss": 0.3224, - "step": 8886 - }, - { - "epoch": 0.8371917759826665, - "grad_norm": 0.7044891119003296, - "learning_rate": 1.288843867307083e-05, - "loss": 0.2921, - "step": 8887 - }, - { - "epoch": 0.8372859800758343, - "grad_norm": 0.7194440960884094, - "learning_rate": 1.2886992984731743e-05, - "loss": 0.2857, - "step": 8888 - }, - { - "epoch": 0.8373801841690022, - "grad_norm": 0.746876060962677, - "learning_rate": 1.2885547230564748e-05, - "loss": 0.2876, - "step": 8889 - }, - { - "epoch": 0.83747438826217, - "grad_norm": 0.9977391362190247, - "learning_rate": 1.2884101410602821e-05, - "loss": 0.373, - "step": 8890 - }, - { - "epoch": 0.8375685923553379, - "grad_norm": 0.662267804145813, - "learning_rate": 1.2882655524878914e-05, - "loss": 0.2887, - "step": 8891 - }, - { - "epoch": 0.8376627964485057, - "grad_norm": 0.6523311138153076, - "learning_rate": 1.2881209573426005e-05, - "loss": 0.2587, - "step": 8892 - }, - { - "epoch": 0.8377570005416736, - "grad_norm": 0.7460758686065674, - "learning_rate": 1.2879763556277062e-05, - "loss": 0.3022, - "step": 8893 - }, - { - "epoch": 0.8378512046348414, - "grad_norm": 0.7284391522407532, - "learning_rate": 1.2878317473465056e-05, - "loss": 0.3235, - "step": 8894 - }, - { - "epoch": 0.8379454087280093, - "grad_norm": 0.6337546706199646, - "learning_rate": 1.2876871325022962e-05, - "loss": 0.3178, - "step": 8895 - }, - { - "epoch": 0.8380396128211771, - "grad_norm": 0.7316828370094299, - "learning_rate": 1.2875425110983753e-05, - "loss": 0.3159, - "step": 8896 - }, - { - "epoch": 0.838133816914345, - "grad_norm": 0.6527437567710876, - "learning_rate": 1.28739788313804e-05, - "loss": 0.3163, - "step": 8897 - }, - { - "epoch": 0.8382280210075128, - "grad_norm": 0.649264395236969, - "learning_rate": 1.287253248624589e-05, - "loss": 0.2581, - "step": 8898 - }, - { - "epoch": 0.8383222251006807, - "grad_norm": 0.9922779202461243, - "learning_rate": 1.2871086075613196e-05, - "loss": 0.3281, - "step": 8899 - }, - { - "epoch": 0.8384164291938485, - "grad_norm": 
0.7026650309562683, - "learning_rate": 1.2869639599515295e-05, - "loss": 0.3276, - "step": 8900 - }, - { - "epoch": 0.8385106332870164, - "grad_norm": 0.7148386240005493, - "learning_rate": 1.286819305798518e-05, - "loss": 0.3371, - "step": 8901 - }, - { - "epoch": 0.8386048373801842, - "grad_norm": 0.6871752142906189, - "learning_rate": 1.2866746451055821e-05, - "loss": 0.315, - "step": 8902 - }, - { - "epoch": 0.8386990414733521, - "grad_norm": 0.7981586456298828, - "learning_rate": 1.2865299778760212e-05, - "loss": 0.3586, - "step": 8903 - }, - { - "epoch": 0.8387932455665199, - "grad_norm": 0.8237510323524475, - "learning_rate": 1.2863853041131338e-05, - "loss": 0.346, - "step": 8904 - }, - { - "epoch": 0.8388874496596878, - "grad_norm": 0.7409459948539734, - "learning_rate": 1.2862406238202186e-05, - "loss": 0.3541, - "step": 8905 - }, - { - "epoch": 0.8389816537528556, - "grad_norm": 0.768153965473175, - "learning_rate": 1.286095937000574e-05, - "loss": 0.293, - "step": 8906 - }, - { - "epoch": 0.8390758578460235, - "grad_norm": 0.6621012091636658, - "learning_rate": 1.2859512436574998e-05, - "loss": 0.2943, - "step": 8907 - }, - { - "epoch": 0.8391700619391913, - "grad_norm": 0.6803906559944153, - "learning_rate": 1.2858065437942955e-05, - "loss": 0.2384, - "step": 8908 - }, - { - "epoch": 0.8392642660323592, - "grad_norm": 0.6534746289253235, - "learning_rate": 1.2856618374142594e-05, - "loss": 0.2841, - "step": 8909 - }, - { - "epoch": 0.839358470125527, - "grad_norm": 0.7702908515930176, - "learning_rate": 1.2855171245206922e-05, - "loss": 0.3456, - "step": 8910 - }, - { - "epoch": 0.8394526742186948, - "grad_norm": 0.7710719704627991, - "learning_rate": 1.2853724051168922e-05, - "loss": 0.3285, - "step": 8911 - }, - { - "epoch": 0.8395468783118627, - "grad_norm": 0.6994277238845825, - "learning_rate": 1.2852276792061607e-05, - "loss": 0.3521, - "step": 8912 - }, - { - "epoch": 0.8396410824050305, - "grad_norm": 0.7324875593185425, - "learning_rate": 1.285082946791797e-05, - "loss": 0.3128, - "step": 8913 - }, - { - "epoch": 0.8397352864981984, - "grad_norm": 0.7418799996376038, - "learning_rate": 1.2849382078771006e-05, - "loss": 0.338, - "step": 8914 - }, - { - "epoch": 0.8398294905913662, - "grad_norm": 0.7456151247024536, - "learning_rate": 1.2847934624653728e-05, - "loss": 0.2961, - "step": 8915 - }, - { - "epoch": 0.8399236946845341, - "grad_norm": 0.7044605016708374, - "learning_rate": 1.284648710559914e-05, - "loss": 0.3607, - "step": 8916 - }, - { - "epoch": 0.8400178987777019, - "grad_norm": 0.7057334184646606, - "learning_rate": 1.2845039521640236e-05, - "loss": 0.3021, - "step": 8917 - }, - { - "epoch": 0.8401121028708698, - "grad_norm": 0.6334710121154785, - "learning_rate": 1.2843591872810039e-05, - "loss": 0.267, - "step": 8918 - }, - { - "epoch": 0.8402063069640376, - "grad_norm": 0.6703156232833862, - "learning_rate": 1.2842144159141543e-05, - "loss": 0.2582, - "step": 8919 - }, - { - "epoch": 0.8403005110572055, - "grad_norm": 0.8019157648086548, - "learning_rate": 1.2840696380667768e-05, - "loss": 0.3043, - "step": 8920 - }, - { - "epoch": 0.8403947151503732, - "grad_norm": 0.7276331782341003, - "learning_rate": 1.2839248537421722e-05, - "loss": 0.2852, - "step": 8921 - }, - { - "epoch": 0.8404889192435411, - "grad_norm": 0.6563223004341125, - "learning_rate": 1.2837800629436417e-05, - "loss": 0.2811, - "step": 8922 - }, - { - "epoch": 0.8405831233367089, - "grad_norm": 0.7013585567474365, - "learning_rate": 1.283635265674487e-05, - "loss": 0.3154, - "step": 
8923 - }, - { - "epoch": 0.8406773274298768, - "grad_norm": 0.6967423558235168, - "learning_rate": 1.2834904619380097e-05, - "loss": 0.283, - "step": 8924 - }, - { - "epoch": 0.8407715315230446, - "grad_norm": 0.757985532283783, - "learning_rate": 1.2833456517375113e-05, - "loss": 0.2977, - "step": 8925 - }, - { - "epoch": 0.8408657356162125, - "grad_norm": 0.6783421635627747, - "learning_rate": 1.2832008350762937e-05, - "loss": 0.3367, - "step": 8926 - }, - { - "epoch": 0.8409599397093803, - "grad_norm": 0.6910302639007568, - "learning_rate": 1.2830560119576591e-05, - "loss": 0.3217, - "step": 8927 - }, - { - "epoch": 0.8410541438025482, - "grad_norm": 0.6359944939613342, - "learning_rate": 1.28291118238491e-05, - "loss": 0.2843, - "step": 8928 - }, - { - "epoch": 0.841148347895716, - "grad_norm": 0.8597601056098938, - "learning_rate": 1.2827663463613482e-05, - "loss": 0.3409, - "step": 8929 - }, - { - "epoch": 0.8412425519888839, - "grad_norm": 0.690007746219635, - "learning_rate": 1.2826215038902765e-05, - "loss": 0.3109, - "step": 8930 - }, - { - "epoch": 0.8413367560820517, - "grad_norm": 0.7544068694114685, - "learning_rate": 1.2824766549749972e-05, - "loss": 0.3121, - "step": 8931 - }, - { - "epoch": 0.8414309601752196, - "grad_norm": 0.6776837706565857, - "learning_rate": 1.2823317996188136e-05, - "loss": 0.3134, - "step": 8932 - }, - { - "epoch": 0.8415251642683874, - "grad_norm": 0.7241583466529846, - "learning_rate": 1.2821869378250283e-05, - "loss": 0.2893, - "step": 8933 - }, - { - "epoch": 0.8416193683615553, - "grad_norm": 0.6826990246772766, - "learning_rate": 1.282042069596944e-05, - "loss": 0.2992, - "step": 8934 - }, - { - "epoch": 0.8417135724547231, - "grad_norm": 0.7980566620826721, - "learning_rate": 1.2818971949378647e-05, - "loss": 0.3343, - "step": 8935 - }, - { - "epoch": 0.841807776547891, - "grad_norm": 0.7511221766471863, - "learning_rate": 1.2817523138510934e-05, - "loss": 0.2972, - "step": 8936 - }, - { - "epoch": 0.8419019806410588, - "grad_norm": 0.7395924925804138, - "learning_rate": 1.2816074263399335e-05, - "loss": 0.3441, - "step": 8937 - }, - { - "epoch": 0.8419961847342267, - "grad_norm": 0.6874242424964905, - "learning_rate": 1.281462532407689e-05, - "loss": 0.2941, - "step": 8938 - }, - { - "epoch": 0.8420903888273945, - "grad_norm": 0.7330230474472046, - "learning_rate": 1.281317632057663e-05, - "loss": 0.3146, - "step": 8939 - }, - { - "epoch": 0.8421845929205624, - "grad_norm": 0.6668033599853516, - "learning_rate": 1.2811727252931602e-05, - "loss": 0.2824, - "step": 8940 - }, - { - "epoch": 0.8422787970137302, - "grad_norm": 0.7982563972473145, - "learning_rate": 1.2810278121174844e-05, - "loss": 0.2901, - "step": 8941 - }, - { - "epoch": 0.8423730011068981, - "grad_norm": 0.6179084777832031, - "learning_rate": 1.2808828925339398e-05, - "loss": 0.2847, - "step": 8942 - }, - { - "epoch": 0.8424672052000659, - "grad_norm": 0.8197647333145142, - "learning_rate": 1.280737966545831e-05, - "loss": 0.3355, - "step": 8943 - }, - { - "epoch": 0.8425614092932338, - "grad_norm": 0.6531780958175659, - "learning_rate": 1.2805930341564622e-05, - "loss": 0.2772, - "step": 8944 - }, - { - "epoch": 0.8426556133864016, - "grad_norm": 0.6549702286720276, - "learning_rate": 1.2804480953691388e-05, - "loss": 0.2578, - "step": 8945 - }, - { - "epoch": 0.8427498174795695, - "grad_norm": 0.7435251474380493, - "learning_rate": 1.2803031501871643e-05, - "loss": 0.2965, - "step": 8946 - }, - { - "epoch": 0.8428440215727373, - "grad_norm": 0.8284091353416443, - 
"learning_rate": 1.280158198613845e-05, - "loss": 0.3444, - "step": 8947 - }, - { - "epoch": 0.8429382256659051, - "grad_norm": 0.8054177165031433, - "learning_rate": 1.2800132406524854e-05, - "loss": 0.3605, - "step": 8948 - }, - { - "epoch": 0.843032429759073, - "grad_norm": 0.7610667943954468, - "learning_rate": 1.2798682763063907e-05, - "loss": 0.2951, - "step": 8949 - }, - { - "epoch": 0.8431266338522408, - "grad_norm": 0.8656819462776184, - "learning_rate": 1.279723305578867e-05, - "loss": 0.3427, - "step": 8950 - }, - { - "epoch": 0.8432208379454087, - "grad_norm": 0.7053451538085938, - "learning_rate": 1.2795783284732186e-05, - "loss": 0.2875, - "step": 8951 - }, - { - "epoch": 0.8433150420385765, - "grad_norm": 0.7782831192016602, - "learning_rate": 1.2794333449927522e-05, - "loss": 0.3648, - "step": 8952 - }, - { - "epoch": 0.8434092461317444, - "grad_norm": 0.6942363381385803, - "learning_rate": 1.2792883551407738e-05, - "loss": 0.2661, - "step": 8953 - }, - { - "epoch": 0.8435034502249122, - "grad_norm": 0.7131900191307068, - "learning_rate": 1.2791433589205884e-05, - "loss": 0.2847, - "step": 8954 - }, - { - "epoch": 0.8435976543180801, - "grad_norm": 0.7252687811851501, - "learning_rate": 1.2789983563355031e-05, - "loss": 0.2764, - "step": 8955 - }, - { - "epoch": 0.8436918584112479, - "grad_norm": 0.7174544334411621, - "learning_rate": 1.2788533473888235e-05, - "loss": 0.3268, - "step": 8956 - }, - { - "epoch": 0.8437860625044158, - "grad_norm": 0.6812805533409119, - "learning_rate": 1.2787083320838566e-05, - "loss": 0.2781, - "step": 8957 - }, - { - "epoch": 0.8438802665975836, - "grad_norm": 0.7570432424545288, - "learning_rate": 1.2785633104239085e-05, - "loss": 0.3372, - "step": 8958 - }, - { - "epoch": 0.8439744706907515, - "grad_norm": 0.7210245132446289, - "learning_rate": 1.2784182824122862e-05, - "loss": 0.3028, - "step": 8959 - }, - { - "epoch": 0.8440686747839193, - "grad_norm": 0.8742713332176208, - "learning_rate": 1.2782732480522966e-05, - "loss": 0.309, - "step": 8960 - }, - { - "epoch": 0.8441628788770872, - "grad_norm": 0.9560702443122864, - "learning_rate": 1.2781282073472463e-05, - "loss": 0.3315, - "step": 8961 - }, - { - "epoch": 0.844257082970255, - "grad_norm": 0.8701736330986023, - "learning_rate": 1.2779831603004426e-05, - "loss": 0.2895, - "step": 8962 - }, - { - "epoch": 0.8443512870634229, - "grad_norm": 0.6817653775215149, - "learning_rate": 1.2778381069151935e-05, - "loss": 0.283, - "step": 8963 - }, - { - "epoch": 0.8444454911565907, - "grad_norm": 0.674261212348938, - "learning_rate": 1.2776930471948057e-05, - "loss": 0.2854, - "step": 8964 - }, - { - "epoch": 0.8445396952497586, - "grad_norm": 0.7767412662506104, - "learning_rate": 1.2775479811425868e-05, - "loss": 0.3446, - "step": 8965 - }, - { - "epoch": 0.8446338993429264, - "grad_norm": 0.7883347272872925, - "learning_rate": 1.2774029087618448e-05, - "loss": 0.3551, - "step": 8966 - }, - { - "epoch": 0.8447281034360943, - "grad_norm": 0.7268330454826355, - "learning_rate": 1.2772578300558874e-05, - "loss": 0.2994, - "step": 8967 - }, - { - "epoch": 0.8448223075292621, - "grad_norm": 1.1079233884811401, - "learning_rate": 1.2771127450280227e-05, - "loss": 0.3112, - "step": 8968 - }, - { - "epoch": 0.84491651162243, - "grad_norm": 0.815112292766571, - "learning_rate": 1.2769676536815589e-05, - "loss": 0.3286, - "step": 8969 - }, - { - "epoch": 0.8450107157155978, - "grad_norm": 0.7777073383331299, - "learning_rate": 1.2768225560198043e-05, - "loss": 0.3416, - "step": 8970 - }, - { - 
"epoch": 0.8451049198087657, - "grad_norm": 0.7789881229400635, - "learning_rate": 1.2766774520460672e-05, - "loss": 0.3076, - "step": 8971 - }, - { - "epoch": 0.8451991239019335, - "grad_norm": 0.6999005675315857, - "learning_rate": 1.2765323417636561e-05, - "loss": 0.3362, - "step": 8972 - }, - { - "epoch": 0.8452933279951014, - "grad_norm": 0.7440067529678345, - "learning_rate": 1.2763872251758804e-05, - "loss": 0.3184, - "step": 8973 - }, - { - "epoch": 0.8453875320882692, - "grad_norm": 0.6921179294586182, - "learning_rate": 1.276242102286048e-05, - "loss": 0.3165, - "step": 8974 - }, - { - "epoch": 0.8454817361814371, - "grad_norm": 0.7694404125213623, - "learning_rate": 1.2760969730974692e-05, - "loss": 0.3311, - "step": 8975 - }, - { - "epoch": 0.8455759402746049, - "grad_norm": 0.7318397760391235, - "learning_rate": 1.2759518376134516e-05, - "loss": 0.3249, - "step": 8976 - }, - { - "epoch": 0.8456701443677728, - "grad_norm": 0.7305333018302917, - "learning_rate": 1.2758066958373056e-05, - "loss": 0.2975, - "step": 8977 - }, - { - "epoch": 0.8457643484609406, - "grad_norm": 0.8558382391929626, - "learning_rate": 1.2756615477723408e-05, - "loss": 0.3352, - "step": 8978 - }, - { - "epoch": 0.8458585525541085, - "grad_norm": 0.7120863199234009, - "learning_rate": 1.275516393421866e-05, - "loss": 0.2938, - "step": 8979 - }, - { - "epoch": 0.8459527566472763, - "grad_norm": 0.7065699100494385, - "learning_rate": 1.2753712327891915e-05, - "loss": 0.301, - "step": 8980 - }, - { - "epoch": 0.8460469607404442, - "grad_norm": 0.7297618985176086, - "learning_rate": 1.275226065877627e-05, - "loss": 0.2715, - "step": 8981 - }, - { - "epoch": 0.846141164833612, - "grad_norm": 0.7805727124214172, - "learning_rate": 1.2750808926904822e-05, - "loss": 0.3078, - "step": 8982 - }, - { - "epoch": 0.8462353689267799, - "grad_norm": 0.6316404938697815, - "learning_rate": 1.2749357132310683e-05, - "loss": 0.3178, - "step": 8983 - }, - { - "epoch": 0.8463295730199477, - "grad_norm": 0.7475721836090088, - "learning_rate": 1.2747905275026943e-05, - "loss": 0.2786, - "step": 8984 - }, - { - "epoch": 0.8464237771131156, - "grad_norm": 0.8101813793182373, - "learning_rate": 1.2746453355086719e-05, - "loss": 0.33, - "step": 8985 - }, - { - "epoch": 0.8465179812062834, - "grad_norm": 0.9295817017555237, - "learning_rate": 1.2745001372523105e-05, - "loss": 0.3421, - "step": 8986 - }, - { - "epoch": 0.8466121852994513, - "grad_norm": 0.7078453302383423, - "learning_rate": 1.2743549327369218e-05, - "loss": 0.3213, - "step": 8987 - }, - { - "epoch": 0.8467063893926191, - "grad_norm": 0.7733709812164307, - "learning_rate": 1.2742097219658162e-05, - "loss": 0.3138, - "step": 8988 - }, - { - "epoch": 0.846800593485787, - "grad_norm": 0.7479760050773621, - "learning_rate": 1.274064504942305e-05, - "loss": 0.3532, - "step": 8989 - }, - { - "epoch": 0.8468947975789548, - "grad_norm": 0.7153126001358032, - "learning_rate": 1.2739192816696992e-05, - "loss": 0.3314, - "step": 8990 - }, - { - "epoch": 0.8469890016721227, - "grad_norm": 0.7419144511222839, - "learning_rate": 1.27377405215131e-05, - "loss": 0.3355, - "step": 8991 - }, - { - "epoch": 0.8470832057652905, - "grad_norm": 0.6762439608573914, - "learning_rate": 1.2736288163904493e-05, - "loss": 0.3082, - "step": 8992 - }, - { - "epoch": 0.8471774098584584, - "grad_norm": 0.8105480074882507, - "learning_rate": 1.2734835743904283e-05, - "loss": 0.3407, - "step": 8993 - }, - { - "epoch": 0.8472716139516262, - "grad_norm": 0.6770057082176208, - "learning_rate": 
1.2733383261545586e-05, - "loss": 0.2747, - "step": 8994 - }, - { - "epoch": 0.847365818044794, - "grad_norm": 0.7751746773719788, - "learning_rate": 1.2731930716861527e-05, - "loss": 0.3293, - "step": 8995 - }, - { - "epoch": 0.8474600221379619, - "grad_norm": 0.8555806279182434, - "learning_rate": 1.2730478109885221e-05, - "loss": 0.3222, - "step": 8996 - }, - { - "epoch": 0.8475542262311297, - "grad_norm": 0.7186029553413391, - "learning_rate": 1.272902544064979e-05, - "loss": 0.299, - "step": 8997 - }, - { - "epoch": 0.8476484303242976, - "grad_norm": 0.8223687410354614, - "learning_rate": 1.2727572709188362e-05, - "loss": 0.3534, - "step": 8998 - }, - { - "epoch": 0.8477426344174654, - "grad_norm": 0.958514392375946, - "learning_rate": 1.2726119915534054e-05, - "loss": 0.323, - "step": 8999 - }, - { - "epoch": 0.8478368385106333, - "grad_norm": 0.6684522032737732, - "learning_rate": 1.2724667059719997e-05, - "loss": 0.2721, - "step": 9000 - }, - { - "epoch": 0.8479310426038011, - "grad_norm": 0.6773232221603394, - "learning_rate": 1.272321414177932e-05, - "loss": 0.3213, - "step": 9001 - }, - { - "epoch": 0.848025246696969, - "grad_norm": 0.8274404406547546, - "learning_rate": 1.2721761161745145e-05, - "loss": 0.2966, - "step": 9002 - }, - { - "epoch": 0.8481194507901368, - "grad_norm": 0.7804051041603088, - "learning_rate": 1.2720308119650608e-05, - "loss": 0.3115, - "step": 9003 - }, - { - "epoch": 0.8482136548833047, - "grad_norm": 0.779718816280365, - "learning_rate": 1.271885501552884e-05, - "loss": 0.3141, - "step": 9004 - }, - { - "epoch": 0.8483078589764725, - "grad_norm": 0.6873429417610168, - "learning_rate": 1.2717401849412972e-05, - "loss": 0.2849, - "step": 9005 - }, - { - "epoch": 0.8484020630696404, - "grad_norm": 0.7158425450325012, - "learning_rate": 1.2715948621336139e-05, - "loss": 0.2973, - "step": 9006 - }, - { - "epoch": 0.8484962671628082, - "grad_norm": 0.7210104465484619, - "learning_rate": 1.2714495331331475e-05, - "loss": 0.305, - "step": 9007 - }, - { - "epoch": 0.8485904712559761, - "grad_norm": 0.7635478973388672, - "learning_rate": 1.2713041979432124e-05, - "loss": 0.32, - "step": 9008 - }, - { - "epoch": 0.8486846753491439, - "grad_norm": 0.660316526889801, - "learning_rate": 1.2711588565671217e-05, - "loss": 0.2652, - "step": 9009 - }, - { - "epoch": 0.8487788794423118, - "grad_norm": 0.7859675288200378, - "learning_rate": 1.27101350900819e-05, - "loss": 0.317, - "step": 9010 - }, - { - "epoch": 0.8488730835354796, - "grad_norm": 0.6476109623908997, - "learning_rate": 1.2708681552697306e-05, - "loss": 0.2452, - "step": 9011 - }, - { - "epoch": 0.8489672876286475, - "grad_norm": 0.7139787077903748, - "learning_rate": 1.270722795355059e-05, - "loss": 0.287, - "step": 9012 - }, - { - "epoch": 0.8490614917218153, - "grad_norm": 0.8316739797592163, - "learning_rate": 1.2705774292674886e-05, - "loss": 0.3582, - "step": 9013 - }, - { - "epoch": 0.8491556958149832, - "grad_norm": 0.6787524819374084, - "learning_rate": 1.2704320570103343e-05, - "loss": 0.3222, - "step": 9014 - }, - { - "epoch": 0.849249899908151, - "grad_norm": 0.7387852668762207, - "learning_rate": 1.2702866785869112e-05, - "loss": 0.3029, - "step": 9015 - }, - { - "epoch": 0.8493441040013189, - "grad_norm": 0.6867208480834961, - "learning_rate": 1.2701412940005335e-05, - "loss": 0.286, - "step": 9016 - }, - { - "epoch": 0.8494383080944867, - "grad_norm": 0.7119094133377075, - "learning_rate": 1.2699959032545164e-05, - "loss": 0.2965, - "step": 9017 - }, - { - "epoch": 0.8495325121876546, - 
"grad_norm": 0.7823882102966309, - "learning_rate": 1.2698505063521758e-05, - "loss": 0.3153, - "step": 9018 - }, - { - "epoch": 0.8496267162808224, - "grad_norm": 0.6950855851173401, - "learning_rate": 1.2697051032968257e-05, - "loss": 0.2875, - "step": 9019 - }, - { - "epoch": 0.8497209203739903, - "grad_norm": 0.7047410011291504, - "learning_rate": 1.2695596940917825e-05, - "loss": 0.2879, - "step": 9020 - }, - { - "epoch": 0.8498151244671581, - "grad_norm": 0.6160603761672974, - "learning_rate": 1.2694142787403614e-05, - "loss": 0.3105, - "step": 9021 - }, - { - "epoch": 0.849909328560326, - "grad_norm": 0.6546329259872437, - "learning_rate": 1.2692688572458776e-05, - "loss": 0.2806, - "step": 9022 - }, - { - "epoch": 0.8500035326534938, - "grad_norm": 0.6774280667304993, - "learning_rate": 1.2691234296116479e-05, - "loss": 0.2833, - "step": 9023 - }, - { - "epoch": 0.8500977367466617, - "grad_norm": 0.664818286895752, - "learning_rate": 1.2689779958409876e-05, - "loss": 0.2724, - "step": 9024 - }, - { - "epoch": 0.8501919408398295, - "grad_norm": 0.7551313042640686, - "learning_rate": 1.2688325559372132e-05, - "loss": 0.3082, - "step": 9025 - }, - { - "epoch": 0.8502861449329974, - "grad_norm": 0.8147274255752563, - "learning_rate": 1.2686871099036405e-05, - "loss": 0.3745, - "step": 9026 - }, - { - "epoch": 0.8503803490261652, - "grad_norm": 0.5757870674133301, - "learning_rate": 1.2685416577435863e-05, - "loss": 0.2709, - "step": 9027 - }, - { - "epoch": 0.8504745531193331, - "grad_norm": 0.7894459962844849, - "learning_rate": 1.268396199460367e-05, - "loss": 0.2726, - "step": 9028 - }, - { - "epoch": 0.8505687572125009, - "grad_norm": 0.6999770402908325, - "learning_rate": 1.2682507350572995e-05, - "loss": 0.2746, - "step": 9029 - }, - { - "epoch": 0.8506629613056688, - "grad_norm": 0.7287282943725586, - "learning_rate": 1.2681052645377001e-05, - "loss": 0.3125, - "step": 9030 - }, - { - "epoch": 0.8507571653988366, - "grad_norm": 0.7133800983428955, - "learning_rate": 1.267959787904886e-05, - "loss": 0.3326, - "step": 9031 - }, - { - "epoch": 0.8508513694920045, - "grad_norm": 1.2192795276641846, - "learning_rate": 1.2678143051621743e-05, - "loss": 0.3675, - "step": 9032 - }, - { - "epoch": 0.8509455735851723, - "grad_norm": 0.6774299144744873, - "learning_rate": 1.2676688163128826e-05, - "loss": 0.305, - "step": 9033 - }, - { - "epoch": 0.8510397776783402, - "grad_norm": 0.6948447823524475, - "learning_rate": 1.2675233213603275e-05, - "loss": 0.295, - "step": 9034 - }, - { - "epoch": 0.851133981771508, - "grad_norm": 0.6044574975967407, - "learning_rate": 1.2673778203078274e-05, - "loss": 0.298, - "step": 9035 - }, - { - "epoch": 0.8512281858646759, - "grad_norm": 0.7736443281173706, - "learning_rate": 1.267232313158699e-05, - "loss": 0.3249, - "step": 9036 - }, - { - "epoch": 0.8513223899578437, - "grad_norm": 0.6680286526679993, - "learning_rate": 1.2670867999162608e-05, - "loss": 0.2785, - "step": 9037 - }, - { - "epoch": 0.8514165940510116, - "grad_norm": 0.7220253944396973, - "learning_rate": 1.2669412805838305e-05, - "loss": 0.3349, - "step": 9038 - }, - { - "epoch": 0.8515107981441794, - "grad_norm": 0.8169474005699158, - "learning_rate": 1.2667957551647263e-05, - "loss": 0.3097, - "step": 9039 - }, - { - "epoch": 0.8516050022373473, - "grad_norm": 0.7603136897087097, - "learning_rate": 1.2666502236622662e-05, - "loss": 0.3081, - "step": 9040 - }, - { - "epoch": 0.8516992063305151, - "grad_norm": 0.6987825632095337, - "learning_rate": 1.2665046860797686e-05, - "loss": 
0.3067, - "step": 9041 - }, - { - "epoch": 0.851793410423683, - "grad_norm": 0.8207119107246399, - "learning_rate": 1.2663591424205519e-05, - "loss": 0.3267, - "step": 9042 - }, - { - "epoch": 0.8518876145168508, - "grad_norm": 0.7657036185264587, - "learning_rate": 1.266213592687935e-05, - "loss": 0.2911, - "step": 9043 - }, - { - "epoch": 0.8519818186100186, - "grad_norm": 0.7158383131027222, - "learning_rate": 1.2660680368852363e-05, - "loss": 0.2853, - "step": 9044 - }, - { - "epoch": 0.8520760227031865, - "grad_norm": 0.8265485167503357, - "learning_rate": 1.265922475015775e-05, - "loss": 0.3354, - "step": 9045 - }, - { - "epoch": 0.8521702267963543, - "grad_norm": 0.6983233690261841, - "learning_rate": 1.2657769070828698e-05, - "loss": 0.2999, - "step": 9046 - }, - { - "epoch": 0.8522644308895222, - "grad_norm": 0.6697626113891602, - "learning_rate": 1.2656313330898401e-05, - "loss": 0.3032, - "step": 9047 - }, - { - "epoch": 0.85235863498269, - "grad_norm": 0.8416416645050049, - "learning_rate": 1.2654857530400055e-05, - "loss": 0.2701, - "step": 9048 - }, - { - "epoch": 0.8524528390758579, - "grad_norm": 0.6764236092567444, - "learning_rate": 1.2653401669366852e-05, - "loss": 0.3055, - "step": 9049 - }, - { - "epoch": 0.8525470431690257, - "grad_norm": 0.6430860757827759, - "learning_rate": 1.2651945747831987e-05, - "loss": 0.3017, - "step": 9050 - }, - { - "epoch": 0.8526412472621936, - "grad_norm": 0.6810445189476013, - "learning_rate": 1.2650489765828653e-05, - "loss": 0.2986, - "step": 9051 - }, - { - "epoch": 0.8527354513553614, - "grad_norm": 0.6868563890457153, - "learning_rate": 1.2649033723390056e-05, - "loss": 0.2996, - "step": 9052 - }, - { - "epoch": 0.8528296554485293, - "grad_norm": 0.8139315843582153, - "learning_rate": 1.2647577620549396e-05, - "loss": 0.2883, - "step": 9053 - }, - { - "epoch": 0.8529238595416971, - "grad_norm": 0.6818740963935852, - "learning_rate": 1.2646121457339866e-05, - "loss": 0.3122, - "step": 9054 - }, - { - "epoch": 0.853018063634865, - "grad_norm": 0.6979965567588806, - "learning_rate": 1.2644665233794682e-05, - "loss": 0.2644, - "step": 9055 - }, - { - "epoch": 0.8531122677280328, - "grad_norm": 1.5787235498428345, - "learning_rate": 1.2643208949947035e-05, - "loss": 0.2998, - "step": 9056 - }, - { - "epoch": 0.8532064718212007, - "grad_norm": 0.6594725251197815, - "learning_rate": 1.2641752605830136e-05, - "loss": 0.3081, - "step": 9057 - }, - { - "epoch": 0.8533006759143685, - "grad_norm": 0.6624385714530945, - "learning_rate": 1.2640296201477195e-05, - "loss": 0.2871, - "step": 9058 - }, - { - "epoch": 0.8533948800075364, - "grad_norm": 0.6775038242340088, - "learning_rate": 1.2638839736921415e-05, - "loss": 0.2817, - "step": 9059 - }, - { - "epoch": 0.8534890841007041, - "grad_norm": 0.7073119878768921, - "learning_rate": 1.2637383212196008e-05, - "loss": 0.2938, - "step": 9060 - }, - { - "epoch": 0.853583288193872, - "grad_norm": 0.6494279503822327, - "learning_rate": 1.2635926627334188e-05, - "loss": 0.2903, - "step": 9061 - }, - { - "epoch": 0.8536774922870398, - "grad_norm": 0.8409245014190674, - "learning_rate": 1.263446998236916e-05, - "loss": 0.3619, - "step": 9062 - }, - { - "epoch": 0.8537716963802077, - "grad_norm": 0.6711735129356384, - "learning_rate": 1.2633013277334145e-05, - "loss": 0.2936, - "step": 9063 - }, - { - "epoch": 0.8538659004733755, - "grad_norm": 0.6291319131851196, - "learning_rate": 1.2631556512262356e-05, - "loss": 0.2554, - "step": 9064 - }, - { - "epoch": 0.8539601045665434, - "grad_norm": 
0.6612778306007385, - "learning_rate": 1.2630099687187007e-05, - "loss": 0.2804, - "step": 9065 - }, - { - "epoch": 0.8540543086597112, - "grad_norm": 0.7076035737991333, - "learning_rate": 1.2628642802141317e-05, - "loss": 0.3447, - "step": 9066 - }, - { - "epoch": 0.8541485127528791, - "grad_norm": 0.7418968677520752, - "learning_rate": 1.2627185857158507e-05, - "loss": 0.2744, - "step": 9067 - }, - { - "epoch": 0.8542427168460469, - "grad_norm": 0.7586404085159302, - "learning_rate": 1.2625728852271795e-05, - "loss": 0.3439, - "step": 9068 - }, - { - "epoch": 0.8543369209392148, - "grad_norm": 0.9554126262664795, - "learning_rate": 1.2624271787514406e-05, - "loss": 0.3124, - "step": 9069 - }, - { - "epoch": 0.8544311250323826, - "grad_norm": 0.7535426616668701, - "learning_rate": 1.2622814662919562e-05, - "loss": 0.3191, - "step": 9070 - }, - { - "epoch": 0.8545253291255505, - "grad_norm": 0.8399681448936462, - "learning_rate": 1.2621357478520486e-05, - "loss": 0.3096, - "step": 9071 - }, - { - "epoch": 0.8546195332187183, - "grad_norm": 0.7496296167373657, - "learning_rate": 1.2619900234350406e-05, - "loss": 0.3137, - "step": 9072 - }, - { - "epoch": 0.8547137373118862, - "grad_norm": 0.7252075672149658, - "learning_rate": 1.2618442930442549e-05, - "loss": 0.3052, - "step": 9073 - }, - { - "epoch": 0.854807941405054, - "grad_norm": 0.6654471158981323, - "learning_rate": 1.2616985566830142e-05, - "loss": 0.3134, - "step": 9074 - }, - { - "epoch": 0.8549021454982219, - "grad_norm": 0.7226504683494568, - "learning_rate": 1.2615528143546423e-05, - "loss": 0.3024, - "step": 9075 - }, - { - "epoch": 0.8549963495913897, - "grad_norm": 0.8002542853355408, - "learning_rate": 1.261407066062461e-05, - "loss": 0.3179, - "step": 9076 - }, - { - "epoch": 0.8550905536845576, - "grad_norm": 0.8656417727470398, - "learning_rate": 1.2612613118097945e-05, - "loss": 0.3597, - "step": 9077 - }, - { - "epoch": 0.8551847577777254, - "grad_norm": 0.6418734192848206, - "learning_rate": 1.2611155515999665e-05, - "loss": 0.3104, - "step": 9078 - }, - { - "epoch": 0.8552789618708933, - "grad_norm": 0.6727639436721802, - "learning_rate": 1.2609697854362995e-05, - "loss": 0.2858, - "step": 9079 - }, - { - "epoch": 0.8553731659640611, - "grad_norm": 0.8004004955291748, - "learning_rate": 1.2608240133221181e-05, - "loss": 0.3094, - "step": 9080 - }, - { - "epoch": 0.855467370057229, - "grad_norm": 0.6943681836128235, - "learning_rate": 1.2606782352607458e-05, - "loss": 0.2923, - "step": 9081 - }, - { - "epoch": 0.8555615741503968, - "grad_norm": 0.6713337302207947, - "learning_rate": 1.2605324512555064e-05, - "loss": 0.2749, - "step": 9082 - }, - { - "epoch": 0.8556557782435646, - "grad_norm": 0.6937925815582275, - "learning_rate": 1.2603866613097245e-05, - "loss": 0.287, - "step": 9083 - }, - { - "epoch": 0.8557499823367325, - "grad_norm": 0.7038596868515015, - "learning_rate": 1.2602408654267237e-05, - "loss": 0.3168, - "step": 9084 - }, - { - "epoch": 0.8558441864299003, - "grad_norm": 0.7447677850723267, - "learning_rate": 1.2600950636098292e-05, - "loss": 0.32, - "step": 9085 - }, - { - "epoch": 0.8559383905230682, - "grad_norm": 0.7252728343009949, - "learning_rate": 1.2599492558623646e-05, - "loss": 0.3038, - "step": 9086 - }, - { - "epoch": 0.856032594616236, - "grad_norm": 0.8138175010681152, - "learning_rate": 1.2598034421876548e-05, - "loss": 0.3457, - "step": 9087 - }, - { - "epoch": 0.8561267987094039, - "grad_norm": 0.6639115214347839, - "learning_rate": 1.2596576225890251e-05, - "loss": 0.3187, - 
"step": 9088 - }, - { - "epoch": 0.8562210028025717, - "grad_norm": 0.6821640133857727, - "learning_rate": 1.2595117970697998e-05, - "loss": 0.2966, - "step": 9089 - }, - { - "epoch": 0.8563152068957396, - "grad_norm": 0.7528424859046936, - "learning_rate": 1.2593659656333044e-05, - "loss": 0.3216, - "step": 9090 - }, - { - "epoch": 0.8564094109889074, - "grad_norm": 0.8091188073158264, - "learning_rate": 1.2592201282828635e-05, - "loss": 0.3225, - "step": 9091 - }, - { - "epoch": 0.8565036150820753, - "grad_norm": 0.6671088933944702, - "learning_rate": 1.2590742850218031e-05, - "loss": 0.2696, - "step": 9092 - }, - { - "epoch": 0.8565978191752431, - "grad_norm": 0.7548508048057556, - "learning_rate": 1.2589284358534486e-05, - "loss": 0.2939, - "step": 9093 - }, - { - "epoch": 0.856692023268411, - "grad_norm": 0.7009268999099731, - "learning_rate": 1.2587825807811248e-05, - "loss": 0.3123, - "step": 9094 - }, - { - "epoch": 0.8567862273615788, - "grad_norm": 0.7987003922462463, - "learning_rate": 1.2586367198081582e-05, - "loss": 0.34, - "step": 9095 - }, - { - "epoch": 0.8568804314547467, - "grad_norm": 0.9772810935974121, - "learning_rate": 1.2584908529378743e-05, - "loss": 0.3126, - "step": 9096 - }, - { - "epoch": 0.8569746355479145, - "grad_norm": 0.8526176810264587, - "learning_rate": 1.258344980173599e-05, - "loss": 0.3678, - "step": 9097 - }, - { - "epoch": 0.8570688396410824, - "grad_norm": 0.8225265145301819, - "learning_rate": 1.2581991015186592e-05, - "loss": 0.2985, - "step": 9098 - }, - { - "epoch": 0.8571630437342502, - "grad_norm": 0.739586353302002, - "learning_rate": 1.2580532169763799e-05, - "loss": 0.2975, - "step": 9099 - }, - { - "epoch": 0.8572572478274181, - "grad_norm": 0.8607017397880554, - "learning_rate": 1.2579073265500886e-05, - "loss": 0.2715, - "step": 9100 - }, - { - "epoch": 0.8573514519205859, - "grad_norm": 0.7922728061676025, - "learning_rate": 1.2577614302431114e-05, - "loss": 0.2931, - "step": 9101 - }, - { - "epoch": 0.8574456560137538, - "grad_norm": 0.7574107646942139, - "learning_rate": 1.2576155280587745e-05, - "loss": 0.3322, - "step": 9102 - }, - { - "epoch": 0.8575398601069216, - "grad_norm": 0.7002633810043335, - "learning_rate": 1.2574696200004055e-05, - "loss": 0.3099, - "step": 9103 - }, - { - "epoch": 0.8576340642000895, - "grad_norm": 0.8383626341819763, - "learning_rate": 1.257323706071331e-05, - "loss": 0.3548, - "step": 9104 - }, - { - "epoch": 0.8577282682932573, - "grad_norm": 0.8669114112854004, - "learning_rate": 1.2571777862748782e-05, - "loss": 0.3364, - "step": 9105 - }, - { - "epoch": 0.8578224723864252, - "grad_norm": 0.706563413143158, - "learning_rate": 1.257031860614374e-05, - "loss": 0.2956, - "step": 9106 - }, - { - "epoch": 0.857916676479593, - "grad_norm": 0.7623336911201477, - "learning_rate": 1.2568859290931454e-05, - "loss": 0.3228, - "step": 9107 - }, - { - "epoch": 0.8580108805727609, - "grad_norm": 0.7400816082954407, - "learning_rate": 1.2567399917145208e-05, - "loss": 0.3163, - "step": 9108 - }, - { - "epoch": 0.8581050846659287, - "grad_norm": 0.7918370366096497, - "learning_rate": 1.2565940484818273e-05, - "loss": 0.3487, - "step": 9109 - }, - { - "epoch": 0.8581992887590966, - "grad_norm": 0.7184942364692688, - "learning_rate": 1.2564480993983928e-05, - "loss": 0.2998, - "step": 9110 - }, - { - "epoch": 0.8582934928522644, - "grad_norm": 0.7348983287811279, - "learning_rate": 1.2563021444675447e-05, - "loss": 0.3106, - "step": 9111 - }, - { - "epoch": 0.8583876969454323, - "grad_norm": 0.6605958342552185, - 
"learning_rate": 1.2561561836926115e-05, - "loss": 0.2437, - "step": 9112 - }, - { - "epoch": 0.8584819010386001, - "grad_norm": 0.7503715753555298, - "learning_rate": 1.2560102170769212e-05, - "loss": 0.3322, - "step": 9113 - }, - { - "epoch": 0.858576105131768, - "grad_norm": 0.703746497631073, - "learning_rate": 1.255864244623802e-05, - "loss": 0.3332, - "step": 9114 - }, - { - "epoch": 0.8586703092249358, - "grad_norm": 0.6568114161491394, - "learning_rate": 1.2557182663365823e-05, - "loss": 0.3007, - "step": 9115 - }, - { - "epoch": 0.8587645133181037, - "grad_norm": 0.6698923110961914, - "learning_rate": 1.2555722822185906e-05, - "loss": 0.2909, - "step": 9116 - }, - { - "epoch": 0.8588587174112715, - "grad_norm": 0.9063276648521423, - "learning_rate": 1.2554262922731555e-05, - "loss": 0.3045, - "step": 9117 - }, - { - "epoch": 0.8589529215044394, - "grad_norm": 0.7718333005905151, - "learning_rate": 1.2552802965036063e-05, - "loss": 0.2899, - "step": 9118 - }, - { - "epoch": 0.8590471255976072, - "grad_norm": 0.7325762510299683, - "learning_rate": 1.2551342949132713e-05, - "loss": 0.2974, - "step": 9119 - }, - { - "epoch": 0.8591413296907751, - "grad_norm": 0.7019739747047424, - "learning_rate": 1.2549882875054797e-05, - "loss": 0.3174, - "step": 9120 - }, - { - "epoch": 0.8592355337839429, - "grad_norm": 0.8704683780670166, - "learning_rate": 1.2548422742835608e-05, - "loss": 0.2874, - "step": 9121 - }, - { - "epoch": 0.8593297378771108, - "grad_norm": 0.7827633023262024, - "learning_rate": 1.254696255250844e-05, - "loss": 0.3136, - "step": 9122 - }, - { - "epoch": 0.8594239419702786, - "grad_norm": 0.7730802893638611, - "learning_rate": 1.2545502304106588e-05, - "loss": 0.3098, - "step": 9123 - }, - { - "epoch": 0.8595181460634465, - "grad_norm": 0.7378262281417847, - "learning_rate": 1.2544041997663348e-05, - "loss": 0.3058, - "step": 9124 - }, - { - "epoch": 0.8596123501566143, - "grad_norm": 0.6844967603683472, - "learning_rate": 1.2542581633212015e-05, - "loss": 0.3135, - "step": 9125 - }, - { - "epoch": 0.8597065542497822, - "grad_norm": 0.6816369891166687, - "learning_rate": 1.2541121210785887e-05, - "loss": 0.2725, - "step": 9126 - }, - { - "epoch": 0.85980075834295, - "grad_norm": 0.7545812129974365, - "learning_rate": 1.2539660730418264e-05, - "loss": 0.3462, - "step": 9127 - }, - { - "epoch": 0.8598949624361178, - "grad_norm": 0.6996229887008667, - "learning_rate": 1.2538200192142451e-05, - "loss": 0.2869, - "step": 9128 - }, - { - "epoch": 0.8599891665292857, - "grad_norm": 0.7229344248771667, - "learning_rate": 1.253673959599175e-05, - "loss": 0.3101, - "step": 9129 - }, - { - "epoch": 0.8600833706224535, - "grad_norm": 0.637445330619812, - "learning_rate": 1.253527894199946e-05, - "loss": 0.3117, - "step": 9130 - }, - { - "epoch": 0.8601775747156214, - "grad_norm": 0.7283216714859009, - "learning_rate": 1.2533818230198889e-05, - "loss": 0.3119, - "step": 9131 - }, - { - "epoch": 0.8602717788087892, - "grad_norm": 0.69550621509552, - "learning_rate": 1.2532357460623345e-05, - "loss": 0.3136, - "step": 9132 - }, - { - "epoch": 0.8603659829019571, - "grad_norm": 0.7138452529907227, - "learning_rate": 1.2530896633306136e-05, - "loss": 0.2971, - "step": 9133 - }, - { - "epoch": 0.8604601869951249, - "grad_norm": 0.7830208539962769, - "learning_rate": 1.2529435748280566e-05, - "loss": 0.3536, - "step": 9134 - }, - { - "epoch": 0.8605543910882928, - "grad_norm": 0.8075652718544006, - "learning_rate": 1.2527974805579954e-05, - "loss": 0.3704, - "step": 9135 - }, - { - 
"epoch": 0.8606485951814606, - "grad_norm": 0.7236867547035217, - "learning_rate": 1.2526513805237604e-05, - "loss": 0.2996, - "step": 9136 - }, - { - "epoch": 0.8607427992746285, - "grad_norm": 0.6777334809303284, - "learning_rate": 1.2525052747286832e-05, - "loss": 0.2915, - "step": 9137 - }, - { - "epoch": 0.8608370033677963, - "grad_norm": 0.6908157467842102, - "learning_rate": 1.2523591631760952e-05, - "loss": 0.3125, - "step": 9138 - }, - { - "epoch": 0.8609312074609642, - "grad_norm": 0.7357950806617737, - "learning_rate": 1.2522130458693278e-05, - "loss": 0.3328, - "step": 9139 - }, - { - "epoch": 0.861025411554132, - "grad_norm": 0.861672580242157, - "learning_rate": 1.2520669228117132e-05, - "loss": 0.3602, - "step": 9140 - }, - { - "epoch": 0.8611196156472999, - "grad_norm": 0.6655526161193848, - "learning_rate": 1.251920794006583e-05, - "loss": 0.2671, - "step": 9141 - }, - { - "epoch": 0.8612138197404677, - "grad_norm": 0.6929822564125061, - "learning_rate": 1.2517746594572688e-05, - "loss": 0.3119, - "step": 9142 - }, - { - "epoch": 0.8613080238336356, - "grad_norm": 0.7310613393783569, - "learning_rate": 1.2516285191671031e-05, - "loss": 0.3053, - "step": 9143 - }, - { - "epoch": 0.8614022279268034, - "grad_norm": 0.7642453908920288, - "learning_rate": 1.2514823731394182e-05, - "loss": 0.285, - "step": 9144 - }, - { - "epoch": 0.8614964320199713, - "grad_norm": 0.6509019732475281, - "learning_rate": 1.2513362213775462e-05, - "loss": 0.3139, - "step": 9145 - }, - { - "epoch": 0.8615906361131391, - "grad_norm": 0.6920650005340576, - "learning_rate": 1.2511900638848196e-05, - "loss": 0.334, - "step": 9146 - }, - { - "epoch": 0.861684840206307, - "grad_norm": 0.6638849973678589, - "learning_rate": 1.2510439006645707e-05, - "loss": 0.3166, - "step": 9147 - }, - { - "epoch": 0.8617790442994748, - "grad_norm": 0.727704644203186, - "learning_rate": 1.2508977317201332e-05, - "loss": 0.3115, - "step": 9148 - }, - { - "epoch": 0.8618732483926427, - "grad_norm": 0.7568182349205017, - "learning_rate": 1.2507515570548392e-05, - "loss": 0.3262, - "step": 9149 - }, - { - "epoch": 0.8619674524858105, - "grad_norm": 0.7067240476608276, - "learning_rate": 1.250605376672022e-05, - "loss": 0.3153, - "step": 9150 - }, - { - "epoch": 0.8620616565789784, - "grad_norm": 0.7335924506187439, - "learning_rate": 1.2504591905750143e-05, - "loss": 0.3367, - "step": 9151 - }, - { - "epoch": 0.8621558606721462, - "grad_norm": 0.6466672420501709, - "learning_rate": 1.25031299876715e-05, - "loss": 0.2883, - "step": 9152 - }, - { - "epoch": 0.8622500647653141, - "grad_norm": 0.7279981970787048, - "learning_rate": 1.250166801251762e-05, - "loss": 0.3255, - "step": 9153 - }, - { - "epoch": 0.8623442688584819, - "grad_norm": 0.6738430261611938, - "learning_rate": 1.250020598032184e-05, - "loss": 0.2918, - "step": 9154 - }, - { - "epoch": 0.8624384729516498, - "grad_norm": 0.7319638729095459, - "learning_rate": 1.2498743891117502e-05, - "loss": 0.2955, - "step": 9155 - }, - { - "epoch": 0.8625326770448176, - "grad_norm": 0.8512725830078125, - "learning_rate": 1.2497281744937934e-05, - "loss": 0.3254, - "step": 9156 - }, - { - "epoch": 0.8626268811379855, - "grad_norm": 0.7730419635772705, - "learning_rate": 1.2495819541816483e-05, - "loss": 0.2859, - "step": 9157 - }, - { - "epoch": 0.8627210852311533, - "grad_norm": 0.6937309503555298, - "learning_rate": 1.2494357281786487e-05, - "loss": 0.3012, - "step": 9158 - }, - { - "epoch": 0.8628152893243212, - "grad_norm": 0.8749970197677612, - "learning_rate": 
1.2492894964881282e-05, - "loss": 0.3223, - "step": 9159 - }, - { - "epoch": 0.862909493417489, - "grad_norm": 0.6235557198524475, - "learning_rate": 1.249143259113422e-05, - "loss": 0.2971, - "step": 9160 - }, - { - "epoch": 0.8630036975106569, - "grad_norm": 0.6645923256874084, - "learning_rate": 1.2489970160578645e-05, - "loss": 0.3051, - "step": 9161 - }, - { - "epoch": 0.8630979016038247, - "grad_norm": 0.8053779602050781, - "learning_rate": 1.2488507673247894e-05, - "loss": 0.3286, - "step": 9162 - }, - { - "epoch": 0.8631921056969926, - "grad_norm": 0.8143567442893982, - "learning_rate": 1.2487045129175322e-05, - "loss": 0.3026, - "step": 9163 - }, - { - "epoch": 0.8632863097901604, - "grad_norm": 0.6536929607391357, - "learning_rate": 1.2485582528394276e-05, - "loss": 0.2902, - "step": 9164 - }, - { - "epoch": 0.8633805138833283, - "grad_norm": 0.7779171466827393, - "learning_rate": 1.2484119870938102e-05, - "loss": 0.3174, - "step": 9165 - }, - { - "epoch": 0.8634747179764961, - "grad_norm": 0.697607159614563, - "learning_rate": 1.2482657156840157e-05, - "loss": 0.3127, - "step": 9166 - }, - { - "epoch": 0.863568922069664, - "grad_norm": 0.880972683429718, - "learning_rate": 1.2481194386133784e-05, - "loss": 0.3126, - "step": 9167 - }, - { - "epoch": 0.8636631261628318, - "grad_norm": 0.8075266480445862, - "learning_rate": 1.2479731558852345e-05, - "loss": 0.3384, - "step": 9168 - }, - { - "epoch": 0.8637573302559997, - "grad_norm": 0.7963753342628479, - "learning_rate": 1.2478268675029193e-05, - "loss": 0.3422, - "step": 9169 - }, - { - "epoch": 0.8638515343491675, - "grad_norm": 0.751092255115509, - "learning_rate": 1.2476805734697679e-05, - "loss": 0.292, - "step": 9170 - }, - { - "epoch": 0.8639457384423354, - "grad_norm": 0.7625300884246826, - "learning_rate": 1.2475342737891164e-05, - "loss": 0.313, - "step": 9171 - }, - { - "epoch": 0.8640399425355032, - "grad_norm": 0.6691493988037109, - "learning_rate": 1.2473879684643006e-05, - "loss": 0.2766, - "step": 9172 - }, - { - "epoch": 0.864134146628671, - "grad_norm": 0.7864203453063965, - "learning_rate": 1.247241657498657e-05, - "loss": 0.3185, - "step": 9173 - }, - { - "epoch": 0.8642283507218389, - "grad_norm": 0.8323874473571777, - "learning_rate": 1.2470953408955206e-05, - "loss": 0.2968, - "step": 9174 - }, - { - "epoch": 0.8643225548150067, - "grad_norm": 0.6916950941085815, - "learning_rate": 1.2469490186582289e-05, - "loss": 0.2846, - "step": 9175 - }, - { - "epoch": 0.8644167589081746, - "grad_norm": 0.7014223337173462, - "learning_rate": 1.2468026907901171e-05, - "loss": 0.3205, - "step": 9176 - }, - { - "epoch": 0.8645109630013424, - "grad_norm": 0.6320071220397949, - "learning_rate": 1.2466563572945228e-05, - "loss": 0.2632, - "step": 9177 - }, - { - "epoch": 0.8646051670945103, - "grad_norm": 0.8776123523712158, - "learning_rate": 1.2465100181747817e-05, - "loss": 0.3685, - "step": 9178 - }, - { - "epoch": 0.8646993711876781, - "grad_norm": 0.8309136629104614, - "learning_rate": 1.246363673434231e-05, - "loss": 0.3196, - "step": 9179 - }, - { - "epoch": 0.864793575280846, - "grad_norm": 0.8322733640670776, - "learning_rate": 1.2462173230762078e-05, - "loss": 0.2985, - "step": 9180 - }, - { - "epoch": 0.8648877793740138, - "grad_norm": 0.6591123938560486, - "learning_rate": 1.2460709671040486e-05, - "loss": 0.3048, - "step": 9181 - }, - { - "epoch": 0.8649819834671817, - "grad_norm": 0.7137584686279297, - "learning_rate": 1.2459246055210907e-05, - "loss": 0.2903, - "step": 9182 - }, - { - "epoch": 
0.8650761875603495, - "grad_norm": 0.6407827734947205, - "learning_rate": 1.2457782383306719e-05, - "loss": 0.304, - "step": 9183 - }, - { - "epoch": 0.8651703916535174, - "grad_norm": 0.7770898938179016, - "learning_rate": 1.2456318655361288e-05, - "loss": 0.2805, - "step": 9184 - }, - { - "epoch": 0.8652645957466852, - "grad_norm": 0.8176446557044983, - "learning_rate": 1.2454854871407993e-05, - "loss": 0.3625, - "step": 9185 - }, - { - "epoch": 0.8653587998398531, - "grad_norm": 0.7962294816970825, - "learning_rate": 1.2453391031480214e-05, - "loss": 0.299, - "step": 9186 - }, - { - "epoch": 0.8654530039330209, - "grad_norm": 0.6298891305923462, - "learning_rate": 1.2451927135611319e-05, - "loss": 0.2982, - "step": 9187 - }, - { - "epoch": 0.8655472080261888, - "grad_norm": 0.7367516160011292, - "learning_rate": 1.2450463183834697e-05, - "loss": 0.301, - "step": 9188 - }, - { - "epoch": 0.8656414121193566, - "grad_norm": 0.7628706097602844, - "learning_rate": 1.2448999176183725e-05, - "loss": 0.3391, - "step": 9189 - }, - { - "epoch": 0.8657356162125245, - "grad_norm": 0.7715069055557251, - "learning_rate": 1.2447535112691784e-05, - "loss": 0.3473, - "step": 9190 - }, - { - "epoch": 0.8658298203056923, - "grad_norm": 0.6850812435150146, - "learning_rate": 1.2446070993392257e-05, - "loss": 0.3036, - "step": 9191 - }, - { - "epoch": 0.8659240243988602, - "grad_norm": 0.7510053515434265, - "learning_rate": 1.2444606818318528e-05, - "loss": 0.337, - "step": 9192 - }, - { - "epoch": 0.866018228492028, - "grad_norm": 0.76163250207901, - "learning_rate": 1.2443142587503983e-05, - "loss": 0.3318, - "step": 9193 - }, - { - "epoch": 0.8661124325851959, - "grad_norm": 0.671679675579071, - "learning_rate": 1.2441678300982007e-05, - "loss": 0.3011, - "step": 9194 - }, - { - "epoch": 0.8662066366783637, - "grad_norm": 0.6736225485801697, - "learning_rate": 1.2440213958785994e-05, - "loss": 0.2872, - "step": 9195 - }, - { - "epoch": 0.8663008407715316, - "grad_norm": 0.779719352722168, - "learning_rate": 1.2438749560949325e-05, - "loss": 0.3442, - "step": 9196 - }, - { - "epoch": 0.8663950448646994, - "grad_norm": 0.7523724436759949, - "learning_rate": 1.2437285107505397e-05, - "loss": 0.2835, - "step": 9197 - }, - { - "epoch": 0.8664892489578672, - "grad_norm": 0.7877782583236694, - "learning_rate": 1.2435820598487599e-05, - "loss": 0.3011, - "step": 9198 - }, - { - "epoch": 0.866583453051035, - "grad_norm": 0.8221269249916077, - "learning_rate": 1.243435603392932e-05, - "loss": 0.3554, - "step": 9199 - }, - { - "epoch": 0.8666776571442029, - "grad_norm": 0.6900723576545715, - "learning_rate": 1.2432891413863964e-05, - "loss": 0.2813, - "step": 9200 - }, - { - "epoch": 0.8667718612373707, - "grad_norm": 0.6873626112937927, - "learning_rate": 1.2431426738324919e-05, - "loss": 0.2665, - "step": 9201 - }, - { - "epoch": 0.8668660653305386, - "grad_norm": 0.8400952219963074, - "learning_rate": 1.2429962007345584e-05, - "loss": 0.3231, - "step": 9202 - }, - { - "epoch": 0.8669602694237064, - "grad_norm": 0.7722880840301514, - "learning_rate": 1.2428497220959359e-05, - "loss": 0.3236, - "step": 9203 - }, - { - "epoch": 0.8670544735168743, - "grad_norm": 0.6867839694023132, - "learning_rate": 1.242703237919964e-05, - "loss": 0.3303, - "step": 9204 - }, - { - "epoch": 0.8671486776100421, - "grad_norm": 0.7867183685302734, - "learning_rate": 1.242556748209983e-05, - "loss": 0.3048, - "step": 9205 - }, - { - "epoch": 0.86724288170321, - "grad_norm": 0.7227653861045837, - "learning_rate": 1.242410252969333e-05, 
- "loss": 0.2925, - "step": 9206 - }, - { - "epoch": 0.8673370857963778, - "grad_norm": 0.6544963717460632, - "learning_rate": 1.242263752201354e-05, - "loss": 0.3005, - "step": 9207 - }, - { - "epoch": 0.8674312898895457, - "grad_norm": 0.7285255789756775, - "learning_rate": 1.242117245909387e-05, - "loss": 0.2526, - "step": 9208 - }, - { - "epoch": 0.8675254939827135, - "grad_norm": 0.7026352286338806, - "learning_rate": 1.2419707340967726e-05, - "loss": 0.287, - "step": 9209 - }, - { - "epoch": 0.8676196980758814, - "grad_norm": 0.7239752411842346, - "learning_rate": 1.241824216766851e-05, - "loss": 0.3454, - "step": 9210 - }, - { - "epoch": 0.8677139021690492, - "grad_norm": 0.7591168880462646, - "learning_rate": 1.2416776939229633e-05, - "loss": 0.3133, - "step": 9211 - }, - { - "epoch": 0.867808106262217, - "grad_norm": 0.8220389485359192, - "learning_rate": 1.2415311655684506e-05, - "loss": 0.3254, - "step": 9212 - }, - { - "epoch": 0.8679023103553849, - "grad_norm": 0.7573966383934021, - "learning_rate": 1.2413846317066535e-05, - "loss": 0.3084, - "step": 9213 - }, - { - "epoch": 0.8679965144485527, - "grad_norm": 0.7685338854789734, - "learning_rate": 1.2412380923409138e-05, - "loss": 0.3149, - "step": 9214 - }, - { - "epoch": 0.8680907185417206, - "grad_norm": 0.6167116165161133, - "learning_rate": 1.2410915474745724e-05, - "loss": 0.2629, - "step": 9215 - }, - { - "epoch": 0.8681849226348884, - "grad_norm": 0.823922872543335, - "learning_rate": 1.2409449971109705e-05, - "loss": 0.2957, - "step": 9216 - }, - { - "epoch": 0.8682791267280563, - "grad_norm": 0.8153708577156067, - "learning_rate": 1.2407984412534507e-05, - "loss": 0.3213, - "step": 9217 - }, - { - "epoch": 0.8683733308212241, - "grad_norm": 0.8279568552970886, - "learning_rate": 1.2406518799053538e-05, - "loss": 0.3536, - "step": 9218 - }, - { - "epoch": 0.868467534914392, - "grad_norm": 0.7307692766189575, - "learning_rate": 1.2405053130700215e-05, - "loss": 0.2734, - "step": 9219 - }, - { - "epoch": 0.8685617390075598, - "grad_norm": 0.7823532819747925, - "learning_rate": 1.2403587407507965e-05, - "loss": 0.3167, - "step": 9220 - }, - { - "epoch": 0.8686559431007277, - "grad_norm": 0.7097548246383667, - "learning_rate": 1.2402121629510202e-05, - "loss": 0.3035, - "step": 9221 - }, - { - "epoch": 0.8687501471938955, - "grad_norm": 0.7290064096450806, - "learning_rate": 1.240065579674035e-05, - "loss": 0.2687, - "step": 9222 - }, - { - "epoch": 0.8688443512870634, - "grad_norm": 0.642393946647644, - "learning_rate": 1.2399189909231838e-05, - "loss": 0.2765, - "step": 9223 - }, - { - "epoch": 0.8689385553802312, - "grad_norm": 0.7616701722145081, - "learning_rate": 1.2397723967018083e-05, - "loss": 0.2903, - "step": 9224 - }, - { - "epoch": 0.8690327594733991, - "grad_norm": 0.6706497073173523, - "learning_rate": 1.2396257970132514e-05, - "loss": 0.3465, - "step": 9225 - }, - { - "epoch": 0.8691269635665669, - "grad_norm": 0.6539916396141052, - "learning_rate": 1.2394791918608557e-05, - "loss": 0.2923, - "step": 9226 - }, - { - "epoch": 0.8692211676597348, - "grad_norm": 0.7657091617584229, - "learning_rate": 1.239332581247964e-05, - "loss": 0.3147, - "step": 9227 - }, - { - "epoch": 0.8693153717529026, - "grad_norm": 0.8091147541999817, - "learning_rate": 1.2391859651779195e-05, - "loss": 0.3214, - "step": 9228 - }, - { - "epoch": 0.8694095758460705, - "grad_norm": 0.7738662362098694, - "learning_rate": 1.2390393436540649e-05, - "loss": 0.336, - "step": 9229 - }, - { - "epoch": 0.8695037799392383, - "grad_norm": 
0.6700285077095032, - "learning_rate": 1.2388927166797438e-05, - "loss": 0.2857, - "step": 9230 - }, - { - "epoch": 0.8695979840324062, - "grad_norm": 0.7972094416618347, - "learning_rate": 1.238746084258299e-05, - "loss": 0.3035, - "step": 9231 - }, - { - "epoch": 0.869692188125574, - "grad_norm": 0.8680013418197632, - "learning_rate": 1.2385994463930743e-05, - "loss": 0.3114, - "step": 9232 - }, - { - "epoch": 0.8697863922187419, - "grad_norm": 0.7896369099617004, - "learning_rate": 1.2384528030874134e-05, - "loss": 0.2947, - "step": 9233 - }, - { - "epoch": 0.8698805963119097, - "grad_norm": 0.6831520795822144, - "learning_rate": 1.2383061543446596e-05, - "loss": 0.3035, - "step": 9234 - }, - { - "epoch": 0.8699748004050776, - "grad_norm": 0.8190691471099854, - "learning_rate": 1.2381595001681574e-05, - "loss": 0.2804, - "step": 9235 - }, - { - "epoch": 0.8700690044982454, - "grad_norm": 0.615748941898346, - "learning_rate": 1.23801284056125e-05, - "loss": 0.2636, - "step": 9236 - }, - { - "epoch": 0.8701632085914133, - "grad_norm": 0.7815951704978943, - "learning_rate": 1.2378661755272817e-05, - "loss": 0.3178, - "step": 9237 - }, - { - "epoch": 0.8702574126845811, - "grad_norm": 0.71673184633255, - "learning_rate": 1.2377195050695967e-05, - "loss": 0.3242, - "step": 9238 - }, - { - "epoch": 0.870351616777749, - "grad_norm": 0.835582971572876, - "learning_rate": 1.2375728291915391e-05, - "loss": 0.3205, - "step": 9239 - }, - { - "epoch": 0.8704458208709168, - "grad_norm": 0.6454624533653259, - "learning_rate": 1.237426147896454e-05, - "loss": 0.3111, - "step": 9240 - }, - { - "epoch": 0.8705400249640847, - "grad_norm": 0.7328464984893799, - "learning_rate": 1.2372794611876855e-05, - "loss": 0.2991, - "step": 9241 - }, - { - "epoch": 0.8706342290572525, - "grad_norm": 0.7155019640922546, - "learning_rate": 1.237132769068578e-05, - "loss": 0.3049, - "step": 9242 - }, - { - "epoch": 0.8707284331504204, - "grad_norm": 1.338820219039917, - "learning_rate": 1.236986071542477e-05, - "loss": 0.3122, - "step": 9243 - }, - { - "epoch": 0.8708226372435882, - "grad_norm": 0.9869731664657593, - "learning_rate": 1.236839368612727e-05, - "loss": 0.2854, - "step": 9244 - }, - { - "epoch": 0.8709168413367561, - "grad_norm": 0.7557142376899719, - "learning_rate": 1.236692660282673e-05, - "loss": 0.3155, - "step": 9245 - }, - { - "epoch": 0.8710110454299239, - "grad_norm": 0.7705155611038208, - "learning_rate": 1.2365459465556603e-05, - "loss": 0.3244, - "step": 9246 - }, - { - "epoch": 0.8711052495230918, - "grad_norm": 0.7242844700813293, - "learning_rate": 1.236399227435034e-05, - "loss": 0.3464, - "step": 9247 - }, - { - "epoch": 0.8711994536162596, - "grad_norm": 0.6580681204795837, - "learning_rate": 1.2362525029241401e-05, - "loss": 0.2681, - "step": 9248 - }, - { - "epoch": 0.8712936577094275, - "grad_norm": 0.79903244972229, - "learning_rate": 1.2361057730263235e-05, - "loss": 0.3381, - "step": 9249 - }, - { - "epoch": 0.8713878618025953, - "grad_norm": 0.7240850329399109, - "learning_rate": 1.2359590377449301e-05, - "loss": 0.3111, - "step": 9250 - }, - { - "epoch": 0.8714820658957632, - "grad_norm": 0.7803201079368591, - "learning_rate": 1.235812297083306e-05, - "loss": 0.2922, - "step": 9251 - }, - { - "epoch": 0.871576269988931, - "grad_norm": 0.801443874835968, - "learning_rate": 1.2356655510447966e-05, - "loss": 0.3139, - "step": 9252 - }, - { - "epoch": 0.8716704740820989, - "grad_norm": 0.7281584143638611, - "learning_rate": 1.2355187996327484e-05, - "loss": 0.3238, - "step": 9253 - }, - 
{ - "epoch": 0.8717646781752667, - "grad_norm": 0.6576111912727356, - "learning_rate": 1.2353720428505072e-05, - "loss": 0.2689, - "step": 9254 - }, - { - "epoch": 0.8718588822684346, - "grad_norm": 0.7188935875892639, - "learning_rate": 1.2352252807014194e-05, - "loss": 0.3289, - "step": 9255 - }, - { - "epoch": 0.8719530863616024, - "grad_norm": 0.7349696755409241, - "learning_rate": 1.2350785131888311e-05, - "loss": 0.3006, - "step": 9256 - }, - { - "epoch": 0.8720472904547703, - "grad_norm": 0.7604920268058777, - "learning_rate": 1.2349317403160897e-05, - "loss": 0.302, - "step": 9257 - }, - { - "epoch": 0.8721414945479381, - "grad_norm": 0.6442971229553223, - "learning_rate": 1.234784962086541e-05, - "loss": 0.2962, - "step": 9258 - }, - { - "epoch": 0.872235698641106, - "grad_norm": 0.6648150682449341, - "learning_rate": 1.234638178503532e-05, - "loss": 0.2951, - "step": 9259 - }, - { - "epoch": 0.8723299027342738, - "grad_norm": 0.7383939623832703, - "learning_rate": 1.2344913895704099e-05, - "loss": 0.2839, - "step": 9260 - }, - { - "epoch": 0.8724241068274416, - "grad_norm": 0.7782507538795471, - "learning_rate": 1.2343445952905212e-05, - "loss": 0.3066, - "step": 9261 - }, - { - "epoch": 0.8725183109206095, - "grad_norm": 0.66548752784729, - "learning_rate": 1.2341977956672135e-05, - "loss": 0.2874, - "step": 9262 - }, - { - "epoch": 0.8726125150137773, - "grad_norm": 0.7760818600654602, - "learning_rate": 1.2340509907038341e-05, - "loss": 0.3039, - "step": 9263 - }, - { - "epoch": 0.8727067191069452, - "grad_norm": 0.7823361754417419, - "learning_rate": 1.2339041804037294e-05, - "loss": 0.292, - "step": 9264 - }, - { - "epoch": 0.872800923200113, - "grad_norm": 0.7515829205513, - "learning_rate": 1.2337573647702483e-05, - "loss": 0.3131, - "step": 9265 - }, - { - "epoch": 0.8728951272932809, - "grad_norm": 0.7954232096672058, - "learning_rate": 1.2336105438067376e-05, - "loss": 0.324, - "step": 9266 - }, - { - "epoch": 0.8729893313864487, - "grad_norm": 0.8027656078338623, - "learning_rate": 1.2334637175165451e-05, - "loss": 0.3211, - "step": 9267 - }, - { - "epoch": 0.8730835354796166, - "grad_norm": 0.7220860719680786, - "learning_rate": 1.233316885903019e-05, - "loss": 0.27, - "step": 9268 - }, - { - "epoch": 0.8731777395727844, - "grad_norm": 0.7339596748352051, - "learning_rate": 1.2331700489695069e-05, - "loss": 0.2881, - "step": 9269 - }, - { - "epoch": 0.8732719436659523, - "grad_norm": 0.7125227451324463, - "learning_rate": 1.2330232067193572e-05, - "loss": 0.2795, - "step": 9270 - }, - { - "epoch": 0.8733661477591201, - "grad_norm": 0.6848975419998169, - "learning_rate": 1.2328763591559176e-05, - "loss": 0.2748, - "step": 9271 - }, - { - "epoch": 0.873460351852288, - "grad_norm": 0.7000641226768494, - "learning_rate": 1.2327295062825373e-05, - "loss": 0.2804, - "step": 9272 - }, - { - "epoch": 0.8735545559454558, - "grad_norm": 0.7253985404968262, - "learning_rate": 1.2325826481025644e-05, - "loss": 0.32, - "step": 9273 - }, - { - "epoch": 0.8736487600386237, - "grad_norm": 0.7008634209632874, - "learning_rate": 1.2324357846193472e-05, - "loss": 0.2772, - "step": 9274 - }, - { - "epoch": 0.8737429641317915, - "grad_norm": 0.6910400390625, - "learning_rate": 1.2322889158362347e-05, - "loss": 0.2928, - "step": 9275 - }, - { - "epoch": 0.8738371682249594, - "grad_norm": 0.8178048729896545, - "learning_rate": 1.2321420417565757e-05, - "loss": 0.3169, - "step": 9276 - }, - { - "epoch": 0.8739313723181272, - "grad_norm": 0.7599455118179321, - "learning_rate": 
1.2319951623837189e-05, - "loss": 0.3185, - "step": 9277 - }, - { - "epoch": 0.8740255764112951, - "grad_norm": 0.6750515699386597, - "learning_rate": 1.231848277721014e-05, - "loss": 0.3254, - "step": 9278 - }, - { - "epoch": 0.8741197805044629, - "grad_norm": 0.7708296179771423, - "learning_rate": 1.2317013877718096e-05, - "loss": 0.3207, - "step": 9279 - }, - { - "epoch": 0.8742139845976308, - "grad_norm": 0.8172353506088257, - "learning_rate": 1.2315544925394553e-05, - "loss": 0.302, - "step": 9280 - }, - { - "epoch": 0.8743081886907986, - "grad_norm": 0.7476603388786316, - "learning_rate": 1.2314075920273002e-05, - "loss": 0.3016, - "step": 9281 - }, - { - "epoch": 0.8744023927839665, - "grad_norm": 0.767647385597229, - "learning_rate": 1.2312606862386942e-05, - "loss": 0.3304, - "step": 9282 - }, - { - "epoch": 0.8744965968771343, - "grad_norm": 0.7349490523338318, - "learning_rate": 1.2311137751769875e-05, - "loss": 0.2832, - "step": 9283 - }, - { - "epoch": 0.8745908009703022, - "grad_norm": 0.6973784565925598, - "learning_rate": 1.2309668588455285e-05, - "loss": 0.2557, - "step": 9284 - }, - { - "epoch": 0.87468500506347, - "grad_norm": 0.8286645412445068, - "learning_rate": 1.2308199372476683e-05, - "loss": 0.3596, - "step": 9285 - }, - { - "epoch": 0.8747792091566379, - "grad_norm": 0.744421660900116, - "learning_rate": 1.2306730103867565e-05, - "loss": 0.2555, - "step": 9286 - }, - { - "epoch": 0.8748734132498057, - "grad_norm": 0.7113873958587646, - "learning_rate": 1.2305260782661433e-05, - "loss": 0.2527, - "step": 9287 - }, - { - "epoch": 0.8749676173429736, - "grad_norm": 0.8271864056587219, - "learning_rate": 1.2303791408891792e-05, - "loss": 0.3028, - "step": 9288 - }, - { - "epoch": 0.8750618214361414, - "grad_norm": 0.7463380098342896, - "learning_rate": 1.2302321982592142e-05, - "loss": 0.2915, - "step": 9289 - }, - { - "epoch": 0.8751560255293093, - "grad_norm": 0.8846245408058167, - "learning_rate": 1.2300852503795993e-05, - "loss": 0.3512, - "step": 9290 - }, - { - "epoch": 0.8752502296224771, - "grad_norm": 0.7288370728492737, - "learning_rate": 1.2299382972536842e-05, - "loss": 0.3388, - "step": 9291 - }, - { - "epoch": 0.875344433715645, - "grad_norm": 0.6647352576255798, - "learning_rate": 1.2297913388848208e-05, - "loss": 0.3287, - "step": 9292 - }, - { - "epoch": 0.8754386378088128, - "grad_norm": 0.8051317930221558, - "learning_rate": 1.2296443752763597e-05, - "loss": 0.3185, - "step": 9293 - }, - { - "epoch": 0.8755328419019807, - "grad_norm": 0.8401761651039124, - "learning_rate": 1.2294974064316513e-05, - "loss": 0.3296, - "step": 9294 - }, - { - "epoch": 0.8756270459951485, - "grad_norm": 0.8169759511947632, - "learning_rate": 1.2293504323540473e-05, - "loss": 0.3165, - "step": 9295 - }, - { - "epoch": 0.8757212500883164, - "grad_norm": 0.7035739421844482, - "learning_rate": 1.2292034530468986e-05, - "loss": 0.2821, - "step": 9296 - }, - { - "epoch": 0.8758154541814842, - "grad_norm": 0.6984587907791138, - "learning_rate": 1.2290564685135566e-05, - "loss": 0.2905, - "step": 9297 - }, - { - "epoch": 0.8759096582746521, - "grad_norm": 0.7788722515106201, - "learning_rate": 1.2289094787573732e-05, - "loss": 0.3047, - "step": 9298 - }, - { - "epoch": 0.8760038623678199, - "grad_norm": 0.6994571685791016, - "learning_rate": 1.2287624837816993e-05, - "loss": 0.3134, - "step": 9299 - }, - { - "epoch": 0.8760980664609878, - "grad_norm": 0.9961441159248352, - "learning_rate": 1.2286154835898876e-05, - "loss": 0.3159, - "step": 9300 - }, - { - "epoch": 
0.8761922705541556, - "grad_norm": 0.8256969451904297, - "learning_rate": 1.2284684781852887e-05, - "loss": 0.3122, - "step": 9301 - }, - { - "epoch": 0.8762864746473235, - "grad_norm": 0.8606594800949097, - "learning_rate": 1.228321467571255e-05, - "loss": 0.3509, - "step": 9302 - }, - { - "epoch": 0.8763806787404913, - "grad_norm": 0.7667705416679382, - "learning_rate": 1.2281744517511396e-05, - "loss": 0.338, - "step": 9303 - }, - { - "epoch": 0.8764748828336592, - "grad_norm": 0.6364054083824158, - "learning_rate": 1.2280274307282932e-05, - "loss": 0.2827, - "step": 9304 - }, - { - "epoch": 0.876569086926827, - "grad_norm": 0.8485642671585083, - "learning_rate": 1.2278804045060688e-05, - "loss": 0.2519, - "step": 9305 - }, - { - "epoch": 0.8766632910199949, - "grad_norm": 0.8302154541015625, - "learning_rate": 1.227733373087819e-05, - "loss": 0.3256, - "step": 9306 - }, - { - "epoch": 0.8767574951131627, - "grad_norm": 0.8487189412117004, - "learning_rate": 1.2275863364768956e-05, - "loss": 0.2902, - "step": 9307 - }, - { - "epoch": 0.8768516992063305, - "grad_norm": 0.7642054557800293, - "learning_rate": 1.2274392946766522e-05, - "loss": 0.3033, - "step": 9308 - }, - { - "epoch": 0.8769459032994984, - "grad_norm": 0.6982212066650391, - "learning_rate": 1.2272922476904409e-05, - "loss": 0.3033, - "step": 9309 - }, - { - "epoch": 0.8770401073926662, - "grad_norm": 0.6717838644981384, - "learning_rate": 1.2271451955216151e-05, - "loss": 0.2991, - "step": 9310 - }, - { - "epoch": 0.8771343114858341, - "grad_norm": 0.7980400919914246, - "learning_rate": 1.2269981381735272e-05, - "loss": 0.2955, - "step": 9311 - }, - { - "epoch": 0.8772285155790019, - "grad_norm": 0.7617857456207275, - "learning_rate": 1.2268510756495312e-05, - "loss": 0.2952, - "step": 9312 - }, - { - "epoch": 0.8773227196721698, - "grad_norm": 0.9512116312980652, - "learning_rate": 1.2267040079529794e-05, - "loss": 0.3152, - "step": 9313 - }, - { - "epoch": 0.8774169237653376, - "grad_norm": 0.7181690335273743, - "learning_rate": 1.2265569350872257e-05, - "loss": 0.2613, - "step": 9314 - }, - { - "epoch": 0.8775111278585055, - "grad_norm": 0.8266440629959106, - "learning_rate": 1.2264098570556238e-05, - "loss": 0.2696, - "step": 9315 - }, - { - "epoch": 0.8776053319516733, - "grad_norm": 0.7346266508102417, - "learning_rate": 1.2262627738615265e-05, - "loss": 0.2991, - "step": 9316 - }, - { - "epoch": 0.8776995360448412, - "grad_norm": 0.8172141909599304, - "learning_rate": 1.2261156855082882e-05, - "loss": 0.3409, - "step": 9317 - }, - { - "epoch": 0.877793740138009, - "grad_norm": 0.6860228776931763, - "learning_rate": 1.225968591999263e-05, - "loss": 0.2855, - "step": 9318 - }, - { - "epoch": 0.8778879442311769, - "grad_norm": 0.6955971121788025, - "learning_rate": 1.2258214933378038e-05, - "loss": 0.3146, - "step": 9319 - }, - { - "epoch": 0.8779821483243447, - "grad_norm": 0.7971096038818359, - "learning_rate": 1.225674389527266e-05, - "loss": 0.3222, - "step": 9320 - }, - { - "epoch": 0.8780763524175126, - "grad_norm": 0.7157099843025208, - "learning_rate": 1.2255272805710026e-05, - "loss": 0.3392, - "step": 9321 - }, - { - "epoch": 0.8781705565106804, - "grad_norm": 0.6945052146911621, - "learning_rate": 1.2253801664723683e-05, - "loss": 0.3118, - "step": 9322 - }, - { - "epoch": 0.8782647606038483, - "grad_norm": 0.7462865114212036, - "learning_rate": 1.2252330472347183e-05, - "loss": 0.3189, - "step": 9323 - }, - { - "epoch": 0.8783589646970161, - "grad_norm": 0.7465896010398865, - "learning_rate": 
1.2250859228614058e-05, - "loss": 0.283, - "step": 9324 - }, - { - "epoch": 0.878453168790184, - "grad_norm": 0.6610503792762756, - "learning_rate": 1.2249387933557864e-05, - "loss": 0.2933, - "step": 9325 - }, - { - "epoch": 0.8785473728833518, - "grad_norm": 0.6051380634307861, - "learning_rate": 1.2247916587212147e-05, - "loss": 0.2686, - "step": 9326 - }, - { - "epoch": 0.8786415769765197, - "grad_norm": 0.8131787180900574, - "learning_rate": 1.2246445189610455e-05, - "loss": 0.2976, - "step": 9327 - }, - { - "epoch": 0.8787357810696875, - "grad_norm": 0.6534878015518188, - "learning_rate": 1.2244973740786338e-05, - "loss": 0.2913, - "step": 9328 - }, - { - "epoch": 0.8788299851628554, - "grad_norm": 0.6280990242958069, - "learning_rate": 1.2243502240773348e-05, - "loss": 0.2646, - "step": 9329 - }, - { - "epoch": 0.8789241892560232, - "grad_norm": 0.6751523613929749, - "learning_rate": 1.224203068960504e-05, - "loss": 0.3356, - "step": 9330 - }, - { - "epoch": 0.8790183933491911, - "grad_norm": 0.8297321796417236, - "learning_rate": 1.224055908731496e-05, - "loss": 0.2952, - "step": 9331 - }, - { - "epoch": 0.8791125974423589, - "grad_norm": 0.6817976832389832, - "learning_rate": 1.2239087433936672e-05, - "loss": 0.2778, - "step": 9332 - }, - { - "epoch": 0.8792068015355268, - "grad_norm": 0.7363234162330627, - "learning_rate": 1.2237615729503726e-05, - "loss": 0.3439, - "step": 9333 - }, - { - "epoch": 0.8793010056286946, - "grad_norm": 0.7076244354248047, - "learning_rate": 1.2236143974049682e-05, - "loss": 0.2871, - "step": 9334 - }, - { - "epoch": 0.8793952097218625, - "grad_norm": 0.7310746312141418, - "learning_rate": 1.2234672167608095e-05, - "loss": 0.2654, - "step": 9335 - }, - { - "epoch": 0.8794894138150302, - "grad_norm": 0.6136433482170105, - "learning_rate": 1.2233200310212527e-05, - "loss": 0.263, - "step": 9336 - }, - { - "epoch": 0.8795836179081981, - "grad_norm": 0.8622340559959412, - "learning_rate": 1.2231728401896539e-05, - "loss": 0.2987, - "step": 9337 - }, - { - "epoch": 0.8796778220013659, - "grad_norm": 1.2962377071380615, - "learning_rate": 1.2230256442693693e-05, - "loss": 0.3027, - "step": 9338 - }, - { - "epoch": 0.8797720260945338, - "grad_norm": 1.405983567237854, - "learning_rate": 1.2228784432637548e-05, - "loss": 0.318, - "step": 9339 - }, - { - "epoch": 0.8798662301877016, - "grad_norm": 0.692185640335083, - "learning_rate": 1.2227312371761675e-05, - "loss": 0.2841, - "step": 9340 - }, - { - "epoch": 0.8799604342808695, - "grad_norm": 0.740404486656189, - "learning_rate": 1.222584026009963e-05, - "loss": 0.3323, - "step": 9341 - }, - { - "epoch": 0.8800546383740373, - "grad_norm": 0.7664980888366699, - "learning_rate": 1.2224368097684986e-05, - "loss": 0.3248, - "step": 9342 - }, - { - "epoch": 0.8801488424672052, - "grad_norm": 0.6764711141586304, - "learning_rate": 1.2222895884551315e-05, - "loss": 0.266, - "step": 9343 - }, - { - "epoch": 0.880243046560373, - "grad_norm": 0.6800090670585632, - "learning_rate": 1.2221423620732175e-05, - "loss": 0.298, - "step": 9344 - }, - { - "epoch": 0.8803372506535408, - "grad_norm": 0.695341169834137, - "learning_rate": 1.2219951306261141e-05, - "loss": 0.3057, - "step": 9345 - }, - { - "epoch": 0.8804314547467087, - "grad_norm": 0.7294071316719055, - "learning_rate": 1.2218478941171787e-05, - "loss": 0.2919, - "step": 9346 - }, - { - "epoch": 0.8805256588398765, - "grad_norm": 0.8023799061775208, - "learning_rate": 1.2217006525497678e-05, - "loss": 0.3484, - "step": 9347 - }, - { - "epoch": 
0.8806198629330444, - "grad_norm": 0.8050774931907654, - "learning_rate": 1.2215534059272396e-05, - "loss": 0.2917, - "step": 9348 - }, - { - "epoch": 0.8807140670262122, - "grad_norm": 0.6966261267662048, - "learning_rate": 1.221406154252951e-05, - "loss": 0.3082, - "step": 9349 - }, - { - "epoch": 0.8808082711193801, - "grad_norm": 0.7030807733535767, - "learning_rate": 1.2212588975302595e-05, - "loss": 0.2577, - "step": 9350 - }, - { - "epoch": 0.8809024752125479, - "grad_norm": 0.7408086657524109, - "learning_rate": 1.2211116357625228e-05, - "loss": 0.2876, - "step": 9351 - }, - { - "epoch": 0.8809966793057158, - "grad_norm": 0.9203299880027771, - "learning_rate": 1.2209643689530993e-05, - "loss": 0.324, - "step": 9352 - }, - { - "epoch": 0.8810908833988836, - "grad_norm": 0.6748857498168945, - "learning_rate": 1.220817097105346e-05, - "loss": 0.3035, - "step": 9353 - }, - { - "epoch": 0.8811850874920515, - "grad_norm": 0.655755341053009, - "learning_rate": 1.220669820222622e-05, - "loss": 0.2817, - "step": 9354 - }, - { - "epoch": 0.8812792915852193, - "grad_norm": 0.6666610836982727, - "learning_rate": 1.2205225383082844e-05, - "loss": 0.2832, - "step": 9355 - }, - { - "epoch": 0.8813734956783872, - "grad_norm": 0.7367080450057983, - "learning_rate": 1.2203752513656917e-05, - "loss": 0.296, - "step": 9356 - }, - { - "epoch": 0.881467699771555, - "grad_norm": 0.7991181015968323, - "learning_rate": 1.2202279593982026e-05, - "loss": 0.3193, - "step": 9357 - }, - { - "epoch": 0.8815619038647229, - "grad_norm": 0.816676914691925, - "learning_rate": 1.2200806624091756e-05, - "loss": 0.2916, - "step": 9358 - }, - { - "epoch": 0.8816561079578907, - "grad_norm": 0.656477689743042, - "learning_rate": 1.219933360401969e-05, - "loss": 0.2861, - "step": 9359 - }, - { - "epoch": 0.8817503120510586, - "grad_norm": 0.7736837267875671, - "learning_rate": 1.2197860533799419e-05, - "loss": 0.3243, - "step": 9360 - }, - { - "epoch": 0.8818445161442264, - "grad_norm": 0.60536128282547, - "learning_rate": 1.2196387413464525e-05, - "loss": 0.286, - "step": 9361 - }, - { - "epoch": 0.8819387202373943, - "grad_norm": 0.7745570540428162, - "learning_rate": 1.2194914243048602e-05, - "loss": 0.3209, - "step": 9362 - }, - { - "epoch": 0.8820329243305621, - "grad_norm": 0.69176185131073, - "learning_rate": 1.2193441022585244e-05, - "loss": 0.2918, - "step": 9363 - }, - { - "epoch": 0.88212712842373, - "grad_norm": 0.8644979000091553, - "learning_rate": 1.219196775210803e-05, - "loss": 0.3146, - "step": 9364 - }, - { - "epoch": 0.8822213325168978, - "grad_norm": 0.657992959022522, - "learning_rate": 1.219049443165057e-05, - "loss": 0.3122, - "step": 9365 - }, - { - "epoch": 0.8823155366100657, - "grad_norm": 0.7901890277862549, - "learning_rate": 1.2189021061246447e-05, - "loss": 0.3294, - "step": 9366 - }, - { - "epoch": 0.8824097407032335, - "grad_norm": 0.7417951822280884, - "learning_rate": 1.2187547640929253e-05, - "loss": 0.2868, - "step": 9367 - }, - { - "epoch": 0.8825039447964014, - "grad_norm": 0.7628073692321777, - "learning_rate": 1.2186074170732596e-05, - "loss": 0.3154, - "step": 9368 - }, - { - "epoch": 0.8825981488895692, - "grad_norm": 0.8040744066238403, - "learning_rate": 1.2184600650690067e-05, - "loss": 0.3613, - "step": 9369 - }, - { - "epoch": 0.8826923529827371, - "grad_norm": 0.7684886455535889, - "learning_rate": 1.2183127080835262e-05, - "loss": 0.3367, - "step": 9370 - }, - { - "epoch": 0.8827865570759049, - "grad_norm": 1.0408079624176025, - "learning_rate": 1.2181653461201783e-05, - 
"loss": 0.3462, - "step": 9371 - }, - { - "epoch": 0.8828807611690728, - "grad_norm": 0.643016517162323, - "learning_rate": 1.218017979182323e-05, - "loss": 0.2872, - "step": 9372 - }, - { - "epoch": 0.8829749652622406, - "grad_norm": 0.7180635929107666, - "learning_rate": 1.2178706072733209e-05, - "loss": 0.311, - "step": 9373 - }, - { - "epoch": 0.8830691693554085, - "grad_norm": 1.2569615840911865, - "learning_rate": 1.217723230396532e-05, - "loss": 0.2956, - "step": 9374 - }, - { - "epoch": 0.8831633734485763, - "grad_norm": 1.8318754434585571, - "learning_rate": 1.2175758485553166e-05, - "loss": 0.2862, - "step": 9375 - }, - { - "epoch": 0.8832575775417442, - "grad_norm": 0.8067878484725952, - "learning_rate": 1.2174284617530354e-05, - "loss": 0.28, - "step": 9376 - }, - { - "epoch": 0.883351781634912, - "grad_norm": 0.652657687664032, - "learning_rate": 1.217281069993049e-05, - "loss": 0.3104, - "step": 9377 - }, - { - "epoch": 0.8834459857280799, - "grad_norm": 0.7842876315116882, - "learning_rate": 1.2171336732787183e-05, - "loss": 0.3545, - "step": 9378 - }, - { - "epoch": 0.8835401898212477, - "grad_norm": 0.8068026304244995, - "learning_rate": 1.2169862716134037e-05, - "loss": 0.2847, - "step": 9379 - }, - { - "epoch": 0.8836343939144156, - "grad_norm": 1.1152421236038208, - "learning_rate": 1.216838865000467e-05, - "loss": 0.3525, - "step": 9380 - }, - { - "epoch": 0.8837285980075834, - "grad_norm": 0.7238123416900635, - "learning_rate": 1.2166914534432686e-05, - "loss": 0.2845, - "step": 9381 - }, - { - "epoch": 0.8838228021007513, - "grad_norm": 0.7315871715545654, - "learning_rate": 1.2165440369451695e-05, - "loss": 0.2881, - "step": 9382 - }, - { - "epoch": 0.8839170061939191, - "grad_norm": 0.7550876140594482, - "learning_rate": 1.2163966155095323e-05, - "loss": 0.3484, - "step": 9383 - }, - { - "epoch": 0.884011210287087, - "grad_norm": 0.6491653919219971, - "learning_rate": 1.216249189139717e-05, - "loss": 0.3005, - "step": 9384 - }, - { - "epoch": 0.8841054143802548, - "grad_norm": 0.7400387525558472, - "learning_rate": 1.2161017578390862e-05, - "loss": 0.2821, - "step": 9385 - }, - { - "epoch": 0.8841996184734227, - "grad_norm": 0.7085525393486023, - "learning_rate": 1.2159543216110008e-05, - "loss": 0.2992, - "step": 9386 - }, - { - "epoch": 0.8842938225665905, - "grad_norm": 0.7039757370948792, - "learning_rate": 1.2158068804588228e-05, - "loss": 0.2984, - "step": 9387 - }, - { - "epoch": 0.8843880266597584, - "grad_norm": 0.7835156917572021, - "learning_rate": 1.2156594343859146e-05, - "loss": 0.3119, - "step": 9388 - }, - { - "epoch": 0.8844822307529262, - "grad_norm": 0.6807800531387329, - "learning_rate": 1.2155119833956373e-05, - "loss": 0.2852, - "step": 9389 - }, - { - "epoch": 0.884576434846094, - "grad_norm": 0.6613379120826721, - "learning_rate": 1.2153645274913537e-05, - "loss": 0.3071, - "step": 9390 - }, - { - "epoch": 0.8846706389392619, - "grad_norm": 0.8164822459220886, - "learning_rate": 1.2152170666764258e-05, - "loss": 0.3069, - "step": 9391 - }, - { - "epoch": 0.8847648430324297, - "grad_norm": 1.0489635467529297, - "learning_rate": 1.215069600954216e-05, - "loss": 0.2741, - "step": 9392 - }, - { - "epoch": 0.8848590471255976, - "grad_norm": 0.6539410948753357, - "learning_rate": 1.2149221303280865e-05, - "loss": 0.3041, - "step": 9393 - }, - { - "epoch": 0.8849532512187654, - "grad_norm": 0.6886880397796631, - "learning_rate": 1.2147746548014003e-05, - "loss": 0.2711, - "step": 9394 - }, - { - "epoch": 0.8850474553119333, - "grad_norm": 
0.7714412808418274, - "learning_rate": 1.2146271743775198e-05, - "loss": 0.2941, - "step": 9395 - }, - { - "epoch": 0.8851416594051011, - "grad_norm": 0.7667918801307678, - "learning_rate": 1.2144796890598074e-05, - "loss": 0.2733, - "step": 9396 - }, - { - "epoch": 0.885235863498269, - "grad_norm": 0.9158183932304382, - "learning_rate": 1.2143321988516267e-05, - "loss": 0.3071, - "step": 9397 - }, - { - "epoch": 0.8853300675914368, - "grad_norm": 0.7339330315589905, - "learning_rate": 1.2141847037563405e-05, - "loss": 0.2645, - "step": 9398 - }, - { - "epoch": 0.8854242716846047, - "grad_norm": 1.1290115118026733, - "learning_rate": 1.2140372037773114e-05, - "loss": 0.3165, - "step": 9399 - }, - { - "epoch": 0.8855184757777725, - "grad_norm": 0.7589887380599976, - "learning_rate": 1.2138896989179037e-05, - "loss": 0.3021, - "step": 9400 - }, - { - "epoch": 0.8856126798709404, - "grad_norm": 0.8948603272438049, - "learning_rate": 1.2137421891814796e-05, - "loss": 0.3435, - "step": 9401 - }, - { - "epoch": 0.8857068839641082, - "grad_norm": 0.9510778188705444, - "learning_rate": 1.2135946745714028e-05, - "loss": 0.3555, - "step": 9402 - }, - { - "epoch": 0.8858010880572761, - "grad_norm": 0.8120291233062744, - "learning_rate": 1.2134471550910379e-05, - "loss": 0.3195, - "step": 9403 - }, - { - "epoch": 0.8858952921504439, - "grad_norm": 0.7990771532058716, - "learning_rate": 1.213299630743747e-05, - "loss": 0.2977, - "step": 9404 - }, - { - "epoch": 0.8859894962436118, - "grad_norm": 0.7180115580558777, - "learning_rate": 1.2131521015328948e-05, - "loss": 0.3232, - "step": 9405 - }, - { - "epoch": 0.8860837003367796, - "grad_norm": 0.7164108157157898, - "learning_rate": 1.2130045674618453e-05, - "loss": 0.284, - "step": 9406 - }, - { - "epoch": 0.8861779044299475, - "grad_norm": 0.7136685848236084, - "learning_rate": 1.212857028533962e-05, - "loss": 0.3216, - "step": 9407 - }, - { - "epoch": 0.8862721085231153, - "grad_norm": 0.7712353467941284, - "learning_rate": 1.2127094847526093e-05, - "loss": 0.3209, - "step": 9408 - }, - { - "epoch": 0.8863663126162832, - "grad_norm": 0.7543731331825256, - "learning_rate": 1.2125619361211517e-05, - "loss": 0.318, - "step": 9409 - }, - { - "epoch": 0.886460516709451, - "grad_norm": 0.6535468101501465, - "learning_rate": 1.2124143826429529e-05, - "loss": 0.3081, - "step": 9410 - }, - { - "epoch": 0.8865547208026189, - "grad_norm": 0.7212768197059631, - "learning_rate": 1.2122668243213779e-05, - "loss": 0.2945, - "step": 9411 - }, - { - "epoch": 0.8866489248957867, - "grad_norm": 0.9539259076118469, - "learning_rate": 1.2121192611597905e-05, - "loss": 0.3176, - "step": 9412 - }, - { - "epoch": 0.8867431289889546, - "grad_norm": 0.707371175289154, - "learning_rate": 1.2119716931615564e-05, - "loss": 0.2729, - "step": 9413 - }, - { - "epoch": 0.8868373330821224, - "grad_norm": 0.7178236842155457, - "learning_rate": 1.2118241203300398e-05, - "loss": 0.2651, - "step": 9414 - }, - { - "epoch": 0.8869315371752903, - "grad_norm": 0.7159601449966431, - "learning_rate": 1.2116765426686057e-05, - "loss": 0.2959, - "step": 9415 - }, - { - "epoch": 0.8870257412684581, - "grad_norm": 0.7148156762123108, - "learning_rate": 1.2115289601806186e-05, - "loss": 0.273, - "step": 9416 - }, - { - "epoch": 0.887119945361626, - "grad_norm": 0.6918371319770813, - "learning_rate": 1.2113813728694447e-05, - "loss": 0.2962, - "step": 9417 - }, - { - "epoch": 0.8872141494547938, - "grad_norm": 0.6912872791290283, - "learning_rate": 1.2112337807384482e-05, - "loss": 0.293, - "step": 
9418 - }, - { - "epoch": 0.8873083535479617, - "grad_norm": 0.8349743485450745, - "learning_rate": 1.2110861837909948e-05, - "loss": 0.2838, - "step": 9419 - }, - { - "epoch": 0.8874025576411295, - "grad_norm": 0.6753384470939636, - "learning_rate": 1.2109385820304504e-05, - "loss": 0.2874, - "step": 9420 - }, - { - "epoch": 0.8874967617342974, - "grad_norm": 0.7217723727226257, - "learning_rate": 1.2107909754601796e-05, - "loss": 0.3223, - "step": 9421 - }, - { - "epoch": 0.8875909658274652, - "grad_norm": 0.8470667004585266, - "learning_rate": 1.2106433640835487e-05, - "loss": 0.3289, - "step": 9422 - }, - { - "epoch": 0.8876851699206331, - "grad_norm": 0.7165049314498901, - "learning_rate": 1.2104957479039237e-05, - "loss": 0.3103, - "step": 9423 - }, - { - "epoch": 0.8877793740138009, - "grad_norm": 0.7034558057785034, - "learning_rate": 1.2103481269246697e-05, - "loss": 0.3056, - "step": 9424 - }, - { - "epoch": 0.8878735781069688, - "grad_norm": 0.8210559487342834, - "learning_rate": 1.2102005011491534e-05, - "loss": 0.3153, - "step": 9425 - }, - { - "epoch": 0.8879677822001366, - "grad_norm": 0.6928260922431946, - "learning_rate": 1.2100528705807402e-05, - "loss": 0.2682, - "step": 9426 - }, - { - "epoch": 0.8880619862933045, - "grad_norm": 0.8357961177825928, - "learning_rate": 1.209905235222797e-05, - "loss": 0.3144, - "step": 9427 - }, - { - "epoch": 0.8881561903864723, - "grad_norm": 0.6881080865859985, - "learning_rate": 1.2097575950786898e-05, - "loss": 0.289, - "step": 9428 - }, - { - "epoch": 0.8882503944796402, - "grad_norm": 0.7533964514732361, - "learning_rate": 1.2096099501517849e-05, - "loss": 0.3189, - "step": 9429 - }, - { - "epoch": 0.888344598572808, - "grad_norm": 0.8324847221374512, - "learning_rate": 1.209462300445449e-05, - "loss": 0.3248, - "step": 9430 - }, - { - "epoch": 0.8884388026659759, - "grad_norm": 0.7079195380210876, - "learning_rate": 1.2093146459630488e-05, - "loss": 0.2863, - "step": 9431 - }, - { - "epoch": 0.8885330067591437, - "grad_norm": 0.7742490172386169, - "learning_rate": 1.2091669867079507e-05, - "loss": 0.3061, - "step": 9432 - }, - { - "epoch": 0.8886272108523116, - "grad_norm": 0.721615195274353, - "learning_rate": 1.209019322683522e-05, - "loss": 0.3128, - "step": 9433 - }, - { - "epoch": 0.8887214149454794, - "grad_norm": 0.7562946081161499, - "learning_rate": 1.2088716538931296e-05, - "loss": 0.2989, - "step": 9434 - }, - { - "epoch": 0.8888156190386473, - "grad_norm": 0.6650596261024475, - "learning_rate": 1.2087239803401404e-05, - "loss": 0.2734, - "step": 9435 - }, - { - "epoch": 0.8889098231318151, - "grad_norm": 0.6545528173446655, - "learning_rate": 1.2085763020279215e-05, - "loss": 0.2617, - "step": 9436 - }, - { - "epoch": 0.889004027224983, - "grad_norm": 0.752198338508606, - "learning_rate": 1.2084286189598404e-05, - "loss": 0.3128, - "step": 9437 - }, - { - "epoch": 0.8890982313181508, - "grad_norm": 0.6593581438064575, - "learning_rate": 1.2082809311392647e-05, - "loss": 0.2906, - "step": 9438 - }, - { - "epoch": 0.8891924354113186, - "grad_norm": 0.7499755620956421, - "learning_rate": 1.2081332385695612e-05, - "loss": 0.3219, - "step": 9439 - }, - { - "epoch": 0.8892866395044865, - "grad_norm": 0.6921376585960388, - "learning_rate": 1.2079855412540986e-05, - "loss": 0.3054, - "step": 9440 - }, - { - "epoch": 0.8893808435976543, - "grad_norm": 0.8174970149993896, - "learning_rate": 1.2078378391962436e-05, - "loss": 0.3361, - "step": 9441 - }, - { - "epoch": 0.8894750476908222, - "grad_norm": 0.831191897392273, - 
"learning_rate": 1.2076901323993644e-05, - "loss": 0.318, - "step": 9442 - }, - { - "epoch": 0.88956925178399, - "grad_norm": 0.764427125453949, - "learning_rate": 1.2075424208668291e-05, - "loss": 0.2995, - "step": 9443 - }, - { - "epoch": 0.8896634558771579, - "grad_norm": 0.7526426315307617, - "learning_rate": 1.2073947046020056e-05, - "loss": 0.3039, - "step": 9444 - }, - { - "epoch": 0.8897576599703257, - "grad_norm": 0.8434804677963257, - "learning_rate": 1.207246983608262e-05, - "loss": 0.3452, - "step": 9445 - }, - { - "epoch": 0.8898518640634936, - "grad_norm": 0.7737138867378235, - "learning_rate": 1.2070992578889668e-05, - "loss": 0.3529, - "step": 9446 - }, - { - "epoch": 0.8899460681566614, - "grad_norm": 0.7623361945152283, - "learning_rate": 1.2069515274474882e-05, - "loss": 0.3127, - "step": 9447 - }, - { - "epoch": 0.8900402722498293, - "grad_norm": 0.9720628261566162, - "learning_rate": 1.2068037922871947e-05, - "loss": 0.3202, - "step": 9448 - }, - { - "epoch": 0.8901344763429971, - "grad_norm": 0.7618057131767273, - "learning_rate": 1.206656052411455e-05, - "loss": 0.3202, - "step": 9449 - }, - { - "epoch": 0.890228680436165, - "grad_norm": 0.739187479019165, - "learning_rate": 1.2065083078236375e-05, - "loss": 0.3048, - "step": 9450 - }, - { - "epoch": 0.8903228845293328, - "grad_norm": 0.7619025111198425, - "learning_rate": 1.2063605585271114e-05, - "loss": 0.301, - "step": 9451 - }, - { - "epoch": 0.8904170886225007, - "grad_norm": 0.7123521566390991, - "learning_rate": 1.2062128045252453e-05, - "loss": 0.2904, - "step": 9452 - }, - { - "epoch": 0.8905112927156685, - "grad_norm": 0.8152437806129456, - "learning_rate": 1.2060650458214085e-05, - "loss": 0.2973, - "step": 9453 - }, - { - "epoch": 0.8906054968088364, - "grad_norm": 0.6467706561088562, - "learning_rate": 1.2059172824189698e-05, - "loss": 0.2951, - "step": 9454 - }, - { - "epoch": 0.8906997009020042, - "grad_norm": 0.7300913333892822, - "learning_rate": 1.2057695143212986e-05, - "loss": 0.3186, - "step": 9455 - }, - { - "epoch": 0.8907939049951721, - "grad_norm": 0.6898703575134277, - "learning_rate": 1.2056217415317643e-05, - "loss": 0.3181, - "step": 9456 - }, - { - "epoch": 0.8908881090883399, - "grad_norm": 0.8182839155197144, - "learning_rate": 1.2054739640537363e-05, - "loss": 0.3176, - "step": 9457 - }, - { - "epoch": 0.8909823131815078, - "grad_norm": 0.8536497354507446, - "learning_rate": 1.2053261818905843e-05, - "loss": 0.3706, - "step": 9458 - }, - { - "epoch": 0.8910765172746756, - "grad_norm": 0.7779721617698669, - "learning_rate": 1.2051783950456775e-05, - "loss": 0.3076, - "step": 9459 - }, - { - "epoch": 0.8911707213678435, - "grad_norm": 0.6434833407402039, - "learning_rate": 1.2050306035223864e-05, - "loss": 0.2862, - "step": 9460 - }, - { - "epoch": 0.8912649254610113, - "grad_norm": 0.662363588809967, - "learning_rate": 1.20488280732408e-05, - "loss": 0.2584, - "step": 9461 - }, - { - "epoch": 0.8913591295541792, - "grad_norm": 0.8131331205368042, - "learning_rate": 1.204735006454129e-05, - "loss": 0.3348, - "step": 9462 - }, - { - "epoch": 0.891453333647347, - "grad_norm": 0.7505772113800049, - "learning_rate": 1.2045872009159033e-05, - "loss": 0.2982, - "step": 9463 - }, - { - "epoch": 0.8915475377405149, - "grad_norm": 0.7602881789207458, - "learning_rate": 1.2044393907127728e-05, - "loss": 0.3084, - "step": 9464 - }, - { - "epoch": 0.8916417418336827, - "grad_norm": 0.7289739847183228, - "learning_rate": 1.204291575848108e-05, - "loss": 0.2774, - "step": 9465 - }, - { - "epoch": 
0.8917359459268506, - "grad_norm": 0.683515727519989, - "learning_rate": 1.2041437563252794e-05, - "loss": 0.2551, - "step": 9466 - }, - { - "epoch": 0.8918301500200184, - "grad_norm": 0.7214856147766113, - "learning_rate": 1.2039959321476574e-05, - "loss": 0.3255, - "step": 9467 - }, - { - "epoch": 0.8919243541131863, - "grad_norm": 0.8039005994796753, - "learning_rate": 1.2038481033186127e-05, - "loss": 0.3097, - "step": 9468 - }, - { - "epoch": 0.8920185582063541, - "grad_norm": 0.7016351222991943, - "learning_rate": 1.2037002698415161e-05, - "loss": 0.3161, - "step": 9469 - }, - { - "epoch": 0.892112762299522, - "grad_norm": 0.8027306199073792, - "learning_rate": 1.2035524317197382e-05, - "loss": 0.3336, - "step": 9470 - }, - { - "epoch": 0.8922069663926898, - "grad_norm": 0.7177056670188904, - "learning_rate": 1.2034045889566502e-05, - "loss": 0.2785, - "step": 9471 - }, - { - "epoch": 0.8923011704858577, - "grad_norm": 0.879801869392395, - "learning_rate": 1.2032567415556226e-05, - "loss": 0.3042, - "step": 9472 - }, - { - "epoch": 0.8923953745790255, - "grad_norm": 0.8894309401512146, - "learning_rate": 1.2031088895200273e-05, - "loss": 0.3238, - "step": 9473 - }, - { - "epoch": 0.8924895786721934, - "grad_norm": 0.8481342196464539, - "learning_rate": 1.2029610328532354e-05, - "loss": 0.2825, - "step": 9474 - }, - { - "epoch": 0.8925837827653611, - "grad_norm": 0.9710609912872314, - "learning_rate": 1.2028131715586177e-05, - "loss": 0.3322, - "step": 9475 - }, - { - "epoch": 0.892677986858529, - "grad_norm": 0.7773035764694214, - "learning_rate": 1.2026653056395461e-05, - "loss": 0.3022, - "step": 9476 - }, - { - "epoch": 0.8927721909516968, - "grad_norm": 0.7003985643386841, - "learning_rate": 1.2025174350993923e-05, - "loss": 0.2831, - "step": 9477 - }, - { - "epoch": 0.8928663950448646, - "grad_norm": 0.6839850544929504, - "learning_rate": 1.2023695599415275e-05, - "loss": 0.2878, - "step": 9478 - }, - { - "epoch": 0.8929605991380325, - "grad_norm": 1.114830732345581, - "learning_rate": 1.2022216801693239e-05, - "loss": 0.3468, - "step": 9479 - }, - { - "epoch": 0.8930548032312003, - "grad_norm": 0.7672768235206604, - "learning_rate": 1.2020737957861534e-05, - "loss": 0.3178, - "step": 9480 - }, - { - "epoch": 0.8931490073243682, - "grad_norm": 0.888733983039856, - "learning_rate": 1.2019259067953875e-05, - "loss": 0.3392, - "step": 9481 - }, - { - "epoch": 0.893243211417536, - "grad_norm": 0.6489064693450928, - "learning_rate": 1.2017780132003989e-05, - "loss": 0.292, - "step": 9482 - }, - { - "epoch": 0.8933374155107039, - "grad_norm": 0.8030021786689758, - "learning_rate": 1.2016301150045595e-05, - "loss": 0.2649, - "step": 9483 - }, - { - "epoch": 0.8934316196038717, - "grad_norm": 0.7990246415138245, - "learning_rate": 1.2014822122112416e-05, - "loss": 0.2964, - "step": 9484 - }, - { - "epoch": 0.8935258236970396, - "grad_norm": 1.6609044075012207, - "learning_rate": 1.2013343048238176e-05, - "loss": 0.3307, - "step": 9485 - }, - { - "epoch": 0.8936200277902074, - "grad_norm": 0.7960792779922485, - "learning_rate": 1.2011863928456601e-05, - "loss": 0.28, - "step": 9486 - }, - { - "epoch": 0.8937142318833753, - "grad_norm": 0.7090703845024109, - "learning_rate": 1.2010384762801417e-05, - "loss": 0.2901, - "step": 9487 - }, - { - "epoch": 0.8938084359765431, - "grad_norm": 0.7948774099349976, - "learning_rate": 1.2008905551306356e-05, - "loss": 0.3213, - "step": 9488 - }, - { - "epoch": 0.893902640069711, - "grad_norm": 0.796562135219574, - "learning_rate": 
1.2007426294005135e-05, - "loss": 0.2879, - "step": 9489 - }, - { - "epoch": 0.8939968441628788, - "grad_norm": 1.003172516822815, - "learning_rate": 1.2005946990931492e-05, - "loss": 0.304, - "step": 9490 - }, - { - "epoch": 0.8940910482560467, - "grad_norm": 0.7436158061027527, - "learning_rate": 1.2004467642119158e-05, - "loss": 0.3429, - "step": 9491 - }, - { - "epoch": 0.8941852523492145, - "grad_norm": 0.8177036046981812, - "learning_rate": 1.2002988247601856e-05, - "loss": 0.3, - "step": 9492 - }, - { - "epoch": 0.8942794564423824, - "grad_norm": 0.724937915802002, - "learning_rate": 1.2001508807413329e-05, - "loss": 0.2238, - "step": 9493 - }, - { - "epoch": 0.8943736605355502, - "grad_norm": 0.8515589237213135, - "learning_rate": 1.2000029321587305e-05, - "loss": 0.3237, - "step": 9494 - }, - { - "epoch": 0.8944678646287181, - "grad_norm": 0.7247291207313538, - "learning_rate": 1.199854979015752e-05, - "loss": 0.2979, - "step": 9495 - }, - { - "epoch": 0.8945620687218859, - "grad_norm": 1.1423425674438477, - "learning_rate": 1.1997070213157707e-05, - "loss": 0.2829, - "step": 9496 - }, - { - "epoch": 0.8946562728150538, - "grad_norm": 0.6843664646148682, - "learning_rate": 1.1995590590621607e-05, - "loss": 0.3035, - "step": 9497 - }, - { - "epoch": 0.8947504769082216, - "grad_norm": 3.6836884021759033, - "learning_rate": 1.1994110922582953e-05, - "loss": 0.308, - "step": 9498 - }, - { - "epoch": 0.8948446810013895, - "grad_norm": 0.6240441203117371, - "learning_rate": 1.1992631209075484e-05, - "loss": 0.2731, - "step": 9499 - }, - { - "epoch": 0.8949388850945573, - "grad_norm": 0.7034563422203064, - "learning_rate": 1.199115145013295e-05, - "loss": 0.3324, - "step": 9500 - }, - { - "epoch": 0.8950330891877252, - "grad_norm": 1.343197226524353, - "learning_rate": 1.1989671645789077e-05, - "loss": 0.3416, - "step": 9501 - }, - { - "epoch": 0.895127293280893, - "grad_norm": 0.759353518486023, - "learning_rate": 1.1988191796077615e-05, - "loss": 0.3248, - "step": 9502 - }, - { - "epoch": 0.8952214973740609, - "grad_norm": 0.6849160194396973, - "learning_rate": 1.1986711901032304e-05, - "loss": 0.3282, - "step": 9503 - }, - { - "epoch": 0.8953157014672287, - "grad_norm": 0.6803606152534485, - "learning_rate": 1.198523196068689e-05, - "loss": 0.3005, - "step": 9504 - }, - { - "epoch": 0.8954099055603966, - "grad_norm": 0.7187288999557495, - "learning_rate": 1.1983751975075118e-05, - "loss": 0.2735, - "step": 9505 - }, - { - "epoch": 0.8955041096535644, - "grad_norm": 0.716555655002594, - "learning_rate": 1.198227194423073e-05, - "loss": 0.3009, - "step": 9506 - }, - { - "epoch": 0.8955983137467323, - "grad_norm": 0.7791658639907837, - "learning_rate": 1.1980791868187477e-05, - "loss": 0.3438, - "step": 9507 - }, - { - "epoch": 0.8956925178399001, - "grad_norm": 0.6820909380912781, - "learning_rate": 1.197931174697911e-05, - "loss": 0.3183, - "step": 9508 - }, - { - "epoch": 0.895786721933068, - "grad_norm": 0.7278851866722107, - "learning_rate": 1.197783158063937e-05, - "loss": 0.3451, - "step": 9509 - }, - { - "epoch": 0.8958809260262358, - "grad_norm": 0.9999712705612183, - "learning_rate": 1.1976351369202013e-05, - "loss": 0.312, - "step": 9510 - }, - { - "epoch": 0.8959751301194037, - "grad_norm": 0.8277131915092468, - "learning_rate": 1.1974871112700788e-05, - "loss": 0.2992, - "step": 9511 - }, - { - "epoch": 0.8960693342125715, - "grad_norm": 0.7437383532524109, - "learning_rate": 1.1973390811169447e-05, - "loss": 0.2881, - "step": 9512 - }, - { - "epoch": 0.8961635383057394, - 
"grad_norm": 0.7816392183303833, - "learning_rate": 1.1971910464641745e-05, - "loss": 0.3304, - "step": 9513 - }, - { - "epoch": 0.8962577423989072, - "grad_norm": 0.7216968536376953, - "learning_rate": 1.1970430073151434e-05, - "loss": 0.3244, - "step": 9514 - }, - { - "epoch": 0.8963519464920751, - "grad_norm": 0.9721806645393372, - "learning_rate": 1.196894963673227e-05, - "loss": 0.2979, - "step": 9515 - }, - { - "epoch": 0.8964461505852429, - "grad_norm": 0.6242602467536926, - "learning_rate": 1.1967469155418005e-05, - "loss": 0.284, - "step": 9516 - }, - { - "epoch": 0.8965403546784108, - "grad_norm": 0.762058675289154, - "learning_rate": 1.1965988629242407e-05, - "loss": 0.3289, - "step": 9517 - }, - { - "epoch": 0.8966345587715786, - "grad_norm": 0.7527956366539001, - "learning_rate": 1.1964508058239226e-05, - "loss": 0.3258, - "step": 9518 - }, - { - "epoch": 0.8967287628647465, - "grad_norm": 0.7868747115135193, - "learning_rate": 1.1963027442442221e-05, - "loss": 0.3116, - "step": 9519 - }, - { - "epoch": 0.8968229669579143, - "grad_norm": 1.0974806547164917, - "learning_rate": 1.1961546781885156e-05, - "loss": 0.3058, - "step": 9520 - }, - { - "epoch": 0.8969171710510822, - "grad_norm": 0.7916780114173889, - "learning_rate": 1.196006607660179e-05, - "loss": 0.291, - "step": 9521 - }, - { - "epoch": 0.89701137514425, - "grad_norm": 0.8335264921188354, - "learning_rate": 1.1958585326625886e-05, - "loss": 0.3464, - "step": 9522 - }, - { - "epoch": 0.8971055792374178, - "grad_norm": 0.6414207220077515, - "learning_rate": 1.195710453199121e-05, - "loss": 0.3112, - "step": 9523 - }, - { - "epoch": 0.8971997833305857, - "grad_norm": 2.4155995845794678, - "learning_rate": 1.195562369273152e-05, - "loss": 0.2633, - "step": 9524 - }, - { - "epoch": 0.8972939874237535, - "grad_norm": 0.6785614490509033, - "learning_rate": 1.1954142808880589e-05, - "loss": 0.3065, - "step": 9525 - }, - { - "epoch": 0.8973881915169214, - "grad_norm": 0.7296954989433289, - "learning_rate": 1.1952661880472178e-05, - "loss": 0.2965, - "step": 9526 - }, - { - "epoch": 0.8974823956100892, - "grad_norm": 0.7682005763053894, - "learning_rate": 1.1951180907540057e-05, - "loss": 0.3251, - "step": 9527 - }, - { - "epoch": 0.8975765997032571, - "grad_norm": 0.7406534552574158, - "learning_rate": 1.1949699890117994e-05, - "loss": 0.2952, - "step": 9528 - }, - { - "epoch": 0.8976708037964249, - "grad_norm": 0.660403847694397, - "learning_rate": 1.1948218828239757e-05, - "loss": 0.3052, - "step": 9529 - }, - { - "epoch": 0.8977650078895928, - "grad_norm": 0.7532671093940735, - "learning_rate": 1.1946737721939118e-05, - "loss": 0.2863, - "step": 9530 - }, - { - "epoch": 0.8978592119827606, - "grad_norm": 0.6479833126068115, - "learning_rate": 1.194525657124985e-05, - "loss": 0.274, - "step": 9531 - }, - { - "epoch": 0.8979534160759285, - "grad_norm": 0.9556598663330078, - "learning_rate": 1.194377537620572e-05, - "loss": 0.346, - "step": 9532 - }, - { - "epoch": 0.8980476201690963, - "grad_norm": 0.6388131976127625, - "learning_rate": 1.1942294136840508e-05, - "loss": 0.2835, - "step": 9533 - }, - { - "epoch": 0.8981418242622642, - "grad_norm": 0.7453655004501343, - "learning_rate": 1.1940812853187987e-05, - "loss": 0.3264, - "step": 9534 - }, - { - "epoch": 0.898236028355432, - "grad_norm": 3.139777898788452, - "learning_rate": 1.193933152528193e-05, - "loss": 0.3175, - "step": 9535 - }, - { - "epoch": 0.8983302324485999, - "grad_norm": 0.7302778959274292, - "learning_rate": 1.1937850153156115e-05, - "loss": 0.3122, - 
"step": 9536 - }, - { - "epoch": 0.8984244365417677, - "grad_norm": 0.7076626420021057, - "learning_rate": 1.1936368736844319e-05, - "loss": 0.322, - "step": 9537 - }, - { - "epoch": 0.8985186406349356, - "grad_norm": 0.7167888283729553, - "learning_rate": 1.1934887276380323e-05, - "loss": 0.3075, - "step": 9538 - }, - { - "epoch": 0.8986128447281034, - "grad_norm": 0.6941359639167786, - "learning_rate": 1.19334057717979e-05, - "loss": 0.2818, - "step": 9539 - }, - { - "epoch": 0.8987070488212713, - "grad_norm": 0.6347949504852295, - "learning_rate": 1.1931924223130842e-05, - "loss": 0.2934, - "step": 9540 - }, - { - "epoch": 0.8988012529144391, - "grad_norm": 0.7852635979652405, - "learning_rate": 1.193044263041292e-05, - "loss": 0.3103, - "step": 9541 - }, - { - "epoch": 0.898895457007607, - "grad_norm": 0.8677423000335693, - "learning_rate": 1.1928960993677921e-05, - "loss": 0.3197, - "step": 9542 - }, - { - "epoch": 0.8989896611007748, - "grad_norm": 0.7845224142074585, - "learning_rate": 1.1927479312959629e-05, - "loss": 0.3129, - "step": 9543 - }, - { - "epoch": 0.8990838651939427, - "grad_norm": 0.757025420665741, - "learning_rate": 1.1925997588291827e-05, - "loss": 0.3196, - "step": 9544 - }, - { - "epoch": 0.8991780692871105, - "grad_norm": 0.6933902502059937, - "learning_rate": 1.19245158197083e-05, - "loss": 0.3045, - "step": 9545 - }, - { - "epoch": 0.8992722733802784, - "grad_norm": 0.6712954044342041, - "learning_rate": 1.192303400724284e-05, - "loss": 0.3038, - "step": 9546 - }, - { - "epoch": 0.8993664774734462, - "grad_norm": 0.7381793856620789, - "learning_rate": 1.1921552150929225e-05, - "loss": 0.3107, - "step": 9547 - }, - { - "epoch": 0.8994606815666141, - "grad_norm": 0.6141394972801208, - "learning_rate": 1.1920070250801254e-05, - "loss": 0.3125, - "step": 9548 - }, - { - "epoch": 0.8995548856597819, - "grad_norm": 0.7098128795623779, - "learning_rate": 1.1918588306892709e-05, - "loss": 0.3016, - "step": 9549 - }, - { - "epoch": 0.8996490897529498, - "grad_norm": 0.6733649373054504, - "learning_rate": 1.1917106319237386e-05, - "loss": 0.3008, - "step": 9550 - }, - { - "epoch": 0.8997432938461176, - "grad_norm": 1.0957714319229126, - "learning_rate": 1.1915624287869072e-05, - "loss": 0.2848, - "step": 9551 - }, - { - "epoch": 0.8998374979392855, - "grad_norm": 0.8120837211608887, - "learning_rate": 1.1914142212821563e-05, - "loss": 0.3224, - "step": 9552 - }, - { - "epoch": 0.8999317020324533, - "grad_norm": 1.230957269668579, - "learning_rate": 1.191266009412865e-05, - "loss": 0.3266, - "step": 9553 - }, - { - "epoch": 0.9000259061256212, - "grad_norm": 0.6974923014640808, - "learning_rate": 1.191117793182413e-05, - "loss": 0.2877, - "step": 9554 - }, - { - "epoch": 0.900120110218789, - "grad_norm": 0.7688418030738831, - "learning_rate": 1.1909695725941797e-05, - "loss": 0.3275, - "step": 9555 - }, - { - "epoch": 0.9002143143119569, - "grad_norm": 0.9805521368980408, - "learning_rate": 1.1908213476515447e-05, - "loss": 0.3278, - "step": 9556 - }, - { - "epoch": 0.9003085184051247, - "grad_norm": 0.727298378944397, - "learning_rate": 1.190673118357888e-05, - "loss": 0.2928, - "step": 9557 - }, - { - "epoch": 0.9004027224982926, - "grad_norm": 0.8055543303489685, - "learning_rate": 1.1905248847165893e-05, - "loss": 0.337, - "step": 9558 - }, - { - "epoch": 0.9004969265914604, - "grad_norm": 0.776944637298584, - "learning_rate": 1.1903766467310288e-05, - "loss": 0.3403, - "step": 9559 - }, - { - "epoch": 0.9005911306846283, - "grad_norm": 0.7132166028022766, - 
"learning_rate": 1.190228404404586e-05, - "loss": 0.3153, - "step": 9560 - }, - { - "epoch": 0.9006853347777961, - "grad_norm": 0.7273151278495789, - "learning_rate": 1.1900801577406413e-05, - "loss": 0.3289, - "step": 9561 - }, - { - "epoch": 0.900779538870964, - "grad_norm": 0.6589033007621765, - "learning_rate": 1.1899319067425752e-05, - "loss": 0.2663, - "step": 9562 - }, - { - "epoch": 0.9008737429641318, - "grad_norm": 0.8022077083587646, - "learning_rate": 1.189783651413768e-05, - "loss": 0.3546, - "step": 9563 - }, - { - "epoch": 0.9009679470572997, - "grad_norm": 0.73952317237854, - "learning_rate": 1.1896353917575997e-05, - "loss": 0.327, - "step": 9564 - }, - { - "epoch": 0.9010621511504675, - "grad_norm": 0.8198251128196716, - "learning_rate": 1.1894871277774515e-05, - "loss": 0.3466, - "step": 9565 - }, - { - "epoch": 0.9011563552436354, - "grad_norm": 0.743465781211853, - "learning_rate": 1.189338859476704e-05, - "loss": 0.3166, - "step": 9566 - }, - { - "epoch": 0.9012505593368032, - "grad_norm": 0.6905034780502319, - "learning_rate": 1.189190586858737e-05, - "loss": 0.2999, - "step": 9567 - }, - { - "epoch": 0.901344763429971, - "grad_norm": 1.2066441774368286, - "learning_rate": 1.1890423099269327e-05, - "loss": 0.3278, - "step": 9568 - }, - { - "epoch": 0.9014389675231389, - "grad_norm": 0.717666506767273, - "learning_rate": 1.1888940286846708e-05, - "loss": 0.3042, - "step": 9569 - }, - { - "epoch": 0.9015331716163067, - "grad_norm": 1.3240638971328735, - "learning_rate": 1.1887457431353333e-05, - "loss": 0.3384, - "step": 9570 - }, - { - "epoch": 0.9016273757094746, - "grad_norm": 0.6661431789398193, - "learning_rate": 1.188597453282301e-05, - "loss": 0.3076, - "step": 9571 - }, - { - "epoch": 0.9017215798026424, - "grad_norm": 0.7865625619888306, - "learning_rate": 1.1884491591289546e-05, - "loss": 0.3067, - "step": 9572 - }, - { - "epoch": 0.9018157838958103, - "grad_norm": 0.7785285115242004, - "learning_rate": 1.1883008606786763e-05, - "loss": 0.269, - "step": 9573 - }, - { - "epoch": 0.9019099879889781, - "grad_norm": 0.8562577962875366, - "learning_rate": 1.1881525579348474e-05, - "loss": 0.3158, - "step": 9574 - }, - { - "epoch": 0.902004192082146, - "grad_norm": 0.712498664855957, - "learning_rate": 1.188004250900849e-05, - "loss": 0.3268, - "step": 9575 - }, - { - "epoch": 0.9020983961753138, - "grad_norm": 0.6795501708984375, - "learning_rate": 1.1878559395800627e-05, - "loss": 0.3064, - "step": 9576 - }, - { - "epoch": 0.9021926002684817, - "grad_norm": 0.7840029001235962, - "learning_rate": 1.1877076239758704e-05, - "loss": 0.3238, - "step": 9577 - }, - { - "epoch": 0.9022868043616495, - "grad_norm": 0.7358947396278381, - "learning_rate": 1.1875593040916544e-05, - "loss": 0.2751, - "step": 9578 - }, - { - "epoch": 0.9023810084548174, - "grad_norm": 0.7333950400352478, - "learning_rate": 1.187410979930796e-05, - "loss": 0.3044, - "step": 9579 - }, - { - "epoch": 0.9024752125479852, - "grad_norm": 0.6706371307373047, - "learning_rate": 1.1872626514966774e-05, - "loss": 0.2949, - "step": 9580 - }, - { - "epoch": 0.9025694166411531, - "grad_norm": 0.6611641645431519, - "learning_rate": 1.1871143187926805e-05, - "loss": 0.2739, - "step": 9581 - }, - { - "epoch": 0.9026636207343209, - "grad_norm": 0.7098549604415894, - "learning_rate": 1.1869659818221881e-05, - "loss": 0.3084, - "step": 9582 - }, - { - "epoch": 0.9027578248274888, - "grad_norm": 0.6501350402832031, - "learning_rate": 1.1868176405885824e-05, - "loss": 0.2853, - "step": 9583 - }, - { - "epoch": 
0.9028520289206566, - "grad_norm": 0.6925662755966187, - "learning_rate": 1.1866692950952447e-05, - "loss": 0.2668, - "step": 9584 - }, - { - "epoch": 0.9029462330138245, - "grad_norm": 0.6863833665847778, - "learning_rate": 1.1865209453455593e-05, - "loss": 0.2608, - "step": 9585 - }, - { - "epoch": 0.9030404371069923, - "grad_norm": 0.7631823420524597, - "learning_rate": 1.1863725913429075e-05, - "loss": 0.3226, - "step": 9586 - }, - { - "epoch": 0.9031346412001602, - "grad_norm": 0.821094810962677, - "learning_rate": 1.1862242330906722e-05, - "loss": 0.2749, - "step": 9587 - }, - { - "epoch": 0.903228845293328, - "grad_norm": 0.7188436985015869, - "learning_rate": 1.1860758705922368e-05, - "loss": 0.2917, - "step": 9588 - }, - { - "epoch": 0.9033230493864959, - "grad_norm": 0.6913095116615295, - "learning_rate": 1.1859275038509833e-05, - "loss": 0.2685, - "step": 9589 - }, - { - "epoch": 0.9034172534796637, - "grad_norm": 0.7232502698898315, - "learning_rate": 1.1857791328702955e-05, - "loss": 0.3107, - "step": 9590 - }, - { - "epoch": 0.9035114575728316, - "grad_norm": 0.669734537601471, - "learning_rate": 1.1856307576535562e-05, - "loss": 0.2923, - "step": 9591 - }, - { - "epoch": 0.9036056616659994, - "grad_norm": 0.6539066433906555, - "learning_rate": 1.1854823782041483e-05, - "loss": 0.2882, - "step": 9592 - }, - { - "epoch": 0.9036998657591673, - "grad_norm": 0.6612775921821594, - "learning_rate": 1.1853339945254555e-05, - "loss": 0.3008, - "step": 9593 - }, - { - "epoch": 0.9037940698523351, - "grad_norm": 0.8304771184921265, - "learning_rate": 1.1851856066208609e-05, - "loss": 0.3396, - "step": 9594 - }, - { - "epoch": 0.903888273945503, - "grad_norm": 0.7078601121902466, - "learning_rate": 1.185037214493748e-05, - "loss": 0.2715, - "step": 9595 - }, - { - "epoch": 0.9039824780386708, - "grad_norm": 0.8279368281364441, - "learning_rate": 1.1848888181475005e-05, - "loss": 0.3121, - "step": 9596 - }, - { - "epoch": 0.9040766821318387, - "grad_norm": 0.9535421133041382, - "learning_rate": 1.1847404175855021e-05, - "loss": 0.2851, - "step": 9597 - }, - { - "epoch": 0.9041708862250065, - "grad_norm": 0.732578694820404, - "learning_rate": 1.1845920128111366e-05, - "loss": 0.309, - "step": 9598 - }, - { - "epoch": 0.9042650903181744, - "grad_norm": 0.7540335655212402, - "learning_rate": 1.1844436038277876e-05, - "loss": 0.2867, - "step": 9599 - }, - { - "epoch": 0.9043592944113422, - "grad_norm": 0.8256245851516724, - "learning_rate": 1.184295190638839e-05, - "loss": 0.3496, - "step": 9600 - }, - { - "epoch": 0.9044534985045101, - "grad_norm": 1.0604828596115112, - "learning_rate": 1.1841467732476752e-05, - "loss": 0.3124, - "step": 9601 - }, - { - "epoch": 0.9045477025976779, - "grad_norm": 0.7529759407043457, - "learning_rate": 1.1839983516576802e-05, - "loss": 0.3051, - "step": 9602 - }, - { - "epoch": 0.9046419066908458, - "grad_norm": 0.7639031410217285, - "learning_rate": 1.1838499258722383e-05, - "loss": 0.3123, - "step": 9603 - }, - { - "epoch": 0.9047361107840136, - "grad_norm": 0.6991181969642639, - "learning_rate": 1.1837014958947335e-05, - "loss": 0.3201, - "step": 9604 - }, - { - "epoch": 0.9048303148771815, - "grad_norm": 0.6521792411804199, - "learning_rate": 1.1835530617285509e-05, - "loss": 0.2865, - "step": 9605 - }, - { - "epoch": 0.9049245189703493, - "grad_norm": 0.8181825876235962, - "learning_rate": 1.1834046233770742e-05, - "loss": 0.2623, - "step": 9606 - }, - { - "epoch": 0.9050187230635172, - "grad_norm": 0.7247796654701233, - "learning_rate": 
1.1832561808436887e-05, - "loss": 0.3361, - "step": 9607 - }, - { - "epoch": 0.905112927156685, - "grad_norm": 0.6888623237609863, - "learning_rate": 1.1831077341317789e-05, - "loss": 0.2539, - "step": 9608 - }, - { - "epoch": 0.9052071312498529, - "grad_norm": 0.6608344316482544, - "learning_rate": 1.1829592832447295e-05, - "loss": 0.2463, - "step": 9609 - }, - { - "epoch": 0.9053013353430207, - "grad_norm": 0.785529375076294, - "learning_rate": 1.1828108281859252e-05, - "loss": 0.3544, - "step": 9610 - }, - { - "epoch": 0.9053955394361886, - "grad_norm": 0.7120524048805237, - "learning_rate": 1.1826623689587518e-05, - "loss": 0.308, - "step": 9611 - }, - { - "epoch": 0.9054897435293564, - "grad_norm": 0.7383785843849182, - "learning_rate": 1.1825139055665934e-05, - "loss": 0.2889, - "step": 9612 - }, - { - "epoch": 0.9055839476225241, - "grad_norm": 0.6540294885635376, - "learning_rate": 1.182365438012836e-05, - "loss": 0.3068, - "step": 9613 - }, - { - "epoch": 0.905678151715692, - "grad_norm": 0.6447115540504456, - "learning_rate": 1.1822169663008646e-05, - "loss": 0.2977, - "step": 9614 - }, - { - "epoch": 0.9057723558088598, - "grad_norm": 0.6795375347137451, - "learning_rate": 1.1820684904340645e-05, - "loss": 0.2884, - "step": 9615 - }, - { - "epoch": 0.9058665599020277, - "grad_norm": 0.7206788659095764, - "learning_rate": 1.181920010415821e-05, - "loss": 0.2875, - "step": 9616 - }, - { - "epoch": 0.9059607639951955, - "grad_norm": 0.7569628953933716, - "learning_rate": 1.1817715262495202e-05, - "loss": 0.3527, - "step": 9617 - }, - { - "epoch": 0.9060549680883634, - "grad_norm": 0.6905651688575745, - "learning_rate": 1.1816230379385475e-05, - "loss": 0.2894, - "step": 9618 - }, - { - "epoch": 0.9061491721815312, - "grad_norm": 0.7624897360801697, - "learning_rate": 1.1814745454862887e-05, - "loss": 0.3582, - "step": 9619 - }, - { - "epoch": 0.9062433762746991, - "grad_norm": 0.7739545106887817, - "learning_rate": 1.1813260488961295e-05, - "loss": 0.3259, - "step": 9620 - }, - { - "epoch": 0.9063375803678669, - "grad_norm": 0.6178655624389648, - "learning_rate": 1.1811775481714558e-05, - "loss": 0.3016, - "step": 9621 - }, - { - "epoch": 0.9064317844610348, - "grad_norm": 0.6559436917304993, - "learning_rate": 1.1810290433156539e-05, - "loss": 0.3, - "step": 9622 - }, - { - "epoch": 0.9065259885542026, - "grad_norm": 0.7467542290687561, - "learning_rate": 1.1808805343321102e-05, - "loss": 0.3143, - "step": 9623 - }, - { - "epoch": 0.9066201926473705, - "grad_norm": 0.7773847579956055, - "learning_rate": 1.1807320212242099e-05, - "loss": 0.3232, - "step": 9624 - }, - { - "epoch": 0.9067143967405383, - "grad_norm": 0.8095197677612305, - "learning_rate": 1.1805835039953408e-05, - "loss": 0.3258, - "step": 9625 - }, - { - "epoch": 0.9068086008337062, - "grad_norm": 0.6602053642272949, - "learning_rate": 1.1804349826488879e-05, - "loss": 0.2945, - "step": 9626 - }, - { - "epoch": 0.906902804926874, - "grad_norm": 0.6551075577735901, - "learning_rate": 1.1802864571882383e-05, - "loss": 0.2817, - "step": 9627 - }, - { - "epoch": 0.9069970090200419, - "grad_norm": 0.706739068031311, - "learning_rate": 1.1801379276167792e-05, - "loss": 0.3067, - "step": 9628 - }, - { - "epoch": 0.9070912131132097, - "grad_norm": 0.6379954814910889, - "learning_rate": 1.1799893939378964e-05, - "loss": 0.2902, - "step": 9629 - }, - { - "epoch": 0.9071854172063776, - "grad_norm": 0.7236230373382568, - "learning_rate": 1.1798408561549773e-05, - "loss": 0.3103, - "step": 9630 - }, - { - "epoch": 
0.9072796212995454, - "grad_norm": 0.6442583203315735, - "learning_rate": 1.1796923142714083e-05, - "loss": 0.2837, - "step": 9631 - }, - { - "epoch": 0.9073738253927133, - "grad_norm": 0.7341998815536499, - "learning_rate": 1.1795437682905765e-05, - "loss": 0.3241, - "step": 9632 - }, - { - "epoch": 0.9074680294858811, - "grad_norm": 0.8652843832969666, - "learning_rate": 1.1793952182158694e-05, - "loss": 0.3223, - "step": 9633 - }, - { - "epoch": 0.907562233579049, - "grad_norm": 0.7407996654510498, - "learning_rate": 1.1792466640506741e-05, - "loss": 0.2794, - "step": 9634 - }, - { - "epoch": 0.9076564376722168, - "grad_norm": 0.6820136308670044, - "learning_rate": 1.1790981057983772e-05, - "loss": 0.2754, - "step": 9635 - }, - { - "epoch": 0.9077506417653847, - "grad_norm": 0.7229933738708496, - "learning_rate": 1.1789495434623665e-05, - "loss": 0.3104, - "step": 9636 - }, - { - "epoch": 0.9078448458585525, - "grad_norm": 0.7318028211593628, - "learning_rate": 1.1788009770460297e-05, - "loss": 0.3201, - "step": 9637 - }, - { - "epoch": 0.9079390499517204, - "grad_norm": 0.7525824904441833, - "learning_rate": 1.1786524065527543e-05, - "loss": 0.2861, - "step": 9638 - }, - { - "epoch": 0.9080332540448882, - "grad_norm": 0.6937665343284607, - "learning_rate": 1.1785038319859274e-05, - "loss": 0.2864, - "step": 9639 - }, - { - "epoch": 0.9081274581380561, - "grad_norm": 0.6146497130393982, - "learning_rate": 1.1783552533489372e-05, - "loss": 0.3043, - "step": 9640 - }, - { - "epoch": 0.9082216622312239, - "grad_norm": 0.960955023765564, - "learning_rate": 1.1782066706451713e-05, - "loss": 0.3027, - "step": 9641 - }, - { - "epoch": 0.9083158663243918, - "grad_norm": 0.8236584663391113, - "learning_rate": 1.1780580838780177e-05, - "loss": 0.2882, - "step": 9642 - }, - { - "epoch": 0.9084100704175596, - "grad_norm": 0.8055413365364075, - "learning_rate": 1.1779094930508646e-05, - "loss": 0.3456, - "step": 9643 - }, - { - "epoch": 0.9085042745107275, - "grad_norm": 0.6914135813713074, - "learning_rate": 1.1777608981670997e-05, - "loss": 0.287, - "step": 9644 - }, - { - "epoch": 0.9085984786038953, - "grad_norm": 0.668573260307312, - "learning_rate": 1.1776122992301118e-05, - "loss": 0.3079, - "step": 9645 - }, - { - "epoch": 0.9086926826970632, - "grad_norm": 0.7232000231742859, - "learning_rate": 1.1774636962432881e-05, - "loss": 0.3582, - "step": 9646 - }, - { - "epoch": 0.908786886790231, - "grad_norm": 0.6752644181251526, - "learning_rate": 1.177315089210018e-05, - "loss": 0.3008, - "step": 9647 - }, - { - "epoch": 0.9088810908833989, - "grad_norm": 0.6755600571632385, - "learning_rate": 1.17716647813369e-05, - "loss": 0.2911, - "step": 9648 - }, - { - "epoch": 0.9089752949765667, - "grad_norm": 0.7653055191040039, - "learning_rate": 1.1770178630176918e-05, - "loss": 0.3149, - "step": 9649 - }, - { - "epoch": 0.9090694990697346, - "grad_norm": 0.7899324893951416, - "learning_rate": 1.1768692438654128e-05, - "loss": 0.3856, - "step": 9650 - }, - { - "epoch": 0.9091637031629024, - "grad_norm": 0.6736056208610535, - "learning_rate": 1.1767206206802416e-05, - "loss": 0.3078, - "step": 9651 - }, - { - "epoch": 0.9092579072560703, - "grad_norm": 0.6566187143325806, - "learning_rate": 1.1765719934655667e-05, - "loss": 0.3031, - "step": 9652 - }, - { - "epoch": 0.9093521113492381, - "grad_norm": 0.7438850402832031, - "learning_rate": 1.1764233622247774e-05, - "loss": 0.3434, - "step": 9653 - }, - { - "epoch": 0.909446315442406, - "grad_norm": 0.7408274412155151, - "learning_rate": 
1.1762747269612627e-05, - "loss": 0.3291, - "step": 9654 - }, - { - "epoch": 0.9095405195355738, - "grad_norm": 0.7000565528869629, - "learning_rate": 1.1761260876784115e-05, - "loss": 0.3327, - "step": 9655 - }, - { - "epoch": 0.9096347236287416, - "grad_norm": 0.8495182394981384, - "learning_rate": 1.175977444379613e-05, - "loss": 0.3426, - "step": 9656 - }, - { - "epoch": 0.9097289277219095, - "grad_norm": 0.7792713046073914, - "learning_rate": 1.1758287970682566e-05, - "loss": 0.2978, - "step": 9657 - }, - { - "epoch": 0.9098231318150773, - "grad_norm": 0.6760193109512329, - "learning_rate": 1.1756801457477321e-05, - "loss": 0.2846, - "step": 9658 - }, - { - "epoch": 0.9099173359082452, - "grad_norm": 0.6466975808143616, - "learning_rate": 1.1755314904214284e-05, - "loss": 0.2558, - "step": 9659 - }, - { - "epoch": 0.910011540001413, - "grad_norm": 0.6765708327293396, - "learning_rate": 1.175382831092735e-05, - "loss": 0.3029, - "step": 9660 - }, - { - "epoch": 0.9101057440945809, - "grad_norm": 0.6689856648445129, - "learning_rate": 1.175234167765042e-05, - "loss": 0.2646, - "step": 9661 - }, - { - "epoch": 0.9101999481877487, - "grad_norm": 0.829488217830658, - "learning_rate": 1.175085500441739e-05, - "loss": 0.3299, - "step": 9662 - }, - { - "epoch": 0.9102941522809166, - "grad_norm": 0.6252883672714233, - "learning_rate": 1.1749368291262158e-05, - "loss": 0.2917, - "step": 9663 - }, - { - "epoch": 0.9103883563740844, - "grad_norm": 0.7971799969673157, - "learning_rate": 1.1747881538218622e-05, - "loss": 0.3722, - "step": 9664 - }, - { - "epoch": 0.9104825604672523, - "grad_norm": 0.7582257390022278, - "learning_rate": 1.1746394745320689e-05, - "loss": 0.3084, - "step": 9665 - }, - { - "epoch": 0.9105767645604201, - "grad_norm": 0.8081191778182983, - "learning_rate": 1.1744907912602248e-05, - "loss": 0.2696, - "step": 9666 - }, - { - "epoch": 0.910670968653588, - "grad_norm": 0.6928661465644836, - "learning_rate": 1.1743421040097209e-05, - "loss": 0.2898, - "step": 9667 - }, - { - "epoch": 0.9107651727467558, - "grad_norm": 0.7181206345558167, - "learning_rate": 1.1741934127839479e-05, - "loss": 0.2697, - "step": 9668 - }, - { - "epoch": 0.9108593768399237, - "grad_norm": 0.6876206994056702, - "learning_rate": 1.1740447175862953e-05, - "loss": 0.2896, - "step": 9669 - }, - { - "epoch": 0.9109535809330915, - "grad_norm": 0.7033278346061707, - "learning_rate": 1.173896018420154e-05, - "loss": 0.2986, - "step": 9670 - }, - { - "epoch": 0.9110477850262594, - "grad_norm": 0.6977961659431458, - "learning_rate": 1.1737473152889147e-05, - "loss": 0.2923, - "step": 9671 - }, - { - "epoch": 0.9111419891194272, - "grad_norm": 0.6762495040893555, - "learning_rate": 1.1735986081959676e-05, - "loss": 0.3171, - "step": 9672 - }, - { - "epoch": 0.9112361932125951, - "grad_norm": 0.7551417350769043, - "learning_rate": 1.1734498971447041e-05, - "loss": 0.3043, - "step": 9673 - }, - { - "epoch": 0.9113303973057629, - "grad_norm": 0.6652315258979797, - "learning_rate": 1.1733011821385148e-05, - "loss": 0.2951, - "step": 9674 - }, - { - "epoch": 0.9114246013989308, - "grad_norm": 0.7785511612892151, - "learning_rate": 1.1731524631807903e-05, - "loss": 0.3246, - "step": 9675 - }, - { - "epoch": 0.9115188054920986, - "grad_norm": 0.7023170590400696, - "learning_rate": 1.1730037402749219e-05, - "loss": 0.3026, - "step": 9676 - }, - { - "epoch": 0.9116130095852665, - "grad_norm": 0.7303873300552368, - "learning_rate": 1.1728550134243004e-05, - "loss": 0.3279, - "step": 9677 - }, - { - "epoch": 
0.9117072136784343, - "grad_norm": 0.7390693426132202, - "learning_rate": 1.1727062826323174e-05, - "loss": 0.3266, - "step": 9678 - }, - { - "epoch": 0.9118014177716022, - "grad_norm": 0.7931835651397705, - "learning_rate": 1.1725575479023644e-05, - "loss": 0.338, - "step": 9679 - }, - { - "epoch": 0.91189562186477, - "grad_norm": 0.705696165561676, - "learning_rate": 1.1724088092378324e-05, - "loss": 0.3047, - "step": 9680 - }, - { - "epoch": 0.9119898259579379, - "grad_norm": 0.7783174514770508, - "learning_rate": 1.1722600666421125e-05, - "loss": 0.3069, - "step": 9681 - }, - { - "epoch": 0.9120840300511057, - "grad_norm": 0.6609805822372437, - "learning_rate": 1.1721113201185967e-05, - "loss": 0.2575, - "step": 9682 - }, - { - "epoch": 0.9121782341442736, - "grad_norm": 0.7529018521308899, - "learning_rate": 1.1719625696706772e-05, - "loss": 0.2972, - "step": 9683 - }, - { - "epoch": 0.9122724382374414, - "grad_norm": 0.7474442720413208, - "learning_rate": 1.1718138153017445e-05, - "loss": 0.3023, - "step": 9684 - }, - { - "epoch": 0.9123666423306093, - "grad_norm": 0.7671797275543213, - "learning_rate": 1.1716650570151915e-05, - "loss": 0.3207, - "step": 9685 - }, - { - "epoch": 0.9124608464237771, - "grad_norm": 0.6554548740386963, - "learning_rate": 1.1715162948144094e-05, - "loss": 0.3088, - "step": 9686 - }, - { - "epoch": 0.912555050516945, - "grad_norm": 0.6867275238037109, - "learning_rate": 1.1713675287027906e-05, - "loss": 0.2612, - "step": 9687 - }, - { - "epoch": 0.9126492546101128, - "grad_norm": 0.7825465798377991, - "learning_rate": 1.1712187586837276e-05, - "loss": 0.3124, - "step": 9688 - }, - { - "epoch": 0.9127434587032807, - "grad_norm": 0.736897349357605, - "learning_rate": 1.1710699847606116e-05, - "loss": 0.2912, - "step": 9689 - }, - { - "epoch": 0.9128376627964485, - "grad_norm": 0.7599862813949585, - "learning_rate": 1.1709212069368357e-05, - "loss": 0.2825, - "step": 9690 - }, - { - "epoch": 0.9129318668896164, - "grad_norm": 0.7896125912666321, - "learning_rate": 1.1707724252157917e-05, - "loss": 0.2976, - "step": 9691 - }, - { - "epoch": 0.9130260709827842, - "grad_norm": 0.7773582935333252, - "learning_rate": 1.1706236396008723e-05, - "loss": 0.3227, - "step": 9692 - }, - { - "epoch": 0.9131202750759521, - "grad_norm": 0.6425527334213257, - "learning_rate": 1.1704748500954702e-05, - "loss": 0.2724, - "step": 9693 - }, - { - "epoch": 0.9132144791691199, - "grad_norm": 0.90715092420578, - "learning_rate": 1.1703260567029777e-05, - "loss": 0.2973, - "step": 9694 - }, - { - "epoch": 0.9133086832622878, - "grad_norm": 0.6834738254547119, - "learning_rate": 1.1701772594267879e-05, - "loss": 0.2708, - "step": 9695 - }, - { - "epoch": 0.9134028873554556, - "grad_norm": 0.8027157783508301, - "learning_rate": 1.1700284582702933e-05, - "loss": 0.3065, - "step": 9696 - }, - { - "epoch": 0.9134970914486235, - "grad_norm": 0.716576337814331, - "learning_rate": 1.1698796532368869e-05, - "loss": 0.3185, - "step": 9697 - }, - { - "epoch": 0.9135912955417913, - "grad_norm": 0.7953327894210815, - "learning_rate": 1.1697308443299615e-05, - "loss": 0.3345, - "step": 9698 - }, - { - "epoch": 0.9136854996349592, - "grad_norm": 0.7189897298812866, - "learning_rate": 1.1695820315529108e-05, - "loss": 0.3063, - "step": 9699 - }, - { - "epoch": 0.913779703728127, - "grad_norm": 0.5961300730705261, - "learning_rate": 1.1694332149091272e-05, - "loss": 0.253, - "step": 9700 - }, - { - "epoch": 0.9138739078212949, - "grad_norm": 0.7950000166893005, - "learning_rate": 
1.1692843944020041e-05, - "loss": 0.2983, - "step": 9701 - }, - { - "epoch": 0.9139681119144627, - "grad_norm": 0.691615104675293, - "learning_rate": 1.1691355700349351e-05, - "loss": 0.3202, - "step": 9702 - }, - { - "epoch": 0.9140623160076305, - "grad_norm": 0.6892995238304138, - "learning_rate": 1.1689867418113138e-05, - "loss": 0.3255, - "step": 9703 - }, - { - "epoch": 0.9141565201007984, - "grad_norm": 0.7660693526268005, - "learning_rate": 1.168837909734533e-05, - "loss": 0.2855, - "step": 9704 - }, - { - "epoch": 0.9142507241939662, - "grad_norm": 0.7879043817520142, - "learning_rate": 1.1686890738079874e-05, - "loss": 0.2495, - "step": 9705 - }, - { - "epoch": 0.9143449282871341, - "grad_norm": 0.7355771064758301, - "learning_rate": 1.1685402340350695e-05, - "loss": 0.3274, - "step": 9706 - }, - { - "epoch": 0.9144391323803019, - "grad_norm": 0.645781934261322, - "learning_rate": 1.1683913904191737e-05, - "loss": 0.2694, - "step": 9707 - }, - { - "epoch": 0.9145333364734698, - "grad_norm": 0.6447874307632446, - "learning_rate": 1.168242542963694e-05, - "loss": 0.2997, - "step": 9708 - }, - { - "epoch": 0.9146275405666376, - "grad_norm": 0.6921257972717285, - "learning_rate": 1.168093691672024e-05, - "loss": 0.2729, - "step": 9709 - }, - { - "epoch": 0.9147217446598055, - "grad_norm": 0.5812404751777649, - "learning_rate": 1.1679448365475579e-05, - "loss": 0.2846, - "step": 9710 - }, - { - "epoch": 0.9148159487529733, - "grad_norm": 0.8316332697868347, - "learning_rate": 1.1677959775936898e-05, - "loss": 0.299, - "step": 9711 - }, - { - "epoch": 0.9149101528461412, - "grad_norm": 0.6069605946540833, - "learning_rate": 1.1676471148138136e-05, - "loss": 0.2762, - "step": 9712 - }, - { - "epoch": 0.915004356939309, - "grad_norm": 0.6823188066482544, - "learning_rate": 1.1674982482113242e-05, - "loss": 0.2947, - "step": 9713 - }, - { - "epoch": 0.9150985610324769, - "grad_norm": 0.8961073160171509, - "learning_rate": 1.1673493777896157e-05, - "loss": 0.2727, - "step": 9714 - }, - { - "epoch": 0.9151927651256447, - "grad_norm": 0.7483460307121277, - "learning_rate": 1.1672005035520826e-05, - "loss": 0.2909, - "step": 9715 - }, - { - "epoch": 0.9152869692188126, - "grad_norm": 0.6980855464935303, - "learning_rate": 1.1670516255021193e-05, - "loss": 0.2999, - "step": 9716 - }, - { - "epoch": 0.9153811733119804, - "grad_norm": 0.732695460319519, - "learning_rate": 1.1669027436431205e-05, - "loss": 0.3461, - "step": 9717 - }, - { - "epoch": 0.9154753774051483, - "grad_norm": 0.9831451773643494, - "learning_rate": 1.1667538579784813e-05, - "loss": 0.3111, - "step": 9718 - }, - { - "epoch": 0.9155695814983161, - "grad_norm": 0.6631169319152832, - "learning_rate": 1.1666049685115963e-05, - "loss": 0.317, - "step": 9719 - }, - { - "epoch": 0.915663785591484, - "grad_norm": 0.6973643898963928, - "learning_rate": 1.1664560752458602e-05, - "loss": 0.2831, - "step": 9720 - }, - { - "epoch": 0.9157579896846518, - "grad_norm": 0.7669511437416077, - "learning_rate": 1.166307178184668e-05, - "loss": 0.3111, - "step": 9721 - }, - { - "epoch": 0.9158521937778197, - "grad_norm": 0.6711779236793518, - "learning_rate": 1.1661582773314151e-05, - "loss": 0.3339, - "step": 9722 - }, - { - "epoch": 0.9159463978709875, - "grad_norm": 0.7318465709686279, - "learning_rate": 1.1660093726894966e-05, - "loss": 0.3181, - "step": 9723 - }, - { - "epoch": 0.9160406019641554, - "grad_norm": 0.6497591733932495, - "learning_rate": 1.1658604642623075e-05, - "loss": 0.2568, - "step": 9724 - }, - { - "epoch": 
0.9161348060573232, - "grad_norm": 0.7274473309516907, - "learning_rate": 1.1657115520532436e-05, - "loss": 0.3035, - "step": 9725 - }, - { - "epoch": 0.9162290101504911, - "grad_norm": 0.7577809691429138, - "learning_rate": 1.1655626360656998e-05, - "loss": 0.3076, - "step": 9726 - }, - { - "epoch": 0.9163232142436589, - "grad_norm": 0.6475155353546143, - "learning_rate": 1.1654137163030714e-05, - "loss": 0.2598, - "step": 9727 - }, - { - "epoch": 0.9164174183368268, - "grad_norm": 0.7633470892906189, - "learning_rate": 1.1652647927687553e-05, - "loss": 0.2993, - "step": 9728 - }, - { - "epoch": 0.9165116224299946, - "grad_norm": 0.6230948567390442, - "learning_rate": 1.1651158654661458e-05, - "loss": 0.2753, - "step": 9729 - }, - { - "epoch": 0.9166058265231625, - "grad_norm": 0.7631728053092957, - "learning_rate": 1.1649669343986393e-05, - "loss": 0.3044, - "step": 9730 - }, - { - "epoch": 0.9167000306163303, - "grad_norm": 0.6901342868804932, - "learning_rate": 1.1648179995696319e-05, - "loss": 0.3139, - "step": 9731 - }, - { - "epoch": 0.9167942347094982, - "grad_norm": 0.720503032207489, - "learning_rate": 1.1646690609825186e-05, - "loss": 0.359, - "step": 9732 - }, - { - "epoch": 0.916888438802666, - "grad_norm": 0.6577574610710144, - "learning_rate": 1.1645201186406965e-05, - "loss": 0.2636, - "step": 9733 - }, - { - "epoch": 0.9169826428958339, - "grad_norm": 0.7542081475257874, - "learning_rate": 1.1643711725475613e-05, - "loss": 0.3102, - "step": 9734 - }, - { - "epoch": 0.9170768469890017, - "grad_norm": 0.6242924928665161, - "learning_rate": 1.164222222706509e-05, - "loss": 0.2623, - "step": 9735 - }, - { - "epoch": 0.9171710510821696, - "grad_norm": 0.7153643369674683, - "learning_rate": 1.164073269120936e-05, - "loss": 0.3032, - "step": 9736 - }, - { - "epoch": 0.9172652551753374, - "grad_norm": 0.8726444840431213, - "learning_rate": 1.1639243117942387e-05, - "loss": 0.3244, - "step": 9737 - }, - { - "epoch": 0.9173594592685053, - "grad_norm": 0.7901492714881897, - "learning_rate": 1.1637753507298138e-05, - "loss": 0.2913, - "step": 9738 - }, - { - "epoch": 0.9174536633616731, - "grad_norm": 0.799914538860321, - "learning_rate": 1.1636263859310572e-05, - "loss": 0.3358, - "step": 9739 - }, - { - "epoch": 0.917547867454841, - "grad_norm": 0.7553713321685791, - "learning_rate": 1.1634774174013664e-05, - "loss": 0.2947, - "step": 9740 - }, - { - "epoch": 0.9176420715480088, - "grad_norm": 0.7834179401397705, - "learning_rate": 1.1633284451441373e-05, - "loss": 0.3046, - "step": 9741 - }, - { - "epoch": 0.9177362756411767, - "grad_norm": 0.9598224759101868, - "learning_rate": 1.1631794691627673e-05, - "loss": 0.2837, - "step": 9742 - }, - { - "epoch": 0.9178304797343445, - "grad_norm": 0.7087194919586182, - "learning_rate": 1.163030489460653e-05, - "loss": 0.3184, - "step": 9743 - }, - { - "epoch": 0.9179246838275124, - "grad_norm": 0.7266285419464111, - "learning_rate": 1.1628815060411913e-05, - "loss": 0.3478, - "step": 9744 - }, - { - "epoch": 0.9180188879206802, - "grad_norm": 0.7172000408172607, - "learning_rate": 1.1627325189077796e-05, - "loss": 0.3551, - "step": 9745 - }, - { - "epoch": 0.918113092013848, - "grad_norm": 0.7453349232673645, - "learning_rate": 1.1625835280638147e-05, - "loss": 0.2823, - "step": 9746 - }, - { - "epoch": 0.9182072961070159, - "grad_norm": 0.672122061252594, - "learning_rate": 1.1624345335126939e-05, - "loss": 0.2861, - "step": 9747 - }, - { - "epoch": 0.9183015002001838, - "grad_norm": 0.7447899580001831, - "learning_rate": 
1.1622855352578144e-05, - "loss": 0.3103, - "step": 9748 - }, - { - "epoch": 0.9183957042933516, - "grad_norm": 0.6708574295043945, - "learning_rate": 1.1621365333025736e-05, - "loss": 0.3195, - "step": 9749 - }, - { - "epoch": 0.9184899083865194, - "grad_norm": 0.6426267027854919, - "learning_rate": 1.1619875276503694e-05, - "loss": 0.3075, - "step": 9750 - }, - { - "epoch": 0.9185841124796872, - "grad_norm": 0.628358781337738, - "learning_rate": 1.1618385183045991e-05, - "loss": 0.3084, - "step": 9751 - }, - { - "epoch": 0.918678316572855, - "grad_norm": 0.7134820818901062, - "learning_rate": 1.1616895052686598e-05, - "loss": 0.302, - "step": 9752 - }, - { - "epoch": 0.9187725206660229, - "grad_norm": 1.484278917312622, - "learning_rate": 1.1615404885459503e-05, - "loss": 0.3166, - "step": 9753 - }, - { - "epoch": 0.9188667247591907, - "grad_norm": 0.7471532225608826, - "learning_rate": 1.1613914681398677e-05, - "loss": 0.3484, - "step": 9754 - }, - { - "epoch": 0.9189609288523586, - "grad_norm": 0.7318947315216064, - "learning_rate": 1.1612424440538099e-05, - "loss": 0.2967, - "step": 9755 - }, - { - "epoch": 0.9190551329455264, - "grad_norm": 0.8191625475883484, - "learning_rate": 1.1610934162911751e-05, - "loss": 0.287, - "step": 9756 - }, - { - "epoch": 0.9191493370386943, - "grad_norm": 0.6804456114768982, - "learning_rate": 1.160944384855361e-05, - "loss": 0.3196, - "step": 9757 - }, - { - "epoch": 0.9192435411318621, - "grad_norm": 0.6854345798492432, - "learning_rate": 1.1607953497497664e-05, - "loss": 0.3422, - "step": 9758 - }, - { - "epoch": 0.91933774522503, - "grad_norm": 0.6621114611625671, - "learning_rate": 1.160646310977789e-05, - "loss": 0.3121, - "step": 9759 - }, - { - "epoch": 0.9194319493181978, - "grad_norm": 0.7258914113044739, - "learning_rate": 1.1604972685428273e-05, - "loss": 0.2967, - "step": 9760 - }, - { - "epoch": 0.9195261534113657, - "grad_norm": 0.7967356443405151, - "learning_rate": 1.1603482224482793e-05, - "loss": 0.2794, - "step": 9761 - }, - { - "epoch": 0.9196203575045335, - "grad_norm": 0.7581629157066345, - "learning_rate": 1.1601991726975443e-05, - "loss": 0.3259, - "step": 9762 - }, - { - "epoch": 0.9197145615977014, - "grad_norm": 0.8769176006317139, - "learning_rate": 1.1600501192940203e-05, - "loss": 0.3529, - "step": 9763 - }, - { - "epoch": 0.9198087656908692, - "grad_norm": 0.7249931693077087, - "learning_rate": 1.159901062241106e-05, - "loss": 0.2776, - "step": 9764 - }, - { - "epoch": 0.9199029697840371, - "grad_norm": 0.8734459280967712, - "learning_rate": 1.1597520015422003e-05, - "loss": 0.3294, - "step": 9765 - }, - { - "epoch": 0.9199971738772049, - "grad_norm": 0.8458078503608704, - "learning_rate": 1.1596029372007018e-05, - "loss": 0.2858, - "step": 9766 - }, - { - "epoch": 0.9200913779703728, - "grad_norm": 0.7283058166503906, - "learning_rate": 1.1594538692200094e-05, - "loss": 0.3031, - "step": 9767 - }, - { - "epoch": 0.9201855820635406, - "grad_norm": 0.6813495755195618, - "learning_rate": 1.1593047976035226e-05, - "loss": 0.2686, - "step": 9768 - }, - { - "epoch": 0.9202797861567085, - "grad_norm": 0.7182820439338684, - "learning_rate": 1.1591557223546394e-05, - "loss": 0.3068, - "step": 9769 - }, - { - "epoch": 0.9203739902498763, - "grad_norm": 0.9071686863899231, - "learning_rate": 1.15900664347676e-05, - "loss": 0.2864, - "step": 9770 - }, - { - "epoch": 0.9204681943430442, - "grad_norm": 0.7065544724464417, - "learning_rate": 1.1588575609732833e-05, - "loss": 0.3142, - "step": 9771 - }, - { - "epoch": 
0.920562398436212, - "grad_norm": 0.9485142827033997, - "learning_rate": 1.1587084748476082e-05, - "loss": 0.3159, - "step": 9772 - }, - { - "epoch": 0.9206566025293799, - "grad_norm": 0.7493008375167847, - "learning_rate": 1.1585593851031346e-05, - "loss": 0.3286, - "step": 9773 - }, - { - "epoch": 0.9207508066225477, - "grad_norm": 0.7871357202529907, - "learning_rate": 1.158410291743262e-05, - "loss": 0.2836, - "step": 9774 - }, - { - "epoch": 0.9208450107157156, - "grad_norm": 0.815697193145752, - "learning_rate": 1.1582611947713896e-05, - "loss": 0.3324, - "step": 9775 - }, - { - "epoch": 0.9209392148088834, - "grad_norm": 0.7253048419952393, - "learning_rate": 1.1581120941909172e-05, - "loss": 0.2837, - "step": 9776 - }, - { - "epoch": 0.9210334189020513, - "grad_norm": 0.7250169515609741, - "learning_rate": 1.1579629900052442e-05, - "loss": 0.3094, - "step": 9777 - }, - { - "epoch": 0.9211276229952191, - "grad_norm": 2.9205780029296875, - "learning_rate": 1.1578138822177711e-05, - "loss": 0.3058, - "step": 9778 - }, - { - "epoch": 0.921221827088387, - "grad_norm": 0.6662350296974182, - "learning_rate": 1.1576647708318975e-05, - "loss": 0.2927, - "step": 9779 - }, - { - "epoch": 0.9213160311815548, - "grad_norm": 0.8149073719978333, - "learning_rate": 1.1575156558510232e-05, - "loss": 0.3248, - "step": 9780 - }, - { - "epoch": 0.9214102352747227, - "grad_norm": 0.7538041472434998, - "learning_rate": 1.1573665372785482e-05, - "loss": 0.2923, - "step": 9781 - }, - { - "epoch": 0.9215044393678905, - "grad_norm": 0.6875706315040588, - "learning_rate": 1.157217415117873e-05, - "loss": 0.3075, - "step": 9782 - }, - { - "epoch": 0.9215986434610584, - "grad_norm": 0.733664333820343, - "learning_rate": 1.1570682893723975e-05, - "loss": 0.3223, - "step": 9783 - }, - { - "epoch": 0.9216928475542262, - "grad_norm": 0.6781983971595764, - "learning_rate": 1.1569191600455219e-05, - "loss": 0.3025, - "step": 9784 - }, - { - "epoch": 0.921787051647394, - "grad_norm": 0.915701687335968, - "learning_rate": 1.1567700271406473e-05, - "loss": 0.3676, - "step": 9785 - }, - { - "epoch": 0.9218812557405619, - "grad_norm": 0.697933554649353, - "learning_rate": 1.1566208906611728e-05, - "loss": 0.3016, - "step": 9786 - }, - { - "epoch": 0.9219754598337297, - "grad_norm": 0.734963059425354, - "learning_rate": 1.1564717506105006e-05, - "loss": 0.3465, - "step": 9787 - }, - { - "epoch": 0.9220696639268976, - "grad_norm": 0.6184594631195068, - "learning_rate": 1.15632260699203e-05, - "loss": 0.2867, - "step": 9788 - }, - { - "epoch": 0.9221638680200654, - "grad_norm": 0.756898045539856, - "learning_rate": 1.1561734598091624e-05, - "loss": 0.2658, - "step": 9789 - }, - { - "epoch": 0.9222580721132333, - "grad_norm": 0.7471708059310913, - "learning_rate": 1.1560243090652982e-05, - "loss": 0.283, - "step": 9790 - }, - { - "epoch": 0.9223522762064011, - "grad_norm": 1.0527830123901367, - "learning_rate": 1.1558751547638387e-05, - "loss": 0.3118, - "step": 9791 - }, - { - "epoch": 0.922446480299569, - "grad_norm": 0.7618290781974792, - "learning_rate": 1.1557259969081841e-05, - "loss": 0.2852, - "step": 9792 - }, - { - "epoch": 0.9225406843927368, - "grad_norm": 0.7631360292434692, - "learning_rate": 1.1555768355017368e-05, - "loss": 0.3068, - "step": 9793 - }, - { - "epoch": 0.9226348884859047, - "grad_norm": 0.7242071032524109, - "learning_rate": 1.1554276705478964e-05, - "loss": 0.292, - "step": 9794 - }, - { - "epoch": 0.9227290925790725, - "grad_norm": 0.6794859170913696, - "learning_rate": 1.155278502050065e-05, 
- "loss": 0.3321, - "step": 9795 - }, - { - "epoch": 0.9228232966722404, - "grad_norm": 0.7960864901542664, - "learning_rate": 1.1551293300116435e-05, - "loss": 0.3055, - "step": 9796 - }, - { - "epoch": 0.9229175007654082, - "grad_norm": 0.6520786285400391, - "learning_rate": 1.1549801544360333e-05, - "loss": 0.3028, - "step": 9797 - }, - { - "epoch": 0.9230117048585761, - "grad_norm": 0.6842049956321716, - "learning_rate": 1.154830975326636e-05, - "loss": 0.2905, - "step": 9798 - }, - { - "epoch": 0.9231059089517439, - "grad_norm": 1.0332279205322266, - "learning_rate": 1.1546817926868529e-05, - "loss": 0.325, - "step": 9799 - }, - { - "epoch": 0.9232001130449118, - "grad_norm": 0.6430279016494751, - "learning_rate": 1.154532606520086e-05, - "loss": 0.2846, - "step": 9800 - }, - { - "epoch": 0.9232943171380796, - "grad_norm": 0.7600269317626953, - "learning_rate": 1.1543834168297363e-05, - "loss": 0.3688, - "step": 9801 - }, - { - "epoch": 0.9233885212312475, - "grad_norm": 0.66355299949646, - "learning_rate": 1.154234223619206e-05, - "loss": 0.2635, - "step": 9802 - }, - { - "epoch": 0.9234827253244153, - "grad_norm": 0.7806983590126038, - "learning_rate": 1.1540850268918973e-05, - "loss": 0.3218, - "step": 9803 - }, - { - "epoch": 0.9235769294175832, - "grad_norm": 0.6844689846038818, - "learning_rate": 1.1539358266512114e-05, - "loss": 0.3022, - "step": 9804 - }, - { - "epoch": 0.923671133510751, - "grad_norm": 0.7546319961547852, - "learning_rate": 1.1537866229005505e-05, - "loss": 0.3212, - "step": 9805 - }, - { - "epoch": 0.9237653376039189, - "grad_norm": 0.7607617378234863, - "learning_rate": 1.1536374156433168e-05, - "loss": 0.3094, - "step": 9806 - }, - { - "epoch": 0.9238595416970867, - "grad_norm": 0.6887412071228027, - "learning_rate": 1.1534882048829126e-05, - "loss": 0.3002, - "step": 9807 - }, - { - "epoch": 0.9239537457902546, - "grad_norm": 0.7532636523246765, - "learning_rate": 1.1533389906227396e-05, - "loss": 0.2705, - "step": 9808 - }, - { - "epoch": 0.9240479498834224, - "grad_norm": 0.7626668810844421, - "learning_rate": 1.1531897728662008e-05, - "loss": 0.298, - "step": 9809 - }, - { - "epoch": 0.9241421539765903, - "grad_norm": 0.6450669765472412, - "learning_rate": 1.153040551616698e-05, - "loss": 0.2722, - "step": 9810 - }, - { - "epoch": 0.9242363580697581, - "grad_norm": 0.9094744324684143, - "learning_rate": 1.1528913268776342e-05, - "loss": 0.3366, - "step": 9811 - }, - { - "epoch": 0.924330562162926, - "grad_norm": 0.6546444296836853, - "learning_rate": 1.1527420986524114e-05, - "loss": 0.3202, - "step": 9812 - }, - { - "epoch": 0.9244247662560938, - "grad_norm": 0.8807068467140198, - "learning_rate": 1.152592866944433e-05, - "loss": 0.3083, - "step": 9813 - }, - { - "epoch": 0.9245189703492617, - "grad_norm": 0.7280904054641724, - "learning_rate": 1.1524436317571008e-05, - "loss": 0.2781, - "step": 9814 - }, - { - "epoch": 0.9246131744424295, - "grad_norm": 0.8132057785987854, - "learning_rate": 1.1522943930938185e-05, - "loss": 0.3002, - "step": 9815 - }, - { - "epoch": 0.9247073785355974, - "grad_norm": 0.7954848408699036, - "learning_rate": 1.152145150957988e-05, - "loss": 0.2989, - "step": 9816 - }, - { - "epoch": 0.9248015826287652, - "grad_norm": 0.7015737295150757, - "learning_rate": 1.151995905353013e-05, - "loss": 0.3033, - "step": 9817 - }, - { - "epoch": 0.9248957867219331, - "grad_norm": 0.8260770440101624, - "learning_rate": 1.1518466562822961e-05, - "loss": 0.3103, - "step": 9818 - }, - { - "epoch": 0.9249899908151009, - "grad_norm": 
0.6957612633705139, - "learning_rate": 1.1516974037492408e-05, - "loss": 0.2967, - "step": 9819 - }, - { - "epoch": 0.9250841949082688, - "grad_norm": 0.7427754402160645, - "learning_rate": 1.1515481477572502e-05, - "loss": 0.3448, - "step": 9820 - }, - { - "epoch": 0.9251783990014366, - "grad_norm": 0.8178347945213318, - "learning_rate": 1.1513988883097271e-05, - "loss": 0.3638, - "step": 9821 - }, - { - "epoch": 0.9252726030946045, - "grad_norm": 0.8922828435897827, - "learning_rate": 1.1512496254100756e-05, - "loss": 0.2693, - "step": 9822 - }, - { - "epoch": 0.9253668071877723, - "grad_norm": 0.9412736892700195, - "learning_rate": 1.1511003590616984e-05, - "loss": 0.3258, - "step": 9823 - }, - { - "epoch": 0.9254610112809402, - "grad_norm": 0.6674445271492004, - "learning_rate": 1.1509510892679994e-05, - "loss": 0.2777, - "step": 9824 - }, - { - "epoch": 0.925555215374108, - "grad_norm": 0.595525860786438, - "learning_rate": 1.1508018160323825e-05, - "loss": 0.2653, - "step": 9825 - }, - { - "epoch": 0.9256494194672759, - "grad_norm": 0.7466319799423218, - "learning_rate": 1.1506525393582505e-05, - "loss": 0.3316, - "step": 9826 - }, - { - "epoch": 0.9257436235604437, - "grad_norm": 0.7886889576911926, - "learning_rate": 1.1505032592490077e-05, - "loss": 0.3134, - "step": 9827 - }, - { - "epoch": 0.9258378276536116, - "grad_norm": 0.704491913318634, - "learning_rate": 1.150353975708058e-05, - "loss": 0.3163, - "step": 9828 - }, - { - "epoch": 0.9259320317467794, - "grad_norm": 0.6642715334892273, - "learning_rate": 1.150204688738805e-05, - "loss": 0.2488, - "step": 9829 - }, - { - "epoch": 0.9260262358399473, - "grad_norm": 0.610035240650177, - "learning_rate": 1.1500553983446527e-05, - "loss": 0.2435, - "step": 9830 - }, - { - "epoch": 0.9261204399331151, - "grad_norm": 0.7585195899009705, - "learning_rate": 1.1499061045290057e-05, - "loss": 0.312, - "step": 9831 - }, - { - "epoch": 0.926214644026283, - "grad_norm": 0.6760469675064087, - "learning_rate": 1.149756807295267e-05, - "loss": 0.2902, - "step": 9832 - }, - { - "epoch": 0.9263088481194508, - "grad_norm": 0.6611597537994385, - "learning_rate": 1.1496075066468422e-05, - "loss": 0.293, - "step": 9833 - }, - { - "epoch": 0.9264030522126186, - "grad_norm": 0.7652198672294617, - "learning_rate": 1.1494582025871343e-05, - "loss": 0.3129, - "step": 9834 - }, - { - "epoch": 0.9264972563057865, - "grad_norm": 0.6663600206375122, - "learning_rate": 1.1493088951195486e-05, - "loss": 0.2845, - "step": 9835 - }, - { - "epoch": 0.9265914603989543, - "grad_norm": 0.7061387300491333, - "learning_rate": 1.1491595842474892e-05, - "loss": 0.3229, - "step": 9836 - }, - { - "epoch": 0.9266856644921222, - "grad_norm": 0.6897725462913513, - "learning_rate": 1.1490102699743602e-05, - "loss": 0.3143, - "step": 9837 - }, - { - "epoch": 0.92677986858529, - "grad_norm": 0.575584888458252, - "learning_rate": 1.1488609523035667e-05, - "loss": 0.2256, - "step": 9838 - }, - { - "epoch": 0.9268740726784579, - "grad_norm": 0.8263952136039734, - "learning_rate": 1.1487116312385135e-05, - "loss": 0.3084, - "step": 9839 - }, - { - "epoch": 0.9269682767716257, - "grad_norm": 0.6921063661575317, - "learning_rate": 1.1485623067826053e-05, - "loss": 0.3259, - "step": 9840 - }, - { - "epoch": 0.9270624808647936, - "grad_norm": 0.7334921956062317, - "learning_rate": 1.1484129789392462e-05, - "loss": 0.312, - "step": 9841 - }, - { - "epoch": 0.9271566849579614, - "grad_norm": 0.688546895980835, - "learning_rate": 1.148263647711842e-05, - "loss": 0.268, - "step": 9842 - 
}, - { - "epoch": 0.9272508890511293, - "grad_norm": 0.7391108274459839, - "learning_rate": 1.1481143131037976e-05, - "loss": 0.2781, - "step": 9843 - }, - { - "epoch": 0.9273450931442971, - "grad_norm": 0.7827560305595398, - "learning_rate": 1.147964975118517e-05, - "loss": 0.3151, - "step": 9844 - }, - { - "epoch": 0.927439297237465, - "grad_norm": 0.7109737396240234, - "learning_rate": 1.147815633759407e-05, - "loss": 0.299, - "step": 9845 - }, - { - "epoch": 0.9275335013306328, - "grad_norm": 0.7421813011169434, - "learning_rate": 1.1476662890298713e-05, - "loss": 0.286, - "step": 9846 - }, - { - "epoch": 0.9276277054238007, - "grad_norm": 0.7915611863136292, - "learning_rate": 1.1475169409333163e-05, - "loss": 0.2693, - "step": 9847 - }, - { - "epoch": 0.9277219095169685, - "grad_norm": 0.7203260064125061, - "learning_rate": 1.1473675894731468e-05, - "loss": 0.3195, - "step": 9848 - }, - { - "epoch": 0.9278161136101364, - "grad_norm": 0.7064208984375, - "learning_rate": 1.147218234652768e-05, - "loss": 0.3031, - "step": 9849 - }, - { - "epoch": 0.9279103177033042, - "grad_norm": 0.6969903707504272, - "learning_rate": 1.1470688764755862e-05, - "loss": 0.2761, - "step": 9850 - }, - { - "epoch": 0.9280045217964721, - "grad_norm": 0.7239713668823242, - "learning_rate": 1.1469195149450063e-05, - "loss": 0.2885, - "step": 9851 - }, - { - "epoch": 0.9280987258896399, - "grad_norm": 0.7491258978843689, - "learning_rate": 1.1467701500644344e-05, - "loss": 0.303, - "step": 9852 - }, - { - "epoch": 0.9281929299828078, - "grad_norm": 0.6255550980567932, - "learning_rate": 1.1466207818372764e-05, - "loss": 0.2871, - "step": 9853 - }, - { - "epoch": 0.9282871340759756, - "grad_norm": 0.8438575863838196, - "learning_rate": 1.146471410266937e-05, - "loss": 0.3044, - "step": 9854 - }, - { - "epoch": 0.9283813381691435, - "grad_norm": 0.7754700183868408, - "learning_rate": 1.1463220353568236e-05, - "loss": 0.2985, - "step": 9855 - }, - { - "epoch": 0.9284755422623113, - "grad_norm": 0.6884915232658386, - "learning_rate": 1.1461726571103413e-05, - "loss": 0.3185, - "step": 9856 - }, - { - "epoch": 0.9285697463554792, - "grad_norm": 0.6350425481796265, - "learning_rate": 1.1460232755308962e-05, - "loss": 0.2889, - "step": 9857 - }, - { - "epoch": 0.928663950448647, - "grad_norm": 0.7048108577728271, - "learning_rate": 1.1458738906218947e-05, - "loss": 0.3242, - "step": 9858 - }, - { - "epoch": 0.9287581545418149, - "grad_norm": 0.8387030959129333, - "learning_rate": 1.145724502386743e-05, - "loss": 0.3983, - "step": 9859 - }, - { - "epoch": 0.9288523586349827, - "grad_norm": 0.8022667765617371, - "learning_rate": 1.1455751108288474e-05, - "loss": 0.2726, - "step": 9860 - }, - { - "epoch": 0.9289465627281506, - "grad_norm": 0.6724227070808411, - "learning_rate": 1.1454257159516139e-05, - "loss": 0.2752, - "step": 9861 - }, - { - "epoch": 0.9290407668213184, - "grad_norm": 0.6579088568687439, - "learning_rate": 1.1452763177584491e-05, - "loss": 0.291, - "step": 9862 - }, - { - "epoch": 0.9291349709144863, - "grad_norm": 0.6796449422836304, - "learning_rate": 1.1451269162527598e-05, - "loss": 0.3241, - "step": 9863 - }, - { - "epoch": 0.9292291750076541, - "grad_norm": 0.6931931972503662, - "learning_rate": 1.1449775114379523e-05, - "loss": 0.3118, - "step": 9864 - }, - { - "epoch": 0.929323379100822, - "grad_norm": 0.7058824896812439, - "learning_rate": 1.1448281033174333e-05, - "loss": 0.3212, - "step": 9865 - }, - { - "epoch": 0.9294175831939898, - "grad_norm": 0.7156518697738647, - "learning_rate": 
1.1446786918946094e-05, - "loss": 0.3393, - "step": 9866 - }, - { - "epoch": 0.9295117872871577, - "grad_norm": 0.7381418943405151, - "learning_rate": 1.1445292771728877e-05, - "loss": 0.3464, - "step": 9867 - }, - { - "epoch": 0.9296059913803255, - "grad_norm": 0.725773811340332, - "learning_rate": 1.1443798591556751e-05, - "loss": 0.2916, - "step": 9868 - }, - { - "epoch": 0.9297001954734934, - "grad_norm": 0.8342340588569641, - "learning_rate": 1.1442304378463782e-05, - "loss": 0.3409, - "step": 9869 - }, - { - "epoch": 0.9297943995666612, - "grad_norm": 0.6926758885383606, - "learning_rate": 1.1440810132484043e-05, - "loss": 0.2702, - "step": 9870 - }, - { - "epoch": 0.9298886036598291, - "grad_norm": 0.721227765083313, - "learning_rate": 1.1439315853651607e-05, - "loss": 0.2773, - "step": 9871 - }, - { - "epoch": 0.9299828077529969, - "grad_norm": 0.8517300486564636, - "learning_rate": 1.143782154200054e-05, - "loss": 0.292, - "step": 9872 - }, - { - "epoch": 0.9300770118461648, - "grad_norm": 0.649409830570221, - "learning_rate": 1.1436327197564926e-05, - "loss": 0.2606, - "step": 9873 - }, - { - "epoch": 0.9301712159393326, - "grad_norm": 0.7920855283737183, - "learning_rate": 1.1434832820378821e-05, - "loss": 0.2314, - "step": 9874 - }, - { - "epoch": 0.9302654200325005, - "grad_norm": 0.6702718734741211, - "learning_rate": 1.1433338410476313e-05, - "loss": 0.2517, - "step": 9875 - }, - { - "epoch": 0.9303596241256683, - "grad_norm": 0.6656098961830139, - "learning_rate": 1.1431843967891471e-05, - "loss": 0.2636, - "step": 9876 - }, - { - "epoch": 0.9304538282188362, - "grad_norm": 0.6624272465705872, - "learning_rate": 1.1430349492658372e-05, - "loss": 0.2923, - "step": 9877 - }, - { - "epoch": 0.930548032312004, - "grad_norm": 0.7577387690544128, - "learning_rate": 1.1428854984811095e-05, - "loss": 0.2981, - "step": 9878 - }, - { - "epoch": 0.9306422364051719, - "grad_norm": 0.6593563556671143, - "learning_rate": 1.1427360444383715e-05, - "loss": 0.2602, - "step": 9879 - }, - { - "epoch": 0.9307364404983397, - "grad_norm": 0.6353920102119446, - "learning_rate": 1.1425865871410306e-05, - "loss": 0.2895, - "step": 9880 - }, - { - "epoch": 0.9308306445915075, - "grad_norm": 0.9484451413154602, - "learning_rate": 1.1424371265924951e-05, - "loss": 0.3441, - "step": 9881 - }, - { - "epoch": 0.9309248486846754, - "grad_norm": 0.7541933655738831, - "learning_rate": 1.142287662796173e-05, - "loss": 0.3365, - "step": 9882 - }, - { - "epoch": 0.9310190527778432, - "grad_norm": 1.1388295888900757, - "learning_rate": 1.142138195755472e-05, - "loss": 0.3024, - "step": 9883 - }, - { - "epoch": 0.9311132568710111, - "grad_norm": 0.7818012237548828, - "learning_rate": 1.1419887254738005e-05, - "loss": 0.3064, - "step": 9884 - }, - { - "epoch": 0.931207460964179, - "grad_norm": 0.8127809762954712, - "learning_rate": 1.1418392519545665e-05, - "loss": 0.3171, - "step": 9885 - }, - { - "epoch": 0.9313016650573468, - "grad_norm": 0.8229526877403259, - "learning_rate": 1.1416897752011777e-05, - "loss": 0.3034, - "step": 9886 - }, - { - "epoch": 0.9313958691505146, - "grad_norm": 0.7284834980964661, - "learning_rate": 1.1415402952170434e-05, - "loss": 0.2828, - "step": 9887 - }, - { - "epoch": 0.9314900732436825, - "grad_norm": 0.7439441680908203, - "learning_rate": 1.1413908120055712e-05, - "loss": 0.3078, - "step": 9888 - }, - { - "epoch": 0.9315842773368503, - "grad_norm": 0.7771740555763245, - "learning_rate": 1.1412413255701698e-05, - "loss": 0.3444, - "step": 9889 - }, - { - "epoch": 
0.9316784814300181, - "grad_norm": 0.8692653179168701, - "learning_rate": 1.1410918359142482e-05, - "loss": 0.2878, - "step": 9890 - }, - { - "epoch": 0.9317726855231859, - "grad_norm": 0.7473660707473755, - "learning_rate": 1.1409423430412141e-05, - "loss": 0.281, - "step": 9891 - }, - { - "epoch": 0.9318668896163538, - "grad_norm": 0.6817148327827454, - "learning_rate": 1.1407928469544765e-05, - "loss": 0.2449, - "step": 9892 - }, - { - "epoch": 0.9319610937095216, - "grad_norm": 0.7222283482551575, - "learning_rate": 1.1406433476574446e-05, - "loss": 0.309, - "step": 9893 - }, - { - "epoch": 0.9320552978026895, - "grad_norm": 0.6757322549819946, - "learning_rate": 1.1404938451535265e-05, - "loss": 0.2896, - "step": 9894 - }, - { - "epoch": 0.9321495018958573, - "grad_norm": 0.6814132332801819, - "learning_rate": 1.1403443394461318e-05, - "loss": 0.3168, - "step": 9895 - }, - { - "epoch": 0.9322437059890252, - "grad_norm": 0.7547572255134583, - "learning_rate": 1.140194830538669e-05, - "loss": 0.3021, - "step": 9896 - }, - { - "epoch": 0.932337910082193, - "grad_norm": 0.6935548186302185, - "learning_rate": 1.140045318434547e-05, - "loss": 0.2886, - "step": 9897 - }, - { - "epoch": 0.9324321141753609, - "grad_norm": 0.7061550617218018, - "learning_rate": 1.1398958031371756e-05, - "loss": 0.3078, - "step": 9898 - }, - { - "epoch": 0.9325263182685287, - "grad_norm": 0.8823407292366028, - "learning_rate": 1.1397462846499633e-05, - "loss": 0.2965, - "step": 9899 - }, - { - "epoch": 0.9326205223616966, - "grad_norm": 0.7356183528900146, - "learning_rate": 1.1395967629763196e-05, - "loss": 0.3226, - "step": 9900 - }, - { - "epoch": 0.9327147264548644, - "grad_norm": 0.6505039930343628, - "learning_rate": 1.1394472381196537e-05, - "loss": 0.2615, - "step": 9901 - }, - { - "epoch": 0.9328089305480323, - "grad_norm": 0.6908877491950989, - "learning_rate": 1.1392977100833753e-05, - "loss": 0.2697, - "step": 9902 - }, - { - "epoch": 0.9329031346412001, - "grad_norm": 0.8316591382026672, - "learning_rate": 1.1391481788708937e-05, - "loss": 0.3014, - "step": 9903 - }, - { - "epoch": 0.932997338734368, - "grad_norm": 0.6694990396499634, - "learning_rate": 1.1389986444856184e-05, - "loss": 0.2987, - "step": 9904 - }, - { - "epoch": 0.9330915428275358, - "grad_norm": 0.708440899848938, - "learning_rate": 1.138849106930959e-05, - "loss": 0.2957, - "step": 9905 - }, - { - "epoch": 0.9331857469207037, - "grad_norm": 0.745836615562439, - "learning_rate": 1.138699566210325e-05, - "loss": 0.2988, - "step": 9906 - }, - { - "epoch": 0.9332799510138715, - "grad_norm": 0.779005229473114, - "learning_rate": 1.1385500223271266e-05, - "loss": 0.2996, - "step": 9907 - }, - { - "epoch": 0.9333741551070394, - "grad_norm": 0.777803897857666, - "learning_rate": 1.1384004752847734e-05, - "loss": 0.2515, - "step": 9908 - }, - { - "epoch": 0.9334683592002072, - "grad_norm": 0.7843090295791626, - "learning_rate": 1.1382509250866754e-05, - "loss": 0.3664, - "step": 9909 - }, - { - "epoch": 0.9335625632933751, - "grad_norm": 0.6366344094276428, - "learning_rate": 1.1381013717362426e-05, - "loss": 0.2819, - "step": 9910 - }, - { - "epoch": 0.9336567673865429, - "grad_norm": 0.8661206960678101, - "learning_rate": 1.1379518152368846e-05, - "loss": 0.3445, - "step": 9911 - }, - { - "epoch": 0.9337509714797108, - "grad_norm": 0.7795618772506714, - "learning_rate": 1.1378022555920119e-05, - "loss": 0.2953, - "step": 9912 - }, - { - "epoch": 0.9338451755728786, - "grad_norm": 0.7671486139297485, - "learning_rate": 
1.137652692805035e-05, - "loss": 0.3227, - "step": 9913 - }, - { - "epoch": 0.9339393796660465, - "grad_norm": 0.6926409006118774, - "learning_rate": 1.1375031268793638e-05, - "loss": 0.2894, - "step": 9914 - }, - { - "epoch": 0.9340335837592143, - "grad_norm": 0.6194040775299072, - "learning_rate": 1.1373535578184083e-05, - "loss": 0.2646, - "step": 9915 - }, - { - "epoch": 0.9341277878523822, - "grad_norm": 0.738124430179596, - "learning_rate": 1.1372039856255795e-05, - "loss": 0.3311, - "step": 9916 - }, - { - "epoch": 0.93422199194555, - "grad_norm": 0.7832310199737549, - "learning_rate": 1.1370544103042875e-05, - "loss": 0.2662, - "step": 9917 - }, - { - "epoch": 0.9343161960387178, - "grad_norm": 0.7839985489845276, - "learning_rate": 1.1369048318579429e-05, - "loss": 0.3003, - "step": 9918 - }, - { - "epoch": 0.9344104001318857, - "grad_norm": 0.6679766178131104, - "learning_rate": 1.1367552502899568e-05, - "loss": 0.2668, - "step": 9919 - }, - { - "epoch": 0.9345046042250535, - "grad_norm": 0.6642523407936096, - "learning_rate": 1.1366056656037395e-05, - "loss": 0.2585, - "step": 9920 - }, - { - "epoch": 0.9345988083182214, - "grad_norm": 0.7177343964576721, - "learning_rate": 1.1364560778027011e-05, - "loss": 0.2941, - "step": 9921 - }, - { - "epoch": 0.9346930124113892, - "grad_norm": 0.7531906366348267, - "learning_rate": 1.1363064868902536e-05, - "loss": 0.2959, - "step": 9922 - }, - { - "epoch": 0.9347872165045571, - "grad_norm": 0.7104706764221191, - "learning_rate": 1.1361568928698074e-05, - "loss": 0.3115, - "step": 9923 - }, - { - "epoch": 0.9348814205977249, - "grad_norm": 0.7429775595664978, - "learning_rate": 1.1360072957447734e-05, - "loss": 0.3076, - "step": 9924 - }, - { - "epoch": 0.9349756246908928, - "grad_norm": 0.8083294630050659, - "learning_rate": 1.135857695518563e-05, - "loss": 0.3072, - "step": 9925 - }, - { - "epoch": 0.9350698287840606, - "grad_norm": 0.8144860863685608, - "learning_rate": 1.1357080921945865e-05, - "loss": 0.2975, - "step": 9926 - }, - { - "epoch": 0.9351640328772285, - "grad_norm": 0.6939120292663574, - "learning_rate": 1.1355584857762559e-05, - "loss": 0.301, - "step": 9927 - }, - { - "epoch": 0.9352582369703963, - "grad_norm": 0.7598026394844055, - "learning_rate": 1.1354088762669822e-05, - "loss": 0.306, - "step": 9928 - }, - { - "epoch": 0.9353524410635642, - "grad_norm": 0.9581432938575745, - "learning_rate": 1.1352592636701765e-05, - "loss": 0.2699, - "step": 9929 - }, - { - "epoch": 0.935446645156732, - "grad_norm": 0.7145228385925293, - "learning_rate": 1.1351096479892508e-05, - "loss": 0.3034, - "step": 9930 - }, - { - "epoch": 0.9355408492498999, - "grad_norm": 0.7225978374481201, - "learning_rate": 1.1349600292276158e-05, - "loss": 0.3001, - "step": 9931 - }, - { - "epoch": 0.9356350533430677, - "grad_norm": 0.9868305921554565, - "learning_rate": 1.1348104073886831e-05, - "loss": 0.2675, - "step": 9932 - }, - { - "epoch": 0.9357292574362356, - "grad_norm": 0.7206736207008362, - "learning_rate": 1.1346607824758656e-05, - "loss": 0.2776, - "step": 9933 - }, - { - "epoch": 0.9358234615294034, - "grad_norm": 0.7475821375846863, - "learning_rate": 1.1345111544925734e-05, - "loss": 0.3236, - "step": 9934 - }, - { - "epoch": 0.9359176656225713, - "grad_norm": 0.8190663456916809, - "learning_rate": 1.1343615234422188e-05, - "loss": 0.2922, - "step": 9935 - }, - { - "epoch": 0.9360118697157391, - "grad_norm": 0.6947363018989563, - "learning_rate": 1.1342118893282139e-05, - "loss": 0.3138, - "step": 9936 - }, - { - "epoch": 
0.936106073808907, - "grad_norm": 0.7332528233528137, - "learning_rate": 1.13406225215397e-05, - "loss": 0.3075, - "step": 9937 - }, - { - "epoch": 0.9362002779020748, - "grad_norm": 0.7393556833267212, - "learning_rate": 1.1339126119228999e-05, - "loss": 0.3154, - "step": 9938 - }, - { - "epoch": 0.9362944819952427, - "grad_norm": 0.7267153263092041, - "learning_rate": 1.1337629686384149e-05, - "loss": 0.3139, - "step": 9939 - }, - { - "epoch": 0.9363886860884105, - "grad_norm": 0.7483210563659668, - "learning_rate": 1.1336133223039274e-05, - "loss": 0.3265, - "step": 9940 - }, - { - "epoch": 0.9364828901815784, - "grad_norm": 0.6914429664611816, - "learning_rate": 1.1334636729228493e-05, - "loss": 0.2977, - "step": 9941 - }, - { - "epoch": 0.9365770942747462, - "grad_norm": 0.6975411176681519, - "learning_rate": 1.1333140204985933e-05, - "loss": 0.3084, - "step": 9942 - }, - { - "epoch": 0.9366712983679141, - "grad_norm": 0.7118569016456604, - "learning_rate": 1.1331643650345715e-05, - "loss": 0.2696, - "step": 9943 - }, - { - "epoch": 0.9367655024610819, - "grad_norm": 0.8160148859024048, - "learning_rate": 1.1330147065341962e-05, - "loss": 0.3043, - "step": 9944 - }, - { - "epoch": 0.9368597065542498, - "grad_norm": 0.7015519738197327, - "learning_rate": 1.1328650450008798e-05, - "loss": 0.3057, - "step": 9945 - }, - { - "epoch": 0.9369539106474176, - "grad_norm": 0.7510275840759277, - "learning_rate": 1.1327153804380346e-05, - "loss": 0.3295, - "step": 9946 - }, - { - "epoch": 0.9370481147405855, - "grad_norm": 0.7069833278656006, - "learning_rate": 1.1325657128490739e-05, - "loss": 0.329, - "step": 9947 - }, - { - "epoch": 0.9371423188337533, - "grad_norm": 0.7049331068992615, - "learning_rate": 1.13241604223741e-05, - "loss": 0.3128, - "step": 9948 - }, - { - "epoch": 0.9372365229269212, - "grad_norm": 0.7377355098724365, - "learning_rate": 1.132266368606455e-05, - "loss": 0.3248, - "step": 9949 - }, - { - "epoch": 0.937330727020089, - "grad_norm": 0.6691461205482483, - "learning_rate": 1.132116691959623e-05, - "loss": 0.2914, - "step": 9950 - }, - { - "epoch": 0.9374249311132569, - "grad_norm": 1.0119857788085938, - "learning_rate": 1.1319670123003254e-05, - "loss": 0.3384, - "step": 9951 - }, - { - "epoch": 0.9375191352064247, - "grad_norm": 0.7098222970962524, - "learning_rate": 1.1318173296319761e-05, - "loss": 0.2547, - "step": 9952 - }, - { - "epoch": 0.9376133392995926, - "grad_norm": 0.7042816281318665, - "learning_rate": 1.1316676439579881e-05, - "loss": 0.2686, - "step": 9953 - }, - { - "epoch": 0.9377075433927604, - "grad_norm": 0.7121096849441528, - "learning_rate": 1.131517955281774e-05, - "loss": 0.3142, - "step": 9954 - }, - { - "epoch": 0.9378017474859283, - "grad_norm": 0.7205197811126709, - "learning_rate": 1.131368263606747e-05, - "loss": 0.3285, - "step": 9955 - }, - { - "epoch": 0.9378959515790961, - "grad_norm": 0.7869858741760254, - "learning_rate": 1.1312185689363204e-05, - "loss": 0.2983, - "step": 9956 - }, - { - "epoch": 0.937990155672264, - "grad_norm": 0.6667638421058655, - "learning_rate": 1.1310688712739076e-05, - "loss": 0.2635, - "step": 9957 - }, - { - "epoch": 0.9380843597654318, - "grad_norm": 0.6950367093086243, - "learning_rate": 1.1309191706229216e-05, - "loss": 0.3376, - "step": 9958 - }, - { - "epoch": 0.9381785638585997, - "grad_norm": 0.7347050309181213, - "learning_rate": 1.1307694669867765e-05, - "loss": 0.274, - "step": 9959 - }, - { - "epoch": 0.9382727679517675, - "grad_norm": 0.7395270466804504, - "learning_rate": 
1.130619760368885e-05, - "loss": 0.3042, - "step": 9960 - }, - { - "epoch": 0.9383669720449354, - "grad_norm": 0.8214524984359741, - "learning_rate": 1.130470050772661e-05, - "loss": 0.293, - "step": 9961 - }, - { - "epoch": 0.9384611761381032, - "grad_norm": 0.6774492263793945, - "learning_rate": 1.1303203382015182e-05, - "loss": 0.2689, - "step": 9962 - }, - { - "epoch": 0.938555380231271, - "grad_norm": 0.7465950846672058, - "learning_rate": 1.13017062265887e-05, - "loss": 0.2746, - "step": 9963 - }, - { - "epoch": 0.9386495843244389, - "grad_norm": 0.5934969186782837, - "learning_rate": 1.1300209041481304e-05, - "loss": 0.2624, - "step": 9964 - }, - { - "epoch": 0.9387437884176068, - "grad_norm": 0.8484015464782715, - "learning_rate": 1.129871182672713e-05, - "loss": 0.3006, - "step": 9965 - }, - { - "epoch": 0.9388379925107746, - "grad_norm": 0.7751550674438477, - "learning_rate": 1.1297214582360319e-05, - "loss": 0.3313, - "step": 9966 - }, - { - "epoch": 0.9389321966039424, - "grad_norm": 0.7646822333335876, - "learning_rate": 1.1295717308415009e-05, - "loss": 0.299, - "step": 9967 - }, - { - "epoch": 0.9390264006971103, - "grad_norm": 0.7235328555107117, - "learning_rate": 1.129422000492534e-05, - "loss": 0.296, - "step": 9968 - }, - { - "epoch": 0.9391206047902781, - "grad_norm": 0.7357309460639954, - "learning_rate": 1.1292722671925451e-05, - "loss": 0.2723, - "step": 9969 - }, - { - "epoch": 0.939214808883446, - "grad_norm": 0.747172474861145, - "learning_rate": 1.1291225309449492e-05, - "loss": 0.2914, - "step": 9970 - }, - { - "epoch": 0.9393090129766138, - "grad_norm": 0.8299241065979004, - "learning_rate": 1.1289727917531593e-05, - "loss": 0.3233, - "step": 9971 - }, - { - "epoch": 0.9394032170697817, - "grad_norm": 0.6824995279312134, - "learning_rate": 1.1288230496205904e-05, - "loss": 0.2949, - "step": 9972 - }, - { - "epoch": 0.9394974211629495, - "grad_norm": 0.817967414855957, - "learning_rate": 1.128673304550657e-05, - "loss": 0.3349, - "step": 9973 - }, - { - "epoch": 0.9395916252561174, - "grad_norm": 0.7143617272377014, - "learning_rate": 1.1285235565467731e-05, - "loss": 0.3303, - "step": 9974 - }, - { - "epoch": 0.9396858293492852, - "grad_norm": 0.6996434330940247, - "learning_rate": 1.1283738056123535e-05, - "loss": 0.2744, - "step": 9975 - }, - { - "epoch": 0.9397800334424531, - "grad_norm": 0.6489402651786804, - "learning_rate": 1.1282240517508123e-05, - "loss": 0.3029, - "step": 9976 - }, - { - "epoch": 0.9398742375356209, - "grad_norm": 0.7647433280944824, - "learning_rate": 1.1280742949655646e-05, - "loss": 0.3179, - "step": 9977 - }, - { - "epoch": 0.9399684416287888, - "grad_norm": 0.5993421673774719, - "learning_rate": 1.1279245352600248e-05, - "loss": 0.2802, - "step": 9978 - }, - { - "epoch": 0.9400626457219566, - "grad_norm": 0.7115922570228577, - "learning_rate": 1.1277747726376078e-05, - "loss": 0.2926, - "step": 9979 - }, - { - "epoch": 0.9401568498151245, - "grad_norm": 0.8292811512947083, - "learning_rate": 1.1276250071017284e-05, - "loss": 0.2971, - "step": 9980 - }, - { - "epoch": 0.9402510539082923, - "grad_norm": 0.7397706508636475, - "learning_rate": 1.1274752386558017e-05, - "loss": 0.2909, - "step": 9981 - }, - { - "epoch": 0.9403452580014602, - "grad_norm": 0.7106196880340576, - "learning_rate": 1.127325467303242e-05, - "loss": 0.3205, - "step": 9982 - }, - { - "epoch": 0.940439462094628, - "grad_norm": 0.65058434009552, - "learning_rate": 1.1271756930474651e-05, - "loss": 0.2966, - "step": 9983 - }, - { - "epoch": 0.9405336661877959, - 
"grad_norm": 0.6805201768875122, - "learning_rate": 1.1270259158918855e-05, - "loss": 0.2574, - "step": 9984 - }, - { - "epoch": 0.9406278702809637, - "grad_norm": 0.7617127895355225, - "learning_rate": 1.1268761358399187e-05, - "loss": 0.3205, - "step": 9985 - }, - { - "epoch": 0.9407220743741316, - "grad_norm": 0.8599696755409241, - "learning_rate": 1.1267263528949794e-05, - "loss": 0.3309, - "step": 9986 - }, - { - "epoch": 0.9408162784672994, - "grad_norm": 0.74057537317276, - "learning_rate": 1.1265765670604838e-05, - "loss": 0.2995, - "step": 9987 - }, - { - "epoch": 0.9409104825604673, - "grad_norm": 0.8807875514030457, - "learning_rate": 1.1264267783398463e-05, - "loss": 0.3159, - "step": 9988 - }, - { - "epoch": 0.9410046866536351, - "grad_norm": 0.7203329801559448, - "learning_rate": 1.1262769867364828e-05, - "loss": 0.2876, - "step": 9989 - }, - { - "epoch": 0.941098890746803, - "grad_norm": 0.5872389078140259, - "learning_rate": 1.1261271922538093e-05, - "loss": 0.279, - "step": 9990 - }, - { - "epoch": 0.9411930948399708, - "grad_norm": 0.9001421332359314, - "learning_rate": 1.12597739489524e-05, - "loss": 0.3243, - "step": 9991 - }, - { - "epoch": 0.9412872989331387, - "grad_norm": 0.6735695600509644, - "learning_rate": 1.1258275946641915e-05, - "loss": 0.287, - "step": 9992 - }, - { - "epoch": 0.9413815030263065, - "grad_norm": 0.6174083352088928, - "learning_rate": 1.1256777915640796e-05, - "loss": 0.2673, - "step": 9993 - }, - { - "epoch": 0.9414757071194744, - "grad_norm": 0.6570601463317871, - "learning_rate": 1.125527985598319e-05, - "loss": 0.267, - "step": 9994 - }, - { - "epoch": 0.9415699112126422, - "grad_norm": 0.789585292339325, - "learning_rate": 1.1253781767703267e-05, - "loss": 0.2891, - "step": 9995 - }, - { - "epoch": 0.9416641153058101, - "grad_norm": 0.8143724799156189, - "learning_rate": 1.1252283650835181e-05, - "loss": 0.3323, - "step": 9996 - }, - { - "epoch": 0.9417583193989779, - "grad_norm": 0.657049834728241, - "learning_rate": 1.1250785505413087e-05, - "loss": 0.2655, - "step": 9997 - }, - { - "epoch": 0.9418525234921458, - "grad_norm": 0.7499266266822815, - "learning_rate": 1.1249287331471152e-05, - "loss": 0.3034, - "step": 9998 - }, - { - "epoch": 0.9419467275853136, - "grad_norm": 0.6467655897140503, - "learning_rate": 1.1247789129043534e-05, - "loss": 0.2865, - "step": 9999 - }, - { - "epoch": 0.9420409316784815, - "grad_norm": 0.7246831059455872, - "learning_rate": 1.1246290898164393e-05, - "loss": 0.3224, - "step": 10000 - }, - { - "epoch": 0.9421351357716493, - "grad_norm": 0.7325423359870911, - "learning_rate": 1.1244792638867895e-05, - "loss": 0.3302, - "step": 10001 - }, - { - "epoch": 0.9422293398648172, - "grad_norm": 0.696479082107544, - "learning_rate": 1.1243294351188196e-05, - "loss": 0.3, - "step": 10002 - }, - { - "epoch": 0.942323543957985, - "grad_norm": 0.7129887342453003, - "learning_rate": 1.1241796035159464e-05, - "loss": 0.3351, - "step": 10003 - }, - { - "epoch": 0.9424177480511529, - "grad_norm": 1.0603524446487427, - "learning_rate": 1.1240297690815862e-05, - "loss": 0.338, - "step": 10004 - }, - { - "epoch": 0.9425119521443207, - "grad_norm": 0.7714053988456726, - "learning_rate": 1.1238799318191556e-05, - "loss": 0.3243, - "step": 10005 - }, - { - "epoch": 0.9426061562374886, - "grad_norm": 0.6713439226150513, - "learning_rate": 1.1237300917320708e-05, - "loss": 0.2984, - "step": 10006 - }, - { - "epoch": 0.9427003603306564, - "grad_norm": 0.6070764064788818, - "learning_rate": 1.1235802488237486e-05, - "loss": 
0.2728, - "step": 10007 - }, - { - "epoch": 0.9427945644238243, - "grad_norm": 0.7760406732559204, - "learning_rate": 1.1234304030976055e-05, - "loss": 0.2977, - "step": 10008 - }, - { - "epoch": 0.9428887685169921, - "grad_norm": 0.8591163754463196, - "learning_rate": 1.1232805545570585e-05, - "loss": 0.3368, - "step": 10009 - }, - { - "epoch": 0.94298297261016, - "grad_norm": 0.691236138343811, - "learning_rate": 1.1231307032055243e-05, - "loss": 0.3039, - "step": 10010 - }, - { - "epoch": 0.9430771767033278, - "grad_norm": 0.7132549285888672, - "learning_rate": 1.1229808490464193e-05, - "loss": 0.305, - "step": 10011 - }, - { - "epoch": 0.9431713807964957, - "grad_norm": 0.682148277759552, - "learning_rate": 1.1228309920831608e-05, - "loss": 0.3053, - "step": 10012 - }, - { - "epoch": 0.9432655848896635, - "grad_norm": 0.7111742496490479, - "learning_rate": 1.122681132319166e-05, - "loss": 0.3239, - "step": 10013 - }, - { - "epoch": 0.9433597889828313, - "grad_norm": 1.16665780544281, - "learning_rate": 1.1225312697578514e-05, - "loss": 0.2779, - "step": 10014 - }, - { - "epoch": 0.9434539930759992, - "grad_norm": 0.6893547177314758, - "learning_rate": 1.1223814044026344e-05, - "loss": 0.2827, - "step": 10015 - }, - { - "epoch": 0.943548197169167, - "grad_norm": 0.7077202796936035, - "learning_rate": 1.1222315362569323e-05, - "loss": 0.3069, - "step": 10016 - }, - { - "epoch": 0.9436424012623349, - "grad_norm": 0.7060577273368835, - "learning_rate": 1.1220816653241617e-05, - "loss": 0.312, - "step": 10017 - }, - { - "epoch": 0.9437366053555027, - "grad_norm": 1.0103267431259155, - "learning_rate": 1.1219317916077407e-05, - "loss": 0.306, - "step": 10018 - }, - { - "epoch": 0.9438308094486706, - "grad_norm": 0.7344710826873779, - "learning_rate": 1.1217819151110864e-05, - "loss": 0.3348, - "step": 10019 - }, - { - "epoch": 0.9439250135418384, - "grad_norm": 0.6737074851989746, - "learning_rate": 1.1216320358376158e-05, - "loss": 0.2857, - "step": 10020 - }, - { - "epoch": 0.9440192176350063, - "grad_norm": 0.7511048913002014, - "learning_rate": 1.1214821537907469e-05, - "loss": 0.3162, - "step": 10021 - }, - { - "epoch": 0.9441134217281741, - "grad_norm": 0.674433708190918, - "learning_rate": 1.1213322689738968e-05, - "loss": 0.2954, - "step": 10022 - }, - { - "epoch": 0.944207625821342, - "grad_norm": 0.71649169921875, - "learning_rate": 1.1211823813904834e-05, - "loss": 0.2882, - "step": 10023 - }, - { - "epoch": 0.9443018299145098, - "grad_norm": 0.600676953792572, - "learning_rate": 1.1210324910439242e-05, - "loss": 0.2599, - "step": 10024 - }, - { - "epoch": 0.9443960340076777, - "grad_norm": 0.7122073173522949, - "learning_rate": 1.1208825979376374e-05, - "loss": 0.3038, - "step": 10025 - }, - { - "epoch": 0.9444902381008455, - "grad_norm": 0.9700125455856323, - "learning_rate": 1.12073270207504e-05, - "loss": 0.3057, - "step": 10026 - }, - { - "epoch": 0.9445844421940134, - "grad_norm": 0.7915657758712769, - "learning_rate": 1.1205828034595506e-05, - "loss": 0.3049, - "step": 10027 - }, - { - "epoch": 0.9446786462871811, - "grad_norm": 0.8250691890716553, - "learning_rate": 1.1204329020945866e-05, - "loss": 0.3537, - "step": 10028 - }, - { - "epoch": 0.944772850380349, - "grad_norm": 0.8041085600852966, - "learning_rate": 1.1202829979835658e-05, - "loss": 0.2847, - "step": 10029 - }, - { - "epoch": 0.9448670544735168, - "grad_norm": 0.702141523361206, - "learning_rate": 1.1201330911299076e-05, - "loss": 0.3104, - "step": 10030 - }, - { - "epoch": 0.9449612585666847, - "grad_norm": 
1.1383745670318604, - "learning_rate": 1.1199831815370284e-05, - "loss": 0.338, - "step": 10031 - }, - { - "epoch": 0.9450554626598525, - "grad_norm": 0.6468948721885681, - "learning_rate": 1.1198332692083469e-05, - "loss": 0.2807, - "step": 10032 - }, - { - "epoch": 0.9451496667530204, - "grad_norm": 0.752659261226654, - "learning_rate": 1.1196833541472823e-05, - "loss": 0.298, - "step": 10033 - }, - { - "epoch": 0.9452438708461882, - "grad_norm": 0.6877515316009521, - "learning_rate": 1.1195334363572513e-05, - "loss": 0.2745, - "step": 10034 - }, - { - "epoch": 0.9453380749393561, - "grad_norm": 0.8536050915718079, - "learning_rate": 1.1193835158416737e-05, - "loss": 0.3299, - "step": 10035 - }, - { - "epoch": 0.9454322790325239, - "grad_norm": 0.6285669803619385, - "learning_rate": 1.1192335926039673e-05, - "loss": 0.2666, - "step": 10036 - }, - { - "epoch": 0.9455264831256918, - "grad_norm": 0.7156527638435364, - "learning_rate": 1.1190836666475503e-05, - "loss": 0.2997, - "step": 10037 - }, - { - "epoch": 0.9456206872188596, - "grad_norm": 0.8850581049919128, - "learning_rate": 1.1189337379758415e-05, - "loss": 0.3337, - "step": 10038 - }, - { - "epoch": 0.9457148913120275, - "grad_norm": 0.7407930493354797, - "learning_rate": 1.1187838065922598e-05, - "loss": 0.2895, - "step": 10039 - }, - { - "epoch": 0.9458090954051953, - "grad_norm": 0.791225254535675, - "learning_rate": 1.1186338725002238e-05, - "loss": 0.2869, - "step": 10040 - }, - { - "epoch": 0.9459032994983632, - "grad_norm": 0.6969056129455566, - "learning_rate": 1.1184839357031516e-05, - "loss": 0.2986, - "step": 10041 - }, - { - "epoch": 0.945997503591531, - "grad_norm": 0.6812271475791931, - "learning_rate": 1.1183339962044624e-05, - "loss": 0.3017, - "step": 10042 - }, - { - "epoch": 0.9460917076846989, - "grad_norm": 0.7941222190856934, - "learning_rate": 1.1181840540075752e-05, - "loss": 0.3112, - "step": 10043 - }, - { - "epoch": 0.9461859117778667, - "grad_norm": 0.7960657477378845, - "learning_rate": 1.1180341091159091e-05, - "loss": 0.275, - "step": 10044 - }, - { - "epoch": 0.9462801158710346, - "grad_norm": 0.954747200012207, - "learning_rate": 1.1178841615328824e-05, - "loss": 0.3356, - "step": 10045 - }, - { - "epoch": 0.9463743199642024, - "grad_norm": 0.7375147342681885, - "learning_rate": 1.1177342112619145e-05, - "loss": 0.3079, - "step": 10046 - }, - { - "epoch": 0.9464685240573703, - "grad_norm": 0.8293735384941101, - "learning_rate": 1.1175842583064247e-05, - "loss": 0.3262, - "step": 10047 - }, - { - "epoch": 0.9465627281505381, - "grad_norm": 0.7221106886863708, - "learning_rate": 1.1174343026698318e-05, - "loss": 0.3709, - "step": 10048 - }, - { - "epoch": 0.946656932243706, - "grad_norm": 0.7418912649154663, - "learning_rate": 1.1172843443555552e-05, - "loss": 0.2864, - "step": 10049 - }, - { - "epoch": 0.9467511363368738, - "grad_norm": 0.6042212843894958, - "learning_rate": 1.1171343833670146e-05, - "loss": 0.2939, - "step": 10050 - }, - { - "epoch": 0.9468453404300416, - "grad_norm": 0.6550147533416748, - "learning_rate": 1.1169844197076282e-05, - "loss": 0.2701, - "step": 10051 - }, - { - "epoch": 0.9469395445232095, - "grad_norm": 0.7457708716392517, - "learning_rate": 1.1168344533808164e-05, - "loss": 0.3481, - "step": 10052 - }, - { - "epoch": 0.9470337486163773, - "grad_norm": 0.7013445496559143, - "learning_rate": 1.1166844843899986e-05, - "loss": 0.3074, - "step": 10053 - }, - { - "epoch": 0.9471279527095452, - "grad_norm": 0.5988826751708984, - "learning_rate": 1.1165345127385938e-05, - 
"loss": 0.243, - "step": 10054 - }, - { - "epoch": 0.947222156802713, - "grad_norm": 0.7739607095718384, - "learning_rate": 1.116384538430022e-05, - "loss": 0.3289, - "step": 10055 - }, - { - "epoch": 0.9473163608958809, - "grad_norm": 0.7369787693023682, - "learning_rate": 1.1162345614677029e-05, - "loss": 0.3182, - "step": 10056 - }, - { - "epoch": 0.9474105649890487, - "grad_norm": 0.6781864762306213, - "learning_rate": 1.1160845818550556e-05, - "loss": 0.2643, - "step": 10057 - }, - { - "epoch": 0.9475047690822166, - "grad_norm": 0.7162042260169983, - "learning_rate": 1.1159345995955007e-05, - "loss": 0.2918, - "step": 10058 - }, - { - "epoch": 0.9475989731753844, - "grad_norm": 0.887563169002533, - "learning_rate": 1.1157846146924576e-05, - "loss": 0.3374, - "step": 10059 - }, - { - "epoch": 0.9476931772685523, - "grad_norm": 0.6928210258483887, - "learning_rate": 1.1156346271493461e-05, - "loss": 0.2709, - "step": 10060 - }, - { - "epoch": 0.9477873813617201, - "grad_norm": 0.6558769941329956, - "learning_rate": 1.1154846369695864e-05, - "loss": 0.2921, - "step": 10061 - }, - { - "epoch": 0.947881585454888, - "grad_norm": 0.7086412310600281, - "learning_rate": 1.115334644156598e-05, - "loss": 0.307, - "step": 10062 - }, - { - "epoch": 0.9479757895480558, - "grad_norm": 0.7829697728157043, - "learning_rate": 1.1151846487138016e-05, - "loss": 0.3213, - "step": 10063 - }, - { - "epoch": 0.9480699936412237, - "grad_norm": 0.8936841487884521, - "learning_rate": 1.1150346506446173e-05, - "loss": 0.2704, - "step": 10064 - }, - { - "epoch": 0.9481641977343915, - "grad_norm": 0.6925931572914124, - "learning_rate": 1.1148846499524648e-05, - "loss": 0.2528, - "step": 10065 - }, - { - "epoch": 0.9482584018275594, - "grad_norm": 0.6956936120986938, - "learning_rate": 1.1147346466407645e-05, - "loss": 0.2561, - "step": 10066 - }, - { - "epoch": 0.9483526059207272, - "grad_norm": 0.6767364740371704, - "learning_rate": 1.1145846407129371e-05, - "loss": 0.3033, - "step": 10067 - }, - { - "epoch": 0.9484468100138951, - "grad_norm": 0.7587303519248962, - "learning_rate": 1.1144346321724027e-05, - "loss": 0.318, - "step": 10068 - }, - { - "epoch": 0.9485410141070629, - "grad_norm": 1.040582299232483, - "learning_rate": 1.1142846210225812e-05, - "loss": 0.3675, - "step": 10069 - }, - { - "epoch": 0.9486352182002308, - "grad_norm": 1.0852214097976685, - "learning_rate": 1.1141346072668944e-05, - "loss": 0.3118, - "step": 10070 - }, - { - "epoch": 0.9487294222933986, - "grad_norm": 0.6972841024398804, - "learning_rate": 1.1139845909087614e-05, - "loss": 0.2794, - "step": 10071 - }, - { - "epoch": 0.9488236263865665, - "grad_norm": 0.7939976453781128, - "learning_rate": 1.1138345719516038e-05, - "loss": 0.3207, - "step": 10072 - }, - { - "epoch": 0.9489178304797343, - "grad_norm": 0.733759880065918, - "learning_rate": 1.1136845503988418e-05, - "loss": 0.3217, - "step": 10073 - }, - { - "epoch": 0.9490120345729022, - "grad_norm": 0.8559455871582031, - "learning_rate": 1.113534526253896e-05, - "loss": 0.3239, - "step": 10074 - }, - { - "epoch": 0.94910623866607, - "grad_norm": 0.8254885077476501, - "learning_rate": 1.1133844995201877e-05, - "loss": 0.3143, - "step": 10075 - }, - { - "epoch": 0.9492004427592379, - "grad_norm": 0.8626786470413208, - "learning_rate": 1.1132344702011375e-05, - "loss": 0.2763, - "step": 10076 - }, - { - "epoch": 0.9492946468524057, - "grad_norm": 0.786437451839447, - "learning_rate": 1.1130844383001658e-05, - "loss": 0.3107, - "step": 10077 - }, - { - "epoch": 0.9493888509455736, 
- "grad_norm": 0.7566415667533875, - "learning_rate": 1.1129344038206945e-05, - "loss": 0.26, - "step": 10078 - }, - { - "epoch": 0.9494830550387414, - "grad_norm": 0.6886942982673645, - "learning_rate": 1.112784366766144e-05, - "loss": 0.3093, - "step": 10079 - }, - { - "epoch": 0.9495772591319093, - "grad_norm": 0.7516670227050781, - "learning_rate": 1.1126343271399356e-05, - "loss": 0.2878, - "step": 10080 - }, - { - "epoch": 0.9496714632250771, - "grad_norm": 0.5929188132286072, - "learning_rate": 1.1124842849454903e-05, - "loss": 0.2698, - "step": 10081 - }, - { - "epoch": 0.949765667318245, - "grad_norm": 0.6839563846588135, - "learning_rate": 1.1123342401862292e-05, - "loss": 0.3097, - "step": 10082 - }, - { - "epoch": 0.9498598714114128, - "grad_norm": 0.7374429702758789, - "learning_rate": 1.1121841928655739e-05, - "loss": 0.2625, - "step": 10083 - }, - { - "epoch": 0.9499540755045807, - "grad_norm": 0.6900131106376648, - "learning_rate": 1.1120341429869454e-05, - "loss": 0.2946, - "step": 10084 - }, - { - "epoch": 0.9500482795977485, - "grad_norm": 0.7028419375419617, - "learning_rate": 1.111884090553765e-05, - "loss": 0.2934, - "step": 10085 - }, - { - "epoch": 0.9501424836909164, - "grad_norm": 0.745921790599823, - "learning_rate": 1.1117340355694544e-05, - "loss": 0.31, - "step": 10086 - }, - { - "epoch": 0.9502366877840842, - "grad_norm": 0.84515780210495, - "learning_rate": 1.111583978037435e-05, - "loss": 0.3079, - "step": 10087 - }, - { - "epoch": 0.9503308918772521, - "grad_norm": 0.7456156015396118, - "learning_rate": 1.1114339179611286e-05, - "loss": 0.3248, - "step": 10088 - }, - { - "epoch": 0.9504250959704199, - "grad_norm": 1.3288429975509644, - "learning_rate": 1.1112838553439563e-05, - "loss": 0.2677, - "step": 10089 - }, - { - "epoch": 0.9505193000635878, - "grad_norm": 0.6854212880134583, - "learning_rate": 1.1111337901893402e-05, - "loss": 0.265, - "step": 10090 - }, - { - "epoch": 0.9506135041567556, - "grad_norm": 0.7276402115821838, - "learning_rate": 1.1109837225007014e-05, - "loss": 0.2898, - "step": 10091 - }, - { - "epoch": 0.9507077082499235, - "grad_norm": 0.7948499917984009, - "learning_rate": 1.1108336522814624e-05, - "loss": 0.3183, - "step": 10092 - }, - { - "epoch": 0.9508019123430913, - "grad_norm": 0.6931252479553223, - "learning_rate": 1.1106835795350448e-05, - "loss": 0.3133, - "step": 10093 - }, - { - "epoch": 0.9508961164362592, - "grad_norm": 0.6482195258140564, - "learning_rate": 1.1105335042648701e-05, - "loss": 0.275, - "step": 10094 - }, - { - "epoch": 0.950990320529427, - "grad_norm": 0.7661250233650208, - "learning_rate": 1.1103834264743607e-05, - "loss": 0.3256, - "step": 10095 - }, - { - "epoch": 0.9510845246225949, - "grad_norm": 0.8063918352127075, - "learning_rate": 1.1102333461669386e-05, - "loss": 0.3035, - "step": 10096 - }, - { - "epoch": 0.9511787287157627, - "grad_norm": 0.8431673645973206, - "learning_rate": 1.1100832633460254e-05, - "loss": 0.317, - "step": 10097 - }, - { - "epoch": 0.9512729328089305, - "grad_norm": 0.8334606885910034, - "learning_rate": 1.109933178015044e-05, - "loss": 0.2756, - "step": 10098 - }, - { - "epoch": 0.9513671369020984, - "grad_norm": 0.7119786143302917, - "learning_rate": 1.1097830901774159e-05, - "loss": 0.3085, - "step": 10099 - }, - { - "epoch": 0.9514613409952662, - "grad_norm": 0.6422887444496155, - "learning_rate": 1.1096329998365636e-05, - "loss": 0.3312, - "step": 10100 - }, - { - "epoch": 0.9515555450884341, - "grad_norm": 1.3474273681640625, - "learning_rate": 
1.1094829069959092e-05, - "loss": 0.3206, - "step": 10101 - }, - { - "epoch": 0.951649749181602, - "grad_norm": 0.6415268182754517, - "learning_rate": 1.1093328116588753e-05, - "loss": 0.2597, - "step": 10102 - }, - { - "epoch": 0.9517439532747698, - "grad_norm": 0.7060954570770264, - "learning_rate": 1.1091827138288842e-05, - "loss": 0.298, - "step": 10103 - }, - { - "epoch": 0.9518381573679376, - "grad_norm": 0.787318766117096, - "learning_rate": 1.1090326135093584e-05, - "loss": 0.3084, - "step": 10104 - }, - { - "epoch": 0.9519323614611055, - "grad_norm": 0.7191594839096069, - "learning_rate": 1.1088825107037204e-05, - "loss": 0.3231, - "step": 10105 - }, - { - "epoch": 0.9520265655542733, - "grad_norm": 0.7752695679664612, - "learning_rate": 1.1087324054153925e-05, - "loss": 0.3025, - "step": 10106 - }, - { - "epoch": 0.9521207696474412, - "grad_norm": 0.7337049245834351, - "learning_rate": 1.108582297647798e-05, - "loss": 0.2811, - "step": 10107 - }, - { - "epoch": 0.952214973740609, - "grad_norm": 0.6960436701774597, - "learning_rate": 1.108432187404359e-05, - "loss": 0.2939, - "step": 10108 - }, - { - "epoch": 0.9523091778337769, - "grad_norm": 0.8235780596733093, - "learning_rate": 1.1082820746884984e-05, - "loss": 0.3027, - "step": 10109 - }, - { - "epoch": 0.9524033819269447, - "grad_norm": 0.6529624462127686, - "learning_rate": 1.1081319595036392e-05, - "loss": 0.3243, - "step": 10110 - }, - { - "epoch": 0.9524975860201126, - "grad_norm": 0.7159841656684875, - "learning_rate": 1.107981841853204e-05, - "loss": 0.3139, - "step": 10111 - }, - { - "epoch": 0.9525917901132804, - "grad_norm": 0.7163367867469788, - "learning_rate": 1.1078317217406158e-05, - "loss": 0.3001, - "step": 10112 - }, - { - "epoch": 0.9526859942064483, - "grad_norm": 0.7652973532676697, - "learning_rate": 1.1076815991692978e-05, - "loss": 0.2614, - "step": 10113 - }, - { - "epoch": 0.9527801982996161, - "grad_norm": 0.692310094833374, - "learning_rate": 1.1075314741426721e-05, - "loss": 0.2799, - "step": 10114 - }, - { - "epoch": 0.952874402392784, - "grad_norm": 0.787175714969635, - "learning_rate": 1.1073813466641633e-05, - "loss": 0.3231, - "step": 10115 - }, - { - "epoch": 0.9529686064859518, - "grad_norm": 0.6837242245674133, - "learning_rate": 1.1072312167371932e-05, - "loss": 0.3428, - "step": 10116 - }, - { - "epoch": 0.9530628105791197, - "grad_norm": 1.015246033668518, - "learning_rate": 1.1070810843651856e-05, - "loss": 0.3004, - "step": 10117 - }, - { - "epoch": 0.9531570146722875, - "grad_norm": 0.7722873091697693, - "learning_rate": 1.1069309495515642e-05, - "loss": 0.2799, - "step": 10118 - }, - { - "epoch": 0.9532512187654554, - "grad_norm": 0.7015636563301086, - "learning_rate": 1.1067808122997511e-05, - "loss": 0.2809, - "step": 10119 - }, - { - "epoch": 0.9533454228586232, - "grad_norm": 0.6829482316970825, - "learning_rate": 1.1066306726131709e-05, - "loss": 0.3179, - "step": 10120 - }, - { - "epoch": 0.9534396269517911, - "grad_norm": 0.6982603073120117, - "learning_rate": 1.1064805304952459e-05, - "loss": 0.3144, - "step": 10121 - }, - { - "epoch": 0.9535338310449589, - "grad_norm": 0.7205075621604919, - "learning_rate": 1.1063303859494004e-05, - "loss": 0.3069, - "step": 10122 - }, - { - "epoch": 0.9536280351381268, - "grad_norm": 0.6632360219955444, - "learning_rate": 1.1061802389790576e-05, - "loss": 0.318, - "step": 10123 - }, - { - "epoch": 0.9537222392312946, - "grad_norm": 0.7651366591453552, - "learning_rate": 1.1060300895876412e-05, - "loss": 0.3239, - "step": 10124 - }, - { - 
"epoch": 0.9538164433244625, - "grad_norm": 0.6194216012954712, - "learning_rate": 1.105879937778575e-05, - "loss": 0.2508, - "step": 10125 - }, - { - "epoch": 0.9539106474176303, - "grad_norm": 0.9242895841598511, - "learning_rate": 1.105729783555282e-05, - "loss": 0.3313, - "step": 10126 - }, - { - "epoch": 0.9540048515107982, - "grad_norm": 0.7633430361747742, - "learning_rate": 1.1055796269211868e-05, - "loss": 0.2767, - "step": 10127 - }, - { - "epoch": 0.954099055603966, - "grad_norm": 0.8665676116943359, - "learning_rate": 1.1054294678797126e-05, - "loss": 0.3245, - "step": 10128 - }, - { - "epoch": 0.9541932596971339, - "grad_norm": 0.7046676278114319, - "learning_rate": 1.1052793064342835e-05, - "loss": 0.3082, - "step": 10129 - }, - { - "epoch": 0.9542874637903017, - "grad_norm": 0.6994553804397583, - "learning_rate": 1.1051291425883237e-05, - "loss": 0.2944, - "step": 10130 - }, - { - "epoch": 0.9543816678834696, - "grad_norm": 0.6774550080299377, - "learning_rate": 1.1049789763452565e-05, - "loss": 0.3189, - "step": 10131 - }, - { - "epoch": 0.9544758719766374, - "grad_norm": 0.7153673768043518, - "learning_rate": 1.1048288077085065e-05, - "loss": 0.2849, - "step": 10132 - }, - { - "epoch": 0.9545700760698053, - "grad_norm": 0.8802182674407959, - "learning_rate": 1.1046786366814974e-05, - "loss": 0.3355, - "step": 10133 - }, - { - "epoch": 0.9546642801629731, - "grad_norm": 0.7563669681549072, - "learning_rate": 1.1045284632676535e-05, - "loss": 0.3308, - "step": 10134 - }, - { - "epoch": 0.954758484256141, - "grad_norm": 0.7985135316848755, - "learning_rate": 1.1043782874703992e-05, - "loss": 0.3175, - "step": 10135 - }, - { - "epoch": 0.9548526883493088, - "grad_norm": 0.6169210076332092, - "learning_rate": 1.1042281092931584e-05, - "loss": 0.2815, - "step": 10136 - }, - { - "epoch": 0.9549468924424767, - "grad_norm": 0.6767523884773254, - "learning_rate": 1.1040779287393553e-05, - "loss": 0.3045, - "step": 10137 - }, - { - "epoch": 0.9550410965356445, - "grad_norm": 0.6888035535812378, - "learning_rate": 1.103927745812415e-05, - "loss": 0.288, - "step": 10138 - }, - { - "epoch": 0.9551353006288124, - "grad_norm": 0.7685443162918091, - "learning_rate": 1.1037775605157608e-05, - "loss": 0.3002, - "step": 10139 - }, - { - "epoch": 0.9552295047219802, - "grad_norm": 0.6887650489807129, - "learning_rate": 1.103627372852818e-05, - "loss": 0.3077, - "step": 10140 - }, - { - "epoch": 0.955323708815148, - "grad_norm": 0.8367592692375183, - "learning_rate": 1.1034771828270107e-05, - "loss": 0.315, - "step": 10141 - }, - { - "epoch": 0.9554179129083159, - "grad_norm": 0.8385595679283142, - "learning_rate": 1.1033269904417636e-05, - "loss": 0.2844, - "step": 10142 - }, - { - "epoch": 0.9555121170014838, - "grad_norm": 0.8150827884674072, - "learning_rate": 1.1031767957005015e-05, - "loss": 0.301, - "step": 10143 - }, - { - "epoch": 0.9556063210946516, - "grad_norm": 0.6458438634872437, - "learning_rate": 1.1030265986066488e-05, - "loss": 0.3448, - "step": 10144 - }, - { - "epoch": 0.9557005251878194, - "grad_norm": 0.8682982921600342, - "learning_rate": 1.1028763991636304e-05, - "loss": 0.2766, - "step": 10145 - }, - { - "epoch": 0.9557947292809873, - "grad_norm": 0.7733973264694214, - "learning_rate": 1.1027261973748709e-05, - "loss": 0.3141, - "step": 10146 - }, - { - "epoch": 0.9558889333741551, - "grad_norm": 0.7197454571723938, - "learning_rate": 1.1025759932437951e-05, - "loss": 0.3315, - "step": 10147 - }, - { - "epoch": 0.955983137467323, - "grad_norm": 0.6769589781761169, - 
"learning_rate": 1.1024257867738284e-05, - "loss": 0.2866, - "step": 10148 - }, - { - "epoch": 0.9560773415604908, - "grad_norm": 0.8776320219039917, - "learning_rate": 1.1022755779683949e-05, - "loss": 0.3154, - "step": 10149 - }, - { - "epoch": 0.9561715456536587, - "grad_norm": 0.6937875747680664, - "learning_rate": 1.1021253668309206e-05, - "loss": 0.2806, - "step": 10150 - }, - { - "epoch": 0.9562657497468265, - "grad_norm": 0.6783882975578308, - "learning_rate": 1.1019751533648295e-05, - "loss": 0.3129, - "step": 10151 - }, - { - "epoch": 0.9563599538399944, - "grad_norm": 0.6711874604225159, - "learning_rate": 1.1018249375735475e-05, - "loss": 0.2802, - "step": 10152 - }, - { - "epoch": 0.9564541579331622, - "grad_norm": 0.7765811085700989, - "learning_rate": 1.1016747194604994e-05, - "loss": 0.3678, - "step": 10153 - }, - { - "epoch": 0.9565483620263301, - "grad_norm": 0.8686683773994446, - "learning_rate": 1.10152449902911e-05, - "loss": 0.2879, - "step": 10154 - }, - { - "epoch": 0.9566425661194979, - "grad_norm": 0.7165391445159912, - "learning_rate": 1.1013742762828054e-05, - "loss": 0.3097, - "step": 10155 - }, - { - "epoch": 0.9567367702126658, - "grad_norm": 0.6474335789680481, - "learning_rate": 1.1012240512250107e-05, - "loss": 0.2584, - "step": 10156 - }, - { - "epoch": 0.9568309743058336, - "grad_norm": 0.6844523549079895, - "learning_rate": 1.1010738238591507e-05, - "loss": 0.3393, - "step": 10157 - }, - { - "epoch": 0.9569251783990015, - "grad_norm": 0.6997516751289368, - "learning_rate": 1.1009235941886516e-05, - "loss": 0.3498, - "step": 10158 - }, - { - "epoch": 0.9570193824921693, - "grad_norm": 0.6587337851524353, - "learning_rate": 1.1007733622169381e-05, - "loss": 0.3142, - "step": 10159 - }, - { - "epoch": 0.9571135865853372, - "grad_norm": 0.6753538846969604, - "learning_rate": 1.1006231279474365e-05, - "loss": 0.3001, - "step": 10160 - }, - { - "epoch": 0.957207790678505, - "grad_norm": 0.626148521900177, - "learning_rate": 1.1004728913835717e-05, - "loss": 0.2723, - "step": 10161 - }, - { - "epoch": 0.9573019947716729, - "grad_norm": 0.8601760864257812, - "learning_rate": 1.1003226525287693e-05, - "loss": 0.3074, - "step": 10162 - }, - { - "epoch": 0.9573961988648407, - "grad_norm": 0.7324545383453369, - "learning_rate": 1.1001724113864558e-05, - "loss": 0.2976, - "step": 10163 - }, - { - "epoch": 0.9574904029580086, - "grad_norm": 0.789679765701294, - "learning_rate": 1.1000221679600562e-05, - "loss": 0.3237, - "step": 10164 - }, - { - "epoch": 0.9575846070511764, - "grad_norm": 0.6558475494384766, - "learning_rate": 1.0998719222529966e-05, - "loss": 0.2752, - "step": 10165 - }, - { - "epoch": 0.9576788111443442, - "grad_norm": 0.8714675307273865, - "learning_rate": 1.0997216742687022e-05, - "loss": 0.2974, - "step": 10166 - }, - { - "epoch": 0.957773015237512, - "grad_norm": 0.8132731914520264, - "learning_rate": 1.0995714240105999e-05, - "loss": 0.2722, - "step": 10167 - }, - { - "epoch": 0.9578672193306799, - "grad_norm": 0.6529768705368042, - "learning_rate": 1.099421171482115e-05, - "loss": 0.2786, - "step": 10168 - }, - { - "epoch": 0.9579614234238477, - "grad_norm": 0.7080832719802856, - "learning_rate": 1.0992709166866738e-05, - "loss": 0.2823, - "step": 10169 - }, - { - "epoch": 0.9580556275170156, - "grad_norm": 0.6487346291542053, - "learning_rate": 1.0991206596277023e-05, - "loss": 0.2689, - "step": 10170 - }, - { - "epoch": 0.9581498316101834, - "grad_norm": 0.8792241215705872, - "learning_rate": 1.098970400308626e-05, - "loss": 0.3442, - 
"step": 10171 - }, - { - "epoch": 0.9582440357033513, - "grad_norm": 0.6926685571670532, - "learning_rate": 1.0988201387328716e-05, - "loss": 0.3028, - "step": 10172 - }, - { - "epoch": 0.9583382397965191, - "grad_norm": 0.7001669406890869, - "learning_rate": 1.0986698749038654e-05, - "loss": 0.2992, - "step": 10173 - }, - { - "epoch": 0.958432443889687, - "grad_norm": 0.6960703730583191, - "learning_rate": 1.0985196088250332e-05, - "loss": 0.3035, - "step": 10174 - }, - { - "epoch": 0.9585266479828548, - "grad_norm": 1.045765995979309, - "learning_rate": 1.098369340499802e-05, - "loss": 0.3026, - "step": 10175 - }, - { - "epoch": 0.9586208520760227, - "grad_norm": 0.7972753643989563, - "learning_rate": 1.0982190699315974e-05, - "loss": 0.3068, - "step": 10176 - }, - { - "epoch": 0.9587150561691905, - "grad_norm": 0.7426035404205322, - "learning_rate": 1.098068797123846e-05, - "loss": 0.3237, - "step": 10177 - }, - { - "epoch": 0.9588092602623584, - "grad_norm": 0.6838176846504211, - "learning_rate": 1.0979185220799747e-05, - "loss": 0.2966, - "step": 10178 - }, - { - "epoch": 0.9589034643555262, - "grad_norm": 0.7067477107048035, - "learning_rate": 1.0977682448034092e-05, - "loss": 0.3412, - "step": 10179 - }, - { - "epoch": 0.958997668448694, - "grad_norm": 0.8020039200782776, - "learning_rate": 1.0976179652975769e-05, - "loss": 0.3128, - "step": 10180 - }, - { - "epoch": 0.9590918725418619, - "grad_norm": 0.7284244894981384, - "learning_rate": 1.0974676835659039e-05, - "loss": 0.2662, - "step": 10181 - }, - { - "epoch": 0.9591860766350297, - "grad_norm": 0.7223349213600159, - "learning_rate": 1.0973173996118169e-05, - "loss": 0.3001, - "step": 10182 - }, - { - "epoch": 0.9592802807281976, - "grad_norm": 0.7770244479179382, - "learning_rate": 1.097167113438743e-05, - "loss": 0.3341, - "step": 10183 - }, - { - "epoch": 0.9593744848213654, - "grad_norm": 0.6680354475975037, - "learning_rate": 1.0970168250501083e-05, - "loss": 0.3141, - "step": 10184 - }, - { - "epoch": 0.9594686889145333, - "grad_norm": 0.7461274862289429, - "learning_rate": 1.09686653444934e-05, - "loss": 0.3054, - "step": 10185 - }, - { - "epoch": 0.9595628930077011, - "grad_norm": 0.6635683178901672, - "learning_rate": 1.0967162416398649e-05, - "loss": 0.2738, - "step": 10186 - }, - { - "epoch": 0.959657097100869, - "grad_norm": 0.5671108365058899, - "learning_rate": 1.0965659466251102e-05, - "loss": 0.2548, - "step": 10187 - }, - { - "epoch": 0.9597513011940368, - "grad_norm": 0.8083769083023071, - "learning_rate": 1.0964156494085023e-05, - "loss": 0.3534, - "step": 10188 - }, - { - "epoch": 0.9598455052872047, - "grad_norm": 0.6782791018486023, - "learning_rate": 1.0962653499934686e-05, - "loss": 0.2924, - "step": 10189 - }, - { - "epoch": 0.9599397093803725, - "grad_norm": 0.697952926158905, - "learning_rate": 1.096115048383436e-05, - "loss": 0.2853, - "step": 10190 - }, - { - "epoch": 0.9600339134735404, - "grad_norm": 0.7625333070755005, - "learning_rate": 1.0959647445818315e-05, - "loss": 0.3413, - "step": 10191 - }, - { - "epoch": 0.9601281175667082, - "grad_norm": 0.6706798076629639, - "learning_rate": 1.0958144385920826e-05, - "loss": 0.3111, - "step": 10192 - }, - { - "epoch": 0.9602223216598761, - "grad_norm": 0.7211931943893433, - "learning_rate": 1.0956641304176164e-05, - "loss": 0.2685, - "step": 10193 - }, - { - "epoch": 0.9603165257530439, - "grad_norm": 0.7056351900100708, - "learning_rate": 1.0955138200618598e-05, - "loss": 0.2943, - "step": 10194 - }, - { - "epoch": 0.9604107298462118, - "grad_norm": 
0.7085780501365662, - "learning_rate": 1.0953635075282405e-05, - "loss": 0.2904, - "step": 10195 - }, - { - "epoch": 0.9605049339393796, - "grad_norm": 0.6431682705879211, - "learning_rate": 1.095213192820186e-05, - "loss": 0.2553, - "step": 10196 - }, - { - "epoch": 0.9605991380325475, - "grad_norm": 0.680701494216919, - "learning_rate": 1.095062875941123e-05, - "loss": 0.3144, - "step": 10197 - }, - { - "epoch": 0.9606933421257153, - "grad_norm": 0.7396478056907654, - "learning_rate": 1.0949125568944799e-05, - "loss": 0.2858, - "step": 10198 - }, - { - "epoch": 0.9607875462188832, - "grad_norm": 0.6372470855712891, - "learning_rate": 1.0947622356836834e-05, - "loss": 0.281, - "step": 10199 - }, - { - "epoch": 0.960881750312051, - "grad_norm": 0.7818843722343445, - "learning_rate": 1.0946119123121615e-05, - "loss": 0.3254, - "step": 10200 - }, - { - "epoch": 0.9609759544052189, - "grad_norm": 0.6971850395202637, - "learning_rate": 1.0944615867833415e-05, - "loss": 0.3234, - "step": 10201 - }, - { - "epoch": 0.9610701584983867, - "grad_norm": 0.7708349823951721, - "learning_rate": 1.0943112591006514e-05, - "loss": 0.3201, - "step": 10202 - }, - { - "epoch": 0.9611643625915546, - "grad_norm": 0.6892606616020203, - "learning_rate": 1.0941609292675186e-05, - "loss": 0.2662, - "step": 10203 - }, - { - "epoch": 0.9612585666847224, - "grad_norm": 0.7433465123176575, - "learning_rate": 1.094010597287371e-05, - "loss": 0.3191, - "step": 10204 - }, - { - "epoch": 0.9613527707778903, - "grad_norm": 0.6859291195869446, - "learning_rate": 1.0938602631636366e-05, - "loss": 0.2704, - "step": 10205 - }, - { - "epoch": 0.9614469748710581, - "grad_norm": 0.6968269348144531, - "learning_rate": 1.0937099268997428e-05, - "loss": 0.3008, - "step": 10206 - }, - { - "epoch": 0.961541178964226, - "grad_norm": 0.6571604013442993, - "learning_rate": 1.093559588499118e-05, - "loss": 0.2769, - "step": 10207 - }, - { - "epoch": 0.9616353830573938, - "grad_norm": 0.6723941564559937, - "learning_rate": 1.0934092479651897e-05, - "loss": 0.3109, - "step": 10208 - }, - { - "epoch": 0.9617295871505617, - "grad_norm": 0.6984074711799622, - "learning_rate": 1.0932589053013862e-05, - "loss": 0.3219, - "step": 10209 - }, - { - "epoch": 0.9618237912437295, - "grad_norm": 0.5909333229064941, - "learning_rate": 1.0931085605111354e-05, - "loss": 0.2634, - "step": 10210 - }, - { - "epoch": 0.9619179953368974, - "grad_norm": 0.7381008863449097, - "learning_rate": 1.0929582135978651e-05, - "loss": 0.3142, - "step": 10211 - }, - { - "epoch": 0.9620121994300652, - "grad_norm": 0.7191177606582642, - "learning_rate": 1.0928078645650042e-05, - "loss": 0.2785, - "step": 10212 - }, - { - "epoch": 0.9621064035232331, - "grad_norm": 0.7118578553199768, - "learning_rate": 1.0926575134159805e-05, - "loss": 0.3119, - "step": 10213 - }, - { - "epoch": 0.9622006076164009, - "grad_norm": 0.799423336982727, - "learning_rate": 1.0925071601542218e-05, - "loss": 0.3041, - "step": 10214 - }, - { - "epoch": 0.9622948117095688, - "grad_norm": 0.7251821756362915, - "learning_rate": 1.0923568047831572e-05, - "loss": 0.3398, - "step": 10215 - }, - { - "epoch": 0.9623890158027366, - "grad_norm": 0.856343686580658, - "learning_rate": 1.0922064473062144e-05, - "loss": 0.2934, - "step": 10216 - }, - { - "epoch": 0.9624832198959045, - "grad_norm": 0.6228210926055908, - "learning_rate": 1.0920560877268218e-05, - "loss": 0.2785, - "step": 10217 - }, - { - "epoch": 0.9625774239890723, - "grad_norm": 0.7916138172149658, - "learning_rate": 1.0919057260484084e-05, - 
"loss": 0.3263, - "step": 10218 - }, - { - "epoch": 0.9626716280822402, - "grad_norm": 0.678949236869812, - "learning_rate": 1.0917553622744022e-05, - "loss": 0.2778, - "step": 10219 - }, - { - "epoch": 0.962765832175408, - "grad_norm": 0.6774210929870605, - "learning_rate": 1.0916049964082319e-05, - "loss": 0.2849, - "step": 10220 - }, - { - "epoch": 0.9628600362685759, - "grad_norm": 0.8104318380355835, - "learning_rate": 1.0914546284533259e-05, - "loss": 0.3602, - "step": 10221 - }, - { - "epoch": 0.9629542403617437, - "grad_norm": 0.7064844965934753, - "learning_rate": 1.0913042584131126e-05, - "loss": 0.334, - "step": 10222 - }, - { - "epoch": 0.9630484444549116, - "grad_norm": 0.8207798600196838, - "learning_rate": 1.0911538862910213e-05, - "loss": 0.2751, - "step": 10223 - }, - { - "epoch": 0.9631426485480794, - "grad_norm": 0.7319862842559814, - "learning_rate": 1.0910035120904807e-05, - "loss": 0.276, - "step": 10224 - }, - { - "epoch": 0.9632368526412473, - "grad_norm": 0.7391321659088135, - "learning_rate": 1.090853135814919e-05, - "loss": 0.332, - "step": 10225 - }, - { - "epoch": 0.9633310567344151, - "grad_norm": 0.6955612301826477, - "learning_rate": 1.0907027574677653e-05, - "loss": 0.3061, - "step": 10226 - }, - { - "epoch": 0.963425260827583, - "grad_norm": 0.6962710618972778, - "learning_rate": 1.0905523770524485e-05, - "loss": 0.3074, - "step": 10227 - }, - { - "epoch": 0.9635194649207508, - "grad_norm": 0.8167105913162231, - "learning_rate": 1.0904019945723976e-05, - "loss": 0.273, - "step": 10228 - }, - { - "epoch": 0.9636136690139187, - "grad_norm": 0.7759659290313721, - "learning_rate": 1.0902516100310412e-05, - "loss": 0.3294, - "step": 10229 - }, - { - "epoch": 0.9637078731070865, - "grad_norm": 0.7253290414810181, - "learning_rate": 1.0901012234318088e-05, - "loss": 0.2891, - "step": 10230 - }, - { - "epoch": 0.9638020772002543, - "grad_norm": 0.7606688737869263, - "learning_rate": 1.0899508347781287e-05, - "loss": 0.3169, - "step": 10231 - }, - { - "epoch": 0.9638962812934222, - "grad_norm": 0.9359011650085449, - "learning_rate": 1.0898004440734306e-05, - "loss": 0.3641, - "step": 10232 - }, - { - "epoch": 0.96399048538659, - "grad_norm": 0.7431262135505676, - "learning_rate": 1.0896500513211438e-05, - "loss": 0.2878, - "step": 10233 - }, - { - "epoch": 0.9640846894797579, - "grad_norm": 1.0001311302185059, - "learning_rate": 1.0894996565246969e-05, - "loss": 0.3094, - "step": 10234 - }, - { - "epoch": 0.9641788935729257, - "grad_norm": 0.784214198589325, - "learning_rate": 1.0893492596875197e-05, - "loss": 0.3009, - "step": 10235 - }, - { - "epoch": 0.9642730976660936, - "grad_norm": 0.7183936834335327, - "learning_rate": 1.0891988608130409e-05, - "loss": 0.2952, - "step": 10236 - }, - { - "epoch": 0.9643673017592614, - "grad_norm": 0.7166551947593689, - "learning_rate": 1.08904845990469e-05, - "loss": 0.2988, - "step": 10237 - }, - { - "epoch": 0.9644615058524293, - "grad_norm": 0.6997542977333069, - "learning_rate": 1.0888980569658968e-05, - "loss": 0.306, - "step": 10238 - }, - { - "epoch": 0.9645557099455971, - "grad_norm": 0.787003219127655, - "learning_rate": 1.0887476520000904e-05, - "loss": 0.3121, - "step": 10239 - }, - { - "epoch": 0.964649914038765, - "grad_norm": 0.771612823009491, - "learning_rate": 1.0885972450107003e-05, - "loss": 0.2869, - "step": 10240 - }, - { - "epoch": 0.9647441181319328, - "grad_norm": 0.6689867377281189, - "learning_rate": 1.088446836001156e-05, - "loss": 0.2983, - "step": 10241 - }, - { - "epoch": 0.9648383222251007, - 
"grad_norm": 0.8120307922363281, - "learning_rate": 1.0882964249748868e-05, - "loss": 0.3111, - "step": 10242 - }, - { - "epoch": 0.9649325263182685, - "grad_norm": 0.6862969398498535, - "learning_rate": 1.0881460119353227e-05, - "loss": 0.3418, - "step": 10243 - }, - { - "epoch": 0.9650267304114364, - "grad_norm": 0.7631906270980835, - "learning_rate": 1.0879955968858932e-05, - "loss": 0.2827, - "step": 10244 - }, - { - "epoch": 0.9651209345046042, - "grad_norm": 0.8895875811576843, - "learning_rate": 1.0878451798300282e-05, - "loss": 0.3556, - "step": 10245 - }, - { - "epoch": 0.9652151385977721, - "grad_norm": 0.8678480386734009, - "learning_rate": 1.087694760771157e-05, - "loss": 0.2989, - "step": 10246 - }, - { - "epoch": 0.9653093426909399, - "grad_norm": 0.8030105233192444, - "learning_rate": 1.0875443397127096e-05, - "loss": 0.3347, - "step": 10247 - }, - { - "epoch": 0.9654035467841078, - "grad_norm": 0.6832453012466431, - "learning_rate": 1.0873939166581163e-05, - "loss": 0.3018, - "step": 10248 - }, - { - "epoch": 0.9654977508772756, - "grad_norm": 0.7587977647781372, - "learning_rate": 1.0872434916108061e-05, - "loss": 0.2979, - "step": 10249 - }, - { - "epoch": 0.9655919549704435, - "grad_norm": 0.6465497612953186, - "learning_rate": 1.0870930645742098e-05, - "loss": 0.2519, - "step": 10250 - }, - { - "epoch": 0.9656861590636113, - "grad_norm": 0.7264884114265442, - "learning_rate": 1.0869426355517562e-05, - "loss": 0.2611, - "step": 10251 - }, - { - "epoch": 0.9657803631567792, - "grad_norm": 0.6426544189453125, - "learning_rate": 1.0867922045468766e-05, - "loss": 0.2739, - "step": 10252 - }, - { - "epoch": 0.965874567249947, - "grad_norm": 0.8288610577583313, - "learning_rate": 1.0866417715630005e-05, - "loss": 0.342, - "step": 10253 - }, - { - "epoch": 0.9659687713431149, - "grad_norm": 0.7182257175445557, - "learning_rate": 1.0864913366035577e-05, - "loss": 0.3289, - "step": 10254 - }, - { - "epoch": 0.9660629754362827, - "grad_norm": 0.8025250434875488, - "learning_rate": 1.0863408996719792e-05, - "loss": 0.3216, - "step": 10255 - }, - { - "epoch": 0.9661571795294506, - "grad_norm": 0.7690209150314331, - "learning_rate": 1.0861904607716942e-05, - "loss": 0.2928, - "step": 10256 - }, - { - "epoch": 0.9662513836226184, - "grad_norm": 0.7235234975814819, - "learning_rate": 1.0860400199061332e-05, - "loss": 0.3086, - "step": 10257 - }, - { - "epoch": 0.9663455877157863, - "grad_norm": 0.7197400331497192, - "learning_rate": 1.0858895770787275e-05, - "loss": 0.283, - "step": 10258 - }, - { - "epoch": 0.9664397918089541, - "grad_norm": 0.6490856409072876, - "learning_rate": 1.0857391322929059e-05, - "loss": 0.2786, - "step": 10259 - }, - { - "epoch": 0.966533995902122, - "grad_norm": 0.6831339001655579, - "learning_rate": 1.0855886855520996e-05, - "loss": 0.261, - "step": 10260 - }, - { - "epoch": 0.9666281999952898, - "grad_norm": 0.6745830774307251, - "learning_rate": 1.0854382368597391e-05, - "loss": 0.3096, - "step": 10261 - }, - { - "epoch": 0.9667224040884577, - "grad_norm": 0.6728605031967163, - "learning_rate": 1.0852877862192543e-05, - "loss": 0.2733, - "step": 10262 - }, - { - "epoch": 0.9668166081816255, - "grad_norm": 0.7601017951965332, - "learning_rate": 1.0851373336340765e-05, - "loss": 0.3254, - "step": 10263 - }, - { - "epoch": 0.9669108122747934, - "grad_norm": 0.783606231212616, - "learning_rate": 1.0849868791076358e-05, - "loss": 0.3069, - "step": 10264 - }, - { - "epoch": 0.9670050163679612, - "grad_norm": 0.7248871326446533, - "learning_rate": 
1.0848364226433627e-05, - "loss": 0.2864, - "step": 10265 - }, - { - "epoch": 0.9670992204611291, - "grad_norm": 0.8170263767242432, - "learning_rate": 1.0846859642446878e-05, - "loss": 0.3202, - "step": 10266 - }, - { - "epoch": 0.9671934245542969, - "grad_norm": 0.8528434038162231, - "learning_rate": 1.0845355039150423e-05, - "loss": 0.2834, - "step": 10267 - }, - { - "epoch": 0.9672876286474648, - "grad_norm": 0.8308974504470825, - "learning_rate": 1.0843850416578563e-05, - "loss": 0.3271, - "step": 10268 - }, - { - "epoch": 0.9673818327406326, - "grad_norm": 0.6711557507514954, - "learning_rate": 1.084234577476561e-05, - "loss": 0.2953, - "step": 10269 - }, - { - "epoch": 0.9674760368338005, - "grad_norm": 0.7348352074623108, - "learning_rate": 1.0840841113745871e-05, - "loss": 0.2895, - "step": 10270 - }, - { - "epoch": 0.9675702409269683, - "grad_norm": 1.1439179182052612, - "learning_rate": 1.0839336433553651e-05, - "loss": 0.3044, - "step": 10271 - }, - { - "epoch": 0.9676644450201362, - "grad_norm": 0.7765445113182068, - "learning_rate": 1.0837831734223266e-05, - "loss": 0.3103, - "step": 10272 - }, - { - "epoch": 0.967758649113304, - "grad_norm": 0.7693551778793335, - "learning_rate": 1.0836327015789018e-05, - "loss": 0.3007, - "step": 10273 - }, - { - "epoch": 0.9678528532064719, - "grad_norm": 0.7013092041015625, - "learning_rate": 1.0834822278285221e-05, - "loss": 0.2802, - "step": 10274 - }, - { - "epoch": 0.9679470572996397, - "grad_norm": 0.6682015657424927, - "learning_rate": 1.0833317521746192e-05, - "loss": 0.2832, - "step": 10275 - }, - { - "epoch": 0.9680412613928076, - "grad_norm": 0.6417153477668762, - "learning_rate": 1.0831812746206228e-05, - "loss": 0.2715, - "step": 10276 - }, - { - "epoch": 0.9681354654859754, - "grad_norm": 0.7720031142234802, - "learning_rate": 1.0830307951699646e-05, - "loss": 0.3253, - "step": 10277 - }, - { - "epoch": 0.9682296695791432, - "grad_norm": 0.8440465331077576, - "learning_rate": 1.0828803138260765e-05, - "loss": 0.3455, - "step": 10278 - }, - { - "epoch": 0.9683238736723111, - "grad_norm": 0.8167608380317688, - "learning_rate": 1.0827298305923884e-05, - "loss": 0.2866, - "step": 10279 - }, - { - "epoch": 0.968418077765479, - "grad_norm": 0.7632136344909668, - "learning_rate": 1.0825793454723325e-05, - "loss": 0.308, - "step": 10280 - }, - { - "epoch": 0.9685122818586468, - "grad_norm": 2.2348220348358154, - "learning_rate": 1.0824288584693399e-05, - "loss": 0.3379, - "step": 10281 - }, - { - "epoch": 0.9686064859518146, - "grad_norm": 0.6997781991958618, - "learning_rate": 1.0822783695868414e-05, - "loss": 0.3148, - "step": 10282 - }, - { - "epoch": 0.9687006900449825, - "grad_norm": 0.7183207273483276, - "learning_rate": 1.0821278788282694e-05, - "loss": 0.3103, - "step": 10283 - }, - { - "epoch": 0.9687948941381503, - "grad_norm": 0.7390533089637756, - "learning_rate": 1.0819773861970547e-05, - "loss": 0.3037, - "step": 10284 - }, - { - "epoch": 0.9688890982313182, - "grad_norm": 0.9706563353538513, - "learning_rate": 1.0818268916966286e-05, - "loss": 0.3684, - "step": 10285 - }, - { - "epoch": 0.968983302324486, - "grad_norm": 0.7521471977233887, - "learning_rate": 1.0816763953304228e-05, - "loss": 0.2829, - "step": 10286 - }, - { - "epoch": 0.9690775064176539, - "grad_norm": 0.7831737399101257, - "learning_rate": 1.0815258971018687e-05, - "loss": 0.3107, - "step": 10287 - }, - { - "epoch": 0.9691717105108217, - "grad_norm": 0.8647821545600891, - "learning_rate": 1.0813753970143985e-05, - "loss": 0.2824, - "step": 10288 - }, 
- { - "epoch": 0.9692659146039896, - "grad_norm": 0.68235182762146, - "learning_rate": 1.0812248950714433e-05, - "loss": 0.2735, - "step": 10289 - }, - { - "epoch": 0.9693601186971574, - "grad_norm": 0.8001142144203186, - "learning_rate": 1.0810743912764348e-05, - "loss": 0.3009, - "step": 10290 - }, - { - "epoch": 0.9694543227903253, - "grad_norm": 0.8159835934638977, - "learning_rate": 1.0809238856328047e-05, - "loss": 0.3137, - "step": 10291 - }, - { - "epoch": 0.9695485268834931, - "grad_norm": 0.7067996263504028, - "learning_rate": 1.080773378143985e-05, - "loss": 0.2873, - "step": 10292 - }, - { - "epoch": 0.969642730976661, - "grad_norm": 0.6628618240356445, - "learning_rate": 1.080622868813407e-05, - "loss": 0.307, - "step": 10293 - }, - { - "epoch": 0.9697369350698288, - "grad_norm": 0.7208366990089417, - "learning_rate": 1.0804723576445031e-05, - "loss": 0.3128, - "step": 10294 - }, - { - "epoch": 0.9698311391629967, - "grad_norm": 0.625045657157898, - "learning_rate": 1.0803218446407054e-05, - "loss": 0.2761, - "step": 10295 - }, - { - "epoch": 0.9699253432561645, - "grad_norm": 0.7062149047851562, - "learning_rate": 1.080171329805445e-05, - "loss": 0.2652, - "step": 10296 - }, - { - "epoch": 0.9700195473493324, - "grad_norm": 0.7849803566932678, - "learning_rate": 1.0800208131421542e-05, - "loss": 0.29, - "step": 10297 - }, - { - "epoch": 0.9701137514425002, - "grad_norm": 0.6654258966445923, - "learning_rate": 1.0798702946542657e-05, - "loss": 0.3138, - "step": 10298 - }, - { - "epoch": 0.9702079555356681, - "grad_norm": 0.8465496301651001, - "learning_rate": 1.0797197743452104e-05, - "loss": 0.3054, - "step": 10299 - }, - { - "epoch": 0.9703021596288359, - "grad_norm": 0.702889084815979, - "learning_rate": 1.0795692522184211e-05, - "loss": 0.3081, - "step": 10300 - }, - { - "epoch": 0.9703963637220038, - "grad_norm": 0.681449830532074, - "learning_rate": 1.0794187282773298e-05, - "loss": 0.2785, - "step": 10301 - }, - { - "epoch": 0.9704905678151716, - "grad_norm": 0.7109034657478333, - "learning_rate": 1.0792682025253684e-05, - "loss": 0.2984, - "step": 10302 - }, - { - "epoch": 0.9705847719083395, - "grad_norm": 0.6962335109710693, - "learning_rate": 1.0791176749659697e-05, - "loss": 0.2884, - "step": 10303 - }, - { - "epoch": 0.9706789760015073, - "grad_norm": 0.7099591493606567, - "learning_rate": 1.078967145602566e-05, - "loss": 0.2751, - "step": 10304 - }, - { - "epoch": 0.9707731800946751, - "grad_norm": 0.7957950830459595, - "learning_rate": 1.0788166144385888e-05, - "loss": 0.2776, - "step": 10305 - }, - { - "epoch": 0.9708673841878429, - "grad_norm": 0.6819398403167725, - "learning_rate": 1.078666081477471e-05, - "loss": 0.2879, - "step": 10306 - }, - { - "epoch": 0.9709615882810108, - "grad_norm": 0.7258957028388977, - "learning_rate": 1.0785155467226447e-05, - "loss": 0.3034, - "step": 10307 - }, - { - "epoch": 0.9710557923741786, - "grad_norm": 1.0399584770202637, - "learning_rate": 1.0783650101775426e-05, - "loss": 0.3274, - "step": 10308 - }, - { - "epoch": 0.9711499964673465, - "grad_norm": 0.6950438618659973, - "learning_rate": 1.0782144718455973e-05, - "loss": 0.2864, - "step": 10309 - }, - { - "epoch": 0.9712442005605143, - "grad_norm": 0.7077235579490662, - "learning_rate": 1.0780639317302411e-05, - "loss": 0.2906, - "step": 10310 - }, - { - "epoch": 0.9713384046536822, - "grad_norm": 0.6788902878761292, - "learning_rate": 1.0779133898349061e-05, - "loss": 0.291, - "step": 10311 - }, - { - "epoch": 0.97143260874685, - "grad_norm": 0.7881458401679993, - 
"learning_rate": 1.0777628461630256e-05, - "loss": 0.315, - "step": 10312 - }, - { - "epoch": 0.9715268128400179, - "grad_norm": 0.6583048105239868, - "learning_rate": 1.077612300718032e-05, - "loss": 0.2472, - "step": 10313 - }, - { - "epoch": 0.9716210169331857, - "grad_norm": 0.7339376211166382, - "learning_rate": 1.0774617535033575e-05, - "loss": 0.3042, - "step": 10314 - }, - { - "epoch": 0.9717152210263535, - "grad_norm": 0.794305145740509, - "learning_rate": 1.0773112045224361e-05, - "loss": 0.326, - "step": 10315 - }, - { - "epoch": 0.9718094251195214, - "grad_norm": 0.8386337757110596, - "learning_rate": 1.077160653778699e-05, - "loss": 0.302, - "step": 10316 - }, - { - "epoch": 0.9719036292126892, - "grad_norm": 0.6783521771430969, - "learning_rate": 1.0770101012755796e-05, - "loss": 0.2899, - "step": 10317 - }, - { - "epoch": 0.9719978333058571, - "grad_norm": 1.1961525678634644, - "learning_rate": 1.0768595470165111e-05, - "loss": 0.3049, - "step": 10318 - }, - { - "epoch": 0.9720920373990249, - "grad_norm": 0.790729820728302, - "learning_rate": 1.0767089910049258e-05, - "loss": 0.2792, - "step": 10319 - }, - { - "epoch": 0.9721862414921928, - "grad_norm": 0.7801897525787354, - "learning_rate": 1.0765584332442572e-05, - "loss": 0.296, - "step": 10320 - }, - { - "epoch": 0.9722804455853606, - "grad_norm": 0.6824880838394165, - "learning_rate": 1.0764078737379378e-05, - "loss": 0.2898, - "step": 10321 - }, - { - "epoch": 0.9723746496785285, - "grad_norm": 0.6680640578269958, - "learning_rate": 1.0762573124894004e-05, - "loss": 0.3144, - "step": 10322 - }, - { - "epoch": 0.9724688537716963, - "grad_norm": 0.8827727437019348, - "learning_rate": 1.0761067495020787e-05, - "loss": 0.3371, - "step": 10323 - }, - { - "epoch": 0.9725630578648642, - "grad_norm": 0.8456539511680603, - "learning_rate": 1.0759561847794053e-05, - "loss": 0.3434, - "step": 10324 - }, - { - "epoch": 0.972657261958032, - "grad_norm": 0.7169541716575623, - "learning_rate": 1.0758056183248135e-05, - "loss": 0.2596, - "step": 10325 - }, - { - "epoch": 0.9727514660511999, - "grad_norm": 0.7187843322753906, - "learning_rate": 1.0756550501417361e-05, - "loss": 0.262, - "step": 10326 - }, - { - "epoch": 0.9728456701443677, - "grad_norm": 0.7109638452529907, - "learning_rate": 1.0755044802336067e-05, - "loss": 0.3325, - "step": 10327 - }, - { - "epoch": 0.9729398742375356, - "grad_norm": 0.6972149610519409, - "learning_rate": 1.0753539086038584e-05, - "loss": 0.2626, - "step": 10328 - }, - { - "epoch": 0.9730340783307034, - "grad_norm": 0.6843149065971375, - "learning_rate": 1.0752033352559246e-05, - "loss": 0.259, - "step": 10329 - }, - { - "epoch": 0.9731282824238713, - "grad_norm": 0.6997199058532715, - "learning_rate": 1.0750527601932384e-05, - "loss": 0.2713, - "step": 10330 - }, - { - "epoch": 0.9732224865170391, - "grad_norm": 0.750153660774231, - "learning_rate": 1.074902183419233e-05, - "loss": 0.3021, - "step": 10331 - }, - { - "epoch": 0.973316690610207, - "grad_norm": 0.7035261392593384, - "learning_rate": 1.074751604937342e-05, - "loss": 0.3244, - "step": 10332 - }, - { - "epoch": 0.9734108947033748, - "grad_norm": 0.7157569527626038, - "learning_rate": 1.074601024750999e-05, - "loss": 0.2886, - "step": 10333 - }, - { - "epoch": 0.9735050987965427, - "grad_norm": 0.7078803181648254, - "learning_rate": 1.074450442863637e-05, - "loss": 0.3366, - "step": 10334 - }, - { - "epoch": 0.9735993028897105, - "grad_norm": 0.7504318952560425, - "learning_rate": 1.0742998592786902e-05, - "loss": 0.3068, - "step": 10335 
- }, - { - "epoch": 0.9736935069828784, - "grad_norm": 0.7075466513633728, - "learning_rate": 1.0741492739995913e-05, - "loss": 0.3001, - "step": 10336 - }, - { - "epoch": 0.9737877110760462, - "grad_norm": 0.7589266896247864, - "learning_rate": 1.0739986870297743e-05, - "loss": 0.3228, - "step": 10337 - }, - { - "epoch": 0.9738819151692141, - "grad_norm": 0.7659551501274109, - "learning_rate": 1.073848098372673e-05, - "loss": 0.3021, - "step": 10338 - }, - { - "epoch": 0.9739761192623819, - "grad_norm": 0.7225384712219238, - "learning_rate": 1.0736975080317206e-05, - "loss": 0.2984, - "step": 10339 - }, - { - "epoch": 0.9740703233555498, - "grad_norm": 0.6561002731323242, - "learning_rate": 1.0735469160103514e-05, - "loss": 0.2875, - "step": 10340 - }, - { - "epoch": 0.9741645274487176, - "grad_norm": 0.7799216508865356, - "learning_rate": 1.0733963223119986e-05, - "loss": 0.288, - "step": 10341 - }, - { - "epoch": 0.9742587315418855, - "grad_norm": 0.7443419098854065, - "learning_rate": 1.073245726940096e-05, - "loss": 0.2611, - "step": 10342 - }, - { - "epoch": 0.9743529356350533, - "grad_norm": 0.8268982172012329, - "learning_rate": 1.0730951298980776e-05, - "loss": 0.3118, - "step": 10343 - }, - { - "epoch": 0.9744471397282212, - "grad_norm": 0.7606011629104614, - "learning_rate": 1.0729445311893773e-05, - "loss": 0.2986, - "step": 10344 - }, - { - "epoch": 0.974541343821389, - "grad_norm": 0.7274667620658875, - "learning_rate": 1.0727939308174289e-05, - "loss": 0.2986, - "step": 10345 - }, - { - "epoch": 0.9746355479145569, - "grad_norm": 0.6468337774276733, - "learning_rate": 1.0726433287856664e-05, - "loss": 0.2774, - "step": 10346 - }, - { - "epoch": 0.9747297520077247, - "grad_norm": 0.9713823199272156, - "learning_rate": 1.0724927250975232e-05, - "loss": 0.2726, - "step": 10347 - }, - { - "epoch": 0.9748239561008926, - "grad_norm": 0.6387431025505066, - "learning_rate": 1.072342119756434e-05, - "loss": 0.2803, - "step": 10348 - }, - { - "epoch": 0.9749181601940604, - "grad_norm": 0.7059564590454102, - "learning_rate": 1.0721915127658329e-05, - "loss": 0.2579, - "step": 10349 - }, - { - "epoch": 0.9750123642872283, - "grad_norm": 0.7223420739173889, - "learning_rate": 1.0720409041291533e-05, - "loss": 0.3094, - "step": 10350 - }, - { - "epoch": 0.9751065683803961, - "grad_norm": 0.6966363191604614, - "learning_rate": 1.0718902938498296e-05, - "loss": 0.2888, - "step": 10351 - }, - { - "epoch": 0.975200772473564, - "grad_norm": 0.7751204371452332, - "learning_rate": 1.0717396819312961e-05, - "loss": 0.3039, - "step": 10352 - }, - { - "epoch": 0.9752949765667318, - "grad_norm": 0.7578683495521545, - "learning_rate": 1.0715890683769872e-05, - "loss": 0.2725, - "step": 10353 - }, - { - "epoch": 0.9753891806598997, - "grad_norm": 0.6831819415092468, - "learning_rate": 1.0714384531903365e-05, - "loss": 0.2739, - "step": 10354 - }, - { - "epoch": 0.9754833847530675, - "grad_norm": 0.7249935269355774, - "learning_rate": 1.071287836374779e-05, - "loss": 0.3009, - "step": 10355 - }, - { - "epoch": 0.9755775888462354, - "grad_norm": 0.6584034562110901, - "learning_rate": 1.071137217933748e-05, - "loss": 0.2563, - "step": 10356 - }, - { - "epoch": 0.9756717929394032, - "grad_norm": 0.730527400970459, - "learning_rate": 1.0709865978706789e-05, - "loss": 0.2708, - "step": 10357 - }, - { - "epoch": 0.975765997032571, - "grad_norm": 0.8241204023361206, - "learning_rate": 1.0708359761890053e-05, - "loss": 0.3543, - "step": 10358 - }, - { - "epoch": 0.9758602011257389, - "grad_norm": 
0.8479092121124268, - "learning_rate": 1.0706853528921618e-05, - "loss": 0.3116, - "step": 10359 - }, - { - "epoch": 0.9759544052189068, - "grad_norm": 0.7382973432540894, - "learning_rate": 1.070534727983583e-05, - "loss": 0.302, - "step": 10360 - }, - { - "epoch": 0.9760486093120746, - "grad_norm": 0.7855536341667175, - "learning_rate": 1.0703841014667037e-05, - "loss": 0.3572, - "step": 10361 - }, - { - "epoch": 0.9761428134052424, - "grad_norm": 0.7271184325218201, - "learning_rate": 1.0702334733449575e-05, - "loss": 0.3006, - "step": 10362 - }, - { - "epoch": 0.9762370174984103, - "grad_norm": 0.6775922775268555, - "learning_rate": 1.0700828436217798e-05, - "loss": 0.3102, - "step": 10363 - }, - { - "epoch": 0.9763312215915781, - "grad_norm": 0.7173593640327454, - "learning_rate": 1.0699322123006051e-05, - "loss": 0.2864, - "step": 10364 - }, - { - "epoch": 0.976425425684746, - "grad_norm": 0.7771925330162048, - "learning_rate": 1.0697815793848676e-05, - "loss": 0.2918, - "step": 10365 - }, - { - "epoch": 0.9765196297779138, - "grad_norm": 0.7389848232269287, - "learning_rate": 1.069630944878002e-05, - "loss": 0.2738, - "step": 10366 - }, - { - "epoch": 0.9766138338710817, - "grad_norm": 0.6529532074928284, - "learning_rate": 1.0694803087834431e-05, - "loss": 0.3264, - "step": 10367 - }, - { - "epoch": 0.9767080379642495, - "grad_norm": 0.8133367300033569, - "learning_rate": 1.069329671104626e-05, - "loss": 0.2879, - "step": 10368 - }, - { - "epoch": 0.9768022420574174, - "grad_norm": 0.8488715887069702, - "learning_rate": 1.069179031844985e-05, - "loss": 0.3017, - "step": 10369 - }, - { - "epoch": 0.9768964461505852, - "grad_norm": 0.6893444061279297, - "learning_rate": 1.0690283910079553e-05, - "loss": 0.3052, - "step": 10370 - }, - { - "epoch": 0.9769906502437531, - "grad_norm": 0.7704743146896362, - "learning_rate": 1.0688777485969713e-05, - "loss": 0.2782, - "step": 10371 - }, - { - "epoch": 0.9770848543369209, - "grad_norm": 0.9016629457473755, - "learning_rate": 1.0687271046154684e-05, - "loss": 0.2984, - "step": 10372 - }, - { - "epoch": 0.9771790584300888, - "grad_norm": 0.7004095315933228, - "learning_rate": 1.068576459066881e-05, - "loss": 0.3166, - "step": 10373 - }, - { - "epoch": 0.9772732625232566, - "grad_norm": 0.6561674475669861, - "learning_rate": 1.068425811954644e-05, - "loss": 0.2771, - "step": 10374 - }, - { - "epoch": 0.9773674666164245, - "grad_norm": 0.7221732139587402, - "learning_rate": 1.0682751632821933e-05, - "loss": 0.3031, - "step": 10375 - }, - { - "epoch": 0.9774616707095923, - "grad_norm": 0.6564479470252991, - "learning_rate": 1.0681245130529627e-05, - "loss": 0.2721, - "step": 10376 - }, - { - "epoch": 0.9775558748027602, - "grad_norm": 0.7864837050437927, - "learning_rate": 1.0679738612703882e-05, - "loss": 0.3361, - "step": 10377 - }, - { - "epoch": 0.977650078895928, - "grad_norm": 0.7487345337867737, - "learning_rate": 1.0678232079379045e-05, - "loss": 0.3285, - "step": 10378 - }, - { - "epoch": 0.9777442829890959, - "grad_norm": 0.6579439043998718, - "learning_rate": 1.0676725530589467e-05, - "loss": 0.2895, - "step": 10379 - }, - { - "epoch": 0.9778384870822637, - "grad_norm": 0.7135502099990845, - "learning_rate": 1.06752189663695e-05, - "loss": 0.3288, - "step": 10380 - }, - { - "epoch": 0.9779326911754316, - "grad_norm": 0.919353187084198, - "learning_rate": 1.0673712386753496e-05, - "loss": 0.3116, - "step": 10381 - }, - { - "epoch": 0.9780268952685994, - "grad_norm": 0.6300274133682251, - "learning_rate": 1.0672205791775807e-05, - 
"loss": 0.261, - "step": 10382 - }, - { - "epoch": 0.9781210993617673, - "grad_norm": 0.6668910384178162, - "learning_rate": 1.067069918147079e-05, - "loss": 0.2945, - "step": 10383 - }, - { - "epoch": 0.9782153034549351, - "grad_norm": 0.8164402842521667, - "learning_rate": 1.0669192555872791e-05, - "loss": 0.2734, - "step": 10384 - }, - { - "epoch": 0.978309507548103, - "grad_norm": 0.6610966324806213, - "learning_rate": 1.0667685915016168e-05, - "loss": 0.343, - "step": 10385 - }, - { - "epoch": 0.9784037116412708, - "grad_norm": 0.7320244908332825, - "learning_rate": 1.0666179258935274e-05, - "loss": 0.2982, - "step": 10386 - }, - { - "epoch": 0.9784979157344387, - "grad_norm": 0.7519034743309021, - "learning_rate": 1.0664672587664462e-05, - "loss": 0.3295, - "step": 10387 - }, - { - "epoch": 0.9785921198276065, - "grad_norm": 0.7669429779052734, - "learning_rate": 1.0663165901238088e-05, - "loss": 0.3212, - "step": 10388 - }, - { - "epoch": 0.9786863239207744, - "grad_norm": 0.6347747445106506, - "learning_rate": 1.0661659199690505e-05, - "loss": 0.29, - "step": 10389 - }, - { - "epoch": 0.9787805280139422, - "grad_norm": 0.7060967683792114, - "learning_rate": 1.066015248305607e-05, - "loss": 0.3017, - "step": 10390 - }, - { - "epoch": 0.9788747321071101, - "grad_norm": 0.7511218786239624, - "learning_rate": 1.0658645751369134e-05, - "loss": 0.2958, - "step": 10391 - }, - { - "epoch": 0.9789689362002779, - "grad_norm": 0.7689938545227051, - "learning_rate": 1.0657139004664058e-05, - "loss": 0.3165, - "step": 10392 - }, - { - "epoch": 0.9790631402934458, - "grad_norm": 1.196301817893982, - "learning_rate": 1.06556322429752e-05, - "loss": 0.2614, - "step": 10393 - }, - { - "epoch": 0.9791573443866136, - "grad_norm": 0.733065128326416, - "learning_rate": 1.0654125466336907e-05, - "loss": 0.3121, - "step": 10394 - }, - { - "epoch": 0.9792515484797815, - "grad_norm": 0.7399395108222961, - "learning_rate": 1.0652618674783549e-05, - "loss": 0.2614, - "step": 10395 - }, - { - "epoch": 0.9793457525729493, - "grad_norm": 0.719694972038269, - "learning_rate": 1.0651111868349469e-05, - "loss": 0.3156, - "step": 10396 - }, - { - "epoch": 0.9794399566661172, - "grad_norm": 1.1388739347457886, - "learning_rate": 1.0649605047069034e-05, - "loss": 0.2794, - "step": 10397 - }, - { - "epoch": 0.979534160759285, - "grad_norm": 0.7065989971160889, - "learning_rate": 1.06480982109766e-05, - "loss": 0.2849, - "step": 10398 - }, - { - "epoch": 0.9796283648524529, - "grad_norm": 0.8086827993392944, - "learning_rate": 1.0646591360106524e-05, - "loss": 0.3242, - "step": 10399 - }, - { - "epoch": 0.9797225689456207, - "grad_norm": 0.6797522306442261, - "learning_rate": 1.0645084494493166e-05, - "loss": 0.3007, - "step": 10400 - }, - { - "epoch": 0.9798167730387886, - "grad_norm": 0.8553151488304138, - "learning_rate": 1.064357761417088e-05, - "loss": 0.3233, - "step": 10401 - }, - { - "epoch": 0.9799109771319564, - "grad_norm": 0.6668685674667358, - "learning_rate": 1.0642070719174031e-05, - "loss": 0.2742, - "step": 10402 - }, - { - "epoch": 0.9800051812251243, - "grad_norm": 0.7501025795936584, - "learning_rate": 1.064056380953698e-05, - "loss": 0.339, - "step": 10403 - }, - { - "epoch": 0.9800993853182921, - "grad_norm": 0.743687629699707, - "learning_rate": 1.0639056885294082e-05, - "loss": 0.3128, - "step": 10404 - }, - { - "epoch": 0.98019358941146, - "grad_norm": 0.5909627676010132, - "learning_rate": 1.0637549946479698e-05, - "loss": 0.2524, - "step": 10405 - }, - { - "epoch": 0.9802877935046278, - 
"grad_norm": 0.6835361123085022, - "learning_rate": 1.0636042993128188e-05, - "loss": 0.2923, - "step": 10406 - }, - { - "epoch": 0.9803819975977957, - "grad_norm": 0.7401196956634521, - "learning_rate": 1.0634536025273914e-05, - "loss": 0.3109, - "step": 10407 - }, - { - "epoch": 0.9804762016909635, - "grad_norm": 0.7609918713569641, - "learning_rate": 1.0633029042951239e-05, - "loss": 0.3087, - "step": 10408 - }, - { - "epoch": 0.9805704057841313, - "grad_norm": 0.6468635201454163, - "learning_rate": 1.0631522046194522e-05, - "loss": 0.2722, - "step": 10409 - }, - { - "epoch": 0.9806646098772992, - "grad_norm": 0.7094852328300476, - "learning_rate": 1.0630015035038125e-05, - "loss": 0.2677, - "step": 10410 - }, - { - "epoch": 0.980758813970467, - "grad_norm": 0.7232741117477417, - "learning_rate": 1.0628508009516412e-05, - "loss": 0.2533, - "step": 10411 - }, - { - "epoch": 0.9808530180636349, - "grad_norm": 0.7106176018714905, - "learning_rate": 1.0627000969663743e-05, - "loss": 0.3123, - "step": 10412 - }, - { - "epoch": 0.9809472221568027, - "grad_norm": 0.6309818625450134, - "learning_rate": 1.0625493915514485e-05, - "loss": 0.2648, - "step": 10413 - }, - { - "epoch": 0.9810414262499706, - "grad_norm": 0.7269724011421204, - "learning_rate": 1.0623986847102994e-05, - "loss": 0.3464, - "step": 10414 - }, - { - "epoch": 0.9811356303431384, - "grad_norm": 0.6727473735809326, - "learning_rate": 1.0622479764463645e-05, - "loss": 0.3051, - "step": 10415 - }, - { - "epoch": 0.9812298344363063, - "grad_norm": 0.6313793063163757, - "learning_rate": 1.0620972667630787e-05, - "loss": 0.2706, - "step": 10416 - }, - { - "epoch": 0.9813240385294741, - "grad_norm": 0.7293052673339844, - "learning_rate": 1.0619465556638797e-05, - "loss": 0.2566, - "step": 10417 - }, - { - "epoch": 0.981418242622642, - "grad_norm": 0.7017768025398254, - "learning_rate": 1.0617958431522034e-05, - "loss": 0.3137, - "step": 10418 - }, - { - "epoch": 0.9815124467158098, - "grad_norm": 0.7957296371459961, - "learning_rate": 1.0616451292314858e-05, - "loss": 0.3159, - "step": 10419 - }, - { - "epoch": 0.9816066508089777, - "grad_norm": 0.7720123529434204, - "learning_rate": 1.0614944139051644e-05, - "loss": 0.3147, - "step": 10420 - }, - { - "epoch": 0.9817008549021455, - "grad_norm": 0.9559768438339233, - "learning_rate": 1.061343697176675e-05, - "loss": 0.2942, - "step": 10421 - }, - { - "epoch": 0.9817950589953134, - "grad_norm": 0.7781189680099487, - "learning_rate": 1.0611929790494543e-05, - "loss": 0.3103, - "step": 10422 - }, - { - "epoch": 0.9818892630884812, - "grad_norm": 0.7041938900947571, - "learning_rate": 1.0610422595269396e-05, - "loss": 0.3141, - "step": 10423 - }, - { - "epoch": 0.9819834671816491, - "grad_norm": 0.6958533525466919, - "learning_rate": 1.0608915386125667e-05, - "loss": 0.2783, - "step": 10424 - }, - { - "epoch": 0.9820776712748169, - "grad_norm": 0.6525768041610718, - "learning_rate": 1.0607408163097725e-05, - "loss": 0.3082, - "step": 10425 - }, - { - "epoch": 0.9821718753679848, - "grad_norm": 0.6321045160293579, - "learning_rate": 1.060590092621994e-05, - "loss": 0.2484, - "step": 10426 - }, - { - "epoch": 0.9822660794611526, - "grad_norm": 0.8602375984191895, - "learning_rate": 1.0604393675526672e-05, - "loss": 0.3413, - "step": 10427 - }, - { - "epoch": 0.9823602835543205, - "grad_norm": 0.7665597796440125, - "learning_rate": 1.0602886411052295e-05, - "loss": 0.3293, - "step": 10428 - }, - { - "epoch": 0.9824544876474883, - "grad_norm": 0.7466394305229187, - "learning_rate": 
1.0601379132831177e-05, - "loss": 0.3092, - "step": 10429 - }, - { - "epoch": 0.9825486917406562, - "grad_norm": 0.7269977331161499, - "learning_rate": 1.0599871840897687e-05, - "loss": 0.3493, - "step": 10430 - }, - { - "epoch": 0.982642895833824, - "grad_norm": 0.6265439987182617, - "learning_rate": 1.0598364535286186e-05, - "loss": 0.2718, - "step": 10431 - }, - { - "epoch": 0.9827370999269919, - "grad_norm": 0.749789297580719, - "learning_rate": 1.0596857216031051e-05, - "loss": 0.3097, - "step": 10432 - }, - { - "epoch": 0.9828313040201597, - "grad_norm": 0.7870447039604187, - "learning_rate": 1.0595349883166648e-05, - "loss": 0.3272, - "step": 10433 - }, - { - "epoch": 0.9829255081133276, - "grad_norm": 0.8104708194732666, - "learning_rate": 1.0593842536727345e-05, - "loss": 0.3392, - "step": 10434 - }, - { - "epoch": 0.9830197122064954, - "grad_norm": 0.7863287925720215, - "learning_rate": 1.0592335176747518e-05, - "loss": 0.3183, - "step": 10435 - }, - { - "epoch": 0.9831139162996633, - "grad_norm": 0.628355860710144, - "learning_rate": 1.059082780326153e-05, - "loss": 0.3132, - "step": 10436 - }, - { - "epoch": 0.9832081203928311, - "grad_norm": 0.6768056750297546, - "learning_rate": 1.0589320416303756e-05, - "loss": 0.2725, - "step": 10437 - }, - { - "epoch": 0.983302324485999, - "grad_norm": 0.7083317041397095, - "learning_rate": 1.0587813015908566e-05, - "loss": 0.305, - "step": 10438 - }, - { - "epoch": 0.9833965285791668, - "grad_norm": 0.6532204747200012, - "learning_rate": 1.0586305602110326e-05, - "loss": 0.3015, - "step": 10439 - }, - { - "epoch": 0.9834907326723347, - "grad_norm": 0.7867683172225952, - "learning_rate": 1.0584798174943414e-05, - "loss": 0.3143, - "step": 10440 - }, - { - "epoch": 0.9835849367655025, - "grad_norm": 0.8491891622543335, - "learning_rate": 1.0583290734442199e-05, - "loss": 0.3207, - "step": 10441 - }, - { - "epoch": 0.9836791408586704, - "grad_norm": 0.6541094779968262, - "learning_rate": 1.0581783280641051e-05, - "loss": 0.26, - "step": 10442 - }, - { - "epoch": 0.9837733449518381, - "grad_norm": 0.7106014490127563, - "learning_rate": 1.058027581357435e-05, - "loss": 0.2963, - "step": 10443 - }, - { - "epoch": 0.983867549045006, - "grad_norm": 0.7384240031242371, - "learning_rate": 1.0578768333276458e-05, - "loss": 0.2582, - "step": 10444 - }, - { - "epoch": 0.9839617531381738, - "grad_norm": 0.7477413415908813, - "learning_rate": 1.0577260839781756e-05, - "loss": 0.3046, - "step": 10445 - }, - { - "epoch": 0.9840559572313416, - "grad_norm": 0.7462862133979797, - "learning_rate": 1.0575753333124615e-05, - "loss": 0.2727, - "step": 10446 - }, - { - "epoch": 0.9841501613245095, - "grad_norm": 0.8706429600715637, - "learning_rate": 1.0574245813339404e-05, - "loss": 0.3115, - "step": 10447 - }, - { - "epoch": 0.9842443654176773, - "grad_norm": 0.66429603099823, - "learning_rate": 1.05727382804605e-05, - "loss": 0.2772, - "step": 10448 - }, - { - "epoch": 0.9843385695108452, - "grad_norm": 0.6963960528373718, - "learning_rate": 1.057123073452228e-05, - "loss": 0.2912, - "step": 10449 - }, - { - "epoch": 0.984432773604013, - "grad_norm": 0.7144415974617004, - "learning_rate": 1.0569723175559116e-05, - "loss": 0.3204, - "step": 10450 - }, - { - "epoch": 0.9845269776971809, - "grad_norm": 0.822078287601471, - "learning_rate": 1.0568215603605378e-05, - "loss": 0.2814, - "step": 10451 - }, - { - "epoch": 0.9846211817903487, - "grad_norm": 0.8428067564964294, - "learning_rate": 1.056670801869545e-05, - "loss": 0.3445, - "step": 10452 - }, - { - 
"epoch": 0.9847153858835166, - "grad_norm": 0.6889626979827881, - "learning_rate": 1.0565200420863704e-05, - "loss": 0.2855, - "step": 10453 - }, - { - "epoch": 0.9848095899766844, - "grad_norm": 0.6799871921539307, - "learning_rate": 1.056369281014451e-05, - "loss": 0.3132, - "step": 10454 - }, - { - "epoch": 0.9849037940698523, - "grad_norm": 0.7179400324821472, - "learning_rate": 1.0562185186572253e-05, - "loss": 0.2966, - "step": 10455 - }, - { - "epoch": 0.9849979981630201, - "grad_norm": 0.6540926694869995, - "learning_rate": 1.05606775501813e-05, - "loss": 0.2944, - "step": 10456 - }, - { - "epoch": 0.985092202256188, - "grad_norm": 0.6726449131965637, - "learning_rate": 1.0559169901006035e-05, - "loss": 0.2665, - "step": 10457 - }, - { - "epoch": 0.9851864063493558, - "grad_norm": 0.7637098431587219, - "learning_rate": 1.0557662239080828e-05, - "loss": 0.3253, - "step": 10458 - }, - { - "epoch": 0.9852806104425237, - "grad_norm": 0.7527045011520386, - "learning_rate": 1.055615456444006e-05, - "loss": 0.315, - "step": 10459 - }, - { - "epoch": 0.9853748145356915, - "grad_norm": 0.702427327632904, - "learning_rate": 1.055464687711811e-05, - "loss": 0.309, - "step": 10460 - }, - { - "epoch": 0.9854690186288594, - "grad_norm": 0.7186090350151062, - "learning_rate": 1.0553139177149354e-05, - "loss": 0.2892, - "step": 10461 - }, - { - "epoch": 0.9855632227220272, - "grad_norm": 0.6679766178131104, - "learning_rate": 1.0551631464568167e-05, - "loss": 0.3033, - "step": 10462 - }, - { - "epoch": 0.9856574268151951, - "grad_norm": 0.7070468664169312, - "learning_rate": 1.0550123739408931e-05, - "loss": 0.3331, - "step": 10463 - }, - { - "epoch": 0.9857516309083629, - "grad_norm": 0.7657076716423035, - "learning_rate": 1.054861600170602e-05, - "loss": 0.3279, - "step": 10464 - }, - { - "epoch": 0.9858458350015308, - "grad_norm": 0.7013779878616333, - "learning_rate": 1.054710825149382e-05, - "loss": 0.3321, - "step": 10465 - }, - { - "epoch": 0.9859400390946986, - "grad_norm": 0.6874800324440002, - "learning_rate": 1.0545600488806704e-05, - "loss": 0.2896, - "step": 10466 - }, - { - "epoch": 0.9860342431878665, - "grad_norm": 0.7229523062705994, - "learning_rate": 1.054409271367905e-05, - "loss": 0.3216, - "step": 10467 - }, - { - "epoch": 0.9861284472810343, - "grad_norm": 0.7149501442909241, - "learning_rate": 1.0542584926145244e-05, - "loss": 0.3407, - "step": 10468 - }, - { - "epoch": 0.9862226513742022, - "grad_norm": 0.6803480982780457, - "learning_rate": 1.0541077126239663e-05, - "loss": 0.2873, - "step": 10469 - }, - { - "epoch": 0.98631685546737, - "grad_norm": 0.7159126400947571, - "learning_rate": 1.0539569313996687e-05, - "loss": 0.2917, - "step": 10470 - }, - { - "epoch": 0.9864110595605379, - "grad_norm": 0.8723127841949463, - "learning_rate": 1.0538061489450692e-05, - "loss": 0.3273, - "step": 10471 - }, - { - "epoch": 0.9865052636537057, - "grad_norm": 0.6796349883079529, - "learning_rate": 1.0536553652636068e-05, - "loss": 0.2872, - "step": 10472 - }, - { - "epoch": 0.9865994677468736, - "grad_norm": 1.1715443134307861, - "learning_rate": 1.0535045803587189e-05, - "loss": 0.3398, - "step": 10473 - }, - { - "epoch": 0.9866936718400414, - "grad_norm": 0.9951081871986389, - "learning_rate": 1.053353794233844e-05, - "loss": 0.2785, - "step": 10474 - }, - { - "epoch": 0.9867878759332093, - "grad_norm": 0.8327958583831787, - "learning_rate": 1.0532030068924198e-05, - "loss": 0.3256, - "step": 10475 - }, - { - "epoch": 0.9868820800263771, - "grad_norm": 0.7180770039558411, - 
"learning_rate": 1.0530522183378846e-05, - "loss": 0.3308, - "step": 10476 - }, - { - "epoch": 0.986976284119545, - "grad_norm": 0.7871127724647522, - "learning_rate": 1.0529014285736772e-05, - "loss": 0.3154, - "step": 10477 - }, - { - "epoch": 0.9870704882127128, - "grad_norm": 0.7883843779563904, - "learning_rate": 1.0527506376032352e-05, - "loss": 0.3313, - "step": 10478 - }, - { - "epoch": 0.9871646923058807, - "grad_norm": 0.7578481435775757, - "learning_rate": 1.052599845429997e-05, - "loss": 0.3048, - "step": 10479 - }, - { - "epoch": 0.9872588963990485, - "grad_norm": 37.6483039855957, - "learning_rate": 1.0524490520574011e-05, - "loss": 0.3027, - "step": 10480 - }, - { - "epoch": 0.9873531004922164, - "grad_norm": 0.7600743174552917, - "learning_rate": 1.0522982574888857e-05, - "loss": 0.3268, - "step": 10481 - }, - { - "epoch": 0.9874473045853842, - "grad_norm": 0.7806258797645569, - "learning_rate": 1.052147461727889e-05, - "loss": 0.3235, - "step": 10482 - }, - { - "epoch": 0.9875415086785521, - "grad_norm": 0.7319111824035645, - "learning_rate": 1.0519966647778499e-05, - "loss": 0.3504, - "step": 10483 - }, - { - "epoch": 0.9876357127717199, - "grad_norm": 0.7317287921905518, - "learning_rate": 1.051845866642206e-05, - "loss": 0.307, - "step": 10484 - }, - { - "epoch": 0.9877299168648878, - "grad_norm": 0.7134479284286499, - "learning_rate": 1.0516950673243965e-05, - "loss": 0.276, - "step": 10485 - }, - { - "epoch": 0.9878241209580556, - "grad_norm": 0.7581774592399597, - "learning_rate": 1.0515442668278595e-05, - "loss": 0.2814, - "step": 10486 - }, - { - "epoch": 0.9879183250512235, - "grad_norm": 0.7357668280601501, - "learning_rate": 1.051393465156033e-05, - "loss": 0.327, - "step": 10487 - }, - { - "epoch": 0.9880125291443913, - "grad_norm": 0.6782602071762085, - "learning_rate": 1.0512426623123566e-05, - "loss": 0.2757, - "step": 10488 - }, - { - "epoch": 0.9881067332375592, - "grad_norm": 0.7066102623939514, - "learning_rate": 1.051091858300268e-05, - "loss": 0.2976, - "step": 10489 - }, - { - "epoch": 0.988200937330727, - "grad_norm": 0.674705445766449, - "learning_rate": 1.050941053123206e-05, - "loss": 0.3039, - "step": 10490 - }, - { - "epoch": 0.9882951414238949, - "grad_norm": 0.7260640263557434, - "learning_rate": 1.0507902467846092e-05, - "loss": 0.2665, - "step": 10491 - }, - { - "epoch": 0.9883893455170627, - "grad_norm": 0.7960880994796753, - "learning_rate": 1.0506394392879165e-05, - "loss": 0.3039, - "step": 10492 - }, - { - "epoch": 0.9884835496102305, - "grad_norm": 0.7668411731719971, - "learning_rate": 1.050488630636566e-05, - "loss": 0.2966, - "step": 10493 - }, - { - "epoch": 0.9885777537033984, - "grad_norm": 0.6721380949020386, - "learning_rate": 1.0503378208339968e-05, - "loss": 0.2783, - "step": 10494 - }, - { - "epoch": 0.9886719577965662, - "grad_norm": 0.8086415529251099, - "learning_rate": 1.0501870098836473e-05, - "loss": 0.3181, - "step": 10495 - }, - { - "epoch": 0.9887661618897341, - "grad_norm": 0.8411343693733215, - "learning_rate": 1.0500361977889562e-05, - "loss": 0.3261, - "step": 10496 - }, - { - "epoch": 0.988860365982902, - "grad_norm": 0.6973224878311157, - "learning_rate": 1.0498853845533628e-05, - "loss": 0.2869, - "step": 10497 - }, - { - "epoch": 0.9889545700760698, - "grad_norm": 0.8233364224433899, - "learning_rate": 1.0497345701803052e-05, - "loss": 0.2975, - "step": 10498 - }, - { - "epoch": 0.9890487741692376, - "grad_norm": 0.7598685622215271, - "learning_rate": 1.0495837546732224e-05, - "loss": 0.3326, - "step": 10499 
- }, - { - "epoch": 0.9891429782624055, - "grad_norm": 0.7478805780410767, - "learning_rate": 1.0494329380355535e-05, - "loss": 0.2836, - "step": 10500 - }, - { - "epoch": 0.9892371823555733, - "grad_norm": 0.743462324142456, - "learning_rate": 1.0492821202707373e-05, - "loss": 0.2974, - "step": 10501 - }, - { - "epoch": 0.9893313864487412, - "grad_norm": 0.693759560585022, - "learning_rate": 1.0491313013822122e-05, - "loss": 0.2839, - "step": 10502 - }, - { - "epoch": 0.989425590541909, - "grad_norm": 0.727215051651001, - "learning_rate": 1.0489804813734176e-05, - "loss": 0.3064, - "step": 10503 - }, - { - "epoch": 0.9895197946350769, - "grad_norm": 0.8441318869590759, - "learning_rate": 1.0488296602477923e-05, - "loss": 0.3066, - "step": 10504 - }, - { - "epoch": 0.9896139987282447, - "grad_norm": 0.7129061818122864, - "learning_rate": 1.0486788380087754e-05, - "loss": 0.2813, - "step": 10505 - }, - { - "epoch": 0.9897082028214126, - "grad_norm": 0.6906604170799255, - "learning_rate": 1.0485280146598055e-05, - "loss": 0.3135, - "step": 10506 - }, - { - "epoch": 0.9898024069145804, - "grad_norm": 0.8256998062133789, - "learning_rate": 1.0483771902043216e-05, - "loss": 0.327, - "step": 10507 - }, - { - "epoch": 0.9898966110077483, - "grad_norm": 0.775102972984314, - "learning_rate": 1.0482263646457632e-05, - "loss": 0.2884, - "step": 10508 - }, - { - "epoch": 0.9899908151009161, - "grad_norm": 0.6462504267692566, - "learning_rate": 1.0480755379875693e-05, - "loss": 0.2908, - "step": 10509 - }, - { - "epoch": 0.990085019194084, - "grad_norm": 0.7563470602035522, - "learning_rate": 1.0479247102331787e-05, - "loss": 0.2923, - "step": 10510 - }, - { - "epoch": 0.9901792232872518, - "grad_norm": 0.7145566940307617, - "learning_rate": 1.0477738813860303e-05, - "loss": 0.2962, - "step": 10511 - }, - { - "epoch": 0.9902734273804197, - "grad_norm": 0.7924150228500366, - "learning_rate": 1.0476230514495636e-05, - "loss": 0.2747, - "step": 10512 - }, - { - "epoch": 0.9903676314735875, - "grad_norm": 0.7354665994644165, - "learning_rate": 1.0474722204272178e-05, - "loss": 0.2917, - "step": 10513 - }, - { - "epoch": 0.9904618355667554, - "grad_norm": 0.7262409329414368, - "learning_rate": 1.0473213883224321e-05, - "loss": 0.3027, - "step": 10514 - }, - { - "epoch": 0.9905560396599232, - "grad_norm": 0.8383549451828003, - "learning_rate": 1.0471705551386453e-05, - "loss": 0.3241, - "step": 10515 - }, - { - "epoch": 0.9906502437530911, - "grad_norm": 0.7284969091415405, - "learning_rate": 1.047019720879297e-05, - "loss": 0.2917, - "step": 10516 - }, - { - "epoch": 0.9907444478462589, - "grad_norm": 0.7100862860679626, - "learning_rate": 1.0468688855478265e-05, - "loss": 0.2953, - "step": 10517 - }, - { - "epoch": 0.9908386519394268, - "grad_norm": 0.7132425308227539, - "learning_rate": 1.0467180491476725e-05, - "loss": 0.3224, - "step": 10518 - }, - { - "epoch": 0.9909328560325946, - "grad_norm": 0.7249391078948975, - "learning_rate": 1.0465672116822749e-05, - "loss": 0.3134, - "step": 10519 - }, - { - "epoch": 0.9910270601257625, - "grad_norm": 0.7816787958145142, - "learning_rate": 1.0464163731550731e-05, - "loss": 0.3201, - "step": 10520 - }, - { - "epoch": 0.9911212642189303, - "grad_norm": 0.8399257659912109, - "learning_rate": 1.046265533569506e-05, - "loss": 0.306, - "step": 10521 - }, - { - "epoch": 0.9912154683120982, - "grad_norm": 0.8124042749404907, - "learning_rate": 1.046114692929013e-05, - "loss": 0.2871, - "step": 10522 - }, - { - "epoch": 0.991309672405266, - "grad_norm": 
0.754949152469635, - "learning_rate": 1.0459638512370343e-05, - "loss": 0.2695, - "step": 10523 - }, - { - "epoch": 0.9914038764984339, - "grad_norm": 0.6890938878059387, - "learning_rate": 1.0458130084970082e-05, - "loss": 0.3235, - "step": 10524 - }, - { - "epoch": 0.9914980805916017, - "grad_norm": 0.7518118619918823, - "learning_rate": 1.0456621647123748e-05, - "loss": 0.305, - "step": 10525 - }, - { - "epoch": 0.9915922846847696, - "grad_norm": 0.7360654473304749, - "learning_rate": 1.0455113198865734e-05, - "loss": 0.3069, - "step": 10526 - }, - { - "epoch": 0.9916864887779374, - "grad_norm": 0.9580203294754028, - "learning_rate": 1.0453604740230434e-05, - "loss": 0.3491, - "step": 10527 - }, - { - "epoch": 0.9917806928711053, - "grad_norm": 0.8052199482917786, - "learning_rate": 1.0452096271252246e-05, - "loss": 0.3402, - "step": 10528 - }, - { - "epoch": 0.9918748969642731, - "grad_norm": 0.6957103610038757, - "learning_rate": 1.0450587791965562e-05, - "loss": 0.2986, - "step": 10529 - }, - { - "epoch": 0.991969101057441, - "grad_norm": 0.7063989639282227, - "learning_rate": 1.0449079302404783e-05, - "loss": 0.2928, - "step": 10530 - }, - { - "epoch": 0.9920633051506088, - "grad_norm": 0.8932196497917175, - "learning_rate": 1.0447570802604298e-05, - "loss": 0.3508, - "step": 10531 - }, - { - "epoch": 0.9921575092437767, - "grad_norm": 0.600779116153717, - "learning_rate": 1.0446062292598508e-05, - "loss": 0.2767, - "step": 10532 - }, - { - "epoch": 0.9922517133369445, - "grad_norm": 0.7640441060066223, - "learning_rate": 1.0444553772421808e-05, - "loss": 0.345, - "step": 10533 - }, - { - "epoch": 0.9923459174301124, - "grad_norm": 0.8529372811317444, - "learning_rate": 1.0443045242108596e-05, - "loss": 0.2843, - "step": 10534 - }, - { - "epoch": 0.9924401215232802, - "grad_norm": 0.9221624135971069, - "learning_rate": 1.0441536701693266e-05, - "loss": 0.2849, - "step": 10535 - }, - { - "epoch": 0.992534325616448, - "grad_norm": 0.8267768025398254, - "learning_rate": 1.0440028151210213e-05, - "loss": 0.3047, - "step": 10536 - }, - { - "epoch": 0.9926285297096159, - "grad_norm": 0.7317991852760315, - "learning_rate": 1.0438519590693842e-05, - "loss": 0.3379, - "step": 10537 - }, - { - "epoch": 0.9927227338027838, - "grad_norm": 0.7200465798377991, - "learning_rate": 1.0437011020178544e-05, - "loss": 0.3151, - "step": 10538 - }, - { - "epoch": 0.9928169378959516, - "grad_norm": 0.8047158122062683, - "learning_rate": 1.0435502439698719e-05, - "loss": 0.2999, - "step": 10539 - }, - { - "epoch": 0.9929111419891195, - "grad_norm": 0.7040883898735046, - "learning_rate": 1.0433993849288768e-05, - "loss": 0.2768, - "step": 10540 - }, - { - "epoch": 0.9930053460822873, - "grad_norm": 0.6733881831169128, - "learning_rate": 1.0432485248983081e-05, - "loss": 0.2618, - "step": 10541 - }, - { - "epoch": 0.9930995501754551, - "grad_norm": 1.1577588319778442, - "learning_rate": 1.0430976638816064e-05, - "loss": 0.2659, - "step": 10542 - }, - { - "epoch": 0.993193754268623, - "grad_norm": 0.8506754040718079, - "learning_rate": 1.0429468018822115e-05, - "loss": 0.3336, - "step": 10543 - }, - { - "epoch": 0.9932879583617908, - "grad_norm": 0.6909233331680298, - "learning_rate": 1.0427959389035626e-05, - "loss": 0.271, - "step": 10544 - }, - { - "epoch": 0.9933821624549587, - "grad_norm": 0.7297505140304565, - "learning_rate": 1.0426450749491006e-05, - "loss": 0.3096, - "step": 10545 - }, - { - "epoch": 0.9934763665481265, - "grad_norm": 0.7545581459999084, - "learning_rate": 1.0424942100222648e-05, - 
"loss": 0.3125, - "step": 10546 - }, - { - "epoch": 0.9935705706412944, - "grad_norm": 0.6902565956115723, - "learning_rate": 1.0423433441264952e-05, - "loss": 0.3131, - "step": 10547 - }, - { - "epoch": 0.9936647747344622, - "grad_norm": 0.7904943227767944, - "learning_rate": 1.042192477265232e-05, - "loss": 0.2775, - "step": 10548 - }, - { - "epoch": 0.9937589788276301, - "grad_norm": 0.7091799974441528, - "learning_rate": 1.042041609441915e-05, - "loss": 0.2871, - "step": 10549 - }, - { - "epoch": 0.9938531829207979, - "grad_norm": 0.7303634285926819, - "learning_rate": 1.0418907406599844e-05, - "loss": 0.3114, - "step": 10550 - }, - { - "epoch": 0.9939473870139658, - "grad_norm": 0.6448248624801636, - "learning_rate": 1.0417398709228797e-05, - "loss": 0.2664, - "step": 10551 - }, - { - "epoch": 0.9940415911071336, - "grad_norm": 0.9918820858001709, - "learning_rate": 1.0415890002340417e-05, - "loss": 0.2608, - "step": 10552 - }, - { - "epoch": 0.9941357952003015, - "grad_norm": 0.6706486344337463, - "learning_rate": 1.0414381285969102e-05, - "loss": 0.2819, - "step": 10553 - }, - { - "epoch": 0.9942299992934693, - "grad_norm": 0.7919266223907471, - "learning_rate": 1.0412872560149254e-05, - "loss": 0.2864, - "step": 10554 - }, - { - "epoch": 0.9943242033866372, - "grad_norm": 0.9205607175827026, - "learning_rate": 1.041136382491527e-05, - "loss": 0.3748, - "step": 10555 - }, - { - "epoch": 0.994418407479805, - "grad_norm": 0.7769069075584412, - "learning_rate": 1.0409855080301556e-05, - "loss": 0.3131, - "step": 10556 - }, - { - "epoch": 0.9945126115729729, - "grad_norm": 0.847683846950531, - "learning_rate": 1.0408346326342514e-05, - "loss": 0.3019, - "step": 10557 - }, - { - "epoch": 0.9946068156661407, - "grad_norm": 0.7511101365089417, - "learning_rate": 1.0406837563072542e-05, - "loss": 0.2966, - "step": 10558 - }, - { - "epoch": 0.9947010197593086, - "grad_norm": 0.7299827933311462, - "learning_rate": 1.0405328790526043e-05, - "loss": 0.3143, - "step": 10559 - }, - { - "epoch": 0.9947952238524764, - "grad_norm": 0.7897573709487915, - "learning_rate": 1.0403820008737426e-05, - "loss": 0.2425, - "step": 10560 - }, - { - "epoch": 0.9948894279456443, - "grad_norm": 0.6592634916305542, - "learning_rate": 1.0402311217741083e-05, - "loss": 0.2787, - "step": 10561 - }, - { - "epoch": 0.9949836320388121, - "grad_norm": 0.6745985150337219, - "learning_rate": 1.0400802417571423e-05, - "loss": 0.283, - "step": 10562 - }, - { - "epoch": 0.99507783613198, - "grad_norm": 0.7424817681312561, - "learning_rate": 1.0399293608262853e-05, - "loss": 0.3466, - "step": 10563 - }, - { - "epoch": 0.9951720402251478, - "grad_norm": 0.6457298398017883, - "learning_rate": 1.0397784789849765e-05, - "loss": 0.2557, - "step": 10564 - }, - { - "epoch": 0.9952662443183157, - "grad_norm": 0.7130741477012634, - "learning_rate": 1.039627596236657e-05, - "loss": 0.2846, - "step": 10565 - }, - { - "epoch": 0.9953604484114835, - "grad_norm": 0.7130588293075562, - "learning_rate": 1.0394767125847673e-05, - "loss": 0.2589, - "step": 10566 - }, - { - "epoch": 0.9954546525046514, - "grad_norm": 0.7500724196434021, - "learning_rate": 1.0393258280327471e-05, - "loss": 0.3023, - "step": 10567 - }, - { - "epoch": 0.9955488565978192, - "grad_norm": 0.6984314322471619, - "learning_rate": 1.0391749425840376e-05, - "loss": 0.2962, - "step": 10568 - }, - { - "epoch": 0.9956430606909871, - "grad_norm": 0.7248314619064331, - "learning_rate": 1.0390240562420785e-05, - "loss": 0.3181, - "step": 10569 - }, - { - "epoch": 
0.9957372647841549, - "grad_norm": 0.7185055017471313, - "learning_rate": 1.0388731690103108e-05, - "loss": 0.256, - "step": 10570 - }, - { - "epoch": 0.9958314688773228, - "grad_norm": 0.7030216455459595, - "learning_rate": 1.0387222808921746e-05, - "loss": 0.2612, - "step": 10571 - }, - { - "epoch": 0.9959256729704906, - "grad_norm": 0.792762041091919, - "learning_rate": 1.0385713918911104e-05, - "loss": 0.3206, - "step": 10572 - }, - { - "epoch": 0.9960198770636585, - "grad_norm": 0.7445237040519714, - "learning_rate": 1.038420502010559e-05, - "loss": 0.2767, - "step": 10573 - }, - { - "epoch": 0.9961140811568263, - "grad_norm": 0.9818106293678284, - "learning_rate": 1.038269611253961e-05, - "loss": 0.3443, - "step": 10574 - }, - { - "epoch": 0.9962082852499942, - "grad_norm": 0.705763578414917, - "learning_rate": 1.0381187196247564e-05, - "loss": 0.2888, - "step": 10575 - }, - { - "epoch": 0.996302489343162, - "grad_norm": 0.9176934957504272, - "learning_rate": 1.0379678271263858e-05, - "loss": 0.2826, - "step": 10576 - }, - { - "epoch": 0.9963966934363299, - "grad_norm": 0.7013608813285828, - "learning_rate": 1.0378169337622903e-05, - "loss": 0.2778, - "step": 10577 - }, - { - "epoch": 0.9964908975294977, - "grad_norm": 0.7365220785140991, - "learning_rate": 1.0376660395359102e-05, - "loss": 0.3191, - "step": 10578 - }, - { - "epoch": 0.9965851016226656, - "grad_norm": 0.732226550579071, - "learning_rate": 1.037515144450686e-05, - "loss": 0.2999, - "step": 10579 - }, - { - "epoch": 0.9966793057158334, - "grad_norm": 0.7567113637924194, - "learning_rate": 1.0373642485100588e-05, - "loss": 0.2923, - "step": 10580 - }, - { - "epoch": 0.9967735098090011, - "grad_norm": 0.7371570467948914, - "learning_rate": 1.0372133517174688e-05, - "loss": 0.2887, - "step": 10581 - }, - { - "epoch": 0.996867713902169, - "grad_norm": 0.7575872540473938, - "learning_rate": 1.0370624540763565e-05, - "loss": 0.3108, - "step": 10582 - }, - { - "epoch": 0.9969619179953368, - "grad_norm": 0.6133670210838318, - "learning_rate": 1.0369115555901635e-05, - "loss": 0.2714, - "step": 10583 - }, - { - "epoch": 0.9970561220885047, - "grad_norm": 0.7405552864074707, - "learning_rate": 1.0367606562623294e-05, - "loss": 0.3138, - "step": 10584 - }, - { - "epoch": 0.9971503261816725, - "grad_norm": 0.7196900248527527, - "learning_rate": 1.0366097560962957e-05, - "loss": 0.3005, - "step": 10585 - }, - { - "epoch": 0.9972445302748404, - "grad_norm": 0.7500787377357483, - "learning_rate": 1.036458855095503e-05, - "loss": 0.2924, - "step": 10586 - }, - { - "epoch": 0.9973387343680082, - "grad_norm": 0.7023894190788269, - "learning_rate": 1.036307953263392e-05, - "loss": 0.2997, - "step": 10587 - }, - { - "epoch": 0.9974329384611761, - "grad_norm": 0.7843096256256104, - "learning_rate": 1.0361570506034036e-05, - "loss": 0.3239, - "step": 10588 - }, - { - "epoch": 0.9975271425543439, - "grad_norm": 0.7718815207481384, - "learning_rate": 1.0360061471189784e-05, - "loss": 0.3153, - "step": 10589 - }, - { - "epoch": 0.9976213466475118, - "grad_norm": 0.7284173369407654, - "learning_rate": 1.0358552428135576e-05, - "loss": 0.2914, - "step": 10590 - }, - { - "epoch": 0.9977155507406796, - "grad_norm": 0.6925811767578125, - "learning_rate": 1.0357043376905816e-05, - "loss": 0.2854, - "step": 10591 - }, - { - "epoch": 0.9978097548338475, - "grad_norm": 0.7613288760185242, - "learning_rate": 1.0355534317534914e-05, - "loss": 0.3135, - "step": 10592 - }, - { - "epoch": 0.9979039589270153, - "grad_norm": 0.756247341632843, - 
"learning_rate": 1.035402525005728e-05, - "loss": 0.2934, - "step": 10593 - }, - { - "epoch": 0.9979981630201832, - "grad_norm": 0.7116743326187134, - "learning_rate": 1.0352516174507325e-05, - "loss": 0.2988, - "step": 10594 - }, - { - "epoch": 0.998092367113351, - "grad_norm": 0.7530072331428528, - "learning_rate": 1.0351007090919457e-05, - "loss": 0.3, - "step": 10595 - }, - { - "epoch": 0.9981865712065189, - "grad_norm": 0.7185389995574951, - "learning_rate": 1.0349497999328077e-05, - "loss": 0.2883, - "step": 10596 - }, - { - "epoch": 0.9982807752996867, - "grad_norm": 0.6984468102455139, - "learning_rate": 1.034798889976761e-05, - "loss": 0.268, - "step": 10597 - }, - { - "epoch": 0.9983749793928546, - "grad_norm": 0.7011573910713196, - "learning_rate": 1.0346479792272454e-05, - "loss": 0.2977, - "step": 10598 - }, - { - "epoch": 0.9984691834860224, - "grad_norm": 0.7571693658828735, - "learning_rate": 1.0344970676877021e-05, - "loss": 0.3152, - "step": 10599 - }, - { - "epoch": 0.9985633875791903, - "grad_norm": 0.9057065844535828, - "learning_rate": 1.034346155361573e-05, - "loss": 0.3215, - "step": 10600 - }, - { - "epoch": 0.9986575916723581, - "grad_norm": 0.7775444388389587, - "learning_rate": 1.0341952422522979e-05, - "loss": 0.337, - "step": 10601 - }, - { - "epoch": 0.998751795765526, - "grad_norm": 0.6971397995948792, - "learning_rate": 1.0340443283633183e-05, - "loss": 0.309, - "step": 10602 - }, - { - "epoch": 0.9988459998586938, - "grad_norm": 0.7716683745384216, - "learning_rate": 1.033893413698076e-05, - "loss": 0.3643, - "step": 10603 - }, - { - "epoch": 0.9989402039518617, - "grad_norm": 0.6274122595787048, - "learning_rate": 1.0337424982600109e-05, - "loss": 0.2364, - "step": 10604 - }, - { - "epoch": 0.9990344080450295, - "grad_norm": 0.8103241324424744, - "learning_rate": 1.0335915820525649e-05, - "loss": 0.3426, - "step": 10605 - }, - { - "epoch": 0.9991286121381974, - "grad_norm": 0.700080931186676, - "learning_rate": 1.0334406650791788e-05, - "loss": 0.2933, - "step": 10606 - }, - { - "epoch": 0.9992228162313652, - "grad_norm": 0.7084948420524597, - "learning_rate": 1.0332897473432937e-05, - "loss": 0.2848, - "step": 10607 - }, - { - "epoch": 0.9993170203245331, - "grad_norm": 0.7010513544082642, - "learning_rate": 1.033138828848351e-05, - "loss": 0.2712, - "step": 10608 - }, - { - "epoch": 0.9994112244177009, - "grad_norm": 0.7678072452545166, - "learning_rate": 1.0329879095977917e-05, - "loss": 0.2786, - "step": 10609 - }, - { - "epoch": 0.9995054285108688, - "grad_norm": 0.7514888644218445, - "learning_rate": 1.0328369895950572e-05, - "loss": 0.2837, - "step": 10610 - }, - { - "epoch": 0.9995996326040366, - "grad_norm": 0.6897455453872681, - "learning_rate": 1.0326860688435884e-05, - "loss": 0.3043, - "step": 10611 - }, - { - "epoch": 0.9996938366972045, - "grad_norm": 0.823022186756134, - "learning_rate": 1.0325351473468265e-05, - "loss": 0.3451, - "step": 10612 - }, - { - "epoch": 0.9997880407903723, - "grad_norm": 0.7792565822601318, - "learning_rate": 1.0323842251082132e-05, - "loss": 0.296, - "step": 10613 - }, - { - "epoch": 0.9998822448835402, - "grad_norm": 0.9984522461891174, - "learning_rate": 1.0322333021311896e-05, - "loss": 0.3364, - "step": 10614 - }, - { - "epoch": 0.999976448976708, - "grad_norm": 0.7459888458251953, - "learning_rate": 1.0320823784191967e-05, - "loss": 0.2825, - "step": 10615 - }, - { - "epoch": 1.000070653069876, - "grad_norm": 0.5776287317276001, - "learning_rate": 1.0319314539756758e-05, - "loss": 0.2124, - "step": 10616 - 
}, - { - "epoch": 1.0001648571630437, - "grad_norm": 0.611218273639679, - "learning_rate": 1.0317805288040686e-05, - "loss": 0.2471, - "step": 10617 - }, - { - "epoch": 1.0002590612562117, - "grad_norm": 0.6110851168632507, - "learning_rate": 1.031629602907816e-05, - "loss": 0.1887, - "step": 10618 - }, - { - "epoch": 1.0003532653493794, - "grad_norm": 0.556437611579895, - "learning_rate": 1.0314786762903595e-05, - "loss": 0.2225, - "step": 10619 - }, - { - "epoch": 1.0004474694425474, - "grad_norm": 0.5871204137802124, - "learning_rate": 1.031327748955141e-05, - "loss": 0.2372, - "step": 10620 - }, - { - "epoch": 1.000541673535715, - "grad_norm": 0.6837234497070312, - "learning_rate": 1.0311768209056008e-05, - "loss": 0.2108, - "step": 10621 - }, - { - "epoch": 1.000635877628883, - "grad_norm": 0.5930719971656799, - "learning_rate": 1.031025892145181e-05, - "loss": 0.2146, - "step": 10622 - }, - { - "epoch": 1.0007300817220508, - "grad_norm": 0.6106625199317932, - "learning_rate": 1.0308749626773231e-05, - "loss": 0.2136, - "step": 10623 - }, - { - "epoch": 1.0008242858152188, - "grad_norm": 0.7455571889877319, - "learning_rate": 1.030724032505468e-05, - "loss": 0.2166, - "step": 10624 - }, - { - "epoch": 1.0009184899083865, - "grad_norm": 0.6277820467948914, - "learning_rate": 1.0305731016330575e-05, - "loss": 0.2535, - "step": 10625 - }, - { - "epoch": 1.0010126940015545, - "grad_norm": 0.6769752502441406, - "learning_rate": 1.0304221700635333e-05, - "loss": 0.216, - "step": 10626 - }, - { - "epoch": 1.0011068980947222, - "grad_norm": 0.728179395198822, - "learning_rate": 1.0302712378003364e-05, - "loss": 0.2411, - "step": 10627 - }, - { - "epoch": 1.0012011021878902, - "grad_norm": 0.6951244473457336, - "learning_rate": 1.0301203048469084e-05, - "loss": 0.208, - "step": 10628 - }, - { - "epoch": 1.001295306281058, - "grad_norm": 0.6990310549736023, - "learning_rate": 1.0299693712066909e-05, - "loss": 0.2267, - "step": 10629 - }, - { - "epoch": 1.0013895103742259, - "grad_norm": 0.6202535629272461, - "learning_rate": 1.0298184368831254e-05, - "loss": 0.2249, - "step": 10630 - }, - { - "epoch": 1.0014837144673936, - "grad_norm": 0.5896943211555481, - "learning_rate": 1.0296675018796536e-05, - "loss": 0.2036, - "step": 10631 - }, - { - "epoch": 1.0015779185605616, - "grad_norm": 0.6231122016906738, - "learning_rate": 1.0295165661997164e-05, - "loss": 0.2477, - "step": 10632 - }, - { - "epoch": 1.0016721226537293, - "grad_norm": 0.6731133460998535, - "learning_rate": 1.0293656298467562e-05, - "loss": 0.2097, - "step": 10633 - }, - { - "epoch": 1.0017663267468973, - "grad_norm": 0.645645797252655, - "learning_rate": 1.0292146928242141e-05, - "loss": 0.2147, - "step": 10634 - }, - { - "epoch": 1.001860530840065, - "grad_norm": 0.7247464656829834, - "learning_rate": 1.029063755135532e-05, - "loss": 0.1859, - "step": 10635 - }, - { - "epoch": 1.001954734933233, - "grad_norm": 0.811730146408081, - "learning_rate": 1.028912816784151e-05, - "loss": 0.2021, - "step": 10636 - }, - { - "epoch": 1.0020489390264007, - "grad_norm": 0.7885605692863464, - "learning_rate": 1.0287618777735132e-05, - "loss": 0.2245, - "step": 10637 - }, - { - "epoch": 1.0021431431195686, - "grad_norm": 0.652739405632019, - "learning_rate": 1.0286109381070603e-05, - "loss": 0.2233, - "step": 10638 - }, - { - "epoch": 1.0022373472127364, - "grad_norm": 0.7091842889785767, - "learning_rate": 1.0284599977882336e-05, - "loss": 0.1906, - "step": 10639 - }, - { - "epoch": 1.0023315513059043, - "grad_norm": 0.6556228399276733, - 
"learning_rate": 1.0283090568204752e-05, - "loss": 0.2275, - "step": 10640 - }, - { - "epoch": 1.002425755399072, - "grad_norm": 0.7560815811157227, - "learning_rate": 1.0281581152072261e-05, - "loss": 0.1991, - "step": 10641 - }, - { - "epoch": 1.00251995949224, - "grad_norm": 0.6686923503875732, - "learning_rate": 1.0280071729519285e-05, - "loss": 0.2113, - "step": 10642 - }, - { - "epoch": 1.0026141635854078, - "grad_norm": 0.6659157872200012, - "learning_rate": 1.0278562300580246e-05, - "loss": 0.2184, - "step": 10643 - }, - { - "epoch": 1.0027083676785757, - "grad_norm": 0.7567133903503418, - "learning_rate": 1.027705286528955e-05, - "loss": 0.2211, - "step": 10644 - }, - { - "epoch": 1.0028025717717435, - "grad_norm": 0.654885470867157, - "learning_rate": 1.0275543423681622e-05, - "loss": 0.2028, - "step": 10645 - }, - { - "epoch": 1.0028967758649114, - "grad_norm": 0.7210602164268494, - "learning_rate": 1.0274033975790878e-05, - "loss": 0.2532, - "step": 10646 - }, - { - "epoch": 1.0029909799580792, - "grad_norm": 0.8513142466545105, - "learning_rate": 1.0272524521651732e-05, - "loss": 0.202, - "step": 10647 - }, - { - "epoch": 1.0030851840512471, - "grad_norm": 0.8838748335838318, - "learning_rate": 1.027101506129861e-05, - "loss": 0.2338, - "step": 10648 - }, - { - "epoch": 1.0031793881444149, - "grad_norm": 0.9266117215156555, - "learning_rate": 1.0269505594765925e-05, - "loss": 0.2567, - "step": 10649 - }, - { - "epoch": 1.0032735922375826, - "grad_norm": 0.6988517045974731, - "learning_rate": 1.0267996122088095e-05, - "loss": 0.1968, - "step": 10650 - }, - { - "epoch": 1.0033677963307506, - "grad_norm": 0.6875061988830566, - "learning_rate": 1.0266486643299539e-05, - "loss": 0.2122, - "step": 10651 - }, - { - "epoch": 1.0034620004239183, - "grad_norm": 0.7033585906028748, - "learning_rate": 1.0264977158434673e-05, - "loss": 0.2196, - "step": 10652 - }, - { - "epoch": 1.0035562045170863, - "grad_norm": 0.7428792715072632, - "learning_rate": 1.026346766752792e-05, - "loss": 0.2183, - "step": 10653 - }, - { - "epoch": 1.003650408610254, - "grad_norm": 0.6367530822753906, - "learning_rate": 1.0261958170613697e-05, - "loss": 0.2193, - "step": 10654 - }, - { - "epoch": 1.003744612703422, - "grad_norm": 0.6572229862213135, - "learning_rate": 1.0260448667726424e-05, - "loss": 0.2164, - "step": 10655 - }, - { - "epoch": 1.0038388167965897, - "grad_norm": 0.5711113810539246, - "learning_rate": 1.0258939158900514e-05, - "loss": 0.1956, - "step": 10656 - }, - { - "epoch": 1.0039330208897577, - "grad_norm": 0.6450173258781433, - "learning_rate": 1.0257429644170393e-05, - "loss": 0.1928, - "step": 10657 - }, - { - "epoch": 1.0040272249829254, - "grad_norm": 0.6765127182006836, - "learning_rate": 1.025592012357048e-05, - "loss": 0.2384, - "step": 10658 - }, - { - "epoch": 1.0041214290760934, - "grad_norm": 0.6757077574729919, - "learning_rate": 1.0254410597135189e-05, - "loss": 0.2337, - "step": 10659 - }, - { - "epoch": 1.004215633169261, - "grad_norm": 0.611599862575531, - "learning_rate": 1.0252901064898949e-05, - "loss": 0.19, - "step": 10660 - }, - { - "epoch": 1.004309837262429, - "grad_norm": 0.5806671380996704, - "learning_rate": 1.0251391526896169e-05, - "loss": 0.1947, - "step": 10661 - }, - { - "epoch": 1.0044040413555968, - "grad_norm": 0.67947918176651, - "learning_rate": 1.0249881983161272e-05, - "loss": 0.2318, - "step": 10662 - }, - { - "epoch": 1.0044982454487648, - "grad_norm": 0.725274384021759, - "learning_rate": 1.0248372433728682e-05, - "loss": 0.2054, - "step": 10663 - 
}, - { - "epoch": 1.0045924495419325, - "grad_norm": 0.6442673802375793, - "learning_rate": 1.0246862878632815e-05, - "loss": 0.1971, - "step": 10664 - }, - { - "epoch": 1.0046866536351005, - "grad_norm": 0.6970698833465576, - "learning_rate": 1.0245353317908094e-05, - "loss": 0.2096, - "step": 10665 - }, - { - "epoch": 1.0047808577282682, - "grad_norm": 0.8498097658157349, - "learning_rate": 1.0243843751588937e-05, - "loss": 0.2019, - "step": 10666 - }, - { - "epoch": 1.0048750618214362, - "grad_norm": 0.60224848985672, - "learning_rate": 1.0242334179709763e-05, - "loss": 0.1702, - "step": 10667 - }, - { - "epoch": 1.004969265914604, - "grad_norm": 0.6674405932426453, - "learning_rate": 1.0240824602304997e-05, - "loss": 0.2119, - "step": 10668 - }, - { - "epoch": 1.0050634700077719, - "grad_norm": 0.59090256690979, - "learning_rate": 1.023931501940906e-05, - "loss": 0.1856, - "step": 10669 - }, - { - "epoch": 1.0051576741009396, - "grad_norm": 0.5844379663467407, - "learning_rate": 1.0237805431056369e-05, - "loss": 0.1678, - "step": 10670 - }, - { - "epoch": 1.0052518781941076, - "grad_norm": 0.7779214978218079, - "learning_rate": 1.0236295837281347e-05, - "loss": 0.2177, - "step": 10671 - }, - { - "epoch": 1.0053460822872753, - "grad_norm": 0.7210147380828857, - "learning_rate": 1.0234786238118411e-05, - "loss": 0.1968, - "step": 10672 - }, - { - "epoch": 1.0054402863804432, - "grad_norm": 0.6426734924316406, - "learning_rate": 1.0233276633601986e-05, - "loss": 0.204, - "step": 10673 - }, - { - "epoch": 1.005534490473611, - "grad_norm": 0.786760687828064, - "learning_rate": 1.0231767023766497e-05, - "loss": 0.2029, - "step": 10674 - }, - { - "epoch": 1.005628694566779, - "grad_norm": 0.6654820442199707, - "learning_rate": 1.023025740864636e-05, - "loss": 0.2221, - "step": 10675 - }, - { - "epoch": 1.0057228986599467, - "grad_norm": 0.6283759474754333, - "learning_rate": 1.0228747788275997e-05, - "loss": 0.2045, - "step": 10676 - }, - { - "epoch": 1.0058171027531146, - "grad_norm": 0.6686487197875977, - "learning_rate": 1.0227238162689832e-05, - "loss": 0.1847, - "step": 10677 - }, - { - "epoch": 1.0059113068462824, - "grad_norm": 0.7455902695655823, - "learning_rate": 1.0225728531922284e-05, - "loss": 0.252, - "step": 10678 - }, - { - "epoch": 1.0060055109394503, - "grad_norm": 0.6051475405693054, - "learning_rate": 1.0224218896007776e-05, - "loss": 0.2022, - "step": 10679 - }, - { - "epoch": 1.006099715032618, - "grad_norm": 0.6798374652862549, - "learning_rate": 1.0222709254980733e-05, - "loss": 0.1825, - "step": 10680 - }, - { - "epoch": 1.006193919125786, - "grad_norm": 0.8446162939071655, - "learning_rate": 1.0221199608875572e-05, - "loss": 0.2211, - "step": 10681 - }, - { - "epoch": 1.0062881232189538, - "grad_norm": 0.6640855073928833, - "learning_rate": 1.021968995772672e-05, - "loss": 0.2039, - "step": 10682 - }, - { - "epoch": 1.0063823273121217, - "grad_norm": 0.7586714029312134, - "learning_rate": 1.0218180301568595e-05, - "loss": 0.2162, - "step": 10683 - }, - { - "epoch": 1.0064765314052895, - "grad_norm": 0.6467330455780029, - "learning_rate": 1.0216670640435622e-05, - "loss": 0.1713, - "step": 10684 - }, - { - "epoch": 1.0065707354984574, - "grad_norm": 0.5871751308441162, - "learning_rate": 1.0215160974362224e-05, - "loss": 0.1872, - "step": 10685 - }, - { - "epoch": 1.0066649395916252, - "grad_norm": 0.6697161197662354, - "learning_rate": 1.0213651303382824e-05, - "loss": 0.2269, - "step": 10686 - }, - { - "epoch": 1.0067591436847931, - "grad_norm": 0.6335304975509644, 
- "learning_rate": 1.021214162753184e-05, - "loss": 0.2163, - "step": 10687 - }, - { - "epoch": 1.0068533477779609, - "grad_norm": 0.5786669254302979, - "learning_rate": 1.0210631946843703e-05, - "loss": 0.2004, - "step": 10688 - }, - { - "epoch": 1.0069475518711288, - "grad_norm": 0.6199154257774353, - "learning_rate": 1.0209122261352831e-05, - "loss": 0.2083, - "step": 10689 - }, - { - "epoch": 1.0070417559642966, - "grad_norm": 0.7040600180625916, - "learning_rate": 1.0207612571093648e-05, - "loss": 0.2471, - "step": 10690 - }, - { - "epoch": 1.0071359600574645, - "grad_norm": 0.7345625162124634, - "learning_rate": 1.0206102876100576e-05, - "loss": 0.228, - "step": 10691 - }, - { - "epoch": 1.0072301641506323, - "grad_norm": 0.6340999007225037, - "learning_rate": 1.020459317640804e-05, - "loss": 0.2024, - "step": 10692 - }, - { - "epoch": 1.0073243682438002, - "grad_norm": 0.7458804249763489, - "learning_rate": 1.0203083472050463e-05, - "loss": 0.2293, - "step": 10693 - }, - { - "epoch": 1.007418572336968, - "grad_norm": 0.5789769291877747, - "learning_rate": 1.0201573763062272e-05, - "loss": 0.1882, - "step": 10694 - }, - { - "epoch": 1.007512776430136, - "grad_norm": 0.624512255191803, - "learning_rate": 1.0200064049477885e-05, - "loss": 0.2011, - "step": 10695 - }, - { - "epoch": 1.0076069805233037, - "grad_norm": 0.9252504706382751, - "learning_rate": 1.0198554331331725e-05, - "loss": 0.2212, - "step": 10696 - }, - { - "epoch": 1.0077011846164716, - "grad_norm": 0.7407596111297607, - "learning_rate": 1.0197044608658223e-05, - "loss": 0.221, - "step": 10697 - }, - { - "epoch": 1.0077953887096394, - "grad_norm": 0.6435703039169312, - "learning_rate": 1.01955348814918e-05, - "loss": 0.2135, - "step": 10698 - }, - { - "epoch": 1.0078895928028073, - "grad_norm": 0.5939406156539917, - "learning_rate": 1.0194025149866875e-05, - "loss": 0.1927, - "step": 10699 - }, - { - "epoch": 1.007983796895975, - "grad_norm": 0.6581075191497803, - "learning_rate": 1.0192515413817882e-05, - "loss": 0.2022, - "step": 10700 - }, - { - "epoch": 1.008078000989143, - "grad_norm": 0.7556645274162292, - "learning_rate": 1.0191005673379235e-05, - "loss": 0.2247, - "step": 10701 - }, - { - "epoch": 1.0081722050823108, - "grad_norm": 0.671176016330719, - "learning_rate": 1.0189495928585367e-05, - "loss": 0.234, - "step": 10702 - }, - { - "epoch": 1.0082664091754787, - "grad_norm": 0.6827737092971802, - "learning_rate": 1.0187986179470698e-05, - "loss": 0.2139, - "step": 10703 - }, - { - "epoch": 1.0083606132686465, - "grad_norm": 0.9258418679237366, - "learning_rate": 1.0186476426069649e-05, - "loss": 0.2034, - "step": 10704 - }, - { - "epoch": 1.0084548173618144, - "grad_norm": 0.6644185781478882, - "learning_rate": 1.0184966668416653e-05, - "loss": 0.2176, - "step": 10705 - }, - { - "epoch": 1.0085490214549822, - "grad_norm": 0.5872127413749695, - "learning_rate": 1.0183456906546132e-05, - "loss": 0.178, - "step": 10706 - }, - { - "epoch": 1.0086432255481501, - "grad_norm": 0.6497471928596497, - "learning_rate": 1.0181947140492507e-05, - "loss": 0.2437, - "step": 10707 - }, - { - "epoch": 1.0087374296413179, - "grad_norm": 3.966637372970581, - "learning_rate": 1.018043737029021e-05, - "loss": 0.2113, - "step": 10708 - }, - { - "epoch": 1.0088316337344858, - "grad_norm": 0.6871297359466553, - "learning_rate": 1.0178927595973658e-05, - "loss": 0.2056, - "step": 10709 - }, - { - "epoch": 1.0089258378276535, - "grad_norm": 0.6563407182693481, - "learning_rate": 1.0177417817577282e-05, - "loss": 0.2176, - "step": 
10710 - }, - { - "epoch": 1.0090200419208215, - "grad_norm": 0.7464789748191833, - "learning_rate": 1.0175908035135505e-05, - "loss": 0.2365, - "step": 10711 - }, - { - "epoch": 1.0091142460139892, - "grad_norm": 0.6258571743965149, - "learning_rate": 1.017439824868275e-05, - "loss": 0.1954, - "step": 10712 - }, - { - "epoch": 1.0092084501071572, - "grad_norm": 0.7025963664054871, - "learning_rate": 1.0172888458253447e-05, - "loss": 0.2411, - "step": 10713 - }, - { - "epoch": 1.009302654200325, - "grad_norm": 0.7283008098602295, - "learning_rate": 1.017137866388202e-05, - "loss": 0.1888, - "step": 10714 - }, - { - "epoch": 1.009396858293493, - "grad_norm": 0.6318821907043457, - "learning_rate": 1.0169868865602896e-05, - "loss": 0.1918, - "step": 10715 - }, - { - "epoch": 1.0094910623866606, - "grad_norm": 0.6122881770133972, - "learning_rate": 1.0168359063450496e-05, - "loss": 0.1975, - "step": 10716 - }, - { - "epoch": 1.0095852664798286, - "grad_norm": 0.6684684157371521, - "learning_rate": 1.0166849257459252e-05, - "loss": 0.2264, - "step": 10717 - }, - { - "epoch": 1.0096794705729963, - "grad_norm": 0.6784617304801941, - "learning_rate": 1.0165339447663586e-05, - "loss": 0.2152, - "step": 10718 - }, - { - "epoch": 1.0097736746661643, - "grad_norm": 0.5989708304405212, - "learning_rate": 1.0163829634097924e-05, - "loss": 0.1978, - "step": 10719 - }, - { - "epoch": 1.009867878759332, - "grad_norm": 0.6263830661773682, - "learning_rate": 1.0162319816796695e-05, - "loss": 0.1702, - "step": 10720 - }, - { - "epoch": 1.0099620828525, - "grad_norm": 0.7663062214851379, - "learning_rate": 1.016080999579432e-05, - "loss": 0.2473, - "step": 10721 - }, - { - "epoch": 1.0100562869456677, - "grad_norm": 0.7144594192504883, - "learning_rate": 1.0159300171125232e-05, - "loss": 0.2081, - "step": 10722 - }, - { - "epoch": 1.0101504910388357, - "grad_norm": 0.6704587936401367, - "learning_rate": 1.0157790342823852e-05, - "loss": 0.2102, - "step": 10723 - }, - { - "epoch": 1.0102446951320034, - "grad_norm": 0.6900226473808289, - "learning_rate": 1.0156280510924605e-05, - "loss": 0.2229, - "step": 10724 - }, - { - "epoch": 1.0103388992251714, - "grad_norm": 0.630725085735321, - "learning_rate": 1.0154770675461925e-05, - "loss": 0.2113, - "step": 10725 - }, - { - "epoch": 1.0104331033183391, - "grad_norm": 0.6396355032920837, - "learning_rate": 1.0153260836470233e-05, - "loss": 0.2056, - "step": 10726 - }, - { - "epoch": 1.010527307411507, - "grad_norm": 0.6431342959403992, - "learning_rate": 1.0151750993983956e-05, - "loss": 0.2118, - "step": 10727 - }, - { - "epoch": 1.0106215115046748, - "grad_norm": 0.6093985438346863, - "learning_rate": 1.0150241148037526e-05, - "loss": 0.1958, - "step": 10728 - }, - { - "epoch": 1.0107157155978428, - "grad_norm": 0.7072876691818237, - "learning_rate": 1.014873129866536e-05, - "loss": 0.2537, - "step": 10729 - }, - { - "epoch": 1.0108099196910105, - "grad_norm": 0.6593238711357117, - "learning_rate": 1.0147221445901893e-05, - "loss": 0.2219, - "step": 10730 - }, - { - "epoch": 1.0109041237841785, - "grad_norm": 0.6366419792175293, - "learning_rate": 1.0145711589781549e-05, - "loss": 0.1772, - "step": 10731 - }, - { - "epoch": 1.0109983278773462, - "grad_norm": 0.781413197517395, - "learning_rate": 1.0144201730338754e-05, - "loss": 0.2034, - "step": 10732 - }, - { - "epoch": 1.0110925319705142, - "grad_norm": 0.6375175714492798, - "learning_rate": 1.0142691867607937e-05, - "loss": 0.2266, - "step": 10733 - }, - { - "epoch": 1.011186736063682, - "grad_norm": 
0.6226152181625366, - "learning_rate": 1.0141182001623526e-05, - "loss": 0.2056, - "step": 10734 - }, - { - "epoch": 1.0112809401568499, - "grad_norm": 0.6545280814170837, - "learning_rate": 1.0139672132419946e-05, - "loss": 0.2032, - "step": 10735 - }, - { - "epoch": 1.0113751442500176, - "grad_norm": 0.6611790657043457, - "learning_rate": 1.0138162260031625e-05, - "loss": 0.2471, - "step": 10736 - }, - { - "epoch": 1.0114693483431856, - "grad_norm": 0.6579394340515137, - "learning_rate": 1.0136652384492993e-05, - "loss": 0.2119, - "step": 10737 - }, - { - "epoch": 1.0115635524363533, - "grad_norm": 0.6166621446609497, - "learning_rate": 1.0135142505838473e-05, - "loss": 0.1873, - "step": 10738 - }, - { - "epoch": 1.0116577565295213, - "grad_norm": 0.6546235680580139, - "learning_rate": 1.0133632624102495e-05, - "loss": 0.2196, - "step": 10739 - }, - { - "epoch": 1.011751960622689, - "grad_norm": 0.728863000869751, - "learning_rate": 1.013212273931949e-05, - "loss": 0.1993, - "step": 10740 - }, - { - "epoch": 1.011846164715857, - "grad_norm": 0.9198817014694214, - "learning_rate": 1.0130612851523877e-05, - "loss": 0.2055, - "step": 10741 - }, - { - "epoch": 1.0119403688090247, - "grad_norm": 0.7382006049156189, - "learning_rate": 1.0129102960750092e-05, - "loss": 0.2055, - "step": 10742 - }, - { - "epoch": 1.0120345729021927, - "grad_norm": 0.6523581147193909, - "learning_rate": 1.012759306703256e-05, - "loss": 0.2293, - "step": 10743 - }, - { - "epoch": 1.0121287769953604, - "grad_norm": 0.6328745484352112, - "learning_rate": 1.0126083170405707e-05, - "loss": 0.1897, - "step": 10744 - }, - { - "epoch": 1.0122229810885284, - "grad_norm": 0.700139045715332, - "learning_rate": 1.0124573270903963e-05, - "loss": 0.2323, - "step": 10745 - }, - { - "epoch": 1.012317185181696, - "grad_norm": 0.7212764620780945, - "learning_rate": 1.0123063368561759e-05, - "loss": 0.2214, - "step": 10746 - }, - { - "epoch": 1.012411389274864, - "grad_norm": 0.6312685608863831, - "learning_rate": 1.0121553463413514e-05, - "loss": 0.2082, - "step": 10747 - }, - { - "epoch": 1.0125055933680318, - "grad_norm": 0.6925767064094543, - "learning_rate": 1.0120043555493669e-05, - "loss": 0.1998, - "step": 10748 - }, - { - "epoch": 1.0125997974611998, - "grad_norm": 0.6870275139808655, - "learning_rate": 1.0118533644836638e-05, - "loss": 0.2121, - "step": 10749 - }, - { - "epoch": 1.0126940015543675, - "grad_norm": 0.7039703726768494, - "learning_rate": 1.0117023731476863e-05, - "loss": 0.2, - "step": 10750 - }, - { - "epoch": 1.0127882056475355, - "grad_norm": 0.6422266364097595, - "learning_rate": 1.0115513815448763e-05, - "loss": 0.2028, - "step": 10751 - }, - { - "epoch": 1.0128824097407032, - "grad_norm": 0.6739658117294312, - "learning_rate": 1.0114003896786768e-05, - "loss": 0.2156, - "step": 10752 - }, - { - "epoch": 1.0129766138338712, - "grad_norm": 0.7240651249885559, - "learning_rate": 1.011249397552531e-05, - "loss": 0.2283, - "step": 10753 - }, - { - "epoch": 1.013070817927039, - "grad_norm": 0.6318476796150208, - "learning_rate": 1.0110984051698815e-05, - "loss": 0.1977, - "step": 10754 - }, - { - "epoch": 1.0131650220202069, - "grad_norm": 0.6608891487121582, - "learning_rate": 1.0109474125341714e-05, - "loss": 0.2149, - "step": 10755 - }, - { - "epoch": 1.0132592261133746, - "grad_norm": 0.6064279675483704, - "learning_rate": 1.0107964196488429e-05, - "loss": 0.2058, - "step": 10756 - }, - { - "epoch": 1.0133534302065426, - "grad_norm": 0.6015726327896118, - "learning_rate": 1.0106454265173396e-05, - 
"loss": 0.1744, - "step": 10757 - }, - { - "epoch": 1.0134476342997103, - "grad_norm": 0.6723529100418091, - "learning_rate": 1.0104944331431042e-05, - "loss": 0.2059, - "step": 10758 - }, - { - "epoch": 1.0135418383928783, - "grad_norm": 0.6537312865257263, - "learning_rate": 1.0103434395295792e-05, - "loss": 0.1999, - "step": 10759 - }, - { - "epoch": 1.013636042486046, - "grad_norm": 0.5958169102668762, - "learning_rate": 1.0101924456802081e-05, - "loss": 0.186, - "step": 10760 - }, - { - "epoch": 1.013730246579214, - "grad_norm": 0.6968947052955627, - "learning_rate": 1.0100414515984334e-05, - "loss": 0.2188, - "step": 10761 - }, - { - "epoch": 1.0138244506723817, - "grad_norm": 0.6198421120643616, - "learning_rate": 1.009890457287698e-05, - "loss": 0.2056, - "step": 10762 - }, - { - "epoch": 1.0139186547655497, - "grad_norm": 0.667673647403717, - "learning_rate": 1.009739462751445e-05, - "loss": 0.2032, - "step": 10763 - }, - { - "epoch": 1.0140128588587174, - "grad_norm": 0.6936910152435303, - "learning_rate": 1.0095884679931167e-05, - "loss": 0.2003, - "step": 10764 - }, - { - "epoch": 1.0141070629518854, - "grad_norm": 0.7887927293777466, - "learning_rate": 1.009437473016157e-05, - "loss": 0.1833, - "step": 10765 - }, - { - "epoch": 1.014201267045053, - "grad_norm": 0.7284408211708069, - "learning_rate": 1.0092864778240083e-05, - "loss": 0.2124, - "step": 10766 - }, - { - "epoch": 1.014295471138221, - "grad_norm": 0.6176424622535706, - "learning_rate": 1.0091354824201132e-05, - "loss": 0.2068, - "step": 10767 - }, - { - "epoch": 1.0143896752313888, - "grad_norm": 0.6302241086959839, - "learning_rate": 1.0089844868079154e-05, - "loss": 0.2106, - "step": 10768 - }, - { - "epoch": 1.0144838793245567, - "grad_norm": 0.58731609582901, - "learning_rate": 1.0088334909908573e-05, - "loss": 0.2072, - "step": 10769 - }, - { - "epoch": 1.0145780834177245, - "grad_norm": 0.6222063302993774, - "learning_rate": 1.0086824949723819e-05, - "loss": 0.1731, - "step": 10770 - }, - { - "epoch": 1.0146722875108924, - "grad_norm": 0.7802337408065796, - "learning_rate": 1.0085314987559323e-05, - "loss": 0.1853, - "step": 10771 - }, - { - "epoch": 1.0147664916040602, - "grad_norm": 0.7324212789535522, - "learning_rate": 1.008380502344951e-05, - "loss": 0.2386, - "step": 10772 - }, - { - "epoch": 1.0148606956972281, - "grad_norm": 0.6973498463630676, - "learning_rate": 1.0082295057428815e-05, - "loss": 0.2009, - "step": 10773 - }, - { - "epoch": 1.0149548997903959, - "grad_norm": 0.6584519743919373, - "learning_rate": 1.0080785089531664e-05, - "loss": 0.213, - "step": 10774 - }, - { - "epoch": 1.0150491038835638, - "grad_norm": 0.6616883277893066, - "learning_rate": 1.007927511979249e-05, - "loss": 0.2092, - "step": 10775 - }, - { - "epoch": 1.0151433079767316, - "grad_norm": 0.6478118300437927, - "learning_rate": 1.0077765148245719e-05, - "loss": 0.2311, - "step": 10776 - }, - { - "epoch": 1.0152375120698995, - "grad_norm": 0.6347939372062683, - "learning_rate": 1.0076255174925784e-05, - "loss": 0.1993, - "step": 10777 - }, - { - "epoch": 1.0153317161630673, - "grad_norm": 0.6749362945556641, - "learning_rate": 1.0074745199867112e-05, - "loss": 0.2187, - "step": 10778 - }, - { - "epoch": 1.0154259202562352, - "grad_norm": 0.6531940698623657, - "learning_rate": 1.0073235223104134e-05, - "loss": 0.2265, - "step": 10779 - }, - { - "epoch": 1.015520124349403, - "grad_norm": 0.6377983689308167, - "learning_rate": 1.0071725244671281e-05, - "loss": 0.2221, - "step": 10780 - }, - { - "epoch": 1.015614328442571, - 
"grad_norm": 0.6461182236671448, - "learning_rate": 1.0070215264602979e-05, - "loss": 0.2089, - "step": 10781 - }, - { - "epoch": 1.0157085325357387, - "grad_norm": 0.6446294188499451, - "learning_rate": 1.0068705282933663e-05, - "loss": 0.1976, - "step": 10782 - }, - { - "epoch": 1.0158027366289066, - "grad_norm": 0.6441165208816528, - "learning_rate": 1.0067195299697759e-05, - "loss": 0.2274, - "step": 10783 - }, - { - "epoch": 1.0158969407220744, - "grad_norm": 0.6639395952224731, - "learning_rate": 1.0065685314929696e-05, - "loss": 0.2471, - "step": 10784 - }, - { - "epoch": 1.0159911448152423, - "grad_norm": 0.6566084027290344, - "learning_rate": 1.0064175328663909e-05, - "loss": 0.2144, - "step": 10785 - }, - { - "epoch": 1.01608534890841, - "grad_norm": 0.667574942111969, - "learning_rate": 1.0062665340934826e-05, - "loss": 0.2055, - "step": 10786 - }, - { - "epoch": 1.016179553001578, - "grad_norm": 0.6373234987258911, - "learning_rate": 1.0061155351776872e-05, - "loss": 0.1934, - "step": 10787 - }, - { - "epoch": 1.0162737570947458, - "grad_norm": 0.6480892300605774, - "learning_rate": 1.0059645361224489e-05, - "loss": 0.1978, - "step": 10788 - }, - { - "epoch": 1.0163679611879135, - "grad_norm": 0.6134656071662903, - "learning_rate": 1.0058135369312091e-05, - "loss": 0.2006, - "step": 10789 - }, - { - "epoch": 1.0164621652810815, - "grad_norm": 0.6004596948623657, - "learning_rate": 1.0056625376074122e-05, - "loss": 0.1936, - "step": 10790 - }, - { - "epoch": 1.0165563693742492, - "grad_norm": 0.6127801537513733, - "learning_rate": 1.0055115381545006e-05, - "loss": 0.2298, - "step": 10791 - }, - { - "epoch": 1.0166505734674172, - "grad_norm": 0.6786653995513916, - "learning_rate": 1.0053605385759174e-05, - "loss": 0.2204, - "step": 10792 - }, - { - "epoch": 1.016744777560585, - "grad_norm": 0.7291155457496643, - "learning_rate": 1.0052095388751054e-05, - "loss": 0.1885, - "step": 10793 - }, - { - "epoch": 1.0168389816537529, - "grad_norm": 0.9409392476081848, - "learning_rate": 1.0050585390555082e-05, - "loss": 0.2066, - "step": 10794 - }, - { - "epoch": 1.0169331857469206, - "grad_norm": 0.6275922060012817, - "learning_rate": 1.0049075391205682e-05, - "loss": 0.2104, - "step": 10795 - }, - { - "epoch": 1.0170273898400886, - "grad_norm": 0.6719104647636414, - "learning_rate": 1.0047565390737289e-05, - "loss": 0.2161, - "step": 10796 - }, - { - "epoch": 1.0171215939332563, - "grad_norm": 0.679837703704834, - "learning_rate": 1.004605538918433e-05, - "loss": 0.2249, - "step": 10797 - }, - { - "epoch": 1.0172157980264243, - "grad_norm": 0.6401480436325073, - "learning_rate": 1.004454538658124e-05, - "loss": 0.2195, - "step": 10798 - }, - { - "epoch": 1.017310002119592, - "grad_norm": 0.6298019886016846, - "learning_rate": 1.0043035382962443e-05, - "loss": 0.205, - "step": 10799 - }, - { - "epoch": 1.01740420621276, - "grad_norm": 0.6132528781890869, - "learning_rate": 1.0041525378362376e-05, - "loss": 0.1985, - "step": 10800 - }, - { - "epoch": 1.0174984103059277, - "grad_norm": 0.7292924523353577, - "learning_rate": 1.0040015372815461e-05, - "loss": 0.1995, - "step": 10801 - }, - { - "epoch": 1.0175926143990957, - "grad_norm": 1.1963964700698853, - "learning_rate": 1.0038505366356137e-05, - "loss": 0.2185, - "step": 10802 - }, - { - "epoch": 1.0176868184922634, - "grad_norm": 0.5991237163543701, - "learning_rate": 1.0036995359018833e-05, - "loss": 0.2053, - "step": 10803 - }, - { - "epoch": 1.0177810225854313, - "grad_norm": 0.6105757355690002, - "learning_rate": 
1.0035485350837972e-05, - "loss": 0.2409, - "step": 10804 - }, - { - "epoch": 1.017875226678599, - "grad_norm": 0.7335970997810364, - "learning_rate": 1.0033975341847995e-05, - "loss": 0.2392, - "step": 10805 - }, - { - "epoch": 1.017969430771767, - "grad_norm": 0.6251050233840942, - "learning_rate": 1.0032465332083326e-05, - "loss": 0.1974, - "step": 10806 - }, - { - "epoch": 1.0180636348649348, - "grad_norm": 0.7072638273239136, - "learning_rate": 1.0030955321578396e-05, - "loss": 0.228, - "step": 10807 - }, - { - "epoch": 1.0181578389581027, - "grad_norm": 0.6788835525512695, - "learning_rate": 1.002944531036764e-05, - "loss": 0.1957, - "step": 10808 - }, - { - "epoch": 1.0182520430512705, - "grad_norm": 0.6908219456672668, - "learning_rate": 1.0027935298485483e-05, - "loss": 0.2295, - "step": 10809 - }, - { - "epoch": 1.0183462471444384, - "grad_norm": 0.6195359230041504, - "learning_rate": 1.0026425285966359e-05, - "loss": 0.2202, - "step": 10810 - }, - { - "epoch": 1.0184404512376062, - "grad_norm": 0.6958038210868835, - "learning_rate": 1.0024915272844697e-05, - "loss": 0.1981, - "step": 10811 - }, - { - "epoch": 1.0185346553307741, - "grad_norm": 0.7101927995681763, - "learning_rate": 1.0023405259154928e-05, - "loss": 0.1961, - "step": 10812 - }, - { - "epoch": 1.0186288594239419, - "grad_norm": 0.6944587230682373, - "learning_rate": 1.0021895244931484e-05, - "loss": 0.2171, - "step": 10813 - }, - { - "epoch": 1.0187230635171098, - "grad_norm": 0.7558724880218506, - "learning_rate": 1.0020385230208793e-05, - "loss": 0.2125, - "step": 10814 - }, - { - "epoch": 1.0188172676102776, - "grad_norm": 0.6603408455848694, - "learning_rate": 1.0018875215021289e-05, - "loss": 0.201, - "step": 10815 - }, - { - "epoch": 1.0189114717034455, - "grad_norm": 0.671822726726532, - "learning_rate": 1.00173651994034e-05, - "loss": 0.2273, - "step": 10816 - }, - { - "epoch": 1.0190056757966133, - "grad_norm": 0.5749891400337219, - "learning_rate": 1.0015855183389555e-05, - "loss": 0.1738, - "step": 10817 - }, - { - "epoch": 1.0190998798897812, - "grad_norm": 0.6361928582191467, - "learning_rate": 1.0014345167014192e-05, - "loss": 0.221, - "step": 10818 - }, - { - "epoch": 1.019194083982949, - "grad_norm": 0.6200592517852783, - "learning_rate": 1.0012835150311735e-05, - "loss": 0.1937, - "step": 10819 - }, - { - "epoch": 1.019288288076117, - "grad_norm": 0.6894503831863403, - "learning_rate": 1.0011325133316618e-05, - "loss": 0.2092, - "step": 10820 - }, - { - "epoch": 1.0193824921692847, - "grad_norm": 0.6642686128616333, - "learning_rate": 1.0009815116063266e-05, - "loss": 0.2247, - "step": 10821 - }, - { - "epoch": 1.0194766962624526, - "grad_norm": 0.6428064703941345, - "learning_rate": 1.0008305098586118e-05, - "loss": 0.2111, - "step": 10822 - }, - { - "epoch": 1.0195709003556204, - "grad_norm": 0.7898526787757874, - "learning_rate": 1.00067950809196e-05, - "loss": 0.2023, - "step": 10823 - }, - { - "epoch": 1.0196651044487883, - "grad_norm": 0.7990137934684753, - "learning_rate": 1.0005285063098142e-05, - "loss": 0.2154, - "step": 10824 - }, - { - "epoch": 1.019759308541956, - "grad_norm": 0.6745801568031311, - "learning_rate": 1.0003775045156181e-05, - "loss": 0.1916, - "step": 10825 - }, - { - "epoch": 1.019853512635124, - "grad_norm": 0.5938249826431274, - "learning_rate": 1.000226502712814e-05, - "loss": 0.184, - "step": 10826 - }, - { - "epoch": 1.0199477167282918, - "grad_norm": 0.7442857623100281, - "learning_rate": 1.0000755009048451e-05, - "loss": 0.2054, - "step": 10827 - }, - { - 
"epoch": 1.0200419208214597, - "grad_norm": 0.7236988544464111, - "learning_rate": 9.999244990951552e-06, - "loss": 0.2061, - "step": 10828 - }, - { - "epoch": 1.0201361249146275, - "grad_norm": 0.6749070286750793, - "learning_rate": 9.997734972871866e-06, - "loss": 0.2091, - "step": 10829 - }, - { - "epoch": 1.0202303290077954, - "grad_norm": 0.7106935977935791, - "learning_rate": 9.99622495484382e-06, - "loss": 0.2112, - "step": 10830 - }, - { - "epoch": 1.0203245331009632, - "grad_norm": 0.6278476715087891, - "learning_rate": 9.99471493690186e-06, - "loss": 0.1964, - "step": 10831 - }, - { - "epoch": 1.0204187371941311, - "grad_norm": 0.6725781559944153, - "learning_rate": 9.993204919080403e-06, - "loss": 0.2281, - "step": 10832 - }, - { - "epoch": 1.0205129412872989, - "grad_norm": 0.8848792910575867, - "learning_rate": 9.991694901413884e-06, - "loss": 0.2322, - "step": 10833 - }, - { - "epoch": 1.0206071453804668, - "grad_norm": 0.6739000678062439, - "learning_rate": 9.990184883936737e-06, - "loss": 0.2271, - "step": 10834 - }, - { - "epoch": 1.0207013494736346, - "grad_norm": 0.6107320785522461, - "learning_rate": 9.988674866683387e-06, - "loss": 0.2006, - "step": 10835 - }, - { - "epoch": 1.0207955535668025, - "grad_norm": 0.690765917301178, - "learning_rate": 9.987164849688268e-06, - "loss": 0.2221, - "step": 10836 - }, - { - "epoch": 1.0208897576599703, - "grad_norm": 1.850305199623108, - "learning_rate": 9.985654832985811e-06, - "loss": 0.2109, - "step": 10837 - }, - { - "epoch": 1.0209839617531382, - "grad_norm": 0.6318346858024597, - "learning_rate": 9.984144816610446e-06, - "loss": 0.2066, - "step": 10838 - }, - { - "epoch": 1.021078165846306, - "grad_norm": 0.6936797499656677, - "learning_rate": 9.982634800596605e-06, - "loss": 0.2236, - "step": 10839 - }, - { - "epoch": 1.021172369939474, - "grad_norm": 0.6386310458183289, - "learning_rate": 9.981124784978715e-06, - "loss": 0.2171, - "step": 10840 - }, - { - "epoch": 1.0212665740326416, - "grad_norm": 0.6437522768974304, - "learning_rate": 9.97961476979121e-06, - "loss": 0.2275, - "step": 10841 - }, - { - "epoch": 1.0213607781258096, - "grad_norm": 0.7574900388717651, - "learning_rate": 9.97810475506852e-06, - "loss": 0.2567, - "step": 10842 - }, - { - "epoch": 1.0214549822189773, - "grad_norm": 0.7311277389526367, - "learning_rate": 9.976594740845074e-06, - "loss": 0.2187, - "step": 10843 - }, - { - "epoch": 1.0215491863121453, - "grad_norm": 0.6731746196746826, - "learning_rate": 9.975084727155305e-06, - "loss": 0.2042, - "step": 10844 - }, - { - "epoch": 1.021643390405313, - "grad_norm": 0.6768555641174316, - "learning_rate": 9.973574714033646e-06, - "loss": 0.2212, - "step": 10845 - }, - { - "epoch": 1.021737594498481, - "grad_norm": 0.6536171436309814, - "learning_rate": 9.972064701514517e-06, - "loss": 0.2525, - "step": 10846 - }, - { - "epoch": 1.0218317985916487, - "grad_norm": 0.6061612963676453, - "learning_rate": 9.970554689632362e-06, - "loss": 0.1982, - "step": 10847 - }, - { - "epoch": 1.0219260026848167, - "grad_norm": 0.7942737936973572, - "learning_rate": 9.969044678421606e-06, - "loss": 0.2146, - "step": 10848 - }, - { - "epoch": 1.0220202067779844, - "grad_norm": 0.6257474422454834, - "learning_rate": 9.967534667916679e-06, - "loss": 0.2119, - "step": 10849 - }, - { - "epoch": 1.0221144108711524, - "grad_norm": 0.6741740703582764, - "learning_rate": 9.966024658152008e-06, - "loss": 0.2179, - "step": 10850 - }, - { - "epoch": 1.0222086149643201, - "grad_norm": 0.6527076363563538, - "learning_rate": 
9.96451464916203e-06, - "loss": 0.2225, - "step": 10851 - }, - { - "epoch": 1.022302819057488, - "grad_norm": 0.8687999248504639, - "learning_rate": 9.963004640981173e-06, - "loss": 0.2367, - "step": 10852 - }, - { - "epoch": 1.0223970231506558, - "grad_norm": 0.626018762588501, - "learning_rate": 9.961494633643865e-06, - "loss": 0.2015, - "step": 10853 - }, - { - "epoch": 1.0224912272438238, - "grad_norm": 0.6199095249176025, - "learning_rate": 9.959984627184542e-06, - "loss": 0.199, - "step": 10854 - }, - { - "epoch": 1.0225854313369915, - "grad_norm": 0.6614289283752441, - "learning_rate": 9.958474621637631e-06, - "loss": 0.2275, - "step": 10855 - }, - { - "epoch": 1.0226796354301595, - "grad_norm": 0.5417171120643616, - "learning_rate": 9.956964617037559e-06, - "loss": 0.1752, - "step": 10856 - }, - { - "epoch": 1.0227738395233272, - "grad_norm": 0.6422715783119202, - "learning_rate": 9.955454613418764e-06, - "loss": 0.2182, - "step": 10857 - }, - { - "epoch": 1.0228680436164952, - "grad_norm": 0.690487802028656, - "learning_rate": 9.953944610815672e-06, - "loss": 0.163, - "step": 10858 - }, - { - "epoch": 1.022962247709663, - "grad_norm": 0.6234443187713623, - "learning_rate": 9.952434609262714e-06, - "loss": 0.1922, - "step": 10859 - }, - { - "epoch": 1.023056451802831, - "grad_norm": 0.7341983914375305, - "learning_rate": 9.95092460879432e-06, - "loss": 0.1863, - "step": 10860 - }, - { - "epoch": 1.0231506558959986, - "grad_norm": 0.6341428160667419, - "learning_rate": 9.949414609444922e-06, - "loss": 0.2195, - "step": 10861 - }, - { - "epoch": 1.0232448599891666, - "grad_norm": 0.6796596646308899, - "learning_rate": 9.947904611248949e-06, - "loss": 0.1961, - "step": 10862 - }, - { - "epoch": 1.0233390640823343, - "grad_norm": 0.6194525361061096, - "learning_rate": 9.946394614240828e-06, - "loss": 0.2029, - "step": 10863 - }, - { - "epoch": 1.0234332681755023, - "grad_norm": 0.7756684422492981, - "learning_rate": 9.944884618454996e-06, - "loss": 0.1822, - "step": 10864 - }, - { - "epoch": 1.02352747226867, - "grad_norm": 0.6143449544906616, - "learning_rate": 9.943374623925883e-06, - "loss": 0.1737, - "step": 10865 - }, - { - "epoch": 1.023621676361838, - "grad_norm": 0.6507072448730469, - "learning_rate": 9.941864630687909e-06, - "loss": 0.2248, - "step": 10866 - }, - { - "epoch": 1.0237158804550057, - "grad_norm": 0.6425033807754517, - "learning_rate": 9.940354638775514e-06, - "loss": 0.2249, - "step": 10867 - }, - { - "epoch": 1.0238100845481737, - "grad_norm": 0.6521602272987366, - "learning_rate": 9.938844648223131e-06, - "loss": 0.2257, - "step": 10868 - }, - { - "epoch": 1.0239042886413414, - "grad_norm": 0.6503294706344604, - "learning_rate": 9.93733465906518e-06, - "loss": 0.2155, - "step": 10869 - }, - { - "epoch": 1.0239984927345094, - "grad_norm": 0.6127226948738098, - "learning_rate": 9.935824671336094e-06, - "loss": 0.1817, - "step": 10870 - }, - { - "epoch": 1.0240926968276771, - "grad_norm": 0.6373822093009949, - "learning_rate": 9.934314685070306e-06, - "loss": 0.1933, - "step": 10871 - }, - { - "epoch": 1.024186900920845, - "grad_norm": 0.653621256351471, - "learning_rate": 9.932804700302246e-06, - "loss": 0.2156, - "step": 10872 - }, - { - "epoch": 1.0242811050140128, - "grad_norm": 0.6706910133361816, - "learning_rate": 9.93129471706634e-06, - "loss": 0.1948, - "step": 10873 - }, - { - "epoch": 1.0243753091071808, - "grad_norm": 0.6643623113632202, - "learning_rate": 9.929784735397023e-06, - "loss": 0.2363, - "step": 10874 - }, - { - "epoch": 1.0244695132003485, 
- "grad_norm": 0.6217973232269287, - "learning_rate": 9.928274755328724e-06, - "loss": 0.1918, - "step": 10875 - }, - { - "epoch": 1.0245637172935165, - "grad_norm": 0.664268970489502, - "learning_rate": 9.926764776895867e-06, - "loss": 0.2057, - "step": 10876 - }, - { - "epoch": 1.0246579213866842, - "grad_norm": 0.7142734527587891, - "learning_rate": 9.925254800132891e-06, - "loss": 0.2035, - "step": 10877 - }, - { - "epoch": 1.0247521254798522, - "grad_norm": 0.6665133833885193, - "learning_rate": 9.92374482507422e-06, - "loss": 0.225, - "step": 10878 - }, - { - "epoch": 1.02484632957302, - "grad_norm": 0.6146891117095947, - "learning_rate": 9.922234851754284e-06, - "loss": 0.2156, - "step": 10879 - }, - { - "epoch": 1.0249405336661879, - "grad_norm": 0.5748670101165771, - "learning_rate": 9.920724880207511e-06, - "loss": 0.1864, - "step": 10880 - }, - { - "epoch": 1.0250347377593556, - "grad_norm": 0.6536259055137634, - "learning_rate": 9.919214910468337e-06, - "loss": 0.1927, - "step": 10881 - }, - { - "epoch": 1.0251289418525236, - "grad_norm": 0.6859068870544434, - "learning_rate": 9.917704942571188e-06, - "loss": 0.1908, - "step": 10882 - }, - { - "epoch": 1.0252231459456913, - "grad_norm": 0.6591455340385437, - "learning_rate": 9.916194976550492e-06, - "loss": 0.2106, - "step": 10883 - }, - { - "epoch": 1.0253173500388593, - "grad_norm": 0.746976375579834, - "learning_rate": 9.914685012440682e-06, - "loss": 0.2315, - "step": 10884 - }, - { - "epoch": 1.025411554132027, - "grad_norm": 0.6313568353652954, - "learning_rate": 9.913175050276186e-06, - "loss": 0.213, - "step": 10885 - }, - { - "epoch": 1.025505758225195, - "grad_norm": 0.6824766993522644, - "learning_rate": 9.911665090091428e-06, - "loss": 0.2032, - "step": 10886 - }, - { - "epoch": 1.0255999623183627, - "grad_norm": 0.6259440183639526, - "learning_rate": 9.910155131920847e-06, - "loss": 0.2026, - "step": 10887 - }, - { - "epoch": 1.0256941664115307, - "grad_norm": 0.6822636723518372, - "learning_rate": 9.90864517579887e-06, - "loss": 0.2129, - "step": 10888 - }, - { - "epoch": 1.0257883705046984, - "grad_norm": 0.6597427129745483, - "learning_rate": 9.907135221759923e-06, - "loss": 0.224, - "step": 10889 - }, - { - "epoch": 1.0258825745978664, - "grad_norm": 0.6362802386283875, - "learning_rate": 9.905625269838433e-06, - "loss": 0.2391, - "step": 10890 - }, - { - "epoch": 1.025976778691034, - "grad_norm": 0.5726861357688904, - "learning_rate": 9.904115320068834e-06, - "loss": 0.2121, - "step": 10891 - }, - { - "epoch": 1.026070982784202, - "grad_norm": 0.6730748414993286, - "learning_rate": 9.902605372485557e-06, - "loss": 0.2164, - "step": 10892 - }, - { - "epoch": 1.0261651868773698, - "grad_norm": 0.6526716351509094, - "learning_rate": 9.901095427123023e-06, - "loss": 0.2212, - "step": 10893 - }, - { - "epoch": 1.0262593909705378, - "grad_norm": 0.6757125854492188, - "learning_rate": 9.89958548401567e-06, - "loss": 0.2075, - "step": 10894 - }, - { - "epoch": 1.0263535950637055, - "grad_norm": 0.6140555143356323, - "learning_rate": 9.898075543197922e-06, - "loss": 0.2261, - "step": 10895 - }, - { - "epoch": 1.0264477991568735, - "grad_norm": 0.5689371228218079, - "learning_rate": 9.89656560470421e-06, - "loss": 0.183, - "step": 10896 - }, - { - "epoch": 1.0265420032500412, - "grad_norm": 0.6465091705322266, - "learning_rate": 9.895055668568961e-06, - "loss": 0.2144, - "step": 10897 - }, - { - "epoch": 1.0266362073432092, - "grad_norm": 0.5773385167121887, - "learning_rate": 9.893545734826607e-06, - "loss": 0.1752, - 
"step": 10898 - }, - { - "epoch": 1.0267304114363769, - "grad_norm": 0.6092222332954407, - "learning_rate": 9.892035803511573e-06, - "loss": 0.2131, - "step": 10899 - }, - { - "epoch": 1.0268246155295448, - "grad_norm": 0.6916167140007019, - "learning_rate": 9.89052587465829e-06, - "loss": 0.2065, - "step": 10900 - }, - { - "epoch": 1.0269188196227126, - "grad_norm": 0.6038517355918884, - "learning_rate": 9.889015948301187e-06, - "loss": 0.1852, - "step": 10901 - }, - { - "epoch": 1.0270130237158805, - "grad_norm": 0.6681758165359497, - "learning_rate": 9.887506024474693e-06, - "loss": 0.2398, - "step": 10902 - }, - { - "epoch": 1.0271072278090483, - "grad_norm": 0.6332974433898926, - "learning_rate": 9.885996103213232e-06, - "loss": 0.2119, - "step": 10903 - }, - { - "epoch": 1.0272014319022162, - "grad_norm": 0.6054478287696838, - "learning_rate": 9.88448618455124e-06, - "loss": 0.1904, - "step": 10904 - }, - { - "epoch": 1.027295635995384, - "grad_norm": 0.6860628128051758, - "learning_rate": 9.882976268523142e-06, - "loss": 0.2044, - "step": 10905 - }, - { - "epoch": 1.027389840088552, - "grad_norm": 0.7579703330993652, - "learning_rate": 9.88146635516336e-06, - "loss": 0.2086, - "step": 10906 - }, - { - "epoch": 1.0274840441817197, - "grad_norm": 0.6385505199432373, - "learning_rate": 9.879956444506335e-06, - "loss": 0.222, - "step": 10907 - }, - { - "epoch": 1.0275782482748876, - "grad_norm": 0.653588056564331, - "learning_rate": 9.878446536586488e-06, - "loss": 0.2068, - "step": 10908 - }, - { - "epoch": 1.0276724523680554, - "grad_norm": 0.6662572622299194, - "learning_rate": 9.876936631438248e-06, - "loss": 0.2241, - "step": 10909 - }, - { - "epoch": 1.0277666564612233, - "grad_norm": 0.8349880576133728, - "learning_rate": 9.875426729096039e-06, - "loss": 0.2215, - "step": 10910 - }, - { - "epoch": 1.027860860554391, - "grad_norm": 0.6116170287132263, - "learning_rate": 9.873916829594297e-06, - "loss": 0.1833, - "step": 10911 - }, - { - "epoch": 1.027955064647559, - "grad_norm": 0.6977446675300598, - "learning_rate": 9.872406932967444e-06, - "loss": 0.2297, - "step": 10912 - }, - { - "epoch": 1.0280492687407268, - "grad_norm": 0.7331418395042419, - "learning_rate": 9.87089703924991e-06, - "loss": 0.2323, - "step": 10913 - }, - { - "epoch": 1.0281434728338947, - "grad_norm": 0.7110864520072937, - "learning_rate": 9.869387148476124e-06, - "loss": 0.2375, - "step": 10914 - }, - { - "epoch": 1.0282376769270625, - "grad_norm": 0.630600094795227, - "learning_rate": 9.867877260680515e-06, - "loss": 0.183, - "step": 10915 - }, - { - "epoch": 1.0283318810202304, - "grad_norm": 0.6984477639198303, - "learning_rate": 9.866367375897505e-06, - "loss": 0.2169, - "step": 10916 - }, - { - "epoch": 1.0284260851133982, - "grad_norm": 0.6631993055343628, - "learning_rate": 9.864857494161529e-06, - "loss": 0.2074, - "step": 10917 - }, - { - "epoch": 1.0285202892065661, - "grad_norm": 0.7021864652633667, - "learning_rate": 9.86334761550701e-06, - "loss": 0.2312, - "step": 10918 - }, - { - "epoch": 1.0286144932997339, - "grad_norm": 0.7496550679206848, - "learning_rate": 9.861837739968378e-06, - "loss": 0.197, - "step": 10919 - }, - { - "epoch": 1.0287086973929018, - "grad_norm": 0.6108748316764832, - "learning_rate": 9.860327867580056e-06, - "loss": 0.1962, - "step": 10920 - }, - { - "epoch": 1.0288029014860696, - "grad_norm": 0.8925476670265198, - "learning_rate": 9.858817998376477e-06, - "loss": 0.1996, - "step": 10921 - }, - { - "epoch": 1.0288971055792375, - "grad_norm": 0.7436450123786926, - 
"learning_rate": 9.857308132392068e-06, - "loss": 0.2193, - "step": 10922 - }, - { - "epoch": 1.0289913096724053, - "grad_norm": 0.6071893572807312, - "learning_rate": 9.855798269661247e-06, - "loss": 0.1978, - "step": 10923 - }, - { - "epoch": 1.0290855137655732, - "grad_norm": 0.6819487810134888, - "learning_rate": 9.854288410218455e-06, - "loss": 0.2203, - "step": 10924 - }, - { - "epoch": 1.029179717858741, - "grad_norm": 0.6285249590873718, - "learning_rate": 9.852778554098112e-06, - "loss": 0.2207, - "step": 10925 - }, - { - "epoch": 1.029273921951909, - "grad_norm": 0.6124079823493958, - "learning_rate": 9.851268701334641e-06, - "loss": 0.1822, - "step": 10926 - }, - { - "epoch": 1.0293681260450767, - "grad_norm": 0.7141607999801636, - "learning_rate": 9.849758851962478e-06, - "loss": 0.2219, - "step": 10927 - }, - { - "epoch": 1.0294623301382444, - "grad_norm": 0.6542896032333374, - "learning_rate": 9.848249006016045e-06, - "loss": 0.2122, - "step": 10928 - }, - { - "epoch": 1.0295565342314124, - "grad_norm": 0.6309689283370972, - "learning_rate": 9.846739163529772e-06, - "loss": 0.2033, - "step": 10929 - }, - { - "epoch": 1.02965073832458, - "grad_norm": 0.6441240906715393, - "learning_rate": 9.845229324538076e-06, - "loss": 0.2164, - "step": 10930 - }, - { - "epoch": 1.029744942417748, - "grad_norm": 0.6353326439857483, - "learning_rate": 9.843719489075396e-06, - "loss": 0.1908, - "step": 10931 - }, - { - "epoch": 1.0298391465109158, - "grad_norm": 0.7253024578094482, - "learning_rate": 9.842209657176153e-06, - "loss": 0.2615, - "step": 10932 - }, - { - "epoch": 1.0299333506040838, - "grad_norm": 0.669043779373169, - "learning_rate": 9.840699828874771e-06, - "loss": 0.2226, - "step": 10933 - }, - { - "epoch": 1.0300275546972515, - "grad_norm": 0.5972657799720764, - "learning_rate": 9.839190004205683e-06, - "loss": 0.2161, - "step": 10934 - }, - { - "epoch": 1.0301217587904195, - "grad_norm": 0.6432631015777588, - "learning_rate": 9.83768018320331e-06, - "loss": 0.2063, - "step": 10935 - }, - { - "epoch": 1.0302159628835872, - "grad_norm": 0.7154762744903564, - "learning_rate": 9.836170365902077e-06, - "loss": 0.2356, - "step": 10936 - }, - { - "epoch": 1.0303101669767551, - "grad_norm": 0.7650432586669922, - "learning_rate": 9.834660552336415e-06, - "loss": 0.2184, - "step": 10937 - }, - { - "epoch": 1.0304043710699229, - "grad_norm": 0.6580452919006348, - "learning_rate": 9.83315074254075e-06, - "loss": 0.2188, - "step": 10938 - }, - { - "epoch": 1.0304985751630908, - "grad_norm": 0.6253951787948608, - "learning_rate": 9.831640936549505e-06, - "loss": 0.2178, - "step": 10939 - }, - { - "epoch": 1.0305927792562586, - "grad_norm": 0.6630215048789978, - "learning_rate": 9.830131134397106e-06, - "loss": 0.2232, - "step": 10940 - }, - { - "epoch": 1.0306869833494265, - "grad_norm": 0.7034470438957214, - "learning_rate": 9.828621336117981e-06, - "loss": 0.2141, - "step": 10941 - }, - { - "epoch": 1.0307811874425943, - "grad_norm": 0.6886473894119263, - "learning_rate": 9.827111541746558e-06, - "loss": 0.2268, - "step": 10942 - }, - { - "epoch": 1.0308753915357622, - "grad_norm": 0.6353757977485657, - "learning_rate": 9.82560175131725e-06, - "loss": 0.2147, - "step": 10943 - }, - { - "epoch": 1.03096959562893, - "grad_norm": 0.6014813184738159, - "learning_rate": 9.824091964864499e-06, - "loss": 0.218, - "step": 10944 - }, - { - "epoch": 1.031063799722098, - "grad_norm": 0.6371111273765564, - "learning_rate": 9.822582182422723e-06, - "loss": 0.2031, - "step": 10945 - }, - { - "epoch": 
1.0311580038152657, - "grad_norm": 0.8969886302947998, - "learning_rate": 9.821072404026344e-06, - "loss": 0.2276, - "step": 10946 - }, - { - "epoch": 1.0312522079084336, - "grad_norm": 0.6839308142662048, - "learning_rate": 9.819562629709793e-06, - "loss": 0.2224, - "step": 10947 - }, - { - "epoch": 1.0313464120016014, - "grad_norm": 0.6386514902114868, - "learning_rate": 9.818052859507497e-06, - "loss": 0.2296, - "step": 10948 - }, - { - "epoch": 1.0314406160947693, - "grad_norm": 0.6815319657325745, - "learning_rate": 9.816543093453873e-06, - "loss": 0.2071, - "step": 10949 - }, - { - "epoch": 1.031534820187937, - "grad_norm": 0.6995004415512085, - "learning_rate": 9.81503333158335e-06, - "loss": 0.2135, - "step": 10950 - }, - { - "epoch": 1.031629024281105, - "grad_norm": 0.6554959416389465, - "learning_rate": 9.813523573930353e-06, - "loss": 0.2238, - "step": 10951 - }, - { - "epoch": 1.0317232283742728, - "grad_norm": 0.6632347106933594, - "learning_rate": 9.812013820529307e-06, - "loss": 0.2303, - "step": 10952 - }, - { - "epoch": 1.0318174324674407, - "grad_norm": 0.652472734451294, - "learning_rate": 9.810504071414637e-06, - "loss": 0.1878, - "step": 10953 - }, - { - "epoch": 1.0319116365606085, - "grad_norm": 0.6303935647010803, - "learning_rate": 9.808994326620767e-06, - "loss": 0.2169, - "step": 10954 - }, - { - "epoch": 1.0320058406537764, - "grad_norm": 0.6247922778129578, - "learning_rate": 9.807484586182123e-06, - "loss": 0.2248, - "step": 10955 - }, - { - "epoch": 1.0321000447469442, - "grad_norm": 0.6658794283866882, - "learning_rate": 9.805974850133125e-06, - "loss": 0.2139, - "step": 10956 - }, - { - "epoch": 1.0321942488401121, - "grad_norm": 0.6316248774528503, - "learning_rate": 9.804465118508203e-06, - "loss": 0.1928, - "step": 10957 - }, - { - "epoch": 1.0322884529332799, - "grad_norm": 0.6552563309669495, - "learning_rate": 9.802955391341779e-06, - "loss": 0.216, - "step": 10958 - }, - { - "epoch": 1.0323826570264478, - "grad_norm": 0.6724657416343689, - "learning_rate": 9.801445668668278e-06, - "loss": 0.2377, - "step": 10959 - }, - { - "epoch": 1.0324768611196156, - "grad_norm": 0.8886076807975769, - "learning_rate": 9.79993595052212e-06, - "loss": 0.21, - "step": 10960 - }, - { - "epoch": 1.0325710652127835, - "grad_norm": 0.6898882389068604, - "learning_rate": 9.798426236937733e-06, - "loss": 0.2221, - "step": 10961 - }, - { - "epoch": 1.0326652693059513, - "grad_norm": 0.6199621558189392, - "learning_rate": 9.796916527949542e-06, - "loss": 0.1927, - "step": 10962 - }, - { - "epoch": 1.0327594733991192, - "grad_norm": 0.7419622540473938, - "learning_rate": 9.795406823591962e-06, - "loss": 0.2262, - "step": 10963 - }, - { - "epoch": 1.032853677492287, - "grad_norm": 0.6263675689697266, - "learning_rate": 9.793897123899426e-06, - "loss": 0.1947, - "step": 10964 - }, - { - "epoch": 1.032947881585455, - "grad_norm": 0.6655123233795166, - "learning_rate": 9.792387428906358e-06, - "loss": 0.2115, - "step": 10965 - }, - { - "epoch": 1.0330420856786227, - "grad_norm": 0.6305708289146423, - "learning_rate": 9.79087773864717e-06, - "loss": 0.2126, - "step": 10966 - }, - { - "epoch": 1.0331362897717906, - "grad_norm": 0.6121314167976379, - "learning_rate": 9.7893680531563e-06, - "loss": 0.2015, - "step": 10967 - }, - { - "epoch": 1.0332304938649584, - "grad_norm": 0.6803262829780579, - "learning_rate": 9.787858372468163e-06, - "loss": 0.2536, - "step": 10968 - }, - { - "epoch": 1.0333246979581263, - "grad_norm": 0.6640075445175171, - "learning_rate": 
9.786348696617183e-06, - "loss": 0.1957, - "step": 10969 - }, - { - "epoch": 1.033418902051294, - "grad_norm": 0.6151512861251831, - "learning_rate": 9.78483902563778e-06, - "loss": 0.1923, - "step": 10970 - }, - { - "epoch": 1.033513106144462, - "grad_norm": 0.5444778203964233, - "learning_rate": 9.783329359564383e-06, - "loss": 0.1618, - "step": 10971 - }, - { - "epoch": 1.0336073102376298, - "grad_norm": 0.6734565496444702, - "learning_rate": 9.78181969843141e-06, - "loss": 0.2121, - "step": 10972 - }, - { - "epoch": 1.0337015143307977, - "grad_norm": 0.5637355446815491, - "learning_rate": 9.780310042273284e-06, - "loss": 0.2118, - "step": 10973 - }, - { - "epoch": 1.0337957184239654, - "grad_norm": 0.6288042664527893, - "learning_rate": 9.778800391124431e-06, - "loss": 0.2194, - "step": 10974 - }, - { - "epoch": 1.0338899225171334, - "grad_norm": 0.6832186579704285, - "learning_rate": 9.777290745019272e-06, - "loss": 0.2185, - "step": 10975 - }, - { - "epoch": 1.0339841266103011, - "grad_norm": 0.6883850693702698, - "learning_rate": 9.775781103992226e-06, - "loss": 0.2508, - "step": 10976 - }, - { - "epoch": 1.034078330703469, - "grad_norm": 0.7300069332122803, - "learning_rate": 9.774271468077718e-06, - "loss": 0.2227, - "step": 10977 - }, - { - "epoch": 1.0341725347966368, - "grad_norm": 0.6199600696563721, - "learning_rate": 9.772761837310172e-06, - "loss": 0.2047, - "step": 10978 - }, - { - "epoch": 1.0342667388898048, - "grad_norm": 0.6576501727104187, - "learning_rate": 9.771252211724006e-06, - "loss": 0.1924, - "step": 10979 - }, - { - "epoch": 1.0343609429829725, - "grad_norm": 0.6371155977249146, - "learning_rate": 9.769742591353642e-06, - "loss": 0.1941, - "step": 10980 - }, - { - "epoch": 1.0344551470761405, - "grad_norm": 0.6634193062782288, - "learning_rate": 9.768232976233505e-06, - "loss": 0.2037, - "step": 10981 - }, - { - "epoch": 1.0345493511693082, - "grad_norm": 0.6280701756477356, - "learning_rate": 9.766723366398017e-06, - "loss": 0.1811, - "step": 10982 - }, - { - "epoch": 1.0346435552624762, - "grad_norm": 0.708373486995697, - "learning_rate": 9.76521376188159e-06, - "loss": 0.1923, - "step": 10983 - }, - { - "epoch": 1.034737759355644, - "grad_norm": 0.6773320436477661, - "learning_rate": 9.763704162718656e-06, - "loss": 0.2172, - "step": 10984 - }, - { - "epoch": 1.034831963448812, - "grad_norm": 0.6665389537811279, - "learning_rate": 9.762194568943636e-06, - "loss": 0.2116, - "step": 10985 - }, - { - "epoch": 1.0349261675419796, - "grad_norm": 0.6778730750083923, - "learning_rate": 9.760684980590942e-06, - "loss": 0.2022, - "step": 10986 - }, - { - "epoch": 1.0350203716351476, - "grad_norm": 0.7299953699111938, - "learning_rate": 9.759175397695004e-06, - "loss": 0.2095, - "step": 10987 - }, - { - "epoch": 1.0351145757283153, - "grad_norm": 0.6160703301429749, - "learning_rate": 9.757665820290239e-06, - "loss": 0.2156, - "step": 10988 - }, - { - "epoch": 1.0352087798214833, - "grad_norm": 0.6470587849617004, - "learning_rate": 9.756156248411068e-06, - "loss": 0.1955, - "step": 10989 - }, - { - "epoch": 1.035302983914651, - "grad_norm": 0.6191893219947815, - "learning_rate": 9.75464668209191e-06, - "loss": 0.2266, - "step": 10990 - }, - { - "epoch": 1.035397188007819, - "grad_norm": 0.668607234954834, - "learning_rate": 9.753137121367188e-06, - "loss": 0.1918, - "step": 10991 - }, - { - "epoch": 1.0354913921009867, - "grad_norm": 0.604935348033905, - "learning_rate": 9.751627566271323e-06, - "loss": 0.2174, - "step": 10992 - }, - { - "epoch": 
1.0355855961941547, - "grad_norm": 0.6688987612724304, - "learning_rate": 9.75011801683873e-06, - "loss": 0.2052, - "step": 10993 - }, - { - "epoch": 1.0356798002873224, - "grad_norm": 0.6503415703773499, - "learning_rate": 9.748608473103836e-06, - "loss": 0.1955, - "step": 10994 - }, - { - "epoch": 1.0357740043804904, - "grad_norm": 0.7032379508018494, - "learning_rate": 9.747098935101056e-06, - "loss": 0.2387, - "step": 10995 - }, - { - "epoch": 1.0358682084736581, - "grad_norm": 0.6519894599914551, - "learning_rate": 9.745589402864811e-06, - "loss": 0.2263, - "step": 10996 - }, - { - "epoch": 1.035962412566826, - "grad_norm": 0.6277257204055786, - "learning_rate": 9.744079876429522e-06, - "loss": 0.2053, - "step": 10997 - }, - { - "epoch": 1.0360566166599938, - "grad_norm": 0.5464719533920288, - "learning_rate": 9.742570355829608e-06, - "loss": 0.1853, - "step": 10998 - }, - { - "epoch": 1.0361508207531618, - "grad_norm": 0.635711669921875, - "learning_rate": 9.74106084109949e-06, - "loss": 0.1883, - "step": 10999 - }, - { - "epoch": 1.0362450248463295, - "grad_norm": 0.6020753979682922, - "learning_rate": 9.73955133227358e-06, - "loss": 0.2011, - "step": 11000 - }, - { - "epoch": 1.0363392289394975, - "grad_norm": 0.6382079720497131, - "learning_rate": 9.738041829386306e-06, - "loss": 0.2134, - "step": 11001 - }, - { - "epoch": 1.0364334330326652, - "grad_norm": 0.6182296276092529, - "learning_rate": 9.736532332472085e-06, - "loss": 0.2061, - "step": 11002 - }, - { - "epoch": 1.0365276371258332, - "grad_norm": 0.6536548733711243, - "learning_rate": 9.735022841565329e-06, - "loss": 0.2012, - "step": 11003 - }, - { - "epoch": 1.036621841219001, - "grad_norm": 0.7733579277992249, - "learning_rate": 9.733513356700465e-06, - "loss": 0.241, - "step": 11004 - }, - { - "epoch": 1.0367160453121689, - "grad_norm": 0.6516435146331787, - "learning_rate": 9.73200387791191e-06, - "loss": 0.1928, - "step": 11005 - }, - { - "epoch": 1.0368102494053366, - "grad_norm": 0.6627474427223206, - "learning_rate": 9.730494405234077e-06, - "loss": 0.2008, - "step": 11006 - }, - { - "epoch": 1.0369044534985046, - "grad_norm": 0.6160629987716675, - "learning_rate": 9.728984938701393e-06, - "loss": 0.2325, - "step": 11007 - }, - { - "epoch": 1.0369986575916723, - "grad_norm": 0.6392260193824768, - "learning_rate": 9.72747547834827e-06, - "loss": 0.224, - "step": 11008 - }, - { - "epoch": 1.0370928616848403, - "grad_norm": 0.6877565979957581, - "learning_rate": 9.725966024209128e-06, - "loss": 0.2315, - "step": 11009 - }, - { - "epoch": 1.037187065778008, - "grad_norm": 0.6176539063453674, - "learning_rate": 9.724456576318383e-06, - "loss": 0.2242, - "step": 11010 - }, - { - "epoch": 1.037281269871176, - "grad_norm": 0.6558178067207336, - "learning_rate": 9.722947134710453e-06, - "loss": 0.2257, - "step": 11011 - }, - { - "epoch": 1.0373754739643437, - "grad_norm": 0.5705680847167969, - "learning_rate": 9.72143769941976e-06, - "loss": 0.1796, - "step": 11012 - }, - { - "epoch": 1.0374696780575117, - "grad_norm": 0.6435481309890747, - "learning_rate": 9.719928270480715e-06, - "loss": 0.1914, - "step": 11013 - }, - { - "epoch": 1.0375638821506794, - "grad_norm": 0.6671053171157837, - "learning_rate": 9.71841884792774e-06, - "loss": 0.2176, - "step": 11014 - }, - { - "epoch": 1.0376580862438474, - "grad_norm": 0.6727092862129211, - "learning_rate": 9.716909431795251e-06, - "loss": 0.2306, - "step": 11015 - }, - { - "epoch": 1.037752290337015, - "grad_norm": 0.6252629160881042, - "learning_rate": 9.715400022117665e-06, 
- "loss": 0.2012, - "step": 11016 - }, - { - "epoch": 1.037846494430183, - "grad_norm": 0.7260280847549438, - "learning_rate": 9.713890618929398e-06, - "loss": 0.2412, - "step": 11017 - }, - { - "epoch": 1.0379406985233508, - "grad_norm": 0.6636776924133301, - "learning_rate": 9.712381222264869e-06, - "loss": 0.2062, - "step": 11018 - }, - { - "epoch": 1.0380349026165188, - "grad_norm": 0.7553560733795166, - "learning_rate": 9.71087183215849e-06, - "loss": 0.1941, - "step": 11019 - }, - { - "epoch": 1.0381291067096865, - "grad_norm": 0.5909751057624817, - "learning_rate": 9.709362448644682e-06, - "loss": 0.2143, - "step": 11020 - }, - { - "epoch": 1.0382233108028545, - "grad_norm": 0.7922289371490479, - "learning_rate": 9.707853071757862e-06, - "loss": 0.2381, - "step": 11021 - }, - { - "epoch": 1.0383175148960222, - "grad_norm": 0.6449324488639832, - "learning_rate": 9.706343701532443e-06, - "loss": 0.2166, - "step": 11022 - }, - { - "epoch": 1.0384117189891902, - "grad_norm": 0.6129333972930908, - "learning_rate": 9.704834338002836e-06, - "loss": 0.1894, - "step": 11023 - }, - { - "epoch": 1.038505923082358, - "grad_norm": 0.6376360058784485, - "learning_rate": 9.703324981203467e-06, - "loss": 0.2108, - "step": 11024 - }, - { - "epoch": 1.0386001271755259, - "grad_norm": 0.9939081072807312, - "learning_rate": 9.70181563116875e-06, - "loss": 0.2334, - "step": 11025 - }, - { - "epoch": 1.0386943312686936, - "grad_norm": 0.6956937909126282, - "learning_rate": 9.700306287933093e-06, - "loss": 0.1997, - "step": 11026 - }, - { - "epoch": 1.0387885353618616, - "grad_norm": 0.6432368159294128, - "learning_rate": 9.69879695153092e-06, - "loss": 0.2151, - "step": 11027 - }, - { - "epoch": 1.0388827394550293, - "grad_norm": 0.6428807377815247, - "learning_rate": 9.697287621996641e-06, - "loss": 0.2081, - "step": 11028 - }, - { - "epoch": 1.0389769435481973, - "grad_norm": 0.6315459609031677, - "learning_rate": 9.695778299364672e-06, - "loss": 0.2133, - "step": 11029 - }, - { - "epoch": 1.039071147641365, - "grad_norm": 0.5934397578239441, - "learning_rate": 9.694268983669427e-06, - "loss": 0.2324, - "step": 11030 - }, - { - "epoch": 1.039165351734533, - "grad_norm": 0.612694501876831, - "learning_rate": 9.692759674945322e-06, - "loss": 0.2326, - "step": 11031 - }, - { - "epoch": 1.0392595558277007, - "grad_norm": 0.6528658866882324, - "learning_rate": 9.691250373226774e-06, - "loss": 0.1921, - "step": 11032 - }, - { - "epoch": 1.0393537599208686, - "grad_norm": 0.6557685136795044, - "learning_rate": 9.689741078548191e-06, - "loss": 0.2006, - "step": 11033 - }, - { - "epoch": 1.0394479640140364, - "grad_norm": 0.7540048360824585, - "learning_rate": 9.688231790943996e-06, - "loss": 0.2121, - "step": 11034 - }, - { - "epoch": 1.0395421681072043, - "grad_norm": 0.7017741203308105, - "learning_rate": 9.686722510448595e-06, - "loss": 0.2058, - "step": 11035 - }, - { - "epoch": 1.039636372200372, - "grad_norm": 0.617885947227478, - "learning_rate": 9.685213237096405e-06, - "loss": 0.1962, - "step": 11036 - }, - { - "epoch": 1.03973057629354, - "grad_norm": 0.640734851360321, - "learning_rate": 9.683703970921841e-06, - "loss": 0.2028, - "step": 11037 - }, - { - "epoch": 1.0398247803867078, - "grad_norm": 0.6963291764259338, - "learning_rate": 9.682194711959318e-06, - "loss": 0.2363, - "step": 11038 - }, - { - "epoch": 1.0399189844798757, - "grad_norm": 0.657353401184082, - "learning_rate": 9.680685460243247e-06, - "loss": 0.1941, - "step": 11039 - }, - { - "epoch": 1.0400131885730435, - "grad_norm": 
0.664580762386322, - "learning_rate": 9.679176215808037e-06, - "loss": 0.2117, - "step": 11040 - }, - { - "epoch": 1.0401073926662114, - "grad_norm": 0.6473305225372314, - "learning_rate": 9.677666978688108e-06, - "loss": 0.2196, - "step": 11041 - }, - { - "epoch": 1.0402015967593792, - "grad_norm": 0.6388459205627441, - "learning_rate": 9.676157748917873e-06, - "loss": 0.2136, - "step": 11042 - }, - { - "epoch": 1.0402958008525471, - "grad_norm": 0.6057829856872559, - "learning_rate": 9.674648526531735e-06, - "loss": 0.201, - "step": 11043 - }, - { - "epoch": 1.0403900049457149, - "grad_norm": 0.6156550049781799, - "learning_rate": 9.673139311564118e-06, - "loss": 0.1977, - "step": 11044 - }, - { - "epoch": 1.0404842090388828, - "grad_norm": 0.6064395904541016, - "learning_rate": 9.671630104049433e-06, - "loss": 0.191, - "step": 11045 - }, - { - "epoch": 1.0405784131320506, - "grad_norm": 0.6397035717964172, - "learning_rate": 9.670120904022083e-06, - "loss": 0.1881, - "step": 11046 - }, - { - "epoch": 1.0406726172252185, - "grad_norm": 0.6849590539932251, - "learning_rate": 9.668611711516494e-06, - "loss": 0.2182, - "step": 11047 - }, - { - "epoch": 1.0407668213183863, - "grad_norm": 0.6409408450126648, - "learning_rate": 9.667102526567068e-06, - "loss": 0.1893, - "step": 11048 - }, - { - "epoch": 1.0408610254115542, - "grad_norm": 0.6189360618591309, - "learning_rate": 9.665593349208218e-06, - "loss": 0.195, - "step": 11049 - }, - { - "epoch": 1.040955229504722, - "grad_norm": 0.6303878426551819, - "learning_rate": 9.664084179474354e-06, - "loss": 0.2559, - "step": 11050 - }, - { - "epoch": 1.04104943359789, - "grad_norm": 0.6208665370941162, - "learning_rate": 9.662575017399894e-06, - "loss": 0.2201, - "step": 11051 - }, - { - "epoch": 1.0411436376910577, - "grad_norm": 0.7549915313720703, - "learning_rate": 9.661065863019246e-06, - "loss": 0.2151, - "step": 11052 - }, - { - "epoch": 1.0412378417842256, - "grad_norm": 0.7950330376625061, - "learning_rate": 9.659556716366817e-06, - "loss": 0.2223, - "step": 11053 - }, - { - "epoch": 1.0413320458773934, - "grad_norm": 0.5968732833862305, - "learning_rate": 9.658047577477022e-06, - "loss": 0.2203, - "step": 11054 - }, - { - "epoch": 1.0414262499705613, - "grad_norm": 0.7243004441261292, - "learning_rate": 9.656538446384275e-06, - "loss": 0.2219, - "step": 11055 - }, - { - "epoch": 1.041520454063729, - "grad_norm": 0.6180680394172668, - "learning_rate": 9.655029323122977e-06, - "loss": 0.203, - "step": 11056 - }, - { - "epoch": 1.041614658156897, - "grad_norm": 0.688209593296051, - "learning_rate": 9.653520207727547e-06, - "loss": 0.1887, - "step": 11057 - }, - { - "epoch": 1.0417088622500648, - "grad_norm": 0.6917232275009155, - "learning_rate": 9.652011100232394e-06, - "loss": 0.2015, - "step": 11058 - }, - { - "epoch": 1.0418030663432327, - "grad_norm": 0.7278743982315063, - "learning_rate": 9.650502000671926e-06, - "loss": 0.2152, - "step": 11059 - }, - { - "epoch": 1.0418972704364005, - "grad_norm": 0.6593081951141357, - "learning_rate": 9.648992909080548e-06, - "loss": 0.2227, - "step": 11060 - }, - { - "epoch": 1.0419914745295684, - "grad_norm": 0.691805899143219, - "learning_rate": 9.647483825492678e-06, - "loss": 0.2047, - "step": 11061 - }, - { - "epoch": 1.0420856786227362, - "grad_norm": 0.6252630352973938, - "learning_rate": 9.645974749942725e-06, - "loss": 0.1945, - "step": 11062 - }, - { - "epoch": 1.042179882715904, - "grad_norm": 0.6684446334838867, - "learning_rate": 9.644465682465088e-06, - "loss": 0.2126, - "step": 
11063 - }, - { - "epoch": 1.0422740868090719, - "grad_norm": 0.6513932943344116, - "learning_rate": 9.642956623094187e-06, - "loss": 0.2104, - "step": 11064 - }, - { - "epoch": 1.0423682909022398, - "grad_norm": 0.6777861714363098, - "learning_rate": 9.641447571864429e-06, - "loss": 0.2193, - "step": 11065 - }, - { - "epoch": 1.0424624949954076, - "grad_norm": 0.643218457698822, - "learning_rate": 9.639938528810217e-06, - "loss": 0.2164, - "step": 11066 - }, - { - "epoch": 1.0425566990885753, - "grad_norm": 0.6765201687812805, - "learning_rate": 9.638429493965967e-06, - "loss": 0.2277, - "step": 11067 - }, - { - "epoch": 1.0426509031817432, - "grad_norm": 0.6563106775283813, - "learning_rate": 9.636920467366082e-06, - "loss": 0.2017, - "step": 11068 - }, - { - "epoch": 1.0427451072749112, - "grad_norm": 0.6716633439064026, - "learning_rate": 9.635411449044974e-06, - "loss": 0.205, - "step": 11069 - }, - { - "epoch": 1.042839311368079, - "grad_norm": 0.6161030530929565, - "learning_rate": 9.633902439037044e-06, - "loss": 0.2199, - "step": 11070 - }, - { - "epoch": 1.0429335154612467, - "grad_norm": 0.6739822030067444, - "learning_rate": 9.63239343737671e-06, - "loss": 0.2062, - "step": 11071 - }, - { - "epoch": 1.0430277195544146, - "grad_norm": 0.6891419291496277, - "learning_rate": 9.63088444409837e-06, - "loss": 0.197, - "step": 11072 - }, - { - "epoch": 1.0431219236475824, - "grad_norm": 0.7476945519447327, - "learning_rate": 9.629375459236437e-06, - "loss": 0.2007, - "step": 11073 - }, - { - "epoch": 1.0432161277407503, - "grad_norm": 0.6027140617370605, - "learning_rate": 9.627866482825316e-06, - "loss": 0.209, - "step": 11074 - }, - { - "epoch": 1.043310331833918, - "grad_norm": 0.6068741083145142, - "learning_rate": 9.626357514899417e-06, - "loss": 0.226, - "step": 11075 - }, - { - "epoch": 1.043404535927086, - "grad_norm": 0.6412231922149658, - "learning_rate": 9.62484855549314e-06, - "loss": 0.2042, - "step": 11076 - }, - { - "epoch": 1.0434987400202538, - "grad_norm": 0.6265538930892944, - "learning_rate": 9.623339604640901e-06, - "loss": 0.2413, - "step": 11077 - }, - { - "epoch": 1.0435929441134217, - "grad_norm": 0.6812928318977356, - "learning_rate": 9.6218306623771e-06, - "loss": 0.1969, - "step": 11078 - }, - { - "epoch": 1.0436871482065895, - "grad_norm": 0.6189242601394653, - "learning_rate": 9.620321728736147e-06, - "loss": 0.2004, - "step": 11079 - }, - { - "epoch": 1.0437813522997574, - "grad_norm": 0.7051004767417908, - "learning_rate": 9.61881280375244e-06, - "loss": 0.2268, - "step": 11080 - }, - { - "epoch": 1.0438755563929252, - "grad_norm": 0.5873299837112427, - "learning_rate": 9.617303887460393e-06, - "loss": 0.1984, - "step": 11081 - }, - { - "epoch": 1.0439697604860931, - "grad_norm": 0.636796236038208, - "learning_rate": 9.615794979894414e-06, - "loss": 0.2032, - "step": 11082 - }, - { - "epoch": 1.0440639645792609, - "grad_norm": 0.7137956023216248, - "learning_rate": 9.614286081088895e-06, - "loss": 0.2417, - "step": 11083 - }, - { - "epoch": 1.0441581686724288, - "grad_norm": 0.6883149743080139, - "learning_rate": 9.612777191078257e-06, - "loss": 0.2662, - "step": 11084 - }, - { - "epoch": 1.0442523727655966, - "grad_norm": 0.6227160692214966, - "learning_rate": 9.611268309896897e-06, - "loss": 0.216, - "step": 11085 - }, - { - "epoch": 1.0443465768587645, - "grad_norm": 0.6602711081504822, - "learning_rate": 9.609759437579215e-06, - "loss": 0.1916, - "step": 11086 - }, - { - "epoch": 1.0444407809519323, - "grad_norm": 0.5795473456382751, - 
"learning_rate": 9.608250574159627e-06, - "loss": 0.195, - "step": 11087 - }, - { - "epoch": 1.0445349850451002, - "grad_norm": 0.6337112784385681, - "learning_rate": 9.606741719672532e-06, - "loss": 0.2007, - "step": 11088 - }, - { - "epoch": 1.044629189138268, - "grad_norm": 0.6955795884132385, - "learning_rate": 9.605232874152333e-06, - "loss": 0.2133, - "step": 11089 - }, - { - "epoch": 1.044723393231436, - "grad_norm": 0.6677690744400024, - "learning_rate": 9.603724037633431e-06, - "loss": 0.2169, - "step": 11090 - }, - { - "epoch": 1.0448175973246037, - "grad_norm": 0.7634860873222351, - "learning_rate": 9.602215210150238e-06, - "loss": 0.2089, - "step": 11091 - }, - { - "epoch": 1.0449118014177716, - "grad_norm": 0.6360622048377991, - "learning_rate": 9.600706391737154e-06, - "loss": 0.1844, - "step": 11092 - }, - { - "epoch": 1.0450060055109394, - "grad_norm": 0.5976507067680359, - "learning_rate": 9.599197582428577e-06, - "loss": 0.1806, - "step": 11093 - }, - { - "epoch": 1.0451002096041073, - "grad_norm": 0.7010049223899841, - "learning_rate": 9.59768878225892e-06, - "loss": 0.2238, - "step": 11094 - }, - { - "epoch": 1.045194413697275, - "grad_norm": 0.6693528890609741, - "learning_rate": 9.596179991262579e-06, - "loss": 0.2199, - "step": 11095 - }, - { - "epoch": 1.045288617790443, - "grad_norm": 0.5872026085853577, - "learning_rate": 9.594671209473957e-06, - "loss": 0.206, - "step": 11096 - }, - { - "epoch": 1.0453828218836108, - "grad_norm": 0.615420401096344, - "learning_rate": 9.593162436927461e-06, - "loss": 0.2318, - "step": 11097 - }, - { - "epoch": 1.0454770259767787, - "grad_norm": 0.6226284503936768, - "learning_rate": 9.59165367365749e-06, - "loss": 0.2395, - "step": 11098 - }, - { - "epoch": 1.0455712300699465, - "grad_norm": 0.6633398532867432, - "learning_rate": 9.59014491969845e-06, - "loss": 0.2263, - "step": 11099 - }, - { - "epoch": 1.0456654341631144, - "grad_norm": 0.6114773750305176, - "learning_rate": 9.58863617508473e-06, - "loss": 0.1905, - "step": 11100 - }, - { - "epoch": 1.0457596382562822, - "grad_norm": 0.7100580930709839, - "learning_rate": 9.587127439850749e-06, - "loss": 0.2085, - "step": 11101 - }, - { - "epoch": 1.0458538423494501, - "grad_norm": 0.6336383819580078, - "learning_rate": 9.585618714030903e-06, - "loss": 0.2293, - "step": 11102 - }, - { - "epoch": 1.0459480464426179, - "grad_norm": 0.691080629825592, - "learning_rate": 9.584109997659583e-06, - "loss": 0.2264, - "step": 11103 - }, - { - "epoch": 1.0460422505357858, - "grad_norm": 0.7221998572349548, - "learning_rate": 9.582601290771206e-06, - "loss": 0.2094, - "step": 11104 - }, - { - "epoch": 1.0461364546289535, - "grad_norm": 0.7102656960487366, - "learning_rate": 9.581092593400163e-06, - "loss": 0.22, - "step": 11105 - }, - { - "epoch": 1.0462306587221215, - "grad_norm": 0.6529754996299744, - "learning_rate": 9.579583905580851e-06, - "loss": 0.2149, - "step": 11106 - }, - { - "epoch": 1.0463248628152892, - "grad_norm": 0.5638948082923889, - "learning_rate": 9.578075227347684e-06, - "loss": 0.1858, - "step": 11107 - }, - { - "epoch": 1.0464190669084572, - "grad_norm": 0.6523354053497314, - "learning_rate": 9.576566558735053e-06, - "loss": 0.2017, - "step": 11108 - }, - { - "epoch": 1.046513271001625, - "grad_norm": 0.6081815361976624, - "learning_rate": 9.575057899777357e-06, - "loss": 0.1962, - "step": 11109 - }, - { - "epoch": 1.046607475094793, - "grad_norm": 0.6833523511886597, - "learning_rate": 9.573549250508996e-06, - "loss": 0.2032, - "step": 11110 - }, - { - "epoch": 
1.0467016791879606, - "grad_norm": 0.5638574957847595, - "learning_rate": 9.572040610964376e-06, - "loss": 0.18, - "step": 11111 - }, - { - "epoch": 1.0467958832811286, - "grad_norm": 0.6635776162147522, - "learning_rate": 9.57053198117789e-06, - "loss": 0.2159, - "step": 11112 - }, - { - "epoch": 1.0468900873742963, - "grad_norm": 0.6376252770423889, - "learning_rate": 9.569023361183938e-06, - "loss": 0.238, - "step": 11113 - }, - { - "epoch": 1.0469842914674643, - "grad_norm": 0.6444991827011108, - "learning_rate": 9.56751475101692e-06, - "loss": 0.2136, - "step": 11114 - }, - { - "epoch": 1.047078495560632, - "grad_norm": 0.6417834162712097, - "learning_rate": 9.566006150711237e-06, - "loss": 0.2129, - "step": 11115 - }, - { - "epoch": 1.0471726996538, - "grad_norm": 0.6237179040908813, - "learning_rate": 9.564497560301281e-06, - "loss": 0.2141, - "step": 11116 - }, - { - "epoch": 1.0472669037469677, - "grad_norm": 0.6647252440452576, - "learning_rate": 9.562988979821457e-06, - "loss": 0.2051, - "step": 11117 - }, - { - "epoch": 1.0473611078401357, - "grad_norm": 0.6281054019927979, - "learning_rate": 9.561480409306161e-06, - "loss": 0.2059, - "step": 11118 - }, - { - "epoch": 1.0474553119333034, - "grad_norm": 0.8527956008911133, - "learning_rate": 9.55997184878979e-06, - "loss": 0.2117, - "step": 11119 - }, - { - "epoch": 1.0475495160264714, - "grad_norm": 0.6849957704544067, - "learning_rate": 9.558463298306737e-06, - "loss": 0.2004, - "step": 11120 - }, - { - "epoch": 1.0476437201196391, - "grad_norm": 0.5989345908164978, - "learning_rate": 9.556954757891408e-06, - "loss": 0.1716, - "step": 11121 - }, - { - "epoch": 1.047737924212807, - "grad_norm": 0.6292548179626465, - "learning_rate": 9.555446227578198e-06, - "loss": 0.226, - "step": 11122 - }, - { - "epoch": 1.0478321283059748, - "grad_norm": 0.6838903427124023, - "learning_rate": 9.553937707401492e-06, - "loss": 0.2272, - "step": 11123 - }, - { - "epoch": 1.0479263323991428, - "grad_norm": 0.6710837483406067, - "learning_rate": 9.552429197395705e-06, - "loss": 0.2254, - "step": 11124 - }, - { - "epoch": 1.0480205364923105, - "grad_norm": 0.6228378415107727, - "learning_rate": 9.550920697595222e-06, - "loss": 0.2361, - "step": 11125 - }, - { - "epoch": 1.0481147405854785, - "grad_norm": 0.6198393106460571, - "learning_rate": 9.549412208034436e-06, - "loss": 0.2086, - "step": 11126 - }, - { - "epoch": 1.0482089446786462, - "grad_norm": 0.6320314407348633, - "learning_rate": 9.547903728747758e-06, - "loss": 0.1968, - "step": 11127 - }, - { - "epoch": 1.0483031487718142, - "grad_norm": 0.7348677515983582, - "learning_rate": 9.546395259769569e-06, - "loss": 0.223, - "step": 11128 - }, - { - "epoch": 1.048397352864982, - "grad_norm": 0.7054775953292847, - "learning_rate": 9.54488680113427e-06, - "loss": 0.208, - "step": 11129 - }, - { - "epoch": 1.0484915569581499, - "grad_norm": 0.613116443157196, - "learning_rate": 9.543378352876256e-06, - "loss": 0.2086, - "step": 11130 - }, - { - "epoch": 1.0485857610513176, - "grad_norm": 0.625372588634491, - "learning_rate": 9.541869915029923e-06, - "loss": 0.2189, - "step": 11131 - }, - { - "epoch": 1.0486799651444856, - "grad_norm": 0.6397774815559387, - "learning_rate": 9.540361487629662e-06, - "loss": 0.1869, - "step": 11132 - }, - { - "epoch": 1.0487741692376533, - "grad_norm": 0.6696832776069641, - "learning_rate": 9.538853070709871e-06, - "loss": 0.1887, - "step": 11133 - }, - { - "epoch": 1.0488683733308213, - "grad_norm": 0.6500979661941528, - "learning_rate": 9.537344664304943e-06, - 
"loss": 0.2168, - "step": 11134 - }, - { - "epoch": 1.048962577423989, - "grad_norm": 0.5896238088607788, - "learning_rate": 9.535836268449272e-06, - "loss": 0.1997, - "step": 11135 - }, - { - "epoch": 1.049056781517157, - "grad_norm": 0.657002329826355, - "learning_rate": 9.534327883177251e-06, - "loss": 0.2252, - "step": 11136 - }, - { - "epoch": 1.0491509856103247, - "grad_norm": 0.7159499526023865, - "learning_rate": 9.532819508523277e-06, - "loss": 0.1993, - "step": 11137 - }, - { - "epoch": 1.0492451897034927, - "grad_norm": 0.6413044929504395, - "learning_rate": 9.53131114452174e-06, - "loss": 0.2157, - "step": 11138 - }, - { - "epoch": 1.0493393937966604, - "grad_norm": 0.6382307410240173, - "learning_rate": 9.529802791207035e-06, - "loss": 0.2204, - "step": 11139 - }, - { - "epoch": 1.0494335978898284, - "grad_norm": 0.7286831140518188, - "learning_rate": 9.528294448613548e-06, - "loss": 0.2294, - "step": 11140 - }, - { - "epoch": 1.0495278019829961, - "grad_norm": 0.6109779477119446, - "learning_rate": 9.526786116775682e-06, - "loss": 0.1874, - "step": 11141 - }, - { - "epoch": 1.049622006076164, - "grad_norm": 0.662490963935852, - "learning_rate": 9.525277795727827e-06, - "loss": 0.232, - "step": 11142 - }, - { - "epoch": 1.0497162101693318, - "grad_norm": 0.6784884333610535, - "learning_rate": 9.523769485504364e-06, - "loss": 0.2213, - "step": 11143 - }, - { - "epoch": 1.0498104142624998, - "grad_norm": 0.6889349222183228, - "learning_rate": 9.5222611861397e-06, - "loss": 0.212, - "step": 11144 - }, - { - "epoch": 1.0499046183556675, - "grad_norm": 0.6845928430557251, - "learning_rate": 9.52075289766822e-06, - "loss": 0.198, - "step": 11145 - }, - { - "epoch": 1.0499988224488355, - "grad_norm": 0.6736336350440979, - "learning_rate": 9.519244620124309e-06, - "loss": 0.2574, - "step": 11146 - }, - { - "epoch": 1.0500930265420032, - "grad_norm": 0.5495937466621399, - "learning_rate": 9.51773635354237e-06, - "loss": 0.1832, - "step": 11147 - }, - { - "epoch": 1.0501872306351712, - "grad_norm": 0.6508906483650208, - "learning_rate": 9.516228097956787e-06, - "loss": 0.2156, - "step": 11148 - }, - { - "epoch": 1.050281434728339, - "grad_norm": 0.6272395253181458, - "learning_rate": 9.51471985340195e-06, - "loss": 0.2307, - "step": 11149 - }, - { - "epoch": 1.0503756388215069, - "grad_norm": 0.8044071793556213, - "learning_rate": 9.51321161991225e-06, - "loss": 0.2278, - "step": 11150 - }, - { - "epoch": 1.0504698429146746, - "grad_norm": 0.7866191864013672, - "learning_rate": 9.51170339752208e-06, - "loss": 0.2312, - "step": 11151 - }, - { - "epoch": 1.0505640470078426, - "grad_norm": 0.5838554501533508, - "learning_rate": 9.510195186265827e-06, - "loss": 0.208, - "step": 11152 - }, - { - "epoch": 1.0506582511010103, - "grad_norm": 0.6547302007675171, - "learning_rate": 9.50868698617788e-06, - "loss": 0.2101, - "step": 11153 - }, - { - "epoch": 1.0507524551941783, - "grad_norm": 0.6752921938896179, - "learning_rate": 9.50717879729263e-06, - "loss": 0.1913, - "step": 11154 - }, - { - "epoch": 1.050846659287346, - "grad_norm": 0.7714093327522278, - "learning_rate": 9.505670619644468e-06, - "loss": 0.2245, - "step": 11155 - }, - { - "epoch": 1.050940863380514, - "grad_norm": 0.6669865846633911, - "learning_rate": 9.504162453267776e-06, - "loss": 0.1967, - "step": 11156 - }, - { - "epoch": 1.0510350674736817, - "grad_norm": 0.6319842338562012, - "learning_rate": 9.502654298196952e-06, - "loss": 0.2148, - "step": 11157 - }, - { - "epoch": 1.0511292715668497, - "grad_norm": 
0.7395252585411072, - "learning_rate": 9.501146154466377e-06, - "loss": 0.2034, - "step": 11158 - }, - { - "epoch": 1.0512234756600174, - "grad_norm": 0.6132408976554871, - "learning_rate": 9.499638022110443e-06, - "loss": 0.2215, - "step": 11159 - }, - { - "epoch": 1.0513176797531854, - "grad_norm": 0.6248003244400024, - "learning_rate": 9.49812990116353e-06, - "loss": 0.2071, - "step": 11160 - }, - { - "epoch": 1.051411883846353, - "grad_norm": 0.6967898607254028, - "learning_rate": 9.496621791660036e-06, - "loss": 0.2356, - "step": 11161 - }, - { - "epoch": 1.051506087939521, - "grad_norm": 0.6121363043785095, - "learning_rate": 9.495113693634346e-06, - "loss": 0.192, - "step": 11162 - }, - { - "epoch": 1.0516002920326888, - "grad_norm": 0.6453368067741394, - "learning_rate": 9.493605607120837e-06, - "loss": 0.214, - "step": 11163 - }, - { - "epoch": 1.0516944961258567, - "grad_norm": 0.968544065952301, - "learning_rate": 9.492097532153911e-06, - "loss": 0.2009, - "step": 11164 - }, - { - "epoch": 1.0517887002190245, - "grad_norm": 0.6154932379722595, - "learning_rate": 9.490589468767944e-06, - "loss": 0.1957, - "step": 11165 - }, - { - "epoch": 1.0518829043121924, - "grad_norm": 0.6451613306999207, - "learning_rate": 9.48908141699732e-06, - "loss": 0.227, - "step": 11166 - }, - { - "epoch": 1.0519771084053602, - "grad_norm": 0.6756893396377563, - "learning_rate": 9.487573376876437e-06, - "loss": 0.2165, - "step": 11167 - }, - { - "epoch": 1.0520713124985281, - "grad_norm": 0.6194762587547302, - "learning_rate": 9.486065348439671e-06, - "loss": 0.2002, - "step": 11168 - }, - { - "epoch": 1.0521655165916959, - "grad_norm": 0.7091140151023865, - "learning_rate": 9.48455733172141e-06, - "loss": 0.2079, - "step": 11169 - }, - { - "epoch": 1.0522597206848638, - "grad_norm": 0.6055117845535278, - "learning_rate": 9.483049326756037e-06, - "loss": 0.1973, - "step": 11170 - }, - { - "epoch": 1.0523539247780316, - "grad_norm": 0.6349391937255859, - "learning_rate": 9.481541333577942e-06, - "loss": 0.1835, - "step": 11171 - }, - { - "epoch": 1.0524481288711995, - "grad_norm": 0.6326323747634888, - "learning_rate": 9.480033352221506e-06, - "loss": 0.2033, - "step": 11172 - }, - { - "epoch": 1.0525423329643673, - "grad_norm": 0.679470419883728, - "learning_rate": 9.478525382721111e-06, - "loss": 0.2208, - "step": 11173 - }, - { - "epoch": 1.0526365370575352, - "grad_norm": 0.6802979111671448, - "learning_rate": 9.477017425111146e-06, - "loss": 0.2196, - "step": 11174 - }, - { - "epoch": 1.052730741150703, - "grad_norm": 0.6467894315719604, - "learning_rate": 9.475509479425992e-06, - "loss": 0.2216, - "step": 11175 - }, - { - "epoch": 1.052824945243871, - "grad_norm": 0.7351828813552856, - "learning_rate": 9.474001545700031e-06, - "loss": 0.2204, - "step": 11176 - }, - { - "epoch": 1.0529191493370387, - "grad_norm": 0.6442490220069885, - "learning_rate": 9.472493623967651e-06, - "loss": 0.1981, - "step": 11177 - }, - { - "epoch": 1.0530133534302066, - "grad_norm": 0.6150367259979248, - "learning_rate": 9.470985714263232e-06, - "loss": 0.2111, - "step": 11178 - }, - { - "epoch": 1.0531075575233744, - "grad_norm": 0.635668158531189, - "learning_rate": 9.46947781662116e-06, - "loss": 0.2153, - "step": 11179 - }, - { - "epoch": 1.0532017616165423, - "grad_norm": 0.7739942669868469, - "learning_rate": 9.467969931075805e-06, - "loss": 0.2399, - "step": 11180 - }, - { - "epoch": 1.05329596570971, - "grad_norm": 0.6315284967422485, - "learning_rate": 9.466462057661564e-06, - "loss": 0.1995, - "step": 11181 - 
}, - { - "epoch": 1.053390169802878, - "grad_norm": 0.6025282740592957, - "learning_rate": 9.464954196412816e-06, - "loss": 0.2135, - "step": 11182 - }, - { - "epoch": 1.0534843738960458, - "grad_norm": 0.6514477729797363, - "learning_rate": 9.463446347363933e-06, - "loss": 0.2194, - "step": 11183 - }, - { - "epoch": 1.0535785779892137, - "grad_norm": 0.6931238770484924, - "learning_rate": 9.46193851054931e-06, - "loss": 0.2023, - "step": 11184 - }, - { - "epoch": 1.0536727820823815, - "grad_norm": 0.7367113828659058, - "learning_rate": 9.460430686003318e-06, - "loss": 0.2308, - "step": 11185 - }, - { - "epoch": 1.0537669861755494, - "grad_norm": 0.5855939984321594, - "learning_rate": 9.458922873760337e-06, - "loss": 0.189, - "step": 11186 - }, - { - "epoch": 1.0538611902687172, - "grad_norm": 0.6554651260375977, - "learning_rate": 9.457415073854757e-06, - "loss": 0.228, - "step": 11187 - }, - { - "epoch": 1.0539553943618851, - "grad_norm": 0.6301096677780151, - "learning_rate": 9.455907286320953e-06, - "loss": 0.2024, - "step": 11188 - }, - { - "epoch": 1.0540495984550529, - "grad_norm": 0.6394056081771851, - "learning_rate": 9.454399511193302e-06, - "loss": 0.2447, - "step": 11189 - }, - { - "epoch": 1.0541438025482208, - "grad_norm": 0.6422435641288757, - "learning_rate": 9.452891748506183e-06, - "loss": 0.2062, - "step": 11190 - }, - { - "epoch": 1.0542380066413886, - "grad_norm": 1.5789260864257812, - "learning_rate": 9.451383998293981e-06, - "loss": 0.1762, - "step": 11191 - }, - { - "epoch": 1.0543322107345565, - "grad_norm": 0.6653083562850952, - "learning_rate": 9.449876260591074e-06, - "loss": 0.2014, - "step": 11192 - }, - { - "epoch": 1.0544264148277243, - "grad_norm": 0.6630948781967163, - "learning_rate": 9.448368535431835e-06, - "loss": 0.1933, - "step": 11193 - }, - { - "epoch": 1.0545206189208922, - "grad_norm": 0.6700137853622437, - "learning_rate": 9.44686082285065e-06, - "loss": 0.2047, - "step": 11194 - }, - { - "epoch": 1.05461482301406, - "grad_norm": 0.6125686168670654, - "learning_rate": 9.445353122881893e-06, - "loss": 0.2084, - "step": 11195 - }, - { - "epoch": 1.054709027107228, - "grad_norm": 0.636107325553894, - "learning_rate": 9.443845435559941e-06, - "loss": 0.2062, - "step": 11196 - }, - { - "epoch": 1.0548032312003957, - "grad_norm": 0.6367817521095276, - "learning_rate": 9.442337760919174e-06, - "loss": 0.2403, - "step": 11197 - }, - { - "epoch": 1.0548974352935636, - "grad_norm": 0.6435432434082031, - "learning_rate": 9.440830098993969e-06, - "loss": 0.2289, - "step": 11198 - }, - { - "epoch": 1.0549916393867314, - "grad_norm": 0.5958701372146606, - "learning_rate": 9.439322449818705e-06, - "loss": 0.1877, - "step": 11199 - }, - { - "epoch": 1.0550858434798993, - "grad_norm": 0.6421607136726379, - "learning_rate": 9.43781481342775e-06, - "loss": 0.1893, - "step": 11200 - }, - { - "epoch": 1.055180047573067, - "grad_norm": 0.6000587344169617, - "learning_rate": 9.436307189855492e-06, - "loss": 0.1936, - "step": 11201 - }, - { - "epoch": 1.0552742516662348, - "grad_norm": 0.61556077003479, - "learning_rate": 9.434799579136301e-06, - "loss": 0.1812, - "step": 11202 - }, - { - "epoch": 1.0553684557594027, - "grad_norm": 0.5927000045776367, - "learning_rate": 9.43329198130455e-06, - "loss": 0.1851, - "step": 11203 - }, - { - "epoch": 1.0554626598525707, - "grad_norm": 0.6748753190040588, - "learning_rate": 9.431784396394624e-06, - "loss": 0.2094, - "step": 11204 - }, - { - "epoch": 1.0555568639457384, - "grad_norm": 0.800971269607544, - "learning_rate": 
9.430276824440889e-06, - "loss": 0.2309, - "step": 11205 - }, - { - "epoch": 1.0556510680389062, - "grad_norm": 0.6645275950431824, - "learning_rate": 9.42876926547772e-06, - "loss": 0.2186, - "step": 11206 - }, - { - "epoch": 1.0557452721320741, - "grad_norm": 0.7603010535240173, - "learning_rate": 9.427261719539502e-06, - "loss": 0.2148, - "step": 11207 - }, - { - "epoch": 1.0558394762252419, - "grad_norm": 0.6783728003501892, - "learning_rate": 9.425754186660601e-06, - "loss": 0.2174, - "step": 11208 - }, - { - "epoch": 1.0559336803184098, - "grad_norm": 0.7164636254310608, - "learning_rate": 9.424246666875392e-06, - "loss": 0.2398, - "step": 11209 - }, - { - "epoch": 1.0560278844115776, - "grad_norm": 0.6651371121406555, - "learning_rate": 9.422739160218248e-06, - "loss": 0.2181, - "step": 11210 - }, - { - "epoch": 1.0561220885047455, - "grad_norm": 0.6751751899719238, - "learning_rate": 9.421231666723543e-06, - "loss": 0.2154, - "step": 11211 - }, - { - "epoch": 1.0562162925979133, - "grad_norm": 0.6399869918823242, - "learning_rate": 9.419724186425654e-06, - "loss": 0.1956, - "step": 11212 - }, - { - "epoch": 1.0563104966910812, - "grad_norm": 0.7180332541465759, - "learning_rate": 9.418216719358947e-06, - "loss": 0.2125, - "step": 11213 - }, - { - "epoch": 1.056404700784249, - "grad_norm": 0.654536247253418, - "learning_rate": 9.416709265557803e-06, - "loss": 0.2027, - "step": 11214 - }, - { - "epoch": 1.056498904877417, - "grad_norm": 0.5757598280906677, - "learning_rate": 9.41520182505659e-06, - "loss": 0.1865, - "step": 11215 - }, - { - "epoch": 1.0565931089705847, - "grad_norm": 0.6821197271347046, - "learning_rate": 9.413694397889676e-06, - "loss": 0.2014, - "step": 11216 - }, - { - "epoch": 1.0566873130637526, - "grad_norm": 0.8251752853393555, - "learning_rate": 9.412186984091438e-06, - "loss": 0.2462, - "step": 11217 - }, - { - "epoch": 1.0567815171569204, - "grad_norm": 0.6823638081550598, - "learning_rate": 9.410679583696247e-06, - "loss": 0.1921, - "step": 11218 - }, - { - "epoch": 1.0568757212500883, - "grad_norm": 0.6674739122390747, - "learning_rate": 9.409172196738474e-06, - "loss": 0.2096, - "step": 11219 - }, - { - "epoch": 1.056969925343256, - "grad_norm": 0.5945629477500916, - "learning_rate": 9.407664823252483e-06, - "loss": 0.1812, - "step": 11220 - }, - { - "epoch": 1.057064129436424, - "grad_norm": 0.5962544083595276, - "learning_rate": 9.406157463272657e-06, - "loss": 0.1884, - "step": 11221 - }, - { - "epoch": 1.0571583335295918, - "grad_norm": 0.6588229537010193, - "learning_rate": 9.404650116833357e-06, - "loss": 0.2061, - "step": 11222 - }, - { - "epoch": 1.0572525376227597, - "grad_norm": 0.6700302362442017, - "learning_rate": 9.40314278396895e-06, - "loss": 0.2181, - "step": 11223 - }, - { - "epoch": 1.0573467417159275, - "grad_norm": 0.6322035789489746, - "learning_rate": 9.401635464713817e-06, - "loss": 0.1991, - "step": 11224 - }, - { - "epoch": 1.0574409458090954, - "grad_norm": 0.7199676036834717, - "learning_rate": 9.40012815910232e-06, - "loss": 0.2043, - "step": 11225 - }, - { - "epoch": 1.0575351499022632, - "grad_norm": 0.6011017560958862, - "learning_rate": 9.398620867168823e-06, - "loss": 0.1958, - "step": 11226 - }, - { - "epoch": 1.0576293539954311, - "grad_norm": 0.5843146443367004, - "learning_rate": 9.397113588947708e-06, - "loss": 0.1841, - "step": 11227 - }, - { - "epoch": 1.0577235580885989, - "grad_norm": 0.5755860805511475, - "learning_rate": 9.395606324473331e-06, - "loss": 0.1828, - "step": 11228 - }, - { - "epoch": 
1.0578177621817668, - "grad_norm": 0.7034083604812622, - "learning_rate": 9.394099073780066e-06, - "loss": 0.2021, - "step": 11229 - }, - { - "epoch": 1.0579119662749346, - "grad_norm": 0.6693611741065979, - "learning_rate": 9.392591836902278e-06, - "loss": 0.2441, - "step": 11230 - }, - { - "epoch": 1.0580061703681025, - "grad_norm": 0.5777305364608765, - "learning_rate": 9.391084613874337e-06, - "loss": 0.2235, - "step": 11231 - }, - { - "epoch": 1.0581003744612703, - "grad_norm": 0.5965675711631775, - "learning_rate": 9.389577404730607e-06, - "loss": 0.1691, - "step": 11232 - }, - { - "epoch": 1.0581945785544382, - "grad_norm": 0.6609938740730286, - "learning_rate": 9.388070209505457e-06, - "loss": 0.2126, - "step": 11233 - }, - { - "epoch": 1.058288782647606, - "grad_norm": 0.655796468257904, - "learning_rate": 9.386563028233253e-06, - "loss": 0.2003, - "step": 11234 - }, - { - "epoch": 1.058382986740774, - "grad_norm": 0.6271539926528931, - "learning_rate": 9.38505586094836e-06, - "loss": 0.2071, - "step": 11235 - }, - { - "epoch": 1.0584771908339417, - "grad_norm": 0.6357953548431396, - "learning_rate": 9.383548707685144e-06, - "loss": 0.1967, - "step": 11236 - }, - { - "epoch": 1.0585713949271096, - "grad_norm": 0.6697683334350586, - "learning_rate": 9.382041568477972e-06, - "loss": 0.2285, - "step": 11237 - }, - { - "epoch": 1.0586655990202773, - "grad_norm": 0.6043030023574829, - "learning_rate": 9.380534443361206e-06, - "loss": 0.1843, - "step": 11238 - }, - { - "epoch": 1.0587598031134453, - "grad_norm": 0.6747494339942932, - "learning_rate": 9.379027332369217e-06, - "loss": 0.2013, - "step": 11239 - }, - { - "epoch": 1.058854007206613, - "grad_norm": 0.6584952473640442, - "learning_rate": 9.377520235536358e-06, - "loss": 0.2164, - "step": 11240 - }, - { - "epoch": 1.058948211299781, - "grad_norm": 0.6754341125488281, - "learning_rate": 9.376013152897008e-06, - "loss": 0.2374, - "step": 11241 - }, - { - "epoch": 1.0590424153929487, - "grad_norm": 0.743152916431427, - "learning_rate": 9.37450608448552e-06, - "loss": 0.2149, - "step": 11242 - }, - { - "epoch": 1.0591366194861167, - "grad_norm": 0.6113778352737427, - "learning_rate": 9.372999030336257e-06, - "loss": 0.221, - "step": 11243 - }, - { - "epoch": 1.0592308235792844, - "grad_norm": 0.7457681894302368, - "learning_rate": 9.371491990483591e-06, - "loss": 0.1781, - "step": 11244 - }, - { - "epoch": 1.0593250276724524, - "grad_norm": 0.6920148134231567, - "learning_rate": 9.36998496496188e-06, - "loss": 0.2149, - "step": 11245 - }, - { - "epoch": 1.0594192317656201, - "grad_norm": 0.6757007241249084, - "learning_rate": 9.368477953805481e-06, - "loss": 0.2273, - "step": 11246 - }, - { - "epoch": 1.059513435858788, - "grad_norm": 0.6448094248771667, - "learning_rate": 9.366970957048764e-06, - "loss": 0.2001, - "step": 11247 - }, - { - "epoch": 1.0596076399519558, - "grad_norm": 0.687659740447998, - "learning_rate": 9.365463974726089e-06, - "loss": 0.2095, - "step": 11248 - }, - { - "epoch": 1.0597018440451238, - "grad_norm": 0.6897541880607605, - "learning_rate": 9.363957006871817e-06, - "loss": 0.196, - "step": 11249 - }, - { - "epoch": 1.0597960481382915, - "grad_norm": 0.7084280252456665, - "learning_rate": 9.362450053520307e-06, - "loss": 0.2137, - "step": 11250 - }, - { - "epoch": 1.0598902522314595, - "grad_norm": 0.6229495406150818, - "learning_rate": 9.360943114705923e-06, - "loss": 0.2077, - "step": 11251 - }, - { - "epoch": 1.0599844563246272, - "grad_norm": 0.7185355424880981, - "learning_rate": 
9.359436190463025e-06, - "loss": 0.203, - "step": 11252 - }, - { - "epoch": 1.0600786604177952, - "grad_norm": 0.6672647595405579, - "learning_rate": 9.357929280825967e-06, - "loss": 0.2254, - "step": 11253 - }, - { - "epoch": 1.060172864510963, - "grad_norm": 0.5557436943054199, - "learning_rate": 9.35642238582912e-06, - "loss": 0.1844, - "step": 11254 - }, - { - "epoch": 1.060267068604131, - "grad_norm": 0.6353731751441956, - "learning_rate": 9.354915505506839e-06, - "loss": 0.2042, - "step": 11255 - }, - { - "epoch": 1.0603612726972986, - "grad_norm": 0.6660376787185669, - "learning_rate": 9.353408639893477e-06, - "loss": 0.2472, - "step": 11256 - }, - { - "epoch": 1.0604554767904666, - "grad_norm": 0.7244245409965515, - "learning_rate": 9.351901789023402e-06, - "loss": 0.2262, - "step": 11257 - }, - { - "epoch": 1.0605496808836343, - "grad_norm": 0.7049255967140198, - "learning_rate": 9.350394952930968e-06, - "loss": 0.2233, - "step": 11258 - }, - { - "epoch": 1.0606438849768023, - "grad_norm": 0.5961927175521851, - "learning_rate": 9.348888131650536e-06, - "loss": 0.2, - "step": 11259 - }, - { - "epoch": 1.06073808906997, - "grad_norm": 0.6763758063316345, - "learning_rate": 9.347381325216455e-06, - "loss": 0.2219, - "step": 11260 - }, - { - "epoch": 1.060832293163138, - "grad_norm": 0.6583464741706848, - "learning_rate": 9.345874533663095e-06, - "loss": 0.1839, - "step": 11261 - }, - { - "epoch": 1.0609264972563057, - "grad_norm": 0.6635313034057617, - "learning_rate": 9.344367757024807e-06, - "loss": 0.2039, - "step": 11262 - }, - { - "epoch": 1.0610207013494737, - "grad_norm": 0.6379163265228271, - "learning_rate": 9.34286099533594e-06, - "loss": 0.1773, - "step": 11263 - }, - { - "epoch": 1.0611149054426414, - "grad_norm": 0.6061589121818542, - "learning_rate": 9.341354248630868e-06, - "loss": 0.1876, - "step": 11264 - }, - { - "epoch": 1.0612091095358094, - "grad_norm": 0.702111005783081, - "learning_rate": 9.339847516943935e-06, - "loss": 0.2026, - "step": 11265 - }, - { - "epoch": 1.0613033136289771, - "grad_norm": 0.6586620211601257, - "learning_rate": 9.338340800309498e-06, - "loss": 0.2251, - "step": 11266 - }, - { - "epoch": 1.061397517722145, - "grad_norm": 0.6755000948905945, - "learning_rate": 9.336834098761915e-06, - "loss": 0.2407, - "step": 11267 - }, - { - "epoch": 1.0614917218153128, - "grad_norm": 0.5839751958847046, - "learning_rate": 9.335327412335541e-06, - "loss": 0.1932, - "step": 11268 - }, - { - "epoch": 1.0615859259084808, - "grad_norm": 0.6575443148612976, - "learning_rate": 9.33382074106473e-06, - "loss": 0.1992, - "step": 11269 - }, - { - "epoch": 1.0616801300016485, - "grad_norm": 0.65425705909729, - "learning_rate": 9.332314084983834e-06, - "loss": 0.2018, - "step": 11270 - }, - { - "epoch": 1.0617743340948165, - "grad_norm": 0.7227454781532288, - "learning_rate": 9.33080744412721e-06, - "loss": 0.2277, - "step": 11271 - }, - { - "epoch": 1.0618685381879842, - "grad_norm": 0.6328867673873901, - "learning_rate": 9.329300818529215e-06, - "loss": 0.229, - "step": 11272 - }, - { - "epoch": 1.0619627422811522, - "grad_norm": 0.6115502119064331, - "learning_rate": 9.327794208224193e-06, - "loss": 0.1956, - "step": 11273 - }, - { - "epoch": 1.06205694637432, - "grad_norm": 1.076008677482605, - "learning_rate": 9.326287613246506e-06, - "loss": 0.2003, - "step": 11274 - }, - { - "epoch": 1.0621511504674879, - "grad_norm": 0.6074058413505554, - "learning_rate": 9.324781033630504e-06, - "loss": 0.1988, - "step": 11275 - }, - { - "epoch": 1.0622453545606556, - 
"grad_norm": 0.5642720460891724, - "learning_rate": 9.323274469410535e-06, - "loss": 0.1781, - "step": 11276 - }, - { - "epoch": 1.0623395586538236, - "grad_norm": 0.6526278257369995, - "learning_rate": 9.321767920620958e-06, - "loss": 0.216, - "step": 11277 - }, - { - "epoch": 1.0624337627469913, - "grad_norm": 0.5836901664733887, - "learning_rate": 9.32026138729612e-06, - "loss": 0.1925, - "step": 11278 - }, - { - "epoch": 1.0625279668401593, - "grad_norm": 0.6546528935432434, - "learning_rate": 9.318754869470376e-06, - "loss": 0.1974, - "step": 11279 - }, - { - "epoch": 1.062622170933327, - "grad_norm": 0.6255246996879578, - "learning_rate": 9.31724836717807e-06, - "loss": 0.2095, - "step": 11280 - }, - { - "epoch": 1.062716375026495, - "grad_norm": 0.5917826890945435, - "learning_rate": 9.315741880453562e-06, - "loss": 0.217, - "step": 11281 - }, - { - "epoch": 1.0628105791196627, - "grad_norm": 0.6791194677352905, - "learning_rate": 9.314235409331196e-06, - "loss": 0.2142, - "step": 11282 - }, - { - "epoch": 1.0629047832128307, - "grad_norm": 0.6301477551460266, - "learning_rate": 9.312728953845318e-06, - "loss": 0.2018, - "step": 11283 - }, - { - "epoch": 1.0629989873059984, - "grad_norm": 0.6922203898429871, - "learning_rate": 9.31122251403029e-06, - "loss": 0.2069, - "step": 11284 - }, - { - "epoch": 1.0630931913991664, - "grad_norm": 0.6575744152069092, - "learning_rate": 9.30971608992045e-06, - "loss": 0.1947, - "step": 11285 - }, - { - "epoch": 1.063187395492334, - "grad_norm": 0.7077332735061646, - "learning_rate": 9.308209681550151e-06, - "loss": 0.2213, - "step": 11286 - }, - { - "epoch": 1.063281599585502, - "grad_norm": 0.6976880431175232, - "learning_rate": 9.306703288953742e-06, - "loss": 0.199, - "step": 11287 - }, - { - "epoch": 1.0633758036786698, - "grad_norm": 0.6417021751403809, - "learning_rate": 9.30519691216557e-06, - "loss": 0.2011, - "step": 11288 - }, - { - "epoch": 1.0634700077718378, - "grad_norm": 0.6133342981338501, - "learning_rate": 9.303690551219983e-06, - "loss": 0.1947, - "step": 11289 - }, - { - "epoch": 1.0635642118650055, - "grad_norm": 0.5926858186721802, - "learning_rate": 9.302184206151328e-06, - "loss": 0.1868, - "step": 11290 - }, - { - "epoch": 1.0636584159581735, - "grad_norm": 0.7458446025848389, - "learning_rate": 9.300677876993954e-06, - "loss": 0.2519, - "step": 11291 - }, - { - "epoch": 1.0637526200513412, - "grad_norm": 0.6571434140205383, - "learning_rate": 9.299171563782204e-06, - "loss": 0.2131, - "step": 11292 - }, - { - "epoch": 1.0638468241445092, - "grad_norm": 0.6311631798744202, - "learning_rate": 9.297665266550425e-06, - "loss": 0.2142, - "step": 11293 - }, - { - "epoch": 1.063941028237677, - "grad_norm": 0.6098727583885193, - "learning_rate": 9.296158985332966e-06, - "loss": 0.2132, - "step": 11294 - }, - { - "epoch": 1.0640352323308448, - "grad_norm": 0.6526821851730347, - "learning_rate": 9.29465272016417e-06, - "loss": 0.1984, - "step": 11295 - }, - { - "epoch": 1.0641294364240126, - "grad_norm": 0.6677765846252441, - "learning_rate": 9.293146471078383e-06, - "loss": 0.2243, - "step": 11296 - }, - { - "epoch": 1.0642236405171805, - "grad_norm": 0.6665986776351929, - "learning_rate": 9.291640238109949e-06, - "loss": 0.2035, - "step": 11297 - }, - { - "epoch": 1.0643178446103483, - "grad_norm": 0.7022556662559509, - "learning_rate": 9.290134021293215e-06, - "loss": 0.2245, - "step": 11298 - }, - { - "epoch": 1.0644120487035162, - "grad_norm": 0.6495939493179321, - "learning_rate": 9.288627820662525e-06, - "loss": 0.1929, - 
"step": 11299 - }, - { - "epoch": 1.064506252796684, - "grad_norm": 0.6828340291976929, - "learning_rate": 9.287121636252214e-06, - "loss": 0.2223, - "step": 11300 - }, - { - "epoch": 1.064600456889852, - "grad_norm": 0.7064345479011536, - "learning_rate": 9.285615468096638e-06, - "loss": 0.2131, - "step": 11301 - }, - { - "epoch": 1.0646946609830197, - "grad_norm": 0.5541938543319702, - "learning_rate": 9.284109316230133e-06, - "loss": 0.1714, - "step": 11302 - }, - { - "epoch": 1.0647888650761876, - "grad_norm": 0.714510440826416, - "learning_rate": 9.282603180687037e-06, - "loss": 0.2158, - "step": 11303 - }, - { - "epoch": 1.0648830691693554, - "grad_norm": 0.7199380993843079, - "learning_rate": 9.281097061501707e-06, - "loss": 0.2386, - "step": 11304 - }, - { - "epoch": 1.0649772732625233, - "grad_norm": 0.6494677066802979, - "learning_rate": 9.279590958708472e-06, - "loss": 0.2296, - "step": 11305 - }, - { - "epoch": 1.065071477355691, - "grad_norm": 0.6022455096244812, - "learning_rate": 9.278084872341675e-06, - "loss": 0.2045, - "step": 11306 - }, - { - "epoch": 1.065165681448859, - "grad_norm": 0.6770690083503723, - "learning_rate": 9.276578802435661e-06, - "loss": 0.2057, - "step": 11307 - }, - { - "epoch": 1.0652598855420268, - "grad_norm": 0.765457034111023, - "learning_rate": 9.275072749024771e-06, - "loss": 0.2148, - "step": 11308 - }, - { - "epoch": 1.0653540896351947, - "grad_norm": 0.6799235343933105, - "learning_rate": 9.273566712143343e-06, - "loss": 0.2046, - "step": 11309 - }, - { - "epoch": 1.0654482937283625, - "grad_norm": 0.6458833813667297, - "learning_rate": 9.272060691825714e-06, - "loss": 0.1892, - "step": 11310 - }, - { - "epoch": 1.0655424978215304, - "grad_norm": 0.6030128598213196, - "learning_rate": 9.270554688106229e-06, - "loss": 0.1804, - "step": 11311 - }, - { - "epoch": 1.0656367019146982, - "grad_norm": 0.6258808374404907, - "learning_rate": 9.269048701019226e-06, - "loss": 0.1843, - "step": 11312 - }, - { - "epoch": 1.0657309060078661, - "grad_norm": 0.6587503552436829, - "learning_rate": 9.267542730599042e-06, - "loss": 0.1917, - "step": 11313 - }, - { - "epoch": 1.0658251101010339, - "grad_norm": 0.7243843078613281, - "learning_rate": 9.266036776880016e-06, - "loss": 0.2126, - "step": 11314 - }, - { - "epoch": 1.0659193141942018, - "grad_norm": 0.6364871859550476, - "learning_rate": 9.26453083989649e-06, - "loss": 0.2465, - "step": 11315 - }, - { - "epoch": 1.0660135182873696, - "grad_norm": 0.6186479926109314, - "learning_rate": 9.263024919682794e-06, - "loss": 0.2243, - "step": 11316 - }, - { - "epoch": 1.0661077223805375, - "grad_norm": 0.7175317406654358, - "learning_rate": 9.261519016273271e-06, - "loss": 0.2146, - "step": 11317 - }, - { - "epoch": 1.0662019264737053, - "grad_norm": 0.6257708072662354, - "learning_rate": 9.26001312970226e-06, - "loss": 0.1892, - "step": 11318 - }, - { - "epoch": 1.0662961305668732, - "grad_norm": 0.6223662495613098, - "learning_rate": 9.258507260004092e-06, - "loss": 0.1993, - "step": 11319 - }, - { - "epoch": 1.066390334660041, - "grad_norm": 0.6748189926147461, - "learning_rate": 9.2570014072131e-06, - "loss": 0.1911, - "step": 11320 - }, - { - "epoch": 1.066484538753209, - "grad_norm": 0.6674708724021912, - "learning_rate": 9.255495571363631e-06, - "loss": 0.1946, - "step": 11321 - }, - { - "epoch": 1.0665787428463767, - "grad_norm": 0.7132311463356018, - "learning_rate": 9.253989752490014e-06, - "loss": 0.192, - "step": 11322 - }, - { - "epoch": 1.0666729469395446, - "grad_norm": 0.6067463159561157, - 
"learning_rate": 9.25248395062658e-06, - "loss": 0.2067, - "step": 11323 - }, - { - "epoch": 1.0667671510327124, - "grad_norm": 0.6910226941108704, - "learning_rate": 9.250978165807672e-06, - "loss": 0.2155, - "step": 11324 - }, - { - "epoch": 1.0668613551258803, - "grad_norm": 0.6239519119262695, - "learning_rate": 9.24947239806762e-06, - "loss": 0.232, - "step": 11325 - }, - { - "epoch": 1.066955559219048, - "grad_norm": 0.5738794803619385, - "learning_rate": 9.247966647440755e-06, - "loss": 0.1773, - "step": 11326 - }, - { - "epoch": 1.067049763312216, - "grad_norm": 0.6636790633201599, - "learning_rate": 9.246460913961417e-06, - "loss": 0.2303, - "step": 11327 - }, - { - "epoch": 1.0671439674053838, - "grad_norm": 0.604157030582428, - "learning_rate": 9.244955197663934e-06, - "loss": 0.1934, - "step": 11328 - }, - { - "epoch": 1.0672381714985517, - "grad_norm": 0.6229135990142822, - "learning_rate": 9.243449498582642e-06, - "loss": 0.2159, - "step": 11329 - }, - { - "epoch": 1.0673323755917195, - "grad_norm": 0.6797246932983398, - "learning_rate": 9.241943816751868e-06, - "loss": 0.2194, - "step": 11330 - }, - { - "epoch": 1.0674265796848874, - "grad_norm": 0.5860040783882141, - "learning_rate": 9.24043815220595e-06, - "loss": 0.1858, - "step": 11331 - }, - { - "epoch": 1.0675207837780551, - "grad_norm": 0.6930223703384399, - "learning_rate": 9.238932504979217e-06, - "loss": 0.2176, - "step": 11332 - }, - { - "epoch": 1.0676149878712229, - "grad_norm": 0.6356987357139587, - "learning_rate": 9.237426875105998e-06, - "loss": 0.2453, - "step": 11333 - }, - { - "epoch": 1.0677091919643908, - "grad_norm": 0.6183640956878662, - "learning_rate": 9.235921262620625e-06, - "loss": 0.2038, - "step": 11334 - }, - { - "epoch": 1.0678033960575588, - "grad_norm": 0.6134758591651917, - "learning_rate": 9.234415667557432e-06, - "loss": 0.1974, - "step": 11335 - }, - { - "epoch": 1.0678976001507265, - "grad_norm": 0.650878369808197, - "learning_rate": 9.232910089950743e-06, - "loss": 0.1967, - "step": 11336 - }, - { - "epoch": 1.0679918042438943, - "grad_norm": 0.6640805006027222, - "learning_rate": 9.23140452983489e-06, - "loss": 0.2245, - "step": 11337 - }, - { - "epoch": 1.0680860083370622, - "grad_norm": 0.6172104477882385, - "learning_rate": 9.229898987244207e-06, - "loss": 0.1994, - "step": 11338 - }, - { - "epoch": 1.0681802124302302, - "grad_norm": 0.6730762720108032, - "learning_rate": 9.228393462213017e-06, - "loss": 0.2197, - "step": 11339 - }, - { - "epoch": 1.068274416523398, - "grad_norm": 0.6202109456062317, - "learning_rate": 9.226887954775642e-06, - "loss": 0.1859, - "step": 11340 - }, - { - "epoch": 1.0683686206165657, - "grad_norm": 0.587462842464447, - "learning_rate": 9.225382464966426e-06, - "loss": 0.1813, - "step": 11341 - }, - { - "epoch": 1.0684628247097336, - "grad_norm": 0.7010436058044434, - "learning_rate": 9.223876992819685e-06, - "loss": 0.2041, - "step": 11342 - }, - { - "epoch": 1.0685570288029016, - "grad_norm": 0.6356572508811951, - "learning_rate": 9.222371538369744e-06, - "loss": 0.2076, - "step": 11343 - }, - { - "epoch": 1.0686512328960693, - "grad_norm": 0.7032904624938965, - "learning_rate": 9.220866101650942e-06, - "loss": 0.2438, - "step": 11344 - }, - { - "epoch": 1.068745436989237, - "grad_norm": 0.5845944881439209, - "learning_rate": 9.219360682697594e-06, - "loss": 0.1816, - "step": 11345 - }, - { - "epoch": 1.068839641082405, - "grad_norm": 0.6367470622062683, - "learning_rate": 9.21785528154403e-06, - "loss": 0.2149, - "step": 11346 - }, - { - "epoch": 
1.068933845175573, - "grad_norm": 0.6324123740196228, - "learning_rate": 9.216349898224575e-06, - "loss": 0.2021, - "step": 11347 - }, - { - "epoch": 1.0690280492687407, - "grad_norm": 0.6973478198051453, - "learning_rate": 9.214844532773557e-06, - "loss": 0.2401, - "step": 11348 - }, - { - "epoch": 1.0691222533619085, - "grad_norm": 0.6931522488594055, - "learning_rate": 9.213339185225294e-06, - "loss": 0.2096, - "step": 11349 - }, - { - "epoch": 1.0692164574550764, - "grad_norm": 0.6650674939155579, - "learning_rate": 9.211833855614115e-06, - "loss": 0.2045, - "step": 11350 - }, - { - "epoch": 1.0693106615482442, - "grad_norm": 0.8251972198486328, - "learning_rate": 9.210328543974346e-06, - "loss": 0.2136, - "step": 11351 - }, - { - "epoch": 1.0694048656414121, - "grad_norm": 0.6624812483787537, - "learning_rate": 9.208823250340305e-06, - "loss": 0.2299, - "step": 11352 - }, - { - "epoch": 1.0694990697345799, - "grad_norm": 0.6316957473754883, - "learning_rate": 9.207317974746314e-06, - "loss": 0.1824, - "step": 11353 - }, - { - "epoch": 1.0695932738277478, - "grad_norm": 0.8904557228088379, - "learning_rate": 9.205812717226705e-06, - "loss": 0.2263, - "step": 11354 - }, - { - "epoch": 1.0696874779209156, - "grad_norm": 0.6415508389472961, - "learning_rate": 9.204307477815792e-06, - "loss": 0.22, - "step": 11355 - }, - { - "epoch": 1.0697816820140835, - "grad_norm": 0.5946205258369446, - "learning_rate": 9.202802256547897e-06, - "loss": 0.1842, - "step": 11356 - }, - { - "epoch": 1.0698758861072513, - "grad_norm": 0.6503834128379822, - "learning_rate": 9.201297053457348e-06, - "loss": 0.2193, - "step": 11357 - }, - { - "epoch": 1.0699700902004192, - "grad_norm": 0.722682774066925, - "learning_rate": 9.19979186857846e-06, - "loss": 0.2533, - "step": 11358 - }, - { - "epoch": 1.070064294293587, - "grad_norm": 0.6491664052009583, - "learning_rate": 9.198286701945556e-06, - "loss": 0.1835, - "step": 11359 - }, - { - "epoch": 1.070158498386755, - "grad_norm": 0.623892605304718, - "learning_rate": 9.196781553592948e-06, - "loss": 0.2086, - "step": 11360 - }, - { - "epoch": 1.0702527024799227, - "grad_norm": 0.6431731581687927, - "learning_rate": 9.19527642355497e-06, - "loss": 0.1842, - "step": 11361 - }, - { - "epoch": 1.0703469065730906, - "grad_norm": 0.7300872206687927, - "learning_rate": 9.193771311865933e-06, - "loss": 0.2475, - "step": 11362 - }, - { - "epoch": 1.0704411106662584, - "grad_norm": 0.6609445214271545, - "learning_rate": 9.192266218560156e-06, - "loss": 0.2091, - "step": 11363 - }, - { - "epoch": 1.0705353147594263, - "grad_norm": 0.6846212148666382, - "learning_rate": 9.190761143671958e-06, - "loss": 0.2429, - "step": 11364 - }, - { - "epoch": 1.070629518852594, - "grad_norm": 0.7073555588722229, - "learning_rate": 9.189256087235657e-06, - "loss": 0.2215, - "step": 11365 - }, - { - "epoch": 1.070723722945762, - "grad_norm": 0.7998649477958679, - "learning_rate": 9.18775104928557e-06, - "loss": 0.1956, - "step": 11366 - }, - { - "epoch": 1.0708179270389298, - "grad_norm": 0.6570374965667725, - "learning_rate": 9.186246029856019e-06, - "loss": 0.2422, - "step": 11367 - }, - { - "epoch": 1.0709121311320977, - "grad_norm": 0.6321113109588623, - "learning_rate": 9.184741028981314e-06, - "loss": 0.2223, - "step": 11368 - }, - { - "epoch": 1.0710063352252654, - "grad_norm": 0.7635213732719421, - "learning_rate": 9.183236046695777e-06, - "loss": 0.2479, - "step": 11369 - }, - { - "epoch": 1.0711005393184334, - "grad_norm": 0.6423308253288269, - "learning_rate": 
9.181731083033719e-06, - "loss": 0.1775, - "step": 11370 - }, - { - "epoch": 1.0711947434116011, - "grad_norm": 0.7300950288772583, - "learning_rate": 9.180226138029458e-06, - "loss": 0.1821, - "step": 11371 - }, - { - "epoch": 1.071288947504769, - "grad_norm": 0.670215368270874, - "learning_rate": 9.17872121171731e-06, - "loss": 0.1956, - "step": 11372 - }, - { - "epoch": 1.0713831515979368, - "grad_norm": 0.6570760011672974, - "learning_rate": 9.177216304131586e-06, - "loss": 0.2121, - "step": 11373 - }, - { - "epoch": 1.0714773556911048, - "grad_norm": 0.6348326206207275, - "learning_rate": 9.175711415306604e-06, - "loss": 0.2124, - "step": 11374 - }, - { - "epoch": 1.0715715597842725, - "grad_norm": 0.6312444806098938, - "learning_rate": 9.174206545276678e-06, - "loss": 0.2139, - "step": 11375 - }, - { - "epoch": 1.0716657638774405, - "grad_norm": 0.6658427715301514, - "learning_rate": 9.172701694076118e-06, - "loss": 0.2017, - "step": 11376 - }, - { - "epoch": 1.0717599679706082, - "grad_norm": 0.5940864682197571, - "learning_rate": 9.17119686173924e-06, - "loss": 0.2113, - "step": 11377 - }, - { - "epoch": 1.0718541720637762, - "grad_norm": 0.6423086524009705, - "learning_rate": 9.169692048300357e-06, - "loss": 0.2081, - "step": 11378 - }, - { - "epoch": 1.071948376156944, - "grad_norm": 0.5828261971473694, - "learning_rate": 9.168187253793779e-06, - "loss": 0.196, - "step": 11379 - }, - { - "epoch": 1.072042580250112, - "grad_norm": 0.691644012928009, - "learning_rate": 9.166682478253812e-06, - "loss": 0.2051, - "step": 11380 - }, - { - "epoch": 1.0721367843432796, - "grad_norm": 0.6989017128944397, - "learning_rate": 9.16517772171478e-06, - "loss": 0.2251, - "step": 11381 - }, - { - "epoch": 1.0722309884364476, - "grad_norm": 0.6330791711807251, - "learning_rate": 9.163672984210985e-06, - "loss": 0.1756, - "step": 11382 - }, - { - "epoch": 1.0723251925296153, - "grad_norm": 0.659974992275238, - "learning_rate": 9.162168265776739e-06, - "loss": 0.2001, - "step": 11383 - }, - { - "epoch": 1.0724193966227833, - "grad_norm": 0.5928322672843933, - "learning_rate": 9.160663566446352e-06, - "loss": 0.2088, - "step": 11384 - }, - { - "epoch": 1.072513600715951, - "grad_norm": 0.7007295489311218, - "learning_rate": 9.159158886254134e-06, - "loss": 0.1932, - "step": 11385 - }, - { - "epoch": 1.072607804809119, - "grad_norm": 0.7131476998329163, - "learning_rate": 9.157654225234392e-06, - "loss": 0.2272, - "step": 11386 - }, - { - "epoch": 1.0727020089022867, - "grad_norm": 0.6879523396492004, - "learning_rate": 9.15614958342144e-06, - "loss": 0.2103, - "step": 11387 - }, - { - "epoch": 1.0727962129954547, - "grad_norm": 0.6187193989753723, - "learning_rate": 9.154644960849582e-06, - "loss": 0.2147, - "step": 11388 - }, - { - "epoch": 1.0728904170886224, - "grad_norm": 0.6488523483276367, - "learning_rate": 9.153140357553124e-06, - "loss": 0.2265, - "step": 11389 - }, - { - "epoch": 1.0729846211817904, - "grad_norm": 0.6526038646697998, - "learning_rate": 9.151635773566376e-06, - "loss": 0.2202, - "step": 11390 - }, - { - "epoch": 1.0730788252749581, - "grad_norm": 0.7171820402145386, - "learning_rate": 9.150131208923645e-06, - "loss": 0.2127, - "step": 11391 - }, - { - "epoch": 1.073173029368126, - "grad_norm": 0.6093924641609192, - "learning_rate": 9.148626663659237e-06, - "loss": 0.1775, - "step": 11392 - }, - { - "epoch": 1.0732672334612938, - "grad_norm": 0.6443316340446472, - "learning_rate": 9.147122137807456e-06, - "loss": 0.2021, - "step": 11393 - }, - { - "epoch": 
1.0733614375544618, - "grad_norm": 0.6613069772720337, - "learning_rate": 9.145617631402612e-06, - "loss": 0.216, - "step": 11394 - }, - { - "epoch": 1.0734556416476295, - "grad_norm": 0.6950787305831909, - "learning_rate": 9.144113144479006e-06, - "loss": 0.223, - "step": 11395 - }, - { - "epoch": 1.0735498457407975, - "grad_norm": 0.6569063663482666, - "learning_rate": 9.142608677070943e-06, - "loss": 0.2013, - "step": 11396 - }, - { - "epoch": 1.0736440498339652, - "grad_norm": 0.6433458924293518, - "learning_rate": 9.14110422921273e-06, - "loss": 0.2061, - "step": 11397 - }, - { - "epoch": 1.0737382539271332, - "grad_norm": 0.6032097935676575, - "learning_rate": 9.13959980093867e-06, - "loss": 0.2157, - "step": 11398 - }, - { - "epoch": 1.073832458020301, - "grad_norm": 0.6845543384552002, - "learning_rate": 9.138095392283063e-06, - "loss": 0.2149, - "step": 11399 - }, - { - "epoch": 1.0739266621134689, - "grad_norm": 0.7228466868400574, - "learning_rate": 9.13659100328021e-06, - "loss": 0.2223, - "step": 11400 - }, - { - "epoch": 1.0740208662066366, - "grad_norm": 0.6130920648574829, - "learning_rate": 9.135086633964427e-06, - "loss": 0.198, - "step": 11401 - }, - { - "epoch": 1.0741150702998046, - "grad_norm": 0.6409655809402466, - "learning_rate": 9.13358228437e-06, - "loss": 0.1913, - "step": 11402 - }, - { - "epoch": 1.0742092743929723, - "grad_norm": 0.6924681663513184, - "learning_rate": 9.132077954531236e-06, - "loss": 0.1838, - "step": 11403 - }, - { - "epoch": 1.0743034784861403, - "grad_norm": 0.6371403336524963, - "learning_rate": 9.13057364448244e-06, - "loss": 0.1925, - "step": 11404 - }, - { - "epoch": 1.074397682579308, - "grad_norm": 0.6719380021095276, - "learning_rate": 9.129069354257909e-06, - "loss": 0.2164, - "step": 11405 - }, - { - "epoch": 1.074491886672476, - "grad_norm": 0.7450310587882996, - "learning_rate": 9.127565083891942e-06, - "loss": 0.2039, - "step": 11406 - }, - { - "epoch": 1.0745860907656437, - "grad_norm": 0.6084794402122498, - "learning_rate": 9.12606083341884e-06, - "loss": 0.2046, - "step": 11407 - }, - { - "epoch": 1.0746802948588117, - "grad_norm": 0.6674574613571167, - "learning_rate": 9.124556602872905e-06, - "loss": 0.2092, - "step": 11408 - }, - { - "epoch": 1.0747744989519794, - "grad_norm": 0.6063975691795349, - "learning_rate": 9.123052392288433e-06, - "loss": 0.1961, - "step": 11409 - }, - { - "epoch": 1.0748687030451474, - "grad_norm": 0.6519051194190979, - "learning_rate": 9.121548201699721e-06, - "loss": 0.2143, - "step": 11410 - }, - { - "epoch": 1.074962907138315, - "grad_norm": 0.6986181735992432, - "learning_rate": 9.12004403114107e-06, - "loss": 0.2124, - "step": 11411 - }, - { - "epoch": 1.075057111231483, - "grad_norm": 0.7183970808982849, - "learning_rate": 9.118539880646775e-06, - "loss": 0.2766, - "step": 11412 - }, - { - "epoch": 1.0751513153246508, - "grad_norm": 0.6105647087097168, - "learning_rate": 9.117035750251134e-06, - "loss": 0.2162, - "step": 11413 - }, - { - "epoch": 1.0752455194178188, - "grad_norm": 0.6653372645378113, - "learning_rate": 9.115531639988443e-06, - "loss": 0.185, - "step": 11414 - }, - { - "epoch": 1.0753397235109865, - "grad_norm": 0.650631844997406, - "learning_rate": 9.114027549893e-06, - "loss": 0.2188, - "step": 11415 - }, - { - "epoch": 1.0754339276041545, - "grad_norm": 0.6427672505378723, - "learning_rate": 9.112523479999096e-06, - "loss": 0.1923, - "step": 11416 - }, - { - "epoch": 1.0755281316973222, - "grad_norm": 0.7197262048721313, - "learning_rate": 9.111019430341033e-06, - 
"loss": 0.2176, - "step": 11417 - }, - { - "epoch": 1.0756223357904902, - "grad_norm": 0.746242344379425, - "learning_rate": 9.109515400953102e-06, - "loss": 0.2164, - "step": 11418 - }, - { - "epoch": 1.075716539883658, - "grad_norm": 0.6563817262649536, - "learning_rate": 9.108011391869596e-06, - "loss": 0.1778, - "step": 11419 - }, - { - "epoch": 1.0758107439768259, - "grad_norm": 0.6394875049591064, - "learning_rate": 9.106507403124805e-06, - "loss": 0.2326, - "step": 11420 - }, - { - "epoch": 1.0759049480699936, - "grad_norm": 0.633727490901947, - "learning_rate": 9.105003434753035e-06, - "loss": 0.1941, - "step": 11421 - }, - { - "epoch": 1.0759991521631616, - "grad_norm": 0.6746935844421387, - "learning_rate": 9.103499486788567e-06, - "loss": 0.2017, - "step": 11422 - }, - { - "epoch": 1.0760933562563293, - "grad_norm": 0.7106413841247559, - "learning_rate": 9.101995559265696e-06, - "loss": 0.2331, - "step": 11423 - }, - { - "epoch": 1.0761875603494973, - "grad_norm": 0.6105821132659912, - "learning_rate": 9.100491652218716e-06, - "loss": 0.1998, - "step": 11424 - }, - { - "epoch": 1.076281764442665, - "grad_norm": 0.6974877715110779, - "learning_rate": 9.098987765681917e-06, - "loss": 0.2107, - "step": 11425 - }, - { - "epoch": 1.076375968535833, - "grad_norm": 0.6682244539260864, - "learning_rate": 9.09748389968959e-06, - "loss": 0.1996, - "step": 11426 - }, - { - "epoch": 1.0764701726290007, - "grad_norm": 0.6583214998245239, - "learning_rate": 9.095980054276027e-06, - "loss": 0.2036, - "step": 11427 - }, - { - "epoch": 1.0765643767221686, - "grad_norm": 0.6335369348526001, - "learning_rate": 9.094476229475517e-06, - "loss": 0.2029, - "step": 11428 - }, - { - "epoch": 1.0766585808153364, - "grad_norm": 0.6718069314956665, - "learning_rate": 9.09297242532235e-06, - "loss": 0.2091, - "step": 11429 - }, - { - "epoch": 1.0767527849085043, - "grad_norm": 0.5730756521224976, - "learning_rate": 9.091468641850812e-06, - "loss": 0.1944, - "step": 11430 - }, - { - "epoch": 1.076846989001672, - "grad_norm": 0.6275970339775085, - "learning_rate": 9.089964879095197e-06, - "loss": 0.217, - "step": 11431 - }, - { - "epoch": 1.07694119309484, - "grad_norm": 0.6263253092765808, - "learning_rate": 9.088461137089788e-06, - "loss": 0.2075, - "step": 11432 - }, - { - "epoch": 1.0770353971880078, - "grad_norm": 0.5872949361801147, - "learning_rate": 9.086957415868874e-06, - "loss": 0.2032, - "step": 11433 - }, - { - "epoch": 1.0771296012811757, - "grad_norm": 0.6495922803878784, - "learning_rate": 9.085453715466746e-06, - "loss": 0.2209, - "step": 11434 - }, - { - "epoch": 1.0772238053743435, - "grad_norm": 0.5955852270126343, - "learning_rate": 9.083950035917688e-06, - "loss": 0.1929, - "step": 11435 - }, - { - "epoch": 1.0773180094675114, - "grad_norm": 0.7622807025909424, - "learning_rate": 9.08244637725598e-06, - "loss": 0.1952, - "step": 11436 - }, - { - "epoch": 1.0774122135606792, - "grad_norm": 0.6685764789581299, - "learning_rate": 9.080942739515917e-06, - "loss": 0.1969, - "step": 11437 - }, - { - "epoch": 1.0775064176538471, - "grad_norm": 0.6288415193557739, - "learning_rate": 9.079439122731786e-06, - "loss": 0.2077, - "step": 11438 - }, - { - "epoch": 1.0776006217470149, - "grad_norm": 0.6691920757293701, - "learning_rate": 9.077935526937862e-06, - "loss": 0.207, - "step": 11439 - }, - { - "epoch": 1.0776948258401828, - "grad_norm": 0.6530357003211975, - "learning_rate": 9.076431952168432e-06, - "loss": 0.1769, - "step": 11440 - }, - { - "epoch": 1.0777890299333506, - "grad_norm": 
0.6887516975402832, - "learning_rate": 9.074928398457785e-06, - "loss": 0.1851, - "step": 11441 - }, - { - "epoch": 1.0778832340265185, - "grad_norm": 0.6515269875526428, - "learning_rate": 9.073424865840202e-06, - "loss": 0.2113, - "step": 11442 - }, - { - "epoch": 1.0779774381196863, - "grad_norm": 0.6228629350662231, - "learning_rate": 9.071921354349961e-06, - "loss": 0.2162, - "step": 11443 - }, - { - "epoch": 1.0780716422128542, - "grad_norm": 0.5888656973838806, - "learning_rate": 9.07041786402135e-06, - "loss": 0.1957, - "step": 11444 - }, - { - "epoch": 1.078165846306022, - "grad_norm": 0.67299485206604, - "learning_rate": 9.068914394888651e-06, - "loss": 0.1915, - "step": 11445 - }, - { - "epoch": 1.07826005039919, - "grad_norm": 0.6621066927909851, - "learning_rate": 9.06741094698614e-06, - "loss": 0.202, - "step": 11446 - }, - { - "epoch": 1.0783542544923577, - "grad_norm": 0.6297824382781982, - "learning_rate": 9.065907520348104e-06, - "loss": 0.1877, - "step": 11447 - }, - { - "epoch": 1.0784484585855256, - "grad_norm": 0.6334999203681946, - "learning_rate": 9.064404115008824e-06, - "loss": 0.2458, - "step": 11448 - }, - { - "epoch": 1.0785426626786934, - "grad_norm": 0.5947049856185913, - "learning_rate": 9.062900731002575e-06, - "loss": 0.1841, - "step": 11449 - }, - { - "epoch": 1.0786368667718613, - "grad_norm": 0.6204099059104919, - "learning_rate": 9.061397368363635e-06, - "loss": 0.2255, - "step": 11450 - }, - { - "epoch": 1.078731070865029, - "grad_norm": 0.6177732348442078, - "learning_rate": 9.05989402712629e-06, - "loss": 0.2146, - "step": 11451 - }, - { - "epoch": 1.078825274958197, - "grad_norm": 0.7066769599914551, - "learning_rate": 9.058390707324817e-06, - "loss": 0.2223, - "step": 11452 - }, - { - "epoch": 1.0789194790513648, - "grad_norm": 0.6135818958282471, - "learning_rate": 9.056887408993488e-06, - "loss": 0.2093, - "step": 11453 - }, - { - "epoch": 1.0790136831445327, - "grad_norm": 0.5583428144454956, - "learning_rate": 9.055384132166587e-06, - "loss": 0.2067, - "step": 11454 - }, - { - "epoch": 1.0791078872377005, - "grad_norm": 0.7905045747756958, - "learning_rate": 9.053880876878392e-06, - "loss": 0.1852, - "step": 11455 - }, - { - "epoch": 1.0792020913308684, - "grad_norm": 0.7285098433494568, - "learning_rate": 9.052377643163168e-06, - "loss": 0.2386, - "step": 11456 - }, - { - "epoch": 1.0792962954240362, - "grad_norm": 0.6515780091285706, - "learning_rate": 9.050874431055205e-06, - "loss": 0.2123, - "step": 11457 - }, - { - "epoch": 1.0793904995172041, - "grad_norm": 0.6538022756576538, - "learning_rate": 9.049371240588774e-06, - "loss": 0.1863, - "step": 11458 - }, - { - "epoch": 1.0794847036103719, - "grad_norm": 0.6690369844436646, - "learning_rate": 9.047868071798146e-06, - "loss": 0.2184, - "step": 11459 - }, - { - "epoch": 1.0795789077035398, - "grad_norm": 0.6839024424552917, - "learning_rate": 9.046364924717598e-06, - "loss": 0.2219, - "step": 11460 - }, - { - "epoch": 1.0796731117967076, - "grad_norm": 0.6652877330780029, - "learning_rate": 9.044861799381407e-06, - "loss": 0.2138, - "step": 11461 - }, - { - "epoch": 1.0797673158898755, - "grad_norm": 0.6577285528182983, - "learning_rate": 9.043358695823841e-06, - "loss": 0.2233, - "step": 11462 - }, - { - "epoch": 1.0798615199830432, - "grad_norm": 0.724801242351532, - "learning_rate": 9.041855614079177e-06, - "loss": 0.2355, - "step": 11463 - }, - { - "epoch": 1.0799557240762112, - "grad_norm": 0.5934066772460938, - "learning_rate": 9.040352554181688e-06, - "loss": 0.174, - "step": 
11464 - }, - { - "epoch": 1.080049928169379, - "grad_norm": 0.7215653657913208, - "learning_rate": 9.038849516165643e-06, - "loss": 0.2055, - "step": 11465 - }, - { - "epoch": 1.080144132262547, - "grad_norm": 0.6846879720687866, - "learning_rate": 9.037346500065318e-06, - "loss": 0.2189, - "step": 11466 - }, - { - "epoch": 1.0802383363557146, - "grad_norm": 0.6117352247238159, - "learning_rate": 9.03584350591498e-06, - "loss": 0.2015, - "step": 11467 - }, - { - "epoch": 1.0803325404488826, - "grad_norm": 0.6313402652740479, - "learning_rate": 9.034340533748901e-06, - "loss": 0.198, - "step": 11468 - }, - { - "epoch": 1.0804267445420503, - "grad_norm": 0.6113153100013733, - "learning_rate": 9.032837583601354e-06, - "loss": 0.2173, - "step": 11469 - }, - { - "epoch": 1.0805209486352183, - "grad_norm": 0.6613195538520813, - "learning_rate": 9.031334655506601e-06, - "loss": 0.2144, - "step": 11470 - }, - { - "epoch": 1.080615152728386, - "grad_norm": 0.661891520023346, - "learning_rate": 9.02983174949892e-06, - "loss": 0.1997, - "step": 11471 - }, - { - "epoch": 1.0807093568215538, - "grad_norm": 0.6959601044654846, - "learning_rate": 9.028328865612574e-06, - "loss": 0.2257, - "step": 11472 - }, - { - "epoch": 1.0808035609147217, - "grad_norm": 0.701431930065155, - "learning_rate": 9.026826003881831e-06, - "loss": 0.2089, - "step": 11473 - }, - { - "epoch": 1.0808977650078897, - "grad_norm": 0.6624088287353516, - "learning_rate": 9.025323164340962e-06, - "loss": 0.1993, - "step": 11474 - }, - { - "epoch": 1.0809919691010574, - "grad_norm": 0.684364378452301, - "learning_rate": 9.023820347024236e-06, - "loss": 0.1855, - "step": 11475 - }, - { - "epoch": 1.0810861731942252, - "grad_norm": 0.6876487135887146, - "learning_rate": 9.022317551965908e-06, - "loss": 0.2242, - "step": 11476 - }, - { - "epoch": 1.0811803772873931, - "grad_norm": 0.5800148248672485, - "learning_rate": 9.020814779200255e-06, - "loss": 0.1951, - "step": 11477 - }, - { - "epoch": 1.081274581380561, - "grad_norm": 0.8090377449989319, - "learning_rate": 9.019312028761544e-06, - "loss": 0.2272, - "step": 11478 - }, - { - "epoch": 1.0813687854737288, - "grad_norm": 0.6785503625869751, - "learning_rate": 9.017809300684031e-06, - "loss": 0.2314, - "step": 11479 - }, - { - "epoch": 1.0814629895668966, - "grad_norm": 0.7226163744926453, - "learning_rate": 9.016306595001985e-06, - "loss": 0.2475, - "step": 11480 - }, - { - "epoch": 1.0815571936600645, - "grad_norm": 0.6443579196929932, - "learning_rate": 9.01480391174967e-06, - "loss": 0.1798, - "step": 11481 - }, - { - "epoch": 1.0816513977532325, - "grad_norm": 0.6113689541816711, - "learning_rate": 9.013301250961351e-06, - "loss": 0.185, - "step": 11482 - }, - { - "epoch": 1.0817456018464002, - "grad_norm": 0.6521504521369934, - "learning_rate": 9.011798612671286e-06, - "loss": 0.1943, - "step": 11483 - }, - { - "epoch": 1.081839805939568, - "grad_norm": 0.6238689422607422, - "learning_rate": 9.010295996913744e-06, - "loss": 0.2107, - "step": 11484 - }, - { - "epoch": 1.081934010032736, - "grad_norm": 0.6215823888778687, - "learning_rate": 9.008793403722984e-06, - "loss": 0.2387, - "step": 11485 - }, - { - "epoch": 1.0820282141259039, - "grad_norm": 0.5636431574821472, - "learning_rate": 9.007290833133264e-06, - "loss": 0.1883, - "step": 11486 - }, - { - "epoch": 1.0821224182190716, - "grad_norm": 0.6293860077857971, - "learning_rate": 9.005788285178851e-06, - "loss": 0.2032, - "step": 11487 - }, - { - "epoch": 1.0822166223122394, - "grad_norm": 0.6379905343055725, - 
"learning_rate": 9.004285759894004e-06, - "loss": 0.211, - "step": 11488 - }, - { - "epoch": 1.0823108264054073, - "grad_norm": 0.6105638742446899, - "learning_rate": 9.00278325731298e-06, - "loss": 0.1981, - "step": 11489 - }, - { - "epoch": 1.082405030498575, - "grad_norm": 0.6221534609794617, - "learning_rate": 9.001280777470038e-06, - "loss": 0.1923, - "step": 11490 - }, - { - "epoch": 1.082499234591743, - "grad_norm": 0.6618968844413757, - "learning_rate": 8.999778320399441e-06, - "loss": 0.1764, - "step": 11491 - }, - { - "epoch": 1.0825934386849108, - "grad_norm": 0.6597989201545715, - "learning_rate": 8.998275886135446e-06, - "loss": 0.2261, - "step": 11492 - }, - { - "epoch": 1.0826876427780787, - "grad_norm": 0.6791762709617615, - "learning_rate": 8.996773474712307e-06, - "loss": 0.234, - "step": 11493 - }, - { - "epoch": 1.0827818468712465, - "grad_norm": 0.5946647524833679, - "learning_rate": 8.995271086164287e-06, - "loss": 0.1826, - "step": 11494 - }, - { - "epoch": 1.0828760509644144, - "grad_norm": 0.5931686162948608, - "learning_rate": 8.993768720525642e-06, - "loss": 0.202, - "step": 11495 - }, - { - "epoch": 1.0829702550575822, - "grad_norm": 0.6495991945266724, - "learning_rate": 8.992266377830619e-06, - "loss": 0.2053, - "step": 11496 - }, - { - "epoch": 1.0830644591507501, - "grad_norm": 0.6207659840583801, - "learning_rate": 8.990764058113486e-06, - "loss": 0.1994, - "step": 11497 - }, - { - "epoch": 1.0831586632439179, - "grad_norm": 0.6050601601600647, - "learning_rate": 8.989261761408496e-06, - "loss": 0.196, - "step": 11498 - }, - { - "epoch": 1.0832528673370858, - "grad_norm": 0.6067149639129639, - "learning_rate": 8.9877594877499e-06, - "loss": 0.2223, - "step": 11499 - }, - { - "epoch": 1.0833470714302535, - "grad_norm": 0.6345522403717041, - "learning_rate": 8.986257237171947e-06, - "loss": 0.2142, - "step": 11500 - }, - { - "epoch": 1.0834412755234215, - "grad_norm": 0.682036817073822, - "learning_rate": 8.984755009708903e-06, - "loss": 0.2266, - "step": 11501 - }, - { - "epoch": 1.0835354796165892, - "grad_norm": 0.6006115674972534, - "learning_rate": 8.983252805395011e-06, - "loss": 0.1968, - "step": 11502 - }, - { - "epoch": 1.0836296837097572, - "grad_norm": 0.6100013852119446, - "learning_rate": 8.98175062426453e-06, - "loss": 0.223, - "step": 11503 - }, - { - "epoch": 1.083723887802925, - "grad_norm": 0.6218665838241577, - "learning_rate": 8.980248466351708e-06, - "loss": 0.201, - "step": 11504 - }, - { - "epoch": 1.083818091896093, - "grad_norm": 0.7070738077163696, - "learning_rate": 8.978746331690799e-06, - "loss": 0.2226, - "step": 11505 - }, - { - "epoch": 1.0839122959892606, - "grad_norm": 0.5810721516609192, - "learning_rate": 8.977244220316051e-06, - "loss": 0.2086, - "step": 11506 - }, - { - "epoch": 1.0840065000824286, - "grad_norm": 0.6965169310569763, - "learning_rate": 8.975742132261719e-06, - "loss": 0.23, - "step": 11507 - }, - { - "epoch": 1.0841007041755963, - "grad_norm": 0.7126672863960266, - "learning_rate": 8.97424006756205e-06, - "loss": 0.2139, - "step": 11508 - }, - { - "epoch": 1.0841949082687643, - "grad_norm": 0.7475795745849609, - "learning_rate": 8.972738026251295e-06, - "loss": 0.1893, - "step": 11509 - }, - { - "epoch": 1.084289112361932, - "grad_norm": 0.6632144451141357, - "learning_rate": 8.971236008363698e-06, - "loss": 0.2225, - "step": 11510 - }, - { - "epoch": 1.0843833164551, - "grad_norm": 0.6255399584770203, - "learning_rate": 8.969734013933514e-06, - "loss": 0.2064, - "step": 11511 - }, - { - "epoch": 
1.0844775205482677, - "grad_norm": 0.7544078230857849, - "learning_rate": 8.968232042994988e-06, - "loss": 0.2123, - "step": 11512 - }, - { - "epoch": 1.0845717246414357, - "grad_norm": 0.7256659269332886, - "learning_rate": 8.966730095582363e-06, - "loss": 0.1864, - "step": 11513 - }, - { - "epoch": 1.0846659287346034, - "grad_norm": 0.6728140711784363, - "learning_rate": 8.965228171729894e-06, - "loss": 0.2011, - "step": 11514 - }, - { - "epoch": 1.0847601328277714, - "grad_norm": 0.688495397567749, - "learning_rate": 8.963726271471825e-06, - "loss": 0.2061, - "step": 11515 - }, - { - "epoch": 1.0848543369209391, - "grad_norm": 0.6378021240234375, - "learning_rate": 8.962224394842393e-06, - "loss": 0.2243, - "step": 11516 - }, - { - "epoch": 1.084948541014107, - "grad_norm": 0.6383763551712036, - "learning_rate": 8.960722541875854e-06, - "loss": 0.193, - "step": 11517 - }, - { - "epoch": 1.0850427451072748, - "grad_norm": 0.6939751505851746, - "learning_rate": 8.95922071260645e-06, - "loss": 0.2142, - "step": 11518 - }, - { - "epoch": 1.0851369492004428, - "grad_norm": 0.7284465432167053, - "learning_rate": 8.957718907068422e-06, - "loss": 0.2245, - "step": 11519 - }, - { - "epoch": 1.0852311532936105, - "grad_norm": 0.618031919002533, - "learning_rate": 8.956217125296011e-06, - "loss": 0.2071, - "step": 11520 - }, - { - "epoch": 1.0853253573867785, - "grad_norm": 0.5998331308364868, - "learning_rate": 8.954715367323468e-06, - "loss": 0.1908, - "step": 11521 - }, - { - "epoch": 1.0854195614799462, - "grad_norm": 0.6736962795257568, - "learning_rate": 8.95321363318503e-06, - "loss": 0.221, - "step": 11522 - }, - { - "epoch": 1.0855137655731142, - "grad_norm": 0.6591629385948181, - "learning_rate": 8.951711922914937e-06, - "loss": 0.1875, - "step": 11523 - }, - { - "epoch": 1.085607969666282, - "grad_norm": 0.5983251929283142, - "learning_rate": 8.950210236547437e-06, - "loss": 0.1992, - "step": 11524 - }, - { - "epoch": 1.0857021737594499, - "grad_norm": 0.6645702719688416, - "learning_rate": 8.948708574116768e-06, - "loss": 0.2453, - "step": 11525 - }, - { - "epoch": 1.0857963778526176, - "grad_norm": 0.6778318285942078, - "learning_rate": 8.947206935657165e-06, - "loss": 0.1903, - "step": 11526 - }, - { - "epoch": 1.0858905819457856, - "grad_norm": 0.6595224142074585, - "learning_rate": 8.945705321202876e-06, - "loss": 0.1976, - "step": 11527 - }, - { - "epoch": 1.0859847860389533, - "grad_norm": 0.6204866766929626, - "learning_rate": 8.944203730788134e-06, - "loss": 0.2079, - "step": 11528 - }, - { - "epoch": 1.0860789901321213, - "grad_norm": 0.6215004920959473, - "learning_rate": 8.942702164447181e-06, - "loss": 0.1862, - "step": 11529 - }, - { - "epoch": 1.086173194225289, - "grad_norm": 0.6346989274024963, - "learning_rate": 8.941200622214254e-06, - "loss": 0.2117, - "step": 11530 - }, - { - "epoch": 1.086267398318457, - "grad_norm": 0.6370419263839722, - "learning_rate": 8.93969910412359e-06, - "loss": 0.2039, - "step": 11531 - }, - { - "epoch": 1.0863616024116247, - "grad_norm": 0.5927845239639282, - "learning_rate": 8.938197610209429e-06, - "loss": 0.197, - "step": 11532 - }, - { - "epoch": 1.0864558065047927, - "grad_norm": 0.6324030160903931, - "learning_rate": 8.936696140505997e-06, - "loss": 0.2092, - "step": 11533 - }, - { - "epoch": 1.0865500105979604, - "grad_norm": 0.6234959959983826, - "learning_rate": 8.935194695047543e-06, - "loss": 0.2032, - "step": 11534 - }, - { - "epoch": 1.0866442146911284, - "grad_norm": 0.6283422112464905, - "learning_rate": 
8.933693273868298e-06, - "loss": 0.1923, - "step": 11535 - }, - { - "epoch": 1.0867384187842961, - "grad_norm": 0.6398184895515442, - "learning_rate": 8.93219187700249e-06, - "loss": 0.2109, - "step": 11536 - }, - { - "epoch": 1.086832622877464, - "grad_norm": 0.7055505514144897, - "learning_rate": 8.930690504484362e-06, - "loss": 0.2371, - "step": 11537 - }, - { - "epoch": 1.0869268269706318, - "grad_norm": 0.6347362399101257, - "learning_rate": 8.929189156348146e-06, - "loss": 0.2087, - "step": 11538 - }, - { - "epoch": 1.0870210310637998, - "grad_norm": 0.6415491700172424, - "learning_rate": 8.927687832628072e-06, - "loss": 0.227, - "step": 11539 - }, - { - "epoch": 1.0871152351569675, - "grad_norm": 0.6096305847167969, - "learning_rate": 8.92618653335837e-06, - "loss": 0.1748, - "step": 11540 - }, - { - "epoch": 1.0872094392501355, - "grad_norm": 0.6415346264839172, - "learning_rate": 8.92468525857328e-06, - "loss": 0.199, - "step": 11541 - }, - { - "epoch": 1.0873036433433032, - "grad_norm": 0.6052883267402649, - "learning_rate": 8.923184008307029e-06, - "loss": 0.1776, - "step": 11542 - }, - { - "epoch": 1.0873978474364712, - "grad_norm": 0.6329769492149353, - "learning_rate": 8.921682782593844e-06, - "loss": 0.1992, - "step": 11543 - }, - { - "epoch": 1.087492051529639, - "grad_norm": 0.6819673180580139, - "learning_rate": 8.920181581467963e-06, - "loss": 0.2024, - "step": 11544 - }, - { - "epoch": 1.0875862556228069, - "grad_norm": 0.6423011422157288, - "learning_rate": 8.918680404963613e-06, - "loss": 0.2095, - "step": 11545 - }, - { - "epoch": 1.0876804597159746, - "grad_norm": 0.5891927480697632, - "learning_rate": 8.917179253115018e-06, - "loss": 0.2009, - "step": 11546 - }, - { - "epoch": 1.0877746638091426, - "grad_norm": 0.6175965666770935, - "learning_rate": 8.915678125956411e-06, - "loss": 0.2024, - "step": 11547 - }, - { - "epoch": 1.0878688679023103, - "grad_norm": 0.623192548751831, - "learning_rate": 8.914177023522022e-06, - "loss": 0.2193, - "step": 11548 - }, - { - "epoch": 1.0879630719954783, - "grad_norm": 0.6816789507865906, - "learning_rate": 8.912675945846076e-06, - "loss": 0.1949, - "step": 11549 - }, - { - "epoch": 1.088057276088646, - "grad_norm": 0.5938009023666382, - "learning_rate": 8.911174892962798e-06, - "loss": 0.1997, - "step": 11550 - }, - { - "epoch": 1.088151480181814, - "grad_norm": 0.6361944079399109, - "learning_rate": 8.909673864906418e-06, - "loss": 0.1786, - "step": 11551 - }, - { - "epoch": 1.0882456842749817, - "grad_norm": 0.6905481219291687, - "learning_rate": 8.908172861711165e-06, - "loss": 0.197, - "step": 11552 - }, - { - "epoch": 1.0883398883681497, - "grad_norm": 0.6696801781654358, - "learning_rate": 8.906671883411248e-06, - "loss": 0.2219, - "step": 11553 - }, - { - "epoch": 1.0884340924613174, - "grad_norm": 0.6810381412506104, - "learning_rate": 8.905170930040911e-06, - "loss": 0.2015, - "step": 11554 - }, - { - "epoch": 1.0885282965544854, - "grad_norm": 0.6643819808959961, - "learning_rate": 8.90367000163437e-06, - "loss": 0.2155, - "step": 11555 - }, - { - "epoch": 1.088622500647653, - "grad_norm": 0.800377607345581, - "learning_rate": 8.902169098225843e-06, - "loss": 0.2062, - "step": 11556 - }, - { - "epoch": 1.088716704740821, - "grad_norm": 0.6669199466705322, - "learning_rate": 8.900668219849562e-06, - "loss": 0.2056, - "step": 11557 - }, - { - "epoch": 1.0888109088339888, - "grad_norm": 0.6585336327552795, - "learning_rate": 8.899167366539748e-06, - "loss": 0.2203, - "step": 11558 - }, - { - "epoch": 1.0889051129271567, 
- "grad_norm": 0.6978430151939392, - "learning_rate": 8.897666538330619e-06, - "loss": 0.1806, - "step": 11559 - }, - { - "epoch": 1.0889993170203245, - "grad_norm": 0.6860094666481018, - "learning_rate": 8.896165735256396e-06, - "loss": 0.2212, - "step": 11560 - }, - { - "epoch": 1.0890935211134924, - "grad_norm": 0.6172075867652893, - "learning_rate": 8.894664957351302e-06, - "loss": 0.2004, - "step": 11561 - }, - { - "epoch": 1.0891877252066602, - "grad_norm": 0.6051881909370422, - "learning_rate": 8.893164204649557e-06, - "loss": 0.222, - "step": 11562 - }, - { - "epoch": 1.0892819292998281, - "grad_norm": 0.6277562975883484, - "learning_rate": 8.891663477185378e-06, - "loss": 0.1965, - "step": 11563 - }, - { - "epoch": 1.0893761333929959, - "grad_norm": 0.6940679550170898, - "learning_rate": 8.890162774992988e-06, - "loss": 0.2307, - "step": 11564 - }, - { - "epoch": 1.0894703374861638, - "grad_norm": 0.75794517993927, - "learning_rate": 8.888662098106603e-06, - "loss": 0.2627, - "step": 11565 - }, - { - "epoch": 1.0895645415793316, - "grad_norm": 0.6346994638442993, - "learning_rate": 8.887161446560439e-06, - "loss": 0.1979, - "step": 11566 - }, - { - "epoch": 1.0896587456724995, - "grad_norm": 0.6490880250930786, - "learning_rate": 8.885660820388717e-06, - "loss": 0.2133, - "step": 11567 - }, - { - "epoch": 1.0897529497656673, - "grad_norm": 0.6442879438400269, - "learning_rate": 8.884160219625651e-06, - "loss": 0.2047, - "step": 11568 - }, - { - "epoch": 1.0898471538588352, - "grad_norm": 0.644882321357727, - "learning_rate": 8.882659644305457e-06, - "loss": 0.2047, - "step": 11569 - }, - { - "epoch": 1.089941357952003, - "grad_norm": 0.6053950786590576, - "learning_rate": 8.881159094462351e-06, - "loss": 0.2019, - "step": 11570 - }, - { - "epoch": 1.090035562045171, - "grad_norm": 0.5669155120849609, - "learning_rate": 8.879658570130549e-06, - "loss": 0.2011, - "step": 11571 - }, - { - "epoch": 1.0901297661383387, - "grad_norm": 0.6672794222831726, - "learning_rate": 8.878158071344266e-06, - "loss": 0.2326, - "step": 11572 - }, - { - "epoch": 1.0902239702315066, - "grad_norm": 0.6535966396331787, - "learning_rate": 8.87665759813771e-06, - "loss": 0.2188, - "step": 11573 - }, - { - "epoch": 1.0903181743246744, - "grad_norm": 0.7254478335380554, - "learning_rate": 8.875157150545099e-06, - "loss": 0.2112, - "step": 11574 - }, - { - "epoch": 1.0904123784178423, - "grad_norm": 0.6615368127822876, - "learning_rate": 8.873656728600649e-06, - "loss": 0.2244, - "step": 11575 - }, - { - "epoch": 1.09050658251101, - "grad_norm": 0.675292432308197, - "learning_rate": 8.87215633233856e-06, - "loss": 0.2232, - "step": 11576 - }, - { - "epoch": 1.090600786604178, - "grad_norm": 0.5594111084938049, - "learning_rate": 8.870655961793057e-06, - "loss": 0.1753, - "step": 11577 - }, - { - "epoch": 1.0906949906973458, - "grad_norm": 0.5725761651992798, - "learning_rate": 8.869155616998343e-06, - "loss": 0.1941, - "step": 11578 - }, - { - "epoch": 1.0907891947905137, - "grad_norm": 0.6048107147216797, - "learning_rate": 8.86765529798863e-06, - "loss": 0.1874, - "step": 11579 - }, - { - "epoch": 1.0908833988836815, - "grad_norm": 0.6969088315963745, - "learning_rate": 8.866155004798127e-06, - "loss": 0.1968, - "step": 11580 - }, - { - "epoch": 1.0909776029768494, - "grad_norm": 0.6184073090553284, - "learning_rate": 8.864654737461042e-06, - "loss": 0.2067, - "step": 11581 - }, - { - "epoch": 1.0910718070700172, - "grad_norm": 0.6290350556373596, - "learning_rate": 8.863154496011588e-06, - "loss": 0.2249, 
- "step": 11582 - }, - { - "epoch": 1.0911660111631851, - "grad_norm": 0.6085337996482849, - "learning_rate": 8.861654280483965e-06, - "loss": 0.1897, - "step": 11583 - }, - { - "epoch": 1.0912602152563529, - "grad_norm": 0.5991551280021667, - "learning_rate": 8.860154090912388e-06, - "loss": 0.1789, - "step": 11584 - }, - { - "epoch": 1.0913544193495208, - "grad_norm": 0.6495198607444763, - "learning_rate": 8.858653927331061e-06, - "loss": 0.1981, - "step": 11585 - }, - { - "epoch": 1.0914486234426886, - "grad_norm": 0.5962744355201721, - "learning_rate": 8.857153789774188e-06, - "loss": 0.1957, - "step": 11586 - }, - { - "epoch": 1.0915428275358565, - "grad_norm": 0.6588103175163269, - "learning_rate": 8.855653678275977e-06, - "loss": 0.2188, - "step": 11587 - }, - { - "epoch": 1.0916370316290243, - "grad_norm": 0.5866307020187378, - "learning_rate": 8.85415359287063e-06, - "loss": 0.196, - "step": 11588 - }, - { - "epoch": 1.0917312357221922, - "grad_norm": 0.5782949328422546, - "learning_rate": 8.852653533592356e-06, - "loss": 0.1938, - "step": 11589 - }, - { - "epoch": 1.09182543981536, - "grad_norm": 0.5999405980110168, - "learning_rate": 8.851153500475354e-06, - "loss": 0.1614, - "step": 11590 - }, - { - "epoch": 1.091919643908528, - "grad_norm": 0.652942955493927, - "learning_rate": 8.84965349355383e-06, - "loss": 0.2082, - "step": 11591 - }, - { - "epoch": 1.0920138480016957, - "grad_norm": 0.6853984594345093, - "learning_rate": 8.848153512861987e-06, - "loss": 0.2133, - "step": 11592 - }, - { - "epoch": 1.0921080520948636, - "grad_norm": 0.7114432454109192, - "learning_rate": 8.84665355843402e-06, - "loss": 0.21, - "step": 11593 - }, - { - "epoch": 1.0922022561880314, - "grad_norm": 0.6580626964569092, - "learning_rate": 8.84515363030414e-06, - "loss": 0.2203, - "step": 11594 - }, - { - "epoch": 1.0922964602811993, - "grad_norm": 0.6535913348197937, - "learning_rate": 8.843653728506544e-06, - "loss": 0.2227, - "step": 11595 - }, - { - "epoch": 1.092390664374367, - "grad_norm": 0.6289659142494202, - "learning_rate": 8.842153853075426e-06, - "loss": 0.1904, - "step": 11596 - }, - { - "epoch": 1.092484868467535, - "grad_norm": 0.7105277180671692, - "learning_rate": 8.840654004044996e-06, - "loss": 0.1893, - "step": 11597 - }, - { - "epoch": 1.0925790725607027, - "grad_norm": 0.6600250601768494, - "learning_rate": 8.839154181449447e-06, - "loss": 0.1956, - "step": 11598 - }, - { - "epoch": 1.0926732766538707, - "grad_norm": 0.6647025942802429, - "learning_rate": 8.837654385322976e-06, - "loss": 0.1906, - "step": 11599 - }, - { - "epoch": 1.0927674807470384, - "grad_norm": 0.6685451865196228, - "learning_rate": 8.836154615699782e-06, - "loss": 0.2304, - "step": 11600 - }, - { - "epoch": 1.0928616848402064, - "grad_norm": 0.590814471244812, - "learning_rate": 8.834654872614065e-06, - "loss": 0.1957, - "step": 11601 - }, - { - "epoch": 1.0929558889333741, - "grad_norm": 0.637969434261322, - "learning_rate": 8.83315515610002e-06, - "loss": 0.2056, - "step": 11602 - }, - { - "epoch": 1.093050093026542, - "grad_norm": 0.652006983757019, - "learning_rate": 8.831655466191837e-06, - "loss": 0.1958, - "step": 11603 - }, - { - "epoch": 1.0931442971197098, - "grad_norm": 0.6946149468421936, - "learning_rate": 8.830155802923721e-06, - "loss": 0.2242, - "step": 11604 - }, - { - "epoch": 1.0932385012128778, - "grad_norm": 0.6098273992538452, - "learning_rate": 8.828656166329861e-06, - "loss": 0.2052, - "step": 11605 - }, - { - "epoch": 1.0933327053060455, - "grad_norm": 0.622878909111023, - 
"learning_rate": 8.82715655644445e-06, - "loss": 0.2085, - "step": 11606 - }, - { - "epoch": 1.0934269093992135, - "grad_norm": 0.6437327861785889, - "learning_rate": 8.825656973301684e-06, - "loss": 0.2518, - "step": 11607 - }, - { - "epoch": 1.0935211134923812, - "grad_norm": 0.5694655179977417, - "learning_rate": 8.824157416935756e-06, - "loss": 0.2081, - "step": 11608 - }, - { - "epoch": 1.0936153175855492, - "grad_norm": 0.5934538841247559, - "learning_rate": 8.822657887380857e-06, - "loss": 0.206, - "step": 11609 - }, - { - "epoch": 1.093709521678717, - "grad_norm": 0.6617633104324341, - "learning_rate": 8.82115838467118e-06, - "loss": 0.2397, - "step": 11610 - }, - { - "epoch": 1.0938037257718847, - "grad_norm": 0.6666903495788574, - "learning_rate": 8.819658908840914e-06, - "loss": 0.2311, - "step": 11611 - }, - { - "epoch": 1.0938979298650526, - "grad_norm": 0.6600042581558228, - "learning_rate": 8.818159459924253e-06, - "loss": 0.2169, - "step": 11612 - }, - { - "epoch": 1.0939921339582206, - "grad_norm": 0.6306442022323608, - "learning_rate": 8.816660037955377e-06, - "loss": 0.2284, - "step": 11613 - }, - { - "epoch": 1.0940863380513883, - "grad_norm": 0.6237883567810059, - "learning_rate": 8.815160642968487e-06, - "loss": 0.2261, - "step": 11614 - }, - { - "epoch": 1.094180542144556, - "grad_norm": 0.7384260892868042, - "learning_rate": 8.813661274997769e-06, - "loss": 0.2222, - "step": 11615 - }, - { - "epoch": 1.094274746237724, - "grad_norm": 0.6339855194091797, - "learning_rate": 8.812161934077402e-06, - "loss": 0.2098, - "step": 11616 - }, - { - "epoch": 1.094368950330892, - "grad_norm": 0.6157028675079346, - "learning_rate": 8.810662620241586e-06, - "loss": 0.1952, - "step": 11617 - }, - { - "epoch": 1.0944631544240597, - "grad_norm": 0.5984799861907959, - "learning_rate": 8.8091633335245e-06, - "loss": 0.1948, - "step": 11618 - }, - { - "epoch": 1.0945573585172275, - "grad_norm": 0.6805873513221741, - "learning_rate": 8.807664073960332e-06, - "loss": 0.2006, - "step": 11619 - }, - { - "epoch": 1.0946515626103954, - "grad_norm": 0.6823112964630127, - "learning_rate": 8.806164841583266e-06, - "loss": 0.2344, - "step": 11620 - }, - { - "epoch": 1.0947457667035634, - "grad_norm": 0.632782518863678, - "learning_rate": 8.804665636427488e-06, - "loss": 0.1886, - "step": 11621 - }, - { - "epoch": 1.0948399707967311, - "grad_norm": 0.6591693758964539, - "learning_rate": 8.803166458527182e-06, - "loss": 0.2164, - "step": 11622 - }, - { - "epoch": 1.0949341748898989, - "grad_norm": 0.5850099325180054, - "learning_rate": 8.801667307916531e-06, - "loss": 0.1872, - "step": 11623 - }, - { - "epoch": 1.0950283789830668, - "grad_norm": 0.7214787006378174, - "learning_rate": 8.80016818462972e-06, - "loss": 0.2192, - "step": 11624 - }, - { - "epoch": 1.0951225830762348, - "grad_norm": 0.6723693609237671, - "learning_rate": 8.79866908870093e-06, - "loss": 0.2203, - "step": 11625 - }, - { - "epoch": 1.0952167871694025, - "grad_norm": 0.6110305190086365, - "learning_rate": 8.79717002016434e-06, - "loss": 0.1854, - "step": 11626 - }, - { - "epoch": 1.0953109912625703, - "grad_norm": 0.5688706636428833, - "learning_rate": 8.795670979054137e-06, - "loss": 0.1741, - "step": 11627 - }, - { - "epoch": 1.0954051953557382, - "grad_norm": 0.6427212953567505, - "learning_rate": 8.7941719654045e-06, - "loss": 0.2174, - "step": 11628 - }, - { - "epoch": 1.095499399448906, - "grad_norm": 0.636704683303833, - "learning_rate": 8.792672979249603e-06, - "loss": 0.1924, - "step": 11629 - }, - { - "epoch": 
1.095593603542074, - "grad_norm": 0.6884451508522034, - "learning_rate": 8.79117402062363e-06, - "loss": 0.2185, - "step": 11630 - }, - { - "epoch": 1.0956878076352417, - "grad_norm": 0.711936891078949, - "learning_rate": 8.78967508956076e-06, - "loss": 0.2183, - "step": 11631 - }, - { - "epoch": 1.0957820117284096, - "grad_norm": 0.6035027503967285, - "learning_rate": 8.78817618609517e-06, - "loss": 0.2006, - "step": 11632 - }, - { - "epoch": 1.0958762158215773, - "grad_norm": 0.6631470322608948, - "learning_rate": 8.786677310261032e-06, - "loss": 0.2043, - "step": 11633 - }, - { - "epoch": 1.0959704199147453, - "grad_norm": 0.6563029885292053, - "learning_rate": 8.785178462092533e-06, - "loss": 0.2068, - "step": 11634 - }, - { - "epoch": 1.096064624007913, - "grad_norm": 0.6526097059249878, - "learning_rate": 8.783679641623845e-06, - "loss": 0.2003, - "step": 11635 - }, - { - "epoch": 1.096158828101081, - "grad_norm": 0.6027750372886658, - "learning_rate": 8.782180848889138e-06, - "loss": 0.1854, - "step": 11636 - }, - { - "epoch": 1.0962530321942487, - "grad_norm": 0.6719428300857544, - "learning_rate": 8.780682083922594e-06, - "loss": 0.193, - "step": 11637 - }, - { - "epoch": 1.0963472362874167, - "grad_norm": 0.6075634360313416, - "learning_rate": 8.779183346758384e-06, - "loss": 0.2078, - "step": 11638 - }, - { - "epoch": 1.0964414403805844, - "grad_norm": 0.6529176831245422, - "learning_rate": 8.777684637430682e-06, - "loss": 0.1811, - "step": 11639 - }, - { - "epoch": 1.0965356444737524, - "grad_norm": 0.5914357900619507, - "learning_rate": 8.776185955973658e-06, - "loss": 0.184, - "step": 11640 - }, - { - "epoch": 1.0966298485669201, - "grad_norm": 0.63169264793396, - "learning_rate": 8.774687302421488e-06, - "loss": 0.2126, - "step": 11641 - }, - { - "epoch": 1.096724052660088, - "grad_norm": 0.7162529826164246, - "learning_rate": 8.773188676808344e-06, - "loss": 0.2254, - "step": 11642 - }, - { - "epoch": 1.0968182567532558, - "grad_norm": 0.6307613849639893, - "learning_rate": 8.771690079168394e-06, - "loss": 0.1867, - "step": 11643 - }, - { - "epoch": 1.0969124608464238, - "grad_norm": 0.625088632106781, - "learning_rate": 8.77019150953581e-06, - "loss": 0.2097, - "step": 11644 - }, - { - "epoch": 1.0970066649395915, - "grad_norm": 0.6546489000320435, - "learning_rate": 8.768692967944762e-06, - "loss": 0.2039, - "step": 11645 - }, - { - "epoch": 1.0971008690327595, - "grad_norm": 0.6251490712165833, - "learning_rate": 8.767194454429417e-06, - "loss": 0.1926, - "step": 11646 - }, - { - "epoch": 1.0971950731259272, - "grad_norm": 0.7071504592895508, - "learning_rate": 8.765695969023946e-06, - "loss": 0.2273, - "step": 11647 - }, - { - "epoch": 1.0972892772190952, - "grad_norm": 0.669477641582489, - "learning_rate": 8.764197511762518e-06, - "loss": 0.2302, - "step": 11648 - }, - { - "epoch": 1.097383481312263, - "grad_norm": 0.6609790325164795, - "learning_rate": 8.762699082679298e-06, - "loss": 0.1951, - "step": 11649 - }, - { - "epoch": 1.097477685405431, - "grad_norm": 0.7205175757408142, - "learning_rate": 8.761200681808446e-06, - "loss": 0.2342, - "step": 11650 - }, - { - "epoch": 1.0975718894985986, - "grad_norm": 0.6903570294380188, - "learning_rate": 8.75970230918414e-06, - "loss": 0.2139, - "step": 11651 - }, - { - "epoch": 1.0976660935917666, - "grad_norm": 0.7559558153152466, - "learning_rate": 8.758203964840541e-06, - "loss": 0.2194, - "step": 11652 - }, - { - "epoch": 1.0977602976849343, - "grad_norm": 0.6063870787620544, - "learning_rate": 8.756705648811805e-06, - 
"loss": 0.202, - "step": 11653 - }, - { - "epoch": 1.0978545017781023, - "grad_norm": 0.6134804487228394, - "learning_rate": 8.755207361132109e-06, - "loss": 0.2014, - "step": 11654 - }, - { - "epoch": 1.09794870587127, - "grad_norm": 0.5935834050178528, - "learning_rate": 8.75370910183561e-06, - "loss": 0.2243, - "step": 11655 - }, - { - "epoch": 1.098042909964438, - "grad_norm": 0.7034485340118408, - "learning_rate": 8.752210870956466e-06, - "loss": 0.2343, - "step": 11656 - }, - { - "epoch": 1.0981371140576057, - "grad_norm": 0.6824043393135071, - "learning_rate": 8.75071266852885e-06, - "loss": 0.2055, - "step": 11657 - }, - { - "epoch": 1.0982313181507737, - "grad_norm": 0.6324033141136169, - "learning_rate": 8.749214494586915e-06, - "loss": 0.1887, - "step": 11658 - }, - { - "epoch": 1.0983255222439414, - "grad_norm": 0.7541441917419434, - "learning_rate": 8.747716349164826e-06, - "loss": 0.2306, - "step": 11659 - }, - { - "epoch": 1.0984197263371094, - "grad_norm": 0.6856192946434021, - "learning_rate": 8.746218232296735e-06, - "loss": 0.2238, - "step": 11660 - }, - { - "epoch": 1.0985139304302771, - "grad_norm": 0.7444167733192444, - "learning_rate": 8.744720144016812e-06, - "loss": 0.2246, - "step": 11661 - }, - { - "epoch": 1.098608134523445, - "grad_norm": 0.7645224332809448, - "learning_rate": 8.743222084359211e-06, - "loss": 0.2135, - "step": 11662 - }, - { - "epoch": 1.0987023386166128, - "grad_norm": 0.6658270955085754, - "learning_rate": 8.741724053358087e-06, - "loss": 0.2117, - "step": 11663 - }, - { - "epoch": 1.0987965427097808, - "grad_norm": 0.6010409593582153, - "learning_rate": 8.740226051047602e-06, - "loss": 0.1861, - "step": 11664 - }, - { - "epoch": 1.0988907468029485, - "grad_norm": 0.6358391046524048, - "learning_rate": 8.738728077461913e-06, - "loss": 0.2139, - "step": 11665 - }, - { - "epoch": 1.0989849508961165, - "grad_norm": 0.6551379561424255, - "learning_rate": 8.737230132635172e-06, - "loss": 0.2146, - "step": 11666 - }, - { - "epoch": 1.0990791549892842, - "grad_norm": 0.6351631879806519, - "learning_rate": 8.735732216601538e-06, - "loss": 0.1985, - "step": 11667 - }, - { - "epoch": 1.0991733590824522, - "grad_norm": 0.6814453601837158, - "learning_rate": 8.734234329395165e-06, - "loss": 0.2227, - "step": 11668 - }, - { - "epoch": 1.09926756317562, - "grad_norm": 0.6973292827606201, - "learning_rate": 8.73273647105021e-06, - "loss": 0.2359, - "step": 11669 - }, - { - "epoch": 1.0993617672687879, - "grad_norm": 0.5711583495140076, - "learning_rate": 8.731238641600816e-06, - "loss": 0.1736, - "step": 11670 - }, - { - "epoch": 1.0994559713619556, - "grad_norm": 0.6745663285255432, - "learning_rate": 8.729740841081148e-06, - "loss": 0.2029, - "step": 11671 - }, - { - "epoch": 1.0995501754551236, - "grad_norm": 0.6072559356689453, - "learning_rate": 8.728243069525355e-06, - "loss": 0.1888, - "step": 11672 - }, - { - "epoch": 1.0996443795482913, - "grad_norm": 0.6745792627334595, - "learning_rate": 8.726745326967581e-06, - "loss": 0.202, - "step": 11673 - }, - { - "epoch": 1.0997385836414593, - "grad_norm": 0.6435964107513428, - "learning_rate": 8.725247613441985e-06, - "loss": 0.2054, - "step": 11674 - }, - { - "epoch": 1.099832787734627, - "grad_norm": 0.7328150272369385, - "learning_rate": 8.723749928982719e-06, - "loss": 0.2138, - "step": 11675 - }, - { - "epoch": 1.099926991827795, - "grad_norm": 0.6047179102897644, - "learning_rate": 8.72225227362392e-06, - "loss": 0.1979, - "step": 11676 - }, - { - "epoch": 1.1000211959209627, - "grad_norm": 
0.6192255616188049, - "learning_rate": 8.720754647399754e-06, - "loss": 0.1978, - "step": 11677 - }, - { - "epoch": 1.1001154000141307, - "grad_norm": 0.6152708530426025, - "learning_rate": 8.719257050344359e-06, - "loss": 0.1798, - "step": 11678 - }, - { - "epoch": 1.1002096041072984, - "grad_norm": 0.6412601470947266, - "learning_rate": 8.71775948249188e-06, - "loss": 0.1605, - "step": 11679 - }, - { - "epoch": 1.1003038082004664, - "grad_norm": 0.6825234889984131, - "learning_rate": 8.71626194387647e-06, - "loss": 0.2359, - "step": 11680 - }, - { - "epoch": 1.100398012293634, - "grad_norm": 0.6711715459823608, - "learning_rate": 8.714764434532272e-06, - "loss": 0.2102, - "step": 11681 - }, - { - "epoch": 1.100492216386802, - "grad_norm": 0.6632339954376221, - "learning_rate": 8.713266954493434e-06, - "loss": 0.2191, - "step": 11682 - }, - { - "epoch": 1.1005864204799698, - "grad_norm": 0.6640236973762512, - "learning_rate": 8.711769503794096e-06, - "loss": 0.199, - "step": 11683 - }, - { - "epoch": 1.1006806245731378, - "grad_norm": 0.6721137762069702, - "learning_rate": 8.710272082468409e-06, - "loss": 0.2014, - "step": 11684 - }, - { - "epoch": 1.1007748286663055, - "grad_norm": 0.7337496876716614, - "learning_rate": 8.708774690550513e-06, - "loss": 0.2101, - "step": 11685 - }, - { - "epoch": 1.1008690327594735, - "grad_norm": 0.8009522557258606, - "learning_rate": 8.707277328074549e-06, - "loss": 0.2162, - "step": 11686 - }, - { - "epoch": 1.1009632368526412, - "grad_norm": 0.6302037835121155, - "learning_rate": 8.705779995074664e-06, - "loss": 0.1856, - "step": 11687 - }, - { - "epoch": 1.1010574409458092, - "grad_norm": 0.6537947654724121, - "learning_rate": 8.704282691584995e-06, - "loss": 0.195, - "step": 11688 - }, - { - "epoch": 1.101151645038977, - "grad_norm": 0.6836397647857666, - "learning_rate": 8.702785417639688e-06, - "loss": 0.1874, - "step": 11689 - }, - { - "epoch": 1.1012458491321448, - "grad_norm": 0.6943057775497437, - "learning_rate": 8.701288173272871e-06, - "loss": 0.2013, - "step": 11690 - }, - { - "epoch": 1.1013400532253126, - "grad_norm": 0.6722994446754456, - "learning_rate": 8.699790958518699e-06, - "loss": 0.1942, - "step": 11691 - }, - { - "epoch": 1.1014342573184805, - "grad_norm": 0.6170687675476074, - "learning_rate": 8.698293773411305e-06, - "loss": 0.1767, - "step": 11692 - }, - { - "epoch": 1.1015284614116483, - "grad_norm": 0.7113544940948486, - "learning_rate": 8.69679661798482e-06, - "loss": 0.1988, - "step": 11693 - }, - { - "epoch": 1.1016226655048162, - "grad_norm": 0.7036173939704895, - "learning_rate": 8.695299492273392e-06, - "loss": 0.2662, - "step": 11694 - }, - { - "epoch": 1.101716869597984, - "grad_norm": 0.7076472640037537, - "learning_rate": 8.693802396311154e-06, - "loss": 0.2451, - "step": 11695 - }, - { - "epoch": 1.101811073691152, - "grad_norm": 0.6691346168518066, - "learning_rate": 8.692305330132236e-06, - "loss": 0.203, - "step": 11696 - }, - { - "epoch": 1.1019052777843197, - "grad_norm": 0.7017386555671692, - "learning_rate": 8.690808293770786e-06, - "loss": 0.1932, - "step": 11697 - }, - { - "epoch": 1.1019994818774876, - "grad_norm": 0.6661994457244873, - "learning_rate": 8.689311287260928e-06, - "loss": 0.2003, - "step": 11698 - }, - { - "epoch": 1.1020936859706554, - "grad_norm": 0.5827733874320984, - "learning_rate": 8.6878143106368e-06, - "loss": 0.2073, - "step": 11699 - }, - { - "epoch": 1.1021878900638233, - "grad_norm": 0.7229766845703125, - "learning_rate": 8.686317363932534e-06, - "loss": 0.2429, - "step": 
11700 - }, - { - "epoch": 1.102282094156991, - "grad_norm": 0.6007014513015747, - "learning_rate": 8.684820447182266e-06, - "loss": 0.1814, - "step": 11701 - }, - { - "epoch": 1.102376298250159, - "grad_norm": 0.6572347283363342, - "learning_rate": 8.683323560420124e-06, - "loss": 0.1853, - "step": 11702 - }, - { - "epoch": 1.1024705023433268, - "grad_norm": 0.635355532169342, - "learning_rate": 8.681826703680239e-06, - "loss": 0.2203, - "step": 11703 - }, - { - "epoch": 1.1025647064364947, - "grad_norm": 0.5935506820678711, - "learning_rate": 8.680329876996748e-06, - "loss": 0.1947, - "step": 11704 - }, - { - "epoch": 1.1026589105296625, - "grad_norm": 0.6532561182975769, - "learning_rate": 8.678833080403775e-06, - "loss": 0.1956, - "step": 11705 - }, - { - "epoch": 1.1027531146228304, - "grad_norm": 0.7315452694892883, - "learning_rate": 8.67733631393545e-06, - "loss": 0.2611, - "step": 11706 - }, - { - "epoch": 1.1028473187159982, - "grad_norm": 0.6310397386550903, - "learning_rate": 8.675839577625905e-06, - "loss": 0.226, - "step": 11707 - }, - { - "epoch": 1.1029415228091661, - "grad_norm": 0.8106204271316528, - "learning_rate": 8.674342871509264e-06, - "loss": 0.1859, - "step": 11708 - }, - { - "epoch": 1.1030357269023339, - "grad_norm": 0.6311600804328918, - "learning_rate": 8.672846195619657e-06, - "loss": 0.1975, - "step": 11709 - }, - { - "epoch": 1.1031299309955018, - "grad_norm": 0.6800163388252258, - "learning_rate": 8.671349549991205e-06, - "loss": 0.2085, - "step": 11710 - }, - { - "epoch": 1.1032241350886696, - "grad_norm": 0.6995189785957336, - "learning_rate": 8.669852934658042e-06, - "loss": 0.2297, - "step": 11711 - }, - { - "epoch": 1.1033183391818375, - "grad_norm": 0.7022854089736938, - "learning_rate": 8.66835634965429e-06, - "loss": 0.2305, - "step": 11712 - }, - { - "epoch": 1.1034125432750053, - "grad_norm": 0.6383044123649597, - "learning_rate": 8.666859795014068e-06, - "loss": 0.2095, - "step": 11713 - }, - { - "epoch": 1.1035067473681732, - "grad_norm": 0.6150676012039185, - "learning_rate": 8.665363270771509e-06, - "loss": 0.2127, - "step": 11714 - }, - { - "epoch": 1.103600951461341, - "grad_norm": 0.759537935256958, - "learning_rate": 8.663866776960731e-06, - "loss": 0.2217, - "step": 11715 - }, - { - "epoch": 1.103695155554509, - "grad_norm": 0.7568871974945068, - "learning_rate": 8.662370313615853e-06, - "loss": 0.2085, - "step": 11716 - }, - { - "epoch": 1.1037893596476767, - "grad_norm": 0.697677731513977, - "learning_rate": 8.660873880771006e-06, - "loss": 0.203, - "step": 11717 - }, - { - "epoch": 1.1038835637408446, - "grad_norm": 0.8748740553855896, - "learning_rate": 8.659377478460302e-06, - "loss": 0.2642, - "step": 11718 - }, - { - "epoch": 1.1039777678340124, - "grad_norm": 0.7091565132141113, - "learning_rate": 8.657881106717868e-06, - "loss": 0.2184, - "step": 11719 - }, - { - "epoch": 1.1040719719271803, - "grad_norm": 0.9920811653137207, - "learning_rate": 8.656384765577814e-06, - "loss": 0.2077, - "step": 11720 - }, - { - "epoch": 1.104166176020348, - "grad_norm": 0.6734057664871216, - "learning_rate": 8.654888455074271e-06, - "loss": 0.2222, - "step": 11721 - }, - { - "epoch": 1.104260380113516, - "grad_norm": 0.6326597332954407, - "learning_rate": 8.65339217524135e-06, - "loss": 0.2028, - "step": 11722 - }, - { - "epoch": 1.1043545842066838, - "grad_norm": 0.6460282206535339, - "learning_rate": 8.651895926113167e-06, - "loss": 0.2109, - "step": 11723 - }, - { - "epoch": 1.1044487882998517, - "grad_norm": 0.6907562017440796, - 
"learning_rate": 8.650399707723846e-06, - "loss": 0.2058, - "step": 11724 - }, - { - "epoch": 1.1045429923930195, - "grad_norm": 2.4076602458953857, - "learning_rate": 8.648903520107497e-06, - "loss": 0.2338, - "step": 11725 - }, - { - "epoch": 1.1046371964861874, - "grad_norm": 0.6401410698890686, - "learning_rate": 8.647407363298237e-06, - "loss": 0.1814, - "step": 11726 - }, - { - "epoch": 1.1047314005793551, - "grad_norm": 0.6281068921089172, - "learning_rate": 8.645911237330182e-06, - "loss": 0.2007, - "step": 11727 - }, - { - "epoch": 1.104825604672523, - "grad_norm": 0.6514576077461243, - "learning_rate": 8.644415142237444e-06, - "loss": 0.1875, - "step": 11728 - }, - { - "epoch": 1.1049198087656908, - "grad_norm": 0.5694442987442017, - "learning_rate": 8.64291907805414e-06, - "loss": 0.1759, - "step": 11729 - }, - { - "epoch": 1.1050140128588588, - "grad_norm": 0.7495072484016418, - "learning_rate": 8.641423044814375e-06, - "loss": 0.2444, - "step": 11730 - }, - { - "epoch": 1.1051082169520265, - "grad_norm": 0.7319170236587524, - "learning_rate": 8.639927042552268e-06, - "loss": 0.186, - "step": 11731 - }, - { - "epoch": 1.1052024210451945, - "grad_norm": 1.0071709156036377, - "learning_rate": 8.63843107130193e-06, - "loss": 0.2278, - "step": 11732 - }, - { - "epoch": 1.1052966251383622, - "grad_norm": 0.7035053372383118, - "learning_rate": 8.636935131097464e-06, - "loss": 0.2202, - "step": 11733 - }, - { - "epoch": 1.1053908292315302, - "grad_norm": 0.6001750230789185, - "learning_rate": 8.63543922197299e-06, - "loss": 0.1793, - "step": 11734 - }, - { - "epoch": 1.105485033324698, - "grad_norm": 0.6339883804321289, - "learning_rate": 8.633943343962612e-06, - "loss": 0.2, - "step": 11735 - }, - { - "epoch": 1.105579237417866, - "grad_norm": 0.7535054683685303, - "learning_rate": 8.632447497100432e-06, - "loss": 0.2263, - "step": 11736 - }, - { - "epoch": 1.1056734415110336, - "grad_norm": 0.6110571026802063, - "learning_rate": 8.630951681420573e-06, - "loss": 0.2038, - "step": 11737 - }, - { - "epoch": 1.1057676456042016, - "grad_norm": 0.7600181102752686, - "learning_rate": 8.629455896957128e-06, - "loss": 0.2065, - "step": 11738 - }, - { - "epoch": 1.1058618496973693, - "grad_norm": 0.6588578224182129, - "learning_rate": 8.62796014374421e-06, - "loss": 0.2164, - "step": 11739 - }, - { - "epoch": 1.1059560537905373, - "grad_norm": 0.6615300178527832, - "learning_rate": 8.626464421815919e-06, - "loss": 0.213, - "step": 11740 - }, - { - "epoch": 1.106050257883705, - "grad_norm": 0.6047887206077576, - "learning_rate": 8.624968731206367e-06, - "loss": 0.1913, - "step": 11741 - }, - { - "epoch": 1.106144461976873, - "grad_norm": 0.5762773752212524, - "learning_rate": 8.623473071949653e-06, - "loss": 0.1804, - "step": 11742 - }, - { - "epoch": 1.1062386660700407, - "grad_norm": 0.643365204334259, - "learning_rate": 8.62197744407988e-06, - "loss": 0.1802, - "step": 11743 - }, - { - "epoch": 1.1063328701632087, - "grad_norm": 0.7280917167663574, - "learning_rate": 8.620481847631155e-06, - "loss": 0.191, - "step": 11744 - }, - { - "epoch": 1.1064270742563764, - "grad_norm": 0.6445494294166565, - "learning_rate": 8.618986282637578e-06, - "loss": 0.1941, - "step": 11745 - }, - { - "epoch": 1.1065212783495444, - "grad_norm": 0.7732815742492676, - "learning_rate": 8.617490749133248e-06, - "loss": 0.194, - "step": 11746 - }, - { - "epoch": 1.1066154824427121, - "grad_norm": 0.6079898476600647, - "learning_rate": 8.615995247152267e-06, - "loss": 0.2002, - "step": 11747 - }, - { - "epoch": 
1.10670968653588, - "grad_norm": 0.6569620370864868, - "learning_rate": 8.614499776728736e-06, - "loss": 0.2097, - "step": 11748 - }, - { - "epoch": 1.1068038906290478, - "grad_norm": 0.6424642205238342, - "learning_rate": 8.613004337896755e-06, - "loss": 0.2209, - "step": 11749 - }, - { - "epoch": 1.1068980947222156, - "grad_norm": 0.7390209436416626, - "learning_rate": 8.611508930690413e-06, - "loss": 0.2421, - "step": 11750 - }, - { - "epoch": 1.1069922988153835, - "grad_norm": 0.6789264678955078, - "learning_rate": 8.61001355514382e-06, - "loss": 0.2249, - "step": 11751 - }, - { - "epoch": 1.1070865029085515, - "grad_norm": 0.6255180239677429, - "learning_rate": 8.608518211291068e-06, - "loss": 0.2085, - "step": 11752 - }, - { - "epoch": 1.1071807070017192, - "grad_norm": 0.7084512114524841, - "learning_rate": 8.607022899166247e-06, - "loss": 0.228, - "step": 11753 - }, - { - "epoch": 1.107274911094887, - "grad_norm": 0.5759256482124329, - "learning_rate": 8.605527618803465e-06, - "loss": 0.1984, - "step": 11754 - }, - { - "epoch": 1.107369115188055, - "grad_norm": 0.6552428007125854, - "learning_rate": 8.604032370236807e-06, - "loss": 0.2124, - "step": 11755 - }, - { - "epoch": 1.1074633192812229, - "grad_norm": 0.6375650763511658, - "learning_rate": 8.602537153500368e-06, - "loss": 0.2087, - "step": 11756 - }, - { - "epoch": 1.1075575233743906, - "grad_norm": 0.6352382302284241, - "learning_rate": 8.601041968628247e-06, - "loss": 0.2042, - "step": 11757 - }, - { - "epoch": 1.1076517274675584, - "grad_norm": 0.6800411343574524, - "learning_rate": 8.59954681565453e-06, - "loss": 0.1964, - "step": 11758 - }, - { - "epoch": 1.1077459315607263, - "grad_norm": 0.687708854675293, - "learning_rate": 8.598051694613315e-06, - "loss": 0.2422, - "step": 11759 - }, - { - "epoch": 1.1078401356538943, - "grad_norm": 0.6685158610343933, - "learning_rate": 8.596556605538685e-06, - "loss": 0.2075, - "step": 11760 - }, - { - "epoch": 1.107934339747062, - "grad_norm": 0.6764061450958252, - "learning_rate": 8.595061548464736e-06, - "loss": 0.2299, - "step": 11761 - }, - { - "epoch": 1.1080285438402298, - "grad_norm": 0.6402743458747864, - "learning_rate": 8.593566523425559e-06, - "loss": 0.2019, - "step": 11762 - }, - { - "epoch": 1.1081227479333977, - "grad_norm": 0.7387195229530334, - "learning_rate": 8.592071530455236e-06, - "loss": 0.2048, - "step": 11763 - }, - { - "epoch": 1.1082169520265657, - "grad_norm": 0.6278302073478699, - "learning_rate": 8.590576569587862e-06, - "loss": 0.2075, - "step": 11764 - }, - { - "epoch": 1.1083111561197334, - "grad_norm": 0.6486092209815979, - "learning_rate": 8.589081640857523e-06, - "loss": 0.2137, - "step": 11765 - }, - { - "epoch": 1.1084053602129011, - "grad_norm": 0.6360762119293213, - "learning_rate": 8.587586744298302e-06, - "loss": 0.2009, - "step": 11766 - }, - { - "epoch": 1.108499564306069, - "grad_norm": 0.6645756959915161, - "learning_rate": 8.58609187994429e-06, - "loss": 0.2031, - "step": 11767 - }, - { - "epoch": 1.1085937683992368, - "grad_norm": 0.6493579745292664, - "learning_rate": 8.58459704782957e-06, - "loss": 0.2101, - "step": 11768 - }, - { - "epoch": 1.1086879724924048, - "grad_norm": 0.7045307159423828, - "learning_rate": 8.583102247988228e-06, - "loss": 0.2002, - "step": 11769 - }, - { - "epoch": 1.1087821765855725, - "grad_norm": 0.6780544519424438, - "learning_rate": 8.581607480454338e-06, - "loss": 0.1974, - "step": 11770 - }, - { - "epoch": 1.1088763806787405, - "grad_norm": 0.6566255688667297, - "learning_rate": 
8.580112745261997e-06, - "loss": 0.2079, - "step": 11771 - }, - { - "epoch": 1.1089705847719082, - "grad_norm": 0.8988806009292603, - "learning_rate": 8.578618042445284e-06, - "loss": 0.1921, - "step": 11772 - }, - { - "epoch": 1.1090647888650762, - "grad_norm": 0.6179317235946655, - "learning_rate": 8.57712337203827e-06, - "loss": 0.2279, - "step": 11773 - }, - { - "epoch": 1.109158992958244, - "grad_norm": 0.5989957451820374, - "learning_rate": 8.57562873407505e-06, - "loss": 0.1734, - "step": 11774 - }, - { - "epoch": 1.109253197051412, - "grad_norm": 0.6424252986907959, - "learning_rate": 8.574134128589697e-06, - "loss": 0.2012, - "step": 11775 - }, - { - "epoch": 1.1093474011445796, - "grad_norm": 0.6136184334754944, - "learning_rate": 8.572639555616286e-06, - "loss": 0.1978, - "step": 11776 - }, - { - "epoch": 1.1094416052377476, - "grad_norm": 0.6892116069793701, - "learning_rate": 8.571145015188908e-06, - "loss": 0.2275, - "step": 11777 - }, - { - "epoch": 1.1095358093309153, - "grad_norm": 0.6547119617462158, - "learning_rate": 8.56965050734163e-06, - "loss": 0.212, - "step": 11778 - }, - { - "epoch": 1.1096300134240833, - "grad_norm": 0.6131852865219116, - "learning_rate": 8.568156032108532e-06, - "loss": 0.1803, - "step": 11779 - }, - { - "epoch": 1.109724217517251, - "grad_norm": 0.6796516180038452, - "learning_rate": 8.56666158952369e-06, - "loss": 0.2181, - "step": 11780 - }, - { - "epoch": 1.109818421610419, - "grad_norm": 0.6487653851509094, - "learning_rate": 8.565167179621182e-06, - "loss": 0.2054, - "step": 11781 - }, - { - "epoch": 1.1099126257035867, - "grad_norm": 0.5514594912528992, - "learning_rate": 8.563672802435081e-06, - "loss": 0.1671, - "step": 11782 - }, - { - "epoch": 1.1100068297967547, - "grad_norm": 0.6476712226867676, - "learning_rate": 8.56217845799946e-06, - "loss": 0.2163, - "step": 11783 - }, - { - "epoch": 1.1101010338899224, - "grad_norm": 0.642052173614502, - "learning_rate": 8.560684146348396e-06, - "loss": 0.1897, - "step": 11784 - }, - { - "epoch": 1.1101952379830904, - "grad_norm": 0.6319531202316284, - "learning_rate": 8.55918986751596e-06, - "loss": 0.2055, - "step": 11785 - }, - { - "epoch": 1.1102894420762581, - "grad_norm": 0.7351044416427612, - "learning_rate": 8.55769562153622e-06, - "loss": 0.2262, - "step": 11786 - }, - { - "epoch": 1.110383646169426, - "grad_norm": 0.678261399269104, - "learning_rate": 8.556201408443252e-06, - "loss": 0.2186, - "step": 11787 - }, - { - "epoch": 1.1104778502625938, - "grad_norm": 0.6289682388305664, - "learning_rate": 8.554707228271126e-06, - "loss": 0.2083, - "step": 11788 - }, - { - "epoch": 1.1105720543557618, - "grad_norm": 0.6691647171974182, - "learning_rate": 8.553213081053911e-06, - "loss": 0.1934, - "step": 11789 - }, - { - "epoch": 1.1106662584489295, - "grad_norm": 0.6942034363746643, - "learning_rate": 8.55171896682567e-06, - "loss": 0.249, - "step": 11790 - }, - { - "epoch": 1.1107604625420975, - "grad_norm": 0.6387681365013123, - "learning_rate": 8.55022488562048e-06, - "loss": 0.203, - "step": 11791 - }, - { - "epoch": 1.1108546666352652, - "grad_norm": 0.700570285320282, - "learning_rate": 8.548730837472407e-06, - "loss": 0.2319, - "step": 11792 - }, - { - "epoch": 1.1109488707284332, - "grad_norm": 0.5858840942382812, - "learning_rate": 8.54723682241551e-06, - "loss": 0.1965, - "step": 11793 - }, - { - "epoch": 1.111043074821601, - "grad_norm": 0.6187609434127808, - "learning_rate": 8.545742840483866e-06, - "loss": 0.2105, - "step": 11794 - }, - { - "epoch": 1.1111372789147689, - 
"grad_norm": 0.6499419212341309, - "learning_rate": 8.544248891711531e-06, - "loss": 0.2445, - "step": 11795 - }, - { - "epoch": 1.1112314830079366, - "grad_norm": 0.7379053235054016, - "learning_rate": 8.54275497613257e-06, - "loss": 0.2381, - "step": 11796 - }, - { - "epoch": 1.1113256871011046, - "grad_norm": 0.6513186097145081, - "learning_rate": 8.541261093781054e-06, - "loss": 0.1893, - "step": 11797 - }, - { - "epoch": 1.1114198911942723, - "grad_norm": 0.6960347294807434, - "learning_rate": 8.539767244691041e-06, - "loss": 0.2097, - "step": 11798 - }, - { - "epoch": 1.1115140952874403, - "grad_norm": 0.6200478076934814, - "learning_rate": 8.538273428896592e-06, - "loss": 0.1919, - "step": 11799 - }, - { - "epoch": 1.111608299380608, - "grad_norm": 0.6413607001304626, - "learning_rate": 8.536779646431768e-06, - "loss": 0.2083, - "step": 11800 - }, - { - "epoch": 1.111702503473776, - "grad_norm": 0.6362359523773193, - "learning_rate": 8.535285897330631e-06, - "loss": 0.2185, - "step": 11801 - }, - { - "epoch": 1.1117967075669437, - "grad_norm": 0.6481806039810181, - "learning_rate": 8.533792181627243e-06, - "loss": 0.2027, - "step": 11802 - }, - { - "epoch": 1.1118909116601117, - "grad_norm": 0.6828119158744812, - "learning_rate": 8.532298499355657e-06, - "loss": 0.1832, - "step": 11803 - }, - { - "epoch": 1.1119851157532794, - "grad_norm": 0.5892437696456909, - "learning_rate": 8.530804850549939e-06, - "loss": 0.1835, - "step": 11804 - }, - { - "epoch": 1.1120793198464474, - "grad_norm": 0.6388912796974182, - "learning_rate": 8.529311235244143e-06, - "loss": 0.1828, - "step": 11805 - }, - { - "epoch": 1.112173523939615, - "grad_norm": 0.6662203073501587, - "learning_rate": 8.52781765347232e-06, - "loss": 0.2192, - "step": 11806 - }, - { - "epoch": 1.112267728032783, - "grad_norm": 0.712853193283081, - "learning_rate": 8.526324105268535e-06, - "loss": 0.2299, - "step": 11807 - }, - { - "epoch": 1.1123619321259508, - "grad_norm": 0.6238781809806824, - "learning_rate": 8.524830590666839e-06, - "loss": 0.1947, - "step": 11808 - }, - { - "epoch": 1.1124561362191188, - "grad_norm": 0.6149243116378784, - "learning_rate": 8.52333710970129e-06, - "loss": 0.1874, - "step": 11809 - }, - { - "epoch": 1.1125503403122865, - "grad_norm": 0.6006473898887634, - "learning_rate": 8.521843662405933e-06, - "loss": 0.2126, - "step": 11810 - }, - { - "epoch": 1.1126445444054545, - "grad_norm": 0.6180854439735413, - "learning_rate": 8.520350248814831e-06, - "loss": 0.2052, - "step": 11811 - }, - { - "epoch": 1.1127387484986222, - "grad_norm": 0.6338762640953064, - "learning_rate": 8.518856868962031e-06, - "loss": 0.2085, - "step": 11812 - }, - { - "epoch": 1.1128329525917902, - "grad_norm": 0.6446571946144104, - "learning_rate": 8.51736352288158e-06, - "loss": 0.2099, - "step": 11813 - }, - { - "epoch": 1.112927156684958, - "grad_norm": 0.6481737494468689, - "learning_rate": 8.51587021060754e-06, - "loss": 0.1995, - "step": 11814 - }, - { - "epoch": 1.1130213607781259, - "grad_norm": 0.629331648349762, - "learning_rate": 8.514376932173952e-06, - "loss": 0.1833, - "step": 11815 - }, - { - "epoch": 1.1131155648712936, - "grad_norm": 0.6144024729728699, - "learning_rate": 8.512883687614863e-06, - "loss": 0.2161, - "step": 11816 - }, - { - "epoch": 1.1132097689644616, - "grad_norm": 0.6038481593132019, - "learning_rate": 8.511390476964334e-06, - "loss": 0.1959, - "step": 11817 - }, - { - "epoch": 1.1133039730576293, - "grad_norm": 0.6613956689834595, - "learning_rate": 8.509897300256402e-06, - "loss": 0.1957, 
- "step": 11818 - }, - { - "epoch": 1.1133981771507973, - "grad_norm": 0.6780512928962708, - "learning_rate": 8.508404157525115e-06, - "loss": 0.228, - "step": 11819 - }, - { - "epoch": 1.113492381243965, - "grad_norm": 0.607157826423645, - "learning_rate": 8.506911048804517e-06, - "loss": 0.2018, - "step": 11820 - }, - { - "epoch": 1.113586585337133, - "grad_norm": 0.7037463784217834, - "learning_rate": 8.505417974128658e-06, - "loss": 0.1987, - "step": 11821 - }, - { - "epoch": 1.1136807894303007, - "grad_norm": 0.6566210985183716, - "learning_rate": 8.503924933531583e-06, - "loss": 0.2101, - "step": 11822 - }, - { - "epoch": 1.1137749935234686, - "grad_norm": 0.6401715278625488, - "learning_rate": 8.50243192704733e-06, - "loss": 0.2123, - "step": 11823 - }, - { - "epoch": 1.1138691976166364, - "grad_norm": 0.5940774083137512, - "learning_rate": 8.500938954709948e-06, - "loss": 0.1861, - "step": 11824 - }, - { - "epoch": 1.1139634017098043, - "grad_norm": 0.5777052044868469, - "learning_rate": 8.499446016553475e-06, - "loss": 0.1898, - "step": 11825 - }, - { - "epoch": 1.114057605802972, - "grad_norm": 0.6409168243408203, - "learning_rate": 8.497953112611952e-06, - "loss": 0.2081, - "step": 11826 - }, - { - "epoch": 1.11415180989614, - "grad_norm": 0.604097843170166, - "learning_rate": 8.496460242919422e-06, - "loss": 0.2036, - "step": 11827 - }, - { - "epoch": 1.1142460139893078, - "grad_norm": 0.6331413984298706, - "learning_rate": 8.494967407509925e-06, - "loss": 0.2257, - "step": 11828 - }, - { - "epoch": 1.1143402180824757, - "grad_norm": 0.6245782375335693, - "learning_rate": 8.4934746064175e-06, - "loss": 0.2282, - "step": 11829 - }, - { - "epoch": 1.1144344221756435, - "grad_norm": 0.6485175490379333, - "learning_rate": 8.491981839676177e-06, - "loss": 0.2185, - "step": 11830 - }, - { - "epoch": 1.1145286262688114, - "grad_norm": 0.5743849277496338, - "learning_rate": 8.490489107320009e-06, - "loss": 0.1938, - "step": 11831 - }, - { - "epoch": 1.1146228303619792, - "grad_norm": 0.6654359698295593, - "learning_rate": 8.48899640938302e-06, - "loss": 0.1899, - "step": 11832 - }, - { - "epoch": 1.1147170344551471, - "grad_norm": 0.6442403793334961, - "learning_rate": 8.487503745899246e-06, - "loss": 0.1992, - "step": 11833 - }, - { - "epoch": 1.1148112385483149, - "grad_norm": 0.639529287815094, - "learning_rate": 8.486011116902732e-06, - "loss": 0.2036, - "step": 11834 - }, - { - "epoch": 1.1149054426414828, - "grad_norm": 0.6333575248718262, - "learning_rate": 8.484518522427503e-06, - "loss": 0.2089, - "step": 11835 - }, - { - "epoch": 1.1149996467346506, - "grad_norm": 0.6548263430595398, - "learning_rate": 8.483025962507592e-06, - "loss": 0.2195, - "step": 11836 - }, - { - "epoch": 1.1150938508278185, - "grad_norm": 0.60414057970047, - "learning_rate": 8.48153343717704e-06, - "loss": 0.1989, - "step": 11837 - }, - { - "epoch": 1.1151880549209863, - "grad_norm": 0.6815771460533142, - "learning_rate": 8.480040946469875e-06, - "loss": 0.2001, - "step": 11838 - }, - { - "epoch": 1.1152822590141542, - "grad_norm": 0.609862744808197, - "learning_rate": 8.478548490420125e-06, - "loss": 0.196, - "step": 11839 - }, - { - "epoch": 1.115376463107322, - "grad_norm": 0.6562811136245728, - "learning_rate": 8.47705606906182e-06, - "loss": 0.2332, - "step": 11840 - }, - { - "epoch": 1.11547066720049, - "grad_norm": 0.6821162104606628, - "learning_rate": 8.475563682428995e-06, - "loss": 0.2138, - "step": 11841 - }, - { - "epoch": 1.1155648712936577, - "grad_norm": 0.6668700575828552, - 
"learning_rate": 8.474071330555675e-06, - "loss": 0.2076, - "step": 11842 - }, - { - "epoch": 1.1156590753868256, - "grad_norm": 0.643703818321228, - "learning_rate": 8.472579013475886e-06, - "loss": 0.2125, - "step": 11843 - }, - { - "epoch": 1.1157532794799934, - "grad_norm": 0.686159610748291, - "learning_rate": 8.47108673122366e-06, - "loss": 0.214, - "step": 11844 - }, - { - "epoch": 1.1158474835731613, - "grad_norm": 0.6090878248214722, - "learning_rate": 8.469594483833022e-06, - "loss": 0.2332, - "step": 11845 - }, - { - "epoch": 1.115941687666329, - "grad_norm": 0.6408948302268982, - "learning_rate": 8.468102271337994e-06, - "loss": 0.2, - "step": 11846 - }, - { - "epoch": 1.116035891759497, - "grad_norm": 0.6762485504150391, - "learning_rate": 8.466610093772605e-06, - "loss": 0.2055, - "step": 11847 - }, - { - "epoch": 1.1161300958526648, - "grad_norm": 0.6224777102470398, - "learning_rate": 8.465117951170879e-06, - "loss": 0.2134, - "step": 11848 - }, - { - "epoch": 1.1162242999458327, - "grad_norm": 0.6820507049560547, - "learning_rate": 8.463625843566836e-06, - "loss": 0.2147, - "step": 11849 - }, - { - "epoch": 1.1163185040390005, - "grad_norm": 0.6299790143966675, - "learning_rate": 8.462133770994496e-06, - "loss": 0.2125, - "step": 11850 - }, - { - "epoch": 1.1164127081321684, - "grad_norm": 0.7445378303527832, - "learning_rate": 8.460641733487891e-06, - "loss": 0.2122, - "step": 11851 - }, - { - "epoch": 1.1165069122253362, - "grad_norm": 0.5993331074714661, - "learning_rate": 8.459149731081032e-06, - "loss": 0.2066, - "step": 11852 - }, - { - "epoch": 1.1166011163185041, - "grad_norm": 0.6086986660957336, - "learning_rate": 8.457657763807938e-06, - "loss": 0.1964, - "step": 11853 - }, - { - "epoch": 1.1166953204116719, - "grad_norm": 0.618959903717041, - "learning_rate": 8.45616583170264e-06, - "loss": 0.1907, - "step": 11854 - }, - { - "epoch": 1.1167895245048398, - "grad_norm": 0.6163588166236877, - "learning_rate": 8.454673934799145e-06, - "loss": 0.2023, - "step": 11855 - }, - { - "epoch": 1.1168837285980076, - "grad_norm": 0.702180027961731, - "learning_rate": 8.453182073131473e-06, - "loss": 0.2241, - "step": 11856 - }, - { - "epoch": 1.1169779326911755, - "grad_norm": 0.6430617570877075, - "learning_rate": 8.451690246733642e-06, - "loss": 0.2058, - "step": 11857 - }, - { - "epoch": 1.1170721367843433, - "grad_norm": 0.599604070186615, - "learning_rate": 8.45019845563967e-06, - "loss": 0.2253, - "step": 11858 - }, - { - "epoch": 1.1171663408775112, - "grad_norm": 0.6289446949958801, - "learning_rate": 8.44870669988357e-06, - "loss": 0.2062, - "step": 11859 - }, - { - "epoch": 1.117260544970679, - "grad_norm": 0.7407355904579163, - "learning_rate": 8.447214979499353e-06, - "loss": 0.2297, - "step": 11860 - }, - { - "epoch": 1.117354749063847, - "grad_norm": 0.6510844230651855, - "learning_rate": 8.445723294521039e-06, - "loss": 0.2355, - "step": 11861 - }, - { - "epoch": 1.1174489531570146, - "grad_norm": 0.6228619813919067, - "learning_rate": 8.444231644982637e-06, - "loss": 0.2265, - "step": 11862 - }, - { - "epoch": 1.1175431572501826, - "grad_norm": 0.6194037199020386, - "learning_rate": 8.442740030918157e-06, - "loss": 0.2022, - "step": 11863 - }, - { - "epoch": 1.1176373613433503, - "grad_norm": 0.6037282943725586, - "learning_rate": 8.441248452361616e-06, - "loss": 0.2143, - "step": 11864 - }, - { - "epoch": 1.1177315654365183, - "grad_norm": 0.6462535858154297, - "learning_rate": 8.439756909347021e-06, - "loss": 0.1977, - "step": 11865 - }, - { - "epoch": 
1.117825769529686, - "grad_norm": 0.6289598941802979, - "learning_rate": 8.438265401908378e-06, - "loss": 0.2045, - "step": 11866 - }, - { - "epoch": 1.117919973622854, - "grad_norm": 0.6735430359840393, - "learning_rate": 8.436773930079703e-06, - "loss": 0.2309, - "step": 11867 - }, - { - "epoch": 1.1180141777160217, - "grad_norm": 0.6193204522132874, - "learning_rate": 8.435282493894999e-06, - "loss": 0.2137, - "step": 11868 - }, - { - "epoch": 1.1181083818091897, - "grad_norm": 0.7990676760673523, - "learning_rate": 8.433791093388275e-06, - "loss": 0.1938, - "step": 11869 - }, - { - "epoch": 1.1182025859023574, - "grad_norm": 0.6005562543869019, - "learning_rate": 8.43229972859353e-06, - "loss": 0.1974, - "step": 11870 - }, - { - "epoch": 1.1182967899955254, - "grad_norm": 0.6522964835166931, - "learning_rate": 8.430808399544785e-06, - "loss": 0.1952, - "step": 11871 - }, - { - "epoch": 1.1183909940886931, - "grad_norm": 0.6723132729530334, - "learning_rate": 8.42931710627603e-06, - "loss": 0.2225, - "step": 11872 - }, - { - "epoch": 1.118485198181861, - "grad_norm": 0.6188422441482544, - "learning_rate": 8.427825848821272e-06, - "loss": 0.1956, - "step": 11873 - }, - { - "epoch": 1.1185794022750288, - "grad_norm": 0.6260895729064941, - "learning_rate": 8.42633462721452e-06, - "loss": 0.217, - "step": 11874 - }, - { - "epoch": 1.1186736063681968, - "grad_norm": 0.6683898568153381, - "learning_rate": 8.424843441489773e-06, - "loss": 0.2198, - "step": 11875 - }, - { - "epoch": 1.1187678104613645, - "grad_norm": 0.6727320551872253, - "learning_rate": 8.423352291681027e-06, - "loss": 0.206, - "step": 11876 - }, - { - "epoch": 1.1188620145545325, - "grad_norm": 0.6028059124946594, - "learning_rate": 8.42186117782229e-06, - "loss": 0.1869, - "step": 11877 - }, - { - "epoch": 1.1189562186477002, - "grad_norm": 0.6548950672149658, - "learning_rate": 8.42037009994756e-06, - "loss": 0.2057, - "step": 11878 - }, - { - "epoch": 1.1190504227408682, - "grad_norm": 0.6151114702224731, - "learning_rate": 8.418879058090833e-06, - "loss": 0.2108, - "step": 11879 - }, - { - "epoch": 1.119144626834036, - "grad_norm": 0.6699797511100769, - "learning_rate": 8.417388052286108e-06, - "loss": 0.2083, - "step": 11880 - }, - { - "epoch": 1.1192388309272039, - "grad_norm": 0.6789939403533936, - "learning_rate": 8.415897082567383e-06, - "loss": 0.2274, - "step": 11881 - }, - { - "epoch": 1.1193330350203716, - "grad_norm": 0.687531054019928, - "learning_rate": 8.414406148968657e-06, - "loss": 0.24, - "step": 11882 - }, - { - "epoch": 1.1194272391135396, - "grad_norm": 0.7133904099464417, - "learning_rate": 8.41291525152392e-06, - "loss": 0.1995, - "step": 11883 - }, - { - "epoch": 1.1195214432067073, - "grad_norm": 0.6760929822921753, - "learning_rate": 8.41142439026717e-06, - "loss": 0.2037, - "step": 11884 - }, - { - "epoch": 1.1196156472998753, - "grad_norm": 0.575566828250885, - "learning_rate": 8.409933565232402e-06, - "loss": 0.1958, - "step": 11885 - }, - { - "epoch": 1.119709851393043, - "grad_norm": 0.6504658460617065, - "learning_rate": 8.408442776453606e-06, - "loss": 0.2191, - "step": 11886 - }, - { - "epoch": 1.119804055486211, - "grad_norm": 0.643571138381958, - "learning_rate": 8.406952023964778e-06, - "loss": 0.1965, - "step": 11887 - }, - { - "epoch": 1.1198982595793787, - "grad_norm": 0.6231205463409424, - "learning_rate": 8.405461307799909e-06, - "loss": 0.193, - "step": 11888 - }, - { - "epoch": 1.1199924636725465, - "grad_norm": 0.5547332763671875, - "learning_rate": 8.403970627992988e-06, - 
"loss": 0.174, - "step": 11889 - }, - { - "epoch": 1.1200866677657144, - "grad_norm": 0.6632810235023499, - "learning_rate": 8.402479984578e-06, - "loss": 0.2057, - "step": 11890 - }, - { - "epoch": 1.1201808718588824, - "grad_norm": 0.688934862613678, - "learning_rate": 8.400989377588944e-06, - "loss": 0.2127, - "step": 11891 - }, - { - "epoch": 1.1202750759520501, - "grad_norm": 0.6259660124778748, - "learning_rate": 8.399498807059802e-06, - "loss": 0.1898, - "step": 11892 - }, - { - "epoch": 1.1203692800452179, - "grad_norm": 0.6416435241699219, - "learning_rate": 8.398008273024557e-06, - "loss": 0.2004, - "step": 11893 - }, - { - "epoch": 1.1204634841383858, - "grad_norm": 0.6270011067390442, - "learning_rate": 8.396517775517208e-06, - "loss": 0.2, - "step": 11894 - }, - { - "epoch": 1.1205576882315538, - "grad_norm": 0.628013014793396, - "learning_rate": 8.395027314571734e-06, - "loss": 0.2063, - "step": 11895 - }, - { - "epoch": 1.1206518923247215, - "grad_norm": 0.6344326138496399, - "learning_rate": 8.393536890222114e-06, - "loss": 0.206, - "step": 11896 - }, - { - "epoch": 1.1207460964178892, - "grad_norm": 0.7040308713912964, - "learning_rate": 8.39204650250234e-06, - "loss": 0.2091, - "step": 11897 - }, - { - "epoch": 1.1208403005110572, - "grad_norm": 0.6208798289299011, - "learning_rate": 8.390556151446393e-06, - "loss": 0.1957, - "step": 11898 - }, - { - "epoch": 1.1209345046042252, - "grad_norm": 0.7067717909812927, - "learning_rate": 8.389065837088254e-06, - "loss": 0.219, - "step": 11899 - }, - { - "epoch": 1.121028708697393, - "grad_norm": 0.6420107483863831, - "learning_rate": 8.387575559461905e-06, - "loss": 0.205, - "step": 11900 - }, - { - "epoch": 1.1211229127905606, - "grad_norm": 0.651452362537384, - "learning_rate": 8.386085318601328e-06, - "loss": 0.211, - "step": 11901 - }, - { - "epoch": 1.1212171168837286, - "grad_norm": 0.6510005593299866, - "learning_rate": 8.3845951145405e-06, - "loss": 0.2113, - "step": 11902 - }, - { - "epoch": 1.1213113209768966, - "grad_norm": 0.632051408290863, - "learning_rate": 8.3831049473134e-06, - "loss": 0.1981, - "step": 11903 - }, - { - "epoch": 1.1214055250700643, - "grad_norm": 0.7081713080406189, - "learning_rate": 8.381614816954012e-06, - "loss": 0.2134, - "step": 11904 - }, - { - "epoch": 1.121499729163232, - "grad_norm": 0.6058084964752197, - "learning_rate": 8.380124723496308e-06, - "loss": 0.175, - "step": 11905 - }, - { - "epoch": 1.1215939332564, - "grad_norm": 0.6154360771179199, - "learning_rate": 8.378634666974264e-06, - "loss": 0.2037, - "step": 11906 - }, - { - "epoch": 1.1216881373495677, - "grad_norm": 0.6388738751411438, - "learning_rate": 8.37714464742186e-06, - "loss": 0.1925, - "step": 11907 - }, - { - "epoch": 1.1217823414427357, - "grad_norm": 0.6988288760185242, - "learning_rate": 8.375654664873065e-06, - "loss": 0.211, - "step": 11908 - }, - { - "epoch": 1.1218765455359034, - "grad_norm": 0.7009318470954895, - "learning_rate": 8.37416471936186e-06, - "loss": 0.2147, - "step": 11909 - }, - { - "epoch": 1.1219707496290714, - "grad_norm": 0.7610530853271484, - "learning_rate": 8.372674810922206e-06, - "loss": 0.2121, - "step": 11910 - }, - { - "epoch": 1.1220649537222391, - "grad_norm": 0.6230190396308899, - "learning_rate": 8.37118493958809e-06, - "loss": 0.2024, - "step": 11911 - }, - { - "epoch": 1.122159157815407, - "grad_norm": 0.6252520084381104, - "learning_rate": 8.369695105393474e-06, - "loss": 0.1882, - "step": 11912 - }, - { - "epoch": 1.1222533619085748, - "grad_norm": 0.6650586724281311, - 
"learning_rate": 8.368205308372327e-06, - "loss": 0.2058, - "step": 11913 - }, - { - "epoch": 1.1223475660017428, - "grad_norm": 0.7434647679328918, - "learning_rate": 8.36671554855863e-06, - "loss": 0.1781, - "step": 11914 - }, - { - "epoch": 1.1224417700949105, - "grad_norm": 0.6390784382820129, - "learning_rate": 8.365225825986341e-06, - "loss": 0.1926, - "step": 11915 - }, - { - "epoch": 1.1225359741880785, - "grad_norm": 0.6207288503646851, - "learning_rate": 8.36373614068943e-06, - "loss": 0.1966, - "step": 11916 - }, - { - "epoch": 1.1226301782812462, - "grad_norm": 0.6809665560722351, - "learning_rate": 8.362246492701866e-06, - "loss": 0.2353, - "step": 11917 - }, - { - "epoch": 1.1227243823744142, - "grad_norm": 0.7236821055412292, - "learning_rate": 8.360756882057617e-06, - "loss": 0.2126, - "step": 11918 - }, - { - "epoch": 1.122818586467582, - "grad_norm": 0.6286883354187012, - "learning_rate": 8.359267308790644e-06, - "loss": 0.19, - "step": 11919 - }, - { - "epoch": 1.1229127905607499, - "grad_norm": 0.647922158241272, - "learning_rate": 8.357777772934914e-06, - "loss": 0.1853, - "step": 11920 - }, - { - "epoch": 1.1230069946539176, - "grad_norm": 0.7578187584877014, - "learning_rate": 8.356288274524392e-06, - "loss": 0.2371, - "step": 11921 - }, - { - "epoch": 1.1231011987470856, - "grad_norm": 0.6257461905479431, - "learning_rate": 8.354798813593038e-06, - "loss": 0.2035, - "step": 11922 - }, - { - "epoch": 1.1231954028402533, - "grad_norm": 0.711329996585846, - "learning_rate": 8.353309390174814e-06, - "loss": 0.196, - "step": 11923 - }, - { - "epoch": 1.1232896069334213, - "grad_norm": 0.6793182492256165, - "learning_rate": 8.351820004303686e-06, - "loss": 0.2159, - "step": 11924 - }, - { - "epoch": 1.123383811026589, - "grad_norm": 0.6541587114334106, - "learning_rate": 8.350330656013608e-06, - "loss": 0.1909, - "step": 11925 - }, - { - "epoch": 1.123478015119757, - "grad_norm": 0.6262333393096924, - "learning_rate": 8.348841345338544e-06, - "loss": 0.2198, - "step": 11926 - }, - { - "epoch": 1.1235722192129247, - "grad_norm": 0.6772410273551941, - "learning_rate": 8.34735207231245e-06, - "loss": 0.24, - "step": 11927 - }, - { - "epoch": 1.1236664233060927, - "grad_norm": 0.5929433703422546, - "learning_rate": 8.345862836969288e-06, - "loss": 0.2013, - "step": 11928 - }, - { - "epoch": 1.1237606273992604, - "grad_norm": 0.6401875615119934, - "learning_rate": 8.344373639343008e-06, - "loss": 0.1817, - "step": 11929 - }, - { - "epoch": 1.1238548314924284, - "grad_norm": 0.670573890209198, - "learning_rate": 8.342884479467566e-06, - "loss": 0.2177, - "step": 11930 - }, - { - "epoch": 1.1239490355855961, - "grad_norm": 0.6134161353111267, - "learning_rate": 8.341395357376928e-06, - "loss": 0.1717, - "step": 11931 - }, - { - "epoch": 1.124043239678764, - "grad_norm": 0.6540322303771973, - "learning_rate": 8.339906273105038e-06, - "loss": 0.2413, - "step": 11932 - }, - { - "epoch": 1.1241374437719318, - "grad_norm": 0.6331095099449158, - "learning_rate": 8.338417226685849e-06, - "loss": 0.2031, - "step": 11933 - }, - { - "epoch": 1.1242316478650998, - "grad_norm": 0.6158044338226318, - "learning_rate": 8.336928218153322e-06, - "loss": 0.1873, - "step": 11934 - }, - { - "epoch": 1.1243258519582675, - "grad_norm": 0.6778693199157715, - "learning_rate": 8.335439247541403e-06, - "loss": 0.207, - "step": 11935 - }, - { - "epoch": 1.1244200560514355, - "grad_norm": 0.6944397687911987, - "learning_rate": 8.333950314884039e-06, - "loss": 0.212, - "step": 11936 - }, - { - "epoch": 
1.1245142601446032, - "grad_norm": 0.6102316975593567, - "learning_rate": 8.332461420215188e-06, - "loss": 0.1995, - "step": 11937 - }, - { - "epoch": 1.1246084642377712, - "grad_norm": 0.6247611045837402, - "learning_rate": 8.330972563568796e-06, - "loss": 0.2118, - "step": 11938 - }, - { - "epoch": 1.124702668330939, - "grad_norm": 0.6267822980880737, - "learning_rate": 8.32948374497881e-06, - "loss": 0.2012, - "step": 11939 - }, - { - "epoch": 1.1247968724241069, - "grad_norm": 0.6360173225402832, - "learning_rate": 8.327994964479177e-06, - "loss": 0.2037, - "step": 11940 - }, - { - "epoch": 1.1248910765172746, - "grad_norm": 0.5730154514312744, - "learning_rate": 8.326506222103845e-06, - "loss": 0.1711, - "step": 11941 - }, - { - "epoch": 1.1249852806104426, - "grad_norm": 0.6362870335578918, - "learning_rate": 8.325017517886761e-06, - "loss": 0.2035, - "step": 11942 - }, - { - "epoch": 1.1250794847036103, - "grad_norm": 1.0078284740447998, - "learning_rate": 8.323528851861864e-06, - "loss": 0.2354, - "step": 11943 - }, - { - "epoch": 1.1251736887967783, - "grad_norm": 0.6259084343910217, - "learning_rate": 8.322040224063105e-06, - "loss": 0.1933, - "step": 11944 - }, - { - "epoch": 1.125267892889946, - "grad_norm": 0.6518905162811279, - "learning_rate": 8.320551634524424e-06, - "loss": 0.2141, - "step": 11945 - }, - { - "epoch": 1.125362096983114, - "grad_norm": 0.6681150794029236, - "learning_rate": 8.319063083279761e-06, - "loss": 0.2225, - "step": 11946 - }, - { - "epoch": 1.1254563010762817, - "grad_norm": 0.6188362240791321, - "learning_rate": 8.317574570363063e-06, - "loss": 0.1716, - "step": 11947 - }, - { - "epoch": 1.1255505051694497, - "grad_norm": 0.6135340332984924, - "learning_rate": 8.316086095808268e-06, - "loss": 0.1972, - "step": 11948 - }, - { - "epoch": 1.1256447092626174, - "grad_norm": 0.7542541027069092, - "learning_rate": 8.31459765964931e-06, - "loss": 0.2325, - "step": 11949 - }, - { - "epoch": 1.1257389133557854, - "grad_norm": 0.6483733057975769, - "learning_rate": 8.313109261920127e-06, - "loss": 0.1796, - "step": 11950 - }, - { - "epoch": 1.125833117448953, - "grad_norm": 0.7310289144515991, - "learning_rate": 8.311620902654672e-06, - "loss": 0.2056, - "step": 11951 - }, - { - "epoch": 1.125927321542121, - "grad_norm": 0.6474401950836182, - "learning_rate": 8.310132581886867e-06, - "loss": 0.1949, - "step": 11952 - }, - { - "epoch": 1.1260215256352888, - "grad_norm": 0.6151375770568848, - "learning_rate": 8.308644299650649e-06, - "loss": 0.21, - "step": 11953 - }, - { - "epoch": 1.1261157297284567, - "grad_norm": 0.6289949417114258, - "learning_rate": 8.307156055979962e-06, - "loss": 0.1941, - "step": 11954 - }, - { - "epoch": 1.1262099338216245, - "grad_norm": 0.7216406464576721, - "learning_rate": 8.305667850908733e-06, - "loss": 0.2065, - "step": 11955 - }, - { - "epoch": 1.1263041379147924, - "grad_norm": 0.6386764645576477, - "learning_rate": 8.304179684470897e-06, - "loss": 0.2036, - "step": 11956 - }, - { - "epoch": 1.1263983420079602, - "grad_norm": 0.6260892748832703, - "learning_rate": 8.302691556700387e-06, - "loss": 0.2045, - "step": 11957 - }, - { - "epoch": 1.1264925461011281, - "grad_norm": 0.6668972969055176, - "learning_rate": 8.301203467631136e-06, - "loss": 0.238, - "step": 11958 - }, - { - "epoch": 1.1265867501942959, - "grad_norm": 0.7042335867881775, - "learning_rate": 8.299715417297072e-06, - "loss": 0.2159, - "step": 11959 - }, - { - "epoch": 1.1266809542874638, - "grad_norm": 0.6877856850624084, - "learning_rate": 
8.298227405732124e-06, - "loss": 0.2075, - "step": 11960 - }, - { - "epoch": 1.1267751583806316, - "grad_norm": 0.6739022135734558, - "learning_rate": 8.296739432970225e-06, - "loss": 0.2085, - "step": 11961 - }, - { - "epoch": 1.1268693624737995, - "grad_norm": 0.6096248626708984, - "learning_rate": 8.295251499045303e-06, - "loss": 0.189, - "step": 11962 - }, - { - "epoch": 1.1269635665669673, - "grad_norm": 0.599459707736969, - "learning_rate": 8.293763603991279e-06, - "loss": 0.1856, - "step": 11963 - }, - { - "epoch": 1.1270577706601352, - "grad_norm": 0.6436136960983276, - "learning_rate": 8.292275747842086e-06, - "loss": 0.1912, - "step": 11964 - }, - { - "epoch": 1.127151974753303, - "grad_norm": 0.6300147771835327, - "learning_rate": 8.290787930631648e-06, - "loss": 0.1908, - "step": 11965 - }, - { - "epoch": 1.127246178846471, - "grad_norm": 0.5731162428855896, - "learning_rate": 8.289300152393884e-06, - "loss": 0.1888, - "step": 11966 - }, - { - "epoch": 1.1273403829396387, - "grad_norm": 0.6125643253326416, - "learning_rate": 8.287812413162727e-06, - "loss": 0.1915, - "step": 11967 - }, - { - "epoch": 1.1274345870328066, - "grad_norm": 0.6469168066978455, - "learning_rate": 8.286324712972095e-06, - "loss": 0.2075, - "step": 11968 - }, - { - "epoch": 1.1275287911259744, - "grad_norm": 0.6300848126411438, - "learning_rate": 8.28483705185591e-06, - "loss": 0.2087, - "step": 11969 - }, - { - "epoch": 1.1276229952191423, - "grad_norm": 0.6567912697792053, - "learning_rate": 8.283349429848087e-06, - "loss": 0.1985, - "step": 11970 - }, - { - "epoch": 1.12771719931231, - "grad_norm": 0.6428489089012146, - "learning_rate": 8.281861846982558e-06, - "loss": 0.2212, - "step": 11971 - }, - { - "epoch": 1.127811403405478, - "grad_norm": 0.7107322812080383, - "learning_rate": 8.280374303293235e-06, - "loss": 0.2077, - "step": 11972 - }, - { - "epoch": 1.1279056074986458, - "grad_norm": 0.680479884147644, - "learning_rate": 8.278886798814034e-06, - "loss": 0.2132, - "step": 11973 - }, - { - "epoch": 1.1279998115918137, - "grad_norm": 0.610516369342804, - "learning_rate": 8.27739933357888e-06, - "loss": 0.1945, - "step": 11974 - }, - { - "epoch": 1.1280940156849815, - "grad_norm": 0.6916232705116272, - "learning_rate": 8.275911907621683e-06, - "loss": 0.2075, - "step": 11975 - }, - { - "epoch": 1.1281882197781494, - "grad_norm": 0.6947224140167236, - "learning_rate": 8.274424520976358e-06, - "loss": 0.2099, - "step": 11976 - }, - { - "epoch": 1.1282824238713172, - "grad_norm": 0.6378065347671509, - "learning_rate": 8.272937173676828e-06, - "loss": 0.2184, - "step": 11977 - }, - { - "epoch": 1.1283766279644851, - "grad_norm": 0.7788832187652588, - "learning_rate": 8.271449865757e-06, - "loss": 0.2184, - "step": 11978 - }, - { - "epoch": 1.1284708320576529, - "grad_norm": 0.6401863694190979, - "learning_rate": 8.269962597250786e-06, - "loss": 0.2462, - "step": 11979 - }, - { - "epoch": 1.1285650361508208, - "grad_norm": 0.6124123930931091, - "learning_rate": 8.2684753681921e-06, - "loss": 0.2183, - "step": 11980 - }, - { - "epoch": 1.1286592402439886, - "grad_norm": 0.7102654576301575, - "learning_rate": 8.266988178614857e-06, - "loss": 0.2293, - "step": 11981 - }, - { - "epoch": 1.1287534443371565, - "grad_norm": 0.6006560921669006, - "learning_rate": 8.26550102855296e-06, - "loss": 0.1955, - "step": 11982 - }, - { - "epoch": 1.1288476484303243, - "grad_norm": 0.6482653617858887, - "learning_rate": 8.264013918040324e-06, - "loss": 0.2279, - "step": 11983 - }, - { - "epoch": 1.1289418525234922, - 
"grad_norm": 0.6651721000671387, - "learning_rate": 8.262526847110856e-06, - "loss": 0.178, - "step": 11984 - }, - { - "epoch": 1.12903605661666, - "grad_norm": 0.6731470823287964, - "learning_rate": 8.261039815798461e-06, - "loss": 0.2186, - "step": 11985 - }, - { - "epoch": 1.129130260709828, - "grad_norm": 0.7015734314918518, - "learning_rate": 8.259552824137049e-06, - "loss": 0.2227, - "step": 11986 - }, - { - "epoch": 1.1292244648029957, - "grad_norm": 0.6767539978027344, - "learning_rate": 8.258065872160523e-06, - "loss": 0.191, - "step": 11987 - }, - { - "epoch": 1.1293186688961636, - "grad_norm": 0.6386114358901978, - "learning_rate": 8.256578959902794e-06, - "loss": 0.2152, - "step": 11988 - }, - { - "epoch": 1.1294128729893314, - "grad_norm": 0.6761164665222168, - "learning_rate": 8.255092087397757e-06, - "loss": 0.1971, - "step": 11989 - }, - { - "epoch": 1.1295070770824993, - "grad_norm": 0.6379042863845825, - "learning_rate": 8.253605254679316e-06, - "loss": 0.2519, - "step": 11990 - }, - { - "epoch": 1.129601281175667, - "grad_norm": 0.7005086541175842, - "learning_rate": 8.252118461781381e-06, - "loss": 0.2181, - "step": 11991 - }, - { - "epoch": 1.129695485268835, - "grad_norm": 0.7167004942893982, - "learning_rate": 8.250631708737848e-06, - "loss": 0.2465, - "step": 11992 - }, - { - "epoch": 1.1297896893620027, - "grad_norm": 0.6855339407920837, - "learning_rate": 8.249144995582613e-06, - "loss": 0.2097, - "step": 11993 - }, - { - "epoch": 1.1298838934551707, - "grad_norm": 0.6948375105857849, - "learning_rate": 8.247658322349583e-06, - "loss": 0.2115, - "step": 11994 - }, - { - "epoch": 1.1299780975483384, - "grad_norm": 0.6016854643821716, - "learning_rate": 8.246171689072654e-06, - "loss": 0.1975, - "step": 11995 - }, - { - "epoch": 1.1300723016415064, - "grad_norm": 0.701055109500885, - "learning_rate": 8.24468509578572e-06, - "loss": 0.2097, - "step": 11996 - }, - { - "epoch": 1.1301665057346741, - "grad_norm": 0.5774961709976196, - "learning_rate": 8.243198542522682e-06, - "loss": 0.1831, - "step": 11997 - }, - { - "epoch": 1.130260709827842, - "grad_norm": 0.6355651617050171, - "learning_rate": 8.241712029317435e-06, - "loss": 0.1829, - "step": 11998 - }, - { - "epoch": 1.1303549139210098, - "grad_norm": 0.6253888607025146, - "learning_rate": 8.240225556203874e-06, - "loss": 0.2275, - "step": 11999 - }, - { - "epoch": 1.1304491180141778, - "grad_norm": 0.6373803019523621, - "learning_rate": 8.238739123215887e-06, - "loss": 0.1924, - "step": 12000 - }, - { - "epoch": 1.1305433221073455, - "grad_norm": 0.6929498314857483, - "learning_rate": 8.237252730387376e-06, - "loss": 0.2081, - "step": 12001 - }, - { - "epoch": 1.1306375262005135, - "grad_norm": 0.7598393559455872, - "learning_rate": 8.235766377752229e-06, - "loss": 0.2178, - "step": 12002 - }, - { - "epoch": 1.1307317302936812, - "grad_norm": 0.649556577205658, - "learning_rate": 8.234280065344335e-06, - "loss": 0.1958, - "step": 12003 - }, - { - "epoch": 1.1308259343868492, - "grad_norm": 0.6266392469406128, - "learning_rate": 8.232793793197586e-06, - "loss": 0.1946, - "step": 12004 - }, - { - "epoch": 1.130920138480017, - "grad_norm": 0.7205418348312378, - "learning_rate": 8.231307561345874e-06, - "loss": 0.2089, - "step": 12005 - }, - { - "epoch": 1.131014342573185, - "grad_norm": 0.6992611289024353, - "learning_rate": 8.229821369823082e-06, - "loss": 0.221, - "step": 12006 - }, - { - "epoch": 1.1311085466663526, - "grad_norm": 0.6830902099609375, - "learning_rate": 8.228335218663103e-06, - "loss": 0.2184, - 
"step": 12007 - }, - { - "epoch": 1.1312027507595206, - "grad_norm": 1.000243067741394, - "learning_rate": 8.226849107899822e-06, - "loss": 0.1867, - "step": 12008 - }, - { - "epoch": 1.1312969548526883, - "grad_norm": 0.5924058556556702, - "learning_rate": 8.225363037567122e-06, - "loss": 0.2159, - "step": 12009 - }, - { - "epoch": 1.1313911589458563, - "grad_norm": 0.6228819489479065, - "learning_rate": 8.223877007698885e-06, - "loss": 0.1992, - "step": 12010 - }, - { - "epoch": 1.131485363039024, - "grad_norm": 0.6410822868347168, - "learning_rate": 8.222391018329007e-06, - "loss": 0.2064, - "step": 12011 - }, - { - "epoch": 1.131579567132192, - "grad_norm": 0.6461016535758972, - "learning_rate": 8.220905069491359e-06, - "loss": 0.208, - "step": 12012 - }, - { - "epoch": 1.1316737712253597, - "grad_norm": 0.7207659482955933, - "learning_rate": 8.219419161219825e-06, - "loss": 0.1996, - "step": 12013 - }, - { - "epoch": 1.1317679753185277, - "grad_norm": 0.6678380966186523, - "learning_rate": 8.21793329354829e-06, - "loss": 0.211, - "step": 12014 - }, - { - "epoch": 1.1318621794116954, - "grad_norm": 0.6664488911628723, - "learning_rate": 8.216447466510633e-06, - "loss": 0.1997, - "step": 12015 - }, - { - "epoch": 1.1319563835048632, - "grad_norm": 0.649957001209259, - "learning_rate": 8.214961680140728e-06, - "loss": 0.2047, - "step": 12016 - }, - { - "epoch": 1.1320505875980311, - "grad_norm": 0.6333931088447571, - "learning_rate": 8.21347593447246e-06, - "loss": 0.2266, - "step": 12017 - }, - { - "epoch": 1.132144791691199, - "grad_norm": 0.6505257487297058, - "learning_rate": 8.211990229539704e-06, - "loss": 0.2159, - "step": 12018 - }, - { - "epoch": 1.1322389957843668, - "grad_norm": 0.5816699862480164, - "learning_rate": 8.210504565376337e-06, - "loss": 0.1764, - "step": 12019 - }, - { - "epoch": 1.1323331998775346, - "grad_norm": 0.8375851511955261, - "learning_rate": 8.20901894201623e-06, - "loss": 0.2656, - "step": 12020 - }, - { - "epoch": 1.1324274039707025, - "grad_norm": 0.7156752943992615, - "learning_rate": 8.207533359493262e-06, - "loss": 0.2226, - "step": 12021 - }, - { - "epoch": 1.1325216080638705, - "grad_norm": 0.7169298529624939, - "learning_rate": 8.206047817841308e-06, - "loss": 0.2352, - "step": 12022 - }, - { - "epoch": 1.1326158121570382, - "grad_norm": 0.6565631628036499, - "learning_rate": 8.204562317094235e-06, - "loss": 0.2273, - "step": 12023 - }, - { - "epoch": 1.132710016250206, - "grad_norm": 0.6851234436035156, - "learning_rate": 8.203076857285919e-06, - "loss": 0.2034, - "step": 12024 - }, - { - "epoch": 1.132804220343374, - "grad_norm": 0.5998619198799133, - "learning_rate": 8.20159143845023e-06, - "loss": 0.1939, - "step": 12025 - }, - { - "epoch": 1.1328984244365419, - "grad_norm": 0.6665204763412476, - "learning_rate": 8.200106060621036e-06, - "loss": 0.1994, - "step": 12026 - }, - { - "epoch": 1.1329926285297096, - "grad_norm": 0.6673848628997803, - "learning_rate": 8.19862072383221e-06, - "loss": 0.2355, - "step": 12027 - }, - { - "epoch": 1.1330868326228773, - "grad_norm": 0.622778594493866, - "learning_rate": 8.197135428117618e-06, - "loss": 0.1933, - "step": 12028 - }, - { - "epoch": 1.1331810367160453, - "grad_norm": 0.6481777429580688, - "learning_rate": 8.195650173511127e-06, - "loss": 0.1957, - "step": 12029 - }, - { - "epoch": 1.1332752408092133, - "grad_norm": 0.6352075338363647, - "learning_rate": 8.194164960046595e-06, - "loss": 0.1932, - "step": 12030 - }, - { - "epoch": 1.133369444902381, - "grad_norm": 0.6170874834060669, - 
"learning_rate": 8.192679787757903e-06, - "loss": 0.208, - "step": 12031 - }, - { - "epoch": 1.1334636489955487, - "grad_norm": 0.6173102855682373, - "learning_rate": 8.191194656678905e-06, - "loss": 0.1949, - "step": 12032 - }, - { - "epoch": 1.1335578530887167, - "grad_norm": 0.7190761566162109, - "learning_rate": 8.189709566843463e-06, - "loss": 0.2199, - "step": 12033 - }, - { - "epoch": 1.1336520571818847, - "grad_norm": 0.6681669354438782, - "learning_rate": 8.188224518285445e-06, - "loss": 0.1875, - "step": 12034 - }, - { - "epoch": 1.1337462612750524, - "grad_norm": 0.6125658750534058, - "learning_rate": 8.18673951103871e-06, - "loss": 0.194, - "step": 12035 - }, - { - "epoch": 1.1338404653682201, - "grad_norm": 0.5775545239448547, - "learning_rate": 8.185254545137115e-06, - "loss": 0.1859, - "step": 12036 - }, - { - "epoch": 1.133934669461388, - "grad_norm": 0.5781084299087524, - "learning_rate": 8.183769620614528e-06, - "loss": 0.1816, - "step": 12037 - }, - { - "epoch": 1.134028873554556, - "grad_norm": 0.6491709351539612, - "learning_rate": 8.1822847375048e-06, - "loss": 0.1779, - "step": 12038 - }, - { - "epoch": 1.1341230776477238, - "grad_norm": 0.6196013689041138, - "learning_rate": 8.180799895841793e-06, - "loss": 0.1854, - "step": 12039 - }, - { - "epoch": 1.1342172817408915, - "grad_norm": 0.6702143549919128, - "learning_rate": 8.179315095659358e-06, - "loss": 0.2014, - "step": 12040 - }, - { - "epoch": 1.1343114858340595, - "grad_norm": 0.639610230922699, - "learning_rate": 8.177830336991357e-06, - "loss": 0.1889, - "step": 12041 - }, - { - "epoch": 1.1344056899272275, - "grad_norm": 0.59451824426651, - "learning_rate": 8.176345619871643e-06, - "loss": 0.2034, - "step": 12042 - }, - { - "epoch": 1.1344998940203952, - "grad_norm": 0.6340506076812744, - "learning_rate": 8.174860944334067e-06, - "loss": 0.1984, - "step": 12043 - }, - { - "epoch": 1.134594098113563, - "grad_norm": 0.5687589049339294, - "learning_rate": 8.173376310412486e-06, - "loss": 0.1574, - "step": 12044 - }, - { - "epoch": 1.134688302206731, - "grad_norm": 0.5838512182235718, - "learning_rate": 8.171891718140753e-06, - "loss": 0.1831, - "step": 12045 - }, - { - "epoch": 1.1347825062998989, - "grad_norm": 0.6016603708267212, - "learning_rate": 8.170407167552707e-06, - "loss": 0.211, - "step": 12046 - }, - { - "epoch": 1.1348767103930666, - "grad_norm": 0.6274499893188477, - "learning_rate": 8.168922658682214e-06, - "loss": 0.2022, - "step": 12047 - }, - { - "epoch": 1.1349709144862343, - "grad_norm": 0.6957648396492004, - "learning_rate": 8.167438191563118e-06, - "loss": 0.2305, - "step": 12048 - }, - { - "epoch": 1.1350651185794023, - "grad_norm": 0.742519736289978, - "learning_rate": 8.165953766229263e-06, - "loss": 0.2084, - "step": 12049 - }, - { - "epoch": 1.1351593226725702, - "grad_norm": 0.630852222442627, - "learning_rate": 8.164469382714493e-06, - "loss": 0.1887, - "step": 12050 - }, - { - "epoch": 1.135253526765738, - "grad_norm": 0.612669825553894, - "learning_rate": 8.162985041052668e-06, - "loss": 0.2063, - "step": 12051 - }, - { - "epoch": 1.1353477308589057, - "grad_norm": 0.6570709943771362, - "learning_rate": 8.16150074127762e-06, - "loss": 0.2442, - "step": 12052 - }, - { - "epoch": 1.1354419349520737, - "grad_norm": 0.6268219947814941, - "learning_rate": 8.1600164834232e-06, - "loss": 0.208, - "step": 12053 - }, - { - "epoch": 1.1355361390452414, - "grad_norm": 0.6095151901245117, - "learning_rate": 8.15853226752325e-06, - "loss": 0.2111, - "step": 12054 - }, - { - "epoch": 
1.1356303431384094, - "grad_norm": 0.6499700546264648, - "learning_rate": 8.157048093611613e-06, - "loss": 0.1994, - "step": 12055 - }, - { - "epoch": 1.1357245472315771, - "grad_norm": 0.7538737654685974, - "learning_rate": 8.155563961722127e-06, - "loss": 0.1941, - "step": 12056 - }, - { - "epoch": 1.135818751324745, - "grad_norm": 0.7377210259437561, - "learning_rate": 8.154079871888637e-06, - "loss": 0.2218, - "step": 12057 - }, - { - "epoch": 1.1359129554179128, - "grad_norm": 0.7277575135231018, - "learning_rate": 8.15259582414498e-06, - "loss": 0.2024, - "step": 12058 - }, - { - "epoch": 1.1360071595110808, - "grad_norm": 0.5854521989822388, - "learning_rate": 8.151111818524997e-06, - "loss": 0.1743, - "step": 12059 - }, - { - "epoch": 1.1361013636042485, - "grad_norm": 0.625900149345398, - "learning_rate": 8.149627855062521e-06, - "loss": 0.223, - "step": 12060 - }, - { - "epoch": 1.1361955676974165, - "grad_norm": 0.6748207807540894, - "learning_rate": 8.148143933791393e-06, - "loss": 0.2024, - "step": 12061 - }, - { - "epoch": 1.1362897717905842, - "grad_norm": 0.6645220518112183, - "learning_rate": 8.146660054745449e-06, - "loss": 0.2194, - "step": 12062 - }, - { - "epoch": 1.1363839758837522, - "grad_norm": 0.6353769302368164, - "learning_rate": 8.145176217958519e-06, - "loss": 0.2102, - "step": 12063 - }, - { - "epoch": 1.13647817997692, - "grad_norm": 0.6586669683456421, - "learning_rate": 8.143692423464442e-06, - "loss": 0.2161, - "step": 12064 - }, - { - "epoch": 1.1365723840700879, - "grad_norm": 0.6031149625778198, - "learning_rate": 8.14220867129705e-06, - "loss": 0.1881, - "step": 12065 - }, - { - "epoch": 1.1366665881632556, - "grad_norm": 0.7279676795005798, - "learning_rate": 8.140724961490167e-06, - "loss": 0.2362, - "step": 12066 - }, - { - "epoch": 1.1367607922564236, - "grad_norm": 0.6587242484092712, - "learning_rate": 8.139241294077636e-06, - "loss": 0.2243, - "step": 12067 - }, - { - "epoch": 1.1368549963495913, - "grad_norm": 0.6596072912216187, - "learning_rate": 8.137757669093283e-06, - "loss": 0.2028, - "step": 12068 - }, - { - "epoch": 1.1369492004427593, - "grad_norm": 0.6711840629577637, - "learning_rate": 8.13627408657093e-06, - "loss": 0.2048, - "step": 12069 - }, - { - "epoch": 1.137043404535927, - "grad_norm": 0.6759728789329529, - "learning_rate": 8.134790546544409e-06, - "loss": 0.2269, - "step": 12070 - }, - { - "epoch": 1.137137608629095, - "grad_norm": 0.7026951313018799, - "learning_rate": 8.133307049047554e-06, - "loss": 0.2362, - "step": 12071 - }, - { - "epoch": 1.1372318127222627, - "grad_norm": 0.7006614804267883, - "learning_rate": 8.131823594114183e-06, - "loss": 0.2183, - "step": 12072 - }, - { - "epoch": 1.1373260168154307, - "grad_norm": 0.6411715149879456, - "learning_rate": 8.13034018177812e-06, - "loss": 0.2158, - "step": 12073 - }, - { - "epoch": 1.1374202209085984, - "grad_norm": 0.6297891139984131, - "learning_rate": 8.128856812073196e-06, - "loss": 0.1788, - "step": 12074 - }, - { - "epoch": 1.1375144250017664, - "grad_norm": 0.5406700968742371, - "learning_rate": 8.127373485033231e-06, - "loss": 0.2036, - "step": 12075 - }, - { - "epoch": 1.137608629094934, - "grad_norm": 0.669718861579895, - "learning_rate": 8.125890200692043e-06, - "loss": 0.196, - "step": 12076 - }, - { - "epoch": 1.137702833188102, - "grad_norm": 0.6460605263710022, - "learning_rate": 8.124406959083459e-06, - "loss": 0.184, - "step": 12077 - }, - { - "epoch": 1.1377970372812698, - "grad_norm": 0.6431288719177246, - "learning_rate": 8.122923760241297e-06, 
- "loss": 0.1834, - "step": 12078 - }, - { - "epoch": 1.1378912413744378, - "grad_norm": 0.6451385617256165, - "learning_rate": 8.121440604199378e-06, - "loss": 0.1941, - "step": 12079 - }, - { - "epoch": 1.1379854454676055, - "grad_norm": 0.5862488746643066, - "learning_rate": 8.119957490991514e-06, - "loss": 0.2062, - "step": 12080 - }, - { - "epoch": 1.1380796495607735, - "grad_norm": 0.7165570259094238, - "learning_rate": 8.11847442065153e-06, - "loss": 0.2116, - "step": 12081 - }, - { - "epoch": 1.1381738536539412, - "grad_norm": 0.5758606791496277, - "learning_rate": 8.116991393213239e-06, - "loss": 0.194, - "step": 12082 - }, - { - "epoch": 1.1382680577471092, - "grad_norm": 0.6506445407867432, - "learning_rate": 8.115508408710454e-06, - "loss": 0.1872, - "step": 12083 - }, - { - "epoch": 1.138362261840277, - "grad_norm": 0.5742412805557251, - "learning_rate": 8.114025467176994e-06, - "loss": 0.1747, - "step": 12084 - }, - { - "epoch": 1.1384564659334448, - "grad_norm": 0.6786984801292419, - "learning_rate": 8.112542568646672e-06, - "loss": 0.2083, - "step": 12085 - }, - { - "epoch": 1.1385506700266126, - "grad_norm": 0.6596458554267883, - "learning_rate": 8.11105971315329e-06, - "loss": 0.1861, - "step": 12086 - }, - { - "epoch": 1.1386448741197805, - "grad_norm": 0.7559734582901001, - "learning_rate": 8.109576900730676e-06, - "loss": 0.2352, - "step": 12087 - }, - { - "epoch": 1.1387390782129483, - "grad_norm": 0.6362413167953491, - "learning_rate": 8.108094131412633e-06, - "loss": 0.198, - "step": 12088 - }, - { - "epoch": 1.1388332823061162, - "grad_norm": 0.6498036980628967, - "learning_rate": 8.106611405232967e-06, - "loss": 0.2126, - "step": 12089 - }, - { - "epoch": 1.138927486399284, - "grad_norm": 0.6648655533790588, - "learning_rate": 8.105128722225486e-06, - "loss": 0.2004, - "step": 12090 - }, - { - "epoch": 1.139021690492452, - "grad_norm": 0.657875120639801, - "learning_rate": 8.103646082424004e-06, - "loss": 0.2038, - "step": 12091 - }, - { - "epoch": 1.1391158945856197, - "grad_norm": 0.744015097618103, - "learning_rate": 8.102163485862324e-06, - "loss": 0.2375, - "step": 12092 - }, - { - "epoch": 1.1392100986787876, - "grad_norm": 0.6036882996559143, - "learning_rate": 8.10068093257425e-06, - "loss": 0.2114, - "step": 12093 - }, - { - "epoch": 1.1393043027719554, - "grad_norm": 0.6374387145042419, - "learning_rate": 8.099198422593589e-06, - "loss": 0.2025, - "step": 12094 - }, - { - "epoch": 1.1393985068651233, - "grad_norm": 0.6632814407348633, - "learning_rate": 8.097715955954145e-06, - "loss": 0.2065, - "step": 12095 - }, - { - "epoch": 1.139492710958291, - "grad_norm": 0.6246944665908813, - "learning_rate": 8.096233532689718e-06, - "loss": 0.2078, - "step": 12096 - }, - { - "epoch": 1.139586915051459, - "grad_norm": 0.6394679546356201, - "learning_rate": 8.094751152834109e-06, - "loss": 0.1938, - "step": 12097 - }, - { - "epoch": 1.1396811191446268, - "grad_norm": 0.5818282961845398, - "learning_rate": 8.093268816421122e-06, - "loss": 0.1783, - "step": 12098 - }, - { - "epoch": 1.1397753232377947, - "grad_norm": 0.649113118648529, - "learning_rate": 8.091786523484557e-06, - "loss": 0.2074, - "step": 12099 - }, - { - "epoch": 1.1398695273309625, - "grad_norm": 0.6660482287406921, - "learning_rate": 8.090304274058205e-06, - "loss": 0.2113, - "step": 12100 - }, - { - "epoch": 1.1399637314241304, - "grad_norm": 0.7327325344085693, - "learning_rate": 8.088822068175872e-06, - "loss": 0.2187, - "step": 12101 - }, - { - "epoch": 1.1400579355172982, - "grad_norm": 
0.699038028717041, - "learning_rate": 8.087339905871354e-06, - "loss": 0.2182, - "step": 12102 - }, - { - "epoch": 1.1401521396104661, - "grad_norm": 0.6511684060096741, - "learning_rate": 8.085857787178439e-06, - "loss": 0.1888, - "step": 12103 - }, - { - "epoch": 1.1402463437036339, - "grad_norm": 0.6134809851646423, - "learning_rate": 8.08437571213093e-06, - "loss": 0.2038, - "step": 12104 - }, - { - "epoch": 1.1403405477968018, - "grad_norm": 0.6674476265907288, - "learning_rate": 8.082893680762619e-06, - "loss": 0.2282, - "step": 12105 - }, - { - "epoch": 1.1404347518899696, - "grad_norm": 0.5779377222061157, - "learning_rate": 8.081411693107291e-06, - "loss": 0.2, - "step": 12106 - }, - { - "epoch": 1.1405289559831375, - "grad_norm": 0.6250529885292053, - "learning_rate": 8.079929749198748e-06, - "loss": 0.2253, - "step": 12107 - }, - { - "epoch": 1.1406231600763053, - "grad_norm": 0.6807493567466736, - "learning_rate": 8.078447849070777e-06, - "loss": 0.2181, - "step": 12108 - }, - { - "epoch": 1.1407173641694732, - "grad_norm": 0.715938150882721, - "learning_rate": 8.076965992757166e-06, - "loss": 0.221, - "step": 12109 - }, - { - "epoch": 1.140811568262641, - "grad_norm": 0.6325286030769348, - "learning_rate": 8.075484180291702e-06, - "loss": 0.1948, - "step": 12110 - }, - { - "epoch": 1.140905772355809, - "grad_norm": 0.6969076991081238, - "learning_rate": 8.074002411708177e-06, - "loss": 0.2131, - "step": 12111 - }, - { - "epoch": 1.1409999764489767, - "grad_norm": 0.7182613611221313, - "learning_rate": 8.072520687040376e-06, - "loss": 0.1993, - "step": 12112 - }, - { - "epoch": 1.1410941805421446, - "grad_norm": 0.6815456748008728, - "learning_rate": 8.07103900632208e-06, - "loss": 0.1952, - "step": 12113 - }, - { - "epoch": 1.1411883846353124, - "grad_norm": 0.6510999798774719, - "learning_rate": 8.069557369587084e-06, - "loss": 0.1902, - "step": 12114 - }, - { - "epoch": 1.1412825887284803, - "grad_norm": 0.6855291128158569, - "learning_rate": 8.068075776869163e-06, - "loss": 0.2093, - "step": 12115 - }, - { - "epoch": 1.141376792821648, - "grad_norm": 0.6667503118515015, - "learning_rate": 8.066594228202101e-06, - "loss": 0.2271, - "step": 12116 - }, - { - "epoch": 1.141470996914816, - "grad_norm": 0.6633936762809753, - "learning_rate": 8.06511272361968e-06, - "loss": 0.1893, - "step": 12117 - }, - { - "epoch": 1.1415652010079838, - "grad_norm": 0.6098134517669678, - "learning_rate": 8.063631263155684e-06, - "loss": 0.1943, - "step": 12118 - }, - { - "epoch": 1.1416594051011517, - "grad_norm": 0.6373329758644104, - "learning_rate": 8.06214984684389e-06, - "loss": 0.2103, - "step": 12119 - }, - { - "epoch": 1.1417536091943195, - "grad_norm": 0.6690663695335388, - "learning_rate": 8.060668474718072e-06, - "loss": 0.2412, - "step": 12120 - }, - { - "epoch": 1.1418478132874874, - "grad_norm": 0.6494943499565125, - "learning_rate": 8.059187146812015e-06, - "loss": 0.1868, - "step": 12121 - }, - { - "epoch": 1.1419420173806551, - "grad_norm": 0.5967381596565247, - "learning_rate": 8.057705863159493e-06, - "loss": 0.199, - "step": 12122 - }, - { - "epoch": 1.142036221473823, - "grad_norm": 0.7099337577819824, - "learning_rate": 8.05622462379428e-06, - "loss": 0.2196, - "step": 12123 - }, - { - "epoch": 1.1421304255669908, - "grad_norm": 0.6998152732849121, - "learning_rate": 8.054743428750153e-06, - "loss": 0.2266, - "step": 12124 - }, - { - "epoch": 1.1422246296601588, - "grad_norm": 0.6493479609489441, - "learning_rate": 8.053262278060887e-06, - "loss": 0.1901, - "step": 12125 - 
}, - { - "epoch": 1.1423188337533265, - "grad_norm": 0.6034460067749023, - "learning_rate": 8.051781171760244e-06, - "loss": 0.1894, - "step": 12126 - }, - { - "epoch": 1.1424130378464945, - "grad_norm": 0.6795820593833923, - "learning_rate": 8.050300109882008e-06, - "loss": 0.2199, - "step": 12127 - }, - { - "epoch": 1.1425072419396622, - "grad_norm": 0.6849047541618347, - "learning_rate": 8.048819092459947e-06, - "loss": 0.2142, - "step": 12128 - }, - { - "epoch": 1.1426014460328302, - "grad_norm": 0.6387856006622314, - "learning_rate": 8.047338119527827e-06, - "loss": 0.1775, - "step": 12129 - }, - { - "epoch": 1.142695650125998, - "grad_norm": 0.6189826726913452, - "learning_rate": 8.045857191119414e-06, - "loss": 0.2052, - "step": 12130 - }, - { - "epoch": 1.142789854219166, - "grad_norm": 0.6556556224822998, - "learning_rate": 8.044376307268482e-06, - "loss": 0.2027, - "step": 12131 - }, - { - "epoch": 1.1428840583123336, - "grad_norm": 0.6616007685661316, - "learning_rate": 8.042895468008794e-06, - "loss": 0.2151, - "step": 12132 - }, - { - "epoch": 1.1429782624055016, - "grad_norm": 0.6787793040275574, - "learning_rate": 8.041414673374116e-06, - "loss": 0.2132, - "step": 12133 - }, - { - "epoch": 1.1430724664986693, - "grad_norm": 0.6502068042755127, - "learning_rate": 8.039933923398213e-06, - "loss": 0.1681, - "step": 12134 - }, - { - "epoch": 1.1431666705918373, - "grad_norm": 0.6827070713043213, - "learning_rate": 8.038453218114847e-06, - "loss": 0.2444, - "step": 12135 - }, - { - "epoch": 1.143260874685005, - "grad_norm": 0.7281052470207214, - "learning_rate": 8.03697255755778e-06, - "loss": 0.2186, - "step": 12136 - }, - { - "epoch": 1.143355078778173, - "grad_norm": 0.5791061520576477, - "learning_rate": 8.035491941760779e-06, - "loss": 0.2034, - "step": 12137 - }, - { - "epoch": 1.1434492828713407, - "grad_norm": 0.5991633534431458, - "learning_rate": 8.034011370757596e-06, - "loss": 0.2068, - "step": 12138 - }, - { - "epoch": 1.1435434869645087, - "grad_norm": 0.7107877731323242, - "learning_rate": 8.032530844581997e-06, - "loss": 0.2051, - "step": 12139 - }, - { - "epoch": 1.1436376910576764, - "grad_norm": 0.5817581415176392, - "learning_rate": 8.031050363267733e-06, - "loss": 0.2096, - "step": 12140 - }, - { - "epoch": 1.1437318951508444, - "grad_norm": 0.6221569776535034, - "learning_rate": 8.029569926848571e-06, - "loss": 0.2036, - "step": 12141 - }, - { - "epoch": 1.1438260992440121, - "grad_norm": 0.6726136207580566, - "learning_rate": 8.02808953535826e-06, - "loss": 0.2004, - "step": 12142 - }, - { - "epoch": 1.14392030333718, - "grad_norm": 0.6434982419013977, - "learning_rate": 8.026609188830554e-06, - "loss": 0.2298, - "step": 12143 - }, - { - "epoch": 1.1440145074303478, - "grad_norm": 0.6025883555412292, - "learning_rate": 8.025128887299213e-06, - "loss": 0.201, - "step": 12144 - }, - { - "epoch": 1.1441087115235158, - "grad_norm": 0.6930113434791565, - "learning_rate": 8.02364863079799e-06, - "loss": 0.2143, - "step": 12145 - }, - { - "epoch": 1.1442029156166835, - "grad_norm": 0.6260280013084412, - "learning_rate": 8.02216841936063e-06, - "loss": 0.1886, - "step": 12146 - }, - { - "epoch": 1.1442971197098515, - "grad_norm": 0.6329609155654907, - "learning_rate": 8.020688253020891e-06, - "loss": 0.2103, - "step": 12147 - }, - { - "epoch": 1.1443913238030192, - "grad_norm": 0.6741276979446411, - "learning_rate": 8.019208131812524e-06, - "loss": 0.2093, - "step": 12148 - }, - { - "epoch": 1.1444855278961872, - "grad_norm": 0.6147240400314331, - "learning_rate": 
8.017728055769274e-06, - "loss": 0.1918, - "step": 12149 - }, - { - "epoch": 1.144579731989355, - "grad_norm": 0.7587670683860779, - "learning_rate": 8.016248024924886e-06, - "loss": 0.2144, - "step": 12150 - }, - { - "epoch": 1.1446739360825229, - "grad_norm": 0.666145920753479, - "learning_rate": 8.014768039313114e-06, - "loss": 0.1831, - "step": 12151 - }, - { - "epoch": 1.1447681401756906, - "grad_norm": 0.6701474189758301, - "learning_rate": 8.013288098967701e-06, - "loss": 0.2252, - "step": 12152 - }, - { - "epoch": 1.1448623442688586, - "grad_norm": 0.6293619871139526, - "learning_rate": 8.01180820392239e-06, - "loss": 0.1958, - "step": 12153 - }, - { - "epoch": 1.1449565483620263, - "grad_norm": 0.7255494594573975, - "learning_rate": 8.010328354210928e-06, - "loss": 0.2127, - "step": 12154 - }, - { - "epoch": 1.145050752455194, - "grad_norm": 0.6386851072311401, - "learning_rate": 8.008848549867057e-06, - "loss": 0.2206, - "step": 12155 - }, - { - "epoch": 1.145144956548362, - "grad_norm": 0.6834396123886108, - "learning_rate": 8.007368790924514e-06, - "loss": 0.1989, - "step": 12156 - }, - { - "epoch": 1.14523916064153, - "grad_norm": 0.614005446434021, - "learning_rate": 8.00588907741705e-06, - "loss": 0.2109, - "step": 12157 - }, - { - "epoch": 1.1453333647346977, - "grad_norm": 0.6057419776916504, - "learning_rate": 8.004409409378398e-06, - "loss": 0.2006, - "step": 12158 - }, - { - "epoch": 1.1454275688278654, - "grad_norm": 0.63899165391922, - "learning_rate": 8.002929786842297e-06, - "loss": 0.1942, - "step": 12159 - }, - { - "epoch": 1.1455217729210334, - "grad_norm": 0.713495135307312, - "learning_rate": 8.001450209842483e-06, - "loss": 0.2145, - "step": 12160 - }, - { - "epoch": 1.1456159770142014, - "grad_norm": 0.7448211312294006, - "learning_rate": 7.999970678412697e-06, - "loss": 0.2467, - "step": 12161 - }, - { - "epoch": 1.145710181107369, - "grad_norm": 0.6611918210983276, - "learning_rate": 7.998491192586676e-06, - "loss": 0.211, - "step": 12162 - }, - { - "epoch": 1.1458043852005368, - "grad_norm": 0.691112220287323, - "learning_rate": 7.997011752398144e-06, - "loss": 0.2324, - "step": 12163 - }, - { - "epoch": 1.1458985892937048, - "grad_norm": 0.7479667663574219, - "learning_rate": 7.995532357880847e-06, - "loss": 0.238, - "step": 12164 - }, - { - "epoch": 1.1459927933868728, - "grad_norm": 0.6619879603385925, - "learning_rate": 7.994053009068513e-06, - "loss": 0.1808, - "step": 12165 - }, - { - "epoch": 1.1460869974800405, - "grad_norm": 0.687891960144043, - "learning_rate": 7.992573705994867e-06, - "loss": 0.2338, - "step": 12166 - }, - { - "epoch": 1.1461812015732082, - "grad_norm": 0.5902931690216064, - "learning_rate": 7.991094448693648e-06, - "loss": 0.1774, - "step": 12167 - }, - { - "epoch": 1.1462754056663762, - "grad_norm": 0.6471607089042664, - "learning_rate": 7.989615237198585e-06, - "loss": 0.2051, - "step": 12168 - }, - { - "epoch": 1.1463696097595442, - "grad_norm": 0.6792701482772827, - "learning_rate": 7.988136071543404e-06, - "loss": 0.236, - "step": 12169 - }, - { - "epoch": 1.146463813852712, - "grad_norm": 0.6185738444328308, - "learning_rate": 7.986656951761826e-06, - "loss": 0.2208, - "step": 12170 - }, - { - "epoch": 1.1465580179458796, - "grad_norm": 0.648489236831665, - "learning_rate": 7.98517787788759e-06, - "loss": 0.2062, - "step": 12171 - }, - { - "epoch": 1.1466522220390476, - "grad_norm": 0.6400814652442932, - "learning_rate": 7.98369884995441e-06, - "loss": 0.2254, - "step": 12172 - }, - { - "epoch": 1.1467464261322156, - 
"grad_norm": 0.629786491394043, - "learning_rate": 7.982219867996013e-06, - "loss": 0.2239, - "step": 12173 - }, - { - "epoch": 1.1468406302253833, - "grad_norm": 0.8736804127693176, - "learning_rate": 7.980740932046126e-06, - "loss": 0.169, - "step": 12174 - }, - { - "epoch": 1.146934834318551, - "grad_norm": 0.762713611125946, - "learning_rate": 7.979262042138472e-06, - "loss": 0.2463, - "step": 12175 - }, - { - "epoch": 1.147029038411719, - "grad_norm": 0.6414589881896973, - "learning_rate": 7.977783198306763e-06, - "loss": 0.2084, - "step": 12176 - }, - { - "epoch": 1.147123242504887, - "grad_norm": 0.6481419801712036, - "learning_rate": 7.976304400584726e-06, - "loss": 0.1933, - "step": 12177 - }, - { - "epoch": 1.1472174465980547, - "grad_norm": 0.8233293890953064, - "learning_rate": 7.974825649006082e-06, - "loss": 0.1863, - "step": 12178 - }, - { - "epoch": 1.1473116506912224, - "grad_norm": 0.7363768219947815, - "learning_rate": 7.973346943604542e-06, - "loss": 0.2164, - "step": 12179 - }, - { - "epoch": 1.1474058547843904, - "grad_norm": 0.6874502897262573, - "learning_rate": 7.971868284413824e-06, - "loss": 0.2156, - "step": 12180 - }, - { - "epoch": 1.1475000588775583, - "grad_norm": 0.6711171269416809, - "learning_rate": 7.970389671467648e-06, - "loss": 0.2253, - "step": 12181 - }, - { - "epoch": 1.147594262970726, - "grad_norm": 0.6555377244949341, - "learning_rate": 7.96891110479973e-06, - "loss": 0.2056, - "step": 12182 - }, - { - "epoch": 1.1476884670638938, - "grad_norm": 0.6161142587661743, - "learning_rate": 7.967432584443772e-06, - "loss": 0.2113, - "step": 12183 - }, - { - "epoch": 1.1477826711570618, - "grad_norm": 0.631897509098053, - "learning_rate": 7.9659541104335e-06, - "loss": 0.1881, - "step": 12184 - }, - { - "epoch": 1.1478768752502297, - "grad_norm": 0.590204656124115, - "learning_rate": 7.964475682802623e-06, - "loss": 0.1989, - "step": 12185 - }, - { - "epoch": 1.1479710793433975, - "grad_norm": 0.6091127991676331, - "learning_rate": 7.962997301584839e-06, - "loss": 0.1945, - "step": 12186 - }, - { - "epoch": 1.1480652834365652, - "grad_norm": 0.6342639327049255, - "learning_rate": 7.961518966813876e-06, - "loss": 0.2219, - "step": 12187 - }, - { - "epoch": 1.1481594875297332, - "grad_norm": 0.6492775678634644, - "learning_rate": 7.96004067852343e-06, - "loss": 0.2176, - "step": 12188 - }, - { - "epoch": 1.148253691622901, - "grad_norm": 0.5854193568229675, - "learning_rate": 7.958562436747211e-06, - "loss": 0.1888, - "step": 12189 - }, - { - "epoch": 1.1483478957160689, - "grad_norm": 0.7202615737915039, - "learning_rate": 7.957084241518922e-06, - "loss": 0.1923, - "step": 12190 - }, - { - "epoch": 1.1484420998092366, - "grad_norm": 0.6243315935134888, - "learning_rate": 7.955606092872277e-06, - "loss": 0.1824, - "step": 12191 - }, - { - "epoch": 1.1485363039024046, - "grad_norm": 0.6550903916358948, - "learning_rate": 7.954127990840972e-06, - "loss": 0.212, - "step": 12192 - }, - { - "epoch": 1.1486305079955723, - "grad_norm": 0.6427294611930847, - "learning_rate": 7.952649935458713e-06, - "loss": 0.2087, - "step": 12193 - }, - { - "epoch": 1.1487247120887403, - "grad_norm": 0.6429688930511475, - "learning_rate": 7.951171926759202e-06, - "loss": 0.2163, - "step": 12194 - }, - { - "epoch": 1.148818916181908, - "grad_norm": 0.6400426030158997, - "learning_rate": 7.949693964776141e-06, - "loss": 0.1919, - "step": 12195 - }, - { - "epoch": 1.148913120275076, - "grad_norm": 0.6039251685142517, - "learning_rate": 7.948216049543226e-06, - "loss": 0.203, - 
"step": 12196 - }, - { - "epoch": 1.1490073243682437, - "grad_norm": 0.632346510887146, - "learning_rate": 7.94673818109416e-06, - "loss": 0.1949, - "step": 12197 - }, - { - "epoch": 1.1491015284614117, - "grad_norm": 0.6787921786308289, - "learning_rate": 7.945260359462638e-06, - "loss": 0.2552, - "step": 12198 - }, - { - "epoch": 1.1491957325545794, - "grad_norm": 0.7587026357650757, - "learning_rate": 7.94378258468236e-06, - "loss": 0.2462, - "step": 12199 - }, - { - "epoch": 1.1492899366477474, - "grad_norm": 0.6545671820640564, - "learning_rate": 7.942304856787016e-06, - "loss": 0.2238, - "step": 12200 - }, - { - "epoch": 1.149384140740915, - "grad_norm": 0.685653567314148, - "learning_rate": 7.940827175810305e-06, - "loss": 0.1981, - "step": 12201 - }, - { - "epoch": 1.149478344834083, - "grad_norm": 0.6183797121047974, - "learning_rate": 7.939349541785922e-06, - "loss": 0.2105, - "step": 12202 - }, - { - "epoch": 1.1495725489272508, - "grad_norm": 0.7067667245864868, - "learning_rate": 7.937871954747548e-06, - "loss": 0.2069, - "step": 12203 - }, - { - "epoch": 1.1496667530204188, - "grad_norm": 0.6052165627479553, - "learning_rate": 7.936394414728889e-06, - "loss": 0.1944, - "step": 12204 - }, - { - "epoch": 1.1497609571135865, - "grad_norm": 0.710982620716095, - "learning_rate": 7.93491692176363e-06, - "loss": 0.196, - "step": 12205 - }, - { - "epoch": 1.1498551612067545, - "grad_norm": 0.6403450965881348, - "learning_rate": 7.933439475885452e-06, - "loss": 0.2085, - "step": 12206 - }, - { - "epoch": 1.1499493652999222, - "grad_norm": 0.6260942220687866, - "learning_rate": 7.931962077128058e-06, - "loss": 0.2026, - "step": 12207 - }, - { - "epoch": 1.1500435693930902, - "grad_norm": 0.6076931953430176, - "learning_rate": 7.930484725525123e-06, - "loss": 0.208, - "step": 12208 - }, - { - "epoch": 1.150137773486258, - "grad_norm": 0.640314519405365, - "learning_rate": 7.929007421110337e-06, - "loss": 0.207, - "step": 12209 - }, - { - "epoch": 1.1502319775794259, - "grad_norm": 0.7645244002342224, - "learning_rate": 7.927530163917383e-06, - "loss": 0.2023, - "step": 12210 - }, - { - "epoch": 1.1503261816725936, - "grad_norm": 0.632294774055481, - "learning_rate": 7.926052953979948e-06, - "loss": 0.1963, - "step": 12211 - }, - { - "epoch": 1.1504203857657616, - "grad_norm": 0.6345540285110474, - "learning_rate": 7.924575791331714e-06, - "loss": 0.1985, - "step": 12212 - }, - { - "epoch": 1.1505145898589293, - "grad_norm": 0.7057055234909058, - "learning_rate": 7.923098676006358e-06, - "loss": 0.2406, - "step": 12213 - }, - { - "epoch": 1.1506087939520973, - "grad_norm": 0.6412834525108337, - "learning_rate": 7.921621608037568e-06, - "loss": 0.2069, - "step": 12214 - }, - { - "epoch": 1.150702998045265, - "grad_norm": 0.5932299494743347, - "learning_rate": 7.920144587459019e-06, - "loss": 0.1827, - "step": 12215 - }, - { - "epoch": 1.150797202138433, - "grad_norm": 0.6430896520614624, - "learning_rate": 7.918667614304388e-06, - "loss": 0.2161, - "step": 12216 - }, - { - "epoch": 1.1508914062316007, - "grad_norm": 0.7487487196922302, - "learning_rate": 7.917190688607356e-06, - "loss": 0.224, - "step": 12217 - }, - { - "epoch": 1.1509856103247686, - "grad_norm": 0.6349984407424927, - "learning_rate": 7.915713810401598e-06, - "loss": 0.2256, - "step": 12218 - }, - { - "epoch": 1.1510798144179364, - "grad_norm": 0.7175769805908203, - "learning_rate": 7.914236979720787e-06, - "loss": 0.2441, - "step": 12219 - }, - { - "epoch": 1.1511740185111043, - "grad_norm": 0.6573382616043091, - 
"learning_rate": 7.912760196598599e-06, - "loss": 0.2042, - "step": 12220 - }, - { - "epoch": 1.151268222604272, - "grad_norm": 0.6913100481033325, - "learning_rate": 7.911283461068705e-06, - "loss": 0.1968, - "step": 12221 - }, - { - "epoch": 1.15136242669744, - "grad_norm": 0.6502411961555481, - "learning_rate": 7.909806773164784e-06, - "loss": 0.2123, - "step": 12222 - }, - { - "epoch": 1.1514566307906078, - "grad_norm": 0.6149377226829529, - "learning_rate": 7.908330132920495e-06, - "loss": 0.1897, - "step": 12223 - }, - { - "epoch": 1.1515508348837757, - "grad_norm": 0.6500223278999329, - "learning_rate": 7.906853540369514e-06, - "loss": 0.2148, - "step": 12224 - }, - { - "epoch": 1.1516450389769435, - "grad_norm": 0.6620779633522034, - "learning_rate": 7.905376995545516e-06, - "loss": 0.2227, - "step": 12225 - }, - { - "epoch": 1.1517392430701114, - "grad_norm": 0.6362714767456055, - "learning_rate": 7.903900498482153e-06, - "loss": 0.2076, - "step": 12226 - }, - { - "epoch": 1.1518334471632792, - "grad_norm": 0.657136082649231, - "learning_rate": 7.902424049213107e-06, - "loss": 0.2269, - "step": 12227 - }, - { - "epoch": 1.1519276512564471, - "grad_norm": 0.705045223236084, - "learning_rate": 7.900947647772036e-06, - "loss": 0.2058, - "step": 12228 - }, - { - "epoch": 1.1520218553496149, - "grad_norm": 0.6532936692237854, - "learning_rate": 7.899471294192602e-06, - "loss": 0.1965, - "step": 12229 - }, - { - "epoch": 1.1521160594427828, - "grad_norm": 0.5644189119338989, - "learning_rate": 7.897994988508471e-06, - "loss": 0.1666, - "step": 12230 - }, - { - "epoch": 1.1522102635359506, - "grad_norm": 0.6563928127288818, - "learning_rate": 7.896518730753307e-06, - "loss": 0.1903, - "step": 12231 - }, - { - "epoch": 1.1523044676291185, - "grad_norm": 0.6150180697441101, - "learning_rate": 7.895042520960768e-06, - "loss": 0.2156, - "step": 12232 - }, - { - "epoch": 1.1523986717222863, - "grad_norm": 0.7883591651916504, - "learning_rate": 7.893566359164513e-06, - "loss": 0.2516, - "step": 12233 - }, - { - "epoch": 1.1524928758154542, - "grad_norm": 0.5889561772346497, - "learning_rate": 7.892090245398206e-06, - "loss": 0.1712, - "step": 12234 - }, - { - "epoch": 1.152587079908622, - "grad_norm": 0.6123316287994385, - "learning_rate": 7.8906141796955e-06, - "loss": 0.2013, - "step": 12235 - }, - { - "epoch": 1.15268128400179, - "grad_norm": 0.6226065158843994, - "learning_rate": 7.889138162090052e-06, - "loss": 0.2058, - "step": 12236 - }, - { - "epoch": 1.1527754880949577, - "grad_norm": 0.6096005439758301, - "learning_rate": 7.88766219261552e-06, - "loss": 0.2002, - "step": 12237 - }, - { - "epoch": 1.1528696921881256, - "grad_norm": 0.571349561214447, - "learning_rate": 7.886186271305557e-06, - "loss": 0.1857, - "step": 12238 - }, - { - "epoch": 1.1529638962812934, - "grad_norm": 0.7446098327636719, - "learning_rate": 7.884710398193815e-06, - "loss": 0.2326, - "step": 12239 - }, - { - "epoch": 1.1530581003744613, - "grad_norm": 0.6592982411384583, - "learning_rate": 7.883234573313948e-06, - "loss": 0.2201, - "step": 12240 - }, - { - "epoch": 1.153152304467629, - "grad_norm": 0.6531952619552612, - "learning_rate": 7.881758796699605e-06, - "loss": 0.1954, - "step": 12241 - }, - { - "epoch": 1.153246508560797, - "grad_norm": 0.5881925821304321, - "learning_rate": 7.880283068384441e-06, - "loss": 0.1895, - "step": 12242 - }, - { - "epoch": 1.1533407126539648, - "grad_norm": 0.6539220213890076, - "learning_rate": 7.878807388402095e-06, - "loss": 0.2017, - "step": 12243 - }, - { - "epoch": 
1.1534349167471327, - "grad_norm": 0.7027574777603149, - "learning_rate": 7.877331756786225e-06, - "loss": 0.1787, - "step": 12244 - }, - { - "epoch": 1.1535291208403005, - "grad_norm": 0.6493908166885376, - "learning_rate": 7.875856173570476e-06, - "loss": 0.2037, - "step": 12245 - }, - { - "epoch": 1.1536233249334684, - "grad_norm": 0.5787128806114197, - "learning_rate": 7.874380638788485e-06, - "loss": 0.1722, - "step": 12246 - }, - { - "epoch": 1.1537175290266362, - "grad_norm": 0.6447485685348511, - "learning_rate": 7.87290515247391e-06, - "loss": 0.2114, - "step": 12247 - }, - { - "epoch": 1.1538117331198041, - "grad_norm": 0.6590207815170288, - "learning_rate": 7.871429714660383e-06, - "loss": 0.2085, - "step": 12248 - }, - { - "epoch": 1.1539059372129719, - "grad_norm": 0.7718537449836731, - "learning_rate": 7.869954325381552e-06, - "loss": 0.1774, - "step": 12249 - }, - { - "epoch": 1.1540001413061398, - "grad_norm": 0.6921826004981995, - "learning_rate": 7.868478984671054e-06, - "loss": 0.2331, - "step": 12250 - }, - { - "epoch": 1.1540943453993076, - "grad_norm": 0.6519464254379272, - "learning_rate": 7.867003692562533e-06, - "loss": 0.2406, - "step": 12251 - }, - { - "epoch": 1.1541885494924755, - "grad_norm": 0.6791825890541077, - "learning_rate": 7.865528449089628e-06, - "loss": 0.2313, - "step": 12252 - }, - { - "epoch": 1.1542827535856433, - "grad_norm": 0.7045342922210693, - "learning_rate": 7.864053254285973e-06, - "loss": 0.2058, - "step": 12253 - }, - { - "epoch": 1.1543769576788112, - "grad_norm": 0.6821885704994202, - "learning_rate": 7.862578108185208e-06, - "loss": 0.2188, - "step": 12254 - }, - { - "epoch": 1.154471161771979, - "grad_norm": 0.7017951011657715, - "learning_rate": 7.861103010820968e-06, - "loss": 0.224, - "step": 12255 - }, - { - "epoch": 1.154565365865147, - "grad_norm": 0.6462750434875488, - "learning_rate": 7.859627962226886e-06, - "loss": 0.2187, - "step": 12256 - }, - { - "epoch": 1.1546595699583146, - "grad_norm": 0.6207707524299622, - "learning_rate": 7.858152962436598e-06, - "loss": 0.2381, - "step": 12257 - }, - { - "epoch": 1.1547537740514826, - "grad_norm": 0.6876768469810486, - "learning_rate": 7.856678011483734e-06, - "loss": 0.204, - "step": 12258 - }, - { - "epoch": 1.1548479781446503, - "grad_norm": 0.6457028985023499, - "learning_rate": 7.85520310940193e-06, - "loss": 0.2276, - "step": 12259 - }, - { - "epoch": 1.1549421822378183, - "grad_norm": 0.6530483365058899, - "learning_rate": 7.853728256224806e-06, - "loss": 0.2135, - "step": 12260 - }, - { - "epoch": 1.155036386330986, - "grad_norm": 0.6017844080924988, - "learning_rate": 7.852253451986e-06, - "loss": 0.186, - "step": 12261 - }, - { - "epoch": 1.155130590424154, - "grad_norm": 0.658333420753479, - "learning_rate": 7.850778696719139e-06, - "loss": 0.2166, - "step": 12262 - }, - { - "epoch": 1.1552247945173217, - "grad_norm": 0.6860843300819397, - "learning_rate": 7.849303990457842e-06, - "loss": 0.2184, - "step": 12263 - }, - { - "epoch": 1.1553189986104897, - "grad_norm": 0.5907440185546875, - "learning_rate": 7.847829333235744e-06, - "loss": 0.1875, - "step": 12264 - }, - { - "epoch": 1.1554132027036574, - "grad_norm": 0.6708579659461975, - "learning_rate": 7.846354725086467e-06, - "loss": 0.2166, - "step": 12265 - }, - { - "epoch": 1.1555074067968254, - "grad_norm": 0.7163122892379761, - "learning_rate": 7.844880166043627e-06, - "loss": 0.2338, - "step": 12266 - }, - { - "epoch": 1.1556016108899931, - "grad_norm": 0.6653092503547668, - "learning_rate": 
7.84340565614086e-06, - "loss": 0.2095, - "step": 12267 - }, - { - "epoch": 1.155695814983161, - "grad_norm": 0.7395054697990417, - "learning_rate": 7.841931195411775e-06, - "loss": 0.1974, - "step": 12268 - }, - { - "epoch": 1.1557900190763288, - "grad_norm": 0.6676589250564575, - "learning_rate": 7.840456783889997e-06, - "loss": 0.2337, - "step": 12269 - }, - { - "epoch": 1.1558842231694968, - "grad_norm": 0.5892910957336426, - "learning_rate": 7.838982421609143e-06, - "loss": 0.2136, - "step": 12270 - }, - { - "epoch": 1.1559784272626645, - "grad_norm": 0.6500517129898071, - "learning_rate": 7.837508108602833e-06, - "loss": 0.2204, - "step": 12271 - }, - { - "epoch": 1.1560726313558325, - "grad_norm": 0.5860692858695984, - "learning_rate": 7.836033844904683e-06, - "loss": 0.1869, - "step": 12272 - }, - { - "epoch": 1.1561668354490002, - "grad_norm": 0.5967850685119629, - "learning_rate": 7.834559630548305e-06, - "loss": 0.1981, - "step": 12273 - }, - { - "epoch": 1.1562610395421682, - "grad_norm": 0.6711938381195068, - "learning_rate": 7.833085465567318e-06, - "loss": 0.2193, - "step": 12274 - }, - { - "epoch": 1.156355243635336, - "grad_norm": 0.6680728793144226, - "learning_rate": 7.831611349995335e-06, - "loss": 0.2319, - "step": 12275 - }, - { - "epoch": 1.1564494477285039, - "grad_norm": 0.5889575481414795, - "learning_rate": 7.830137283865965e-06, - "loss": 0.1822, - "step": 12276 - }, - { - "epoch": 1.1565436518216716, - "grad_norm": 0.6993216276168823, - "learning_rate": 7.82866326721282e-06, - "loss": 0.2094, - "step": 12277 - }, - { - "epoch": 1.1566378559148396, - "grad_norm": 0.6535523533821106, - "learning_rate": 7.827189300069513e-06, - "loss": 0.2183, - "step": 12278 - }, - { - "epoch": 1.1567320600080073, - "grad_norm": 0.6322247385978699, - "learning_rate": 7.825715382469651e-06, - "loss": 0.2107, - "step": 12279 - }, - { - "epoch": 1.1568262641011753, - "grad_norm": 0.6894719004631042, - "learning_rate": 7.824241514446835e-06, - "loss": 0.2256, - "step": 12280 - }, - { - "epoch": 1.156920468194343, - "grad_norm": 0.6726341247558594, - "learning_rate": 7.822767696034683e-06, - "loss": 0.2103, - "step": 12281 - }, - { - "epoch": 1.157014672287511, - "grad_norm": 0.629125714302063, - "learning_rate": 7.821293927266795e-06, - "loss": 0.1987, - "step": 12282 - }, - { - "epoch": 1.1571088763806787, - "grad_norm": 0.5876413583755493, - "learning_rate": 7.819820208176769e-06, - "loss": 0.1881, - "step": 12283 - }, - { - "epoch": 1.1572030804738467, - "grad_norm": 0.8241168260574341, - "learning_rate": 7.818346538798219e-06, - "loss": 0.2325, - "step": 12284 - }, - { - "epoch": 1.1572972845670144, - "grad_norm": 0.6371445059776306, - "learning_rate": 7.816872919164745e-06, - "loss": 0.2025, - "step": 12285 - }, - { - "epoch": 1.1573914886601824, - "grad_norm": 0.723773181438446, - "learning_rate": 7.815399349309935e-06, - "loss": 0.1986, - "step": 12286 - }, - { - "epoch": 1.1574856927533501, - "grad_norm": 0.599209725856781, - "learning_rate": 7.813925829267407e-06, - "loss": 0.1793, - "step": 12287 - }, - { - "epoch": 1.157579896846518, - "grad_norm": 0.608439028263092, - "learning_rate": 7.812452359070748e-06, - "loss": 0.1624, - "step": 12288 - }, - { - "epoch": 1.1576741009396858, - "grad_norm": 0.58980393409729, - "learning_rate": 7.81097893875356e-06, - "loss": 0.1982, - "step": 12289 - }, - { - "epoch": 1.1577683050328538, - "grad_norm": 0.6739835739135742, - "learning_rate": 7.809505568349434e-06, - "loss": 0.2403, - "step": 12290 - }, - { - "epoch": 
1.1578625091260215, - "grad_norm": 0.6799923777580261, - "learning_rate": 7.80803224789197e-06, - "loss": 0.2229, - "step": 12291 - }, - { - "epoch": 1.1579567132191895, - "grad_norm": 0.745564341545105, - "learning_rate": 7.806558977414763e-06, - "loss": 0.1875, - "step": 12292 - }, - { - "epoch": 1.1580509173123572, - "grad_norm": 0.6821733117103577, - "learning_rate": 7.8050857569514e-06, - "loss": 0.2278, - "step": 12293 - }, - { - "epoch": 1.158145121405525, - "grad_norm": 0.7225754857063293, - "learning_rate": 7.803612586535478e-06, - "loss": 0.2225, - "step": 12294 - }, - { - "epoch": 1.158239325498693, - "grad_norm": 0.6468100547790527, - "learning_rate": 7.802139466200586e-06, - "loss": 0.2064, - "step": 12295 - }, - { - "epoch": 1.1583335295918609, - "grad_norm": 0.6715960502624512, - "learning_rate": 7.80066639598031e-06, - "loss": 0.2039, - "step": 12296 - }, - { - "epoch": 1.1584277336850286, - "grad_norm": 0.5877363681793213, - "learning_rate": 7.799193375908245e-06, - "loss": 0.1999, - "step": 12297 - }, - { - "epoch": 1.1585219377781963, - "grad_norm": 0.6958502531051636, - "learning_rate": 7.797720406017975e-06, - "loss": 0.212, - "step": 12298 - }, - { - "epoch": 1.1586161418713643, - "grad_norm": 0.6634026169776917, - "learning_rate": 7.796247486343088e-06, - "loss": 0.2296, - "step": 12299 - }, - { - "epoch": 1.1587103459645323, - "grad_norm": 0.660721480846405, - "learning_rate": 7.79477461691716e-06, - "loss": 0.1861, - "step": 12300 - }, - { - "epoch": 1.1588045500577, - "grad_norm": 0.6169300079345703, - "learning_rate": 7.793301797773785e-06, - "loss": 0.1884, - "step": 12301 - }, - { - "epoch": 1.1588987541508677, - "grad_norm": 0.616231381893158, - "learning_rate": 7.791829028946544e-06, - "loss": 0.1909, - "step": 12302 - }, - { - "epoch": 1.1589929582440357, - "grad_norm": 0.6795166730880737, - "learning_rate": 7.790356310469009e-06, - "loss": 0.2078, - "step": 12303 - }, - { - "epoch": 1.1590871623372037, - "grad_norm": 0.6517577171325684, - "learning_rate": 7.788883642374774e-06, - "loss": 0.2032, - "step": 12304 - }, - { - "epoch": 1.1591813664303714, - "grad_norm": 0.6272116899490356, - "learning_rate": 7.78741102469741e-06, - "loss": 0.1733, - "step": 12305 - }, - { - "epoch": 1.1592755705235391, - "grad_norm": 0.6892327070236206, - "learning_rate": 7.785938457470492e-06, - "loss": 0.2244, - "step": 12306 - }, - { - "epoch": 1.159369774616707, - "grad_norm": 0.6756359338760376, - "learning_rate": 7.784465940727608e-06, - "loss": 0.2017, - "step": 12307 - }, - { - "epoch": 1.159463978709875, - "grad_norm": 0.6559037566184998, - "learning_rate": 7.782993474502323e-06, - "loss": 0.2332, - "step": 12308 - }, - { - "epoch": 1.1595581828030428, - "grad_norm": 0.6312324404716492, - "learning_rate": 7.781521058828218e-06, - "loss": 0.2001, - "step": 12309 - }, - { - "epoch": 1.1596523868962105, - "grad_norm": 0.6661595702171326, - "learning_rate": 7.78004869373886e-06, - "loss": 0.2121, - "step": 12310 - }, - { - "epoch": 1.1597465909893785, - "grad_norm": 0.752112865447998, - "learning_rate": 7.778576379267828e-06, - "loss": 0.2355, - "step": 12311 - }, - { - "epoch": 1.1598407950825464, - "grad_norm": 0.674813985824585, - "learning_rate": 7.77710411544869e-06, - "loss": 0.2039, - "step": 12312 - }, - { - "epoch": 1.1599349991757142, - "grad_norm": 0.653131365776062, - "learning_rate": 7.775631902315012e-06, - "loss": 0.2443, - "step": 12313 - }, - { - "epoch": 1.160029203268882, - "grad_norm": 0.6176143288612366, - "learning_rate": 7.774159739900371e-06, - 
"loss": 0.21, - "step": 12314 - }, - { - "epoch": 1.1601234073620499, - "grad_norm": 0.6649085879325867, - "learning_rate": 7.77268762823833e-06, - "loss": 0.1977, - "step": 12315 - }, - { - "epoch": 1.1602176114552178, - "grad_norm": 0.6967582106590271, - "learning_rate": 7.771215567362454e-06, - "loss": 0.1981, - "step": 12316 - }, - { - "epoch": 1.1603118155483856, - "grad_norm": 0.6099845170974731, - "learning_rate": 7.76974355730631e-06, - "loss": 0.192, - "step": 12317 - }, - { - "epoch": 1.1604060196415533, - "grad_norm": 0.7052638530731201, - "learning_rate": 7.768271598103465e-06, - "loss": 0.2115, - "step": 12318 - }, - { - "epoch": 1.1605002237347213, - "grad_norm": 0.6722414493560791, - "learning_rate": 7.766799689787478e-06, - "loss": 0.24, - "step": 12319 - }, - { - "epoch": 1.1605944278278892, - "grad_norm": 0.674592912197113, - "learning_rate": 7.765327832391907e-06, - "loss": 0.2174, - "step": 12320 - }, - { - "epoch": 1.160688631921057, - "grad_norm": 0.6404471397399902, - "learning_rate": 7.763856025950321e-06, - "loss": 0.2006, - "step": 12321 - }, - { - "epoch": 1.1607828360142247, - "grad_norm": 0.6440375447273254, - "learning_rate": 7.762384270496279e-06, - "loss": 0.2116, - "step": 12322 - }, - { - "epoch": 1.1608770401073927, - "grad_norm": 0.6806245446205139, - "learning_rate": 7.760912566063328e-06, - "loss": 0.216, - "step": 12323 - }, - { - "epoch": 1.1609712442005606, - "grad_norm": 0.6037777066230774, - "learning_rate": 7.759440912685043e-06, - "loss": 0.2031, - "step": 12324 - }, - { - "epoch": 1.1610654482937284, - "grad_norm": 0.6117852330207825, - "learning_rate": 7.757969310394965e-06, - "loss": 0.195, - "step": 12325 - }, - { - "epoch": 1.1611596523868961, - "grad_norm": 0.6487354040145874, - "learning_rate": 7.756497759226652e-06, - "loss": 0.1964, - "step": 12326 - }, - { - "epoch": 1.161253856480064, - "grad_norm": 0.5659785866737366, - "learning_rate": 7.755026259213665e-06, - "loss": 0.1934, - "step": 12327 - }, - { - "epoch": 1.1613480605732318, - "grad_norm": 0.5937870740890503, - "learning_rate": 7.753554810389549e-06, - "loss": 0.1827, - "step": 12328 - }, - { - "epoch": 1.1614422646663998, - "grad_norm": 0.6416780352592468, - "learning_rate": 7.752083412787858e-06, - "loss": 0.2114, - "step": 12329 - }, - { - "epoch": 1.1615364687595675, - "grad_norm": 0.6302727460861206, - "learning_rate": 7.750612066442138e-06, - "loss": 0.2041, - "step": 12330 - }, - { - "epoch": 1.1616306728527355, - "grad_norm": 0.6919407248497009, - "learning_rate": 7.749140771385945e-06, - "loss": 0.2045, - "step": 12331 - }, - { - "epoch": 1.1617248769459032, - "grad_norm": 0.6700026988983154, - "learning_rate": 7.747669527652824e-06, - "loss": 0.2063, - "step": 12332 - }, - { - "epoch": 1.1618190810390712, - "grad_norm": 0.5566019415855408, - "learning_rate": 7.746198335276318e-06, - "loss": 0.2084, - "step": 12333 - }, - { - "epoch": 1.161913285132239, - "grad_norm": 0.7124121189117432, - "learning_rate": 7.744727194289977e-06, - "loss": 0.2255, - "step": 12334 - }, - { - "epoch": 1.1620074892254069, - "grad_norm": 0.6468402147293091, - "learning_rate": 7.743256104727346e-06, - "loss": 0.2163, - "step": 12335 - }, - { - "epoch": 1.1621016933185746, - "grad_norm": 0.7208757400512695, - "learning_rate": 7.741785066621962e-06, - "loss": 0.199, - "step": 12336 - }, - { - "epoch": 1.1621958974117426, - "grad_norm": 0.6091558933258057, - "learning_rate": 7.740314080007374e-06, - "loss": 0.1943, - "step": 12337 - }, - { - "epoch": 1.1622901015049103, - "grad_norm": 
0.646746039390564, - "learning_rate": 7.738843144917119e-06, - "loss": 0.2223, - "step": 12338 - }, - { - "epoch": 1.1623843055980783, - "grad_norm": 0.661420464515686, - "learning_rate": 7.737372261384738e-06, - "loss": 0.2055, - "step": 12339 - }, - { - "epoch": 1.162478509691246, - "grad_norm": 0.6283174753189087, - "learning_rate": 7.735901429443766e-06, - "loss": 0.1886, - "step": 12340 - }, - { - "epoch": 1.162572713784414, - "grad_norm": 0.6365818977355957, - "learning_rate": 7.734430649127745e-06, - "loss": 0.1838, - "step": 12341 - }, - { - "epoch": 1.1626669178775817, - "grad_norm": 0.6031051874160767, - "learning_rate": 7.73295992047021e-06, - "loss": 0.2094, - "step": 12342 - }, - { - "epoch": 1.1627611219707497, - "grad_norm": 0.6952539086341858, - "learning_rate": 7.73148924350469e-06, - "loss": 0.2276, - "step": 12343 - }, - { - "epoch": 1.1628553260639174, - "grad_norm": 0.6067687273025513, - "learning_rate": 7.73001861826473e-06, - "loss": 0.192, - "step": 12344 - }, - { - "epoch": 1.1629495301570854, - "grad_norm": 0.6541218161582947, - "learning_rate": 7.728548044783854e-06, - "loss": 0.2019, - "step": 12345 - }, - { - "epoch": 1.163043734250253, - "grad_norm": 0.7503097057342529, - "learning_rate": 7.72707752309559e-06, - "loss": 0.1968, - "step": 12346 - }, - { - "epoch": 1.163137938343421, - "grad_norm": 0.7333747148513794, - "learning_rate": 7.725607053233482e-06, - "loss": 0.2061, - "step": 12347 - }, - { - "epoch": 1.1632321424365888, - "grad_norm": 0.6139345765113831, - "learning_rate": 7.724136635231047e-06, - "loss": 0.2112, - "step": 12348 - }, - { - "epoch": 1.1633263465297567, - "grad_norm": 0.6344771981239319, - "learning_rate": 7.722666269121815e-06, - "loss": 0.2271, - "step": 12349 - }, - { - "epoch": 1.1634205506229245, - "grad_norm": 0.6152917146682739, - "learning_rate": 7.721195954939315e-06, - "loss": 0.2004, - "step": 12350 - }, - { - "epoch": 1.1635147547160924, - "grad_norm": 0.5891529321670532, - "learning_rate": 7.719725692717071e-06, - "loss": 0.2099, - "step": 12351 - }, - { - "epoch": 1.1636089588092602, - "grad_norm": 0.6408778429031372, - "learning_rate": 7.718255482488609e-06, - "loss": 0.2316, - "step": 12352 - }, - { - "epoch": 1.1637031629024281, - "grad_norm": 0.6090258359909058, - "learning_rate": 7.716785324287447e-06, - "loss": 0.1987, - "step": 12353 - }, - { - "epoch": 1.1637973669955959, - "grad_norm": 0.6803856492042542, - "learning_rate": 7.715315218147116e-06, - "loss": 0.2157, - "step": 12354 - }, - { - "epoch": 1.1638915710887638, - "grad_norm": 0.7470532655715942, - "learning_rate": 7.71384516410113e-06, - "loss": 0.2351, - "step": 12355 - }, - { - "epoch": 1.1639857751819316, - "grad_norm": 0.6469044089317322, - "learning_rate": 7.712375162183007e-06, - "loss": 0.2039, - "step": 12356 - }, - { - "epoch": 1.1640799792750995, - "grad_norm": 0.6264247894287109, - "learning_rate": 7.710905212426271e-06, - "loss": 0.2036, - "step": 12357 - }, - { - "epoch": 1.1641741833682673, - "grad_norm": 0.6687905788421631, - "learning_rate": 7.709435314864435e-06, - "loss": 0.1995, - "step": 12358 - }, - { - "epoch": 1.1642683874614352, - "grad_norm": 0.5562198162078857, - "learning_rate": 7.70796546953102e-06, - "loss": 0.1949, - "step": 12359 - }, - { - "epoch": 1.164362591554603, - "grad_norm": 0.6398401260375977, - "learning_rate": 7.70649567645953e-06, - "loss": 0.2098, - "step": 12360 - }, - { - "epoch": 1.164456795647771, - "grad_norm": 0.6486343741416931, - "learning_rate": 7.70502593568349e-06, - "loss": 0.2288, - "step": 12361 - 
}, - { - "epoch": 1.1645509997409387, - "grad_norm": 0.6357438564300537, - "learning_rate": 7.70355624723641e-06, - "loss": 0.2155, - "step": 12362 - }, - { - "epoch": 1.1646452038341066, - "grad_norm": 0.8581362962722778, - "learning_rate": 7.702086611151792e-06, - "loss": 0.1986, - "step": 12363 - }, - { - "epoch": 1.1647394079272744, - "grad_norm": 0.6492730975151062, - "learning_rate": 7.70061702746316e-06, - "loss": 0.228, - "step": 12364 - }, - { - "epoch": 1.1648336120204423, - "grad_norm": 0.6923218369483948, - "learning_rate": 7.699147496204014e-06, - "loss": 0.2222, - "step": 12365 - }, - { - "epoch": 1.16492781611361, - "grad_norm": 0.6648165583610535, - "learning_rate": 7.697678017407858e-06, - "loss": 0.2008, - "step": 12366 - }, - { - "epoch": 1.165022020206778, - "grad_norm": 0.6043445467948914, - "learning_rate": 7.696208591108211e-06, - "loss": 0.1895, - "step": 12367 - }, - { - "epoch": 1.1651162242999458, - "grad_norm": 0.597754955291748, - "learning_rate": 7.694739217338569e-06, - "loss": 0.1972, - "step": 12368 - }, - { - "epoch": 1.1652104283931137, - "grad_norm": 0.6218483448028564, - "learning_rate": 7.693269896132438e-06, - "loss": 0.1866, - "step": 12369 - }, - { - "epoch": 1.1653046324862815, - "grad_norm": 0.6251735091209412, - "learning_rate": 7.691800627523319e-06, - "loss": 0.1971, - "step": 12370 - }, - { - "epoch": 1.1653988365794494, - "grad_norm": 0.5660892724990845, - "learning_rate": 7.690331411544716e-06, - "loss": 0.1685, - "step": 12371 - }, - { - "epoch": 1.1654930406726172, - "grad_norm": 0.646435022354126, - "learning_rate": 7.688862248230132e-06, - "loss": 0.2005, - "step": 12372 - }, - { - "epoch": 1.1655872447657851, - "grad_norm": 0.6149870753288269, - "learning_rate": 7.687393137613056e-06, - "loss": 0.1722, - "step": 12373 - }, - { - "epoch": 1.1656814488589529, - "grad_norm": 0.7423371076583862, - "learning_rate": 7.685924079727e-06, - "loss": 0.2273, - "step": 12374 - }, - { - "epoch": 1.1657756529521208, - "grad_norm": 0.8053909540176392, - "learning_rate": 7.684455074605452e-06, - "loss": 0.2573, - "step": 12375 - }, - { - "epoch": 1.1658698570452886, - "grad_norm": 0.7121588587760925, - "learning_rate": 7.682986122281906e-06, - "loss": 0.2391, - "step": 12376 - }, - { - "epoch": 1.1659640611384565, - "grad_norm": 0.6963463425636292, - "learning_rate": 7.681517222789863e-06, - "loss": 0.2259, - "step": 12377 - }, - { - "epoch": 1.1660582652316243, - "grad_norm": 0.6686302423477173, - "learning_rate": 7.680048376162813e-06, - "loss": 0.2208, - "step": 12378 - }, - { - "epoch": 1.1661524693247922, - "grad_norm": 0.5913288593292236, - "learning_rate": 7.67857958243425e-06, - "loss": 0.2128, - "step": 12379 - }, - { - "epoch": 1.16624667341796, - "grad_norm": 0.7313753366470337, - "learning_rate": 7.677110841637654e-06, - "loss": 0.2324, - "step": 12380 - }, - { - "epoch": 1.166340877511128, - "grad_norm": 0.7491220831871033, - "learning_rate": 7.675642153806531e-06, - "loss": 0.2179, - "step": 12381 - }, - { - "epoch": 1.1664350816042957, - "grad_norm": 0.6844172477722168, - "learning_rate": 7.674173518974362e-06, - "loss": 0.1929, - "step": 12382 - }, - { - "epoch": 1.1665292856974636, - "grad_norm": 0.5895171165466309, - "learning_rate": 7.672704937174627e-06, - "loss": 0.2041, - "step": 12383 - }, - { - "epoch": 1.1666234897906314, - "grad_norm": 0.6461688876152039, - "learning_rate": 7.671236408440826e-06, - "loss": 0.1769, - "step": 12384 - }, - { - "epoch": 1.1667176938837993, - "grad_norm": 0.6043210625648499, - "learning_rate": 
7.669767932806433e-06, - "loss": 0.1939, - "step": 12385 - }, - { - "epoch": 1.166811897976967, - "grad_norm": 0.6969326734542847, - "learning_rate": 7.668299510304931e-06, - "loss": 0.2281, - "step": 12386 - }, - { - "epoch": 1.166906102070135, - "grad_norm": 0.5954148173332214, - "learning_rate": 7.666831140969814e-06, - "loss": 0.201, - "step": 12387 - }, - { - "epoch": 1.1670003061633027, - "grad_norm": 0.6823163032531738, - "learning_rate": 7.665362824834552e-06, - "loss": 0.2005, - "step": 12388 - }, - { - "epoch": 1.1670945102564707, - "grad_norm": 0.6804181337356567, - "learning_rate": 7.663894561932628e-06, - "loss": 0.211, - "step": 12389 - }, - { - "epoch": 1.1671887143496384, - "grad_norm": 0.6363169550895691, - "learning_rate": 7.662426352297519e-06, - "loss": 0.2165, - "step": 12390 - }, - { - "epoch": 1.1672829184428064, - "grad_norm": 0.6093109846115112, - "learning_rate": 7.660958195962707e-06, - "loss": 0.198, - "step": 12391 - }, - { - "epoch": 1.1673771225359741, - "grad_norm": 0.6550191044807434, - "learning_rate": 7.659490092961665e-06, - "loss": 0.183, - "step": 12392 - }, - { - "epoch": 1.167471326629142, - "grad_norm": 0.9614073634147644, - "learning_rate": 7.658022043327867e-06, - "loss": 0.2112, - "step": 12393 - }, - { - "epoch": 1.1675655307223098, - "grad_norm": 0.6851538419723511, - "learning_rate": 7.65655404709479e-06, - "loss": 0.2063, - "step": 12394 - }, - { - "epoch": 1.1676597348154778, - "grad_norm": 0.6280253529548645, - "learning_rate": 7.655086104295904e-06, - "loss": 0.2169, - "step": 12395 - }, - { - "epoch": 1.1677539389086455, - "grad_norm": 0.6450567841529846, - "learning_rate": 7.65361821496468e-06, - "loss": 0.2165, - "step": 12396 - }, - { - "epoch": 1.1678481430018135, - "grad_norm": 0.6430543661117554, - "learning_rate": 7.652150379134593e-06, - "loss": 0.2103, - "step": 12397 - }, - { - "epoch": 1.1679423470949812, - "grad_norm": 0.6612107753753662, - "learning_rate": 7.650682596839107e-06, - "loss": 0.2159, - "step": 12398 - }, - { - "epoch": 1.1680365511881492, - "grad_norm": 0.6067182421684265, - "learning_rate": 7.649214868111692e-06, - "loss": 0.2196, - "step": 12399 - }, - { - "epoch": 1.168130755281317, - "grad_norm": 0.6457757353782654, - "learning_rate": 7.647747192985808e-06, - "loss": 0.209, - "step": 12400 - }, - { - "epoch": 1.168224959374485, - "grad_norm": 0.6386038064956665, - "learning_rate": 7.646279571494931e-06, - "loss": 0.1793, - "step": 12401 - }, - { - "epoch": 1.1683191634676526, - "grad_norm": 0.6418023705482483, - "learning_rate": 7.644812003672521e-06, - "loss": 0.2076, - "step": 12402 - }, - { - "epoch": 1.1684133675608206, - "grad_norm": 0.6170333027839661, - "learning_rate": 7.643344489552033e-06, - "loss": 0.1892, - "step": 12403 - }, - { - "epoch": 1.1685075716539883, - "grad_norm": 0.5894877910614014, - "learning_rate": 7.641877029166943e-06, - "loss": 0.1613, - "step": 12404 - }, - { - "epoch": 1.1686017757471563, - "grad_norm": 0.5999869108200073, - "learning_rate": 7.640409622550702e-06, - "loss": 0.2207, - "step": 12405 - }, - { - "epoch": 1.168695979840324, - "grad_norm": 0.641161322593689, - "learning_rate": 7.638942269736765e-06, - "loss": 0.2084, - "step": 12406 - }, - { - "epoch": 1.168790183933492, - "grad_norm": 0.616152286529541, - "learning_rate": 7.637474970758602e-06, - "loss": 0.2041, - "step": 12407 - }, - { - "epoch": 1.1688843880266597, - "grad_norm": 0.6475275754928589, - "learning_rate": 7.636007725649662e-06, - "loss": 0.2315, - "step": 12408 - }, - { - "epoch": 1.1689785921198277, 
- "grad_norm": 0.698611855506897, - "learning_rate": 7.634540534443402e-06, - "loss": 0.1858, - "step": 12409 - }, - { - "epoch": 1.1690727962129954, - "grad_norm": 0.6852428913116455, - "learning_rate": 7.633073397173274e-06, - "loss": 0.2193, - "step": 12410 - }, - { - "epoch": 1.1691670003061634, - "grad_norm": 0.6028615832328796, - "learning_rate": 7.631606313872736e-06, - "loss": 0.2073, - "step": 12411 - }, - { - "epoch": 1.1692612043993311, - "grad_norm": 0.5900949835777283, - "learning_rate": 7.630139284575233e-06, - "loss": 0.1696, - "step": 12412 - }, - { - "epoch": 1.169355408492499, - "grad_norm": 0.64311683177948, - "learning_rate": 7.628672309314221e-06, - "loss": 0.2132, - "step": 12413 - }, - { - "epoch": 1.1694496125856668, - "grad_norm": 0.6563279032707214, - "learning_rate": 7.627205388123149e-06, - "loss": 0.2041, - "step": 12414 - }, - { - "epoch": 1.1695438166788348, - "grad_norm": 0.6046981811523438, - "learning_rate": 7.625738521035463e-06, - "loss": 0.1825, - "step": 12415 - }, - { - "epoch": 1.1696380207720025, - "grad_norm": 0.6331227421760559, - "learning_rate": 7.6242717080846096e-06, - "loss": 0.1966, - "step": 12416 - }, - { - "epoch": 1.1697322248651705, - "grad_norm": 0.6417523622512817, - "learning_rate": 7.622804949304037e-06, - "loss": 0.2222, - "step": 12417 - }, - { - "epoch": 1.1698264289583382, - "grad_norm": 0.610335111618042, - "learning_rate": 7.6213382447271875e-06, - "loss": 0.1899, - "step": 12418 - }, - { - "epoch": 1.1699206330515062, - "grad_norm": 0.5904601216316223, - "learning_rate": 7.619871594387507e-06, - "loss": 0.1816, - "step": 12419 - }, - { - "epoch": 1.170014837144674, - "grad_norm": 0.6495140790939331, - "learning_rate": 7.618404998318428e-06, - "loss": 0.2124, - "step": 12420 - }, - { - "epoch": 1.1701090412378419, - "grad_norm": 0.630237877368927, - "learning_rate": 7.616938456553405e-06, - "loss": 0.2125, - "step": 12421 - }, - { - "epoch": 1.1702032453310096, - "grad_norm": 0.6966006755828857, - "learning_rate": 7.6154719691258696e-06, - "loss": 0.1984, - "step": 12422 - }, - { - "epoch": 1.1702974494241776, - "grad_norm": 0.6157569289207458, - "learning_rate": 7.614005536069257e-06, - "loss": 0.1825, - "step": 12423 - }, - { - "epoch": 1.1703916535173453, - "grad_norm": 0.6618136763572693, - "learning_rate": 7.612539157417013e-06, - "loss": 0.2246, - "step": 12424 - }, - { - "epoch": 1.1704858576105133, - "grad_norm": 0.6306236982345581, - "learning_rate": 7.611072833202568e-06, - "loss": 0.2135, - "step": 12425 - }, - { - "epoch": 1.170580061703681, - "grad_norm": 0.6721271276473999, - "learning_rate": 7.609606563459351e-06, - "loss": 0.1914, - "step": 12426 - }, - { - "epoch": 1.170674265796849, - "grad_norm": 0.6963659524917603, - "learning_rate": 7.608140348220808e-06, - "loss": 0.2175, - "step": 12427 - }, - { - "epoch": 1.1707684698900167, - "grad_norm": 0.5983977317810059, - "learning_rate": 7.606674187520362e-06, - "loss": 0.199, - "step": 12428 - }, - { - "epoch": 1.1708626739831847, - "grad_norm": 0.6682076454162598, - "learning_rate": 7.6052080813914466e-06, - "loss": 0.2196, - "step": 12429 - }, - { - "epoch": 1.1709568780763524, - "grad_norm": 0.7540499567985535, - "learning_rate": 7.603742029867488e-06, - "loss": 0.2503, - "step": 12430 - }, - { - "epoch": 1.1710510821695204, - "grad_norm": 0.7160568833351135, - "learning_rate": 7.602276032981919e-06, - "loss": 0.2099, - "step": 12431 - }, - { - "epoch": 1.171145286262688, - "grad_norm": 0.6036234498023987, - "learning_rate": 7.600810090768165e-06, - "loss": 
0.2025, - "step": 12432 - }, - { - "epoch": 1.1712394903558558, - "grad_norm": 0.5967129468917847, - "learning_rate": 7.599344203259648e-06, - "loss": 0.1864, - "step": 12433 - }, - { - "epoch": 1.1713336944490238, - "grad_norm": 0.6145238280296326, - "learning_rate": 7.5978783704898e-06, - "loss": 0.198, - "step": 12434 - }, - { - "epoch": 1.1714278985421918, - "grad_norm": 0.6215044260025024, - "learning_rate": 7.5964125924920395e-06, - "loss": 0.1923, - "step": 12435 - }, - { - "epoch": 1.1715221026353595, - "grad_norm": 0.6010145545005798, - "learning_rate": 7.5949468692997865e-06, - "loss": 0.1897, - "step": 12436 - }, - { - "epoch": 1.1716163067285272, - "grad_norm": 0.6129345893859863, - "learning_rate": 7.593481200946467e-06, - "loss": 0.2052, - "step": 12437 - }, - { - "epoch": 1.1717105108216952, - "grad_norm": 0.6434243321418762, - "learning_rate": 7.5920155874654965e-06, - "loss": 0.196, - "step": 12438 - }, - { - "epoch": 1.1718047149148632, - "grad_norm": 0.5903400778770447, - "learning_rate": 7.590550028890298e-06, - "loss": 0.2001, - "step": 12439 - }, - { - "epoch": 1.171898919008031, - "grad_norm": 0.7123304009437561, - "learning_rate": 7.589084525254278e-06, - "loss": 0.2554, - "step": 12440 - }, - { - "epoch": 1.1719931231011986, - "grad_norm": 0.63892662525177, - "learning_rate": 7.587619076590867e-06, - "loss": 0.2232, - "step": 12441 - }, - { - "epoch": 1.1720873271943666, - "grad_norm": 0.7637938857078552, - "learning_rate": 7.586153682933468e-06, - "loss": 0.1986, - "step": 12442 - }, - { - "epoch": 1.1721815312875346, - "grad_norm": 0.6968367695808411, - "learning_rate": 7.584688344315495e-06, - "loss": 0.1966, - "step": 12443 - }, - { - "epoch": 1.1722757353807023, - "grad_norm": 0.6212029457092285, - "learning_rate": 7.5832230607703696e-06, - "loss": 0.2226, - "step": 12444 - }, - { - "epoch": 1.17236993947387, - "grad_norm": 0.6339184045791626, - "learning_rate": 7.5817578323314935e-06, - "loss": 0.2022, - "step": 12445 - }, - { - "epoch": 1.172464143567038, - "grad_norm": 0.6979104280471802, - "learning_rate": 7.580292659032274e-06, - "loss": 0.202, - "step": 12446 - }, - { - "epoch": 1.172558347660206, - "grad_norm": 0.6808230876922607, - "learning_rate": 7.578827540906132e-06, - "loss": 0.2121, - "step": 12447 - }, - { - "epoch": 1.1726525517533737, - "grad_norm": 0.6211710572242737, - "learning_rate": 7.577362477986463e-06, - "loss": 0.22, - "step": 12448 - }, - { - "epoch": 1.1727467558465414, - "grad_norm": 0.5926870703697205, - "learning_rate": 7.575897470306677e-06, - "loss": 0.185, - "step": 12449 - }, - { - "epoch": 1.1728409599397094, - "grad_norm": 0.6486456394195557, - "learning_rate": 7.574432517900174e-06, - "loss": 0.1874, - "step": 12450 - }, - { - "epoch": 1.1729351640328773, - "grad_norm": 0.5857219696044922, - "learning_rate": 7.572967620800364e-06, - "loss": 0.1827, - "step": 12451 - }, - { - "epoch": 1.173029368126045, - "grad_norm": 0.6361328363418579, - "learning_rate": 7.571502779040646e-06, - "loss": 0.1992, - "step": 12452 - }, - { - "epoch": 1.1731235722192128, - "grad_norm": 0.6601576209068298, - "learning_rate": 7.570037992654418e-06, - "loss": 0.2079, - "step": 12453 - }, - { - "epoch": 1.1732177763123808, - "grad_norm": 0.6744426488876343, - "learning_rate": 7.568573261675083e-06, - "loss": 0.2184, - "step": 12454 - }, - { - "epoch": 1.1733119804055487, - "grad_norm": 0.6544932723045349, - "learning_rate": 7.56710858613604e-06, - "loss": 0.2035, - "step": 12455 - }, - { - "epoch": 1.1734061844987165, - "grad_norm": 
0.6785704493522644, - "learning_rate": 7.5656439660706795e-06, - "loss": 0.1711, - "step": 12456 - }, - { - "epoch": 1.1735003885918842, - "grad_norm": 0.6942324638366699, - "learning_rate": 7.564179401512404e-06, - "loss": 0.2108, - "step": 12457 - }, - { - "epoch": 1.1735945926850522, - "grad_norm": 0.7869208455085754, - "learning_rate": 7.562714892494606e-06, - "loss": 0.2006, - "step": 12458 - }, - { - "epoch": 1.1736887967782201, - "grad_norm": 0.6158934831619263, - "learning_rate": 7.561250439050679e-06, - "loss": 0.1902, - "step": 12459 - }, - { - "epoch": 1.1737830008713879, - "grad_norm": 0.6476064324378967, - "learning_rate": 7.559786041214008e-06, - "loss": 0.2208, - "step": 12460 - }, - { - "epoch": 1.1738772049645556, - "grad_norm": 0.6589324474334717, - "learning_rate": 7.558321699017995e-06, - "loss": 0.2154, - "step": 12461 - }, - { - "epoch": 1.1739714090577236, - "grad_norm": 0.6076422333717346, - "learning_rate": 7.556857412496021e-06, - "loss": 0.1895, - "step": 12462 - }, - { - "epoch": 1.1740656131508915, - "grad_norm": 0.6216500401496887, - "learning_rate": 7.555393181681473e-06, - "loss": 0.2136, - "step": 12463 - }, - { - "epoch": 1.1741598172440593, - "grad_norm": 0.7523494958877563, - "learning_rate": 7.553929006607747e-06, - "loss": 0.2385, - "step": 12464 - }, - { - "epoch": 1.174254021337227, - "grad_norm": 0.6163733005523682, - "learning_rate": 7.55246488730822e-06, - "loss": 0.1742, - "step": 12465 - }, - { - "epoch": 1.174348225430395, - "grad_norm": 0.6610745191574097, - "learning_rate": 7.551000823816278e-06, - "loss": 0.2228, - "step": 12466 - }, - { - "epoch": 1.1744424295235627, - "grad_norm": 0.7351732850074768, - "learning_rate": 7.549536816165306e-06, - "loss": 0.2204, - "step": 12467 - }, - { - "epoch": 1.1745366336167307, - "grad_norm": 0.708152711391449, - "learning_rate": 7.548072864388684e-06, - "loss": 0.2432, - "step": 12468 - }, - { - "epoch": 1.1746308377098984, - "grad_norm": 0.5890923142433167, - "learning_rate": 7.546608968519793e-06, - "loss": 0.1723, - "step": 12469 - }, - { - "epoch": 1.1747250418030664, - "grad_norm": 0.689551055431366, - "learning_rate": 7.545145128592009e-06, - "loss": 0.243, - "step": 12470 - }, - { - "epoch": 1.174819245896234, - "grad_norm": 0.6270465850830078, - "learning_rate": 7.543681344638716e-06, - "loss": 0.1835, - "step": 12471 - }, - { - "epoch": 1.174913449989402, - "grad_norm": 0.6303161978721619, - "learning_rate": 7.542217616693286e-06, - "loss": 0.2158, - "step": 12472 - }, - { - "epoch": 1.1750076540825698, - "grad_norm": 0.6889188289642334, - "learning_rate": 7.540753944789094e-06, - "loss": 0.1872, - "step": 12473 - }, - { - "epoch": 1.1751018581757378, - "grad_norm": 0.6560834646224976, - "learning_rate": 7.539290328959517e-06, - "loss": 0.1973, - "step": 12474 - }, - { - "epoch": 1.1751960622689055, - "grad_norm": 0.668216347694397, - "learning_rate": 7.537826769237926e-06, - "loss": 0.1869, - "step": 12475 - }, - { - "epoch": 1.1752902663620735, - "grad_norm": 0.7283650040626526, - "learning_rate": 7.53636326565769e-06, - "loss": 0.2107, - "step": 12476 - }, - { - "epoch": 1.1753844704552412, - "grad_norm": 0.6102532744407654, - "learning_rate": 7.534899818252185e-06, - "loss": 0.2, - "step": 12477 - }, - { - "epoch": 1.1754786745484092, - "grad_norm": 0.6530992984771729, - "learning_rate": 7.533436427054776e-06, - "loss": 0.1785, - "step": 12478 - }, - { - "epoch": 1.175572878641577, - "grad_norm": 0.6950181722640991, - "learning_rate": 7.531973092098832e-06, - "loss": 0.1912, - "step": 
12479 - }, - { - "epoch": 1.1756670827347449, - "grad_norm": 0.7558364272117615, - "learning_rate": 7.5305098134177135e-06, - "loss": 0.2076, - "step": 12480 - }, - { - "epoch": 1.1757612868279126, - "grad_norm": 0.6683874130249023, - "learning_rate": 7.5290465910447966e-06, - "loss": 0.2078, - "step": 12481 - }, - { - "epoch": 1.1758554909210805, - "grad_norm": 0.6612247228622437, - "learning_rate": 7.527583425013436e-06, - "loss": 0.2154, - "step": 12482 - }, - { - "epoch": 1.1759496950142483, - "grad_norm": 0.6922287940979004, - "learning_rate": 7.526120315356993e-06, - "loss": 0.2315, - "step": 12483 - }, - { - "epoch": 1.1760438991074162, - "grad_norm": 0.6004050374031067, - "learning_rate": 7.524657262108839e-06, - "loss": 0.1611, - "step": 12484 - }, - { - "epoch": 1.176138103200584, - "grad_norm": 0.7073184847831726, - "learning_rate": 7.523194265302326e-06, - "loss": 0.2229, - "step": 12485 - }, - { - "epoch": 1.176232307293752, - "grad_norm": 0.7124186754226685, - "learning_rate": 7.521731324970812e-06, - "loss": 0.2222, - "step": 12486 - }, - { - "epoch": 1.1763265113869197, - "grad_norm": 0.6939953565597534, - "learning_rate": 7.520268441147658e-06, - "loss": 0.2161, - "step": 12487 - }, - { - "epoch": 1.1764207154800876, - "grad_norm": 0.6451032161712646, - "learning_rate": 7.518805613866219e-06, - "loss": 0.194, - "step": 12488 - }, - { - "epoch": 1.1765149195732554, - "grad_norm": 0.6584877371788025, - "learning_rate": 7.517342843159849e-06, - "loss": 0.2101, - "step": 12489 - }, - { - "epoch": 1.1766091236664233, - "grad_norm": 0.6335259675979614, - "learning_rate": 7.5158801290619e-06, - "loss": 0.2135, - "step": 12490 - }, - { - "epoch": 1.176703327759591, - "grad_norm": 0.6652558445930481, - "learning_rate": 7.514417471605728e-06, - "loss": 0.2138, - "step": 12491 - }, - { - "epoch": 1.176797531852759, - "grad_norm": 0.6807827949523926, - "learning_rate": 7.5129548708246805e-06, - "loss": 0.2554, - "step": 12492 - }, - { - "epoch": 1.1768917359459268, - "grad_norm": 0.589968204498291, - "learning_rate": 7.511492326752107e-06, - "loss": 0.1976, - "step": 12493 - }, - { - "epoch": 1.1769859400390947, - "grad_norm": 0.6328553557395935, - "learning_rate": 7.510029839421359e-06, - "loss": 0.1914, - "step": 12494 - }, - { - "epoch": 1.1770801441322625, - "grad_norm": 0.6645973920822144, - "learning_rate": 7.508567408865781e-06, - "loss": 0.2058, - "step": 12495 - }, - { - "epoch": 1.1771743482254304, - "grad_norm": 0.6332281827926636, - "learning_rate": 7.507105035118718e-06, - "loss": 0.1884, - "step": 12496 - }, - { - "epoch": 1.1772685523185982, - "grad_norm": 0.5919604897499084, - "learning_rate": 7.5056427182135175e-06, - "loss": 0.2119, - "step": 12497 - }, - { - "epoch": 1.1773627564117661, - "grad_norm": 0.6027520298957825, - "learning_rate": 7.50418045818352e-06, - "loss": 0.1999, - "step": 12498 - }, - { - "epoch": 1.1774569605049339, - "grad_norm": 0.7183255553245544, - "learning_rate": 7.502718255062071e-06, - "loss": 0.2141, - "step": 12499 - }, - { - "epoch": 1.1775511645981018, - "grad_norm": 0.6482458114624023, - "learning_rate": 7.5012561088825e-06, - "loss": 0.2052, - "step": 12500 - }, - { - "epoch": 1.1776453686912696, - "grad_norm": 0.6224600672721863, - "learning_rate": 7.499794019678162e-06, - "loss": 0.207, - "step": 12501 - }, - { - "epoch": 1.1777395727844375, - "grad_norm": 0.6822807192802429, - "learning_rate": 7.4983319874823835e-06, - "loss": 0.2177, - "step": 12502 - }, - { - "epoch": 1.1778337768776053, - "grad_norm": 0.6278730034828186, - 
"learning_rate": 7.496870012328501e-06, - "loss": 0.2042, - "step": 12503 - }, - { - "epoch": 1.1779279809707732, - "grad_norm": 0.6707857847213745, - "learning_rate": 7.4954080942498605e-06, - "loss": 0.2379, - "step": 12504 - }, - { - "epoch": 1.178022185063941, - "grad_norm": 0.6079289317131042, - "learning_rate": 7.493946233279787e-06, - "loss": 0.1798, - "step": 12505 - }, - { - "epoch": 1.178116389157109, - "grad_norm": 0.6748082041740417, - "learning_rate": 7.492484429451611e-06, - "loss": 0.1939, - "step": 12506 - }, - { - "epoch": 1.1782105932502767, - "grad_norm": 0.6287468075752258, - "learning_rate": 7.491022682798671e-06, - "loss": 0.1925, - "step": 12507 - }, - { - "epoch": 1.1783047973434446, - "grad_norm": 0.7353914380073547, - "learning_rate": 7.489560993354295e-06, - "loss": 0.2013, - "step": 12508 - }, - { - "epoch": 1.1783990014366124, - "grad_norm": 0.6673984527587891, - "learning_rate": 7.4880993611518095e-06, - "loss": 0.2013, - "step": 12509 - }, - { - "epoch": 1.1784932055297803, - "grad_norm": 0.6351611614227295, - "learning_rate": 7.486637786224542e-06, - "loss": 0.1962, - "step": 12510 - }, - { - "epoch": 1.178587409622948, - "grad_norm": 0.596457302570343, - "learning_rate": 7.485176268605821e-06, - "loss": 0.1843, - "step": 12511 - }, - { - "epoch": 1.178681613716116, - "grad_norm": 0.7209934592247009, - "learning_rate": 7.483714808328971e-06, - "loss": 0.2134, - "step": 12512 - }, - { - "epoch": 1.1787758178092838, - "grad_norm": 0.6333665251731873, - "learning_rate": 7.4822534054273135e-06, - "loss": 0.2021, - "step": 12513 - }, - { - "epoch": 1.1788700219024517, - "grad_norm": 0.57803875207901, - "learning_rate": 7.480792059934173e-06, - "loss": 0.2022, - "step": 12514 - }, - { - "epoch": 1.1789642259956195, - "grad_norm": 0.7156559824943542, - "learning_rate": 7.47933077188287e-06, - "loss": 0.2289, - "step": 12515 - }, - { - "epoch": 1.1790584300887874, - "grad_norm": 0.6474944949150085, - "learning_rate": 7.477869541306721e-06, - "loss": 0.2016, - "step": 12516 - }, - { - "epoch": 1.1791526341819552, - "grad_norm": 0.623877763748169, - "learning_rate": 7.476408368239051e-06, - "loss": 0.191, - "step": 12517 - }, - { - "epoch": 1.179246838275123, - "grad_norm": 0.7743015289306641, - "learning_rate": 7.474947252713171e-06, - "loss": 0.2005, - "step": 12518 - }, - { - "epoch": 1.1793410423682908, - "grad_norm": 0.6596753597259521, - "learning_rate": 7.473486194762403e-06, - "loss": 0.2034, - "step": 12519 - }, - { - "epoch": 1.1794352464614588, - "grad_norm": 0.6988006830215454, - "learning_rate": 7.47202519442005e-06, - "loss": 0.2149, - "step": 12520 - }, - { - "epoch": 1.1795294505546265, - "grad_norm": 0.6761751174926758, - "learning_rate": 7.470564251719437e-06, - "loss": 0.2149, - "step": 12521 - }, - { - "epoch": 1.1796236546477945, - "grad_norm": 0.7213813662528992, - "learning_rate": 7.46910336669387e-06, - "loss": 0.2026, - "step": 12522 - }, - { - "epoch": 1.1797178587409622, - "grad_norm": 0.6943807601928711, - "learning_rate": 7.467642539376655e-06, - "loss": 0.2007, - "step": 12523 - }, - { - "epoch": 1.1798120628341302, - "grad_norm": 0.6666421294212341, - "learning_rate": 7.4661817698011145e-06, - "loss": 0.2192, - "step": 12524 - }, - { - "epoch": 1.179906266927298, - "grad_norm": 0.6767361760139465, - "learning_rate": 7.4647210580005445e-06, - "loss": 0.2038, - "step": 12525 - }, - { - "epoch": 1.180000471020466, - "grad_norm": 0.6544678807258606, - "learning_rate": 7.4632604040082545e-06, - "loss": 0.1918, - "step": 12526 - }, - { - 
"epoch": 1.1800946751136336, - "grad_norm": 0.6642581224441528, - "learning_rate": 7.4617998078575515e-06, - "loss": 0.2051, - "step": 12527 - }, - { - "epoch": 1.1801888792068016, - "grad_norm": 0.6531805396080017, - "learning_rate": 7.460339269581739e-06, - "loss": 0.2206, - "step": 12528 - }, - { - "epoch": 1.1802830832999693, - "grad_norm": 0.7012746334075928, - "learning_rate": 7.458878789214119e-06, - "loss": 0.1871, - "step": 12529 - }, - { - "epoch": 1.1803772873931373, - "grad_norm": 0.6027641892433167, - "learning_rate": 7.4574183667879895e-06, - "loss": 0.2015, - "step": 12530 - }, - { - "epoch": 1.180471491486305, - "grad_norm": 0.6755235195159912, - "learning_rate": 7.455958002336656e-06, - "loss": 0.2006, - "step": 12531 - }, - { - "epoch": 1.180565695579473, - "grad_norm": 0.696238100528717, - "learning_rate": 7.454497695893415e-06, - "loss": 0.2063, - "step": 12532 - }, - { - "epoch": 1.1806598996726407, - "grad_norm": 0.6899204850196838, - "learning_rate": 7.45303744749156e-06, - "loss": 0.1945, - "step": 12533 - }, - { - "epoch": 1.1807541037658087, - "grad_norm": 0.6807505488395691, - "learning_rate": 7.451577257164393e-06, - "loss": 0.2066, - "step": 12534 - }, - { - "epoch": 1.1808483078589764, - "grad_norm": 0.7076438069343567, - "learning_rate": 7.450117124945206e-06, - "loss": 0.2182, - "step": 12535 - }, - { - "epoch": 1.1809425119521444, - "grad_norm": 0.5715807676315308, - "learning_rate": 7.44865705086729e-06, - "loss": 0.1783, - "step": 12536 - }, - { - "epoch": 1.1810367160453121, - "grad_norm": 0.6518290638923645, - "learning_rate": 7.44719703496394e-06, - "loss": 0.202, - "step": 12537 - }, - { - "epoch": 1.18113092013848, - "grad_norm": 0.6332467794418335, - "learning_rate": 7.445737077268448e-06, - "loss": 0.1998, - "step": 12538 - }, - { - "epoch": 1.1812251242316478, - "grad_norm": 0.6892487406730652, - "learning_rate": 7.444277177814099e-06, - "loss": 0.2227, - "step": 12539 - }, - { - "epoch": 1.1813193283248158, - "grad_norm": 0.6697837710380554, - "learning_rate": 7.442817336634178e-06, - "loss": 0.2212, - "step": 12540 - }, - { - "epoch": 1.1814135324179835, - "grad_norm": 0.6435509324073792, - "learning_rate": 7.441357553761984e-06, - "loss": 0.2123, - "step": 12541 - }, - { - "epoch": 1.1815077365111515, - "grad_norm": 0.6908721327781677, - "learning_rate": 7.439897829230793e-06, - "loss": 0.216, - "step": 12542 - }, - { - "epoch": 1.1816019406043192, - "grad_norm": 0.6827598214149475, - "learning_rate": 7.438438163073884e-06, - "loss": 0.2277, - "step": 12543 - }, - { - "epoch": 1.1816961446974872, - "grad_norm": 0.6248451471328735, - "learning_rate": 7.436978555324556e-06, - "loss": 0.1872, - "step": 12544 - }, - { - "epoch": 1.181790348790655, - "grad_norm": 0.6349536776542664, - "learning_rate": 7.435519006016077e-06, - "loss": 0.2059, - "step": 12545 - }, - { - "epoch": 1.1818845528838229, - "grad_norm": 0.6186675429344177, - "learning_rate": 7.434059515181729e-06, - "loss": 0.1891, - "step": 12546 - }, - { - "epoch": 1.1819787569769906, - "grad_norm": 0.64864581823349, - "learning_rate": 7.432600082854794e-06, - "loss": 0.1901, - "step": 12547 - }, - { - "epoch": 1.1820729610701586, - "grad_norm": 0.5495665669441223, - "learning_rate": 7.431140709068547e-06, - "loss": 0.1865, - "step": 12548 - }, - { - "epoch": 1.1821671651633263, - "grad_norm": 0.7360191941261292, - "learning_rate": 7.429681393856266e-06, - "loss": 0.1868, - "step": 12549 - }, - { - "epoch": 1.1822613692564943, - "grad_norm": 0.6350335478782654, - "learning_rate": 
7.428222137251222e-06, - "loss": 0.2165, - "step": 12550 - }, - { - "epoch": 1.182355573349662, - "grad_norm": 0.6836426854133606, - "learning_rate": 7.426762939286693e-06, - "loss": 0.1853, - "step": 12551 - }, - { - "epoch": 1.18244977744283, - "grad_norm": 0.5950830578804016, - "learning_rate": 7.425303799995946e-06, - "loss": 0.1647, - "step": 12552 - }, - { - "epoch": 1.1825439815359977, - "grad_norm": 0.6418923139572144, - "learning_rate": 7.423844719412255e-06, - "loss": 0.2196, - "step": 12553 - }, - { - "epoch": 1.1826381856291657, - "grad_norm": 0.5919622182846069, - "learning_rate": 7.42238569756889e-06, - "loss": 0.1896, - "step": 12554 - }, - { - "epoch": 1.1827323897223334, - "grad_norm": 0.6559080481529236, - "learning_rate": 7.420926734499117e-06, - "loss": 0.2069, - "step": 12555 - }, - { - "epoch": 1.1828265938155014, - "grad_norm": 0.6933526396751404, - "learning_rate": 7.419467830236201e-06, - "loss": 0.1995, - "step": 12556 - }, - { - "epoch": 1.182920797908669, - "grad_norm": 0.6160508394241333, - "learning_rate": 7.418008984813412e-06, - "loss": 0.2059, - "step": 12557 - }, - { - "epoch": 1.183015002001837, - "grad_norm": 0.5865676403045654, - "learning_rate": 7.416550198264012e-06, - "loss": 0.1793, - "step": 12558 - }, - { - "epoch": 1.1831092060950048, - "grad_norm": 0.5630083680152893, - "learning_rate": 7.415091470621263e-06, - "loss": 0.2157, - "step": 12559 - }, - { - "epoch": 1.1832034101881728, - "grad_norm": 0.6468684673309326, - "learning_rate": 7.41363280191842e-06, - "loss": 0.225, - "step": 12560 - }, - { - "epoch": 1.1832976142813405, - "grad_norm": 0.6333187222480774, - "learning_rate": 7.412174192188756e-06, - "loss": 0.2012, - "step": 12561 - }, - { - "epoch": 1.1833918183745085, - "grad_norm": 0.6520709991455078, - "learning_rate": 7.41071564146552e-06, - "loss": 0.2156, - "step": 12562 - }, - { - "epoch": 1.1834860224676762, - "grad_norm": 0.679338276386261, - "learning_rate": 7.409257149781968e-06, - "loss": 0.2315, - "step": 12563 - }, - { - "epoch": 1.1835802265608442, - "grad_norm": 0.6333425045013428, - "learning_rate": 7.407798717171366e-06, - "loss": 0.2022, - "step": 12564 - }, - { - "epoch": 1.183674430654012, - "grad_norm": 0.6089506149291992, - "learning_rate": 7.40634034366696e-06, - "loss": 0.2, - "step": 12565 - }, - { - "epoch": 1.1837686347471799, - "grad_norm": 0.6142938137054443, - "learning_rate": 7.404882029302003e-06, - "loss": 0.1756, - "step": 12566 - }, - { - "epoch": 1.1838628388403476, - "grad_norm": 0.708831787109375, - "learning_rate": 7.403423774109751e-06, - "loss": 0.2034, - "step": 12567 - }, - { - "epoch": 1.1839570429335156, - "grad_norm": 0.664275050163269, - "learning_rate": 7.401965578123453e-06, - "loss": 0.1882, - "step": 12568 - }, - { - "epoch": 1.1840512470266833, - "grad_norm": 0.6040635704994202, - "learning_rate": 7.400507441376359e-06, - "loss": 0.1848, - "step": 12569 - }, - { - "epoch": 1.1841454511198513, - "grad_norm": 0.6533285975456238, - "learning_rate": 7.399049363901712e-06, - "loss": 0.2238, - "step": 12570 - }, - { - "epoch": 1.184239655213019, - "grad_norm": 0.823172390460968, - "learning_rate": 7.397591345732764e-06, - "loss": 0.2184, - "step": 12571 - }, - { - "epoch": 1.1843338593061867, - "grad_norm": 0.7458483576774597, - "learning_rate": 7.396133386902758e-06, - "loss": 0.2068, - "step": 12572 - }, - { - "epoch": 1.1844280633993547, - "grad_norm": 0.674351155757904, - "learning_rate": 7.394675487444936e-06, - "loss": 0.2016, - "step": 12573 - }, - { - "epoch": 1.1845222674925227, - 
"grad_norm": 0.7157514691352844, - "learning_rate": 7.393217647392545e-06, - "loss": 0.2092, - "step": 12574 - }, - { - "epoch": 1.1846164715856904, - "grad_norm": 0.605705738067627, - "learning_rate": 7.391759866778821e-06, - "loss": 0.2076, - "step": 12575 - }, - { - "epoch": 1.1847106756788581, - "grad_norm": 0.5888360738754272, - "learning_rate": 7.390302145637005e-06, - "loss": 0.1777, - "step": 12576 - }, - { - "epoch": 1.184804879772026, - "grad_norm": 0.6310580372810364, - "learning_rate": 7.388844484000339e-06, - "loss": 0.2215, - "step": 12577 - }, - { - "epoch": 1.184899083865194, - "grad_norm": 0.690157413482666, - "learning_rate": 7.387386881902058e-06, - "loss": 0.2221, - "step": 12578 - }, - { - "epoch": 1.1849932879583618, - "grad_norm": 0.8984596133232117, - "learning_rate": 7.385929339375395e-06, - "loss": 0.1956, - "step": 12579 - }, - { - "epoch": 1.1850874920515295, - "grad_norm": 0.6356021761894226, - "learning_rate": 7.384471856453581e-06, - "loss": 0.2152, - "step": 12580 - }, - { - "epoch": 1.1851816961446975, - "grad_norm": 0.6510480642318726, - "learning_rate": 7.383014433169859e-06, - "loss": 0.2015, - "step": 12581 - }, - { - "epoch": 1.1852759002378654, - "grad_norm": 0.6540406942367554, - "learning_rate": 7.381557069557454e-06, - "loss": 0.2114, - "step": 12582 - }, - { - "epoch": 1.1853701043310332, - "grad_norm": 0.6514879465103149, - "learning_rate": 7.380099765649598e-06, - "loss": 0.2137, - "step": 12583 - }, - { - "epoch": 1.185464308424201, - "grad_norm": 0.7055293321609497, - "learning_rate": 7.3786425214795176e-06, - "loss": 0.2143, - "step": 12584 - }, - { - "epoch": 1.1855585125173689, - "grad_norm": 0.6761283874511719, - "learning_rate": 7.377185337080443e-06, - "loss": 0.218, - "step": 12585 - }, - { - "epoch": 1.1856527166105368, - "grad_norm": 0.6523527503013611, - "learning_rate": 7.375728212485597e-06, - "loss": 0.1639, - "step": 12586 - }, - { - "epoch": 1.1857469207037046, - "grad_norm": 0.6509504914283752, - "learning_rate": 7.374271147728207e-06, - "loss": 0.2084, - "step": 12587 - }, - { - "epoch": 1.1858411247968723, - "grad_norm": 0.6438972353935242, - "learning_rate": 7.372814142841498e-06, - "loss": 0.1865, - "step": 12588 - }, - { - "epoch": 1.1859353288900403, - "grad_norm": 0.644361674785614, - "learning_rate": 7.371357197858687e-06, - "loss": 0.2121, - "step": 12589 - }, - { - "epoch": 1.1860295329832082, - "grad_norm": 0.6154738068580627, - "learning_rate": 7.3699003128129964e-06, - "loss": 0.1989, - "step": 12590 - }, - { - "epoch": 1.186123737076376, - "grad_norm": 0.6335304975509644, - "learning_rate": 7.368443487737648e-06, - "loss": 0.1922, - "step": 12591 - }, - { - "epoch": 1.1862179411695437, - "grad_norm": 0.7136722207069397, - "learning_rate": 7.366986722665858e-06, - "loss": 0.2122, - "step": 12592 - }, - { - "epoch": 1.1863121452627117, - "grad_norm": 0.6671651601791382, - "learning_rate": 7.365530017630842e-06, - "loss": 0.2194, - "step": 12593 - }, - { - "epoch": 1.1864063493558796, - "grad_norm": 0.6870070099830627, - "learning_rate": 7.364073372665816e-06, - "loss": 0.1895, - "step": 12594 - }, - { - "epoch": 1.1865005534490474, - "grad_norm": 0.6628912687301636, - "learning_rate": 7.362616787803993e-06, - "loss": 0.2084, - "step": 12595 - }, - { - "epoch": 1.186594757542215, - "grad_norm": 0.6799843311309814, - "learning_rate": 7.361160263078586e-06, - "loss": 0.2107, - "step": 12596 - }, - { - "epoch": 1.186688961635383, - "grad_norm": 0.6107242107391357, - "learning_rate": 7.359703798522808e-06, - "loss": 
0.1985, - "step": 12597 - }, - { - "epoch": 1.186783165728551, - "grad_norm": 0.6183947324752808, - "learning_rate": 7.358247394169868e-06, - "loss": 0.177, - "step": 12598 - }, - { - "epoch": 1.1868773698217188, - "grad_norm": 0.6542305946350098, - "learning_rate": 7.356791050052972e-06, - "loss": 0.1907, - "step": 12599 - }, - { - "epoch": 1.1869715739148865, - "grad_norm": 0.677675187587738, - "learning_rate": 7.355334766205322e-06, - "loss": 0.2207, - "step": 12600 - }, - { - "epoch": 1.1870657780080545, - "grad_norm": 0.667528510093689, - "learning_rate": 7.3538785426601354e-06, - "loss": 0.2138, - "step": 12601 - }, - { - "epoch": 1.1871599821012224, - "grad_norm": 0.6824607849121094, - "learning_rate": 7.35242237945061e-06, - "loss": 0.2155, - "step": 12602 - }, - { - "epoch": 1.1872541861943902, - "grad_norm": 0.6306003332138062, - "learning_rate": 7.3509662766099455e-06, - "loss": 0.1909, - "step": 12603 - }, - { - "epoch": 1.187348390287558, - "grad_norm": 0.7506347298622131, - "learning_rate": 7.34951023417135e-06, - "loss": 0.1972, - "step": 12604 - }, - { - "epoch": 1.1874425943807259, - "grad_norm": 0.7019657492637634, - "learning_rate": 7.34805425216802e-06, - "loss": 0.2044, - "step": 12605 - }, - { - "epoch": 1.1875367984738936, - "grad_norm": 0.6580947041511536, - "learning_rate": 7.346598330633151e-06, - "loss": 0.2005, - "step": 12606 - }, - { - "epoch": 1.1876310025670616, - "grad_norm": 0.6721909642219543, - "learning_rate": 7.345142469599947e-06, - "loss": 0.1894, - "step": 12607 - }, - { - "epoch": 1.1877252066602293, - "grad_norm": 0.6698289513587952, - "learning_rate": 7.343686669101599e-06, - "loss": 0.2035, - "step": 12608 - }, - { - "epoch": 1.1878194107533973, - "grad_norm": 0.6518452167510986, - "learning_rate": 7.342230929171305e-06, - "loss": 0.2063, - "step": 12609 - }, - { - "epoch": 1.187913614846565, - "grad_norm": 0.6841627955436707, - "learning_rate": 7.3407752498422535e-06, - "loss": 0.182, - "step": 12610 - }, - { - "epoch": 1.188007818939733, - "grad_norm": 0.6407322287559509, - "learning_rate": 7.33931963114764e-06, - "loss": 0.2162, - "step": 12611 - }, - { - "epoch": 1.1881020230329007, - "grad_norm": 0.6667520403862, - "learning_rate": 7.337864073120655e-06, - "loss": 0.2067, - "step": 12612 - }, - { - "epoch": 1.1881962271260686, - "grad_norm": 0.6596465110778809, - "learning_rate": 7.336408575794482e-06, - "loss": 0.2074, - "step": 12613 - }, - { - "epoch": 1.1882904312192364, - "grad_norm": 0.6905499696731567, - "learning_rate": 7.334953139202317e-06, - "loss": 0.2295, - "step": 12614 - }, - { - "epoch": 1.1883846353124043, - "grad_norm": 0.7734467387199402, - "learning_rate": 7.333497763377342e-06, - "loss": 0.2074, - "step": 12615 - }, - { - "epoch": 1.188478839405572, - "grad_norm": 0.6414253115653992, - "learning_rate": 7.3320424483527385e-06, - "loss": 0.1943, - "step": 12616 - }, - { - "epoch": 1.18857304349874, - "grad_norm": 0.6604819893836975, - "learning_rate": 7.330587194161696e-06, - "loss": 0.2206, - "step": 12617 - }, - { - "epoch": 1.1886672475919078, - "grad_norm": 0.6642550826072693, - "learning_rate": 7.329132000837395e-06, - "loss": 0.2202, - "step": 12618 - }, - { - "epoch": 1.1887614516850757, - "grad_norm": 0.6313382983207703, - "learning_rate": 7.327676868413014e-06, - "loss": 0.1991, - "step": 12619 - }, - { - "epoch": 1.1888556557782435, - "grad_norm": 0.615211546421051, - "learning_rate": 7.326221796921729e-06, - "loss": 0.2157, - "step": 12620 - }, - { - "epoch": 1.1889498598714114, - "grad_norm": 
0.6930890083312988, - "learning_rate": 7.324766786396728e-06, - "loss": 0.1996, - "step": 12621 - }, - { - "epoch": 1.1890440639645792, - "grad_norm": 0.7138121724128723, - "learning_rate": 7.32331183687118e-06, - "loss": 0.2139, - "step": 12622 - }, - { - "epoch": 1.1891382680577471, - "grad_norm": 0.7115221619606018, - "learning_rate": 7.321856948378259e-06, - "loss": 0.2063, - "step": 12623 - }, - { - "epoch": 1.1892324721509149, - "grad_norm": 0.6268814206123352, - "learning_rate": 7.320402120951143e-06, - "loss": 0.1943, - "step": 12624 - }, - { - "epoch": 1.1893266762440828, - "grad_norm": 0.6795153617858887, - "learning_rate": 7.318947354623004e-06, - "loss": 0.2382, - "step": 12625 - }, - { - "epoch": 1.1894208803372506, - "grad_norm": 0.6876603364944458, - "learning_rate": 7.317492649427009e-06, - "loss": 0.2209, - "step": 12626 - }, - { - "epoch": 1.1895150844304185, - "grad_norm": 0.643279492855072, - "learning_rate": 7.316038005396332e-06, - "loss": 0.1934, - "step": 12627 - }, - { - "epoch": 1.1896092885235863, - "grad_norm": 0.6221898794174194, - "learning_rate": 7.314583422564139e-06, - "loss": 0.2046, - "step": 12628 - }, - { - "epoch": 1.1897034926167542, - "grad_norm": 0.668470561504364, - "learning_rate": 7.313128900963597e-06, - "loss": 0.1938, - "step": 12629 - }, - { - "epoch": 1.189797696709922, - "grad_norm": 0.7933385372161865, - "learning_rate": 7.311674440627872e-06, - "loss": 0.2082, - "step": 12630 - }, - { - "epoch": 1.18989190080309, - "grad_norm": 0.5909566283226013, - "learning_rate": 7.310220041590126e-06, - "loss": 0.1987, - "step": 12631 - }, - { - "epoch": 1.1899861048962577, - "grad_norm": 0.6252971291542053, - "learning_rate": 7.308765703883525e-06, - "loss": 0.2297, - "step": 12632 - }, - { - "epoch": 1.1900803089894256, - "grad_norm": 0.6908274292945862, - "learning_rate": 7.307311427541224e-06, - "loss": 0.2108, - "step": 12633 - }, - { - "epoch": 1.1901745130825934, - "grad_norm": 0.661973774433136, - "learning_rate": 7.30585721259639e-06, - "loss": 0.2129, - "step": 12634 - }, - { - "epoch": 1.1902687171757613, - "grad_norm": 0.6261277794837952, - "learning_rate": 7.304403059082179e-06, - "loss": 0.2081, - "step": 12635 - }, - { - "epoch": 1.190362921268929, - "grad_norm": 0.6275283098220825, - "learning_rate": 7.302948967031744e-06, - "loss": 0.1968, - "step": 12636 - }, - { - "epoch": 1.190457125362097, - "grad_norm": 0.6762524843215942, - "learning_rate": 7.301494936478245e-06, - "loss": 0.211, - "step": 12637 - }, - { - "epoch": 1.1905513294552648, - "grad_norm": 0.6919872164726257, - "learning_rate": 7.300040967454838e-06, - "loss": 0.2074, - "step": 12638 - }, - { - "epoch": 1.1906455335484327, - "grad_norm": 0.7369116544723511, - "learning_rate": 7.29858705999467e-06, - "loss": 0.2365, - "step": 12639 - }, - { - "epoch": 1.1907397376416005, - "grad_norm": 0.6140055060386658, - "learning_rate": 7.297133214130891e-06, - "loss": 0.1955, - "step": 12640 - }, - { - "epoch": 1.1908339417347684, - "grad_norm": 0.6694202423095703, - "learning_rate": 7.295679429896661e-06, - "loss": 0.1833, - "step": 12641 - }, - { - "epoch": 1.1909281458279362, - "grad_norm": 0.633593738079071, - "learning_rate": 7.29422570732512e-06, - "loss": 0.1973, - "step": 12642 - }, - { - "epoch": 1.1910223499211041, - "grad_norm": 0.6085483431816101, - "learning_rate": 7.292772046449415e-06, - "loss": 0.2112, - "step": 12643 - }, - { - "epoch": 1.1911165540142719, - "grad_norm": 0.6511340141296387, - "learning_rate": 7.291318447302695e-06, - "loss": 0.2361, - "step": 12644 
- }, - { - "epoch": 1.1912107581074398, - "grad_norm": 0.6604608297348022, - "learning_rate": 7.289864909918107e-06, - "loss": 0.2217, - "step": 12645 - }, - { - "epoch": 1.1913049622006076, - "grad_norm": 0.7020126581192017, - "learning_rate": 7.288411434328786e-06, - "loss": 0.2117, - "step": 12646 - }, - { - "epoch": 1.1913991662937755, - "grad_norm": 0.7185226082801819, - "learning_rate": 7.28695802056788e-06, - "loss": 0.2367, - "step": 12647 - }, - { - "epoch": 1.1914933703869433, - "grad_norm": 0.6534253358840942, - "learning_rate": 7.285504668668526e-06, - "loss": 0.2128, - "step": 12648 - }, - { - "epoch": 1.1915875744801112, - "grad_norm": 0.6461052298545837, - "learning_rate": 7.284051378663865e-06, - "loss": 0.2209, - "step": 12649 - }, - { - "epoch": 1.191681778573279, - "grad_norm": 0.5969422459602356, - "learning_rate": 7.282598150587032e-06, - "loss": 0.2062, - "step": 12650 - }, - { - "epoch": 1.191775982666447, - "grad_norm": 0.5316562056541443, - "learning_rate": 7.281144984471163e-06, - "loss": 0.1623, - "step": 12651 - }, - { - "epoch": 1.1918701867596146, - "grad_norm": 0.61534583568573, - "learning_rate": 7.279691880349395e-06, - "loss": 0.2, - "step": 12652 - }, - { - "epoch": 1.1919643908527826, - "grad_norm": 0.6426992416381836, - "learning_rate": 7.278238838254857e-06, - "loss": 0.1846, - "step": 12653 - }, - { - "epoch": 1.1920585949459503, - "grad_norm": 0.630556583404541, - "learning_rate": 7.276785858220684e-06, - "loss": 0.2134, - "step": 12654 - }, - { - "epoch": 1.1921527990391183, - "grad_norm": 0.588656485080719, - "learning_rate": 7.275332940280006e-06, - "loss": 0.1721, - "step": 12655 - }, - { - "epoch": 1.192247003132286, - "grad_norm": 0.6570435166358948, - "learning_rate": 7.273880084465947e-06, - "loss": 0.1993, - "step": 12656 - }, - { - "epoch": 1.192341207225454, - "grad_norm": 0.6573622226715088, - "learning_rate": 7.272427290811641e-06, - "loss": 0.2247, - "step": 12657 - }, - { - "epoch": 1.1924354113186217, - "grad_norm": 0.6184468865394592, - "learning_rate": 7.270974559350214e-06, - "loss": 0.1763, - "step": 12658 - }, - { - "epoch": 1.1925296154117897, - "grad_norm": 0.6759619116783142, - "learning_rate": 7.269521890114785e-06, - "loss": 0.1932, - "step": 12659 - }, - { - "epoch": 1.1926238195049574, - "grad_norm": 0.6438455581665039, - "learning_rate": 7.268069283138475e-06, - "loss": 0.1992, - "step": 12660 - }, - { - "epoch": 1.1927180235981254, - "grad_norm": 0.5674619078636169, - "learning_rate": 7.2666167384544175e-06, - "loss": 0.1669, - "step": 12661 - }, - { - "epoch": 1.1928122276912931, - "grad_norm": 0.6349161863327026, - "learning_rate": 7.265164256095723e-06, - "loss": 0.1925, - "step": 12662 - }, - { - "epoch": 1.192906431784461, - "grad_norm": 0.6780090928077698, - "learning_rate": 7.26371183609551e-06, - "loss": 0.2053, - "step": 12663 - }, - { - "epoch": 1.1930006358776288, - "grad_norm": 0.6443299651145935, - "learning_rate": 7.262259478486901e-06, - "loss": 0.2151, - "step": 12664 - }, - { - "epoch": 1.1930948399707968, - "grad_norm": 0.604374885559082, - "learning_rate": 7.260807183303011e-06, - "loss": 0.1814, - "step": 12665 - }, - { - "epoch": 1.1931890440639645, - "grad_norm": 0.6148853302001953, - "learning_rate": 7.259354950576951e-06, - "loss": 0.1961, - "step": 12666 - }, - { - "epoch": 1.1932832481571325, - "grad_norm": 0.6508539319038391, - "learning_rate": 7.257902780341839e-06, - "loss": 0.1921, - "step": 12667 - }, - { - "epoch": 1.1933774522503002, - "grad_norm": 0.7368025779724121, - "learning_rate": 
7.256450672630785e-06, - "loss": 0.2105, - "step": 12668 - }, - { - "epoch": 1.1934716563434682, - "grad_norm": 0.6457901000976562, - "learning_rate": 7.254998627476897e-06, - "loss": 0.1963, - "step": 12669 - }, - { - "epoch": 1.193565860436636, - "grad_norm": 0.6276993751525879, - "learning_rate": 7.253546644913285e-06, - "loss": 0.1979, - "step": 12670 - }, - { - "epoch": 1.1936600645298039, - "grad_norm": 0.6053666472434998, - "learning_rate": 7.252094724973057e-06, - "loss": 0.1601, - "step": 12671 - }, - { - "epoch": 1.1937542686229716, - "grad_norm": 0.7851318717002869, - "learning_rate": 7.250642867689322e-06, - "loss": 0.2185, - "step": 12672 - }, - { - "epoch": 1.1938484727161396, - "grad_norm": 0.6257517337799072, - "learning_rate": 7.249191073095176e-06, - "loss": 0.1791, - "step": 12673 - }, - { - "epoch": 1.1939426768093073, - "grad_norm": 0.8658941388130188, - "learning_rate": 7.2477393412237314e-06, - "loss": 0.2269, - "step": 12674 - }, - { - "epoch": 1.1940368809024753, - "grad_norm": 0.6948623657226562, - "learning_rate": 7.24628767210809e-06, - "loss": 0.2226, - "step": 12675 - }, - { - "epoch": 1.194131084995643, - "grad_norm": 0.6017744541168213, - "learning_rate": 7.24483606578134e-06, - "loss": 0.203, - "step": 12676 - }, - { - "epoch": 1.194225289088811, - "grad_norm": 0.7382233738899231, - "learning_rate": 7.243384522276593e-06, - "loss": 0.2513, - "step": 12677 - }, - { - "epoch": 1.1943194931819787, - "grad_norm": 0.6766628623008728, - "learning_rate": 7.241933041626945e-06, - "loss": 0.1978, - "step": 12678 - }, - { - "epoch": 1.1944136972751467, - "grad_norm": 0.6479530334472656, - "learning_rate": 7.240481623865488e-06, - "loss": 0.1964, - "step": 12679 - }, - { - "epoch": 1.1945079013683144, - "grad_norm": 0.6650206446647644, - "learning_rate": 7.239030269025311e-06, - "loss": 0.1894, - "step": 12680 - }, - { - "epoch": 1.1946021054614824, - "grad_norm": 0.634466826915741, - "learning_rate": 7.237578977139521e-06, - "loss": 0.2038, - "step": 12681 - }, - { - "epoch": 1.1946963095546501, - "grad_norm": 0.6568295359611511, - "learning_rate": 7.236127748241201e-06, - "loss": 0.2111, - "step": 12682 - }, - { - "epoch": 1.194790513647818, - "grad_norm": 0.6371886730194092, - "learning_rate": 7.23467658236344e-06, - "loss": 0.2103, - "step": 12683 - }, - { - "epoch": 1.1948847177409858, - "grad_norm": 0.5894503593444824, - "learning_rate": 7.2332254795393315e-06, - "loss": 0.182, - "step": 12684 - }, - { - "epoch": 1.1949789218341538, - "grad_norm": 0.6459717750549316, - "learning_rate": 7.2317744398019616e-06, - "loss": 0.1934, - "step": 12685 - }, - { - "epoch": 1.1950731259273215, - "grad_norm": 0.6525779962539673, - "learning_rate": 7.230323463184414e-06, - "loss": 0.2003, - "step": 12686 - }, - { - "epoch": 1.1951673300204895, - "grad_norm": 0.6206629872322083, - "learning_rate": 7.228872549719776e-06, - "loss": 0.22, - "step": 12687 - }, - { - "epoch": 1.1952615341136572, - "grad_norm": 0.8373090028762817, - "learning_rate": 7.227421699441129e-06, - "loss": 0.2092, - "step": 12688 - }, - { - "epoch": 1.1953557382068252, - "grad_norm": 0.6752511858940125, - "learning_rate": 7.225970912381557e-06, - "loss": 0.1905, - "step": 12689 - }, - { - "epoch": 1.195449942299993, - "grad_norm": 0.7148832082748413, - "learning_rate": 7.224520188574134e-06, - "loss": 0.1988, - "step": 12690 - }, - { - "epoch": 1.1955441463931609, - "grad_norm": 0.6180127263069153, - "learning_rate": 7.223069528051947e-06, - "loss": 0.195, - "step": 12691 - }, - { - "epoch": 
1.1956383504863286, - "grad_norm": 0.6713067889213562, - "learning_rate": 7.2216189308480675e-06, - "loss": 0.2345, - "step": 12692 - }, - { - "epoch": 1.1957325545794966, - "grad_norm": 0.5889583826065063, - "learning_rate": 7.220168396995573e-06, - "loss": 0.1793, - "step": 12693 - }, - { - "epoch": 1.1958267586726643, - "grad_norm": 0.654589831829071, - "learning_rate": 7.218717926527539e-06, - "loss": 0.2051, - "step": 12694 - }, - { - "epoch": 1.1959209627658323, - "grad_norm": 0.6394796371459961, - "learning_rate": 7.21726751947704e-06, - "loss": 0.1782, - "step": 12695 - }, - { - "epoch": 1.196015166859, - "grad_norm": 0.6581370830535889, - "learning_rate": 7.21581717587714e-06, - "loss": 0.2304, - "step": 12696 - }, - { - "epoch": 1.196109370952168, - "grad_norm": 0.7615405917167664, - "learning_rate": 7.214366895760916e-06, - "loss": 0.2245, - "step": 12697 - }, - { - "epoch": 1.1962035750453357, - "grad_norm": 0.5310841798782349, - "learning_rate": 7.2129166791614395e-06, - "loss": 0.1746, - "step": 12698 - }, - { - "epoch": 1.1962977791385037, - "grad_norm": 0.6486091017723083, - "learning_rate": 7.21146652611177e-06, - "loss": 0.1939, - "step": 12699 - }, - { - "epoch": 1.1963919832316714, - "grad_norm": 0.6611186861991882, - "learning_rate": 7.2100164366449736e-06, - "loss": 0.21, - "step": 12700 - }, - { - "epoch": 1.1964861873248394, - "grad_norm": 0.7448510527610779, - "learning_rate": 7.208566410794119e-06, - "loss": 0.2355, - "step": 12701 - }, - { - "epoch": 1.196580391418007, - "grad_norm": 0.6172866225242615, - "learning_rate": 7.207116448592269e-06, - "loss": 0.1887, - "step": 12702 - }, - { - "epoch": 1.196674595511175, - "grad_norm": 0.6458644866943359, - "learning_rate": 7.205666550072478e-06, - "loss": 0.1809, - "step": 12703 - }, - { - "epoch": 1.1967687996043428, - "grad_norm": 0.7035966515541077, - "learning_rate": 7.204216715267817e-06, - "loss": 0.2233, - "step": 12704 - }, - { - "epoch": 1.1968630036975108, - "grad_norm": 0.6677559614181519, - "learning_rate": 7.202766944211337e-06, - "loss": 0.2208, - "step": 12705 - }, - { - "epoch": 1.1969572077906785, - "grad_norm": 0.6460862755775452, - "learning_rate": 7.201317236936094e-06, - "loss": 0.206, - "step": 12706 - }, - { - "epoch": 1.1970514118838465, - "grad_norm": 0.706049382686615, - "learning_rate": 7.199867593475149e-06, - "loss": 0.1983, - "step": 12707 - }, - { - "epoch": 1.1971456159770142, - "grad_norm": 0.6077711582183838, - "learning_rate": 7.198418013861553e-06, - "loss": 0.1878, - "step": 12708 - }, - { - "epoch": 1.1972398200701821, - "grad_norm": 0.7669473886489868, - "learning_rate": 7.196968498128359e-06, - "loss": 0.2075, - "step": 12709 - }, - { - "epoch": 1.1973340241633499, - "grad_norm": 0.6662476062774658, - "learning_rate": 7.195519046308616e-06, - "loss": 0.2073, - "step": 12710 - }, - { - "epoch": 1.1974282282565176, - "grad_norm": 0.6097497940063477, - "learning_rate": 7.1940696584353784e-06, - "loss": 0.1908, - "step": 12711 - }, - { - "epoch": 1.1975224323496856, - "grad_norm": 0.6982675194740295, - "learning_rate": 7.1926203345416935e-06, - "loss": 0.2524, - "step": 12712 - }, - { - "epoch": 1.1976166364428535, - "grad_norm": 0.6172206997871399, - "learning_rate": 7.191171074660603e-06, - "loss": 0.199, - "step": 12713 - }, - { - "epoch": 1.1977108405360213, - "grad_norm": 0.6649495959281921, - "learning_rate": 7.189721878825157e-06, - "loss": 0.2139, - "step": 12714 - }, - { - "epoch": 1.197805044629189, - "grad_norm": 0.6137988567352295, - "learning_rate": 
7.188272747068404e-06, - "loss": 0.2042, - "step": 12715 - }, - { - "epoch": 1.197899248722357, - "grad_norm": 0.6602396965026855, - "learning_rate": 7.186823679423371e-06, - "loss": 0.2083, - "step": 12716 - }, - { - "epoch": 1.197993452815525, - "grad_norm": 0.6809267997741699, - "learning_rate": 7.185374675923114e-06, - "loss": 0.2304, - "step": 12717 - }, - { - "epoch": 1.1980876569086927, - "grad_norm": 0.6102619171142578, - "learning_rate": 7.18392573660067e-06, - "loss": 0.1923, - "step": 12718 - }, - { - "epoch": 1.1981818610018604, - "grad_norm": 0.6653514504432678, - "learning_rate": 7.182476861489072e-06, - "loss": 0.2469, - "step": 12719 - }, - { - "epoch": 1.1982760650950284, - "grad_norm": 0.5470622181892395, - "learning_rate": 7.181028050621355e-06, - "loss": 0.1868, - "step": 12720 - }, - { - "epoch": 1.1983702691881963, - "grad_norm": 0.6350684762001038, - "learning_rate": 7.179579304030562e-06, - "loss": 0.2037, - "step": 12721 - }, - { - "epoch": 1.198464473281364, - "grad_norm": 0.6286614537239075, - "learning_rate": 7.178130621749722e-06, - "loss": 0.1995, - "step": 12722 - }, - { - "epoch": 1.1985586773745318, - "grad_norm": 0.6691330075263977, - "learning_rate": 7.176682003811868e-06, - "loss": 0.225, - "step": 12723 - }, - { - "epoch": 1.1986528814676998, - "grad_norm": 0.7239378690719604, - "learning_rate": 7.17523345025003e-06, - "loss": 0.2046, - "step": 12724 - }, - { - "epoch": 1.1987470855608677, - "grad_norm": 0.6537780165672302, - "learning_rate": 7.173784961097239e-06, - "loss": 0.236, - "step": 12725 - }, - { - "epoch": 1.1988412896540355, - "grad_norm": 0.7614167928695679, - "learning_rate": 7.172336536386519e-06, - "loss": 0.2231, - "step": 12726 - }, - { - "epoch": 1.1989354937472032, - "grad_norm": 0.5911201238632202, - "learning_rate": 7.170888176150903e-06, - "loss": 0.1741, - "step": 12727 - }, - { - "epoch": 1.1990296978403712, - "grad_norm": 0.6751967668533325, - "learning_rate": 7.16943988042341e-06, - "loss": 0.2134, - "step": 12728 - }, - { - "epoch": 1.1991239019335391, - "grad_norm": 0.7988777160644531, - "learning_rate": 7.167991649237066e-06, - "loss": 0.1947, - "step": 12729 - }, - { - "epoch": 1.1992181060267069, - "grad_norm": 0.7002336382865906, - "learning_rate": 7.16654348262489e-06, - "loss": 0.2236, - "step": 12730 - }, - { - "epoch": 1.1993123101198746, - "grad_norm": 0.5490429997444153, - "learning_rate": 7.165095380619906e-06, - "loss": 0.1928, - "step": 12731 - }, - { - "epoch": 1.1994065142130426, - "grad_norm": 0.6369600892066956, - "learning_rate": 7.163647343255134e-06, - "loss": 0.195, - "step": 12732 - }, - { - "epoch": 1.1995007183062105, - "grad_norm": 0.6094090342521667, - "learning_rate": 7.162199370563585e-06, - "loss": 0.1936, - "step": 12733 - }, - { - "epoch": 1.1995949223993783, - "grad_norm": 0.6757724285125732, - "learning_rate": 7.160751462578282e-06, - "loss": 0.1858, - "step": 12734 - }, - { - "epoch": 1.199689126492546, - "grad_norm": 0.5965824127197266, - "learning_rate": 7.159303619332236e-06, - "loss": 0.1963, - "step": 12735 - }, - { - "epoch": 1.199783330585714, - "grad_norm": 0.6731762886047363, - "learning_rate": 7.157855840858457e-06, - "loss": 0.2255, - "step": 12736 - }, - { - "epoch": 1.199877534678882, - "grad_norm": 0.6442179679870605, - "learning_rate": 7.156408127189964e-06, - "loss": 0.2162, - "step": 12737 - }, - { - "epoch": 1.1999717387720497, - "grad_norm": 0.6489914059638977, - "learning_rate": 7.154960478359766e-06, - "loss": 0.2052, - "step": 12738 - }, - { - "epoch": 
1.2000659428652174, - "grad_norm": 0.6417892575263977, - "learning_rate": 7.1535128944008666e-06, - "loss": 0.1991, - "step": 12739 - }, - { - "epoch": 1.2001601469583854, - "grad_norm": 0.6218274235725403, - "learning_rate": 7.152065375346273e-06, - "loss": 0.2104, - "step": 12740 - }, - { - "epoch": 1.2002543510515533, - "grad_norm": 0.6693999171257019, - "learning_rate": 7.150617921228995e-06, - "loss": 0.2329, - "step": 12741 - }, - { - "epoch": 1.200348555144721, - "grad_norm": 0.7353677153587341, - "learning_rate": 7.149170532082037e-06, - "loss": 0.2242, - "step": 12742 - }, - { - "epoch": 1.2004427592378888, - "grad_norm": 0.6996892094612122, - "learning_rate": 7.147723207938395e-06, - "loss": 0.1921, - "step": 12743 - }, - { - "epoch": 1.2005369633310568, - "grad_norm": 0.6604074239730835, - "learning_rate": 7.146275948831078e-06, - "loss": 0.2125, - "step": 12744 - }, - { - "epoch": 1.2006311674242245, - "grad_norm": 0.686569333076477, - "learning_rate": 7.144828754793084e-06, - "loss": 0.171, - "step": 12745 - }, - { - "epoch": 1.2007253715173924, - "grad_norm": 0.6470799446105957, - "learning_rate": 7.143381625857407e-06, - "loss": 0.214, - "step": 12746 - }, - { - "epoch": 1.2008195756105602, - "grad_norm": 0.6509439945220947, - "learning_rate": 7.141934562057049e-06, - "loss": 0.2175, - "step": 12747 - }, - { - "epoch": 1.2009137797037281, - "grad_norm": 0.6378494501113892, - "learning_rate": 7.1404875634250026e-06, - "loss": 0.1989, - "step": 12748 - }, - { - "epoch": 1.2010079837968959, - "grad_norm": 0.6776915192604065, - "learning_rate": 7.139040629994263e-06, - "loss": 0.2172, - "step": 12749 - }, - { - "epoch": 1.2011021878900638, - "grad_norm": 0.6282821297645569, - "learning_rate": 7.137593761797818e-06, - "loss": 0.2073, - "step": 12750 - }, - { - "epoch": 1.2011963919832316, - "grad_norm": 0.5688050389289856, - "learning_rate": 7.136146958868666e-06, - "loss": 0.1773, - "step": 12751 - }, - { - "epoch": 1.2012905960763995, - "grad_norm": 0.7232409715652466, - "learning_rate": 7.134700221239793e-06, - "loss": 0.2, - "step": 12752 - }, - { - "epoch": 1.2013848001695673, - "grad_norm": 0.6493895649909973, - "learning_rate": 7.133253548944181e-06, - "loss": 0.2054, - "step": 12753 - }, - { - "epoch": 1.2014790042627352, - "grad_norm": 0.6467964053153992, - "learning_rate": 7.131806942014825e-06, - "loss": 0.1916, - "step": 12754 - }, - { - "epoch": 1.201573208355903, - "grad_norm": 0.6178547143936157, - "learning_rate": 7.1303604004847085e-06, - "loss": 0.2295, - "step": 12755 - }, - { - "epoch": 1.201667412449071, - "grad_norm": 0.7066540122032166, - "learning_rate": 7.128913924386807e-06, - "loss": 0.2297, - "step": 12756 - }, - { - "epoch": 1.2017616165422387, - "grad_norm": 0.6456404328346252, - "learning_rate": 7.127467513754112e-06, - "loss": 0.2108, - "step": 12757 - }, - { - "epoch": 1.2018558206354066, - "grad_norm": 0.6754060387611389, - "learning_rate": 7.1260211686196035e-06, - "loss": 0.2083, - "step": 12758 - }, - { - "epoch": 1.2019500247285744, - "grad_norm": 0.6716510653495789, - "learning_rate": 7.124574889016254e-06, - "loss": 0.2238, - "step": 12759 - }, - { - "epoch": 1.2020442288217423, - "grad_norm": 0.6841642260551453, - "learning_rate": 7.1231286749770416e-06, - "loss": 0.1973, - "step": 12760 - }, - { - "epoch": 1.20213843291491, - "grad_norm": 0.5917529463768005, - "learning_rate": 7.1216825265349465e-06, - "loss": 0.2067, - "step": 12761 - }, - { - "epoch": 1.202232637008078, - "grad_norm": 0.6807586550712585, - "learning_rate": 
7.120236443722941e-06, - "loss": 0.2049, - "step": 12762 - }, - { - "epoch": 1.2023268411012458, - "grad_norm": 0.6509501934051514, - "learning_rate": 7.118790426573997e-06, - "loss": 0.2004, - "step": 12763 - }, - { - "epoch": 1.2024210451944137, - "grad_norm": 0.675783097743988, - "learning_rate": 7.1173444751210885e-06, - "loss": 0.2322, - "step": 12764 - }, - { - "epoch": 1.2025152492875815, - "grad_norm": 0.6668880581855774, - "learning_rate": 7.115898589397185e-06, - "loss": 0.2457, - "step": 12765 - }, - { - "epoch": 1.2026094533807494, - "grad_norm": 0.6287373900413513, - "learning_rate": 7.114452769435252e-06, - "loss": 0.2001, - "step": 12766 - }, - { - "epoch": 1.2027036574739172, - "grad_norm": 0.6526023745536804, - "learning_rate": 7.1130070152682605e-06, - "loss": 0.1813, - "step": 12767 - }, - { - "epoch": 1.2027978615670851, - "grad_norm": 0.6208986043930054, - "learning_rate": 7.111561326929173e-06, - "loss": 0.1963, - "step": 12768 - }, - { - "epoch": 1.2028920656602529, - "grad_norm": 0.6641842126846313, - "learning_rate": 7.110115704450955e-06, - "loss": 0.1975, - "step": 12769 - }, - { - "epoch": 1.2029862697534208, - "grad_norm": 0.6185021996498108, - "learning_rate": 7.108670147866565e-06, - "loss": 0.1696, - "step": 12770 - }, - { - "epoch": 1.2030804738465886, - "grad_norm": 0.6088213920593262, - "learning_rate": 7.107224657208971e-06, - "loss": 0.2202, - "step": 12771 - }, - { - "epoch": 1.2031746779397565, - "grad_norm": 0.6018186211585999, - "learning_rate": 7.10577923251113e-06, - "loss": 0.2033, - "step": 12772 - }, - { - "epoch": 1.2032688820329243, - "grad_norm": 0.6003065705299377, - "learning_rate": 7.104333873805991e-06, - "loss": 0.1905, - "step": 12773 - }, - { - "epoch": 1.2033630861260922, - "grad_norm": 0.6481710076332092, - "learning_rate": 7.102888581126523e-06, - "loss": 0.2095, - "step": 12774 - }, - { - "epoch": 1.20345729021926, - "grad_norm": 0.7129554748535156, - "learning_rate": 7.1014433545056785e-06, - "loss": 0.2175, - "step": 12775 - }, - { - "epoch": 1.203551494312428, - "grad_norm": 0.5934905409812927, - "learning_rate": 7.099998193976401e-06, - "loss": 0.1825, - "step": 12776 - }, - { - "epoch": 1.2036456984055957, - "grad_norm": 0.6513676047325134, - "learning_rate": 7.098553099571654e-06, - "loss": 0.2241, - "step": 12777 - }, - { - "epoch": 1.2037399024987636, - "grad_norm": 0.7061363458633423, - "learning_rate": 7.097108071324386e-06, - "loss": 0.2314, - "step": 12778 - }, - { - "epoch": 1.2038341065919314, - "grad_norm": 0.708442747592926, - "learning_rate": 7.095663109267541e-06, - "loss": 0.2111, - "step": 12779 - }, - { - "epoch": 1.2039283106850993, - "grad_norm": 0.6820908188819885, - "learning_rate": 7.094218213434068e-06, - "loss": 0.2339, - "step": 12780 - }, - { - "epoch": 1.204022514778267, - "grad_norm": 0.6267956495285034, - "learning_rate": 7.092773383856913e-06, - "loss": 0.1869, - "step": 12781 - }, - { - "epoch": 1.204116718871435, - "grad_norm": 0.7776097655296326, - "learning_rate": 7.091328620569023e-06, - "loss": 0.1977, - "step": 12782 - }, - { - "epoch": 1.2042109229646027, - "grad_norm": 0.6159158945083618, - "learning_rate": 7.0898839236033355e-06, - "loss": 0.2093, - "step": 12783 - }, - { - "epoch": 1.2043051270577707, - "grad_norm": 0.7958076000213623, - "learning_rate": 7.088439292992798e-06, - "loss": 0.1876, - "step": 12784 - }, - { - "epoch": 1.2043993311509384, - "grad_norm": 0.732624351978302, - "learning_rate": 7.086994728770348e-06, - "loss": 0.2307, - "step": 12785 - }, - { - "epoch": 
1.2044935352441064, - "grad_norm": 0.9008014798164368, - "learning_rate": 7.085550230968921e-06, - "loss": 0.2152, - "step": 12786 - }, - { - "epoch": 1.2045877393372741, - "grad_norm": 0.6954534649848938, - "learning_rate": 7.084105799621457e-06, - "loss": 0.2121, - "step": 12787 - }, - { - "epoch": 1.204681943430442, - "grad_norm": 0.5858094096183777, - "learning_rate": 7.08266143476089e-06, - "loss": 0.1702, - "step": 12788 - }, - { - "epoch": 1.2047761475236098, - "grad_norm": 0.6322008371353149, - "learning_rate": 7.081217136420155e-06, - "loss": 0.2233, - "step": 12789 - }, - { - "epoch": 1.2048703516167778, - "grad_norm": 0.7262994050979614, - "learning_rate": 7.079772904632181e-06, - "loss": 0.2287, - "step": 12790 - }, - { - "epoch": 1.2049645557099455, - "grad_norm": 0.7194874286651611, - "learning_rate": 7.078328739429903e-06, - "loss": 0.2145, - "step": 12791 - }, - { - "epoch": 1.2050587598031135, - "grad_norm": 0.6091543436050415, - "learning_rate": 7.076884640846251e-06, - "loss": 0.1949, - "step": 12792 - }, - { - "epoch": 1.2051529638962812, - "grad_norm": 0.5965726971626282, - "learning_rate": 7.075440608914143e-06, - "loss": 0.1778, - "step": 12793 - }, - { - "epoch": 1.2052471679894492, - "grad_norm": 0.768684446811676, - "learning_rate": 7.073996643666516e-06, - "loss": 0.1985, - "step": 12794 - }, - { - "epoch": 1.205341372082617, - "grad_norm": 0.6657359004020691, - "learning_rate": 7.072552745136293e-06, - "loss": 0.2113, - "step": 12795 - }, - { - "epoch": 1.205435576175785, - "grad_norm": 0.6386681199073792, - "learning_rate": 7.071108913356388e-06, - "loss": 0.1982, - "step": 12796 - }, - { - "epoch": 1.2055297802689526, - "grad_norm": 0.6536575555801392, - "learning_rate": 7.069665148359737e-06, - "loss": 0.2181, - "step": 12797 - }, - { - "epoch": 1.2056239843621206, - "grad_norm": 0.6982464790344238, - "learning_rate": 7.068221450179249e-06, - "loss": 0.2056, - "step": 12798 - }, - { - "epoch": 1.2057181884552883, - "grad_norm": 0.7056099772453308, - "learning_rate": 7.066777818847847e-06, - "loss": 0.2309, - "step": 12799 - }, - { - "epoch": 1.2058123925484563, - "grad_norm": 0.6622529029846191, - "learning_rate": 7.065334254398444e-06, - "loss": 0.1976, - "step": 12800 - }, - { - "epoch": 1.205906596641624, - "grad_norm": 0.7267027497291565, - "learning_rate": 7.063890756863961e-06, - "loss": 0.224, - "step": 12801 - }, - { - "epoch": 1.206000800734792, - "grad_norm": 0.6865708231925964, - "learning_rate": 7.06244732627731e-06, - "loss": 0.1954, - "step": 12802 - }, - { - "epoch": 1.2060950048279597, - "grad_norm": 0.7472881078720093, - "learning_rate": 7.061003962671401e-06, - "loss": 0.2098, - "step": 12803 - }, - { - "epoch": 1.2061892089211277, - "grad_norm": 0.58888179063797, - "learning_rate": 7.059560666079148e-06, - "loss": 0.1774, - "step": 12804 - }, - { - "epoch": 1.2062834130142954, - "grad_norm": 0.6867105960845947, - "learning_rate": 7.05811743653346e-06, - "loss": 0.2048, - "step": 12805 - }, - { - "epoch": 1.2063776171074634, - "grad_norm": 0.7627989053726196, - "learning_rate": 7.056674274067242e-06, - "loss": 0.2333, - "step": 12806 - }, - { - "epoch": 1.2064718212006311, - "grad_norm": 0.6554869413375854, - "learning_rate": 7.055231178713404e-06, - "loss": 0.2053, - "step": 12807 - }, - { - "epoch": 1.206566025293799, - "grad_norm": 0.6362926959991455, - "learning_rate": 7.05378815050485e-06, - "loss": 0.2228, - "step": 12808 - }, - { - "epoch": 1.2066602293869668, - "grad_norm": 0.62162184715271, - "learning_rate": 7.052345189474483e-06, 
- "loss": 0.2073, - "step": 12809 - }, - { - "epoch": 1.2067544334801348, - "grad_norm": 0.6365129351615906, - "learning_rate": 7.050902295655202e-06, - "loss": 0.1994, - "step": 12810 - }, - { - "epoch": 1.2068486375733025, - "grad_norm": 0.6029601693153381, - "learning_rate": 7.049459469079911e-06, - "loss": 0.1914, - "step": 12811 - }, - { - "epoch": 1.2069428416664705, - "grad_norm": 0.6337880492210388, - "learning_rate": 7.048016709781509e-06, - "loss": 0.2087, - "step": 12812 - }, - { - "epoch": 1.2070370457596382, - "grad_norm": 0.6876749396324158, - "learning_rate": 7.046574017792887e-06, - "loss": 0.1919, - "step": 12813 - }, - { - "epoch": 1.2071312498528062, - "grad_norm": 0.607933521270752, - "learning_rate": 7.045131393146947e-06, - "loss": 0.1916, - "step": 12814 - }, - { - "epoch": 1.207225453945974, - "grad_norm": 0.725727379322052, - "learning_rate": 7.043688835876583e-06, - "loss": 0.2104, - "step": 12815 - }, - { - "epoch": 1.2073196580391419, - "grad_norm": 0.626183032989502, - "learning_rate": 7.04224634601468e-06, - "loss": 0.2267, - "step": 12816 - }, - { - "epoch": 1.2074138621323096, - "grad_norm": 0.6115753650665283, - "learning_rate": 7.0408039235941415e-06, - "loss": 0.2166, - "step": 12817 - }, - { - "epoch": 1.2075080662254776, - "grad_norm": 0.6772529482841492, - "learning_rate": 7.039361568647847e-06, - "loss": 0.2137, - "step": 12818 - }, - { - "epoch": 1.2076022703186453, - "grad_norm": 0.5911723971366882, - "learning_rate": 7.0379192812086885e-06, - "loss": 0.2021, - "step": 12819 - }, - { - "epoch": 1.2076964744118133, - "grad_norm": 0.6694115400314331, - "learning_rate": 7.036477061309548e-06, - "loss": 0.2092, - "step": 12820 - }, - { - "epoch": 1.207790678504981, - "grad_norm": 0.6884358525276184, - "learning_rate": 7.035034908983317e-06, - "loss": 0.223, - "step": 12821 - }, - { - "epoch": 1.207884882598149, - "grad_norm": 0.6839742064476013, - "learning_rate": 7.033592824262875e-06, - "loss": 0.2128, - "step": 12822 - }, - { - "epoch": 1.2079790866913167, - "grad_norm": 0.671143651008606, - "learning_rate": 7.032150807181102e-06, - "loss": 0.2202, - "step": 12823 - }, - { - "epoch": 1.2080732907844847, - "grad_norm": 0.621971845626831, - "learning_rate": 7.030708857770883e-06, - "loss": 0.1729, - "step": 12824 - }, - { - "epoch": 1.2081674948776524, - "grad_norm": 0.7871326804161072, - "learning_rate": 7.029266976065092e-06, - "loss": 0.1986, - "step": 12825 - }, - { - "epoch": 1.2082616989708204, - "grad_norm": 0.6898002028465271, - "learning_rate": 7.027825162096609e-06, - "loss": 0.1771, - "step": 12826 - }, - { - "epoch": 1.208355903063988, - "grad_norm": 0.658204197883606, - "learning_rate": 7.0263834158983105e-06, - "loss": 0.2062, - "step": 12827 - }, - { - "epoch": 1.208450107157156, - "grad_norm": 0.6371948719024658, - "learning_rate": 7.024941737503067e-06, - "loss": 0.2236, - "step": 12828 - }, - { - "epoch": 1.2085443112503238, - "grad_norm": 0.6982995867729187, - "learning_rate": 7.023500126943754e-06, - "loss": 0.1935, - "step": 12829 - }, - { - "epoch": 1.2086385153434918, - "grad_norm": 0.6304657459259033, - "learning_rate": 7.02205858425324e-06, - "loss": 0.1816, - "step": 12830 - }, - { - "epoch": 1.2087327194366595, - "grad_norm": 0.7354641556739807, - "learning_rate": 7.020617109464397e-06, - "loss": 0.2334, - "step": 12831 - }, - { - "epoch": 1.2088269235298275, - "grad_norm": 0.6110231280326843, - "learning_rate": 7.019175702610095e-06, - "loss": 0.1985, - "step": 12832 - }, - { - "epoch": 1.2089211276229952, - "grad_norm": 
0.5807055234909058, - "learning_rate": 7.017734363723189e-06, - "loss": 0.1814, - "step": 12833 - }, - { - "epoch": 1.2090153317161632, - "grad_norm": 0.5862298011779785, - "learning_rate": 7.016293092836556e-06, - "loss": 0.1934, - "step": 12834 - }, - { - "epoch": 1.209109535809331, - "grad_norm": 0.6219956874847412, - "learning_rate": 7.014851889983058e-06, - "loss": 0.1884, - "step": 12835 - }, - { - "epoch": 1.2092037399024989, - "grad_norm": 0.700542688369751, - "learning_rate": 7.013410755195547e-06, - "loss": 0.2139, - "step": 12836 - }, - { - "epoch": 1.2092979439956666, - "grad_norm": 0.6179965138435364, - "learning_rate": 7.011969688506894e-06, - "loss": 0.2129, - "step": 12837 - }, - { - "epoch": 1.2093921480888346, - "grad_norm": 0.7157810926437378, - "learning_rate": 7.010528689949954e-06, - "loss": 0.2378, - "step": 12838 - }, - { - "epoch": 1.2094863521820023, - "grad_norm": 0.6352458596229553, - "learning_rate": 7.009087759557581e-06, - "loss": 0.2309, - "step": 12839 - }, - { - "epoch": 1.2095805562751702, - "grad_norm": 0.6100900769233704, - "learning_rate": 7.007646897362632e-06, - "loss": 0.1816, - "step": 12840 - }, - { - "epoch": 1.209674760368338, - "grad_norm": 0.6023179888725281, - "learning_rate": 7.006206103397962e-06, - "loss": 0.2074, - "step": 12841 - }, - { - "epoch": 1.209768964461506, - "grad_norm": 0.7667863368988037, - "learning_rate": 7.004765377696424e-06, - "loss": 0.2076, - "step": 12842 - }, - { - "epoch": 1.2098631685546737, - "grad_norm": 0.6673808693885803, - "learning_rate": 7.003324720290865e-06, - "loss": 0.2148, - "step": 12843 - }, - { - "epoch": 1.2099573726478416, - "grad_norm": 0.690650224685669, - "learning_rate": 7.001884131214141e-06, - "loss": 0.2546, - "step": 12844 - }, - { - "epoch": 1.2100515767410094, - "grad_norm": 0.5930016040802002, - "learning_rate": 7.0004436104990925e-06, - "loss": 0.2089, - "step": 12845 - }, - { - "epoch": 1.2101457808341771, - "grad_norm": 0.6467576622962952, - "learning_rate": 6.999003158178568e-06, - "loss": 0.2155, - "step": 12846 - }, - { - "epoch": 1.210239984927345, - "grad_norm": 0.655545711517334, - "learning_rate": 6.997562774285413e-06, - "loss": 0.1981, - "step": 12847 - }, - { - "epoch": 1.210334189020513, - "grad_norm": 0.6148264408111572, - "learning_rate": 6.996122458852472e-06, - "loss": 0.1938, - "step": 12848 - }, - { - "epoch": 1.2104283931136808, - "grad_norm": 0.6048044562339783, - "learning_rate": 6.994682211912585e-06, - "loss": 0.2, - "step": 12849 - }, - { - "epoch": 1.2105225972068485, - "grad_norm": 0.6152675747871399, - "learning_rate": 6.993242033498589e-06, - "loss": 0.2074, - "step": 12850 - }, - { - "epoch": 1.2106168013000165, - "grad_norm": 0.9023056626319885, - "learning_rate": 6.991801923643324e-06, - "loss": 0.2084, - "step": 12851 - }, - { - "epoch": 1.2107110053931844, - "grad_norm": 0.6928418278694153, - "learning_rate": 6.990361882379633e-06, - "loss": 0.2043, - "step": 12852 - }, - { - "epoch": 1.2108052094863522, - "grad_norm": 0.6762405037879944, - "learning_rate": 6.988921909740338e-06, - "loss": 0.2149, - "step": 12853 - }, - { - "epoch": 1.21089941357952, - "grad_norm": 0.7207145094871521, - "learning_rate": 6.987482005758284e-06, - "loss": 0.2114, - "step": 12854 - }, - { - "epoch": 1.2109936176726879, - "grad_norm": 0.6876195073127747, - "learning_rate": 6.986042170466301e-06, - "loss": 0.1991, - "step": 12855 - }, - { - "epoch": 1.2110878217658558, - "grad_norm": 0.7374923229217529, - "learning_rate": 6.9846024038972115e-06, - "loss": 0.212, - "step": 
12856 - }, - { - "epoch": 1.2111820258590236, - "grad_norm": 0.6431059837341309, - "learning_rate": 6.983162706083858e-06, - "loss": 0.1802, - "step": 12857 - }, - { - "epoch": 1.2112762299521913, - "grad_norm": 0.6324191093444824, - "learning_rate": 6.981723077059057e-06, - "loss": 0.1873, - "step": 12858 - }, - { - "epoch": 1.2113704340453593, - "grad_norm": 0.6531680226325989, - "learning_rate": 6.9802835168556395e-06, - "loss": 0.188, - "step": 12859 - }, - { - "epoch": 1.2114646381385272, - "grad_norm": 0.6321462392807007, - "learning_rate": 6.978844025506424e-06, - "loss": 0.1879, - "step": 12860 - }, - { - "epoch": 1.211558842231695, - "grad_norm": 0.6635614633560181, - "learning_rate": 6.97740460304424e-06, - "loss": 0.1584, - "step": 12861 - }, - { - "epoch": 1.2116530463248627, - "grad_norm": 1.053592562675476, - "learning_rate": 6.975965249501906e-06, - "loss": 0.2125, - "step": 12862 - }, - { - "epoch": 1.2117472504180307, - "grad_norm": 0.6097428798675537, - "learning_rate": 6.974525964912238e-06, - "loss": 0.185, - "step": 12863 - }, - { - "epoch": 1.2118414545111986, - "grad_norm": 0.6370858550071716, - "learning_rate": 6.97308674930806e-06, - "loss": 0.1891, - "step": 12864 - }, - { - "epoch": 1.2119356586043664, - "grad_norm": 0.8104294538497925, - "learning_rate": 6.9716476027221845e-06, - "loss": 0.227, - "step": 12865 - }, - { - "epoch": 1.212029862697534, - "grad_norm": 0.6168524622917175, - "learning_rate": 6.970208525187425e-06, - "loss": 0.2064, - "step": 12866 - }, - { - "epoch": 1.212124066790702, - "grad_norm": 0.6625261902809143, - "learning_rate": 6.9687695167366e-06, - "loss": 0.2277, - "step": 12867 - }, - { - "epoch": 1.21221827088387, - "grad_norm": 0.7567079663276672, - "learning_rate": 6.967330577402516e-06, - "loss": 0.2451, - "step": 12868 - }, - { - "epoch": 1.2123124749770378, - "grad_norm": 1.0120052099227905, - "learning_rate": 6.965891707217989e-06, - "loss": 0.2097, - "step": 12869 - }, - { - "epoch": 1.2124066790702055, - "grad_norm": 0.6066929697990417, - "learning_rate": 6.964452906215815e-06, - "loss": 0.1947, - "step": 12870 - }, - { - "epoch": 1.2125008831633735, - "grad_norm": 0.6516382694244385, - "learning_rate": 6.963014174428815e-06, - "loss": 0.2025, - "step": 12871 - }, - { - "epoch": 1.2125950872565414, - "grad_norm": 0.605207085609436, - "learning_rate": 6.961575511889791e-06, - "loss": 0.1864, - "step": 12872 - }, - { - "epoch": 1.2126892913497092, - "grad_norm": 0.644787073135376, - "learning_rate": 6.960136918631537e-06, - "loss": 0.1974, - "step": 12873 - }, - { - "epoch": 1.212783495442877, - "grad_norm": 0.7836694121360779, - "learning_rate": 6.9586983946868665e-06, - "loss": 0.2035, - "step": 12874 - }, - { - "epoch": 1.2128776995360449, - "grad_norm": 0.6779301762580872, - "learning_rate": 6.9572599400885796e-06, - "loss": 0.2137, - "step": 12875 - }, - { - "epoch": 1.2129719036292128, - "grad_norm": 0.6337977051734924, - "learning_rate": 6.9558215548694645e-06, - "loss": 0.2075, - "step": 12876 - }, - { - "epoch": 1.2130661077223805, - "grad_norm": 0.6754133701324463, - "learning_rate": 6.954383239062332e-06, - "loss": 0.187, - "step": 12877 - }, - { - "epoch": 1.2131603118155483, - "grad_norm": 0.665571391582489, - "learning_rate": 6.952944992699971e-06, - "loss": 0.2194, - "step": 12878 - }, - { - "epoch": 1.2132545159087162, - "grad_norm": 0.6134076118469238, - "learning_rate": 6.9515068158151745e-06, - "loss": 0.2023, - "step": 12879 - }, - { - "epoch": 1.2133487200018842, - "grad_norm": 0.6333722472190857, - 
"learning_rate": 6.950068708440737e-06, - "loss": 0.1778, - "step": 12880 - }, - { - "epoch": 1.213442924095052, - "grad_norm": 0.6231043338775635, - "learning_rate": 6.948630670609451e-06, - "loss": 0.2145, - "step": 12881 - }, - { - "epoch": 1.2135371281882197, - "grad_norm": 0.6057214140892029, - "learning_rate": 6.947192702354104e-06, - "loss": 0.1738, - "step": 12882 - }, - { - "epoch": 1.2136313322813876, - "grad_norm": 0.6275814175605774, - "learning_rate": 6.945754803707484e-06, - "loss": 0.2013, - "step": 12883 - }, - { - "epoch": 1.2137255363745554, - "grad_norm": 0.6310911774635315, - "learning_rate": 6.944316974702379e-06, - "loss": 0.1786, - "step": 12884 - }, - { - "epoch": 1.2138197404677233, - "grad_norm": 0.6078299283981323, - "learning_rate": 6.9428792153715744e-06, - "loss": 0.1735, - "step": 12885 - }, - { - "epoch": 1.213913944560891, - "grad_norm": 0.6392536163330078, - "learning_rate": 6.941441525747847e-06, - "loss": 0.204, - "step": 12886 - }, - { - "epoch": 1.214008148654059, - "grad_norm": 0.7176835536956787, - "learning_rate": 6.940003905863986e-06, - "loss": 0.2473, - "step": 12887 - }, - { - "epoch": 1.2141023527472268, - "grad_norm": 0.7947714924812317, - "learning_rate": 6.938566355752769e-06, - "loss": 0.2024, - "step": 12888 - }, - { - "epoch": 1.2141965568403947, - "grad_norm": 0.633015513420105, - "learning_rate": 6.937128875446975e-06, - "loss": 0.1943, - "step": 12889 - }, - { - "epoch": 1.2142907609335625, - "grad_norm": 0.6805613040924072, - "learning_rate": 6.935691464979374e-06, - "loss": 0.2059, - "step": 12890 - }, - { - "epoch": 1.2143849650267304, - "grad_norm": 0.7668185830116272, - "learning_rate": 6.93425412438275e-06, - "loss": 0.2454, - "step": 12891 - }, - { - "epoch": 1.2144791691198982, - "grad_norm": 0.6264731884002686, - "learning_rate": 6.932816853689875e-06, - "loss": 0.1765, - "step": 12892 - }, - { - "epoch": 1.2145733732130661, - "grad_norm": 0.618474006652832, - "learning_rate": 6.931379652933514e-06, - "loss": 0.1966, - "step": 12893 - }, - { - "epoch": 1.2146675773062339, - "grad_norm": 0.7000207901000977, - "learning_rate": 6.929942522146446e-06, - "loss": 0.2274, - "step": 12894 - }, - { - "epoch": 1.2147617813994018, - "grad_norm": 0.6906728148460388, - "learning_rate": 6.928505461361439e-06, - "loss": 0.197, - "step": 12895 - }, - { - "epoch": 1.2148559854925696, - "grad_norm": 0.6946936249732971, - "learning_rate": 6.9270684706112515e-06, - "loss": 0.1914, - "step": 12896 - }, - { - "epoch": 1.2149501895857375, - "grad_norm": 0.6604578495025635, - "learning_rate": 6.925631549928662e-06, - "loss": 0.201, - "step": 12897 - }, - { - "epoch": 1.2150443936789053, - "grad_norm": 0.5688307285308838, - "learning_rate": 6.924194699346425e-06, - "loss": 0.1949, - "step": 12898 - }, - { - "epoch": 1.2151385977720732, - "grad_norm": 0.5732439160346985, - "learning_rate": 6.922757918897305e-06, - "loss": 0.1829, - "step": 12899 - }, - { - "epoch": 1.215232801865241, - "grad_norm": 0.6947095394134521, - "learning_rate": 6.9213212086140624e-06, - "loss": 0.2043, - "step": 12900 - }, - { - "epoch": 1.215327005958409, - "grad_norm": 0.6432653665542603, - "learning_rate": 6.9198845685294595e-06, - "loss": 0.2041, - "step": 12901 - }, - { - "epoch": 1.2154212100515767, - "grad_norm": 0.6288147568702698, - "learning_rate": 6.918447998676252e-06, - "loss": 0.1804, - "step": 12902 - }, - { - "epoch": 1.2155154141447446, - "grad_norm": 0.6071980595588684, - "learning_rate": 6.917011499087193e-06, - "loss": 0.1827, - "step": 12903 - }, - { - 
"epoch": 1.2156096182379124, - "grad_norm": 0.610538899898529, - "learning_rate": 6.915575069795042e-06, - "loss": 0.2186, - "step": 12904 - }, - { - "epoch": 1.2157038223310803, - "grad_norm": 0.681826114654541, - "learning_rate": 6.91413871083255e-06, - "loss": 0.2009, - "step": 12905 - }, - { - "epoch": 1.215798026424248, - "grad_norm": 0.6242051720619202, - "learning_rate": 6.912702422232466e-06, - "loss": 0.1945, - "step": 12906 - }, - { - "epoch": 1.215892230517416, - "grad_norm": 0.6371919512748718, - "learning_rate": 6.911266204027542e-06, - "loss": 0.1884, - "step": 12907 - }, - { - "epoch": 1.2159864346105838, - "grad_norm": 0.6108184456825256, - "learning_rate": 6.909830056250527e-06, - "loss": 0.1836, - "step": 12908 - }, - { - "epoch": 1.2160806387037517, - "grad_norm": 0.6858425140380859, - "learning_rate": 6.908393978934163e-06, - "loss": 0.202, - "step": 12909 - }, - { - "epoch": 1.2161748427969195, - "grad_norm": 0.6188270449638367, - "learning_rate": 6.906957972111199e-06, - "loss": 0.2185, - "step": 12910 - }, - { - "epoch": 1.2162690468900874, - "grad_norm": 0.7016656994819641, - "learning_rate": 6.905522035814378e-06, - "loss": 0.2181, - "step": 12911 - }, - { - "epoch": 1.2163632509832552, - "grad_norm": 0.6962555050849915, - "learning_rate": 6.9040861700764415e-06, - "loss": 0.2095, - "step": 12912 - }, - { - "epoch": 1.216457455076423, - "grad_norm": 0.6605420112609863, - "learning_rate": 6.902650374930122e-06, - "loss": 0.2255, - "step": 12913 - }, - { - "epoch": 1.2165516591695908, - "grad_norm": 0.6797463893890381, - "learning_rate": 6.901214650408171e-06, - "loss": 0.1823, - "step": 12914 - }, - { - "epoch": 1.2166458632627588, - "grad_norm": 0.6401837468147278, - "learning_rate": 6.899778996543316e-06, - "loss": 0.2159, - "step": 12915 - }, - { - "epoch": 1.2167400673559265, - "grad_norm": 0.6821116209030151, - "learning_rate": 6.89834341336829e-06, - "loss": 0.2272, - "step": 12916 - }, - { - "epoch": 1.2168342714490945, - "grad_norm": 0.6074873208999634, - "learning_rate": 6.896907900915837e-06, - "loss": 0.2048, - "step": 12917 - }, - { - "epoch": 1.2169284755422622, - "grad_norm": 0.5764397978782654, - "learning_rate": 6.8954724592186815e-06, - "loss": 0.2006, - "step": 12918 - }, - { - "epoch": 1.2170226796354302, - "grad_norm": 0.6415478587150574, - "learning_rate": 6.894037088309551e-06, - "loss": 0.2061, - "step": 12919 - }, - { - "epoch": 1.217116883728598, - "grad_norm": 0.6077975630760193, - "learning_rate": 6.892601788221185e-06, - "loss": 0.2006, - "step": 12920 - }, - { - "epoch": 1.217211087821766, - "grad_norm": 0.6001858115196228, - "learning_rate": 6.8911665589863e-06, - "loss": 0.1894, - "step": 12921 - }, - { - "epoch": 1.2173052919149336, - "grad_norm": 0.6244654059410095, - "learning_rate": 6.889731400637627e-06, - "loss": 0.1916, - "step": 12922 - }, - { - "epoch": 1.2173994960081016, - "grad_norm": 0.629303514957428, - "learning_rate": 6.888296313207885e-06, - "loss": 0.2089, - "step": 12923 - }, - { - "epoch": 1.2174937001012693, - "grad_norm": 0.5962958931922913, - "learning_rate": 6.886861296729803e-06, - "loss": 0.2066, - "step": 12924 - }, - { - "epoch": 1.2175879041944373, - "grad_norm": 0.7022969126701355, - "learning_rate": 6.885426351236097e-06, - "loss": 0.2109, - "step": 12925 - }, - { - "epoch": 1.217682108287605, - "grad_norm": 0.6517074704170227, - "learning_rate": 6.883991476759484e-06, - "loss": 0.2242, - "step": 12926 - }, - { - "epoch": 1.217776312380773, - "grad_norm": 0.6240322589874268, - "learning_rate": 
6.882556673332687e-06, - "loss": 0.2162, - "step": 12927 - }, - { - "epoch": 1.2178705164739407, - "grad_norm": 0.7480376958847046, - "learning_rate": 6.88112194098842e-06, - "loss": 0.2004, - "step": 12928 - }, - { - "epoch": 1.2179647205671087, - "grad_norm": 0.6312969923019409, - "learning_rate": 6.8796872797593935e-06, - "loss": 0.224, - "step": 12929 - }, - { - "epoch": 1.2180589246602764, - "grad_norm": 0.6434739828109741, - "learning_rate": 6.878252689678326e-06, - "loss": 0.2132, - "step": 12930 - }, - { - "epoch": 1.2181531287534444, - "grad_norm": 0.6692473292350769, - "learning_rate": 6.876818170777924e-06, - "loss": 0.2147, - "step": 12931 - }, - { - "epoch": 1.2182473328466121, - "grad_norm": 0.6293768882751465, - "learning_rate": 6.875383723090898e-06, - "loss": 0.1832, - "step": 12932 - }, - { - "epoch": 1.21834153693978, - "grad_norm": 0.6315598487854004, - "learning_rate": 6.873949346649951e-06, - "loss": 0.1979, - "step": 12933 - }, - { - "epoch": 1.2184357410329478, - "grad_norm": 0.6284828186035156, - "learning_rate": 6.872515041487799e-06, - "loss": 0.2065, - "step": 12934 - }, - { - "epoch": 1.2185299451261158, - "grad_norm": 0.5833081603050232, - "learning_rate": 6.87108080763714e-06, - "loss": 0.1788, - "step": 12935 - }, - { - "epoch": 1.2186241492192835, - "grad_norm": 0.7753840684890747, - "learning_rate": 6.869646645130673e-06, - "loss": 0.2163, - "step": 12936 - }, - { - "epoch": 1.2187183533124515, - "grad_norm": 0.7362518310546875, - "learning_rate": 6.86821255400111e-06, - "loss": 0.2166, - "step": 12937 - }, - { - "epoch": 1.2188125574056192, - "grad_norm": 0.6860774159431458, - "learning_rate": 6.866778534281141e-06, - "loss": 0.2301, - "step": 12938 - }, - { - "epoch": 1.2189067614987872, - "grad_norm": 0.6610662937164307, - "learning_rate": 6.865344586003464e-06, - "loss": 0.1869, - "step": 12939 - }, - { - "epoch": 1.219000965591955, - "grad_norm": 0.6065652370452881, - "learning_rate": 6.863910709200784e-06, - "loss": 0.1961, - "step": 12940 - }, - { - "epoch": 1.2190951696851229, - "grad_norm": 0.6814299821853638, - "learning_rate": 6.862476903905788e-06, - "loss": 0.2068, - "step": 12941 - }, - { - "epoch": 1.2191893737782906, - "grad_norm": 0.7525548934936523, - "learning_rate": 6.8610431701511705e-06, - "loss": 0.24, - "step": 12942 - }, - { - "epoch": 1.2192835778714586, - "grad_norm": 0.535211980342865, - "learning_rate": 6.859609507969621e-06, - "loss": 0.1864, - "step": 12943 - }, - { - "epoch": 1.2193777819646263, - "grad_norm": 0.8989139199256897, - "learning_rate": 6.858175917393834e-06, - "loss": 0.2182, - "step": 12944 - }, - { - "epoch": 1.2194719860577943, - "grad_norm": 0.6886752843856812, - "learning_rate": 6.8567423984564955e-06, - "loss": 0.2273, - "step": 12945 - }, - { - "epoch": 1.219566190150962, - "grad_norm": 0.6629517674446106, - "learning_rate": 6.8553089511902896e-06, - "loss": 0.2219, - "step": 12946 - }, - { - "epoch": 1.21966039424413, - "grad_norm": 0.6721383333206177, - "learning_rate": 6.853875575627903e-06, - "loss": 0.1996, - "step": 12947 - }, - { - "epoch": 1.2197545983372977, - "grad_norm": 0.7022584676742554, - "learning_rate": 6.85244227180202e-06, - "loss": 0.2102, - "step": 12948 - }, - { - "epoch": 1.2198488024304657, - "grad_norm": 0.7004144787788391, - "learning_rate": 6.85100903974532e-06, - "loss": 0.2159, - "step": 12949 - }, - { - "epoch": 1.2199430065236334, - "grad_norm": 0.7221656441688538, - "learning_rate": 6.8495758794904845e-06, - "loss": 0.2281, - "step": 12950 - }, - { - "epoch": 
1.2200372106168014, - "grad_norm": 0.5384569764137268, - "learning_rate": 6.8481427910701915e-06, - "loss": 0.1882, - "step": 12951 - }, - { - "epoch": 1.220131414709969, - "grad_norm": 0.738702654838562, - "learning_rate": 6.84670977451712e-06, - "loss": 0.2285, - "step": 12952 - }, - { - "epoch": 1.220225618803137, - "grad_norm": 0.6407803893089294, - "learning_rate": 6.845276829863935e-06, - "loss": 0.2044, - "step": 12953 - }, - { - "epoch": 1.2203198228963048, - "grad_norm": 0.6032186150550842, - "learning_rate": 6.843843957143324e-06, - "loss": 0.2278, - "step": 12954 - }, - { - "epoch": 1.2204140269894728, - "grad_norm": 0.6539852619171143, - "learning_rate": 6.842411156387949e-06, - "loss": 0.198, - "step": 12955 - }, - { - "epoch": 1.2205082310826405, - "grad_norm": 0.6179254651069641, - "learning_rate": 6.8409784276304805e-06, - "loss": 0.1949, - "step": 12956 - }, - { - "epoch": 1.2206024351758085, - "grad_norm": 0.7868675589561462, - "learning_rate": 6.839545770903595e-06, - "loss": 0.2299, - "step": 12957 - }, - { - "epoch": 1.2206966392689762, - "grad_norm": 0.5924388766288757, - "learning_rate": 6.838113186239951e-06, - "loss": 0.1879, - "step": 12958 - }, - { - "epoch": 1.2207908433621442, - "grad_norm": 0.6939521431922913, - "learning_rate": 6.836680673672214e-06, - "loss": 0.2126, - "step": 12959 - }, - { - "epoch": 1.220885047455312, - "grad_norm": 0.5712323784828186, - "learning_rate": 6.835248233233052e-06, - "loss": 0.1882, - "step": 12960 - }, - { - "epoch": 1.2209792515484799, - "grad_norm": 0.6114972829818726, - "learning_rate": 6.833815864955126e-06, - "loss": 0.1519, - "step": 12961 - }, - { - "epoch": 1.2210734556416476, - "grad_norm": 0.6172329187393188, - "learning_rate": 6.832383568871093e-06, - "loss": 0.2085, - "step": 12962 - }, - { - "epoch": 1.2211676597348156, - "grad_norm": 0.6652126908302307, - "learning_rate": 6.830951345013612e-06, - "loss": 0.1959, - "step": 12963 - }, - { - "epoch": 1.2212618638279833, - "grad_norm": 0.6360315680503845, - "learning_rate": 6.8295191934153435e-06, - "loss": 0.1829, - "step": 12964 - }, - { - "epoch": 1.2213560679211513, - "grad_norm": 0.6886136531829834, - "learning_rate": 6.8280871141089415e-06, - "loss": 0.2186, - "step": 12965 - }, - { - "epoch": 1.221450272014319, - "grad_norm": 0.6091665625572205, - "learning_rate": 6.826655107127056e-06, - "loss": 0.1843, - "step": 12966 - }, - { - "epoch": 1.221544476107487, - "grad_norm": 0.7013200521469116, - "learning_rate": 6.825223172502344e-06, - "loss": 0.2184, - "step": 12967 - }, - { - "epoch": 1.2216386802006547, - "grad_norm": 0.6541852355003357, - "learning_rate": 6.823791310267454e-06, - "loss": 0.2055, - "step": 12968 - }, - { - "epoch": 1.2217328842938227, - "grad_norm": 0.6683152914047241, - "learning_rate": 6.822359520455031e-06, - "loss": 0.2137, - "step": 12969 - }, - { - "epoch": 1.2218270883869904, - "grad_norm": 0.683961033821106, - "learning_rate": 6.820927803097728e-06, - "loss": 0.2301, - "step": 12970 - }, - { - "epoch": 1.2219212924801583, - "grad_norm": 0.6960816383361816, - "learning_rate": 6.819496158228187e-06, - "loss": 0.241, - "step": 12971 - }, - { - "epoch": 1.222015496573326, - "grad_norm": 0.6431972980499268, - "learning_rate": 6.818064585879055e-06, - "loss": 0.19, - "step": 12972 - }, - { - "epoch": 1.222109700666494, - "grad_norm": 0.7428820729255676, - "learning_rate": 6.816633086082964e-06, - "loss": 0.2434, - "step": 12973 - }, - { - "epoch": 1.2222039047596618, - "grad_norm": 0.6971383690834045, - "learning_rate": 
6.8152016588725704e-06, - "loss": 0.2148, - "step": 12974 - }, - { - "epoch": 1.2222981088528297, - "grad_norm": 0.6649903059005737, - "learning_rate": 6.813770304280501e-06, - "loss": 0.1937, - "step": 12975 - }, - { - "epoch": 1.2223923129459975, - "grad_norm": 0.546851396560669, - "learning_rate": 6.812339022339391e-06, - "loss": 0.2076, - "step": 12976 - }, - { - "epoch": 1.2224865170391654, - "grad_norm": 0.7050590515136719, - "learning_rate": 6.810907813081888e-06, - "loss": 0.211, - "step": 12977 - }, - { - "epoch": 1.2225807211323332, - "grad_norm": 0.6709058880805969, - "learning_rate": 6.809476676540618e-06, - "loss": 0.2297, - "step": 12978 - }, - { - "epoch": 1.2226749252255011, - "grad_norm": 0.5935243964195251, - "learning_rate": 6.808045612748211e-06, - "loss": 0.1976, - "step": 12979 - }, - { - "epoch": 1.2227691293186689, - "grad_norm": 0.610144317150116, - "learning_rate": 6.806614621737303e-06, - "loss": 0.1971, - "step": 12980 - }, - { - "epoch": 1.2228633334118368, - "grad_norm": 0.6993465423583984, - "learning_rate": 6.80518370354052e-06, - "loss": 0.2394, - "step": 12981 - }, - { - "epoch": 1.2229575375050046, - "grad_norm": 0.6231632232666016, - "learning_rate": 6.803752858190489e-06, - "loss": 0.1884, - "step": 12982 - }, - { - "epoch": 1.2230517415981725, - "grad_norm": 0.8581657409667969, - "learning_rate": 6.8023220857198345e-06, - "loss": 0.236, - "step": 12983 - }, - { - "epoch": 1.2231459456913403, - "grad_norm": 0.6415079236030579, - "learning_rate": 6.800891386161184e-06, - "loss": 0.197, - "step": 12984 - }, - { - "epoch": 1.223240149784508, - "grad_norm": 0.7081721425056458, - "learning_rate": 6.7994607595471565e-06, - "loss": 0.2041, - "step": 12985 - }, - { - "epoch": 1.223334353877676, - "grad_norm": 0.6858395934104919, - "learning_rate": 6.798030205910373e-06, - "loss": 0.2151, - "step": 12986 - }, - { - "epoch": 1.223428557970844, - "grad_norm": 0.6121916770935059, - "learning_rate": 6.796599725283453e-06, - "loss": 0.1799, - "step": 12987 - }, - { - "epoch": 1.2235227620640117, - "grad_norm": 0.6130219101905823, - "learning_rate": 6.795169317699014e-06, - "loss": 0.1897, - "step": 12988 - }, - { - "epoch": 1.2236169661571794, - "grad_norm": 0.7238254547119141, - "learning_rate": 6.793738983189668e-06, - "loss": 0.2447, - "step": 12989 - }, - { - "epoch": 1.2237111702503474, - "grad_norm": 0.664619505405426, - "learning_rate": 6.792308721788035e-06, - "loss": 0.195, - "step": 12990 - }, - { - "epoch": 1.2238053743435153, - "grad_norm": 0.6615363359451294, - "learning_rate": 6.7908785335267245e-06, - "loss": 0.2067, - "step": 12991 - }, - { - "epoch": 1.223899578436683, - "grad_norm": 0.6361871957778931, - "learning_rate": 6.789448418438348e-06, - "loss": 0.2296, - "step": 12992 - }, - { - "epoch": 1.2239937825298508, - "grad_norm": 0.6958268880844116, - "learning_rate": 6.788018376555506e-06, - "loss": 0.2273, - "step": 12993 - }, - { - "epoch": 1.2240879866230188, - "grad_norm": 0.6189342141151428, - "learning_rate": 6.786588407910819e-06, - "loss": 0.2197, - "step": 12994 - }, - { - "epoch": 1.2241821907161867, - "grad_norm": 0.7021915316581726, - "learning_rate": 6.785158512536884e-06, - "loss": 0.203, - "step": 12995 - }, - { - "epoch": 1.2242763948093545, - "grad_norm": 0.7075203061103821, - "learning_rate": 6.783728690466302e-06, - "loss": 0.229, - "step": 12996 - }, - { - "epoch": 1.2243705989025222, - "grad_norm": 1.0234602689743042, - "learning_rate": 6.782298941731686e-06, - "loss": 0.2225, - "step": 12997 - }, - { - "epoch": 
1.2244648029956902, - "grad_norm": 0.6870042681694031, - "learning_rate": 6.780869266365629e-06, - "loss": 0.2115, - "step": 12998 - }, - { - "epoch": 1.2245590070888581, - "grad_norm": 0.6313785910606384, - "learning_rate": 6.77943966440073e-06, - "loss": 0.1857, - "step": 12999 - }, - { - "epoch": 1.2246532111820259, - "grad_norm": 0.6202945709228516, - "learning_rate": 6.778010135869588e-06, - "loss": 0.2032, - "step": 13000 - }, - { - "epoch": 1.2247474152751936, - "grad_norm": 0.6480250358581543, - "learning_rate": 6.776580680804799e-06, - "loss": 0.2405, - "step": 13001 - }, - { - "epoch": 1.2248416193683616, - "grad_norm": 0.6614483594894409, - "learning_rate": 6.7751512992389535e-06, - "loss": 0.1711, - "step": 13002 - }, - { - "epoch": 1.2249358234615295, - "grad_norm": 0.6586751341819763, - "learning_rate": 6.773721991204646e-06, - "loss": 0.2226, - "step": 13003 - }, - { - "epoch": 1.2250300275546973, - "grad_norm": 0.6510355472564697, - "learning_rate": 6.7722927567344664e-06, - "loss": 0.2223, - "step": 13004 - }, - { - "epoch": 1.225124231647865, - "grad_norm": 0.6354719996452332, - "learning_rate": 6.770863595861006e-06, - "loss": 0.2112, - "step": 13005 - }, - { - "epoch": 1.225218435741033, - "grad_norm": 0.6637195348739624, - "learning_rate": 6.769434508616846e-06, - "loss": 0.1985, - "step": 13006 - }, - { - "epoch": 1.225312639834201, - "grad_norm": 0.6359204649925232, - "learning_rate": 6.768005495034577e-06, - "loss": 0.2042, - "step": 13007 - }, - { - "epoch": 1.2254068439273686, - "grad_norm": 0.659364640712738, - "learning_rate": 6.7665765551467835e-06, - "loss": 0.2073, - "step": 13008 - }, - { - "epoch": 1.2255010480205364, - "grad_norm": 0.6522590517997742, - "learning_rate": 6.765147688986041e-06, - "loss": 0.1839, - "step": 13009 - }, - { - "epoch": 1.2255952521137043, - "grad_norm": 0.6877027153968811, - "learning_rate": 6.7637188965849365e-06, - "loss": 0.2395, - "step": 13010 - }, - { - "epoch": 1.2256894562068723, - "grad_norm": 0.6307399272918701, - "learning_rate": 6.762290177976046e-06, - "loss": 0.1898, - "step": 13011 - }, - { - "epoch": 1.22578366030004, - "grad_norm": 0.645378589630127, - "learning_rate": 6.7608615331919496e-06, - "loss": 0.2109, - "step": 13012 - }, - { - "epoch": 1.2258778643932078, - "grad_norm": 0.6725184917449951, - "learning_rate": 6.759432962265214e-06, - "loss": 0.2086, - "step": 13013 - }, - { - "epoch": 1.2259720684863757, - "grad_norm": 0.7793787121772766, - "learning_rate": 6.758004465228423e-06, - "loss": 0.1951, - "step": 13014 - }, - { - "epoch": 1.2260662725795437, - "grad_norm": 0.6769189238548279, - "learning_rate": 6.756576042114143e-06, - "loss": 0.1976, - "step": 13015 - }, - { - "epoch": 1.2261604766727114, - "grad_norm": 0.6077859401702881, - "learning_rate": 6.7551476929549396e-06, - "loss": 0.2024, - "step": 13016 - }, - { - "epoch": 1.2262546807658792, - "grad_norm": 0.5559185743331909, - "learning_rate": 6.753719417783394e-06, - "loss": 0.1905, - "step": 13017 - }, - { - "epoch": 1.2263488848590471, - "grad_norm": 0.7667734026908875, - "learning_rate": 6.752291216632065e-06, - "loss": 0.2025, - "step": 13018 - }, - { - "epoch": 1.2264430889522149, - "grad_norm": 0.6821576952934265, - "learning_rate": 6.750863089533516e-06, - "loss": 0.1975, - "step": 13019 - }, - { - "epoch": 1.2265372930453828, - "grad_norm": 0.6921886205673218, - "learning_rate": 6.749435036520315e-06, - "loss": 0.1745, - "step": 13020 - }, - { - "epoch": 1.2266314971385506, - "grad_norm": 0.658531904220581, - "learning_rate": 
6.748007057625023e-06, - "loss": 0.2207, - "step": 13021 - }, - { - "epoch": 1.2267257012317185, - "grad_norm": 0.6247740387916565, - "learning_rate": 6.746579152880201e-06, - "loss": 0.2025, - "step": 13022 - }, - { - "epoch": 1.2268199053248863, - "grad_norm": 0.6983477473258972, - "learning_rate": 6.745151322318402e-06, - "loss": 0.2195, - "step": 13023 - }, - { - "epoch": 1.2269141094180542, - "grad_norm": 0.70487380027771, - "learning_rate": 6.743723565972189e-06, - "loss": 0.2205, - "step": 13024 - }, - { - "epoch": 1.227008313511222, - "grad_norm": 0.6091448068618774, - "learning_rate": 6.742295883874114e-06, - "loss": 0.1991, - "step": 13025 - }, - { - "epoch": 1.22710251760439, - "grad_norm": 0.6577531099319458, - "learning_rate": 6.740868276056729e-06, - "loss": 0.1923, - "step": 13026 - }, - { - "epoch": 1.2271967216975577, - "grad_norm": 0.5893779397010803, - "learning_rate": 6.73944074255259e-06, - "loss": 0.1886, - "step": 13027 - }, - { - "epoch": 1.2272909257907256, - "grad_norm": 0.708914577960968, - "learning_rate": 6.738013283394244e-06, - "loss": 0.2096, - "step": 13028 - }, - { - "epoch": 1.2273851298838934, - "grad_norm": 0.7507368922233582, - "learning_rate": 6.73658589861424e-06, - "loss": 0.2191, - "step": 13029 - }, - { - "epoch": 1.2274793339770613, - "grad_norm": 0.6944422125816345, - "learning_rate": 6.735158588245125e-06, - "loss": 0.2026, - "step": 13030 - }, - { - "epoch": 1.227573538070229, - "grad_norm": 0.6723152995109558, - "learning_rate": 6.733731352319446e-06, - "loss": 0.226, - "step": 13031 - }, - { - "epoch": 1.227667742163397, - "grad_norm": 0.5919938087463379, - "learning_rate": 6.73230419086974e-06, - "loss": 0.1834, - "step": 13032 - }, - { - "epoch": 1.2277619462565648, - "grad_norm": 0.6095983386039734, - "learning_rate": 6.7308771039285496e-06, - "loss": 0.1924, - "step": 13033 - }, - { - "epoch": 1.2278561503497327, - "grad_norm": 0.6341801881790161, - "learning_rate": 6.729450091528422e-06, - "loss": 0.2097, - "step": 13034 - }, - { - "epoch": 1.2279503544429005, - "grad_norm": 0.6179428100585938, - "learning_rate": 6.728023153701889e-06, - "loss": 0.1781, - "step": 13035 - }, - { - "epoch": 1.2280445585360684, - "grad_norm": 0.6255068182945251, - "learning_rate": 6.726596290481484e-06, - "loss": 0.2111, - "step": 13036 - }, - { - "epoch": 1.2281387626292362, - "grad_norm": 0.5949823260307312, - "learning_rate": 6.725169501899752e-06, - "loss": 0.2022, - "step": 13037 - }, - { - "epoch": 1.2282329667224041, - "grad_norm": 0.6200029850006104, - "learning_rate": 6.7237427879892184e-06, - "loss": 0.1996, - "step": 13038 - }, - { - "epoch": 1.2283271708155719, - "grad_norm": 0.7356598973274231, - "learning_rate": 6.7223161487824125e-06, - "loss": 0.2112, - "step": 13039 - }, - { - "epoch": 1.2284213749087398, - "grad_norm": 0.5932050347328186, - "learning_rate": 6.720889584311871e-06, - "loss": 0.2067, - "step": 13040 - }, - { - "epoch": 1.2285155790019076, - "grad_norm": 0.565879762172699, - "learning_rate": 6.7194630946101195e-06, - "loss": 0.18, - "step": 13041 - }, - { - "epoch": 1.2286097830950755, - "grad_norm": 0.6121799349784851, - "learning_rate": 6.718036679709681e-06, - "loss": 0.2064, - "step": 13042 - }, - { - "epoch": 1.2287039871882433, - "grad_norm": 1.1347873210906982, - "learning_rate": 6.716610339643079e-06, - "loss": 0.2086, - "step": 13043 - }, - { - "epoch": 1.2287981912814112, - "grad_norm": 0.6285647749900818, - "learning_rate": 6.715184074442842e-06, - "loss": 0.1982, - "step": 13044 - }, - { - "epoch": 
1.228892395374579, - "grad_norm": 0.6440559029579163, - "learning_rate": 6.713757884141489e-06, - "loss": 0.21, - "step": 13045 - }, - { - "epoch": 1.228986599467747, - "grad_norm": 0.5830798149108887, - "learning_rate": 6.712331768771536e-06, - "loss": 0.1839, - "step": 13046 - }, - { - "epoch": 1.2290808035609146, - "grad_norm": 0.699811577796936, - "learning_rate": 6.710905728365504e-06, - "loss": 0.2108, - "step": 13047 - }, - { - "epoch": 1.2291750076540826, - "grad_norm": 0.9867917895317078, - "learning_rate": 6.7094797629559105e-06, - "loss": 0.2024, - "step": 13048 - }, - { - "epoch": 1.2292692117472503, - "grad_norm": 0.7100919485092163, - "learning_rate": 6.708053872575264e-06, - "loss": 0.1807, - "step": 13049 - }, - { - "epoch": 1.2293634158404183, - "grad_norm": 0.6222515106201172, - "learning_rate": 6.706628057256082e-06, - "loss": 0.1939, - "step": 13050 - }, - { - "epoch": 1.229457619933586, - "grad_norm": 0.6623128056526184, - "learning_rate": 6.705202317030876e-06, - "loss": 0.2179, - "step": 13051 - }, - { - "epoch": 1.229551824026754, - "grad_norm": 0.6505099534988403, - "learning_rate": 6.70377665193215e-06, - "loss": 0.2026, - "step": 13052 - }, - { - "epoch": 1.2296460281199217, - "grad_norm": 0.6078628301620483, - "learning_rate": 6.702351061992411e-06, - "loss": 0.1868, - "step": 13053 - }, - { - "epoch": 1.2297402322130897, - "grad_norm": 0.6993672251701355, - "learning_rate": 6.700925547244173e-06, - "loss": 0.2292, - "step": 13054 - }, - { - "epoch": 1.2298344363062574, - "grad_norm": 0.6512143611907959, - "learning_rate": 6.699500107719933e-06, - "loss": 0.183, - "step": 13055 - }, - { - "epoch": 1.2299286403994254, - "grad_norm": 0.6094197034835815, - "learning_rate": 6.69807474345219e-06, - "loss": 0.188, - "step": 13056 - }, - { - "epoch": 1.2300228444925931, - "grad_norm": 0.6811914443969727, - "learning_rate": 6.696649454473456e-06, - "loss": 0.2339, - "step": 13057 - }, - { - "epoch": 1.230117048585761, - "grad_norm": 0.5943988561630249, - "learning_rate": 6.695224240816223e-06, - "loss": 0.1789, - "step": 13058 - }, - { - "epoch": 1.2302112526789288, - "grad_norm": 0.6743707060813904, - "learning_rate": 6.693799102512983e-06, - "loss": 0.2097, - "step": 13059 - }, - { - "epoch": 1.2303054567720968, - "grad_norm": 0.6571220755577087, - "learning_rate": 6.692374039596241e-06, - "loss": 0.2069, - "step": 13060 - }, - { - "epoch": 1.2303996608652645, - "grad_norm": 0.6759471297264099, - "learning_rate": 6.690949052098486e-06, - "loss": 0.223, - "step": 13061 - }, - { - "epoch": 1.2304938649584325, - "grad_norm": 0.6499541401863098, - "learning_rate": 6.6895241400522085e-06, - "loss": 0.1854, - "step": 13062 - }, - { - "epoch": 1.2305880690516002, - "grad_norm": 0.644608736038208, - "learning_rate": 6.6880993034898985e-06, - "loss": 0.1943, - "step": 13063 - }, - { - "epoch": 1.2306822731447682, - "grad_norm": 0.6547313332557678, - "learning_rate": 6.68667454244405e-06, - "loss": 0.2098, - "step": 13064 - }, - { - "epoch": 1.230776477237936, - "grad_norm": 0.6327990293502808, - "learning_rate": 6.685249856947146e-06, - "loss": 0.2028, - "step": 13065 - }, - { - "epoch": 1.2308706813311039, - "grad_norm": 0.7770459651947021, - "learning_rate": 6.683825247031668e-06, - "loss": 0.2084, - "step": 13066 - }, - { - "epoch": 1.2309648854242716, - "grad_norm": 0.6875342726707458, - "learning_rate": 6.682400712730106e-06, - "loss": 0.1967, - "step": 13067 - }, - { - "epoch": 1.2310590895174396, - "grad_norm": 0.6348468065261841, - "learning_rate": 
6.6809762540749375e-06, - "loss": 0.1977, - "step": 13068 - }, - { - "epoch": 1.2311532936106073, - "grad_norm": 0.5387022495269775, - "learning_rate": 6.679551871098644e-06, - "loss": 0.1653, - "step": 13069 - }, - { - "epoch": 1.2312474977037753, - "grad_norm": 0.6535850167274475, - "learning_rate": 6.678127563833703e-06, - "loss": 0.2006, - "step": 13070 - }, - { - "epoch": 1.231341701796943, - "grad_norm": 0.632869303226471, - "learning_rate": 6.676703332312593e-06, - "loss": 0.2389, - "step": 13071 - }, - { - "epoch": 1.231435905890111, - "grad_norm": 0.7358940839767456, - "learning_rate": 6.675279176567785e-06, - "loss": 0.2206, - "step": 13072 - }, - { - "epoch": 1.2315301099832787, - "grad_norm": 0.6896353363990784, - "learning_rate": 6.67385509663175e-06, - "loss": 0.2249, - "step": 13073 - }, - { - "epoch": 1.2316243140764467, - "grad_norm": 0.6563900709152222, - "learning_rate": 6.672431092536968e-06, - "loss": 0.2229, - "step": 13074 - }, - { - "epoch": 1.2317185181696144, - "grad_norm": 0.6559754014015198, - "learning_rate": 6.671007164315901e-06, - "loss": 0.2029, - "step": 13075 - }, - { - "epoch": 1.2318127222627824, - "grad_norm": 0.5931310653686523, - "learning_rate": 6.6695833120010165e-06, - "loss": 0.2023, - "step": 13076 - }, - { - "epoch": 1.2319069263559501, - "grad_norm": 0.6085942387580872, - "learning_rate": 6.668159535624786e-06, - "loss": 0.2113, - "step": 13077 - }, - { - "epoch": 1.232001130449118, - "grad_norm": 0.6777974367141724, - "learning_rate": 6.666735835219671e-06, - "loss": 0.2165, - "step": 13078 - }, - { - "epoch": 1.2320953345422858, - "grad_norm": 0.6456210613250732, - "learning_rate": 6.665312210818133e-06, - "loss": 0.2127, - "step": 13079 - }, - { - "epoch": 1.2321895386354538, - "grad_norm": 0.5929754972457886, - "learning_rate": 6.663888662452634e-06, - "loss": 0.1873, - "step": 13080 - }, - { - "epoch": 1.2322837427286215, - "grad_norm": 0.7054176330566406, - "learning_rate": 6.662465190155633e-06, - "loss": 0.2113, - "step": 13081 - }, - { - "epoch": 1.2323779468217895, - "grad_norm": 0.6892966032028198, - "learning_rate": 6.661041793959588e-06, - "loss": 0.2558, - "step": 13082 - }, - { - "epoch": 1.2324721509149572, - "grad_norm": 0.7085790634155273, - "learning_rate": 6.659618473896951e-06, - "loss": 0.2193, - "step": 13083 - }, - { - "epoch": 1.2325663550081252, - "grad_norm": 0.6542141437530518, - "learning_rate": 6.658195230000182e-06, - "loss": 0.1873, - "step": 13084 - }, - { - "epoch": 1.232660559101293, - "grad_norm": 0.7204039692878723, - "learning_rate": 6.656772062301729e-06, - "loss": 0.2189, - "step": 13085 - }, - { - "epoch": 1.2327547631944609, - "grad_norm": 0.6094175577163696, - "learning_rate": 6.655348970834042e-06, - "loss": 0.1922, - "step": 13086 - }, - { - "epoch": 1.2328489672876286, - "grad_norm": 0.6026403903961182, - "learning_rate": 6.6539259556295735e-06, - "loss": 0.2012, - "step": 13087 - }, - { - "epoch": 1.2329431713807966, - "grad_norm": 0.6669145822525024, - "learning_rate": 6.652503016720767e-06, - "loss": 0.2139, - "step": 13088 - }, - { - "epoch": 1.2330373754739643, - "grad_norm": 0.6415684223175049, - "learning_rate": 6.6510801541400674e-06, - "loss": 0.1934, - "step": 13089 - }, - { - "epoch": 1.2331315795671323, - "grad_norm": 0.5711910724639893, - "learning_rate": 6.649657367919922e-06, - "loss": 0.1879, - "step": 13090 - }, - { - "epoch": 1.2332257836603, - "grad_norm": 0.6705195903778076, - "learning_rate": 6.648234658092771e-06, - "loss": 0.202, - "step": 13091 - }, - { - "epoch": 
1.233319987753468, - "grad_norm": 0.6863791942596436, - "learning_rate": 6.646812024691052e-06, - "loss": 0.2085, - "step": 13092 - }, - { - "epoch": 1.2334141918466357, - "grad_norm": 0.6593004465103149, - "learning_rate": 6.645389467747198e-06, - "loss": 0.1631, - "step": 13093 - }, - { - "epoch": 1.2335083959398037, - "grad_norm": 0.6083840131759644, - "learning_rate": 6.643966987293662e-06, - "loss": 0.1942, - "step": 13094 - }, - { - "epoch": 1.2336026000329714, - "grad_norm": 0.7365472912788391, - "learning_rate": 6.642544583362865e-06, - "loss": 0.2064, - "step": 13095 - }, - { - "epoch": 1.2336968041261394, - "grad_norm": 0.5965930223464966, - "learning_rate": 6.641122255987242e-06, - "loss": 0.2155, - "step": 13096 - }, - { - "epoch": 1.233791008219307, - "grad_norm": 0.6303234696388245, - "learning_rate": 6.639700005199228e-06, - "loss": 0.1824, - "step": 13097 - }, - { - "epoch": 1.233885212312475, - "grad_norm": 0.6314939260482788, - "learning_rate": 6.6382778310312515e-06, - "loss": 0.2012, - "step": 13098 - }, - { - "epoch": 1.2339794164056428, - "grad_norm": 0.6179655194282532, - "learning_rate": 6.6368557335157365e-06, - "loss": 0.2054, - "step": 13099 - }, - { - "epoch": 1.2340736204988108, - "grad_norm": 0.6266142129898071, - "learning_rate": 6.635433712685115e-06, - "loss": 0.1828, - "step": 13100 - }, - { - "epoch": 1.2341678245919785, - "grad_norm": 0.6063579320907593, - "learning_rate": 6.634011768571807e-06, - "loss": 0.175, - "step": 13101 - }, - { - "epoch": 1.2342620286851465, - "grad_norm": 0.6964307427406311, - "learning_rate": 6.6325899012082375e-06, - "loss": 0.1942, - "step": 13102 - }, - { - "epoch": 1.2343562327783142, - "grad_norm": 0.6033285856246948, - "learning_rate": 6.631168110626825e-06, - "loss": 0.193, - "step": 13103 - }, - { - "epoch": 1.2344504368714821, - "grad_norm": 0.729873538017273, - "learning_rate": 6.629746396859989e-06, - "loss": 0.2272, - "step": 13104 - }, - { - "epoch": 1.2345446409646499, - "grad_norm": 0.7651042342185974, - "learning_rate": 6.6283247599401475e-06, - "loss": 0.2374, - "step": 13105 - }, - { - "epoch": 1.2346388450578178, - "grad_norm": 0.6986647844314575, - "learning_rate": 6.626903199899716e-06, - "loss": 0.2072, - "step": 13106 - }, - { - "epoch": 1.2347330491509856, - "grad_norm": 0.6632575392723083, - "learning_rate": 6.62548171677111e-06, - "loss": 0.1976, - "step": 13107 - }, - { - "epoch": 1.2348272532441535, - "grad_norm": 0.582164466381073, - "learning_rate": 6.624060310586737e-06, - "loss": 0.1782, - "step": 13108 - }, - { - "epoch": 1.2349214573373213, - "grad_norm": 0.7106314897537231, - "learning_rate": 6.622638981379011e-06, - "loss": 0.184, - "step": 13109 - }, - { - "epoch": 1.2350156614304892, - "grad_norm": 0.6218223571777344, - "learning_rate": 6.621217729180338e-06, - "loss": 0.2154, - "step": 13110 - }, - { - "epoch": 1.235109865523657, - "grad_norm": 0.6390554308891296, - "learning_rate": 6.619796554023131e-06, - "loss": 0.2115, - "step": 13111 - }, - { - "epoch": 1.235204069616825, - "grad_norm": 0.6510376930236816, - "learning_rate": 6.618375455939787e-06, - "loss": 0.2305, - "step": 13112 - }, - { - "epoch": 1.2352982737099927, - "grad_norm": 0.6226624250411987, - "learning_rate": 6.616954434962709e-06, - "loss": 0.1773, - "step": 13113 - }, - { - "epoch": 1.2353924778031606, - "grad_norm": 0.615502655506134, - "learning_rate": 6.615533491124307e-06, - "loss": 0.1965, - "step": 13114 - }, - { - "epoch": 1.2354866818963284, - "grad_norm": 0.6183832883834839, - "learning_rate": 
6.614112624456974e-06, - "loss": 0.192, - "step": 13115 - }, - { - "epoch": 1.2355808859894963, - "grad_norm": 0.6593360304832458, - "learning_rate": 6.612691834993108e-06, - "loss": 0.221, - "step": 13116 - }, - { - "epoch": 1.235675090082664, - "grad_norm": 0.6409652233123779, - "learning_rate": 6.6112711227651085e-06, - "loss": 0.2043, - "step": 13117 - }, - { - "epoch": 1.235769294175832, - "grad_norm": 0.5980530977249146, - "learning_rate": 6.609850487805368e-06, - "loss": 0.1988, - "step": 13118 - }, - { - "epoch": 1.2358634982689998, - "grad_norm": 0.6702038049697876, - "learning_rate": 6.6084299301462776e-06, - "loss": 0.1993, - "step": 13119 - }, - { - "epoch": 1.2359577023621677, - "grad_norm": 0.7119449377059937, - "learning_rate": 6.607009449820232e-06, - "loss": 0.2048, - "step": 13120 - }, - { - "epoch": 1.2360519064553355, - "grad_norm": 0.6192128658294678, - "learning_rate": 6.6055890468596175e-06, - "loss": 0.1799, - "step": 13121 - }, - { - "epoch": 1.2361461105485034, - "grad_norm": 0.6519134640693665, - "learning_rate": 6.604168721296823e-06, - "loss": 0.2091, - "step": 13122 - }, - { - "epoch": 1.2362403146416712, - "grad_norm": 0.5972312688827515, - "learning_rate": 6.602748473164231e-06, - "loss": 0.1808, - "step": 13123 - }, - { - "epoch": 1.236334518734839, - "grad_norm": 0.5865551233291626, - "learning_rate": 6.6013283024942295e-06, - "loss": 0.1718, - "step": 13124 - }, - { - "epoch": 1.2364287228280069, - "grad_norm": 0.6026672720909119, - "learning_rate": 6.5999082093192e-06, - "loss": 0.2056, - "step": 13125 - }, - { - "epoch": 1.2365229269211748, - "grad_norm": 0.6767292022705078, - "learning_rate": 6.5984881936715195e-06, - "loss": 0.2118, - "step": 13126 - }, - { - "epoch": 1.2366171310143426, - "grad_norm": 0.6915835738182068, - "learning_rate": 6.59706825558357e-06, - "loss": 0.2153, - "step": 13127 - }, - { - "epoch": 1.2367113351075103, - "grad_norm": 0.7222739458084106, - "learning_rate": 6.595648395087728e-06, - "loss": 0.1925, - "step": 13128 - }, - { - "epoch": 1.2368055392006783, - "grad_norm": 0.7652611136436462, - "learning_rate": 6.594228612216365e-06, - "loss": 0.2099, - "step": 13129 - }, - { - "epoch": 1.2368997432938462, - "grad_norm": 0.6785929203033447, - "learning_rate": 6.5928089070018576e-06, - "loss": 0.2151, - "step": 13130 - }, - { - "epoch": 1.236993947387014, - "grad_norm": 0.6909286379814148, - "learning_rate": 6.591389279476579e-06, - "loss": 0.2083, - "step": 13131 - }, - { - "epoch": 1.2370881514801817, - "grad_norm": 0.7029983997344971, - "learning_rate": 6.589969729672896e-06, - "loss": 0.2132, - "step": 13132 - }, - { - "epoch": 1.2371823555733497, - "grad_norm": 0.6916367411613464, - "learning_rate": 6.588550257623171e-06, - "loss": 0.2134, - "step": 13133 - }, - { - "epoch": 1.2372765596665176, - "grad_norm": 0.6737584471702576, - "learning_rate": 6.587130863359783e-06, - "loss": 0.2042, - "step": 13134 - }, - { - "epoch": 1.2373707637596854, - "grad_norm": 0.7271801233291626, - "learning_rate": 6.585711546915087e-06, - "loss": 0.2137, - "step": 13135 - }, - { - "epoch": 1.237464967852853, - "grad_norm": 0.6153900623321533, - "learning_rate": 6.584292308321445e-06, - "loss": 0.1869, - "step": 13136 - }, - { - "epoch": 1.237559171946021, - "grad_norm": 0.7527621388435364, - "learning_rate": 6.582873147611224e-06, - "loss": 0.2296, - "step": 13137 - }, - { - "epoch": 1.237653376039189, - "grad_norm": 0.5928955078125, - "learning_rate": 6.581454064816781e-06, - "loss": 0.1997, - "step": 13138 - }, - { - "epoch": 
1.2377475801323568, - "grad_norm": 0.6899813413619995, - "learning_rate": 6.5800350599704684e-06, - "loss": 0.1963, - "step": 13139 - }, - { - "epoch": 1.2378417842255245, - "grad_norm": 0.6595364212989807, - "learning_rate": 6.578616133104648e-06, - "loss": 0.2243, - "step": 13140 - }, - { - "epoch": 1.2379359883186924, - "grad_norm": 0.5497678518295288, - "learning_rate": 6.5771972842516715e-06, - "loss": 0.1798, - "step": 13141 - }, - { - "epoch": 1.2380301924118604, - "grad_norm": 0.6328285932540894, - "learning_rate": 6.575778513443891e-06, - "loss": 0.1983, - "step": 13142 - }, - { - "epoch": 1.2381243965050281, - "grad_norm": 0.6856815218925476, - "learning_rate": 6.574359820713653e-06, - "loss": 0.2272, - "step": 13143 - }, - { - "epoch": 1.2382186005981959, - "grad_norm": 0.6073868274688721, - "learning_rate": 6.572941206093311e-06, - "loss": 0.1959, - "step": 13144 - }, - { - "epoch": 1.2383128046913638, - "grad_norm": 0.736967921257019, - "learning_rate": 6.571522669615209e-06, - "loss": 0.2434, - "step": 13145 - }, - { - "epoch": 1.2384070087845318, - "grad_norm": 0.6768428683280945, - "learning_rate": 6.570104211311692e-06, - "loss": 0.1868, - "step": 13146 - }, - { - "epoch": 1.2385012128776995, - "grad_norm": 0.591189444065094, - "learning_rate": 6.568685831215105e-06, - "loss": 0.1737, - "step": 13147 - }, - { - "epoch": 1.2385954169708673, - "grad_norm": 0.7081210017204285, - "learning_rate": 6.5672675293577885e-06, - "loss": 0.2168, - "step": 13148 - }, - { - "epoch": 1.2386896210640352, - "grad_norm": 0.6531855463981628, - "learning_rate": 6.565849305772075e-06, - "loss": 0.2216, - "step": 13149 - }, - { - "epoch": 1.2387838251572032, - "grad_norm": 0.6034048199653625, - "learning_rate": 6.564431160490313e-06, - "loss": 0.1951, - "step": 13150 - }, - { - "epoch": 1.238878029250371, - "grad_norm": 0.6722594499588013, - "learning_rate": 6.563013093544837e-06, - "loss": 0.2166, - "step": 13151 - }, - { - "epoch": 1.2389722333435387, - "grad_norm": 0.6729581952095032, - "learning_rate": 6.561595104967975e-06, - "loss": 0.2155, - "step": 13152 - }, - { - "epoch": 1.2390664374367066, - "grad_norm": 0.627149224281311, - "learning_rate": 6.560177194792057e-06, - "loss": 0.1976, - "step": 13153 - }, - { - "epoch": 1.2391606415298746, - "grad_norm": 0.673635721206665, - "learning_rate": 6.558759363049426e-06, - "loss": 0.1841, - "step": 13154 - }, - { - "epoch": 1.2392548456230423, - "grad_norm": 0.6006341576576233, - "learning_rate": 6.5573416097724e-06, - "loss": 0.1847, - "step": 13155 - }, - { - "epoch": 1.23934904971621, - "grad_norm": 0.8298212289810181, - "learning_rate": 6.555923934993309e-06, - "loss": 0.2264, - "step": 13156 - }, - { - "epoch": 1.239443253809378, - "grad_norm": 0.6421175599098206, - "learning_rate": 6.554506338744482e-06, - "loss": 0.1883, - "step": 13157 - }, - { - "epoch": 1.2395374579025458, - "grad_norm": 0.6622430086135864, - "learning_rate": 6.553088821058237e-06, - "loss": 0.2039, - "step": 13158 - }, - { - "epoch": 1.2396316619957137, - "grad_norm": 0.6573102474212646, - "learning_rate": 6.5516713819668955e-06, - "loss": 0.1962, - "step": 13159 - }, - { - "epoch": 1.2397258660888815, - "grad_norm": 0.8151679635047913, - "learning_rate": 6.550254021502782e-06, - "loss": 0.2009, - "step": 13160 - }, - { - "epoch": 1.2398200701820494, - "grad_norm": 0.6960872411727905, - "learning_rate": 6.548836739698212e-06, - "loss": 0.2179, - "step": 13161 - }, - { - "epoch": 1.2399142742752172, - "grad_norm": 0.659559965133667, - "learning_rate": 
6.547419536585502e-06, - "loss": 0.2065, - "step": 13162 - }, - { - "epoch": 1.2400084783683851, - "grad_norm": 0.6673386693000793, - "learning_rate": 6.5460024121969635e-06, - "loss": 0.2142, - "step": 13163 - }, - { - "epoch": 1.2401026824615529, - "grad_norm": 0.6193254590034485, - "learning_rate": 6.544585366564913e-06, - "loss": 0.2057, - "step": 13164 - }, - { - "epoch": 1.2401968865547208, - "grad_norm": 0.6263452768325806, - "learning_rate": 6.543168399721661e-06, - "loss": 0.1869, - "step": 13165 - }, - { - "epoch": 1.2402910906478886, - "grad_norm": 0.6214220523834229, - "learning_rate": 6.541751511699514e-06, - "loss": 0.199, - "step": 13166 - }, - { - "epoch": 1.2403852947410565, - "grad_norm": 0.6071861982345581, - "learning_rate": 6.540334702530782e-06, - "loss": 0.199, - "step": 13167 - }, - { - "epoch": 1.2404794988342243, - "grad_norm": 0.6378260254859924, - "learning_rate": 6.538917972247771e-06, - "loss": 0.2381, - "step": 13168 - }, - { - "epoch": 1.2405737029273922, - "grad_norm": 0.661163330078125, - "learning_rate": 6.537501320882778e-06, - "loss": 0.1988, - "step": 13169 - }, - { - "epoch": 1.24066790702056, - "grad_norm": 0.7192729115486145, - "learning_rate": 6.536084748468114e-06, - "loss": 0.1879, - "step": 13170 - }, - { - "epoch": 1.240762111113728, - "grad_norm": 0.662153959274292, - "learning_rate": 6.534668255036075e-06, - "loss": 0.1885, - "step": 13171 - }, - { - "epoch": 1.2408563152068957, - "grad_norm": 0.5854468941688538, - "learning_rate": 6.533251840618958e-06, - "loss": 0.185, - "step": 13172 - }, - { - "epoch": 1.2409505193000636, - "grad_norm": 0.5961412191390991, - "learning_rate": 6.531835505249057e-06, - "loss": 0.2056, - "step": 13173 - }, - { - "epoch": 1.2410447233932314, - "grad_norm": 0.6531262397766113, - "learning_rate": 6.530419248958675e-06, - "loss": 0.201, - "step": 13174 - }, - { - "epoch": 1.2411389274863993, - "grad_norm": 0.683880627155304, - "learning_rate": 6.529003071780098e-06, - "loss": 0.2197, - "step": 13175 - }, - { - "epoch": 1.241233131579567, - "grad_norm": 0.6978038549423218, - "learning_rate": 6.527586973745619e-06, - "loss": 0.2409, - "step": 13176 - }, - { - "epoch": 1.241327335672735, - "grad_norm": 0.632899284362793, - "learning_rate": 6.526170954887528e-06, - "loss": 0.189, - "step": 13177 - }, - { - "epoch": 1.2414215397659027, - "grad_norm": 0.5654722452163696, - "learning_rate": 6.52475501523811e-06, - "loss": 0.1709, - "step": 13178 - }, - { - "epoch": 1.2415157438590707, - "grad_norm": 0.6315605640411377, - "learning_rate": 6.523339154829651e-06, - "loss": 0.2222, - "step": 13179 - }, - { - "epoch": 1.2416099479522384, - "grad_norm": 0.6636592745780945, - "learning_rate": 6.5219233736944384e-06, - "loss": 0.2049, - "step": 13180 - }, - { - "epoch": 1.2417041520454064, - "grad_norm": 0.6793553233146667, - "learning_rate": 6.520507671864753e-06, - "loss": 0.2112, - "step": 13181 - }, - { - "epoch": 1.2417983561385741, - "grad_norm": 0.6310015916824341, - "learning_rate": 6.519092049372873e-06, - "loss": 0.2142, - "step": 13182 - }, - { - "epoch": 1.241892560231742, - "grad_norm": 0.6464283466339111, - "learning_rate": 6.517676506251074e-06, - "loss": 0.1928, - "step": 13183 - }, - { - "epoch": 1.2419867643249098, - "grad_norm": 0.5940302610397339, - "learning_rate": 6.51626104253164e-06, - "loss": 0.1871, - "step": 13184 - }, - { - "epoch": 1.2420809684180778, - "grad_norm": 0.7057653069496155, - "learning_rate": 6.5148456582468424e-06, - "loss": 0.1981, - "step": 13185 - }, - { - "epoch": 
1.2421751725112455, - "grad_norm": 0.6060236692428589, - "learning_rate": 6.51343035342895e-06, - "loss": 0.2021, - "step": 13186 - }, - { - "epoch": 1.2422693766044135, - "grad_norm": 0.6369514465332031, - "learning_rate": 6.512015128110241e-06, - "loss": 0.1832, - "step": 13187 - }, - { - "epoch": 1.2423635806975812, - "grad_norm": 0.6441729068756104, - "learning_rate": 6.510599982322982e-06, - "loss": 0.2138, - "step": 13188 - }, - { - "epoch": 1.2424577847907492, - "grad_norm": 0.5923165082931519, - "learning_rate": 6.509184916099433e-06, - "loss": 0.1976, - "step": 13189 - }, - { - "epoch": 1.242551988883917, - "grad_norm": 0.5233913064002991, - "learning_rate": 6.507769929471875e-06, - "loss": 0.1697, - "step": 13190 - }, - { - "epoch": 1.242646192977085, - "grad_norm": 0.6921975612640381, - "learning_rate": 6.506355022472561e-06, - "loss": 0.2013, - "step": 13191 - }, - { - "epoch": 1.2427403970702526, - "grad_norm": 0.6269617080688477, - "learning_rate": 6.504940195133755e-06, - "loss": 0.1975, - "step": 13192 - }, - { - "epoch": 1.2428346011634206, - "grad_norm": 0.7483435273170471, - "learning_rate": 6.503525447487717e-06, - "loss": 0.2226, - "step": 13193 - }, - { - "epoch": 1.2429288052565883, - "grad_norm": 0.6120871305465698, - "learning_rate": 6.502110779566706e-06, - "loss": 0.2191, - "step": 13194 - }, - { - "epoch": 1.2430230093497563, - "grad_norm": 0.5768830180168152, - "learning_rate": 6.50069619140298e-06, - "loss": 0.1729, - "step": 13195 - }, - { - "epoch": 1.243117213442924, - "grad_norm": 0.7296568155288696, - "learning_rate": 6.499281683028791e-06, - "loss": 0.1988, - "step": 13196 - }, - { - "epoch": 1.243211417536092, - "grad_norm": 0.6651104092597961, - "learning_rate": 6.497867254476395e-06, - "loss": 0.1935, - "step": 13197 - }, - { - "epoch": 1.2433056216292597, - "grad_norm": 0.6365832686424255, - "learning_rate": 6.496452905778041e-06, - "loss": 0.1862, - "step": 13198 - }, - { - "epoch": 1.2433998257224277, - "grad_norm": 0.7073401212692261, - "learning_rate": 6.495038636965978e-06, - "loss": 0.2095, - "step": 13199 - }, - { - "epoch": 1.2434940298155954, - "grad_norm": 0.627632200717926, - "learning_rate": 6.4936244480724575e-06, - "loss": 0.1928, - "step": 13200 - }, - { - "epoch": 1.2435882339087634, - "grad_norm": 0.7091456651687622, - "learning_rate": 6.492210339129721e-06, - "loss": 0.2011, - "step": 13201 - }, - { - "epoch": 1.2436824380019311, - "grad_norm": 0.6287287473678589, - "learning_rate": 6.490796310170013e-06, - "loss": 0.2135, - "step": 13202 - }, - { - "epoch": 1.243776642095099, - "grad_norm": 0.6407252550125122, - "learning_rate": 6.489382361225576e-06, - "loss": 0.2096, - "step": 13203 - }, - { - "epoch": 1.2438708461882668, - "grad_norm": 0.6490053534507751, - "learning_rate": 6.487968492328651e-06, - "loss": 0.2196, - "step": 13204 - }, - { - "epoch": 1.2439650502814348, - "grad_norm": 0.6701541543006897, - "learning_rate": 6.486554703511477e-06, - "loss": 0.2147, - "step": 13205 - }, - { - "epoch": 1.2440592543746025, - "grad_norm": 0.6274252533912659, - "learning_rate": 6.4851409948062875e-06, - "loss": 0.2021, - "step": 13206 - }, - { - "epoch": 1.2441534584677705, - "grad_norm": 0.7174579501152039, - "learning_rate": 6.48372736624532e-06, - "loss": 0.2408, - "step": 13207 - }, - { - "epoch": 1.2442476625609382, - "grad_norm": 0.6236835718154907, - "learning_rate": 6.482313817860809e-06, - "loss": 0.2154, - "step": 13208 - }, - { - "epoch": 1.2443418666541062, - "grad_norm": 0.6155478358268738, - "learning_rate": 
6.480900349684977e-06, - "loss": 0.1959, - "step": 13209 - }, - { - "epoch": 1.244436070747274, - "grad_norm": 0.7192291617393494, - "learning_rate": 6.479486961750065e-06, - "loss": 0.2438, - "step": 13210 - }, - { - "epoch": 1.2445302748404419, - "grad_norm": 0.6841741800308228, - "learning_rate": 6.478073654088295e-06, - "loss": 0.1929, - "step": 13211 - }, - { - "epoch": 1.2446244789336096, - "grad_norm": 0.6367154121398926, - "learning_rate": 6.476660426731891e-06, - "loss": 0.201, - "step": 13212 - }, - { - "epoch": 1.2447186830267776, - "grad_norm": 0.6178945899009705, - "learning_rate": 6.475247279713076e-06, - "loss": 0.2105, - "step": 13213 - }, - { - "epoch": 1.2448128871199453, - "grad_norm": 0.6798058152198792, - "learning_rate": 6.4738342130640764e-06, - "loss": 0.2114, - "step": 13214 - }, - { - "epoch": 1.2449070912131133, - "grad_norm": 0.6734654903411865, - "learning_rate": 6.47242122681711e-06, - "loss": 0.2306, - "step": 13215 - }, - { - "epoch": 1.245001295306281, - "grad_norm": 0.764380931854248, - "learning_rate": 6.471008321004393e-06, - "loss": 0.2278, - "step": 13216 - }, - { - "epoch": 1.245095499399449, - "grad_norm": 0.6479675769805908, - "learning_rate": 6.4695954956581464e-06, - "loss": 0.2103, - "step": 13217 - }, - { - "epoch": 1.2451897034926167, - "grad_norm": 0.628743052482605, - "learning_rate": 6.468182750810582e-06, - "loss": 0.1941, - "step": 13218 - }, - { - "epoch": 1.2452839075857847, - "grad_norm": 0.6989083290100098, - "learning_rate": 6.466770086493911e-06, - "loss": 0.213, - "step": 13219 - }, - { - "epoch": 1.2453781116789524, - "grad_norm": 0.6205032467842102, - "learning_rate": 6.4653575027403485e-06, - "loss": 0.2083, - "step": 13220 - }, - { - "epoch": 1.2454723157721204, - "grad_norm": 0.6287810206413269, - "learning_rate": 6.463944999582102e-06, - "loss": 0.2182, - "step": 13221 - }, - { - "epoch": 1.245566519865288, - "grad_norm": 0.6180534958839417, - "learning_rate": 6.462532577051377e-06, - "loss": 0.1902, - "step": 13222 - }, - { - "epoch": 1.245660723958456, - "grad_norm": 0.6427865624427795, - "learning_rate": 6.461120235180378e-06, - "loss": 0.218, - "step": 13223 - }, - { - "epoch": 1.2457549280516238, - "grad_norm": 0.624610424041748, - "learning_rate": 6.4597079740013126e-06, - "loss": 0.1779, - "step": 13224 - }, - { - "epoch": 1.2458491321447918, - "grad_norm": 0.6102461218833923, - "learning_rate": 6.458295793546381e-06, - "loss": 0.2159, - "step": 13225 - }, - { - "epoch": 1.2459433362379595, - "grad_norm": 0.7143837809562683, - "learning_rate": 6.456883693847781e-06, - "loss": 0.2104, - "step": 13226 - }, - { - "epoch": 1.2460375403311275, - "grad_norm": 0.7000229954719543, - "learning_rate": 6.455471674937714e-06, - "loss": 0.2043, - "step": 13227 - }, - { - "epoch": 1.2461317444242952, - "grad_norm": 0.6932652592658997, - "learning_rate": 6.454059736848376e-06, - "loss": 0.2209, - "step": 13228 - }, - { - "epoch": 1.2462259485174632, - "grad_norm": 0.6495449542999268, - "learning_rate": 6.4526478796119555e-06, - "loss": 0.2079, - "step": 13229 - }, - { - "epoch": 1.246320152610631, - "grad_norm": 0.5908781886100769, - "learning_rate": 6.451236103260652e-06, - "loss": 0.184, - "step": 13230 - }, - { - "epoch": 1.2464143567037989, - "grad_norm": 0.6017096638679504, - "learning_rate": 6.449824407826655e-06, - "loss": 0.1765, - "step": 13231 - }, - { - "epoch": 1.2465085607969666, - "grad_norm": 0.6833210587501526, - "learning_rate": 6.4484127933421514e-06, - "loss": 0.2077, - "step": 13232 - }, - { - "epoch": 
1.2466027648901346, - "grad_norm": 0.6991371512413025, - "learning_rate": 6.447001259839325e-06, - "loss": 0.2494, - "step": 13233 - }, - { - "epoch": 1.2466969689833023, - "grad_norm": 0.5941142439842224, - "learning_rate": 6.445589807350369e-06, - "loss": 0.2145, - "step": 13234 - }, - { - "epoch": 1.2467911730764702, - "grad_norm": 0.6017299890518188, - "learning_rate": 6.444178435907461e-06, - "loss": 0.1924, - "step": 13235 - }, - { - "epoch": 1.246885377169638, - "grad_norm": 0.6834784150123596, - "learning_rate": 6.442767145542782e-06, - "loss": 0.2098, - "step": 13236 - }, - { - "epoch": 1.246979581262806, - "grad_norm": 0.6336193680763245, - "learning_rate": 6.441355936288516e-06, - "loss": 0.1907, - "step": 13237 - }, - { - "epoch": 1.2470737853559737, - "grad_norm": 0.6442963480949402, - "learning_rate": 6.439944808176837e-06, - "loss": 0.1934, - "step": 13238 - }, - { - "epoch": 1.2471679894491416, - "grad_norm": 0.6089239716529846, - "learning_rate": 6.4385337612399215e-06, - "loss": 0.2017, - "step": 13239 - }, - { - "epoch": 1.2472621935423094, - "grad_norm": 0.5581820607185364, - "learning_rate": 6.437122795509945e-06, - "loss": 0.1831, - "step": 13240 - }, - { - "epoch": 1.2473563976354773, - "grad_norm": 0.7005428671836853, - "learning_rate": 6.43571191101908e-06, - "loss": 0.1856, - "step": 13241 - }, - { - "epoch": 1.247450601728645, - "grad_norm": 0.631710946559906, - "learning_rate": 6.434301107799494e-06, - "loss": 0.2002, - "step": 13242 - }, - { - "epoch": 1.247544805821813, - "grad_norm": 0.7596853375434875, - "learning_rate": 6.432890385883357e-06, - "loss": 0.2144, - "step": 13243 - }, - { - "epoch": 1.2476390099149808, - "grad_norm": 0.6215640306472778, - "learning_rate": 6.431479745302838e-06, - "loss": 0.2075, - "step": 13244 - }, - { - "epoch": 1.2477332140081487, - "grad_norm": 0.6445016264915466, - "learning_rate": 6.4300691860900975e-06, - "loss": 0.2068, - "step": 13245 - }, - { - "epoch": 1.2478274181013165, - "grad_norm": 0.6773838400840759, - "learning_rate": 6.4286587082773e-06, - "loss": 0.2452, - "step": 13246 - }, - { - "epoch": 1.2479216221944844, - "grad_norm": 0.5760672688484192, - "learning_rate": 6.42724831189661e-06, - "loss": 0.2073, - "step": 13247 - }, - { - "epoch": 1.2480158262876522, - "grad_norm": 0.614971935749054, - "learning_rate": 6.4258379969801846e-06, - "loss": 0.1849, - "step": 13248 - }, - { - "epoch": 1.2481100303808201, - "grad_norm": 0.6382244825363159, - "learning_rate": 6.424427763560175e-06, - "loss": 0.1835, - "step": 13249 - }, - { - "epoch": 1.2482042344739879, - "grad_norm": 0.6506362557411194, - "learning_rate": 6.423017611668745e-06, - "loss": 0.2272, - "step": 13250 - }, - { - "epoch": 1.2482984385671558, - "grad_norm": 0.6593846678733826, - "learning_rate": 6.421607541338049e-06, - "loss": 0.2088, - "step": 13251 - }, - { - "epoch": 1.2483926426603236, - "grad_norm": 0.6460914015769958, - "learning_rate": 6.420197552600232e-06, - "loss": 0.1972, - "step": 13252 - }, - { - "epoch": 1.2484868467534915, - "grad_norm": 0.6168376207351685, - "learning_rate": 6.418787645487446e-06, - "loss": 0.2213, - "step": 13253 - }, - { - "epoch": 1.2485810508466593, - "grad_norm": 0.7077895998954773, - "learning_rate": 6.41737782003184e-06, - "loss": 0.2051, - "step": 13254 - }, - { - "epoch": 1.2486752549398272, - "grad_norm": 0.6611896753311157, - "learning_rate": 6.415968076265562e-06, - "loss": 0.2222, - "step": 13255 - }, - { - "epoch": 1.248769459032995, - "grad_norm": 0.6410412788391113, - "learning_rate": 
6.4145584142207525e-06, - "loss": 0.2132, - "step": 13256 - }, - { - "epoch": 1.248863663126163, - "grad_norm": 0.6798258423805237, - "learning_rate": 6.413148833929559e-06, - "loss": 0.205, - "step": 13257 - }, - { - "epoch": 1.2489578672193307, - "grad_norm": 0.6173869967460632, - "learning_rate": 6.411739335424118e-06, - "loss": 0.2124, - "step": 13258 - }, - { - "epoch": 1.2490520713124986, - "grad_norm": 0.5916337966918945, - "learning_rate": 6.410329918736568e-06, - "loss": 0.1915, - "step": 13259 - }, - { - "epoch": 1.2491462754056664, - "grad_norm": 0.6927295923233032, - "learning_rate": 6.408920583899049e-06, - "loss": 0.2081, - "step": 13260 - }, - { - "epoch": 1.2492404794988343, - "grad_norm": 0.6642054915428162, - "learning_rate": 6.407511330943694e-06, - "loss": 0.1902, - "step": 13261 - }, - { - "epoch": 1.249334683592002, - "grad_norm": 0.6179870963096619, - "learning_rate": 6.406102159902638e-06, - "loss": 0.2046, - "step": 13262 - }, - { - "epoch": 1.2494288876851698, - "grad_norm": 0.6349109411239624, - "learning_rate": 6.404693070808008e-06, - "loss": 0.2286, - "step": 13263 - }, - { - "epoch": 1.2495230917783378, - "grad_norm": 0.5897269248962402, - "learning_rate": 6.403284063691938e-06, - "loss": 0.1878, - "step": 13264 - }, - { - "epoch": 1.2496172958715057, - "grad_norm": 0.6208135485649109, - "learning_rate": 6.401875138586557e-06, - "loss": 0.1983, - "step": 13265 - }, - { - "epoch": 1.2497114999646735, - "grad_norm": 0.7107505798339844, - "learning_rate": 6.400466295523979e-06, - "loss": 0.2373, - "step": 13266 - }, - { - "epoch": 1.2498057040578412, - "grad_norm": 0.6204399466514587, - "learning_rate": 6.399057534536342e-06, - "loss": 0.1887, - "step": 13267 - }, - { - "epoch": 1.2498999081510092, - "grad_norm": 0.7137466669082642, - "learning_rate": 6.397648855655765e-06, - "loss": 0.2237, - "step": 13268 - }, - { - "epoch": 1.2499941122441771, - "grad_norm": 0.6079708933830261, - "learning_rate": 6.396240258914357e-06, - "loss": 0.2368, - "step": 13269 - }, - { - "epoch": 1.2500883163373449, - "grad_norm": 0.6251336336135864, - "learning_rate": 6.3948317443442496e-06, - "loss": 0.1881, - "step": 13270 - }, - { - "epoch": 1.2501825204305126, - "grad_norm": 0.6043210625648499, - "learning_rate": 6.393423311977556e-06, - "loss": 0.1919, - "step": 13271 - }, - { - "epoch": 1.2502767245236805, - "grad_norm": 0.5962233543395996, - "learning_rate": 6.392014961846387e-06, - "loss": 0.1889, - "step": 13272 - }, - { - "epoch": 1.2503709286168485, - "grad_norm": 0.5834617614746094, - "learning_rate": 6.3906066939828546e-06, - "loss": 0.1879, - "step": 13273 - }, - { - "epoch": 1.2504651327100162, - "grad_norm": 0.597581148147583, - "learning_rate": 6.389198508419072e-06, - "loss": 0.179, - "step": 13274 - }, - { - "epoch": 1.250559336803184, - "grad_norm": 0.6518176794052124, - "learning_rate": 6.38779040518715e-06, - "loss": 0.2046, - "step": 13275 - }, - { - "epoch": 1.250653540896352, - "grad_norm": 0.6201210618019104, - "learning_rate": 6.38638238431919e-06, - "loss": 0.2061, - "step": 13276 - }, - { - "epoch": 1.25074774498952, - "grad_norm": 0.6848750114440918, - "learning_rate": 6.384974445847302e-06, - "loss": 0.1925, - "step": 13277 - }, - { - "epoch": 1.2508419490826876, - "grad_norm": 0.6894055008888245, - "learning_rate": 6.383566589803587e-06, - "loss": 0.2056, - "step": 13278 - }, - { - "epoch": 1.2509361531758554, - "grad_norm": 0.6124905943870544, - "learning_rate": 6.382158816220146e-06, - "loss": 0.1922, - "step": 13279 - }, - { - "epoch": 
1.2510303572690233, - "grad_norm": 0.644424319267273, - "learning_rate": 6.38075112512908e-06, - "loss": 0.2004, - "step": 13280 - }, - { - "epoch": 1.2511245613621913, - "grad_norm": 0.5365655422210693, - "learning_rate": 6.3793435165624866e-06, - "loss": 0.1776, - "step": 13281 - }, - { - "epoch": 1.251218765455359, - "grad_norm": 0.6420202255249023, - "learning_rate": 6.377935990552459e-06, - "loss": 0.1911, - "step": 13282 - }, - { - "epoch": 1.2513129695485268, - "grad_norm": 0.6588460803031921, - "learning_rate": 6.376528547131091e-06, - "loss": 0.2131, - "step": 13283 - }, - { - "epoch": 1.2514071736416947, - "grad_norm": 0.6637923717498779, - "learning_rate": 6.375121186330478e-06, - "loss": 0.1893, - "step": 13284 - }, - { - "epoch": 1.2515013777348627, - "grad_norm": 0.6343114376068115, - "learning_rate": 6.373713908182711e-06, - "loss": 0.1873, - "step": 13285 - }, - { - "epoch": 1.2515955818280304, - "grad_norm": 0.60640549659729, - "learning_rate": 6.372306712719868e-06, - "loss": 0.2026, - "step": 13286 - }, - { - "epoch": 1.2516897859211982, - "grad_norm": 0.6385098099708557, - "learning_rate": 6.370899599974047e-06, - "loss": 0.1905, - "step": 13287 - }, - { - "epoch": 1.2517839900143661, - "grad_norm": 0.7508695125579834, - "learning_rate": 6.369492569977329e-06, - "loss": 0.2229, - "step": 13288 - }, - { - "epoch": 1.251878194107534, - "grad_norm": 0.5951728820800781, - "learning_rate": 6.368085622761788e-06, - "loss": 0.1854, - "step": 13289 - }, - { - "epoch": 1.2519723982007018, - "grad_norm": 0.579662024974823, - "learning_rate": 6.366678758359517e-06, - "loss": 0.1888, - "step": 13290 - }, - { - "epoch": 1.2520666022938696, - "grad_norm": 0.6184574365615845, - "learning_rate": 6.3652719768025915e-06, - "loss": 0.204, - "step": 13291 - }, - { - "epoch": 1.2521608063870375, - "grad_norm": 0.7084358334541321, - "learning_rate": 6.363865278123085e-06, - "loss": 0.2039, - "step": 13292 - }, - { - "epoch": 1.2522550104802055, - "grad_norm": 0.6930648684501648, - "learning_rate": 6.362458662353069e-06, - "loss": 0.2441, - "step": 13293 - }, - { - "epoch": 1.2523492145733732, - "grad_norm": 0.6676106452941895, - "learning_rate": 6.361052129524625e-06, - "loss": 0.2223, - "step": 13294 - }, - { - "epoch": 1.252443418666541, - "grad_norm": 0.6763599514961243, - "learning_rate": 6.3596456796698195e-06, - "loss": 0.222, - "step": 13295 - }, - { - "epoch": 1.252537622759709, - "grad_norm": 0.7015537619590759, - "learning_rate": 6.3582393128207206e-06, - "loss": 0.1883, - "step": 13296 - }, - { - "epoch": 1.2526318268528769, - "grad_norm": 0.6660821437835693, - "learning_rate": 6.3568330290094e-06, - "loss": 0.1755, - "step": 13297 - }, - { - "epoch": 1.2527260309460446, - "grad_norm": 0.6238716244697571, - "learning_rate": 6.3554268282679196e-06, - "loss": 0.2131, - "step": 13298 - }, - { - "epoch": 1.2528202350392124, - "grad_norm": 0.6290259957313538, - "learning_rate": 6.354020710628342e-06, - "loss": 0.2021, - "step": 13299 - }, - { - "epoch": 1.2529144391323803, - "grad_norm": 0.6270294785499573, - "learning_rate": 6.352614676122734e-06, - "loss": 0.211, - "step": 13300 - }, - { - "epoch": 1.2530086432255483, - "grad_norm": 0.6475179195404053, - "learning_rate": 6.351208724783151e-06, - "loss": 0.2159, - "step": 13301 - }, - { - "epoch": 1.253102847318716, - "grad_norm": 0.6810320019721985, - "learning_rate": 6.349802856641653e-06, - "loss": 0.1928, - "step": 13302 - }, - { - "epoch": 1.2531970514118838, - "grad_norm": 0.7074736952781677, - "learning_rate": 
6.3483970717302925e-06, - "loss": 0.2214, - "step": 13303 - }, - { - "epoch": 1.2532912555050517, - "grad_norm": 0.7083766460418701, - "learning_rate": 6.346991370081128e-06, - "loss": 0.2, - "step": 13304 - }, - { - "epoch": 1.2533854595982197, - "grad_norm": 0.7196433544158936, - "learning_rate": 6.345585751726211e-06, - "loss": 0.2319, - "step": 13305 - }, - { - "epoch": 1.2534796636913874, - "grad_norm": 0.7915971279144287, - "learning_rate": 6.344180216697585e-06, - "loss": 0.2138, - "step": 13306 - }, - { - "epoch": 1.2535738677845552, - "grad_norm": 0.6494907140731812, - "learning_rate": 6.342774765027309e-06, - "loss": 0.1991, - "step": 13307 - }, - { - "epoch": 1.2536680718777231, - "grad_norm": 0.6098228096961975, - "learning_rate": 6.341369396747426e-06, - "loss": 0.1907, - "step": 13308 - }, - { - "epoch": 1.253762275970891, - "grad_norm": 0.6455572843551636, - "learning_rate": 6.339964111889971e-06, - "loss": 0.205, - "step": 13309 - }, - { - "epoch": 1.2538564800640588, - "grad_norm": 0.626918375492096, - "learning_rate": 6.3385589104870024e-06, - "loss": 0.2093, - "step": 13310 - }, - { - "epoch": 1.2539506841572265, - "grad_norm": 0.7116323709487915, - "learning_rate": 6.337153792570551e-06, - "loss": 0.2154, - "step": 13311 - }, - { - "epoch": 1.2540448882503945, - "grad_norm": 0.6362070441246033, - "learning_rate": 6.335748758172658e-06, - "loss": 0.2016, - "step": 13312 - }, - { - "epoch": 1.2541390923435622, - "grad_norm": 0.6597411036491394, - "learning_rate": 6.334343807325358e-06, - "loss": 0.1964, - "step": 13313 - }, - { - "epoch": 1.2542332964367302, - "grad_norm": 0.5408357381820679, - "learning_rate": 6.33293894006069e-06, - "loss": 0.1695, - "step": 13314 - }, - { - "epoch": 1.254327500529898, - "grad_norm": 0.6397354006767273, - "learning_rate": 6.331534156410686e-06, - "loss": 0.2136, - "step": 13315 - }, - { - "epoch": 1.254421704623066, - "grad_norm": 0.6547505855560303, - "learning_rate": 6.330129456407374e-06, - "loss": 0.2044, - "step": 13316 - }, - { - "epoch": 1.2545159087162336, - "grad_norm": 0.5775501132011414, - "learning_rate": 6.328724840082787e-06, - "loss": 0.1859, - "step": 13317 - }, - { - "epoch": 1.2546101128094016, - "grad_norm": 0.6217550039291382, - "learning_rate": 6.3273203074689535e-06, - "loss": 0.2066, - "step": 13318 - }, - { - "epoch": 1.2547043169025693, - "grad_norm": 0.6779605150222778, - "learning_rate": 6.325915858597893e-06, - "loss": 0.2246, - "step": 13319 - }, - { - "epoch": 1.2547985209957373, - "grad_norm": 0.639426589012146, - "learning_rate": 6.324511493501636e-06, - "loss": 0.2087, - "step": 13320 - }, - { - "epoch": 1.254892725088905, - "grad_norm": 0.6547714471817017, - "learning_rate": 6.3231072122122e-06, - "loss": 0.1762, - "step": 13321 - }, - { - "epoch": 1.254986929182073, - "grad_norm": 0.694246768951416, - "learning_rate": 6.321703014761609e-06, - "loss": 0.2019, - "step": 13322 - }, - { - "epoch": 1.2550811332752407, - "grad_norm": 0.6800451874732971, - "learning_rate": 6.320298901181874e-06, - "loss": 0.1962, - "step": 13323 - }, - { - "epoch": 1.2551753373684087, - "grad_norm": 0.7721226811408997, - "learning_rate": 6.318894871505016e-06, - "loss": 0.2348, - "step": 13324 - }, - { - "epoch": 1.2552695414615764, - "grad_norm": 0.6430573463439941, - "learning_rate": 6.31749092576305e-06, - "loss": 0.1966, - "step": 13325 - }, - { - "epoch": 1.2553637455547444, - "grad_norm": 0.6110111474990845, - "learning_rate": 6.3160870639879815e-06, - "loss": 0.1898, - "step": 13326 - }, - { - "epoch": 
1.2554579496479121, - "grad_norm": 0.6774904727935791, - "learning_rate": 6.314683286211828e-06, - "loss": 0.2139, - "step": 13327 - }, - { - "epoch": 1.25555215374108, - "grad_norm": 0.7119840383529663, - "learning_rate": 6.313279592466596e-06, - "loss": 0.2097, - "step": 13328 - }, - { - "epoch": 1.2556463578342478, - "grad_norm": 0.6127558350563049, - "learning_rate": 6.311875982784288e-06, - "loss": 0.1954, - "step": 13329 - }, - { - "epoch": 1.2557405619274158, - "grad_norm": 0.5999763607978821, - "learning_rate": 6.310472457196914e-06, - "loss": 0.1867, - "step": 13330 - }, - { - "epoch": 1.2558347660205835, - "grad_norm": 0.667149543762207, - "learning_rate": 6.309069015736475e-06, - "loss": 0.2136, - "step": 13331 - }, - { - "epoch": 1.2559289701137515, - "grad_norm": 0.77800452709198, - "learning_rate": 6.3076656584349695e-06, - "loss": 0.1929, - "step": 13332 - }, - { - "epoch": 1.2560231742069192, - "grad_norm": 0.6693803668022156, - "learning_rate": 6.3062623853243945e-06, - "loss": 0.1877, - "step": 13333 - }, - { - "epoch": 1.2561173783000872, - "grad_norm": 0.6125097274780273, - "learning_rate": 6.304859196436752e-06, - "loss": 0.1985, - "step": 13334 - }, - { - "epoch": 1.256211582393255, - "grad_norm": 0.7701629400253296, - "learning_rate": 6.303456091804034e-06, - "loss": 0.1776, - "step": 13335 - }, - { - "epoch": 1.2563057864864229, - "grad_norm": 0.6409590244293213, - "learning_rate": 6.302053071458233e-06, - "loss": 0.2042, - "step": 13336 - }, - { - "epoch": 1.2563999905795906, - "grad_norm": 0.5813066363334656, - "learning_rate": 6.300650135431342e-06, - "loss": 0.185, - "step": 13337 - }, - { - "epoch": 1.2564941946727586, - "grad_norm": 0.6590962409973145, - "learning_rate": 6.2992472837553495e-06, - "loss": 0.1678, - "step": 13338 - }, - { - "epoch": 1.2565883987659263, - "grad_norm": 0.5903682708740234, - "learning_rate": 6.2978445164622405e-06, - "loss": 0.2212, - "step": 13339 - }, - { - "epoch": 1.2566826028590943, - "grad_norm": 0.7023627758026123, - "learning_rate": 6.296441833584004e-06, - "loss": 0.2191, - "step": 13340 - }, - { - "epoch": 1.256776806952262, - "grad_norm": 0.5824342370033264, - "learning_rate": 6.29503923515262e-06, - "loss": 0.1962, - "step": 13341 - }, - { - "epoch": 1.25687101104543, - "grad_norm": 0.7049176692962646, - "learning_rate": 6.293636721200074e-06, - "loss": 0.212, - "step": 13342 - }, - { - "epoch": 1.2569652151385977, - "grad_norm": 0.6386911273002625, - "learning_rate": 6.292234291758339e-06, - "loss": 0.1746, - "step": 13343 - }, - { - "epoch": 1.2570594192317657, - "grad_norm": 0.7633404731750488, - "learning_rate": 6.290831946859397e-06, - "loss": 0.2032, - "step": 13344 - }, - { - "epoch": 1.2571536233249334, - "grad_norm": 0.7216797471046448, - "learning_rate": 6.289429686535226e-06, - "loss": 0.2268, - "step": 13345 - }, - { - "epoch": 1.2572478274181014, - "grad_norm": 0.673564076423645, - "learning_rate": 6.2880275108177915e-06, - "loss": 0.2085, - "step": 13346 - }, - { - "epoch": 1.257342031511269, - "grad_norm": 0.6010403037071228, - "learning_rate": 6.2866254197390744e-06, - "loss": 0.1894, - "step": 13347 - }, - { - "epoch": 1.257436235604437, - "grad_norm": 0.6412175297737122, - "learning_rate": 6.285223413331043e-06, - "loss": 0.1874, - "step": 13348 - }, - { - "epoch": 1.2575304396976048, - "grad_norm": 0.6892516016960144, - "learning_rate": 6.283821491625655e-06, - "loss": 0.2266, - "step": 13349 - }, - { - "epoch": 1.2576246437907728, - "grad_norm": 0.5850574374198914, - "learning_rate": 
6.2824196546548925e-06, - "loss": 0.1903, - "step": 13350 - }, - { - "epoch": 1.2577188478839405, - "grad_norm": 0.6382246613502502, - "learning_rate": 6.281017902450707e-06, - "loss": 0.2172, - "step": 13351 - }, - { - "epoch": 1.2578130519771085, - "grad_norm": 0.5774824619293213, - "learning_rate": 6.279616235045065e-06, - "loss": 0.1953, - "step": 13352 - }, - { - "epoch": 1.2579072560702762, - "grad_norm": 0.6158215403556824, - "learning_rate": 6.278214652469925e-06, - "loss": 0.2159, - "step": 13353 - }, - { - "epoch": 1.2580014601634442, - "grad_norm": 0.6581766605377197, - "learning_rate": 6.27681315475725e-06, - "loss": 0.2442, - "step": 13354 - }, - { - "epoch": 1.258095664256612, - "grad_norm": 0.5932873487472534, - "learning_rate": 6.275411741938991e-06, - "loss": 0.2015, - "step": 13355 - }, - { - "epoch": 1.2581898683497799, - "grad_norm": 0.6409937739372253, - "learning_rate": 6.274010414047105e-06, - "loss": 0.2025, - "step": 13356 - }, - { - "epoch": 1.2582840724429476, - "grad_norm": 0.678022027015686, - "learning_rate": 6.272609171113544e-06, - "loss": 0.2082, - "step": 13357 - }, - { - "epoch": 1.2583782765361156, - "grad_norm": 0.6111551523208618, - "learning_rate": 6.271208013170258e-06, - "loss": 0.181, - "step": 13358 - }, - { - "epoch": 1.2584724806292833, - "grad_norm": 0.7439393997192383, - "learning_rate": 6.269806940249194e-06, - "loss": 0.2055, - "step": 13359 - }, - { - "epoch": 1.2585666847224513, - "grad_norm": 0.6639959216117859, - "learning_rate": 6.268405952382304e-06, - "loss": 0.1895, - "step": 13360 - }, - { - "epoch": 1.258660888815619, - "grad_norm": 0.5995284914970398, - "learning_rate": 6.267005049601529e-06, - "loss": 0.2069, - "step": 13361 - }, - { - "epoch": 1.258755092908787, - "grad_norm": 0.6568467617034912, - "learning_rate": 6.2656042319388145e-06, - "loss": 0.1799, - "step": 13362 - }, - { - "epoch": 1.2588492970019547, - "grad_norm": 0.6851602792739868, - "learning_rate": 6.264203499426092e-06, - "loss": 0.231, - "step": 13363 - }, - { - "epoch": 1.2589435010951227, - "grad_norm": 0.6350014805793762, - "learning_rate": 6.262802852095311e-06, - "loss": 0.1828, - "step": 13364 - }, - { - "epoch": 1.2590377051882904, - "grad_norm": 0.6139953136444092, - "learning_rate": 6.261402289978407e-06, - "loss": 0.1922, - "step": 13365 - }, - { - "epoch": 1.2591319092814584, - "grad_norm": 0.6572669148445129, - "learning_rate": 6.260001813107307e-06, - "loss": 0.2082, - "step": 13366 - }, - { - "epoch": 1.259226113374626, - "grad_norm": 0.6333560943603516, - "learning_rate": 6.258601421513954e-06, - "loss": 0.2076, - "step": 13367 - }, - { - "epoch": 1.259320317467794, - "grad_norm": 0.6473051905632019, - "learning_rate": 6.257201115230276e-06, - "loss": 0.1914, - "step": 13368 - }, - { - "epoch": 1.2594145215609618, - "grad_norm": 0.6263172030448914, - "learning_rate": 6.255800894288196e-06, - "loss": 0.1881, - "step": 13369 - }, - { - "epoch": 1.2595087256541297, - "grad_norm": 0.6993263363838196, - "learning_rate": 6.2544007587196496e-06, - "loss": 0.2003, - "step": 13370 - }, - { - "epoch": 1.2596029297472975, - "grad_norm": 0.748926043510437, - "learning_rate": 6.253000708556558e-06, - "loss": 0.2363, - "step": 13371 - }, - { - "epoch": 1.2596971338404654, - "grad_norm": 0.7410542368888855, - "learning_rate": 6.2516007438308456e-06, - "loss": 0.2338, - "step": 13372 - }, - { - "epoch": 1.2597913379336332, - "grad_norm": 0.6676450967788696, - "learning_rate": 6.250200864574432e-06, - "loss": 0.2038, - "step": 13373 - }, - { - "epoch": 
1.2598855420268011, - "grad_norm": 0.593265950679779, - "learning_rate": 6.2488010708192385e-06, - "loss": 0.2039, - "step": 13374 - }, - { - "epoch": 1.2599797461199689, - "grad_norm": 0.6724021434783936, - "learning_rate": 6.247401362597182e-06, - "loss": 0.2267, - "step": 13375 - }, - { - "epoch": 1.2600739502131368, - "grad_norm": 0.5940662622451782, - "learning_rate": 6.246001739940175e-06, - "loss": 0.1859, - "step": 13376 - }, - { - "epoch": 1.2601681543063046, - "grad_norm": 0.6112838387489319, - "learning_rate": 6.244602202880138e-06, - "loss": 0.1858, - "step": 13377 - }, - { - "epoch": 1.2602623583994725, - "grad_norm": 0.6411683559417725, - "learning_rate": 6.2432027514489776e-06, - "loss": 0.1986, - "step": 13378 - }, - { - "epoch": 1.2603565624926403, - "grad_norm": 0.7092647552490234, - "learning_rate": 6.241803385678603e-06, - "loss": 0.2276, - "step": 13379 - }, - { - "epoch": 1.2604507665858082, - "grad_norm": 0.6743448972702026, - "learning_rate": 6.240404105600925e-06, - "loss": 0.2167, - "step": 13380 - }, - { - "epoch": 1.260544970678976, - "grad_norm": 0.6665029525756836, - "learning_rate": 6.239004911247848e-06, - "loss": 0.186, - "step": 13381 - }, - { - "epoch": 1.2606391747721437, - "grad_norm": 0.6669281721115112, - "learning_rate": 6.237605802651277e-06, - "loss": 0.1897, - "step": 13382 - }, - { - "epoch": 1.2607333788653117, - "grad_norm": 0.617443323135376, - "learning_rate": 6.236206779843106e-06, - "loss": 0.1941, - "step": 13383 - }, - { - "epoch": 1.2608275829584796, - "grad_norm": 0.6149744391441345, - "learning_rate": 6.234807842855246e-06, - "loss": 0.2075, - "step": 13384 - }, - { - "epoch": 1.2609217870516474, - "grad_norm": 0.6505521535873413, - "learning_rate": 6.233408991719591e-06, - "loss": 0.1847, - "step": 13385 - }, - { - "epoch": 1.261015991144815, - "grad_norm": 0.6811007261276245, - "learning_rate": 6.2320102264680325e-06, - "loss": 0.2021, - "step": 13386 - }, - { - "epoch": 1.261110195237983, - "grad_norm": 0.7151916027069092, - "learning_rate": 6.23061154713247e-06, - "loss": 0.2231, - "step": 13387 - }, - { - "epoch": 1.261204399331151, - "grad_norm": 0.6212832927703857, - "learning_rate": 6.229212953744796e-06, - "loss": 0.1759, - "step": 13388 - }, - { - "epoch": 1.2612986034243188, - "grad_norm": 0.7134900093078613, - "learning_rate": 6.227814446336894e-06, - "loss": 0.212, - "step": 13389 - }, - { - "epoch": 1.2613928075174865, - "grad_norm": 0.6666131615638733, - "learning_rate": 6.226416024940661e-06, - "loss": 0.1975, - "step": 13390 - }, - { - "epoch": 1.2614870116106545, - "grad_norm": 0.6203562021255493, - "learning_rate": 6.225017689587978e-06, - "loss": 0.1948, - "step": 13391 - }, - { - "epoch": 1.2615812157038224, - "grad_norm": 0.6093359589576721, - "learning_rate": 6.2236194403107275e-06, - "loss": 0.2369, - "step": 13392 - }, - { - "epoch": 1.2616754197969902, - "grad_norm": 0.7116411924362183, - "learning_rate": 6.222221277140793e-06, - "loss": 0.2183, - "step": 13393 - }, - { - "epoch": 1.261769623890158, - "grad_norm": 0.6080445051193237, - "learning_rate": 6.220823200110058e-06, - "loss": 0.2132, - "step": 13394 - }, - { - "epoch": 1.2618638279833259, - "grad_norm": 0.6164963841438293, - "learning_rate": 6.219425209250399e-06, - "loss": 0.2267, - "step": 13395 - }, - { - "epoch": 1.2619580320764938, - "grad_norm": 0.6323091983795166, - "learning_rate": 6.21802730459369e-06, - "loss": 0.2003, - "step": 13396 - }, - { - "epoch": 1.2620522361696616, - "grad_norm": 0.6259250640869141, - "learning_rate": 
6.216629486171808e-06, - "loss": 0.2062, - "step": 13397 - }, - { - "epoch": 1.2621464402628293, - "grad_norm": 0.6342028379440308, - "learning_rate": 6.215231754016626e-06, - "loss": 0.1967, - "step": 13398 - }, - { - "epoch": 1.2622406443559973, - "grad_norm": 0.7478952407836914, - "learning_rate": 6.213834108160011e-06, - "loss": 0.2082, - "step": 13399 - }, - { - "epoch": 1.2623348484491652, - "grad_norm": 0.6865331530570984, - "learning_rate": 6.212436548633836e-06, - "loss": 0.2007, - "step": 13400 - }, - { - "epoch": 1.262429052542333, - "grad_norm": 0.6410707235336304, - "learning_rate": 6.211039075469964e-06, - "loss": 0.2128, - "step": 13401 - }, - { - "epoch": 1.2625232566355007, - "grad_norm": 0.7006101608276367, - "learning_rate": 6.209641688700265e-06, - "loss": 0.2193, - "step": 13402 - }, - { - "epoch": 1.2626174607286687, - "grad_norm": 0.5957962274551392, - "learning_rate": 6.2082443883565905e-06, - "loss": 0.1995, - "step": 13403 - }, - { - "epoch": 1.2627116648218366, - "grad_norm": 0.6570765376091003, - "learning_rate": 6.206847174470811e-06, - "loss": 0.1983, - "step": 13404 - }, - { - "epoch": 1.2628058689150043, - "grad_norm": 0.7158840298652649, - "learning_rate": 6.205450047074786e-06, - "loss": 0.2459, - "step": 13405 - }, - { - "epoch": 1.262900073008172, - "grad_norm": 0.6220098733901978, - "learning_rate": 6.204053006200361e-06, - "loss": 0.2014, - "step": 13406 - }, - { - "epoch": 1.26299427710134, - "grad_norm": 0.7102717757225037, - "learning_rate": 6.202656051879405e-06, - "loss": 0.1923, - "step": 13407 - }, - { - "epoch": 1.263088481194508, - "grad_norm": 0.6392085552215576, - "learning_rate": 6.20125918414376e-06, - "loss": 0.1891, - "step": 13408 - }, - { - "epoch": 1.2631826852876757, - "grad_norm": 0.5897543430328369, - "learning_rate": 6.199862403025278e-06, - "loss": 0.2113, - "step": 13409 - }, - { - "epoch": 1.2632768893808435, - "grad_norm": 0.5956061482429504, - "learning_rate": 6.198465708555815e-06, - "loss": 0.1926, - "step": 13410 - }, - { - "epoch": 1.2633710934740114, - "grad_norm": 0.6666398048400879, - "learning_rate": 6.1970691007672124e-06, - "loss": 0.227, - "step": 13411 - }, - { - "epoch": 1.2634652975671794, - "grad_norm": 0.7923960089683533, - "learning_rate": 6.195672579691314e-06, - "loss": 0.186, - "step": 13412 - }, - { - "epoch": 1.2635595016603471, - "grad_norm": 0.6867488622665405, - "learning_rate": 6.194276145359963e-06, - "loss": 0.2213, - "step": 13413 - }, - { - "epoch": 1.2636537057535149, - "grad_norm": 0.6683698892593384, - "learning_rate": 6.192879797805005e-06, - "loss": 0.2064, - "step": 13414 - }, - { - "epoch": 1.2637479098466828, - "grad_norm": 0.6710454821586609, - "learning_rate": 6.191483537058274e-06, - "loss": 0.197, - "step": 13415 - }, - { - "epoch": 1.2638421139398508, - "grad_norm": 0.6741254329681396, - "learning_rate": 6.1900873631516064e-06, - "loss": 0.2063, - "step": 13416 - }, - { - "epoch": 1.2639363180330185, - "grad_norm": 0.703450083732605, - "learning_rate": 6.188691276116841e-06, - "loss": 0.2154, - "step": 13417 - }, - { - "epoch": 1.2640305221261863, - "grad_norm": 0.6495195627212524, - "learning_rate": 6.18729527598581e-06, - "loss": 0.2049, - "step": 13418 - }, - { - "epoch": 1.2641247262193542, - "grad_norm": 0.6587666869163513, - "learning_rate": 6.18589936279034e-06, - "loss": 0.2528, - "step": 13419 - }, - { - "epoch": 1.2642189303125222, - "grad_norm": 0.6717892289161682, - "learning_rate": 6.1845035365622655e-06, - "loss": 0.2048, - "step": 13420 - }, - { - "epoch": 
1.26431313440569, - "grad_norm": 0.6473823189735413, - "learning_rate": 6.183107797333411e-06, - "loss": 0.1894, - "step": 13421 - }, - { - "epoch": 1.2644073384988577, - "grad_norm": 0.6968402862548828, - "learning_rate": 6.181712145135603e-06, - "loss": 0.2301, - "step": 13422 - }, - { - "epoch": 1.2645015425920256, - "grad_norm": 0.6945221424102783, - "learning_rate": 6.1803165800006585e-06, - "loss": 0.2142, - "step": 13423 - }, - { - "epoch": 1.2645957466851936, - "grad_norm": 0.6499255895614624, - "learning_rate": 6.178921101960407e-06, - "loss": 0.2055, - "step": 13424 - }, - { - "epoch": 1.2646899507783613, - "grad_norm": 0.674542248249054, - "learning_rate": 6.177525711046664e-06, - "loss": 0.2271, - "step": 13425 - }, - { - "epoch": 1.264784154871529, - "grad_norm": 0.6025372743606567, - "learning_rate": 6.176130407291243e-06, - "loss": 0.1968, - "step": 13426 - }, - { - "epoch": 1.264878358964697, - "grad_norm": 0.6595885753631592, - "learning_rate": 6.174735190725967e-06, - "loss": 0.2335, - "step": 13427 - }, - { - "epoch": 1.264972563057865, - "grad_norm": 0.6103795170783997, - "learning_rate": 6.173340061382642e-06, - "loss": 0.2081, - "step": 13428 - }, - { - "epoch": 1.2650667671510327, - "grad_norm": 0.596925675868988, - "learning_rate": 6.1719450192930786e-06, - "loss": 0.1965, - "step": 13429 - }, - { - "epoch": 1.2651609712442005, - "grad_norm": 0.6182538270950317, - "learning_rate": 6.1705500644890946e-06, - "loss": 0.198, - "step": 13430 - }, - { - "epoch": 1.2652551753373684, - "grad_norm": 0.6547569632530212, - "learning_rate": 6.16915519700249e-06, - "loss": 0.1984, - "step": 13431 - }, - { - "epoch": 1.2653493794305364, - "grad_norm": 0.6183830499649048, - "learning_rate": 6.1677604168650705e-06, - "loss": 0.1706, - "step": 13432 - }, - { - "epoch": 1.2654435835237041, - "grad_norm": 0.6266723275184631, - "learning_rate": 6.1663657241086385e-06, - "loss": 0.1862, - "step": 13433 - }, - { - "epoch": 1.2655377876168719, - "grad_norm": 0.6035991907119751, - "learning_rate": 6.164971118764999e-06, - "loss": 0.1855, - "step": 13434 - }, - { - "epoch": 1.2656319917100398, - "grad_norm": 0.7422029376029968, - "learning_rate": 6.16357660086595e-06, - "loss": 0.2166, - "step": 13435 - }, - { - "epoch": 1.2657261958032078, - "grad_norm": 0.6359850168228149, - "learning_rate": 6.162182170443285e-06, - "loss": 0.2069, - "step": 13436 - }, - { - "epoch": 1.2658203998963755, - "grad_norm": 0.5942542552947998, - "learning_rate": 6.160787827528802e-06, - "loss": 0.1769, - "step": 13437 - }, - { - "epoch": 1.2659146039895433, - "grad_norm": 0.6017497777938843, - "learning_rate": 6.159393572154296e-06, - "loss": 0.1845, - "step": 13438 - }, - { - "epoch": 1.2660088080827112, - "grad_norm": 0.5920788049697876, - "learning_rate": 6.1579994043515535e-06, - "loss": 0.2143, - "step": 13439 - }, - { - "epoch": 1.2661030121758792, - "grad_norm": 0.6443082690238953, - "learning_rate": 6.156605324152369e-06, - "loss": 0.1766, - "step": 13440 - }, - { - "epoch": 1.266197216269047, - "grad_norm": 0.7014357447624207, - "learning_rate": 6.155211331588527e-06, - "loss": 0.1884, - "step": 13441 - }, - { - "epoch": 1.2662914203622146, - "grad_norm": 0.6396356225013733, - "learning_rate": 6.153817426691813e-06, - "loss": 0.1896, - "step": 13442 - }, - { - "epoch": 1.2663856244553826, - "grad_norm": 0.6797584891319275, - "learning_rate": 6.152423609494005e-06, - "loss": 0.2122, - "step": 13443 - }, - { - "epoch": 1.2664798285485506, - "grad_norm": 0.60710209608078, - "learning_rate": 
6.151029880026893e-06, - "loss": 0.1645, - "step": 13444 - }, - { - "epoch": 1.2665740326417183, - "grad_norm": 0.6762518286705017, - "learning_rate": 6.149636238322255e-06, - "loss": 0.2045, - "step": 13445 - }, - { - "epoch": 1.266668236734886, - "grad_norm": 0.6514323949813843, - "learning_rate": 6.148242684411859e-06, - "loss": 0.1799, - "step": 13446 - }, - { - "epoch": 1.266762440828054, - "grad_norm": 0.5996586084365845, - "learning_rate": 6.146849218327493e-06, - "loss": 0.2042, - "step": 13447 - }, - { - "epoch": 1.266856644921222, - "grad_norm": 0.6697123050689697, - "learning_rate": 6.145455840100921e-06, - "loss": 0.2215, - "step": 13448 - }, - { - "epoch": 1.2669508490143897, - "grad_norm": 0.6317233443260193, - "learning_rate": 6.144062549763914e-06, - "loss": 0.2042, - "step": 13449 - }, - { - "epoch": 1.2670450531075574, - "grad_norm": 0.6346283555030823, - "learning_rate": 6.142669347348249e-06, - "loss": 0.2037, - "step": 13450 - }, - { - "epoch": 1.2671392572007254, - "grad_norm": 0.6377195119857788, - "learning_rate": 6.141276232885687e-06, - "loss": 0.1988, - "step": 13451 - }, - { - "epoch": 1.2672334612938931, - "grad_norm": 0.6562709808349609, - "learning_rate": 6.139883206407995e-06, - "loss": 0.2204, - "step": 13452 - }, - { - "epoch": 1.267327665387061, - "grad_norm": 0.6999305486679077, - "learning_rate": 6.138490267946933e-06, - "loss": 0.2134, - "step": 13453 - }, - { - "epoch": 1.2674218694802288, - "grad_norm": 0.6550266146659851, - "learning_rate": 6.137097417534268e-06, - "loss": 0.1927, - "step": 13454 - }, - { - "epoch": 1.2675160735733968, - "grad_norm": 0.6951824426651001, - "learning_rate": 6.135704655201755e-06, - "loss": 0.2155, - "step": 13455 - }, - { - "epoch": 1.2676102776665645, - "grad_norm": 0.666174590587616, - "learning_rate": 6.134311980981149e-06, - "loss": 0.2001, - "step": 13456 - }, - { - "epoch": 1.2677044817597325, - "grad_norm": 0.6004818677902222, - "learning_rate": 6.132919394904212e-06, - "loss": 0.184, - "step": 13457 - }, - { - "epoch": 1.2677986858529002, - "grad_norm": 0.612799882888794, - "learning_rate": 6.131526897002693e-06, - "loss": 0.1976, - "step": 13458 - }, - { - "epoch": 1.2678928899460682, - "grad_norm": 0.6072046756744385, - "learning_rate": 6.130134487308341e-06, - "loss": 0.1762, - "step": 13459 - }, - { - "epoch": 1.267987094039236, - "grad_norm": 0.648533821105957, - "learning_rate": 6.12874216585291e-06, - "loss": 0.2029, - "step": 13460 - }, - { - "epoch": 1.268081298132404, - "grad_norm": 0.6512607336044312, - "learning_rate": 6.127349932668144e-06, - "loss": 0.2426, - "step": 13461 - }, - { - "epoch": 1.2681755022255716, - "grad_norm": 0.6310539841651917, - "learning_rate": 6.12595778778579e-06, - "loss": 0.1868, - "step": 13462 - }, - { - "epoch": 1.2682697063187396, - "grad_norm": 0.6344117522239685, - "learning_rate": 6.124565731237586e-06, - "loss": 0.1886, - "step": 13463 - }, - { - "epoch": 1.2683639104119073, - "grad_norm": 0.6713270545005798, - "learning_rate": 6.123173763055279e-06, - "loss": 0.1888, - "step": 13464 - }, - { - "epoch": 1.2684581145050753, - "grad_norm": 0.6484018564224243, - "learning_rate": 6.121781883270609e-06, - "loss": 0.1955, - "step": 13465 - }, - { - "epoch": 1.268552318598243, - "grad_norm": 0.6407499313354492, - "learning_rate": 6.1203900919153026e-06, - "loss": 0.2027, - "step": 13466 - }, - { - "epoch": 1.268646522691411, - "grad_norm": 0.6809627413749695, - "learning_rate": 6.118998389021109e-06, - "loss": 0.2135, - "step": 13467 - }, - { - "epoch": 
1.2687407267845787, - "grad_norm": 0.6053674221038818, - "learning_rate": 6.117606774619752e-06, - "loss": 0.1995, - "step": 13468 - }, - { - "epoch": 1.2688349308777467, - "grad_norm": 0.6646608710289001, - "learning_rate": 6.116215248742961e-06, - "loss": 0.2257, - "step": 13469 - }, - { - "epoch": 1.2689291349709144, - "grad_norm": 0.6139371991157532, - "learning_rate": 6.114823811422474e-06, - "loss": 0.2173, - "step": 13470 - }, - { - "epoch": 1.2690233390640824, - "grad_norm": 0.6624774932861328, - "learning_rate": 6.1134324626900125e-06, - "loss": 0.1883, - "step": 13471 - }, - { - "epoch": 1.2691175431572501, - "grad_norm": 0.746110200881958, - "learning_rate": 6.112041202577299e-06, - "loss": 0.2286, - "step": 13472 - }, - { - "epoch": 1.269211747250418, - "grad_norm": 0.6624348759651184, - "learning_rate": 6.110650031116059e-06, - "loss": 0.2179, - "step": 13473 - }, - { - "epoch": 1.2693059513435858, - "grad_norm": 0.7462780475616455, - "learning_rate": 6.1092589483380126e-06, - "loss": 0.2055, - "step": 13474 - }, - { - "epoch": 1.2694001554367538, - "grad_norm": 0.6260996460914612, - "learning_rate": 6.107867954274882e-06, - "loss": 0.2193, - "step": 13475 - }, - { - "epoch": 1.2694943595299215, - "grad_norm": 0.6321893334388733, - "learning_rate": 6.106477048958378e-06, - "loss": 0.2063, - "step": 13476 - }, - { - "epoch": 1.2695885636230895, - "grad_norm": 0.6717593669891357, - "learning_rate": 6.105086232420221e-06, - "loss": 0.1941, - "step": 13477 - }, - { - "epoch": 1.2696827677162572, - "grad_norm": 0.6348603367805481, - "learning_rate": 6.103695504692122e-06, - "loss": 0.1913, - "step": 13478 - }, - { - "epoch": 1.2697769718094252, - "grad_norm": 0.7140622735023499, - "learning_rate": 6.1023048658057886e-06, - "loss": 0.2087, - "step": 13479 - }, - { - "epoch": 1.269871175902593, - "grad_norm": 0.6726442575454712, - "learning_rate": 6.100914315792934e-06, - "loss": 0.228, - "step": 13480 - }, - { - "epoch": 1.2699653799957609, - "grad_norm": 0.6332194805145264, - "learning_rate": 6.099523854685264e-06, - "loss": 0.2115, - "step": 13481 - }, - { - "epoch": 1.2700595840889286, - "grad_norm": 0.6968733668327332, - "learning_rate": 6.098133482514483e-06, - "loss": 0.1848, - "step": 13482 - }, - { - "epoch": 1.2701537881820966, - "grad_norm": 0.7023800015449524, - "learning_rate": 6.096743199312289e-06, - "loss": 0.225, - "step": 13483 - }, - { - "epoch": 1.2702479922752643, - "grad_norm": 0.7038373947143555, - "learning_rate": 6.095353005110389e-06, - "loss": 0.2299, - "step": 13484 - }, - { - "epoch": 1.2703421963684323, - "grad_norm": 0.7013296484947205, - "learning_rate": 6.093962899940482e-06, - "loss": 0.2276, - "step": 13485 - }, - { - "epoch": 1.2704364004616, - "grad_norm": 0.6218545436859131, - "learning_rate": 6.0925728838342545e-06, - "loss": 0.1978, - "step": 13486 - }, - { - "epoch": 1.270530604554768, - "grad_norm": 0.5693235993385315, - "learning_rate": 6.091182956823415e-06, - "loss": 0.1663, - "step": 13487 - }, - { - "epoch": 1.2706248086479357, - "grad_norm": 0.6664445400238037, - "learning_rate": 6.089793118939646e-06, - "loss": 0.2001, - "step": 13488 - }, - { - "epoch": 1.2707190127411037, - "grad_norm": 0.6295485496520996, - "learning_rate": 6.088403370214639e-06, - "loss": 0.2095, - "step": 13489 - }, - { - "epoch": 1.2708132168342714, - "grad_norm": 0.7190636992454529, - "learning_rate": 6.08701371068009e-06, - "loss": 0.1925, - "step": 13490 - }, - { - "epoch": 1.2709074209274394, - "grad_norm": 0.7192946672439575, - "learning_rate": 
6.085624140367677e-06, - "loss": 0.252, - "step": 13491 - }, - { - "epoch": 1.271001625020607, - "grad_norm": 0.5965147018432617, - "learning_rate": 6.084234659309088e-06, - "loss": 0.1955, - "step": 13492 - }, - { - "epoch": 1.271095829113775, - "grad_norm": 0.6417478919029236, - "learning_rate": 6.082845267536003e-06, - "loss": 0.1896, - "step": 13493 - }, - { - "epoch": 1.2711900332069428, - "grad_norm": 0.6431136131286621, - "learning_rate": 6.081455965080105e-06, - "loss": 0.1938, - "step": 13494 - }, - { - "epoch": 1.2712842373001108, - "grad_norm": 0.689927339553833, - "learning_rate": 6.080066751973073e-06, - "loss": 0.1996, - "step": 13495 - }, - { - "epoch": 1.2713784413932785, - "grad_norm": 0.6282612085342407, - "learning_rate": 6.078677628246577e-06, - "loss": 0.2118, - "step": 13496 - }, - { - "epoch": 1.2714726454864465, - "grad_norm": 0.6963714957237244, - "learning_rate": 6.077288593932298e-06, - "loss": 0.1892, - "step": 13497 - }, - { - "epoch": 1.2715668495796142, - "grad_norm": 0.6831355094909668, - "learning_rate": 6.075899649061907e-06, - "loss": 0.2163, - "step": 13498 - }, - { - "epoch": 1.2716610536727821, - "grad_norm": 0.7394452691078186, - "learning_rate": 6.07451079366707e-06, - "loss": 0.2024, - "step": 13499 - }, - { - "epoch": 1.2717552577659499, - "grad_norm": 0.6782084703445435, - "learning_rate": 6.073122027779459e-06, - "loss": 0.2091, - "step": 13500 - }, - { - "epoch": 1.2718494618591178, - "grad_norm": 0.6251419186592102, - "learning_rate": 6.071733351430739e-06, - "loss": 0.2007, - "step": 13501 - }, - { - "epoch": 1.2719436659522856, - "grad_norm": 0.6253593564033508, - "learning_rate": 6.070344764652577e-06, - "loss": 0.195, - "step": 13502 - }, - { - "epoch": 1.2720378700454535, - "grad_norm": 0.6126936078071594, - "learning_rate": 6.068956267476624e-06, - "loss": 0.1963, - "step": 13503 - }, - { - "epoch": 1.2721320741386213, - "grad_norm": 0.6560893058776855, - "learning_rate": 6.067567859934553e-06, - "loss": 0.1872, - "step": 13504 - }, - { - "epoch": 1.2722262782317892, - "grad_norm": 0.7075398564338684, - "learning_rate": 6.0661795420580185e-06, - "loss": 0.2172, - "step": 13505 - }, - { - "epoch": 1.272320482324957, - "grad_norm": 0.5794890522956848, - "learning_rate": 6.064791313878667e-06, - "loss": 0.1711, - "step": 13506 - }, - { - "epoch": 1.272414686418125, - "grad_norm": 0.6572920680046082, - "learning_rate": 6.063403175428166e-06, - "loss": 0.1983, - "step": 13507 - }, - { - "epoch": 1.2725088905112927, - "grad_norm": 0.6745020151138306, - "learning_rate": 6.0620151267381585e-06, - "loss": 0.2091, - "step": 13508 - }, - { - "epoch": 1.2726030946044606, - "grad_norm": 0.6793653964996338, - "learning_rate": 6.060627167840294e-06, - "loss": 0.2253, - "step": 13509 - }, - { - "epoch": 1.2726972986976284, - "grad_norm": 0.7092230916023254, - "learning_rate": 6.059239298766226e-06, - "loss": 0.254, - "step": 13510 - }, - { - "epoch": 1.2727915027907963, - "grad_norm": 0.6768363118171692, - "learning_rate": 6.057851519547595e-06, - "loss": 0.2019, - "step": 13511 - }, - { - "epoch": 1.272885706883964, - "grad_norm": 0.5751312971115112, - "learning_rate": 6.0564638302160474e-06, - "loss": 0.1833, - "step": 13512 - }, - { - "epoch": 1.272979910977132, - "grad_norm": 0.6538204550743103, - "learning_rate": 6.05507623080322e-06, - "loss": 0.1964, - "step": 13513 - }, - { - "epoch": 1.2730741150702998, - "grad_norm": 0.7615554332733154, - "learning_rate": 6.053688721340758e-06, - "loss": 0.2163, - "step": 13514 - }, - { - "epoch": 
1.2731683191634677, - "grad_norm": 0.6217525005340576, - "learning_rate": 6.052301301860296e-06, - "loss": 0.2232, - "step": 13515 - }, - { - "epoch": 1.2732625232566355, - "grad_norm": 0.62110435962677, - "learning_rate": 6.050913972393468e-06, - "loss": 0.1924, - "step": 13516 - }, - { - "epoch": 1.2733567273498034, - "grad_norm": 0.6933391690254211, - "learning_rate": 6.049526732971911e-06, - "loss": 0.2219, - "step": 13517 - }, - { - "epoch": 1.2734509314429712, - "grad_norm": 0.6840419769287109, - "learning_rate": 6.048139583627252e-06, - "loss": 0.1849, - "step": 13518 - }, - { - "epoch": 1.2735451355361391, - "grad_norm": 0.6440111398696899, - "learning_rate": 6.046752524391122e-06, - "loss": 0.1937, - "step": 13519 - }, - { - "epoch": 1.2736393396293069, - "grad_norm": 0.6548159122467041, - "learning_rate": 6.045365555295151e-06, - "loss": 0.2187, - "step": 13520 - }, - { - "epoch": 1.2737335437224746, - "grad_norm": 0.7605534791946411, - "learning_rate": 6.04397867637096e-06, - "loss": 0.1825, - "step": 13521 - }, - { - "epoch": 1.2738277478156426, - "grad_norm": 0.6676763892173767, - "learning_rate": 6.042591887650175e-06, - "loss": 0.2044, - "step": 13522 - }, - { - "epoch": 1.2739219519088105, - "grad_norm": 0.633726179599762, - "learning_rate": 6.04120518916441e-06, - "loss": 0.1913, - "step": 13523 - }, - { - "epoch": 1.2740161560019783, - "grad_norm": 0.7962849140167236, - "learning_rate": 6.039818580945293e-06, - "loss": 0.2209, - "step": 13524 - }, - { - "epoch": 1.274110360095146, - "grad_norm": 0.6224062442779541, - "learning_rate": 6.038432063024437e-06, - "loss": 0.2078, - "step": 13525 - }, - { - "epoch": 1.274204564188314, - "grad_norm": 0.6988505125045776, - "learning_rate": 6.037045635433454e-06, - "loss": 0.2157, - "step": 13526 - }, - { - "epoch": 1.274298768281482, - "grad_norm": 0.6654592752456665, - "learning_rate": 6.035659298203963e-06, - "loss": 0.1987, - "step": 13527 - }, - { - "epoch": 1.2743929723746497, - "grad_norm": 0.6687403917312622, - "learning_rate": 6.03427305136757e-06, - "loss": 0.2183, - "step": 13528 - }, - { - "epoch": 1.2744871764678174, - "grad_norm": 0.6197983026504517, - "learning_rate": 6.03288689495588e-06, - "loss": 0.21, - "step": 13529 - }, - { - "epoch": 1.2745813805609854, - "grad_norm": 0.6117048263549805, - "learning_rate": 6.031500829000509e-06, - "loss": 0.2103, - "step": 13530 - }, - { - "epoch": 1.2746755846541533, - "grad_norm": 0.7111307382583618, - "learning_rate": 6.030114853533057e-06, - "loss": 0.2159, - "step": 13531 - }, - { - "epoch": 1.274769788747321, - "grad_norm": 0.6280469298362732, - "learning_rate": 6.028728968585125e-06, - "loss": 0.1944, - "step": 13532 - }, - { - "epoch": 1.2748639928404888, - "grad_norm": 1.2519681453704834, - "learning_rate": 6.0273431741883115e-06, - "loss": 0.202, - "step": 13533 - }, - { - "epoch": 1.2749581969336568, - "grad_norm": 0.641857385635376, - "learning_rate": 6.02595747037422e-06, - "loss": 0.2074, - "step": 13534 - }, - { - "epoch": 1.2750524010268247, - "grad_norm": 0.6303014159202576, - "learning_rate": 6.024571857174443e-06, - "loss": 0.1903, - "step": 13535 - }, - { - "epoch": 1.2751466051199924, - "grad_norm": 0.7295275330543518, - "learning_rate": 6.023186334620574e-06, - "loss": 0.1933, - "step": 13536 - }, - { - "epoch": 1.2752408092131602, - "grad_norm": 0.6660329699516296, - "learning_rate": 6.0218009027442105e-06, - "loss": 0.2011, - "step": 13537 - }, - { - "epoch": 1.2753350133063281, - "grad_norm": 0.6415559649467468, - "learning_rate": 
6.020415561576938e-06, - "loss": 0.2096, - "step": 13538 - }, - { - "epoch": 1.275429217399496, - "grad_norm": 0.6854776740074158, - "learning_rate": 6.019030311150342e-06, - "loss": 0.2016, - "step": 13539 - }, - { - "epoch": 1.2755234214926638, - "grad_norm": 0.6685968637466431, - "learning_rate": 6.017645151496015e-06, - "loss": 0.2119, - "step": 13540 - }, - { - "epoch": 1.2756176255858316, - "grad_norm": 0.6606480479240417, - "learning_rate": 6.0162600826455375e-06, - "loss": 0.1943, - "step": 13541 - }, - { - "epoch": 1.2757118296789995, - "grad_norm": 0.954875648021698, - "learning_rate": 6.014875104630493e-06, - "loss": 0.2248, - "step": 13542 - }, - { - "epoch": 1.2758060337721675, - "grad_norm": 0.7003217339515686, - "learning_rate": 6.013490217482452e-06, - "loss": 0.2327, - "step": 13543 - }, - { - "epoch": 1.2759002378653352, - "grad_norm": 0.6598342657089233, - "learning_rate": 6.0121054212330066e-06, - "loss": 0.2404, - "step": 13544 - }, - { - "epoch": 1.275994441958503, - "grad_norm": 0.6676200032234192, - "learning_rate": 6.010720715913723e-06, - "loss": 0.2383, - "step": 13545 - }, - { - "epoch": 1.276088646051671, - "grad_norm": 0.6191518306732178, - "learning_rate": 6.009336101556171e-06, - "loss": 0.1803, - "step": 13546 - }, - { - "epoch": 1.276182850144839, - "grad_norm": 0.6574147343635559, - "learning_rate": 6.007951578191935e-06, - "loss": 0.2073, - "step": 13547 - }, - { - "epoch": 1.2762770542380066, - "grad_norm": 0.5903410911560059, - "learning_rate": 6.006567145852575e-06, - "loss": 0.1949, - "step": 13548 - }, - { - "epoch": 1.2763712583311744, - "grad_norm": 0.7233710289001465, - "learning_rate": 6.0051828045696555e-06, - "loss": 0.2356, - "step": 13549 - }, - { - "epoch": 1.2764654624243423, - "grad_norm": 0.6821407675743103, - "learning_rate": 6.0037985543747524e-06, - "loss": 0.2295, - "step": 13550 - }, - { - "epoch": 1.2765596665175103, - "grad_norm": 0.6873706579208374, - "learning_rate": 6.00241439529942e-06, - "loss": 0.2101, - "step": 13551 - }, - { - "epoch": 1.276653870610678, - "grad_norm": 0.5550234913825989, - "learning_rate": 6.001030327375222e-06, - "loss": 0.1893, - "step": 13552 - }, - { - "epoch": 1.2767480747038458, - "grad_norm": 0.7132258415222168, - "learning_rate": 5.999646350633715e-06, - "loss": 0.2239, - "step": 13553 - }, - { - "epoch": 1.2768422787970137, - "grad_norm": 0.6445050239562988, - "learning_rate": 5.9982624651064605e-06, - "loss": 0.2303, - "step": 13554 - }, - { - "epoch": 1.2769364828901817, - "grad_norm": 0.5875824093818665, - "learning_rate": 5.99687867082501e-06, - "loss": 0.1981, - "step": 13555 - }, - { - "epoch": 1.2770306869833494, - "grad_norm": 0.5886663794517517, - "learning_rate": 5.995494967820915e-06, - "loss": 0.1927, - "step": 13556 - }, - { - "epoch": 1.2771248910765172, - "grad_norm": 0.6198879480361938, - "learning_rate": 5.994111356125729e-06, - "loss": 0.1979, - "step": 13557 - }, - { - "epoch": 1.2772190951696851, - "grad_norm": 0.7277728915214539, - "learning_rate": 5.992727835771002e-06, - "loss": 0.2291, - "step": 13558 - }, - { - "epoch": 1.277313299262853, - "grad_norm": 0.6491997838020325, - "learning_rate": 5.9913444067882735e-06, - "loss": 0.2108, - "step": 13559 - }, - { - "epoch": 1.2774075033560208, - "grad_norm": 0.7521607279777527, - "learning_rate": 5.989961069209094e-06, - "loss": 0.2438, - "step": 13560 - }, - { - "epoch": 1.2775017074491886, - "grad_norm": 0.6627978086471558, - "learning_rate": 5.9885778230650024e-06, - "loss": 0.2285, - "step": 13561 - }, - { - "epoch": 
1.2775959115423565, - "grad_norm": 0.6448333859443665, - "learning_rate": 5.9871946683875444e-06, - "loss": 0.2119, - "step": 13562 - }, - { - "epoch": 1.2776901156355245, - "grad_norm": 0.6985797882080078, - "learning_rate": 5.985811605208247e-06, - "loss": 0.2284, - "step": 13563 - }, - { - "epoch": 1.2777843197286922, - "grad_norm": 0.640409529209137, - "learning_rate": 5.984428633558661e-06, - "loss": 0.1924, - "step": 13564 - }, - { - "epoch": 1.27787852382186, - "grad_norm": 0.6922116279602051, - "learning_rate": 5.983045753470308e-06, - "loss": 0.2006, - "step": 13565 - }, - { - "epoch": 1.277972727915028, - "grad_norm": 0.6037483811378479, - "learning_rate": 5.981662964974721e-06, - "loss": 0.1798, - "step": 13566 - }, - { - "epoch": 1.2780669320081959, - "grad_norm": 0.7847719788551331, - "learning_rate": 5.980280268103439e-06, - "loss": 0.2183, - "step": 13567 - }, - { - "epoch": 1.2781611361013636, - "grad_norm": 0.7188629508018494, - "learning_rate": 5.978897662887982e-06, - "loss": 0.1801, - "step": 13568 - }, - { - "epoch": 1.2782553401945314, - "grad_norm": 0.6025604009628296, - "learning_rate": 5.9775151493598735e-06, - "loss": 0.178, - "step": 13569 - }, - { - "epoch": 1.2783495442876993, - "grad_norm": 0.6027699708938599, - "learning_rate": 5.9761327275506435e-06, - "loss": 0.1783, - "step": 13570 - }, - { - "epoch": 1.2784437483808673, - "grad_norm": 0.6953678727149963, - "learning_rate": 5.9747503974918105e-06, - "loss": 0.2163, - "step": 13571 - }, - { - "epoch": 1.278537952474035, - "grad_norm": 0.5966035723686218, - "learning_rate": 5.973368159214893e-06, - "loss": 0.1852, - "step": 13572 - }, - { - "epoch": 1.2786321565672027, - "grad_norm": 0.6455698609352112, - "learning_rate": 5.971986012751407e-06, - "loss": 0.2162, - "step": 13573 - }, - { - "epoch": 1.2787263606603707, - "grad_norm": 0.624878466129303, - "learning_rate": 5.970603958132871e-06, - "loss": 0.2072, - "step": 13574 - }, - { - "epoch": 1.2788205647535387, - "grad_norm": 0.6461345553398132, - "learning_rate": 5.969221995390797e-06, - "loss": 0.1863, - "step": 13575 - }, - { - "epoch": 1.2789147688467064, - "grad_norm": 0.6307018995285034, - "learning_rate": 5.967840124556693e-06, - "loss": 0.1924, - "step": 13576 - }, - { - "epoch": 1.2790089729398741, - "grad_norm": 0.6108476519584656, - "learning_rate": 5.966458345662072e-06, - "loss": 0.1875, - "step": 13577 - }, - { - "epoch": 1.279103177033042, - "grad_norm": 0.6464184522628784, - "learning_rate": 5.965076658738439e-06, - "loss": 0.204, - "step": 13578 - }, - { - "epoch": 1.27919738112621, - "grad_norm": 0.6568793058395386, - "learning_rate": 5.963695063817297e-06, - "loss": 0.2057, - "step": 13579 - }, - { - "epoch": 1.2792915852193778, - "grad_norm": 0.6844866275787354, - "learning_rate": 5.9623135609301495e-06, - "loss": 0.2215, - "step": 13580 - }, - { - "epoch": 1.2793857893125455, - "grad_norm": 0.6494469046592712, - "learning_rate": 5.960932150108498e-06, - "loss": 0.225, - "step": 13581 - }, - { - "epoch": 1.2794799934057135, - "grad_norm": 0.695708155632019, - "learning_rate": 5.959550831383842e-06, - "loss": 0.2069, - "step": 13582 - }, - { - "epoch": 1.2795741974988815, - "grad_norm": 0.7102089524269104, - "learning_rate": 5.9581696047876714e-06, - "loss": 0.2391, - "step": 13583 - }, - { - "epoch": 1.2796684015920492, - "grad_norm": 0.6285651326179504, - "learning_rate": 5.956788470351489e-06, - "loss": 0.204, - "step": 13584 - }, - { - "epoch": 1.279762605685217, - "grad_norm": 0.6996090412139893, - "learning_rate": 
5.955407428106781e-06, - "loss": 0.2024, - "step": 13585 - }, - { - "epoch": 1.279856809778385, - "grad_norm": 0.7680042386054993, - "learning_rate": 5.954026478085035e-06, - "loss": 0.2115, - "step": 13586 - }, - { - "epoch": 1.2799510138715529, - "grad_norm": 0.6397778987884521, - "learning_rate": 5.952645620317748e-06, - "loss": 0.1866, - "step": 13587 - }, - { - "epoch": 1.2800452179647206, - "grad_norm": 0.6503490805625916, - "learning_rate": 5.951264854836398e-06, - "loss": 0.1947, - "step": 13588 - }, - { - "epoch": 1.2801394220578883, - "grad_norm": 0.645462155342102, - "learning_rate": 5.949884181672469e-06, - "loss": 0.1991, - "step": 13589 - }, - { - "epoch": 1.2802336261510563, - "grad_norm": 0.7461054921150208, - "learning_rate": 5.9485036008574475e-06, - "loss": 0.2373, - "step": 13590 - }, - { - "epoch": 1.280327830244224, - "grad_norm": 0.6423100233078003, - "learning_rate": 5.947123112422808e-06, - "loss": 0.1909, - "step": 13591 - }, - { - "epoch": 1.280422034337392, - "grad_norm": 0.634678065776825, - "learning_rate": 5.94574271640003e-06, - "loss": 0.2063, - "step": 13592 - }, - { - "epoch": 1.2805162384305597, - "grad_norm": 0.6117005348205566, - "learning_rate": 5.944362412820586e-06, - "loss": 0.218, - "step": 13593 - }, - { - "epoch": 1.2806104425237277, - "grad_norm": 0.6724438071250916, - "learning_rate": 5.942982201715954e-06, - "loss": 0.2084, - "step": 13594 - }, - { - "epoch": 1.2807046466168954, - "grad_norm": 0.684286892414093, - "learning_rate": 5.941602083117601e-06, - "loss": 0.2231, - "step": 13595 - }, - { - "epoch": 1.2807988507100634, - "grad_norm": 0.7269381880760193, - "learning_rate": 5.9402220570569945e-06, - "loss": 0.1986, - "step": 13596 - }, - { - "epoch": 1.2808930548032311, - "grad_norm": 0.6402116417884827, - "learning_rate": 5.9388421235656065e-06, - "loss": 0.216, - "step": 13597 - }, - { - "epoch": 1.280987258896399, - "grad_norm": 0.5580009818077087, - "learning_rate": 5.9374622826748994e-06, - "loss": 0.1756, - "step": 13598 - }, - { - "epoch": 1.2810814629895668, - "grad_norm": 0.6470601558685303, - "learning_rate": 5.936082534416332e-06, - "loss": 0.2119, - "step": 13599 - }, - { - "epoch": 1.2811756670827348, - "grad_norm": 0.7546946406364441, - "learning_rate": 5.934702878821371e-06, - "loss": 0.229, - "step": 13600 - }, - { - "epoch": 1.2812698711759025, - "grad_norm": 0.5874007344245911, - "learning_rate": 5.933323315921471e-06, - "loss": 0.2011, - "step": 13601 - }, - { - "epoch": 1.2813640752690705, - "grad_norm": 0.5410974025726318, - "learning_rate": 5.931943845748089e-06, - "loss": 0.1747, - "step": 13602 - }, - { - "epoch": 1.2814582793622382, - "grad_norm": 0.5804749727249146, - "learning_rate": 5.9305644683326755e-06, - "loss": 0.187, - "step": 13603 - }, - { - "epoch": 1.2815524834554062, - "grad_norm": 0.6447020173072815, - "learning_rate": 5.929185183706689e-06, - "loss": 0.2153, - "step": 13604 - }, - { - "epoch": 1.281646687548574, - "grad_norm": 0.6586759090423584, - "learning_rate": 5.927805991901576e-06, - "loss": 0.2184, - "step": 13605 - }, - { - "epoch": 1.2817408916417419, - "grad_norm": 0.5643503665924072, - "learning_rate": 5.926426892948779e-06, - "loss": 0.1957, - "step": 13606 - }, - { - "epoch": 1.2818350957349096, - "grad_norm": 0.6107988357543945, - "learning_rate": 5.925047886879756e-06, - "loss": 0.185, - "step": 13607 - }, - { - "epoch": 1.2819292998280776, - "grad_norm": 0.6334426403045654, - "learning_rate": 5.92366897372594e-06, - "loss": 0.2218, - "step": 13608 - }, - { - "epoch": 
1.2820235039212453, - "grad_norm": 0.6359449028968811, - "learning_rate": 5.922290153518772e-06, - "loss": 0.2093, - "step": 13609 - }, - { - "epoch": 1.2821177080144133, - "grad_norm": 0.5687635540962219, - "learning_rate": 5.9209114262897e-06, - "loss": 0.1728, - "step": 13610 - }, - { - "epoch": 1.282211912107581, - "grad_norm": 0.6605468988418579, - "learning_rate": 5.919532792070154e-06, - "loss": 0.2201, - "step": 13611 - }, - { - "epoch": 1.282306116200749, - "grad_norm": 0.6663120985031128, - "learning_rate": 5.918154250891573e-06, - "loss": 0.1738, - "step": 13612 - }, - { - "epoch": 1.2824003202939167, - "grad_norm": 0.7010716199874878, - "learning_rate": 5.9167758027853824e-06, - "loss": 0.1961, - "step": 13613 - }, - { - "epoch": 1.2824945243870847, - "grad_norm": 0.5978226065635681, - "learning_rate": 5.9153974477830226e-06, - "loss": 0.1954, - "step": 13614 - }, - { - "epoch": 1.2825887284802524, - "grad_norm": 0.6706185936927795, - "learning_rate": 5.914019185915918e-06, - "loss": 0.2116, - "step": 13615 - }, - { - "epoch": 1.2826829325734204, - "grad_norm": 0.641293466091156, - "learning_rate": 5.912641017215493e-06, - "loss": 0.1982, - "step": 13616 - }, - { - "epoch": 1.282777136666588, - "grad_norm": 0.5829498171806335, - "learning_rate": 5.9112629417131736e-06, - "loss": 0.195, - "step": 13617 - }, - { - "epoch": 1.282871340759756, - "grad_norm": 0.6060450077056885, - "learning_rate": 5.909884959440385e-06, - "loss": 0.1757, - "step": 13618 - }, - { - "epoch": 1.2829655448529238, - "grad_norm": 0.62343430519104, - "learning_rate": 5.908507070428542e-06, - "loss": 0.1968, - "step": 13619 - }, - { - "epoch": 1.2830597489460918, - "grad_norm": 0.7056381106376648, - "learning_rate": 5.907129274709068e-06, - "loss": 0.2494, - "step": 13620 - }, - { - "epoch": 1.2831539530392595, - "grad_norm": 0.6520238518714905, - "learning_rate": 5.905751572313376e-06, - "loss": 0.2319, - "step": 13621 - }, - { - "epoch": 1.2832481571324275, - "grad_norm": 0.711362361907959, - "learning_rate": 5.904373963272882e-06, - "loss": 0.2195, - "step": 13622 - }, - { - "epoch": 1.2833423612255952, - "grad_norm": 0.6747616529464722, - "learning_rate": 5.902996447618989e-06, - "loss": 0.2164, - "step": 13623 - }, - { - "epoch": 1.2834365653187632, - "grad_norm": 0.6405224204063416, - "learning_rate": 5.901619025383121e-06, - "loss": 0.1935, - "step": 13624 - }, - { - "epoch": 1.283530769411931, - "grad_norm": 0.5808284282684326, - "learning_rate": 5.900241696596673e-06, - "loss": 0.1784, - "step": 13625 - }, - { - "epoch": 1.2836249735050989, - "grad_norm": 0.6893551349639893, - "learning_rate": 5.898864461291052e-06, - "loss": 0.2121, - "step": 13626 - }, - { - "epoch": 1.2837191775982666, - "grad_norm": 0.6487012505531311, - "learning_rate": 5.89748731949767e-06, - "loss": 0.2099, - "step": 13627 - }, - { - "epoch": 1.2838133816914346, - "grad_norm": 0.6735448837280273, - "learning_rate": 5.896110271247919e-06, - "loss": 0.2086, - "step": 13628 - }, - { - "epoch": 1.2839075857846023, - "grad_norm": 0.6319160461425781, - "learning_rate": 5.8947333165732006e-06, - "loss": 0.1938, - "step": 13629 - }, - { - "epoch": 1.2840017898777702, - "grad_norm": 0.5781620740890503, - "learning_rate": 5.893356455504911e-06, - "loss": 0.2113, - "step": 13630 - }, - { - "epoch": 1.284095993970938, - "grad_norm": 0.7069408893585205, - "learning_rate": 5.891979688074446e-06, - "loss": 0.1988, - "step": 13631 - }, - { - "epoch": 1.284190198064106, - "grad_norm": 0.5846056938171387, - "learning_rate": 
5.890603014313199e-06, - "loss": 0.188, - "step": 13632 - }, - { - "epoch": 1.2842844021572737, - "grad_norm": 0.5766475200653076, - "learning_rate": 5.889226434252554e-06, - "loss": 0.1715, - "step": 13633 - }, - { - "epoch": 1.2843786062504416, - "grad_norm": 0.6382126212120056, - "learning_rate": 5.887849947923907e-06, - "loss": 0.2044, - "step": 13634 - }, - { - "epoch": 1.2844728103436094, - "grad_norm": 0.6012054681777954, - "learning_rate": 5.886473555358641e-06, - "loss": 0.1729, - "step": 13635 - }, - { - "epoch": 1.2845670144367773, - "grad_norm": 0.6249715685844421, - "learning_rate": 5.885097256588137e-06, - "loss": 0.2089, - "step": 13636 - }, - { - "epoch": 1.284661218529945, - "grad_norm": 0.6467832922935486, - "learning_rate": 5.883721051643782e-06, - "loss": 0.2052, - "step": 13637 - }, - { - "epoch": 1.284755422623113, - "grad_norm": 0.6491999626159668, - "learning_rate": 5.8823449405569525e-06, - "loss": 0.2208, - "step": 13638 - }, - { - "epoch": 1.2848496267162808, - "grad_norm": 0.9389375448226929, - "learning_rate": 5.8809689233590235e-06, - "loss": 0.2331, - "step": 13639 - }, - { - "epoch": 1.2849438308094487, - "grad_norm": 0.646766185760498, - "learning_rate": 5.879593000081376e-06, - "loss": 0.1751, - "step": 13640 - }, - { - "epoch": 1.2850380349026165, - "grad_norm": 0.7001878619194031, - "learning_rate": 5.878217170755383e-06, - "loss": 0.2209, - "step": 13641 - }, - { - "epoch": 1.2851322389957844, - "grad_norm": 0.6537686586380005, - "learning_rate": 5.87684143541241e-06, - "loss": 0.1908, - "step": 13642 - }, - { - "epoch": 1.2852264430889522, - "grad_norm": 0.5841006636619568, - "learning_rate": 5.875465794083827e-06, - "loss": 0.198, - "step": 13643 - }, - { - "epoch": 1.2853206471821201, - "grad_norm": 0.6095043420791626, - "learning_rate": 5.8740902468010075e-06, - "loss": 0.2225, - "step": 13644 - }, - { - "epoch": 1.2854148512752879, - "grad_norm": 0.583604097366333, - "learning_rate": 5.872714793595309e-06, - "loss": 0.1786, - "step": 13645 - }, - { - "epoch": 1.2855090553684558, - "grad_norm": 0.6523245573043823, - "learning_rate": 5.8713394344980915e-06, - "loss": 0.1986, - "step": 13646 - }, - { - "epoch": 1.2856032594616236, - "grad_norm": 0.7673590183258057, - "learning_rate": 5.869964169540726e-06, - "loss": 0.2112, - "step": 13647 - }, - { - "epoch": 1.2856974635547915, - "grad_norm": 0.6839298605918884, - "learning_rate": 5.868588998754563e-06, - "loss": 0.2285, - "step": 13648 - }, - { - "epoch": 1.2857916676479593, - "grad_norm": 0.7030475735664368, - "learning_rate": 5.867213922170958e-06, - "loss": 0.2442, - "step": 13649 - }, - { - "epoch": 1.2858858717411272, - "grad_norm": 0.6308186650276184, - "learning_rate": 5.86583893982127e-06, - "loss": 0.2036, - "step": 13650 - }, - { - "epoch": 1.285980075834295, - "grad_norm": 0.6027628779411316, - "learning_rate": 5.864464051736847e-06, - "loss": 0.1992, - "step": 13651 - }, - { - "epoch": 1.286074279927463, - "grad_norm": 0.653380811214447, - "learning_rate": 5.8630892579490396e-06, - "loss": 0.2127, - "step": 13652 - }, - { - "epoch": 1.2861684840206307, - "grad_norm": 0.7756394147872925, - "learning_rate": 5.8617145584891935e-06, - "loss": 0.2122, - "step": 13653 - }, - { - "epoch": 1.2862626881137986, - "grad_norm": 0.6326111555099487, - "learning_rate": 5.860339953388656e-06, - "loss": 0.2074, - "step": 13654 - }, - { - "epoch": 1.2863568922069664, - "grad_norm": 0.6357513070106506, - "learning_rate": 5.8589654426787715e-06, - "loss": 0.1884, - "step": 13655 - }, - { - "epoch": 
1.2864510963001343, - "grad_norm": 0.6499336361885071, - "learning_rate": 5.857591026390877e-06, - "loss": 0.2115, - "step": 13656 - }, - { - "epoch": 1.286545300393302, - "grad_norm": 0.636991024017334, - "learning_rate": 5.856216704556313e-06, - "loss": 0.1918, - "step": 13657 - }, - { - "epoch": 1.28663950448647, - "grad_norm": 0.6284456849098206, - "learning_rate": 5.854842477206419e-06, - "loss": 0.1919, - "step": 13658 - }, - { - "epoch": 1.2867337085796378, - "grad_norm": 0.6406119465827942, - "learning_rate": 5.853468344372524e-06, - "loss": 0.2064, - "step": 13659 - }, - { - "epoch": 1.2868279126728055, - "grad_norm": 0.6799442172050476, - "learning_rate": 5.852094306085966e-06, - "loss": 0.1912, - "step": 13660 - }, - { - "epoch": 1.2869221167659735, - "grad_norm": 0.6351364850997925, - "learning_rate": 5.850720362378074e-06, - "loss": 0.2067, - "step": 13661 - }, - { - "epoch": 1.2870163208591414, - "grad_norm": 0.6258965134620667, - "learning_rate": 5.8493465132801745e-06, - "loss": 0.2074, - "step": 13662 - }, - { - "epoch": 1.2871105249523092, - "grad_norm": 0.6602813005447388, - "learning_rate": 5.847972758823588e-06, - "loss": 0.1906, - "step": 13663 - }, - { - "epoch": 1.287204729045477, - "grad_norm": 0.6425490379333496, - "learning_rate": 5.846599099039649e-06, - "loss": 0.1922, - "step": 13664 - }, - { - "epoch": 1.2872989331386449, - "grad_norm": 0.6882752180099487, - "learning_rate": 5.845225533959673e-06, - "loss": 0.2176, - "step": 13665 - }, - { - "epoch": 1.2873931372318128, - "grad_norm": 0.6884061694145203, - "learning_rate": 5.843852063614977e-06, - "loss": 0.2046, - "step": 13666 - }, - { - "epoch": 1.2874873413249805, - "grad_norm": 0.6444416642189026, - "learning_rate": 5.842478688036887e-06, - "loss": 0.1875, - "step": 13667 - }, - { - "epoch": 1.2875815454181483, - "grad_norm": 0.6255632638931274, - "learning_rate": 5.841105407256711e-06, - "loss": 0.2066, - "step": 13668 - }, - { - "epoch": 1.2876757495113162, - "grad_norm": 0.6718289256095886, - "learning_rate": 5.83973222130576e-06, - "loss": 0.2069, - "step": 13669 - }, - { - "epoch": 1.2877699536044842, - "grad_norm": 0.6975240707397461, - "learning_rate": 5.838359130215352e-06, - "loss": 0.206, - "step": 13670 - }, - { - "epoch": 1.287864157697652, - "grad_norm": 0.5970094203948975, - "learning_rate": 5.836986134016793e-06, - "loss": 0.1783, - "step": 13671 - }, - { - "epoch": 1.2879583617908197, - "grad_norm": 0.6502562761306763, - "learning_rate": 5.835613232741386e-06, - "loss": 0.1959, - "step": 13672 - }, - { - "epoch": 1.2880525658839876, - "grad_norm": 0.6309152841567993, - "learning_rate": 5.8342404264204365e-06, - "loss": 0.1994, - "step": 13673 - }, - { - "epoch": 1.2881467699771556, - "grad_norm": 0.693664014339447, - "learning_rate": 5.832867715085251e-06, - "loss": 0.2097, - "step": 13674 - }, - { - "epoch": 1.2882409740703233, - "grad_norm": 0.7204006314277649, - "learning_rate": 5.831495098767124e-06, - "loss": 0.2076, - "step": 13675 - }, - { - "epoch": 1.288335178163491, - "grad_norm": 0.6644877195358276, - "learning_rate": 5.830122577497353e-06, - "loss": 0.2134, - "step": 13676 - }, - { - "epoch": 1.288429382256659, - "grad_norm": 0.8421399593353271, - "learning_rate": 5.828750151307241e-06, - "loss": 0.2231, - "step": 13677 - }, - { - "epoch": 1.288523586349827, - "grad_norm": 0.7198042869567871, - "learning_rate": 5.827377820228073e-06, - "loss": 0.2206, - "step": 13678 - }, - { - "epoch": 1.2886177904429947, - "grad_norm": 0.707188606262207, - "learning_rate": 
5.826005584291144e-06, - "loss": 0.2178, - "step": 13679 - }, - { - "epoch": 1.2887119945361625, - "grad_norm": 0.6541299223899841, - "learning_rate": 5.824633443527748e-06, - "loss": 0.2257, - "step": 13680 - }, - { - "epoch": 1.2888061986293304, - "grad_norm": 0.6398420929908752, - "learning_rate": 5.82326139796916e-06, - "loss": 0.2146, - "step": 13681 - }, - { - "epoch": 1.2889004027224984, - "grad_norm": 0.7083244919776917, - "learning_rate": 5.821889447646678e-06, - "loss": 0.2084, - "step": 13682 - }, - { - "epoch": 1.2889946068156661, - "grad_norm": 0.6632961630821228, - "learning_rate": 5.820517592591573e-06, - "loss": 0.2084, - "step": 13683 - }, - { - "epoch": 1.2890888109088339, - "grad_norm": 0.6450549960136414, - "learning_rate": 5.819145832835131e-06, - "loss": 0.1961, - "step": 13684 - }, - { - "epoch": 1.2891830150020018, - "grad_norm": 0.6083947420120239, - "learning_rate": 5.817774168408632e-06, - "loss": 0.1724, - "step": 13685 - }, - { - "epoch": 1.2892772190951698, - "grad_norm": 0.7011882066726685, - "learning_rate": 5.816402599343348e-06, - "loss": 0.2077, - "step": 13686 - }, - { - "epoch": 1.2893714231883375, - "grad_norm": 0.7546470165252686, - "learning_rate": 5.815031125670554e-06, - "loss": 0.2163, - "step": 13687 - }, - { - "epoch": 1.2894656272815053, - "grad_norm": 0.6526637673377991, - "learning_rate": 5.813659747421527e-06, - "loss": 0.2132, - "step": 13688 - }, - { - "epoch": 1.2895598313746732, - "grad_norm": 0.6755617260932922, - "learning_rate": 5.812288464627528e-06, - "loss": 0.1823, - "step": 13689 - }, - { - "epoch": 1.2896540354678412, - "grad_norm": 0.6853237152099609, - "learning_rate": 5.810917277319827e-06, - "loss": 0.1956, - "step": 13690 - }, - { - "epoch": 1.289748239561009, - "grad_norm": 0.6459473967552185, - "learning_rate": 5.809546185529697e-06, - "loss": 0.1937, - "step": 13691 - }, - { - "epoch": 1.2898424436541767, - "grad_norm": 0.6184282302856445, - "learning_rate": 5.808175189288394e-06, - "loss": 0.2082, - "step": 13692 - }, - { - "epoch": 1.2899366477473446, - "grad_norm": 0.6942335367202759, - "learning_rate": 5.806804288627171e-06, - "loss": 0.2055, - "step": 13693 - }, - { - "epoch": 1.2900308518405126, - "grad_norm": 0.600774347782135, - "learning_rate": 5.805433483577303e-06, - "loss": 0.206, - "step": 13694 - }, - { - "epoch": 1.2901250559336803, - "grad_norm": 0.7072567343711853, - "learning_rate": 5.804062774170038e-06, - "loss": 0.1871, - "step": 13695 - }, - { - "epoch": 1.290219260026848, - "grad_norm": 0.6618983745574951, - "learning_rate": 5.802692160436628e-06, - "loss": 0.2237, - "step": 13696 - }, - { - "epoch": 1.290313464120016, - "grad_norm": 0.6313806176185608, - "learning_rate": 5.801321642408328e-06, - "loss": 0.1948, - "step": 13697 - }, - { - "epoch": 1.290407668213184, - "grad_norm": 0.6524603962898254, - "learning_rate": 5.799951220116391e-06, - "loss": 0.1927, - "step": 13698 - }, - { - "epoch": 1.2905018723063517, - "grad_norm": 0.6209326386451721, - "learning_rate": 5.798580893592058e-06, - "loss": 0.1866, - "step": 13699 - }, - { - "epoch": 1.2905960763995195, - "grad_norm": 0.6452550888061523, - "learning_rate": 5.797210662866579e-06, - "loss": 0.1991, - "step": 13700 - }, - { - "epoch": 1.2906902804926874, - "grad_norm": 0.5927140116691589, - "learning_rate": 5.795840527971199e-06, - "loss": 0.1664, - "step": 13701 - }, - { - "epoch": 1.2907844845858554, - "grad_norm": 0.6361879706382751, - "learning_rate": 5.794470488937154e-06, - "loss": 0.2066, - "step": 13702 - }, - { - "epoch": 
1.2908786886790231, - "grad_norm": 0.7191894054412842, - "learning_rate": 5.793100545795687e-06, - "loss": 0.1902, - "step": 13703 - }, - { - "epoch": 1.2909728927721908, - "grad_norm": 0.612771213054657, - "learning_rate": 5.791730698578035e-06, - "loss": 0.1967, - "step": 13704 - }, - { - "epoch": 1.2910670968653588, - "grad_norm": 0.6511619687080383, - "learning_rate": 5.7903609473154295e-06, - "loss": 0.1983, - "step": 13705 - }, - { - "epoch": 1.2911613009585268, - "grad_norm": 0.6181254386901855, - "learning_rate": 5.788991292039103e-06, - "loss": 0.1811, - "step": 13706 - }, - { - "epoch": 1.2912555050516945, - "grad_norm": 0.7074618339538574, - "learning_rate": 5.7876217327802935e-06, - "loss": 0.2239, - "step": 13707 - }, - { - "epoch": 1.2913497091448622, - "grad_norm": 0.6534847021102905, - "learning_rate": 5.786252269570219e-06, - "loss": 0.1901, - "step": 13708 - }, - { - "epoch": 1.2914439132380302, - "grad_norm": 0.6207404732704163, - "learning_rate": 5.784882902440108e-06, - "loss": 0.1915, - "step": 13709 - }, - { - "epoch": 1.2915381173311982, - "grad_norm": 0.7236878275871277, - "learning_rate": 5.7835136314211894e-06, - "loss": 0.2041, - "step": 13710 - }, - { - "epoch": 1.291632321424366, - "grad_norm": 0.7188735604286194, - "learning_rate": 5.782144456544681e-06, - "loss": 0.2091, - "step": 13711 - }, - { - "epoch": 1.2917265255175336, - "grad_norm": 0.6913007497787476, - "learning_rate": 5.780775377841799e-06, - "loss": 0.1969, - "step": 13712 - }, - { - "epoch": 1.2918207296107016, - "grad_norm": 0.6481049060821533, - "learning_rate": 5.779406395343763e-06, - "loss": 0.211, - "step": 13713 - }, - { - "epoch": 1.2919149337038696, - "grad_norm": 0.6210039258003235, - "learning_rate": 5.778037509081793e-06, - "loss": 0.1969, - "step": 13714 - }, - { - "epoch": 1.2920091377970373, - "grad_norm": 0.5863572955131531, - "learning_rate": 5.776668719087092e-06, - "loss": 0.2072, - "step": 13715 - }, - { - "epoch": 1.292103341890205, - "grad_norm": 0.7057060599327087, - "learning_rate": 5.775300025390876e-06, - "loss": 0.2216, - "step": 13716 - }, - { - "epoch": 1.292197545983373, - "grad_norm": 0.6310413479804993, - "learning_rate": 5.773931428024357e-06, - "loss": 0.2117, - "step": 13717 - }, - { - "epoch": 1.292291750076541, - "grad_norm": 0.6071088910102844, - "learning_rate": 5.772562927018734e-06, - "loss": 0.1998, - "step": 13718 - }, - { - "epoch": 1.2923859541697087, - "grad_norm": 0.6977028846740723, - "learning_rate": 5.771194522405215e-06, - "loss": 0.2134, - "step": 13719 - }, - { - "epoch": 1.2924801582628764, - "grad_norm": 0.6454960703849792, - "learning_rate": 5.769826214215003e-06, - "loss": 0.1865, - "step": 13720 - }, - { - "epoch": 1.2925743623560444, - "grad_norm": 0.9823235273361206, - "learning_rate": 5.768458002479292e-06, - "loss": 0.1691, - "step": 13721 - }, - { - "epoch": 1.2926685664492124, - "grad_norm": 0.5987262725830078, - "learning_rate": 5.767089887229287e-06, - "loss": 0.1599, - "step": 13722 - }, - { - "epoch": 1.29276277054238, - "grad_norm": 0.6353163719177246, - "learning_rate": 5.765721868496175e-06, - "loss": 0.2048, - "step": 13723 - }, - { - "epoch": 1.2928569746355478, - "grad_norm": 0.6583973169326782, - "learning_rate": 5.764353946311152e-06, - "loss": 0.2125, - "step": 13724 - }, - { - "epoch": 1.2929511787287158, - "grad_norm": 0.6165882349014282, - "learning_rate": 5.7629861207054135e-06, - "loss": 0.2145, - "step": 13725 - }, - { - "epoch": 1.2930453828218837, - "grad_norm": 0.646360456943512, - "learning_rate": 
5.761618391710142e-06, - "loss": 0.2082, - "step": 13726 - }, - { - "epoch": 1.2931395869150515, - "grad_norm": 0.6963431239128113, - "learning_rate": 5.760250759356525e-06, - "loss": 0.1844, - "step": 13727 - }, - { - "epoch": 1.2932337910082192, - "grad_norm": 0.7241135835647583, - "learning_rate": 5.758883223675751e-06, - "loss": 0.2107, - "step": 13728 - }, - { - "epoch": 1.2933279951013872, - "grad_norm": 0.7736554145812988, - "learning_rate": 5.7575157846989945e-06, - "loss": 0.1818, - "step": 13729 - }, - { - "epoch": 1.293422199194555, - "grad_norm": 0.5883830189704895, - "learning_rate": 5.7561484424574385e-06, - "loss": 0.1905, - "step": 13730 - }, - { - "epoch": 1.2935164032877229, - "grad_norm": 0.5926435589790344, - "learning_rate": 5.754781196982266e-06, - "loss": 0.1995, - "step": 13731 - }, - { - "epoch": 1.2936106073808906, - "grad_norm": 0.6940343379974365, - "learning_rate": 5.753414048304649e-06, - "loss": 0.2249, - "step": 13732 - }, - { - "epoch": 1.2937048114740586, - "grad_norm": 0.6366355419158936, - "learning_rate": 5.75204699645575e-06, - "loss": 0.188, - "step": 13733 - }, - { - "epoch": 1.2937990155672263, - "grad_norm": 0.8059617280960083, - "learning_rate": 5.750680041466756e-06, - "loss": 0.2258, - "step": 13734 - }, - { - "epoch": 1.2938932196603943, - "grad_norm": 0.656348466873169, - "learning_rate": 5.7493131833688306e-06, - "loss": 0.2244, - "step": 13735 - }, - { - "epoch": 1.293987423753562, - "grad_norm": 0.7119974493980408, - "learning_rate": 5.747946422193133e-06, - "loss": 0.2015, - "step": 13736 - }, - { - "epoch": 1.29408162784673, - "grad_norm": 0.5967922806739807, - "learning_rate": 5.746579757970834e-06, - "loss": 0.1944, - "step": 13737 - }, - { - "epoch": 1.2941758319398977, - "grad_norm": 0.6940702199935913, - "learning_rate": 5.745213190733099e-06, - "loss": 0.2004, - "step": 13738 - }, - { - "epoch": 1.2942700360330657, - "grad_norm": 0.6587916612625122, - "learning_rate": 5.7438467205110785e-06, - "loss": 0.2202, - "step": 13739 - }, - { - "epoch": 1.2943642401262334, - "grad_norm": 0.6120479106903076, - "learning_rate": 5.742480347335935e-06, - "loss": 0.186, - "step": 13740 - }, - { - "epoch": 1.2944584442194014, - "grad_norm": 0.6482627987861633, - "learning_rate": 5.7411140712388284e-06, - "loss": 0.1993, - "step": 13741 - }, - { - "epoch": 1.294552648312569, - "grad_norm": 0.687244176864624, - "learning_rate": 5.739747892250902e-06, - "loss": 0.208, - "step": 13742 - }, - { - "epoch": 1.294646852405737, - "grad_norm": 0.6568077802658081, - "learning_rate": 5.738381810403314e-06, - "loss": 0.1788, - "step": 13743 - }, - { - "epoch": 1.2947410564989048, - "grad_norm": 0.6496370434761047, - "learning_rate": 5.737015825727215e-06, - "loss": 0.1862, - "step": 13744 - }, - { - "epoch": 1.2948352605920728, - "grad_norm": 0.699585497379303, - "learning_rate": 5.735649938253743e-06, - "loss": 0.2315, - "step": 13745 - }, - { - "epoch": 1.2949294646852405, - "grad_norm": 0.6499279737472534, - "learning_rate": 5.734284148014049e-06, - "loss": 0.1862, - "step": 13746 - }, - { - "epoch": 1.2950236687784085, - "grad_norm": 0.6824267506599426, - "learning_rate": 5.7329184550392756e-06, - "loss": 0.2133, - "step": 13747 - }, - { - "epoch": 1.2951178728715762, - "grad_norm": 0.6080122590065002, - "learning_rate": 5.731552859360563e-06, - "loss": 0.2011, - "step": 13748 - }, - { - "epoch": 1.2952120769647442, - "grad_norm": 0.6081357002258301, - "learning_rate": 5.730187361009036e-06, - "loss": 0.1969, - "step": 13749 - }, - { - "epoch": 
1.295306281057912, - "grad_norm": 0.6595403552055359, - "learning_rate": 5.72882196001585e-06, - "loss": 0.1994, - "step": 13750 - }, - { - "epoch": 1.2954004851510799, - "grad_norm": 0.7420370578765869, - "learning_rate": 5.727456656412129e-06, - "loss": 0.2013, - "step": 13751 - }, - { - "epoch": 1.2954946892442476, - "grad_norm": 0.6704427003860474, - "learning_rate": 5.726091450228999e-06, - "loss": 0.1823, - "step": 13752 - }, - { - "epoch": 1.2955888933374156, - "grad_norm": 0.6598203182220459, - "learning_rate": 5.724726341497594e-06, - "loss": 0.2126, - "step": 13753 - }, - { - "epoch": 1.2956830974305833, - "grad_norm": 0.6840913891792297, - "learning_rate": 5.723361330249044e-06, - "loss": 0.1949, - "step": 13754 - }, - { - "epoch": 1.2957773015237513, - "grad_norm": 0.6585990786552429, - "learning_rate": 5.721996416514466e-06, - "loss": 0.1956, - "step": 13755 - }, - { - "epoch": 1.295871505616919, - "grad_norm": 0.6387620568275452, - "learning_rate": 5.720631600324986e-06, - "loss": 0.2115, - "step": 13756 - }, - { - "epoch": 1.295965709710087, - "grad_norm": 0.6068645119667053, - "learning_rate": 5.719266881711727e-06, - "loss": 0.1771, - "step": 13757 - }, - { - "epoch": 1.2960599138032547, - "grad_norm": 0.766531765460968, - "learning_rate": 5.7179022607058e-06, - "loss": 0.2383, - "step": 13758 - }, - { - "epoch": 1.2961541178964227, - "grad_norm": 0.6244555711746216, - "learning_rate": 5.716537737338324e-06, - "loss": 0.1807, - "step": 13759 - }, - { - "epoch": 1.2962483219895904, - "grad_norm": 0.5754210948944092, - "learning_rate": 5.715173311640415e-06, - "loss": 0.176, - "step": 13760 - }, - { - "epoch": 1.2963425260827584, - "grad_norm": 0.6409897208213806, - "learning_rate": 5.713808983643179e-06, - "loss": 0.2236, - "step": 13761 - }, - { - "epoch": 1.296436730175926, - "grad_norm": 0.7322799563407898, - "learning_rate": 5.712444753377728e-06, - "loss": 0.2276, - "step": 13762 - }, - { - "epoch": 1.296530934269094, - "grad_norm": 0.648629367351532, - "learning_rate": 5.711080620875165e-06, - "loss": 0.2307, - "step": 13763 - }, - { - "epoch": 1.2966251383622618, - "grad_norm": 0.5882104635238647, - "learning_rate": 5.709716586166598e-06, - "loss": 0.1862, - "step": 13764 - }, - { - "epoch": 1.2967193424554297, - "grad_norm": 0.6721855401992798, - "learning_rate": 5.708352649283131e-06, - "loss": 0.2068, - "step": 13765 - }, - { - "epoch": 1.2968135465485975, - "grad_norm": 0.6700777411460876, - "learning_rate": 5.706988810255856e-06, - "loss": 0.1987, - "step": 13766 - }, - { - "epoch": 1.2969077506417654, - "grad_norm": 0.7214812636375427, - "learning_rate": 5.705625069115877e-06, - "loss": 0.2063, - "step": 13767 - }, - { - "epoch": 1.2970019547349332, - "grad_norm": 0.6548411846160889, - "learning_rate": 5.70426142589429e-06, - "loss": 0.2105, - "step": 13768 - }, - { - "epoch": 1.2970961588281011, - "grad_norm": 0.6924688220024109, - "learning_rate": 5.702897880622182e-06, - "loss": 0.1953, - "step": 13769 - }, - { - "epoch": 1.2971903629212689, - "grad_norm": 0.6035183668136597, - "learning_rate": 5.701534433330649e-06, - "loss": 0.1868, - "step": 13770 - }, - { - "epoch": 1.2972845670144368, - "grad_norm": 0.6803979277610779, - "learning_rate": 5.700171084050783e-06, - "loss": 0.2206, - "step": 13771 - }, - { - "epoch": 1.2973787711076046, - "grad_norm": 0.661728024482727, - "learning_rate": 5.698807832813664e-06, - "loss": 0.2133, - "step": 13772 - }, - { - "epoch": 1.2974729752007725, - "grad_norm": 0.8313462138175964, - "learning_rate": 
5.697444679650372e-06, - "loss": 0.2311, - "step": 13773 - }, - { - "epoch": 1.2975671792939403, - "grad_norm": 0.61605304479599, - "learning_rate": 5.696081624592002e-06, - "loss": 0.2055, - "step": 13774 - }, - { - "epoch": 1.2976613833871082, - "grad_norm": 0.645388662815094, - "learning_rate": 5.6947186676696295e-06, - "loss": 0.1816, - "step": 13775 - }, - { - "epoch": 1.297755587480276, - "grad_norm": 0.7084368467330933, - "learning_rate": 5.693355808914325e-06, - "loss": 0.2265, - "step": 13776 - }, - { - "epoch": 1.297849791573444, - "grad_norm": 0.6451153755187988, - "learning_rate": 5.691993048357168e-06, - "loss": 0.181, - "step": 13777 - }, - { - "epoch": 1.2979439956666117, - "grad_norm": 0.7079511284828186, - "learning_rate": 5.690630386029235e-06, - "loss": 0.2225, - "step": 13778 - }, - { - "epoch": 1.2980381997597796, - "grad_norm": 0.6755489110946655, - "learning_rate": 5.689267821961591e-06, - "loss": 0.2087, - "step": 13779 - }, - { - "epoch": 1.2981324038529474, - "grad_norm": 0.7519961595535278, - "learning_rate": 5.687905356185306e-06, - "loss": 0.2136, - "step": 13780 - }, - { - "epoch": 1.2982266079461153, - "grad_norm": 0.612177312374115, - "learning_rate": 5.686542988731451e-06, - "loss": 0.2046, - "step": 13781 - }, - { - "epoch": 1.298320812039283, - "grad_norm": 0.6108668446540833, - "learning_rate": 5.685180719631085e-06, - "loss": 0.1926, - "step": 13782 - }, - { - "epoch": 1.298415016132451, - "grad_norm": 0.6176031231880188, - "learning_rate": 5.6838185489152696e-06, - "loss": 0.1759, - "step": 13783 - }, - { - "epoch": 1.2985092202256188, - "grad_norm": 0.6078556180000305, - "learning_rate": 5.6824564766150724e-06, - "loss": 0.1891, - "step": 13784 - }, - { - "epoch": 1.2986034243187867, - "grad_norm": 0.6300124526023865, - "learning_rate": 5.68109450276154e-06, - "loss": 0.1937, - "step": 13785 - }, - { - "epoch": 1.2986976284119545, - "grad_norm": 0.6361894011497498, - "learning_rate": 5.679732627385732e-06, - "loss": 0.226, - "step": 13786 - }, - { - "epoch": 1.2987918325051224, - "grad_norm": 0.7193924188613892, - "learning_rate": 5.6783708505187065e-06, - "loss": 0.2157, - "step": 13787 - }, - { - "epoch": 1.2988860365982902, - "grad_norm": 0.7525556087493896, - "learning_rate": 5.677009172191508e-06, - "loss": 0.2335, - "step": 13788 - }, - { - "epoch": 1.2989802406914581, - "grad_norm": 0.6641455888748169, - "learning_rate": 5.675647592435177e-06, - "loss": 0.1991, - "step": 13789 - }, - { - "epoch": 1.2990744447846259, - "grad_norm": 0.6445804238319397, - "learning_rate": 5.674286111280778e-06, - "loss": 0.2174, - "step": 13790 - }, - { - "epoch": 1.2991686488777938, - "grad_norm": 0.7037086486816406, - "learning_rate": 5.672924728759346e-06, - "loss": 0.2001, - "step": 13791 - }, - { - "epoch": 1.2992628529709616, - "grad_norm": 0.653767466545105, - "learning_rate": 5.671563444901917e-06, - "loss": 0.2076, - "step": 13792 - }, - { - "epoch": 1.2993570570641295, - "grad_norm": 0.753557026386261, - "learning_rate": 5.670202259739537e-06, - "loss": 0.2196, - "step": 13793 - }, - { - "epoch": 1.2994512611572973, - "grad_norm": 0.6928074955940247, - "learning_rate": 5.668841173303244e-06, - "loss": 0.2221, - "step": 13794 - }, - { - "epoch": 1.2995454652504652, - "grad_norm": 0.6821420788764954, - "learning_rate": 5.667480185624068e-06, - "loss": 0.213, - "step": 13795 - }, - { - "epoch": 1.299639669343633, - "grad_norm": 0.6530633568763733, - "learning_rate": 5.666119296733042e-06, - "loss": 0.207, - "step": 13796 - }, - { - "epoch": 
1.299733873436801, - "grad_norm": 0.5936562418937683, - "learning_rate": 5.6647585066612045e-06, - "loss": 0.2037, - "step": 13797 - }, - { - "epoch": 1.2998280775299687, - "grad_norm": 1.7025306224822998, - "learning_rate": 5.663397815439573e-06, - "loss": 0.203, - "step": 13798 - }, - { - "epoch": 1.2999222816231364, - "grad_norm": 0.63300621509552, - "learning_rate": 5.662037223099177e-06, - "loss": 0.2105, - "step": 13799 - }, - { - "epoch": 1.3000164857163043, - "grad_norm": 0.5539723634719849, - "learning_rate": 5.660676729671046e-06, - "loss": 0.191, - "step": 13800 - }, - { - "epoch": 1.3001106898094723, - "grad_norm": 0.7422416806221008, - "learning_rate": 5.659316335186194e-06, - "loss": 0.2484, - "step": 13801 - }, - { - "epoch": 1.30020489390264, - "grad_norm": 0.6566030383110046, - "learning_rate": 5.657956039675645e-06, - "loss": 0.2175, - "step": 13802 - }, - { - "epoch": 1.3002990979958078, - "grad_norm": 0.6515787839889526, - "learning_rate": 5.656595843170409e-06, - "loss": 0.1981, - "step": 13803 - }, - { - "epoch": 1.3003933020889757, - "grad_norm": 1.1477673053741455, - "learning_rate": 5.655235745701506e-06, - "loss": 0.2261, - "step": 13804 - }, - { - "epoch": 1.3004875061821437, - "grad_norm": 0.7515124678611755, - "learning_rate": 5.653875747299951e-06, - "loss": 0.176, - "step": 13805 - }, - { - "epoch": 1.3005817102753114, - "grad_norm": 0.7207934856414795, - "learning_rate": 5.652515847996744e-06, - "loss": 0.2471, - "step": 13806 - }, - { - "epoch": 1.3006759143684792, - "grad_norm": 0.6418449282646179, - "learning_rate": 5.651156047822903e-06, - "loss": 0.22, - "step": 13807 - }, - { - "epoch": 1.3007701184616471, - "grad_norm": 0.5678517818450928, - "learning_rate": 5.64979634680943e-06, - "loss": 0.1719, - "step": 13808 - }, - { - "epoch": 1.300864322554815, - "grad_norm": 0.6331684589385986, - "learning_rate": 5.6484367449873265e-06, - "loss": 0.1984, - "step": 13809 - }, - { - "epoch": 1.3009585266479828, - "grad_norm": 0.6593651175498962, - "learning_rate": 5.6470772423875954e-06, - "loss": 0.1984, - "step": 13810 - }, - { - "epoch": 1.3010527307411506, - "grad_norm": 0.5979335904121399, - "learning_rate": 5.645717839041238e-06, - "loss": 0.1898, - "step": 13811 - }, - { - "epoch": 1.3011469348343185, - "grad_norm": 0.676989734172821, - "learning_rate": 5.644358534979248e-06, - "loss": 0.2151, - "step": 13812 - }, - { - "epoch": 1.3012411389274865, - "grad_norm": 0.546859085559845, - "learning_rate": 5.642999330232617e-06, - "loss": 0.1553, - "step": 13813 - }, - { - "epoch": 1.3013353430206542, - "grad_norm": 0.6827539801597595, - "learning_rate": 5.6416402248323375e-06, - "loss": 0.2003, - "step": 13814 - }, - { - "epoch": 1.301429547113822, - "grad_norm": 0.638532817363739, - "learning_rate": 5.6402812188094055e-06, - "loss": 0.2062, - "step": 13815 - }, - { - "epoch": 1.30152375120699, - "grad_norm": 0.7166432738304138, - "learning_rate": 5.6389223121948014e-06, - "loss": 0.2417, - "step": 13816 - }, - { - "epoch": 1.301617955300158, - "grad_norm": 0.6156617403030396, - "learning_rate": 5.637563505019512e-06, - "loss": 0.2008, - "step": 13817 - }, - { - "epoch": 1.3017121593933256, - "grad_norm": 0.6777030825614929, - "learning_rate": 5.636204797314526e-06, - "loss": 0.2132, - "step": 13818 - }, - { - "epoch": 1.3018063634864934, - "grad_norm": 0.6603621244430542, - "learning_rate": 5.634846189110814e-06, - "loss": 0.2028, - "step": 13819 - }, - { - "epoch": 1.3019005675796613, - "grad_norm": 0.6802692413330078, - "learning_rate": 
5.633487680439362e-06, - "loss": 0.2, - "step": 13820 - }, - { - "epoch": 1.3019947716728293, - "grad_norm": 0.7136988639831543, - "learning_rate": 5.632129271331146e-06, - "loss": 0.2039, - "step": 13821 - }, - { - "epoch": 1.302088975765997, - "grad_norm": 0.6380264759063721, - "learning_rate": 5.630770961817134e-06, - "loss": 0.2167, - "step": 13822 - }, - { - "epoch": 1.3021831798591648, - "grad_norm": 0.5513056516647339, - "learning_rate": 5.629412751928301e-06, - "loss": 0.1999, - "step": 13823 - }, - { - "epoch": 1.3022773839523327, - "grad_norm": 0.6224511861801147, - "learning_rate": 5.628054641695622e-06, - "loss": 0.1798, - "step": 13824 - }, - { - "epoch": 1.3023715880455007, - "grad_norm": 0.6389832496643066, - "learning_rate": 5.626696631150053e-06, - "loss": 0.2107, - "step": 13825 - }, - { - "epoch": 1.3024657921386684, - "grad_norm": 0.6147251725196838, - "learning_rate": 5.625338720322564e-06, - "loss": 0.1967, - "step": 13826 - }, - { - "epoch": 1.3025599962318362, - "grad_norm": 0.6606024503707886, - "learning_rate": 5.6239809092441225e-06, - "loss": 0.23, - "step": 13827 - }, - { - "epoch": 1.3026542003250041, - "grad_norm": 0.6261757016181946, - "learning_rate": 5.622623197945684e-06, - "loss": 0.1968, - "step": 13828 - }, - { - "epoch": 1.302748404418172, - "grad_norm": 0.6423500180244446, - "learning_rate": 5.6212655864581976e-06, - "loss": 0.2067, - "step": 13829 - }, - { - "epoch": 1.3028426085113398, - "grad_norm": 0.6579574942588806, - "learning_rate": 5.619908074812637e-06, - "loss": 0.2011, - "step": 13830 - }, - { - "epoch": 1.3029368126045076, - "grad_norm": 0.6220484375953674, - "learning_rate": 5.618550663039945e-06, - "loss": 0.2038, - "step": 13831 - }, - { - "epoch": 1.3030310166976755, - "grad_norm": 0.6784374117851257, - "learning_rate": 5.6171933511710705e-06, - "loss": 0.2172, - "step": 13832 - }, - { - "epoch": 1.3031252207908435, - "grad_norm": 0.7078768610954285, - "learning_rate": 5.6158361392369655e-06, - "loss": 0.2181, - "step": 13833 - }, - { - "epoch": 1.3032194248840112, - "grad_norm": 0.6488476395606995, - "learning_rate": 5.61447902726858e-06, - "loss": 0.2415, - "step": 13834 - }, - { - "epoch": 1.303313628977179, - "grad_norm": 0.654567539691925, - "learning_rate": 5.6131220152968525e-06, - "loss": 0.1912, - "step": 13835 - }, - { - "epoch": 1.303407833070347, - "grad_norm": 0.6786851286888123, - "learning_rate": 5.611765103352727e-06, - "loss": 0.2128, - "step": 13836 - }, - { - "epoch": 1.3035020371635149, - "grad_norm": 0.6657833456993103, - "learning_rate": 5.6104082914671465e-06, - "loss": 0.1913, - "step": 13837 - }, - { - "epoch": 1.3035962412566826, - "grad_norm": 0.7241305112838745, - "learning_rate": 5.609051579671043e-06, - "loss": 0.2056, - "step": 13838 - }, - { - "epoch": 1.3036904453498503, - "grad_norm": 0.6167240738868713, - "learning_rate": 5.607694967995354e-06, - "loss": 0.1905, - "step": 13839 - }, - { - "epoch": 1.3037846494430183, - "grad_norm": 0.7255936861038208, - "learning_rate": 5.606338456471017e-06, - "loss": 0.2272, - "step": 13840 - }, - { - "epoch": 1.3038788535361863, - "grad_norm": 0.6236115097999573, - "learning_rate": 5.604982045128953e-06, - "loss": 0.2019, - "step": 13841 - }, - { - "epoch": 1.303973057629354, - "grad_norm": 0.6566281318664551, - "learning_rate": 5.6036257340000995e-06, - "loss": 0.2097, - "step": 13842 - }, - { - "epoch": 1.3040672617225217, - "grad_norm": 0.6480876803398132, - "learning_rate": 5.6022695231153754e-06, - "loss": 0.1963, - "step": 13843 - }, - { - "epoch": 
1.3041614658156897, - "grad_norm": 0.661703884601593, - "learning_rate": 5.600913412505707e-06, - "loss": 0.1948, - "step": 13844 - }, - { - "epoch": 1.3042556699088577, - "grad_norm": 0.6555445194244385, - "learning_rate": 5.599557402202019e-06, - "loss": 0.1893, - "step": 13845 - }, - { - "epoch": 1.3043498740020254, - "grad_norm": 0.6487663984298706, - "learning_rate": 5.598201492235224e-06, - "loss": 0.1893, - "step": 13846 - }, - { - "epoch": 1.3044440780951931, - "grad_norm": 0.7296391725540161, - "learning_rate": 5.596845682636245e-06, - "loss": 0.2082, - "step": 13847 - }, - { - "epoch": 1.304538282188361, - "grad_norm": 0.8082395792007446, - "learning_rate": 5.595489973435995e-06, - "loss": 0.2096, - "step": 13848 - }, - { - "epoch": 1.304632486281529, - "grad_norm": 0.6186919212341309, - "learning_rate": 5.594134364665383e-06, - "loss": 0.2006, - "step": 13849 - }, - { - "epoch": 1.3047266903746968, - "grad_norm": 1.2049318552017212, - "learning_rate": 5.592778856355321e-06, - "loss": 0.2264, - "step": 13850 - }, - { - "epoch": 1.3048208944678645, - "grad_norm": 0.7251076102256775, - "learning_rate": 5.591423448536719e-06, - "loss": 0.2148, - "step": 13851 - }, - { - "epoch": 1.3049150985610325, - "grad_norm": 0.608109176158905, - "learning_rate": 5.590068141240483e-06, - "loss": 0.1774, - "step": 13852 - }, - { - "epoch": 1.3050093026542005, - "grad_norm": 0.6641905307769775, - "learning_rate": 5.588712934497509e-06, - "loss": 0.1819, - "step": 13853 - }, - { - "epoch": 1.3051035067473682, - "grad_norm": 0.6662217974662781, - "learning_rate": 5.587357828338702e-06, - "loss": 0.2048, - "step": 13854 - }, - { - "epoch": 1.305197710840536, - "grad_norm": 0.6918042898178101, - "learning_rate": 5.586002822794964e-06, - "loss": 0.2212, - "step": 13855 - }, - { - "epoch": 1.305291914933704, - "grad_norm": 0.6342300772666931, - "learning_rate": 5.584647917897185e-06, - "loss": 0.2008, - "step": 13856 - }, - { - "epoch": 1.3053861190268718, - "grad_norm": 0.540745735168457, - "learning_rate": 5.58329311367626e-06, - "loss": 0.1788, - "step": 13857 - }, - { - "epoch": 1.3054803231200396, - "grad_norm": 0.6319723129272461, - "learning_rate": 5.5819384101630895e-06, - "loss": 0.1926, - "step": 13858 - }, - { - "epoch": 1.3055745272132073, - "grad_norm": 0.604201078414917, - "learning_rate": 5.58058380738855e-06, - "loss": 0.1842, - "step": 13859 - }, - { - "epoch": 1.3056687313063753, - "grad_norm": 0.5605629682540894, - "learning_rate": 5.5792293053835354e-06, - "loss": 0.1952, - "step": 13860 - }, - { - "epoch": 1.3057629353995432, - "grad_norm": 0.6751049757003784, - "learning_rate": 5.577874904178932e-06, - "loss": 0.2054, - "step": 13861 - }, - { - "epoch": 1.305857139492711, - "grad_norm": 0.6867413520812988, - "learning_rate": 5.5765206038056175e-06, - "loss": 0.2158, - "step": 13862 - }, - { - "epoch": 1.3059513435858787, - "grad_norm": 0.6905479431152344, - "learning_rate": 5.5751664042944745e-06, - "loss": 0.1897, - "step": 13863 - }, - { - "epoch": 1.3060455476790467, - "grad_norm": 0.602834939956665, - "learning_rate": 5.573812305676383e-06, - "loss": 0.1759, - "step": 13864 - }, - { - "epoch": 1.3061397517722146, - "grad_norm": 0.7618058919906616, - "learning_rate": 5.572458307982216e-06, - "loss": 0.2228, - "step": 13865 - }, - { - "epoch": 1.3062339558653824, - "grad_norm": 0.6712250709533691, - "learning_rate": 5.57110441124284e-06, - "loss": 0.2045, - "step": 13866 - }, - { - "epoch": 1.3063281599585501, - "grad_norm": 0.6632776260375977, - "learning_rate": 
5.56975061548914e-06, - "loss": 0.2149, - "step": 13867 - }, - { - "epoch": 1.306422364051718, - "grad_norm": 0.6257486939430237, - "learning_rate": 5.5683969207519795e-06, - "loss": 0.2044, - "step": 13868 - }, - { - "epoch": 1.3065165681448858, - "grad_norm": 0.6668272018432617, - "learning_rate": 5.567043327062212e-06, - "loss": 0.2594, - "step": 13869 - }, - { - "epoch": 1.3066107722380538, - "grad_norm": 0.6111574172973633, - "learning_rate": 5.5656898344507224e-06, - "loss": 0.1894, - "step": 13870 - }, - { - "epoch": 1.3067049763312215, - "grad_norm": 0.636658787727356, - "learning_rate": 5.564336442948362e-06, - "loss": 0.2069, - "step": 13871 - }, - { - "epoch": 1.3067991804243895, - "grad_norm": 0.6372724175453186, - "learning_rate": 5.562983152585986e-06, - "loss": 0.2114, - "step": 13872 - }, - { - "epoch": 1.3068933845175572, - "grad_norm": 0.7113171219825745, - "learning_rate": 5.561629963394457e-06, - "loss": 0.2186, - "step": 13873 - }, - { - "epoch": 1.3069875886107252, - "grad_norm": 0.6116046905517578, - "learning_rate": 5.560276875404631e-06, - "loss": 0.2081, - "step": 13874 - }, - { - "epoch": 1.307081792703893, - "grad_norm": 0.6473698616027832, - "learning_rate": 5.5589238886473565e-06, - "loss": 0.1983, - "step": 13875 - }, - { - "epoch": 1.3071759967970609, - "grad_norm": 0.723466157913208, - "learning_rate": 5.557571003153485e-06, - "loss": 0.2395, - "step": 13876 - }, - { - "epoch": 1.3072702008902286, - "grad_norm": 0.7086714506149292, - "learning_rate": 5.5562182189538684e-06, - "loss": 0.2052, - "step": 13877 - }, - { - "epoch": 1.3073644049833966, - "grad_norm": 0.6335019469261169, - "learning_rate": 5.554865536079346e-06, - "loss": 0.1835, - "step": 13878 - }, - { - "epoch": 1.3074586090765643, - "grad_norm": 0.631964921951294, - "learning_rate": 5.5535129545607625e-06, - "loss": 0.2088, - "step": 13879 - }, - { - "epoch": 1.3075528131697323, - "grad_norm": 0.6842601299285889, - "learning_rate": 5.552160474428965e-06, - "loss": 0.2314, - "step": 13880 - }, - { - "epoch": 1.3076470172629, - "grad_norm": 0.6788905262947083, - "learning_rate": 5.550808095714784e-06, - "loss": 0.1907, - "step": 13881 - }, - { - "epoch": 1.307741221356068, - "grad_norm": 0.6198939681053162, - "learning_rate": 5.549455818449061e-06, - "loss": 0.2042, - "step": 13882 - }, - { - "epoch": 1.3078354254492357, - "grad_norm": 0.6979933977127075, - "learning_rate": 5.548103642662626e-06, - "loss": 0.1756, - "step": 13883 - }, - { - "epoch": 1.3079296295424037, - "grad_norm": 0.779558539390564, - "learning_rate": 5.546751568386313e-06, - "loss": 0.2351, - "step": 13884 - }, - { - "epoch": 1.3080238336355714, - "grad_norm": 0.6771942377090454, - "learning_rate": 5.545399595650956e-06, - "loss": 0.2274, - "step": 13885 - }, - { - "epoch": 1.3081180377287394, - "grad_norm": 0.6119150519371033, - "learning_rate": 5.544047724487371e-06, - "loss": 0.1871, - "step": 13886 - }, - { - "epoch": 1.308212241821907, - "grad_norm": 0.6790033578872681, - "learning_rate": 5.542695954926391e-06, - "loss": 0.199, - "step": 13887 - }, - { - "epoch": 1.308306445915075, - "grad_norm": 0.6950446963310242, - "learning_rate": 5.54134428699884e-06, - "loss": 0.2046, - "step": 13888 - }, - { - "epoch": 1.3084006500082428, - "grad_norm": 0.6052107214927673, - "learning_rate": 5.5399927207355305e-06, - "loss": 0.2023, - "step": 13889 - }, - { - "epoch": 1.3084948541014108, - "grad_norm": 0.649046003818512, - "learning_rate": 5.538641256167285e-06, - "loss": 0.205, - "step": 13890 - }, - { - "epoch": 
1.3085890581945785, - "grad_norm": 0.6405214071273804, - "learning_rate": 5.537289893324922e-06, - "loss": 0.1841, - "step": 13891 - }, - { - "epoch": 1.3086832622877465, - "grad_norm": 0.6337867379188538, - "learning_rate": 5.535938632239253e-06, - "loss": 0.179, - "step": 13892 - }, - { - "epoch": 1.3087774663809142, - "grad_norm": 0.7240697145462036, - "learning_rate": 5.53458747294108e-06, - "loss": 0.2294, - "step": 13893 - }, - { - "epoch": 1.3088716704740821, - "grad_norm": 0.7006561756134033, - "learning_rate": 5.533236415461221e-06, - "loss": 0.1989, - "step": 13894 - }, - { - "epoch": 1.3089658745672499, - "grad_norm": 0.6407173871994019, - "learning_rate": 5.531885459830481e-06, - "loss": 0.1989, - "step": 13895 - }, - { - "epoch": 1.3090600786604178, - "grad_norm": 0.5988685488700867, - "learning_rate": 5.53053460607966e-06, - "loss": 0.1932, - "step": 13896 - }, - { - "epoch": 1.3091542827535856, - "grad_norm": 0.6343111395835876, - "learning_rate": 5.529183854239563e-06, - "loss": 0.1818, - "step": 13897 - }, - { - "epoch": 1.3092484868467535, - "grad_norm": 0.6254624128341675, - "learning_rate": 5.527833204340992e-06, - "loss": 0.2141, - "step": 13898 - }, - { - "epoch": 1.3093426909399213, - "grad_norm": 0.6266101002693176, - "learning_rate": 5.526482656414737e-06, - "loss": 0.1938, - "step": 13899 - }, - { - "epoch": 1.3094368950330892, - "grad_norm": 0.6078035235404968, - "learning_rate": 5.525132210491595e-06, - "loss": 0.1739, - "step": 13900 - }, - { - "epoch": 1.309531099126257, - "grad_norm": 0.5897152423858643, - "learning_rate": 5.523781866602362e-06, - "loss": 0.1692, - "step": 13901 - }, - { - "epoch": 1.309625303219425, - "grad_norm": 0.6647896766662598, - "learning_rate": 5.522431624777822e-06, - "loss": 0.1866, - "step": 13902 - }, - { - "epoch": 1.3097195073125927, - "grad_norm": 0.7052650451660156, - "learning_rate": 5.521081485048766e-06, - "loss": 0.1921, - "step": 13903 - }, - { - "epoch": 1.3098137114057606, - "grad_norm": 0.5926150679588318, - "learning_rate": 5.519731447445983e-06, - "loss": 0.185, - "step": 13904 - }, - { - "epoch": 1.3099079154989284, - "grad_norm": 0.7053104639053345, - "learning_rate": 5.518381512000252e-06, - "loss": 0.2235, - "step": 13905 - }, - { - "epoch": 1.3100021195920963, - "grad_norm": 0.6310542821884155, - "learning_rate": 5.517031678742344e-06, - "loss": 0.187, - "step": 13906 - }, - { - "epoch": 1.310096323685264, - "grad_norm": 0.6946753859519958, - "learning_rate": 5.515681947703055e-06, - "loss": 0.2191, - "step": 13907 - }, - { - "epoch": 1.310190527778432, - "grad_norm": 0.6307712197303772, - "learning_rate": 5.514332318913154e-06, - "loss": 0.2025, - "step": 13908 - }, - { - "epoch": 1.3102847318715998, - "grad_norm": 0.667898416519165, - "learning_rate": 5.512982792403405e-06, - "loss": 0.2105, - "step": 13909 - }, - { - "epoch": 1.3103789359647677, - "grad_norm": 0.6130701899528503, - "learning_rate": 5.511633368204596e-06, - "loss": 0.1994, - "step": 13910 - }, - { - "epoch": 1.3104731400579355, - "grad_norm": 0.6175673007965088, - "learning_rate": 5.510284046347487e-06, - "loss": 0.1743, - "step": 13911 - }, - { - "epoch": 1.3105673441511034, - "grad_norm": 0.6781598925590515, - "learning_rate": 5.508934826862842e-06, - "loss": 0.2126, - "step": 13912 - }, - { - "epoch": 1.3106615482442712, - "grad_norm": 0.6490588188171387, - "learning_rate": 5.507585709781427e-06, - "loss": 0.2067, - "step": 13913 - }, - { - "epoch": 1.3107557523374391, - "grad_norm": 0.6518041491508484, - "learning_rate": 
5.5062366951340105e-06, - "loss": 0.2147, - "step": 13914 - }, - { - "epoch": 1.3108499564306069, - "grad_norm": 0.6279114484786987, - "learning_rate": 5.504887782951343e-06, - "loss": 0.2067, - "step": 13915 - }, - { - "epoch": 1.3109441605237748, - "grad_norm": 0.6991190910339355, - "learning_rate": 5.503538973264185e-06, - "loss": 0.2022, - "step": 13916 - }, - { - "epoch": 1.3110383646169426, - "grad_norm": 0.6147141456604004, - "learning_rate": 5.502190266103298e-06, - "loss": 0.1905, - "step": 13917 - }, - { - "epoch": 1.3111325687101105, - "grad_norm": 0.6829583644866943, - "learning_rate": 5.5008416614994235e-06, - "loss": 0.1844, - "step": 13918 - }, - { - "epoch": 1.3112267728032783, - "grad_norm": 0.6109396815299988, - "learning_rate": 5.499493159483318e-06, - "loss": 0.2091, - "step": 13919 - }, - { - "epoch": 1.3113209768964462, - "grad_norm": 0.6405788064002991, - "learning_rate": 5.498144760085732e-06, - "loss": 0.2102, - "step": 13920 - }, - { - "epoch": 1.311415180989614, - "grad_norm": 0.702020525932312, - "learning_rate": 5.496796463337404e-06, - "loss": 0.188, - "step": 13921 - }, - { - "epoch": 1.311509385082782, - "grad_norm": 0.7264140844345093, - "learning_rate": 5.495448269269085e-06, - "loss": 0.1992, - "step": 13922 - }, - { - "epoch": 1.3116035891759497, - "grad_norm": 0.5828751921653748, - "learning_rate": 5.494100177911508e-06, - "loss": 0.1694, - "step": 13923 - }, - { - "epoch": 1.3116977932691176, - "grad_norm": 0.6680941581726074, - "learning_rate": 5.492752189295415e-06, - "loss": 0.2218, - "step": 13924 - }, - { - "epoch": 1.3117919973622854, - "grad_norm": 0.6263971328735352, - "learning_rate": 5.491404303451546e-06, - "loss": 0.2012, - "step": 13925 - }, - { - "epoch": 1.3118862014554533, - "grad_norm": 0.6877226829528809, - "learning_rate": 5.49005652041063e-06, - "loss": 0.209, - "step": 13926 - }, - { - "epoch": 1.311980405548621, - "grad_norm": 0.704127311706543, - "learning_rate": 5.488708840203398e-06, - "loss": 0.2108, - "step": 13927 - }, - { - "epoch": 1.312074609641789, - "grad_norm": 0.6672877669334412, - "learning_rate": 5.487361262860587e-06, - "loss": 0.2166, - "step": 13928 - }, - { - "epoch": 1.3121688137349568, - "grad_norm": 0.6394596099853516, - "learning_rate": 5.486013788412912e-06, - "loss": 0.1595, - "step": 13929 - }, - { - "epoch": 1.3122630178281247, - "grad_norm": 0.659265398979187, - "learning_rate": 5.484666416891109e-06, - "loss": 0.1943, - "step": 13930 - }, - { - "epoch": 1.3123572219212924, - "grad_norm": 0.6087794899940491, - "learning_rate": 5.483319148325891e-06, - "loss": 0.1968, - "step": 13931 - }, - { - "epoch": 1.3124514260144604, - "grad_norm": 0.7124664783477783, - "learning_rate": 5.481971982747985e-06, - "loss": 0.2051, - "step": 13932 - }, - { - "epoch": 1.3125456301076281, - "grad_norm": 0.5750867128372192, - "learning_rate": 5.4806249201881e-06, - "loss": 0.1912, - "step": 13933 - }, - { - "epoch": 1.312639834200796, - "grad_norm": 0.6283868551254272, - "learning_rate": 5.479277960676959e-06, - "loss": 0.2112, - "step": 13934 - }, - { - "epoch": 1.3127340382939638, - "grad_norm": 0.6514611840248108, - "learning_rate": 5.4779311042452735e-06, - "loss": 0.1917, - "step": 13935 - }, - { - "epoch": 1.3128282423871318, - "grad_norm": 0.6681877374649048, - "learning_rate": 5.476584350923749e-06, - "loss": 0.2039, - "step": 13936 - }, - { - "epoch": 1.3129224464802995, - "grad_norm": 0.6883458495140076, - "learning_rate": 5.4752377007430966e-06, - "loss": 0.2169, - "step": 13937 - }, - { - "epoch": 
1.3130166505734673, - "grad_norm": 0.610941469669342, - "learning_rate": 5.4738911537340275e-06, - "loss": 0.1983, - "step": 13938 - }, - { - "epoch": 1.3131108546666352, - "grad_norm": 0.6219098567962646, - "learning_rate": 5.472544709927234e-06, - "loss": 0.1824, - "step": 13939 - }, - { - "epoch": 1.3132050587598032, - "grad_norm": 0.7274148464202881, - "learning_rate": 5.471198369353425e-06, - "loss": 0.2126, - "step": 13940 - }, - { - "epoch": 1.313299262852971, - "grad_norm": 0.7503207921981812, - "learning_rate": 5.469852132043301e-06, - "loss": 0.2256, - "step": 13941 - }, - { - "epoch": 1.3133934669461387, - "grad_norm": 0.6928357481956482, - "learning_rate": 5.468505998027549e-06, - "loss": 0.206, - "step": 13942 - }, - { - "epoch": 1.3134876710393066, - "grad_norm": 0.6850817799568176, - "learning_rate": 5.467159967336868e-06, - "loss": 0.2092, - "step": 13943 - }, - { - "epoch": 1.3135818751324746, - "grad_norm": 0.6698310971260071, - "learning_rate": 5.465814040001955e-06, - "loss": 0.2052, - "step": 13944 - }, - { - "epoch": 1.3136760792256423, - "grad_norm": 0.6120530962944031, - "learning_rate": 5.464468216053493e-06, - "loss": 0.1855, - "step": 13945 - }, - { - "epoch": 1.31377028331881, - "grad_norm": 0.6629364490509033, - "learning_rate": 5.463122495522164e-06, - "loss": 0.2162, - "step": 13946 - }, - { - "epoch": 1.313864487411978, - "grad_norm": 0.6264070272445679, - "learning_rate": 5.461776878438665e-06, - "loss": 0.1888, - "step": 13947 - }, - { - "epoch": 1.313958691505146, - "grad_norm": 0.5900087952613831, - "learning_rate": 5.460431364833673e-06, - "loss": 0.1791, - "step": 13948 - }, - { - "epoch": 1.3140528955983137, - "grad_norm": 0.7165248394012451, - "learning_rate": 5.459085954737857e-06, - "loss": 0.1723, - "step": 13949 - }, - { - "epoch": 1.3141470996914815, - "grad_norm": 0.6292446851730347, - "learning_rate": 5.4577406481819125e-06, - "loss": 0.2123, - "step": 13950 - }, - { - "epoch": 1.3142413037846494, - "grad_norm": 0.6633747220039368, - "learning_rate": 5.456395445196506e-06, - "loss": 0.2055, - "step": 13951 - }, - { - "epoch": 1.3143355078778174, - "grad_norm": 0.6716272234916687, - "learning_rate": 5.455050345812306e-06, - "loss": 0.2315, - "step": 13952 - }, - { - "epoch": 1.3144297119709851, - "grad_norm": 0.693078875541687, - "learning_rate": 5.453705350059988e-06, - "loss": 0.2241, - "step": 13953 - }, - { - "epoch": 1.3145239160641529, - "grad_norm": 0.8464998602867126, - "learning_rate": 5.452360457970222e-06, - "loss": 0.2064, - "step": 13954 - }, - { - "epoch": 1.3146181201573208, - "grad_norm": 0.6772229075431824, - "learning_rate": 5.451015669573666e-06, - "loss": 0.2122, - "step": 13955 - }, - { - "epoch": 1.3147123242504888, - "grad_norm": 0.6633017063140869, - "learning_rate": 5.449670984900988e-06, - "loss": 0.2097, - "step": 13956 - }, - { - "epoch": 1.3148065283436565, - "grad_norm": 0.7297033071517944, - "learning_rate": 5.448326403982854e-06, - "loss": 0.2433, - "step": 13957 - }, - { - "epoch": 1.3149007324368243, - "grad_norm": 0.5902170538902283, - "learning_rate": 5.446981926849912e-06, - "loss": 0.1731, - "step": 13958 - }, - { - "epoch": 1.3149949365299922, - "grad_norm": 0.6542876362800598, - "learning_rate": 5.445637553532825e-06, - "loss": 0.1998, - "step": 13959 - }, - { - "epoch": 1.3150891406231602, - "grad_norm": 0.7086982727050781, - "learning_rate": 5.444293284062248e-06, - "loss": 0.215, - "step": 13960 - }, - { - "epoch": 1.315183344716328, - "grad_norm": 0.7683100700378418, - "learning_rate": 
5.442949118468826e-06, - "loss": 0.1876, - "step": 13961 - }, - { - "epoch": 1.3152775488094957, - "grad_norm": 0.7870001792907715, - "learning_rate": 5.441605056783216e-06, - "loss": 0.2091, - "step": 13962 - }, - { - "epoch": 1.3153717529026636, - "grad_norm": 0.6686782240867615, - "learning_rate": 5.440261099036056e-06, - "loss": 0.1932, - "step": 13963 - }, - { - "epoch": 1.3154659569958316, - "grad_norm": 0.7002503275871277, - "learning_rate": 5.4389172452579956e-06, - "loss": 0.2336, - "step": 13964 - }, - { - "epoch": 1.3155601610889993, - "grad_norm": 0.743320643901825, - "learning_rate": 5.437573495479678e-06, - "loss": 0.2307, - "step": 13965 - }, - { - "epoch": 1.315654365182167, - "grad_norm": 0.6570819020271301, - "learning_rate": 5.436229849731738e-06, - "loss": 0.1951, - "step": 13966 - }, - { - "epoch": 1.315748569275335, - "grad_norm": 0.6411550641059875, - "learning_rate": 5.434886308044816e-06, - "loss": 0.1872, - "step": 13967 - }, - { - "epoch": 1.315842773368503, - "grad_norm": 0.6603879928588867, - "learning_rate": 5.433542870449549e-06, - "loss": 0.2077, - "step": 13968 - }, - { - "epoch": 1.3159369774616707, - "grad_norm": 0.6315999031066895, - "learning_rate": 5.432199536976563e-06, - "loss": 0.2121, - "step": 13969 - }, - { - "epoch": 1.3160311815548384, - "grad_norm": 0.6271128058433533, - "learning_rate": 5.430856307656495e-06, - "loss": 0.2052, - "step": 13970 - }, - { - "epoch": 1.3161253856480064, - "grad_norm": 0.6487036347389221, - "learning_rate": 5.429513182519967e-06, - "loss": 0.195, - "step": 13971 - }, - { - "epoch": 1.3162195897411744, - "grad_norm": 0.6588313579559326, - "learning_rate": 5.42817016159761e-06, - "loss": 0.1976, - "step": 13972 - }, - { - "epoch": 1.316313793834342, - "grad_norm": 0.6937627196311951, - "learning_rate": 5.426827244920041e-06, - "loss": 0.2223, - "step": 13973 - }, - { - "epoch": 1.3164079979275098, - "grad_norm": 0.6528636813163757, - "learning_rate": 5.425484432517882e-06, - "loss": 0.2179, - "step": 13974 - }, - { - "epoch": 1.3165022020206778, - "grad_norm": 0.6710507273674011, - "learning_rate": 5.424141724421757e-06, - "loss": 0.1884, - "step": 13975 - }, - { - "epoch": 1.3165964061138458, - "grad_norm": 0.65396648645401, - "learning_rate": 5.422799120662273e-06, - "loss": 0.1754, - "step": 13976 - }, - { - "epoch": 1.3166906102070135, - "grad_norm": 0.6715162396430969, - "learning_rate": 5.421456621270048e-06, - "loss": 0.1903, - "step": 13977 - }, - { - "epoch": 1.3167848143001812, - "grad_norm": 0.6242902278900146, - "learning_rate": 5.420114226275697e-06, - "loss": 0.2103, - "step": 13978 - }, - { - "epoch": 1.3168790183933492, - "grad_norm": 0.68259197473526, - "learning_rate": 5.418771935709821e-06, - "loss": 0.2247, - "step": 13979 - }, - { - "epoch": 1.3169732224865172, - "grad_norm": 0.63609379529953, - "learning_rate": 5.417429749603029e-06, - "loss": 0.2048, - "step": 13980 - }, - { - "epoch": 1.317067426579685, - "grad_norm": 0.6944010853767395, - "learning_rate": 5.416087667985931e-06, - "loss": 0.2493, - "step": 13981 - }, - { - "epoch": 1.3171616306728526, - "grad_norm": 0.7394487261772156, - "learning_rate": 5.414745690889122e-06, - "loss": 0.215, - "step": 13982 - }, - { - "epoch": 1.3172558347660206, - "grad_norm": 0.6646633148193359, - "learning_rate": 5.413403818343195e-06, - "loss": 0.2234, - "step": 13983 - }, - { - "epoch": 1.3173500388591886, - "grad_norm": 0.5564711093902588, - "learning_rate": 5.412062050378764e-06, - "loss": 0.1872, - "step": 13984 - }, - { - "epoch": 
1.3174442429523563, - "grad_norm": 0.691422164440155, - "learning_rate": 5.410720387026411e-06, - "loss": 0.2264, - "step": 13985 - }, - { - "epoch": 1.317538447045524, - "grad_norm": 0.7079923152923584, - "learning_rate": 5.4093788283167235e-06, - "loss": 0.2186, - "step": 13986 - }, - { - "epoch": 1.317632651138692, - "grad_norm": 0.7273160815238953, - "learning_rate": 5.408037374280306e-06, - "loss": 0.1958, - "step": 13987 - }, - { - "epoch": 1.31772685523186, - "grad_norm": 0.7292044758796692, - "learning_rate": 5.406696024947739e-06, - "loss": 0.2151, - "step": 13988 - }, - { - "epoch": 1.3178210593250277, - "grad_norm": 0.7436671853065491, - "learning_rate": 5.4053547803495966e-06, - "loss": 0.2196, - "step": 13989 - }, - { - "epoch": 1.3179152634181954, - "grad_norm": 0.6820604801177979, - "learning_rate": 5.4040136405164815e-06, - "loss": 0.2045, - "step": 13990 - }, - { - "epoch": 1.3180094675113634, - "grad_norm": 0.6173348426818848, - "learning_rate": 5.40267260547896e-06, - "loss": 0.1934, - "step": 13991 - }, - { - "epoch": 1.3181036716045313, - "grad_norm": 0.5947291851043701, - "learning_rate": 5.401331675267612e-06, - "loss": 0.1781, - "step": 13992 - }, - { - "epoch": 1.318197875697699, - "grad_norm": 0.6224139332771301, - "learning_rate": 5.399990849913013e-06, - "loss": 0.1892, - "step": 13993 - }, - { - "epoch": 1.3182920797908668, - "grad_norm": 0.6537177562713623, - "learning_rate": 5.3986501294457396e-06, - "loss": 0.1859, - "step": 13994 - }, - { - "epoch": 1.3183862838840348, - "grad_norm": 0.6509084105491638, - "learning_rate": 5.397309513896356e-06, - "loss": 0.1821, - "step": 13995 - }, - { - "epoch": 1.3184804879772027, - "grad_norm": 0.575751543045044, - "learning_rate": 5.395969003295434e-06, - "loss": 0.1887, - "step": 13996 - }, - { - "epoch": 1.3185746920703705, - "grad_norm": 1.0063947439193726, - "learning_rate": 5.394628597673541e-06, - "loss": 0.2199, - "step": 13997 - }, - { - "epoch": 1.3186688961635382, - "grad_norm": 0.7250304818153381, - "learning_rate": 5.393288297061237e-06, - "loss": 0.2445, - "step": 13998 - }, - { - "epoch": 1.3187631002567062, - "grad_norm": 0.6793080568313599, - "learning_rate": 5.391948101489083e-06, - "loss": 0.2043, - "step": 13999 - }, - { - "epoch": 1.3188573043498741, - "grad_norm": 0.6111436486244202, - "learning_rate": 5.3906080109876435e-06, - "loss": 0.2122, - "step": 14000 - }, - { - "epoch": 1.3189515084430419, - "grad_norm": 0.6745696663856506, - "learning_rate": 5.389268025587465e-06, - "loss": 0.2288, - "step": 14001 - }, - { - "epoch": 1.3190457125362096, - "grad_norm": 0.6720831990242004, - "learning_rate": 5.38792814531911e-06, - "loss": 0.1859, - "step": 14002 - }, - { - "epoch": 1.3191399166293776, - "grad_norm": 0.6277100443840027, - "learning_rate": 5.386588370213124e-06, - "loss": 0.1928, - "step": 14003 - }, - { - "epoch": 1.3192341207225453, - "grad_norm": 0.6584670543670654, - "learning_rate": 5.385248700300055e-06, - "loss": 0.2018, - "step": 14004 - }, - { - "epoch": 1.3193283248157133, - "grad_norm": 0.5974646210670471, - "learning_rate": 5.383909135610459e-06, - "loss": 0.1668, - "step": 14005 - }, - { - "epoch": 1.319422528908881, - "grad_norm": 0.7366122603416443, - "learning_rate": 5.382569676174868e-06, - "loss": 0.211, - "step": 14006 - }, - { - "epoch": 1.319516733002049, - "grad_norm": 0.6093755960464478, - "learning_rate": 5.3812303220238295e-06, - "loss": 0.1678, - "step": 14007 - }, - { - "epoch": 1.3196109370952167, - "grad_norm": 0.6341589093208313, - "learning_rate": 
5.379891073187888e-06, - "loss": 0.1884, - "step": 14008 - }, - { - "epoch": 1.3197051411883847, - "grad_norm": 0.6902104616165161, - "learning_rate": 5.378551929697571e-06, - "loss": 0.2128, - "step": 14009 - }, - { - "epoch": 1.3197993452815524, - "grad_norm": 0.6470848321914673, - "learning_rate": 5.377212891583419e-06, - "loss": 0.1853, - "step": 14010 - }, - { - "epoch": 1.3198935493747204, - "grad_norm": 0.6759555339813232, - "learning_rate": 5.375873958875961e-06, - "loss": 0.2055, - "step": 14011 - }, - { - "epoch": 1.319987753467888, - "grad_norm": 0.6148934960365295, - "learning_rate": 5.374535131605731e-06, - "loss": 0.2123, - "step": 14012 - }, - { - "epoch": 1.320081957561056, - "grad_norm": 0.6794771552085876, - "learning_rate": 5.3731964098032495e-06, - "loss": 0.2107, - "step": 14013 - }, - { - "epoch": 1.3201761616542238, - "grad_norm": 0.6981692314147949, - "learning_rate": 5.3718577934990446e-06, - "loss": 0.1738, - "step": 14014 - }, - { - "epoch": 1.3202703657473918, - "grad_norm": 0.6872310042381287, - "learning_rate": 5.370519282723644e-06, - "loss": 0.1796, - "step": 14015 - }, - { - "epoch": 1.3203645698405595, - "grad_norm": 0.6473656296730042, - "learning_rate": 5.36918087750756e-06, - "loss": 0.2132, - "step": 14016 - }, - { - "epoch": 1.3204587739337275, - "grad_norm": 0.6503841876983643, - "learning_rate": 5.367842577881313e-06, - "loss": 0.2144, - "step": 14017 - }, - { - "epoch": 1.3205529780268952, - "grad_norm": 0.666084349155426, - "learning_rate": 5.3665043838754235e-06, - "loss": 0.2195, - "step": 14018 - }, - { - "epoch": 1.3206471821200632, - "grad_norm": 0.7067003846168518, - "learning_rate": 5.365166295520395e-06, - "loss": 0.2094, - "step": 14019 - }, - { - "epoch": 1.320741386213231, - "grad_norm": 0.6496502161026001, - "learning_rate": 5.363828312846744e-06, - "loss": 0.2137, - "step": 14020 - }, - { - "epoch": 1.3208355903063989, - "grad_norm": 0.6001673340797424, - "learning_rate": 5.362490435884982e-06, - "loss": 0.2141, - "step": 14021 - }, - { - "epoch": 1.3209297943995666, - "grad_norm": 0.6753737926483154, - "learning_rate": 5.361152664665608e-06, - "loss": 0.2276, - "step": 14022 - }, - { - "epoch": 1.3210239984927346, - "grad_norm": 0.595683217048645, - "learning_rate": 5.3598149992191204e-06, - "loss": 0.1954, - "step": 14023 - }, - { - "epoch": 1.3211182025859023, - "grad_norm": 0.6467253565788269, - "learning_rate": 5.358477439576035e-06, - "loss": 0.2154, - "step": 14024 - }, - { - "epoch": 1.3212124066790703, - "grad_norm": 0.7814813852310181, - "learning_rate": 5.357139985766843e-06, - "loss": 0.1985, - "step": 14025 - }, - { - "epoch": 1.321306610772238, - "grad_norm": 0.6634308695793152, - "learning_rate": 5.355802637822031e-06, - "loss": 0.1991, - "step": 14026 - }, - { - "epoch": 1.321400814865406, - "grad_norm": 0.6398400068283081, - "learning_rate": 5.354465395772108e-06, - "loss": 0.1898, - "step": 14027 - }, - { - "epoch": 1.3214950189585737, - "grad_norm": 0.6499345302581787, - "learning_rate": 5.35312825964756e-06, - "loss": 0.2161, - "step": 14028 - }, - { - "epoch": 1.3215892230517416, - "grad_norm": 0.6460532546043396, - "learning_rate": 5.351791229478866e-06, - "loss": 0.2072, - "step": 14029 - }, - { - "epoch": 1.3216834271449094, - "grad_norm": 0.6906152963638306, - "learning_rate": 5.350454305296529e-06, - "loss": 0.2221, - "step": 14030 - }, - { - "epoch": 1.3217776312380773, - "grad_norm": 0.7326046228408813, - "learning_rate": 5.349117487131024e-06, - "loss": 0.2123, - "step": 14031 - }, - { - "epoch": 
1.321871835331245, - "grad_norm": 0.6765629053115845, - "learning_rate": 5.347780775012828e-06, - "loss": 0.2138, - "step": 14032 - }, - { - "epoch": 1.321966039424413, - "grad_norm": 0.6705592274665833, - "learning_rate": 5.346444168972426e-06, - "loss": 0.2313, - "step": 14033 - }, - { - "epoch": 1.3220602435175808, - "grad_norm": 0.6718334555625916, - "learning_rate": 5.345107669040298e-06, - "loss": 0.2331, - "step": 14034 - }, - { - "epoch": 1.3221544476107487, - "grad_norm": 0.6745386123657227, - "learning_rate": 5.34377127524691e-06, - "loss": 0.1979, - "step": 14035 - }, - { - "epoch": 1.3222486517039165, - "grad_norm": 0.6806681156158447, - "learning_rate": 5.342434987622738e-06, - "loss": 0.2322, - "step": 14036 - }, - { - "epoch": 1.3223428557970844, - "grad_norm": 0.6321537494659424, - "learning_rate": 5.3410988061982545e-06, - "loss": 0.1948, - "step": 14037 - }, - { - "epoch": 1.3224370598902522, - "grad_norm": 0.6890372633934021, - "learning_rate": 5.3397627310039215e-06, - "loss": 0.2133, - "step": 14038 - }, - { - "epoch": 1.3225312639834201, - "grad_norm": 0.5901936292648315, - "learning_rate": 5.338426762070204e-06, - "loss": 0.1788, - "step": 14039 - }, - { - "epoch": 1.3226254680765879, - "grad_norm": 0.7906286120414734, - "learning_rate": 5.337090899427568e-06, - "loss": 0.207, - "step": 14040 - }, - { - "epoch": 1.3227196721697558, - "grad_norm": 0.7934432625770569, - "learning_rate": 5.335755143106469e-06, - "loss": 0.2294, - "step": 14041 - }, - { - "epoch": 1.3228138762629236, - "grad_norm": 0.6328506469726562, - "learning_rate": 5.33441949313737e-06, - "loss": 0.205, - "step": 14042 - }, - { - "epoch": 1.3229080803560915, - "grad_norm": 0.5785630941390991, - "learning_rate": 5.333083949550717e-06, - "loss": 0.1797, - "step": 14043 - }, - { - "epoch": 1.3230022844492593, - "grad_norm": 0.6074365973472595, - "learning_rate": 5.3317485123769685e-06, - "loss": 0.1917, - "step": 14044 - }, - { - "epoch": 1.3230964885424272, - "grad_norm": 0.6671708822250366, - "learning_rate": 5.330413181646577e-06, - "loss": 0.2148, - "step": 14045 - }, - { - "epoch": 1.323190692635595, - "grad_norm": 0.6195216774940491, - "learning_rate": 5.329077957389982e-06, - "loss": 0.1682, - "step": 14046 - }, - { - "epoch": 1.323284896728763, - "grad_norm": 0.7096898555755615, - "learning_rate": 5.327742839637637e-06, - "loss": 0.19, - "step": 14047 - }, - { - "epoch": 1.3233791008219307, - "grad_norm": 0.9569317102432251, - "learning_rate": 5.32640782841998e-06, - "loss": 0.2011, - "step": 14048 - }, - { - "epoch": 1.3234733049150986, - "grad_norm": 0.6340876817703247, - "learning_rate": 5.325072923767449e-06, - "loss": 0.1843, - "step": 14049 - }, - { - "epoch": 1.3235675090082664, - "grad_norm": 0.6779504418373108, - "learning_rate": 5.323738125710492e-06, - "loss": 0.2316, - "step": 14050 - }, - { - "epoch": 1.3236617131014343, - "grad_norm": 0.6356896162033081, - "learning_rate": 5.322403434279531e-06, - "loss": 0.2039, - "step": 14051 - }, - { - "epoch": 1.323755917194602, - "grad_norm": 0.5953193306922913, - "learning_rate": 5.321068849505011e-06, - "loss": 0.2044, - "step": 14052 - }, - { - "epoch": 1.32385012128777, - "grad_norm": 0.6592140793800354, - "learning_rate": 5.319734371417353e-06, - "loss": 0.1971, - "step": 14053 - }, - { - "epoch": 1.3239443253809378, - "grad_norm": 0.6701865196228027, - "learning_rate": 5.318400000046991e-06, - "loss": 0.189, - "step": 14054 - }, - { - "epoch": 1.3240385294741057, - "grad_norm": 0.6589224934577942, - "learning_rate": 
5.317065735424352e-06, - "loss": 0.2135, - "step": 14055 - }, - { - "epoch": 1.3241327335672735, - "grad_norm": 0.7157045006752014, - "learning_rate": 5.315731577579853e-06, - "loss": 0.2019, - "step": 14056 - }, - { - "epoch": 1.3242269376604414, - "grad_norm": 0.6550059914588928, - "learning_rate": 5.314397526543917e-06, - "loss": 0.1998, - "step": 14057 - }, - { - "epoch": 1.3243211417536092, - "grad_norm": 0.6386902332305908, - "learning_rate": 5.313063582346969e-06, - "loss": 0.2047, - "step": 14058 - }, - { - "epoch": 1.3244153458467771, - "grad_norm": 0.6854854822158813, - "learning_rate": 5.311729745019416e-06, - "loss": 0.2276, - "step": 14059 - }, - { - "epoch": 1.3245095499399449, - "grad_norm": 0.6264967918395996, - "learning_rate": 5.310396014591675e-06, - "loss": 0.1905, - "step": 14060 - }, - { - "epoch": 1.3246037540331128, - "grad_norm": 0.674786388874054, - "learning_rate": 5.309062391094161e-06, - "loss": 0.2458, - "step": 14061 - }, - { - "epoch": 1.3246979581262806, - "grad_norm": 0.6399668455123901, - "learning_rate": 5.3077288745572794e-06, - "loss": 0.1985, - "step": 14062 - }, - { - "epoch": 1.3247921622194485, - "grad_norm": 0.6307435631752014, - "learning_rate": 5.306395465011429e-06, - "loss": 0.2084, - "step": 14063 - }, - { - "epoch": 1.3248863663126162, - "grad_norm": 0.686042845249176, - "learning_rate": 5.305062162487028e-06, - "loss": 0.1987, - "step": 14064 - }, - { - "epoch": 1.3249805704057842, - "grad_norm": 0.6557334065437317, - "learning_rate": 5.30372896701447e-06, - "loss": 0.2039, - "step": 14065 - }, - { - "epoch": 1.325074774498952, - "grad_norm": 0.7092450857162476, - "learning_rate": 5.302395878624148e-06, - "loss": 0.1889, - "step": 14066 - }, - { - "epoch": 1.32516897859212, - "grad_norm": 0.7731506824493408, - "learning_rate": 5.3010628973464716e-06, - "loss": 0.222, - "step": 14067 - }, - { - "epoch": 1.3252631826852876, - "grad_norm": 0.6983768343925476, - "learning_rate": 5.299730023211829e-06, - "loss": 0.2242, - "step": 14068 - }, - { - "epoch": 1.3253573867784556, - "grad_norm": 0.6925291419029236, - "learning_rate": 5.2983972562506025e-06, - "loss": 0.1994, - "step": 14069 - }, - { - "epoch": 1.3254515908716233, - "grad_norm": 0.6160935163497925, - "learning_rate": 5.297064596493198e-06, - "loss": 0.1922, - "step": 14070 - }, - { - "epoch": 1.3255457949647913, - "grad_norm": 0.6398634314537048, - "learning_rate": 5.2957320439699944e-06, - "loss": 0.2239, - "step": 14071 - }, - { - "epoch": 1.325639999057959, - "grad_norm": 0.5992037057876587, - "learning_rate": 5.2943995987113706e-06, - "loss": 0.1808, - "step": 14072 - }, - { - "epoch": 1.3257342031511268, - "grad_norm": 0.6360060572624207, - "learning_rate": 5.293067260747714e-06, - "loss": 0.2062, - "step": 14073 - }, - { - "epoch": 1.3258284072442947, - "grad_norm": 0.6519641280174255, - "learning_rate": 5.291735030109407e-06, - "loss": 0.2274, - "step": 14074 - }, - { - "epoch": 1.3259226113374627, - "grad_norm": 0.6662633419036865, - "learning_rate": 5.290402906826819e-06, - "loss": 0.2107, - "step": 14075 - }, - { - "epoch": 1.3260168154306304, - "grad_norm": 0.6444947123527527, - "learning_rate": 5.289070890930328e-06, - "loss": 0.2291, - "step": 14076 - }, - { - "epoch": 1.3261110195237982, - "grad_norm": 0.6289883255958557, - "learning_rate": 5.28773898245031e-06, - "loss": 0.183, - "step": 14077 - }, - { - "epoch": 1.3262052236169661, - "grad_norm": 0.6775181293487549, - "learning_rate": 5.286407181417128e-06, - "loss": 0.1887, - "step": 14078 - }, - { - "epoch": 
1.326299427710134, - "grad_norm": 0.7216343283653259, - "learning_rate": 5.285075487861151e-06, - "loss": 0.1822, - "step": 14079 - }, - { - "epoch": 1.3263936318033018, - "grad_norm": 0.5782265663146973, - "learning_rate": 5.283743901812748e-06, - "loss": 0.1906, - "step": 14080 - }, - { - "epoch": 1.3264878358964696, - "grad_norm": 0.6510884761810303, - "learning_rate": 5.282412423302274e-06, - "loss": 0.2207, - "step": 14081 - }, - { - "epoch": 1.3265820399896375, - "grad_norm": 0.625861406326294, - "learning_rate": 5.281081052360098e-06, - "loss": 0.1882, - "step": 14082 - }, - { - "epoch": 1.3266762440828055, - "grad_norm": 0.602749764919281, - "learning_rate": 5.279749789016567e-06, - "loss": 0.1887, - "step": 14083 - }, - { - "epoch": 1.3267704481759732, - "grad_norm": 0.6150605082511902, - "learning_rate": 5.278418633302041e-06, - "loss": 0.1932, - "step": 14084 - }, - { - "epoch": 1.326864652269141, - "grad_norm": 0.7779563665390015, - "learning_rate": 5.277087585246876e-06, - "loss": 0.2358, - "step": 14085 - }, - { - "epoch": 1.326958856362309, - "grad_norm": 0.7492592930793762, - "learning_rate": 5.275756644881416e-06, - "loss": 0.2374, - "step": 14086 - }, - { - "epoch": 1.3270530604554769, - "grad_norm": 0.6166776418685913, - "learning_rate": 5.274425812236013e-06, - "loss": 0.196, - "step": 14087 - }, - { - "epoch": 1.3271472645486446, - "grad_norm": 0.7160036563873291, - "learning_rate": 5.2730950873410045e-06, - "loss": 0.2326, - "step": 14088 - }, - { - "epoch": 1.3272414686418124, - "grad_norm": 0.6125269532203674, - "learning_rate": 5.27176447022674e-06, - "loss": 0.2014, - "step": 14089 - }, - { - "epoch": 1.3273356727349803, - "grad_norm": 0.6119320392608643, - "learning_rate": 5.270433960923562e-06, - "loss": 0.2027, - "step": 14090 - }, - { - "epoch": 1.3274298768281483, - "grad_norm": 0.6584473252296448, - "learning_rate": 5.269103559461799e-06, - "loss": 0.2095, - "step": 14091 - }, - { - "epoch": 1.327524080921316, - "grad_norm": 0.5868294835090637, - "learning_rate": 5.2677732658717965e-06, - "loss": 0.1906, - "step": 14092 - }, - { - "epoch": 1.3276182850144838, - "grad_norm": 0.6940475106239319, - "learning_rate": 5.266443080183879e-06, - "loss": 0.1973, - "step": 14093 - }, - { - "epoch": 1.3277124891076517, - "grad_norm": 0.7088175415992737, - "learning_rate": 5.265113002428377e-06, - "loss": 0.2628, - "step": 14094 - }, - { - "epoch": 1.3278066932008197, - "grad_norm": 0.5621417760848999, - "learning_rate": 5.263783032635627e-06, - "loss": 0.1865, - "step": 14095 - }, - { - "epoch": 1.3279008972939874, - "grad_norm": 0.6609739661216736, - "learning_rate": 5.262453170835944e-06, - "loss": 0.2021, - "step": 14096 - }, - { - "epoch": 1.3279951013871552, - "grad_norm": 0.7245957851409912, - "learning_rate": 5.261123417059656e-06, - "loss": 0.1936, - "step": 14097 - }, - { - "epoch": 1.3280893054803231, - "grad_norm": 1.0036548376083374, - "learning_rate": 5.2597937713370865e-06, - "loss": 0.1882, - "step": 14098 - }, - { - "epoch": 1.328183509573491, - "grad_norm": 0.6218024492263794, - "learning_rate": 5.258464233698546e-06, - "loss": 0.1988, - "step": 14099 - }, - { - "epoch": 1.3282777136666588, - "grad_norm": 0.6181890964508057, - "learning_rate": 5.257134804174353e-06, - "loss": 0.18, - "step": 14100 - }, - { - "epoch": 1.3283719177598265, - "grad_norm": 0.6878771781921387, - "learning_rate": 5.255805482794827e-06, - "loss": 0.1988, - "step": 14101 - }, - { - "epoch": 1.3284661218529945, - "grad_norm": 0.659048855304718, - "learning_rate": 
5.2544762695902715e-06, - "loss": 0.1829, - "step": 14102 - }, - { - "epoch": 1.3285603259461625, - "grad_norm": 0.6403631567955017, - "learning_rate": 5.25314716459099e-06, - "loss": 0.2128, - "step": 14103 - }, - { - "epoch": 1.3286545300393302, - "grad_norm": 0.6543837785720825, - "learning_rate": 5.2518181678273e-06, - "loss": 0.1944, - "step": 14104 - }, - { - "epoch": 1.328748734132498, - "grad_norm": 0.605215847492218, - "learning_rate": 5.250489279329501e-06, - "loss": 0.2278, - "step": 14105 - }, - { - "epoch": 1.328842938225666, - "grad_norm": 0.6835913062095642, - "learning_rate": 5.249160499127883e-06, - "loss": 0.1957, - "step": 14106 - }, - { - "epoch": 1.3289371423188339, - "grad_norm": 0.6238964796066284, - "learning_rate": 5.247831827252761e-06, - "loss": 0.1934, - "step": 14107 - }, - { - "epoch": 1.3290313464120016, - "grad_norm": 0.639494001865387, - "learning_rate": 5.246503263734422e-06, - "loss": 0.2141, - "step": 14108 - }, - { - "epoch": 1.3291255505051693, - "grad_norm": 0.6721289753913879, - "learning_rate": 5.245174808603153e-06, - "loss": 0.1906, - "step": 14109 - }, - { - "epoch": 1.3292197545983373, - "grad_norm": 0.6276894211769104, - "learning_rate": 5.24384646188926e-06, - "loss": 0.1941, - "step": 14110 - }, - { - "epoch": 1.3293139586915053, - "grad_norm": 0.6959485411643982, - "learning_rate": 5.242518223623022e-06, - "loss": 0.1932, - "step": 14111 - }, - { - "epoch": 1.329408162784673, - "grad_norm": 0.677562415599823, - "learning_rate": 5.2411900938347224e-06, - "loss": 0.1928, - "step": 14112 - }, - { - "epoch": 1.3295023668778407, - "grad_norm": 0.6668716669082642, - "learning_rate": 5.239862072554649e-06, - "loss": 0.1907, - "step": 14113 - }, - { - "epoch": 1.3295965709710087, - "grad_norm": 0.6348514556884766, - "learning_rate": 5.238534159813085e-06, - "loss": 0.2039, - "step": 14114 - }, - { - "epoch": 1.3296907750641767, - "grad_norm": 0.6691227555274963, - "learning_rate": 5.237206355640303e-06, - "loss": 0.2223, - "step": 14115 - }, - { - "epoch": 1.3297849791573444, - "grad_norm": 0.6502666473388672, - "learning_rate": 5.235878660066582e-06, - "loss": 0.2171, - "step": 14116 - }, - { - "epoch": 1.3298791832505121, - "grad_norm": 0.5818041563034058, - "learning_rate": 5.234551073122199e-06, - "loss": 0.2066, - "step": 14117 - }, - { - "epoch": 1.32997338734368, - "grad_norm": 0.7705210447311401, - "learning_rate": 5.2332235948374165e-06, - "loss": 0.2094, - "step": 14118 - }, - { - "epoch": 1.330067591436848, - "grad_norm": 0.6761450171470642, - "learning_rate": 5.23189622524251e-06, - "loss": 0.2167, - "step": 14119 - }, - { - "epoch": 1.3301617955300158, - "grad_norm": 0.6264675855636597, - "learning_rate": 5.230568964367746e-06, - "loss": 0.1916, - "step": 14120 - }, - { - "epoch": 1.3302559996231835, - "grad_norm": 0.5645196437835693, - "learning_rate": 5.229241812243382e-06, - "loss": 0.2047, - "step": 14121 - }, - { - "epoch": 1.3303502037163515, - "grad_norm": 0.6507833003997803, - "learning_rate": 5.227914768899687e-06, - "loss": 0.2037, - "step": 14122 - }, - { - "epoch": 1.3304444078095194, - "grad_norm": 0.6598931550979614, - "learning_rate": 5.226587834366911e-06, - "loss": 0.2128, - "step": 14123 - }, - { - "epoch": 1.3305386119026872, - "grad_norm": 0.6404772996902466, - "learning_rate": 5.225261008675315e-06, - "loss": 0.185, - "step": 14124 - }, - { - "epoch": 1.330632815995855, - "grad_norm": 0.6718472242355347, - "learning_rate": 5.223934291855155e-06, - "loss": 0.1919, - "step": 14125 - }, - { - "epoch": 
1.3307270200890229, - "grad_norm": 0.6085643172264099, - "learning_rate": 5.222607683936678e-06, - "loss": 0.1915, - "step": 14126 - }, - { - "epoch": 1.3308212241821908, - "grad_norm": 0.7111129760742188, - "learning_rate": 5.221281184950135e-06, - "loss": 0.2161, - "step": 14127 - }, - { - "epoch": 1.3309154282753586, - "grad_norm": 0.6403906345367432, - "learning_rate": 5.21995479492577e-06, - "loss": 0.2101, - "step": 14128 - }, - { - "epoch": 1.3310096323685263, - "grad_norm": 0.7159192562103271, - "learning_rate": 5.218628513893829e-06, - "loss": 0.1727, - "step": 14129 - }, - { - "epoch": 1.3311038364616943, - "grad_norm": 0.6612852215766907, - "learning_rate": 5.217302341884554e-06, - "loss": 0.2318, - "step": 14130 - }, - { - "epoch": 1.3311980405548622, - "grad_norm": 0.6722060441970825, - "learning_rate": 5.215976278928179e-06, - "loss": 0.2087, - "step": 14131 - }, - { - "epoch": 1.33129224464803, - "grad_norm": 0.6088632941246033, - "learning_rate": 5.214650325054947e-06, - "loss": 0.1798, - "step": 14132 - }, - { - "epoch": 1.3313864487411977, - "grad_norm": 0.7001636624336243, - "learning_rate": 5.2133244802950855e-06, - "loss": 0.1932, - "step": 14133 - }, - { - "epoch": 1.3314806528343657, - "grad_norm": 0.6143582463264465, - "learning_rate": 5.211998744678828e-06, - "loss": 0.186, - "step": 14134 - }, - { - "epoch": 1.3315748569275336, - "grad_norm": 0.678594708442688, - "learning_rate": 5.210673118236407e-06, - "loss": 0.2423, - "step": 14135 - }, - { - "epoch": 1.3316690610207014, - "grad_norm": 1.064894199371338, - "learning_rate": 5.209347600998043e-06, - "loss": 0.1962, - "step": 14136 - }, - { - "epoch": 1.331763265113869, - "grad_norm": 0.6495254635810852, - "learning_rate": 5.208022192993963e-06, - "loss": 0.2058, - "step": 14137 - }, - { - "epoch": 1.331857469207037, - "grad_norm": 0.6739198565483093, - "learning_rate": 5.206696894254389e-06, - "loss": 0.2022, - "step": 14138 - }, - { - "epoch": 1.331951673300205, - "grad_norm": 0.6579096913337708, - "learning_rate": 5.205371704809537e-06, - "loss": 0.2033, - "step": 14139 - }, - { - "epoch": 1.3320458773933728, - "grad_norm": 0.5793762803077698, - "learning_rate": 5.204046624689625e-06, - "loss": 0.1724, - "step": 14140 - }, - { - "epoch": 1.3321400814865405, - "grad_norm": 0.6498485803604126, - "learning_rate": 5.202721653924872e-06, - "loss": 0.188, - "step": 14141 - }, - { - "epoch": 1.3322342855797085, - "grad_norm": 0.587631344795227, - "learning_rate": 5.201396792545483e-06, - "loss": 0.1688, - "step": 14142 - }, - { - "epoch": 1.3323284896728762, - "grad_norm": 0.7063856720924377, - "learning_rate": 5.200072040581661e-06, - "loss": 0.2104, - "step": 14143 - }, - { - "epoch": 1.3324226937660442, - "grad_norm": 0.6430703997612, - "learning_rate": 5.1987473980636275e-06, - "loss": 0.1869, - "step": 14144 - }, - { - "epoch": 1.332516897859212, - "grad_norm": 0.6656419038772583, - "learning_rate": 5.197422865021577e-06, - "loss": 0.2291, - "step": 14145 - }, - { - "epoch": 1.3326111019523799, - "grad_norm": 0.6350959539413452, - "learning_rate": 5.196098441485707e-06, - "loss": 0.2052, - "step": 14146 - }, - { - "epoch": 1.3327053060455476, - "grad_norm": 0.7392879128456116, - "learning_rate": 5.19477412748623e-06, - "loss": 0.2164, - "step": 14147 - }, - { - "epoch": 1.3327995101387156, - "grad_norm": 0.6880599856376648, - "learning_rate": 5.1934499230533335e-06, - "loss": 0.2199, - "step": 14148 - }, - { - "epoch": 1.3328937142318833, - "grad_norm": 0.6650460958480835, - "learning_rate": 
5.192125828217203e-06, - "loss": 0.184, - "step": 14149 - }, - { - "epoch": 1.3329879183250513, - "grad_norm": 0.667985737323761, - "learning_rate": 5.190801843008049e-06, - "loss": 0.1913, - "step": 14150 - }, - { - "epoch": 1.333082122418219, - "grad_norm": 0.6559428572654724, - "learning_rate": 5.189477967456049e-06, - "loss": 0.2034, - "step": 14151 - }, - { - "epoch": 1.333176326511387, - "grad_norm": 0.671495258808136, - "learning_rate": 5.188154201591387e-06, - "loss": 0.1852, - "step": 14152 - }, - { - "epoch": 1.3332705306045547, - "grad_norm": 0.7077657580375671, - "learning_rate": 5.186830545444252e-06, - "loss": 0.2078, - "step": 14153 - }, - { - "epoch": 1.3333647346977227, - "grad_norm": 0.6105444431304932, - "learning_rate": 5.185506999044828e-06, - "loss": 0.206, - "step": 14154 - }, - { - "epoch": 1.3334589387908904, - "grad_norm": 0.7339476346969604, - "learning_rate": 5.184183562423286e-06, - "loss": 0.2156, - "step": 14155 - }, - { - "epoch": 1.3335531428840584, - "grad_norm": 0.6311266422271729, - "learning_rate": 5.182860235609807e-06, - "loss": 0.2014, - "step": 14156 - }, - { - "epoch": 1.333647346977226, - "grad_norm": 0.6553947329521179, - "learning_rate": 5.181537018634568e-06, - "loss": 0.2231, - "step": 14157 - }, - { - "epoch": 1.333741551070394, - "grad_norm": 0.6227595210075378, - "learning_rate": 5.180213911527733e-06, - "loss": 0.2078, - "step": 14158 - }, - { - "epoch": 1.3338357551635618, - "grad_norm": 0.6377168297767639, - "learning_rate": 5.178890914319474e-06, - "loss": 0.1951, - "step": 14159 - }, - { - "epoch": 1.3339299592567297, - "grad_norm": 0.7014271020889282, - "learning_rate": 5.177568027039963e-06, - "loss": 0.2146, - "step": 14160 - }, - { - "epoch": 1.3340241633498975, - "grad_norm": 0.6947354674339294, - "learning_rate": 5.176245249719355e-06, - "loss": 0.1796, - "step": 14161 - }, - { - "epoch": 1.3341183674430654, - "grad_norm": 0.728440523147583, - "learning_rate": 5.174922582387819e-06, - "loss": 0.2094, - "step": 14162 - }, - { - "epoch": 1.3342125715362332, - "grad_norm": 0.6573033928871155, - "learning_rate": 5.173600025075507e-06, - "loss": 0.1899, - "step": 14163 - }, - { - "epoch": 1.3343067756294011, - "grad_norm": 0.6331418752670288, - "learning_rate": 5.1722775778125815e-06, - "loss": 0.2263, - "step": 14164 - }, - { - "epoch": 1.3344009797225689, - "grad_norm": 0.6571514010429382, - "learning_rate": 5.17095524062919e-06, - "loss": 0.1897, - "step": 14165 - }, - { - "epoch": 1.3344951838157368, - "grad_norm": 0.6402251720428467, - "learning_rate": 5.169633013555487e-06, - "loss": 0.1998, - "step": 14166 - }, - { - "epoch": 1.3345893879089046, - "grad_norm": 0.7158523797988892, - "learning_rate": 5.168310896621626e-06, - "loss": 0.2141, - "step": 14167 - }, - { - "epoch": 1.3346835920020725, - "grad_norm": 0.5873801112174988, - "learning_rate": 5.166988889857745e-06, - "loss": 0.2081, - "step": 14168 - }, - { - "epoch": 1.3347777960952403, - "grad_norm": 0.7097365856170654, - "learning_rate": 5.165666993293992e-06, - "loss": 0.2091, - "step": 14169 - }, - { - "epoch": 1.3348720001884082, - "grad_norm": 0.873871922492981, - "learning_rate": 5.1643452069605105e-06, - "loss": 0.1907, - "step": 14170 - }, - { - "epoch": 1.334966204281576, - "grad_norm": 0.7245442867279053, - "learning_rate": 5.163023530887435e-06, - "loss": 0.2258, - "step": 14171 - }, - { - "epoch": 1.335060408374744, - "grad_norm": 0.6157423853874207, - "learning_rate": 5.161701965104906e-06, - "loss": 0.1792, - "step": 14172 - }, - { - "epoch": 
1.3351546124679117, - "grad_norm": 0.656654953956604, - "learning_rate": 5.1603805096430506e-06, - "loss": 0.206, - "step": 14173 - }, - { - "epoch": 1.3352488165610796, - "grad_norm": 0.6471630334854126, - "learning_rate": 5.159059164532005e-06, - "loss": 0.1657, - "step": 14174 - }, - { - "epoch": 1.3353430206542474, - "grad_norm": 0.6502017378807068, - "learning_rate": 5.1577379298019e-06, - "loss": 0.1684, - "step": 14175 - }, - { - "epoch": 1.3354372247474153, - "grad_norm": 0.6079532504081726, - "learning_rate": 5.1564168054828555e-06, - "loss": 0.1905, - "step": 14176 - }, - { - "epoch": 1.335531428840583, - "grad_norm": 0.692517876625061, - "learning_rate": 5.155095791605e-06, - "loss": 0.2175, - "step": 14177 - }, - { - "epoch": 1.335625632933751, - "grad_norm": 0.6979208588600159, - "learning_rate": 5.153774888198455e-06, - "loss": 0.1987, - "step": 14178 - }, - { - "epoch": 1.3357198370269188, - "grad_norm": 0.5712969303131104, - "learning_rate": 5.1524540952933345e-06, - "loss": 0.173, - "step": 14179 - }, - { - "epoch": 1.3358140411200867, - "grad_norm": 0.6468443274497986, - "learning_rate": 5.151133412919758e-06, - "loss": 0.2328, - "step": 14180 - }, - { - "epoch": 1.3359082452132545, - "grad_norm": 0.6241378784179688, - "learning_rate": 5.149812841107842e-06, - "loss": 0.2098, - "step": 14181 - }, - { - "epoch": 1.3360024493064224, - "grad_norm": 0.5934635996818542, - "learning_rate": 5.1484923798876926e-06, - "loss": 0.1844, - "step": 14182 - }, - { - "epoch": 1.3360966533995902, - "grad_norm": 0.6325280070304871, - "learning_rate": 5.147172029289415e-06, - "loss": 0.1992, - "step": 14183 - }, - { - "epoch": 1.3361908574927581, - "grad_norm": 0.6002156138420105, - "learning_rate": 5.145851789343126e-06, - "loss": 0.177, - "step": 14184 - }, - { - "epoch": 1.3362850615859259, - "grad_norm": 0.6655933856964111, - "learning_rate": 5.1445316600789245e-06, - "loss": 0.1864, - "step": 14185 - }, - { - "epoch": 1.3363792656790938, - "grad_norm": 0.6828367710113525, - "learning_rate": 5.1432116415269014e-06, - "loss": 0.1963, - "step": 14186 - }, - { - "epoch": 1.3364734697722616, - "grad_norm": 0.7178785800933838, - "learning_rate": 5.141891733717173e-06, - "loss": 0.1999, - "step": 14187 - }, - { - "epoch": 1.3365676738654295, - "grad_norm": 0.6871474385261536, - "learning_rate": 5.140571936679825e-06, - "loss": 0.2338, - "step": 14188 - }, - { - "epoch": 1.3366618779585973, - "grad_norm": 0.6616499423980713, - "learning_rate": 5.139252250444948e-06, - "loss": 0.2309, - "step": 14189 - }, - { - "epoch": 1.3367560820517652, - "grad_norm": 0.6596075892448425, - "learning_rate": 5.137932675042638e-06, - "loss": 0.1888, - "step": 14190 - }, - { - "epoch": 1.336850286144933, - "grad_norm": 0.6166261434555054, - "learning_rate": 5.136613210502986e-06, - "loss": 0.1731, - "step": 14191 - }, - { - "epoch": 1.336944490238101, - "grad_norm": 0.7741532921791077, - "learning_rate": 5.13529385685607e-06, - "loss": 0.1808, - "step": 14192 - }, - { - "epoch": 1.3370386943312687, - "grad_norm": 0.6858149766921997, - "learning_rate": 5.133974614131978e-06, - "loss": 0.2063, - "step": 14193 - }, - { - "epoch": 1.3371328984244366, - "grad_norm": 0.9508662223815918, - "learning_rate": 5.132655482360795e-06, - "loss": 0.1987, - "step": 14194 - }, - { - "epoch": 1.3372271025176043, - "grad_norm": 0.6633398532867432, - "learning_rate": 5.1313364615725895e-06, - "loss": 0.217, - "step": 14195 - }, - { - "epoch": 1.3373213066107723, - "grad_norm": 0.6646075248718262, - "learning_rate": 
5.1300175517974415e-06, - "loss": 0.195, - "step": 14196 - }, - { - "epoch": 1.33741551070394, - "grad_norm": 0.6978083252906799, - "learning_rate": 5.128698753065429e-06, - "loss": 0.226, - "step": 14197 - }, - { - "epoch": 1.337509714797108, - "grad_norm": 0.5929834842681885, - "learning_rate": 5.127380065406615e-06, - "loss": 0.2008, - "step": 14198 - }, - { - "epoch": 1.3376039188902757, - "grad_norm": 0.6304779052734375, - "learning_rate": 5.126061488851072e-06, - "loss": 0.1965, - "step": 14199 - }, - { - "epoch": 1.3376981229834437, - "grad_norm": 0.6507366895675659, - "learning_rate": 5.124743023428867e-06, - "loss": 0.1939, - "step": 14200 - }, - { - "epoch": 1.3377923270766114, - "grad_norm": 0.6634584665298462, - "learning_rate": 5.123424669170058e-06, - "loss": 0.201, - "step": 14201 - }, - { - "epoch": 1.3378865311697794, - "grad_norm": 0.7605651617050171, - "learning_rate": 5.122106426104713e-06, - "loss": 0.2121, - "step": 14202 - }, - { - "epoch": 1.3379807352629471, - "grad_norm": 0.8019225597381592, - "learning_rate": 5.1207882942628795e-06, - "loss": 0.2436, - "step": 14203 - }, - { - "epoch": 1.338074939356115, - "grad_norm": 0.6724850535392761, - "learning_rate": 5.1194702736746235e-06, - "loss": 0.2057, - "step": 14204 - }, - { - "epoch": 1.3381691434492828, - "grad_norm": 0.6152219772338867, - "learning_rate": 5.118152364369991e-06, - "loss": 0.1956, - "step": 14205 - }, - { - "epoch": 1.3382633475424508, - "grad_norm": 0.6260862350463867, - "learning_rate": 5.116834566379032e-06, - "loss": 0.1862, - "step": 14206 - }, - { - "epoch": 1.3383575516356185, - "grad_norm": 0.617588222026825, - "learning_rate": 5.115516879731801e-06, - "loss": 0.1997, - "step": 14207 - }, - { - "epoch": 1.3384517557287865, - "grad_norm": 0.7096170783042908, - "learning_rate": 5.114199304458336e-06, - "loss": 0.2151, - "step": 14208 - }, - { - "epoch": 1.3385459598219542, - "grad_norm": 0.6094770431518555, - "learning_rate": 5.112881840588683e-06, - "loss": 0.2004, - "step": 14209 - }, - { - "epoch": 1.3386401639151222, - "grad_norm": 0.6757852435112, - "learning_rate": 5.111564488152885e-06, - "loss": 0.2301, - "step": 14210 - }, - { - "epoch": 1.33873436800829, - "grad_norm": 0.8722229599952698, - "learning_rate": 5.110247247180974e-06, - "loss": 0.2083, - "step": 14211 - }, - { - "epoch": 1.3388285721014577, - "grad_norm": 0.6497089862823486, - "learning_rate": 5.108930117702992e-06, - "loss": 0.2004, - "step": 14212 - }, - { - "epoch": 1.3389227761946256, - "grad_norm": 0.6538980007171631, - "learning_rate": 5.107613099748963e-06, - "loss": 0.2081, - "step": 14213 - }, - { - "epoch": 1.3390169802877936, - "grad_norm": 0.8271617293357849, - "learning_rate": 5.106296193348921e-06, - "loss": 0.2366, - "step": 14214 - }, - { - "epoch": 1.3391111843809613, - "grad_norm": 0.593307375907898, - "learning_rate": 5.104979398532899e-06, - "loss": 0.1701, - "step": 14215 - }, - { - "epoch": 1.339205388474129, - "grad_norm": 0.6513305902481079, - "learning_rate": 5.103662715330912e-06, - "loss": 0.2111, - "step": 14216 - }, - { - "epoch": 1.339299592567297, - "grad_norm": 0.6432667970657349, - "learning_rate": 5.102346143772988e-06, - "loss": 0.1873, - "step": 14217 - }, - { - "epoch": 1.339393796660465, - "grad_norm": 0.6495787501335144, - "learning_rate": 5.10102968388915e-06, - "loss": 0.2132, - "step": 14218 - }, - { - "epoch": 1.3394880007536327, - "grad_norm": 0.6786971688270569, - "learning_rate": 5.099713335709409e-06, - "loss": 0.1986, - "step": 14219 - }, - { - "epoch": 1.3395822048468005, 
- "grad_norm": 0.572080671787262, - "learning_rate": 5.098397099263781e-06, - "loss": 0.19, - "step": 14220 - }, - { - "epoch": 1.3396764089399684, - "grad_norm": 0.8995034098625183, - "learning_rate": 5.097080974582283e-06, - "loss": 0.2059, - "step": 14221 - }, - { - "epoch": 1.3397706130331364, - "grad_norm": 0.7047062516212463, - "learning_rate": 5.095764961694923e-06, - "loss": 0.2367, - "step": 14222 - }, - { - "epoch": 1.3398648171263041, - "grad_norm": 0.6372144818305969, - "learning_rate": 5.094449060631697e-06, - "loss": 0.2181, - "step": 14223 - }, - { - "epoch": 1.3399590212194719, - "grad_norm": 0.6339496374130249, - "learning_rate": 5.093133271422628e-06, - "loss": 0.1862, - "step": 14224 - }, - { - "epoch": 1.3400532253126398, - "grad_norm": 0.7703357338905334, - "learning_rate": 5.091817594097708e-06, - "loss": 0.2211, - "step": 14225 - }, - { - "epoch": 1.3401474294058078, - "grad_norm": 0.66709965467453, - "learning_rate": 5.09050202868693e-06, - "loss": 0.206, - "step": 14226 - }, - { - "epoch": 1.3402416334989755, - "grad_norm": 0.6412050724029541, - "learning_rate": 5.089186575220307e-06, - "loss": 0.1962, - "step": 14227 - }, - { - "epoch": 1.3403358375921433, - "grad_norm": 0.665880560874939, - "learning_rate": 5.087871233727824e-06, - "loss": 0.2207, - "step": 14228 - }, - { - "epoch": 1.3404300416853112, - "grad_norm": 0.6656131148338318, - "learning_rate": 5.086556004239471e-06, - "loss": 0.211, - "step": 14229 - }, - { - "epoch": 1.3405242457784792, - "grad_norm": 0.6042746305465698, - "learning_rate": 5.085240886785239e-06, - "loss": 0.2029, - "step": 14230 - }, - { - "epoch": 1.340618449871647, - "grad_norm": 0.6668102741241455, - "learning_rate": 5.083925881395119e-06, - "loss": 0.1974, - "step": 14231 - }, - { - "epoch": 1.3407126539648146, - "grad_norm": 0.6841421127319336, - "learning_rate": 5.082610988099088e-06, - "loss": 0.2134, - "step": 14232 - }, - { - "epoch": 1.3408068580579826, - "grad_norm": 0.6074081659317017, - "learning_rate": 5.081296206927132e-06, - "loss": 0.2043, - "step": 14233 - }, - { - "epoch": 1.3409010621511506, - "grad_norm": 0.6494576334953308, - "learning_rate": 5.079981537909233e-06, - "loss": 0.1955, - "step": 14234 - }, - { - "epoch": 1.3409952662443183, - "grad_norm": 0.6407979130744934, - "learning_rate": 5.078666981075359e-06, - "loss": 0.172, - "step": 14235 - }, - { - "epoch": 1.341089470337486, - "grad_norm": 0.6389541625976562, - "learning_rate": 5.077352536455491e-06, - "loss": 0.1847, - "step": 14236 - }, - { - "epoch": 1.341183674430654, - "grad_norm": 0.6453443169593811, - "learning_rate": 5.076038204079601e-06, - "loss": 0.2112, - "step": 14237 - }, - { - "epoch": 1.341277878523822, - "grad_norm": 0.7297416925430298, - "learning_rate": 5.07472398397765e-06, - "loss": 0.2461, - "step": 14238 - }, - { - "epoch": 1.3413720826169897, - "grad_norm": 0.6424687504768372, - "learning_rate": 5.07340987617961e-06, - "loss": 0.181, - "step": 14239 - }, - { - "epoch": 1.3414662867101574, - "grad_norm": 0.6002586483955383, - "learning_rate": 5.0720958807154485e-06, - "loss": 0.1755, - "step": 14240 - }, - { - "epoch": 1.3415604908033254, - "grad_norm": 0.6982072591781616, - "learning_rate": 5.0707819976151175e-06, - "loss": 0.1645, - "step": 14241 - }, - { - "epoch": 1.3416546948964934, - "grad_norm": 0.7341618537902832, - "learning_rate": 5.069468226908585e-06, - "loss": 0.2407, - "step": 14242 - }, - { - "epoch": 1.341748898989661, - "grad_norm": 0.797859787940979, - "learning_rate": 5.068154568625797e-06, - "loss": 0.1932, - 
"step": 14243 - }, - { - "epoch": 1.3418431030828288, - "grad_norm": 0.661933958530426, - "learning_rate": 5.066841022796716e-06, - "loss": 0.2066, - "step": 14244 - }, - { - "epoch": 1.3419373071759968, - "grad_norm": 0.614425778388977, - "learning_rate": 5.065527589451286e-06, - "loss": 0.2158, - "step": 14245 - }, - { - "epoch": 1.3420315112691648, - "grad_norm": 0.6313828229904175, - "learning_rate": 5.064214268619458e-06, - "loss": 0.1832, - "step": 14246 - }, - { - "epoch": 1.3421257153623325, - "grad_norm": 0.652474582195282, - "learning_rate": 5.062901060331181e-06, - "loss": 0.2091, - "step": 14247 - }, - { - "epoch": 1.3422199194555002, - "grad_norm": 0.6407245993614197, - "learning_rate": 5.0615879646163926e-06, - "loss": 0.2062, - "step": 14248 - }, - { - "epoch": 1.3423141235486682, - "grad_norm": 0.7015902400016785, - "learning_rate": 5.060274981505035e-06, - "loss": 0.1906, - "step": 14249 - }, - { - "epoch": 1.3424083276418362, - "grad_norm": 0.7468515634536743, - "learning_rate": 5.058962111027051e-06, - "loss": 0.2116, - "step": 14250 - }, - { - "epoch": 1.342502531735004, - "grad_norm": 0.6226329207420349, - "learning_rate": 5.057649353212371e-06, - "loss": 0.2064, - "step": 14251 - }, - { - "epoch": 1.3425967358281716, - "grad_norm": 0.6461039781570435, - "learning_rate": 5.05633670809093e-06, - "loss": 0.2217, - "step": 14252 - }, - { - "epoch": 1.3426909399213396, - "grad_norm": 0.6401330232620239, - "learning_rate": 5.055024175692655e-06, - "loss": 0.2199, - "step": 14253 - }, - { - "epoch": 1.3427851440145075, - "grad_norm": 0.6089904308319092, - "learning_rate": 5.053711756047476e-06, - "loss": 0.1928, - "step": 14254 - }, - { - "epoch": 1.3428793481076753, - "grad_norm": 0.6451438069343567, - "learning_rate": 5.0523994491853225e-06, - "loss": 0.1826, - "step": 14255 - }, - { - "epoch": 1.342973552200843, - "grad_norm": 0.6480076313018799, - "learning_rate": 5.051087255136109e-06, - "loss": 0.2015, - "step": 14256 - }, - { - "epoch": 1.343067756294011, - "grad_norm": 0.6460951566696167, - "learning_rate": 5.049775173929761e-06, - "loss": 0.1955, - "step": 14257 - }, - { - "epoch": 1.343161960387179, - "grad_norm": 0.6903976798057556, - "learning_rate": 5.048463205596197e-06, - "loss": 0.2334, - "step": 14258 - }, - { - "epoch": 1.3432561644803467, - "grad_norm": 0.6762567758560181, - "learning_rate": 5.047151350165327e-06, - "loss": 0.2068, - "step": 14259 - }, - { - "epoch": 1.3433503685735144, - "grad_norm": 0.6496309638023376, - "learning_rate": 5.045839607667065e-06, - "loss": 0.1789, - "step": 14260 - }, - { - "epoch": 1.3434445726666824, - "grad_norm": 0.6982924342155457, - "learning_rate": 5.044527978131326e-06, - "loss": 0.2129, - "step": 14261 - }, - { - "epoch": 1.3435387767598503, - "grad_norm": 0.7475513219833374, - "learning_rate": 5.043216461588012e-06, - "loss": 0.2316, - "step": 14262 - }, - { - "epoch": 1.343632980853018, - "grad_norm": 0.6481485962867737, - "learning_rate": 5.041905058067023e-06, - "loss": 0.2212, - "step": 14263 - }, - { - "epoch": 1.3437271849461858, - "grad_norm": 0.630750298500061, - "learning_rate": 5.040593767598272e-06, - "loss": 0.1975, - "step": 14264 - }, - { - "epoch": 1.3438213890393538, - "grad_norm": 0.683118999004364, - "learning_rate": 5.039282590211654e-06, - "loss": 0.2035, - "step": 14265 - }, - { - "epoch": 1.3439155931325217, - "grad_norm": 0.6639612317085266, - "learning_rate": 5.037971525937056e-06, - "loss": 0.2073, - "step": 14266 - }, - { - "epoch": 1.3440097972256895, - "grad_norm": 0.619592547416687, - 
"learning_rate": 5.036660574804391e-06, - "loss": 0.1966, - "step": 14267 - }, - { - "epoch": 1.3441040013188572, - "grad_norm": 0.622658371925354, - "learning_rate": 5.035349736843539e-06, - "loss": 0.2005, - "step": 14268 - }, - { - "epoch": 1.3441982054120252, - "grad_norm": 0.66368168592453, - "learning_rate": 5.034039012084388e-06, - "loss": 0.2344, - "step": 14269 - }, - { - "epoch": 1.3442924095051931, - "grad_norm": 0.6013482213020325, - "learning_rate": 5.032728400556827e-06, - "loss": 0.1912, - "step": 14270 - }, - { - "epoch": 1.3443866135983609, - "grad_norm": 0.6582673192024231, - "learning_rate": 5.031417902290745e-06, - "loss": 0.1966, - "step": 14271 - }, - { - "epoch": 1.3444808176915286, - "grad_norm": 0.6961972713470459, - "learning_rate": 5.030107517316014e-06, - "loss": 0.2309, - "step": 14272 - }, - { - "epoch": 1.3445750217846966, - "grad_norm": 0.6419699192047119, - "learning_rate": 5.028797245662518e-06, - "loss": 0.198, - "step": 14273 - }, - { - "epoch": 1.3446692258778645, - "grad_norm": 0.6680853366851807, - "learning_rate": 5.027487087360138e-06, - "loss": 0.1949, - "step": 14274 - }, - { - "epoch": 1.3447634299710323, - "grad_norm": 0.702159583568573, - "learning_rate": 5.026177042438738e-06, - "loss": 0.2036, - "step": 14275 - }, - { - "epoch": 1.3448576340642, - "grad_norm": 0.6947282552719116, - "learning_rate": 5.024867110928193e-06, - "loss": 0.2071, - "step": 14276 - }, - { - "epoch": 1.344951838157368, - "grad_norm": 0.6485502123832703, - "learning_rate": 5.0235572928583766e-06, - "loss": 0.1854, - "step": 14277 - }, - { - "epoch": 1.345046042250536, - "grad_norm": 0.5859835147857666, - "learning_rate": 5.022247588259146e-06, - "loss": 0.1833, - "step": 14278 - }, - { - "epoch": 1.3451402463437037, - "grad_norm": 0.6901947855949402, - "learning_rate": 5.020937997160369e-06, - "loss": 0.2525, - "step": 14279 - }, - { - "epoch": 1.3452344504368714, - "grad_norm": 0.6361029744148254, - "learning_rate": 5.019628519591908e-06, - "loss": 0.2009, - "step": 14280 - }, - { - "epoch": 1.3453286545300394, - "grad_norm": 0.7549346685409546, - "learning_rate": 5.018319155583621e-06, - "loss": 0.2108, - "step": 14281 - }, - { - "epoch": 1.345422858623207, - "grad_norm": 0.5660572648048401, - "learning_rate": 5.017009905165357e-06, - "loss": 0.1904, - "step": 14282 - }, - { - "epoch": 1.345517062716375, - "grad_norm": 0.710254967212677, - "learning_rate": 5.015700768366973e-06, - "loss": 0.2061, - "step": 14283 - }, - { - "epoch": 1.3456112668095428, - "grad_norm": 0.731178879737854, - "learning_rate": 5.014391745218325e-06, - "loss": 0.2061, - "step": 14284 - }, - { - "epoch": 1.3457054709027108, - "grad_norm": 0.7249963879585266, - "learning_rate": 5.013082835749252e-06, - "loss": 0.2092, - "step": 14285 - }, - { - "epoch": 1.3457996749958785, - "grad_norm": 0.636004626750946, - "learning_rate": 5.011774039989601e-06, - "loss": 0.2026, - "step": 14286 - }, - { - "epoch": 1.3458938790890465, - "grad_norm": 0.670627236366272, - "learning_rate": 5.010465357969221e-06, - "loss": 0.235, - "step": 14287 - }, - { - "epoch": 1.3459880831822142, - "grad_norm": 0.6595027446746826, - "learning_rate": 5.0091567897179435e-06, - "loss": 0.1939, - "step": 14288 - }, - { - "epoch": 1.3460822872753821, - "grad_norm": 0.8129286170005798, - "learning_rate": 5.00784833526561e-06, - "loss": 0.2327, - "step": 14289 - }, - { - "epoch": 1.3461764913685499, - "grad_norm": 0.6742135882377625, - "learning_rate": 5.006539994642059e-06, - "loss": 0.2083, - "step": 14290 - }, - { - "epoch": 
1.3462706954617178, - "grad_norm": 0.6889356374740601, - "learning_rate": 5.005231767877114e-06, - "loss": 0.2256, - "step": 14291 - }, - { - "epoch": 1.3463648995548856, - "grad_norm": 0.5851003527641296, - "learning_rate": 5.003923655000613e-06, - "loss": 0.1693, - "step": 14292 - }, - { - "epoch": 1.3464591036480535, - "grad_norm": 0.6751152873039246, - "learning_rate": 5.002615656042376e-06, - "loss": 0.2319, - "step": 14293 - }, - { - "epoch": 1.3465533077412213, - "grad_norm": 0.7556543350219727, - "learning_rate": 5.001307771032231e-06, - "loss": 0.2017, - "step": 14294 - }, - { - "epoch": 1.3466475118343892, - "grad_norm": 0.6427310109138489, - "learning_rate": 5.000000000000003e-06, - "loss": 0.2051, - "step": 14295 - }, - { - "epoch": 1.346741715927557, - "grad_norm": 0.7643883228302002, - "learning_rate": 4.998692342975503e-06, - "loss": 0.2072, - "step": 14296 - }, - { - "epoch": 1.346835920020725, - "grad_norm": 0.7492932677268982, - "learning_rate": 4.997384799988553e-06, - "loss": 0.2097, - "step": 14297 - }, - { - "epoch": 1.3469301241138927, - "grad_norm": 0.678108811378479, - "learning_rate": 4.996077371068969e-06, - "loss": 0.2083, - "step": 14298 - }, - { - "epoch": 1.3470243282070606, - "grad_norm": 0.9397788643836975, - "learning_rate": 4.9947700562465576e-06, - "loss": 0.1968, - "step": 14299 - }, - { - "epoch": 1.3471185323002284, - "grad_norm": 0.708877444267273, - "learning_rate": 4.993462855551129e-06, - "loss": 0.2006, - "step": 14300 - }, - { - "epoch": 1.3472127363933963, - "grad_norm": 0.6410995125770569, - "learning_rate": 4.992155769012493e-06, - "loss": 0.2128, - "step": 14301 - }, - { - "epoch": 1.347306940486564, - "grad_norm": 0.7376231551170349, - "learning_rate": 4.990848796660451e-06, - "loss": 0.2444, - "step": 14302 - }, - { - "epoch": 1.347401144579732, - "grad_norm": 0.6626389622688293, - "learning_rate": 4.989541938524796e-06, - "loss": 0.1856, - "step": 14303 - }, - { - "epoch": 1.3474953486728998, - "grad_norm": 0.6304222941398621, - "learning_rate": 4.98823519463534e-06, - "loss": 0.1866, - "step": 14304 - }, - { - "epoch": 1.3475895527660677, - "grad_norm": 0.5900949835777283, - "learning_rate": 4.986928565021874e-06, - "loss": 0.2034, - "step": 14305 - }, - { - "epoch": 1.3476837568592355, - "grad_norm": 0.6261608600616455, - "learning_rate": 4.985622049714185e-06, - "loss": 0.2052, - "step": 14306 - }, - { - "epoch": 1.3477779609524034, - "grad_norm": 0.6565543413162231, - "learning_rate": 4.984315648742068e-06, - "loss": 0.1795, - "step": 14307 - }, - { - "epoch": 1.3478721650455712, - "grad_norm": 0.7810153961181641, - "learning_rate": 4.983009362135315e-06, - "loss": 0.2179, - "step": 14308 - }, - { - "epoch": 1.3479663691387391, - "grad_norm": 0.6561579704284668, - "learning_rate": 4.9817031899237035e-06, - "loss": 0.2061, - "step": 14309 - }, - { - "epoch": 1.3480605732319069, - "grad_norm": 0.6692266464233398, - "learning_rate": 4.98039713213702e-06, - "loss": 0.2063, - "step": 14310 - }, - { - "epoch": 1.3481547773250748, - "grad_norm": 0.6777259111404419, - "learning_rate": 4.97909118880505e-06, - "loss": 0.2199, - "step": 14311 - }, - { - "epoch": 1.3482489814182426, - "grad_norm": 0.6281754374504089, - "learning_rate": 4.97778535995756e-06, - "loss": 0.2021, - "step": 14312 - }, - { - "epoch": 1.3483431855114105, - "grad_norm": 0.6651512980461121, - "learning_rate": 4.976479645624332e-06, - "loss": 0.2239, - "step": 14313 - }, - { - "epoch": 1.3484373896045783, - "grad_norm": 0.6048790812492371, - "learning_rate": 
4.975174045835141e-06, - "loss": 0.1762, - "step": 14314 - }, - { - "epoch": 1.3485315936977462, - "grad_norm": 0.6577955484390259, - "learning_rate": 4.973868560619749e-06, - "loss": 0.1938, - "step": 14315 - }, - { - "epoch": 1.348625797790914, - "grad_norm": 0.6613869667053223, - "learning_rate": 4.972563190007927e-06, - "loss": 0.2085, - "step": 14316 - }, - { - "epoch": 1.348720001884082, - "grad_norm": 0.774599552154541, - "learning_rate": 4.971257934029442e-06, - "loss": 0.2079, - "step": 14317 - }, - { - "epoch": 1.3488142059772497, - "grad_norm": 0.6561988592147827, - "learning_rate": 4.96995279271405e-06, - "loss": 0.2109, - "step": 14318 - }, - { - "epoch": 1.3489084100704176, - "grad_norm": 0.6106441020965576, - "learning_rate": 4.968647766091514e-06, - "loss": 0.1914, - "step": 14319 - }, - { - "epoch": 1.3490026141635854, - "grad_norm": 0.6479611992835999, - "learning_rate": 4.9673428541915934e-06, - "loss": 0.1861, - "step": 14320 - }, - { - "epoch": 1.3490968182567533, - "grad_norm": 0.6346365213394165, - "learning_rate": 4.966038057044039e-06, - "loss": 0.1925, - "step": 14321 - }, - { - "epoch": 1.349191022349921, - "grad_norm": 0.6005086898803711, - "learning_rate": 4.964733374678599e-06, - "loss": 0.1784, - "step": 14322 - }, - { - "epoch": 1.349285226443089, - "grad_norm": 0.6776334643363953, - "learning_rate": 4.963428807125024e-06, - "loss": 0.2008, - "step": 14323 - }, - { - "epoch": 1.3493794305362568, - "grad_norm": 0.6856703162193298, - "learning_rate": 4.962124354413066e-06, - "loss": 0.2233, - "step": 14324 - }, - { - "epoch": 1.3494736346294247, - "grad_norm": 0.6597799062728882, - "learning_rate": 4.960820016572459e-06, - "loss": 0.2146, - "step": 14325 - }, - { - "epoch": 1.3495678387225924, - "grad_norm": 0.6462965607643127, - "learning_rate": 4.959515793632951e-06, - "loss": 0.2043, - "step": 14326 - }, - { - "epoch": 1.3496620428157604, - "grad_norm": 0.7108558416366577, - "learning_rate": 4.95821168562428e-06, - "loss": 0.2151, - "step": 14327 - }, - { - "epoch": 1.3497562469089281, - "grad_norm": 0.6805381774902344, - "learning_rate": 4.9569076925761775e-06, - "loss": 0.1905, - "step": 14328 - }, - { - "epoch": 1.349850451002096, - "grad_norm": 0.6004214882850647, - "learning_rate": 4.955603814518378e-06, - "loss": 0.1929, - "step": 14329 - }, - { - "epoch": 1.3499446550952638, - "grad_norm": 0.6933792233467102, - "learning_rate": 4.954300051480617e-06, - "loss": 0.2059, - "step": 14330 - }, - { - "epoch": 1.3500388591884318, - "grad_norm": 0.6043687462806702, - "learning_rate": 4.952996403492614e-06, - "loss": 0.1781, - "step": 14331 - }, - { - "epoch": 1.3501330632815995, - "grad_norm": 0.6481543183326721, - "learning_rate": 4.951692870584103e-06, - "loss": 0.1779, - "step": 14332 - }, - { - "epoch": 1.3502272673747675, - "grad_norm": 0.7032328248023987, - "learning_rate": 4.950389452784796e-06, - "loss": 0.2296, - "step": 14333 - }, - { - "epoch": 1.3503214714679352, - "grad_norm": 0.6082679033279419, - "learning_rate": 4.949086150124421e-06, - "loss": 0.1965, - "step": 14334 - }, - { - "epoch": 1.3504156755611032, - "grad_norm": 0.639819860458374, - "learning_rate": 4.9477829626326965e-06, - "loss": 0.2027, - "step": 14335 - }, - { - "epoch": 1.350509879654271, - "grad_norm": 0.6563467979431152, - "learning_rate": 4.9464798903393295e-06, - "loss": 0.224, - "step": 14336 - }, - { - "epoch": 1.350604083747439, - "grad_norm": 0.6432325839996338, - "learning_rate": 4.945176933274036e-06, - "loss": 0.1891, - "step": 14337 - }, - { - "epoch": 
1.3506982878406066, - "grad_norm": 0.5720778107643127, - "learning_rate": 4.943874091466531e-06, - "loss": 0.2023, - "step": 14338 - }, - { - "epoch": 1.3507924919337746, - "grad_norm": 0.6740782260894775, - "learning_rate": 4.942571364946511e-06, - "loss": 0.1683, - "step": 14339 - }, - { - "epoch": 1.3508866960269423, - "grad_norm": 0.6470614671707153, - "learning_rate": 4.9412687537436865e-06, - "loss": 0.2147, - "step": 14340 - }, - { - "epoch": 1.3509809001201103, - "grad_norm": 0.5793033242225647, - "learning_rate": 4.939966257887762e-06, - "loss": 0.1833, - "step": 14341 - }, - { - "epoch": 1.351075104213278, - "grad_norm": 0.6842806339263916, - "learning_rate": 4.938663877408432e-06, - "loss": 0.1908, - "step": 14342 - }, - { - "epoch": 1.351169308306446, - "grad_norm": 0.6015111207962036, - "learning_rate": 4.937361612335384e-06, - "loss": 0.1764, - "step": 14343 - }, - { - "epoch": 1.3512635123996137, - "grad_norm": 0.5633746981620789, - "learning_rate": 4.936059462698329e-06, - "loss": 0.1553, - "step": 14344 - }, - { - "epoch": 1.3513577164927817, - "grad_norm": 0.6662188768386841, - "learning_rate": 4.934757428526951e-06, - "loss": 0.2186, - "step": 14345 - }, - { - "epoch": 1.3514519205859494, - "grad_norm": 0.6492788791656494, - "learning_rate": 4.933455509850933e-06, - "loss": 0.2015, - "step": 14346 - }, - { - "epoch": 1.3515461246791174, - "grad_norm": 0.607024610042572, - "learning_rate": 4.932153706699964e-06, - "loss": 0.2103, - "step": 14347 - }, - { - "epoch": 1.3516403287722851, - "grad_norm": 0.6619748473167419, - "learning_rate": 4.930852019103732e-06, - "loss": 0.2104, - "step": 14348 - }, - { - "epoch": 1.351734532865453, - "grad_norm": 0.7663852572441101, - "learning_rate": 4.929550447091911e-06, - "loss": 0.2038, - "step": 14349 - }, - { - "epoch": 1.3518287369586208, - "grad_norm": 0.6927057504653931, - "learning_rate": 4.92824899069418e-06, - "loss": 0.1969, - "step": 14350 - }, - { - "epoch": 1.3519229410517886, - "grad_norm": 0.6270477771759033, - "learning_rate": 4.92694764994022e-06, - "loss": 0.1893, - "step": 14351 - }, - { - "epoch": 1.3520171451449565, - "grad_norm": 0.6877565979957581, - "learning_rate": 4.925646424859696e-06, - "loss": 0.2246, - "step": 14352 - }, - { - "epoch": 1.3521113492381245, - "grad_norm": 0.6178605556488037, - "learning_rate": 4.92434531548228e-06, - "loss": 0.2082, - "step": 14353 - }, - { - "epoch": 1.3522055533312922, - "grad_norm": 0.6962509155273438, - "learning_rate": 4.923044321837645e-06, - "loss": 0.227, - "step": 14354 - }, - { - "epoch": 1.35229975742446, - "grad_norm": 0.7804440855979919, - "learning_rate": 4.921743443955447e-06, - "loss": 0.2344, - "step": 14355 - }, - { - "epoch": 1.352393961517628, - "grad_norm": 0.6728218793869019, - "learning_rate": 4.920442681865351e-06, - "loss": 0.223, - "step": 14356 - }, - { - "epoch": 1.3524881656107959, - "grad_norm": 0.6981995701789856, - "learning_rate": 4.9191420355970246e-06, - "loss": 0.2027, - "step": 14357 - }, - { - "epoch": 1.3525823697039636, - "grad_norm": 0.6677607297897339, - "learning_rate": 4.917841505180115e-06, - "loss": 0.2086, - "step": 14358 - }, - { - "epoch": 1.3526765737971314, - "grad_norm": 0.7647161483764648, - "learning_rate": 4.916541090644271e-06, - "loss": 0.1902, - "step": 14359 - }, - { - "epoch": 1.3527707778902993, - "grad_norm": 0.736425518989563, - "learning_rate": 4.9152407920191604e-06, - "loss": 0.1852, - "step": 14360 - }, - { - "epoch": 1.3528649819834673, - "grad_norm": 0.7363700866699219, - "learning_rate": 
4.913940609334423e-06, - "loss": 0.1981, - "step": 14361 - }, - { - "epoch": 1.352959186076635, - "grad_norm": 0.6129119992256165, - "learning_rate": 4.912640542619702e-06, - "loss": 0.184, - "step": 14362 - }, - { - "epoch": 1.3530533901698027, - "grad_norm": 0.6737158894538879, - "learning_rate": 4.911340591904644e-06, - "loss": 0.2427, - "step": 14363 - }, - { - "epoch": 1.3531475942629707, - "grad_norm": 0.636288046836853, - "learning_rate": 4.910040757218894e-06, - "loss": 0.1717, - "step": 14364 - }, - { - "epoch": 1.3532417983561387, - "grad_norm": 0.6598813533782959, - "learning_rate": 4.908741038592084e-06, - "loss": 0.1993, - "step": 14365 - }, - { - "epoch": 1.3533360024493064, - "grad_norm": 0.6319726705551147, - "learning_rate": 4.907441436053852e-06, - "loss": 0.1955, - "step": 14366 - }, - { - "epoch": 1.3534302065424741, - "grad_norm": 0.7078801393508911, - "learning_rate": 4.906141949633832e-06, - "loss": 0.1933, - "step": 14367 - }, - { - "epoch": 1.353524410635642, - "grad_norm": 0.706860363483429, - "learning_rate": 4.904842579361653e-06, - "loss": 0.208, - "step": 14368 - }, - { - "epoch": 1.35361861472881, - "grad_norm": 0.6156027913093567, - "learning_rate": 4.903543325266941e-06, - "loss": 0.1861, - "step": 14369 - }, - { - "epoch": 1.3537128188219778, - "grad_norm": 0.6501897573471069, - "learning_rate": 4.902244187379327e-06, - "loss": 0.203, - "step": 14370 - }, - { - "epoch": 1.3538070229151455, - "grad_norm": 0.6195113658905029, - "learning_rate": 4.900945165728426e-06, - "loss": 0.176, - "step": 14371 - }, - { - "epoch": 1.3539012270083135, - "grad_norm": 0.7405468821525574, - "learning_rate": 4.899646260343866e-06, - "loss": 0.2067, - "step": 14372 - }, - { - "epoch": 1.3539954311014815, - "grad_norm": 0.9037397503852844, - "learning_rate": 4.898347471255253e-06, - "loss": 0.2006, - "step": 14373 - }, - { - "epoch": 1.3540896351946492, - "grad_norm": 0.6409209966659546, - "learning_rate": 4.897048798492209e-06, - "loss": 0.217, - "step": 14374 - }, - { - "epoch": 1.354183839287817, - "grad_norm": 0.6748372912406921, - "learning_rate": 4.895750242084347e-06, - "loss": 0.214, - "step": 14375 - }, - { - "epoch": 1.354278043380985, - "grad_norm": 0.6084638237953186, - "learning_rate": 4.894451802061271e-06, - "loss": 0.2, - "step": 14376 - }, - { - "epoch": 1.3543722474741529, - "grad_norm": 0.6398811936378479, - "learning_rate": 4.893153478452588e-06, - "loss": 0.2296, - "step": 14377 - }, - { - "epoch": 1.3544664515673206, - "grad_norm": 0.677300214767456, - "learning_rate": 4.891855271287909e-06, - "loss": 0.2095, - "step": 14378 - }, - { - "epoch": 1.3545606556604883, - "grad_norm": 0.7081146240234375, - "learning_rate": 4.890557180596826e-06, - "loss": 0.1932, - "step": 14379 - }, - { - "epoch": 1.3546548597536563, - "grad_norm": 0.6429355144500732, - "learning_rate": 4.8892592064089394e-06, - "loss": 0.2209, - "step": 14380 - }, - { - "epoch": 1.3547490638468243, - "grad_norm": 0.658106803894043, - "learning_rate": 4.887961348753852e-06, - "loss": 0.2157, - "step": 14381 - }, - { - "epoch": 1.354843267939992, - "grad_norm": 0.5953934788703918, - "learning_rate": 4.8866636076611515e-06, - "loss": 0.1764, - "step": 14382 - }, - { - "epoch": 1.3549374720331597, - "grad_norm": 0.6248713731765747, - "learning_rate": 4.88536598316042e-06, - "loss": 0.2047, - "step": 14383 - }, - { - "epoch": 1.3550316761263277, - "grad_norm": 0.6855310797691345, - "learning_rate": 4.884068475281264e-06, - "loss": 0.1919, - "step": 14384 - }, - { - "epoch": 1.3551258802194956, - 
"grad_norm": 0.7052231431007385, - "learning_rate": 4.882771084053257e-06, - "loss": 0.2219, - "step": 14385 - }, - { - "epoch": 1.3552200843126634, - "grad_norm": 0.6247349381446838, - "learning_rate": 4.88147380950598e-06, - "loss": 0.1908, - "step": 14386 - }, - { - "epoch": 1.3553142884058311, - "grad_norm": 0.6833993196487427, - "learning_rate": 4.880176651669015e-06, - "loss": 0.1899, - "step": 14387 - }, - { - "epoch": 1.355408492498999, - "grad_norm": 0.627804160118103, - "learning_rate": 4.878879610571946e-06, - "loss": 0.1818, - "step": 14388 - }, - { - "epoch": 1.355502696592167, - "grad_norm": 0.6863963603973389, - "learning_rate": 4.877582686244337e-06, - "loss": 0.2066, - "step": 14389 - }, - { - "epoch": 1.3555969006853348, - "grad_norm": 0.683441698551178, - "learning_rate": 4.876285878715764e-06, - "loss": 0.2144, - "step": 14390 - }, - { - "epoch": 1.3556911047785025, - "grad_norm": 0.6739716529846191, - "learning_rate": 4.8749891880158015e-06, - "loss": 0.2108, - "step": 14391 - }, - { - "epoch": 1.3557853088716705, - "grad_norm": 0.6303501725196838, - "learning_rate": 4.873692614174008e-06, - "loss": 0.2197, - "step": 14392 - }, - { - "epoch": 1.3558795129648384, - "grad_norm": 0.6293514966964722, - "learning_rate": 4.87239615721995e-06, - "loss": 0.1909, - "step": 14393 - }, - { - "epoch": 1.3559737170580062, - "grad_norm": 0.6505168676376343, - "learning_rate": 4.871099817183195e-06, - "loss": 0.203, - "step": 14394 - }, - { - "epoch": 1.356067921151174, - "grad_norm": 0.5902092456817627, - "learning_rate": 4.869803594093291e-06, - "loss": 0.184, - "step": 14395 - }, - { - "epoch": 1.3561621252443419, - "grad_norm": 0.6372312903404236, - "learning_rate": 4.868507487979799e-06, - "loss": 0.2336, - "step": 14396 - }, - { - "epoch": 1.3562563293375098, - "grad_norm": 0.6709840297698975, - "learning_rate": 4.867211498872276e-06, - "loss": 0.2035, - "step": 14397 - }, - { - "epoch": 1.3563505334306776, - "grad_norm": 0.6329782009124756, - "learning_rate": 4.865915626800269e-06, - "loss": 0.2051, - "step": 14398 - }, - { - "epoch": 1.3564447375238453, - "grad_norm": 0.6777759194374084, - "learning_rate": 4.864619871793319e-06, - "loss": 0.2065, - "step": 14399 - }, - { - "epoch": 1.3565389416170133, - "grad_norm": 0.6504156589508057, - "learning_rate": 4.863324233880985e-06, - "loss": 0.1853, - "step": 14400 - }, - { - "epoch": 1.3566331457101812, - "grad_norm": 0.6352477073669434, - "learning_rate": 4.862028713092802e-06, - "loss": 0.2052, - "step": 14401 - }, - { - "epoch": 1.356727349803349, - "grad_norm": 0.7121332883834839, - "learning_rate": 4.860733309458308e-06, - "loss": 0.2366, - "step": 14402 - }, - { - "epoch": 1.3568215538965167, - "grad_norm": 0.6510499715805054, - "learning_rate": 4.859438023007041e-06, - "loss": 0.215, - "step": 14403 - }, - { - "epoch": 1.3569157579896847, - "grad_norm": 0.5392564535140991, - "learning_rate": 4.858142853768541e-06, - "loss": 0.1715, - "step": 14404 - }, - { - "epoch": 1.3570099620828526, - "grad_norm": 0.632756233215332, - "learning_rate": 4.856847801772333e-06, - "loss": 0.2118, - "step": 14405 - }, - { - "epoch": 1.3571041661760204, - "grad_norm": 0.6321601867675781, - "learning_rate": 4.855552867047949e-06, - "loss": 0.194, - "step": 14406 - }, - { - "epoch": 1.357198370269188, - "grad_norm": 0.633243203163147, - "learning_rate": 4.854258049624919e-06, - "loss": 0.2026, - "step": 14407 - }, - { - "epoch": 1.357292574362356, - "grad_norm": 0.6365585327148438, - "learning_rate": 4.852963349532761e-06, - "loss": 0.2002, - 
"step": 14408 - }, - { - "epoch": 1.357386778455524, - "grad_norm": 0.749879777431488, - "learning_rate": 4.851668766800998e-06, - "loss": 0.2071, - "step": 14409 - }, - { - "epoch": 1.3574809825486918, - "grad_norm": 0.7128868699073792, - "learning_rate": 4.850374301459152e-06, - "loss": 0.2049, - "step": 14410 - }, - { - "epoch": 1.3575751866418595, - "grad_norm": 0.6514534950256348, - "learning_rate": 4.849079953536733e-06, - "loss": 0.223, - "step": 14411 - }, - { - "epoch": 1.3576693907350275, - "grad_norm": 0.6466713547706604, - "learning_rate": 4.847785723063261e-06, - "loss": 0.1996, - "step": 14412 - }, - { - "epoch": 1.3577635948281954, - "grad_norm": 0.6942420601844788, - "learning_rate": 4.846491610068238e-06, - "loss": 0.1871, - "step": 14413 - }, - { - "epoch": 1.3578577989213632, - "grad_norm": 0.6090893745422363, - "learning_rate": 4.845197614581177e-06, - "loss": 0.2192, - "step": 14414 - }, - { - "epoch": 1.357952003014531, - "grad_norm": 0.5880915522575378, - "learning_rate": 4.843903736631585e-06, - "loss": 0.1927, - "step": 14415 - }, - { - "epoch": 1.3580462071076989, - "grad_norm": 0.7274464964866638, - "learning_rate": 4.84260997624896e-06, - "loss": 0.2063, - "step": 14416 - }, - { - "epoch": 1.3581404112008668, - "grad_norm": 0.6381358504295349, - "learning_rate": 4.841316333462803e-06, - "loss": 0.1974, - "step": 14417 - }, - { - "epoch": 1.3582346152940346, - "grad_norm": 0.6187579035758972, - "learning_rate": 4.840022808302616e-06, - "loss": 0.2264, - "step": 14418 - }, - { - "epoch": 1.3583288193872023, - "grad_norm": 0.6147592067718506, - "learning_rate": 4.838729400797884e-06, - "loss": 0.2046, - "step": 14419 - }, - { - "epoch": 1.3584230234803703, - "grad_norm": 0.6889422535896301, - "learning_rate": 4.837436110978104e-06, - "loss": 0.1896, - "step": 14420 - }, - { - "epoch": 1.358517227573538, - "grad_norm": 0.6578227877616882, - "learning_rate": 4.836142938872769e-06, - "loss": 0.1819, - "step": 14421 - }, - { - "epoch": 1.358611431666706, - "grad_norm": 0.6827560067176819, - "learning_rate": 4.83484988451136e-06, - "loss": 0.1985, - "step": 14422 - }, - { - "epoch": 1.3587056357598737, - "grad_norm": 0.6549921631813049, - "learning_rate": 4.833556947923359e-06, - "loss": 0.1961, - "step": 14423 - }, - { - "epoch": 1.3587998398530416, - "grad_norm": 0.6283363699913025, - "learning_rate": 4.832264129138249e-06, - "loss": 0.2096, - "step": 14424 - }, - { - "epoch": 1.3588940439462094, - "grad_norm": 0.6205511093139648, - "learning_rate": 4.830971428185514e-06, - "loss": 0.2339, - "step": 14425 - }, - { - "epoch": 1.3589882480393773, - "grad_norm": 0.5928133130073547, - "learning_rate": 4.829678845094619e-06, - "loss": 0.1698, - "step": 14426 - }, - { - "epoch": 1.359082452132545, - "grad_norm": 0.6639288067817688, - "learning_rate": 4.828386379895043e-06, - "loss": 0.1921, - "step": 14427 - }, - { - "epoch": 1.359176656225713, - "grad_norm": 0.8254418969154358, - "learning_rate": 4.82709403261626e-06, - "loss": 0.1797, - "step": 14428 - }, - { - "epoch": 1.3592708603188808, - "grad_norm": 0.7011244893074036, - "learning_rate": 4.825801803287728e-06, - "loss": 0.205, - "step": 14429 - }, - { - "epoch": 1.3593650644120487, - "grad_norm": 0.840096652507782, - "learning_rate": 4.824509691938918e-06, - "loss": 0.1902, - "step": 14430 - }, - { - "epoch": 1.3594592685052165, - "grad_norm": 0.6223124861717224, - "learning_rate": 4.823217698599294e-06, - "loss": 0.1894, - "step": 14431 - }, - { - "epoch": 1.3595534725983844, - "grad_norm": 0.6484492421150208, - 
"learning_rate": 4.82192582329831e-06, - "loss": 0.1826, - "step": 14432 - }, - { - "epoch": 1.3596476766915522, - "grad_norm": 0.607126772403717, - "learning_rate": 4.820634066065424e-06, - "loss": 0.2202, - "step": 14433 - }, - { - "epoch": 1.3597418807847201, - "grad_norm": 0.6780961751937866, - "learning_rate": 4.819342426930096e-06, - "loss": 0.1947, - "step": 14434 - }, - { - "epoch": 1.3598360848778879, - "grad_norm": 0.6182903051376343, - "learning_rate": 4.818050905921768e-06, - "loss": 0.1917, - "step": 14435 - }, - { - "epoch": 1.3599302889710558, - "grad_norm": 0.646899402141571, - "learning_rate": 4.816759503069894e-06, - "loss": 0.2014, - "step": 14436 - }, - { - "epoch": 1.3600244930642236, - "grad_norm": 0.6887809634208679, - "learning_rate": 4.815468218403923e-06, - "loss": 0.2184, - "step": 14437 - }, - { - "epoch": 1.3601186971573915, - "grad_norm": 0.5613549947738647, - "learning_rate": 4.814177051953296e-06, - "loss": 0.1781, - "step": 14438 - }, - { - "epoch": 1.3602129012505593, - "grad_norm": 0.6463780403137207, - "learning_rate": 4.812886003747443e-06, - "loss": 0.2099, - "step": 14439 - }, - { - "epoch": 1.3603071053437272, - "grad_norm": 0.6642791032791138, - "learning_rate": 4.811595073815819e-06, - "loss": 0.214, - "step": 14440 - }, - { - "epoch": 1.360401309436895, - "grad_norm": 0.575738251209259, - "learning_rate": 4.8103042621878515e-06, - "loss": 0.1846, - "step": 14441 - }, - { - "epoch": 1.360495513530063, - "grad_norm": 0.6691033840179443, - "learning_rate": 4.809013568892969e-06, - "loss": 0.202, - "step": 14442 - }, - { - "epoch": 1.3605897176232307, - "grad_norm": 0.659534752368927, - "learning_rate": 4.807722993960605e-06, - "loss": 0.1923, - "step": 14443 - }, - { - "epoch": 1.3606839217163986, - "grad_norm": 0.7245131134986877, - "learning_rate": 4.806432537420191e-06, - "loss": 0.1847, - "step": 14444 - }, - { - "epoch": 1.3607781258095664, - "grad_norm": 0.634385347366333, - "learning_rate": 4.805142199301144e-06, - "loss": 0.1814, - "step": 14445 - }, - { - "epoch": 1.3608723299027343, - "grad_norm": 0.6050102114677429, - "learning_rate": 4.803851979632887e-06, - "loss": 0.1936, - "step": 14446 - }, - { - "epoch": 1.360966533995902, - "grad_norm": 0.6620184779167175, - "learning_rate": 4.802561878444846e-06, - "loss": 0.206, - "step": 14447 - }, - { - "epoch": 1.36106073808907, - "grad_norm": 0.8133841156959534, - "learning_rate": 4.801271895766429e-06, - "loss": 0.2338, - "step": 14448 - }, - { - "epoch": 1.3611549421822378, - "grad_norm": 0.5997223258018494, - "learning_rate": 4.79998203162705e-06, - "loss": 0.2005, - "step": 14449 - }, - { - "epoch": 1.3612491462754057, - "grad_norm": 0.7362989187240601, - "learning_rate": 4.798692286056129e-06, - "loss": 0.2131, - "step": 14450 - }, - { - "epoch": 1.3613433503685735, - "grad_norm": 0.7201138138771057, - "learning_rate": 4.797402659083064e-06, - "loss": 0.2652, - "step": 14451 - }, - { - "epoch": 1.3614375544617414, - "grad_norm": 0.7321445345878601, - "learning_rate": 4.796113150737267e-06, - "loss": 0.2121, - "step": 14452 - }, - { - "epoch": 1.3615317585549092, - "grad_norm": 0.6794480085372925, - "learning_rate": 4.794823761048134e-06, - "loss": 0.219, - "step": 14453 - }, - { - "epoch": 1.3616259626480771, - "grad_norm": 0.6611748337745667, - "learning_rate": 4.793534490045071e-06, - "loss": 0.1921, - "step": 14454 - }, - { - "epoch": 1.3617201667412449, - "grad_norm": 0.6154462695121765, - "learning_rate": 4.792245337757476e-06, - "loss": 0.1971, - "step": 14455 - }, - { - "epoch": 
1.3618143708344128, - "grad_norm": 0.6860254406929016, - "learning_rate": 4.7909563042147375e-06, - "loss": 0.2116, - "step": 14456 - }, - { - "epoch": 1.3619085749275806, - "grad_norm": 0.6639959812164307, - "learning_rate": 4.789667389446252e-06, - "loss": 0.1944, - "step": 14457 - }, - { - "epoch": 1.3620027790207485, - "grad_norm": 0.7993869781494141, - "learning_rate": 4.788378593481411e-06, - "loss": 0.2247, - "step": 14458 - }, - { - "epoch": 1.3620969831139162, - "grad_norm": 0.7196150422096252, - "learning_rate": 4.787089916349594e-06, - "loss": 0.1944, - "step": 14459 - }, - { - "epoch": 1.3621911872070842, - "grad_norm": 0.6921483278274536, - "learning_rate": 4.7858013580801895e-06, - "loss": 0.2085, - "step": 14460 - }, - { - "epoch": 1.362285391300252, - "grad_norm": 0.6668462157249451, - "learning_rate": 4.784512918702582e-06, - "loss": 0.2286, - "step": 14461 - }, - { - "epoch": 1.36237959539342, - "grad_norm": 0.7428612112998962, - "learning_rate": 4.783224598246146e-06, - "loss": 0.1979, - "step": 14462 - }, - { - "epoch": 1.3624737994865876, - "grad_norm": 0.5864380598068237, - "learning_rate": 4.781936396740252e-06, - "loss": 0.1703, - "step": 14463 - }, - { - "epoch": 1.3625680035797556, - "grad_norm": 0.6457656025886536, - "learning_rate": 4.780648314214279e-06, - "loss": 0.2028, - "step": 14464 - }, - { - "epoch": 1.3626622076729233, - "grad_norm": 0.5920119285583496, - "learning_rate": 4.779360350697599e-06, - "loss": 0.1912, - "step": 14465 - }, - { - "epoch": 1.3627564117660913, - "grad_norm": 0.5777595043182373, - "learning_rate": 4.778072506219575e-06, - "loss": 0.183, - "step": 14466 - }, - { - "epoch": 1.362850615859259, - "grad_norm": 0.6431694626808167, - "learning_rate": 4.776784780809571e-06, - "loss": 0.2048, - "step": 14467 - }, - { - "epoch": 1.362944819952427, - "grad_norm": 0.6484269499778748, - "learning_rate": 4.775497174496958e-06, - "loss": 0.2075, - "step": 14468 - }, - { - "epoch": 1.3630390240455947, - "grad_norm": 0.6658286452293396, - "learning_rate": 4.774209687311085e-06, - "loss": 0.203, - "step": 14469 - }, - { - "epoch": 1.3631332281387627, - "grad_norm": 0.6317464113235474, - "learning_rate": 4.772922319281312e-06, - "loss": 0.2124, - "step": 14470 - }, - { - "epoch": 1.3632274322319304, - "grad_norm": 0.5848267674446106, - "learning_rate": 4.771635070436998e-06, - "loss": 0.1766, - "step": 14471 - }, - { - "epoch": 1.3633216363250984, - "grad_norm": 0.6971248388290405, - "learning_rate": 4.770347940807488e-06, - "loss": 0.1944, - "step": 14472 - }, - { - "epoch": 1.3634158404182661, - "grad_norm": 0.6944621801376343, - "learning_rate": 4.769060930422132e-06, - "loss": 0.1833, - "step": 14473 - }, - { - "epoch": 1.363510044511434, - "grad_norm": 0.6648178100585938, - "learning_rate": 4.76777403931028e-06, - "loss": 0.1825, - "step": 14474 - }, - { - "epoch": 1.3636042486046018, - "grad_norm": 0.632527232170105, - "learning_rate": 4.76648726750127e-06, - "loss": 0.195, - "step": 14475 - }, - { - "epoch": 1.3636984526977698, - "grad_norm": 0.6708411574363708, - "learning_rate": 4.765200615024439e-06, - "loss": 0.1986, - "step": 14476 - }, - { - "epoch": 1.3637926567909375, - "grad_norm": 0.8647854924201965, - "learning_rate": 4.7639140819091365e-06, - "loss": 0.2349, - "step": 14477 - }, - { - "epoch": 1.3638868608841055, - "grad_norm": 0.6907919645309448, - "learning_rate": 4.762627668184692e-06, - "loss": 0.2094, - "step": 14478 - }, - { - "epoch": 1.3639810649772732, - "grad_norm": 0.6024240851402283, - "learning_rate": 
4.761341373880427e-06, - "loss": 0.1988, - "step": 14479 - }, - { - "epoch": 1.3640752690704412, - "grad_norm": 0.6763929724693298, - "learning_rate": 4.760055199025688e-06, - "loss": 0.2245, - "step": 14480 - }, - { - "epoch": 1.364169473163609, - "grad_norm": 0.6466813683509827, - "learning_rate": 4.758769143649795e-06, - "loss": 0.1956, - "step": 14481 - }, - { - "epoch": 1.3642636772567769, - "grad_norm": 0.6607981324195862, - "learning_rate": 4.757483207782068e-06, - "loss": 0.1926, - "step": 14482 - }, - { - "epoch": 1.3643578813499446, - "grad_norm": 0.5871493816375732, - "learning_rate": 4.75619739145183e-06, - "loss": 0.1769, - "step": 14483 - }, - { - "epoch": 1.3644520854431126, - "grad_norm": 0.7563601732254028, - "learning_rate": 4.754911694688405e-06, - "loss": 0.2157, - "step": 14484 - }, - { - "epoch": 1.3645462895362803, - "grad_norm": 0.6513185501098633, - "learning_rate": 4.753626117521103e-06, - "loss": 0.2007, - "step": 14485 - }, - { - "epoch": 1.3646404936294483, - "grad_norm": 0.5829171538352966, - "learning_rate": 4.752340659979239e-06, - "loss": 0.1691, - "step": 14486 - }, - { - "epoch": 1.364734697722616, - "grad_norm": 0.5896437764167786, - "learning_rate": 4.751055322092126e-06, - "loss": 0.1949, - "step": 14487 - }, - { - "epoch": 1.364828901815784, - "grad_norm": 0.6572392582893372, - "learning_rate": 4.7497701038890664e-06, - "loss": 0.1934, - "step": 14488 - }, - { - "epoch": 1.3649231059089517, - "grad_norm": 0.6673678755760193, - "learning_rate": 4.748485005399367e-06, - "loss": 0.2097, - "step": 14489 - }, - { - "epoch": 1.3650173100021195, - "grad_norm": 0.6368674635887146, - "learning_rate": 4.7472000266523364e-06, - "loss": 0.2025, - "step": 14490 - }, - { - "epoch": 1.3651115140952874, - "grad_norm": 0.7557327747344971, - "learning_rate": 4.745915167677264e-06, - "loss": 0.2029, - "step": 14491 - }, - { - "epoch": 1.3652057181884554, - "grad_norm": 0.6197017431259155, - "learning_rate": 4.744630428503455e-06, - "loss": 0.2242, - "step": 14492 - }, - { - "epoch": 1.3652999222816231, - "grad_norm": 0.6237503886222839, - "learning_rate": 4.743345809160197e-06, - "loss": 0.2016, - "step": 14493 - }, - { - "epoch": 1.3653941263747909, - "grad_norm": 0.6416782736778259, - "learning_rate": 4.742061309676783e-06, - "loss": 0.2004, - "step": 14494 - }, - { - "epoch": 1.3654883304679588, - "grad_norm": 0.6770577430725098, - "learning_rate": 4.740776930082508e-06, - "loss": 0.2125, - "step": 14495 - }, - { - "epoch": 1.3655825345611268, - "grad_norm": 0.6506226658821106, - "learning_rate": 4.739492670406648e-06, - "loss": 0.19, - "step": 14496 - }, - { - "epoch": 1.3656767386542945, - "grad_norm": 0.5910604000091553, - "learning_rate": 4.7382085306784895e-06, - "loss": 0.1606, - "step": 14497 - }, - { - "epoch": 1.3657709427474622, - "grad_norm": 0.5863729119300842, - "learning_rate": 4.736924510927319e-06, - "loss": 0.1861, - "step": 14498 - }, - { - "epoch": 1.3658651468406302, - "grad_norm": 0.6519709229469299, - "learning_rate": 4.735640611182405e-06, - "loss": 0.1939, - "step": 14499 - }, - { - "epoch": 1.3659593509337982, - "grad_norm": 0.7937561869621277, - "learning_rate": 4.734356831473027e-06, - "loss": 0.229, - "step": 14500 - }, - { - "epoch": 1.366053555026966, - "grad_norm": 0.7065807580947876, - "learning_rate": 4.733073171828461e-06, - "loss": 0.1873, - "step": 14501 - }, - { - "epoch": 1.3661477591201336, - "grad_norm": 0.6730491518974304, - "learning_rate": 4.7317896322779715e-06, - "loss": 0.2056, - "step": 14502 - }, - { - "epoch": 
1.3662419632133016, - "grad_norm": 0.6207157373428345, - "learning_rate": 4.730506212850822e-06, - "loss": 0.1908, - "step": 14503 - }, - { - "epoch": 1.3663361673064696, - "grad_norm": 0.6879037022590637, - "learning_rate": 4.729222913576279e-06, - "loss": 0.1863, - "step": 14504 - }, - { - "epoch": 1.3664303713996373, - "grad_norm": 0.5929299592971802, - "learning_rate": 4.72793973448361e-06, - "loss": 0.1964, - "step": 14505 - }, - { - "epoch": 1.366524575492805, - "grad_norm": 0.6904355883598328, - "learning_rate": 4.726656675602065e-06, - "loss": 0.1881, - "step": 14506 - }, - { - "epoch": 1.366618779585973, - "grad_norm": 0.6281517744064331, - "learning_rate": 4.725373736960903e-06, - "loss": 0.1956, - "step": 14507 - }, - { - "epoch": 1.366712983679141, - "grad_norm": 0.6572969555854797, - "learning_rate": 4.7240909185893804e-06, - "loss": 0.2053, - "step": 14508 - }, - { - "epoch": 1.3668071877723087, - "grad_norm": 0.6697401404380798, - "learning_rate": 4.7228082205167414e-06, - "loss": 0.2059, - "step": 14509 - }, - { - "epoch": 1.3669013918654764, - "grad_norm": 0.5750896334648132, - "learning_rate": 4.721525642772236e-06, - "loss": 0.1748, - "step": 14510 - }, - { - "epoch": 1.3669955959586444, - "grad_norm": 0.6491417288780212, - "learning_rate": 4.7202431853851116e-06, - "loss": 0.1849, - "step": 14511 - }, - { - "epoch": 1.3670898000518124, - "grad_norm": 0.6947598457336426, - "learning_rate": 4.718960848384605e-06, - "loss": 0.1903, - "step": 14512 - }, - { - "epoch": 1.36718400414498, - "grad_norm": 0.7724454402923584, - "learning_rate": 4.717678631799959e-06, - "loss": 0.2262, - "step": 14513 - }, - { - "epoch": 1.3672782082381478, - "grad_norm": 0.6440337300300598, - "learning_rate": 4.716396535660412e-06, - "loss": 0.1996, - "step": 14514 - }, - { - "epoch": 1.3673724123313158, - "grad_norm": 0.6808662414550781, - "learning_rate": 4.715114559995197e-06, - "loss": 0.1859, - "step": 14515 - }, - { - "epoch": 1.3674666164244837, - "grad_norm": 1.4499611854553223, - "learning_rate": 4.713832704833534e-06, - "loss": 0.2303, - "step": 14516 - }, - { - "epoch": 1.3675608205176515, - "grad_norm": 0.7500669360160828, - "learning_rate": 4.712550970204669e-06, - "loss": 0.2175, - "step": 14517 - }, - { - "epoch": 1.3676550246108192, - "grad_norm": 0.6843932271003723, - "learning_rate": 4.711269356137819e-06, - "loss": 0.1801, - "step": 14518 - }, - { - "epoch": 1.3677492287039872, - "grad_norm": 0.7365272045135498, - "learning_rate": 4.709987862662199e-06, - "loss": 0.2196, - "step": 14519 - }, - { - "epoch": 1.3678434327971551, - "grad_norm": 0.690955638885498, - "learning_rate": 4.708706489807046e-06, - "loss": 0.2039, - "step": 14520 - }, - { - "epoch": 1.3679376368903229, - "grad_norm": 1.121690273284912, - "learning_rate": 4.707425237601566e-06, - "loss": 0.2303, - "step": 14521 - }, - { - "epoch": 1.3680318409834906, - "grad_norm": 0.6832505464553833, - "learning_rate": 4.706144106074972e-06, - "loss": 0.1973, - "step": 14522 - }, - { - "epoch": 1.3681260450766586, - "grad_norm": 0.6005591154098511, - "learning_rate": 4.704863095256481e-06, - "loss": 0.1916, - "step": 14523 - }, - { - "epoch": 1.3682202491698265, - "grad_norm": 0.7369061708450317, - "learning_rate": 4.7035822051753035e-06, - "loss": 0.1958, - "step": 14524 - }, - { - "epoch": 1.3683144532629943, - "grad_norm": 0.556412398815155, - "learning_rate": 4.702301435860639e-06, - "loss": 0.1805, - "step": 14525 - }, - { - "epoch": 1.368408657356162, - "grad_norm": 0.7060864567756653, - "learning_rate": 
4.7010207873416945e-06, - "loss": 0.2371, - "step": 14526 - }, - { - "epoch": 1.36850286144933, - "grad_norm": 0.6817953586578369, - "learning_rate": 4.699740259647676e-06, - "loss": 0.2131, - "step": 14527 - }, - { - "epoch": 1.368597065542498, - "grad_norm": 0.7367711663246155, - "learning_rate": 4.698459852807772e-06, - "loss": 0.2062, - "step": 14528 - }, - { - "epoch": 1.3686912696356657, - "grad_norm": 0.5938516855239868, - "learning_rate": 4.697179566851182e-06, - "loss": 0.2106, - "step": 14529 - }, - { - "epoch": 1.3687854737288334, - "grad_norm": 0.7921022176742554, - "learning_rate": 4.695899401807104e-06, - "loss": 0.2256, - "step": 14530 - }, - { - "epoch": 1.3688796778220014, - "grad_norm": 0.6134877800941467, - "learning_rate": 4.694619357704718e-06, - "loss": 0.1965, - "step": 14531 - }, - { - "epoch": 1.3689738819151693, - "grad_norm": 0.7247481942176819, - "learning_rate": 4.693339434573219e-06, - "loss": 0.1929, - "step": 14532 - }, - { - "epoch": 1.369068086008337, - "grad_norm": 0.6652804017066956, - "learning_rate": 4.692059632441783e-06, - "loss": 0.1963, - "step": 14533 - }, - { - "epoch": 1.3691622901015048, - "grad_norm": 0.6574836373329163, - "learning_rate": 4.690779951339598e-06, - "loss": 0.1866, - "step": 14534 - }, - { - "epoch": 1.3692564941946728, - "grad_norm": 0.645953893661499, - "learning_rate": 4.689500391295844e-06, - "loss": 0.2129, - "step": 14535 - }, - { - "epoch": 1.3693506982878407, - "grad_norm": 0.609062910079956, - "learning_rate": 4.688220952339691e-06, - "loss": 0.1809, - "step": 14536 - }, - { - "epoch": 1.3694449023810085, - "grad_norm": 0.6129270792007446, - "learning_rate": 4.6869416345003136e-06, - "loss": 0.199, - "step": 14537 - }, - { - "epoch": 1.3695391064741762, - "grad_norm": 0.6726804971694946, - "learning_rate": 4.6856624378068886e-06, - "loss": 0.2218, - "step": 14538 - }, - { - "epoch": 1.3696333105673442, - "grad_norm": 0.6016710996627808, - "learning_rate": 4.684383362288575e-06, - "loss": 0.1836, - "step": 14539 - }, - { - "epoch": 1.3697275146605121, - "grad_norm": 0.6364812850952148, - "learning_rate": 4.683104407974545e-06, - "loss": 0.2038, - "step": 14540 - }, - { - "epoch": 1.3698217187536799, - "grad_norm": 0.8082037568092346, - "learning_rate": 4.681825574893953e-06, - "loss": 0.2099, - "step": 14541 - }, - { - "epoch": 1.3699159228468476, - "grad_norm": 0.5484848022460938, - "learning_rate": 4.680546863075968e-06, - "loss": 0.1739, - "step": 14542 - }, - { - "epoch": 1.3700101269400156, - "grad_norm": 0.63649982213974, - "learning_rate": 4.679268272549737e-06, - "loss": 0.1933, - "step": 14543 - }, - { - "epoch": 1.3701043310331835, - "grad_norm": 0.6741502285003662, - "learning_rate": 4.677989803344416e-06, - "loss": 0.2078, - "step": 14544 - }, - { - "epoch": 1.3701985351263513, - "grad_norm": 0.6861146688461304, - "learning_rate": 4.6767114554891634e-06, - "loss": 0.1827, - "step": 14545 - }, - { - "epoch": 1.370292739219519, - "grad_norm": 1.6177656650543213, - "learning_rate": 4.675433229013119e-06, - "loss": 0.2072, - "step": 14546 - }, - { - "epoch": 1.370386943312687, - "grad_norm": 0.7017530202865601, - "learning_rate": 4.674155123945431e-06, - "loss": 0.215, - "step": 14547 - }, - { - "epoch": 1.370481147405855, - "grad_norm": 0.5845447778701782, - "learning_rate": 4.6728771403152465e-06, - "loss": 0.1644, - "step": 14548 - }, - { - "epoch": 1.3705753514990227, - "grad_norm": 0.6896893382072449, - "learning_rate": 4.671599278151697e-06, - "loss": 0.2359, - "step": 14549 - }, - { - "epoch": 
1.3706695555921904, - "grad_norm": 0.6154652833938599, - "learning_rate": 4.6703215374839265e-06, - "loss": 0.2019, - "step": 14550 - }, - { - "epoch": 1.3707637596853584, - "grad_norm": 0.6628214120864868, - "learning_rate": 4.66904391834107e-06, - "loss": 0.1817, - "step": 14551 - }, - { - "epoch": 1.3708579637785263, - "grad_norm": 0.6417022347450256, - "learning_rate": 4.6677664207522535e-06, - "loss": 0.2035, - "step": 14552 - }, - { - "epoch": 1.370952167871694, - "grad_norm": 0.6868710517883301, - "learning_rate": 4.6664890447466085e-06, - "loss": 0.204, - "step": 14553 - }, - { - "epoch": 1.3710463719648618, - "grad_norm": 0.6142571568489075, - "learning_rate": 4.665211790353265e-06, - "loss": 0.1924, - "step": 14554 - }, - { - "epoch": 1.3711405760580297, - "grad_norm": 0.7381865978240967, - "learning_rate": 4.663934657601344e-06, - "loss": 0.197, - "step": 14555 - }, - { - "epoch": 1.3712347801511977, - "grad_norm": 0.6057921051979065, - "learning_rate": 4.662657646519957e-06, - "loss": 0.2076, - "step": 14556 - }, - { - "epoch": 1.3713289842443654, - "grad_norm": 0.6941007971763611, - "learning_rate": 4.661380757138238e-06, - "loss": 0.2078, - "step": 14557 - }, - { - "epoch": 1.3714231883375332, - "grad_norm": 0.6600576639175415, - "learning_rate": 4.660103989485294e-06, - "loss": 0.204, - "step": 14558 - }, - { - "epoch": 1.3715173924307011, - "grad_norm": 0.5912672877311707, - "learning_rate": 4.6588273435902295e-06, - "loss": 0.178, - "step": 14559 - }, - { - "epoch": 1.3716115965238689, - "grad_norm": 0.6179364919662476, - "learning_rate": 4.65755081948217e-06, - "loss": 0.1926, - "step": 14560 - }, - { - "epoch": 1.3717058006170368, - "grad_norm": 0.7076285481452942, - "learning_rate": 4.6562744171902144e-06, - "loss": 0.2357, - "step": 14561 - }, - { - "epoch": 1.3718000047102046, - "grad_norm": 0.6799371242523193, - "learning_rate": 4.6549981367434615e-06, - "loss": 0.1939, - "step": 14562 - }, - { - "epoch": 1.3718942088033725, - "grad_norm": 0.5989409685134888, - "learning_rate": 4.6537219781710176e-06, - "loss": 0.18, - "step": 14563 - }, - { - "epoch": 1.3719884128965403, - "grad_norm": 0.6571546792984009, - "learning_rate": 4.652445941501984e-06, - "loss": 0.1963, - "step": 14564 - }, - { - "epoch": 1.3720826169897082, - "grad_norm": 0.6644889116287231, - "learning_rate": 4.65117002676545e-06, - "loss": 0.2163, - "step": 14565 - }, - { - "epoch": 1.372176821082876, - "grad_norm": 0.6616732478141785, - "learning_rate": 4.649894233990512e-06, - "loss": 0.1853, - "step": 14566 - }, - { - "epoch": 1.372271025176044, - "grad_norm": 0.6491537094116211, - "learning_rate": 4.648618563206263e-06, - "loss": 0.1922, - "step": 14567 - }, - { - "epoch": 1.3723652292692117, - "grad_norm": 0.6798130869865417, - "learning_rate": 4.647343014441782e-06, - "loss": 0.2214, - "step": 14568 - }, - { - "epoch": 1.3724594333623796, - "grad_norm": 0.7253738045692444, - "learning_rate": 4.646067587726159e-06, - "loss": 0.227, - "step": 14569 - }, - { - "epoch": 1.3725536374555474, - "grad_norm": 0.7082908749580383, - "learning_rate": 4.644792283088479e-06, - "loss": 0.2212, - "step": 14570 - }, - { - "epoch": 1.3726478415487153, - "grad_norm": 0.7631024718284607, - "learning_rate": 4.643517100557814e-06, - "loss": 0.2174, - "step": 14571 - }, - { - "epoch": 1.372742045641883, - "grad_norm": 0.6379669904708862, - "learning_rate": 4.642242040163245e-06, - "loss": 0.1863, - "step": 14572 - }, - { - "epoch": 1.372836249735051, - "grad_norm": 0.593402624130249, - "learning_rate": 
4.640967101933841e-06, - "loss": 0.1695, - "step": 14573 - }, - { - "epoch": 1.3729304538282188, - "grad_norm": 0.6995849013328552, - "learning_rate": 4.6396922858986745e-06, - "loss": 0.2205, - "step": 14574 - }, - { - "epoch": 1.3730246579213867, - "grad_norm": 0.7164720892906189, - "learning_rate": 4.638417592086818e-06, - "loss": 0.205, - "step": 14575 - }, - { - "epoch": 1.3731188620145545, - "grad_norm": 0.681452214717865, - "learning_rate": 4.637143020527329e-06, - "loss": 0.2013, - "step": 14576 - }, - { - "epoch": 1.3732130661077224, - "grad_norm": 0.6576045751571655, - "learning_rate": 4.635868571249271e-06, - "loss": 0.1955, - "step": 14577 - }, - { - "epoch": 1.3733072702008902, - "grad_norm": 0.6932193040847778, - "learning_rate": 4.634594244281711e-06, - "loss": 0.2218, - "step": 14578 - }, - { - "epoch": 1.3734014742940581, - "grad_norm": 0.6721796989440918, - "learning_rate": 4.633320039653695e-06, - "loss": 0.2075, - "step": 14579 - }, - { - "epoch": 1.3734956783872259, - "grad_norm": 0.675926923751831, - "learning_rate": 4.632045957394286e-06, - "loss": 0.1931, - "step": 14580 - }, - { - "epoch": 1.3735898824803938, - "grad_norm": 0.6279994249343872, - "learning_rate": 4.630771997532526e-06, - "loss": 0.1912, - "step": 14581 - }, - { - "epoch": 1.3736840865735616, - "grad_norm": 0.6979064345359802, - "learning_rate": 4.629498160097473e-06, - "loss": 0.2116, - "step": 14582 - }, - { - "epoch": 1.3737782906667295, - "grad_norm": 0.5609949231147766, - "learning_rate": 4.628224445118165e-06, - "loss": 0.1645, - "step": 14583 - }, - { - "epoch": 1.3738724947598973, - "grad_norm": 0.6987993121147156, - "learning_rate": 4.626950852623645e-06, - "loss": 0.2077, - "step": 14584 - }, - { - "epoch": 1.3739666988530652, - "grad_norm": 0.797755777835846, - "learning_rate": 4.62567738264296e-06, - "loss": 0.1966, - "step": 14585 - }, - { - "epoch": 1.374060902946233, - "grad_norm": 0.6203672885894775, - "learning_rate": 4.624404035205139e-06, - "loss": 0.1979, - "step": 14586 - }, - { - "epoch": 1.374155107039401, - "grad_norm": 0.6443046927452087, - "learning_rate": 4.623130810339219e-06, - "loss": 0.2079, - "step": 14587 - }, - { - "epoch": 1.3742493111325687, - "grad_norm": 0.8126762509346008, - "learning_rate": 4.621857708074236e-06, - "loss": 0.2083, - "step": 14588 - }, - { - "epoch": 1.3743435152257366, - "grad_norm": 0.7203518152236938, - "learning_rate": 4.62058472843921e-06, - "loss": 0.2132, - "step": 14589 - }, - { - "epoch": 1.3744377193189043, - "grad_norm": 0.673168420791626, - "learning_rate": 4.619311871463172e-06, - "loss": 0.1998, - "step": 14590 - }, - { - "epoch": 1.3745319234120723, - "grad_norm": 0.6929827332496643, - "learning_rate": 4.618039137175149e-06, - "loss": 0.2139, - "step": 14591 - }, - { - "epoch": 1.37462612750524, - "grad_norm": 0.6129652857780457, - "learning_rate": 4.616766525604157e-06, - "loss": 0.1924, - "step": 14592 - }, - { - "epoch": 1.374720331598408, - "grad_norm": 0.6677462458610535, - "learning_rate": 4.615494036779206e-06, - "loss": 0.206, - "step": 14593 - }, - { - "epoch": 1.3748145356915757, - "grad_norm": 0.6840497851371765, - "learning_rate": 4.614221670729325e-06, - "loss": 0.2012, - "step": 14594 - }, - { - "epoch": 1.3749087397847437, - "grad_norm": 0.6231470108032227, - "learning_rate": 4.61294942748352e-06, - "loss": 0.2111, - "step": 14595 - }, - { - "epoch": 1.3750029438779114, - "grad_norm": 0.6912837028503418, - "learning_rate": 4.611677307070792e-06, - "loss": 0.2024, - "step": 14596 - }, - { - "epoch": 
1.3750971479710794, - "grad_norm": 0.6089299917221069, - "learning_rate": 4.610405309520162e-06, - "loss": 0.211, - "step": 14597 - }, - { - "epoch": 1.3751913520642471, - "grad_norm": 0.6354824900627136, - "learning_rate": 4.609133434860626e-06, - "loss": 0.2091, - "step": 14598 - }, - { - "epoch": 1.375285556157415, - "grad_norm": 0.6570692658424377, - "learning_rate": 4.607861683121176e-06, - "loss": 0.1945, - "step": 14599 - }, - { - "epoch": 1.3753797602505828, - "grad_norm": 0.643208920955658, - "learning_rate": 4.606590054330827e-06, - "loss": 0.1923, - "step": 14600 - }, - { - "epoch": 1.3754739643437508, - "grad_norm": 0.6634544730186462, - "learning_rate": 4.605318548518567e-06, - "loss": 0.1826, - "step": 14601 - }, - { - "epoch": 1.3755681684369185, - "grad_norm": 0.6547496914863586, - "learning_rate": 4.604047165713382e-06, - "loss": 0.2249, - "step": 14602 - }, - { - "epoch": 1.3756623725300865, - "grad_norm": 0.629440426826477, - "learning_rate": 4.602775905944268e-06, - "loss": 0.1893, - "step": 14603 - }, - { - "epoch": 1.3757565766232542, - "grad_norm": 0.6593378782272339, - "learning_rate": 4.601504769240212e-06, - "loss": 0.1874, - "step": 14604 - }, - { - "epoch": 1.3758507807164222, - "grad_norm": 0.6591216921806335, - "learning_rate": 4.600233755630194e-06, - "loss": 0.2197, - "step": 14605 - }, - { - "epoch": 1.37594498480959, - "grad_norm": 0.5971760153770447, - "learning_rate": 4.5989628651431975e-06, - "loss": 0.1856, - "step": 14606 - }, - { - "epoch": 1.376039188902758, - "grad_norm": 0.5898448824882507, - "learning_rate": 4.597692097808203e-06, - "loss": 0.1814, - "step": 14607 - }, - { - "epoch": 1.3761333929959256, - "grad_norm": 0.8175941705703735, - "learning_rate": 4.596421453654181e-06, - "loss": 0.2445, - "step": 14608 - }, - { - "epoch": 1.3762275970890936, - "grad_norm": 0.8518378734588623, - "learning_rate": 4.595150932710107e-06, - "loss": 0.2034, - "step": 14609 - }, - { - "epoch": 1.3763218011822613, - "grad_norm": 0.6331554651260376, - "learning_rate": 4.593880535004952e-06, - "loss": 0.1975, - "step": 14610 - }, - { - "epoch": 1.3764160052754293, - "grad_norm": 0.6141476631164551, - "learning_rate": 4.592610260567679e-06, - "loss": 0.1889, - "step": 14611 - }, - { - "epoch": 1.376510209368597, - "grad_norm": 0.6104586124420166, - "learning_rate": 4.591340109427259e-06, - "loss": 0.2009, - "step": 14612 - }, - { - "epoch": 1.376604413461765, - "grad_norm": 0.666061282157898, - "learning_rate": 4.590070081612645e-06, - "loss": 0.2276, - "step": 14613 - }, - { - "epoch": 1.3766986175549327, - "grad_norm": 0.7101260423660278, - "learning_rate": 4.5888001771528e-06, - "loss": 0.1969, - "step": 14614 - }, - { - "epoch": 1.3767928216481007, - "grad_norm": 0.6600731015205383, - "learning_rate": 4.587530396076683e-06, - "loss": 0.2017, - "step": 14615 - }, - { - "epoch": 1.3768870257412684, - "grad_norm": 0.7134162783622742, - "learning_rate": 4.5862607384132395e-06, - "loss": 0.1936, - "step": 14616 - }, - { - "epoch": 1.3769812298344364, - "grad_norm": 0.6632982492446899, - "learning_rate": 4.584991204191424e-06, - "loss": 0.2248, - "step": 14617 - }, - { - "epoch": 1.3770754339276041, - "grad_norm": 0.6679424047470093, - "learning_rate": 4.583721793440188e-06, - "loss": 0.2035, - "step": 14618 - }, - { - "epoch": 1.377169638020772, - "grad_norm": 0.5961079001426697, - "learning_rate": 4.582452506188467e-06, - "loss": 0.2105, - "step": 14619 - }, - { - "epoch": 1.3772638421139398, - "grad_norm": 0.6959073543548584, - "learning_rate": 
4.581183342465211e-06, - "loss": 0.2066, - "step": 14620 - }, - { - "epoch": 1.3773580462071078, - "grad_norm": 0.6452242732048035, - "learning_rate": 4.579914302299352e-06, - "loss": 0.2099, - "step": 14621 - }, - { - "epoch": 1.3774522503002755, - "grad_norm": 0.6384733319282532, - "learning_rate": 4.578645385719832e-06, - "loss": 0.1713, - "step": 14622 - }, - { - "epoch": 1.3775464543934435, - "grad_norm": 0.7627642154693604, - "learning_rate": 4.577376592755578e-06, - "loss": 0.1767, - "step": 14623 - }, - { - "epoch": 1.3776406584866112, - "grad_norm": 0.6539168357849121, - "learning_rate": 4.576107923435524e-06, - "loss": 0.2151, - "step": 14624 - }, - { - "epoch": 1.3777348625797792, - "grad_norm": 0.7202804684638977, - "learning_rate": 4.574839377788601e-06, - "loss": 0.2211, - "step": 14625 - }, - { - "epoch": 1.377829066672947, - "grad_norm": 0.6229695677757263, - "learning_rate": 4.573570955843728e-06, - "loss": 0.1937, - "step": 14626 - }, - { - "epoch": 1.3779232707661149, - "grad_norm": 0.6452165246009827, - "learning_rate": 4.572302657629828e-06, - "loss": 0.1662, - "step": 14627 - }, - { - "epoch": 1.3780174748592826, - "grad_norm": 0.7254365682601929, - "learning_rate": 4.571034483175826e-06, - "loss": 0.2067, - "step": 14628 - }, - { - "epoch": 1.3781116789524503, - "grad_norm": 0.6990476250648499, - "learning_rate": 4.56976643251063e-06, - "loss": 0.1784, - "step": 14629 - }, - { - "epoch": 1.3782058830456183, - "grad_norm": 0.6916759610176086, - "learning_rate": 4.568498505663157e-06, - "loss": 0.1989, - "step": 14630 - }, - { - "epoch": 1.3783000871387863, - "grad_norm": 0.6801608204841614, - "learning_rate": 4.567230702662322e-06, - "loss": 0.1871, - "step": 14631 - }, - { - "epoch": 1.378394291231954, - "grad_norm": 0.7456927299499512, - "learning_rate": 4.565963023537029e-06, - "loss": 0.2056, - "step": 14632 - }, - { - "epoch": 1.3784884953251217, - "grad_norm": 0.7449910044670105, - "learning_rate": 4.564695468316175e-06, - "loss": 0.1914, - "step": 14633 - }, - { - "epoch": 1.3785826994182897, - "grad_norm": 0.6321032643318176, - "learning_rate": 4.563428037028677e-06, - "loss": 0.2096, - "step": 14634 - }, - { - "epoch": 1.3786769035114577, - "grad_norm": 0.6685911417007446, - "learning_rate": 4.56216072970343e-06, - "loss": 0.1888, - "step": 14635 - }, - { - "epoch": 1.3787711076046254, - "grad_norm": 0.670963704586029, - "learning_rate": 4.560893546369318e-06, - "loss": 0.2113, - "step": 14636 - }, - { - "epoch": 1.3788653116977931, - "grad_norm": 0.6442837119102478, - "learning_rate": 4.559626487055254e-06, - "loss": 0.1871, - "step": 14637 - }, - { - "epoch": 1.378959515790961, - "grad_norm": 0.6563900113105774, - "learning_rate": 4.558359551790119e-06, - "loss": 0.1958, - "step": 14638 - }, - { - "epoch": 1.379053719884129, - "grad_norm": 0.7210825681686401, - "learning_rate": 4.5570927406027955e-06, - "loss": 0.2129, - "step": 14639 - }, - { - "epoch": 1.3791479239772968, - "grad_norm": 0.6628056764602661, - "learning_rate": 4.555826053522182e-06, - "loss": 0.2185, - "step": 14640 - }, - { - "epoch": 1.3792421280704645, - "grad_norm": 0.6475352644920349, - "learning_rate": 4.554559490577154e-06, - "loss": 0.1932, - "step": 14641 - }, - { - "epoch": 1.3793363321636325, - "grad_norm": 0.651343584060669, - "learning_rate": 4.553293051796587e-06, - "loss": 0.1904, - "step": 14642 - }, - { - "epoch": 1.3794305362568005, - "grad_norm": 0.6923327445983887, - "learning_rate": 4.552026737209362e-06, - "loss": 0.1776, - "step": 14643 - }, - { - "epoch": 
1.3795247403499682, - "grad_norm": 0.6525247693061829, - "learning_rate": 4.5507605468443575e-06, - "loss": 0.1962, - "step": 14644 - }, - { - "epoch": 1.379618944443136, - "grad_norm": 0.6223458051681519, - "learning_rate": 4.549494480730435e-06, - "loss": 0.1838, - "step": 14645 - }, - { - "epoch": 1.379713148536304, - "grad_norm": 0.6536427140235901, - "learning_rate": 4.548228538896468e-06, - "loss": 0.1914, - "step": 14646 - }, - { - "epoch": 1.3798073526294719, - "grad_norm": 0.6627947688102722, - "learning_rate": 4.546962721371326e-06, - "loss": 0.2063, - "step": 14647 - }, - { - "epoch": 1.3799015567226396, - "grad_norm": 0.6367174983024597, - "learning_rate": 4.545697028183863e-06, - "loss": 0.1973, - "step": 14648 - }, - { - "epoch": 1.3799957608158073, - "grad_norm": 0.6368837952613831, - "learning_rate": 4.544431459362943e-06, - "loss": 0.1913, - "step": 14649 - }, - { - "epoch": 1.3800899649089753, - "grad_norm": 0.6525770425796509, - "learning_rate": 4.543166014937427e-06, - "loss": 0.202, - "step": 14650 - }, - { - "epoch": 1.3801841690021432, - "grad_norm": 0.668809711933136, - "learning_rate": 4.541900694936161e-06, - "loss": 0.1944, - "step": 14651 - }, - { - "epoch": 1.380278373095311, - "grad_norm": 0.6212360262870789, - "learning_rate": 4.540635499388004e-06, - "loss": 0.1893, - "step": 14652 - }, - { - "epoch": 1.3803725771884787, - "grad_norm": 0.6254076361656189, - "learning_rate": 4.539370428321798e-06, - "loss": 0.1778, - "step": 14653 - }, - { - "epoch": 1.3804667812816467, - "grad_norm": 0.6460777521133423, - "learning_rate": 4.538105481766389e-06, - "loss": 0.2101, - "step": 14654 - }, - { - "epoch": 1.3805609853748146, - "grad_norm": 0.6969109177589417, - "learning_rate": 4.536840659750628e-06, - "loss": 0.2052, - "step": 14655 - }, - { - "epoch": 1.3806551894679824, - "grad_norm": 0.6455351710319519, - "learning_rate": 4.535575962303344e-06, - "loss": 0.2069, - "step": 14656 - }, - { - "epoch": 1.3807493935611501, - "grad_norm": 0.626264750957489, - "learning_rate": 4.534311389453383e-06, - "loss": 0.1732, - "step": 14657 - }, - { - "epoch": 1.380843597654318, - "grad_norm": 0.6563705801963806, - "learning_rate": 4.533046941229571e-06, - "loss": 0.1942, - "step": 14658 - }, - { - "epoch": 1.380937801747486, - "grad_norm": 0.6639990210533142, - "learning_rate": 4.531782617660744e-06, - "loss": 0.2129, - "step": 14659 - }, - { - "epoch": 1.3810320058406538, - "grad_norm": 0.731053352355957, - "learning_rate": 4.530518418775734e-06, - "loss": 0.2214, - "step": 14660 - }, - { - "epoch": 1.3811262099338215, - "grad_norm": 0.7711619138717651, - "learning_rate": 4.529254344603358e-06, - "loss": 0.1677, - "step": 14661 - }, - { - "epoch": 1.3812204140269895, - "grad_norm": 0.7596476078033447, - "learning_rate": 4.527990395172448e-06, - "loss": 0.2339, - "step": 14662 - }, - { - "epoch": 1.3813146181201574, - "grad_norm": 0.6887319684028625, - "learning_rate": 4.526726570511816e-06, - "loss": 0.1914, - "step": 14663 - }, - { - "epoch": 1.3814088222133252, - "grad_norm": 0.7025315165519714, - "learning_rate": 4.525462870650282e-06, - "loss": 0.1815, - "step": 14664 - }, - { - "epoch": 1.381503026306493, - "grad_norm": 0.652842104434967, - "learning_rate": 4.524199295616666e-06, - "loss": 0.216, - "step": 14665 - }, - { - "epoch": 1.3815972303996609, - "grad_norm": 0.7267138361930847, - "learning_rate": 4.522935845439771e-06, - "loss": 0.1935, - "step": 14666 - }, - { - "epoch": 1.3816914344928288, - "grad_norm": 0.6524989604949951, - "learning_rate": 
4.521672520148408e-06, - "loss": 0.2066, - "step": 14667 - }, - { - "epoch": 1.3817856385859966, - "grad_norm": 0.6495004296302795, - "learning_rate": 4.520409319771388e-06, - "loss": 0.1989, - "step": 14668 - }, - { - "epoch": 1.3818798426791643, - "grad_norm": 0.7611294388771057, - "learning_rate": 4.519146244337506e-06, - "loss": 0.2103, - "step": 14669 - }, - { - "epoch": 1.3819740467723323, - "grad_norm": 0.6148377656936646, - "learning_rate": 4.517883293875567e-06, - "loss": 0.2041, - "step": 14670 - }, - { - "epoch": 1.3820682508655002, - "grad_norm": 0.621082067489624, - "learning_rate": 4.51662046841437e-06, - "loss": 0.1843, - "step": 14671 - }, - { - "epoch": 1.382162454958668, - "grad_norm": 0.7805433869361877, - "learning_rate": 4.515357767982706e-06, - "loss": 0.1999, - "step": 14672 - }, - { - "epoch": 1.3822566590518357, - "grad_norm": 0.7082175612449646, - "learning_rate": 4.5140951926093615e-06, - "loss": 0.2322, - "step": 14673 - }, - { - "epoch": 1.3823508631450037, - "grad_norm": 0.704624593257904, - "learning_rate": 4.512832742323137e-06, - "loss": 0.2121, - "step": 14674 - }, - { - "epoch": 1.3824450672381716, - "grad_norm": 0.6305441856384277, - "learning_rate": 4.5115704171528105e-06, - "loss": 0.1668, - "step": 14675 - }, - { - "epoch": 1.3825392713313394, - "grad_norm": 0.6836445927619934, - "learning_rate": 4.510308217127162e-06, - "loss": 0.201, - "step": 14676 - }, - { - "epoch": 1.382633475424507, - "grad_norm": 0.7017547488212585, - "learning_rate": 4.509046142274981e-06, - "loss": 0.2054, - "step": 14677 - }, - { - "epoch": 1.382727679517675, - "grad_norm": 0.6677689552307129, - "learning_rate": 4.507784192625041e-06, - "loss": 0.219, - "step": 14678 - }, - { - "epoch": 1.382821883610843, - "grad_norm": 0.6143436431884766, - "learning_rate": 4.5065223682061075e-06, - "loss": 0.2066, - "step": 14679 - }, - { - "epoch": 1.3829160877040108, - "grad_norm": 2.129192590713501, - "learning_rate": 4.505260669046968e-06, - "loss": 0.1941, - "step": 14680 - }, - { - "epoch": 1.3830102917971785, - "grad_norm": 0.6475026607513428, - "learning_rate": 4.503999095176382e-06, - "loss": 0.2101, - "step": 14681 - }, - { - "epoch": 1.3831044958903465, - "grad_norm": 0.6193498969078064, - "learning_rate": 4.502737646623114e-06, - "loss": 0.1986, - "step": 14682 - }, - { - "epoch": 1.3831986999835144, - "grad_norm": 0.6228429079055786, - "learning_rate": 4.5014763234159285e-06, - "loss": 0.2105, - "step": 14683 - }, - { - "epoch": 1.3832929040766822, - "grad_norm": 0.6349548101425171, - "learning_rate": 4.50021512558359e-06, - "loss": 0.1967, - "step": 14684 - }, - { - "epoch": 1.3833871081698499, - "grad_norm": 0.7757889628410339, - "learning_rate": 4.498954053154849e-06, - "loss": 0.2268, - "step": 14685 - }, - { - "epoch": 1.3834813122630178, - "grad_norm": 0.6858643889427185, - "learning_rate": 4.497693106158462e-06, - "loss": 0.2143, - "step": 14686 - }, - { - "epoch": 1.3835755163561858, - "grad_norm": 0.6161576509475708, - "learning_rate": 4.496432284623186e-06, - "loss": 0.1807, - "step": 14687 - }, - { - "epoch": 1.3836697204493535, - "grad_norm": 0.6419311761856079, - "learning_rate": 4.495171588577763e-06, - "loss": 0.2197, - "step": 14688 - }, - { - "epoch": 1.3837639245425213, - "grad_norm": 0.6505982875823975, - "learning_rate": 4.493911018050941e-06, - "loss": 0.1718, - "step": 14689 - }, - { - "epoch": 1.3838581286356892, - "grad_norm": 0.7509188652038574, - "learning_rate": 4.492650573071465e-06, - "loss": 0.2127, - "step": 14690 - }, - { - "epoch": 
1.3839523327288572, - "grad_norm": 0.6153393983840942, - "learning_rate": 4.49139025366807e-06, - "loss": 0.1963, - "step": 14691 - }, - { - "epoch": 1.384046536822025, - "grad_norm": 0.6083927750587463, - "learning_rate": 4.490130059869501e-06, - "loss": 0.188, - "step": 14692 - }, - { - "epoch": 1.3841407409151927, - "grad_norm": 0.6401993036270142, - "learning_rate": 4.488869991704483e-06, - "loss": 0.2107, - "step": 14693 - }, - { - "epoch": 1.3842349450083606, - "grad_norm": 0.6734372973442078, - "learning_rate": 4.4876100492017535e-06, - "loss": 0.2009, - "step": 14694 - }, - { - "epoch": 1.3843291491015286, - "grad_norm": 0.7103176712989807, - "learning_rate": 4.486350232390043e-06, - "loss": 0.1718, - "step": 14695 - }, - { - "epoch": 1.3844233531946963, - "grad_norm": 0.7211053967475891, - "learning_rate": 4.485090541298071e-06, - "loss": 0.2088, - "step": 14696 - }, - { - "epoch": 1.384517557287864, - "grad_norm": 0.6296860575675964, - "learning_rate": 4.483830975954566e-06, - "loss": 0.196, - "step": 14697 - }, - { - "epoch": 1.384611761381032, - "grad_norm": 0.6704601645469666, - "learning_rate": 4.482571536388244e-06, - "loss": 0.185, - "step": 14698 - }, - { - "epoch": 1.3847059654741998, - "grad_norm": 0.6774282455444336, - "learning_rate": 4.481312222627823e-06, - "loss": 0.1811, - "step": 14699 - }, - { - "epoch": 1.3848001695673677, - "grad_norm": 0.6754395365715027, - "learning_rate": 4.480053034702021e-06, - "loss": 0.1911, - "step": 14700 - }, - { - "epoch": 1.3848943736605355, - "grad_norm": 0.6918966770172119, - "learning_rate": 4.478793972639544e-06, - "loss": 0.1954, - "step": 14701 - }, - { - "epoch": 1.3849885777537034, - "grad_norm": 0.6784451603889465, - "learning_rate": 4.477535036469106e-06, - "loss": 0.234, - "step": 14702 - }, - { - "epoch": 1.3850827818468712, - "grad_norm": 0.6600942015647888, - "learning_rate": 4.476276226219406e-06, - "loss": 0.2479, - "step": 14703 - }, - { - "epoch": 1.3851769859400391, - "grad_norm": 0.5945283770561218, - "learning_rate": 4.475017541919151e-06, - "loss": 0.1902, - "step": 14704 - }, - { - "epoch": 1.3852711900332069, - "grad_norm": 0.705072820186615, - "learning_rate": 4.473758983597044e-06, - "loss": 0.1962, - "step": 14705 - }, - { - "epoch": 1.3853653941263748, - "grad_norm": 0.6650243997573853, - "learning_rate": 4.472500551281776e-06, - "loss": 0.1855, - "step": 14706 - }, - { - "epoch": 1.3854595982195426, - "grad_norm": 0.6843200922012329, - "learning_rate": 4.471242245002043e-06, - "loss": 0.2173, - "step": 14707 - }, - { - "epoch": 1.3855538023127105, - "grad_norm": 0.650365948677063, - "learning_rate": 4.4699840647865414e-06, - "loss": 0.2053, - "step": 14708 - }, - { - "epoch": 1.3856480064058783, - "grad_norm": 0.6803451776504517, - "learning_rate": 4.46872601066395e-06, - "loss": 0.2103, - "step": 14709 - }, - { - "epoch": 1.3857422104990462, - "grad_norm": 0.6168040037155151, - "learning_rate": 4.4674680826629626e-06, - "loss": 0.1888, - "step": 14710 - }, - { - "epoch": 1.385836414592214, - "grad_norm": 0.6308485865592957, - "learning_rate": 4.466210280812261e-06, - "loss": 0.179, - "step": 14711 - }, - { - "epoch": 1.385930618685382, - "grad_norm": 0.6388193368911743, - "learning_rate": 4.464952605140525e-06, - "loss": 0.205, - "step": 14712 - }, - { - "epoch": 1.3860248227785497, - "grad_norm": 0.7123376727104187, - "learning_rate": 4.46369505567642e-06, - "loss": 0.2082, - "step": 14713 - }, - { - "epoch": 1.3861190268717176, - "grad_norm": 0.6630170941352844, - "learning_rate": 
4.462437632448639e-06, - "loss": 0.175, - "step": 14714 - }, - { - "epoch": 1.3862132309648854, - "grad_norm": 0.7099169492721558, - "learning_rate": 4.461180335485843e-06, - "loss": 0.194, - "step": 14715 - }, - { - "epoch": 1.3863074350580533, - "grad_norm": 0.7670761942863464, - "learning_rate": 4.459923164816694e-06, - "loss": 0.1955, - "step": 14716 - }, - { - "epoch": 1.386401639151221, - "grad_norm": 0.651181697845459, - "learning_rate": 4.458666120469872e-06, - "loss": 0.2138, - "step": 14717 - }, - { - "epoch": 1.386495843244389, - "grad_norm": 0.6150261163711548, - "learning_rate": 4.457409202474033e-06, - "loss": 0.1928, - "step": 14718 - }, - { - "epoch": 1.3865900473375568, - "grad_norm": 0.7484995722770691, - "learning_rate": 4.456152410857828e-06, - "loss": 0.1955, - "step": 14719 - }, - { - "epoch": 1.3866842514307247, - "grad_norm": 0.6253624558448792, - "learning_rate": 4.454895745649929e-06, - "loss": 0.1983, - "step": 14720 - }, - { - "epoch": 1.3867784555238925, - "grad_norm": 0.6854663491249084, - "learning_rate": 4.453639206878982e-06, - "loss": 0.2288, - "step": 14721 - }, - { - "epoch": 1.3868726596170604, - "grad_norm": 0.6492166519165039, - "learning_rate": 4.452382794573636e-06, - "loss": 0.193, - "step": 14722 - }, - { - "epoch": 1.3869668637102281, - "grad_norm": 0.6651354432106018, - "learning_rate": 4.451126508762542e-06, - "loss": 0.1984, - "step": 14723 - }, - { - "epoch": 1.387061067803396, - "grad_norm": 0.7173478603363037, - "learning_rate": 4.449870349474349e-06, - "loss": 0.2114, - "step": 14724 - }, - { - "epoch": 1.3871552718965638, - "grad_norm": 0.6437345147132874, - "learning_rate": 4.448614316737692e-06, - "loss": 0.2038, - "step": 14725 - }, - { - "epoch": 1.3872494759897318, - "grad_norm": 0.8587262630462646, - "learning_rate": 4.4473584105812125e-06, - "loss": 0.1895, - "step": 14726 - }, - { - "epoch": 1.3873436800828995, - "grad_norm": 0.6873801946640015, - "learning_rate": 4.446102631033553e-06, - "loss": 0.1808, - "step": 14727 - }, - { - "epoch": 1.3874378841760675, - "grad_norm": 0.6526833772659302, - "learning_rate": 4.444846978123339e-06, - "loss": 0.1973, - "step": 14728 - }, - { - "epoch": 1.3875320882692352, - "grad_norm": 0.745194137096405, - "learning_rate": 4.4435914518792055e-06, - "loss": 0.2133, - "step": 14729 - }, - { - "epoch": 1.3876262923624032, - "grad_norm": 1.006110429763794, - "learning_rate": 4.4423360523297835e-06, - "loss": 0.2134, - "step": 14730 - }, - { - "epoch": 1.387720496455571, - "grad_norm": 0.7415178418159485, - "learning_rate": 4.44108077950369e-06, - "loss": 0.2155, - "step": 14731 - }, - { - "epoch": 1.387814700548739, - "grad_norm": 0.6302402019500732, - "learning_rate": 4.439825633429558e-06, - "loss": 0.196, - "step": 14732 - }, - { - "epoch": 1.3879089046419066, - "grad_norm": 0.759882926940918, - "learning_rate": 4.438570614135994e-06, - "loss": 0.1934, - "step": 14733 - }, - { - "epoch": 1.3880031087350746, - "grad_norm": 0.7392933368682861, - "learning_rate": 4.437315721651623e-06, - "loss": 0.2178, - "step": 14734 - }, - { - "epoch": 1.3880973128282423, - "grad_norm": 0.6102693676948547, - "learning_rate": 4.4360609560050585e-06, - "loss": 0.1844, - "step": 14735 - }, - { - "epoch": 1.3881915169214103, - "grad_norm": 0.5957104563713074, - "learning_rate": 4.434806317224905e-06, - "loss": 0.1815, - "step": 14736 - }, - { - "epoch": 1.388285721014578, - "grad_norm": 0.6966644525527954, - "learning_rate": 4.433551805339779e-06, - "loss": 0.2062, - "step": 14737 - }, - { - "epoch": 
1.388379925107746, - "grad_norm": 0.5965523719787598, - "learning_rate": 4.4322974203782776e-06, - "loss": 0.1767, - "step": 14738 - }, - { - "epoch": 1.3884741292009137, - "grad_norm": 0.6402685642242432, - "learning_rate": 4.431043162369005e-06, - "loss": 0.1839, - "step": 14739 - }, - { - "epoch": 1.3885683332940817, - "grad_norm": 0.7522590160369873, - "learning_rate": 4.429789031340565e-06, - "loss": 0.2459, - "step": 14740 - }, - { - "epoch": 1.3886625373872494, - "grad_norm": 0.6181792616844177, - "learning_rate": 4.428535027321544e-06, - "loss": 0.1985, - "step": 14741 - }, - { - "epoch": 1.3887567414804174, - "grad_norm": 0.6564207673072815, - "learning_rate": 4.427281150340547e-06, - "loss": 0.1993, - "step": 14742 - }, - { - "epoch": 1.3888509455735851, - "grad_norm": 0.6626052856445312, - "learning_rate": 4.426027400426152e-06, - "loss": 0.227, - "step": 14743 - }, - { - "epoch": 1.388945149666753, - "grad_norm": 0.6792217493057251, - "learning_rate": 4.424773777606955e-06, - "loss": 0.2034, - "step": 14744 - }, - { - "epoch": 1.3890393537599208, - "grad_norm": 0.6681301593780518, - "learning_rate": 4.42352028191154e-06, - "loss": 0.2135, - "step": 14745 - }, - { - "epoch": 1.3891335578530888, - "grad_norm": 0.6481303572654724, - "learning_rate": 4.422266913368484e-06, - "loss": 0.1823, - "step": 14746 - }, - { - "epoch": 1.3892277619462565, - "grad_norm": 0.6512411832809448, - "learning_rate": 4.4210136720063665e-06, - "loss": 0.1708, - "step": 14747 - }, - { - "epoch": 1.3893219660394245, - "grad_norm": 0.5567074418067932, - "learning_rate": 4.4197605578537715e-06, - "loss": 0.1779, - "step": 14748 - }, - { - "epoch": 1.3894161701325922, - "grad_norm": 0.6283254027366638, - "learning_rate": 4.418507570939261e-06, - "loss": 0.1952, - "step": 14749 - }, - { - "epoch": 1.3895103742257602, - "grad_norm": 0.6755726933479309, - "learning_rate": 4.41725471129141e-06, - "loss": 0.2124, - "step": 14750 - }, - { - "epoch": 1.389604578318928, - "grad_norm": 0.6136601567268372, - "learning_rate": 4.416001978938789e-06, - "loss": 0.1875, - "step": 14751 - }, - { - "epoch": 1.3896987824120959, - "grad_norm": 0.6659301519393921, - "learning_rate": 4.414749373909959e-06, - "loss": 0.206, - "step": 14752 - }, - { - "epoch": 1.3897929865052636, - "grad_norm": 0.6104385256767273, - "learning_rate": 4.413496896233474e-06, - "loss": 0.1812, - "step": 14753 - }, - { - "epoch": 1.3898871905984316, - "grad_norm": 0.7210302948951721, - "learning_rate": 4.412244545937906e-06, - "loss": 0.1923, - "step": 14754 - }, - { - "epoch": 1.3899813946915993, - "grad_norm": 0.6369888782501221, - "learning_rate": 4.4109923230518045e-06, - "loss": 0.2016, - "step": 14755 - }, - { - "epoch": 1.3900755987847673, - "grad_norm": 0.6671775579452515, - "learning_rate": 4.409740227603715e-06, - "loss": 0.206, - "step": 14756 - }, - { - "epoch": 1.390169802877935, - "grad_norm": 0.6954997181892395, - "learning_rate": 4.4084882596222e-06, - "loss": 0.2128, - "step": 14757 - }, - { - "epoch": 1.390264006971103, - "grad_norm": 0.6379964351654053, - "learning_rate": 4.4072364191358006e-06, - "loss": 0.2058, - "step": 14758 - }, - { - "epoch": 1.3903582110642707, - "grad_norm": 0.6217644810676575, - "learning_rate": 4.405984706173052e-06, - "loss": 0.171, - "step": 14759 - }, - { - "epoch": 1.3904524151574387, - "grad_norm": 1.2221981287002563, - "learning_rate": 4.404733120762512e-06, - "loss": 0.1833, - "step": 14760 - }, - { - "epoch": 1.3905466192506064, - "grad_norm": 0.6969944834709167, - "learning_rate": 
4.4034816629327095e-06, - "loss": 0.2197, - "step": 14761 - }, - { - "epoch": 1.3906408233437744, - "grad_norm": 0.8289557099342346, - "learning_rate": 4.402230332712176e-06, - "loss": 0.209, - "step": 14762 - }, - { - "epoch": 1.390735027436942, - "grad_norm": 0.9347938299179077, - "learning_rate": 4.400979130129449e-06, - "loss": 0.2159, - "step": 14763 - }, - { - "epoch": 1.39082923153011, - "grad_norm": 0.7640525102615356, - "learning_rate": 4.39972805521306e-06, - "loss": 0.1962, - "step": 14764 - }, - { - "epoch": 1.3909234356232778, - "grad_norm": 0.6633443236351013, - "learning_rate": 4.398477107991529e-06, - "loss": 0.1916, - "step": 14765 - }, - { - "epoch": 1.3910176397164458, - "grad_norm": 0.6027562618255615, - "learning_rate": 4.397226288493384e-06, - "loss": 0.1719, - "step": 14766 - }, - { - "epoch": 1.3911118438096135, - "grad_norm": 0.6049935817718506, - "learning_rate": 4.3959755967471465e-06, - "loss": 0.1979, - "step": 14767 - }, - { - "epoch": 1.3912060479027812, - "grad_norm": 0.736307680606842, - "learning_rate": 4.394725032781328e-06, - "loss": 0.2163, - "step": 14768 - }, - { - "epoch": 1.3913002519959492, - "grad_norm": 0.6756777167320251, - "learning_rate": 4.393474596624449e-06, - "loss": 0.2139, - "step": 14769 - }, - { - "epoch": 1.3913944560891172, - "grad_norm": 0.6903586387634277, - "learning_rate": 4.3922242883050226e-06, - "loss": 0.2032, - "step": 14770 - }, - { - "epoch": 1.391488660182285, - "grad_norm": 0.6733536124229431, - "learning_rate": 4.390974107851552e-06, - "loss": 0.1946, - "step": 14771 - }, - { - "epoch": 1.3915828642754526, - "grad_norm": 0.6567028164863586, - "learning_rate": 4.389724055292549e-06, - "loss": 0.205, - "step": 14772 - }, - { - "epoch": 1.3916770683686206, - "grad_norm": 0.6626735925674438, - "learning_rate": 4.388474130656512e-06, - "loss": 0.1845, - "step": 14773 - }, - { - "epoch": 1.3917712724617886, - "grad_norm": 0.668620765209198, - "learning_rate": 4.387224333971946e-06, - "loss": 0.2279, - "step": 14774 - }, - { - "epoch": 1.3918654765549563, - "grad_norm": 0.6802232265472412, - "learning_rate": 4.3859746652673405e-06, - "loss": 0.2005, - "step": 14775 - }, - { - "epoch": 1.391959680648124, - "grad_norm": 0.7972701787948608, - "learning_rate": 4.3847251245711965e-06, - "loss": 0.2036, - "step": 14776 - }, - { - "epoch": 1.392053884741292, - "grad_norm": 0.7019937038421631, - "learning_rate": 4.383475711912007e-06, - "loss": 0.1799, - "step": 14777 - }, - { - "epoch": 1.39214808883446, - "grad_norm": 0.672942042350769, - "learning_rate": 4.3822264273182536e-06, - "loss": 0.1762, - "step": 14778 - }, - { - "epoch": 1.3922422929276277, - "grad_norm": 0.6258542537689209, - "learning_rate": 4.380977270818426e-06, - "loss": 0.1916, - "step": 14779 - }, - { - "epoch": 1.3923364970207954, - "grad_norm": 0.6168133020401001, - "learning_rate": 4.379728242441011e-06, - "loss": 0.1858, - "step": 14780 - }, - { - "epoch": 1.3924307011139634, - "grad_norm": 0.640653669834137, - "learning_rate": 4.378479342214479e-06, - "loss": 0.1749, - "step": 14781 - }, - { - "epoch": 1.3925249052071313, - "grad_norm": 0.6405872106552124, - "learning_rate": 4.377230570167316e-06, - "loss": 0.1764, - "step": 14782 - }, - { - "epoch": 1.392619109300299, - "grad_norm": 0.6704461574554443, - "learning_rate": 4.375981926327988e-06, - "loss": 0.1984, - "step": 14783 - }, - { - "epoch": 1.3927133133934668, - "grad_norm": 0.6826492547988892, - "learning_rate": 4.374733410724969e-06, - "loss": 0.1943, - "step": 14784 - }, - { - "epoch": 
1.3928075174866348, - "grad_norm": 0.8903515934944153, - "learning_rate": 4.373485023386733e-06, - "loss": 0.2058, - "step": 14785 - }, - { - "epoch": 1.3929017215798027, - "grad_norm": 0.6510199308395386, - "learning_rate": 4.3722367643417365e-06, - "loss": 0.1864, - "step": 14786 - }, - { - "epoch": 1.3929959256729705, - "grad_norm": 0.641514241695404, - "learning_rate": 4.370988633618445e-06, - "loss": 0.2059, - "step": 14787 - }, - { - "epoch": 1.3930901297661382, - "grad_norm": 0.6693086624145508, - "learning_rate": 4.369740631245321e-06, - "loss": 0.2201, - "step": 14788 - }, - { - "epoch": 1.3931843338593062, - "grad_norm": 0.5776128768920898, - "learning_rate": 4.368492757250814e-06, - "loss": 0.1851, - "step": 14789 - }, - { - "epoch": 1.3932785379524741, - "grad_norm": 0.6831786632537842, - "learning_rate": 4.367245011663383e-06, - "loss": 0.1958, - "step": 14790 - }, - { - "epoch": 1.3933727420456419, - "grad_norm": 0.6280180811882019, - "learning_rate": 4.365997394511479e-06, - "loss": 0.1823, - "step": 14791 - }, - { - "epoch": 1.3934669461388096, - "grad_norm": 0.6874216794967651, - "learning_rate": 4.364749905823549e-06, - "loss": 0.223, - "step": 14792 - }, - { - "epoch": 1.3935611502319776, - "grad_norm": 0.648345947265625, - "learning_rate": 4.3635025456280275e-06, - "loss": 0.2489, - "step": 14793 - }, - { - "epoch": 1.3936553543251455, - "grad_norm": 0.6531689167022705, - "learning_rate": 4.3622553139533726e-06, - "loss": 0.1857, - "step": 14794 - }, - { - "epoch": 1.3937495584183133, - "grad_norm": 0.6035807132720947, - "learning_rate": 4.361008210828016e-06, - "loss": 0.185, - "step": 14795 - }, - { - "epoch": 1.393843762511481, - "grad_norm": 0.6098361015319824, - "learning_rate": 4.3597612362803854e-06, - "loss": 0.1778, - "step": 14796 - }, - { - "epoch": 1.393937966604649, - "grad_norm": 0.6855610609054565, - "learning_rate": 4.358514390338929e-06, - "loss": 0.2105, - "step": 14797 - }, - { - "epoch": 1.394032170697817, - "grad_norm": 0.5912315249443054, - "learning_rate": 4.357267673032069e-06, - "loss": 0.2294, - "step": 14798 - }, - { - "epoch": 1.3941263747909847, - "grad_norm": 0.6672865152359009, - "learning_rate": 4.35602108438823e-06, - "loss": 0.1979, - "step": 14799 - }, - { - "epoch": 1.3942205788841524, - "grad_norm": 0.7342973351478577, - "learning_rate": 4.354774624435838e-06, - "loss": 0.2132, - "step": 14800 - }, - { - "epoch": 1.3943147829773204, - "grad_norm": 0.6629995107650757, - "learning_rate": 4.353528293203318e-06, - "loss": 0.2207, - "step": 14801 - }, - { - "epoch": 1.3944089870704883, - "grad_norm": 0.6592957377433777, - "learning_rate": 4.352282090719083e-06, - "loss": 0.1848, - "step": 14802 - }, - { - "epoch": 1.394503191163656, - "grad_norm": 0.6145618557929993, - "learning_rate": 4.351036017011551e-06, - "loss": 0.1883, - "step": 14803 - }, - { - "epoch": 1.3945973952568238, - "grad_norm": 0.5742574334144592, - "learning_rate": 4.349790072109136e-06, - "loss": 0.1716, - "step": 14804 - }, - { - "epoch": 1.3946915993499918, - "grad_norm": 0.6625123620033264, - "learning_rate": 4.348544256040244e-06, - "loss": 0.2041, - "step": 14805 - }, - { - "epoch": 1.3947858034431597, - "grad_norm": 0.6488248705863953, - "learning_rate": 4.347298568833281e-06, - "loss": 0.2035, - "step": 14806 - }, - { - "epoch": 1.3948800075363275, - "grad_norm": 0.747687816619873, - "learning_rate": 4.346053010516657e-06, - "loss": 0.223, - "step": 14807 - }, - { - "epoch": 1.3949742116294952, - "grad_norm": 0.6713259220123291, - "learning_rate": 
4.344807581118765e-06, - "loss": 0.1567, - "step": 14808 - }, - { - "epoch": 1.3950684157226632, - "grad_norm": 0.6101518869400024, - "learning_rate": 4.343562280668006e-06, - "loss": 0.2271, - "step": 14809 - }, - { - "epoch": 1.3951626198158311, - "grad_norm": 0.6190088987350464, - "learning_rate": 4.34231710919278e-06, - "loss": 0.1702, - "step": 14810 - }, - { - "epoch": 1.3952568239089989, - "grad_norm": 0.6656995415687561, - "learning_rate": 4.341072066721468e-06, - "loss": 0.1987, - "step": 14811 - }, - { - "epoch": 1.3953510280021666, - "grad_norm": 0.6109845638275146, - "learning_rate": 4.339827153282469e-06, - "loss": 0.1997, - "step": 14812 - }, - { - "epoch": 1.3954452320953346, - "grad_norm": 0.6390396952629089, - "learning_rate": 4.338582368904161e-06, - "loss": 0.1853, - "step": 14813 - }, - { - "epoch": 1.3955394361885025, - "grad_norm": 0.6447051763534546, - "learning_rate": 4.337337713614933e-06, - "loss": 0.2291, - "step": 14814 - }, - { - "epoch": 1.3956336402816703, - "grad_norm": 0.6061784029006958, - "learning_rate": 4.33609318744316e-06, - "loss": 0.196, - "step": 14815 - }, - { - "epoch": 1.395727844374838, - "grad_norm": 0.7119218707084656, - "learning_rate": 4.334848790417222e-06, - "loss": 0.2341, - "step": 14816 - }, - { - "epoch": 1.395822048468006, - "grad_norm": 0.7430229187011719, - "learning_rate": 4.333604522565496e-06, - "loss": 0.2351, - "step": 14817 - }, - { - "epoch": 1.395916252561174, - "grad_norm": 0.790756106376648, - "learning_rate": 4.332360383916347e-06, - "loss": 0.1935, - "step": 14818 - }, - { - "epoch": 1.3960104566543416, - "grad_norm": 0.5808947086334229, - "learning_rate": 4.331116374498145e-06, - "loss": 0.2016, - "step": 14819 - }, - { - "epoch": 1.3961046607475094, - "grad_norm": 0.6004825830459595, - "learning_rate": 4.329872494339262e-06, - "loss": 0.1751, - "step": 14820 - }, - { - "epoch": 1.3961988648406773, - "grad_norm": 0.6784185171127319, - "learning_rate": 4.328628743468051e-06, - "loss": 0.2106, - "step": 14821 - }, - { - "epoch": 1.3962930689338453, - "grad_norm": 0.6481770873069763, - "learning_rate": 4.32738512191288e-06, - "loss": 0.2326, - "step": 14822 - }, - { - "epoch": 1.396387273027013, - "grad_norm": 0.6379073858261108, - "learning_rate": 4.326141629702096e-06, - "loss": 0.2036, - "step": 14823 - }, - { - "epoch": 1.3964814771201808, - "grad_norm": 0.6903553009033203, - "learning_rate": 4.3248982668640585e-06, - "loss": 0.2207, - "step": 14824 - }, - { - "epoch": 1.3965756812133487, - "grad_norm": 0.7104666829109192, - "learning_rate": 4.32365503342712e-06, - "loss": 0.205, - "step": 14825 - }, - { - "epoch": 1.3966698853065167, - "grad_norm": 0.7103066444396973, - "learning_rate": 4.322411929419623e-06, - "loss": 0.1741, - "step": 14826 - }, - { - "epoch": 1.3967640893996844, - "grad_norm": 0.759509265422821, - "learning_rate": 4.321168954869913e-06, - "loss": 0.2221, - "step": 14827 - }, - { - "epoch": 1.3968582934928522, - "grad_norm": 0.6699749231338501, - "learning_rate": 4.319926109806339e-06, - "loss": 0.2105, - "step": 14828 - }, - { - "epoch": 1.3969524975860201, - "grad_norm": 0.642577588558197, - "learning_rate": 4.318683394257229e-06, - "loss": 0.1994, - "step": 14829 - }, - { - "epoch": 1.397046701679188, - "grad_norm": 0.6244491934776306, - "learning_rate": 4.317440808250924e-06, - "loss": 0.1963, - "step": 14830 - }, - { - "epoch": 1.3971409057723558, - "grad_norm": 0.6352891325950623, - "learning_rate": 4.316198351815761e-06, - "loss": 0.2109, - "step": 14831 - }, - { - "epoch": 
1.3972351098655236, - "grad_norm": 0.6272392868995667, - "learning_rate": 4.314956024980066e-06, - "loss": 0.2042, - "step": 14832 - }, - { - "epoch": 1.3973293139586915, - "grad_norm": 0.711333692073822, - "learning_rate": 4.313713827772158e-06, - "loss": 0.1999, - "step": 14833 - }, - { - "epoch": 1.3974235180518593, - "grad_norm": 0.7087350487709045, - "learning_rate": 4.312471760220376e-06, - "loss": 0.2068, - "step": 14834 - }, - { - "epoch": 1.3975177221450272, - "grad_norm": 0.8110741972923279, - "learning_rate": 4.3112298223530335e-06, - "loss": 0.2539, - "step": 14835 - }, - { - "epoch": 1.397611926238195, - "grad_norm": 0.7614244222640991, - "learning_rate": 4.309988014198442e-06, - "loss": 0.2175, - "step": 14836 - }, - { - "epoch": 1.397706130331363, - "grad_norm": 0.6266860365867615, - "learning_rate": 4.30874633578493e-06, - "loss": 0.179, - "step": 14837 - }, - { - "epoch": 1.3978003344245307, - "grad_norm": 0.7377480268478394, - "learning_rate": 4.307504787140805e-06, - "loss": 0.2445, - "step": 14838 - }, - { - "epoch": 1.3978945385176986, - "grad_norm": 0.755540668964386, - "learning_rate": 4.306263368294369e-06, - "loss": 0.2168, - "step": 14839 - }, - { - "epoch": 1.3979887426108664, - "grad_norm": 0.7447509765625, - "learning_rate": 4.305022079273935e-06, - "loss": 0.2055, - "step": 14840 - }, - { - "epoch": 1.3980829467040343, - "grad_norm": 0.6671401262283325, - "learning_rate": 4.303780920107807e-06, - "loss": 0.1839, - "step": 14841 - }, - { - "epoch": 1.398177150797202, - "grad_norm": 0.7704858183860779, - "learning_rate": 4.302539890824282e-06, - "loss": 0.2014, - "step": 14842 - }, - { - "epoch": 1.39827135489037, - "grad_norm": 0.6026281714439392, - "learning_rate": 4.3012989914516575e-06, - "loss": 0.1825, - "step": 14843 - }, - { - "epoch": 1.3983655589835378, - "grad_norm": 0.7292162775993347, - "learning_rate": 4.300058222018233e-06, - "loss": 0.2114, - "step": 14844 - }, - { - "epoch": 1.3984597630767057, - "grad_norm": 0.6992906928062439, - "learning_rate": 4.2988175825522924e-06, - "loss": 0.1925, - "step": 14845 - }, - { - "epoch": 1.3985539671698735, - "grad_norm": 0.551853358745575, - "learning_rate": 4.297577073082129e-06, - "loss": 0.1658, - "step": 14846 - }, - { - "epoch": 1.3986481712630414, - "grad_norm": 0.772172212600708, - "learning_rate": 4.296336693636029e-06, - "loss": 0.221, - "step": 14847 - }, - { - "epoch": 1.3987423753562092, - "grad_norm": 0.7460121512413025, - "learning_rate": 4.295096444242272e-06, - "loss": 0.2145, - "step": 14848 - }, - { - "epoch": 1.3988365794493771, - "grad_norm": 0.6584548354148865, - "learning_rate": 4.293856324929137e-06, - "loss": 0.1972, - "step": 14849 - }, - { - "epoch": 1.3989307835425449, - "grad_norm": 0.7437769174575806, - "learning_rate": 4.292616335724908e-06, - "loss": 0.2435, - "step": 14850 - }, - { - "epoch": 1.3990249876357128, - "grad_norm": 0.7641134858131409, - "learning_rate": 4.291376476657847e-06, - "loss": 0.2023, - "step": 14851 - }, - { - "epoch": 1.3991191917288806, - "grad_norm": 0.6327671408653259, - "learning_rate": 4.290136747756236e-06, - "loss": 0.2069, - "step": 14852 - }, - { - "epoch": 1.3992133958220485, - "grad_norm": 0.582234799861908, - "learning_rate": 4.288897149048334e-06, - "loss": 0.1903, - "step": 14853 - }, - { - "epoch": 1.3993075999152162, - "grad_norm": 0.6909761428833008, - "learning_rate": 4.287657680562412e-06, - "loss": 0.2087, - "step": 14854 - }, - { - "epoch": 1.3994018040083842, - "grad_norm": 0.7836440205574036, - "learning_rate": 
4.286418342326727e-06, - "loss": 0.2565, - "step": 14855 - }, - { - "epoch": 1.399496008101552, - "grad_norm": 0.6624511480331421, - "learning_rate": 4.285179134369539e-06, - "loss": 0.1986, - "step": 14856 - }, - { - "epoch": 1.39959021219472, - "grad_norm": 0.645569920539856, - "learning_rate": 4.283940056719109e-06, - "loss": 0.2202, - "step": 14857 - }, - { - "epoch": 1.3996844162878876, - "grad_norm": 0.6882280111312866, - "learning_rate": 4.282701109403683e-06, - "loss": 0.1751, - "step": 14858 - }, - { - "epoch": 1.3997786203810556, - "grad_norm": 0.6280230283737183, - "learning_rate": 4.281462292451512e-06, - "loss": 0.1894, - "step": 14859 - }, - { - "epoch": 1.3998728244742233, - "grad_norm": 0.7303670644760132, - "learning_rate": 4.28022360589085e-06, - "loss": 0.2032, - "step": 14860 - }, - { - "epoch": 1.3999670285673913, - "grad_norm": 0.6263623237609863, - "learning_rate": 4.278985049749931e-06, - "loss": 0.1623, - "step": 14861 - }, - { - "epoch": 1.400061232660559, - "grad_norm": 0.7629262208938599, - "learning_rate": 4.277746624057003e-06, - "loss": 0.2052, - "step": 14862 - }, - { - "epoch": 1.400155436753727, - "grad_norm": 0.6788503527641296, - "learning_rate": 4.2765083288403e-06, - "loss": 0.1945, - "step": 14863 - }, - { - "epoch": 1.4002496408468947, - "grad_norm": 0.6659932136535645, - "learning_rate": 4.275270164128057e-06, - "loss": 0.2245, - "step": 14864 - }, - { - "epoch": 1.4003438449400627, - "grad_norm": 0.8249582052230835, - "learning_rate": 4.274032129948512e-06, - "loss": 0.1851, - "step": 14865 - }, - { - "epoch": 1.4004380490332304, - "grad_norm": 0.6572917103767395, - "learning_rate": 4.272794226329887e-06, - "loss": 0.1768, - "step": 14866 - }, - { - "epoch": 1.4005322531263984, - "grad_norm": 0.6544477939605713, - "learning_rate": 4.271556453300411e-06, - "loss": 0.2145, - "step": 14867 - }, - { - "epoch": 1.4006264572195661, - "grad_norm": 0.6506238579750061, - "learning_rate": 4.2703188108883096e-06, - "loss": 0.2071, - "step": 14868 - }, - { - "epoch": 1.400720661312734, - "grad_norm": 0.6459051966667175, - "learning_rate": 4.269081299121797e-06, - "loss": 0.2219, - "step": 14869 - }, - { - "epoch": 1.4008148654059018, - "grad_norm": 0.661430299282074, - "learning_rate": 4.267843918029094e-06, - "loss": 0.1978, - "step": 14870 - }, - { - "epoch": 1.4009090694990698, - "grad_norm": 0.6441071629524231, - "learning_rate": 4.266606667638418e-06, - "loss": 0.1898, - "step": 14871 - }, - { - "epoch": 1.4010032735922375, - "grad_norm": 0.6597195863723755, - "learning_rate": 4.265369547977978e-06, - "loss": 0.2211, - "step": 14872 - }, - { - "epoch": 1.4010974776854055, - "grad_norm": 0.6670068502426147, - "learning_rate": 4.264132559075972e-06, - "loss": 0.1806, - "step": 14873 - }, - { - "epoch": 1.4011916817785732, - "grad_norm": 0.7369149327278137, - "learning_rate": 4.262895700960623e-06, - "loss": 0.1813, - "step": 14874 - }, - { - "epoch": 1.4012858858717412, - "grad_norm": 0.6494218707084656, - "learning_rate": 4.261658973660124e-06, - "loss": 0.2155, - "step": 14875 - }, - { - "epoch": 1.401380089964909, - "grad_norm": 0.6344252824783325, - "learning_rate": 4.260422377202668e-06, - "loss": 0.1963, - "step": 14876 - }, - { - "epoch": 1.4014742940580769, - "grad_norm": 0.7019323110580444, - "learning_rate": 4.259185911616465e-06, - "loss": 0.2249, - "step": 14877 - }, - { - "epoch": 1.4015684981512446, - "grad_norm": 0.6826676726341248, - "learning_rate": 4.257949576929702e-06, - "loss": 0.1947, - "step": 14878 - }, - { - "epoch": 
1.4016627022444126, - "grad_norm": 0.6142680644989014, - "learning_rate": 4.256713373170565e-06, - "loss": 0.1994, - "step": 14879 - }, - { - "epoch": 1.4017569063375803, - "grad_norm": 0.6876006722450256, - "learning_rate": 4.255477300367246e-06, - "loss": 0.2296, - "step": 14880 - }, - { - "epoch": 1.4018511104307483, - "grad_norm": 0.7480155825614929, - "learning_rate": 4.2542413585479305e-06, - "loss": 0.2161, - "step": 14881 - }, - { - "epoch": 1.401945314523916, - "grad_norm": 0.6555769443511963, - "learning_rate": 4.253005547740796e-06, - "loss": 0.1943, - "step": 14882 - }, - { - "epoch": 1.402039518617084, - "grad_norm": 0.6414076685905457, - "learning_rate": 4.251769867974022e-06, - "loss": 0.1755, - "step": 14883 - }, - { - "epoch": 1.4021337227102517, - "grad_norm": 0.6698834300041199, - "learning_rate": 4.250534319275789e-06, - "loss": 0.2087, - "step": 14884 - }, - { - "epoch": 1.4022279268034197, - "grad_norm": 0.6921601891517639, - "learning_rate": 4.249298901674261e-06, - "loss": 0.2169, - "step": 14885 - }, - { - "epoch": 1.4023221308965874, - "grad_norm": 0.6521345973014832, - "learning_rate": 4.248063615197613e-06, - "loss": 0.1967, - "step": 14886 - }, - { - "epoch": 1.4024163349897554, - "grad_norm": 0.6779271960258484, - "learning_rate": 4.246828459874013e-06, - "loss": 0.2298, - "step": 14887 - }, - { - "epoch": 1.4025105390829231, - "grad_norm": 0.6023015975952148, - "learning_rate": 4.2455934357316176e-06, - "loss": 0.1777, - "step": 14888 - }, - { - "epoch": 1.402604743176091, - "grad_norm": 0.6468263864517212, - "learning_rate": 4.244358542798591e-06, - "loss": 0.1821, - "step": 14889 - }, - { - "epoch": 1.4026989472692588, - "grad_norm": 0.6531599760055542, - "learning_rate": 4.243123781103096e-06, - "loss": 0.197, - "step": 14890 - }, - { - "epoch": 1.4027931513624268, - "grad_norm": 0.6184870600700378, - "learning_rate": 4.241889150673281e-06, - "loss": 0.1917, - "step": 14891 - }, - { - "epoch": 1.4028873554555945, - "grad_norm": 0.6046724915504456, - "learning_rate": 4.240654651537294e-06, - "loss": 0.1834, - "step": 14892 - }, - { - "epoch": 1.4029815595487625, - "grad_norm": 0.644598126411438, - "learning_rate": 4.239420283723289e-06, - "loss": 0.2027, - "step": 14893 - }, - { - "epoch": 1.4030757636419302, - "grad_norm": 0.6100983619689941, - "learning_rate": 4.238186047259414e-06, - "loss": 0.1999, - "step": 14894 - }, - { - "epoch": 1.4031699677350982, - "grad_norm": 0.610946774482727, - "learning_rate": 4.236951942173803e-06, - "loss": 0.1955, - "step": 14895 - }, - { - "epoch": 1.403264171828266, - "grad_norm": 0.6465967893600464, - "learning_rate": 4.235717968494601e-06, - "loss": 0.1841, - "step": 14896 - }, - { - "epoch": 1.4033583759214339, - "grad_norm": 0.5939137935638428, - "learning_rate": 4.234484126249949e-06, - "loss": 0.1968, - "step": 14897 - }, - { - "epoch": 1.4034525800146016, - "grad_norm": 0.7123169898986816, - "learning_rate": 4.233250415467971e-06, - "loss": 0.2198, - "step": 14898 - }, - { - "epoch": 1.4035467841077696, - "grad_norm": 0.6703714728355408, - "learning_rate": 4.232016836176802e-06, - "loss": 0.237, - "step": 14899 - }, - { - "epoch": 1.4036409882009373, - "grad_norm": 0.7021177411079407, - "learning_rate": 4.230783388404573e-06, - "loss": 0.2179, - "step": 14900 - }, - { - "epoch": 1.4037351922941053, - "grad_norm": 0.6722543835639954, - "learning_rate": 4.229550072179401e-06, - "loss": 0.2293, - "step": 14901 - }, - { - "epoch": 1.403829396387273, - "grad_norm": 0.6194466352462769, - "learning_rate": 
4.228316887529416e-06, - "loss": 0.1964, - "step": 14902 - }, - { - "epoch": 1.4039236004804407, - "grad_norm": 0.7914484143257141, - "learning_rate": 4.2270838344827285e-06, - "loss": 0.2117, - "step": 14903 - }, - { - "epoch": 1.4040178045736087, - "grad_norm": 0.6260371804237366, - "learning_rate": 4.225850913067457e-06, - "loss": 0.1961, - "step": 14904 - }, - { - "epoch": 1.4041120086667767, - "grad_norm": 0.5706886649131775, - "learning_rate": 4.224618123311718e-06, - "loss": 0.1881, - "step": 14905 - }, - { - "epoch": 1.4042062127599444, - "grad_norm": 0.6768814325332642, - "learning_rate": 4.2233854652436145e-06, - "loss": 0.1978, - "step": 14906 - }, - { - "epoch": 1.4043004168531121, - "grad_norm": 0.6236286163330078, - "learning_rate": 4.222152938891255e-06, - "loss": 0.2103, - "step": 14907 - }, - { - "epoch": 1.40439462094628, - "grad_norm": 0.6196622252464294, - "learning_rate": 4.2209205442827494e-06, - "loss": 0.1989, - "step": 14908 - }, - { - "epoch": 1.404488825039448, - "grad_norm": 0.6381906270980835, - "learning_rate": 4.219688281446188e-06, - "loss": 0.2065, - "step": 14909 - }, - { - "epoch": 1.4045830291326158, - "grad_norm": 0.6047495603561401, - "learning_rate": 4.218456150409673e-06, - "loss": 0.1887, - "step": 14910 - }, - { - "epoch": 1.4046772332257835, - "grad_norm": 0.5788443684577942, - "learning_rate": 4.217224151201303e-06, - "loss": 0.1988, - "step": 14911 - }, - { - "epoch": 1.4047714373189515, - "grad_norm": 0.698949933052063, - "learning_rate": 4.2159922838491675e-06, - "loss": 0.1992, - "step": 14912 - }, - { - "epoch": 1.4048656414121194, - "grad_norm": 0.667930543422699, - "learning_rate": 4.214760548381344e-06, - "loss": 0.1912, - "step": 14913 - }, - { - "epoch": 1.4049598455052872, - "grad_norm": 0.7808729410171509, - "learning_rate": 4.2135289448259346e-06, - "loss": 0.1954, - "step": 14914 - }, - { - "epoch": 1.405054049598455, - "grad_norm": 0.6170253157615662, - "learning_rate": 4.212297473211014e-06, - "loss": 0.1929, - "step": 14915 - }, - { - "epoch": 1.4051482536916229, - "grad_norm": 0.6978352665901184, - "learning_rate": 4.2110661335646585e-06, - "loss": 0.2145, - "step": 14916 - }, - { - "epoch": 1.4052424577847908, - "grad_norm": 0.6556413173675537, - "learning_rate": 4.2098349259149475e-06, - "loss": 0.2212, - "step": 14917 - }, - { - "epoch": 1.4053366618779586, - "grad_norm": 0.6603764891624451, - "learning_rate": 4.208603850289958e-06, - "loss": 0.231, - "step": 14918 - }, - { - "epoch": 1.4054308659711263, - "grad_norm": 0.6079287528991699, - "learning_rate": 4.207372906717755e-06, - "loss": 0.1884, - "step": 14919 - }, - { - "epoch": 1.4055250700642943, - "grad_norm": 0.6266393661499023, - "learning_rate": 4.206142095226408e-06, - "loss": 0.1858, - "step": 14920 - }, - { - "epoch": 1.4056192741574622, - "grad_norm": 0.6841825246810913, - "learning_rate": 4.204911415843985e-06, - "loss": 0.198, - "step": 14921 - }, - { - "epoch": 1.40571347825063, - "grad_norm": 0.6131460666656494, - "learning_rate": 4.2036808685985395e-06, - "loss": 0.2017, - "step": 14922 - }, - { - "epoch": 1.4058076823437977, - "grad_norm": 0.6430876851081848, - "learning_rate": 4.202450453518136e-06, - "loss": 0.213, - "step": 14923 - }, - { - "epoch": 1.4059018864369657, - "grad_norm": 0.5820490717887878, - "learning_rate": 4.201220170630831e-06, - "loss": 0.2016, - "step": 14924 - }, - { - "epoch": 1.4059960905301336, - "grad_norm": 0.6232588887214661, - "learning_rate": 4.1999900199646705e-06, - "loss": 0.1939, - "step": 14925 - }, - { - "epoch": 
1.4060902946233014, - "grad_norm": 0.6412118673324585, - "learning_rate": 4.198760001547707e-06, - "loss": 0.2192, - "step": 14926 - }, - { - "epoch": 1.4061844987164691, - "grad_norm": 0.6292945742607117, - "learning_rate": 4.197530115407991e-06, - "loss": 0.1728, - "step": 14927 - }, - { - "epoch": 1.406278702809637, - "grad_norm": 0.7885642647743225, - "learning_rate": 4.196300361573559e-06, - "loss": 0.1836, - "step": 14928 - }, - { - "epoch": 1.406372906902805, - "grad_norm": 0.6131870150566101, - "learning_rate": 4.195070740072454e-06, - "loss": 0.19, - "step": 14929 - }, - { - "epoch": 1.4064671109959728, - "grad_norm": 0.681182861328125, - "learning_rate": 4.193841250932718e-06, - "loss": 0.1778, - "step": 14930 - }, - { - "epoch": 1.4065613150891405, - "grad_norm": 0.6163570880889893, - "learning_rate": 4.19261189418238e-06, - "loss": 0.1817, - "step": 14931 - }, - { - "epoch": 1.4066555191823085, - "grad_norm": 0.6059005260467529, - "learning_rate": 4.191382669849469e-06, - "loss": 0.2087, - "step": 14932 - }, - { - "epoch": 1.4067497232754764, - "grad_norm": 0.5996084809303284, - "learning_rate": 4.190153577962015e-06, - "loss": 0.1999, - "step": 14933 - }, - { - "epoch": 1.4068439273686442, - "grad_norm": 0.6310654282569885, - "learning_rate": 4.188924618548049e-06, - "loss": 0.1936, - "step": 14934 - }, - { - "epoch": 1.406938131461812, - "grad_norm": 0.6701446771621704, - "learning_rate": 4.187695791635585e-06, - "loss": 0.2152, - "step": 14935 - }, - { - "epoch": 1.4070323355549799, - "grad_norm": 0.6180370450019836, - "learning_rate": 4.186467097252646e-06, - "loss": 0.1728, - "step": 14936 - }, - { - "epoch": 1.4071265396481478, - "grad_norm": 0.6977247595787048, - "learning_rate": 4.185238535427253e-06, - "loss": 0.2078, - "step": 14937 - }, - { - "epoch": 1.4072207437413156, - "grad_norm": 0.7770658731460571, - "learning_rate": 4.184010106187409e-06, - "loss": 0.2163, - "step": 14938 - }, - { - "epoch": 1.4073149478344833, - "grad_norm": 0.6317363381385803, - "learning_rate": 4.182781809561129e-06, - "loss": 0.1835, - "step": 14939 - }, - { - "epoch": 1.4074091519276513, - "grad_norm": 0.707531213760376, - "learning_rate": 4.181553645576424e-06, - "loss": 0.2072, - "step": 14940 - }, - { - "epoch": 1.4075033560208192, - "grad_norm": 0.6822757124900818, - "learning_rate": 4.1803256142612914e-06, - "loss": 0.2022, - "step": 14941 - }, - { - "epoch": 1.407597560113987, - "grad_norm": 0.6395441293716431, - "learning_rate": 4.179097715643737e-06, - "loss": 0.1643, - "step": 14942 - }, - { - "epoch": 1.4076917642071547, - "grad_norm": 0.7331756353378296, - "learning_rate": 4.177869949751755e-06, - "loss": 0.226, - "step": 14943 - }, - { - "epoch": 1.4077859683003227, - "grad_norm": 0.6839967966079712, - "learning_rate": 4.176642316613342e-06, - "loss": 0.2189, - "step": 14944 - }, - { - "epoch": 1.4078801723934906, - "grad_norm": 0.6518542170524597, - "learning_rate": 4.175414816256494e-06, - "loss": 0.1798, - "step": 14945 - }, - { - "epoch": 1.4079743764866584, - "grad_norm": 0.6746199727058411, - "learning_rate": 4.174187448709192e-06, - "loss": 0.1834, - "step": 14946 - }, - { - "epoch": 1.408068580579826, - "grad_norm": 0.7846388816833496, - "learning_rate": 4.172960213999425e-06, - "loss": 0.2066, - "step": 14947 - }, - { - "epoch": 1.408162784672994, - "grad_norm": 0.6446897387504578, - "learning_rate": 4.171733112155182e-06, - "loss": 0.2383, - "step": 14948 - }, - { - "epoch": 1.408256988766162, - "grad_norm": 0.6351571679115295, - "learning_rate": 
4.170506143204432e-06, - "loss": 0.1955, - "step": 14949 - }, - { - "epoch": 1.4083511928593297, - "grad_norm": 0.6581653356552124, - "learning_rate": 4.169279307175159e-06, - "loss": 0.1846, - "step": 14950 - }, - { - "epoch": 1.4084453969524975, - "grad_norm": 0.6995300650596619, - "learning_rate": 4.16805260409534e-06, - "loss": 0.2146, - "step": 14951 - }, - { - "epoch": 1.4085396010456654, - "grad_norm": 0.6600801348686218, - "learning_rate": 4.166826033992939e-06, - "loss": 0.2, - "step": 14952 - }, - { - "epoch": 1.4086338051388334, - "grad_norm": 0.690256655216217, - "learning_rate": 4.165599596895919e-06, - "loss": 0.196, - "step": 14953 - }, - { - "epoch": 1.4087280092320011, - "grad_norm": 0.6114471554756165, - "learning_rate": 4.164373292832258e-06, - "loss": 0.1857, - "step": 14954 - }, - { - "epoch": 1.4088222133251689, - "grad_norm": 0.6888704895973206, - "learning_rate": 4.163147121829911e-06, - "loss": 0.2215, - "step": 14955 - }, - { - "epoch": 1.4089164174183368, - "grad_norm": 0.7572583556175232, - "learning_rate": 4.161921083916833e-06, - "loss": 0.1904, - "step": 14956 - }, - { - "epoch": 1.4090106215115048, - "grad_norm": 0.6744598746299744, - "learning_rate": 4.160695179120983e-06, - "loss": 0.206, - "step": 14957 - }, - { - "epoch": 1.4091048256046725, - "grad_norm": 0.6713089346885681, - "learning_rate": 4.159469407470318e-06, - "loss": 0.188, - "step": 14958 - }, - { - "epoch": 1.4091990296978403, - "grad_norm": 0.6134618520736694, - "learning_rate": 4.158243768992778e-06, - "loss": 0.1675, - "step": 14959 - }, - { - "epoch": 1.4092932337910082, - "grad_norm": 0.6265523433685303, - "learning_rate": 4.1570182637163155e-06, - "loss": 0.1993, - "step": 14960 - }, - { - "epoch": 1.4093874378841762, - "grad_norm": 0.6656590104103088, - "learning_rate": 4.155792891668876e-06, - "loss": 0.1601, - "step": 14961 - }, - { - "epoch": 1.409481641977344, - "grad_norm": 0.6455593109130859, - "learning_rate": 4.154567652878394e-06, - "loss": 0.191, - "step": 14962 - }, - { - "epoch": 1.4095758460705117, - "grad_norm": 0.6836377382278442, - "learning_rate": 4.15334254737281e-06, - "loss": 0.1979, - "step": 14963 - }, - { - "epoch": 1.4096700501636796, - "grad_norm": 0.6486380696296692, - "learning_rate": 4.15211757518006e-06, - "loss": 0.2023, - "step": 14964 - }, - { - "epoch": 1.4097642542568476, - "grad_norm": 0.6408259868621826, - "learning_rate": 4.1508927363280705e-06, - "loss": 0.2027, - "step": 14965 - }, - { - "epoch": 1.4098584583500153, - "grad_norm": 0.6494690179824829, - "learning_rate": 4.149668030844772e-06, - "loss": 0.2095, - "step": 14966 - }, - { - "epoch": 1.409952662443183, - "grad_norm": 0.6200953722000122, - "learning_rate": 4.1484434587580935e-06, - "loss": 0.2054, - "step": 14967 - }, - { - "epoch": 1.410046866536351, - "grad_norm": 0.6189218759536743, - "learning_rate": 4.147219020095955e-06, - "loss": 0.1962, - "step": 14968 - }, - { - "epoch": 1.410141070629519, - "grad_norm": 0.6298625469207764, - "learning_rate": 4.145994714886266e-06, - "loss": 0.2122, - "step": 14969 - }, - { - "epoch": 1.4102352747226867, - "grad_norm": 0.7043928503990173, - "learning_rate": 4.144770543156959e-06, - "loss": 0.2267, - "step": 14970 - }, - { - "epoch": 1.4103294788158545, - "grad_norm": 0.7395245432853699, - "learning_rate": 4.143546504935938e-06, - "loss": 0.2588, - "step": 14971 - }, - { - "epoch": 1.4104236829090224, - "grad_norm": 0.6496203541755676, - "learning_rate": 4.1423226002511105e-06, - "loss": 0.1913, - "step": 14972 - }, - { - "epoch": 
1.4105178870021902, - "grad_norm": 0.6287446022033691, - "learning_rate": 4.141098829130386e-06, - "loss": 0.2041, - "step": 14973 - }, - { - "epoch": 1.4106120910953581, - "grad_norm": 0.6247981786727905, - "learning_rate": 4.139875191601674e-06, - "loss": 0.1992, - "step": 14974 - }, - { - "epoch": 1.4107062951885259, - "grad_norm": 0.669303834438324, - "learning_rate": 4.1386516876928675e-06, - "loss": 0.207, - "step": 14975 - }, - { - "epoch": 1.4108004992816938, - "grad_norm": 0.7559048533439636, - "learning_rate": 4.137428317431866e-06, - "loss": 0.2009, - "step": 14976 - }, - { - "epoch": 1.4108947033748616, - "grad_norm": 0.6676762104034424, - "learning_rate": 4.136205080846569e-06, - "loss": 0.1957, - "step": 14977 - }, - { - "epoch": 1.4109889074680295, - "grad_norm": 0.6410189270973206, - "learning_rate": 4.134981977964862e-06, - "loss": 0.2169, - "step": 14978 - }, - { - "epoch": 1.4110831115611973, - "grad_norm": 0.6580454707145691, - "learning_rate": 4.133759008814636e-06, - "loss": 0.2029, - "step": 14979 - }, - { - "epoch": 1.4111773156543652, - "grad_norm": 0.6293115019798279, - "learning_rate": 4.13253617342378e-06, - "loss": 0.1742, - "step": 14980 - }, - { - "epoch": 1.411271519747533, - "grad_norm": 1.1270183324813843, - "learning_rate": 4.131313471820171e-06, - "loss": 0.2201, - "step": 14981 - }, - { - "epoch": 1.411365723840701, - "grad_norm": 0.6704361438751221, - "learning_rate": 4.130090904031694e-06, - "loss": 0.1931, - "step": 14982 - }, - { - "epoch": 1.4114599279338687, - "grad_norm": 0.6646503806114197, - "learning_rate": 4.128868470086218e-06, - "loss": 0.1955, - "step": 14983 - }, - { - "epoch": 1.4115541320270366, - "grad_norm": 0.6644883155822754, - "learning_rate": 4.127646170011621e-06, - "loss": 0.2071, - "step": 14984 - }, - { - "epoch": 1.4116483361202044, - "grad_norm": 0.6573548316955566, - "learning_rate": 4.1264240038357775e-06, - "loss": 0.1831, - "step": 14985 - }, - { - "epoch": 1.4117425402133723, - "grad_norm": 0.8367358446121216, - "learning_rate": 4.125201971586546e-06, - "loss": 0.2103, - "step": 14986 - }, - { - "epoch": 1.41183674430654, - "grad_norm": 0.7015900015830994, - "learning_rate": 4.123980073291796e-06, - "loss": 0.2075, - "step": 14987 - }, - { - "epoch": 1.411930948399708, - "grad_norm": 0.7227832078933716, - "learning_rate": 4.12275830897939e-06, - "loss": 0.1934, - "step": 14988 - }, - { - "epoch": 1.4120251524928757, - "grad_norm": 0.7276207804679871, - "learning_rate": 4.121536678677181e-06, - "loss": 0.1913, - "step": 14989 - }, - { - "epoch": 1.4121193565860437, - "grad_norm": 0.6449877023696899, - "learning_rate": 4.120315182413027e-06, - "loss": 0.2095, - "step": 14990 - }, - { - "epoch": 1.4122135606792114, - "grad_norm": 0.6663293242454529, - "learning_rate": 4.119093820214783e-06, - "loss": 0.2052, - "step": 14991 - }, - { - "epoch": 1.4123077647723794, - "grad_norm": 0.6059328317642212, - "learning_rate": 4.117872592110296e-06, - "loss": 0.1906, - "step": 14992 - }, - { - "epoch": 1.4124019688655471, - "grad_norm": 0.5791782140731812, - "learning_rate": 4.116651498127403e-06, - "loss": 0.1996, - "step": 14993 - }, - { - "epoch": 1.412496172958715, - "grad_norm": 0.6193673014640808, - "learning_rate": 4.115430538293962e-06, - "loss": 0.1535, - "step": 14994 - }, - { - "epoch": 1.4125903770518828, - "grad_norm": 0.7219436764717102, - "learning_rate": 4.114209712637806e-06, - "loss": 0.1959, - "step": 14995 - }, - { - "epoch": 1.4126845811450508, - "grad_norm": 0.6128940582275391, - "learning_rate": 
4.112989021186766e-06, - "loss": 0.1865, - "step": 14996 - }, - { - "epoch": 1.4127787852382185, - "grad_norm": 1.6297779083251953, - "learning_rate": 4.111768463968682e-06, - "loss": 0.1732, - "step": 14997 - }, - { - "epoch": 1.4128729893313865, - "grad_norm": 0.635675311088562, - "learning_rate": 4.110548041011387e-06, - "loss": 0.1906, - "step": 14998 - }, - { - "epoch": 1.4129671934245542, - "grad_norm": 0.6329396963119507, - "learning_rate": 4.1093277523427e-06, - "loss": 0.1984, - "step": 14999 - }, - { - "epoch": 1.4130613975177222, - "grad_norm": 0.6555243134498596, - "learning_rate": 4.108107597990451e-06, - "loss": 0.2251, - "step": 15000 - }, - { - "epoch": 1.41315560161089, - "grad_norm": 0.5970786213874817, - "learning_rate": 4.106887577982464e-06, - "loss": 0.1892, - "step": 15001 - }, - { - "epoch": 1.413249805704058, - "grad_norm": 0.622941792011261, - "learning_rate": 4.105667692346551e-06, - "loss": 0.1785, - "step": 15002 - }, - { - "epoch": 1.4133440097972256, - "grad_norm": 0.5970519781112671, - "learning_rate": 4.1044479411105295e-06, - "loss": 0.1794, - "step": 15003 - }, - { - "epoch": 1.4134382138903936, - "grad_norm": 0.7031348347663879, - "learning_rate": 4.103228324302218e-06, - "loss": 0.2173, - "step": 15004 - }, - { - "epoch": 1.4135324179835613, - "grad_norm": 0.6484546661376953, - "learning_rate": 4.1020088419494156e-06, - "loss": 0.1931, - "step": 15005 - }, - { - "epoch": 1.4136266220767293, - "grad_norm": 0.6778308153152466, - "learning_rate": 4.100789494079932e-06, - "loss": 0.1821, - "step": 15006 - }, - { - "epoch": 1.413720826169897, - "grad_norm": 0.6300435662269592, - "learning_rate": 4.099570280721577e-06, - "loss": 0.1813, - "step": 15007 - }, - { - "epoch": 1.413815030263065, - "grad_norm": 0.6649633049964905, - "learning_rate": 4.098351201902143e-06, - "loss": 0.1821, - "step": 15008 - }, - { - "epoch": 1.4139092343562327, - "grad_norm": 0.6492722630500793, - "learning_rate": 4.097132257649422e-06, - "loss": 0.2151, - "step": 15009 - }, - { - "epoch": 1.4140034384494007, - "grad_norm": 0.6942569017410278, - "learning_rate": 4.095913447991222e-06, - "loss": 0.1975, - "step": 15010 - }, - { - "epoch": 1.4140976425425684, - "grad_norm": 0.6553627252578735, - "learning_rate": 4.094694772955325e-06, - "loss": 0.1769, - "step": 15011 - }, - { - "epoch": 1.4141918466357364, - "grad_norm": 0.6954379081726074, - "learning_rate": 4.093476232569519e-06, - "loss": 0.1863, - "step": 15012 - }, - { - "epoch": 1.4142860507289041, - "grad_norm": 0.7249972820281982, - "learning_rate": 4.092257826861587e-06, - "loss": 0.2101, - "step": 15013 - }, - { - "epoch": 1.414380254822072, - "grad_norm": 0.5798452496528625, - "learning_rate": 4.091039555859317e-06, - "loss": 0.1684, - "step": 15014 - }, - { - "epoch": 1.4144744589152398, - "grad_norm": 0.6643664240837097, - "learning_rate": 4.08982141959048e-06, - "loss": 0.1948, - "step": 15015 - }, - { - "epoch": 1.4145686630084078, - "grad_norm": 0.6006837487220764, - "learning_rate": 4.088603418082856e-06, - "loss": 0.1747, - "step": 15016 - }, - { - "epoch": 1.4146628671015755, - "grad_norm": 0.6077139377593994, - "learning_rate": 4.087385551364219e-06, - "loss": 0.2052, - "step": 15017 - }, - { - "epoch": 1.4147570711947435, - "grad_norm": 0.656932532787323, - "learning_rate": 4.086167819462332e-06, - "loss": 0.1915, - "step": 15018 - }, - { - "epoch": 1.4148512752879112, - "grad_norm": 0.6349786520004272, - "learning_rate": 4.0849502224049655e-06, - "loss": 0.1946, - "step": 15019 - }, - { - "epoch": 
1.4149454793810792, - "grad_norm": 0.6988180875778198, - "learning_rate": 4.083732760219884e-06, - "loss": 0.1825, - "step": 15020 - }, - { - "epoch": 1.415039683474247, - "grad_norm": 0.7015491127967834, - "learning_rate": 4.082515432934842e-06, - "loss": 0.2342, - "step": 15021 - }, - { - "epoch": 1.4151338875674149, - "grad_norm": 0.6982563734054565, - "learning_rate": 4.081298240577603e-06, - "loss": 0.2207, - "step": 15022 - }, - { - "epoch": 1.4152280916605826, - "grad_norm": 0.7221394777297974, - "learning_rate": 4.0800811831759145e-06, - "loss": 0.2016, - "step": 15023 - }, - { - "epoch": 1.4153222957537506, - "grad_norm": 0.6412881016731262, - "learning_rate": 4.07886426075753e-06, - "loss": 0.2026, - "step": 15024 - }, - { - "epoch": 1.4154164998469183, - "grad_norm": 0.6701502203941345, - "learning_rate": 4.077647473350201e-06, - "loss": 0.2252, - "step": 15025 - }, - { - "epoch": 1.4155107039400863, - "grad_norm": 0.6229698657989502, - "learning_rate": 4.076430820981666e-06, - "loss": 0.1761, - "step": 15026 - }, - { - "epoch": 1.415604908033254, - "grad_norm": 0.7415137887001038, - "learning_rate": 4.075214303679669e-06, - "loss": 0.2075, - "step": 15027 - }, - { - "epoch": 1.415699112126422, - "grad_norm": 1.9039305448532104, - "learning_rate": 4.073997921471951e-06, - "loss": 0.2086, - "step": 15028 - }, - { - "epoch": 1.4157933162195897, - "grad_norm": 0.6051740646362305, - "learning_rate": 4.072781674386243e-06, - "loss": 0.1788, - "step": 15029 - }, - { - "epoch": 1.4158875203127577, - "grad_norm": 0.6752517223358154, - "learning_rate": 4.0715655624502805e-06, - "loss": 0.2028, - "step": 15030 - }, - { - "epoch": 1.4159817244059254, - "grad_norm": 0.7240444421768188, - "learning_rate": 4.0703495856917926e-06, - "loss": 0.2339, - "step": 15031 - }, - { - "epoch": 1.4160759284990934, - "grad_norm": 0.6766936182975769, - "learning_rate": 4.069133744138506e-06, - "loss": 0.2161, - "step": 15032 - }, - { - "epoch": 1.416170132592261, - "grad_norm": 0.6281977295875549, - "learning_rate": 4.067918037818138e-06, - "loss": 0.1734, - "step": 15033 - }, - { - "epoch": 1.416264336685429, - "grad_norm": 0.6772282123565674, - "learning_rate": 4.0667024667584145e-06, - "loss": 0.2405, - "step": 15034 - }, - { - "epoch": 1.4163585407785968, - "grad_norm": 0.566911518573761, - "learning_rate": 4.0654870309870535e-06, - "loss": 0.1648, - "step": 15035 - }, - { - "epoch": 1.4164527448717648, - "grad_norm": 0.6752029061317444, - "learning_rate": 4.064271730531761e-06, - "loss": 0.2056, - "step": 15036 - }, - { - "epoch": 1.4165469489649325, - "grad_norm": 0.6714105010032654, - "learning_rate": 4.0630565654202545e-06, - "loss": 0.1809, - "step": 15037 - }, - { - "epoch": 1.4166411530581005, - "grad_norm": 0.6412304639816284, - "learning_rate": 4.061841535680243e-06, - "loss": 0.1811, - "step": 15038 - }, - { - "epoch": 1.4167353571512682, - "grad_norm": 0.7268863916397095, - "learning_rate": 4.060626641339425e-06, - "loss": 0.2007, - "step": 15039 - }, - { - "epoch": 1.4168295612444362, - "grad_norm": 0.6820250749588013, - "learning_rate": 4.059411882425504e-06, - "loss": 0.2006, - "step": 15040 - }, - { - "epoch": 1.416923765337604, - "grad_norm": 0.5848016738891602, - "learning_rate": 4.058197258966183e-06, - "loss": 0.1965, - "step": 15041 - }, - { - "epoch": 1.4170179694307716, - "grad_norm": 0.638249397277832, - "learning_rate": 4.056982770989151e-06, - "loss": 0.1976, - "step": 15042 - }, - { - "epoch": 1.4171121735239396, - "grad_norm": 0.7490016222000122, - "learning_rate": 
4.055768418522102e-06, - "loss": 0.2164, - "step": 15043 - }, - { - "epoch": 1.4172063776171075, - "grad_norm": 0.6565622687339783, - "learning_rate": 4.054554201592731e-06, - "loss": 0.2184, - "step": 15044 - }, - { - "epoch": 1.4173005817102753, - "grad_norm": 0.6109570264816284, - "learning_rate": 4.053340120228714e-06, - "loss": 0.1941, - "step": 15045 - }, - { - "epoch": 1.417394785803443, - "grad_norm": 0.6661521196365356, - "learning_rate": 4.052126174457739e-06, - "loss": 0.2016, - "step": 15046 - }, - { - "epoch": 1.417488989896611, - "grad_norm": 0.6523756980895996, - "learning_rate": 4.05091236430749e-06, - "loss": 0.2075, - "step": 15047 - }, - { - "epoch": 1.417583193989779, - "grad_norm": 0.7215223908424377, - "learning_rate": 4.049698689805639e-06, - "loss": 0.2237, - "step": 15048 - }, - { - "epoch": 1.4176773980829467, - "grad_norm": 0.6820694804191589, - "learning_rate": 4.048485150979854e-06, - "loss": 0.2189, - "step": 15049 - }, - { - "epoch": 1.4177716021761144, - "grad_norm": 0.6522488594055176, - "learning_rate": 4.0472717478578185e-06, - "loss": 0.1967, - "step": 15050 - }, - { - "epoch": 1.4178658062692824, - "grad_norm": 0.6289029121398926, - "learning_rate": 4.046058480467192e-06, - "loss": 0.2103, - "step": 15051 - }, - { - "epoch": 1.4179600103624503, - "grad_norm": 0.6443459391593933, - "learning_rate": 4.044845348835637e-06, - "loss": 0.1938, - "step": 15052 - }, - { - "epoch": 1.418054214455618, - "grad_norm": 0.6975728273391724, - "learning_rate": 4.0436323529908195e-06, - "loss": 0.2133, - "step": 15053 - }, - { - "epoch": 1.4181484185487858, - "grad_norm": 0.5422704219818115, - "learning_rate": 4.042419492960398e-06, - "loss": 0.1493, - "step": 15054 - }, - { - "epoch": 1.4182426226419538, - "grad_norm": 0.7132324576377869, - "learning_rate": 4.041206768772023e-06, - "loss": 0.1918, - "step": 15055 - }, - { - "epoch": 1.4183368267351217, - "grad_norm": 0.6467621922492981, - "learning_rate": 4.039994180453348e-06, - "loss": 0.1918, - "step": 15056 - }, - { - "epoch": 1.4184310308282895, - "grad_norm": 0.7953943610191345, - "learning_rate": 4.038781728032027e-06, - "loss": 0.2027, - "step": 15057 - }, - { - "epoch": 1.4185252349214572, - "grad_norm": 0.6614832282066345, - "learning_rate": 4.0375694115356986e-06, - "loss": 0.2045, - "step": 15058 - }, - { - "epoch": 1.4186194390146252, - "grad_norm": 0.6388512253761292, - "learning_rate": 4.036357230992009e-06, - "loss": 0.1934, - "step": 15059 - }, - { - "epoch": 1.4187136431077931, - "grad_norm": 0.6661310791969299, - "learning_rate": 4.0351451864286e-06, - "loss": 0.2051, - "step": 15060 - }, - { - "epoch": 1.4188078472009609, - "grad_norm": 0.6153001189231873, - "learning_rate": 4.033933277873104e-06, - "loss": 0.1955, - "step": 15061 - }, - { - "epoch": 1.4189020512941286, - "grad_norm": 0.689215362071991, - "learning_rate": 4.032721505353157e-06, - "loss": 0.2164, - "step": 15062 - }, - { - "epoch": 1.4189962553872966, - "grad_norm": 0.6913338303565979, - "learning_rate": 4.031509868896386e-06, - "loss": 0.1969, - "step": 15063 - }, - { - "epoch": 1.4190904594804645, - "grad_norm": 0.6429192423820496, - "learning_rate": 4.03029836853042e-06, - "loss": 0.2035, - "step": 15064 - }, - { - "epoch": 1.4191846635736323, - "grad_norm": 0.632829487323761, - "learning_rate": 4.029087004282888e-06, - "loss": 0.1768, - "step": 15065 - }, - { - "epoch": 1.4192788676668, - "grad_norm": 0.6011409759521484, - "learning_rate": 4.027875776181402e-06, - "loss": 0.1851, - "step": 15066 - }, - { - "epoch": 
1.419373071759968, - "grad_norm": 0.6467154026031494, - "learning_rate": 4.026664684253584e-06, - "loss": 0.1947, - "step": 15067 - }, - { - "epoch": 1.419467275853136, - "grad_norm": 0.673510730266571, - "learning_rate": 4.025453728527053e-06, - "loss": 0.1949, - "step": 15068 - }, - { - "epoch": 1.4195614799463037, - "grad_norm": 0.656452476978302, - "learning_rate": 4.024242909029414e-06, - "loss": 0.2034, - "step": 15069 - }, - { - "epoch": 1.4196556840394714, - "grad_norm": 0.6704924702644348, - "learning_rate": 4.023032225788278e-06, - "loss": 0.2027, - "step": 15070 - }, - { - "epoch": 1.4197498881326394, - "grad_norm": 0.6681562662124634, - "learning_rate": 4.021821678831255e-06, - "loss": 0.2195, - "step": 15071 - }, - { - "epoch": 1.4198440922258073, - "grad_norm": 0.5850578546524048, - "learning_rate": 4.020611268185942e-06, - "loss": 0.2025, - "step": 15072 - }, - { - "epoch": 1.419938296318975, - "grad_norm": 0.608880341053009, - "learning_rate": 4.0194009938799365e-06, - "loss": 0.1768, - "step": 15073 - }, - { - "epoch": 1.4200325004121428, - "grad_norm": 0.7008351683616638, - "learning_rate": 4.018190855940837e-06, - "loss": 0.2058, - "step": 15074 - }, - { - "epoch": 1.4201267045053108, - "grad_norm": 0.616750955581665, - "learning_rate": 4.01698085439624e-06, - "loss": 0.1581, - "step": 15075 - }, - { - "epoch": 1.4202209085984787, - "grad_norm": 0.7935291528701782, - "learning_rate": 4.015770989273731e-06, - "loss": 0.1921, - "step": 15076 - }, - { - "epoch": 1.4203151126916465, - "grad_norm": 0.5856050848960876, - "learning_rate": 4.014561260600896e-06, - "loss": 0.2114, - "step": 15077 - }, - { - "epoch": 1.4204093167848142, - "grad_norm": 0.6580625176429749, - "learning_rate": 4.0133516684053264e-06, - "loss": 0.2019, - "step": 15078 - }, - { - "epoch": 1.4205035208779822, - "grad_norm": 0.7024085521697998, - "learning_rate": 4.012142212714593e-06, - "loss": 0.2201, - "step": 15079 - }, - { - "epoch": 1.42059772497115, - "grad_norm": 0.7355886697769165, - "learning_rate": 4.010932893556278e-06, - "loss": 0.2039, - "step": 15080 - }, - { - "epoch": 1.4206919290643178, - "grad_norm": 0.6439719796180725, - "learning_rate": 4.009723710957957e-06, - "loss": 0.2211, - "step": 15081 - }, - { - "epoch": 1.4207861331574856, - "grad_norm": 0.608222484588623, - "learning_rate": 4.008514664947198e-06, - "loss": 0.1888, - "step": 15082 - }, - { - "epoch": 1.4208803372506535, - "grad_norm": 0.6365930438041687, - "learning_rate": 4.00730575555157e-06, - "loss": 0.1883, - "step": 15083 - }, - { - "epoch": 1.4209745413438215, - "grad_norm": 0.5954399108886719, - "learning_rate": 4.006096982798642e-06, - "loss": 0.2025, - "step": 15084 - }, - { - "epoch": 1.4210687454369892, - "grad_norm": 0.6623506546020508, - "learning_rate": 4.004888346715972e-06, - "loss": 0.2047, - "step": 15085 - }, - { - "epoch": 1.421162949530157, - "grad_norm": 0.6372186541557312, - "learning_rate": 4.0036798473311125e-06, - "loss": 0.1964, - "step": 15086 - }, - { - "epoch": 1.421257153623325, - "grad_norm": 0.7034417390823364, - "learning_rate": 4.002471484671634e-06, - "loss": 0.2035, - "step": 15087 - }, - { - "epoch": 1.421351357716493, - "grad_norm": 0.6938350796699524, - "learning_rate": 4.0012632587650804e-06, - "loss": 0.1868, - "step": 15088 - }, - { - "epoch": 1.4214455618096606, - "grad_norm": 0.6624138951301575, - "learning_rate": 4.000055169638994e-06, - "loss": 0.2198, - "step": 15089 - }, - { - "epoch": 1.4215397659028284, - "grad_norm": 0.6621091961860657, - "learning_rate": 
3.998847217320937e-06, - "loss": 0.212, - "step": 15090 - }, - { - "epoch": 1.4216339699959963, - "grad_norm": 0.6874463558197021, - "learning_rate": 3.997639401838444e-06, - "loss": 0.2276, - "step": 15091 - }, - { - "epoch": 1.4217281740891643, - "grad_norm": 0.7190878391265869, - "learning_rate": 3.9964317232190516e-06, - "loss": 0.1954, - "step": 15092 - }, - { - "epoch": 1.421822378182332, - "grad_norm": 0.749897301197052, - "learning_rate": 3.995224181490301e-06, - "loss": 0.2388, - "step": 15093 - }, - { - "epoch": 1.4219165822754998, - "grad_norm": 0.7081459164619446, - "learning_rate": 3.994016776679729e-06, - "loss": 0.2082, - "step": 15094 - }, - { - "epoch": 1.4220107863686677, - "grad_norm": 0.7634052634239197, - "learning_rate": 3.992809508814859e-06, - "loss": 0.2275, - "step": 15095 - }, - { - "epoch": 1.4221049904618357, - "grad_norm": 0.7295844554901123, - "learning_rate": 3.991602377923222e-06, - "loss": 0.22, - "step": 15096 - }, - { - "epoch": 1.4221991945550034, - "grad_norm": 0.6493740081787109, - "learning_rate": 3.990395384032348e-06, - "loss": 0.2059, - "step": 15097 - }, - { - "epoch": 1.4222933986481712, - "grad_norm": 0.5462346076965332, - "learning_rate": 3.989188527169749e-06, - "loss": 0.1789, - "step": 15098 - }, - { - "epoch": 1.4223876027413391, - "grad_norm": 0.6551412343978882, - "learning_rate": 3.987981807362948e-06, - "loss": 0.2172, - "step": 15099 - }, - { - "epoch": 1.422481806834507, - "grad_norm": 0.5961939096450806, - "learning_rate": 3.986775224639463e-06, - "loss": 0.1929, - "step": 15100 - }, - { - "epoch": 1.4225760109276748, - "grad_norm": 0.9297695159912109, - "learning_rate": 3.985568779026798e-06, - "loss": 0.1809, - "step": 15101 - }, - { - "epoch": 1.4226702150208426, - "grad_norm": 0.6887254118919373, - "learning_rate": 3.984362470552471e-06, - "loss": 0.2126, - "step": 15102 - }, - { - "epoch": 1.4227644191140105, - "grad_norm": 0.6743797659873962, - "learning_rate": 3.98315629924398e-06, - "loss": 0.1958, - "step": 15103 - }, - { - "epoch": 1.4228586232071785, - "grad_norm": 0.6334644556045532, - "learning_rate": 3.981950265128829e-06, - "loss": 0.1921, - "step": 15104 - }, - { - "epoch": 1.4229528273003462, - "grad_norm": 0.6796209216117859, - "learning_rate": 3.980744368234524e-06, - "loss": 0.2051, - "step": 15105 - }, - { - "epoch": 1.423047031393514, - "grad_norm": 0.6645632982254028, - "learning_rate": 3.979538608588552e-06, - "loss": 0.207, - "step": 15106 - }, - { - "epoch": 1.423141235486682, - "grad_norm": 0.6518348455429077, - "learning_rate": 3.97833298621841e-06, - "loss": 0.2077, - "step": 15107 - }, - { - "epoch": 1.4232354395798499, - "grad_norm": 0.6250102519989014, - "learning_rate": 3.977127501151593e-06, - "loss": 0.2145, - "step": 15108 - }, - { - "epoch": 1.4233296436730176, - "grad_norm": 0.6662275791168213, - "learning_rate": 3.975922153415579e-06, - "loss": 0.2448, - "step": 15109 - }, - { - "epoch": 1.4234238477661854, - "grad_norm": 0.6598562598228455, - "learning_rate": 3.974716943037858e-06, - "loss": 0.2005, - "step": 15110 - }, - { - "epoch": 1.4235180518593533, - "grad_norm": 0.7167573571205139, - "learning_rate": 3.973511870045911e-06, - "loss": 0.214, - "step": 15111 - }, - { - "epoch": 1.423612255952521, - "grad_norm": 0.8242859244346619, - "learning_rate": 3.972306934467214e-06, - "loss": 0.21, - "step": 15112 - }, - { - "epoch": 1.423706460045689, - "grad_norm": 0.6715300679206848, - "learning_rate": 3.9711021363292376e-06, - "loss": 0.1957, - "step": 15113 - }, - { - "epoch": 
1.4238006641388568, - "grad_norm": 0.620347797870636, - "learning_rate": 3.969897475659457e-06, - "loss": 0.196, - "step": 15114 - }, - { - "epoch": 1.4238948682320247, - "grad_norm": 0.633220911026001, - "learning_rate": 3.968692952485341e-06, - "loss": 0.2279, - "step": 15115 - }, - { - "epoch": 1.4239890723251925, - "grad_norm": 0.6173372864723206, - "learning_rate": 3.967488566834352e-06, - "loss": 0.172, - "step": 15116 - }, - { - "epoch": 1.4240832764183604, - "grad_norm": 0.8390200734138489, - "learning_rate": 3.966284318733952e-06, - "loss": 0.2059, - "step": 15117 - }, - { - "epoch": 1.4241774805115281, - "grad_norm": 0.7647731900215149, - "learning_rate": 3.965080208211605e-06, - "loss": 0.1673, - "step": 15118 - }, - { - "epoch": 1.424271684604696, - "grad_norm": 0.6739823818206787, - "learning_rate": 3.963876235294758e-06, - "loss": 0.2161, - "step": 15119 - }, - { - "epoch": 1.4243658886978638, - "grad_norm": 0.6182190179824829, - "learning_rate": 3.962672400010868e-06, - "loss": 0.194, - "step": 15120 - }, - { - "epoch": 1.4244600927910318, - "grad_norm": 0.6688817143440247, - "learning_rate": 3.961468702387389e-06, - "loss": 0.2185, - "step": 15121 - }, - { - "epoch": 1.4245542968841995, - "grad_norm": 0.6367483139038086, - "learning_rate": 3.960265142451758e-06, - "loss": 0.198, - "step": 15122 - }, - { - "epoch": 1.4246485009773675, - "grad_norm": 0.6875576972961426, - "learning_rate": 3.959061720231422e-06, - "loss": 0.1994, - "step": 15123 - }, - { - "epoch": 1.4247427050705352, - "grad_norm": 0.6497072577476501, - "learning_rate": 3.957858435753825e-06, - "loss": 0.1734, - "step": 15124 - }, - { - "epoch": 1.4248369091637032, - "grad_norm": 0.6306024789810181, - "learning_rate": 3.9566552890464e-06, - "loss": 0.1932, - "step": 15125 - }, - { - "epoch": 1.424931113256871, - "grad_norm": 0.5928530693054199, - "learning_rate": 3.955452280136575e-06, - "loss": 0.1797, - "step": 15126 - }, - { - "epoch": 1.425025317350039, - "grad_norm": 0.6352770328521729, - "learning_rate": 3.954249409051791e-06, - "loss": 0.1943, - "step": 15127 - }, - { - "epoch": 1.4251195214432066, - "grad_norm": 0.6394056677818298, - "learning_rate": 3.953046675819472e-06, - "loss": 0.1926, - "step": 15128 - }, - { - "epoch": 1.4252137255363746, - "grad_norm": 0.7113199234008789, - "learning_rate": 3.951844080467032e-06, - "loss": 0.1988, - "step": 15129 - }, - { - "epoch": 1.4253079296295423, - "grad_norm": 0.7182572484016418, - "learning_rate": 3.950641623021909e-06, - "loss": 0.1948, - "step": 15130 - }, - { - "epoch": 1.4254021337227103, - "grad_norm": 0.6903520822525024, - "learning_rate": 3.949439303511512e-06, - "loss": 0.2071, - "step": 15131 - }, - { - "epoch": 1.425496337815878, - "grad_norm": 0.7144119143486023, - "learning_rate": 3.9482371219632535e-06, - "loss": 0.2064, - "step": 15132 - }, - { - "epoch": 1.425590541909046, - "grad_norm": 0.6513559222221375, - "learning_rate": 3.947035078404546e-06, - "loss": 0.2022, - "step": 15133 - }, - { - "epoch": 1.4256847460022137, - "grad_norm": 0.5769354104995728, - "learning_rate": 3.945833172862806e-06, - "loss": 0.1454, - "step": 15134 - }, - { - "epoch": 1.4257789500953817, - "grad_norm": 0.6987826228141785, - "learning_rate": 3.944631405365427e-06, - "loss": 0.2258, - "step": 15135 - }, - { - "epoch": 1.4258731541885494, - "grad_norm": 0.6186956763267517, - "learning_rate": 3.9434297759398164e-06, - "loss": 0.1752, - "step": 15136 - }, - { - "epoch": 1.4259673582817174, - "grad_norm": 0.7385438084602356, - "learning_rate": 
3.942228284613379e-06, - "loss": 0.2365, - "step": 15137 - }, - { - "epoch": 1.4260615623748851, - "grad_norm": 0.6618998646736145, - "learning_rate": 3.9410269314135e-06, - "loss": 0.1915, - "step": 15138 - }, - { - "epoch": 1.426155766468053, - "grad_norm": 0.6850101351737976, - "learning_rate": 3.939825716367578e-06, - "loss": 0.2124, - "step": 15139 - }, - { - "epoch": 1.4262499705612208, - "grad_norm": 0.7157747149467468, - "learning_rate": 3.938624639503006e-06, - "loss": 0.2039, - "step": 15140 - }, - { - "epoch": 1.4263441746543888, - "grad_norm": 0.6254139542579651, - "learning_rate": 3.9374237008471615e-06, - "loss": 0.2011, - "step": 15141 - }, - { - "epoch": 1.4264383787475565, - "grad_norm": 0.615426778793335, - "learning_rate": 3.9362229004274376e-06, - "loss": 0.1881, - "step": 15142 - }, - { - "epoch": 1.4265325828407245, - "grad_norm": 0.7962415218353271, - "learning_rate": 3.935022238271205e-06, - "loss": 0.1924, - "step": 15143 - }, - { - "epoch": 1.4266267869338922, - "grad_norm": 0.6106542944908142, - "learning_rate": 3.933821714405846e-06, - "loss": 0.2219, - "step": 15144 - }, - { - "epoch": 1.4267209910270602, - "grad_norm": 0.7156044244766235, - "learning_rate": 3.932621328858735e-06, - "loss": 0.1895, - "step": 15145 - }, - { - "epoch": 1.426815195120228, - "grad_norm": 0.720986008644104, - "learning_rate": 3.931421081657238e-06, - "loss": 0.2261, - "step": 15146 - }, - { - "epoch": 1.4269093992133959, - "grad_norm": 0.7468794584274292, - "learning_rate": 3.930220972828726e-06, - "loss": 0.2667, - "step": 15147 - }, - { - "epoch": 1.4270036033065636, - "grad_norm": 0.6073909401893616, - "learning_rate": 3.929021002400568e-06, - "loss": 0.1955, - "step": 15148 - }, - { - "epoch": 1.4270978073997316, - "grad_norm": 0.7410126328468323, - "learning_rate": 3.927821170400115e-06, - "loss": 0.2043, - "step": 15149 - }, - { - "epoch": 1.4271920114928993, - "grad_norm": 0.5966479778289795, - "learning_rate": 3.9266214768547335e-06, - "loss": 0.1908, - "step": 15150 - }, - { - "epoch": 1.4272862155860673, - "grad_norm": 0.7098649144172668, - "learning_rate": 3.9254219217917725e-06, - "loss": 0.1902, - "step": 15151 - }, - { - "epoch": 1.427380419679235, - "grad_norm": 0.677027702331543, - "learning_rate": 3.924222505238588e-06, - "loss": 0.2138, - "step": 15152 - }, - { - "epoch": 1.427474623772403, - "grad_norm": 0.7108759880065918, - "learning_rate": 3.923023227222526e-06, - "loss": 0.1998, - "step": 15153 - }, - { - "epoch": 1.4275688278655707, - "grad_norm": 0.6519859433174133, - "learning_rate": 3.9218240877709305e-06, - "loss": 0.2162, - "step": 15154 - }, - { - "epoch": 1.4276630319587387, - "grad_norm": 0.7533174753189087, - "learning_rate": 3.92062508691115e-06, - "loss": 0.1936, - "step": 15155 - }, - { - "epoch": 1.4277572360519064, - "grad_norm": 0.587651252746582, - "learning_rate": 3.919426224670515e-06, - "loss": 0.1622, - "step": 15156 - }, - { - "epoch": 1.4278514401450744, - "grad_norm": 0.7207445502281189, - "learning_rate": 3.918227501076367e-06, - "loss": 0.1996, - "step": 15157 - }, - { - "epoch": 1.427945644238242, - "grad_norm": 0.5945635437965393, - "learning_rate": 3.917028916156041e-06, - "loss": 0.1784, - "step": 15158 - }, - { - "epoch": 1.42803984833141, - "grad_norm": 0.6569984555244446, - "learning_rate": 3.915830469936858e-06, - "loss": 0.1736, - "step": 15159 - }, - { - "epoch": 1.4281340524245778, - "grad_norm": 0.6692624092102051, - "learning_rate": 3.914632162446153e-06, - "loss": 0.2072, - "step": 15160 - }, - { - "epoch": 
1.4282282565177458, - "grad_norm": 0.7507838010787964, - "learning_rate": 3.913433993711246e-06, - "loss": 0.2067, - "step": 15161 - }, - { - "epoch": 1.4283224606109135, - "grad_norm": 0.6753888130187988, - "learning_rate": 3.912235963759456e-06, - "loss": 0.1837, - "step": 15162 - }, - { - "epoch": 1.4284166647040815, - "grad_norm": 0.6562243700027466, - "learning_rate": 3.9110380726181e-06, - "loss": 0.1946, - "step": 15163 - }, - { - "epoch": 1.4285108687972492, - "grad_norm": 0.6343908905982971, - "learning_rate": 3.9098403203144965e-06, - "loss": 0.2137, - "step": 15164 - }, - { - "epoch": 1.4286050728904172, - "grad_norm": 0.6291208863258362, - "learning_rate": 3.908642706875951e-06, - "loss": 0.1765, - "step": 15165 - }, - { - "epoch": 1.428699276983585, - "grad_norm": 0.636816143989563, - "learning_rate": 3.907445232329766e-06, - "loss": 0.1738, - "step": 15166 - }, - { - "epoch": 1.4287934810767529, - "grad_norm": 0.6997500658035278, - "learning_rate": 3.90624789670326e-06, - "loss": 0.2083, - "step": 15167 - }, - { - "epoch": 1.4288876851699206, - "grad_norm": 0.7286765575408936, - "learning_rate": 3.905050700023726e-06, - "loss": 0.2262, - "step": 15168 - }, - { - "epoch": 1.4289818892630886, - "grad_norm": 0.6835652589797974, - "learning_rate": 3.903853642318453e-06, - "loss": 0.1858, - "step": 15169 - }, - { - "epoch": 1.4290760933562563, - "grad_norm": 0.6714411973953247, - "learning_rate": 3.902656723614754e-06, - "loss": 0.1996, - "step": 15170 - }, - { - "epoch": 1.4291702974494243, - "grad_norm": 0.6755989789962769, - "learning_rate": 3.90145994393991e-06, - "loss": 0.1941, - "step": 15171 - }, - { - "epoch": 1.429264501542592, - "grad_norm": 0.6982343792915344, - "learning_rate": 3.900263303321209e-06, - "loss": 0.2028, - "step": 15172 - }, - { - "epoch": 1.42935870563576, - "grad_norm": 0.604402482509613, - "learning_rate": 3.899066801785937e-06, - "loss": 0.1917, - "step": 15173 - }, - { - "epoch": 1.4294529097289277, - "grad_norm": 0.66719651222229, - "learning_rate": 3.897870439361381e-06, - "loss": 0.2128, - "step": 15174 - }, - { - "epoch": 1.4295471138220956, - "grad_norm": 0.6596137881278992, - "learning_rate": 3.896674216074812e-06, - "loss": 0.1853, - "step": 15175 - }, - { - "epoch": 1.4296413179152634, - "grad_norm": 0.6531592607498169, - "learning_rate": 3.895478131953511e-06, - "loss": 0.212, - "step": 15176 - }, - { - "epoch": 1.4297355220084313, - "grad_norm": 0.6396514773368835, - "learning_rate": 3.894282187024752e-06, - "loss": 0.222, - "step": 15177 - }, - { - "epoch": 1.429829726101599, - "grad_norm": 0.7242485880851746, - "learning_rate": 3.893086381315798e-06, - "loss": 0.2016, - "step": 15178 - }, - { - "epoch": 1.429923930194767, - "grad_norm": 0.6706082820892334, - "learning_rate": 3.89189071485392e-06, - "loss": 0.2185, - "step": 15179 - }, - { - "epoch": 1.4300181342879348, - "grad_norm": 0.6603735089302063, - "learning_rate": 3.890695187666382e-06, - "loss": 0.2115, - "step": 15180 - }, - { - "epoch": 1.4301123383811025, - "grad_norm": 0.7358943223953247, - "learning_rate": 3.8894997997804405e-06, - "loss": 0.2371, - "step": 15181 - }, - { - "epoch": 1.4302065424742705, - "grad_norm": 0.6819736361503601, - "learning_rate": 3.8883045512233564e-06, - "loss": 0.2149, - "step": 15182 - }, - { - "epoch": 1.4303007465674384, - "grad_norm": 0.6299451589584351, - "learning_rate": 3.887109442022377e-06, - "loss": 0.1961, - "step": 15183 - }, - { - "epoch": 1.4303949506606062, - "grad_norm": 0.5874504446983337, - "learning_rate": 
3.8859144722047545e-06, - "loss": 0.1801, - "step": 15184 - }, - { - "epoch": 1.430489154753774, - "grad_norm": 0.72264164686203, - "learning_rate": 3.884719641797743e-06, - "loss": 0.1887, - "step": 15185 - }, - { - "epoch": 1.4305833588469419, - "grad_norm": 0.6677408814430237, - "learning_rate": 3.883524950828578e-06, - "loss": 0.1973, - "step": 15186 - }, - { - "epoch": 1.4306775629401098, - "grad_norm": 0.676523745059967, - "learning_rate": 3.8823303993245025e-06, - "loss": 0.2153, - "step": 15187 - }, - { - "epoch": 1.4307717670332776, - "grad_norm": 0.6481882929801941, - "learning_rate": 3.881135987312758e-06, - "loss": 0.1937, - "step": 15188 - }, - { - "epoch": 1.4308659711264453, - "grad_norm": 0.6754376292228699, - "learning_rate": 3.879941714820573e-06, - "loss": 0.2053, - "step": 15189 - }, - { - "epoch": 1.4309601752196133, - "grad_norm": 0.6309954524040222, - "learning_rate": 3.878747581875185e-06, - "loss": 0.1989, - "step": 15190 - }, - { - "epoch": 1.4310543793127812, - "grad_norm": 0.6919754147529602, - "learning_rate": 3.877553588503817e-06, - "loss": 0.2226, - "step": 15191 - }, - { - "epoch": 1.431148583405949, - "grad_norm": 0.9672317504882812, - "learning_rate": 3.876359734733697e-06, - "loss": 0.1738, - "step": 15192 - }, - { - "epoch": 1.4312427874991167, - "grad_norm": 0.641103982925415, - "learning_rate": 3.875166020592043e-06, - "loss": 0.2022, - "step": 15193 - }, - { - "epoch": 1.4313369915922847, - "grad_norm": 0.8770423531532288, - "learning_rate": 3.8739724461060755e-06, - "loss": 0.2035, - "step": 15194 - }, - { - "epoch": 1.4314311956854526, - "grad_norm": 0.6562981009483337, - "learning_rate": 3.872779011303014e-06, - "loss": 0.208, - "step": 15195 - }, - { - "epoch": 1.4315253997786204, - "grad_norm": 0.5817837119102478, - "learning_rate": 3.871585716210063e-06, - "loss": 0.1814, - "step": 15196 - }, - { - "epoch": 1.431619603871788, - "grad_norm": 0.6063922643661499, - "learning_rate": 3.870392560854437e-06, - "loss": 0.1735, - "step": 15197 - }, - { - "epoch": 1.431713807964956, - "grad_norm": 0.6343390941619873, - "learning_rate": 3.869199545263342e-06, - "loss": 0.1839, - "step": 15198 - }, - { - "epoch": 1.431808012058124, - "grad_norm": 0.6845496296882629, - "learning_rate": 3.868006669463977e-06, - "loss": 0.2165, - "step": 15199 - }, - { - "epoch": 1.4319022161512918, - "grad_norm": 0.752907395362854, - "learning_rate": 3.866813933483542e-06, - "loss": 0.2132, - "step": 15200 - }, - { - "epoch": 1.4319964202444595, - "grad_norm": 0.8951588273048401, - "learning_rate": 3.865621337349238e-06, - "loss": 0.2211, - "step": 15201 - }, - { - "epoch": 1.4320906243376275, - "grad_norm": 0.6657394170761108, - "learning_rate": 3.864428881088256e-06, - "loss": 0.1817, - "step": 15202 - }, - { - "epoch": 1.4321848284307954, - "grad_norm": 0.6672415733337402, - "learning_rate": 3.8632365647277756e-06, - "loss": 0.2022, - "step": 15203 - }, - { - "epoch": 1.4322790325239632, - "grad_norm": 0.8055460453033447, - "learning_rate": 3.862044388295e-06, - "loss": 0.1993, - "step": 15204 - }, - { - "epoch": 1.432373236617131, - "grad_norm": 0.7843263149261475, - "learning_rate": 3.860852351817105e-06, - "loss": 0.2299, - "step": 15205 - }, - { - "epoch": 1.4324674407102989, - "grad_norm": 0.6628491878509521, - "learning_rate": 3.8596604553212625e-06, - "loss": 0.1919, - "step": 15206 - }, - { - "epoch": 1.4325616448034668, - "grad_norm": 0.9990058541297913, - "learning_rate": 3.858468698834666e-06, - "loss": 0.1996, - "step": 15207 - }, - { - "epoch": 
1.4326558488966346, - "grad_norm": 0.6103582382202148, - "learning_rate": 3.857277082384481e-06, - "loss": 0.1912, - "step": 15208 - }, - { - "epoch": 1.4327500529898023, - "grad_norm": 0.6782674193382263, - "learning_rate": 3.856085605997871e-06, - "loss": 0.2114, - "step": 15209 - }, - { - "epoch": 1.4328442570829703, - "grad_norm": 0.6303362846374512, - "learning_rate": 3.854894269702019e-06, - "loss": 0.217, - "step": 15210 - }, - { - "epoch": 1.4329384611761382, - "grad_norm": 0.7728652954101562, - "learning_rate": 3.85370307352408e-06, - "loss": 0.1905, - "step": 15211 - }, - { - "epoch": 1.433032665269306, - "grad_norm": 0.7087612748146057, - "learning_rate": 3.852512017491214e-06, - "loss": 0.19, - "step": 15212 - }, - { - "epoch": 1.4331268693624737, - "grad_norm": 0.6688991189002991, - "learning_rate": 3.8513211016305805e-06, - "loss": 0.196, - "step": 15213 - }, - { - "epoch": 1.4332210734556416, - "grad_norm": 0.686552107334137, - "learning_rate": 3.850130325969339e-06, - "loss": 0.1668, - "step": 15214 - }, - { - "epoch": 1.4333152775488096, - "grad_norm": 0.5662250518798828, - "learning_rate": 3.848939690534633e-06, - "loss": 0.1536, - "step": 15215 - }, - { - "epoch": 1.4334094816419773, - "grad_norm": 0.7102068662643433, - "learning_rate": 3.847749195353615e-06, - "loss": 0.2247, - "step": 15216 - }, - { - "epoch": 1.433503685735145, - "grad_norm": 0.6884145140647888, - "learning_rate": 3.846558840453434e-06, - "loss": 0.2108, - "step": 15217 - }, - { - "epoch": 1.433597889828313, - "grad_norm": 0.6879662871360779, - "learning_rate": 3.845368625861223e-06, - "loss": 0.2053, - "step": 15218 - }, - { - "epoch": 1.433692093921481, - "grad_norm": 0.6586831212043762, - "learning_rate": 3.844178551604128e-06, - "loss": 0.2093, - "step": 15219 - }, - { - "epoch": 1.4337862980146487, - "grad_norm": 0.7226083874702454, - "learning_rate": 3.842988617709283e-06, - "loss": 0.2202, - "step": 15220 - }, - { - "epoch": 1.4338805021078165, - "grad_norm": 0.6367272734642029, - "learning_rate": 3.841798824203818e-06, - "loss": 0.1957, - "step": 15221 - }, - { - "epoch": 1.4339747062009844, - "grad_norm": 0.6065813899040222, - "learning_rate": 3.840609171114867e-06, - "loss": 0.182, - "step": 15222 - }, - { - "epoch": 1.4340689102941524, - "grad_norm": 0.6629114151000977, - "learning_rate": 3.839419658469548e-06, - "loss": 0.2054, - "step": 15223 - }, - { - "epoch": 1.4341631143873201, - "grad_norm": 0.6589508056640625, - "learning_rate": 3.838230286294989e-06, - "loss": 0.231, - "step": 15224 - }, - { - "epoch": 1.4342573184804879, - "grad_norm": 0.6896048188209534, - "learning_rate": 3.837041054618312e-06, - "loss": 0.2144, - "step": 15225 - }, - { - "epoch": 1.4343515225736558, - "grad_norm": 0.7509519457817078, - "learning_rate": 3.8358519634666265e-06, - "loss": 0.2081, - "step": 15226 - }, - { - "epoch": 1.4344457266668238, - "grad_norm": 0.631653368473053, - "learning_rate": 3.83466301286705e-06, - "loss": 0.1847, - "step": 15227 - }, - { - "epoch": 1.4345399307599915, - "grad_norm": 0.6637871861457825, - "learning_rate": 3.833474202846695e-06, - "loss": 0.1942, - "step": 15228 - }, - { - "epoch": 1.4346341348531593, - "grad_norm": 0.6666586399078369, - "learning_rate": 3.8322855334326615e-06, - "loss": 0.2135, - "step": 15229 - }, - { - "epoch": 1.4347283389463272, - "grad_norm": 0.6628562808036804, - "learning_rate": 3.831097004652059e-06, - "loss": 0.1864, - "step": 15230 - }, - { - "epoch": 1.4348225430394952, - "grad_norm": 0.6609824895858765, - "learning_rate": 
3.829908616531982e-06, - "loss": 0.2022, - "step": 15231 - }, - { - "epoch": 1.434916747132663, - "grad_norm": 0.651069164276123, - "learning_rate": 3.828720369099536e-06, - "loss": 0.1908, - "step": 15232 - }, - { - "epoch": 1.4350109512258307, - "grad_norm": 0.5750835537910461, - "learning_rate": 3.827532262381803e-06, - "loss": 0.1863, - "step": 15233 - }, - { - "epoch": 1.4351051553189986, - "grad_norm": 0.7546768188476562, - "learning_rate": 3.826344296405883e-06, - "loss": 0.2507, - "step": 15234 - }, - { - "epoch": 1.4351993594121666, - "grad_norm": 0.6671732664108276, - "learning_rate": 3.825156471198863e-06, - "loss": 0.2065, - "step": 15235 - }, - { - "epoch": 1.4352935635053343, - "grad_norm": 0.6724646687507629, - "learning_rate": 3.823968786787821e-06, - "loss": 0.1941, - "step": 15236 - }, - { - "epoch": 1.435387767598502, - "grad_norm": 0.687477171421051, - "learning_rate": 3.822781243199844e-06, - "loss": 0.177, - "step": 15237 - }, - { - "epoch": 1.43548197169167, - "grad_norm": 0.6782063841819763, - "learning_rate": 3.82159384046201e-06, - "loss": 0.2003, - "step": 15238 - }, - { - "epoch": 1.435576175784838, - "grad_norm": 0.6828404068946838, - "learning_rate": 3.820406578601389e-06, - "loss": 0.1944, - "step": 15239 - }, - { - "epoch": 1.4356703798780057, - "grad_norm": 0.6947293281555176, - "learning_rate": 3.819219457645053e-06, - "loss": 0.2044, - "step": 15240 - }, - { - "epoch": 1.4357645839711735, - "grad_norm": 0.6581609845161438, - "learning_rate": 3.818032477620079e-06, - "loss": 0.2079, - "step": 15241 - }, - { - "epoch": 1.4358587880643414, - "grad_norm": 0.6846004724502563, - "learning_rate": 3.816845638553523e-06, - "loss": 0.2115, - "step": 15242 - }, - { - "epoch": 1.4359529921575094, - "grad_norm": 0.6613644957542419, - "learning_rate": 3.8156589404724405e-06, - "loss": 0.1928, - "step": 15243 - }, - { - "epoch": 1.4360471962506771, - "grad_norm": 0.6498963236808777, - "learning_rate": 3.8144723834039076e-06, - "loss": 0.209, - "step": 15244 - }, - { - "epoch": 1.4361414003438449, - "grad_norm": 0.6460498571395874, - "learning_rate": 3.8132859673749688e-06, - "loss": 0.1703, - "step": 15245 - }, - { - "epoch": 1.4362356044370128, - "grad_norm": 0.7108054161071777, - "learning_rate": 3.812099692412672e-06, - "loss": 0.2185, - "step": 15246 - }, - { - "epoch": 1.4363298085301808, - "grad_norm": 0.6266716718673706, - "learning_rate": 3.8109135585440794e-06, - "loss": 0.2004, - "step": 15247 - }, - { - "epoch": 1.4364240126233485, - "grad_norm": 0.6401249170303345, - "learning_rate": 3.8097275657962284e-06, - "loss": 0.2087, - "step": 15248 - }, - { - "epoch": 1.4365182167165162, - "grad_norm": 0.730238676071167, - "learning_rate": 3.8085417141961554e-06, - "loss": 0.2222, - "step": 15249 - }, - { - "epoch": 1.4366124208096842, - "grad_norm": 0.6710932850837708, - "learning_rate": 3.8073560037709134e-06, - "loss": 0.2167, - "step": 15250 - }, - { - "epoch": 1.436706624902852, - "grad_norm": 0.6229298114776611, - "learning_rate": 3.8061704345475325e-06, - "loss": 0.1963, - "step": 15251 - }, - { - "epoch": 1.43680082899602, - "grad_norm": 0.8475515246391296, - "learning_rate": 3.80498500655304e-06, - "loss": 0.2124, - "step": 15252 - }, - { - "epoch": 1.4368950330891876, - "grad_norm": 0.5910754799842834, - "learning_rate": 3.8037997198144716e-06, - "loss": 0.1641, - "step": 15253 - }, - { - "epoch": 1.4369892371823556, - "grad_norm": 0.6734824776649475, - "learning_rate": 3.8026145743588548e-06, - "loss": 0.1906, - "step": 15254 - }, - { - "epoch": 
1.4370834412755233, - "grad_norm": 0.5976563096046448, - "learning_rate": 3.8014295702132063e-06, - "loss": 0.1886, - "step": 15255 - }, - { - "epoch": 1.4371776453686913, - "grad_norm": 0.592056393623352, - "learning_rate": 3.8002447074045503e-06, - "loss": 0.1858, - "step": 15256 - }, - { - "epoch": 1.437271849461859, - "grad_norm": 0.662977933883667, - "learning_rate": 3.7990599859599066e-06, - "loss": 0.19, - "step": 15257 - }, - { - "epoch": 1.437366053555027, - "grad_norm": 0.6898549795150757, - "learning_rate": 3.7978754059062818e-06, - "loss": 0.2209, - "step": 15258 - }, - { - "epoch": 1.4374602576481947, - "grad_norm": 0.6281675696372986, - "learning_rate": 3.796690967270689e-06, - "loss": 0.1903, - "step": 15259 - }, - { - "epoch": 1.4375544617413627, - "grad_norm": 0.652545690536499, - "learning_rate": 3.7955066700801392e-06, - "loss": 0.1953, - "step": 15260 - }, - { - "epoch": 1.4376486658345304, - "grad_norm": 0.700383186340332, - "learning_rate": 3.79432251436163e-06, - "loss": 0.2275, - "step": 15261 - }, - { - "epoch": 1.4377428699276984, - "grad_norm": 0.5970531702041626, - "learning_rate": 3.793138500142168e-06, - "loss": 0.1798, - "step": 15262 - }, - { - "epoch": 1.4378370740208661, - "grad_norm": 0.7306908369064331, - "learning_rate": 3.791954627448743e-06, - "loss": 0.2266, - "step": 15263 - }, - { - "epoch": 1.437931278114034, - "grad_norm": 0.7055124640464783, - "learning_rate": 3.7907708963083544e-06, - "loss": 0.224, - "step": 15264 - }, - { - "epoch": 1.4380254822072018, - "grad_norm": 0.619551420211792, - "learning_rate": 3.789587306747995e-06, - "loss": 0.206, - "step": 15265 - }, - { - "epoch": 1.4381196863003698, - "grad_norm": 0.5783557891845703, - "learning_rate": 3.788403858794647e-06, - "loss": 0.1809, - "step": 15266 - }, - { - "epoch": 1.4382138903935375, - "grad_norm": 0.7996221780776978, - "learning_rate": 3.7872205524753e-06, - "loss": 0.2468, - "step": 15267 - }, - { - "epoch": 1.4383080944867055, - "grad_norm": 0.5824506282806396, - "learning_rate": 3.78603738781693e-06, - "loss": 0.2103, - "step": 15268 - }, - { - "epoch": 1.4384022985798732, - "grad_norm": 0.7703114748001099, - "learning_rate": 3.7848543648465163e-06, - "loss": 0.191, - "step": 15269 - }, - { - "epoch": 1.4384965026730412, - "grad_norm": 0.6755668520927429, - "learning_rate": 3.783671483591039e-06, - "loss": 0.1848, - "step": 15270 - }, - { - "epoch": 1.438590706766209, - "grad_norm": 0.6245675086975098, - "learning_rate": 3.782488744077463e-06, - "loss": 0.1826, - "step": 15271 - }, - { - "epoch": 1.4386849108593769, - "grad_norm": 0.7833127975463867, - "learning_rate": 3.7813061463327617e-06, - "loss": 0.1886, - "step": 15272 - }, - { - "epoch": 1.4387791149525446, - "grad_norm": 0.7326774597167969, - "learning_rate": 3.7801236903838946e-06, - "loss": 0.2157, - "step": 15273 - }, - { - "epoch": 1.4388733190457126, - "grad_norm": 0.7009978294372559, - "learning_rate": 3.7789413762578263e-06, - "loss": 0.2013, - "step": 15274 - }, - { - "epoch": 1.4389675231388803, - "grad_norm": 0.6456125378608704, - "learning_rate": 3.77775920398152e-06, - "loss": 0.1898, - "step": 15275 - }, - { - "epoch": 1.4390617272320483, - "grad_norm": 0.6135196089744568, - "learning_rate": 3.7765771735819223e-06, - "loss": 0.198, - "step": 15276 - }, - { - "epoch": 1.439155931325216, - "grad_norm": 0.713524580001831, - "learning_rate": 3.775395285085991e-06, - "loss": 0.202, - "step": 15277 - }, - { - "epoch": 1.439250135418384, - "grad_norm": 0.6370148658752441, - "learning_rate": 
3.774213538520676e-06, - "loss": 0.1717, - "step": 15278 - }, - { - "epoch": 1.4393443395115517, - "grad_norm": 0.603708803653717, - "learning_rate": 3.7730319339129175e-06, - "loss": 0.1816, - "step": 15279 - }, - { - "epoch": 1.4394385436047197, - "grad_norm": 0.6384181976318359, - "learning_rate": 3.771850471289661e-06, - "loss": 0.179, - "step": 15280 - }, - { - "epoch": 1.4395327476978874, - "grad_norm": 0.6581946015357971, - "learning_rate": 3.7706691506778494e-06, - "loss": 0.1931, - "step": 15281 - }, - { - "epoch": 1.4396269517910554, - "grad_norm": 0.7333237528800964, - "learning_rate": 3.7694879721044155e-06, - "loss": 0.1842, - "step": 15282 - }, - { - "epoch": 1.4397211558842231, - "grad_norm": 0.6282057762145996, - "learning_rate": 3.768306935596283e-06, - "loss": 0.1916, - "step": 15283 - }, - { - "epoch": 1.439815359977391, - "grad_norm": 0.672105610370636, - "learning_rate": 3.767126041180398e-06, - "loss": 0.2026, - "step": 15284 - }, - { - "epoch": 1.4399095640705588, - "grad_norm": 0.7663817405700684, - "learning_rate": 3.7659452888836787e-06, - "loss": 0.2057, - "step": 15285 - }, - { - "epoch": 1.4400037681637268, - "grad_norm": 0.6527891755104065, - "learning_rate": 3.7647646787330404e-06, - "loss": 0.2168, - "step": 15286 - }, - { - "epoch": 1.4400979722568945, - "grad_norm": 0.6544922590255737, - "learning_rate": 3.763584210755418e-06, - "loss": 0.182, - "step": 15287 - }, - { - "epoch": 1.4401921763500625, - "grad_norm": 0.6385776400566101, - "learning_rate": 3.76240388497772e-06, - "loss": 0.1706, - "step": 15288 - }, - { - "epoch": 1.4402863804432302, - "grad_norm": 0.6630579829216003, - "learning_rate": 3.7612237014268538e-06, - "loss": 0.1916, - "step": 15289 - }, - { - "epoch": 1.4403805845363982, - "grad_norm": 0.641896665096283, - "learning_rate": 3.7600436601297417e-06, - "loss": 0.1957, - "step": 15290 - }, - { - "epoch": 1.440474788629566, - "grad_norm": 0.822803258895874, - "learning_rate": 3.758863761113285e-06, - "loss": 0.2088, - "step": 15291 - }, - { - "epoch": 1.4405689927227339, - "grad_norm": 0.6640996336936951, - "learning_rate": 3.757684004404383e-06, - "loss": 0.189, - "step": 15292 - }, - { - "epoch": 1.4406631968159016, - "grad_norm": 0.6632574796676636, - "learning_rate": 3.7565043900299392e-06, - "loss": 0.2002, - "step": 15293 - }, - { - "epoch": 1.4407574009090696, - "grad_norm": 0.6997697353363037, - "learning_rate": 3.755324918016855e-06, - "loss": 0.1976, - "step": 15294 - }, - { - "epoch": 1.4408516050022373, - "grad_norm": 0.6222142577171326, - "learning_rate": 3.7541455883920166e-06, - "loss": 0.1836, - "step": 15295 - }, - { - "epoch": 1.4409458090954053, - "grad_norm": 0.6241096258163452, - "learning_rate": 3.7529664011823186e-06, - "loss": 0.1911, - "step": 15296 - }, - { - "epoch": 1.441040013188573, - "grad_norm": 0.6768137812614441, - "learning_rate": 3.7517873564146503e-06, - "loss": 0.1914, - "step": 15297 - }, - { - "epoch": 1.441134217281741, - "grad_norm": 0.6475250720977783, - "learning_rate": 3.7506084541158903e-06, - "loss": 0.1831, - "step": 15298 - }, - { - "epoch": 1.4412284213749087, - "grad_norm": 0.6157442927360535, - "learning_rate": 3.7494296943129227e-06, - "loss": 0.1805, - "step": 15299 - }, - { - "epoch": 1.4413226254680767, - "grad_norm": 0.731525182723999, - "learning_rate": 3.7482510770326286e-06, - "loss": 0.2078, - "step": 15300 - }, - { - "epoch": 1.4414168295612444, - "grad_norm": 0.6318780779838562, - "learning_rate": 3.7470726023018745e-06, - "loss": 0.2073, - "step": 15301 - }, - { - "epoch": 
1.4415110336544124, - "grad_norm": 0.686691164970398, - "learning_rate": 3.7458942701475385e-06, - "loss": 0.1412, - "step": 15302 - }, - { - "epoch": 1.44160523774758, - "grad_norm": 0.67552250623703, - "learning_rate": 3.744716080596482e-06, - "loss": 0.1999, - "step": 15303 - }, - { - "epoch": 1.441699441840748, - "grad_norm": 0.6695284247398376, - "learning_rate": 3.743538033675573e-06, - "loss": 0.2182, - "step": 15304 - }, - { - "epoch": 1.4417936459339158, - "grad_norm": 0.626176655292511, - "learning_rate": 3.7423601294116775e-06, - "loss": 0.199, - "step": 15305 - }, - { - "epoch": 1.4418878500270838, - "grad_norm": 0.6299342513084412, - "learning_rate": 3.741182367831644e-06, - "loss": 0.2096, - "step": 15306 - }, - { - "epoch": 1.4419820541202515, - "grad_norm": 0.6912399530410767, - "learning_rate": 3.740004748962335e-06, - "loss": 0.2153, - "step": 15307 - }, - { - "epoch": 1.4420762582134194, - "grad_norm": 0.6924498081207275, - "learning_rate": 3.7388272728305954e-06, - "loss": 0.2195, - "step": 15308 - }, - { - "epoch": 1.4421704623065872, - "grad_norm": 0.8784087300300598, - "learning_rate": 3.7376499394632783e-06, - "loss": 0.2118, - "step": 15309 - }, - { - "epoch": 1.4422646663997551, - "grad_norm": 0.6489197015762329, - "learning_rate": 3.7364727488872297e-06, - "loss": 0.1989, - "step": 15310 - }, - { - "epoch": 1.4423588704929229, - "grad_norm": 0.6295911073684692, - "learning_rate": 3.735295701129287e-06, - "loss": 0.1993, - "step": 15311 - }, - { - "epoch": 1.4424530745860908, - "grad_norm": 0.6100906133651733, - "learning_rate": 3.7341187962162928e-06, - "loss": 0.1964, - "step": 15312 - }, - { - "epoch": 1.4425472786792586, - "grad_norm": 0.6560591459274292, - "learning_rate": 3.7329420341750776e-06, - "loss": 0.2164, - "step": 15313 - }, - { - "epoch": 1.4426414827724265, - "grad_norm": 0.7599140405654907, - "learning_rate": 3.7317654150324765e-06, - "loss": 0.2352, - "step": 15314 - }, - { - "epoch": 1.4427356868655943, - "grad_norm": 0.6573992371559143, - "learning_rate": 3.7305889388153215e-06, - "loss": 0.1993, - "step": 15315 - }, - { - "epoch": 1.4428298909587622, - "grad_norm": 0.6520240902900696, - "learning_rate": 3.729412605550431e-06, - "loss": 0.2125, - "step": 15316 - }, - { - "epoch": 1.44292409505193, - "grad_norm": 0.6830780506134033, - "learning_rate": 3.72823641526463e-06, - "loss": 0.225, - "step": 15317 - }, - { - "epoch": 1.443018299145098, - "grad_norm": 0.6395137906074524, - "learning_rate": 3.7270603679847416e-06, - "loss": 0.1844, - "step": 15318 - }, - { - "epoch": 1.4431125032382657, - "grad_norm": 0.7229591608047485, - "learning_rate": 3.725884463737576e-06, - "loss": 0.1989, - "step": 15319 - }, - { - "epoch": 1.4432067073314334, - "grad_norm": 0.7150613069534302, - "learning_rate": 3.7247087025499464e-06, - "loss": 0.2221, - "step": 15320 - }, - { - "epoch": 1.4433009114246014, - "grad_norm": 0.6940883994102478, - "learning_rate": 3.723533084448667e-06, - "loss": 0.2131, - "step": 15321 - }, - { - "epoch": 1.4433951155177693, - "grad_norm": 0.6280433535575867, - "learning_rate": 3.7223576094605407e-06, - "loss": 0.1907, - "step": 15322 - }, - { - "epoch": 1.443489319610937, - "grad_norm": 0.6110469102859497, - "learning_rate": 3.721182277612363e-06, - "loss": 0.1655, - "step": 15323 - }, - { - "epoch": 1.4435835237041048, - "grad_norm": 0.5971839427947998, - "learning_rate": 3.720007088930945e-06, - "loss": 0.1979, - "step": 15324 - }, - { - "epoch": 1.4436777277972728, - "grad_norm": 0.7039603590965271, - "learning_rate": 
3.7188320434430793e-06, - "loss": 0.2198, - "step": 15325 - }, - { - "epoch": 1.4437719318904407, - "grad_norm": 0.6401283740997314, - "learning_rate": 3.717657141175549e-06, - "loss": 0.1962, - "step": 15326 - }, - { - "epoch": 1.4438661359836085, - "grad_norm": 0.6265789270401001, - "learning_rate": 3.716482382155159e-06, - "loss": 0.1759, - "step": 15327 - }, - { - "epoch": 1.4439603400767762, - "grad_norm": 0.7420952320098877, - "learning_rate": 3.7153077664086877e-06, - "loss": 0.2096, - "step": 15328 - }, - { - "epoch": 1.4440545441699442, - "grad_norm": 0.6508326530456543, - "learning_rate": 3.7141332939629127e-06, - "loss": 0.1857, - "step": 15329 - }, - { - "epoch": 1.4441487482631121, - "grad_norm": 0.632019579410553, - "learning_rate": 3.712958964844626e-06, - "loss": 0.203, - "step": 15330 - }, - { - "epoch": 1.4442429523562799, - "grad_norm": 0.625632107257843, - "learning_rate": 3.7117847790805983e-06, - "loss": 0.2182, - "step": 15331 - }, - { - "epoch": 1.4443371564494476, - "grad_norm": 0.7232470512390137, - "learning_rate": 3.7106107366975995e-06, - "loss": 0.2072, - "step": 15332 - }, - { - "epoch": 1.4444313605426156, - "grad_norm": 0.5715131163597107, - "learning_rate": 3.7094368377224023e-06, - "loss": 0.1758, - "step": 15333 - }, - { - "epoch": 1.4445255646357835, - "grad_norm": 0.7722396850585938, - "learning_rate": 3.7082630821817778e-06, - "loss": 0.2351, - "step": 15334 - }, - { - "epoch": 1.4446197687289513, - "grad_norm": 0.661654531955719, - "learning_rate": 3.7070894701024806e-06, - "loss": 0.2122, - "step": 15335 - }, - { - "epoch": 1.444713972822119, - "grad_norm": 0.6670462489128113, - "learning_rate": 3.705916001511277e-06, - "loss": 0.2032, - "step": 15336 - }, - { - "epoch": 1.444808176915287, - "grad_norm": 0.8102666735649109, - "learning_rate": 3.704742676434925e-06, - "loss": 0.2276, - "step": 15337 - }, - { - "epoch": 1.444902381008455, - "grad_norm": 0.9872673153877258, - "learning_rate": 3.7035694949001733e-06, - "loss": 0.2211, - "step": 15338 - }, - { - "epoch": 1.4449965851016227, - "grad_norm": 0.6364572644233704, - "learning_rate": 3.7023964569337735e-06, - "loss": 0.2019, - "step": 15339 - }, - { - "epoch": 1.4450907891947904, - "grad_norm": 0.5677757859230042, - "learning_rate": 3.701223562562478e-06, - "loss": 0.1901, - "step": 15340 - }, - { - "epoch": 1.4451849932879584, - "grad_norm": 0.6046751737594604, - "learning_rate": 3.700050811813024e-06, - "loss": 0.2002, - "step": 15341 - }, - { - "epoch": 1.4452791973811263, - "grad_norm": 0.629215657711029, - "learning_rate": 3.6988782047121565e-06, - "loss": 0.1929, - "step": 15342 - }, - { - "epoch": 1.445373401474294, - "grad_norm": 0.8797250986099243, - "learning_rate": 3.697705741286608e-06, - "loss": 0.1939, - "step": 15343 - }, - { - "epoch": 1.4454676055674618, - "grad_norm": 0.6953577399253845, - "learning_rate": 3.6965334215631157e-06, - "loss": 0.2147, - "step": 15344 - }, - { - "epoch": 1.4455618096606297, - "grad_norm": 0.6435692310333252, - "learning_rate": 3.695361245568413e-06, - "loss": 0.2092, - "step": 15345 - }, - { - "epoch": 1.4456560137537977, - "grad_norm": 0.6839839220046997, - "learning_rate": 3.69418921332922e-06, - "loss": 0.2239, - "step": 15346 - }, - { - "epoch": 1.4457502178469654, - "grad_norm": 0.6410571336746216, - "learning_rate": 3.69301732487227e-06, - "loss": 0.1996, - "step": 15347 - }, - { - "epoch": 1.4458444219401332, - "grad_norm": 0.6643087863922119, - "learning_rate": 3.6918455802242745e-06, - "loss": 0.1942, - "step": 15348 - }, - { - "epoch": 
1.4459386260333011, - "grad_norm": 0.7473112940788269, - "learning_rate": 3.690673979411955e-06, - "loss": 0.2349, - "step": 15349 - }, - { - "epoch": 1.446032830126469, - "grad_norm": 0.6772686243057251, - "learning_rate": 3.6895025224620307e-06, - "loss": 0.1812, - "step": 15350 - }, - { - "epoch": 1.4461270342196368, - "grad_norm": 0.652247965335846, - "learning_rate": 3.6883312094012047e-06, - "loss": 0.2005, - "step": 15351 - }, - { - "epoch": 1.4462212383128046, - "grad_norm": 0.7764046788215637, - "learning_rate": 3.6871600402561914e-06, - "loss": 0.1991, - "step": 15352 - }, - { - "epoch": 1.4463154424059725, - "grad_norm": 0.670874834060669, - "learning_rate": 3.685989015053689e-06, - "loss": 0.1719, - "step": 15353 - }, - { - "epoch": 1.4464096464991405, - "grad_norm": 0.6050533056259155, - "learning_rate": 3.6848181338204025e-06, - "loss": 0.22, - "step": 15354 - }, - { - "epoch": 1.4465038505923082, - "grad_norm": 0.7051668167114258, - "learning_rate": 3.683647396583032e-06, - "loss": 0.2152, - "step": 15355 - }, - { - "epoch": 1.446598054685476, - "grad_norm": 0.6484372019767761, - "learning_rate": 3.6824768033682655e-06, - "loss": 0.2102, - "step": 15356 - }, - { - "epoch": 1.446692258778644, - "grad_norm": 0.6553364396095276, - "learning_rate": 3.681306354202798e-06, - "loss": 0.1903, - "step": 15357 - }, - { - "epoch": 1.446786462871812, - "grad_norm": 0.6942259669303894, - "learning_rate": 3.680136049113322e-06, - "loss": 0.2106, - "step": 15358 - }, - { - "epoch": 1.4468806669649796, - "grad_norm": 0.6313175559043884, - "learning_rate": 3.6789658881265135e-06, - "loss": 0.1863, - "step": 15359 - }, - { - "epoch": 1.4469748710581474, - "grad_norm": 0.6666918992996216, - "learning_rate": 3.6777958712690597e-06, - "loss": 0.1896, - "step": 15360 - }, - { - "epoch": 1.4470690751513153, - "grad_norm": 0.6780277490615845, - "learning_rate": 3.67662599856764e-06, - "loss": 0.1693, - "step": 15361 - }, - { - "epoch": 1.4471632792444833, - "grad_norm": 0.6547707915306091, - "learning_rate": 3.675456270048927e-06, - "loss": 0.2084, - "step": 15362 - }, - { - "epoch": 1.447257483337651, - "grad_norm": 0.7450213432312012, - "learning_rate": 3.6742866857395855e-06, - "loss": 0.2341, - "step": 15363 - }, - { - "epoch": 1.4473516874308188, - "grad_norm": 0.6595832109451294, - "learning_rate": 3.6731172456662967e-06, - "loss": 0.1958, - "step": 15364 - }, - { - "epoch": 1.4474458915239867, - "grad_norm": 0.7072004079818726, - "learning_rate": 3.67194794985572e-06, - "loss": 0.2198, - "step": 15365 - }, - { - "epoch": 1.4475400956171547, - "grad_norm": 0.6067395806312561, - "learning_rate": 3.670778798334509e-06, - "loss": 0.2142, - "step": 15366 - }, - { - "epoch": 1.4476342997103224, - "grad_norm": 0.5847840905189514, - "learning_rate": 3.6696097911293373e-06, - "loss": 0.1829, - "step": 15367 - }, - { - "epoch": 1.4477285038034902, - "grad_norm": 0.6200958490371704, - "learning_rate": 3.668440928266852e-06, - "loss": 0.1959, - "step": 15368 - }, - { - "epoch": 1.4478227078966581, - "grad_norm": 0.7499897480010986, - "learning_rate": 3.667272209773699e-06, - "loss": 0.1951, - "step": 15369 - }, - { - "epoch": 1.447916911989826, - "grad_norm": 0.6599944829940796, - "learning_rate": 3.66610363567654e-06, - "loss": 0.2204, - "step": 15370 - }, - { - "epoch": 1.4480111160829938, - "grad_norm": 0.6325149536132812, - "learning_rate": 3.6649352060020137e-06, - "loss": 0.1736, - "step": 15371 - }, - { - "epoch": 1.4481053201761616, - "grad_norm": 0.6277233958244324, - "learning_rate": 
3.663766920776759e-06, - "loss": 0.1954, - "step": 15372 - }, - { - "epoch": 1.4481995242693295, - "grad_norm": 0.6093193888664246, - "learning_rate": 3.6625987800274177e-06, - "loss": 0.1901, - "step": 15373 - }, - { - "epoch": 1.4482937283624975, - "grad_norm": 0.6746008396148682, - "learning_rate": 3.6614307837806283e-06, - "loss": 0.2099, - "step": 15374 - }, - { - "epoch": 1.4483879324556652, - "grad_norm": 0.6880073547363281, - "learning_rate": 3.660262932063017e-06, - "loss": 0.2604, - "step": 15375 - }, - { - "epoch": 1.448482136548833, - "grad_norm": 0.65242600440979, - "learning_rate": 3.6590952249012145e-06, - "loss": 0.1847, - "step": 15376 - }, - { - "epoch": 1.448576340642001, - "grad_norm": 0.7235094308853149, - "learning_rate": 3.657927662321851e-06, - "loss": 0.2094, - "step": 15377 - }, - { - "epoch": 1.4486705447351689, - "grad_norm": 0.663043737411499, - "learning_rate": 3.6567602443515416e-06, - "loss": 0.1809, - "step": 15378 - }, - { - "epoch": 1.4487647488283366, - "grad_norm": 0.6839006543159485, - "learning_rate": 3.655592971016909e-06, - "loss": 0.2093, - "step": 15379 - }, - { - "epoch": 1.4488589529215044, - "grad_norm": 0.7242204546928406, - "learning_rate": 3.654425842344571e-06, - "loss": 0.204, - "step": 15380 - }, - { - "epoch": 1.4489531570146723, - "grad_norm": 0.8180120587348938, - "learning_rate": 3.653258858361135e-06, - "loss": 0.1928, - "step": 15381 - }, - { - "epoch": 1.4490473611078403, - "grad_norm": 0.6022229194641113, - "learning_rate": 3.6520920190932152e-06, - "loss": 0.1854, - "step": 15382 - }, - { - "epoch": 1.449141565201008, - "grad_norm": 0.6465762853622437, - "learning_rate": 3.6509253245674113e-06, - "loss": 0.2374, - "step": 15383 - }, - { - "epoch": 1.4492357692941757, - "grad_norm": 0.6528346538543701, - "learning_rate": 3.6497587748103326e-06, - "loss": 0.207, - "step": 15384 - }, - { - "epoch": 1.4493299733873437, - "grad_norm": 0.6788538694381714, - "learning_rate": 3.6485923698485714e-06, - "loss": 0.1962, - "step": 15385 - }, - { - "epoch": 1.4494241774805117, - "grad_norm": 0.8187673687934875, - "learning_rate": 3.647426109708727e-06, - "loss": 0.2087, - "step": 15386 - }, - { - "epoch": 1.4495183815736794, - "grad_norm": 0.6599162817001343, - "learning_rate": 3.646259994417395e-06, - "loss": 0.2136, - "step": 15387 - }, - { - "epoch": 1.4496125856668471, - "grad_norm": 0.6779152154922485, - "learning_rate": 3.645094024001158e-06, - "loss": 0.2303, - "step": 15388 - }, - { - "epoch": 1.449706789760015, - "grad_norm": 0.6403079032897949, - "learning_rate": 3.6439281984866047e-06, - "loss": 0.1861, - "step": 15389 - }, - { - "epoch": 1.4498009938531828, - "grad_norm": 0.653789222240448, - "learning_rate": 3.6427625179003223e-06, - "loss": 0.1925, - "step": 15390 - }, - { - "epoch": 1.4498951979463508, - "grad_norm": 0.6621856689453125, - "learning_rate": 3.6415969822688824e-06, - "loss": 0.1931, - "step": 15391 - }, - { - "epoch": 1.4499894020395185, - "grad_norm": 0.6642829775810242, - "learning_rate": 3.6404315916188684e-06, - "loss": 0.1967, - "step": 15392 - }, - { - "epoch": 1.4500836061326865, - "grad_norm": 0.6505551934242249, - "learning_rate": 3.6392663459768452e-06, - "loss": 0.1911, - "step": 15393 - }, - { - "epoch": 1.4501778102258542, - "grad_norm": 0.6382884979248047, - "learning_rate": 3.6381012453693874e-06, - "loss": 0.1986, - "step": 15394 - }, - { - "epoch": 1.4502720143190222, - "grad_norm": 0.687646746635437, - "learning_rate": 3.6369362898230633e-06, - "loss": 0.2077, - "step": 15395 - }, - { - "epoch": 
1.45036621841219, - "grad_norm": 0.6266944408416748, - "learning_rate": 3.6357714793644283e-06, - "loss": 0.1938, - "step": 15396 - }, - { - "epoch": 1.450460422505358, - "grad_norm": 0.666293203830719, - "learning_rate": 3.6346068140200474e-06, - "loss": 0.1862, - "step": 15397 - }, - { - "epoch": 1.4505546265985256, - "grad_norm": 0.6245224475860596, - "learning_rate": 3.633442293816478e-06, - "loss": 0.1859, - "step": 15398 - }, - { - "epoch": 1.4506488306916936, - "grad_norm": 0.6878665685653687, - "learning_rate": 3.632277918780267e-06, - "loss": 0.1804, - "step": 15399 - }, - { - "epoch": 1.4507430347848613, - "grad_norm": 0.6279546022415161, - "learning_rate": 3.6311136889379674e-06, - "loss": 0.1944, - "step": 15400 - }, - { - "epoch": 1.4508372388780293, - "grad_norm": 0.6689943075180054, - "learning_rate": 3.6299496043161285e-06, - "loss": 0.2103, - "step": 15401 - }, - { - "epoch": 1.450931442971197, - "grad_norm": 0.5821841359138489, - "learning_rate": 3.62878566494129e-06, - "loss": 0.1745, - "step": 15402 - }, - { - "epoch": 1.451025647064365, - "grad_norm": 0.6357420086860657, - "learning_rate": 3.6276218708399858e-06, - "loss": 0.2151, - "step": 15403 - }, - { - "epoch": 1.4511198511575327, - "grad_norm": 0.6925756335258484, - "learning_rate": 3.6264582220387634e-06, - "loss": 0.2016, - "step": 15404 - }, - { - "epoch": 1.4512140552507007, - "grad_norm": 0.6702736616134644, - "learning_rate": 3.625294718564152e-06, - "loss": 0.1967, - "step": 15405 - }, - { - "epoch": 1.4513082593438684, - "grad_norm": 0.6685515642166138, - "learning_rate": 3.624131360442671e-06, - "loss": 0.2129, - "step": 15406 - }, - { - "epoch": 1.4514024634370364, - "grad_norm": 0.6161972880363464, - "learning_rate": 3.622968147700864e-06, - "loss": 0.1789, - "step": 15407 - }, - { - "epoch": 1.4514966675302041, - "grad_norm": 0.6239951848983765, - "learning_rate": 3.621805080365245e-06, - "loss": 0.2033, - "step": 15408 - }, - { - "epoch": 1.451590871623372, - "grad_norm": 0.7480084896087646, - "learning_rate": 3.6206421584623296e-06, - "loss": 0.2225, - "step": 15409 - }, - { - "epoch": 1.4516850757165398, - "grad_norm": 0.7976895570755005, - "learning_rate": 3.6194793820186404e-06, - "loss": 0.199, - "step": 15410 - }, - { - "epoch": 1.4517792798097078, - "grad_norm": 0.6239093542098999, - "learning_rate": 3.618316751060692e-06, - "loss": 0.1986, - "step": 15411 - }, - { - "epoch": 1.4518734839028755, - "grad_norm": 0.6483273506164551, - "learning_rate": 3.6171542656149873e-06, - "loss": 0.1849, - "step": 15412 - }, - { - "epoch": 1.4519676879960435, - "grad_norm": 0.6124324202537537, - "learning_rate": 3.6159919257080366e-06, - "loss": 0.1855, - "step": 15413 - }, - { - "epoch": 1.4520618920892112, - "grad_norm": 0.6394926309585571, - "learning_rate": 3.6148297313663473e-06, - "loss": 0.1806, - "step": 15414 - }, - { - "epoch": 1.4521560961823792, - "grad_norm": 0.592024028301239, - "learning_rate": 3.613667682616411e-06, - "loss": 0.1929, - "step": 15415 - }, - { - "epoch": 1.452250300275547, - "grad_norm": 0.6319133043289185, - "learning_rate": 3.6125057794847286e-06, - "loss": 0.2147, - "step": 15416 - }, - { - "epoch": 1.4523445043687149, - "grad_norm": 0.6954970955848694, - "learning_rate": 3.611344021997796e-06, - "loss": 0.2286, - "step": 15417 - }, - { - "epoch": 1.4524387084618826, - "grad_norm": 0.6587648391723633, - "learning_rate": 3.610182410182096e-06, - "loss": 0.1838, - "step": 15418 - }, - { - "epoch": 1.4525329125550506, - "grad_norm": 0.691964864730835, - "learning_rate": 
3.609020944064119e-06, - "loss": 0.2106, - "step": 15419 - }, - { - "epoch": 1.4526271166482183, - "grad_norm": 0.600549042224884, - "learning_rate": 3.6078596236703524e-06, - "loss": 0.1668, - "step": 15420 - }, - { - "epoch": 1.4527213207413863, - "grad_norm": 0.6247270107269287, - "learning_rate": 3.6066984490272684e-06, - "loss": 0.2028, - "step": 15421 - }, - { - "epoch": 1.452815524834554, - "grad_norm": 0.6843393445014954, - "learning_rate": 3.6055374201613503e-06, - "loss": 0.2436, - "step": 15422 - }, - { - "epoch": 1.452909728927722, - "grad_norm": 0.6278879046440125, - "learning_rate": 3.6043765370990657e-06, - "loss": 0.1806, - "step": 15423 - }, - { - "epoch": 1.4530039330208897, - "grad_norm": 0.8175564408302307, - "learning_rate": 3.6032157998668894e-06, - "loss": 0.2068, - "step": 15424 - }, - { - "epoch": 1.4530981371140577, - "grad_norm": 0.7069247961044312, - "learning_rate": 3.602055208491283e-06, - "loss": 0.1962, - "step": 15425 - }, - { - "epoch": 1.4531923412072254, - "grad_norm": 0.7326929569244385, - "learning_rate": 3.6008947629987124e-06, - "loss": 0.2096, - "step": 15426 - }, - { - "epoch": 1.4532865453003934, - "grad_norm": 0.6834288239479065, - "learning_rate": 3.5997344634156405e-06, - "loss": 0.2133, - "step": 15427 - }, - { - "epoch": 1.453380749393561, - "grad_norm": 0.6272591948509216, - "learning_rate": 3.598574309768519e-06, - "loss": 0.1773, - "step": 15428 - }, - { - "epoch": 1.453474953486729, - "grad_norm": 0.8258611559867859, - "learning_rate": 3.5974143020838017e-06, - "loss": 0.2431, - "step": 15429 - }, - { - "epoch": 1.4535691575798968, - "grad_norm": 0.6214420199394226, - "learning_rate": 3.596254440387944e-06, - "loss": 0.1845, - "step": 15430 - }, - { - "epoch": 1.4536633616730648, - "grad_norm": 0.692866861820221, - "learning_rate": 3.595094724707385e-06, - "loss": 0.19, - "step": 15431 - }, - { - "epoch": 1.4537575657662325, - "grad_norm": 0.6115120053291321, - "learning_rate": 3.593935155068575e-06, - "loss": 0.1704, - "step": 15432 - }, - { - "epoch": 1.4538517698594005, - "grad_norm": 0.598218560218811, - "learning_rate": 3.5927757314979485e-06, - "loss": 0.1791, - "step": 15433 - }, - { - "epoch": 1.4539459739525682, - "grad_norm": 0.700184166431427, - "learning_rate": 3.5916164540219435e-06, - "loss": 0.1959, - "step": 15434 - }, - { - "epoch": 1.4540401780457362, - "grad_norm": 0.7386876344680786, - "learning_rate": 3.590457322666997e-06, - "loss": 0.1972, - "step": 15435 - }, - { - "epoch": 1.454134382138904, - "grad_norm": 0.6803439855575562, - "learning_rate": 3.5892983374595335e-06, - "loss": 0.2096, - "step": 15436 - }, - { - "epoch": 1.4542285862320719, - "grad_norm": 0.6819314360618591, - "learning_rate": 3.588139498425981e-06, - "loss": 0.1913, - "step": 15437 - }, - { - "epoch": 1.4543227903252396, - "grad_norm": 0.639312744140625, - "learning_rate": 3.586980805592769e-06, - "loss": 0.1833, - "step": 15438 - }, - { - "epoch": 1.4544169944184075, - "grad_norm": 0.6426438689231873, - "learning_rate": 3.5858222589863077e-06, - "loss": 0.1959, - "step": 15439 - }, - { - "epoch": 1.4545111985115753, - "grad_norm": 0.7659159302711487, - "learning_rate": 3.5846638586330196e-06, - "loss": 0.2239, - "step": 15440 - }, - { - "epoch": 1.4546054026047432, - "grad_norm": 0.7493621110916138, - "learning_rate": 3.583505604559321e-06, - "loss": 0.1819, - "step": 15441 - }, - { - "epoch": 1.454699606697911, - "grad_norm": 0.6186150312423706, - "learning_rate": 3.582347496791616e-06, - "loss": 0.1737, - "step": 15442 - }, - { - "epoch": 
1.454793810791079, - "grad_norm": 0.6180188655853271, - "learning_rate": 3.5811895353563073e-06, - "loss": 0.1866, - "step": 15443 - }, - { - "epoch": 1.4548880148842467, - "grad_norm": 0.8320621252059937, - "learning_rate": 3.5800317202798117e-06, - "loss": 0.2005, - "step": 15444 - }, - { - "epoch": 1.4549822189774146, - "grad_norm": 0.6498815417289734, - "learning_rate": 3.578874051588521e-06, - "loss": 0.1834, - "step": 15445 - }, - { - "epoch": 1.4550764230705824, - "grad_norm": 0.6876739859580994, - "learning_rate": 3.5777165293088255e-06, - "loss": 0.252, - "step": 15446 - }, - { - "epoch": 1.4551706271637503, - "grad_norm": 0.5977943539619446, - "learning_rate": 3.5765591534671316e-06, - "loss": 0.1823, - "step": 15447 - }, - { - "epoch": 1.455264831256918, - "grad_norm": 0.6090304255485535, - "learning_rate": 3.575401924089824e-06, - "loss": 0.1784, - "step": 15448 - }, - { - "epoch": 1.455359035350086, - "grad_norm": 0.6334052681922913, - "learning_rate": 3.574244841203285e-06, - "loss": 0.1734, - "step": 15449 - }, - { - "epoch": 1.4554532394432538, - "grad_norm": 0.6220776438713074, - "learning_rate": 3.573087904833901e-06, - "loss": 0.1721, - "step": 15450 - }, - { - "epoch": 1.4555474435364217, - "grad_norm": 0.6752516627311707, - "learning_rate": 3.571931115008055e-06, - "loss": 0.1778, - "step": 15451 - }, - { - "epoch": 1.4556416476295895, - "grad_norm": 0.6394069194793701, - "learning_rate": 3.5707744717521174e-06, - "loss": 0.1843, - "step": 15452 - }, - { - "epoch": 1.4557358517227574, - "grad_norm": 0.6487554311752319, - "learning_rate": 3.5696179750924653e-06, - "loss": 0.198, - "step": 15453 - }, - { - "epoch": 1.4558300558159252, - "grad_norm": 0.8762438893318176, - "learning_rate": 3.5684616250554716e-06, - "loss": 0.2065, - "step": 15454 - }, - { - "epoch": 1.4559242599090931, - "grad_norm": 0.6362253427505493, - "learning_rate": 3.5673054216674965e-06, - "loss": 0.1931, - "step": 15455 - }, - { - "epoch": 1.4560184640022609, - "grad_norm": 1.2721117734909058, - "learning_rate": 3.566149364954905e-06, - "loss": 0.2472, - "step": 15456 - }, - { - "epoch": 1.4561126680954288, - "grad_norm": 0.6947647929191589, - "learning_rate": 3.564993454944062e-06, - "loss": 0.2066, - "step": 15457 - }, - { - "epoch": 1.4562068721885966, - "grad_norm": 0.7357280254364014, - "learning_rate": 3.5638376916613173e-06, - "loss": 0.1993, - "step": 15458 - }, - { - "epoch": 1.4563010762817643, - "grad_norm": 0.655251681804657, - "learning_rate": 3.562682075133026e-06, - "loss": 0.2073, - "step": 15459 - }, - { - "epoch": 1.4563952803749323, - "grad_norm": 0.6016684174537659, - "learning_rate": 3.5615266053855423e-06, - "loss": 0.1954, - "step": 15460 - }, - { - "epoch": 1.4564894844681002, - "grad_norm": 0.6429963111877441, - "learning_rate": 3.5603712824452065e-06, - "loss": 0.1911, - "step": 15461 - }, - { - "epoch": 1.456583688561268, - "grad_norm": 0.6290830373764038, - "learning_rate": 3.559216106338368e-06, - "loss": 0.208, - "step": 15462 - }, - { - "epoch": 1.4566778926544357, - "grad_norm": 0.6305145025253296, - "learning_rate": 3.5580610770913593e-06, - "loss": 0.1744, - "step": 15463 - }, - { - "epoch": 1.4567720967476037, - "grad_norm": 0.6087737679481506, - "learning_rate": 3.556906194730524e-06, - "loss": 0.1798, - "step": 15464 - }, - { - "epoch": 1.4568663008407716, - "grad_norm": 0.6650774478912354, - "learning_rate": 3.5557514592821883e-06, - "loss": 0.2201, - "step": 15465 - }, - { - "epoch": 1.4569605049339394, - "grad_norm": 0.6110175251960754, - "learning_rate": 
3.5545968707726864e-06, - "loss": 0.19, - "step": 15466 - }, - { - "epoch": 1.457054709027107, - "grad_norm": 0.6831899285316467, - "learning_rate": 3.5534424292283476e-06, - "loss": 0.2129, - "step": 15467 - }, - { - "epoch": 1.457148913120275, - "grad_norm": 0.6254287958145142, - "learning_rate": 3.5522881346754865e-06, - "loss": 0.1837, - "step": 15468 - }, - { - "epoch": 1.457243117213443, - "grad_norm": 0.6465926766395569, - "learning_rate": 3.5511339871404282e-06, - "loss": 0.1862, - "step": 15469 - }, - { - "epoch": 1.4573373213066108, - "grad_norm": 0.6309822797775269, - "learning_rate": 3.5499799866494912e-06, - "loss": 0.1732, - "step": 15470 - }, - { - "epoch": 1.4574315253997785, - "grad_norm": 0.6424391865730286, - "learning_rate": 3.548826133228983e-06, - "loss": 0.2, - "step": 15471 - }, - { - "epoch": 1.4575257294929465, - "grad_norm": 0.6469354629516602, - "learning_rate": 3.5476724269052187e-06, - "loss": 0.1803, - "step": 15472 - }, - { - "epoch": 1.4576199335861144, - "grad_norm": 0.7723602056503296, - "learning_rate": 3.546518867704499e-06, - "loss": 0.2271, - "step": 15473 - }, - { - "epoch": 1.4577141376792822, - "grad_norm": 0.7271719574928284, - "learning_rate": 3.545365455653129e-06, - "loss": 0.2317, - "step": 15474 - }, - { - "epoch": 1.45780834177245, - "grad_norm": 0.5881963968276978, - "learning_rate": 3.544212190777413e-06, - "loss": 0.2, - "step": 15475 - }, - { - "epoch": 1.4579025458656178, - "grad_norm": 0.5888350605964661, - "learning_rate": 3.5430590731036397e-06, - "loss": 0.1744, - "step": 15476 - }, - { - "epoch": 1.4579967499587858, - "grad_norm": 0.7601636052131653, - "learning_rate": 3.5419061026581046e-06, - "loss": 0.2323, - "step": 15477 - }, - { - "epoch": 1.4580909540519535, - "grad_norm": 0.631746232509613, - "learning_rate": 3.540753279467102e-06, - "loss": 0.1992, - "step": 15478 - }, - { - "epoch": 1.4581851581451213, - "grad_norm": 0.6227158308029175, - "learning_rate": 3.53960060355691e-06, - "loss": 0.1852, - "step": 15479 - }, - { - "epoch": 1.4582793622382892, - "grad_norm": 0.7304214835166931, - "learning_rate": 3.5384480749538163e-06, - "loss": 0.1964, - "step": 15480 - }, - { - "epoch": 1.4583735663314572, - "grad_norm": 0.5746965408325195, - "learning_rate": 3.537295693684102e-06, - "loss": 0.2189, - "step": 15481 - }, - { - "epoch": 1.458467770424625, - "grad_norm": 0.6734116077423096, - "learning_rate": 3.536143459774041e-06, - "loss": 0.1947, - "step": 15482 - }, - { - "epoch": 1.4585619745177927, - "grad_norm": 0.6554303765296936, - "learning_rate": 3.5349913732498984e-06, - "loss": 0.2011, - "step": 15483 - }, - { - "epoch": 1.4586561786109606, - "grad_norm": 0.659274160861969, - "learning_rate": 3.533839434137959e-06, - "loss": 0.1915, - "step": 15484 - }, - { - "epoch": 1.4587503827041286, - "grad_norm": 0.6228361129760742, - "learning_rate": 3.5326876424644798e-06, - "loss": 0.1799, - "step": 15485 - }, - { - "epoch": 1.4588445867972963, - "grad_norm": 0.6653767824172974, - "learning_rate": 3.5315359982557175e-06, - "loss": 0.1954, - "step": 15486 - }, - { - "epoch": 1.458938790890464, - "grad_norm": 0.6754751205444336, - "learning_rate": 3.5303845015379444e-06, - "loss": 0.1894, - "step": 15487 - }, - { - "epoch": 1.459032994983632, - "grad_norm": 0.6641196012496948, - "learning_rate": 3.52923315233741e-06, - "loss": 0.1968, - "step": 15488 - }, - { - "epoch": 1.4591271990768, - "grad_norm": 0.6987813115119934, - "learning_rate": 3.5280819506803645e-06, - "loss": 0.1802, - "step": 15489 - }, - { - "epoch": 
1.4592214031699677, - "grad_norm": 0.6381571888923645, - "learning_rate": 3.5269308965930593e-06, - "loss": 0.1725, - "step": 15490 - }, - { - "epoch": 1.4593156072631355, - "grad_norm": 0.9219656586647034, - "learning_rate": 3.525779990101744e-06, - "loss": 0.1928, - "step": 15491 - }, - { - "epoch": 1.4594098113563034, - "grad_norm": 0.6034792065620422, - "learning_rate": 3.5246292312326536e-06, - "loss": 0.1639, - "step": 15492 - }, - { - "epoch": 1.4595040154494714, - "grad_norm": 0.6326768398284912, - "learning_rate": 3.5234786200120306e-06, - "loss": 0.1729, - "step": 15493 - }, - { - "epoch": 1.4595982195426391, - "grad_norm": 0.7265027165412903, - "learning_rate": 3.522328156466116e-06, - "loss": 0.2226, - "step": 15494 - }, - { - "epoch": 1.4596924236358069, - "grad_norm": 0.7184128165245056, - "learning_rate": 3.5211778406211326e-06, - "loss": 0.2039, - "step": 15495 - }, - { - "epoch": 1.4597866277289748, - "grad_norm": 0.6886721849441528, - "learning_rate": 3.5200276725033156e-06, - "loss": 0.2496, - "step": 15496 - }, - { - "epoch": 1.4598808318221428, - "grad_norm": 0.7013434767723083, - "learning_rate": 3.518877652138891e-06, - "loss": 0.1773, - "step": 15497 - }, - { - "epoch": 1.4599750359153105, - "grad_norm": 0.6575307846069336, - "learning_rate": 3.5177277795540763e-06, - "loss": 0.2187, - "step": 15498 - }, - { - "epoch": 1.4600692400084783, - "grad_norm": 0.7372726202011108, - "learning_rate": 3.5165780547750937e-06, - "loss": 0.2015, - "step": 15499 - }, - { - "epoch": 1.4601634441016462, - "grad_norm": 0.6284604668617249, - "learning_rate": 3.515428477828161e-06, - "loss": 0.2035, - "step": 15500 - }, - { - "epoch": 1.4602576481948142, - "grad_norm": 0.6217027306556702, - "learning_rate": 3.5142790487394883e-06, - "loss": 0.1751, - "step": 15501 - }, - { - "epoch": 1.460351852287982, - "grad_norm": 0.6114416122436523, - "learning_rate": 3.5131297675352803e-06, - "loss": 0.2129, - "step": 15502 - }, - { - "epoch": 1.4604460563811497, - "grad_norm": 0.6586828827857971, - "learning_rate": 3.511980634241745e-06, - "loss": 0.1893, - "step": 15503 - }, - { - "epoch": 1.4605402604743176, - "grad_norm": 0.7110599875450134, - "learning_rate": 3.5108316488850892e-06, - "loss": 0.1996, - "step": 15504 - }, - { - "epoch": 1.4606344645674856, - "grad_norm": 0.7150020003318787, - "learning_rate": 3.5096828114915036e-06, - "loss": 0.2008, - "step": 15505 - }, - { - "epoch": 1.4607286686606533, - "grad_norm": 0.7586897015571594, - "learning_rate": 3.508534122087187e-06, - "loss": 0.2009, - "step": 15506 - }, - { - "epoch": 1.460822872753821, - "grad_norm": 0.6721200942993164, - "learning_rate": 3.5073855806983358e-06, - "loss": 0.215, - "step": 15507 - }, - { - "epoch": 1.460917076846989, - "grad_norm": 0.6009416580200195, - "learning_rate": 3.5062371873511315e-06, - "loss": 0.2065, - "step": 15508 - }, - { - "epoch": 1.461011280940157, - "grad_norm": 0.616571843624115, - "learning_rate": 3.5050889420717615e-06, - "loss": 0.1866, - "step": 15509 - }, - { - "epoch": 1.4611054850333247, - "grad_norm": 0.6780067682266235, - "learning_rate": 3.503940844886411e-06, - "loss": 0.2358, - "step": 15510 - }, - { - "epoch": 1.4611996891264925, - "grad_norm": 0.6394216418266296, - "learning_rate": 3.502792895821253e-06, - "loss": 0.181, - "step": 15511 - }, - { - "epoch": 1.4612938932196604, - "grad_norm": 0.6644461750984192, - "learning_rate": 3.5016450949024682e-06, - "loss": 0.2183, - "step": 15512 - }, - { - "epoch": 1.4613880973128284, - "grad_norm": 0.61826491355896, - "learning_rate": 
3.500497442156222e-06, - "loss": 0.2063, - "step": 15513 - }, - { - "epoch": 1.461482301405996, - "grad_norm": 0.6185021996498108, - "learning_rate": 3.499349937608685e-06, - "loss": 0.1936, - "step": 15514 - }, - { - "epoch": 1.4615765054991638, - "grad_norm": 0.6552667617797852, - "learning_rate": 3.4982025812860267e-06, - "loss": 0.1884, - "step": 15515 - }, - { - "epoch": 1.4616707095923318, - "grad_norm": 0.6592229604721069, - "learning_rate": 3.497055373214402e-06, - "loss": 0.2048, - "step": 15516 - }, - { - "epoch": 1.4617649136854998, - "grad_norm": 0.699374794960022, - "learning_rate": 3.495908313419971e-06, - "loss": 0.2093, - "step": 15517 - }, - { - "epoch": 1.4618591177786675, - "grad_norm": 0.6737303733825684, - "learning_rate": 3.4947614019288932e-06, - "loss": 0.2065, - "step": 15518 - }, - { - "epoch": 1.4619533218718352, - "grad_norm": 0.7677621245384216, - "learning_rate": 3.493614638767312e-06, - "loss": 0.1853, - "step": 15519 - }, - { - "epoch": 1.4620475259650032, - "grad_norm": 0.6520556211471558, - "learning_rate": 3.4924680239613796e-06, - "loss": 0.1791, - "step": 15520 - }, - { - "epoch": 1.4621417300581712, - "grad_norm": 0.6047016978263855, - "learning_rate": 3.491321557537244e-06, - "loss": 0.1773, - "step": 15521 - }, - { - "epoch": 1.462235934151339, - "grad_norm": 0.5962021350860596, - "learning_rate": 3.490175239521042e-06, - "loss": 0.1791, - "step": 15522 - }, - { - "epoch": 1.4623301382445066, - "grad_norm": 0.6825196146965027, - "learning_rate": 3.4890290699389062e-06, - "loss": 0.2071, - "step": 15523 - }, - { - "epoch": 1.4624243423376746, - "grad_norm": 0.6571821570396423, - "learning_rate": 3.4878830488169836e-06, - "loss": 0.1994, - "step": 15524 - }, - { - "epoch": 1.4625185464308426, - "grad_norm": 0.6083043813705444, - "learning_rate": 3.4867371761813982e-06, - "loss": 0.1825, - "step": 15525 - }, - { - "epoch": 1.4626127505240103, - "grad_norm": 0.6372992992401123, - "learning_rate": 3.4855914520582755e-06, - "loss": 0.2084, - "step": 15526 - }, - { - "epoch": 1.462706954617178, - "grad_norm": 0.6596412062644958, - "learning_rate": 3.484445876473742e-06, - "loss": 0.2024, - "step": 15527 - }, - { - "epoch": 1.462801158710346, - "grad_norm": 0.692679762840271, - "learning_rate": 3.4833004494539224e-06, - "loss": 0.2168, - "step": 15528 - }, - { - "epoch": 1.4628953628035137, - "grad_norm": 0.6300631761550903, - "learning_rate": 3.4821551710249278e-06, - "loss": 0.1813, - "step": 15529 - }, - { - "epoch": 1.4629895668966817, - "grad_norm": 0.7117627859115601, - "learning_rate": 3.4810100412128743e-06, - "loss": 0.194, - "step": 15530 - }, - { - "epoch": 1.4630837709898494, - "grad_norm": 0.6359407901763916, - "learning_rate": 3.479865060043878e-06, - "loss": 0.1849, - "step": 15531 - }, - { - "epoch": 1.4631779750830174, - "grad_norm": 0.7331950068473816, - "learning_rate": 3.478720227544038e-06, - "loss": 0.2259, - "step": 15532 - }, - { - "epoch": 1.4632721791761851, - "grad_norm": 0.6989693641662598, - "learning_rate": 3.477575543739463e-06, - "loss": 0.1972, - "step": 15533 - }, - { - "epoch": 1.463366383269353, - "grad_norm": 0.5889098644256592, - "learning_rate": 3.476431008656256e-06, - "loss": 0.1584, - "step": 15534 - }, - { - "epoch": 1.4634605873625208, - "grad_norm": 0.5969628095626831, - "learning_rate": 3.4752866223205062e-06, - "loss": 0.2111, - "step": 15535 - }, - { - "epoch": 1.4635547914556888, - "grad_norm": 0.6702145338058472, - "learning_rate": 3.4741423847583134e-06, - "loss": 0.1985, - "step": 15536 - }, - { - "epoch": 
1.4636489955488565, - "grad_norm": 0.6264545321464539, - "learning_rate": 3.4729982959957697e-06, - "loss": 0.1927, - "step": 15537 - }, - { - "epoch": 1.4637431996420245, - "grad_norm": 0.6307673454284668, - "learning_rate": 3.471854356058956e-06, - "loss": 0.1882, - "step": 15538 - }, - { - "epoch": 1.4638374037351922, - "grad_norm": 0.7243738174438477, - "learning_rate": 3.470710564973958e-06, - "loss": 0.1891, - "step": 15539 - }, - { - "epoch": 1.4639316078283602, - "grad_norm": 0.7178024053573608, - "learning_rate": 3.4695669227668603e-06, - "loss": 0.1897, - "step": 15540 - }, - { - "epoch": 1.464025811921528, - "grad_norm": 0.6225504279136658, - "learning_rate": 3.4684234294637377e-06, - "loss": 0.1976, - "step": 15541 - }, - { - "epoch": 1.4641200160146959, - "grad_norm": 1.8272501230239868, - "learning_rate": 3.4672800850906574e-06, - "loss": 0.2049, - "step": 15542 - }, - { - "epoch": 1.4642142201078636, - "grad_norm": 0.6613777279853821, - "learning_rate": 3.4661368896736945e-06, - "loss": 0.1942, - "step": 15543 - }, - { - "epoch": 1.4643084242010316, - "grad_norm": 0.6397969722747803, - "learning_rate": 3.4649938432389184e-06, - "loss": 0.2033, - "step": 15544 - }, - { - "epoch": 1.4644026282941993, - "grad_norm": 0.6330974698066711, - "learning_rate": 3.463850945812387e-06, - "loss": 0.1836, - "step": 15545 - }, - { - "epoch": 1.4644968323873673, - "grad_norm": 0.6854223608970642, - "learning_rate": 3.4627081974201617e-06, - "loss": 0.1958, - "step": 15546 - }, - { - "epoch": 1.464591036480535, - "grad_norm": 0.5868847370147705, - "learning_rate": 3.4615655980883036e-06, - "loss": 0.1762, - "step": 15547 - }, - { - "epoch": 1.464685240573703, - "grad_norm": 0.6223350167274475, - "learning_rate": 3.4604231478428572e-06, - "loss": 0.1743, - "step": 15548 - }, - { - "epoch": 1.4647794446668707, - "grad_norm": 5.741677284240723, - "learning_rate": 3.459280846709877e-06, - "loss": 0.2156, - "step": 15549 - }, - { - "epoch": 1.4648736487600387, - "grad_norm": 0.6658562421798706, - "learning_rate": 3.458138694715413e-06, - "loss": 0.1989, - "step": 15550 - }, - { - "epoch": 1.4649678528532064, - "grad_norm": 0.6318368911743164, - "learning_rate": 3.4569966918855e-06, - "loss": 0.1604, - "step": 15551 - }, - { - "epoch": 1.4650620569463744, - "grad_norm": 0.6312087178230286, - "learning_rate": 3.4558548382461843e-06, - "loss": 0.1975, - "step": 15552 - }, - { - "epoch": 1.465156261039542, - "grad_norm": 0.6483545899391174, - "learning_rate": 3.4547131338234963e-06, - "loss": 0.1997, - "step": 15553 - }, - { - "epoch": 1.46525046513271, - "grad_norm": 0.6374752521514893, - "learning_rate": 3.453571578643472e-06, - "loss": 0.2125, - "step": 15554 - }, - { - "epoch": 1.4653446692258778, - "grad_norm": 0.6611496806144714, - "learning_rate": 3.4524301727321418e-06, - "loss": 0.1718, - "step": 15555 - }, - { - "epoch": 1.4654388733190458, - "grad_norm": 0.6343547701835632, - "learning_rate": 3.451288916115527e-06, - "loss": 0.1852, - "step": 15556 - }, - { - "epoch": 1.4655330774122135, - "grad_norm": 0.7141991853713989, - "learning_rate": 3.4501478088196526e-06, - "loss": 0.2021, - "step": 15557 - }, - { - "epoch": 1.4656272815053815, - "grad_norm": 0.7054789662361145, - "learning_rate": 3.449006850870542e-06, - "loss": 0.2539, - "step": 15558 - }, - { - "epoch": 1.4657214855985492, - "grad_norm": 0.6991019248962402, - "learning_rate": 3.4478660422942024e-06, - "loss": 0.2024, - "step": 15559 - }, - { - "epoch": 1.4658156896917172, - "grad_norm": 0.6906258463859558, - "learning_rate": 
3.4467253831166502e-06, - "loss": 0.1809, - "step": 15560 - }, - { - "epoch": 1.465909893784885, - "grad_norm": 0.6545451879501343, - "learning_rate": 3.4455848733638974e-06, - "loss": 0.1724, - "step": 15561 - }, - { - "epoch": 1.4660040978780529, - "grad_norm": 0.7555856704711914, - "learning_rate": 3.4444445130619452e-06, - "loss": 0.1936, - "step": 15562 - }, - { - "epoch": 1.4660983019712206, - "grad_norm": 0.7309272885322571, - "learning_rate": 3.443304302236791e-06, - "loss": 0.1918, - "step": 15563 - }, - { - "epoch": 1.4661925060643886, - "grad_norm": 0.6634095311164856, - "learning_rate": 3.442164240914445e-06, - "loss": 0.2035, - "step": 15564 - }, - { - "epoch": 1.4662867101575563, - "grad_norm": 0.664567232131958, - "learning_rate": 3.441024329120897e-06, - "loss": 0.1951, - "step": 15565 - }, - { - "epoch": 1.4663809142507243, - "grad_norm": 0.6086796522140503, - "learning_rate": 3.4398845668821336e-06, - "loss": 0.2143, - "step": 15566 - }, - { - "epoch": 1.466475118343892, - "grad_norm": 0.6950627565383911, - "learning_rate": 3.438744954224147e-06, - "loss": 0.211, - "step": 15567 - }, - { - "epoch": 1.46656932243706, - "grad_norm": 0.7872999906539917, - "learning_rate": 3.4376054911729273e-06, - "loss": 0.1856, - "step": 15568 - }, - { - "epoch": 1.4666635265302277, - "grad_norm": 0.6647602319717407, - "learning_rate": 3.4364661777544472e-06, - "loss": 0.1879, - "step": 15569 - }, - { - "epoch": 1.4667577306233957, - "grad_norm": 0.6785277724266052, - "learning_rate": 3.4353270139946894e-06, - "loss": 0.1962, - "step": 15570 - }, - { - "epoch": 1.4668519347165634, - "grad_norm": 0.6362676024436951, - "learning_rate": 3.4341879999196316e-06, - "loss": 0.1907, - "step": 15571 - }, - { - "epoch": 1.4669461388097313, - "grad_norm": 0.6003015041351318, - "learning_rate": 3.4330491355552378e-06, - "loss": 0.1985, - "step": 15572 - }, - { - "epoch": 1.467040342902899, - "grad_norm": 0.6760033965110779, - "learning_rate": 3.431910420927479e-06, - "loss": 0.2223, - "step": 15573 - }, - { - "epoch": 1.467134546996067, - "grad_norm": 0.637233555316925, - "learning_rate": 3.430771856062325e-06, - "loss": 0.1917, - "step": 15574 - }, - { - "epoch": 1.4672287510892348, - "grad_norm": 0.5501244068145752, - "learning_rate": 3.4296334409857277e-06, - "loss": 0.1792, - "step": 15575 - }, - { - "epoch": 1.4673229551824027, - "grad_norm": 0.7237962484359741, - "learning_rate": 3.4284951757236506e-06, - "loss": 0.231, - "step": 15576 - }, - { - "epoch": 1.4674171592755705, - "grad_norm": 0.679079532623291, - "learning_rate": 3.427357060302049e-06, - "loss": 0.2195, - "step": 15577 - }, - { - "epoch": 1.4675113633687384, - "grad_norm": 0.6358046531677246, - "learning_rate": 3.426219094746871e-06, - "loss": 0.2156, - "step": 15578 - }, - { - "epoch": 1.4676055674619062, - "grad_norm": 0.7107905745506287, - "learning_rate": 3.4250812790840583e-06, - "loss": 0.224, - "step": 15579 - }, - { - "epoch": 1.4676997715550741, - "grad_norm": 0.7959108948707581, - "learning_rate": 3.4239436133395675e-06, - "loss": 0.1987, - "step": 15580 - }, - { - "epoch": 1.4677939756482419, - "grad_norm": 0.6889875531196594, - "learning_rate": 3.4228060975393318e-06, - "loss": 0.2116, - "step": 15581 - }, - { - "epoch": 1.4678881797414098, - "grad_norm": 1.0199923515319824, - "learning_rate": 3.4216687317092854e-06, - "loss": 0.1957, - "step": 15582 - }, - { - "epoch": 1.4679823838345776, - "grad_norm": 0.6064413189888, - "learning_rate": 3.420531515875366e-06, - "loss": 0.1675, - "step": 15583 - }, - { - "epoch": 
1.4680765879277455, - "grad_norm": 0.7486051321029663, - "learning_rate": 3.4193944500635057e-06, - "loss": 0.2061, - "step": 15584 - }, - { - "epoch": 1.4681707920209133, - "grad_norm": 0.7588637471199036, - "learning_rate": 3.418257534299627e-06, - "loss": 0.2225, - "step": 15585 - }, - { - "epoch": 1.4682649961140812, - "grad_norm": 0.6680378317832947, - "learning_rate": 3.417120768609655e-06, - "loss": 0.1869, - "step": 15586 - }, - { - "epoch": 1.468359200207249, - "grad_norm": 0.6012219190597534, - "learning_rate": 3.4159841530195127e-06, - "loss": 0.1842, - "step": 15587 - }, - { - "epoch": 1.468453404300417, - "grad_norm": 0.7349840402603149, - "learning_rate": 3.4148476875551117e-06, - "loss": 0.2114, - "step": 15588 - }, - { - "epoch": 1.4685476083935847, - "grad_norm": 0.7364935278892517, - "learning_rate": 3.4137113722423677e-06, - "loss": 0.1734, - "step": 15589 - }, - { - "epoch": 1.4686418124867526, - "grad_norm": 0.6966760754585266, - "learning_rate": 3.4125752071071926e-06, - "loss": 0.236, - "step": 15590 - }, - { - "epoch": 1.4687360165799204, - "grad_norm": 0.6806097626686096, - "learning_rate": 3.4114391921754874e-06, - "loss": 0.2071, - "step": 15591 - }, - { - "epoch": 1.4688302206730883, - "grad_norm": 0.6912007927894592, - "learning_rate": 3.4103033274731624e-06, - "loss": 0.1879, - "step": 15592 - }, - { - "epoch": 1.468924424766256, - "grad_norm": 0.677087128162384, - "learning_rate": 3.4091676130261077e-06, - "loss": 0.2074, - "step": 15593 - }, - { - "epoch": 1.469018628859424, - "grad_norm": 0.6801339983940125, - "learning_rate": 3.408032048860226e-06, - "loss": 0.1929, - "step": 15594 - }, - { - "epoch": 1.4691128329525918, - "grad_norm": 0.77293461561203, - "learning_rate": 3.40689663500141e-06, - "loss": 0.23, - "step": 15595 - }, - { - "epoch": 1.4692070370457597, - "grad_norm": 0.6681442856788635, - "learning_rate": 3.4057613714755444e-06, - "loss": 0.2075, - "step": 15596 - }, - { - "epoch": 1.4693012411389275, - "grad_norm": 0.6287442445755005, - "learning_rate": 3.4046262583085188e-06, - "loss": 0.1815, - "step": 15597 - }, - { - "epoch": 1.4693954452320952, - "grad_norm": 0.6025934219360352, - "learning_rate": 3.4034912955262167e-06, - "loss": 0.1653, - "step": 15598 - }, - { - "epoch": 1.4694896493252632, - "grad_norm": 0.6409719586372375, - "learning_rate": 3.4023564831545107e-06, - "loss": 0.1911, - "step": 15599 - }, - { - "epoch": 1.4695838534184311, - "grad_norm": 0.6095578670501709, - "learning_rate": 3.4012218212192816e-06, - "loss": 0.1902, - "step": 15600 - }, - { - "epoch": 1.4696780575115989, - "grad_norm": 0.6680271625518799, - "learning_rate": 3.4000873097464036e-06, - "loss": 0.2042, - "step": 15601 - }, - { - "epoch": 1.4697722616047666, - "grad_norm": 0.6479302048683167, - "learning_rate": 3.3989529487617414e-06, - "loss": 0.2193, - "step": 15602 - }, - { - "epoch": 1.4698664656979346, - "grad_norm": 0.795434296131134, - "learning_rate": 3.3978187382911543e-06, - "loss": 0.191, - "step": 15603 - }, - { - "epoch": 1.4699606697911025, - "grad_norm": 0.69087815284729, - "learning_rate": 3.396684678360517e-06, - "loss": 0.2474, - "step": 15604 - }, - { - "epoch": 1.4700548738842703, - "grad_norm": 0.6540675163269043, - "learning_rate": 3.395550768995681e-06, - "loss": 0.1983, - "step": 15605 - }, - { - "epoch": 1.470149077977438, - "grad_norm": 0.6747296452522278, - "learning_rate": 3.3944170102224983e-06, - "loss": 0.2376, - "step": 15606 - }, - { - "epoch": 1.470243282070606, - "grad_norm": 0.6314950585365295, - "learning_rate": 
3.3932834020668236e-06, - "loss": 0.1864, - "step": 15607 - }, - { - "epoch": 1.470337486163774, - "grad_norm": 0.6356666684150696, - "learning_rate": 3.392149944554508e-06, - "loss": 0.1986, - "step": 15608 - }, - { - "epoch": 1.4704316902569416, - "grad_norm": 0.6738851070404053, - "learning_rate": 3.3910166377113894e-06, - "loss": 0.2308, - "step": 15609 - }, - { - "epoch": 1.4705258943501094, - "grad_norm": 0.6042155623435974, - "learning_rate": 3.389883481563312e-06, - "loss": 0.1921, - "step": 15610 - }, - { - "epoch": 1.4706200984432773, - "grad_norm": 0.6968407034873962, - "learning_rate": 3.3887504761361178e-06, - "loss": 0.2222, - "step": 15611 - }, - { - "epoch": 1.4707143025364453, - "grad_norm": 0.6901258230209351, - "learning_rate": 3.3876176214556345e-06, - "loss": 0.204, - "step": 15612 - }, - { - "epoch": 1.470808506629613, - "grad_norm": 0.6558237671852112, - "learning_rate": 3.3864849175476957e-06, - "loss": 0.1914, - "step": 15613 - }, - { - "epoch": 1.4709027107227808, - "grad_norm": 0.639708399772644, - "learning_rate": 3.3853523644381314e-06, - "loss": 0.1884, - "step": 15614 - }, - { - "epoch": 1.4709969148159487, - "grad_norm": 0.6424216032028198, - "learning_rate": 3.3842199621527593e-06, - "loss": 0.1792, - "step": 15615 - }, - { - "epoch": 1.4710911189091167, - "grad_norm": 0.6951286196708679, - "learning_rate": 3.3830877107174042e-06, - "loss": 0.1884, - "step": 15616 - }, - { - "epoch": 1.4711853230022844, - "grad_norm": 0.6806188225746155, - "learning_rate": 3.3819556101578853e-06, - "loss": 0.2069, - "step": 15617 - }, - { - "epoch": 1.4712795270954522, - "grad_norm": 0.6278419494628906, - "learning_rate": 3.3808236605000143e-06, - "loss": 0.1935, - "step": 15618 - }, - { - "epoch": 1.4713737311886201, - "grad_norm": 0.6258873343467712, - "learning_rate": 3.379691861769594e-06, - "loss": 0.1801, - "step": 15619 - }, - { - "epoch": 1.471467935281788, - "grad_norm": 0.6044372320175171, - "learning_rate": 3.3785602139924432e-06, - "loss": 0.1706, - "step": 15620 - }, - { - "epoch": 1.4715621393749558, - "grad_norm": 0.6459899544715881, - "learning_rate": 3.377428717194361e-06, - "loss": 0.1998, - "step": 15621 - }, - { - "epoch": 1.4716563434681236, - "grad_norm": 0.6790457367897034, - "learning_rate": 3.3762973714011426e-06, - "loss": 0.2171, - "step": 15622 - }, - { - "epoch": 1.4717505475612915, - "grad_norm": 0.6954322457313538, - "learning_rate": 3.375166176638588e-06, - "loss": 0.205, - "step": 15623 - }, - { - "epoch": 1.4718447516544595, - "grad_norm": 0.5843218564987183, - "learning_rate": 3.374035132932493e-06, - "loss": 0.1933, - "step": 15624 - }, - { - "epoch": 1.4719389557476272, - "grad_norm": 0.6136788725852966, - "learning_rate": 3.37290424030864e-06, - "loss": 0.1785, - "step": 15625 - }, - { - "epoch": 1.472033159840795, - "grad_norm": 0.646652340888977, - "learning_rate": 3.371773498792821e-06, - "loss": 0.1969, - "step": 15626 - }, - { - "epoch": 1.472127363933963, - "grad_norm": 0.6401978135108948, - "learning_rate": 3.3706429084108196e-06, - "loss": 0.1831, - "step": 15627 - }, - { - "epoch": 1.472221568027131, - "grad_norm": 0.9398323893547058, - "learning_rate": 3.3695124691884084e-06, - "loss": 0.1983, - "step": 15628 - }, - { - "epoch": 1.4723157721202986, - "grad_norm": 0.6923646330833435, - "learning_rate": 3.368382181151367e-06, - "loss": 0.2101, - "step": 15629 - }, - { - "epoch": 1.4724099762134664, - "grad_norm": 0.6531728506088257, - "learning_rate": 3.367252044325473e-06, - "loss": 0.2026, - "step": 15630 - }, - { - "epoch": 
1.4725041803066343, - "grad_norm": 0.6412551999092102, - "learning_rate": 3.366122058736485e-06, - "loss": 0.1883, - "step": 15631 - }, - { - "epoch": 1.4725983843998023, - "grad_norm": 0.6572036743164062, - "learning_rate": 3.3649922244101784e-06, - "loss": 0.1873, - "step": 15632 - }, - { - "epoch": 1.47269258849297, - "grad_norm": 0.6967328190803528, - "learning_rate": 3.3638625413723058e-06, - "loss": 0.2083, - "step": 15633 - }, - { - "epoch": 1.4727867925861378, - "grad_norm": 0.7096132636070251, - "learning_rate": 3.362733009648631e-06, - "loss": 0.2084, - "step": 15634 - }, - { - "epoch": 1.4728809966793057, - "grad_norm": 0.6445586085319519, - "learning_rate": 3.3616036292649113e-06, - "loss": 0.1814, - "step": 15635 - }, - { - "epoch": 1.4729752007724737, - "grad_norm": 0.6005943417549133, - "learning_rate": 3.3604744002468916e-06, - "loss": 0.1743, - "step": 15636 - }, - { - "epoch": 1.4730694048656414, - "grad_norm": 0.7228039503097534, - "learning_rate": 3.359345322620324e-06, - "loss": 0.2059, - "step": 15637 - }, - { - "epoch": 1.4731636089588092, - "grad_norm": 0.661433219909668, - "learning_rate": 3.358216396410956e-06, - "loss": 0.1939, - "step": 15638 - }, - { - "epoch": 1.4732578130519771, - "grad_norm": 0.6859742999076843, - "learning_rate": 3.3570876216445238e-06, - "loss": 0.2036, - "step": 15639 - }, - { - "epoch": 1.473352017145145, - "grad_norm": 0.6108340620994568, - "learning_rate": 3.3559589983467667e-06, - "loss": 0.1735, - "step": 15640 - }, - { - "epoch": 1.4734462212383128, - "grad_norm": 0.6382995247840881, - "learning_rate": 3.354830526543422e-06, - "loss": 0.1681, - "step": 15641 - }, - { - "epoch": 1.4735404253314806, - "grad_norm": 0.6694854497909546, - "learning_rate": 3.35370220626022e-06, - "loss": 0.1964, - "step": 15642 - }, - { - "epoch": 1.4736346294246485, - "grad_norm": 0.6144790053367615, - "learning_rate": 3.352574037522881e-06, - "loss": 0.1782, - "step": 15643 - }, - { - "epoch": 1.4737288335178165, - "grad_norm": 0.6441357135772705, - "learning_rate": 3.3514460203571365e-06, - "loss": 0.2078, - "step": 15644 - }, - { - "epoch": 1.4738230376109842, - "grad_norm": 0.6974300742149353, - "learning_rate": 3.3503181547887066e-06, - "loss": 0.1978, - "step": 15645 - }, - { - "epoch": 1.473917241704152, - "grad_norm": 0.6606546640396118, - "learning_rate": 3.349190440843304e-06, - "loss": 0.1778, - "step": 15646 - }, - { - "epoch": 1.47401144579732, - "grad_norm": 0.6658607721328735, - "learning_rate": 3.348062878546645e-06, - "loss": 0.2211, - "step": 15647 - }, - { - "epoch": 1.4741056498904879, - "grad_norm": 0.6312874555587769, - "learning_rate": 3.346935467924444e-06, - "loss": 0.1869, - "step": 15648 - }, - { - "epoch": 1.4741998539836556, - "grad_norm": 0.7111707329750061, - "learning_rate": 3.345808209002399e-06, - "loss": 0.1978, - "step": 15649 - }, - { - "epoch": 1.4742940580768233, - "grad_norm": 0.6756532788276672, - "learning_rate": 3.3446811018062177e-06, - "loss": 0.1899, - "step": 15650 - }, - { - "epoch": 1.4743882621699913, - "grad_norm": 0.7289779186248779, - "learning_rate": 3.343554146361604e-06, - "loss": 0.1924, - "step": 15651 - }, - { - "epoch": 1.4744824662631593, - "grad_norm": 0.6526416540145874, - "learning_rate": 3.3424273426942467e-06, - "loss": 0.1909, - "step": 15652 - }, - { - "epoch": 1.474576670356327, - "grad_norm": 0.7258555293083191, - "learning_rate": 3.341300690829842e-06, - "loss": 0.2143, - "step": 15653 - }, - { - "epoch": 1.4746708744494947, - "grad_norm": 0.6213929057121277, - "learning_rate": 
3.340174190794082e-06, - "loss": 0.1957, - "step": 15654 - }, - { - "epoch": 1.4747650785426627, - "grad_norm": 0.6945509910583496, - "learning_rate": 3.3390478426126473e-06, - "loss": 0.1821, - "step": 15655 - }, - { - "epoch": 1.4748592826358307, - "grad_norm": 0.6115299463272095, - "learning_rate": 3.337921646311223e-06, - "loss": 0.2055, - "step": 15656 - }, - { - "epoch": 1.4749534867289984, - "grad_norm": 0.6380189061164856, - "learning_rate": 3.3367956019154923e-06, - "loss": 0.1935, - "step": 15657 - }, - { - "epoch": 1.4750476908221661, - "grad_norm": 0.6088358163833618, - "learning_rate": 3.3356697094511257e-06, - "loss": 0.1916, - "step": 15658 - }, - { - "epoch": 1.475141894915334, - "grad_norm": 0.8672076463699341, - "learning_rate": 3.334543968943791e-06, - "loss": 0.2283, - "step": 15659 - }, - { - "epoch": 1.475236099008502, - "grad_norm": 0.6404767632484436, - "learning_rate": 3.3334183804191677e-06, - "loss": 0.2344, - "step": 15660 - }, - { - "epoch": 1.4753303031016698, - "grad_norm": 0.5931467413902283, - "learning_rate": 3.332292943902915e-06, - "loss": 0.1667, - "step": 15661 - }, - { - "epoch": 1.4754245071948375, - "grad_norm": 0.641546905040741, - "learning_rate": 3.3311676594206934e-06, - "loss": 0.1975, - "step": 15662 - }, - { - "epoch": 1.4755187112880055, - "grad_norm": 0.7403029799461365, - "learning_rate": 3.3300425269981608e-06, - "loss": 0.2304, - "step": 15663 - }, - { - "epoch": 1.4756129153811732, - "grad_norm": 0.6915774345397949, - "learning_rate": 3.328917546660978e-06, - "loss": 0.1847, - "step": 15664 - }, - { - "epoch": 1.4757071194743412, - "grad_norm": 0.6517333984375, - "learning_rate": 3.327792718434789e-06, - "loss": 0.1939, - "step": 15665 - }, - { - "epoch": 1.475801323567509, - "grad_norm": 0.6195977926254272, - "learning_rate": 3.3266680423452444e-06, - "loss": 0.2018, - "step": 15666 - }, - { - "epoch": 1.4758955276606769, - "grad_norm": 0.6296122670173645, - "learning_rate": 3.325543518417993e-06, - "loss": 0.2061, - "step": 15667 - }, - { - "epoch": 1.4759897317538446, - "grad_norm": 0.656207799911499, - "learning_rate": 3.324419146678668e-06, - "loss": 0.1637, - "step": 15668 - }, - { - "epoch": 1.4760839358470126, - "grad_norm": 0.7016220688819885, - "learning_rate": 3.32329492715291e-06, - "loss": 0.2095, - "step": 15669 - }, - { - "epoch": 1.4761781399401803, - "grad_norm": 0.7365010976791382, - "learning_rate": 3.322170859866357e-06, - "loss": 0.2163, - "step": 15670 - }, - { - "epoch": 1.4762723440333483, - "grad_norm": 0.6614882349967957, - "learning_rate": 3.3210469448446325e-06, - "loss": 0.1864, - "step": 15671 - }, - { - "epoch": 1.476366548126516, - "grad_norm": 0.9303832054138184, - "learning_rate": 3.3199231821133705e-06, - "loss": 0.2342, - "step": 15672 - }, - { - "epoch": 1.476460752219684, - "grad_norm": 0.6372376680374146, - "learning_rate": 3.318799571698187e-06, - "loss": 0.1941, - "step": 15673 - }, - { - "epoch": 1.4765549563128517, - "grad_norm": 0.6549563407897949, - "learning_rate": 3.317676113624706e-06, - "loss": 0.1996, - "step": 15674 - }, - { - "epoch": 1.4766491604060197, - "grad_norm": 0.5790454149246216, - "learning_rate": 3.316552807918548e-06, - "loss": 0.1679, - "step": 15675 - }, - { - "epoch": 1.4767433644991874, - "grad_norm": 0.710728108882904, - "learning_rate": 3.3154296546053175e-06, - "loss": 0.2109, - "step": 15676 - }, - { - "epoch": 1.4768375685923554, - "grad_norm": 0.6472678780555725, - "learning_rate": 3.3143066537106306e-06, - "loss": 0.2079, - "step": 15677 - }, - { - "epoch": 
1.4769317726855231, - "grad_norm": 0.6699776649475098, - "learning_rate": 3.313183805260094e-06, - "loss": 0.2014, - "step": 15678 - }, - { - "epoch": 1.477025976778691, - "grad_norm": 0.6860296130180359, - "learning_rate": 3.3120611092793043e-06, - "loss": 0.197, - "step": 15679 - }, - { - "epoch": 1.4771201808718588, - "grad_norm": 0.6158367395401001, - "learning_rate": 3.3109385657938642e-06, - "loss": 0.1856, - "step": 15680 - }, - { - "epoch": 1.4772143849650268, - "grad_norm": 0.7331546545028687, - "learning_rate": 3.3098161748293745e-06, - "loss": 0.2572, - "step": 15681 - }, - { - "epoch": 1.4773085890581945, - "grad_norm": 0.6883033514022827, - "learning_rate": 3.308693936411421e-06, - "loss": 0.1991, - "step": 15682 - }, - { - "epoch": 1.4774027931513625, - "grad_norm": 0.6336426138877869, - "learning_rate": 3.307571850565592e-06, - "loss": 0.2014, - "step": 15683 - }, - { - "epoch": 1.4774969972445302, - "grad_norm": 0.7869249582290649, - "learning_rate": 3.3064499173174734e-06, - "loss": 0.2091, - "step": 15684 - }, - { - "epoch": 1.4775912013376982, - "grad_norm": 0.6841399073600769, - "learning_rate": 3.3053281366926526e-06, - "loss": 0.1912, - "step": 15685 - }, - { - "epoch": 1.477685405430866, - "grad_norm": 0.64698326587677, - "learning_rate": 3.3042065087167008e-06, - "loss": 0.1586, - "step": 15686 - }, - { - "epoch": 1.4777796095240339, - "grad_norm": 0.6012874245643616, - "learning_rate": 3.3030850334151952e-06, - "loss": 0.1657, - "step": 15687 - }, - { - "epoch": 1.4778738136172016, - "grad_norm": 0.6796317100524902, - "learning_rate": 3.3019637108137113e-06, - "loss": 0.2049, - "step": 15688 - }, - { - "epoch": 1.4779680177103696, - "grad_norm": 0.5895856022834778, - "learning_rate": 3.30084254093781e-06, - "loss": 0.191, - "step": 15689 - }, - { - "epoch": 1.4780622218035373, - "grad_norm": 0.5807430148124695, - "learning_rate": 3.29972152381306e-06, - "loss": 0.1844, - "step": 15690 - }, - { - "epoch": 1.4781564258967053, - "grad_norm": 0.913033664226532, - "learning_rate": 3.2986006594650245e-06, - "loss": 0.1654, - "step": 15691 - }, - { - "epoch": 1.478250629989873, - "grad_norm": 0.8326259255409241, - "learning_rate": 3.297479947919253e-06, - "loss": 0.2274, - "step": 15692 - }, - { - "epoch": 1.478344834083041, - "grad_norm": 0.6548287868499756, - "learning_rate": 3.296359389201307e-06, - "loss": 0.222, - "step": 15693 - }, - { - "epoch": 1.4784390381762087, - "grad_norm": 0.6795612573623657, - "learning_rate": 3.295238983336736e-06, - "loss": 0.2183, - "step": 15694 - }, - { - "epoch": 1.4785332422693767, - "grad_norm": 0.7227304577827454, - "learning_rate": 3.2941187303510845e-06, - "loss": 0.1771, - "step": 15695 - }, - { - "epoch": 1.4786274463625444, - "grad_norm": 0.6277872323989868, - "learning_rate": 3.2929986302698913e-06, - "loss": 0.2068, - "step": 15696 - }, - { - "epoch": 1.4787216504557124, - "grad_norm": 0.6779505014419556, - "learning_rate": 3.2918786831187088e-06, - "loss": 0.1899, - "step": 15697 - }, - { - "epoch": 1.47881585454888, - "grad_norm": 0.6374543905258179, - "learning_rate": 3.2907588889230667e-06, - "loss": 0.191, - "step": 15698 - }, - { - "epoch": 1.478910058642048, - "grad_norm": 0.6203227043151855, - "learning_rate": 3.2896392477084905e-06, - "loss": 0.2057, - "step": 15699 - }, - { - "epoch": 1.4790042627352158, - "grad_norm": 0.6810561418533325, - "learning_rate": 3.2885197595005246e-06, - "loss": 0.1882, - "step": 15700 - }, - { - "epoch": 1.4790984668283838, - "grad_norm": 0.6487362384796143, - "learning_rate": 
3.287400424324687e-06, - "loss": 0.2098, - "step": 15701 - }, - { - "epoch": 1.4791926709215515, - "grad_norm": 0.6776620149612427, - "learning_rate": 3.2862812422064983e-06, - "loss": 0.1931, - "step": 15702 - }, - { - "epoch": 1.4792868750147194, - "grad_norm": 0.6505196690559387, - "learning_rate": 3.28516221317148e-06, - "loss": 0.1834, - "step": 15703 - }, - { - "epoch": 1.4793810791078872, - "grad_norm": 0.6435573101043701, - "learning_rate": 3.2840433372451506e-06, - "loss": 0.2049, - "step": 15704 - }, - { - "epoch": 1.4794752832010551, - "grad_norm": 0.6023697257041931, - "learning_rate": 3.282924614453017e-06, - "loss": 0.185, - "step": 15705 - }, - { - "epoch": 1.4795694872942229, - "grad_norm": 0.6520904898643494, - "learning_rate": 3.2818060448205902e-06, - "loss": 0.1867, - "step": 15706 - }, - { - "epoch": 1.4796636913873908, - "grad_norm": 0.7034657597541809, - "learning_rate": 3.280687628373379e-06, - "loss": 0.1919, - "step": 15707 - }, - { - "epoch": 1.4797578954805586, - "grad_norm": 0.7249178290367126, - "learning_rate": 3.2795693651368776e-06, - "loss": 0.2048, - "step": 15708 - }, - { - "epoch": 1.4798520995737265, - "grad_norm": 0.6190721392631531, - "learning_rate": 3.2784512551365886e-06, - "loss": 0.2211, - "step": 15709 - }, - { - "epoch": 1.4799463036668943, - "grad_norm": 0.6146898865699768, - "learning_rate": 3.2773332983980087e-06, - "loss": 0.1842, - "step": 15710 - }, - { - "epoch": 1.4800405077600622, - "grad_norm": 0.7510910034179688, - "learning_rate": 3.276215494946624e-06, - "loss": 0.2133, - "step": 15711 - }, - { - "epoch": 1.48013471185323, - "grad_norm": 0.6793009042739868, - "learning_rate": 3.2750978448079276e-06, - "loss": 0.1825, - "step": 15712 - }, - { - "epoch": 1.480228915946398, - "grad_norm": 0.7033307552337646, - "learning_rate": 3.273980348007396e-06, - "loss": 0.1974, - "step": 15713 - }, - { - "epoch": 1.4803231200395657, - "grad_norm": 0.7821509838104248, - "learning_rate": 3.2728630045705166e-06, - "loss": 0.2525, - "step": 15714 - }, - { - "epoch": 1.4804173241327336, - "grad_norm": 0.8932392597198486, - "learning_rate": 3.2717458145227666e-06, - "loss": 0.1852, - "step": 15715 - }, - { - "epoch": 1.4805115282259014, - "grad_norm": 0.7076621651649475, - "learning_rate": 3.270628777889614e-06, - "loss": 0.1849, - "step": 15716 - }, - { - "epoch": 1.4806057323190693, - "grad_norm": 0.7159755229949951, - "learning_rate": 3.269511894696532e-06, - "loss": 0.205, - "step": 15717 - }, - { - "epoch": 1.480699936412237, - "grad_norm": 0.7273610234260559, - "learning_rate": 3.2683951649689914e-06, - "loss": 0.1993, - "step": 15718 - }, - { - "epoch": 1.480794140505405, - "grad_norm": 0.623056948184967, - "learning_rate": 3.2672785887324487e-06, - "loss": 0.1808, - "step": 15719 - }, - { - "epoch": 1.4808883445985728, - "grad_norm": 0.6815441250801086, - "learning_rate": 3.2661621660123666e-06, - "loss": 0.2028, - "step": 15720 - }, - { - "epoch": 1.4809825486917407, - "grad_norm": 0.6188926100730896, - "learning_rate": 3.2650458968342048e-06, - "loss": 0.1581, - "step": 15721 - }, - { - "epoch": 1.4810767527849085, - "grad_norm": 0.6613251566886902, - "learning_rate": 3.263929781223412e-06, - "loss": 0.2107, - "step": 15722 - }, - { - "epoch": 1.4811709568780764, - "grad_norm": 0.6647250056266785, - "learning_rate": 3.2628138192054336e-06, - "loss": 0.2139, - "step": 15723 - }, - { - "epoch": 1.4812651609712442, - "grad_norm": 0.6295896172523499, - "learning_rate": 3.2616980108057204e-06, - "loss": 0.2137, - "step": 15724 - }, - { - 
"epoch": 1.4813593650644121, - "grad_norm": 0.6682237386703491, - "learning_rate": 3.2605823560497163e-06, - "loss": 0.1959, - "step": 15725 - }, - { - "epoch": 1.4814535691575799, - "grad_norm": 0.6330900192260742, - "learning_rate": 3.259466854962855e-06, - "loss": 0.209, - "step": 15726 - }, - { - "epoch": 1.4815477732507478, - "grad_norm": 0.6337481737136841, - "learning_rate": 3.258351507570573e-06, - "loss": 0.1802, - "step": 15727 - }, - { - "epoch": 1.4816419773439156, - "grad_norm": 0.7027248740196228, - "learning_rate": 3.2572363138983054e-06, - "loss": 0.209, - "step": 15728 - }, - { - "epoch": 1.4817361814370835, - "grad_norm": 0.6419001221656799, - "learning_rate": 3.2561212739714752e-06, - "loss": 0.206, - "step": 15729 - }, - { - "epoch": 1.4818303855302513, - "grad_norm": 0.7860915064811707, - "learning_rate": 3.255006387815509e-06, - "loss": 0.2209, - "step": 15730 - }, - { - "epoch": 1.4819245896234192, - "grad_norm": 0.6406052112579346, - "learning_rate": 3.253891655455833e-06, - "loss": 0.2221, - "step": 15731 - }, - { - "epoch": 1.482018793716587, - "grad_norm": 0.6322653293609619, - "learning_rate": 3.2527770769178558e-06, - "loss": 0.2067, - "step": 15732 - }, - { - "epoch": 1.4821129978097547, - "grad_norm": 0.7294014096260071, - "learning_rate": 3.2516626522269965e-06, - "loss": 0.2085, - "step": 15733 - }, - { - "epoch": 1.4822072019029227, - "grad_norm": 0.8285627365112305, - "learning_rate": 3.250548381408668e-06, - "loss": 0.2149, - "step": 15734 - }, - { - "epoch": 1.4823014059960906, - "grad_norm": 0.6386702656745911, - "learning_rate": 3.249434264488276e-06, - "loss": 0.1966, - "step": 15735 - }, - { - "epoch": 1.4823956100892584, - "grad_norm": 0.6847113966941833, - "learning_rate": 3.2483203014912145e-06, - "loss": 0.2005, - "step": 15736 - }, - { - "epoch": 1.482489814182426, - "grad_norm": 0.7019851207733154, - "learning_rate": 3.2472064924428994e-06, - "loss": 0.211, - "step": 15737 - }, - { - "epoch": 1.482584018275594, - "grad_norm": 0.7889997363090515, - "learning_rate": 3.2460928373687197e-06, - "loss": 0.2056, - "step": 15738 - }, - { - "epoch": 1.482678222368762, - "grad_norm": 0.6844862103462219, - "learning_rate": 3.2449793362940617e-06, - "loss": 0.2051, - "step": 15739 - }, - { - "epoch": 1.4827724264619297, - "grad_norm": 0.6309056282043457, - "learning_rate": 3.243865989244328e-06, - "loss": 0.2041, - "step": 15740 - }, - { - "epoch": 1.4828666305550975, - "grad_norm": 0.5994337201118469, - "learning_rate": 3.242752796244898e-06, - "loss": 0.1808, - "step": 15741 - }, - { - "epoch": 1.4829608346482654, - "grad_norm": 0.5997973084449768, - "learning_rate": 3.2416397573211523e-06, - "loss": 0.1873, - "step": 15742 - }, - { - "epoch": 1.4830550387414334, - "grad_norm": 0.5694478154182434, - "learning_rate": 3.2405268724984706e-06, - "loss": 0.1771, - "step": 15743 - }, - { - "epoch": 1.4831492428346011, - "grad_norm": 0.6496260166168213, - "learning_rate": 3.2394141418022353e-06, - "loss": 0.1973, - "step": 15744 - }, - { - "epoch": 1.4832434469277689, - "grad_norm": 1.0584670305252075, - "learning_rate": 3.2383015652578077e-06, - "loss": 0.1838, - "step": 15745 - }, - { - "epoch": 1.4833376510209368, - "grad_norm": 0.6428386569023132, - "learning_rate": 3.2371891428905623e-06, - "loss": 0.2189, - "step": 15746 - }, - { - "epoch": 1.4834318551141048, - "grad_norm": 0.7382312417030334, - "learning_rate": 3.2360768747258674e-06, - "loss": 0.1986, - "step": 15747 - }, - { - "epoch": 1.4835260592072725, - "grad_norm": 1.1544512510299683, - 
"learning_rate": 3.2349647607890756e-06, - "loss": 0.1839, - "step": 15748 - }, - { - "epoch": 1.4836202633004403, - "grad_norm": 0.662822961807251, - "learning_rate": 3.2338528011055503e-06, - "loss": 0.167, - "step": 15749 - }, - { - "epoch": 1.4837144673936082, - "grad_norm": 0.636023759841919, - "learning_rate": 3.2327409957006493e-06, - "loss": 0.1873, - "step": 15750 - }, - { - "epoch": 1.4838086714867762, - "grad_norm": 0.7087684273719788, - "learning_rate": 3.231629344599715e-06, - "loss": 0.201, - "step": 15751 - }, - { - "epoch": 1.483902875579944, - "grad_norm": 0.7018948793411255, - "learning_rate": 3.230517847828103e-06, - "loss": 0.192, - "step": 15752 - }, - { - "epoch": 1.4839970796731117, - "grad_norm": 0.6414608955383301, - "learning_rate": 3.22940650541115e-06, - "loss": 0.2144, - "step": 15753 - }, - { - "epoch": 1.4840912837662796, - "grad_norm": 0.7314609289169312, - "learning_rate": 3.228295317374199e-06, - "loss": 0.182, - "step": 15754 - }, - { - "epoch": 1.4841854878594476, - "grad_norm": 0.6438114643096924, - "learning_rate": 3.2271842837425917e-06, - "loss": 0.2136, - "step": 15755 - }, - { - "epoch": 1.4842796919526153, - "grad_norm": 0.662824809551239, - "learning_rate": 3.226073404541652e-06, - "loss": 0.2214, - "step": 15756 - }, - { - "epoch": 1.484373896045783, - "grad_norm": 0.667413055896759, - "learning_rate": 3.224962679796716e-06, - "loss": 0.1954, - "step": 15757 - }, - { - "epoch": 1.484468100138951, - "grad_norm": 0.6730595827102661, - "learning_rate": 3.223852109533112e-06, - "loss": 0.2017, - "step": 15758 - }, - { - "epoch": 1.484562304232119, - "grad_norm": 0.6493986248970032, - "learning_rate": 3.222741693776156e-06, - "loss": 0.1947, - "step": 15759 - }, - { - "epoch": 1.4846565083252867, - "grad_norm": 0.7482509016990662, - "learning_rate": 3.2216314325511744e-06, - "loss": 0.2332, - "step": 15760 - }, - { - "epoch": 1.4847507124184545, - "grad_norm": 0.6473538875579834, - "learning_rate": 3.2205213258834754e-06, - "loss": 0.1908, - "step": 15761 - }, - { - "epoch": 1.4848449165116224, - "grad_norm": 0.6552290916442871, - "learning_rate": 3.219411373798378e-06, - "loss": 0.2022, - "step": 15762 - }, - { - "epoch": 1.4849391206047904, - "grad_norm": 0.6187937259674072, - "learning_rate": 3.2183015763211843e-06, - "loss": 0.1894, - "step": 15763 - }, - { - "epoch": 1.4850333246979581, - "grad_norm": 0.627206027507782, - "learning_rate": 3.217191933477203e-06, - "loss": 0.2141, - "step": 15764 - }, - { - "epoch": 1.4851275287911259, - "grad_norm": 0.6970780491828918, - "learning_rate": 3.2160824452917382e-06, - "loss": 0.2285, - "step": 15765 - }, - { - "epoch": 1.4852217328842938, - "grad_norm": 0.6371577978134155, - "learning_rate": 3.214973111790083e-06, - "loss": 0.1976, - "step": 15766 - }, - { - "epoch": 1.4853159369774618, - "grad_norm": 0.6760706901550293, - "learning_rate": 3.2138639329975328e-06, - "loss": 0.2077, - "step": 15767 - }, - { - "epoch": 1.4854101410706295, - "grad_norm": 0.6337851881980896, - "learning_rate": 3.212754908939384e-06, - "loss": 0.1768, - "step": 15768 - }, - { - "epoch": 1.4855043451637973, - "grad_norm": 1.500899076461792, - "learning_rate": 3.2116460396409165e-06, - "loss": 0.2179, - "step": 15769 - }, - { - "epoch": 1.4855985492569652, - "grad_norm": 0.7386575937271118, - "learning_rate": 3.2105373251274172e-06, - "loss": 0.234, - "step": 15770 - }, - { - "epoch": 1.4856927533501332, - "grad_norm": 0.6634941697120667, - "learning_rate": 3.2094287654241706e-06, - "loss": 0.2026, - "step": 15771 - }, - { - 
"epoch": 1.485786957443301, - "grad_norm": 0.6990938186645508, - "learning_rate": 3.2083203605564473e-06, - "loss": 0.1851, - "step": 15772 - }, - { - "epoch": 1.4858811615364687, - "grad_norm": 0.6416527032852173, - "learning_rate": 3.2072121105495224e-06, - "loss": 0.1925, - "step": 15773 - }, - { - "epoch": 1.4859753656296366, - "grad_norm": 0.6244597434997559, - "learning_rate": 3.2061040154286706e-06, - "loss": 0.1996, - "step": 15774 - }, - { - "epoch": 1.4860695697228046, - "grad_norm": 0.6215859055519104, - "learning_rate": 3.2049960752191533e-06, - "loss": 0.2081, - "step": 15775 - }, - { - "epoch": 1.4861637738159723, - "grad_norm": 0.7005284428596497, - "learning_rate": 3.2038882899462287e-06, - "loss": 0.2045, - "step": 15776 - }, - { - "epoch": 1.48625797790914, - "grad_norm": 0.6543198823928833, - "learning_rate": 3.2027806596351675e-06, - "loss": 0.1912, - "step": 15777 - }, - { - "epoch": 1.486352182002308, - "grad_norm": 0.752830445766449, - "learning_rate": 3.2016731843112193e-06, - "loss": 0.2148, - "step": 15778 - }, - { - "epoch": 1.486446386095476, - "grad_norm": 0.6643003821372986, - "learning_rate": 3.2005658639996296e-06, - "loss": 0.2081, - "step": 15779 - }, - { - "epoch": 1.4865405901886437, - "grad_norm": 0.6779760718345642, - "learning_rate": 3.1994586987256602e-06, - "loss": 0.1909, - "step": 15780 - }, - { - "epoch": 1.4866347942818114, - "grad_norm": 0.6295567154884338, - "learning_rate": 3.1983516885145503e-06, - "loss": 0.2061, - "step": 15781 - }, - { - "epoch": 1.4867289983749794, - "grad_norm": 0.6216533780097961, - "learning_rate": 3.1972448333915376e-06, - "loss": 0.1962, - "step": 15782 - }, - { - "epoch": 1.4868232024681474, - "grad_norm": 0.7079263925552368, - "learning_rate": 3.196138133381863e-06, - "loss": 0.2288, - "step": 15783 - }, - { - "epoch": 1.486917406561315, - "grad_norm": 0.6711618900299072, - "learning_rate": 3.1950315885107652e-06, - "loss": 0.2099, - "step": 15784 - }, - { - "epoch": 1.4870116106544828, - "grad_norm": 0.6961907744407654, - "learning_rate": 3.193925198803467e-06, - "loss": 0.2168, - "step": 15785 - }, - { - "epoch": 1.4871058147476508, - "grad_norm": 0.6956088542938232, - "learning_rate": 3.1928189642852e-06, - "loss": 0.2104, - "step": 15786 - }, - { - "epoch": 1.4872000188408188, - "grad_norm": 0.6182660460472107, - "learning_rate": 3.191712884981193e-06, - "loss": 0.1768, - "step": 15787 - }, - { - "epoch": 1.4872942229339865, - "grad_norm": 0.5577138066291809, - "learning_rate": 3.1906069609166568e-06, - "loss": 0.1569, - "step": 15788 - }, - { - "epoch": 1.4873884270271542, - "grad_norm": 0.6199938058853149, - "learning_rate": 3.189501192116813e-06, - "loss": 0.1885, - "step": 15789 - }, - { - "epoch": 1.4874826311203222, - "grad_norm": 0.7469103932380676, - "learning_rate": 3.1883955786068776e-06, - "loss": 0.1955, - "step": 15790 - }, - { - "epoch": 1.4875768352134902, - "grad_norm": 0.6799483895301819, - "learning_rate": 3.1872901204120554e-06, - "loss": 0.2181, - "step": 15791 - }, - { - "epoch": 1.487671039306658, - "grad_norm": 0.6947216987609863, - "learning_rate": 3.186184817557556e-06, - "loss": 0.1664, - "step": 15792 - }, - { - "epoch": 1.4877652433998256, - "grad_norm": 0.6201277375221252, - "learning_rate": 3.1850796700685783e-06, - "loss": 0.2054, - "step": 15793 - }, - { - "epoch": 1.4878594474929936, - "grad_norm": 0.6932709217071533, - "learning_rate": 3.183974677970324e-06, - "loss": 0.2093, - "step": 15794 - }, - { - "epoch": 1.4879536515861616, - "grad_norm": 0.6539770364761353, - 
"learning_rate": 3.182869841287991e-06, - "loss": 0.211, - "step": 15795 - }, - { - "epoch": 1.4880478556793293, - "grad_norm": 0.6298494338989258, - "learning_rate": 3.1817651600467647e-06, - "loss": 0.2015, - "step": 15796 - }, - { - "epoch": 1.488142059772497, - "grad_norm": 0.6212891340255737, - "learning_rate": 3.1806606342718383e-06, - "loss": 0.1465, - "step": 15797 - }, - { - "epoch": 1.488236263865665, - "grad_norm": 0.6836594343185425, - "learning_rate": 3.179556263988398e-06, - "loss": 0.2242, - "step": 15798 - }, - { - "epoch": 1.488330467958833, - "grad_norm": 0.695442259311676, - "learning_rate": 3.178452049221621e-06, - "loss": 0.1824, - "step": 15799 - }, - { - "epoch": 1.4884246720520007, - "grad_norm": 0.7029857039451599, - "learning_rate": 3.1773479899966897e-06, - "loss": 0.2015, - "step": 15800 - }, - { - "epoch": 1.4885188761451684, - "grad_norm": 0.5796753764152527, - "learning_rate": 3.1762440863387723e-06, - "loss": 0.1608, - "step": 15801 - }, - { - "epoch": 1.4886130802383364, - "grad_norm": 0.7375254034996033, - "learning_rate": 3.175140338273046e-06, - "loss": 0.2268, - "step": 15802 - }, - { - "epoch": 1.4887072843315041, - "grad_norm": 0.7068525552749634, - "learning_rate": 3.174036745824671e-06, - "loss": 0.188, - "step": 15803 - }, - { - "epoch": 1.488801488424672, - "grad_norm": 0.7670924663543701, - "learning_rate": 3.1729333090188153e-06, - "loss": 0.2065, - "step": 15804 - }, - { - "epoch": 1.4888956925178398, - "grad_norm": 0.6729752421379089, - "learning_rate": 3.1718300278806424e-06, - "loss": 0.1976, - "step": 15805 - }, - { - "epoch": 1.4889898966110078, - "grad_norm": 0.7018054723739624, - "learning_rate": 3.1707269024353003e-06, - "loss": 0.2203, - "step": 15806 - }, - { - "epoch": 1.4890841007041755, - "grad_norm": 0.6012523174285889, - "learning_rate": 3.169623932707947e-06, - "loss": 0.2016, - "step": 15807 - }, - { - "epoch": 1.4891783047973435, - "grad_norm": 0.643194854259491, - "learning_rate": 3.1685211187237354e-06, - "loss": 0.1731, - "step": 15808 - }, - { - "epoch": 1.4892725088905112, - "grad_norm": 0.6318308711051941, - "learning_rate": 3.167418460507803e-06, - "loss": 0.1695, - "step": 15809 - }, - { - "epoch": 1.4893667129836792, - "grad_norm": 0.6564774513244629, - "learning_rate": 3.1663159580852976e-06, - "loss": 0.2038, - "step": 15810 - }, - { - "epoch": 1.489460917076847, - "grad_norm": 0.6514729857444763, - "learning_rate": 3.1652136114813592e-06, - "loss": 0.1919, - "step": 15811 - }, - { - "epoch": 1.4895551211700149, - "grad_norm": 0.6297662854194641, - "learning_rate": 3.164111420721121e-06, - "loss": 0.1821, - "step": 15812 - }, - { - "epoch": 1.4896493252631826, - "grad_norm": 0.6865100264549255, - "learning_rate": 3.1630093858297074e-06, - "loss": 0.1833, - "step": 15813 - }, - { - "epoch": 1.4897435293563506, - "grad_norm": 0.6756977438926697, - "learning_rate": 3.1619075068322603e-06, - "loss": 0.1944, - "step": 15814 - }, - { - "epoch": 1.4898377334495183, - "grad_norm": 0.6531872153282166, - "learning_rate": 3.1608057837538976e-06, - "loss": 0.172, - "step": 15815 - }, - { - "epoch": 1.4899319375426863, - "grad_norm": 0.7377223372459412, - "learning_rate": 3.1597042166197334e-06, - "loss": 0.179, - "step": 15816 - }, - { - "epoch": 1.490026141635854, - "grad_norm": 0.6673306822776794, - "learning_rate": 3.158602805454898e-06, - "loss": 0.1883, - "step": 15817 - }, - { - "epoch": 1.490120345729022, - "grad_norm": 0.5960646867752075, - "learning_rate": 3.1575015502844995e-06, - "loss": 0.1905, - "step": 15818 - }, 
- { - "epoch": 1.4902145498221897, - "grad_norm": 0.6355289220809937, - "learning_rate": 3.156400451133641e-06, - "loss": 0.1965, - "step": 15819 - }, - { - "epoch": 1.4903087539153577, - "grad_norm": 0.7027295827865601, - "learning_rate": 3.1552995080274418e-06, - "loss": 0.2007, - "step": 15820 - }, - { - "epoch": 1.4904029580085254, - "grad_norm": 0.6796391606330872, - "learning_rate": 3.154198720991001e-06, - "loss": 0.185, - "step": 15821 - }, - { - "epoch": 1.4904971621016934, - "grad_norm": 0.6522025465965271, - "learning_rate": 3.1530980900494125e-06, - "loss": 0.2083, - "step": 15822 - }, - { - "epoch": 1.490591366194861, - "grad_norm": 0.6371049284934998, - "learning_rate": 3.1519976152277765e-06, - "loss": 0.1548, - "step": 15823 - }, - { - "epoch": 1.490685570288029, - "grad_norm": 0.6852626800537109, - "learning_rate": 3.1508972965511886e-06, - "loss": 0.1886, - "step": 15824 - }, - { - "epoch": 1.4907797743811968, - "grad_norm": 0.7229998111724854, - "learning_rate": 3.149797134044731e-06, - "loss": 0.198, - "step": 15825 - }, - { - "epoch": 1.4908739784743648, - "grad_norm": 0.6964998841285706, - "learning_rate": 3.148697127733493e-06, - "loss": 0.211, - "step": 15826 - }, - { - "epoch": 1.4909681825675325, - "grad_norm": 0.7541191577911377, - "learning_rate": 3.14759727764256e-06, - "loss": 0.192, - "step": 15827 - }, - { - "epoch": 1.4910623866607005, - "grad_norm": 0.7376460433006287, - "learning_rate": 3.1464975837970035e-06, - "loss": 0.2077, - "step": 15828 - }, - { - "epoch": 1.4911565907538682, - "grad_norm": 0.6410512924194336, - "learning_rate": 3.1453980462219e-06, - "loss": 0.1933, - "step": 15829 - }, - { - "epoch": 1.4912507948470362, - "grad_norm": 0.6522585153579712, - "learning_rate": 3.1442986649423266e-06, - "loss": 0.2313, - "step": 15830 - }, - { - "epoch": 1.491344998940204, - "grad_norm": 0.6673697829246521, - "learning_rate": 3.143199439983342e-06, - "loss": 0.1832, - "step": 15831 - }, - { - "epoch": 1.4914392030333719, - "grad_norm": 0.6484084725379944, - "learning_rate": 3.1421003713700184e-06, - "loss": 0.1725, - "step": 15832 - }, - { - "epoch": 1.4915334071265396, - "grad_norm": 0.6711766123771667, - "learning_rate": 3.1410014591274086e-06, - "loss": 0.2068, - "step": 15833 - }, - { - "epoch": 1.4916276112197075, - "grad_norm": 0.6606464982032776, - "learning_rate": 3.139902703280573e-06, - "loss": 0.179, - "step": 15834 - }, - { - "epoch": 1.4917218153128753, - "grad_norm": 0.6559191346168518, - "learning_rate": 3.138804103854568e-06, - "loss": 0.2205, - "step": 15835 - }, - { - "epoch": 1.4918160194060432, - "grad_norm": 0.7219986319541931, - "learning_rate": 3.137705660874438e-06, - "loss": 0.2169, - "step": 15836 - }, - { - "epoch": 1.491910223499211, - "grad_norm": 0.7164807915687561, - "learning_rate": 3.1366073743652313e-06, - "loss": 0.245, - "step": 15837 - }, - { - "epoch": 1.492004427592379, - "grad_norm": 0.6560298204421997, - "learning_rate": 3.1355092443519942e-06, - "loss": 0.222, - "step": 15838 - }, - { - "epoch": 1.4920986316855467, - "grad_norm": 0.6005601286888123, - "learning_rate": 3.1344112708597596e-06, - "loss": 0.1816, - "step": 15839 - }, - { - "epoch": 1.4921928357787146, - "grad_norm": 0.6249958872795105, - "learning_rate": 3.133313453913569e-06, - "loss": 0.1985, - "step": 15840 - }, - { - "epoch": 1.4922870398718824, - "grad_norm": 0.6505100727081299, - "learning_rate": 3.1322157935384477e-06, - "loss": 0.2066, - "step": 15841 - }, - { - "epoch": 1.4923812439650503, - "grad_norm": 0.7956879138946533, - 
"learning_rate": 3.1311182897594304e-06, - "loss": 0.2042, - "step": 15842 - }, - { - "epoch": 1.492475448058218, - "grad_norm": 0.6278946399688721, - "learning_rate": 3.130020942601536e-06, - "loss": 0.1901, - "step": 15843 - }, - { - "epoch": 1.492569652151386, - "grad_norm": 0.6191295981407166, - "learning_rate": 3.1289237520897885e-06, - "loss": 0.1918, - "step": 15844 - }, - { - "epoch": 1.4926638562445538, - "grad_norm": 0.6507278084754944, - "learning_rate": 3.1278267182492107e-06, - "loss": 0.1757, - "step": 15845 - }, - { - "epoch": 1.4927580603377217, - "grad_norm": 0.656032919883728, - "learning_rate": 3.1267298411048066e-06, - "loss": 0.2026, - "step": 15846 - }, - { - "epoch": 1.4928522644308895, - "grad_norm": 0.6542507410049438, - "learning_rate": 3.1256331206815926e-06, - "loss": 0.2006, - "step": 15847 - }, - { - "epoch": 1.4929464685240574, - "grad_norm": 0.6375792622566223, - "learning_rate": 3.124536557004578e-06, - "loss": 0.2012, - "step": 15848 - }, - { - "epoch": 1.4930406726172252, - "grad_norm": 0.6325379014015198, - "learning_rate": 3.12344015009876e-06, - "loss": 0.2118, - "step": 15849 - }, - { - "epoch": 1.4931348767103931, - "grad_norm": 0.6247312426567078, - "learning_rate": 3.1223438999891408e-06, - "loss": 0.1976, - "step": 15850 - }, - { - "epoch": 1.4932290808035609, - "grad_norm": 0.6271814703941345, - "learning_rate": 3.12124780670072e-06, - "loss": 0.1816, - "step": 15851 - }, - { - "epoch": 1.4933232848967288, - "grad_norm": 0.7313740253448486, - "learning_rate": 3.120151870258489e-06, - "loss": 0.205, - "step": 15852 - }, - { - "epoch": 1.4934174889898966, - "grad_norm": 0.6758707761764526, - "learning_rate": 3.119056090687428e-06, - "loss": 0.1929, - "step": 15853 - }, - { - "epoch": 1.4935116930830645, - "grad_norm": 0.6302174925804138, - "learning_rate": 3.1179604680125363e-06, - "loss": 0.1889, - "step": 15854 - }, - { - "epoch": 1.4936058971762323, - "grad_norm": 0.638113796710968, - "learning_rate": 3.1168650022587885e-06, - "loss": 0.1952, - "step": 15855 - }, - { - "epoch": 1.4937001012694002, - "grad_norm": 0.6867721676826477, - "learning_rate": 3.1157696934511572e-06, - "loss": 0.2184, - "step": 15856 - }, - { - "epoch": 1.493794305362568, - "grad_norm": 0.696965217590332, - "learning_rate": 3.1146745416146307e-06, - "loss": 0.196, - "step": 15857 - }, - { - "epoch": 1.493888509455736, - "grad_norm": 0.599137008190155, - "learning_rate": 3.1135795467741736e-06, - "loss": 0.1793, - "step": 15858 - }, - { - "epoch": 1.4939827135489037, - "grad_norm": 0.6748884320259094, - "learning_rate": 3.112484708954745e-06, - "loss": 0.1992, - "step": 15859 - }, - { - "epoch": 1.4940769176420716, - "grad_norm": 0.6321108937263489, - "learning_rate": 3.1113900281813237e-06, - "loss": 0.1855, - "step": 15860 - }, - { - "epoch": 1.4941711217352394, - "grad_norm": 0.6798797845840454, - "learning_rate": 3.110295504478864e-06, - "loss": 0.2328, - "step": 15861 - }, - { - "epoch": 1.4942653258284073, - "grad_norm": 0.6549807786941528, - "learning_rate": 3.1092011378723173e-06, - "loss": 0.1873, - "step": 15862 - }, - { - "epoch": 1.494359529921575, - "grad_norm": 0.7312194108963013, - "learning_rate": 3.1081069283866427e-06, - "loss": 0.2296, - "step": 15863 - }, - { - "epoch": 1.494453734014743, - "grad_norm": 0.625159740447998, - "learning_rate": 3.107012876046791e-06, - "loss": 0.1739, - "step": 15864 - }, - { - "epoch": 1.4945479381079108, - "grad_norm": 0.6311237812042236, - "learning_rate": 3.1059189808777036e-06, - "loss": 0.1682, - "step": 15865 - }, - 
{ - "epoch": 1.4946421422010787, - "grad_norm": 0.6616176962852478, - "learning_rate": 3.1048252429043248e-06, - "loss": 0.1943, - "step": 15866 - }, - { - "epoch": 1.4947363462942465, - "grad_norm": 0.7420802712440491, - "learning_rate": 3.1037316621515976e-06, - "loss": 0.2159, - "step": 15867 - }, - { - "epoch": 1.4948305503874144, - "grad_norm": 0.5542891025543213, - "learning_rate": 3.10263823864445e-06, - "loss": 0.1802, - "step": 15868 - }, - { - "epoch": 1.4949247544805822, - "grad_norm": 0.6131301522254944, - "learning_rate": 3.1015449724078184e-06, - "loss": 0.1833, - "step": 15869 - }, - { - "epoch": 1.4950189585737501, - "grad_norm": 0.6341503262519836, - "learning_rate": 3.1004518634666323e-06, - "loss": 0.2234, - "step": 15870 - }, - { - "epoch": 1.4951131626669178, - "grad_norm": 0.7000523805618286, - "learning_rate": 3.099358911845811e-06, - "loss": 0.1884, - "step": 15871 - }, - { - "epoch": 1.4952073667600856, - "grad_norm": 0.6973499655723572, - "learning_rate": 3.098266117570282e-06, - "loss": 0.2032, - "step": 15872 - }, - { - "epoch": 1.4953015708532535, - "grad_norm": 1.0040256977081299, - "learning_rate": 3.0971734806649566e-06, - "loss": 0.1987, - "step": 15873 - }, - { - "epoch": 1.4953957749464215, - "grad_norm": 0.6852333545684814, - "learning_rate": 3.0960810011547503e-06, - "loss": 0.2058, - "step": 15874 - }, - { - "epoch": 1.4954899790395892, - "grad_norm": 0.6796458959579468, - "learning_rate": 3.0949886790645788e-06, - "loss": 0.2004, - "step": 15875 - }, - { - "epoch": 1.495584183132757, - "grad_norm": 0.6131322979927063, - "learning_rate": 3.09389651441934e-06, - "loss": 0.1955, - "step": 15876 - }, - { - "epoch": 1.495678387225925, - "grad_norm": 0.6436735391616821, - "learning_rate": 3.092804507243945e-06, - "loss": 0.2226, - "step": 15877 - }, - { - "epoch": 1.495772591319093, - "grad_norm": 0.6957037448883057, - "learning_rate": 3.091712657563285e-06, - "loss": 0.199, - "step": 15878 - }, - { - "epoch": 1.4958667954122606, - "grad_norm": 0.6697835922241211, - "learning_rate": 3.0906209654022613e-06, - "loss": 0.2007, - "step": 15879 - }, - { - "epoch": 1.4959609995054284, - "grad_norm": 0.6514902710914612, - "learning_rate": 3.0895294307857684e-06, - "loss": 0.1834, - "step": 15880 - }, - { - "epoch": 1.4960552035985963, - "grad_norm": 0.649848461151123, - "learning_rate": 3.0884380537386883e-06, - "loss": 0.2188, - "step": 15881 - }, - { - "epoch": 1.4961494076917643, - "grad_norm": 0.6319318413734436, - "learning_rate": 3.0873468342859125e-06, - "loss": 0.2028, - "step": 15882 - }, - { - "epoch": 1.496243611784932, - "grad_norm": 0.7015264630317688, - "learning_rate": 3.086255772452317e-06, - "loss": 0.1956, - "step": 15883 - }, - { - "epoch": 1.4963378158780998, - "grad_norm": 0.6468680500984192, - "learning_rate": 3.085164868262781e-06, - "loss": 0.1902, - "step": 15884 - }, - { - "epoch": 1.4964320199712677, - "grad_norm": 0.655238926410675, - "learning_rate": 3.0840741217421845e-06, - "loss": 0.2034, - "step": 15885 - }, - { - "epoch": 1.4965262240644357, - "grad_norm": 0.7291893362998962, - "learning_rate": 3.082983532915389e-06, - "loss": 0.1998, - "step": 15886 - }, - { - "epoch": 1.4966204281576034, - "grad_norm": 0.642575204372406, - "learning_rate": 3.0818931018072672e-06, - "loss": 0.1897, - "step": 15887 - }, - { - "epoch": 1.4967146322507712, - "grad_norm": 0.6005973219871521, - "learning_rate": 3.080802828442685e-06, - "loss": 0.1803, - "step": 15888 - }, - { - "epoch": 1.4968088363439391, - "grad_norm": 0.6991351842880249, - 
"learning_rate": 3.0797127128464966e-06, - "loss": 0.1731, - "step": 15889 - }, - { - "epoch": 1.496903040437107, - "grad_norm": 0.6910436153411865, - "learning_rate": 3.07862275504356e-06, - "loss": 0.2079, - "step": 15890 - }, - { - "epoch": 1.4969972445302748, - "grad_norm": 0.6432166695594788, - "learning_rate": 3.077532955058732e-06, - "loss": 0.1981, - "step": 15891 - }, - { - "epoch": 1.4970914486234426, - "grad_norm": 0.6242793798446655, - "learning_rate": 3.076443312916858e-06, - "loss": 0.1958, - "step": 15892 - }, - { - "epoch": 1.4971856527166105, - "grad_norm": 0.5713182091712952, - "learning_rate": 3.0753538286427773e-06, - "loss": 0.1909, - "step": 15893 - }, - { - "epoch": 1.4972798568097785, - "grad_norm": 0.6838253140449524, - "learning_rate": 3.074264502261346e-06, - "loss": 0.1997, - "step": 15894 - }, - { - "epoch": 1.4973740609029462, - "grad_norm": 0.6527348756790161, - "learning_rate": 3.0731753337973945e-06, - "loss": 0.2284, - "step": 15895 - }, - { - "epoch": 1.497468264996114, - "grad_norm": 0.6556838154792786, - "learning_rate": 3.0720863232757514e-06, - "loss": 0.2199, - "step": 15896 - }, - { - "epoch": 1.497562469089282, - "grad_norm": 0.6978013515472412, - "learning_rate": 3.07099747072126e-06, - "loss": 0.1974, - "step": 15897 - }, - { - "epoch": 1.4976566731824499, - "grad_norm": 0.5855468511581421, - "learning_rate": 3.069908776158743e-06, - "loss": 0.1787, - "step": 15898 - }, - { - "epoch": 1.4977508772756176, - "grad_norm": 0.6803790926933289, - "learning_rate": 3.0688202396130172e-06, - "loss": 0.1959, - "step": 15899 - }, - { - "epoch": 1.4978450813687854, - "grad_norm": 0.6151381731033325, - "learning_rate": 3.067731861108916e-06, - "loss": 0.1925, - "step": 15900 - }, - { - "epoch": 1.4979392854619533, - "grad_norm": 0.6695271730422974, - "learning_rate": 3.0666436406712485e-06, - "loss": 0.1757, - "step": 15901 - }, - { - "epoch": 1.4980334895551213, - "grad_norm": 0.606999397277832, - "learning_rate": 3.0655555783248248e-06, - "loss": 0.1845, - "step": 15902 - }, - { - "epoch": 1.498127693648289, - "grad_norm": 0.6408692002296448, - "learning_rate": 3.064467674094459e-06, - "loss": 0.2059, - "step": 15903 - }, - { - "epoch": 1.4982218977414568, - "grad_norm": 0.6725518107414246, - "learning_rate": 3.0633799280049604e-06, - "loss": 0.1927, - "step": 15904 - }, - { - "epoch": 1.4983161018346247, - "grad_norm": 0.6841219067573547, - "learning_rate": 3.0622923400811234e-06, - "loss": 0.1924, - "step": 15905 - }, - { - "epoch": 1.4984103059277927, - "grad_norm": 0.5929518938064575, - "learning_rate": 3.061204910347749e-06, - "loss": 0.1838, - "step": 15906 - }, - { - "epoch": 1.4985045100209604, - "grad_norm": 0.9958206415176392, - "learning_rate": 3.0601176388296382e-06, - "loss": 0.2065, - "step": 15907 - }, - { - "epoch": 1.4985987141141281, - "grad_norm": 0.6183670163154602, - "learning_rate": 3.059030525551575e-06, - "loss": 0.1894, - "step": 15908 - }, - { - "epoch": 1.498692918207296, - "grad_norm": 0.6932939291000366, - "learning_rate": 3.05794357053835e-06, - "loss": 0.1764, - "step": 15909 - }, - { - "epoch": 1.498787122300464, - "grad_norm": 0.642903745174408, - "learning_rate": 3.0568567738147505e-06, - "loss": 0.1892, - "step": 15910 - }, - { - "epoch": 1.4988813263936318, - "grad_norm": 0.677622377872467, - "learning_rate": 3.0557701354055516e-06, - "loss": 0.1842, - "step": 15911 - }, - { - "epoch": 1.4989755304867995, - "grad_norm": 0.7866834402084351, - "learning_rate": 3.0546836553355354e-06, - "loss": 0.2096, - "step": 15912 - }, - 
{ - "epoch": 1.4990697345799675, - "grad_norm": 0.7292660474777222, - "learning_rate": 3.05359733362947e-06, - "loss": 0.2195, - "step": 15913 - }, - { - "epoch": 1.4991639386731355, - "grad_norm": 0.6775988936424255, - "learning_rate": 3.052511170312129e-06, - "loss": 0.2221, - "step": 15914 - }, - { - "epoch": 1.4992581427663032, - "grad_norm": 0.6540030241012573, - "learning_rate": 3.0514251654082803e-06, - "loss": 0.1985, - "step": 15915 - }, - { - "epoch": 1.499352346859471, - "grad_norm": 0.5825475454330444, - "learning_rate": 3.050339318942681e-06, - "loss": 0.1829, - "step": 15916 - }, - { - "epoch": 1.499446550952639, - "grad_norm": 0.8766392469406128, - "learning_rate": 3.0492536309400968e-06, - "loss": 0.2049, - "step": 15917 - }, - { - "epoch": 1.4995407550458069, - "grad_norm": 0.7007165551185608, - "learning_rate": 3.0481681014252763e-06, - "loss": 0.1925, - "step": 15918 - }, - { - "epoch": 1.4996349591389746, - "grad_norm": 0.7133749127388, - "learning_rate": 3.0470827304229734e-06, - "loss": 0.1924, - "step": 15919 - }, - { - "epoch": 1.4997291632321423, - "grad_norm": 0.6479188799858093, - "learning_rate": 3.0459975179579404e-06, - "loss": 0.1703, - "step": 15920 - }, - { - "epoch": 1.4998233673253103, - "grad_norm": 0.6277375817298889, - "learning_rate": 3.0449124640549154e-06, - "loss": 0.1952, - "step": 15921 - }, - { - "epoch": 1.4999175714184783, - "grad_norm": 0.6849794387817383, - "learning_rate": 3.0438275687386466e-06, - "loss": 0.2049, - "step": 15922 - }, - { - "epoch": 1.500011775511646, - "grad_norm": 0.6415727138519287, - "learning_rate": 3.0427428320338627e-06, - "loss": 0.1618, - "step": 15923 - }, - { - "epoch": 1.5001059796048137, - "grad_norm": 0.7276158332824707, - "learning_rate": 3.041658253965303e-06, - "loss": 0.1954, - "step": 15924 - }, - { - "epoch": 1.5002001836979817, - "grad_norm": 0.7348376512527466, - "learning_rate": 3.0405738345576987e-06, - "loss": 0.214, - "step": 15925 - }, - { - "epoch": 1.5002943877911497, - "grad_norm": 0.5843960046768188, - "learning_rate": 3.039489573835771e-06, - "loss": 0.1662, - "step": 15926 - }, - { - "epoch": 1.5003885918843174, - "grad_norm": 0.7853081226348877, - "learning_rate": 3.0384054718242453e-06, - "loss": 0.2091, - "step": 15927 - }, - { - "epoch": 1.5004827959774851, - "grad_norm": 0.7247233986854553, - "learning_rate": 3.037321528547845e-06, - "loss": 0.211, - "step": 15928 - }, - { - "epoch": 1.500577000070653, - "grad_norm": 0.7683022618293762, - "learning_rate": 3.0362377440312783e-06, - "loss": 0.2003, - "step": 15929 - }, - { - "epoch": 1.500671204163821, - "grad_norm": 0.7197926640510559, - "learning_rate": 3.0351541182992605e-06, - "loss": 0.2099, - "step": 15930 - }, - { - "epoch": 1.5007654082569888, - "grad_norm": 0.6092464923858643, - "learning_rate": 3.034070651376504e-06, - "loss": 0.1693, - "step": 15931 - }, - { - "epoch": 1.5008596123501565, - "grad_norm": 0.6016608476638794, - "learning_rate": 3.0329873432877087e-06, - "loss": 0.1725, - "step": 15932 - }, - { - "epoch": 1.5009538164433245, - "grad_norm": 0.6459596753120422, - "learning_rate": 3.031904194057571e-06, - "loss": 0.185, - "step": 15933 - }, - { - "epoch": 1.5010480205364924, - "grad_norm": 0.63637775182724, - "learning_rate": 3.030821203710801e-06, - "loss": 0.2081, - "step": 15934 - }, - { - "epoch": 1.5011422246296602, - "grad_norm": 0.6163280010223389, - "learning_rate": 3.029738372272084e-06, - "loss": 0.1679, - "step": 15935 - }, - { - "epoch": 1.501236428722828, - "grad_norm": 0.7546627521514893, - 
"learning_rate": 3.0286556997661064e-06, - "loss": 0.2143, - "step": 15936 - }, - { - "epoch": 1.5013306328159959, - "grad_norm": 0.6533668637275696, - "learning_rate": 3.027573186217567e-06, - "loss": 0.182, - "step": 15937 - }, - { - "epoch": 1.5014248369091638, - "grad_norm": 0.5748540759086609, - "learning_rate": 3.0264908316511422e-06, - "loss": 0.1821, - "step": 15938 - }, - { - "epoch": 1.5015190410023316, - "grad_norm": 0.6528898477554321, - "learning_rate": 3.0254086360915036e-06, - "loss": 0.221, - "step": 15939 - }, - { - "epoch": 1.5016132450954993, - "grad_norm": 0.6347132921218872, - "learning_rate": 3.024326599563342e-06, - "loss": 0.196, - "step": 15940 - }, - { - "epoch": 1.5017074491886673, - "grad_norm": 1.1245532035827637, - "learning_rate": 3.0232447220913207e-06, - "loss": 0.2049, - "step": 15941 - }, - { - "epoch": 1.5018016532818352, - "grad_norm": 0.7115646004676819, - "learning_rate": 3.0221630037001072e-06, - "loss": 0.2166, - "step": 15942 - }, - { - "epoch": 1.501895857375003, - "grad_norm": 0.6134077906608582, - "learning_rate": 3.0210814444143687e-06, - "loss": 0.1891, - "step": 15943 - }, - { - "epoch": 1.5019900614681707, - "grad_norm": 0.670565128326416, - "learning_rate": 3.0200000442587695e-06, - "loss": 0.2, - "step": 15944 - }, - { - "epoch": 1.5020842655613387, - "grad_norm": 0.6931569576263428, - "learning_rate": 3.0189188032579606e-06, - "loss": 0.2272, - "step": 15945 - }, - { - "epoch": 1.5021784696545066, - "grad_norm": 0.6226788759231567, - "learning_rate": 3.0178377214365994e-06, - "loss": 0.1822, - "step": 15946 - }, - { - "epoch": 1.5022726737476744, - "grad_norm": 0.6719318628311157, - "learning_rate": 3.0167567988193403e-06, - "loss": 0.1857, - "step": 15947 - }, - { - "epoch": 1.502366877840842, - "grad_norm": 0.6673892736434937, - "learning_rate": 3.0156760354308223e-06, - "loss": 0.1921, - "step": 15948 - }, - { - "epoch": 1.50246108193401, - "grad_norm": 0.5427615642547607, - "learning_rate": 3.0145954312956915e-06, - "loss": 0.1724, - "step": 15949 - }, - { - "epoch": 1.502555286027178, - "grad_norm": 0.6564127802848816, - "learning_rate": 3.0135149864385915e-06, - "loss": 0.2139, - "step": 15950 - }, - { - "epoch": 1.5026494901203458, - "grad_norm": 0.8202550411224365, - "learning_rate": 3.0124347008841515e-06, - "loss": 0.201, - "step": 15951 - }, - { - "epoch": 1.5027436942135135, - "grad_norm": 0.6755064725875854, - "learning_rate": 3.0113545746570107e-06, - "loss": 0.2421, - "step": 15952 - }, - { - "epoch": 1.5028378983066815, - "grad_norm": 0.6923037767410278, - "learning_rate": 3.0102746077817903e-06, - "loss": 0.2158, - "step": 15953 - }, - { - "epoch": 1.5029321023998494, - "grad_norm": 0.6704870462417603, - "learning_rate": 3.0091948002831183e-06, - "loss": 0.2081, - "step": 15954 - }, - { - "epoch": 1.5030263064930172, - "grad_norm": 0.6434040069580078, - "learning_rate": 3.0081151521856188e-06, - "loss": 0.1864, - "step": 15955 - }, - { - "epoch": 1.503120510586185, - "grad_norm": 0.571193516254425, - "learning_rate": 3.007035663513905e-06, - "loss": 0.1666, - "step": 15956 - }, - { - "epoch": 1.5032147146793529, - "grad_norm": 0.6750934720039368, - "learning_rate": 3.0059563342925956e-06, - "loss": 0.2158, - "step": 15957 - }, - { - "epoch": 1.5033089187725208, - "grad_norm": 0.5765085220336914, - "learning_rate": 3.0048771645462947e-06, - "loss": 0.2048, - "step": 15958 - }, - { - "epoch": 1.5034031228656886, - "grad_norm": 0.6211551427841187, - "learning_rate": 3.003798154299613e-06, - "loss": 0.1714, - "step": 15959 - 
}, - { - "epoch": 1.5034973269588563, - "grad_norm": 0.6313039064407349, - "learning_rate": 3.0027193035771576e-06, - "loss": 0.1599, - "step": 15960 - }, - { - "epoch": 1.5035915310520243, - "grad_norm": 0.6515534520149231, - "learning_rate": 3.001640612403518e-06, - "loss": 0.2064, - "step": 15961 - }, - { - "epoch": 1.5036857351451922, - "grad_norm": 0.68388831615448, - "learning_rate": 3.000562080803301e-06, - "loss": 0.2008, - "step": 15962 - }, - { - "epoch": 1.50377993923836, - "grad_norm": 0.6394569873809814, - "learning_rate": 2.9994837088010886e-06, - "loss": 0.1798, - "step": 15963 - }, - { - "epoch": 1.5038741433315277, - "grad_norm": 0.6411869525909424, - "learning_rate": 2.9984054964214747e-06, - "loss": 0.1959, - "step": 15964 - }, - { - "epoch": 1.5039683474246957, - "grad_norm": 0.6322797536849976, - "learning_rate": 2.9973274436890475e-06, - "loss": 0.1823, - "step": 15965 - }, - { - "epoch": 1.5040625515178636, - "grad_norm": 0.6207188367843628, - "learning_rate": 2.9962495506283805e-06, - "loss": 0.2079, - "step": 15966 - }, - { - "epoch": 1.5041567556110313, - "grad_norm": 0.6168134808540344, - "learning_rate": 2.995171817264055e-06, - "loss": 0.166, - "step": 15967 - }, - { - "epoch": 1.504250959704199, - "grad_norm": 0.6465839147567749, - "learning_rate": 2.994094243620649e-06, - "loss": 0.1947, - "step": 15968 - }, - { - "epoch": 1.504345163797367, - "grad_norm": 0.7306556105613708, - "learning_rate": 2.9930168297227257e-06, - "loss": 0.2244, - "step": 15969 - }, - { - "epoch": 1.504439367890535, - "grad_norm": 0.654899001121521, - "learning_rate": 2.9919395755948553e-06, - "loss": 0.1964, - "step": 15970 - }, - { - "epoch": 1.5045335719837027, - "grad_norm": 0.6639846563339233, - "learning_rate": 2.9908624812616038e-06, - "loss": 0.1942, - "step": 15971 - }, - { - "epoch": 1.5046277760768705, - "grad_norm": 0.6543169021606445, - "learning_rate": 2.9897855467475278e-06, - "loss": 0.1675, - "step": 15972 - }, - { - "epoch": 1.5047219801700384, - "grad_norm": 0.635490357875824, - "learning_rate": 2.988708772077177e-06, - "loss": 0.2129, - "step": 15973 - }, - { - "epoch": 1.5048161842632064, - "grad_norm": 0.6697912812232971, - "learning_rate": 2.9876321572751143e-06, - "loss": 0.1977, - "step": 15974 - }, - { - "epoch": 1.504910388356374, - "grad_norm": 0.76012122631073, - "learning_rate": 2.9865557023658843e-06, - "loss": 0.1981, - "step": 15975 - }, - { - "epoch": 1.5050045924495419, - "grad_norm": 0.7171528339385986, - "learning_rate": 2.9854794073740243e-06, - "loss": 0.2013, - "step": 15976 - }, - { - "epoch": 1.5050987965427098, - "grad_norm": 0.6229657530784607, - "learning_rate": 2.9844032723240877e-06, - "loss": 0.1954, - "step": 15977 - }, - { - "epoch": 1.5051930006358776, - "grad_norm": 0.6412497758865356, - "learning_rate": 2.983327297240607e-06, - "loss": 0.1931, - "step": 15978 - }, - { - "epoch": 1.5052872047290453, - "grad_norm": 0.7091441750526428, - "learning_rate": 2.9822514821481086e-06, - "loss": 0.2165, - "step": 15979 - }, - { - "epoch": 1.5053814088222133, - "grad_norm": 0.7062512040138245, - "learning_rate": 2.981175827071137e-06, - "loss": 0.1924, - "step": 15980 - }, - { - "epoch": 1.5054756129153812, - "grad_norm": 0.6737327575683594, - "learning_rate": 2.9801003320342104e-06, - "loss": 0.2075, - "step": 15981 - }, - { - "epoch": 1.505569817008549, - "grad_norm": 0.6810401678085327, - "learning_rate": 2.979024997061849e-06, - "loss": 0.2091, - "step": 15982 - }, - { - "epoch": 1.5056640211017167, - "grad_norm": 0.7100754976272583, - 
"learning_rate": 2.9779498221785774e-06, - "loss": 0.2057, - "step": 15983 - }, - { - "epoch": 1.5057582251948847, - "grad_norm": 0.6806728839874268, - "learning_rate": 2.9768748074089115e-06, - "loss": 0.2066, - "step": 15984 - }, - { - "epoch": 1.5058524292880526, - "grad_norm": 0.692293643951416, - "learning_rate": 2.9757999527773583e-06, - "loss": 0.186, - "step": 15985 - }, - { - "epoch": 1.5059466333812204, - "grad_norm": 0.652180016040802, - "learning_rate": 2.9747252583084297e-06, - "loss": 0.1849, - "step": 15986 - }, - { - "epoch": 1.506040837474388, - "grad_norm": 0.6136890053749084, - "learning_rate": 2.9736507240266332e-06, - "loss": 0.1991, - "step": 15987 - }, - { - "epoch": 1.506135041567556, - "grad_norm": 0.6313958168029785, - "learning_rate": 2.9725763499564643e-06, - "loss": 0.1889, - "step": 15988 - }, - { - "epoch": 1.506229245660724, - "grad_norm": 0.6914837956428528, - "learning_rate": 2.9715021361224216e-06, - "loss": 0.2058, - "step": 15989 - }, - { - "epoch": 1.5063234497538918, - "grad_norm": 0.6593897342681885, - "learning_rate": 2.9704280825490027e-06, - "loss": 0.194, - "step": 15990 - }, - { - "epoch": 1.5064176538470595, - "grad_norm": 0.6278865337371826, - "learning_rate": 2.9693541892606935e-06, - "loss": 0.2221, - "step": 15991 - }, - { - "epoch": 1.5065118579402275, - "grad_norm": 0.7037993669509888, - "learning_rate": 2.9682804562819835e-06, - "loss": 0.2224, - "step": 15992 - }, - { - "epoch": 1.5066060620333954, - "grad_norm": 0.6817741394042969, - "learning_rate": 2.9672068836373515e-06, - "loss": 0.1849, - "step": 15993 - }, - { - "epoch": 1.5067002661265632, - "grad_norm": 0.6765510439872742, - "learning_rate": 2.966133471351282e-06, - "loss": 0.2279, - "step": 15994 - }, - { - "epoch": 1.506794470219731, - "grad_norm": 0.879775881767273, - "learning_rate": 2.9650602194482448e-06, - "loss": 0.1813, - "step": 15995 - }, - { - "epoch": 1.5068886743128989, - "grad_norm": 0.6820473074913025, - "learning_rate": 2.9639871279527133e-06, - "loss": 0.1879, - "step": 15996 - }, - { - "epoch": 1.5069828784060668, - "grad_norm": 0.5898948907852173, - "learning_rate": 2.9629141968891604e-06, - "loss": 0.1889, - "step": 15997 - }, - { - "epoch": 1.5070770824992346, - "grad_norm": 0.6425492167472839, - "learning_rate": 2.9618414262820436e-06, - "loss": 0.2008, - "step": 15998 - }, - { - "epoch": 1.5071712865924023, - "grad_norm": 0.7598943710327148, - "learning_rate": 2.9607688161558266e-06, - "loss": 0.2493, - "step": 15999 - }, - { - "epoch": 1.5072654906855703, - "grad_norm": 0.6973783373832703, - "learning_rate": 2.959696366534971e-06, - "loss": 0.2033, - "step": 16000 - }, - { - "epoch": 1.5073596947787382, - "grad_norm": 0.6856785416603088, - "learning_rate": 2.9586240774439223e-06, - "loss": 0.1933, - "step": 16001 - }, - { - "epoch": 1.507453898871906, - "grad_norm": 0.6569346189498901, - "learning_rate": 2.957551948907138e-06, - "loss": 0.2198, - "step": 16002 - }, - { - "epoch": 1.5075481029650737, - "grad_norm": 0.6172292232513428, - "learning_rate": 2.9564799809490574e-06, - "loss": 0.2073, - "step": 16003 - }, - { - "epoch": 1.5076423070582416, - "grad_norm": 0.6552324295043945, - "learning_rate": 2.9554081735941263e-06, - "loss": 0.2161, - "step": 16004 - }, - { - "epoch": 1.5077365111514096, - "grad_norm": 0.6306608319282532, - "learning_rate": 2.9543365268667866e-06, - "loss": 0.2145, - "step": 16005 - }, - { - "epoch": 1.5078307152445773, - "grad_norm": 0.6723946928977966, - "learning_rate": 2.9532650407914676e-06, - "loss": 0.1839, - "step": 
16006 - }, - { - "epoch": 1.507924919337745, - "grad_norm": 0.7015173435211182, - "learning_rate": 2.9521937153926028e-06, - "loss": 0.1978, - "step": 16007 - }, - { - "epoch": 1.508019123430913, - "grad_norm": 0.5856063365936279, - "learning_rate": 2.951122550694625e-06, - "loss": 0.1766, - "step": 16008 - }, - { - "epoch": 1.508113327524081, - "grad_norm": 0.6634768843650818, - "learning_rate": 2.9500515467219505e-06, - "loss": 0.2005, - "step": 16009 - }, - { - "epoch": 1.5082075316172487, - "grad_norm": 0.6804721355438232, - "learning_rate": 2.9489807034990037e-06, - "loss": 0.2082, - "step": 16010 - }, - { - "epoch": 1.5083017357104165, - "grad_norm": 0.7250627279281616, - "learning_rate": 2.9479100210502045e-06, - "loss": 0.1839, - "step": 16011 - }, - { - "epoch": 1.5083959398035844, - "grad_norm": 0.6678200364112854, - "learning_rate": 2.946839499399964e-06, - "loss": 0.1995, - "step": 16012 - }, - { - "epoch": 1.5084901438967524, - "grad_norm": 0.6348961591720581, - "learning_rate": 2.945769138572684e-06, - "loss": 0.1996, - "step": 16013 - }, - { - "epoch": 1.5085843479899201, - "grad_norm": 0.6270191073417664, - "learning_rate": 2.944698938592784e-06, - "loss": 0.1872, - "step": 16014 - }, - { - "epoch": 1.5086785520830879, - "grad_norm": 0.6524645090103149, - "learning_rate": 2.9436288994846583e-06, - "loss": 0.2006, - "step": 16015 - }, - { - "epoch": 1.5087727561762558, - "grad_norm": 0.6725365519523621, - "learning_rate": 2.9425590212727005e-06, - "loss": 0.2169, - "step": 16016 - }, - { - "epoch": 1.5088669602694238, - "grad_norm": 0.723200261592865, - "learning_rate": 2.9414893039813186e-06, - "loss": 0.1964, - "step": 16017 - }, - { - "epoch": 1.5089611643625915, - "grad_norm": 0.7292844653129578, - "learning_rate": 2.940419747634896e-06, - "loss": 0.2041, - "step": 16018 - }, - { - "epoch": 1.5090553684557593, - "grad_norm": 0.6949648857116699, - "learning_rate": 2.9393503522578183e-06, - "loss": 0.1989, - "step": 16019 - }, - { - "epoch": 1.5091495725489272, - "grad_norm": 0.6308997273445129, - "learning_rate": 2.938281117874472e-06, - "loss": 0.1818, - "step": 16020 - }, - { - "epoch": 1.5092437766420952, - "grad_norm": 0.7115275859832764, - "learning_rate": 2.937212044509241e-06, - "loss": 0.2033, - "step": 16021 - }, - { - "epoch": 1.509337980735263, - "grad_norm": 0.7062810063362122, - "learning_rate": 2.936143132186494e-06, - "loss": 0.1959, - "step": 16022 - }, - { - "epoch": 1.5094321848284307, - "grad_norm": 0.6680557727813721, - "learning_rate": 2.935074380930609e-06, - "loss": 0.1935, - "step": 16023 - }, - { - "epoch": 1.5095263889215986, - "grad_norm": 0.6889482736587524, - "learning_rate": 2.934005790765957e-06, - "loss": 0.1769, - "step": 16024 - }, - { - "epoch": 1.5096205930147666, - "grad_norm": 0.7399773597717285, - "learning_rate": 2.9329373617168976e-06, - "loss": 0.1852, - "step": 16025 - }, - { - "epoch": 1.5097147971079343, - "grad_norm": 0.6657445430755615, - "learning_rate": 2.9318690938077965e-06, - "loss": 0.2268, - "step": 16026 - }, - { - "epoch": 1.509809001201102, - "grad_norm": 0.6584805250167847, - "learning_rate": 2.9308009870630127e-06, - "loss": 0.2093, - "step": 16027 - }, - { - "epoch": 1.50990320529427, - "grad_norm": 0.6092638969421387, - "learning_rate": 2.929733041506897e-06, - "loss": 0.1733, - "step": 16028 - }, - { - "epoch": 1.509997409387438, - "grad_norm": 0.6681575775146484, - "learning_rate": 2.928665257163803e-06, - "loss": 0.1978, - "step": 16029 - }, - { - "epoch": 1.5100916134806057, - "grad_norm": 
0.6568483114242554, - "learning_rate": 2.927597634058079e-06, - "loss": 0.2213, - "step": 16030 - }, - { - "epoch": 1.5101858175737735, - "grad_norm": 0.6793726682662964, - "learning_rate": 2.926530172214064e-06, - "loss": 0.2109, - "step": 16031 - }, - { - "epoch": 1.5102800216669414, - "grad_norm": 0.642000675201416, - "learning_rate": 2.925462871656104e-06, - "loss": 0.1953, - "step": 16032 - }, - { - "epoch": 1.5103742257601094, - "grad_norm": 0.6226553320884705, - "learning_rate": 2.9243957324085283e-06, - "loss": 0.1911, - "step": 16033 - }, - { - "epoch": 1.5104684298532771, - "grad_norm": 0.7639485001564026, - "learning_rate": 2.9233287544956747e-06, - "loss": 0.2093, - "step": 16034 - }, - { - "epoch": 1.5105626339464449, - "grad_norm": 0.6693103909492493, - "learning_rate": 2.922261937941867e-06, - "loss": 0.1792, - "step": 16035 - }, - { - "epoch": 1.5106568380396128, - "grad_norm": 0.5888148546218872, - "learning_rate": 2.921195282771433e-06, - "loss": 0.1778, - "step": 16036 - }, - { - "epoch": 1.5107510421327808, - "grad_norm": 0.6757652759552002, - "learning_rate": 2.920128789008698e-06, - "loss": 0.2134, - "step": 16037 - }, - { - "epoch": 1.5108452462259485, - "grad_norm": 0.7014308571815491, - "learning_rate": 2.9190624566779723e-06, - "loss": 0.2189, - "step": 16038 - }, - { - "epoch": 1.5109394503191163, - "grad_norm": 0.611684262752533, - "learning_rate": 2.9179962858035736e-06, - "loss": 0.1819, - "step": 16039 - }, - { - "epoch": 1.5110336544122842, - "grad_norm": 0.6903232932090759, - "learning_rate": 2.916930276409815e-06, - "loss": 0.2056, - "step": 16040 - }, - { - "epoch": 1.5111278585054522, - "grad_norm": 0.6848517060279846, - "learning_rate": 2.915864428520997e-06, - "loss": 0.1919, - "step": 16041 - }, - { - "epoch": 1.51122206259862, - "grad_norm": 0.6487221717834473, - "learning_rate": 2.9147987421614287e-06, - "loss": 0.1893, - "step": 16042 - }, - { - "epoch": 1.5113162666917876, - "grad_norm": 0.6601995229721069, - "learning_rate": 2.9137332173554043e-06, - "loss": 0.2007, - "step": 16043 - }, - { - "epoch": 1.5114104707849556, - "grad_norm": 0.7070102691650391, - "learning_rate": 2.912667854127221e-06, - "loss": 0.1818, - "step": 16044 - }, - { - "epoch": 1.5115046748781236, - "grad_norm": 0.6315514445304871, - "learning_rate": 2.9116026525011755e-06, - "loss": 0.1901, - "step": 16045 - }, - { - "epoch": 1.5115988789712913, - "grad_norm": 0.654120147228241, - "learning_rate": 2.9105376125015485e-06, - "loss": 0.1996, - "step": 16046 - }, - { - "epoch": 1.511693083064459, - "grad_norm": 0.7211007475852966, - "learning_rate": 2.9094727341526275e-06, - "loss": 0.1908, - "step": 16047 - }, - { - "epoch": 1.511787287157627, - "grad_norm": 0.6275424957275391, - "learning_rate": 2.9084080174786966e-06, - "loss": 0.1736, - "step": 16048 - }, - { - "epoch": 1.511881491250795, - "grad_norm": 0.5740016102790833, - "learning_rate": 2.9073434625040274e-06, - "loss": 0.1942, - "step": 16049 - }, - { - "epoch": 1.5119756953439627, - "grad_norm": 0.6556302309036255, - "learning_rate": 2.9062790692528963e-06, - "loss": 0.1856, - "step": 16050 - }, - { - "epoch": 1.5120698994371304, - "grad_norm": 0.6546376943588257, - "learning_rate": 2.9052148377495772e-06, - "loss": 0.1885, - "step": 16051 - }, - { - "epoch": 1.5121641035302984, - "grad_norm": 0.6720876097679138, - "learning_rate": 2.9041507680183313e-06, - "loss": 0.203, - "step": 16052 - }, - { - "epoch": 1.5122583076234664, - "grad_norm": 0.5759272575378418, - "learning_rate": 2.903086860083415e-06, - "loss": 
0.1697, - "step": 16053 - }, - { - "epoch": 1.512352511716634, - "grad_norm": 0.708020031452179, - "learning_rate": 2.902023113969101e-06, - "loss": 0.1912, - "step": 16054 - }, - { - "epoch": 1.5124467158098018, - "grad_norm": 0.6964131593704224, - "learning_rate": 2.9009595296996372e-06, - "loss": 0.1885, - "step": 16055 - }, - { - "epoch": 1.5125409199029698, - "grad_norm": 0.6443415284156799, - "learning_rate": 2.899896107299268e-06, - "loss": 0.2038, - "step": 16056 - }, - { - "epoch": 1.5126351239961378, - "grad_norm": 0.8100490570068359, - "learning_rate": 2.8988328467922554e-06, - "loss": 0.2152, - "step": 16057 - }, - { - "epoch": 1.5127293280893055, - "grad_norm": 0.6466900706291199, - "learning_rate": 2.8977697482028356e-06, - "loss": 0.1781, - "step": 16058 - }, - { - "epoch": 1.5128235321824732, - "grad_norm": 0.6879572868347168, - "learning_rate": 2.8967068115552453e-06, - "loss": 0.1898, - "step": 16059 - }, - { - "epoch": 1.5129177362756412, - "grad_norm": 0.5906853675842285, - "learning_rate": 2.895644036873726e-06, - "loss": 0.201, - "step": 16060 - }, - { - "epoch": 1.5130119403688091, - "grad_norm": 0.6801936626434326, - "learning_rate": 2.8945814241825133e-06, - "loss": 0.1951, - "step": 16061 - }, - { - "epoch": 1.5131061444619769, - "grad_norm": 0.6945424675941467, - "learning_rate": 2.893518973505829e-06, - "loss": 0.2164, - "step": 16062 - }, - { - "epoch": 1.5132003485551446, - "grad_norm": 0.6183425784111023, - "learning_rate": 2.8924566848679024e-06, - "loss": 0.1894, - "step": 16063 - }, - { - "epoch": 1.5132945526483126, - "grad_norm": 0.6572199463844299, - "learning_rate": 2.8913945582929594e-06, - "loss": 0.1897, - "step": 16064 - }, - { - "epoch": 1.5133887567414805, - "grad_norm": 0.6754615902900696, - "learning_rate": 2.8903325938052108e-06, - "loss": 0.1929, - "step": 16065 - }, - { - "epoch": 1.5134829608346483, - "grad_norm": 0.7403187155723572, - "learning_rate": 2.8892707914288744e-06, - "loss": 0.2096, - "step": 16066 - }, - { - "epoch": 1.513577164927816, - "grad_norm": 0.6388410925865173, - "learning_rate": 2.888209151188163e-06, - "loss": 0.1893, - "step": 16067 - }, - { - "epoch": 1.513671369020984, - "grad_norm": 0.6676547527313232, - "learning_rate": 2.887147673107279e-06, - "loss": 0.1863, - "step": 16068 - }, - { - "epoch": 1.513765573114152, - "grad_norm": 0.6466985940933228, - "learning_rate": 2.886086357210429e-06, - "loss": 0.2171, - "step": 16069 - }, - { - "epoch": 1.5138597772073197, - "grad_norm": 0.7194705605506897, - "learning_rate": 2.885025203521814e-06, - "loss": 0.2174, - "step": 16070 - }, - { - "epoch": 1.5139539813004874, - "grad_norm": 0.6867430806159973, - "learning_rate": 2.883964212065625e-06, - "loss": 0.2138, - "step": 16071 - }, - { - "epoch": 1.5140481853936554, - "grad_norm": 0.6516374945640564, - "learning_rate": 2.8829033828660613e-06, - "loss": 0.2173, - "step": 16072 - }, - { - "epoch": 1.5141423894868233, - "grad_norm": 0.6495267152786255, - "learning_rate": 2.8818427159473027e-06, - "loss": 0.1956, - "step": 16073 - }, - { - "epoch": 1.514236593579991, - "grad_norm": 0.661798357963562, - "learning_rate": 2.8807822113335425e-06, - "loss": 0.2111, - "step": 16074 - }, - { - "epoch": 1.5143307976731588, - "grad_norm": 0.5697353482246399, - "learning_rate": 2.879721869048955e-06, - "loss": 0.1834, - "step": 16075 - }, - { - "epoch": 1.5144250017663268, - "grad_norm": 0.7053003907203674, - "learning_rate": 2.87866168911772e-06, - "loss": 0.1947, - "step": 16076 - }, - { - "epoch": 1.5145192058594947, - "grad_norm": 
0.7660698890686035, - "learning_rate": 2.8776016715640155e-06, - "loss": 0.2051, - "step": 16077 - }, - { - "epoch": 1.5146134099526625, - "grad_norm": 0.6596674919128418, - "learning_rate": 2.8765418164120053e-06, - "loss": 0.2068, - "step": 16078 - }, - { - "epoch": 1.5147076140458302, - "grad_norm": 0.6137715578079224, - "learning_rate": 2.8754821236858577e-06, - "loss": 0.1715, - "step": 16079 - }, - { - "epoch": 1.5148018181389982, - "grad_norm": 0.5945097208023071, - "learning_rate": 2.874422593409738e-06, - "loss": 0.2105, - "step": 16080 - }, - { - "epoch": 1.5148960222321661, - "grad_norm": 0.6514506936073303, - "learning_rate": 2.8733632256078014e-06, - "loss": 0.2196, - "step": 16081 - }, - { - "epoch": 1.5149902263253339, - "grad_norm": 0.6846261620521545, - "learning_rate": 2.8723040203042074e-06, - "loss": 0.2014, - "step": 16082 - }, - { - "epoch": 1.5150844304185016, - "grad_norm": 0.5711610913276672, - "learning_rate": 2.8712449775231023e-06, - "loss": 0.1568, - "step": 16083 - }, - { - "epoch": 1.5151786345116696, - "grad_norm": 0.616041362285614, - "learning_rate": 2.8701860972886366e-06, - "loss": 0.1678, - "step": 16084 - }, - { - "epoch": 1.5152728386048375, - "grad_norm": 0.6680805087089539, - "learning_rate": 2.8691273796249562e-06, - "loss": 0.2017, - "step": 16085 - }, - { - "epoch": 1.5153670426980053, - "grad_norm": 0.6135803461074829, - "learning_rate": 2.868068824556197e-06, - "loss": 0.1855, - "step": 16086 - }, - { - "epoch": 1.515461246791173, - "grad_norm": 0.6743056178092957, - "learning_rate": 2.8670104321064995e-06, - "loss": 0.2018, - "step": 16087 - }, - { - "epoch": 1.515555450884341, - "grad_norm": 0.6641936898231506, - "learning_rate": 2.8659522022999977e-06, - "loss": 0.1871, - "step": 16088 - }, - { - "epoch": 1.515649654977509, - "grad_norm": 0.7147706747055054, - "learning_rate": 2.864894135160815e-06, - "loss": 0.2159, - "step": 16089 - }, - { - "epoch": 1.5157438590706767, - "grad_norm": 0.6171817183494568, - "learning_rate": 2.863836230713082e-06, - "loss": 0.1734, - "step": 16090 - }, - { - "epoch": 1.5158380631638444, - "grad_norm": 0.6959594488143921, - "learning_rate": 2.862778488980922e-06, - "loss": 0.1897, - "step": 16091 - }, - { - "epoch": 1.5159322672570124, - "grad_norm": 0.6163338422775269, - "learning_rate": 2.86172090998845e-06, - "loss": 0.197, - "step": 16092 - }, - { - "epoch": 1.5160264713501803, - "grad_norm": 0.6707096099853516, - "learning_rate": 2.860663493759774e-06, - "loss": 0.2224, - "step": 16093 - }, - { - "epoch": 1.516120675443348, - "grad_norm": 0.6781424880027771, - "learning_rate": 2.8596062403190196e-06, - "loss": 0.1859, - "step": 16094 - }, - { - "epoch": 1.5162148795365158, - "grad_norm": 0.5829667448997498, - "learning_rate": 2.858549149690284e-06, - "loss": 0.1801, - "step": 16095 - }, - { - "epoch": 1.5163090836296838, - "grad_norm": 0.6407762765884399, - "learning_rate": 2.8574922218976663e-06, - "loss": 0.1853, - "step": 16096 - }, - { - "epoch": 1.5164032877228517, - "grad_norm": 0.7396100163459778, - "learning_rate": 2.8564354569652785e-06, - "loss": 0.1907, - "step": 16097 - }, - { - "epoch": 1.5164974918160194, - "grad_norm": 0.6020578145980835, - "learning_rate": 2.85537885491721e-06, - "loss": 0.1865, - "step": 16098 - }, - { - "epoch": 1.5165916959091872, - "grad_norm": 0.6818649172782898, - "learning_rate": 2.8543224157775504e-06, - "loss": 0.1765, - "step": 16099 - }, - { - "epoch": 1.5166859000023551, - "grad_norm": 0.6281371116638184, - "learning_rate": 2.853266139570391e-06, - "loss": 
0.1806, - "step": 16100 - }, - { - "epoch": 1.516780104095523, - "grad_norm": 0.6728824377059937, - "learning_rate": 2.852210026319818e-06, - "loss": 0.2119, - "step": 16101 - }, - { - "epoch": 1.5168743081886908, - "grad_norm": 0.6425108909606934, - "learning_rate": 2.851154076049909e-06, - "loss": 0.1929, - "step": 16102 - }, - { - "epoch": 1.5169685122818586, - "grad_norm": 0.6145150065422058, - "learning_rate": 2.850098288784742e-06, - "loss": 0.1863, - "step": 16103 - }, - { - "epoch": 1.5170627163750265, - "grad_norm": 0.661647617816925, - "learning_rate": 2.849042664548395e-06, - "loss": 0.2092, - "step": 16104 - }, - { - "epoch": 1.5171569204681945, - "grad_norm": 0.7248967885971069, - "learning_rate": 2.84798720336493e-06, - "loss": 0.1726, - "step": 16105 - }, - { - "epoch": 1.5172511245613622, - "grad_norm": 0.6972887516021729, - "learning_rate": 2.84693190525842e-06, - "loss": 0.2203, - "step": 16106 - }, - { - "epoch": 1.51734532865453, - "grad_norm": 0.6805843710899353, - "learning_rate": 2.8458767702529265e-06, - "loss": 0.1848, - "step": 16107 - }, - { - "epoch": 1.517439532747698, - "grad_norm": 0.6261547803878784, - "learning_rate": 2.8448217983725034e-06, - "loss": 0.2028, - "step": 16108 - }, - { - "epoch": 1.517533736840866, - "grad_norm": 0.6367445588111877, - "learning_rate": 2.84376698964121e-06, - "loss": 0.1844, - "step": 16109 - }, - { - "epoch": 1.5176279409340336, - "grad_norm": 0.6158941984176636, - "learning_rate": 2.8427123440830997e-06, - "loss": 0.2046, - "step": 16110 - }, - { - "epoch": 1.5177221450272014, - "grad_norm": 0.6778331398963928, - "learning_rate": 2.8416578617222156e-06, - "loss": 0.2139, - "step": 16111 - }, - { - "epoch": 1.5178163491203693, - "grad_norm": 0.6457752585411072, - "learning_rate": 2.8406035425826006e-06, - "loss": 0.1706, - "step": 16112 - }, - { - "epoch": 1.5179105532135373, - "grad_norm": 0.7141786217689514, - "learning_rate": 2.839549386688297e-06, - "loss": 0.2022, - "step": 16113 - }, - { - "epoch": 1.5180047573067048, - "grad_norm": 0.966853678226471, - "learning_rate": 2.838495394063344e-06, - "loss": 0.1911, - "step": 16114 - }, - { - "epoch": 1.5180989613998728, - "grad_norm": 0.6230742931365967, - "learning_rate": 2.837441564731769e-06, - "loss": 0.1786, - "step": 16115 - }, - { - "epoch": 1.5181931654930407, - "grad_norm": 0.6462972164154053, - "learning_rate": 2.836387898717603e-06, - "loss": 0.1913, - "step": 16116 - }, - { - "epoch": 1.5182873695862085, - "grad_norm": 0.745755672454834, - "learning_rate": 2.8353343960448754e-06, - "loss": 0.1984, - "step": 16117 - }, - { - "epoch": 1.5183815736793762, - "grad_norm": 0.6860038042068481, - "learning_rate": 2.8342810567376e-06, - "loss": 0.2104, - "step": 16118 - }, - { - "epoch": 1.5184757777725442, - "grad_norm": 0.7083778977394104, - "learning_rate": 2.833227880819799e-06, - "loss": 0.214, - "step": 16119 - }, - { - "epoch": 1.5185699818657121, - "grad_norm": 0.6527898907661438, - "learning_rate": 2.8321748683154893e-06, - "loss": 0.1839, - "step": 16120 - }, - { - "epoch": 1.5186641859588799, - "grad_norm": 0.6631081104278564, - "learning_rate": 2.8311220192486743e-06, - "loss": 0.2002, - "step": 16121 - }, - { - "epoch": 1.5187583900520476, - "grad_norm": 0.6198391318321228, - "learning_rate": 2.830069333643367e-06, - "loss": 0.203, - "step": 16122 - }, - { - "epoch": 1.5188525941452156, - "grad_norm": 0.709550678730011, - "learning_rate": 2.829016811523565e-06, - "loss": 0.1872, - "step": 16123 - }, - { - "epoch": 1.5189467982383835, - "grad_norm": 
0.6676455736160278, - "learning_rate": 2.827964452913269e-06, - "loss": 0.2141, - "step": 16124 - }, - { - "epoch": 1.5190410023315513, - "grad_norm": 0.6386942863464355, - "learning_rate": 2.8269122578364792e-06, - "loss": 0.1799, - "step": 16125 - }, - { - "epoch": 1.519135206424719, - "grad_norm": 0.6231666207313538, - "learning_rate": 2.82586022631718e-06, - "loss": 0.2025, - "step": 16126 - }, - { - "epoch": 1.519229410517887, - "grad_norm": 0.6799577474594116, - "learning_rate": 2.8248083583793616e-06, - "loss": 0.2179, - "step": 16127 - }, - { - "epoch": 1.519323614611055, - "grad_norm": 0.6247674226760864, - "learning_rate": 2.823756654047014e-06, - "loss": 0.1725, - "step": 16128 - }, - { - "epoch": 1.5194178187042227, - "grad_norm": 0.6503288149833679, - "learning_rate": 2.8227051133441087e-06, - "loss": 0.2059, - "step": 16129 - }, - { - "epoch": 1.5195120227973904, - "grad_norm": 0.6307724118232727, - "learning_rate": 2.821653736294627e-06, - "loss": 0.2074, - "step": 16130 - }, - { - "epoch": 1.5196062268905584, - "grad_norm": 0.7084416747093201, - "learning_rate": 2.8206025229225453e-06, - "loss": 0.2136, - "step": 16131 - }, - { - "epoch": 1.5197004309837263, - "grad_norm": 0.6676310300827026, - "learning_rate": 2.819551473251828e-06, - "loss": 0.194, - "step": 16132 - }, - { - "epoch": 1.519794635076894, - "grad_norm": 0.6236906051635742, - "learning_rate": 2.8185005873064365e-06, - "loss": 0.1877, - "step": 16133 - }, - { - "epoch": 1.5198888391700618, - "grad_norm": 0.6586359143257141, - "learning_rate": 2.8174498651103445e-06, - "loss": 0.1878, - "step": 16134 - }, - { - "epoch": 1.5199830432632297, - "grad_norm": 0.6925024390220642, - "learning_rate": 2.816399306687503e-06, - "loss": 0.2178, - "step": 16135 - }, - { - "epoch": 1.5200772473563977, - "grad_norm": 0.7016938924789429, - "learning_rate": 2.8153489120618647e-06, - "loss": 0.1892, - "step": 16136 - }, - { - "epoch": 1.5201714514495654, - "grad_norm": 0.5964245200157166, - "learning_rate": 2.814298681257381e-06, - "loss": 0.1835, - "step": 16137 - }, - { - "epoch": 1.5202656555427332, - "grad_norm": 0.6487076282501221, - "learning_rate": 2.8132486142980052e-06, - "loss": 0.2053, - "step": 16138 - }, - { - "epoch": 1.5203598596359011, - "grad_norm": 0.6946191787719727, - "learning_rate": 2.812198711207671e-06, - "loss": 0.2294, - "step": 16139 - }, - { - "epoch": 1.520454063729069, - "grad_norm": 0.634026825428009, - "learning_rate": 2.8111489720103235e-06, - "loss": 0.1711, - "step": 16140 - }, - { - "epoch": 1.5205482678222368, - "grad_norm": 0.634139895439148, - "learning_rate": 2.8100993967298996e-06, - "loss": 0.1804, - "step": 16141 - }, - { - "epoch": 1.5206424719154046, - "grad_norm": 0.6851383447647095, - "learning_rate": 2.809049985390325e-06, - "loss": 0.1967, - "step": 16142 - }, - { - "epoch": 1.5207366760085725, - "grad_norm": 0.6436731219291687, - "learning_rate": 2.808000738015533e-06, - "loss": 0.1967, - "step": 16143 - }, - { - "epoch": 1.5208308801017405, - "grad_norm": 0.6751908659934998, - "learning_rate": 2.8069516546294494e-06, - "loss": 0.1777, - "step": 16144 - }, - { - "epoch": 1.5209250841949082, - "grad_norm": 0.6696066856384277, - "learning_rate": 2.80590273525599e-06, - "loss": 0.1933, - "step": 16145 - }, - { - "epoch": 1.521019288288076, - "grad_norm": 0.6453372836112976, - "learning_rate": 2.804853979919073e-06, - "loss": 0.1992, - "step": 16146 - }, - { - "epoch": 1.521113492381244, - "grad_norm": 0.6461153626441956, - "learning_rate": 2.8038053886426166e-06, - "loss": 0.1926, - 
"step": 16147 - }, - { - "epoch": 1.521207696474412, - "grad_norm": 0.6716781854629517, - "learning_rate": 2.802756961450522e-06, - "loss": 0.2198, - "step": 16148 - }, - { - "epoch": 1.5213019005675796, - "grad_norm": 0.5820134878158569, - "learning_rate": 2.8017086983667007e-06, - "loss": 0.1837, - "step": 16149 - }, - { - "epoch": 1.5213961046607474, - "grad_norm": 0.6865665912628174, - "learning_rate": 2.800660599415057e-06, - "loss": 0.2445, - "step": 16150 - }, - { - "epoch": 1.5214903087539153, - "grad_norm": 0.6357466578483582, - "learning_rate": 2.7996126646194844e-06, - "loss": 0.1814, - "step": 16151 - }, - { - "epoch": 1.5215845128470833, - "grad_norm": 0.848849892616272, - "learning_rate": 2.7985648940038766e-06, - "loss": 0.1885, - "step": 16152 - }, - { - "epoch": 1.521678716940251, - "grad_norm": 0.6532415151596069, - "learning_rate": 2.797517287592125e-06, - "loss": 0.1881, - "step": 16153 - }, - { - "epoch": 1.5217729210334188, - "grad_norm": 0.6398544311523438, - "learning_rate": 2.796469845408123e-06, - "loss": 0.1962, - "step": 16154 - }, - { - "epoch": 1.5218671251265867, - "grad_norm": 0.7329211831092834, - "learning_rate": 2.795422567475745e-06, - "loss": 0.2183, - "step": 16155 - }, - { - "epoch": 1.5219613292197547, - "grad_norm": 0.6775096654891968, - "learning_rate": 2.794375453818875e-06, - "loss": 0.1954, - "step": 16156 - }, - { - "epoch": 1.5220555333129224, - "grad_norm": 0.6235827803611755, - "learning_rate": 2.793328504461391e-06, - "loss": 0.1837, - "step": 16157 - }, - { - "epoch": 1.5221497374060902, - "grad_norm": 0.7192054986953735, - "learning_rate": 2.792281719427159e-06, - "loss": 0.1807, - "step": 16158 - }, - { - "epoch": 1.5222439414992581, - "grad_norm": 0.6491178870201111, - "learning_rate": 2.7912350987400515e-06, - "loss": 0.2009, - "step": 16159 - }, - { - "epoch": 1.522338145592426, - "grad_norm": 0.6319215893745422, - "learning_rate": 2.7901886424239346e-06, - "loss": 0.2008, - "step": 16160 - }, - { - "epoch": 1.5224323496855938, - "grad_norm": 0.5874225497245789, - "learning_rate": 2.7891423505026647e-06, - "loss": 0.1818, - "step": 16161 - }, - { - "epoch": 1.5225265537787616, - "grad_norm": 0.5624944567680359, - "learning_rate": 2.788096223000103e-06, - "loss": 0.1818, - "step": 16162 - }, - { - "epoch": 1.5226207578719295, - "grad_norm": 0.5381230711936951, - "learning_rate": 2.787050259940098e-06, - "loss": 0.1797, - "step": 16163 - }, - { - "epoch": 1.5227149619650975, - "grad_norm": 0.6449101567268372, - "learning_rate": 2.786004461346503e-06, - "loss": 0.1911, - "step": 16164 - }, - { - "epoch": 1.5228091660582652, - "grad_norm": 0.7586252689361572, - "learning_rate": 2.784958827243166e-06, - "loss": 0.2076, - "step": 16165 - }, - { - "epoch": 1.522903370151433, - "grad_norm": 0.7198145985603333, - "learning_rate": 2.7839133576539224e-06, - "loss": 0.185, - "step": 16166 - }, - { - "epoch": 1.522997574244601, - "grad_norm": 0.6372790336608887, - "learning_rate": 2.782868052602614e-06, - "loss": 0.1921, - "step": 16167 - }, - { - "epoch": 1.5230917783377689, - "grad_norm": 0.7216392755508423, - "learning_rate": 2.781822912113079e-06, - "loss": 0.1935, - "step": 16168 - }, - { - "epoch": 1.5231859824309366, - "grad_norm": 0.6526051163673401, - "learning_rate": 2.7807779362091415e-06, - "loss": 0.18, - "step": 16169 - }, - { - "epoch": 1.5232801865241044, - "grad_norm": 0.7025391459465027, - "learning_rate": 2.779733124914631e-06, - "loss": 0.2075, - "step": 16170 - }, - { - "epoch": 1.5233743906172723, - "grad_norm": 
0.6882464289665222, - "learning_rate": 2.7786884782533765e-06, - "loss": 0.2282, - "step": 16171 - }, - { - "epoch": 1.5234685947104403, - "grad_norm": 0.7818648219108582, - "learning_rate": 2.777643996249191e-06, - "loss": 0.2138, - "step": 16172 - }, - { - "epoch": 1.523562798803608, - "grad_norm": 0.642633855342865, - "learning_rate": 2.7765996789258863e-06, - "loss": 0.1938, - "step": 16173 - }, - { - "epoch": 1.5236570028967757, - "grad_norm": 0.6424699425697327, - "learning_rate": 2.7755555263072866e-06, - "loss": 0.1951, - "step": 16174 - }, - { - "epoch": 1.5237512069899437, - "grad_norm": 0.6402602791786194, - "learning_rate": 2.774511538417193e-06, - "loss": 0.2002, - "step": 16175 - }, - { - "epoch": 1.5238454110831117, - "grad_norm": 0.6372763514518738, - "learning_rate": 2.7734677152794087e-06, - "loss": 0.1884, - "step": 16176 - }, - { - "epoch": 1.5239396151762794, - "grad_norm": 0.7095897197723389, - "learning_rate": 2.772424056917735e-06, - "loss": 0.1976, - "step": 16177 - }, - { - "epoch": 1.5240338192694471, - "grad_norm": 0.6663805246353149, - "learning_rate": 2.7713805633559755e-06, - "loss": 0.2092, - "step": 16178 - }, - { - "epoch": 1.524128023362615, - "grad_norm": 0.6282491087913513, - "learning_rate": 2.7703372346179145e-06, - "loss": 0.1948, - "step": 16179 - }, - { - "epoch": 1.524222227455783, - "grad_norm": 0.5726357102394104, - "learning_rate": 2.7692940707273453e-06, - "loss": 0.1569, - "step": 16180 - }, - { - "epoch": 1.5243164315489508, - "grad_norm": 0.5925726294517517, - "learning_rate": 2.7682510717080568e-06, - "loss": 0.192, - "step": 16181 - }, - { - "epoch": 1.5244106356421185, - "grad_norm": 0.5967519283294678, - "learning_rate": 2.767208237583825e-06, - "loss": 0.1785, - "step": 16182 - }, - { - "epoch": 1.5245048397352865, - "grad_norm": 0.7821174263954163, - "learning_rate": 2.7661655683784305e-06, - "loss": 0.2038, - "step": 16183 - }, - { - "epoch": 1.5245990438284545, - "grad_norm": 0.6633504033088684, - "learning_rate": 2.7651230641156524e-06, - "loss": 0.2039, - "step": 16184 - }, - { - "epoch": 1.5246932479216222, - "grad_norm": 0.6780624985694885, - "learning_rate": 2.7640807248192535e-06, - "loss": 0.2173, - "step": 16185 - }, - { - "epoch": 1.52478745201479, - "grad_norm": 0.6351194381713867, - "learning_rate": 2.7630385505130054e-06, - "loss": 0.1945, - "step": 16186 - }, - { - "epoch": 1.524881656107958, - "grad_norm": 0.6171590685844421, - "learning_rate": 2.761996541220674e-06, - "loss": 0.1956, - "step": 16187 - }, - { - "epoch": 1.5249758602011259, - "grad_norm": 1.4074138402938843, - "learning_rate": 2.7609546969660117e-06, - "loss": 0.166, - "step": 16188 - }, - { - "epoch": 1.5250700642942936, - "grad_norm": 0.6705545783042908, - "learning_rate": 2.7599130177727775e-06, - "loss": 0.1659, - "step": 16189 - }, - { - "epoch": 1.5251642683874613, - "grad_norm": 0.7179521322250366, - "learning_rate": 2.758871503664726e-06, - "loss": 0.2459, - "step": 16190 - }, - { - "epoch": 1.5252584724806293, - "grad_norm": 0.9105493426322937, - "learning_rate": 2.757830154665604e-06, - "loss": 0.2044, - "step": 16191 - }, - { - "epoch": 1.5253526765737973, - "grad_norm": 1.5305920839309692, - "learning_rate": 2.75678897079915e-06, - "loss": 0.1955, - "step": 16192 - }, - { - "epoch": 1.525446880666965, - "grad_norm": 0.601993203163147, - "learning_rate": 2.7557479520891104e-06, - "loss": 0.1768, - "step": 16193 - }, - { - "epoch": 1.5255410847601327, - "grad_norm": 0.7127960920333862, - "learning_rate": 2.754707098559225e-06, - "loss": 
0.1897, - "step": 16194 - }, - { - "epoch": 1.5256352888533007, - "grad_norm": 0.6438483595848083, - "learning_rate": 2.7536664102332177e-06, - "loss": 0.1962, - "step": 16195 - }, - { - "epoch": 1.5257294929464686, - "grad_norm": 0.7449595928192139, - "learning_rate": 2.7526258871348245e-06, - "loss": 0.2078, - "step": 16196 - }, - { - "epoch": 1.5258236970396364, - "grad_norm": 0.6828126907348633, - "learning_rate": 2.7515855292877714e-06, - "loss": 0.1838, - "step": 16197 - }, - { - "epoch": 1.5259179011328041, - "grad_norm": 0.6403012871742249, - "learning_rate": 2.750545336715776e-06, - "loss": 0.2085, - "step": 16198 - }, - { - "epoch": 1.526012105225972, - "grad_norm": 0.6840063333511353, - "learning_rate": 2.7495053094425584e-06, - "loss": 0.2087, - "step": 16199 - }, - { - "epoch": 1.52610630931914, - "grad_norm": 0.6032674312591553, - "learning_rate": 2.748465447491835e-06, - "loss": 0.197, - "step": 16200 - }, - { - "epoch": 1.5262005134123078, - "grad_norm": 0.6746863126754761, - "learning_rate": 2.7474257508873117e-06, - "loss": 0.1897, - "step": 16201 - }, - { - "epoch": 1.5262947175054755, - "grad_norm": 0.6539238691329956, - "learning_rate": 2.7463862196527e-06, - "loss": 0.1987, - "step": 16202 - }, - { - "epoch": 1.5263889215986435, - "grad_norm": 0.7085531949996948, - "learning_rate": 2.745346853811698e-06, - "loss": 0.2066, - "step": 16203 - }, - { - "epoch": 1.5264831256918114, - "grad_norm": 0.6681137084960938, - "learning_rate": 2.7443076533880074e-06, - "loss": 0.2126, - "step": 16204 - }, - { - "epoch": 1.5265773297849792, - "grad_norm": 0.5994083285331726, - "learning_rate": 2.743268618405326e-06, - "loss": 0.1887, - "step": 16205 - }, - { - "epoch": 1.526671533878147, - "grad_norm": 0.653131365776062, - "learning_rate": 2.7422297488873395e-06, - "loss": 0.1942, - "step": 16206 - }, - { - "epoch": 1.5267657379713149, - "grad_norm": 0.6739969849586487, - "learning_rate": 2.7411910448577405e-06, - "loss": 0.1897, - "step": 16207 - }, - { - "epoch": 1.5268599420644828, - "grad_norm": 0.6109043955802917, - "learning_rate": 2.7401525063402137e-06, - "loss": 0.1867, - "step": 16208 - }, - { - "epoch": 1.5269541461576506, - "grad_norm": 0.6434895396232605, - "learning_rate": 2.7391141333584335e-06, - "loss": 0.1783, - "step": 16209 - }, - { - "epoch": 1.5270483502508183, - "grad_norm": 0.6863971948623657, - "learning_rate": 2.738075925936081e-06, - "loss": 0.2047, - "step": 16210 - }, - { - "epoch": 1.5271425543439863, - "grad_norm": 0.6185998320579529, - "learning_rate": 2.7370378840968315e-06, - "loss": 0.2065, - "step": 16211 - }, - { - "epoch": 1.5272367584371542, - "grad_norm": 0.623668909072876, - "learning_rate": 2.7360000078643512e-06, - "loss": 0.1946, - "step": 16212 - }, - { - "epoch": 1.527330962530322, - "grad_norm": 0.6324273347854614, - "learning_rate": 2.734962297262297e-06, - "loss": 0.1886, - "step": 16213 - }, - { - "epoch": 1.5274251666234897, - "grad_norm": 0.6629226803779602, - "learning_rate": 2.733924752314345e-06, - "loss": 0.212, - "step": 16214 - }, - { - "epoch": 1.5275193707166577, - "grad_norm": 0.6107110381126404, - "learning_rate": 2.7328873730441465e-06, - "loss": 0.1907, - "step": 16215 - }, - { - "epoch": 1.5276135748098256, - "grad_norm": 0.7059251070022583, - "learning_rate": 2.731850159475351e-06, - "loss": 0.2054, - "step": 16216 - }, - { - "epoch": 1.5277077789029934, - "grad_norm": 0.6416538953781128, - "learning_rate": 2.7308131116316117e-06, - "loss": 0.1819, - "step": 16217 - }, - { - "epoch": 1.527801982996161, - "grad_norm": 
0.6149570345878601, - "learning_rate": 2.7297762295365794e-06, - "loss": 0.1737, - "step": 16218 - }, - { - "epoch": 1.527896187089329, - "grad_norm": 0.632332444190979, - "learning_rate": 2.7287395132138893e-06, - "loss": 0.1843, - "step": 16219 - }, - { - "epoch": 1.527990391182497, - "grad_norm": 0.7795052528381348, - "learning_rate": 2.727702962687183e-06, - "loss": 0.2041, - "step": 16220 - }, - { - "epoch": 1.5280845952756648, - "grad_norm": 0.6708505749702454, - "learning_rate": 2.7266665779800996e-06, - "loss": 0.1861, - "step": 16221 - }, - { - "epoch": 1.5281787993688325, - "grad_norm": 0.6182888746261597, - "learning_rate": 2.7256303591162637e-06, - "loss": 0.2017, - "step": 16222 - }, - { - "epoch": 1.5282730034620005, - "grad_norm": 0.7270907163619995, - "learning_rate": 2.724594306119306e-06, - "loss": 0.203, - "step": 16223 - }, - { - "epoch": 1.5283672075551684, - "grad_norm": 0.6367576718330383, - "learning_rate": 2.7235584190128517e-06, - "loss": 0.1878, - "step": 16224 - }, - { - "epoch": 1.5284614116483362, - "grad_norm": 0.6641680002212524, - "learning_rate": 2.7225226978205164e-06, - "loss": 0.1818, - "step": 16225 - }, - { - "epoch": 1.528555615741504, - "grad_norm": 0.6329187750816345, - "learning_rate": 2.7214871425659182e-06, - "loss": 0.1803, - "step": 16226 - }, - { - "epoch": 1.5286498198346719, - "grad_norm": 0.6695205569267273, - "learning_rate": 2.7204517532726724e-06, - "loss": 0.1793, - "step": 16227 - }, - { - "epoch": 1.5287440239278398, - "grad_norm": 0.6727643609046936, - "learning_rate": 2.719416529964385e-06, - "loss": 0.1958, - "step": 16228 - }, - { - "epoch": 1.5288382280210076, - "grad_norm": 0.6859456300735474, - "learning_rate": 2.718381472664654e-06, - "loss": 0.219, - "step": 16229 - }, - { - "epoch": 1.5289324321141753, - "grad_norm": 0.6441728472709656, - "learning_rate": 2.7173465813970934e-06, - "loss": 0.1676, - "step": 16230 - }, - { - "epoch": 1.5290266362073432, - "grad_norm": 0.6026852130889893, - "learning_rate": 2.716311856185293e-06, - "loss": 0.1913, - "step": 16231 - }, - { - "epoch": 1.5291208403005112, - "grad_norm": 0.7040894627571106, - "learning_rate": 2.715277297052844e-06, - "loss": 0.2152, - "step": 16232 - }, - { - "epoch": 1.529215044393679, - "grad_norm": 0.6501896381378174, - "learning_rate": 2.7142429040233387e-06, - "loss": 0.2194, - "step": 16233 - }, - { - "epoch": 1.5293092484868467, - "grad_norm": 0.6888206005096436, - "learning_rate": 2.713208677120365e-06, - "loss": 0.1928, - "step": 16234 - }, - { - "epoch": 1.5294034525800146, - "grad_norm": 0.7510620951652527, - "learning_rate": 2.7121746163675e-06, - "loss": 0.2071, - "step": 16235 - }, - { - "epoch": 1.5294976566731826, - "grad_norm": 0.5633466839790344, - "learning_rate": 2.7111407217883255e-06, - "loss": 0.1927, - "step": 16236 - }, - { - "epoch": 1.5295918607663503, - "grad_norm": 0.6861047148704529, - "learning_rate": 2.7101069934064174e-06, - "loss": 0.188, - "step": 16237 - }, - { - "epoch": 1.529686064859518, - "grad_norm": 0.6694995164871216, - "learning_rate": 2.7090734312453404e-06, - "loss": 0.2055, - "step": 16238 - }, - { - "epoch": 1.529780268952686, - "grad_norm": 0.6832932829856873, - "learning_rate": 2.708040035328665e-06, - "loss": 0.1818, - "step": 16239 - }, - { - "epoch": 1.529874473045854, - "grad_norm": 0.675410270690918, - "learning_rate": 2.707006805679958e-06, - "loss": 0.2025, - "step": 16240 - }, - { - "epoch": 1.5299686771390217, - "grad_norm": 0.6247263550758362, - "learning_rate": 2.7059737423227706e-06, - "loss": 0.1856, - 
"step": 16241 - }, - { - "epoch": 1.5300628812321895, - "grad_norm": 0.6771447658538818, - "learning_rate": 2.7049408452806656e-06, - "loss": 0.1922, - "step": 16242 - }, - { - "epoch": 1.5301570853253574, - "grad_norm": 0.7252166867256165, - "learning_rate": 2.7039081145771882e-06, - "loss": 0.1966, - "step": 16243 - }, - { - "epoch": 1.5302512894185254, - "grad_norm": 0.6284207701683044, - "learning_rate": 2.7028755502358907e-06, - "loss": 0.1801, - "step": 16244 - }, - { - "epoch": 1.5303454935116931, - "grad_norm": 0.5970359444618225, - "learning_rate": 2.7018431522803166e-06, - "loss": 0.1593, - "step": 16245 - }, - { - "epoch": 1.5304396976048609, - "grad_norm": 0.6221842765808105, - "learning_rate": 2.700810920734004e-06, - "loss": 0.1908, - "step": 16246 - }, - { - "epoch": 1.5305339016980288, - "grad_norm": 0.6366804242134094, - "learning_rate": 2.6997788556204906e-06, - "loss": 0.2117, - "step": 16247 - }, - { - "epoch": 1.5306281057911968, - "grad_norm": 0.6622896194458008, - "learning_rate": 2.6987469569633117e-06, - "loss": 0.2163, - "step": 16248 - }, - { - "epoch": 1.5307223098843645, - "grad_norm": 0.6698190569877625, - "learning_rate": 2.6977152247859917e-06, - "loss": 0.2053, - "step": 16249 - }, - { - "epoch": 1.5308165139775323, - "grad_norm": 0.7152218222618103, - "learning_rate": 2.696683659112057e-06, - "loss": 0.1911, - "step": 16250 - }, - { - "epoch": 1.5309107180707002, - "grad_norm": 0.7443094849586487, - "learning_rate": 2.6956522599650335e-06, - "loss": 0.2291, - "step": 16251 - }, - { - "epoch": 1.531004922163868, - "grad_norm": 0.7532116770744324, - "learning_rate": 2.6946210273684336e-06, - "loss": 0.201, - "step": 16252 - }, - { - "epoch": 1.5310991262570357, - "grad_norm": 0.6666504144668579, - "learning_rate": 2.6935899613457705e-06, - "loss": 0.1915, - "step": 16253 - }, - { - "epoch": 1.5311933303502037, - "grad_norm": 0.7396573424339294, - "learning_rate": 2.6925590619205553e-06, - "loss": 0.2026, - "step": 16254 - }, - { - "epoch": 1.5312875344433716, - "grad_norm": 0.6690954566001892, - "learning_rate": 2.6915283291162973e-06, - "loss": 0.1947, - "step": 16255 - }, - { - "epoch": 1.5313817385365394, - "grad_norm": 0.6184488534927368, - "learning_rate": 2.6904977629564942e-06, - "loss": 0.1685, - "step": 16256 - }, - { - "epoch": 1.531475942629707, - "grad_norm": 0.7449617981910706, - "learning_rate": 2.6894673634646464e-06, - "loss": 0.2041, - "step": 16257 - }, - { - "epoch": 1.531570146722875, - "grad_norm": 0.7334285974502563, - "learning_rate": 2.688437130664251e-06, - "loss": 0.1808, - "step": 16258 - }, - { - "epoch": 1.531664350816043, - "grad_norm": 0.7738111019134521, - "learning_rate": 2.687407064578793e-06, - "loss": 0.1998, - "step": 16259 - }, - { - "epoch": 1.5317585549092108, - "grad_norm": 0.6780102252960205, - "learning_rate": 2.6863771652317638e-06, - "loss": 0.2011, - "step": 16260 - }, - { - "epoch": 1.5318527590023785, - "grad_norm": 0.6936577558517456, - "learning_rate": 2.6853474326466487e-06, - "loss": 0.1961, - "step": 16261 - }, - { - "epoch": 1.5319469630955465, - "grad_norm": 0.6879404783248901, - "learning_rate": 2.684317866846923e-06, - "loss": 0.218, - "step": 16262 - }, - { - "epoch": 1.5320411671887144, - "grad_norm": 1.008069634437561, - "learning_rate": 2.6832884678560623e-06, - "loss": 0.2037, - "step": 16263 - }, - { - "epoch": 1.5321353712818822, - "grad_norm": 0.6407217383384705, - "learning_rate": 2.682259235697543e-06, - "loss": 0.1791, - "step": 16264 - }, - { - "epoch": 1.53222957537505, - "grad_norm": 
0.6090212464332581, - "learning_rate": 2.6812301703948273e-06, - "loss": 0.1764, - "step": 16265 - }, - { - "epoch": 1.5323237794682179, - "grad_norm": 0.6163294911384583, - "learning_rate": 2.680201271971383e-06, - "loss": 0.1788, - "step": 16266 - }, - { - "epoch": 1.5324179835613858, - "grad_norm": 0.647861897945404, - "learning_rate": 2.679172540450672e-06, - "loss": 0.1781, - "step": 16267 - }, - { - "epoch": 1.5325121876545535, - "grad_norm": 0.6163336038589478, - "learning_rate": 2.67814397585615e-06, - "loss": 0.2099, - "step": 16268 - }, - { - "epoch": 1.5326063917477213, - "grad_norm": 0.7356839179992676, - "learning_rate": 2.6771155782112624e-06, - "loss": 0.2283, - "step": 16269 - }, - { - "epoch": 1.5327005958408892, - "grad_norm": 0.6488597989082336, - "learning_rate": 2.676087347539471e-06, - "loss": 0.1928, - "step": 16270 - }, - { - "epoch": 1.5327947999340572, - "grad_norm": 0.6527369618415833, - "learning_rate": 2.6750592838642144e-06, - "loss": 0.2036, - "step": 16271 - }, - { - "epoch": 1.532889004027225, - "grad_norm": 1.2340357303619385, - "learning_rate": 2.6740313872089306e-06, - "loss": 0.2188, - "step": 16272 - }, - { - "epoch": 1.5329832081203927, - "grad_norm": 0.6677947640419006, - "learning_rate": 2.6730036575970618e-06, - "loss": 0.1902, - "step": 16273 - }, - { - "epoch": 1.5330774122135606, - "grad_norm": 0.6580592393875122, - "learning_rate": 2.6719760950520445e-06, - "loss": 0.1929, - "step": 16274 - }, - { - "epoch": 1.5331716163067286, - "grad_norm": 0.6391635537147522, - "learning_rate": 2.670948699597302e-06, - "loss": 0.1764, - "step": 16275 - }, - { - "epoch": 1.5332658203998963, - "grad_norm": 0.6302698254585266, - "learning_rate": 2.6699214712562627e-06, - "loss": 0.186, - "step": 16276 - }, - { - "epoch": 1.533360024493064, - "grad_norm": 0.6598572134971619, - "learning_rate": 2.668894410052354e-06, - "loss": 0.201, - "step": 16277 - }, - { - "epoch": 1.533454228586232, - "grad_norm": 0.6469151377677917, - "learning_rate": 2.6678675160089872e-06, - "loss": 0.2002, - "step": 16278 - }, - { - "epoch": 1.5335484326794, - "grad_norm": 0.5996238589286804, - "learning_rate": 2.6668407891495806e-06, - "loss": 0.1552, - "step": 16279 - }, - { - "epoch": 1.5336426367725677, - "grad_norm": 0.5995888710021973, - "learning_rate": 2.6658142294975486e-06, - "loss": 0.1846, - "step": 16280 - }, - { - "epoch": 1.5337368408657355, - "grad_norm": 0.6957386136054993, - "learning_rate": 2.6647878370762903e-06, - "loss": 0.2026, - "step": 16281 - }, - { - "epoch": 1.5338310449589034, - "grad_norm": 0.5960253477096558, - "learning_rate": 2.663761611909218e-06, - "loss": 0.1952, - "step": 16282 - }, - { - "epoch": 1.5339252490520714, - "grad_norm": 0.6038472056388855, - "learning_rate": 2.662735554019722e-06, - "loss": 0.1941, - "step": 16283 - }, - { - "epoch": 1.5340194531452391, - "grad_norm": 0.7028207778930664, - "learning_rate": 2.6617096634312036e-06, - "loss": 0.1982, - "step": 16284 - }, - { - "epoch": 1.5341136572384069, - "grad_norm": 0.708573043346405, - "learning_rate": 2.660683940167057e-06, - "loss": 0.1869, - "step": 16285 - }, - { - "epoch": 1.5342078613315748, - "grad_norm": 0.6810007691383362, - "learning_rate": 2.659658384250663e-06, - "loss": 0.2024, - "step": 16286 - }, - { - "epoch": 1.5343020654247428, - "grad_norm": 0.6183075308799744, - "learning_rate": 2.6586329957054114e-06, - "loss": 0.1765, - "step": 16287 - }, - { - "epoch": 1.5343962695179105, - "grad_norm": 0.6494288444519043, - "learning_rate": 2.6576077745546823e-06, - "loss": 
0.1976, - "step": 16288 - }, - { - "epoch": 1.5344904736110783, - "grad_norm": 0.6704617738723755, - "learning_rate": 2.65658272082185e-06, - "loss": 0.1741, - "step": 16289 - }, - { - "epoch": 1.5345846777042462, - "grad_norm": 0.6166954636573792, - "learning_rate": 2.655557834530288e-06, - "loss": 0.1712, - "step": 16290 - }, - { - "epoch": 1.5346788817974142, - "grad_norm": 0.7561928629875183, - "learning_rate": 2.6545331157033682e-06, - "loss": 0.191, - "step": 16291 - }, - { - "epoch": 1.534773085890582, - "grad_norm": 0.6683579087257385, - "learning_rate": 2.653508564364453e-06, - "loss": 0.195, - "step": 16292 - }, - { - "epoch": 1.5348672899837497, - "grad_norm": 0.6410741209983826, - "learning_rate": 2.652484180536902e-06, - "loss": 0.2003, - "step": 16293 - }, - { - "epoch": 1.5349614940769176, - "grad_norm": 0.6511902809143066, - "learning_rate": 2.6514599642440742e-06, - "loss": 0.1995, - "step": 16294 - }, - { - "epoch": 1.5350556981700856, - "grad_norm": 0.6683751940727234, - "learning_rate": 2.6504359155093273e-06, - "loss": 0.1971, - "step": 16295 - }, - { - "epoch": 1.5351499022632533, - "grad_norm": 0.6326442360877991, - "learning_rate": 2.649412034356005e-06, - "loss": 0.1763, - "step": 16296 - }, - { - "epoch": 1.535244106356421, - "grad_norm": 0.6577661633491516, - "learning_rate": 2.6483883208074557e-06, - "loss": 0.218, - "step": 16297 - }, - { - "epoch": 1.535338310449589, - "grad_norm": 0.673072338104248, - "learning_rate": 2.6473647748870258e-06, - "loss": 0.228, - "step": 16298 - }, - { - "epoch": 1.535432514542757, - "grad_norm": 0.6371365189552307, - "learning_rate": 2.6463413966180463e-06, - "loss": 0.1773, - "step": 16299 - }, - { - "epoch": 1.5355267186359247, - "grad_norm": 0.7425619959831238, - "learning_rate": 2.6453181860238563e-06, - "loss": 0.2231, - "step": 16300 - }, - { - "epoch": 1.5356209227290925, - "grad_norm": 0.6594802141189575, - "learning_rate": 2.6442951431277886e-06, - "loss": 0.2232, - "step": 16301 - }, - { - "epoch": 1.5357151268222604, - "grad_norm": 0.5638049840927124, - "learning_rate": 2.6432722679531654e-06, - "loss": 0.1602, - "step": 16302 - }, - { - "epoch": 1.5358093309154284, - "grad_norm": 0.6391991972923279, - "learning_rate": 2.64224956052331e-06, - "loss": 0.2083, - "step": 16303 - }, - { - "epoch": 1.535903535008596, - "grad_norm": 0.6430094242095947, - "learning_rate": 2.6412270208615477e-06, - "loss": 0.2043, - "step": 16304 - }, - { - "epoch": 1.5359977391017638, - "grad_norm": 0.6466431021690369, - "learning_rate": 2.6402046489911904e-06, - "loss": 0.1839, - "step": 16305 - }, - { - "epoch": 1.5360919431949318, - "grad_norm": 0.6663456559181213, - "learning_rate": 2.639182444935542e-06, - "loss": 0.1863, - "step": 16306 - }, - { - "epoch": 1.5361861472880998, - "grad_norm": 0.5823646187782288, - "learning_rate": 2.6381604087179247e-06, - "loss": 0.1977, - "step": 16307 - }, - { - "epoch": 1.5362803513812675, - "grad_norm": 0.6902181506156921, - "learning_rate": 2.6371385403616345e-06, - "loss": 0.2364, - "step": 16308 - }, - { - "epoch": 1.5363745554744352, - "grad_norm": 0.5892136693000793, - "learning_rate": 2.636116839889967e-06, - "loss": 0.1787, - "step": 16309 - }, - { - "epoch": 1.5364687595676032, - "grad_norm": 0.6292369961738586, - "learning_rate": 2.6350953073262297e-06, - "loss": 0.2, - "step": 16310 - }, - { - "epoch": 1.5365629636607712, - "grad_norm": 0.6879388689994812, - "learning_rate": 2.6340739426937103e-06, - "loss": 0.1945, - "step": 16311 - }, - { - "epoch": 1.536657167753939, - "grad_norm": 
0.6777614951133728, - "learning_rate": 2.633052746015693e-06, - "loss": 0.1971, - "step": 16312 - }, - { - "epoch": 1.5367513718471066, - "grad_norm": 0.605811357498169, - "learning_rate": 2.6320317173154665e-06, - "loss": 0.1832, - "step": 16313 - }, - { - "epoch": 1.5368455759402746, - "grad_norm": 0.6477255821228027, - "learning_rate": 2.6310108566163138e-06, - "loss": 0.2055, - "step": 16314 - }, - { - "epoch": 1.5369397800334426, - "grad_norm": 0.625795841217041, - "learning_rate": 2.629990163941507e-06, - "loss": 0.1673, - "step": 16315 - }, - { - "epoch": 1.5370339841266103, - "grad_norm": 0.6637027859687805, - "learning_rate": 2.628969639314324e-06, - "loss": 0.1795, - "step": 16316 - }, - { - "epoch": 1.537128188219778, - "grad_norm": 0.6212215423583984, - "learning_rate": 2.6279492827580345e-06, - "loss": 0.1654, - "step": 16317 - }, - { - "epoch": 1.537222392312946, - "grad_norm": 0.6320905685424805, - "learning_rate": 2.626929094295899e-06, - "loss": 0.1973, - "step": 16318 - }, - { - "epoch": 1.537316596406114, - "grad_norm": 0.642948567867279, - "learning_rate": 2.625909073951184e-06, - "loss": 0.1787, - "step": 16319 - }, - { - "epoch": 1.5374108004992817, - "grad_norm": 0.6066054701805115, - "learning_rate": 2.624889221747149e-06, - "loss": 0.1766, - "step": 16320 - }, - { - "epoch": 1.5375050045924494, - "grad_norm": 0.7016599178314209, - "learning_rate": 2.623869537707042e-06, - "loss": 0.229, - "step": 16321 - }, - { - "epoch": 1.5375992086856174, - "grad_norm": 0.7277583479881287, - "learning_rate": 2.62285002185412e-06, - "loss": 0.1804, - "step": 16322 - }, - { - "epoch": 1.5376934127787854, - "grad_norm": 0.6963581442832947, - "learning_rate": 2.621830674211624e-06, - "loss": 0.1875, - "step": 16323 - }, - { - "epoch": 1.537787616871953, - "grad_norm": 0.7287015914916992, - "learning_rate": 2.6208114948027987e-06, - "loss": 0.2095, - "step": 16324 - }, - { - "epoch": 1.5378818209651208, - "grad_norm": 0.6737769246101379, - "learning_rate": 2.619792483650887e-06, - "loss": 0.2023, - "step": 16325 - }, - { - "epoch": 1.5379760250582888, - "grad_norm": 0.678702175617218, - "learning_rate": 2.6187736407791178e-06, - "loss": 0.1948, - "step": 16326 - }, - { - "epoch": 1.5380702291514567, - "grad_norm": 0.6418389081954956, - "learning_rate": 2.6177549662107237e-06, - "loss": 0.1974, - "step": 16327 - }, - { - "epoch": 1.5381644332446245, - "grad_norm": 0.6424870491027832, - "learning_rate": 2.616736459968936e-06, - "loss": 0.1767, - "step": 16328 - }, - { - "epoch": 1.5382586373377922, - "grad_norm": 0.7702372670173645, - "learning_rate": 2.6157181220769734e-06, - "loss": 0.2148, - "step": 16329 - }, - { - "epoch": 1.5383528414309602, - "grad_norm": 0.69382643699646, - "learning_rate": 2.6146999525580575e-06, - "loss": 0.2213, - "step": 16330 - }, - { - "epoch": 1.5384470455241281, - "grad_norm": 0.6558042168617249, - "learning_rate": 2.6136819514354075e-06, - "loss": 0.2048, - "step": 16331 - }, - { - "epoch": 1.5385412496172959, - "grad_norm": 0.6997061967849731, - "learning_rate": 2.612664118732232e-06, - "loss": 0.2212, - "step": 16332 - }, - { - "epoch": 1.5386354537104636, - "grad_norm": 0.5648784041404724, - "learning_rate": 2.611646454471736e-06, - "loss": 0.1708, - "step": 16333 - }, - { - "epoch": 1.5387296578036316, - "grad_norm": 1.061224102973938, - "learning_rate": 2.6106289586771273e-06, - "loss": 0.1672, - "step": 16334 - }, - { - "epoch": 1.5388238618967995, - "grad_norm": 0.6273217797279358, - "learning_rate": 2.609611631371609e-06, - "loss": 0.2216, - 
"step": 16335 - }, - { - "epoch": 1.5389180659899673, - "grad_norm": 0.5811769962310791, - "learning_rate": 2.6085944725783716e-06, - "loss": 0.187, - "step": 16336 - }, - { - "epoch": 1.539012270083135, - "grad_norm": 0.6060513257980347, - "learning_rate": 2.6075774823206122e-06, - "loss": 0.1984, - "step": 16337 - }, - { - "epoch": 1.539106474176303, - "grad_norm": 0.6198642253875732, - "learning_rate": 2.606560660621522e-06, - "loss": 0.2173, - "step": 16338 - }, - { - "epoch": 1.539200678269471, - "grad_norm": 0.6413329839706421, - "learning_rate": 2.6055440075042793e-06, - "loss": 0.1968, - "step": 16339 - }, - { - "epoch": 1.5392948823626387, - "grad_norm": 0.7054868340492249, - "learning_rate": 2.6045275229920686e-06, - "loss": 0.2208, - "step": 16340 - }, - { - "epoch": 1.5393890864558064, - "grad_norm": 0.6680474281311035, - "learning_rate": 2.6035112071080715e-06, - "loss": 0.1842, - "step": 16341 - }, - { - "epoch": 1.5394832905489744, - "grad_norm": 0.7053892612457275, - "learning_rate": 2.602495059875454e-06, - "loss": 0.2042, - "step": 16342 - }, - { - "epoch": 1.5395774946421423, - "grad_norm": 0.6567726731300354, - "learning_rate": 2.6014790813173907e-06, - "loss": 0.1894, - "step": 16343 - }, - { - "epoch": 1.53967169873531, - "grad_norm": 0.8481297492980957, - "learning_rate": 2.6004632714570486e-06, - "loss": 0.1972, - "step": 16344 - }, - { - "epoch": 1.5397659028284778, - "grad_norm": 0.6491158604621887, - "learning_rate": 2.5994476303175876e-06, - "loss": 0.1983, - "step": 16345 - }, - { - "epoch": 1.5398601069216458, - "grad_norm": 0.7349384427070618, - "learning_rate": 2.5984321579221593e-06, - "loss": 0.1801, - "step": 16346 - }, - { - "epoch": 1.5399543110148137, - "grad_norm": 0.7639728784561157, - "learning_rate": 2.5974168542939317e-06, - "loss": 0.1878, - "step": 16347 - }, - { - "epoch": 1.5400485151079815, - "grad_norm": 0.6427068114280701, - "learning_rate": 2.5964017194560466e-06, - "loss": 0.159, - "step": 16348 - }, - { - "epoch": 1.5401427192011492, - "grad_norm": 0.6762670874595642, - "learning_rate": 2.595386753431648e-06, - "loss": 0.1978, - "step": 16349 - }, - { - "epoch": 1.5402369232943172, - "grad_norm": 0.649671196937561, - "learning_rate": 2.5943719562438875e-06, - "loss": 0.1997, - "step": 16350 - }, - { - "epoch": 1.5403311273874851, - "grad_norm": 0.6771973371505737, - "learning_rate": 2.5933573279158996e-06, - "loss": 0.1911, - "step": 16351 - }, - { - "epoch": 1.5404253314806529, - "grad_norm": 0.6386894583702087, - "learning_rate": 2.5923428684708163e-06, - "loss": 0.1902, - "step": 16352 - }, - { - "epoch": 1.5405195355738206, - "grad_norm": 0.6674006581306458, - "learning_rate": 2.591328577931772e-06, - "loss": 0.1708, - "step": 16353 - }, - { - "epoch": 1.5406137396669886, - "grad_norm": 0.8000255227088928, - "learning_rate": 2.5903144563218974e-06, - "loss": 0.1919, - "step": 16354 - }, - { - "epoch": 1.5407079437601565, - "grad_norm": 0.6232596039772034, - "learning_rate": 2.589300503664308e-06, - "loss": 0.1949, - "step": 16355 - }, - { - "epoch": 1.5408021478533243, - "grad_norm": 0.7329729795455933, - "learning_rate": 2.5882867199821293e-06, - "loss": 0.2069, - "step": 16356 - }, - { - "epoch": 1.540896351946492, - "grad_norm": 0.6691109538078308, - "learning_rate": 2.587273105298479e-06, - "loss": 0.1986, - "step": 16357 - }, - { - "epoch": 1.54099055603966, - "grad_norm": 0.6441341042518616, - "learning_rate": 2.586259659636462e-06, - "loss": 0.2028, - "step": 16358 - }, - { - "epoch": 1.541084760132828, - "grad_norm": 
0.7453688979148865, - "learning_rate": 2.58524638301919e-06, - "loss": 0.1959, - "step": 16359 - }, - { - "epoch": 1.5411789642259957, - "grad_norm": 0.6126582622528076, - "learning_rate": 2.584233275469772e-06, - "loss": 0.1821, - "step": 16360 - }, - { - "epoch": 1.5412731683191634, - "grad_norm": 0.6151001453399658, - "learning_rate": 2.5832203370112995e-06, - "loss": 0.1828, - "step": 16361 - }, - { - "epoch": 1.5413673724123313, - "grad_norm": 0.7793347835540771, - "learning_rate": 2.582207567666878e-06, - "loss": 0.1765, - "step": 16362 - }, - { - "epoch": 1.5414615765054993, - "grad_norm": 0.8821308016777039, - "learning_rate": 2.5811949674595916e-06, - "loss": 0.2105, - "step": 16363 - }, - { - "epoch": 1.541555780598667, - "grad_norm": 0.6928325295448303, - "learning_rate": 2.5801825364125343e-06, - "loss": 0.199, - "step": 16364 - }, - { - "epoch": 1.5416499846918348, - "grad_norm": 0.648412823677063, - "learning_rate": 2.5791702745487924e-06, - "loss": 0.1792, - "step": 16365 - }, - { - "epoch": 1.5417441887850027, - "grad_norm": 0.6915906667709351, - "learning_rate": 2.5781581818914424e-06, - "loss": 0.2278, - "step": 16366 - }, - { - "epoch": 1.5418383928781707, - "grad_norm": 0.6613881587982178, - "learning_rate": 2.577146258463563e-06, - "loss": 0.2078, - "step": 16367 - }, - { - "epoch": 1.5419325969713384, - "grad_norm": 0.5999644994735718, - "learning_rate": 2.5761345042882326e-06, - "loss": 0.1733, - "step": 16368 - }, - { - "epoch": 1.5420268010645062, - "grad_norm": 0.6650545001029968, - "learning_rate": 2.575122919388513e-06, - "loss": 0.188, - "step": 16369 - }, - { - "epoch": 1.5421210051576741, - "grad_norm": 0.640903651714325, - "learning_rate": 2.5741115037874776e-06, - "loss": 0.1903, - "step": 16370 - }, - { - "epoch": 1.542215209250842, - "grad_norm": 0.6816896200180054, - "learning_rate": 2.57310025750818e-06, - "loss": 0.199, - "step": 16371 - }, - { - "epoch": 1.5423094133440098, - "grad_norm": 0.6032940745353699, - "learning_rate": 2.5720891805736857e-06, - "loss": 0.2066, - "step": 16372 - }, - { - "epoch": 1.5424036174371776, - "grad_norm": 0.6310244798660278, - "learning_rate": 2.571078273007044e-06, - "loss": 0.186, - "step": 16373 - }, - { - "epoch": 1.5424978215303455, - "grad_norm": 0.6525641679763794, - "learning_rate": 2.570067534831305e-06, - "loss": 0.1781, - "step": 16374 - }, - { - "epoch": 1.5425920256235135, - "grad_norm": 0.6778793931007385, - "learning_rate": 2.5690569660695207e-06, - "loss": 0.1985, - "step": 16375 - }, - { - "epoch": 1.5426862297166812, - "grad_norm": 0.6526602506637573, - "learning_rate": 2.568046566744726e-06, - "loss": 0.2134, - "step": 16376 - }, - { - "epoch": 1.542780433809849, - "grad_norm": 0.7703606486320496, - "learning_rate": 2.5670363368799646e-06, - "loss": 0.2488, - "step": 16377 - }, - { - "epoch": 1.542874637903017, - "grad_norm": 0.6597605347633362, - "learning_rate": 2.5660262764982723e-06, - "loss": 0.2093, - "step": 16378 - }, - { - "epoch": 1.542968841996185, - "grad_norm": 0.6802690625190735, - "learning_rate": 2.565016385622675e-06, - "loss": 0.1996, - "step": 16379 - }, - { - "epoch": 1.5430630460893526, - "grad_norm": 0.6328654289245605, - "learning_rate": 2.5640066642762017e-06, - "loss": 0.1961, - "step": 16380 - }, - { - "epoch": 1.5431572501825204, - "grad_norm": 0.6502787470817566, - "learning_rate": 2.56299711248188e-06, - "loss": 0.2054, - "step": 16381 - }, - { - "epoch": 1.5432514542756883, - "grad_norm": 1.638429045677185, - "learning_rate": 2.561987730262723e-06, - "loss": 0.1946, - 
"step": 16382 - }, - { - "epoch": 1.5433456583688563, - "grad_norm": 0.5941994190216064, - "learning_rate": 2.5609785176417478e-06, - "loss": 0.1764, - "step": 16383 - }, - { - "epoch": 1.543439862462024, - "grad_norm": 0.6767863035202026, - "learning_rate": 2.559969474641971e-06, - "loss": 0.156, - "step": 16384 - }, - { - "epoch": 1.5435340665551918, - "grad_norm": 0.672737181186676, - "learning_rate": 2.5589606012863968e-06, - "loss": 0.1933, - "step": 16385 - }, - { - "epoch": 1.5436282706483597, - "grad_norm": 0.6486581563949585, - "learning_rate": 2.557951897598022e-06, - "loss": 0.1959, - "step": 16386 - }, - { - "epoch": 1.5437224747415277, - "grad_norm": 1.0440093278884888, - "learning_rate": 2.5569433635998597e-06, - "loss": 0.183, - "step": 16387 - }, - { - "epoch": 1.5438166788346954, - "grad_norm": 0.6651913523674011, - "learning_rate": 2.5559349993148984e-06, - "loss": 0.2024, - "step": 16388 - }, - { - "epoch": 1.5439108829278632, - "grad_norm": 0.669744610786438, - "learning_rate": 2.554926804766127e-06, - "loss": 0.2179, - "step": 16389 - }, - { - "epoch": 1.5440050870210311, - "grad_norm": 0.7099943161010742, - "learning_rate": 2.553918779976544e-06, - "loss": 0.2163, - "step": 16390 - }, - { - "epoch": 1.5440992911141989, - "grad_norm": 0.6635091304779053, - "learning_rate": 2.5529109249691285e-06, - "loss": 0.1972, - "step": 16391 - }, - { - "epoch": 1.5441934952073666, - "grad_norm": 0.6373885869979858, - "learning_rate": 2.5519032397668575e-06, - "loss": 0.1716, - "step": 16392 - }, - { - "epoch": 1.5442876993005346, - "grad_norm": 0.6005825996398926, - "learning_rate": 2.5508957243927126e-06, - "loss": 0.163, - "step": 16393 - }, - { - "epoch": 1.5443819033937025, - "grad_norm": 0.6473194360733032, - "learning_rate": 2.5498883788696673e-06, - "loss": 0.1759, - "step": 16394 - }, - { - "epoch": 1.5444761074868703, - "grad_norm": 1.1488068103790283, - "learning_rate": 2.5488812032206855e-06, - "loss": 0.1964, - "step": 16395 - }, - { - "epoch": 1.544570311580038, - "grad_norm": 0.7530942559242249, - "learning_rate": 2.547874197468736e-06, - "loss": 0.2287, - "step": 16396 - }, - { - "epoch": 1.544664515673206, - "grad_norm": 0.7974828481674194, - "learning_rate": 2.5468673616367835e-06, - "loss": 0.2141, - "step": 16397 - }, - { - "epoch": 1.544758719766374, - "grad_norm": 0.6990633010864258, - "learning_rate": 2.5458606957477784e-06, - "loss": 0.192, - "step": 16398 - }, - { - "epoch": 1.5448529238595416, - "grad_norm": 0.6368348002433777, - "learning_rate": 2.5448541998246767e-06, - "loss": 0.1847, - "step": 16399 - }, - { - "epoch": 1.5449471279527094, - "grad_norm": 0.7107001543045044, - "learning_rate": 2.543847873890433e-06, - "loss": 0.2334, - "step": 16400 - }, - { - "epoch": 1.5450413320458773, - "grad_norm": 0.6501368284225464, - "learning_rate": 2.5428417179679842e-06, - "loss": 0.2043, - "step": 16401 - }, - { - "epoch": 1.5451355361390453, - "grad_norm": 0.6266803741455078, - "learning_rate": 2.541835732080281e-06, - "loss": 0.2159, - "step": 16402 - }, - { - "epoch": 1.545229740232213, - "grad_norm": 0.646104633808136, - "learning_rate": 2.5408299162502546e-06, - "loss": 0.2266, - "step": 16403 - }, - { - "epoch": 1.5453239443253808, - "grad_norm": 0.6524372100830078, - "learning_rate": 2.5398242705008412e-06, - "loss": 0.2196, - "step": 16404 - }, - { - "epoch": 1.5454181484185487, - "grad_norm": 0.6856186389923096, - "learning_rate": 2.538818794854976e-06, - "loss": 0.2091, - "step": 16405 - }, - { - "epoch": 1.5455123525117167, - "grad_norm": 
0.6180176138877869, - "learning_rate": 2.5378134893355777e-06, - "loss": 0.1891, - "step": 16406 - }, - { - "epoch": 1.5456065566048844, - "grad_norm": 0.6351768970489502, - "learning_rate": 2.536808353965572e-06, - "loss": 0.193, - "step": 16407 - }, - { - "epoch": 1.5457007606980522, - "grad_norm": 0.6795313954353333, - "learning_rate": 2.535803388767881e-06, - "loss": 0.2184, - "step": 16408 - }, - { - "epoch": 1.5457949647912201, - "grad_norm": 0.6426952481269836, - "learning_rate": 2.534798593765414e-06, - "loss": 0.197, - "step": 16409 - }, - { - "epoch": 1.545889168884388, - "grad_norm": 0.6773859262466431, - "learning_rate": 2.5337939689810864e-06, - "loss": 0.1714, - "step": 16410 - }, - { - "epoch": 1.5459833729775558, - "grad_norm": 0.7835344672203064, - "learning_rate": 2.5327895144378e-06, - "loss": 0.1668, - "step": 16411 - }, - { - "epoch": 1.5460775770707236, - "grad_norm": 0.6896972060203552, - "learning_rate": 2.5317852301584642e-06, - "loss": 0.2217, - "step": 16412 - }, - { - "epoch": 1.5461717811638915, - "grad_norm": 0.7565961480140686, - "learning_rate": 2.5307811161659724e-06, - "loss": 0.1882, - "step": 16413 - }, - { - "epoch": 1.5462659852570595, - "grad_norm": 0.6159785985946655, - "learning_rate": 2.5297771724832222e-06, - "loss": 0.1672, - "step": 16414 - }, - { - "epoch": 1.5463601893502272, - "grad_norm": 0.6429604291915894, - "learning_rate": 2.5287733991331074e-06, - "loss": 0.1837, - "step": 16415 - }, - { - "epoch": 1.546454393443395, - "grad_norm": 0.7100839018821716, - "learning_rate": 2.5277697961385118e-06, - "loss": 0.193, - "step": 16416 - }, - { - "epoch": 1.546548597536563, - "grad_norm": 0.6241758465766907, - "learning_rate": 2.5267663635223205e-06, - "loss": 0.1841, - "step": 16417 - }, - { - "epoch": 1.546642801629731, - "grad_norm": 0.6817502379417419, - "learning_rate": 2.525763101307417e-06, - "loss": 0.2044, - "step": 16418 - }, - { - "epoch": 1.5467370057228986, - "grad_norm": 0.750516414642334, - "learning_rate": 2.5247600095166702e-06, - "loss": 0.1999, - "step": 16419 - }, - { - "epoch": 1.5468312098160664, - "grad_norm": 0.6559773087501526, - "learning_rate": 2.5237570881729566e-06, - "loss": 0.1962, - "step": 16420 - }, - { - "epoch": 1.5469254139092343, - "grad_norm": 0.7364010810852051, - "learning_rate": 2.522754337299146e-06, - "loss": 0.1975, - "step": 16421 - }, - { - "epoch": 1.5470196180024023, - "grad_norm": 0.6457366943359375, - "learning_rate": 2.5217517569181003e-06, - "loss": 0.184, - "step": 16422 - }, - { - "epoch": 1.54711382209557, - "grad_norm": 0.6395455002784729, - "learning_rate": 2.5207493470526747e-06, - "loss": 0.1684, - "step": 16423 - }, - { - "epoch": 1.5472080261887378, - "grad_norm": 0.6169290542602539, - "learning_rate": 2.5197471077257362e-06, - "loss": 0.1963, - "step": 16424 - }, - { - "epoch": 1.5473022302819057, - "grad_norm": 0.6614916324615479, - "learning_rate": 2.518745038960132e-06, - "loss": 0.1964, - "step": 16425 - }, - { - "epoch": 1.5473964343750737, - "grad_norm": 0.6604982018470764, - "learning_rate": 2.5177431407787056e-06, - "loss": 0.192, - "step": 16426 - }, - { - "epoch": 1.5474906384682414, - "grad_norm": 0.6813318133354187, - "learning_rate": 2.516741413204312e-06, - "loss": 0.2218, - "step": 16427 - }, - { - "epoch": 1.5475848425614092, - "grad_norm": 0.6619900465011597, - "learning_rate": 2.515739856259788e-06, - "loss": 0.2102, - "step": 16428 - }, - { - "epoch": 1.5476790466545771, - "grad_norm": 0.6388282179832458, - "learning_rate": 2.5147384699679646e-06, - "loss": 0.1953, 
- "step": 16429 - }, - { - "epoch": 1.547773250747745, - "grad_norm": 0.7000366449356079, - "learning_rate": 2.5137372543516847e-06, - "loss": 0.2173, - "step": 16430 - }, - { - "epoch": 1.5478674548409128, - "grad_norm": 0.6196354031562805, - "learning_rate": 2.5127362094337737e-06, - "loss": 0.1891, - "step": 16431 - }, - { - "epoch": 1.5479616589340806, - "grad_norm": 0.6312999725341797, - "learning_rate": 2.5117353352370544e-06, - "loss": 0.1813, - "step": 16432 - }, - { - "epoch": 1.5480558630272485, - "grad_norm": 0.6917158365249634, - "learning_rate": 2.5107346317843494e-06, - "loss": 0.2173, - "step": 16433 - }, - { - "epoch": 1.5481500671204165, - "grad_norm": 0.8725519180297852, - "learning_rate": 2.50973409909848e-06, - "loss": 0.2205, - "step": 16434 - }, - { - "epoch": 1.5482442712135842, - "grad_norm": 0.6720922589302063, - "learning_rate": 2.5087337372022546e-06, - "loss": 0.1904, - "step": 16435 - }, - { - "epoch": 1.548338475306752, - "grad_norm": 0.6058753132820129, - "learning_rate": 2.5077335461184847e-06, - "loss": 0.1544, - "step": 16436 - }, - { - "epoch": 1.54843267939992, - "grad_norm": 0.6513206958770752, - "learning_rate": 2.506733525869981e-06, - "loss": 0.1971, - "step": 16437 - }, - { - "epoch": 1.5485268834930879, - "grad_norm": 0.6544703841209412, - "learning_rate": 2.505733676479537e-06, - "loss": 0.2054, - "step": 16438 - }, - { - "epoch": 1.5486210875862556, - "grad_norm": 0.7138603329658508, - "learning_rate": 2.504733997969957e-06, - "loss": 0.2071, - "step": 16439 - }, - { - "epoch": 1.5487152916794233, - "grad_norm": 0.7055628299713135, - "learning_rate": 2.5037344903640347e-06, - "loss": 0.2039, - "step": 16440 - }, - { - "epoch": 1.5488094957725913, - "grad_norm": 0.6753688454627991, - "learning_rate": 2.5027351536845578e-06, - "loss": 0.2096, - "step": 16441 - }, - { - "epoch": 1.5489036998657593, - "grad_norm": 0.6567484736442566, - "learning_rate": 2.5017359879543168e-06, - "loss": 0.2127, - "step": 16442 - }, - { - "epoch": 1.548997903958927, - "grad_norm": 0.6667615175247192, - "learning_rate": 2.500736993196088e-06, - "loss": 0.2014, - "step": 16443 - }, - { - "epoch": 1.5490921080520947, - "grad_norm": 0.6586611270904541, - "learning_rate": 2.499738169432654e-06, - "loss": 0.1909, - "step": 16444 - }, - { - "epoch": 1.5491863121452627, - "grad_norm": 0.6358745098114014, - "learning_rate": 2.498739516686792e-06, - "loss": 0.2005, - "step": 16445 - }, - { - "epoch": 1.5492805162384307, - "grad_norm": 0.6489126682281494, - "learning_rate": 2.497741034981267e-06, - "loss": 0.1889, - "step": 16446 - }, - { - "epoch": 1.5493747203315984, - "grad_norm": 0.6446957588195801, - "learning_rate": 2.4967427243388485e-06, - "loss": 0.2126, - "step": 16447 - }, - { - "epoch": 1.5494689244247661, - "grad_norm": 0.6304161548614502, - "learning_rate": 2.4957445847823036e-06, - "loss": 0.1778, - "step": 16448 - }, - { - "epoch": 1.549563128517934, - "grad_norm": 0.6196681261062622, - "learning_rate": 2.4947466163343838e-06, - "loss": 0.2067, - "step": 16449 - }, - { - "epoch": 1.549657332611102, - "grad_norm": 0.6782927513122559, - "learning_rate": 2.4937488190178518e-06, - "loss": 0.1987, - "step": 16450 - }, - { - "epoch": 1.5497515367042698, - "grad_norm": 0.7568427324295044, - "learning_rate": 2.4927511928554525e-06, - "loss": 0.1914, - "step": 16451 - }, - { - "epoch": 1.5498457407974375, - "grad_norm": 0.8635743856430054, - "learning_rate": 2.4917537378699386e-06, - "loss": 0.2025, - "step": 16452 - }, - { - "epoch": 1.5499399448906055, - "grad_norm": 
0.6215642094612122, - "learning_rate": 2.490756454084049e-06, - "loss": 0.193, - "step": 16453 - }, - { - "epoch": 1.5500341489837735, - "grad_norm": 0.6086340546607971, - "learning_rate": 2.4897593415205255e-06, - "loss": 0.1849, - "step": 16454 - }, - { - "epoch": 1.5501283530769412, - "grad_norm": 0.6403914093971252, - "learning_rate": 2.488762400202106e-06, - "loss": 0.219, - "step": 16455 - }, - { - "epoch": 1.550222557170109, - "grad_norm": 0.6786202788352966, - "learning_rate": 2.4877656301515174e-06, - "loss": 0.2304, - "step": 16456 - }, - { - "epoch": 1.5503167612632769, - "grad_norm": 0.6342893242835999, - "learning_rate": 2.4867690313914906e-06, - "loss": 0.2067, - "step": 16457 - }, - { - "epoch": 1.5504109653564448, - "grad_norm": 0.6511574387550354, - "learning_rate": 2.485772603944753e-06, - "loss": 0.2091, - "step": 16458 - }, - { - "epoch": 1.5505051694496126, - "grad_norm": 0.682508111000061, - "learning_rate": 2.484776347834017e-06, - "loss": 0.1901, - "step": 16459 - }, - { - "epoch": 1.5505993735427803, - "grad_norm": 0.641307532787323, - "learning_rate": 2.483780263082003e-06, - "loss": 0.1877, - "step": 16460 - }, - { - "epoch": 1.5506935776359483, - "grad_norm": 0.6705555319786072, - "learning_rate": 2.4827843497114256e-06, - "loss": 0.2013, - "step": 16461 - }, - { - "epoch": 1.5507877817291162, - "grad_norm": 0.6825522184371948, - "learning_rate": 2.4817886077449917e-06, - "loss": 0.1911, - "step": 16462 - }, - { - "epoch": 1.550881985822284, - "grad_norm": 0.611046552658081, - "learning_rate": 2.480793037205398e-06, - "loss": 0.1725, - "step": 16463 - }, - { - "epoch": 1.5509761899154517, - "grad_norm": 0.672132670879364, - "learning_rate": 2.4797976381153575e-06, - "loss": 0.1983, - "step": 16464 - }, - { - "epoch": 1.5510703940086197, - "grad_norm": 0.7160225510597229, - "learning_rate": 2.4788024104975615e-06, - "loss": 0.229, - "step": 16465 - }, - { - "epoch": 1.5511645981017876, - "grad_norm": 0.7172216773033142, - "learning_rate": 2.477807354374696e-06, - "loss": 0.1916, - "step": 16466 - }, - { - "epoch": 1.5512588021949554, - "grad_norm": 0.6677335500717163, - "learning_rate": 2.476812469769463e-06, - "loss": 0.2029, - "step": 16467 - }, - { - "epoch": 1.5513530062881231, - "grad_norm": 0.6702734231948853, - "learning_rate": 2.4758177567045393e-06, - "loss": 0.1912, - "step": 16468 - }, - { - "epoch": 1.551447210381291, - "grad_norm": 0.7241215705871582, - "learning_rate": 2.474823215202602e-06, - "loss": 0.2051, - "step": 16469 - }, - { - "epoch": 1.551541414474459, - "grad_norm": 0.6005356907844543, - "learning_rate": 2.473828845286339e-06, - "loss": 0.1831, - "step": 16470 - }, - { - "epoch": 1.5516356185676268, - "grad_norm": 0.6623706221580505, - "learning_rate": 2.472834646978417e-06, - "loss": 0.1872, - "step": 16471 - }, - { - "epoch": 1.5517298226607945, - "grad_norm": 0.642845630645752, - "learning_rate": 2.4718406203015045e-06, - "loss": 0.2, - "step": 16472 - }, - { - "epoch": 1.5518240267539625, - "grad_norm": 0.6071888208389282, - "learning_rate": 2.4708467652782675e-06, - "loss": 0.1666, - "step": 16473 - }, - { - "epoch": 1.5519182308471304, - "grad_norm": 0.6258337497711182, - "learning_rate": 2.4698530819313714e-06, - "loss": 0.1925, - "step": 16474 - }, - { - "epoch": 1.5520124349402982, - "grad_norm": 0.5780388712882996, - "learning_rate": 2.468859570283467e-06, - "loss": 0.1917, - "step": 16475 - }, - { - "epoch": 1.552106639033466, - "grad_norm": 0.6399002075195312, - "learning_rate": 2.4678662303572122e-06, - "loss": 0.197, - 
"step": 16476 - }, - { - "epoch": 1.5522008431266339, - "grad_norm": 0.6279283761978149, - "learning_rate": 2.466873062175259e-06, - "loss": 0.189, - "step": 16477 - }, - { - "epoch": 1.5522950472198018, - "grad_norm": 0.6647738218307495, - "learning_rate": 2.4658800657602476e-06, - "loss": 0.1976, - "step": 16478 - }, - { - "epoch": 1.5523892513129696, - "grad_norm": 0.5951058268547058, - "learning_rate": 2.4648872411348223e-06, - "loss": 0.1845, - "step": 16479 - }, - { - "epoch": 1.5524834554061373, - "grad_norm": 0.6937251091003418, - "learning_rate": 2.4638945883216236e-06, - "loss": 0.1977, - "step": 16480 - }, - { - "epoch": 1.5525776594993053, - "grad_norm": 0.6020039916038513, - "learning_rate": 2.4629021073432803e-06, - "loss": 0.2067, - "step": 16481 - }, - { - "epoch": 1.5526718635924732, - "grad_norm": 0.6047183871269226, - "learning_rate": 2.461909798222428e-06, - "loss": 0.2017, - "step": 16482 - }, - { - "epoch": 1.552766067685641, - "grad_norm": 0.6549305319786072, - "learning_rate": 2.4609176609816876e-06, - "loss": 0.1903, - "step": 16483 - }, - { - "epoch": 1.5528602717788087, - "grad_norm": 0.6725935339927673, - "learning_rate": 2.4599256956436846e-06, - "loss": 0.1927, - "step": 16484 - }, - { - "epoch": 1.5529544758719767, - "grad_norm": 0.7078056335449219, - "learning_rate": 2.4589339022310386e-06, - "loss": 0.2203, - "step": 16485 - }, - { - "epoch": 1.5530486799651446, - "grad_norm": 0.7287416458129883, - "learning_rate": 2.45794228076636e-06, - "loss": 0.2246, - "step": 16486 - }, - { - "epoch": 1.5531428840583124, - "grad_norm": 0.6040006279945374, - "learning_rate": 2.4569508312722635e-06, - "loss": 0.2172, - "step": 16487 - }, - { - "epoch": 1.55323708815148, - "grad_norm": 0.6748787760734558, - "learning_rate": 2.455959553771351e-06, - "loss": 0.1957, - "step": 16488 - }, - { - "epoch": 1.553331292244648, - "grad_norm": 0.6507366895675659, - "learning_rate": 2.4549684482862278e-06, - "loss": 0.2456, - "step": 16489 - }, - { - "epoch": 1.553425496337816, - "grad_norm": 0.635037899017334, - "learning_rate": 2.453977514839496e-06, - "loss": 0.1962, - "step": 16490 - }, - { - "epoch": 1.5535197004309838, - "grad_norm": 0.6460270285606384, - "learning_rate": 2.4529867534537435e-06, - "loss": 0.1942, - "step": 16491 - }, - { - "epoch": 1.5536139045241515, - "grad_norm": 0.6219245195388794, - "learning_rate": 2.4519961641515677e-06, - "loss": 0.1803, - "step": 16492 - }, - { - "epoch": 1.5537081086173194, - "grad_norm": 0.6302159428596497, - "learning_rate": 2.4510057469555504e-06, - "loss": 0.1928, - "step": 16493 - }, - { - "epoch": 1.5538023127104874, - "grad_norm": 0.6582379341125488, - "learning_rate": 2.4500155018882755e-06, - "loss": 0.2043, - "step": 16494 - }, - { - "epoch": 1.5538965168036551, - "grad_norm": 0.7801764011383057, - "learning_rate": 2.449025428972327e-06, - "loss": 0.1884, - "step": 16495 - }, - { - "epoch": 1.5539907208968229, - "grad_norm": 0.5923693180084229, - "learning_rate": 2.4480355282302738e-06, - "loss": 0.1863, - "step": 16496 - }, - { - "epoch": 1.5540849249899908, - "grad_norm": 0.6438308358192444, - "learning_rate": 2.4470457996846896e-06, - "loss": 0.1843, - "step": 16497 - }, - { - "epoch": 1.5541791290831588, - "grad_norm": 0.8064566254615784, - "learning_rate": 2.4460562433581445e-06, - "loss": 0.2187, - "step": 16498 - }, - { - "epoch": 1.5542733331763265, - "grad_norm": 0.6894887685775757, - "learning_rate": 2.4450668592731976e-06, - "loss": 0.1998, - "step": 16499 - }, - { - "epoch": 1.5543675372694943, - "grad_norm": 
0.6374278664588928, - "learning_rate": 2.4440776474524096e-06, - "loss": 0.2294, - "step": 16500 - }, - { - "epoch": 1.5544617413626622, - "grad_norm": 0.6965348124504089, - "learning_rate": 2.4430886079183402e-06, - "loss": 0.1854, - "step": 16501 - }, - { - "epoch": 1.5545559454558302, - "grad_norm": 0.6538490653038025, - "learning_rate": 2.4420997406935364e-06, - "loss": 0.1936, - "step": 16502 - }, - { - "epoch": 1.554650149548998, - "grad_norm": 0.6493823528289795, - "learning_rate": 2.4411110458005414e-06, - "loss": 0.1716, - "step": 16503 - }, - { - "epoch": 1.5547443536421657, - "grad_norm": 0.7109135985374451, - "learning_rate": 2.4401225232619117e-06, - "loss": 0.2086, - "step": 16504 - }, - { - "epoch": 1.5548385577353336, - "grad_norm": 0.6588006019592285, - "learning_rate": 2.4391341731001793e-06, - "loss": 0.2074, - "step": 16505 - }, - { - "epoch": 1.5549327618285016, - "grad_norm": 0.610553503036499, - "learning_rate": 2.438145995337875e-06, - "loss": 0.1769, - "step": 16506 - }, - { - "epoch": 1.5550269659216693, - "grad_norm": 0.6453066468238831, - "learning_rate": 2.437157989997542e-06, - "loss": 0.1916, - "step": 16507 - }, - { - "epoch": 1.555121170014837, - "grad_norm": 0.6897177696228027, - "learning_rate": 2.436170157101704e-06, - "loss": 0.1961, - "step": 16508 - }, - { - "epoch": 1.555215374108005, - "grad_norm": 0.647250235080719, - "learning_rate": 2.4351824966728775e-06, - "loss": 0.2084, - "step": 16509 - }, - { - "epoch": 1.555309578201173, - "grad_norm": 0.6197291612625122, - "learning_rate": 2.4341950087335954e-06, - "loss": 0.1854, - "step": 16510 - }, - { - "epoch": 1.5554037822943407, - "grad_norm": 0.6840108633041382, - "learning_rate": 2.4332076933063677e-06, - "loss": 0.1899, - "step": 16511 - }, - { - "epoch": 1.5554979863875085, - "grad_norm": 0.6628212928771973, - "learning_rate": 2.432220550413704e-06, - "loss": 0.1736, - "step": 16512 - }, - { - "epoch": 1.5555921904806764, - "grad_norm": 0.6374439597129822, - "learning_rate": 2.431233580078115e-06, - "loss": 0.1847, - "step": 16513 - }, - { - "epoch": 1.5556863945738444, - "grad_norm": 0.6410982012748718, - "learning_rate": 2.43024678232211e-06, - "loss": 0.2123, - "step": 16514 - }, - { - "epoch": 1.5557805986670121, - "grad_norm": 0.6972041726112366, - "learning_rate": 2.4292601571681805e-06, - "loss": 0.1872, - "step": 16515 - }, - { - "epoch": 1.5558748027601799, - "grad_norm": 0.5675978064537048, - "learning_rate": 2.4282737046388293e-06, - "loss": 0.1995, - "step": 16516 - }, - { - "epoch": 1.5559690068533478, - "grad_norm": 0.6456544399261475, - "learning_rate": 2.4272874247565492e-06, - "loss": 0.1867, - "step": 16517 - }, - { - "epoch": 1.5560632109465158, - "grad_norm": 0.6558977961540222, - "learning_rate": 2.4263013175438256e-06, - "loss": 0.1731, - "step": 16518 - }, - { - "epoch": 1.5561574150396835, - "grad_norm": 0.6935216188430786, - "learning_rate": 2.4253153830231446e-06, - "loss": 0.1999, - "step": 16519 - }, - { - "epoch": 1.5562516191328513, - "grad_norm": 0.723755955696106, - "learning_rate": 2.42432962121699e-06, - "loss": 0.1967, - "step": 16520 - }, - { - "epoch": 1.5563458232260192, - "grad_norm": 0.6899499297142029, - "learning_rate": 2.423344032147833e-06, - "loss": 0.21, - "step": 16521 - }, - { - "epoch": 1.5564400273191872, - "grad_norm": 0.6751235127449036, - "learning_rate": 2.4223586158381516e-06, - "loss": 0.2221, - "step": 16522 - }, - { - "epoch": 1.556534231412355, - "grad_norm": 0.641545295715332, - "learning_rate": 2.4213733723104115e-06, - "loss": 
0.1913, - "step": 16523 - }, - { - "epoch": 1.5566284355055227, - "grad_norm": 0.5932109355926514, - "learning_rate": 2.420388301587079e-06, - "loss": 0.1629, - "step": 16524 - }, - { - "epoch": 1.5567226395986906, - "grad_norm": 0.5994042754173279, - "learning_rate": 2.419403403690618e-06, - "loss": 0.1886, - "step": 16525 - }, - { - "epoch": 1.5568168436918586, - "grad_norm": 0.7845425605773926, - "learning_rate": 2.4184186786434793e-06, - "loss": 0.1787, - "step": 16526 - }, - { - "epoch": 1.5569110477850263, - "grad_norm": 0.636869490146637, - "learning_rate": 2.417434126468123e-06, - "loss": 0.2078, - "step": 16527 - }, - { - "epoch": 1.557005251878194, - "grad_norm": 0.6482181549072266, - "learning_rate": 2.416449747186993e-06, - "loss": 0.1829, - "step": 16528 - }, - { - "epoch": 1.557099455971362, - "grad_norm": 0.6135715842247009, - "learning_rate": 2.415465540822537e-06, - "loss": 0.1757, - "step": 16529 - }, - { - "epoch": 1.5571936600645297, - "grad_norm": 0.6853678822517395, - "learning_rate": 2.414481507397198e-06, - "loss": 0.1954, - "step": 16530 - }, - { - "epoch": 1.5572878641576975, - "grad_norm": 0.619922935962677, - "learning_rate": 2.4134976469334104e-06, - "loss": 0.1763, - "step": 16531 - }, - { - "epoch": 1.5573820682508654, - "grad_norm": 1.4710171222686768, - "learning_rate": 2.4125139594536117e-06, - "loss": 0.1836, - "step": 16532 - }, - { - "epoch": 1.5574762723440334, - "grad_norm": 0.6790376305580139, - "learning_rate": 2.4115304449802255e-06, - "loss": 0.2082, - "step": 16533 - }, - { - "epoch": 1.5575704764372011, - "grad_norm": 0.6932697296142578, - "learning_rate": 2.410547103535682e-06, - "loss": 0.2046, - "step": 16534 - }, - { - "epoch": 1.5576646805303689, - "grad_norm": 0.6530988812446594, - "learning_rate": 2.409563935142404e-06, - "loss": 0.2091, - "step": 16535 - }, - { - "epoch": 1.5577588846235368, - "grad_norm": 0.6402018666267395, - "learning_rate": 2.4085809398228045e-06, - "loss": 0.1887, - "step": 16536 - }, - { - "epoch": 1.5578530887167048, - "grad_norm": 0.6248202323913574, - "learning_rate": 2.407598117599299e-06, - "loss": 0.1871, - "step": 16537 - }, - { - "epoch": 1.5579472928098725, - "grad_norm": 0.6733875274658203, - "learning_rate": 2.4066154684943023e-06, - "loss": 0.206, - "step": 16538 - }, - { - "epoch": 1.5580414969030403, - "grad_norm": 0.7862675786018372, - "learning_rate": 2.4056329925302135e-06, - "loss": 0.1907, - "step": 16539 - }, - { - "epoch": 1.5581357009962082, - "grad_norm": 0.6684961915016174, - "learning_rate": 2.404650689729436e-06, - "loss": 0.1944, - "step": 16540 - }, - { - "epoch": 1.5582299050893762, - "grad_norm": 0.6752827167510986, - "learning_rate": 2.403668560114374e-06, - "loss": 0.2158, - "step": 16541 - }, - { - "epoch": 1.558324109182544, - "grad_norm": 0.5895867943763733, - "learning_rate": 2.4026866037074158e-06, - "loss": 0.149, - "step": 16542 - }, - { - "epoch": 1.5584183132757117, - "grad_norm": 0.6277616620063782, - "learning_rate": 2.401704820530947e-06, - "loss": 0.182, - "step": 16543 - }, - { - "epoch": 1.5585125173688796, - "grad_norm": 0.5878896713256836, - "learning_rate": 2.400723210607364e-06, - "loss": 0.2018, - "step": 16544 - }, - { - "epoch": 1.5586067214620476, - "grad_norm": 0.6188334226608276, - "learning_rate": 2.3997417739590457e-06, - "loss": 0.2131, - "step": 16545 - }, - { - "epoch": 1.5587009255552153, - "grad_norm": 0.7101216912269592, - "learning_rate": 2.3987605106083623e-06, - "loss": 0.226, - "step": 16546 - }, - { - "epoch": 1.558795129648383, - "grad_norm": 
0.6682934165000916, - "learning_rate": 2.3977794205777015e-06, - "loss": 0.1793, - "step": 16547 - }, - { - "epoch": 1.558889333741551, - "grad_norm": 0.6556659936904907, - "learning_rate": 2.396798503889426e-06, - "loss": 0.1864, - "step": 16548 - }, - { - "epoch": 1.558983537834719, - "grad_norm": 0.5727357268333435, - "learning_rate": 2.3958177605658985e-06, - "loss": 0.1634, - "step": 16549 - }, - { - "epoch": 1.5590777419278867, - "grad_norm": 0.6460142731666565, - "learning_rate": 2.394837190629491e-06, - "loss": 0.1767, - "step": 16550 - }, - { - "epoch": 1.5591719460210545, - "grad_norm": 0.6470164656639099, - "learning_rate": 2.393856794102557e-06, - "loss": 0.182, - "step": 16551 - }, - { - "epoch": 1.5592661501142224, - "grad_norm": 0.6724783182144165, - "learning_rate": 2.3928765710074486e-06, - "loss": 0.1817, - "step": 16552 - }, - { - "epoch": 1.5593603542073904, - "grad_norm": 0.6727150082588196, - "learning_rate": 2.391896521366519e-06, - "loss": 0.191, - "step": 16553 - }, - { - "epoch": 1.5594545583005581, - "grad_norm": 0.6314756274223328, - "learning_rate": 2.390916645202118e-06, - "loss": 0.1558, - "step": 16554 - }, - { - "epoch": 1.5595487623937259, - "grad_norm": 0.63203364610672, - "learning_rate": 2.3899369425365824e-06, - "loss": 0.2149, - "step": 16555 - }, - { - "epoch": 1.5596429664868938, - "grad_norm": 0.7332870960235596, - "learning_rate": 2.3889574133922532e-06, - "loss": 0.2019, - "step": 16556 - }, - { - "epoch": 1.5597371705800618, - "grad_norm": 0.6748360395431519, - "learning_rate": 2.387978057791469e-06, - "loss": 0.1948, - "step": 16557 - }, - { - "epoch": 1.5598313746732295, - "grad_norm": 0.6236615777015686, - "learning_rate": 2.386998875756554e-06, - "loss": 0.1884, - "step": 16558 - }, - { - "epoch": 1.5599255787663973, - "grad_norm": 0.6263788938522339, - "learning_rate": 2.386019867309839e-06, - "loss": 0.1792, - "step": 16559 - }, - { - "epoch": 1.5600197828595652, - "grad_norm": 0.6550841927528381, - "learning_rate": 2.3850410324736496e-06, - "loss": 0.1821, - "step": 16560 - }, - { - "epoch": 1.5601139869527332, - "grad_norm": 0.7686969637870789, - "learning_rate": 2.384062371270297e-06, - "loss": 0.2161, - "step": 16561 - }, - { - "epoch": 1.560208191045901, - "grad_norm": 0.5874224305152893, - "learning_rate": 2.3830838837221047e-06, - "loss": 0.1629, - "step": 16562 - }, - { - "epoch": 1.5603023951390687, - "grad_norm": 0.6518653035163879, - "learning_rate": 2.3821055698513763e-06, - "loss": 0.2069, - "step": 16563 - }, - { - "epoch": 1.5603965992322366, - "grad_norm": 0.6342228055000305, - "learning_rate": 2.3811274296804222e-06, - "loss": 0.2176, - "step": 16564 - }, - { - "epoch": 1.5604908033254046, - "grad_norm": 0.770420253276825, - "learning_rate": 2.380149463231548e-06, - "loss": 0.218, - "step": 16565 - }, - { - "epoch": 1.5605850074185723, - "grad_norm": 0.6777490377426147, - "learning_rate": 2.3791716705270484e-06, - "loss": 0.1879, - "step": 16566 - }, - { - "epoch": 1.56067921151174, - "grad_norm": 0.6285408139228821, - "learning_rate": 2.378194051589222e-06, - "loss": 0.2169, - "step": 16567 - }, - { - "epoch": 1.560773415604908, - "grad_norm": 0.7333067655563354, - "learning_rate": 2.377216606440357e-06, - "loss": 0.2246, - "step": 16568 - }, - { - "epoch": 1.560867619698076, - "grad_norm": 0.7452640533447266, - "learning_rate": 2.3762393351027424e-06, - "loss": 0.2258, - "step": 16569 - }, - { - "epoch": 1.5609618237912437, - "grad_norm": 0.6964256763458252, - "learning_rate": 2.3752622375986635e-06, - "loss": 0.2068, - 
"step": 16570 - }, - { - "epoch": 1.5610560278844114, - "grad_norm": 0.7265715003013611, - "learning_rate": 2.374285313950394e-06, - "loss": 0.2081, - "step": 16571 - }, - { - "epoch": 1.5611502319775794, - "grad_norm": 0.7704083323478699, - "learning_rate": 2.3733085641802168e-06, - "loss": 0.2038, - "step": 16572 - }, - { - "epoch": 1.5612444360707474, - "grad_norm": 0.6363699436187744, - "learning_rate": 2.3723319883103958e-06, - "loss": 0.2074, - "step": 16573 - }, - { - "epoch": 1.561338640163915, - "grad_norm": 0.6404546499252319, - "learning_rate": 2.371355586363202e-06, - "loss": 0.1877, - "step": 16574 - }, - { - "epoch": 1.5614328442570828, - "grad_norm": 0.7770257592201233, - "learning_rate": 2.3703793583609013e-06, - "loss": 0.2003, - "step": 16575 - }, - { - "epoch": 1.5615270483502508, - "grad_norm": 0.6772834062576294, - "learning_rate": 2.369403304325748e-06, - "loss": 0.1796, - "step": 16576 - }, - { - "epoch": 1.5616212524434188, - "grad_norm": 0.6184030175209045, - "learning_rate": 2.368427424280001e-06, - "loss": 0.2002, - "step": 16577 - }, - { - "epoch": 1.5617154565365865, - "grad_norm": 0.614189088344574, - "learning_rate": 2.3674517182459133e-06, - "loss": 0.1788, - "step": 16578 - }, - { - "epoch": 1.5618096606297542, - "grad_norm": 0.6026573777198792, - "learning_rate": 2.3664761862457276e-06, - "loss": 0.174, - "step": 16579 - }, - { - "epoch": 1.5619038647229222, - "grad_norm": 0.6518381237983704, - "learning_rate": 2.365500828301691e-06, - "loss": 0.1868, - "step": 16580 - }, - { - "epoch": 1.5619980688160902, - "grad_norm": 0.7336990833282471, - "learning_rate": 2.3645256444360443e-06, - "loss": 0.2223, - "step": 16581 - }, - { - "epoch": 1.562092272909258, - "grad_norm": 0.6668913960456848, - "learning_rate": 2.3635506346710224e-06, - "loss": 0.1703, - "step": 16582 - }, - { - "epoch": 1.5621864770024256, - "grad_norm": 0.6120978593826294, - "learning_rate": 2.362575799028849e-06, - "loss": 0.1978, - "step": 16583 - }, - { - "epoch": 1.5622806810955936, - "grad_norm": 0.6925798654556274, - "learning_rate": 2.361601137531766e-06, - "loss": 0.1987, - "step": 16584 - }, - { - "epoch": 1.5623748851887616, - "grad_norm": 0.7289588451385498, - "learning_rate": 2.360626650201989e-06, - "loss": 0.1949, - "step": 16585 - }, - { - "epoch": 1.5624690892819293, - "grad_norm": 0.6873010396957397, - "learning_rate": 2.359652337061734e-06, - "loss": 0.2177, - "step": 16586 - }, - { - "epoch": 1.562563293375097, - "grad_norm": 0.6446362733840942, - "learning_rate": 2.3586781981332276e-06, - "loss": 0.2019, - "step": 16587 - }, - { - "epoch": 1.562657497468265, - "grad_norm": 0.8122641444206238, - "learning_rate": 2.3577042334386744e-06, - "loss": 0.2087, - "step": 16588 - }, - { - "epoch": 1.562751701561433, - "grad_norm": 0.5987395644187927, - "learning_rate": 2.356730443000279e-06, - "loss": 0.209, - "step": 16589 - }, - { - "epoch": 1.5628459056546007, - "grad_norm": 0.6892624497413635, - "learning_rate": 2.3557568268402565e-06, - "loss": 0.1956, - "step": 16590 - }, - { - "epoch": 1.5629401097477684, - "grad_norm": 0.6857694387435913, - "learning_rate": 2.3547833849807987e-06, - "loss": 0.2126, - "step": 16591 - }, - { - "epoch": 1.5630343138409364, - "grad_norm": 0.634675920009613, - "learning_rate": 2.353810117444102e-06, - "loss": 0.2062, - "step": 16592 - }, - { - "epoch": 1.5631285179341043, - "grad_norm": 0.7113621830940247, - "learning_rate": 2.352837024252359e-06, - "loss": 0.2034, - "step": 16593 - }, - { - "epoch": 1.563222722027272, - "grad_norm": 
0.6645920872688293, - "learning_rate": 2.35186410542776e-06, - "loss": 0.1701, - "step": 16594 - }, - { - "epoch": 1.5633169261204398, - "grad_norm": 0.6594851016998291, - "learning_rate": 2.3508913609924865e-06, - "loss": 0.1685, - "step": 16595 - }, - { - "epoch": 1.5634111302136078, - "grad_norm": 0.6979818940162659, - "learning_rate": 2.3499187909687193e-06, - "loss": 0.2235, - "step": 16596 - }, - { - "epoch": 1.5635053343067757, - "grad_norm": 0.6058693528175354, - "learning_rate": 2.348946395378637e-06, - "loss": 0.1942, - "step": 16597 - }, - { - "epoch": 1.5635995383999435, - "grad_norm": 0.6438829302787781, - "learning_rate": 2.347974174244406e-06, - "loss": 0.18, - "step": 16598 - }, - { - "epoch": 1.5636937424931112, - "grad_norm": 0.5994290113449097, - "learning_rate": 2.3470021275881995e-06, - "loss": 0.1753, - "step": 16599 - }, - { - "epoch": 1.5637879465862792, - "grad_norm": 0.6323649883270264, - "learning_rate": 2.3460302554321823e-06, - "loss": 0.1891, - "step": 16600 - }, - { - "epoch": 1.5638821506794471, - "grad_norm": 0.6612114906311035, - "learning_rate": 2.3450585577985087e-06, - "loss": 0.1971, - "step": 16601 - }, - { - "epoch": 1.5639763547726149, - "grad_norm": 0.7276307940483093, - "learning_rate": 2.3440870347093426e-06, - "loss": 0.1897, - "step": 16602 - }, - { - "epoch": 1.5640705588657826, - "grad_norm": 0.6305508017539978, - "learning_rate": 2.3431156861868288e-06, - "loss": 0.1801, - "step": 16603 - }, - { - "epoch": 1.5641647629589506, - "grad_norm": 0.6388223171234131, - "learning_rate": 2.3421445122531215e-06, - "loss": 0.1893, - "step": 16604 - }, - { - "epoch": 1.5642589670521185, - "grad_norm": 0.6965458989143372, - "learning_rate": 2.3411735129303604e-06, - "loss": 0.1973, - "step": 16605 - }, - { - "epoch": 1.5643531711452863, - "grad_norm": 0.6546480655670166, - "learning_rate": 2.340202688240687e-06, - "loss": 0.1759, - "step": 16606 - }, - { - "epoch": 1.564447375238454, - "grad_norm": 0.6175102591514587, - "learning_rate": 2.339232038206242e-06, - "loss": 0.1713, - "step": 16607 - }, - { - "epoch": 1.564541579331622, - "grad_norm": 0.7065578699111938, - "learning_rate": 2.33826156284915e-06, - "loss": 0.2033, - "step": 16608 - }, - { - "epoch": 1.56463578342479, - "grad_norm": 0.6182644963264465, - "learning_rate": 2.3372912621915445e-06, - "loss": 0.186, - "step": 16609 - }, - { - "epoch": 1.5647299875179577, - "grad_norm": 0.7014366388320923, - "learning_rate": 2.3363211362555515e-06, - "loss": 0.2152, - "step": 16610 - }, - { - "epoch": 1.5648241916111254, - "grad_norm": 0.7184340953826904, - "learning_rate": 2.335351185063286e-06, - "loss": 0.2095, - "step": 16611 - }, - { - "epoch": 1.5649183957042934, - "grad_norm": 0.677432656288147, - "learning_rate": 2.33438140863687e-06, - "loss": 0.2079, - "step": 16612 - }, - { - "epoch": 1.5650125997974613, - "grad_norm": 0.6766159534454346, - "learning_rate": 2.3334118069984102e-06, - "loss": 0.2053, - "step": 16613 - }, - { - "epoch": 1.565106803890629, - "grad_norm": 0.702167809009552, - "learning_rate": 2.3324423801700168e-06, - "loss": 0.1925, - "step": 16614 - }, - { - "epoch": 1.5652010079837968, - "grad_norm": 0.6005187630653381, - "learning_rate": 2.331473128173799e-06, - "loss": 0.1561, - "step": 16615 - }, - { - "epoch": 1.5652952120769648, - "grad_norm": 0.6294963359832764, - "learning_rate": 2.330504051031851e-06, - "loss": 0.1808, - "step": 16616 - }, - { - "epoch": 1.5653894161701327, - "grad_norm": 0.6566830277442932, - "learning_rate": 2.329535148766271e-06, - "loss": 0.1842, - 
"step": 16617 - }, - { - "epoch": 1.5654836202633005, - "grad_norm": 0.6712966561317444, - "learning_rate": 2.3285664213991555e-06, - "loss": 0.2009, - "step": 16618 - }, - { - "epoch": 1.5655778243564682, - "grad_norm": 0.6773460507392883, - "learning_rate": 2.327597868952587e-06, - "loss": 0.1939, - "step": 16619 - }, - { - "epoch": 1.5656720284496362, - "grad_norm": 0.6457515358924866, - "learning_rate": 2.326629491448652e-06, - "loss": 0.1829, - "step": 16620 - }, - { - "epoch": 1.5657662325428041, - "grad_norm": 0.6580275297164917, - "learning_rate": 2.3256612889094345e-06, - "loss": 0.213, - "step": 16621 - }, - { - "epoch": 1.5658604366359719, - "grad_norm": 0.5753109455108643, - "learning_rate": 2.3246932613570083e-06, - "loss": 0.1819, - "step": 16622 - }, - { - "epoch": 1.5659546407291396, - "grad_norm": 0.611003577709198, - "learning_rate": 2.323725408813441e-06, - "loss": 0.1751, - "step": 16623 - }, - { - "epoch": 1.5660488448223076, - "grad_norm": 0.6628442406654358, - "learning_rate": 2.322757731300811e-06, - "loss": 0.1938, - "step": 16624 - }, - { - "epoch": 1.5661430489154755, - "grad_norm": 0.6529707312583923, - "learning_rate": 2.3217902288411775e-06, - "loss": 0.2061, - "step": 16625 - }, - { - "epoch": 1.5662372530086432, - "grad_norm": 0.6242614388465881, - "learning_rate": 2.3208229014565963e-06, - "loss": 0.1691, - "step": 16626 - }, - { - "epoch": 1.566331457101811, - "grad_norm": 0.7480343580245972, - "learning_rate": 2.319855749169134e-06, - "loss": 0.26, - "step": 16627 - }, - { - "epoch": 1.566425661194979, - "grad_norm": 0.620380163192749, - "learning_rate": 2.318888772000839e-06, - "loss": 0.1691, - "step": 16628 - }, - { - "epoch": 1.566519865288147, - "grad_norm": 1.880743145942688, - "learning_rate": 2.3179219699737553e-06, - "loss": 0.1984, - "step": 16629 - }, - { - "epoch": 1.5666140693813146, - "grad_norm": 0.6099167466163635, - "learning_rate": 2.3169553431099325e-06, - "loss": 0.1953, - "step": 16630 - }, - { - "epoch": 1.5667082734744824, - "grad_norm": 1.4367554187774658, - "learning_rate": 2.315988891431412e-06, - "loss": 0.1781, - "step": 16631 - }, - { - "epoch": 1.5668024775676503, - "grad_norm": 0.6985436677932739, - "learning_rate": 2.315022614960225e-06, - "loss": 0.1975, - "step": 16632 - }, - { - "epoch": 1.5668966816608183, - "grad_norm": 0.730664074420929, - "learning_rate": 2.314056513718409e-06, - "loss": 0.2278, - "step": 16633 - }, - { - "epoch": 1.566990885753986, - "grad_norm": 0.5613934397697449, - "learning_rate": 2.313090587727992e-06, - "loss": 0.1831, - "step": 16634 - }, - { - "epoch": 1.5670850898471538, - "grad_norm": 0.7184987664222717, - "learning_rate": 2.3121248370109962e-06, - "loss": 0.2269, - "step": 16635 - }, - { - "epoch": 1.5671792939403217, - "grad_norm": 0.6556032299995422, - "learning_rate": 2.311159261589443e-06, - "loss": 0.1993, - "step": 16636 - }, - { - "epoch": 1.5672734980334897, - "grad_norm": 0.5733645558357239, - "learning_rate": 2.3101938614853524e-06, - "loss": 0.1839, - "step": 16637 - }, - { - "epoch": 1.5673677021266574, - "grad_norm": 0.6518971920013428, - "learning_rate": 2.309228636720732e-06, - "loss": 0.1738, - "step": 16638 - }, - { - "epoch": 1.5674619062198252, - "grad_norm": 0.6515443325042725, - "learning_rate": 2.3082635873175918e-06, - "loss": 0.1927, - "step": 16639 - }, - { - "epoch": 1.5675561103129931, - "grad_norm": 0.6167797446250916, - "learning_rate": 2.3072987132979407e-06, - "loss": 0.1726, - "step": 16640 - }, - { - "epoch": 1.567650314406161, - "grad_norm": 
0.6479583382606506, - "learning_rate": 2.306334014683773e-06, - "loss": 0.1974, - "step": 16641 - }, - { - "epoch": 1.5677445184993288, - "grad_norm": 0.6952350735664368, - "learning_rate": 2.3053694914970914e-06, - "loss": 0.2245, - "step": 16642 - }, - { - "epoch": 1.5678387225924966, - "grad_norm": 0.6067778468132019, - "learning_rate": 2.3044051437598826e-06, - "loss": 0.1957, - "step": 16643 - }, - { - "epoch": 1.5679329266856645, - "grad_norm": 0.631545901298523, - "learning_rate": 2.30344097149414e-06, - "loss": 0.195, - "step": 16644 - }, - { - "epoch": 1.5680271307788325, - "grad_norm": 0.6462365984916687, - "learning_rate": 2.3024769747218433e-06, - "loss": 0.1954, - "step": 16645 - }, - { - "epoch": 1.5681213348720002, - "grad_norm": 0.7221664786338806, - "learning_rate": 2.301513153464977e-06, - "loss": 0.1836, - "step": 16646 - }, - { - "epoch": 1.568215538965168, - "grad_norm": 0.7191948294639587, - "learning_rate": 2.30054950774552e-06, - "loss": 0.2139, - "step": 16647 - }, - { - "epoch": 1.568309743058336, - "grad_norm": 0.7464423179626465, - "learning_rate": 2.2995860375854374e-06, - "loss": 0.2296, - "step": 16648 - }, - { - "epoch": 1.5684039471515039, - "grad_norm": 0.6501345038414001, - "learning_rate": 2.298622743006702e-06, - "loss": 0.2043, - "step": 16649 - }, - { - "epoch": 1.5684981512446716, - "grad_norm": 0.6614580154418945, - "learning_rate": 2.297659624031282e-06, - "loss": 0.196, - "step": 16650 - }, - { - "epoch": 1.5685923553378394, - "grad_norm": 0.6374590396881104, - "learning_rate": 2.296696680681132e-06, - "loss": 0.1881, - "step": 16651 - }, - { - "epoch": 1.5686865594310073, - "grad_norm": 0.6366037130355835, - "learning_rate": 2.295733912978213e-06, - "loss": 0.197, - "step": 16652 - }, - { - "epoch": 1.5687807635241753, - "grad_norm": 0.6388632655143738, - "learning_rate": 2.2947713209444733e-06, - "loss": 0.1718, - "step": 16653 - }, - { - "epoch": 1.568874967617343, - "grad_norm": 0.6700445413589478, - "learning_rate": 2.293808904601864e-06, - "loss": 0.1991, - "step": 16654 - }, - { - "epoch": 1.5689691717105108, - "grad_norm": 0.6413102746009827, - "learning_rate": 2.2928466639723323e-06, - "loss": 0.2413, - "step": 16655 - }, - { - "epoch": 1.5690633758036787, - "grad_norm": 0.8119991421699524, - "learning_rate": 2.2918845990778137e-06, - "loss": 0.245, - "step": 16656 - }, - { - "epoch": 1.5691575798968467, - "grad_norm": 0.5904214978218079, - "learning_rate": 2.2909227099402464e-06, - "loss": 0.1865, - "step": 16657 - }, - { - "epoch": 1.5692517839900144, - "grad_norm": 0.5855529308319092, - "learning_rate": 2.289960996581566e-06, - "loss": 0.1677, - "step": 16658 - }, - { - "epoch": 1.5693459880831822, - "grad_norm": 0.6935964822769165, - "learning_rate": 2.2889994590236965e-06, - "loss": 0.2137, - "step": 16659 - }, - { - "epoch": 1.5694401921763501, - "grad_norm": 0.6831810474395752, - "learning_rate": 2.288038097288564e-06, - "loss": 0.2173, - "step": 16660 - }, - { - "epoch": 1.569534396269518, - "grad_norm": 0.7017054557800293, - "learning_rate": 2.287076911398094e-06, - "loss": 0.2038, - "step": 16661 - }, - { - "epoch": 1.5696286003626858, - "grad_norm": 0.7106721997261047, - "learning_rate": 2.2861159013741975e-06, - "loss": 0.1934, - "step": 16662 - }, - { - "epoch": 1.5697228044558535, - "grad_norm": 0.7033999562263489, - "learning_rate": 2.2851550672387823e-06, - "loss": 0.2187, - "step": 16663 - }, - { - "epoch": 1.5698170085490215, - "grad_norm": 0.670293390750885, - "learning_rate": 2.284194409013769e-06, - "loss": 0.1615, - 
"step": 16664 - }, - { - "epoch": 1.5699112126421895, - "grad_norm": 0.645819365978241, - "learning_rate": 2.283233926721056e-06, - "loss": 0.1692, - "step": 16665 - }, - { - "epoch": 1.5700054167353572, - "grad_norm": 0.6455112099647522, - "learning_rate": 2.282273620382538e-06, - "loss": 0.2016, - "step": 16666 - }, - { - "epoch": 1.570099620828525, - "grad_norm": 0.6257402896881104, - "learning_rate": 2.281313490020122e-06, - "loss": 0.1738, - "step": 16667 - }, - { - "epoch": 1.570193824921693, - "grad_norm": 0.6529598236083984, - "learning_rate": 2.280353535655696e-06, - "loss": 0.1819, - "step": 16668 - }, - { - "epoch": 1.5702880290148606, - "grad_norm": 0.6802598834037781, - "learning_rate": 2.279393757311146e-06, - "loss": 0.1957, - "step": 16669 - }, - { - "epoch": 1.5703822331080284, - "grad_norm": 0.7839449048042297, - "learning_rate": 2.2784341550083577e-06, - "loss": 0.2185, - "step": 16670 - }, - { - "epoch": 1.5704764372011963, - "grad_norm": 0.7309479117393494, - "learning_rate": 2.2774747287692156e-06, - "loss": 0.2451, - "step": 16671 - }, - { - "epoch": 1.5705706412943643, - "grad_norm": 0.6225112080574036, - "learning_rate": 2.27651547861559e-06, - "loss": 0.2154, - "step": 16672 - }, - { - "epoch": 1.570664845387532, - "grad_norm": 0.652088463306427, - "learning_rate": 2.2755564045693558e-06, - "loss": 0.1877, - "step": 16673 - }, - { - "epoch": 1.5707590494806998, - "grad_norm": 0.6621286273002625, - "learning_rate": 2.274597506652384e-06, - "loss": 0.2105, - "step": 16674 - }, - { - "epoch": 1.5708532535738677, - "grad_norm": 0.8194100856781006, - "learning_rate": 2.2736387848865337e-06, - "loss": 0.204, - "step": 16675 - }, - { - "epoch": 1.5709474576670357, - "grad_norm": 0.6432061195373535, - "learning_rate": 2.2726802392936687e-06, - "loss": 0.1925, - "step": 16676 - }, - { - "epoch": 1.5710416617602034, - "grad_norm": 0.649989902973175, - "learning_rate": 2.2717218698956478e-06, - "loss": 0.185, - "step": 16677 - }, - { - "epoch": 1.5711358658533712, - "grad_norm": 0.6618895530700684, - "learning_rate": 2.270763676714317e-06, - "loss": 0.2027, - "step": 16678 - }, - { - "epoch": 1.5712300699465391, - "grad_norm": 0.729942262172699, - "learning_rate": 2.269805659771529e-06, - "loss": 0.18, - "step": 16679 - }, - { - "epoch": 1.571324274039707, - "grad_norm": 0.6783851981163025, - "learning_rate": 2.2688478190891283e-06, - "loss": 0.1899, - "step": 16680 - }, - { - "epoch": 1.5714184781328748, - "grad_norm": 0.5861131548881531, - "learning_rate": 2.267890154688952e-06, - "loss": 0.1833, - "step": 16681 - }, - { - "epoch": 1.5715126822260426, - "grad_norm": 0.6407277584075928, - "learning_rate": 2.2669326665928404e-06, - "loss": 0.2267, - "step": 16682 - }, - { - "epoch": 1.5716068863192105, - "grad_norm": 0.6662833094596863, - "learning_rate": 2.2659753548226203e-06, - "loss": 0.21, - "step": 16683 - }, - { - "epoch": 1.5717010904123785, - "grad_norm": 0.688582181930542, - "learning_rate": 2.265018219400127e-06, - "loss": 0.1982, - "step": 16684 - }, - { - "epoch": 1.5717952945055462, - "grad_norm": 0.6786016821861267, - "learning_rate": 2.264061260347177e-06, - "loss": 0.2037, - "step": 16685 - }, - { - "epoch": 1.571889498598714, - "grad_norm": 0.6584218144416809, - "learning_rate": 2.2631044776855936e-06, - "loss": 0.1759, - "step": 16686 - }, - { - "epoch": 1.571983702691882, - "grad_norm": 0.616054892539978, - "learning_rate": 2.262147871437197e-06, - "loss": 0.1661, - "step": 16687 - }, - { - "epoch": 1.5720779067850499, - "grad_norm": 0.6182340383529663, - 
"learning_rate": 2.261191441623792e-06, - "loss": 0.1773, - "step": 16688 - }, - { - "epoch": 1.5721721108782176, - "grad_norm": 0.6300203800201416, - "learning_rate": 2.2602351882671925e-06, - "loss": 0.2037, - "step": 16689 - }, - { - "epoch": 1.5722663149713854, - "grad_norm": 0.6373847723007202, - "learning_rate": 2.259279111389201e-06, - "loss": 0.1848, - "step": 16690 - }, - { - "epoch": 1.5723605190645533, - "grad_norm": 0.6501670479774475, - "learning_rate": 2.258323211011616e-06, - "loss": 0.2288, - "step": 16691 - }, - { - "epoch": 1.5724547231577213, - "grad_norm": 0.5920819044113159, - "learning_rate": 2.2573674871562357e-06, - "loss": 0.2012, - "step": 16692 - }, - { - "epoch": 1.572548927250889, - "grad_norm": 0.631029486656189, - "learning_rate": 2.256411939844849e-06, - "loss": 0.1904, - "step": 16693 - }, - { - "epoch": 1.5726431313440568, - "grad_norm": 0.6770601272583008, - "learning_rate": 2.2554565690992457e-06, - "loss": 0.194, - "step": 16694 - }, - { - "epoch": 1.5727373354372247, - "grad_norm": 0.7404630184173584, - "learning_rate": 2.2545013749412127e-06, - "loss": 0.2271, - "step": 16695 - }, - { - "epoch": 1.5728315395303927, - "grad_norm": 0.6604426503181458, - "learning_rate": 2.2535463573925244e-06, - "loss": 0.2113, - "step": 16696 - }, - { - "epoch": 1.5729257436235604, - "grad_norm": 0.6434115767478943, - "learning_rate": 2.252591516474959e-06, - "loss": 0.1751, - "step": 16697 - }, - { - "epoch": 1.5730199477167282, - "grad_norm": 0.6137346625328064, - "learning_rate": 2.2516368522102916e-06, - "loss": 0.1905, - "step": 16698 - }, - { - "epoch": 1.573114151809896, - "grad_norm": 0.650473952293396, - "learning_rate": 2.2506823646202834e-06, - "loss": 0.1753, - "step": 16699 - }, - { - "epoch": 1.573208355903064, - "grad_norm": 0.6308495998382568, - "learning_rate": 2.249728053726703e-06, - "loss": 0.1651, - "step": 16700 - }, - { - "epoch": 1.5733025599962318, - "grad_norm": 0.6628783941268921, - "learning_rate": 2.248773919551311e-06, - "loss": 0.1875, - "step": 16701 - }, - { - "epoch": 1.5733967640893995, - "grad_norm": 0.651411771774292, - "learning_rate": 2.2478199621158615e-06, - "loss": 0.1967, - "step": 16702 - }, - { - "epoch": 1.5734909681825675, - "grad_norm": 0.6001507639884949, - "learning_rate": 2.2468661814421e-06, - "loss": 0.1814, - "step": 16703 - }, - { - "epoch": 1.5735851722757355, - "grad_norm": 0.6134946942329407, - "learning_rate": 2.2459125775517854e-06, - "loss": 0.1804, - "step": 16704 - }, - { - "epoch": 1.5736793763689032, - "grad_norm": 0.6726778149604797, - "learning_rate": 2.2449591504666566e-06, - "loss": 0.2289, - "step": 16705 - }, - { - "epoch": 1.573773580462071, - "grad_norm": 0.5789441466331482, - "learning_rate": 2.244005900208447e-06, - "loss": 0.158, - "step": 16706 - }, - { - "epoch": 1.573867784555239, - "grad_norm": 0.60833340883255, - "learning_rate": 2.2430528267989028e-06, - "loss": 0.1838, - "step": 16707 - }, - { - "epoch": 1.5739619886484069, - "grad_norm": 0.6395013928413391, - "learning_rate": 2.242099930259751e-06, - "loss": 0.22, - "step": 16708 - }, - { - "epoch": 1.5740561927415746, - "grad_norm": 0.5981082320213318, - "learning_rate": 2.2411472106127152e-06, - "loss": 0.1982, - "step": 16709 - }, - { - "epoch": 1.5741503968347423, - "grad_norm": 0.6421570777893066, - "learning_rate": 2.2401946678795226e-06, - "loss": 0.2067, - "step": 16710 - }, - { - "epoch": 1.5742446009279103, - "grad_norm": 0.6160954236984253, - "learning_rate": 2.2392423020818954e-06, - "loss": 0.1701, - "step": 16711 - }, - { 
- "epoch": 1.5743388050210783, - "grad_norm": 0.6442878246307373, - "learning_rate": 2.2382901132415423e-06, - "loss": 0.2005, - "step": 16712 - }, - { - "epoch": 1.574433009114246, - "grad_norm": 0.7537941336631775, - "learning_rate": 2.23733810138018e-06, - "loss": 0.1967, - "step": 16713 - }, - { - "epoch": 1.5745272132074137, - "grad_norm": 0.6588831543922424, - "learning_rate": 2.2363862665195156e-06, - "loss": 0.2002, - "step": 16714 - }, - { - "epoch": 1.5746214173005817, - "grad_norm": 0.6572132706642151, - "learning_rate": 2.2354346086812483e-06, - "loss": 0.1875, - "step": 16715 - }, - { - "epoch": 1.5747156213937497, - "grad_norm": 0.6329585909843445, - "learning_rate": 2.2344831278870805e-06, - "loss": 0.1905, - "step": 16716 - }, - { - "epoch": 1.5748098254869174, - "grad_norm": 0.5940223336219788, - "learning_rate": 2.23353182415871e-06, - "loss": 0.1749, - "step": 16717 - }, - { - "epoch": 1.5749040295800851, - "grad_norm": 0.7670513987541199, - "learning_rate": 2.232580697517821e-06, - "loss": 0.1851, - "step": 16718 - }, - { - "epoch": 1.574998233673253, - "grad_norm": 0.7109354138374329, - "learning_rate": 2.231629747986106e-06, - "loss": 0.187, - "step": 16719 - }, - { - "epoch": 1.575092437766421, - "grad_norm": 0.6600790619850159, - "learning_rate": 2.2306789755852487e-06, - "loss": 0.2, - "step": 16720 - }, - { - "epoch": 1.5751866418595888, - "grad_norm": 0.765315055847168, - "learning_rate": 2.2297283803369265e-06, - "loss": 0.2186, - "step": 16721 - }, - { - "epoch": 1.5752808459527565, - "grad_norm": 0.5743632316589355, - "learning_rate": 2.2287779622628115e-06, - "loss": 0.1774, - "step": 16722 - }, - { - "epoch": 1.5753750500459245, - "grad_norm": 0.6753613948822021, - "learning_rate": 2.227827721384577e-06, - "loss": 0.2228, - "step": 16723 - }, - { - "epoch": 1.5754692541390924, - "grad_norm": 0.6676024794578552, - "learning_rate": 2.226877657723894e-06, - "loss": 0.1736, - "step": 16724 - }, - { - "epoch": 1.5755634582322602, - "grad_norm": 0.7018056511878967, - "learning_rate": 2.2259277713024176e-06, - "loss": 0.199, - "step": 16725 - }, - { - "epoch": 1.575657662325428, - "grad_norm": 0.6563654541969299, - "learning_rate": 2.2249780621418117e-06, - "loss": 0.2033, - "step": 16726 - }, - { - "epoch": 1.5757518664185959, - "grad_norm": 0.6973061561584473, - "learning_rate": 2.224028530263733e-06, - "loss": 0.1934, - "step": 16727 - }, - { - "epoch": 1.5758460705117638, - "grad_norm": 0.6676140427589417, - "learning_rate": 2.2230791756898273e-06, - "loss": 0.2212, - "step": 16728 - }, - { - "epoch": 1.5759402746049316, - "grad_norm": 0.613617479801178, - "learning_rate": 2.2221299984417434e-06, - "loss": 0.1762, - "step": 16729 - }, - { - "epoch": 1.5760344786980993, - "grad_norm": 0.6599076986312866, - "learning_rate": 2.221180998541126e-06, - "loss": 0.1748, - "step": 16730 - }, - { - "epoch": 1.5761286827912673, - "grad_norm": 0.6179454326629639, - "learning_rate": 2.22023217600961e-06, - "loss": 0.1827, - "step": 16731 - }, - { - "epoch": 1.5762228868844352, - "grad_norm": 0.6900299787521362, - "learning_rate": 2.2192835308688354e-06, - "loss": 0.2123, - "step": 16732 - }, - { - "epoch": 1.576317090977603, - "grad_norm": 0.6685370206832886, - "learning_rate": 2.2183350631404256e-06, - "loss": 0.194, - "step": 16733 - }, - { - "epoch": 1.5764112950707707, - "grad_norm": 0.6364673972129822, - "learning_rate": 2.217386772846012e-06, - "loss": 0.1748, - "step": 16734 - }, - { - "epoch": 1.5765054991639387, - "grad_norm": 0.6273661255836487, - "learning_rate": 
2.2164386600072173e-06, - "loss": 0.1937, - "step": 16735 - }, - { - "epoch": 1.5765997032571066, - "grad_norm": 0.5692266821861267, - "learning_rate": 2.215490724645657e-06, - "loss": 0.1678, - "step": 16736 - }, - { - "epoch": 1.5766939073502744, - "grad_norm": 0.6795256733894348, - "learning_rate": 2.2145429667829464e-06, - "loss": 0.1849, - "step": 16737 - }, - { - "epoch": 1.576788111443442, - "grad_norm": 0.6403346061706543, - "learning_rate": 2.2135953864407e-06, - "loss": 0.1934, - "step": 16738 - }, - { - "epoch": 1.57688231553661, - "grad_norm": 0.6476584672927856, - "learning_rate": 2.2126479836405177e-06, - "loss": 0.2095, - "step": 16739 - }, - { - "epoch": 1.576976519629778, - "grad_norm": 0.6238870620727539, - "learning_rate": 2.211700758404005e-06, - "loss": 0.1858, - "step": 16740 - }, - { - "epoch": 1.5770707237229458, - "grad_norm": 0.6293942928314209, - "learning_rate": 2.2107537107527633e-06, - "loss": 0.1936, - "step": 16741 - }, - { - "epoch": 1.5771649278161135, - "grad_norm": 0.6041991114616394, - "learning_rate": 2.2098068407083815e-06, - "loss": 0.1931, - "step": 16742 - }, - { - "epoch": 1.5772591319092815, - "grad_norm": 0.6643226146697998, - "learning_rate": 2.208860148292448e-06, - "loss": 0.1961, - "step": 16743 - }, - { - "epoch": 1.5773533360024494, - "grad_norm": 0.6403629779815674, - "learning_rate": 2.207913633526556e-06, - "loss": 0.1793, - "step": 16744 - }, - { - "epoch": 1.5774475400956172, - "grad_norm": 0.6918803453445435, - "learning_rate": 2.206967296432285e-06, - "loss": 0.1982, - "step": 16745 - }, - { - "epoch": 1.577541744188785, - "grad_norm": 0.6899217367172241, - "learning_rate": 2.2060211370312077e-06, - "loss": 0.2132, - "step": 16746 - }, - { - "epoch": 1.5776359482819529, - "grad_norm": 0.6735967993736267, - "learning_rate": 2.2050751553449034e-06, - "loss": 0.1941, - "step": 16747 - }, - { - "epoch": 1.5777301523751208, - "grad_norm": 0.5775204300880432, - "learning_rate": 2.2041293513949436e-06, - "loss": 0.186, - "step": 16748 - }, - { - "epoch": 1.5778243564682886, - "grad_norm": 0.6950227618217468, - "learning_rate": 2.2031837252028878e-06, - "loss": 0.2158, - "step": 16749 - }, - { - "epoch": 1.5779185605614563, - "grad_norm": 0.6783015131950378, - "learning_rate": 2.2022382767903006e-06, - "loss": 0.2047, - "step": 16750 - }, - { - "epoch": 1.5780127646546243, - "grad_norm": 0.7202771306037903, - "learning_rate": 2.201293006178744e-06, - "loss": 0.2242, - "step": 16751 - }, - { - "epoch": 1.5781069687477922, - "grad_norm": 0.6268263459205627, - "learning_rate": 2.2003479133897643e-06, - "loss": 0.1812, - "step": 16752 - }, - { - "epoch": 1.57820117284096, - "grad_norm": 0.6736927628517151, - "learning_rate": 2.199402998444915e-06, - "loss": 0.2153, - "step": 16753 - }, - { - "epoch": 1.5782953769341277, - "grad_norm": 0.5757262110710144, - "learning_rate": 2.1984582613657436e-06, - "loss": 0.1706, - "step": 16754 - }, - { - "epoch": 1.5783895810272957, - "grad_norm": 0.6886053085327148, - "learning_rate": 2.197513702173786e-06, - "loss": 0.1889, - "step": 16755 - }, - { - "epoch": 1.5784837851204636, - "grad_norm": 0.7031805515289307, - "learning_rate": 2.1965693208905835e-06, - "loss": 0.2053, - "step": 16756 - }, - { - "epoch": 1.5785779892136313, - "grad_norm": 0.6282926797866821, - "learning_rate": 2.195625117537671e-06, - "loss": 0.1808, - "step": 16757 - }, - { - "epoch": 1.578672193306799, - "grad_norm": 0.7353420257568359, - "learning_rate": 2.1946810921365734e-06, - "loss": 0.2176, - "step": 16758 - }, - { - "epoch": 
1.578766397399967, - "grad_norm": 0.6532454490661621, - "learning_rate": 2.1937372447088178e-06, - "loss": 0.1755, - "step": 16759 - }, - { - "epoch": 1.578860601493135, - "grad_norm": 0.6954646706581116, - "learning_rate": 2.192793575275928e-06, - "loss": 0.189, - "step": 16760 - }, - { - "epoch": 1.5789548055863027, - "grad_norm": 0.6403496861457825, - "learning_rate": 2.191850083859419e-06, - "loss": 0.1635, - "step": 16761 - }, - { - "epoch": 1.5790490096794705, - "grad_norm": 0.6036338210105896, - "learning_rate": 2.1909067704808007e-06, - "loss": 0.2017, - "step": 16762 - }, - { - "epoch": 1.5791432137726384, - "grad_norm": 0.6709696054458618, - "learning_rate": 2.1899636351615848e-06, - "loss": 0.1925, - "step": 16763 - }, - { - "epoch": 1.5792374178658064, - "grad_norm": 0.6412215828895569, - "learning_rate": 2.1890206779232804e-06, - "loss": 0.1981, - "step": 16764 - }, - { - "epoch": 1.5793316219589741, - "grad_norm": 0.6793758273124695, - "learning_rate": 2.1880778987873806e-06, - "loss": 0.2149, - "step": 16765 - }, - { - "epoch": 1.5794258260521419, - "grad_norm": 0.5914242267608643, - "learning_rate": 2.187135297775387e-06, - "loss": 0.1934, - "step": 16766 - }, - { - "epoch": 1.5795200301453098, - "grad_norm": 0.7470661401748657, - "learning_rate": 2.1861928749087936e-06, - "loss": 0.2182, - "step": 16767 - }, - { - "epoch": 1.5796142342384778, - "grad_norm": 0.6642493605613708, - "learning_rate": 2.1852506302090836e-06, - "loss": 0.1639, - "step": 16768 - }, - { - "epoch": 1.5797084383316455, - "grad_norm": 0.6759933233261108, - "learning_rate": 2.1843085636977458e-06, - "loss": 0.2289, - "step": 16769 - }, - { - "epoch": 1.5798026424248133, - "grad_norm": 0.6871334314346313, - "learning_rate": 2.1833666753962624e-06, - "loss": 0.1991, - "step": 16770 - }, - { - "epoch": 1.5798968465179812, - "grad_norm": 0.7563052773475647, - "learning_rate": 2.182424965326104e-06, - "loss": 0.2207, - "step": 16771 - }, - { - "epoch": 1.5799910506111492, - "grad_norm": 0.6464877128601074, - "learning_rate": 2.181483433508751e-06, - "loss": 0.1827, - "step": 16772 - }, - { - "epoch": 1.580085254704317, - "grad_norm": 0.6580135226249695, - "learning_rate": 2.180542079965663e-06, - "loss": 0.1866, - "step": 16773 - }, - { - "epoch": 1.5801794587974847, - "grad_norm": 0.6818664073944092, - "learning_rate": 2.17960090471831e-06, - "loss": 0.1923, - "step": 16774 - }, - { - "epoch": 1.5802736628906526, - "grad_norm": 0.6275060176849365, - "learning_rate": 2.1786599077881522e-06, - "loss": 0.2035, - "step": 16775 - }, - { - "epoch": 1.5803678669838206, - "grad_norm": 0.7090134024620056, - "learning_rate": 2.1777190891966425e-06, - "loss": 0.1996, - "step": 16776 - }, - { - "epoch": 1.5804620710769883, - "grad_norm": 0.738533079624176, - "learning_rate": 2.1767784489652345e-06, - "loss": 0.2026, - "step": 16777 - }, - { - "epoch": 1.580556275170156, - "grad_norm": 0.6837629675865173, - "learning_rate": 2.175837987115379e-06, - "loss": 0.2036, - "step": 16778 - }, - { - "epoch": 1.580650479263324, - "grad_norm": 0.6072348356246948, - "learning_rate": 2.174897703668516e-06, - "loss": 0.1913, - "step": 16779 - }, - { - "epoch": 1.580744683356492, - "grad_norm": 0.684169352054596, - "learning_rate": 2.173957598646087e-06, - "loss": 0.2253, - "step": 16780 - }, - { - "epoch": 1.5808388874496597, - "grad_norm": 0.6403519511222839, - "learning_rate": 2.1730176720695307e-06, - "loss": 0.2062, - "step": 16781 - }, - { - "epoch": 1.5809330915428275, - "grad_norm": 0.6331693530082703, - "learning_rate": 
2.1720779239602753e-06, - "loss": 0.1934, - "step": 16782 - }, - { - "epoch": 1.5810272956359954, - "grad_norm": 0.6811143755912781, - "learning_rate": 2.1711383543397448e-06, - "loss": 0.2249, - "step": 16783 - }, - { - "epoch": 1.5811214997291634, - "grad_norm": 0.690163791179657, - "learning_rate": 2.170198963229372e-06, - "loss": 0.177, - "step": 16784 - }, - { - "epoch": 1.5812157038223311, - "grad_norm": 0.7179564833641052, - "learning_rate": 2.1692597506505717e-06, - "loss": 0.1992, - "step": 16785 - }, - { - "epoch": 1.5813099079154989, - "grad_norm": 0.6467909216880798, - "learning_rate": 2.1683207166247578e-06, - "loss": 0.2166, - "step": 16786 - }, - { - "epoch": 1.5814041120086668, - "grad_norm": 0.6622843146324158, - "learning_rate": 2.167381861173343e-06, - "loss": 0.1952, - "step": 16787 - }, - { - "epoch": 1.5814983161018348, - "grad_norm": 0.6338428854942322, - "learning_rate": 2.1664431843177382e-06, - "loss": 0.1996, - "step": 16788 - }, - { - "epoch": 1.5815925201950025, - "grad_norm": 0.6173291206359863, - "learning_rate": 2.1655046860793417e-06, - "loss": 0.1677, - "step": 16789 - }, - { - "epoch": 1.5816867242881703, - "grad_norm": 0.6547451019287109, - "learning_rate": 2.1645663664795534e-06, - "loss": 0.2005, - "step": 16790 - }, - { - "epoch": 1.5817809283813382, - "grad_norm": 0.6102684736251831, - "learning_rate": 2.1636282255397723e-06, - "loss": 0.1948, - "step": 16791 - }, - { - "epoch": 1.5818751324745062, - "grad_norm": 0.6864402890205383, - "learning_rate": 2.162690263281384e-06, - "loss": 0.2043, - "step": 16792 - }, - { - "epoch": 1.581969336567674, - "grad_norm": 0.6589213609695435, - "learning_rate": 2.1617524797257792e-06, - "loss": 0.1956, - "step": 16793 - }, - { - "epoch": 1.5820635406608416, - "grad_norm": 0.6801182627677917, - "learning_rate": 2.1608148748943424e-06, - "loss": 0.1761, - "step": 16794 - }, - { - "epoch": 1.5821577447540096, - "grad_norm": 0.6492635607719421, - "learning_rate": 2.1598774488084474e-06, - "loss": 0.1878, - "step": 16795 - }, - { - "epoch": 1.5822519488471776, - "grad_norm": 0.6200484037399292, - "learning_rate": 2.1589402014894714e-06, - "loss": 0.1949, - "step": 16796 - }, - { - "epoch": 1.5823461529403453, - "grad_norm": 0.6302139163017273, - "learning_rate": 2.158003132958787e-06, - "loss": 0.1821, - "step": 16797 - }, - { - "epoch": 1.582440357033513, - "grad_norm": 0.6472926735877991, - "learning_rate": 2.1570662432377576e-06, - "loss": 0.1864, - "step": 16798 - }, - { - "epoch": 1.582534561126681, - "grad_norm": 0.6446152329444885, - "learning_rate": 2.1561295323477472e-06, - "loss": 0.1838, - "step": 16799 - }, - { - "epoch": 1.582628765219849, - "grad_norm": 0.6802383661270142, - "learning_rate": 2.1551930003101163e-06, - "loss": 0.1973, - "step": 16800 - }, - { - "epoch": 1.5827229693130167, - "grad_norm": 0.6307654976844788, - "learning_rate": 2.1542566471462168e-06, - "loss": 0.2022, - "step": 16801 - }, - { - "epoch": 1.5828171734061844, - "grad_norm": 0.6357226371765137, - "learning_rate": 2.153320472877397e-06, - "loss": 0.1944, - "step": 16802 - }, - { - "epoch": 1.5829113774993524, - "grad_norm": 0.6586205959320068, - "learning_rate": 2.152384477525007e-06, - "loss": 0.2113, - "step": 16803 - }, - { - "epoch": 1.5830055815925204, - "grad_norm": 0.7027658224105835, - "learning_rate": 2.1514486611103892e-06, - "loss": 0.2011, - "step": 16804 - }, - { - "epoch": 1.5830997856856879, - "grad_norm": 0.6034343242645264, - "learning_rate": 2.1505130236548767e-06, - "loss": 0.1946, - "step": 16805 - }, - { - 
"epoch": 1.5831939897788558, - "grad_norm": 0.6628339290618896, - "learning_rate": 2.149577565179807e-06, - "loss": 0.2054, - "step": 16806 - }, - { - "epoch": 1.5832881938720238, - "grad_norm": 0.6580031514167786, - "learning_rate": 2.1486422857065128e-06, - "loss": 0.1951, - "step": 16807 - }, - { - "epoch": 1.5833823979651915, - "grad_norm": 0.7510112524032593, - "learning_rate": 2.1477071852563138e-06, - "loss": 0.1958, - "step": 16808 - }, - { - "epoch": 1.5834766020583593, - "grad_norm": 0.6545968055725098, - "learning_rate": 2.1467722638505352e-06, - "loss": 0.2193, - "step": 16809 - }, - { - "epoch": 1.5835708061515272, - "grad_norm": 0.6425247192382812, - "learning_rate": 2.1458375215104965e-06, - "loss": 0.1956, - "step": 16810 - }, - { - "epoch": 1.5836650102446952, - "grad_norm": 0.6466528177261353, - "learning_rate": 2.1449029582575064e-06, - "loss": 0.198, - "step": 16811 - }, - { - "epoch": 1.583759214337863, - "grad_norm": 0.6519531011581421, - "learning_rate": 2.143968574112879e-06, - "loss": 0.19, - "step": 16812 - }, - { - "epoch": 1.5838534184310307, - "grad_norm": 0.699285089969635, - "learning_rate": 2.143034369097916e-06, - "loss": 0.2098, - "step": 16813 - }, - { - "epoch": 1.5839476225241986, - "grad_norm": 0.6333559155464172, - "learning_rate": 2.1421003432339203e-06, - "loss": 0.1815, - "step": 16814 - }, - { - "epoch": 1.5840418266173666, - "grad_norm": 0.7741021513938904, - "learning_rate": 2.141166496542192e-06, - "loss": 0.1922, - "step": 16815 - }, - { - "epoch": 1.5841360307105343, - "grad_norm": 0.6113501787185669, - "learning_rate": 2.1402328290440176e-06, - "loss": 0.1773, - "step": 16816 - }, - { - "epoch": 1.584230234803702, - "grad_norm": 0.6935142874717712, - "learning_rate": 2.139299340760692e-06, - "loss": 0.205, - "step": 16817 - }, - { - "epoch": 1.58432443889687, - "grad_norm": 0.6054620146751404, - "learning_rate": 2.138366031713499e-06, - "loss": 0.1778, - "step": 16818 - }, - { - "epoch": 1.584418642990038, - "grad_norm": 0.5931374430656433, - "learning_rate": 2.1374329019237173e-06, - "loss": 0.2062, - "step": 16819 - }, - { - "epoch": 1.5845128470832057, - "grad_norm": 0.6228781938552856, - "learning_rate": 2.136499951412625e-06, - "loss": 0.1985, - "step": 16820 - }, - { - "epoch": 1.5846070511763735, - "grad_norm": 0.6543347835540771, - "learning_rate": 2.1355671802014976e-06, - "loss": 0.1828, - "step": 16821 - }, - { - "epoch": 1.5847012552695414, - "grad_norm": 0.6243523359298706, - "learning_rate": 2.1346345883116014e-06, - "loss": 0.1874, - "step": 16822 - }, - { - "epoch": 1.5847954593627094, - "grad_norm": 0.8013496398925781, - "learning_rate": 2.133702175764194e-06, - "loss": 0.239, - "step": 16823 - }, - { - "epoch": 1.5848896634558771, - "grad_norm": 0.8083053827285767, - "learning_rate": 2.1327699425805493e-06, - "loss": 0.1956, - "step": 16824 - }, - { - "epoch": 1.5849838675490449, - "grad_norm": 0.7009676098823547, - "learning_rate": 2.131837888781916e-06, - "loss": 0.2103, - "step": 16825 - }, - { - "epoch": 1.5850780716422128, - "grad_norm": 0.6287767887115479, - "learning_rate": 2.1309060143895443e-06, - "loss": 0.1802, - "step": 16826 - }, - { - "epoch": 1.5851722757353808, - "grad_norm": 0.6486721634864807, - "learning_rate": 2.129974319424686e-06, - "loss": 0.2046, - "step": 16827 - }, - { - "epoch": 1.5852664798285485, - "grad_norm": 0.6545873880386353, - "learning_rate": 2.129042803908586e-06, - "loss": 0.1912, - "step": 16828 - }, - { - "epoch": 1.5853606839217163, - "grad_norm": 0.6733275055885315, - 
"learning_rate": 2.1281114678624813e-06, - "loss": 0.1835, - "step": 16829 - }, - { - "epoch": 1.5854548880148842, - "grad_norm": 0.6418502330780029, - "learning_rate": 2.1271803113076085e-06, - "loss": 0.1892, - "step": 16830 - }, - { - "epoch": 1.5855490921080522, - "grad_norm": 0.7270448207855225, - "learning_rate": 2.1262493342652036e-06, - "loss": 0.2058, - "step": 16831 - }, - { - "epoch": 1.58564329620122, - "grad_norm": 0.6308165192604065, - "learning_rate": 2.1253185367564865e-06, - "loss": 0.1998, - "step": 16832 - }, - { - "epoch": 1.5857375002943876, - "grad_norm": 0.5984036922454834, - "learning_rate": 2.1243879188026873e-06, - "loss": 0.1696, - "step": 16833 - }, - { - "epoch": 1.5858317043875556, - "grad_norm": 0.6338097453117371, - "learning_rate": 2.123457480425025e-06, - "loss": 0.1807, - "step": 16834 - }, - { - "epoch": 1.5859259084807236, - "grad_norm": 0.7448612451553345, - "learning_rate": 2.12252722164471e-06, - "loss": 0.2285, - "step": 16835 - }, - { - "epoch": 1.5860201125738913, - "grad_norm": 0.6317852139472961, - "learning_rate": 2.1215971424829584e-06, - "loss": 0.1813, - "step": 16836 - }, - { - "epoch": 1.586114316667059, - "grad_norm": 0.6573054194450378, - "learning_rate": 2.1206672429609786e-06, - "loss": 0.2028, - "step": 16837 - }, - { - "epoch": 1.586208520760227, - "grad_norm": 0.7340366840362549, - "learning_rate": 2.1197375230999707e-06, - "loss": 0.2047, - "step": 16838 - }, - { - "epoch": 1.586302724853395, - "grad_norm": 0.6898893713951111, - "learning_rate": 2.1188079829211296e-06, - "loss": 0.1876, - "step": 16839 - }, - { - "epoch": 1.5863969289465627, - "grad_norm": 0.6378750801086426, - "learning_rate": 2.1178786224456603e-06, - "loss": 0.194, - "step": 16840 - }, - { - "epoch": 1.5864911330397304, - "grad_norm": 0.6489059329032898, - "learning_rate": 2.116949441694748e-06, - "loss": 0.1953, - "step": 16841 - }, - { - "epoch": 1.5865853371328984, - "grad_norm": 0.5951984524726868, - "learning_rate": 2.116020440689577e-06, - "loss": 0.1814, - "step": 16842 - }, - { - "epoch": 1.5866795412260664, - "grad_norm": 0.7144877314567566, - "learning_rate": 2.115091619451334e-06, - "loss": 0.225, - "step": 16843 - }, - { - "epoch": 1.586773745319234, - "grad_norm": 0.6485958099365234, - "learning_rate": 2.1141629780011975e-06, - "loss": 0.1985, - "step": 16844 - }, - { - "epoch": 1.5868679494124018, - "grad_norm": 0.7251630425453186, - "learning_rate": 2.1132345163603386e-06, - "loss": 0.1814, - "step": 16845 - }, - { - "epoch": 1.5869621535055698, - "grad_norm": 0.7341436743736267, - "learning_rate": 2.1123062345499303e-06, - "loss": 0.2144, - "step": 16846 - }, - { - "epoch": 1.5870563575987378, - "grad_norm": 0.730889081954956, - "learning_rate": 2.1113781325911397e-06, - "loss": 0.2146, - "step": 16847 - }, - { - "epoch": 1.5871505616919055, - "grad_norm": 0.6238629221916199, - "learning_rate": 2.110450210505126e-06, - "loss": 0.1813, - "step": 16848 - }, - { - "epoch": 1.5872447657850732, - "grad_norm": 0.6476354002952576, - "learning_rate": 2.109522468313049e-06, - "loss": 0.1905, - "step": 16849 - }, - { - "epoch": 1.5873389698782412, - "grad_norm": 0.7104042172431946, - "learning_rate": 2.1085949060360654e-06, - "loss": 0.2263, - "step": 16850 - }, - { - "epoch": 1.5874331739714092, - "grad_norm": 0.7384299039840698, - "learning_rate": 2.1076675236953194e-06, - "loss": 0.2109, - "step": 16851 - }, - { - "epoch": 1.5875273780645769, - "grad_norm": 0.609963595867157, - "learning_rate": 2.106740321311962e-06, - "loss": 0.1767, - "step": 16852 - 
}, - { - "epoch": 1.5876215821577446, - "grad_norm": 0.6352958679199219, - "learning_rate": 2.10581329890713e-06, - "loss": 0.1635, - "step": 16853 - }, - { - "epoch": 1.5877157862509126, - "grad_norm": 0.6387240886688232, - "learning_rate": 2.1048864565019635e-06, - "loss": 0.2126, - "step": 16854 - }, - { - "epoch": 1.5878099903440805, - "grad_norm": 0.7180310487747192, - "learning_rate": 2.1039597941175984e-06, - "loss": 0.2195, - "step": 16855 - }, - { - "epoch": 1.5879041944372483, - "grad_norm": 0.7523316144943237, - "learning_rate": 2.103033311775158e-06, - "loss": 0.1632, - "step": 16856 - }, - { - "epoch": 1.587998398530416, - "grad_norm": 0.6734952926635742, - "learning_rate": 2.102107009495772e-06, - "loss": 0.2272, - "step": 16857 - }, - { - "epoch": 1.588092602623584, - "grad_norm": 0.6469452977180481, - "learning_rate": 2.1011808873005626e-06, - "loss": 0.1658, - "step": 16858 - }, - { - "epoch": 1.588186806716752, - "grad_norm": 0.6462699770927429, - "learning_rate": 2.1002549452106422e-06, - "loss": 0.1922, - "step": 16859 - }, - { - "epoch": 1.5882810108099197, - "grad_norm": 0.6486620903015137, - "learning_rate": 2.099329183247126e-06, - "loss": 0.2065, - "step": 16860 - }, - { - "epoch": 1.5883752149030874, - "grad_norm": 0.6494008302688599, - "learning_rate": 2.098403601431126e-06, - "loss": 0.1947, - "step": 16861 - }, - { - "epoch": 1.5884694189962554, - "grad_norm": 0.6822574734687805, - "learning_rate": 2.0974781997837444e-06, - "loss": 0.1876, - "step": 16862 - }, - { - "epoch": 1.5885636230894233, - "grad_norm": 0.6509035229682922, - "learning_rate": 2.0965529783260783e-06, - "loss": 0.2122, - "step": 16863 - }, - { - "epoch": 1.588657827182591, - "grad_norm": 0.6158427596092224, - "learning_rate": 2.0956279370792276e-06, - "loss": 0.1934, - "step": 16864 - }, - { - "epoch": 1.5887520312757588, - "grad_norm": 0.6245700120925903, - "learning_rate": 2.094703076064286e-06, - "loss": 0.1884, - "step": 16865 - }, - { - "epoch": 1.5888462353689268, - "grad_norm": 0.7309079170227051, - "learning_rate": 2.093778395302338e-06, - "loss": 0.2161, - "step": 16866 - }, - { - "epoch": 1.5889404394620947, - "grad_norm": 0.6970020532608032, - "learning_rate": 2.0928538948144693e-06, - "loss": 0.1856, - "step": 16867 - }, - { - "epoch": 1.5890346435552625, - "grad_norm": 0.6402873396873474, - "learning_rate": 2.091929574621764e-06, - "loss": 0.1983, - "step": 16868 - }, - { - "epoch": 1.5891288476484302, - "grad_norm": 0.6710144877433777, - "learning_rate": 2.091005434745291e-06, - "loss": 0.1944, - "step": 16869 - }, - { - "epoch": 1.5892230517415982, - "grad_norm": 0.6847306489944458, - "learning_rate": 2.090081475206126e-06, - "loss": 0.2105, - "step": 16870 - }, - { - "epoch": 1.5893172558347661, - "grad_norm": 0.6467797756195068, - "learning_rate": 2.089157696025339e-06, - "loss": 0.1815, - "step": 16871 - }, - { - "epoch": 1.5894114599279339, - "grad_norm": 0.6493083834648132, - "learning_rate": 2.088234097223988e-06, - "loss": 0.1829, - "step": 16872 - }, - { - "epoch": 1.5895056640211016, - "grad_norm": 0.6799706816673279, - "learning_rate": 2.0873106788231346e-06, - "loss": 0.2089, - "step": 16873 - }, - { - "epoch": 1.5895998681142696, - "grad_norm": 0.6377043128013611, - "learning_rate": 2.086387440843839e-06, - "loss": 0.1904, - "step": 16874 - }, - { - "epoch": 1.5896940722074375, - "grad_norm": 0.6574447154998779, - "learning_rate": 2.0854643833071454e-06, - "loss": 0.2127, - "step": 16875 - }, - { - "epoch": 1.5897882763006053, - "grad_norm": 0.6165361404418945, - 
"learning_rate": 2.0845415062341035e-06, - "loss": 0.1894, - "step": 16876 - }, - { - "epoch": 1.589882480393773, - "grad_norm": 0.6518315076828003, - "learning_rate": 2.0836188096457586e-06, - "loss": 0.2247, - "step": 16877 - }, - { - "epoch": 1.589976684486941, - "grad_norm": 0.649908185005188, - "learning_rate": 2.082696293563149e-06, - "loss": 0.1771, - "step": 16878 - }, - { - "epoch": 1.590070888580109, - "grad_norm": 0.6095840334892273, - "learning_rate": 2.0817739580073026e-06, - "loss": 0.201, - "step": 16879 - }, - { - "epoch": 1.5901650926732767, - "grad_norm": 0.6645314693450928, - "learning_rate": 2.080851802999262e-06, - "loss": 0.2107, - "step": 16880 - }, - { - "epoch": 1.5902592967664444, - "grad_norm": 0.6977287530899048, - "learning_rate": 2.0799298285600455e-06, - "loss": 0.2013, - "step": 16881 - }, - { - "epoch": 1.5903535008596124, - "grad_norm": 0.7408657670021057, - "learning_rate": 2.0790080347106768e-06, - "loss": 0.1993, - "step": 16882 - }, - { - "epoch": 1.5904477049527803, - "grad_norm": 0.6141915321350098, - "learning_rate": 2.078086421472174e-06, - "loss": 0.1929, - "step": 16883 - }, - { - "epoch": 1.590541909045948, - "grad_norm": 0.6887730360031128, - "learning_rate": 2.0771649888655553e-06, - "loss": 0.1928, - "step": 16884 - }, - { - "epoch": 1.5906361131391158, - "grad_norm": 0.7160045504570007, - "learning_rate": 2.076243736911825e-06, - "loss": 0.2232, - "step": 16885 - }, - { - "epoch": 1.5907303172322838, - "grad_norm": 0.5830559134483337, - "learning_rate": 2.0753226656319914e-06, - "loss": 0.1984, - "step": 16886 - }, - { - "epoch": 1.5908245213254517, - "grad_norm": 0.6876028180122375, - "learning_rate": 2.0744017750470592e-06, - "loss": 0.1734, - "step": 16887 - }, - { - "epoch": 1.5909187254186195, - "grad_norm": 0.6495875120162964, - "learning_rate": 2.0734810651780213e-06, - "loss": 0.2092, - "step": 16888 - }, - { - "epoch": 1.5910129295117872, - "grad_norm": 0.6716800928115845, - "learning_rate": 2.0725605360458743e-06, - "loss": 0.2108, - "step": 16889 - }, - { - "epoch": 1.5911071336049551, - "grad_norm": 0.7414377331733704, - "learning_rate": 2.07164018767161e-06, - "loss": 0.2066, - "step": 16890 - }, - { - "epoch": 1.591201337698123, - "grad_norm": 0.5687053799629211, - "learning_rate": 2.070720020076207e-06, - "loss": 0.161, - "step": 16891 - }, - { - "epoch": 1.5912955417912908, - "grad_norm": 0.615281879901886, - "learning_rate": 2.0698000332806534e-06, - "loss": 0.1772, - "step": 16892 - }, - { - "epoch": 1.5913897458844586, - "grad_norm": 0.6414679884910583, - "learning_rate": 2.0688802273059218e-06, - "loss": 0.1807, - "step": 16893 - }, - { - "epoch": 1.5914839499776265, - "grad_norm": 0.6869304776191711, - "learning_rate": 2.067960602172985e-06, - "loss": 0.2049, - "step": 16894 - }, - { - "epoch": 1.5915781540707945, - "grad_norm": 0.7120691537857056, - "learning_rate": 2.0670411579028172e-06, - "loss": 0.2571, - "step": 16895 - }, - { - "epoch": 1.5916723581639622, - "grad_norm": 0.6378860473632812, - "learning_rate": 2.0661218945163776e-06, - "loss": 0.2169, - "step": 16896 - }, - { - "epoch": 1.59176656225713, - "grad_norm": 0.6451314091682434, - "learning_rate": 2.065202812034627e-06, - "loss": 0.2028, - "step": 16897 - }, - { - "epoch": 1.591860766350298, - "grad_norm": 0.6550354957580566, - "learning_rate": 2.064283910478527e-06, - "loss": 0.2118, - "step": 16898 - }, - { - "epoch": 1.591954970443466, - "grad_norm": 0.6120240092277527, - "learning_rate": 2.0633651898690256e-06, - "loss": 0.1837, - "step": 16899 - }, - 
{ - "epoch": 1.5920491745366336, - "grad_norm": 0.6445263028144836, - "learning_rate": 2.06244665022707e-06, - "loss": 0.1903, - "step": 16900 - }, - { - "epoch": 1.5921433786298014, - "grad_norm": 0.6270973682403564, - "learning_rate": 2.06152829157361e-06, - "loss": 0.2017, - "step": 16901 - }, - { - "epoch": 1.5922375827229693, - "grad_norm": 0.668395459651947, - "learning_rate": 2.060610113929582e-06, - "loss": 0.1838, - "step": 16902 - }, - { - "epoch": 1.5923317868161373, - "grad_norm": 0.6957286596298218, - "learning_rate": 2.059692117315919e-06, - "loss": 0.161, - "step": 16903 - }, - { - "epoch": 1.592425990909305, - "grad_norm": 0.6116093993186951, - "learning_rate": 2.0587743017535564e-06, - "loss": 0.1982, - "step": 16904 - }, - { - "epoch": 1.5925201950024728, - "grad_norm": 0.8217107057571411, - "learning_rate": 2.0578566672634237e-06, - "loss": 0.2277, - "step": 16905 - }, - { - "epoch": 1.5926143990956407, - "grad_norm": 0.6316429972648621, - "learning_rate": 2.056939213866438e-06, - "loss": 0.1705, - "step": 16906 - }, - { - "epoch": 1.5927086031888087, - "grad_norm": 0.680424153804779, - "learning_rate": 2.0560219415835237e-06, - "loss": 0.1972, - "step": 16907 - }, - { - "epoch": 1.5928028072819764, - "grad_norm": 0.7076070308685303, - "learning_rate": 2.0551048504355965e-06, - "loss": 0.2183, - "step": 16908 - }, - { - "epoch": 1.5928970113751442, - "grad_norm": 0.6588847041130066, - "learning_rate": 2.054187940443563e-06, - "loss": 0.1858, - "step": 16909 - }, - { - "epoch": 1.5929912154683121, - "grad_norm": 0.5913216471672058, - "learning_rate": 2.0532712116283326e-06, - "loss": 0.1752, - "step": 16910 - }, - { - "epoch": 1.59308541956148, - "grad_norm": 0.6831443905830383, - "learning_rate": 2.0523546640108114e-06, - "loss": 0.2054, - "step": 16911 - }, - { - "epoch": 1.5931796236546478, - "grad_norm": 0.7347297072410583, - "learning_rate": 2.051438297611893e-06, - "loss": 0.2185, - "step": 16912 - }, - { - "epoch": 1.5932738277478156, - "grad_norm": 0.6393066644668579, - "learning_rate": 2.0505221124524743e-06, - "loss": 0.2076, - "step": 16913 - }, - { - "epoch": 1.5933680318409835, - "grad_norm": 0.6571082472801208, - "learning_rate": 2.049606108553448e-06, - "loss": 0.1824, - "step": 16914 - }, - { - "epoch": 1.5934622359341515, - "grad_norm": 0.7159751653671265, - "learning_rate": 2.048690285935697e-06, - "loss": 0.2031, - "step": 16915 - }, - { - "epoch": 1.5935564400273192, - "grad_norm": 0.6926108002662659, - "learning_rate": 2.0477746446200997e-06, - "loss": 0.1899, - "step": 16916 - }, - { - "epoch": 1.593650644120487, - "grad_norm": 0.6743190288543701, - "learning_rate": 2.0468591846275443e-06, - "loss": 0.1837, - "step": 16917 - }, - { - "epoch": 1.593744848213655, - "grad_norm": 0.6317774653434753, - "learning_rate": 2.0459439059789e-06, - "loss": 0.1819, - "step": 16918 - }, - { - "epoch": 1.5938390523068229, - "grad_norm": 0.677336573600769, - "learning_rate": 2.045028808695029e-06, - "loss": 0.1979, - "step": 16919 - }, - { - "epoch": 1.5939332563999906, - "grad_norm": 0.7855684757232666, - "learning_rate": 2.0441138927968094e-06, - "loss": 0.2128, - "step": 16920 - }, - { - "epoch": 1.5940274604931584, - "grad_norm": 0.6705695986747742, - "learning_rate": 2.043199158305098e-06, - "loss": 0.1975, - "step": 16921 - }, - { - "epoch": 1.5941216645863263, - "grad_norm": 0.7156023383140564, - "learning_rate": 2.0422846052407474e-06, - "loss": 0.202, - "step": 16922 - }, - { - "epoch": 1.5942158686794943, - "grad_norm": 0.7329447865486145, - 
"learning_rate": 2.0413702336246156e-06, - "loss": 0.2039, - "step": 16923 - }, - { - "epoch": 1.594310072772662, - "grad_norm": 0.7201051115989685, - "learning_rate": 2.0404560434775535e-06, - "loss": 0.1935, - "step": 16924 - }, - { - "epoch": 1.5944042768658298, - "grad_norm": 0.7321786880493164, - "learning_rate": 2.0395420348203996e-06, - "loss": 0.1947, - "step": 16925 - }, - { - "epoch": 1.5944984809589977, - "grad_norm": 0.6338954567909241, - "learning_rate": 2.0386282076739996e-06, - "loss": 0.1976, - "step": 16926 - }, - { - "epoch": 1.5945926850521657, - "grad_norm": 0.6547569036483765, - "learning_rate": 2.0377145620591907e-06, - "loss": 0.1811, - "step": 16927 - }, - { - "epoch": 1.5946868891453334, - "grad_norm": 0.6150227189064026, - "learning_rate": 2.0368010979968013e-06, - "loss": 0.1973, - "step": 16928 - }, - { - "epoch": 1.5947810932385011, - "grad_norm": 0.666617214679718, - "learning_rate": 2.0358878155076622e-06, - "loss": 0.204, - "step": 16929 - }, - { - "epoch": 1.594875297331669, - "grad_norm": 0.7155197262763977, - "learning_rate": 2.0349747146125996e-06, - "loss": 0.1949, - "step": 16930 - }, - { - "epoch": 1.594969501424837, - "grad_norm": 0.6648327112197876, - "learning_rate": 2.0340617953324305e-06, - "loss": 0.2177, - "step": 16931 - }, - { - "epoch": 1.5950637055180048, - "grad_norm": 0.6485323309898376, - "learning_rate": 2.033149057687974e-06, - "loss": 0.1997, - "step": 16932 - }, - { - "epoch": 1.5951579096111725, - "grad_norm": 0.6812450289726257, - "learning_rate": 2.0322365017000367e-06, - "loss": 0.2042, - "step": 16933 - }, - { - "epoch": 1.5952521137043405, - "grad_norm": 0.6267063617706299, - "learning_rate": 2.03132412738943e-06, - "loss": 0.1776, - "step": 16934 - }, - { - "epoch": 1.5953463177975085, - "grad_norm": 0.600155770778656, - "learning_rate": 2.03041193477696e-06, - "loss": 0.1498, - "step": 16935 - }, - { - "epoch": 1.5954405218906762, - "grad_norm": 0.6206088066101074, - "learning_rate": 2.0294999238834203e-06, - "loss": 0.162, - "step": 16936 - }, - { - "epoch": 1.595534725983844, - "grad_norm": 0.6867279410362244, - "learning_rate": 2.028588094729609e-06, - "loss": 0.2371, - "step": 16937 - }, - { - "epoch": 1.595628930077012, - "grad_norm": 0.6605044603347778, - "learning_rate": 2.0276764473363185e-06, - "loss": 0.1725, - "step": 16938 - }, - { - "epoch": 1.5957231341701799, - "grad_norm": 0.6010245680809021, - "learning_rate": 2.0267649817243327e-06, - "loss": 0.1632, - "step": 16939 - }, - { - "epoch": 1.5958173382633476, - "grad_norm": 0.7845609188079834, - "learning_rate": 2.0258536979144373e-06, - "loss": 0.2188, - "step": 16940 - }, - { - "epoch": 1.5959115423565153, - "grad_norm": 0.5976998209953308, - "learning_rate": 2.0249425959274116e-06, - "loss": 0.1867, - "step": 16941 - }, - { - "epoch": 1.5960057464496833, - "grad_norm": 0.6509695053100586, - "learning_rate": 2.0240316757840283e-06, - "loss": 0.1893, - "step": 16942 - }, - { - "epoch": 1.596099950542851, - "grad_norm": 0.6574884653091431, - "learning_rate": 2.023120937505054e-06, - "loss": 0.1892, - "step": 16943 - }, - { - "epoch": 1.5961941546360188, - "grad_norm": 0.6583541631698608, - "learning_rate": 2.0222103811112605e-06, - "loss": 0.186, - "step": 16944 - }, - { - "epoch": 1.5962883587291867, - "grad_norm": 0.6240532398223877, - "learning_rate": 2.02130000662341e-06, - "loss": 0.1952, - "step": 16945 - }, - { - "epoch": 1.5963825628223547, - "grad_norm": 0.594275712966919, - "learning_rate": 2.0203898140622568e-06, - "loss": 0.1833, - "step": 16946 - }, 
- { - "epoch": 1.5964767669155224, - "grad_norm": 0.6507365703582764, - "learning_rate": 2.0194798034485565e-06, - "loss": 0.2249, - "step": 16947 - }, - { - "epoch": 1.5965709710086902, - "grad_norm": 0.6944069266319275, - "learning_rate": 2.0185699748030607e-06, - "loss": 0.1928, - "step": 16948 - }, - { - "epoch": 1.5966651751018581, - "grad_norm": 0.6407192349433899, - "learning_rate": 2.017660328146511e-06, - "loss": 0.196, - "step": 16949 - }, - { - "epoch": 1.596759379195026, - "grad_norm": 0.6506879925727844, - "learning_rate": 2.0167508634996504e-06, - "loss": 0.2043, - "step": 16950 - }, - { - "epoch": 1.5968535832881938, - "grad_norm": 0.6605292558670044, - "learning_rate": 2.015841580883219e-06, - "loss": 0.1706, - "step": 16951 - }, - { - "epoch": 1.5969477873813616, - "grad_norm": 0.6389287114143372, - "learning_rate": 2.014932480317945e-06, - "loss": 0.2312, - "step": 16952 - }, - { - "epoch": 1.5970419914745295, - "grad_norm": 0.6113000512123108, - "learning_rate": 2.0140235618245585e-06, - "loss": 0.1951, - "step": 16953 - }, - { - "epoch": 1.5971361955676975, - "grad_norm": 0.6415550708770752, - "learning_rate": 2.0131148254237898e-06, - "loss": 0.1963, - "step": 16954 - }, - { - "epoch": 1.5972303996608652, - "grad_norm": 0.6620209217071533, - "learning_rate": 2.012206271136353e-06, - "loss": 0.1836, - "step": 16955 - }, - { - "epoch": 1.597324603754033, - "grad_norm": 0.6675146818161011, - "learning_rate": 2.0112978989829634e-06, - "loss": 0.1652, - "step": 16956 - }, - { - "epoch": 1.597418807847201, - "grad_norm": 0.6546598672866821, - "learning_rate": 2.0103897089843406e-06, - "loss": 0.2106, - "step": 16957 - }, - { - "epoch": 1.5975130119403689, - "grad_norm": 0.6360905766487122, - "learning_rate": 2.009481701161189e-06, - "loss": 0.1773, - "step": 16958 - }, - { - "epoch": 1.5976072160335366, - "grad_norm": 0.6721376776695251, - "learning_rate": 2.0085738755342067e-06, - "loss": 0.2002, - "step": 16959 - }, - { - "epoch": 1.5977014201267044, - "grad_norm": 0.6992481350898743, - "learning_rate": 2.0076662321241036e-06, - "loss": 0.1922, - "step": 16960 - }, - { - "epoch": 1.5977956242198723, - "grad_norm": 0.6002038717269897, - "learning_rate": 2.0067587709515714e-06, - "loss": 0.1759, - "step": 16961 - }, - { - "epoch": 1.5978898283130403, - "grad_norm": 0.7566602230072021, - "learning_rate": 2.0058514920372986e-06, - "loss": 0.1843, - "step": 16962 - }, - { - "epoch": 1.597984032406208, - "grad_norm": 0.6735486388206482, - "learning_rate": 2.004944395401974e-06, - "loss": 0.1815, - "step": 16963 - }, - { - "epoch": 1.5980782364993757, - "grad_norm": 0.6113762259483337, - "learning_rate": 2.0040374810662855e-06, - "loss": 0.1774, - "step": 16964 - }, - { - "epoch": 1.5981724405925437, - "grad_norm": 0.5759824514389038, - "learning_rate": 2.0031307490509054e-06, - "loss": 0.175, - "step": 16965 - }, - { - "epoch": 1.5982666446857117, - "grad_norm": 0.5985716581344604, - "learning_rate": 2.0022241993765124e-06, - "loss": 0.1791, - "step": 16966 - }, - { - "epoch": 1.5983608487788794, - "grad_norm": 0.6667709946632385, - "learning_rate": 2.0013178320637783e-06, - "loss": 0.2181, - "step": 16967 - }, - { - "epoch": 1.5984550528720471, - "grad_norm": 0.6682126522064209, - "learning_rate": 2.0004116471333644e-06, - "loss": 0.1807, - "step": 16968 - }, - { - "epoch": 1.598549256965215, - "grad_norm": 0.6660087704658508, - "learning_rate": 1.999505644605938e-06, - "loss": 0.1859, - "step": 16969 - }, - { - "epoch": 1.598643461058383, - "grad_norm": 0.6404968500137329, - 
"learning_rate": 1.9985998245021576e-06, - "loss": 0.2026, - "step": 16970 - }, - { - "epoch": 1.5987376651515508, - "grad_norm": 0.6893209218978882, - "learning_rate": 1.9976941868426735e-06, - "loss": 0.2068, - "step": 16971 - }, - { - "epoch": 1.5988318692447185, - "grad_norm": 0.6096252202987671, - "learning_rate": 1.9967887316481403e-06, - "loss": 0.1738, - "step": 16972 - }, - { - "epoch": 1.5989260733378865, - "grad_norm": 0.6376148462295532, - "learning_rate": 1.9958834589391983e-06, - "loss": 0.1631, - "step": 16973 - }, - { - "epoch": 1.5990202774310545, - "grad_norm": 0.6672173738479614, - "learning_rate": 1.994978368736492e-06, - "loss": 0.2128, - "step": 16974 - }, - { - "epoch": 1.5991144815242222, - "grad_norm": 0.7077866792678833, - "learning_rate": 1.9940734610606614e-06, - "loss": 0.185, - "step": 16975 - }, - { - "epoch": 1.59920868561739, - "grad_norm": 0.6899238228797913, - "learning_rate": 1.993168735932336e-06, - "loss": 0.2095, - "step": 16976 - }, - { - "epoch": 1.599302889710558, - "grad_norm": 0.6598714590072632, - "learning_rate": 1.992264193372145e-06, - "loss": 0.2066, - "step": 16977 - }, - { - "epoch": 1.5993970938037259, - "grad_norm": 0.7184349894523621, - "learning_rate": 1.9913598334007177e-06, - "loss": 0.2171, - "step": 16978 - }, - { - "epoch": 1.5994912978968936, - "grad_norm": 0.6424234509468079, - "learning_rate": 1.990455656038669e-06, - "loss": 0.2161, - "step": 16979 - }, - { - "epoch": 1.5995855019900613, - "grad_norm": 0.610517680644989, - "learning_rate": 1.9895516613066203e-06, - "loss": 0.2063, - "step": 16980 - }, - { - "epoch": 1.5996797060832293, - "grad_norm": 0.6170348525047302, - "learning_rate": 1.9886478492251805e-06, - "loss": 0.1742, - "step": 16981 - }, - { - "epoch": 1.5997739101763973, - "grad_norm": 0.7161915898323059, - "learning_rate": 1.987744219814961e-06, - "loss": 0.2132, - "step": 16982 - }, - { - "epoch": 1.599868114269565, - "grad_norm": 0.6135024428367615, - "learning_rate": 1.986840773096563e-06, - "loss": 0.187, - "step": 16983 - }, - { - "epoch": 1.5999623183627327, - "grad_norm": 0.6426527500152588, - "learning_rate": 1.9859375090905876e-06, - "loss": 0.1901, - "step": 16984 - }, - { - "epoch": 1.6000565224559007, - "grad_norm": 0.6360190510749817, - "learning_rate": 1.985034427817634e-06, - "loss": 0.192, - "step": 16985 - }, - { - "epoch": 1.6001507265490686, - "grad_norm": 0.7561869025230408, - "learning_rate": 1.984131529298288e-06, - "loss": 0.225, - "step": 16986 - }, - { - "epoch": 1.6002449306422364, - "grad_norm": 0.6566307544708252, - "learning_rate": 1.9832288135531385e-06, - "loss": 0.1912, - "step": 16987 - }, - { - "epoch": 1.6003391347354041, - "grad_norm": 0.631393551826477, - "learning_rate": 1.982326280602774e-06, - "loss": 0.1944, - "step": 16988 - }, - { - "epoch": 1.600433338828572, - "grad_norm": 0.6858162879943848, - "learning_rate": 1.9814239304677676e-06, - "loss": 0.1825, - "step": 16989 - }, - { - "epoch": 1.60052754292174, - "grad_norm": 0.6661040186882019, - "learning_rate": 1.980521763168697e-06, - "loss": 0.2158, - "step": 16990 - }, - { - "epoch": 1.6006217470149078, - "grad_norm": 0.6457971334457397, - "learning_rate": 1.979619778726134e-06, - "loss": 0.1717, - "step": 16991 - }, - { - "epoch": 1.6007159511080755, - "grad_norm": 0.6137654185295105, - "learning_rate": 1.978717977160641e-06, - "loss": 0.1697, - "step": 16992 - }, - { - "epoch": 1.6008101552012435, - "grad_norm": 0.6896564960479736, - "learning_rate": 1.9778163584927845e-06, - "loss": 0.1996, - "step": 16993 - }, - { 
- "epoch": 1.6009043592944114, - "grad_norm": 0.6603320837020874, - "learning_rate": 1.976914922743124e-06, - "loss": 0.2017, - "step": 16994 - }, - { - "epoch": 1.6009985633875792, - "grad_norm": 0.63653564453125, - "learning_rate": 1.9760136699322107e-06, - "loss": 0.1804, - "step": 16995 - }, - { - "epoch": 1.601092767480747, - "grad_norm": 0.6182230710983276, - "learning_rate": 1.97511260008059e-06, - "loss": 0.1739, - "step": 16996 - }, - { - "epoch": 1.6011869715739149, - "grad_norm": 0.6287508010864258, - "learning_rate": 1.9742117132088166e-06, - "loss": 0.1839, - "step": 16997 - }, - { - "epoch": 1.6012811756670828, - "grad_norm": 0.6807714700698853, - "learning_rate": 1.97331100933743e-06, - "loss": 0.2137, - "step": 16998 - }, - { - "epoch": 1.6013753797602506, - "grad_norm": 0.7110925316810608, - "learning_rate": 1.97241048848696e-06, - "loss": 0.2155, - "step": 16999 - }, - { - "epoch": 1.6014695838534183, - "grad_norm": 0.6958010792732239, - "learning_rate": 1.971510150677951e-06, - "loss": 0.2185, - "step": 17000 - }, - { - "epoch": 1.6015637879465863, - "grad_norm": 0.7163333296775818, - "learning_rate": 1.970609995930928e-06, - "loss": 0.2006, - "step": 17001 - }, - { - "epoch": 1.6016579920397542, - "grad_norm": 0.6332830786705017, - "learning_rate": 1.9697100242664112e-06, - "loss": 0.1886, - "step": 17002 - }, - { - "epoch": 1.601752196132922, - "grad_norm": 0.7441249489784241, - "learning_rate": 1.968810235704924e-06, - "loss": 0.1767, - "step": 17003 - }, - { - "epoch": 1.6018464002260897, - "grad_norm": 0.6677401661872864, - "learning_rate": 1.9679106302669882e-06, - "loss": 0.1984, - "step": 17004 - }, - { - "epoch": 1.6019406043192577, - "grad_norm": 0.6124136447906494, - "learning_rate": 1.9670112079731084e-06, - "loss": 0.1572, - "step": 17005 - }, - { - "epoch": 1.6020348084124256, - "grad_norm": 0.6781045198440552, - "learning_rate": 1.9661119688437968e-06, - "loss": 0.1985, - "step": 17006 - }, - { - "epoch": 1.6021290125055934, - "grad_norm": 0.6994994878768921, - "learning_rate": 1.965212912899559e-06, - "loss": 0.1934, - "step": 17007 - }, - { - "epoch": 1.602223216598761, - "grad_norm": 0.6974150538444519, - "learning_rate": 1.9643140401608906e-06, - "loss": 0.2143, - "step": 17008 - }, - { - "epoch": 1.602317420691929, - "grad_norm": 0.6470915079116821, - "learning_rate": 1.963415350648289e-06, - "loss": 0.1873, - "step": 17009 - }, - { - "epoch": 1.602411624785097, - "grad_norm": 0.6578443646430969, - "learning_rate": 1.9625168443822494e-06, - "loss": 0.2036, - "step": 17010 - }, - { - "epoch": 1.6025058288782648, - "grad_norm": 0.6516851186752319, - "learning_rate": 1.961618521383253e-06, - "loss": 0.2028, - "step": 17011 - }, - { - "epoch": 1.6026000329714325, - "grad_norm": 0.8125045299530029, - "learning_rate": 1.960720381671789e-06, - "loss": 0.2176, - "step": 17012 - }, - { - "epoch": 1.6026942370646005, - "grad_norm": 0.6228019595146179, - "learning_rate": 1.9598224252683297e-06, - "loss": 0.1897, - "step": 17013 - }, - { - "epoch": 1.6027884411577684, - "grad_norm": 0.8371632695198059, - "learning_rate": 1.958924652193355e-06, - "loss": 0.2218, - "step": 17014 - }, - { - "epoch": 1.6028826452509362, - "grad_norm": 0.73078453540802, - "learning_rate": 1.9580270624673346e-06, - "loss": 0.2083, - "step": 17015 - }, - { - "epoch": 1.602976849344104, - "grad_norm": 0.6354836225509644, - "learning_rate": 1.9571296561107333e-06, - "loss": 0.1784, - "step": 17016 - }, - { - "epoch": 1.6030710534372719, - "grad_norm": 0.6360405683517456, - 
"learning_rate": 1.956232433144014e-06, - "loss": 0.1628, - "step": 17017 - }, - { - "epoch": 1.6031652575304398, - "grad_norm": 0.614295482635498, - "learning_rate": 1.9553353935876373e-06, - "loss": 0.1904, - "step": 17018 - }, - { - "epoch": 1.6032594616236076, - "grad_norm": 0.6972296833992004, - "learning_rate": 1.9544385374620525e-06, - "loss": 0.2285, - "step": 17019 - }, - { - "epoch": 1.6033536657167753, - "grad_norm": 0.663750946521759, - "learning_rate": 1.9535418647877146e-06, - "loss": 0.2058, - "step": 17020 - }, - { - "epoch": 1.6034478698099432, - "grad_norm": 0.6997053623199463, - "learning_rate": 1.952645375585064e-06, - "loss": 0.2247, - "step": 17021 - }, - { - "epoch": 1.6035420739031112, - "grad_norm": 0.6695412993431091, - "learning_rate": 1.9517490698745466e-06, - "loss": 0.2161, - "step": 17022 - }, - { - "epoch": 1.603636277996279, - "grad_norm": 0.6272669434547424, - "learning_rate": 1.9508529476765946e-06, - "loss": 0.2141, - "step": 17023 - }, - { - "epoch": 1.6037304820894467, - "grad_norm": 0.6253587007522583, - "learning_rate": 1.949957009011644e-06, - "loss": 0.1798, - "step": 17024 - }, - { - "epoch": 1.6038246861826146, - "grad_norm": 0.5858707427978516, - "learning_rate": 1.949061253900125e-06, - "loss": 0.1818, - "step": 17025 - }, - { - "epoch": 1.6039188902757826, - "grad_norm": 0.6380616426467896, - "learning_rate": 1.9481656823624586e-06, - "loss": 0.1562, - "step": 17026 - }, - { - "epoch": 1.6040130943689503, - "grad_norm": 0.6433847546577454, - "learning_rate": 1.9472702944190657e-06, - "loss": 0.1839, - "step": 17027 - }, - { - "epoch": 1.604107298462118, - "grad_norm": 0.6600707173347473, - "learning_rate": 1.9463750900903676e-06, - "loss": 0.2091, - "step": 17028 - }, - { - "epoch": 1.604201502555286, - "grad_norm": 0.6305294632911682, - "learning_rate": 1.94548006939677e-06, - "loss": 0.1952, - "step": 17029 - }, - { - "epoch": 1.604295706648454, - "grad_norm": 0.6270998120307922, - "learning_rate": 1.9445852323586835e-06, - "loss": 0.2067, - "step": 17030 - }, - { - "epoch": 1.6043899107416217, - "grad_norm": 0.7592684626579285, - "learning_rate": 1.9436905789965144e-06, - "loss": 0.2037, - "step": 17031 - }, - { - "epoch": 1.6044841148347895, - "grad_norm": 0.690092921257019, - "learning_rate": 1.9427961093306592e-06, - "loss": 0.2143, - "step": 17032 - }, - { - "epoch": 1.6045783189279574, - "grad_norm": 0.7281653881072998, - "learning_rate": 1.941901823381508e-06, - "loss": 0.2066, - "step": 17033 - }, - { - "epoch": 1.6046725230211254, - "grad_norm": 0.6461312770843506, - "learning_rate": 1.941007721169462e-06, - "loss": 0.1812, - "step": 17034 - }, - { - "epoch": 1.6047667271142931, - "grad_norm": 0.6451976895332336, - "learning_rate": 1.9401138027149036e-06, - "loss": 0.1843, - "step": 17035 - }, - { - "epoch": 1.6048609312074609, - "grad_norm": 0.7128356695175171, - "learning_rate": 1.9392200680382113e-06, - "loss": 0.181, - "step": 17036 - }, - { - "epoch": 1.6049551353006288, - "grad_norm": 0.595065176486969, - "learning_rate": 1.938326517159772e-06, - "loss": 0.1907, - "step": 17037 - }, - { - "epoch": 1.6050493393937968, - "grad_norm": 0.6782664060592651, - "learning_rate": 1.9374331500999554e-06, - "loss": 0.1907, - "step": 17038 - }, - { - "epoch": 1.6051435434869645, - "grad_norm": 0.6576317548751831, - "learning_rate": 1.9365399668791274e-06, - "loss": 0.2098, - "step": 17039 - }, - { - "epoch": 1.6052377475801323, - "grad_norm": 0.5967773199081421, - "learning_rate": 1.9356469675176637e-06, - "loss": 0.1806, - "step": 17040 - 
}, - { - "epoch": 1.6053319516733002, - "grad_norm": 0.7943901419639587, - "learning_rate": 1.9347541520359203e-06, - "loss": 0.2358, - "step": 17041 - }, - { - "epoch": 1.6054261557664682, - "grad_norm": 0.671789288520813, - "learning_rate": 1.933861520454253e-06, - "loss": 0.2017, - "step": 17042 - }, - { - "epoch": 1.605520359859636, - "grad_norm": 0.7067272067070007, - "learning_rate": 1.9329690727930185e-06, - "loss": 0.2119, - "step": 17043 - }, - { - "epoch": 1.6056145639528037, - "grad_norm": 0.6046809554100037, - "learning_rate": 1.932076809072567e-06, - "loss": 0.1871, - "step": 17044 - }, - { - "epoch": 1.6057087680459716, - "grad_norm": 0.812608540058136, - "learning_rate": 1.931184729313239e-06, - "loss": 0.2227, - "step": 17045 - }, - { - "epoch": 1.6058029721391396, - "grad_norm": 0.7496895790100098, - "learning_rate": 1.9302928335353775e-06, - "loss": 0.212, - "step": 17046 - }, - { - "epoch": 1.6058971762323073, - "grad_norm": 0.5570060014724731, - "learning_rate": 1.929401121759322e-06, - "loss": 0.177, - "step": 17047 - }, - { - "epoch": 1.605991380325475, - "grad_norm": 0.71700119972229, - "learning_rate": 1.9285095940054e-06, - "loss": 0.1881, - "step": 17048 - }, - { - "epoch": 1.606085584418643, - "grad_norm": 0.6562269330024719, - "learning_rate": 1.9276182502939424e-06, - "loss": 0.1872, - "step": 17049 - }, - { - "epoch": 1.606179788511811, - "grad_norm": 0.7206482291221619, - "learning_rate": 1.926727090645275e-06, - "loss": 0.2001, - "step": 17050 - }, - { - "epoch": 1.6062739926049787, - "grad_norm": 0.6981536746025085, - "learning_rate": 1.9258361150797135e-06, - "loss": 0.2127, - "step": 17051 - }, - { - "epoch": 1.6063681966981465, - "grad_norm": 0.7051399946212769, - "learning_rate": 1.9249453236175774e-06, - "loss": 0.2152, - "step": 17052 - }, - { - "epoch": 1.6064624007913144, - "grad_norm": 0.6378706097602844, - "learning_rate": 1.9240547162791736e-06, - "loss": 0.1631, - "step": 17053 - }, - { - "epoch": 1.6065566048844824, - "grad_norm": 0.6317290663719177, - "learning_rate": 1.9231642930848126e-06, - "loss": 0.1741, - "step": 17054 - }, - { - "epoch": 1.6066508089776501, - "grad_norm": 0.6226803660392761, - "learning_rate": 1.922274054054799e-06, - "loss": 0.1939, - "step": 17055 - }, - { - "epoch": 1.6067450130708179, - "grad_norm": 0.6561529636383057, - "learning_rate": 1.9213839992094264e-06, - "loss": 0.1811, - "step": 17056 - }, - { - "epoch": 1.6068392171639858, - "grad_norm": 0.6007041931152344, - "learning_rate": 1.9204941285689926e-06, - "loss": 0.1885, - "step": 17057 - }, - { - "epoch": 1.6069334212571538, - "grad_norm": 0.6497009992599487, - "learning_rate": 1.9196044421537907e-06, - "loss": 0.1969, - "step": 17058 - }, - { - "epoch": 1.6070276253503215, - "grad_norm": 7.670639514923096, - "learning_rate": 1.918714939984102e-06, - "loss": 0.1921, - "step": 17059 - }, - { - "epoch": 1.6071218294434892, - "grad_norm": 0.618530809879303, - "learning_rate": 1.917825622080213e-06, - "loss": 0.1538, - "step": 17060 - }, - { - "epoch": 1.6072160335366572, - "grad_norm": 0.6352978944778442, - "learning_rate": 1.9169364884623953e-06, - "loss": 0.1918, - "step": 17061 - }, - { - "epoch": 1.6073102376298252, - "grad_norm": 0.6399124264717102, - "learning_rate": 1.9160475391509304e-06, - "loss": 0.2012, - "step": 17062 - }, - { - "epoch": 1.607404441722993, - "grad_norm": 0.7114628553390503, - "learning_rate": 1.9151587741660803e-06, - "loss": 0.191, - "step": 17063 - }, - { - "epoch": 1.6074986458161606, - "grad_norm": 0.6930683255195618, - 
"learning_rate": 1.914270193528114e-06, - "loss": 0.2051, - "step": 17064 - }, - { - "epoch": 1.6075928499093286, - "grad_norm": 0.717863917350769, - "learning_rate": 1.9133817972572944e-06, - "loss": 0.1928, - "step": 17065 - }, - { - "epoch": 1.6076870540024966, - "grad_norm": 0.6552441716194153, - "learning_rate": 1.9124935853738734e-06, - "loss": 0.1801, - "step": 17066 - }, - { - "epoch": 1.6077812580956643, - "grad_norm": 0.6529089212417603, - "learning_rate": 1.9116055578981075e-06, - "loss": 0.205, - "step": 17067 - }, - { - "epoch": 1.607875462188832, - "grad_norm": 0.6935768127441406, - "learning_rate": 1.9107177148502443e-06, - "loss": 0.2242, - "step": 17068 - }, - { - "epoch": 1.607969666282, - "grad_norm": 0.603960394859314, - "learning_rate": 1.9098300562505266e-06, - "loss": 0.1864, - "step": 17069 - }, - { - "epoch": 1.608063870375168, - "grad_norm": 0.6930103898048401, - "learning_rate": 1.908942582119194e-06, - "loss": 0.2089, - "step": 17070 - }, - { - "epoch": 1.6081580744683357, - "grad_norm": 0.7470356225967407, - "learning_rate": 1.9080552924764874e-06, - "loss": 0.1875, - "step": 17071 - }, - { - "epoch": 1.6082522785615034, - "grad_norm": 0.6638361811637878, - "learning_rate": 1.9071681873426319e-06, - "loss": 0.1741, - "step": 17072 - }, - { - "epoch": 1.6083464826546714, - "grad_norm": 0.6275650858879089, - "learning_rate": 1.9062812667378573e-06, - "loss": 0.1891, - "step": 17073 - }, - { - "epoch": 1.6084406867478394, - "grad_norm": 0.7013517022132874, - "learning_rate": 1.9053945306823884e-06, - "loss": 0.2197, - "step": 17074 - }, - { - "epoch": 1.608534890841007, - "grad_norm": 0.6467006802558899, - "learning_rate": 1.9045079791964439e-06, - "loss": 0.2125, - "step": 17075 - }, - { - "epoch": 1.6086290949341748, - "grad_norm": 0.6520530581474304, - "learning_rate": 1.9036216123002326e-06, - "loss": 0.2251, - "step": 17076 - }, - { - "epoch": 1.6087232990273428, - "grad_norm": 0.6930379867553711, - "learning_rate": 1.9027354300139743e-06, - "loss": 0.1927, - "step": 17077 - }, - { - "epoch": 1.6088175031205108, - "grad_norm": 0.632620632648468, - "learning_rate": 1.9018494323578707e-06, - "loss": 0.1939, - "step": 17078 - }, - { - "epoch": 1.6089117072136785, - "grad_norm": 0.6659923791885376, - "learning_rate": 1.9009636193521198e-06, - "loss": 0.1935, - "step": 17079 - }, - { - "epoch": 1.6090059113068462, - "grad_norm": 0.690721333026886, - "learning_rate": 1.9000779910169277e-06, - "loss": 0.1875, - "step": 17080 - }, - { - "epoch": 1.6091001154000142, - "grad_norm": 0.6876536011695862, - "learning_rate": 1.8991925473724848e-06, - "loss": 0.2163, - "step": 17081 - }, - { - "epoch": 1.609194319493182, - "grad_norm": 0.9192591309547424, - "learning_rate": 1.8983072884389753e-06, - "loss": 0.1845, - "step": 17082 - }, - { - "epoch": 1.6092885235863497, - "grad_norm": 0.8170506358146667, - "learning_rate": 1.8974222142365938e-06, - "loss": 0.2015, - "step": 17083 - }, - { - "epoch": 1.6093827276795176, - "grad_norm": 0.6728833913803101, - "learning_rate": 1.8965373247855168e-06, - "loss": 0.1778, - "step": 17084 - }, - { - "epoch": 1.6094769317726856, - "grad_norm": 0.6478422284126282, - "learning_rate": 1.8956526201059179e-06, - "loss": 0.1671, - "step": 17085 - }, - { - "epoch": 1.6095711358658533, - "grad_norm": 0.7013288736343384, - "learning_rate": 1.8947681002179729e-06, - "loss": 0.1915, - "step": 17086 - }, - { - "epoch": 1.609665339959021, - "grad_norm": 0.6429296135902405, - "learning_rate": 1.893883765141854e-06, - "loss": 0.1703, - "step": 17087 
- }, - { - "epoch": 1.609759544052189, - "grad_norm": 0.6315440535545349, - "learning_rate": 1.8929996148977181e-06, - "loss": 0.1646, - "step": 17088 - }, - { - "epoch": 1.609853748145357, - "grad_norm": 0.7238953709602356, - "learning_rate": 1.892115649505728e-06, - "loss": 0.1846, - "step": 17089 - }, - { - "epoch": 1.6099479522385247, - "grad_norm": 0.7134349942207336, - "learning_rate": 1.8912318689860444e-06, - "loss": 0.1904, - "step": 17090 - }, - { - "epoch": 1.6100421563316925, - "grad_norm": 0.6606116890907288, - "learning_rate": 1.8903482733588108e-06, - "loss": 0.2081, - "step": 17091 - }, - { - "epoch": 1.6101363604248604, - "grad_norm": 1.245687484741211, - "learning_rate": 1.8894648626441792e-06, - "loss": 0.2144, - "step": 17092 - }, - { - "epoch": 1.6102305645180284, - "grad_norm": 0.6560180187225342, - "learning_rate": 1.8885816368622945e-06, - "loss": 0.19, - "step": 17093 - }, - { - "epoch": 1.6103247686111961, - "grad_norm": 1.1060028076171875, - "learning_rate": 1.8876985960332894e-06, - "loss": 0.2134, - "step": 17094 - }, - { - "epoch": 1.6104189727043638, - "grad_norm": 0.6218519806861877, - "learning_rate": 1.8868157401773058e-06, - "loss": 0.1846, - "step": 17095 - }, - { - "epoch": 1.6105131767975318, - "grad_norm": 0.6682586073875427, - "learning_rate": 1.8859330693144674e-06, - "loss": 0.207, - "step": 17096 - }, - { - "epoch": 1.6106073808906998, - "grad_norm": 0.7348806262016296, - "learning_rate": 1.885050583464907e-06, - "loss": 0.1848, - "step": 17097 - }, - { - "epoch": 1.6107015849838675, - "grad_norm": 0.7002991437911987, - "learning_rate": 1.8841682826487396e-06, - "loss": 0.2116, - "step": 17098 - }, - { - "epoch": 1.6107957890770352, - "grad_norm": 0.6557016968727112, - "learning_rate": 1.883286166886088e-06, - "loss": 0.1983, - "step": 17099 - }, - { - "epoch": 1.6108899931702032, - "grad_norm": 0.9070132970809937, - "learning_rate": 1.8824042361970663e-06, - "loss": 0.1883, - "step": 17100 - }, - { - "epoch": 1.6109841972633712, - "grad_norm": 0.6374247670173645, - "learning_rate": 1.8815224906017792e-06, - "loss": 0.1972, - "step": 17101 - }, - { - "epoch": 1.611078401356539, - "grad_norm": 0.7107747197151184, - "learning_rate": 1.8806409301203355e-06, - "loss": 0.1891, - "step": 17102 - }, - { - "epoch": 1.6111726054497066, - "grad_norm": 0.6548818945884705, - "learning_rate": 1.8797595547728386e-06, - "loss": 0.2045, - "step": 17103 - }, - { - "epoch": 1.6112668095428746, - "grad_norm": 0.7211184501647949, - "learning_rate": 1.8788783645793784e-06, - "loss": 0.2218, - "step": 17104 - }, - { - "epoch": 1.6113610136360426, - "grad_norm": 0.5960847735404968, - "learning_rate": 1.8779973595600543e-06, - "loss": 0.1847, - "step": 17105 - }, - { - "epoch": 1.6114552177292103, - "grad_norm": 0.7239816188812256, - "learning_rate": 1.8771165397349478e-06, - "loss": 0.2077, - "step": 17106 - }, - { - "epoch": 1.611549421822378, - "grad_norm": 0.6416701674461365, - "learning_rate": 1.8762359051241473e-06, - "loss": 0.1878, - "step": 17107 - }, - { - "epoch": 1.611643625915546, - "grad_norm": 0.6655055284500122, - "learning_rate": 1.8753554557477337e-06, - "loss": 0.2004, - "step": 17108 - }, - { - "epoch": 1.611737830008714, - "grad_norm": 0.6688492894172668, - "learning_rate": 1.8744751916257785e-06, - "loss": 0.1896, - "step": 17109 - }, - { - "epoch": 1.6118320341018817, - "grad_norm": 0.7001370787620544, - "learning_rate": 1.8735951127783557e-06, - "loss": 0.2163, - "step": 17110 - }, - { - "epoch": 1.6119262381950494, - "grad_norm": 
0.7874501943588257, - "learning_rate": 1.8727152192255339e-06, - "loss": 0.2219, - "step": 17111 - }, - { - "epoch": 1.6120204422882174, - "grad_norm": 0.673001766204834, - "learning_rate": 1.871835510987372e-06, - "loss": 0.2036, - "step": 17112 - }, - { - "epoch": 1.6121146463813854, - "grad_norm": 0.6884879469871521, - "learning_rate": 1.8709559880839312e-06, - "loss": 0.1888, - "step": 17113 - }, - { - "epoch": 1.612208850474553, - "grad_norm": 0.6903436183929443, - "learning_rate": 1.8700766505352686e-06, - "loss": 0.2128, - "step": 17114 - }, - { - "epoch": 1.6123030545677208, - "grad_norm": 0.6216661930084229, - "learning_rate": 1.8691974983614303e-06, - "loss": 0.1816, - "step": 17115 - }, - { - "epoch": 1.6123972586608888, - "grad_norm": 0.6292722821235657, - "learning_rate": 1.8683185315824592e-06, - "loss": 0.1862, - "step": 17116 - }, - { - "epoch": 1.6124914627540567, - "grad_norm": 0.5919272899627686, - "learning_rate": 1.867439750218406e-06, - "loss": 0.172, - "step": 17117 - }, - { - "epoch": 1.6125856668472245, - "grad_norm": 0.632280707359314, - "learning_rate": 1.8665611542893047e-06, - "loss": 0.1947, - "step": 17118 - }, - { - "epoch": 1.6126798709403922, - "grad_norm": 0.6402468681335449, - "learning_rate": 1.8656827438151815e-06, - "loss": 0.1773, - "step": 17119 - }, - { - "epoch": 1.6127740750335602, - "grad_norm": 0.628898024559021, - "learning_rate": 1.864804518816078e-06, - "loss": 0.198, - "step": 17120 - }, - { - "epoch": 1.6128682791267281, - "grad_norm": 0.6768243312835693, - "learning_rate": 1.8639264793120114e-06, - "loss": 0.1948, - "step": 17121 - }, - { - "epoch": 1.6129624832198959, - "grad_norm": 0.6429453492164612, - "learning_rate": 1.8630486253230017e-06, - "loss": 0.1894, - "step": 17122 - }, - { - "epoch": 1.6130566873130636, - "grad_norm": 0.6443604230880737, - "learning_rate": 1.8621709568690683e-06, - "loss": 0.1767, - "step": 17123 - }, - { - "epoch": 1.6131508914062316, - "grad_norm": 0.6549245715141296, - "learning_rate": 1.8612934739702237e-06, - "loss": 0.1852, - "step": 17124 - }, - { - "epoch": 1.6132450954993995, - "grad_norm": 0.6844817399978638, - "learning_rate": 1.8604161766464734e-06, - "loss": 0.1986, - "step": 17125 - }, - { - "epoch": 1.6133392995925673, - "grad_norm": 0.6662511229515076, - "learning_rate": 1.8595390649178214e-06, - "loss": 0.1713, - "step": 17126 - }, - { - "epoch": 1.613433503685735, - "grad_norm": 0.699615478515625, - "learning_rate": 1.8586621388042713e-06, - "loss": 0.2162, - "step": 17127 - }, - { - "epoch": 1.613527707778903, - "grad_norm": 0.7042679786682129, - "learning_rate": 1.857785398325812e-06, - "loss": 0.2162, - "step": 17128 - }, - { - "epoch": 1.613621911872071, - "grad_norm": 0.7047753930091858, - "learning_rate": 1.8569088435024385e-06, - "loss": 0.1899, - "step": 17129 - }, - { - "epoch": 1.6137161159652387, - "grad_norm": 0.6133434772491455, - "learning_rate": 1.8560324743541391e-06, - "loss": 0.1823, - "step": 17130 - }, - { - "epoch": 1.6138103200584064, - "grad_norm": 0.6867203712463379, - "learning_rate": 1.8551562909008925e-06, - "loss": 0.202, - "step": 17131 - }, - { - "epoch": 1.6139045241515744, - "grad_norm": 0.6246338486671448, - "learning_rate": 1.854280293162678e-06, - "loss": 0.1955, - "step": 17132 - }, - { - "epoch": 1.6139987282447423, - "grad_norm": 0.701133668422699, - "learning_rate": 1.8534044811594721e-06, - "loss": 0.1999, - "step": 17133 - }, - { - "epoch": 1.61409293233791, - "grad_norm": 0.6869482398033142, - "learning_rate": 1.852528854911242e-06, - "loss": 
0.2133, - "step": 17134 - }, - { - "epoch": 1.6141871364310778, - "grad_norm": 0.6875615119934082, - "learning_rate": 1.851653414437956e-06, - "loss": 0.1913, - "step": 17135 - }, - { - "epoch": 1.6142813405242458, - "grad_norm": 0.6641170978546143, - "learning_rate": 1.8507781597595709e-06, - "loss": 0.204, - "step": 17136 - }, - { - "epoch": 1.6143755446174137, - "grad_norm": 0.5416911244392395, - "learning_rate": 1.8499030908960502e-06, - "loss": 0.1708, - "step": 17137 - }, - { - "epoch": 1.6144697487105815, - "grad_norm": 0.6619910001754761, - "learning_rate": 1.8490282078673405e-06, - "loss": 0.2056, - "step": 17138 - }, - { - "epoch": 1.6145639528037492, - "grad_norm": 0.6576054692268372, - "learning_rate": 1.8481535106933935e-06, - "loss": 0.1913, - "step": 17139 - }, - { - "epoch": 1.6146581568969172, - "grad_norm": 0.6916207075119019, - "learning_rate": 1.8472789993941554e-06, - "loss": 0.2053, - "step": 17140 - }, - { - "epoch": 1.6147523609900851, - "grad_norm": 0.60109943151474, - "learning_rate": 1.8464046739895625e-06, - "loss": 0.178, - "step": 17141 - }, - { - "epoch": 1.6148465650832529, - "grad_norm": 0.7228315472602844, - "learning_rate": 1.8455305344995523e-06, - "loss": 0.2126, - "step": 17142 - }, - { - "epoch": 1.6149407691764206, - "grad_norm": 0.6615751385688782, - "learning_rate": 1.844656580944061e-06, - "loss": 0.1949, - "step": 17143 - }, - { - "epoch": 1.6150349732695886, - "grad_norm": 0.6760201454162598, - "learning_rate": 1.843782813343008e-06, - "loss": 0.1635, - "step": 17144 - }, - { - "epoch": 1.6151291773627565, - "grad_norm": 0.668273389339447, - "learning_rate": 1.8429092317163244e-06, - "loss": 0.181, - "step": 17145 - }, - { - "epoch": 1.6152233814559243, - "grad_norm": 0.6613141894340515, - "learning_rate": 1.842035836083922e-06, - "loss": 0.2068, - "step": 17146 - }, - { - "epoch": 1.615317585549092, - "grad_norm": 0.6070382595062256, - "learning_rate": 1.84116262646572e-06, - "loss": 0.1963, - "step": 17147 - }, - { - "epoch": 1.61541178964226, - "grad_norm": 0.5954920649528503, - "learning_rate": 1.8402896028816298e-06, - "loss": 0.2059, - "step": 17148 - }, - { - "epoch": 1.615505993735428, - "grad_norm": 0.6403561234474182, - "learning_rate": 1.8394167653515537e-06, - "loss": 0.1692, - "step": 17149 - }, - { - "epoch": 1.6156001978285957, - "grad_norm": 1.1120206117630005, - "learning_rate": 1.8385441138953952e-06, - "loss": 0.1819, - "step": 17150 - }, - { - "epoch": 1.6156944019217634, - "grad_norm": 0.6109408736228943, - "learning_rate": 1.837671648533057e-06, - "loss": 0.1758, - "step": 17151 - }, - { - "epoch": 1.6157886060149314, - "grad_norm": 0.8498703241348267, - "learning_rate": 1.8367993692844244e-06, - "loss": 0.2524, - "step": 17152 - }, - { - "epoch": 1.6158828101080993, - "grad_norm": 0.6426171660423279, - "learning_rate": 1.8359272761693915e-06, - "loss": 0.1947, - "step": 17153 - }, - { - "epoch": 1.615977014201267, - "grad_norm": 0.7028778791427612, - "learning_rate": 1.8350553692078454e-06, - "loss": 0.2042, - "step": 17154 - }, - { - "epoch": 1.6160712182944348, - "grad_norm": 0.64783775806427, - "learning_rate": 1.834183648419664e-06, - "loss": 0.1795, - "step": 17155 - }, - { - "epoch": 1.6161654223876027, - "grad_norm": 0.6946761012077332, - "learning_rate": 1.833312113824719e-06, - "loss": 0.1976, - "step": 17156 - }, - { - "epoch": 1.6162596264807707, - "grad_norm": 0.7017341256141663, - "learning_rate": 1.8324407654428921e-06, - "loss": 0.2018, - "step": 17157 - }, - { - "epoch": 1.6163538305739384, - "grad_norm": 
0.6161543726921082, - "learning_rate": 1.8315696032940478e-06, - "loss": 0.1805, - "step": 17158 - }, - { - "epoch": 1.6164480346671062, - "grad_norm": 0.6772741079330444, - "learning_rate": 1.8306986273980442e-06, - "loss": 0.2029, - "step": 17159 - }, - { - "epoch": 1.6165422387602741, - "grad_norm": 0.6760283708572388, - "learning_rate": 1.8298278377747513e-06, - "loss": 0.2163, - "step": 17160 - }, - { - "epoch": 1.616636442853442, - "grad_norm": 0.6412596702575684, - "learning_rate": 1.8289572344440198e-06, - "loss": 0.1903, - "step": 17161 - }, - { - "epoch": 1.6167306469466098, - "grad_norm": 0.7114439010620117, - "learning_rate": 1.828086817425696e-06, - "loss": 0.2194, - "step": 17162 - }, - { - "epoch": 1.6168248510397776, - "grad_norm": 0.6724783182144165, - "learning_rate": 1.8272165867396318e-06, - "loss": 0.2137, - "step": 17163 - }, - { - "epoch": 1.6169190551329455, - "grad_norm": 0.6525052785873413, - "learning_rate": 1.8263465424056714e-06, - "loss": 0.2101, - "step": 17164 - }, - { - "epoch": 1.6170132592261135, - "grad_norm": 0.6216458678245544, - "learning_rate": 1.8254766844436477e-06, - "loss": 0.1812, - "step": 17165 - }, - { - "epoch": 1.6171074633192812, - "grad_norm": 0.625395655632019, - "learning_rate": 1.8246070128733984e-06, - "loss": 0.1848, - "step": 17166 - }, - { - "epoch": 1.617201667412449, - "grad_norm": 0.706031858921051, - "learning_rate": 1.823737527714754e-06, - "loss": 0.231, - "step": 17167 - }, - { - "epoch": 1.617295871505617, - "grad_norm": 0.6518822908401489, - "learning_rate": 1.8228682289875376e-06, - "loss": 0.2093, - "step": 17168 - }, - { - "epoch": 1.617390075598785, - "grad_norm": 0.6999724507331848, - "learning_rate": 1.8219991167115702e-06, - "loss": 0.2365, - "step": 17169 - }, - { - "epoch": 1.6174842796919526, - "grad_norm": 0.7110457420349121, - "learning_rate": 1.8211301909066747e-06, - "loss": 0.2039, - "step": 17170 - }, - { - "epoch": 1.6175784837851204, - "grad_norm": 0.7404806017875671, - "learning_rate": 1.8202614515926565e-06, - "loss": 0.2306, - "step": 17171 - }, - { - "epoch": 1.6176726878782883, - "grad_norm": 0.6038290858268738, - "learning_rate": 1.8193928987893273e-06, - "loss": 0.1804, - "step": 17172 - }, - { - "epoch": 1.6177668919714563, - "grad_norm": 0.614624559879303, - "learning_rate": 1.8185245325164935e-06, - "loss": 0.2137, - "step": 17173 - }, - { - "epoch": 1.617861096064624, - "grad_norm": 0.6252606511116028, - "learning_rate": 1.8176563527939518e-06, - "loss": 0.1702, - "step": 17174 - }, - { - "epoch": 1.6179553001577918, - "grad_norm": 0.6974897384643555, - "learning_rate": 1.8167883596415014e-06, - "loss": 0.2177, - "step": 17175 - }, - { - "epoch": 1.6180495042509597, - "grad_norm": 0.6368634700775146, - "learning_rate": 1.8159205530789303e-06, - "loss": 0.1813, - "step": 17176 - }, - { - "epoch": 1.6181437083441277, - "grad_norm": 0.7428598403930664, - "learning_rate": 1.8150529331260292e-06, - "loss": 0.2222, - "step": 17177 - }, - { - "epoch": 1.6182379124372954, - "grad_norm": 0.6269071102142334, - "learning_rate": 1.8141854998025766e-06, - "loss": 0.2157, - "step": 17178 - }, - { - "epoch": 1.6183321165304632, - "grad_norm": 0.7417349815368652, - "learning_rate": 1.8133182531283555e-06, - "loss": 0.2044, - "step": 17179 - }, - { - "epoch": 1.6184263206236311, - "grad_norm": 0.6804085969924927, - "learning_rate": 1.8124511931231403e-06, - "loss": 0.1962, - "step": 17180 - }, - { - "epoch": 1.618520524716799, - "grad_norm": 0.6238267421722412, - "learning_rate": 1.8115843198066984e-06, - 
"loss": 0.163, - "step": 17181 - }, - { - "epoch": 1.6186147288099668, - "grad_norm": 0.7084079384803772, - "learning_rate": 1.8107176331987974e-06, - "loss": 0.1904, - "step": 17182 - }, - { - "epoch": 1.6187089329031346, - "grad_norm": 0.6880285143852234, - "learning_rate": 1.8098511333192026e-06, - "loss": 0.2023, - "step": 17183 - }, - { - "epoch": 1.6188031369963025, - "grad_norm": 0.6665380001068115, - "learning_rate": 1.8089848201876648e-06, - "loss": 0.2026, - "step": 17184 - }, - { - "epoch": 1.6188973410894705, - "grad_norm": 0.6488173604011536, - "learning_rate": 1.8081186938239437e-06, - "loss": 0.1846, - "step": 17185 - }, - { - "epoch": 1.6189915451826382, - "grad_norm": 0.6484012603759766, - "learning_rate": 1.8072527542477825e-06, - "loss": 0.2002, - "step": 17186 - }, - { - "epoch": 1.619085749275806, - "grad_norm": 1.004101276397705, - "learning_rate": 1.8063870014789297e-06, - "loss": 0.2189, - "step": 17187 - }, - { - "epoch": 1.619179953368974, - "grad_norm": 0.6525694727897644, - "learning_rate": 1.8055214355371265e-06, - "loss": 0.1867, - "step": 17188 - }, - { - "epoch": 1.6192741574621419, - "grad_norm": 0.6535707712173462, - "learning_rate": 1.8046560564421056e-06, - "loss": 0.1901, - "step": 17189 - }, - { - "epoch": 1.6193683615553096, - "grad_norm": 0.6303517818450928, - "learning_rate": 1.8037908642136004e-06, - "loss": 0.1854, - "step": 17190 - }, - { - "epoch": 1.6194625656484773, - "grad_norm": 0.7350384593009949, - "learning_rate": 1.8029258588713427e-06, - "loss": 0.1883, - "step": 17191 - }, - { - "epoch": 1.6195567697416453, - "grad_norm": 0.5876874327659607, - "learning_rate": 1.8020610404350492e-06, - "loss": 0.1805, - "step": 17192 - }, - { - "epoch": 1.6196509738348133, - "grad_norm": 0.6583977937698364, - "learning_rate": 1.8011964089244415e-06, - "loss": 0.2002, - "step": 17193 - }, - { - "epoch": 1.619745177927981, - "grad_norm": 0.7399848103523254, - "learning_rate": 1.8003319643592388e-06, - "loss": 0.1956, - "step": 17194 - }, - { - "epoch": 1.6198393820211487, - "grad_norm": 0.8242473602294922, - "learning_rate": 1.7994677067591493e-06, - "loss": 0.1885, - "step": 17195 - }, - { - "epoch": 1.6199335861143167, - "grad_norm": 0.6625346541404724, - "learning_rate": 1.7986036361438718e-06, - "loss": 0.2006, - "step": 17196 - }, - { - "epoch": 1.6200277902074847, - "grad_norm": 0.670408308506012, - "learning_rate": 1.7977397525331208e-06, - "loss": 0.179, - "step": 17197 - }, - { - "epoch": 1.6201219943006524, - "grad_norm": 0.7978299260139465, - "learning_rate": 1.7968760559465891e-06, - "loss": 0.2172, - "step": 17198 - }, - { - "epoch": 1.6202161983938201, - "grad_norm": 0.6636545062065125, - "learning_rate": 1.7960125464039636e-06, - "loss": 0.1752, - "step": 17199 - }, - { - "epoch": 1.620310402486988, - "grad_norm": 0.7238003611564636, - "learning_rate": 1.7951492239249457e-06, - "loss": 0.2306, - "step": 17200 - }, - { - "epoch": 1.620404606580156, - "grad_norm": 0.6232163906097412, - "learning_rate": 1.7942860885292135e-06, - "loss": 0.196, - "step": 17201 - }, - { - "epoch": 1.6204988106733238, - "grad_norm": 0.6866005063056946, - "learning_rate": 1.7934231402364466e-06, - "loss": 0.1997, - "step": 17202 - }, - { - "epoch": 1.6205930147664915, - "grad_norm": 0.6618146300315857, - "learning_rate": 1.7925603790663247e-06, - "loss": 0.1906, - "step": 17203 - }, - { - "epoch": 1.6206872188596595, - "grad_norm": 0.5978967547416687, - "learning_rate": 1.7916978050385215e-06, - "loss": 0.1895, - "step": 17204 - }, - { - "epoch": 
1.6207814229528275, - "grad_norm": 0.7060754895210266, - "learning_rate": 1.7908354181726994e-06, - "loss": 0.2291, - "step": 17205 - }, - { - "epoch": 1.6208756270459952, - "grad_norm": 0.643155038356781, - "learning_rate": 1.7899732184885255e-06, - "loss": 0.2077, - "step": 17206 - }, - { - "epoch": 1.620969831139163, - "grad_norm": 0.6400883793830872, - "learning_rate": 1.7891112060056626e-06, - "loss": 0.1817, - "step": 17207 - }, - { - "epoch": 1.621064035232331, - "grad_norm": 0.6722229719161987, - "learning_rate": 1.7882493807437596e-06, - "loss": 0.1776, - "step": 17208 - }, - { - "epoch": 1.6211582393254989, - "grad_norm": 0.715329110622406, - "learning_rate": 1.7873877427224706e-06, - "loss": 0.2024, - "step": 17209 - }, - { - "epoch": 1.6212524434186666, - "grad_norm": 0.7191175222396851, - "learning_rate": 1.7865262919614446e-06, - "loss": 0.2078, - "step": 17210 - }, - { - "epoch": 1.6213466475118343, - "grad_norm": 0.6324275135993958, - "learning_rate": 1.785665028480319e-06, - "loss": 0.1847, - "step": 17211 - }, - { - "epoch": 1.6214408516050023, - "grad_norm": 0.5900797843933105, - "learning_rate": 1.7848039522987347e-06, - "loss": 0.1553, - "step": 17212 - }, - { - "epoch": 1.6215350556981702, - "grad_norm": 0.62995845079422, - "learning_rate": 1.7839430634363274e-06, - "loss": 0.2161, - "step": 17213 - }, - { - "epoch": 1.621629259791338, - "grad_norm": 0.686939537525177, - "learning_rate": 1.7830823619127246e-06, - "loss": 0.2007, - "step": 17214 - }, - { - "epoch": 1.6217234638845057, - "grad_norm": 0.6298737525939941, - "learning_rate": 1.7822218477475496e-06, - "loss": 0.1777, - "step": 17215 - }, - { - "epoch": 1.6218176679776737, - "grad_norm": 0.6338664889335632, - "learning_rate": 1.7813615209604252e-06, - "loss": 0.2047, - "step": 17216 - }, - { - "epoch": 1.6219118720708416, - "grad_norm": 0.6423327326774597, - "learning_rate": 1.7805013815709715e-06, - "loss": 0.1799, - "step": 17217 - }, - { - "epoch": 1.6220060761640094, - "grad_norm": 0.6747575402259827, - "learning_rate": 1.7796414295987952e-06, - "loss": 0.2167, - "step": 17218 - }, - { - "epoch": 1.6221002802571771, - "grad_norm": 0.6236206889152527, - "learning_rate": 1.778781665063507e-06, - "loss": 0.1851, - "step": 17219 - }, - { - "epoch": 1.622194484350345, - "grad_norm": 0.5348717570304871, - "learning_rate": 1.7779220879847136e-06, - "loss": 0.1685, - "step": 17220 - }, - { - "epoch": 1.6222886884435128, - "grad_norm": 0.6100355386734009, - "learning_rate": 1.7770626983820105e-06, - "loss": 0.1873, - "step": 17221 - }, - { - "epoch": 1.6223828925366806, - "grad_norm": 0.6110270619392395, - "learning_rate": 1.7762034962749941e-06, - "loss": 0.1763, - "step": 17222 - }, - { - "epoch": 1.6224770966298485, - "grad_norm": 0.6329869627952576, - "learning_rate": 1.7753444816832588e-06, - "loss": 0.1836, - "step": 17223 - }, - { - "epoch": 1.6225713007230165, - "grad_norm": 0.7160716652870178, - "learning_rate": 1.7744856546263868e-06, - "loss": 0.2174, - "step": 17224 - }, - { - "epoch": 1.6226655048161842, - "grad_norm": 0.6878299713134766, - "learning_rate": 1.7736270151239655e-06, - "loss": 0.2144, - "step": 17225 - }, - { - "epoch": 1.622759708909352, - "grad_norm": 0.6290382146835327, - "learning_rate": 1.7727685631955682e-06, - "loss": 0.1849, - "step": 17226 - }, - { - "epoch": 1.62285391300252, - "grad_norm": 0.6803714632987976, - "learning_rate": 1.7719102988607716e-06, - "loss": 0.2088, - "step": 17227 - }, - { - "epoch": 1.6229481170956879, - "grad_norm": 0.6757082343101501, - 
"learning_rate": 1.771052222139147e-06, - "loss": 0.19, - "step": 17228 - }, - { - "epoch": 1.6230423211888556, - "grad_norm": 0.6137799024581909, - "learning_rate": 1.7701943330502558e-06, - "loss": 0.1797, - "step": 17229 - }, - { - "epoch": 1.6231365252820233, - "grad_norm": 0.6618055701255798, - "learning_rate": 1.7693366316136618e-06, - "loss": 0.2008, - "step": 17230 - }, - { - "epoch": 1.6232307293751913, - "grad_norm": 0.5803620219230652, - "learning_rate": 1.7684791178489236e-06, - "loss": 0.1866, - "step": 17231 - }, - { - "epoch": 1.6233249334683593, - "grad_norm": 0.6533998250961304, - "learning_rate": 1.7676217917755889e-06, - "loss": 0.1974, - "step": 17232 - }, - { - "epoch": 1.623419137561527, - "grad_norm": 0.6308205127716064, - "learning_rate": 1.76676465341321e-06, - "loss": 0.2137, - "step": 17233 - }, - { - "epoch": 1.6235133416546947, - "grad_norm": 0.7170531749725342, - "learning_rate": 1.765907702781332e-06, - "loss": 0.181, - "step": 17234 - }, - { - "epoch": 1.6236075457478627, - "grad_norm": 0.6850894093513489, - "learning_rate": 1.7650509398994919e-06, - "loss": 0.1876, - "step": 17235 - }, - { - "epoch": 1.6237017498410307, - "grad_norm": 0.6540127992630005, - "learning_rate": 1.7641943647872217e-06, - "loss": 0.2041, - "step": 17236 - }, - { - "epoch": 1.6237959539341984, - "grad_norm": 0.6298664808273315, - "learning_rate": 1.7633379774640624e-06, - "loss": 0.2044, - "step": 17237 - }, - { - "epoch": 1.6238901580273661, - "grad_norm": 0.6866254210472107, - "learning_rate": 1.762481777949534e-06, - "loss": 0.191, - "step": 17238 - }, - { - "epoch": 1.623984362120534, - "grad_norm": 0.693366527557373, - "learning_rate": 1.7616257662631597e-06, - "loss": 0.1958, - "step": 17239 - }, - { - "epoch": 1.624078566213702, - "grad_norm": 0.6166165471076965, - "learning_rate": 1.7607699424244583e-06, - "loss": 0.1974, - "step": 17240 - }, - { - "epoch": 1.6241727703068698, - "grad_norm": 0.6323557496070862, - "learning_rate": 1.7599143064529467e-06, - "loss": 0.207, - "step": 17241 - }, - { - "epoch": 1.6242669744000375, - "grad_norm": 0.6584812998771667, - "learning_rate": 1.7590588583681301e-06, - "loss": 0.1971, - "step": 17242 - }, - { - "epoch": 1.6243611784932055, - "grad_norm": 0.6964744329452515, - "learning_rate": 1.7582035981895163e-06, - "loss": 0.1947, - "step": 17243 - }, - { - "epoch": 1.6244553825863735, - "grad_norm": 0.6552649140357971, - "learning_rate": 1.7573485259366086e-06, - "loss": 0.1832, - "step": 17244 - }, - { - "epoch": 1.6245495866795412, - "grad_norm": 0.6573282480239868, - "learning_rate": 1.7564936416289003e-06, - "loss": 0.2029, - "step": 17245 - }, - { - "epoch": 1.624643790772709, - "grad_norm": 0.7544728517532349, - "learning_rate": 1.7556389452858858e-06, - "loss": 0.2356, - "step": 17246 - }, - { - "epoch": 1.6247379948658769, - "grad_norm": 0.7063323259353638, - "learning_rate": 1.7547844369270551e-06, - "loss": 0.1909, - "step": 17247 - }, - { - "epoch": 1.6248321989590448, - "grad_norm": 0.6265422105789185, - "learning_rate": 1.7539301165718892e-06, - "loss": 0.1673, - "step": 17248 - }, - { - "epoch": 1.6249264030522126, - "grad_norm": 0.7208700180053711, - "learning_rate": 1.7530759842398693e-06, - "loss": 0.2349, - "step": 17249 - }, - { - "epoch": 1.6250206071453803, - "grad_norm": 0.621737003326416, - "learning_rate": 1.752222039950473e-06, - "loss": 0.1807, - "step": 17250 - }, - { - "epoch": 1.6251148112385483, - "grad_norm": 0.6207723021507263, - "learning_rate": 1.7513682837231672e-06, - "loss": 0.168, - "step": 17251 - 
}, - { - "epoch": 1.6252090153317162, - "grad_norm": 0.6770493984222412, - "learning_rate": 1.7505147155774216e-06, - "loss": 0.2156, - "step": 17252 - }, - { - "epoch": 1.625303219424884, - "grad_norm": 0.6314629912376404, - "learning_rate": 1.749661335532702e-06, - "loss": 0.2019, - "step": 17253 - }, - { - "epoch": 1.6253974235180517, - "grad_norm": 0.6600848436355591, - "learning_rate": 1.7488081436084613e-06, - "loss": 0.1967, - "step": 17254 - }, - { - "epoch": 1.6254916276112197, - "grad_norm": 0.6188461780548096, - "learning_rate": 1.7479551398241546e-06, - "loss": 0.1984, - "step": 17255 - }, - { - "epoch": 1.6255858317043876, - "grad_norm": 0.6627752780914307, - "learning_rate": 1.7471023241992325e-06, - "loss": 0.1576, - "step": 17256 - }, - { - "epoch": 1.6256800357975554, - "grad_norm": 0.5846261978149414, - "learning_rate": 1.746249696753143e-06, - "loss": 0.1853, - "step": 17257 - }, - { - "epoch": 1.6257742398907231, - "grad_norm": 0.6488931775093079, - "learning_rate": 1.7453972575053224e-06, - "loss": 0.1951, - "step": 17258 - }, - { - "epoch": 1.625868443983891, - "grad_norm": 0.6597906351089478, - "learning_rate": 1.744545006475211e-06, - "loss": 0.206, - "step": 17259 - }, - { - "epoch": 1.625962648077059, - "grad_norm": 0.6859642863273621, - "learning_rate": 1.7436929436822425e-06, - "loss": 0.1766, - "step": 17260 - }, - { - "epoch": 1.6260568521702268, - "grad_norm": 0.5902611017227173, - "learning_rate": 1.742841069145842e-06, - "loss": 0.1799, - "step": 17261 - }, - { - "epoch": 1.6261510562633945, - "grad_norm": 0.6606028079986572, - "learning_rate": 1.7419893828854339e-06, - "loss": 0.2033, - "step": 17262 - }, - { - "epoch": 1.6262452603565625, - "grad_norm": 0.6939049959182739, - "learning_rate": 1.741137884920443e-06, - "loss": 0.1956, - "step": 17263 - }, - { - "epoch": 1.6263394644497304, - "grad_norm": 0.6413030624389648, - "learning_rate": 1.740286575270277e-06, - "loss": 0.2069, - "step": 17264 - }, - { - "epoch": 1.6264336685428982, - "grad_norm": 0.6549431681632996, - "learning_rate": 1.7394354539543545e-06, - "loss": 0.2089, - "step": 17265 - }, - { - "epoch": 1.626527872636066, - "grad_norm": 0.6486162543296814, - "learning_rate": 1.738584520992076e-06, - "loss": 0.192, - "step": 17266 - }, - { - "epoch": 1.6266220767292339, - "grad_norm": 0.633261501789093, - "learning_rate": 1.7377337764028468e-06, - "loss": 0.1998, - "step": 17267 - }, - { - "epoch": 1.6267162808224018, - "grad_norm": 0.6332877278327942, - "learning_rate": 1.7368832202060682e-06, - "loss": 0.1714, - "step": 17268 - }, - { - "epoch": 1.6268104849155696, - "grad_norm": 0.6749432682991028, - "learning_rate": 1.7360328524211278e-06, - "loss": 0.2122, - "step": 17269 - }, - { - "epoch": 1.6269046890087373, - "grad_norm": 0.683599591255188, - "learning_rate": 1.7351826730674192e-06, - "loss": 0.2088, - "step": 17270 - }, - { - "epoch": 1.6269988931019053, - "grad_norm": 0.6694850325584412, - "learning_rate": 1.7343326821643314e-06, - "loss": 0.1943, - "step": 17271 - }, - { - "epoch": 1.6270930971950732, - "grad_norm": 0.6966886520385742, - "learning_rate": 1.7334828797312375e-06, - "loss": 0.214, - "step": 17272 - }, - { - "epoch": 1.627187301288241, - "grad_norm": 0.6932169795036316, - "learning_rate": 1.732633265787519e-06, - "loss": 0.2024, - "step": 17273 - }, - { - "epoch": 1.6272815053814087, - "grad_norm": 0.6216726899147034, - "learning_rate": 1.73178384035255e-06, - "loss": 0.1878, - "step": 17274 - }, - { - "epoch": 1.6273757094745767, - "grad_norm": 0.651899516582489, - 
"learning_rate": 1.7309346034456963e-06, - "loss": 0.1959, - "step": 17275 - }, - { - "epoch": 1.6274699135677446, - "grad_norm": 0.7007536292076111, - "learning_rate": 1.730085555086317e-06, - "loss": 0.1967, - "step": 17276 - }, - { - "epoch": 1.6275641176609124, - "grad_norm": 0.6730961203575134, - "learning_rate": 1.7292366952937823e-06, - "loss": 0.1766, - "step": 17277 - }, - { - "epoch": 1.62765832175408, - "grad_norm": 0.6632020473480225, - "learning_rate": 1.7283880240874418e-06, - "loss": 0.1818, - "step": 17278 - }, - { - "epoch": 1.627752525847248, - "grad_norm": 0.7121394276618958, - "learning_rate": 1.7275395414866437e-06, - "loss": 0.2033, - "step": 17279 - }, - { - "epoch": 1.627846729940416, - "grad_norm": 0.6867882609367371, - "learning_rate": 1.7266912475107378e-06, - "loss": 0.1997, - "step": 17280 - }, - { - "epoch": 1.6279409340335838, - "grad_norm": 0.6606661677360535, - "learning_rate": 1.725843142179069e-06, - "loss": 0.1945, - "step": 17281 - }, - { - "epoch": 1.6280351381267515, - "grad_norm": 0.7026911973953247, - "learning_rate": 1.7249952255109702e-06, - "loss": 0.2041, - "step": 17282 - }, - { - "epoch": 1.6281293422199195, - "grad_norm": 0.6324657201766968, - "learning_rate": 1.7241474975257777e-06, - "loss": 0.2068, - "step": 17283 - }, - { - "epoch": 1.6282235463130874, - "grad_norm": 0.6406705975532532, - "learning_rate": 1.7232999582428244e-06, - "loss": 0.1984, - "step": 17284 - }, - { - "epoch": 1.6283177504062551, - "grad_norm": 0.6478314995765686, - "learning_rate": 1.7224526076814284e-06, - "loss": 0.169, - "step": 17285 - }, - { - "epoch": 1.6284119544994229, - "grad_norm": 0.6279692053794861, - "learning_rate": 1.7216054458609155e-06, - "loss": 0.1907, - "step": 17286 - }, - { - "epoch": 1.6285061585925908, - "grad_norm": 0.6874887347221375, - "learning_rate": 1.7207584728006033e-06, - "loss": 0.206, - "step": 17287 - }, - { - "epoch": 1.6286003626857588, - "grad_norm": 0.6094188094139099, - "learning_rate": 1.7199116885197996e-06, - "loss": 0.1995, - "step": 17288 - }, - { - "epoch": 1.6286945667789265, - "grad_norm": 0.6797674298286438, - "learning_rate": 1.719065093037815e-06, - "loss": 0.2134, - "step": 17289 - }, - { - "epoch": 1.6287887708720943, - "grad_norm": 0.7516131401062012, - "learning_rate": 1.7182186863739548e-06, - "loss": 0.2025, - "step": 17290 - }, - { - "epoch": 1.6288829749652622, - "grad_norm": 0.6889533400535583, - "learning_rate": 1.7173724685475146e-06, - "loss": 0.1877, - "step": 17291 - }, - { - "epoch": 1.6289771790584302, - "grad_norm": 0.6603233814239502, - "learning_rate": 1.7165264395777913e-06, - "loss": 0.1825, - "step": 17292 - }, - { - "epoch": 1.629071383151598, - "grad_norm": 0.7016066908836365, - "learning_rate": 1.7156805994840787e-06, - "loss": 0.1807, - "step": 17293 - }, - { - "epoch": 1.6291655872447657, - "grad_norm": 0.7911700010299683, - "learning_rate": 1.7148349482856598e-06, - "loss": 0.1915, - "step": 17294 - }, - { - "epoch": 1.6292597913379336, - "grad_norm": 0.6259404420852661, - "learning_rate": 1.7139894860018158e-06, - "loss": 0.1686, - "step": 17295 - }, - { - "epoch": 1.6293539954311016, - "grad_norm": 0.6378378868103027, - "learning_rate": 1.713144212651825e-06, - "loss": 0.1911, - "step": 17296 - }, - { - "epoch": 1.6294481995242693, - "grad_norm": 0.6419534087181091, - "learning_rate": 1.712299128254965e-06, - "loss": 0.1972, - "step": 17297 - }, - { - "epoch": 1.629542403617437, - "grad_norm": 0.6143866777420044, - "learning_rate": 1.7114542328304995e-06, - "loss": 0.1806, - "step": 
17298 - }, - { - "epoch": 1.629636607710605, - "grad_norm": 0.6389146447181702, - "learning_rate": 1.7106095263976951e-06, - "loss": 0.1858, - "step": 17299 - }, - { - "epoch": 1.629730811803773, - "grad_norm": 0.6596473455429077, - "learning_rate": 1.7097650089758167e-06, - "loss": 0.1979, - "step": 17300 - }, - { - "epoch": 1.6298250158969407, - "grad_norm": 0.679501473903656, - "learning_rate": 1.708920680584115e-06, - "loss": 0.194, - "step": 17301 - }, - { - "epoch": 1.6299192199901085, - "grad_norm": 0.720536470413208, - "learning_rate": 1.7080765412418443e-06, - "loss": 0.2055, - "step": 17302 - }, - { - "epoch": 1.6300134240832764, - "grad_norm": 0.6592379808425903, - "learning_rate": 1.7072325909682542e-06, - "loss": 0.1884, - "step": 17303 - }, - { - "epoch": 1.6301076281764444, - "grad_norm": 0.6442597508430481, - "learning_rate": 1.7063888297825825e-06, - "loss": 0.1956, - "step": 17304 - }, - { - "epoch": 1.6302018322696121, - "grad_norm": 0.6127653121948242, - "learning_rate": 1.7055452577040754e-06, - "loss": 0.1849, - "step": 17305 - }, - { - "epoch": 1.6302960363627799, - "grad_norm": 0.6029418110847473, - "learning_rate": 1.7047018747519617e-06, - "loss": 0.1716, - "step": 17306 - }, - { - "epoch": 1.6303902404559478, - "grad_norm": 0.6264557838439941, - "learning_rate": 1.7038586809454738e-06, - "loss": 0.1777, - "step": 17307 - }, - { - "epoch": 1.6304844445491158, - "grad_norm": 0.6971416473388672, - "learning_rate": 1.7030156763038408e-06, - "loss": 0.2296, - "step": 17308 - }, - { - "epoch": 1.6305786486422835, - "grad_norm": 0.6296881437301636, - "learning_rate": 1.7021728608462795e-06, - "loss": 0.2043, - "step": 17309 - }, - { - "epoch": 1.6306728527354513, - "grad_norm": 0.7314896583557129, - "learning_rate": 1.7013302345920103e-06, - "loss": 0.217, - "step": 17310 - }, - { - "epoch": 1.6307670568286192, - "grad_norm": 0.6422860622406006, - "learning_rate": 1.7004877975602474e-06, - "loss": 0.1991, - "step": 17311 - }, - { - "epoch": 1.6308612609217872, - "grad_norm": 0.68195641040802, - "learning_rate": 1.6996455497701958e-06, - "loss": 0.1947, - "step": 17312 - }, - { - "epoch": 1.630955465014955, - "grad_norm": 0.8967283368110657, - "learning_rate": 1.6988034912410622e-06, - "loss": 0.2223, - "step": 17313 - }, - { - "epoch": 1.6310496691081227, - "grad_norm": 0.6603388786315918, - "learning_rate": 1.6979616219920504e-06, - "loss": 0.1911, - "step": 17314 - }, - { - "epoch": 1.6311438732012906, - "grad_norm": 0.7128387093544006, - "learning_rate": 1.6971199420423522e-06, - "loss": 0.2124, - "step": 17315 - }, - { - "epoch": 1.6312380772944586, - "grad_norm": 0.6550495028495789, - "learning_rate": 1.6962784514111564e-06, - "loss": 0.1965, - "step": 17316 - }, - { - "epoch": 1.6313322813876263, - "grad_norm": 0.5830024480819702, - "learning_rate": 1.6954371501176569e-06, - "loss": 0.1824, - "step": 17317 - }, - { - "epoch": 1.631426485480794, - "grad_norm": 0.7229143381118774, - "learning_rate": 1.6945960381810345e-06, - "loss": 0.1783, - "step": 17318 - }, - { - "epoch": 1.631520689573962, - "grad_norm": 0.6041586995124817, - "learning_rate": 1.6937551156204647e-06, - "loss": 0.1838, - "step": 17319 - }, - { - "epoch": 1.63161489366713, - "grad_norm": 0.7068755626678467, - "learning_rate": 1.6929143824551241e-06, - "loss": 0.2129, - "step": 17320 - }, - { - "epoch": 1.6317090977602977, - "grad_norm": 0.6394292712211609, - "learning_rate": 1.6920738387041857e-06, - "loss": 0.1995, - "step": 17321 - }, - { - "epoch": 1.6318033018534654, - "grad_norm": 
0.6189360618591309, - "learning_rate": 1.6912334843868083e-06, - "loss": 0.1742, - "step": 17322 - }, - { - "epoch": 1.6318975059466334, - "grad_norm": 0.7381575107574463, - "learning_rate": 1.690393319522159e-06, - "loss": 0.2174, - "step": 17323 - }, - { - "epoch": 1.6319917100398014, - "grad_norm": 0.6230690479278564, - "learning_rate": 1.6895533441293943e-06, - "loss": 0.2096, - "step": 17324 - }, - { - "epoch": 1.632085914132969, - "grad_norm": 0.6463793516159058, - "learning_rate": 1.6887135582276637e-06, - "loss": 0.1923, - "step": 17325 - }, - { - "epoch": 1.6321801182261368, - "grad_norm": 0.6478497982025146, - "learning_rate": 1.6878739618361173e-06, - "loss": 0.2073, - "step": 17326 - }, - { - "epoch": 1.6322743223193048, - "grad_norm": 0.706958532333374, - "learning_rate": 1.687034554973902e-06, - "loss": 0.2041, - "step": 17327 - }, - { - "epoch": 1.6323685264124728, - "grad_norm": 0.7188374400138855, - "learning_rate": 1.6861953376601525e-06, - "loss": 0.1882, - "step": 17328 - }, - { - "epoch": 1.6324627305056405, - "grad_norm": 0.6503280997276306, - "learning_rate": 1.685356309914007e-06, - "loss": 0.2046, - "step": 17329 - }, - { - "epoch": 1.6325569345988082, - "grad_norm": 0.677300751209259, - "learning_rate": 1.6845174717545997e-06, - "loss": 0.2, - "step": 17330 - }, - { - "epoch": 1.6326511386919762, - "grad_norm": 1.1053460836410522, - "learning_rate": 1.6836788232010537e-06, - "loss": 0.1837, - "step": 17331 - }, - { - "epoch": 1.6327453427851442, - "grad_norm": 0.6405396461486816, - "learning_rate": 1.6828403642724866e-06, - "loss": 0.2099, - "step": 17332 - }, - { - "epoch": 1.632839546878312, - "grad_norm": 0.6526809334754944, - "learning_rate": 1.6820020949880268e-06, - "loss": 0.1722, - "step": 17333 - }, - { - "epoch": 1.6329337509714796, - "grad_norm": 0.7204709649085999, - "learning_rate": 1.681164015366784e-06, - "loss": 0.2174, - "step": 17334 - }, - { - "epoch": 1.6330279550646476, - "grad_norm": 0.6111131906509399, - "learning_rate": 1.6803261254278635e-06, - "loss": 0.1891, - "step": 17335 - }, - { - "epoch": 1.6331221591578156, - "grad_norm": 0.6242854595184326, - "learning_rate": 1.6794884251903753e-06, - "loss": 0.1768, - "step": 17336 - }, - { - "epoch": 1.6332163632509833, - "grad_norm": 0.6689353585243225, - "learning_rate": 1.6786509146734197e-06, - "loss": 0.1646, - "step": 17337 - }, - { - "epoch": 1.633310567344151, - "grad_norm": 0.634945273399353, - "learning_rate": 1.6778135938960915e-06, - "loss": 0.1985, - "step": 17338 - }, - { - "epoch": 1.633404771437319, - "grad_norm": 0.6326664686203003, - "learning_rate": 1.676976462877483e-06, - "loss": 0.1998, - "step": 17339 - }, - { - "epoch": 1.633498975530487, - "grad_norm": 0.5912447571754456, - "learning_rate": 1.6761395216366861e-06, - "loss": 0.1844, - "step": 17340 - }, - { - "epoch": 1.6335931796236547, - "grad_norm": 0.7482324838638306, - "learning_rate": 1.6753027701927783e-06, - "loss": 0.2028, - "step": 17341 - }, - { - "epoch": 1.6336873837168224, - "grad_norm": 0.7161656022071838, - "learning_rate": 1.6744662085648423e-06, - "loss": 0.1886, - "step": 17342 - }, - { - "epoch": 1.6337815878099904, - "grad_norm": 0.6114586591720581, - "learning_rate": 1.6736298367719538e-06, - "loss": 0.166, - "step": 17343 - }, - { - "epoch": 1.6338757919031583, - "grad_norm": 0.612693190574646, - "learning_rate": 1.6727936548331803e-06, - "loss": 0.1909, - "step": 17344 - }, - { - "epoch": 1.633969995996326, - "grad_norm": 0.655239462852478, - "learning_rate": 1.6719576627675927e-06, - "loss": 
0.1954, - "step": 17345 - }, - { - "epoch": 1.6340642000894938, - "grad_norm": 0.6061803102493286, - "learning_rate": 1.6711218605942458e-06, - "loss": 0.171, - "step": 17346 - }, - { - "epoch": 1.6341584041826618, - "grad_norm": 0.6310243010520935, - "learning_rate": 1.6702862483322025e-06, - "loss": 0.1749, - "step": 17347 - }, - { - "epoch": 1.6342526082758297, - "grad_norm": 0.6760424971580505, - "learning_rate": 1.669450826000517e-06, - "loss": 0.2135, - "step": 17348 - }, - { - "epoch": 1.6343468123689975, - "grad_norm": 0.6666778922080994, - "learning_rate": 1.6686155936182347e-06, - "loss": 0.2076, - "step": 17349 - }, - { - "epoch": 1.6344410164621652, - "grad_norm": 0.7133226990699768, - "learning_rate": 1.667780551204401e-06, - "loss": 0.2241, - "step": 17350 - }, - { - "epoch": 1.6345352205553332, - "grad_norm": 0.7003960013389587, - "learning_rate": 1.6669456987780585e-06, - "loss": 0.1679, - "step": 17351 - }, - { - "epoch": 1.6346294246485011, - "grad_norm": 0.6771157383918762, - "learning_rate": 1.6661110363582389e-06, - "loss": 0.184, - "step": 17352 - }, - { - "epoch": 1.6347236287416689, - "grad_norm": 0.7309131026268005, - "learning_rate": 1.6652765639639779e-06, - "loss": 0.2288, - "step": 17353 - }, - { - "epoch": 1.6348178328348366, - "grad_norm": 0.646629273891449, - "learning_rate": 1.6644422816143024e-06, - "loss": 0.2011, - "step": 17354 - }, - { - "epoch": 1.6349120369280046, - "grad_norm": 0.6323843598365784, - "learning_rate": 1.6636081893282342e-06, - "loss": 0.1726, - "step": 17355 - }, - { - "epoch": 1.6350062410211725, - "grad_norm": 0.6303738355636597, - "learning_rate": 1.6627742871247887e-06, - "loss": 0.173, - "step": 17356 - }, - { - "epoch": 1.6351004451143403, - "grad_norm": 0.6361851692199707, - "learning_rate": 1.6619405750229834e-06, - "loss": 0.1899, - "step": 17357 - }, - { - "epoch": 1.635194649207508, - "grad_norm": 0.6274666786193848, - "learning_rate": 1.6611070530418317e-06, - "loss": 0.1708, - "step": 17358 - }, - { - "epoch": 1.635288853300676, - "grad_norm": 0.6658263802528381, - "learning_rate": 1.6602737212003316e-06, - "loss": 0.1906, - "step": 17359 - }, - { - "epoch": 1.6353830573938437, - "grad_norm": 0.6502881050109863, - "learning_rate": 1.6594405795174896e-06, - "loss": 0.2335, - "step": 17360 - }, - { - "epoch": 1.6354772614870114, - "grad_norm": 0.7372328042984009, - "learning_rate": 1.6586076280123032e-06, - "loss": 0.2047, - "step": 17361 - }, - { - "epoch": 1.6355714655801794, - "grad_norm": 0.6899813413619995, - "learning_rate": 1.6577748667037596e-06, - "loss": 0.1947, - "step": 17362 - }, - { - "epoch": 1.6356656696733474, - "grad_norm": 0.6836014986038208, - "learning_rate": 1.6569422956108515e-06, - "loss": 0.2082, - "step": 17363 - }, - { - "epoch": 1.635759873766515, - "grad_norm": 0.7940660119056702, - "learning_rate": 1.656109914752565e-06, - "loss": 0.216, - "step": 17364 - }, - { - "epoch": 1.6358540778596828, - "grad_norm": 0.6817182898521423, - "learning_rate": 1.6552777241478735e-06, - "loss": 0.1848, - "step": 17365 - }, - { - "epoch": 1.6359482819528508, - "grad_norm": 0.6277564764022827, - "learning_rate": 1.6544457238157551e-06, - "loss": 0.2055, - "step": 17366 - }, - { - "epoch": 1.6360424860460188, - "grad_norm": 0.743478536605835, - "learning_rate": 1.6536139137751829e-06, - "loss": 0.2412, - "step": 17367 - }, - { - "epoch": 1.6361366901391865, - "grad_norm": 0.6931159496307373, - "learning_rate": 1.65278229404512e-06, - "loss": 0.1853, - "step": 17368 - }, - { - "epoch": 1.6362308942323542, - 
"grad_norm": 0.6792427897453308, - "learning_rate": 1.6519508646445293e-06, - "loss": 0.1983, - "step": 17369 - }, - { - "epoch": 1.6363250983255222, - "grad_norm": 0.7112650871276855, - "learning_rate": 1.6511196255923711e-06, - "loss": 0.2064, - "step": 17370 - }, - { - "epoch": 1.6364193024186902, - "grad_norm": 0.5833031535148621, - "learning_rate": 1.6502885769075983e-06, - "loss": 0.1814, - "step": 17371 - }, - { - "epoch": 1.636513506511858, - "grad_norm": 0.696064293384552, - "learning_rate": 1.6494577186091543e-06, - "loss": 0.1982, - "step": 17372 - }, - { - "epoch": 1.6366077106050256, - "grad_norm": 0.6086500883102417, - "learning_rate": 1.648627050715993e-06, - "loss": 0.1847, - "step": 17373 - }, - { - "epoch": 1.6367019146981936, - "grad_norm": 0.6393113732337952, - "learning_rate": 1.6477965732470502e-06, - "loss": 0.1968, - "step": 17374 - }, - { - "epoch": 1.6367961187913616, - "grad_norm": 0.6088919043540955, - "learning_rate": 1.646966286221261e-06, - "loss": 0.1949, - "step": 17375 - }, - { - "epoch": 1.6368903228845293, - "grad_norm": 0.7196995615959167, - "learning_rate": 1.646136189657558e-06, - "loss": 0.1961, - "step": 17376 - }, - { - "epoch": 1.636984526977697, - "grad_norm": 0.58502596616745, - "learning_rate": 1.6453062835748723e-06, - "loss": 0.1746, - "step": 17377 - }, - { - "epoch": 1.637078731070865, - "grad_norm": 0.6163436770439148, - "learning_rate": 1.6444765679921215e-06, - "loss": 0.1781, - "step": 17378 - }, - { - "epoch": 1.637172935164033, - "grad_norm": 0.6482148766517639, - "learning_rate": 1.643647042928227e-06, - "loss": 0.1929, - "step": 17379 - }, - { - "epoch": 1.6372671392572007, - "grad_norm": 0.680004894733429, - "learning_rate": 1.6428177084021058e-06, - "loss": 0.205, - "step": 17380 - }, - { - "epoch": 1.6373613433503684, - "grad_norm": 0.5815574526786804, - "learning_rate": 1.6419885644326627e-06, - "loss": 0.1688, - "step": 17381 - }, - { - "epoch": 1.6374555474435364, - "grad_norm": 0.688826858997345, - "learning_rate": 1.6411596110388062e-06, - "loss": 0.1762, - "step": 17382 - }, - { - "epoch": 1.6375497515367043, - "grad_norm": 0.6474208235740662, - "learning_rate": 1.6403308482394408e-06, - "loss": 0.1948, - "step": 17383 - }, - { - "epoch": 1.637643955629872, - "grad_norm": 0.6522237062454224, - "learning_rate": 1.6395022760534574e-06, - "loss": 0.184, - "step": 17384 - }, - { - "epoch": 1.6377381597230398, - "grad_norm": 0.6208203434944153, - "learning_rate": 1.638673894499755e-06, - "loss": 0.17, - "step": 17385 - }, - { - "epoch": 1.6378323638162078, - "grad_norm": 0.6058263182640076, - "learning_rate": 1.6378457035972161e-06, - "loss": 0.1881, - "step": 17386 - }, - { - "epoch": 1.6379265679093757, - "grad_norm": 0.7474789023399353, - "learning_rate": 1.6370177033647272e-06, - "loss": 0.2422, - "step": 17387 - }, - { - "epoch": 1.6380207720025435, - "grad_norm": 0.6309479475021362, - "learning_rate": 1.6361898938211707e-06, - "loss": 0.1976, - "step": 17388 - }, - { - "epoch": 1.6381149760957112, - "grad_norm": 0.7674265503883362, - "learning_rate": 1.635362274985417e-06, - "loss": 0.1952, - "step": 17389 - }, - { - "epoch": 1.6382091801888792, - "grad_norm": 0.686428964138031, - "learning_rate": 1.63453484687634e-06, - "loss": 0.1894, - "step": 17390 - }, - { - "epoch": 1.6383033842820471, - "grad_norm": 0.6450231075286865, - "learning_rate": 1.633707609512809e-06, - "loss": 0.1921, - "step": 17391 - }, - { - "epoch": 1.6383975883752149, - "grad_norm": 0.654132604598999, - "learning_rate": 1.6328805629136801e-06, - 
"loss": 0.2007, - "step": 17392 - }, - { - "epoch": 1.6384917924683826, - "grad_norm": 0.6398543119430542, - "learning_rate": 1.6320537070978138e-06, - "loss": 0.1883, - "step": 17393 - }, - { - "epoch": 1.6385859965615506, - "grad_norm": 0.7517440915107727, - "learning_rate": 1.631227042084067e-06, - "loss": 0.2274, - "step": 17394 - }, - { - "epoch": 1.6386802006547185, - "grad_norm": 0.6238316297531128, - "learning_rate": 1.6304005678912872e-06, - "loss": 0.18, - "step": 17395 - }, - { - "epoch": 1.6387744047478863, - "grad_norm": 0.7552535533905029, - "learning_rate": 1.6295742845383146e-06, - "loss": 0.2045, - "step": 17396 - }, - { - "epoch": 1.638868608841054, - "grad_norm": 0.6809061765670776, - "learning_rate": 1.6287481920439941e-06, - "loss": 0.2131, - "step": 17397 - }, - { - "epoch": 1.638962812934222, - "grad_norm": 0.6528740525245667, - "learning_rate": 1.627922290427163e-06, - "loss": 0.182, - "step": 17398 - }, - { - "epoch": 1.63905701702739, - "grad_norm": 0.6091368198394775, - "learning_rate": 1.6270965797066496e-06, - "loss": 0.1671, - "step": 17399 - }, - { - "epoch": 1.6391512211205577, - "grad_norm": 0.6520994305610657, - "learning_rate": 1.6262710599012832e-06, - "loss": 0.2017, - "step": 17400 - }, - { - "epoch": 1.6392454252137254, - "grad_norm": 0.6457948088645935, - "learning_rate": 1.6254457310298887e-06, - "loss": 0.1866, - "step": 17401 - }, - { - "epoch": 1.6393396293068934, - "grad_norm": 0.6623305678367615, - "learning_rate": 1.6246205931112802e-06, - "loss": 0.1944, - "step": 17402 - }, - { - "epoch": 1.6394338334000613, - "grad_norm": 0.6019333004951477, - "learning_rate": 1.6237956461642756e-06, - "loss": 0.1966, - "step": 17403 - }, - { - "epoch": 1.639528037493229, - "grad_norm": 0.6890769600868225, - "learning_rate": 1.6229708902076868e-06, - "loss": 0.194, - "step": 17404 - }, - { - "epoch": 1.6396222415863968, - "grad_norm": 0.7059999108314514, - "learning_rate": 1.6221463252603154e-06, - "loss": 0.1919, - "step": 17405 - }, - { - "epoch": 1.6397164456795648, - "grad_norm": 0.6976580619812012, - "learning_rate": 1.621321951340963e-06, - "loss": 0.191, - "step": 17406 - }, - { - "epoch": 1.6398106497727327, - "grad_norm": 0.627890408039093, - "learning_rate": 1.620497768468431e-06, - "loss": 0.1848, - "step": 17407 - }, - { - "epoch": 1.6399048538659005, - "grad_norm": 0.6436595916748047, - "learning_rate": 1.6196737766615068e-06, - "loss": 0.1969, - "step": 17408 - }, - { - "epoch": 1.6399990579590682, - "grad_norm": 0.5872290730476379, - "learning_rate": 1.6188499759389798e-06, - "loss": 0.1797, - "step": 17409 - }, - { - "epoch": 1.6400932620522362, - "grad_norm": 0.7314695715904236, - "learning_rate": 1.6180263663196382e-06, - "loss": 0.2144, - "step": 17410 - }, - { - "epoch": 1.6401874661454041, - "grad_norm": 0.6125447154045105, - "learning_rate": 1.6172029478222595e-06, - "loss": 0.1778, - "step": 17411 - }, - { - "epoch": 1.6402816702385719, - "grad_norm": 0.7243834137916565, - "learning_rate": 1.6163797204656117e-06, - "loss": 0.2015, - "step": 17412 - }, - { - "epoch": 1.6403758743317396, - "grad_norm": 0.6943835616111755, - "learning_rate": 1.6155566842684767e-06, - "loss": 0.189, - "step": 17413 - }, - { - "epoch": 1.6404700784249076, - "grad_norm": 0.8817789554595947, - "learning_rate": 1.614733839249617e-06, - "loss": 0.199, - "step": 17414 - }, - { - "epoch": 1.6405642825180755, - "grad_norm": 0.7127186059951782, - "learning_rate": 1.6139111854277901e-06, - "loss": 0.2081, - "step": 17415 - }, - { - "epoch": 1.6406584866112432, - 
"grad_norm": 0.6457104682922363, - "learning_rate": 1.6130887228217584e-06, - "loss": 0.1944, - "step": 17416 - }, - { - "epoch": 1.640752690704411, - "grad_norm": 0.7006966471672058, - "learning_rate": 1.6122664514502761e-06, - "loss": 0.1831, - "step": 17417 - }, - { - "epoch": 1.640846894797579, - "grad_norm": 0.6882437467575073, - "learning_rate": 1.6114443713320893e-06, - "loss": 0.2011, - "step": 17418 - }, - { - "epoch": 1.640941098890747, - "grad_norm": 0.7212029099464417, - "learning_rate": 1.610622482485943e-06, - "loss": 0.1879, - "step": 17419 - }, - { - "epoch": 1.6410353029839146, - "grad_norm": 0.572352945804596, - "learning_rate": 1.6098007849305819e-06, - "loss": 0.1688, - "step": 17420 - }, - { - "epoch": 1.6411295070770824, - "grad_norm": 0.6336104273796082, - "learning_rate": 1.6089792786847346e-06, - "loss": 0.1832, - "step": 17421 - }, - { - "epoch": 1.6412237111702503, - "grad_norm": 0.6650234460830688, - "learning_rate": 1.6081579637671385e-06, - "loss": 0.1827, - "step": 17422 - }, - { - "epoch": 1.6413179152634183, - "grad_norm": 0.6842238903045654, - "learning_rate": 1.6073368401965206e-06, - "loss": 0.2079, - "step": 17423 - }, - { - "epoch": 1.641412119356586, - "grad_norm": 0.8026834726333618, - "learning_rate": 1.6065159079915992e-06, - "loss": 0.1905, - "step": 17424 - }, - { - "epoch": 1.6415063234497538, - "grad_norm": 0.6949887275695801, - "learning_rate": 1.6056951671710997e-06, - "loss": 0.2144, - "step": 17425 - }, - { - "epoch": 1.6416005275429217, - "grad_norm": 0.7393868565559387, - "learning_rate": 1.604874617753729e-06, - "loss": 0.1922, - "step": 17426 - }, - { - "epoch": 1.6416947316360897, - "grad_norm": 0.6920140981674194, - "learning_rate": 1.604054259758201e-06, - "loss": 0.2111, - "step": 17427 - }, - { - "epoch": 1.6417889357292574, - "grad_norm": 0.6958423852920532, - "learning_rate": 1.603234093203222e-06, - "loss": 0.1982, - "step": 17428 - }, - { - "epoch": 1.6418831398224252, - "grad_norm": 0.6376303434371948, - "learning_rate": 1.6024141181074905e-06, - "loss": 0.1697, - "step": 17429 - }, - { - "epoch": 1.6419773439155931, - "grad_norm": 0.6730598211288452, - "learning_rate": 1.6015943344897022e-06, - "loss": 0.1958, - "step": 17430 - }, - { - "epoch": 1.642071548008761, - "grad_norm": 0.5982670783996582, - "learning_rate": 1.600774742368556e-06, - "loss": 0.1731, - "step": 17431 - }, - { - "epoch": 1.6421657521019288, - "grad_norm": 0.688256025314331, - "learning_rate": 1.5999553417627312e-06, - "loss": 0.1956, - "step": 17432 - }, - { - "epoch": 1.6422599561950966, - "grad_norm": 0.6104230284690857, - "learning_rate": 1.5991361326909162e-06, - "loss": 0.1702, - "step": 17433 - }, - { - "epoch": 1.6423541602882645, - "grad_norm": 0.6189106106758118, - "learning_rate": 1.5983171151717924e-06, - "loss": 0.2018, - "step": 17434 - }, - { - "epoch": 1.6424483643814325, - "grad_norm": 1.8000823259353638, - "learning_rate": 1.5974982892240309e-06, - "loss": 0.2136, - "step": 17435 - }, - { - "epoch": 1.6425425684746002, - "grad_norm": 0.6984451413154602, - "learning_rate": 1.596679654866302e-06, - "loss": 0.1844, - "step": 17436 - }, - { - "epoch": 1.642636772567768, - "grad_norm": 0.5976433753967285, - "learning_rate": 1.5958612121172723e-06, - "loss": 0.1893, - "step": 17437 - }, - { - "epoch": 1.642730976660936, - "grad_norm": 0.7203249335289001, - "learning_rate": 1.5950429609956065e-06, - "loss": 0.2086, - "step": 17438 - }, - { - "epoch": 1.6428251807541039, - "grad_norm": 0.6157548427581787, - "learning_rate": 
1.5942249015199572e-06, - "loss": 0.1917, - "step": 17439 - }, - { - "epoch": 1.6429193848472716, - "grad_norm": 0.6497460603713989, - "learning_rate": 1.5934070337089802e-06, - "loss": 0.1953, - "step": 17440 - }, - { - "epoch": 1.6430135889404394, - "grad_norm": 0.5885839462280273, - "learning_rate": 1.5925893575813255e-06, - "loss": 0.181, - "step": 17441 - }, - { - "epoch": 1.6431077930336073, - "grad_norm": 0.6948521137237549, - "learning_rate": 1.5917718731556341e-06, - "loss": 0.1945, - "step": 17442 - }, - { - "epoch": 1.6432019971267753, - "grad_norm": 0.7173735499382019, - "learning_rate": 1.5909545804505477e-06, - "loss": 0.2126, - "step": 17443 - }, - { - "epoch": 1.643296201219943, - "grad_norm": 0.6352149844169617, - "learning_rate": 1.5901374794847035e-06, - "loss": 0.1879, - "step": 17444 - }, - { - "epoch": 1.6433904053131108, - "grad_norm": 0.6374592781066895, - "learning_rate": 1.5893205702767289e-06, - "loss": 0.1771, - "step": 17445 - }, - { - "epoch": 1.6434846094062787, - "grad_norm": 0.6871230602264404, - "learning_rate": 1.5885038528452524e-06, - "loss": 0.1803, - "step": 17446 - }, - { - "epoch": 1.6435788134994467, - "grad_norm": 0.6220190525054932, - "learning_rate": 1.5876873272088999e-06, - "loss": 0.169, - "step": 17447 - }, - { - "epoch": 1.6436730175926144, - "grad_norm": 0.6975589394569397, - "learning_rate": 1.586870993386286e-06, - "loss": 0.1916, - "step": 17448 - }, - { - "epoch": 1.6437672216857822, - "grad_norm": 0.646195113658905, - "learning_rate": 1.5860548513960184e-06, - "loss": 0.1975, - "step": 17449 - }, - { - "epoch": 1.6438614257789501, - "grad_norm": 0.6283693909645081, - "learning_rate": 1.5852389012567192e-06, - "loss": 0.1695, - "step": 17450 - }, - { - "epoch": 1.643955629872118, - "grad_norm": 0.757831871509552, - "learning_rate": 1.584423142986985e-06, - "loss": 0.1838, - "step": 17451 - }, - { - "epoch": 1.6440498339652858, - "grad_norm": 0.6395748853683472, - "learning_rate": 1.5836075766054148e-06, - "loss": 0.1896, - "step": 17452 - }, - { - "epoch": 1.6441440380584535, - "grad_norm": 0.6705828309059143, - "learning_rate": 1.5827922021306118e-06, - "loss": 0.1991, - "step": 17453 - }, - { - "epoch": 1.6442382421516215, - "grad_norm": 0.7036987543106079, - "learning_rate": 1.5819770195811646e-06, - "loss": 0.1925, - "step": 17454 - }, - { - "epoch": 1.6443324462447895, - "grad_norm": 0.6130074858665466, - "learning_rate": 1.581162028975658e-06, - "loss": 0.1694, - "step": 17455 - }, - { - "epoch": 1.6444266503379572, - "grad_norm": 0.6923983693122864, - "learning_rate": 1.5803472303326772e-06, - "loss": 0.2257, - "step": 17456 - }, - { - "epoch": 1.644520854431125, - "grad_norm": 0.6676929593086243, - "learning_rate": 1.579532623670802e-06, - "loss": 0.1798, - "step": 17457 - }, - { - "epoch": 1.644615058524293, - "grad_norm": 0.6116167902946472, - "learning_rate": 1.5787182090086038e-06, - "loss": 0.202, - "step": 17458 - }, - { - "epoch": 1.6447092626174609, - "grad_norm": 0.659077525138855, - "learning_rate": 1.5779039863646538e-06, - "loss": 0.2086, - "step": 17459 - }, - { - "epoch": 1.6448034667106286, - "grad_norm": 0.5949646830558777, - "learning_rate": 1.5770899557575215e-06, - "loss": 0.1849, - "step": 17460 - }, - { - "epoch": 1.6448976708037963, - "grad_norm": 0.5903298854827881, - "learning_rate": 1.5762761172057616e-06, - "loss": 0.1746, - "step": 17461 - }, - { - "epoch": 1.6449918748969643, - "grad_norm": 0.662360429763794, - "learning_rate": 1.5754624707279331e-06, - "loss": 0.1867, - "step": 17462 - }, - { - 
"epoch": 1.6450860789901323, - "grad_norm": 0.6063056588172913, - "learning_rate": 1.574649016342592e-06, - "loss": 0.1769, - "step": 17463 - }, - { - "epoch": 1.6451802830833, - "grad_norm": 0.6706985831260681, - "learning_rate": 1.5738357540682804e-06, - "loss": 0.191, - "step": 17464 - }, - { - "epoch": 1.6452744871764677, - "grad_norm": 0.5914628505706787, - "learning_rate": 1.5730226839235484e-06, - "loss": 0.1807, - "step": 17465 - }, - { - "epoch": 1.6453686912696357, - "grad_norm": 0.6614682674407959, - "learning_rate": 1.5722098059269285e-06, - "loss": 0.2187, - "step": 17466 - }, - { - "epoch": 1.6454628953628037, - "grad_norm": 0.7109770774841309, - "learning_rate": 1.571397120096959e-06, - "loss": 0.2209, - "step": 17467 - }, - { - "epoch": 1.6455570994559714, - "grad_norm": 0.7011008858680725, - "learning_rate": 1.5705846264521728e-06, - "loss": 0.1948, - "step": 17468 - }, - { - "epoch": 1.6456513035491391, - "grad_norm": 0.7054222226142883, - "learning_rate": 1.5697723250110907e-06, - "loss": 0.2142, - "step": 17469 - }, - { - "epoch": 1.645745507642307, - "grad_norm": 0.6907052993774414, - "learning_rate": 1.5689602157922379e-06, - "loss": 0.1878, - "step": 17470 - }, - { - "epoch": 1.645839711735475, - "grad_norm": 0.623249351978302, - "learning_rate": 1.5681482988141329e-06, - "loss": 0.1885, - "step": 17471 - }, - { - "epoch": 1.6459339158286428, - "grad_norm": 0.6415420174598694, - "learning_rate": 1.567336574095284e-06, - "loss": 0.1766, - "step": 17472 - }, - { - "epoch": 1.6460281199218105, - "grad_norm": 0.6554179787635803, - "learning_rate": 1.5665250416542055e-06, - "loss": 0.2015, - "step": 17473 - }, - { - "epoch": 1.6461223240149785, - "grad_norm": 0.6666958928108215, - "learning_rate": 1.565713701509397e-06, - "loss": 0.212, - "step": 17474 - }, - { - "epoch": 1.6462165281081464, - "grad_norm": 0.5999184250831604, - "learning_rate": 1.5649025536793616e-06, - "loss": 0.2156, - "step": 17475 - }, - { - "epoch": 1.6463107322013142, - "grad_norm": 0.6318747401237488, - "learning_rate": 1.564091598182591e-06, - "loss": 0.2039, - "step": 17476 - }, - { - "epoch": 1.646404936294482, - "grad_norm": 0.6377449631690979, - "learning_rate": 1.5632808350375773e-06, - "loss": 0.2152, - "step": 17477 - }, - { - "epoch": 1.6464991403876499, - "grad_norm": 0.6212782263755798, - "learning_rate": 1.562470264262812e-06, - "loss": 0.1791, - "step": 17478 - }, - { - "epoch": 1.6465933444808178, - "grad_norm": 0.9053308963775635, - "learning_rate": 1.5616598858767707e-06, - "loss": 0.1643, - "step": 17479 - }, - { - "epoch": 1.6466875485739856, - "grad_norm": 0.6468185782432556, - "learning_rate": 1.5608496998979338e-06, - "loss": 0.1917, - "step": 17480 - }, - { - "epoch": 1.6467817526671533, - "grad_norm": 0.6143595576286316, - "learning_rate": 1.560039706344777e-06, - "loss": 0.1831, - "step": 17481 - }, - { - "epoch": 1.6468759567603213, - "grad_norm": 0.5871161222457886, - "learning_rate": 1.5592299052357663e-06, - "loss": 0.1753, - "step": 17482 - }, - { - "epoch": 1.6469701608534892, - "grad_norm": 0.6375309228897095, - "learning_rate": 1.558420296589367e-06, - "loss": 0.1727, - "step": 17483 - }, - { - "epoch": 1.647064364946657, - "grad_norm": 0.7632774710655212, - "learning_rate": 1.5576108804240431e-06, - "loss": 0.1852, - "step": 17484 - }, - { - "epoch": 1.6471585690398247, - "grad_norm": 0.7169836759567261, - "learning_rate": 1.5568016567582446e-06, - "loss": 0.1984, - "step": 17485 - }, - { - "epoch": 1.6472527731329927, - "grad_norm": 0.6869579553604126, - 
"learning_rate": 1.5559926256104262e-06, - "loss": 0.2111, - "step": 17486 - }, - { - "epoch": 1.6473469772261606, - "grad_norm": 0.6542017459869385, - "learning_rate": 1.5551837869990372e-06, - "loss": 0.1911, - "step": 17487 - }, - { - "epoch": 1.6474411813193284, - "grad_norm": 0.5915003418922424, - "learning_rate": 1.5543751409425178e-06, - "loss": 0.1605, - "step": 17488 - }, - { - "epoch": 1.6475353854124961, - "grad_norm": 0.64701247215271, - "learning_rate": 1.5535666874593026e-06, - "loss": 0.1863, - "step": 17489 - }, - { - "epoch": 1.647629589505664, - "grad_norm": 0.6819285750389099, - "learning_rate": 1.552758426567833e-06, - "loss": 0.1917, - "step": 17490 - }, - { - "epoch": 1.647723793598832, - "grad_norm": 0.8095874786376953, - "learning_rate": 1.551950358286537e-06, - "loss": 0.1917, - "step": 17491 - }, - { - "epoch": 1.6478179976919998, - "grad_norm": 0.6300013661384583, - "learning_rate": 1.5511424826338318e-06, - "loss": 0.2012, - "step": 17492 - }, - { - "epoch": 1.6479122017851675, - "grad_norm": 0.7067247033119202, - "learning_rate": 1.5503347996281492e-06, - "loss": 0.199, - "step": 17493 - }, - { - "epoch": 1.6480064058783355, - "grad_norm": 0.6496374011039734, - "learning_rate": 1.5495273092879014e-06, - "loss": 0.201, - "step": 17494 - }, - { - "epoch": 1.6481006099715034, - "grad_norm": 0.7110437750816345, - "learning_rate": 1.548720011631497e-06, - "loss": 0.1987, - "step": 17495 - }, - { - "epoch": 1.6481948140646712, - "grad_norm": 0.6960927248001099, - "learning_rate": 1.5479129066773469e-06, - "loss": 0.208, - "step": 17496 - }, - { - "epoch": 1.648289018157839, - "grad_norm": 0.6513646841049194, - "learning_rate": 1.547105994443856e-06, - "loss": 0.2019, - "step": 17497 - }, - { - "epoch": 1.6483832222510069, - "grad_norm": 0.6682662963867188, - "learning_rate": 1.5462992749494187e-06, - "loss": 0.2005, - "step": 17498 - }, - { - "epoch": 1.6484774263441746, - "grad_norm": 0.7011797428131104, - "learning_rate": 1.5454927482124327e-06, - "loss": 0.2149, - "step": 17499 - }, - { - "epoch": 1.6485716304373423, - "grad_norm": 0.6496496200561523, - "learning_rate": 1.5446864142512885e-06, - "loss": 0.2006, - "step": 17500 - }, - { - "epoch": 1.6486658345305103, - "grad_norm": 0.7411315441131592, - "learning_rate": 1.5438802730843684e-06, - "loss": 0.1993, - "step": 17501 - }, - { - "epoch": 1.6487600386236783, - "grad_norm": 0.6486896872520447, - "learning_rate": 1.5430743247300561e-06, - "loss": 0.1885, - "step": 17502 - }, - { - "epoch": 1.648854242716846, - "grad_norm": 0.6243578195571899, - "learning_rate": 1.5422685692067297e-06, - "loss": 0.1858, - "step": 17503 - }, - { - "epoch": 1.6489484468100137, - "grad_norm": 0.7015368938446045, - "learning_rate": 1.5414630065327586e-06, - "loss": 0.2376, - "step": 17504 - }, - { - "epoch": 1.6490426509031817, - "grad_norm": 0.6350616216659546, - "learning_rate": 1.5406576367265135e-06, - "loss": 0.187, - "step": 17505 - }, - { - "epoch": 1.6491368549963497, - "grad_norm": 0.6465875506401062, - "learning_rate": 1.5398524598063558e-06, - "loss": 0.2049, - "step": 17506 - }, - { - "epoch": 1.6492310590895174, - "grad_norm": 0.6372957229614258, - "learning_rate": 1.5390474757906449e-06, - "loss": 0.1708, - "step": 17507 - }, - { - "epoch": 1.6493252631826851, - "grad_norm": 0.6691349148750305, - "learning_rate": 1.5382426846977394e-06, - "loss": 0.1829, - "step": 17508 - }, - { - "epoch": 1.649419467275853, - "grad_norm": 0.6700156331062317, - "learning_rate": 1.5374380865459847e-06, - "loss": 0.1845, - "step": 
17509 - }, - { - "epoch": 1.649513671369021, - "grad_norm": 0.6702751517295837, - "learning_rate": 1.5366336813537298e-06, - "loss": 0.2013, - "step": 17510 - }, - { - "epoch": 1.6496078754621888, - "grad_norm": 0.6582763195037842, - "learning_rate": 1.5358294691393173e-06, - "loss": 0.2033, - "step": 17511 - }, - { - "epoch": 1.6497020795553565, - "grad_norm": 0.697255551815033, - "learning_rate": 1.5350254499210814e-06, - "loss": 0.1999, - "step": 17512 - }, - { - "epoch": 1.6497962836485245, - "grad_norm": 0.6184289455413818, - "learning_rate": 1.534221623717358e-06, - "loss": 0.1886, - "step": 17513 - }, - { - "epoch": 1.6498904877416924, - "grad_norm": 0.6472569108009338, - "learning_rate": 1.533417990546473e-06, - "loss": 0.1988, - "step": 17514 - }, - { - "epoch": 1.6499846918348602, - "grad_norm": 0.6780841946601868, - "learning_rate": 1.5326145504267532e-06, - "loss": 0.1746, - "step": 17515 - }, - { - "epoch": 1.650078895928028, - "grad_norm": 0.6907581686973572, - "learning_rate": 1.5318113033765137e-06, - "loss": 0.2006, - "step": 17516 - }, - { - "epoch": 1.6501731000211959, - "grad_norm": 0.6199663877487183, - "learning_rate": 1.5310082494140744e-06, - "loss": 0.1836, - "step": 17517 - }, - { - "epoch": 1.6502673041143638, - "grad_norm": 0.7279683351516724, - "learning_rate": 1.530205388557746e-06, - "loss": 0.1735, - "step": 17518 - }, - { - "epoch": 1.6503615082075316, - "grad_norm": 0.6609973311424255, - "learning_rate": 1.5294027208258311e-06, - "loss": 0.1869, - "step": 17519 - }, - { - "epoch": 1.6504557123006993, - "grad_norm": 0.7526587247848511, - "learning_rate": 1.5286002462366344e-06, - "loss": 0.1935, - "step": 17520 - }, - { - "epoch": 1.6505499163938673, - "grad_norm": 0.7436555624008179, - "learning_rate": 1.527797964808455e-06, - "loss": 0.1802, - "step": 17521 - }, - { - "epoch": 1.6506441204870352, - "grad_norm": 0.6578570008277893, - "learning_rate": 1.5269958765595826e-06, - "loss": 0.2053, - "step": 17522 - }, - { - "epoch": 1.650738324580203, - "grad_norm": 0.6650799512863159, - "learning_rate": 1.5261939815083083e-06, - "loss": 0.1958, - "step": 17523 - }, - { - "epoch": 1.6508325286733707, - "grad_norm": 0.6936365962028503, - "learning_rate": 1.5253922796729193e-06, - "loss": 0.2127, - "step": 17524 - }, - { - "epoch": 1.6509267327665387, - "grad_norm": 0.6985155344009399, - "learning_rate": 1.5245907710716912e-06, - "loss": 0.2033, - "step": 17525 - }, - { - "epoch": 1.6510209368597066, - "grad_norm": 0.5962125062942505, - "learning_rate": 1.5237894557228972e-06, - "loss": 0.2049, - "step": 17526 - }, - { - "epoch": 1.6511151409528744, - "grad_norm": 0.6634477972984314, - "learning_rate": 1.522988333644816e-06, - "loss": 0.1949, - "step": 17527 - }, - { - "epoch": 1.651209345046042, - "grad_norm": 0.6634848713874817, - "learning_rate": 1.5221874048557117e-06, - "loss": 0.1851, - "step": 17528 - }, - { - "epoch": 1.65130354913921, - "grad_norm": 0.6660844683647156, - "learning_rate": 1.5213866693738411e-06, - "loss": 0.1918, - "step": 17529 - }, - { - "epoch": 1.651397753232378, - "grad_norm": 0.7167404294013977, - "learning_rate": 1.5205861272174705e-06, - "loss": 0.2064, - "step": 17530 - }, - { - "epoch": 1.6514919573255458, - "grad_norm": 0.6679966449737549, - "learning_rate": 1.519785778404851e-06, - "loss": 0.1875, - "step": 17531 - }, - { - "epoch": 1.6515861614187135, - "grad_norm": 0.5997360944747925, - "learning_rate": 1.5189856229542255e-06, - "loss": 0.1859, - "step": 17532 - }, - { - "epoch": 1.6516803655118815, - "grad_norm": 
0.6006638407707214, - "learning_rate": 1.5181856608838486e-06, - "loss": 0.1715, - "step": 17533 - }, - { - "epoch": 1.6517745696050494, - "grad_norm": 0.6586245894432068, - "learning_rate": 1.5173858922119555e-06, - "loss": 0.1769, - "step": 17534 - }, - { - "epoch": 1.6518687736982172, - "grad_norm": 0.6618895530700684, - "learning_rate": 1.5165863169567808e-06, - "loss": 0.2026, - "step": 17535 - }, - { - "epoch": 1.651962977791385, - "grad_norm": 0.7292799353599548, - "learning_rate": 1.5157869351365583e-06, - "loss": 0.2152, - "step": 17536 - }, - { - "epoch": 1.6520571818845529, - "grad_norm": 0.7579766511917114, - "learning_rate": 1.5149877467695173e-06, - "loss": 0.1741, - "step": 17537 - }, - { - "epoch": 1.6521513859777208, - "grad_norm": 0.6587605476379395, - "learning_rate": 1.5141887518738752e-06, - "loss": 0.1996, - "step": 17538 - }, - { - "epoch": 1.6522455900708886, - "grad_norm": 0.6389364004135132, - "learning_rate": 1.5133899504678529e-06, - "loss": 0.1825, - "step": 17539 - }, - { - "epoch": 1.6523397941640563, - "grad_norm": 0.6553727984428406, - "learning_rate": 1.512591342569667e-06, - "loss": 0.1833, - "step": 17540 - }, - { - "epoch": 1.6524339982572243, - "grad_norm": 0.629486083984375, - "learning_rate": 1.5117929281975218e-06, - "loss": 0.1702, - "step": 17541 - }, - { - "epoch": 1.6525282023503922, - "grad_norm": 0.6960748434066772, - "learning_rate": 1.5109947073696253e-06, - "loss": 0.2204, - "step": 17542 - }, - { - "epoch": 1.65262240644356, - "grad_norm": 0.5940431356430054, - "learning_rate": 1.5101966801041812e-06, - "loss": 0.2061, - "step": 17543 - }, - { - "epoch": 1.6527166105367277, - "grad_norm": 0.7151151895523071, - "learning_rate": 1.5093988464193787e-06, - "loss": 0.2134, - "step": 17544 - }, - { - "epoch": 1.6528108146298957, - "grad_norm": 0.6399179697036743, - "learning_rate": 1.5086012063334165e-06, - "loss": 0.2077, - "step": 17545 - }, - { - "epoch": 1.6529050187230636, - "grad_norm": 0.5704237222671509, - "learning_rate": 1.5078037598644767e-06, - "loss": 0.1568, - "step": 17546 - }, - { - "epoch": 1.6529992228162314, - "grad_norm": 0.6839672923088074, - "learning_rate": 1.5070065070307449e-06, - "loss": 0.2075, - "step": 17547 - }, - { - "epoch": 1.653093426909399, - "grad_norm": 0.6249094605445862, - "learning_rate": 1.506209447850402e-06, - "loss": 0.2364, - "step": 17548 - }, - { - "epoch": 1.653187631002567, - "grad_norm": 0.6789788603782654, - "learning_rate": 1.5054125823416166e-06, - "loss": 0.1876, - "step": 17549 - }, - { - "epoch": 1.653281835095735, - "grad_norm": 0.6458231806755066, - "learning_rate": 1.5046159105225622e-06, - "loss": 0.2022, - "step": 17550 - }, - { - "epoch": 1.6533760391889027, - "grad_norm": 0.6392346620559692, - "learning_rate": 1.5038194324114053e-06, - "loss": 0.2003, - "step": 17551 - }, - { - "epoch": 1.6534702432820705, - "grad_norm": 0.6021639704704285, - "learning_rate": 1.5030231480263024e-06, - "loss": 0.1804, - "step": 17552 - }, - { - "epoch": 1.6535644473752384, - "grad_norm": 0.6376745700836182, - "learning_rate": 1.5022270573854148e-06, - "loss": 0.1921, - "step": 17553 - }, - { - "epoch": 1.6536586514684064, - "grad_norm": 0.6818093061447144, - "learning_rate": 1.5014311605068898e-06, - "loss": 0.2161, - "step": 17554 - }, - { - "epoch": 1.6537528555615741, - "grad_norm": 0.700320303440094, - "learning_rate": 1.50063545740888e-06, - "loss": 0.1857, - "step": 17555 - }, - { - "epoch": 1.6538470596547419, - "grad_norm": 0.6865605711936951, - "learning_rate": 1.4998399481095248e-06, - 
"loss": 0.1815, - "step": 17556 - }, - { - "epoch": 1.6539412637479098, - "grad_norm": 0.5971400737762451, - "learning_rate": 1.4990446326269637e-06, - "loss": 0.1789, - "step": 17557 - }, - { - "epoch": 1.6540354678410778, - "grad_norm": 1.5523242950439453, - "learning_rate": 1.498249510979335e-06, - "loss": 0.2267, - "step": 17558 - }, - { - "epoch": 1.6541296719342455, - "grad_norm": 0.68794184923172, - "learning_rate": 1.497454583184763e-06, - "loss": 0.1871, - "step": 17559 - }, - { - "epoch": 1.6542238760274133, - "grad_norm": 0.6559624075889587, - "learning_rate": 1.4966598492613759e-06, - "loss": 0.1996, - "step": 17560 - }, - { - "epoch": 1.6543180801205812, - "grad_norm": 0.6656409502029419, - "learning_rate": 1.4958653092272968e-06, - "loss": 0.1851, - "step": 17561 - }, - { - "epoch": 1.6544122842137492, - "grad_norm": 0.6483403444290161, - "learning_rate": 1.4950709631006388e-06, - "loss": 0.2161, - "step": 17562 - }, - { - "epoch": 1.654506488306917, - "grad_norm": 0.580389678478241, - "learning_rate": 1.4942768108995166e-06, - "loss": 0.1693, - "step": 17563 - }, - { - "epoch": 1.6546006924000847, - "grad_norm": 0.6562122106552124, - "learning_rate": 1.4934828526420387e-06, - "loss": 0.1689, - "step": 17564 - }, - { - "epoch": 1.6546948964932526, - "grad_norm": 0.648463785648346, - "learning_rate": 1.4926890883463074e-06, - "loss": 0.1988, - "step": 17565 - }, - { - "epoch": 1.6547891005864206, - "grad_norm": 0.6589934825897217, - "learning_rate": 1.4918955180304173e-06, - "loss": 0.1664, - "step": 17566 - }, - { - "epoch": 1.6548833046795883, - "grad_norm": 0.6080276966094971, - "learning_rate": 1.4911021417124716e-06, - "loss": 0.1703, - "step": 17567 - }, - { - "epoch": 1.654977508772756, - "grad_norm": 0.6568965911865234, - "learning_rate": 1.4903089594105568e-06, - "loss": 0.2019, - "step": 17568 - }, - { - "epoch": 1.655071712865924, - "grad_norm": 0.6023063659667969, - "learning_rate": 1.4895159711427542e-06, - "loss": 0.17, - "step": 17569 - }, - { - "epoch": 1.655165916959092, - "grad_norm": 0.6778880953788757, - "learning_rate": 1.4887231769271526e-06, - "loss": 0.1857, - "step": 17570 - }, - { - "epoch": 1.6552601210522597, - "grad_norm": 0.634365975856781, - "learning_rate": 1.4879305767818264e-06, - "loss": 0.1952, - "step": 17571 - }, - { - "epoch": 1.6553543251454275, - "grad_norm": 0.6282731890678406, - "learning_rate": 1.4871381707248422e-06, - "loss": 0.1807, - "step": 17572 - }, - { - "epoch": 1.6554485292385954, - "grad_norm": 0.7004312872886658, - "learning_rate": 1.4863459587742778e-06, - "loss": 0.2079, - "step": 17573 - }, - { - "epoch": 1.6555427333317634, - "grad_norm": 0.6796080470085144, - "learning_rate": 1.4855539409481922e-06, - "loss": 0.2126, - "step": 17574 - }, - { - "epoch": 1.6556369374249311, - "grad_norm": 0.6595932245254517, - "learning_rate": 1.484762117264642e-06, - "loss": 0.194, - "step": 17575 - }, - { - "epoch": 1.6557311415180989, - "grad_norm": 0.5841668844223022, - "learning_rate": 1.483970487741685e-06, - "loss": 0.1847, - "step": 17576 - }, - { - "epoch": 1.6558253456112668, - "grad_norm": 0.8948422074317932, - "learning_rate": 1.4831790523973733e-06, - "loss": 0.1852, - "step": 17577 - }, - { - "epoch": 1.6559195497044348, - "grad_norm": 0.6995124816894531, - "learning_rate": 1.4823878112497493e-06, - "loss": 0.209, - "step": 17578 - }, - { - "epoch": 1.6560137537976025, - "grad_norm": 0.6554468274116516, - "learning_rate": 1.481596764316855e-06, - "loss": 0.2028, - "step": 17579 - }, - { - "epoch": 1.6561079578907703, - 
"grad_norm": 0.681593656539917, - "learning_rate": 1.4808059116167306e-06, - "loss": 0.2043, - "step": 17580 - }, - { - "epoch": 1.6562021619839382, - "grad_norm": 0.6716547012329102, - "learning_rate": 1.480015253167404e-06, - "loss": 0.1995, - "step": 17581 - }, - { - "epoch": 1.6562963660771062, - "grad_norm": 0.7072934508323669, - "learning_rate": 1.4792247889869072e-06, - "loss": 0.1853, - "step": 17582 - }, - { - "epoch": 1.656390570170274, - "grad_norm": 0.6691693067550659, - "learning_rate": 1.4784345190932637e-06, - "loss": 0.1829, - "step": 17583 - }, - { - "epoch": 1.6564847742634417, - "grad_norm": 0.6505060791969299, - "learning_rate": 1.4776444435044911e-06, - "loss": 0.1994, - "step": 17584 - }, - { - "epoch": 1.6565789783566096, - "grad_norm": 0.6570205092430115, - "learning_rate": 1.4768545622386066e-06, - "loss": 0.2214, - "step": 17585 - }, - { - "epoch": 1.6566731824497776, - "grad_norm": 0.6212388873100281, - "learning_rate": 1.4760648753136174e-06, - "loss": 0.1803, - "step": 17586 - }, - { - "epoch": 1.6567673865429453, - "grad_norm": 0.7011963129043579, - "learning_rate": 1.475275382747532e-06, - "loss": 0.2393, - "step": 17587 - }, - { - "epoch": 1.656861590636113, - "grad_norm": 0.7012268304824829, - "learning_rate": 1.474486084558353e-06, - "loss": 0.2197, - "step": 17588 - }, - { - "epoch": 1.656955794729281, - "grad_norm": 0.6832666397094727, - "learning_rate": 1.4736969807640744e-06, - "loss": 0.2358, - "step": 17589 - }, - { - "epoch": 1.657049998822449, - "grad_norm": 0.6616160869598389, - "learning_rate": 1.4729080713826938e-06, - "loss": 0.1826, - "step": 17590 - }, - { - "epoch": 1.6571442029156167, - "grad_norm": 0.6552468538284302, - "learning_rate": 1.4721193564321934e-06, - "loss": 0.188, - "step": 17591 - }, - { - "epoch": 1.6572384070087844, - "grad_norm": 0.7042325139045715, - "learning_rate": 1.4713308359305611e-06, - "loss": 0.1911, - "step": 17592 - }, - { - "epoch": 1.6573326111019524, - "grad_norm": 0.6441843509674072, - "learning_rate": 1.470542509895777e-06, - "loss": 0.214, - "step": 17593 - }, - { - "epoch": 1.6574268151951204, - "grad_norm": 0.689139187335968, - "learning_rate": 1.4697543783458124e-06, - "loss": 0.2179, - "step": 17594 - }, - { - "epoch": 1.657521019288288, - "grad_norm": 0.7108054161071777, - "learning_rate": 1.4689664412986437e-06, - "loss": 0.182, - "step": 17595 - }, - { - "epoch": 1.6576152233814558, - "grad_norm": 0.7257113456726074, - "learning_rate": 1.4681786987722302e-06, - "loss": 0.2171, - "step": 17596 - }, - { - "epoch": 1.6577094274746238, - "grad_norm": 0.6485233306884766, - "learning_rate": 1.4673911507845374e-06, - "loss": 0.198, - "step": 17597 - }, - { - "epoch": 1.6578036315677918, - "grad_norm": 0.6282594203948975, - "learning_rate": 1.4666037973535253e-06, - "loss": 0.1832, - "step": 17598 - }, - { - "epoch": 1.6578978356609595, - "grad_norm": 0.6632306575775146, - "learning_rate": 1.4658166384971406e-06, - "loss": 0.1808, - "step": 17599 - }, - { - "epoch": 1.6579920397541272, - "grad_norm": 0.7635298371315002, - "learning_rate": 1.4650296742333347e-06, - "loss": 0.2266, - "step": 17600 - }, - { - "epoch": 1.6580862438472952, - "grad_norm": 0.6387211680412292, - "learning_rate": 1.4642429045800544e-06, - "loss": 0.1981, - "step": 17601 - }, - { - "epoch": 1.6581804479404632, - "grad_norm": 0.7338160276412964, - "learning_rate": 1.463456329555235e-06, - "loss": 0.2161, - "step": 17602 - }, - { - "epoch": 1.658274652033631, - "grad_norm": 0.6713192462921143, - "learning_rate": 1.4626699491768126e-06, 
- "loss": 0.2033, - "step": 17603 - }, - { - "epoch": 1.6583688561267986, - "grad_norm": 0.6394454836845398, - "learning_rate": 1.4618837634627214e-06, - "loss": 0.2103, - "step": 17604 - }, - { - "epoch": 1.6584630602199666, - "grad_norm": 0.6917247772216797, - "learning_rate": 1.461097772430884e-06, - "loss": 0.2007, - "step": 17605 - }, - { - "epoch": 1.6585572643131345, - "grad_norm": 0.6584296226501465, - "learning_rate": 1.460311976099219e-06, - "loss": 0.2028, - "step": 17606 - }, - { - "epoch": 1.6586514684063023, - "grad_norm": 0.6669653058052063, - "learning_rate": 1.4595263744856524e-06, - "loss": 0.2034, - "step": 17607 - }, - { - "epoch": 1.65874567249947, - "grad_norm": 0.7105797529220581, - "learning_rate": 1.4587409676080932e-06, - "loss": 0.1733, - "step": 17608 - }, - { - "epoch": 1.658839876592638, - "grad_norm": 0.6380971670150757, - "learning_rate": 1.4579557554844437e-06, - "loss": 0.1845, - "step": 17609 - }, - { - "epoch": 1.658934080685806, - "grad_norm": 0.6251140236854553, - "learning_rate": 1.4571707381326184e-06, - "loss": 0.1932, - "step": 17610 - }, - { - "epoch": 1.6590282847789737, - "grad_norm": 0.7100645899772644, - "learning_rate": 1.4563859155705118e-06, - "loss": 0.2093, - "step": 17611 - }, - { - "epoch": 1.6591224888721414, - "grad_norm": 0.6430773138999939, - "learning_rate": 1.4556012878160152e-06, - "loss": 0.2009, - "step": 17612 - }, - { - "epoch": 1.6592166929653094, - "grad_norm": 0.6023344397544861, - "learning_rate": 1.454816854887028e-06, - "loss": 0.1848, - "step": 17613 - }, - { - "epoch": 1.6593108970584773, - "grad_norm": 0.7361556887626648, - "learning_rate": 1.4540326168014318e-06, - "loss": 0.219, - "step": 17614 - }, - { - "epoch": 1.659405101151645, - "grad_norm": 0.6500685811042786, - "learning_rate": 1.4532485735771052e-06, - "loss": 0.2, - "step": 17615 - }, - { - "epoch": 1.6594993052448128, - "grad_norm": 0.6812999248504639, - "learning_rate": 1.4524647252319302e-06, - "loss": 0.201, - "step": 17616 - }, - { - "epoch": 1.6595935093379808, - "grad_norm": 0.6409395933151245, - "learning_rate": 1.4516810717837804e-06, - "loss": 0.2078, - "step": 17617 - }, - { - "epoch": 1.6596877134311487, - "grad_norm": 0.7471181750297546, - "learning_rate": 1.4508976132505204e-06, - "loss": 0.2125, - "step": 17618 - }, - { - "epoch": 1.6597819175243165, - "grad_norm": 0.7450597882270813, - "learning_rate": 1.4501143496500158e-06, - "loss": 0.223, - "step": 17619 - }, - { - "epoch": 1.6598761216174842, - "grad_norm": 0.6431857347488403, - "learning_rate": 1.4493312810001293e-06, - "loss": 0.2345, - "step": 17620 - }, - { - "epoch": 1.6599703257106522, - "grad_norm": 0.6000074148178101, - "learning_rate": 1.4485484073187107e-06, - "loss": 0.1886, - "step": 17621 - }, - { - "epoch": 1.6600645298038201, - "grad_norm": 0.6162887811660767, - "learning_rate": 1.4477657286236135e-06, - "loss": 0.1787, - "step": 17622 - }, - { - "epoch": 1.6601587338969879, - "grad_norm": 0.6685981154441833, - "learning_rate": 1.4469832449326871e-06, - "loss": 0.2122, - "step": 17623 - }, - { - "epoch": 1.6602529379901556, - "grad_norm": 0.6540847420692444, - "learning_rate": 1.4462009562637668e-06, - "loss": 0.172, - "step": 17624 - }, - { - "epoch": 1.6603471420833236, - "grad_norm": 0.6713460087776184, - "learning_rate": 1.4454188626346966e-06, - "loss": 0.193, - "step": 17625 - }, - { - "epoch": 1.6604413461764915, - "grad_norm": 0.6472384929656982, - "learning_rate": 1.444636964063303e-06, - "loss": 0.2031, - "step": 17626 - }, - { - "epoch": 1.6605355502696593, - 
"grad_norm": 0.6349501609802246, - "learning_rate": 1.4438552605674182e-06, - "loss": 0.1907, - "step": 17627 - }, - { - "epoch": 1.660629754362827, - "grad_norm": 0.6609911322593689, - "learning_rate": 1.4430737521648685e-06, - "loss": 0.2009, - "step": 17628 - }, - { - "epoch": 1.660723958455995, - "grad_norm": 0.6586636304855347, - "learning_rate": 1.4422924388734682e-06, - "loss": 0.2147, - "step": 17629 - }, - { - "epoch": 1.660818162549163, - "grad_norm": 0.7303486466407776, - "learning_rate": 1.4415113207110376e-06, - "loss": 0.1848, - "step": 17630 - }, - { - "epoch": 1.6609123666423307, - "grad_norm": 0.6686525344848633, - "learning_rate": 1.4407303976953824e-06, - "loss": 0.1884, - "step": 17631 - }, - { - "epoch": 1.6610065707354984, - "grad_norm": 0.6891011595726013, - "learning_rate": 1.4399496698443104e-06, - "loss": 0.2007, - "step": 17632 - }, - { - "epoch": 1.6611007748286664, - "grad_norm": 0.6761496663093567, - "learning_rate": 1.4391691371756277e-06, - "loss": 0.1793, - "step": 17633 - }, - { - "epoch": 1.6611949789218343, - "grad_norm": 0.6334503889083862, - "learning_rate": 1.4383887997071255e-06, - "loss": 0.1817, - "step": 17634 - }, - { - "epoch": 1.6612891830150018, - "grad_norm": 0.6404047608375549, - "learning_rate": 1.4376086574566018e-06, - "loss": 0.1947, - "step": 17635 - }, - { - "epoch": 1.6613833871081698, - "grad_norm": 0.6635481715202332, - "learning_rate": 1.436828710441841e-06, - "loss": 0.2029, - "step": 17636 - }, - { - "epoch": 1.6614775912013378, - "grad_norm": 0.7378470301628113, - "learning_rate": 1.436048958680628e-06, - "loss": 0.2121, - "step": 17637 - }, - { - "epoch": 1.6615717952945055, - "grad_norm": 0.7180721759796143, - "learning_rate": 1.4352694021907455e-06, - "loss": 0.199, - "step": 17638 - }, - { - "epoch": 1.6616659993876732, - "grad_norm": 0.6997587084770203, - "learning_rate": 1.4344900409899643e-06, - "loss": 0.2158, - "step": 17639 - }, - { - "epoch": 1.6617602034808412, - "grad_norm": 0.6561702489852905, - "learning_rate": 1.433710875096057e-06, - "loss": 0.1951, - "step": 17640 - }, - { - "epoch": 1.6618544075740092, - "grad_norm": 0.6174152493476868, - "learning_rate": 1.432931904526792e-06, - "loss": 0.1697, - "step": 17641 - }, - { - "epoch": 1.661948611667177, - "grad_norm": 0.5758946537971497, - "learning_rate": 1.4321531292999269e-06, - "loss": 0.1786, - "step": 17642 - }, - { - "epoch": 1.6620428157603446, - "grad_norm": 0.9768653512001038, - "learning_rate": 1.43137454943322e-06, - "loss": 0.1608, - "step": 17643 - }, - { - "epoch": 1.6621370198535126, - "grad_norm": 0.6561266779899597, - "learning_rate": 1.4305961649444277e-06, - "loss": 0.2116, - "step": 17644 - }, - { - "epoch": 1.6622312239466805, - "grad_norm": 0.6503586769104004, - "learning_rate": 1.4298179758512954e-06, - "loss": 0.1835, - "step": 17645 - }, - { - "epoch": 1.6623254280398483, - "grad_norm": 0.6128221154212952, - "learning_rate": 1.429039982171563e-06, - "loss": 0.1791, - "step": 17646 - }, - { - "epoch": 1.662419632133016, - "grad_norm": 0.69385826587677, - "learning_rate": 1.4282621839229793e-06, - "loss": 0.2068, - "step": 17647 - }, - { - "epoch": 1.662513836226184, - "grad_norm": 0.6319029927253723, - "learning_rate": 1.427484581123274e-06, - "loss": 0.1791, - "step": 17648 - }, - { - "epoch": 1.662608040319352, - "grad_norm": 0.6989104151725769, - "learning_rate": 1.4267071737901728e-06, - "loss": 0.1845, - "step": 17649 - }, - { - "epoch": 1.6627022444125197, - "grad_norm": 0.5964011549949646, - "learning_rate": 1.4259299619414114e-06, 
- "loss": 0.17, - "step": 17650 - }, - { - "epoch": 1.6627964485056874, - "grad_norm": 0.7641205191612244, - "learning_rate": 1.4251529455947078e-06, - "loss": 0.1687, - "step": 17651 - }, - { - "epoch": 1.6628906525988554, - "grad_norm": 0.6229805946350098, - "learning_rate": 1.4243761247677734e-06, - "loss": 0.1904, - "step": 17652 - }, - { - "epoch": 1.6629848566920233, - "grad_norm": 0.6460482478141785, - "learning_rate": 1.4235994994783297e-06, - "loss": 0.1899, - "step": 17653 - }, - { - "epoch": 1.663079060785191, - "grad_norm": 0.6331815123558044, - "learning_rate": 1.4228230697440815e-06, - "loss": 0.2021, - "step": 17654 - }, - { - "epoch": 1.6631732648783588, - "grad_norm": 0.6770843863487244, - "learning_rate": 1.42204683558273e-06, - "loss": 0.2061, - "step": 17655 - }, - { - "epoch": 1.6632674689715268, - "grad_norm": 0.6638312935829163, - "learning_rate": 1.4212707970119765e-06, - "loss": 0.1951, - "step": 17656 - }, - { - "epoch": 1.6633616730646947, - "grad_norm": 0.693246066570282, - "learning_rate": 1.4204949540495183e-06, - "loss": 0.1961, - "step": 17657 - }, - { - "epoch": 1.6634558771578625, - "grad_norm": 0.6464352607727051, - "learning_rate": 1.4197193067130422e-06, - "loss": 0.1924, - "step": 17658 - }, - { - "epoch": 1.6635500812510302, - "grad_norm": 0.5881369113922119, - "learning_rate": 1.418943855020235e-06, - "loss": 0.1694, - "step": 17659 - }, - { - "epoch": 1.6636442853441982, - "grad_norm": 0.6073905229568481, - "learning_rate": 1.4181685989887806e-06, - "loss": 0.1885, - "step": 17660 - }, - { - "epoch": 1.6637384894373661, - "grad_norm": 0.7137670516967773, - "learning_rate": 1.4173935386363524e-06, - "loss": 0.2168, - "step": 17661 - }, - { - "epoch": 1.6638326935305339, - "grad_norm": 0.6231116056442261, - "learning_rate": 1.4166186739806242e-06, - "loss": 0.2214, - "step": 17662 - }, - { - "epoch": 1.6639268976237016, - "grad_norm": 0.6087729930877686, - "learning_rate": 1.4158440050392675e-06, - "loss": 0.1817, - "step": 17663 - }, - { - "epoch": 1.6640211017168696, - "grad_norm": 0.5989171862602234, - "learning_rate": 1.4150695318299412e-06, - "loss": 0.1787, - "step": 17664 - }, - { - "epoch": 1.6641153058100375, - "grad_norm": 0.665279746055603, - "learning_rate": 1.414295254370308e-06, - "loss": 0.1869, - "step": 17665 - }, - { - "epoch": 1.6642095099032053, - "grad_norm": 0.6534229516983032, - "learning_rate": 1.4135211726780196e-06, - "loss": 0.1715, - "step": 17666 - }, - { - "epoch": 1.664303713996373, - "grad_norm": 0.6989510655403137, - "learning_rate": 1.412747286770727e-06, - "loss": 0.1934, - "step": 17667 - }, - { - "epoch": 1.664397918089541, - "grad_norm": 0.6644416451454163, - "learning_rate": 1.4119735966660797e-06, - "loss": 0.1865, - "step": 17668 - }, - { - "epoch": 1.664492122182709, - "grad_norm": 0.6417335867881775, - "learning_rate": 1.4112001023817123e-06, - "loss": 0.207, - "step": 17669 - }, - { - "epoch": 1.6645863262758767, - "grad_norm": 0.6809008121490479, - "learning_rate": 1.4104268039352686e-06, - "loss": 0.1932, - "step": 17670 - }, - { - "epoch": 1.6646805303690444, - "grad_norm": 0.6541202068328857, - "learning_rate": 1.4096537013443755e-06, - "loss": 0.1886, - "step": 17671 - }, - { - "epoch": 1.6647747344622124, - "grad_norm": 0.7825481295585632, - "learning_rate": 1.4088807946266635e-06, - "loss": 0.2121, - "step": 17672 - }, - { - "epoch": 1.6648689385553803, - "grad_norm": 0.8834565877914429, - "learning_rate": 1.4081080837997585e-06, - "loss": 0.2159, - "step": 17673 - }, - { - "epoch": 1.664963142648548, 
- "grad_norm": 0.6058242321014404, - "learning_rate": 1.407335568881273e-06, - "loss": 0.1838, - "step": 17674 - }, - { - "epoch": 1.6650573467417158, - "grad_norm": 0.7065714597702026, - "learning_rate": 1.4065632498888294e-06, - "loss": 0.2108, - "step": 17675 - }, - { - "epoch": 1.6651515508348838, - "grad_norm": 0.6951255202293396, - "learning_rate": 1.4057911268400305e-06, - "loss": 0.1868, - "step": 17676 - }, - { - "epoch": 1.6652457549280517, - "grad_norm": 0.6267175674438477, - "learning_rate": 1.4050191997524843e-06, - "loss": 0.1854, - "step": 17677 - }, - { - "epoch": 1.6653399590212195, - "grad_norm": 0.7287028431892395, - "learning_rate": 1.4042474686437957e-06, - "loss": 0.2042, - "step": 17678 - }, - { - "epoch": 1.6654341631143872, - "grad_norm": 0.6523891091346741, - "learning_rate": 1.4034759335315562e-06, - "loss": 0.2005, - "step": 17679 - }, - { - "epoch": 1.6655283672075551, - "grad_norm": 0.6233696937561035, - "learning_rate": 1.4027045944333606e-06, - "loss": 0.1923, - "step": 17680 - }, - { - "epoch": 1.665622571300723, - "grad_norm": 0.6110519766807556, - "learning_rate": 1.401933451366798e-06, - "loss": 0.1944, - "step": 17681 - }, - { - "epoch": 1.6657167753938908, - "grad_norm": 0.6156954765319824, - "learning_rate": 1.401162504349448e-06, - "loss": 0.1741, - "step": 17682 - }, - { - "epoch": 1.6658109794870586, - "grad_norm": 0.6730902194976807, - "learning_rate": 1.400391753398891e-06, - "loss": 0.1828, - "step": 17683 - }, - { - "epoch": 1.6659051835802265, - "grad_norm": 0.7032499313354492, - "learning_rate": 1.3996211985327034e-06, - "loss": 0.1695, - "step": 17684 - }, - { - "epoch": 1.6659993876733945, - "grad_norm": 0.6364924311637878, - "learning_rate": 1.398850839768454e-06, - "loss": 0.1632, - "step": 17685 - }, - { - "epoch": 1.6660935917665622, - "grad_norm": 0.664461076259613, - "learning_rate": 1.398080677123702e-06, - "loss": 0.1858, - "step": 17686 - }, - { - "epoch": 1.66618779585973, - "grad_norm": 0.5921967625617981, - "learning_rate": 1.397310710616019e-06, - "loss": 0.167, - "step": 17687 - }, - { - "epoch": 1.666281999952898, - "grad_norm": 0.6417059302330017, - "learning_rate": 1.396540940262956e-06, - "loss": 0.1914, - "step": 17688 - }, - { - "epoch": 1.666376204046066, - "grad_norm": 0.6602506637573242, - "learning_rate": 1.3957713660820605e-06, - "loss": 0.1865, - "step": 17689 - }, - { - "epoch": 1.6664704081392336, - "grad_norm": 0.669553279876709, - "learning_rate": 1.3950019880908894e-06, - "loss": 0.1869, - "step": 17690 - }, - { - "epoch": 1.6665646122324014, - "grad_norm": 0.7122140526771545, - "learning_rate": 1.39423280630698e-06, - "loss": 0.2258, - "step": 17691 - }, - { - "epoch": 1.6666588163255693, - "grad_norm": 0.6299037337303162, - "learning_rate": 1.393463820747868e-06, - "loss": 0.1681, - "step": 17692 - }, - { - "epoch": 1.6667530204187373, - "grad_norm": 0.6788545250892639, - "learning_rate": 1.392695031431095e-06, - "loss": 0.2038, - "step": 17693 - }, - { - "epoch": 1.666847224511905, - "grad_norm": 0.630938708782196, - "learning_rate": 1.3919264383741871e-06, - "loss": 0.2085, - "step": 17694 - }, - { - "epoch": 1.6669414286050728, - "grad_norm": 0.7515336275100708, - "learning_rate": 1.3911580415946667e-06, - "loss": 0.2029, - "step": 17695 - }, - { - "epoch": 1.6670356326982407, - "grad_norm": 0.6248697638511658, - "learning_rate": 1.3903898411100568e-06, - "loss": 0.1826, - "step": 17696 - }, - { - "epoch": 1.6671298367914087, - "grad_norm": 0.6295955181121826, - "learning_rate": 1.3896218369378756e-06, - 
"loss": 0.1907, - "step": 17697 - }, - { - "epoch": 1.6672240408845764, - "grad_norm": 0.5748365521430969, - "learning_rate": 1.388854029095631e-06, - "loss": 0.1632, - "step": 17698 - }, - { - "epoch": 1.6673182449777442, - "grad_norm": 0.6384146809577942, - "learning_rate": 1.3880864176008312e-06, - "loss": 0.166, - "step": 17699 - }, - { - "epoch": 1.6674124490709121, - "grad_norm": 0.6905065178871155, - "learning_rate": 1.3873190024709816e-06, - "loss": 0.1982, - "step": 17700 - }, - { - "epoch": 1.66750665316408, - "grad_norm": 0.7195608019828796, - "learning_rate": 1.3865517837235776e-06, - "loss": 0.1983, - "step": 17701 - }, - { - "epoch": 1.6676008572572478, - "grad_norm": 0.6161376237869263, - "learning_rate": 1.3857847613761134e-06, - "loss": 0.204, - "step": 17702 - }, - { - "epoch": 1.6676950613504156, - "grad_norm": 0.6908882260322571, - "learning_rate": 1.3850179354460802e-06, - "loss": 0.2004, - "step": 17703 - }, - { - "epoch": 1.6677892654435835, - "grad_norm": 0.6385560631752014, - "learning_rate": 1.38425130595096e-06, - "loss": 0.1816, - "step": 17704 - }, - { - "epoch": 1.6678834695367515, - "grad_norm": 0.6749087572097778, - "learning_rate": 1.383484872908236e-06, - "loss": 0.166, - "step": 17705 - }, - { - "epoch": 1.6679776736299192, - "grad_norm": 0.6486237645149231, - "learning_rate": 1.3827186363353818e-06, - "loss": 0.2061, - "step": 17706 - }, - { - "epoch": 1.668071877723087, - "grad_norm": 0.6470707654953003, - "learning_rate": 1.3819525962498703e-06, - "loss": 0.1921, - "step": 17707 - }, - { - "epoch": 1.668166081816255, - "grad_norm": 0.6324625611305237, - "learning_rate": 1.3811867526691668e-06, - "loss": 0.1921, - "step": 17708 - }, - { - "epoch": 1.6682602859094229, - "grad_norm": 0.6587686538696289, - "learning_rate": 1.3804211056107332e-06, - "loss": 0.2075, - "step": 17709 - }, - { - "epoch": 1.6683544900025906, - "grad_norm": 0.7326374053955078, - "learning_rate": 1.3796556550920315e-06, - "loss": 0.1837, - "step": 17710 - }, - { - "epoch": 1.6684486940957584, - "grad_norm": 0.6420719027519226, - "learning_rate": 1.3788904011305105e-06, - "loss": 0.1979, - "step": 17711 - }, - { - "epoch": 1.6685428981889263, - "grad_norm": 0.6310940384864807, - "learning_rate": 1.3781253437436216e-06, - "loss": 0.2227, - "step": 17712 - }, - { - "epoch": 1.6686371022820943, - "grad_norm": 0.6826558709144592, - "learning_rate": 1.3773604829488107e-06, - "loss": 0.2038, - "step": 17713 - }, - { - "epoch": 1.668731306375262, - "grad_norm": 0.6317348480224609, - "learning_rate": 1.3765958187635131e-06, - "loss": 0.149, - "step": 17714 - }, - { - "epoch": 1.6688255104684298, - "grad_norm": 0.6829084753990173, - "learning_rate": 1.3758313512051702e-06, - "loss": 0.2007, - "step": 17715 - }, - { - "epoch": 1.6689197145615977, - "grad_norm": 0.6108052134513855, - "learning_rate": 1.375067080291208e-06, - "loss": 0.1698, - "step": 17716 - }, - { - "epoch": 1.6690139186547657, - "grad_norm": 0.7161389589309692, - "learning_rate": 1.374303006039054e-06, - "loss": 0.2011, - "step": 17717 - }, - { - "epoch": 1.6691081227479334, - "grad_norm": 0.6809845566749573, - "learning_rate": 1.3735391284661348e-06, - "loss": 0.2201, - "step": 17718 - }, - { - "epoch": 1.6692023268411011, - "grad_norm": 0.7134252786636353, - "learning_rate": 1.3727754475898615e-06, - "loss": 0.1986, - "step": 17719 - }, - { - "epoch": 1.669296530934269, - "grad_norm": 0.6815935373306274, - "learning_rate": 1.3720119634276507e-06, - "loss": 0.1852, - "step": 17720 - }, - { - "epoch": 1.669390735027437, - 
"grad_norm": 0.6288490891456604, - "learning_rate": 1.3712486759969124e-06, - "loss": 0.1788, - "step": 17721 - }, - { - "epoch": 1.6694849391206048, - "grad_norm": 0.6670456528663635, - "learning_rate": 1.3704855853150467e-06, - "loss": 0.2324, - "step": 17722 - }, - { - "epoch": 1.6695791432137725, - "grad_norm": 0.6168465614318848, - "learning_rate": 1.3697226913994555e-06, - "loss": 0.1728, - "step": 17723 - }, - { - "epoch": 1.6696733473069405, - "grad_norm": 0.6151394248008728, - "learning_rate": 1.368959994267537e-06, - "loss": 0.17, - "step": 17724 - }, - { - "epoch": 1.6697675514001085, - "grad_norm": 0.6413464546203613, - "learning_rate": 1.3681974939366772e-06, - "loss": 0.1906, - "step": 17725 - }, - { - "epoch": 1.6698617554932762, - "grad_norm": 0.715067982673645, - "learning_rate": 1.367435190424261e-06, - "loss": 0.2071, - "step": 17726 - }, - { - "epoch": 1.669955959586444, - "grad_norm": 0.6484623551368713, - "learning_rate": 1.366673083747676e-06, - "loss": 0.1819, - "step": 17727 - }, - { - "epoch": 1.670050163679612, - "grad_norm": 0.6648810505867004, - "learning_rate": 1.3659111739242981e-06, - "loss": 0.2219, - "step": 17728 - }, - { - "epoch": 1.6701443677727799, - "grad_norm": 0.6700533032417297, - "learning_rate": 1.365149460971492e-06, - "loss": 0.1767, - "step": 17729 - }, - { - "epoch": 1.6702385718659476, - "grad_norm": 0.6849918365478516, - "learning_rate": 1.3643879449066377e-06, - "loss": 0.1883, - "step": 17730 - }, - { - "epoch": 1.6703327759591153, - "grad_norm": 0.7058325409889221, - "learning_rate": 1.363626625747093e-06, - "loss": 0.2169, - "step": 17731 - }, - { - "epoch": 1.6704269800522833, - "grad_norm": 0.5893673896789551, - "learning_rate": 1.3628655035102167e-06, - "loss": 0.1624, - "step": 17732 - }, - { - "epoch": 1.6705211841454513, - "grad_norm": 0.6795918345451355, - "learning_rate": 1.3621045782133635e-06, - "loss": 0.2165, - "step": 17733 - }, - { - "epoch": 1.670615388238619, - "grad_norm": 0.8626487851142883, - "learning_rate": 1.3613438498738861e-06, - "loss": 0.2098, - "step": 17734 - }, - { - "epoch": 1.6707095923317867, - "grad_norm": 0.6874573826789856, - "learning_rate": 1.3605833185091278e-06, - "loss": 0.1691, - "step": 17735 - }, - { - "epoch": 1.6708037964249547, - "grad_norm": 0.6788274645805359, - "learning_rate": 1.3598229841364308e-06, - "loss": 0.1944, - "step": 17736 - }, - { - "epoch": 1.6708980005181227, - "grad_norm": 0.7230943441390991, - "learning_rate": 1.3590628467731337e-06, - "loss": 0.1851, - "step": 17737 - }, - { - "epoch": 1.6709922046112904, - "grad_norm": 0.888152003288269, - "learning_rate": 1.358302906436566e-06, - "loss": 0.1855, - "step": 17738 - }, - { - "epoch": 1.6710864087044581, - "grad_norm": 0.6920692920684814, - "learning_rate": 1.357543163144056e-06, - "loss": 0.2027, - "step": 17739 - }, - { - "epoch": 1.671180612797626, - "grad_norm": 0.6482422947883606, - "learning_rate": 1.35678361691293e-06, - "loss": 0.1988, - "step": 17740 - }, - { - "epoch": 1.671274816890794, - "grad_norm": 0.762971043586731, - "learning_rate": 1.3560242677605018e-06, - "loss": 0.2051, - "step": 17741 - }, - { - "epoch": 1.6713690209839618, - "grad_norm": 0.6341733336448669, - "learning_rate": 1.3552651157040897e-06, - "loss": 0.1822, - "step": 17742 - }, - { - "epoch": 1.6714632250771295, - "grad_norm": 0.7378972768783569, - "learning_rate": 1.3545061607610032e-06, - "loss": 0.2038, - "step": 17743 - }, - { - "epoch": 1.6715574291702975, - "grad_norm": 0.6339300274848938, - "learning_rate": 1.3537474029485452e-06, - 
"loss": 0.1796, - "step": 17744 - }, - { - "epoch": 1.6716516332634654, - "grad_norm": 0.7358366847038269, - "learning_rate": 1.3529888422840199e-06, - "loss": 0.2064, - "step": 17745 - }, - { - "epoch": 1.6717458373566332, - "grad_norm": 0.697050929069519, - "learning_rate": 1.3522304787847197e-06, - "loss": 0.2048, - "step": 17746 - }, - { - "epoch": 1.671840041449801, - "grad_norm": 0.630322277545929, - "learning_rate": 1.351472312467942e-06, - "loss": 0.2001, - "step": 17747 - }, - { - "epoch": 1.6719342455429689, - "grad_norm": 1.134198784828186, - "learning_rate": 1.3507143433509673e-06, - "loss": 0.22, - "step": 17748 - }, - { - "epoch": 1.6720284496361368, - "grad_norm": 0.6161059737205505, - "learning_rate": 1.349956571451082e-06, - "loss": 0.1805, - "step": 17749 - }, - { - "epoch": 1.6721226537293046, - "grad_norm": 0.6653415560722351, - "learning_rate": 1.3491989967855668e-06, - "loss": 0.185, - "step": 17750 - }, - { - "epoch": 1.6722168578224723, - "grad_norm": 0.7343589067459106, - "learning_rate": 1.348441619371691e-06, - "loss": 0.1658, - "step": 17751 - }, - { - "epoch": 1.6723110619156403, - "grad_norm": 0.6304425001144409, - "learning_rate": 1.3476844392267264e-06, - "loss": 0.1738, - "step": 17752 - }, - { - "epoch": 1.6724052660088082, - "grad_norm": 0.599528431892395, - "learning_rate": 1.3469274563679401e-06, - "loss": 0.1587, - "step": 17753 - }, - { - "epoch": 1.672499470101976, - "grad_norm": 0.7063929438591003, - "learning_rate": 1.3461706708125878e-06, - "loss": 0.181, - "step": 17754 - }, - { - "epoch": 1.6725936741951437, - "grad_norm": 0.732979953289032, - "learning_rate": 1.3454140825779294e-06, - "loss": 0.2248, - "step": 17755 - }, - { - "epoch": 1.6726878782883117, - "grad_norm": 0.7059483528137207, - "learning_rate": 1.3446576916812116e-06, - "loss": 0.2099, - "step": 17756 - }, - { - "epoch": 1.6727820823814796, - "grad_norm": 0.6447253823280334, - "learning_rate": 1.3439014981396847e-06, - "loss": 0.2043, - "step": 17757 - }, - { - "epoch": 1.6728762864746474, - "grad_norm": 0.6934409737586975, - "learning_rate": 1.343145501970593e-06, - "loss": 0.1824, - "step": 17758 - }, - { - "epoch": 1.672970490567815, - "grad_norm": 0.6274111866950989, - "learning_rate": 1.3423897031911703e-06, - "loss": 0.184, - "step": 17759 - }, - { - "epoch": 1.673064694660983, - "grad_norm": 0.6619078516960144, - "learning_rate": 1.3416341018186507e-06, - "loss": 0.1882, - "step": 17760 - }, - { - "epoch": 1.673158898754151, - "grad_norm": 0.7142669558525085, - "learning_rate": 1.3408786978702671e-06, - "loss": 0.196, - "step": 17761 - }, - { - "epoch": 1.6732531028473188, - "grad_norm": 0.6544268131256104, - "learning_rate": 1.3401234913632377e-06, - "loss": 0.1908, - "step": 17762 - }, - { - "epoch": 1.6733473069404865, - "grad_norm": 0.6190822124481201, - "learning_rate": 1.3393684823147857e-06, - "loss": 0.1853, - "step": 17763 - }, - { - "epoch": 1.6734415110336545, - "grad_norm": 0.7447613477706909, - "learning_rate": 1.3386136707421281e-06, - "loss": 0.2173, - "step": 17764 - }, - { - "epoch": 1.6735357151268224, - "grad_norm": 0.6771336197853088, - "learning_rate": 1.337859056662475e-06, - "loss": 0.1845, - "step": 17765 - }, - { - "epoch": 1.6736299192199902, - "grad_norm": 0.678631603717804, - "learning_rate": 1.3371046400930266e-06, - "loss": 0.2036, - "step": 17766 - }, - { - "epoch": 1.673724123313158, - "grad_norm": 0.7423925995826721, - "learning_rate": 1.3363504210509936e-06, - "loss": 0.2199, - "step": 17767 - }, - { - "epoch": 1.6738183274063259, - 
"grad_norm": 0.6490384936332703, - "learning_rate": 1.3355963995535703e-06, - "loss": 0.1887, - "step": 17768 - }, - { - "epoch": 1.6739125314994938, - "grad_norm": 0.6802162528038025, - "learning_rate": 1.3348425756179439e-06, - "loss": 0.1708, - "step": 17769 - }, - { - "epoch": 1.6740067355926616, - "grad_norm": 0.6522113680839539, - "learning_rate": 1.334088949261313e-06, - "loss": 0.1977, - "step": 17770 - }, - { - "epoch": 1.6741009396858293, - "grad_norm": 0.631261944770813, - "learning_rate": 1.3333355205008558e-06, - "loss": 0.1832, - "step": 17771 - }, - { - "epoch": 1.6741951437789973, - "grad_norm": 0.7070450782775879, - "learning_rate": 1.33258228935375e-06, - "loss": 0.1863, - "step": 17772 - }, - { - "epoch": 1.674289347872165, - "grad_norm": 0.6928162574768066, - "learning_rate": 1.3318292558371714e-06, - "loss": 0.1836, - "step": 17773 - }, - { - "epoch": 1.6743835519653327, - "grad_norm": 0.6214320063591003, - "learning_rate": 1.3310764199682946e-06, - "loss": 0.2011, - "step": 17774 - }, - { - "epoch": 1.6744777560585007, - "grad_norm": 0.6680625081062317, - "learning_rate": 1.3303237817642789e-06, - "loss": 0.1837, - "step": 17775 - }, - { - "epoch": 1.6745719601516686, - "grad_norm": 0.623860776424408, - "learning_rate": 1.3295713412422907e-06, - "loss": 0.1836, - "step": 17776 - }, - { - "epoch": 1.6746661642448364, - "grad_norm": 0.5853019952774048, - "learning_rate": 1.328819098419486e-06, - "loss": 0.1886, - "step": 17777 - }, - { - "epoch": 1.6747603683380041, - "grad_norm": 0.675097644329071, - "learning_rate": 1.3280670533130135e-06, - "loss": 0.1737, - "step": 17778 - }, - { - "epoch": 1.674854572431172, - "grad_norm": 0.7148059010505676, - "learning_rate": 1.327315205940024e-06, - "loss": 0.2086, - "step": 17779 - }, - { - "epoch": 1.67494877652434, - "grad_norm": 0.6851511597633362, - "learning_rate": 1.3265635563176626e-06, - "loss": 0.1969, - "step": 17780 - }, - { - "epoch": 1.6750429806175078, - "grad_norm": 0.6803012490272522, - "learning_rate": 1.3258121044630633e-06, - "loss": 0.1722, - "step": 17781 - }, - { - "epoch": 1.6751371847106755, - "grad_norm": 0.6636247634887695, - "learning_rate": 1.3250608503933637e-06, - "loss": 0.1943, - "step": 17782 - }, - { - "epoch": 1.6752313888038435, - "grad_norm": 0.6178423166275024, - "learning_rate": 1.3243097941256943e-06, - "loss": 0.1779, - "step": 17783 - }, - { - "epoch": 1.6753255928970114, - "grad_norm": 0.7024008631706238, - "learning_rate": 1.3235589356771771e-06, - "loss": 0.2151, - "step": 17784 - }, - { - "epoch": 1.6754197969901792, - "grad_norm": 0.6634231805801392, - "learning_rate": 1.3228082750649362e-06, - "loss": 0.1846, - "step": 17785 - }, - { - "epoch": 1.675514001083347, - "grad_norm": 0.7389572262763977, - "learning_rate": 1.3220578123060845e-06, - "loss": 0.2154, - "step": 17786 - }, - { - "epoch": 1.6756082051765149, - "grad_norm": 0.6536784172058105, - "learning_rate": 1.3213075474177373e-06, - "loss": 0.1915, - "step": 17787 - }, - { - "epoch": 1.6757024092696828, - "grad_norm": 0.6692500114440918, - "learning_rate": 1.3205574804169973e-06, - "loss": 0.201, - "step": 17788 - }, - { - "epoch": 1.6757966133628506, - "grad_norm": 0.6383397579193115, - "learning_rate": 1.3198076113209713e-06, - "loss": 0.1718, - "step": 17789 - }, - { - "epoch": 1.6758908174560183, - "grad_norm": 0.6334207057952881, - "learning_rate": 1.3190579401467562e-06, - "loss": 0.2114, - "step": 17790 - }, - { - "epoch": 1.6759850215491863, - "grad_norm": 0.7100887894630432, - "learning_rate": 
1.3183084669114442e-06, - "loss": 0.193, - "step": 17791 - }, - { - "epoch": 1.6760792256423542, - "grad_norm": 0.6818283796310425, - "learning_rate": 1.317559191632125e-06, - "loss": 0.2141, - "step": 17792 - }, - { - "epoch": 1.676173429735522, - "grad_norm": 0.644614040851593, - "learning_rate": 1.3168101143258871e-06, - "loss": 0.1761, - "step": 17793 - }, - { - "epoch": 1.6762676338286897, - "grad_norm": 0.5869244933128357, - "learning_rate": 1.3160612350098045e-06, - "loss": 0.1881, - "step": 17794 - }, - { - "epoch": 1.6763618379218577, - "grad_norm": 0.7334598302841187, - "learning_rate": 1.3153125537009581e-06, - "loss": 0.2073, - "step": 17795 - }, - { - "epoch": 1.6764560420150256, - "grad_norm": 0.6654959917068481, - "learning_rate": 1.3145640704164143e-06, - "loss": 0.1905, - "step": 17796 - }, - { - "epoch": 1.6765502461081934, - "grad_norm": 0.6341982483863831, - "learning_rate": 1.3138157851732424e-06, - "loss": 0.1922, - "step": 17797 - }, - { - "epoch": 1.676644450201361, - "grad_norm": 0.6961618661880493, - "learning_rate": 1.3130676979885059e-06, - "loss": 0.1833, - "step": 17798 - }, - { - "epoch": 1.676738654294529, - "grad_norm": 0.7072240710258484, - "learning_rate": 1.3123198088792577e-06, - "loss": 0.1792, - "step": 17799 - }, - { - "epoch": 1.676832858387697, - "grad_norm": 0.6211265921592712, - "learning_rate": 1.311572117862554e-06, - "loss": 0.1976, - "step": 17800 - }, - { - "epoch": 1.6769270624808648, - "grad_norm": 0.7154965996742249, - "learning_rate": 1.3108246249554457e-06, - "loss": 0.1953, - "step": 17801 - }, - { - "epoch": 1.6770212665740325, - "grad_norm": 0.6488645076751709, - "learning_rate": 1.3100773301749715e-06, - "loss": 0.2021, - "step": 17802 - }, - { - "epoch": 1.6771154706672005, - "grad_norm": 0.6766074895858765, - "learning_rate": 1.309330233538173e-06, - "loss": 0.1749, - "step": 17803 - }, - { - "epoch": 1.6772096747603684, - "grad_norm": 0.6735768914222717, - "learning_rate": 1.308583335062088e-06, - "loss": 0.1853, - "step": 17804 - }, - { - "epoch": 1.6773038788535362, - "grad_norm": 0.6607761383056641, - "learning_rate": 1.307836634763745e-06, - "loss": 0.1873, - "step": 17805 - }, - { - "epoch": 1.677398082946704, - "grad_norm": 0.7383633255958557, - "learning_rate": 1.307090132660165e-06, - "loss": 0.1977, - "step": 17806 - }, - { - "epoch": 1.6774922870398719, - "grad_norm": 0.6929439902305603, - "learning_rate": 1.306343828768377e-06, - "loss": 0.181, - "step": 17807 - }, - { - "epoch": 1.6775864911330398, - "grad_norm": 0.5572612881660461, - "learning_rate": 1.3055977231053962e-06, - "loss": 0.1931, - "step": 17808 - }, - { - "epoch": 1.6776806952262076, - "grad_norm": 0.7010521292686462, - "learning_rate": 1.3048518156882285e-06, - "loss": 0.2156, - "step": 17809 - }, - { - "epoch": 1.6777748993193753, - "grad_norm": 0.6510460376739502, - "learning_rate": 1.30410610653389e-06, - "loss": 0.1952, - "step": 17810 - }, - { - "epoch": 1.6778691034125433, - "grad_norm": 0.6749451756477356, - "learning_rate": 1.303360595659382e-06, - "loss": 0.2302, - "step": 17811 - }, - { - "epoch": 1.6779633075057112, - "grad_norm": 0.6118618249893188, - "learning_rate": 1.3026152830816996e-06, - "loss": 0.1855, - "step": 17812 - }, - { - "epoch": 1.678057511598879, - "grad_norm": 0.5943413972854614, - "learning_rate": 1.3018701688178393e-06, - "loss": 0.1621, - "step": 17813 - }, - { - "epoch": 1.6781517156920467, - "grad_norm": 0.696884036064148, - "learning_rate": 1.301125252884794e-06, - "loss": 0.2018, - "step": 17814 - }, - { - "epoch": 
1.6782459197852146, - "grad_norm": 0.6266376972198486, - "learning_rate": 1.3003805352995425e-06, - "loss": 0.1885, - "step": 17815 - }, - { - "epoch": 1.6783401238783826, - "grad_norm": 0.6385643482208252, - "learning_rate": 1.29963601607907e-06, - "loss": 0.1706, - "step": 17816 - }, - { - "epoch": 1.6784343279715503, - "grad_norm": 0.7097641825675964, - "learning_rate": 1.298891695240354e-06, - "loss": 0.2586, - "step": 17817 - }, - { - "epoch": 1.678528532064718, - "grad_norm": 0.6582649350166321, - "learning_rate": 1.2981475728003611e-06, - "loss": 0.2036, - "step": 17818 - }, - { - "epoch": 1.678622736157886, - "grad_norm": 0.6356112360954285, - "learning_rate": 1.297403648776061e-06, - "loss": 0.1818, - "step": 17819 - }, - { - "epoch": 1.678716940251054, - "grad_norm": 0.721987783908844, - "learning_rate": 1.296659923184419e-06, - "loss": 0.2498, - "step": 17820 - }, - { - "epoch": 1.6788111443442217, - "grad_norm": 0.6707038283348083, - "learning_rate": 1.2959163960423893e-06, - "loss": 0.1692, - "step": 17821 - }, - { - "epoch": 1.6789053484373895, - "grad_norm": 0.6467146873474121, - "learning_rate": 1.2951730673669261e-06, - "loss": 0.1937, - "step": 17822 - }, - { - "epoch": 1.6789995525305574, - "grad_norm": 0.6501058340072632, - "learning_rate": 1.2944299371749812e-06, - "loss": 0.2189, - "step": 17823 - }, - { - "epoch": 1.6790937566237254, - "grad_norm": 0.6409503817558289, - "learning_rate": 1.2936870054834982e-06, - "loss": 0.1925, - "step": 17824 - }, - { - "epoch": 1.6791879607168931, - "grad_norm": 0.6841661930084229, - "learning_rate": 1.2929442723094133e-06, - "loss": 0.1662, - "step": 17825 - }, - { - "epoch": 1.6792821648100609, - "grad_norm": 0.6333213448524475, - "learning_rate": 1.2922017376696638e-06, - "loss": 0.1735, - "step": 17826 - }, - { - "epoch": 1.6793763689032288, - "grad_norm": 0.5788587927818298, - "learning_rate": 1.2914594015811854e-06, - "loss": 0.1644, - "step": 17827 - }, - { - "epoch": 1.6794705729963968, - "grad_norm": 0.6400187015533447, - "learning_rate": 1.2907172640608978e-06, - "loss": 0.1988, - "step": 17828 - }, - { - "epoch": 1.6795647770895645, - "grad_norm": 0.6779884099960327, - "learning_rate": 1.2899753251257252e-06, - "loss": 0.2341, - "step": 17829 - }, - { - "epoch": 1.6796589811827323, - "grad_norm": 0.7089108228683472, - "learning_rate": 1.2892335847925886e-06, - "loss": 0.2071, - "step": 17830 - }, - { - "epoch": 1.6797531852759002, - "grad_norm": 0.6162636280059814, - "learning_rate": 1.2884920430783942e-06, - "loss": 0.1828, - "step": 17831 - }, - { - "epoch": 1.6798473893690682, - "grad_norm": 0.626998782157898, - "learning_rate": 1.2877507000000545e-06, - "loss": 0.1888, - "step": 17832 - }, - { - "epoch": 1.679941593462236, - "grad_norm": 0.5967768430709839, - "learning_rate": 1.2870095555744755e-06, - "loss": 0.1541, - "step": 17833 - }, - { - "epoch": 1.6800357975554037, - "grad_norm": 0.6613903045654297, - "learning_rate": 1.2862686098185506e-06, - "loss": 0.1899, - "step": 17834 - }, - { - "epoch": 1.6801300016485716, - "grad_norm": 0.6769422292709351, - "learning_rate": 1.2855278627491795e-06, - "loss": 0.1844, - "step": 17835 - }, - { - "epoch": 1.6802242057417396, - "grad_norm": 0.7312199473381042, - "learning_rate": 1.284787314383249e-06, - "loss": 0.1838, - "step": 17836 - }, - { - "epoch": 1.6803184098349073, - "grad_norm": 0.6149859428405762, - "learning_rate": 1.2840469647376452e-06, - "loss": 0.1689, - "step": 17837 - }, - { - "epoch": 1.680412613928075, - "grad_norm": 0.6647670865058899, - 
"learning_rate": 1.2833068138292526e-06, - "loss": 0.1969, - "step": 17838 - }, - { - "epoch": 1.680506818021243, - "grad_norm": 0.6740341782569885, - "learning_rate": 1.2825668616749431e-06, - "loss": 0.1796, - "step": 17839 - }, - { - "epoch": 1.680601022114411, - "grad_norm": 0.7125089168548584, - "learning_rate": 1.2818271082915911e-06, - "loss": 0.1774, - "step": 17840 - }, - { - "epoch": 1.6806952262075787, - "grad_norm": 0.6824539303779602, - "learning_rate": 1.2810875536960654e-06, - "loss": 0.2208, - "step": 17841 - }, - { - "epoch": 1.6807894303007465, - "grad_norm": 0.7195398807525635, - "learning_rate": 1.2803481979052257e-06, - "loss": 0.2147, - "step": 17842 - }, - { - "epoch": 1.6808836343939144, - "grad_norm": 0.623807430267334, - "learning_rate": 1.279609040935933e-06, - "loss": 0.1857, - "step": 17843 - }, - { - "epoch": 1.6809778384870824, - "grad_norm": 0.6549000144004822, - "learning_rate": 1.2788700828050415e-06, - "loss": 0.1797, - "step": 17844 - }, - { - "epoch": 1.6810720425802501, - "grad_norm": 0.6454442739486694, - "learning_rate": 1.2781313235294002e-06, - "loss": 0.181, - "step": 17845 - }, - { - "epoch": 1.6811662466734179, - "grad_norm": 0.7384193539619446, - "learning_rate": 1.2773927631258487e-06, - "loss": 0.23, - "step": 17846 - }, - { - "epoch": 1.6812604507665858, - "grad_norm": 0.6494235992431641, - "learning_rate": 1.276654401611237e-06, - "loss": 0.2326, - "step": 17847 - }, - { - "epoch": 1.6813546548597538, - "grad_norm": 0.672817587852478, - "learning_rate": 1.2759162390023961e-06, - "loss": 0.1924, - "step": 17848 - }, - { - "epoch": 1.6814488589529215, - "grad_norm": 0.6448466777801514, - "learning_rate": 1.2751782753161535e-06, - "loss": 0.1845, - "step": 17849 - }, - { - "epoch": 1.6815430630460892, - "grad_norm": 0.6963547468185425, - "learning_rate": 1.2744405105693402e-06, - "loss": 0.2093, - "step": 17850 - }, - { - "epoch": 1.6816372671392572, - "grad_norm": 0.6959251761436462, - "learning_rate": 1.2737029447787797e-06, - "loss": 0.1904, - "step": 17851 - }, - { - "epoch": 1.6817314712324252, - "grad_norm": 0.6796267628669739, - "learning_rate": 1.272965577961286e-06, - "loss": 0.1694, - "step": 17852 - }, - { - "epoch": 1.681825675325593, - "grad_norm": 0.6188008785247803, - "learning_rate": 1.2722284101336724e-06, - "loss": 0.1954, - "step": 17853 - }, - { - "epoch": 1.6819198794187606, - "grad_norm": 0.7073794603347778, - "learning_rate": 1.271491441312752e-06, - "loss": 0.2062, - "step": 17854 - }, - { - "epoch": 1.6820140835119286, - "grad_norm": 0.6268400549888611, - "learning_rate": 1.270754671515323e-06, - "loss": 0.1832, - "step": 17855 - }, - { - "epoch": 1.6821082876050966, - "grad_norm": 0.7445459961891174, - "learning_rate": 1.270018100758188e-06, - "loss": 0.2333, - "step": 17856 - }, - { - "epoch": 1.6822024916982643, - "grad_norm": 0.6499781608581543, - "learning_rate": 1.2692817290581426e-06, - "loss": 0.1848, - "step": 17857 - }, - { - "epoch": 1.682296695791432, - "grad_norm": 0.6811954975128174, - "learning_rate": 1.2685455564319748e-06, - "loss": 0.1799, - "step": 17858 - }, - { - "epoch": 1.6823908998846, - "grad_norm": 0.7441204190254211, - "learning_rate": 1.2678095828964721e-06, - "loss": 0.1992, - "step": 17859 - }, - { - "epoch": 1.682485103977768, - "grad_norm": 0.6719151139259338, - "learning_rate": 1.2670738084684175e-06, - "loss": 0.2269, - "step": 17860 - }, - { - "epoch": 1.6825793080709357, - "grad_norm": 0.681549608707428, - "learning_rate": 1.2663382331645835e-06, - "loss": 0.1719, - "step": 17861 - 
}, - { - "epoch": 1.6826735121641034, - "grad_norm": 0.6589304804801941, - "learning_rate": 1.2656028570017441e-06, - "loss": 0.1765, - "step": 17862 - }, - { - "epoch": 1.6827677162572714, - "grad_norm": 0.6553161144256592, - "learning_rate": 1.2648676799966697e-06, - "loss": 0.188, - "step": 17863 - }, - { - "epoch": 1.6828619203504394, - "grad_norm": 0.7294104099273682, - "learning_rate": 1.2641327021661222e-06, - "loss": 0.1842, - "step": 17864 - }, - { - "epoch": 1.682956124443607, - "grad_norm": 0.6651660203933716, - "learning_rate": 1.2633979235268569e-06, - "loss": 0.194, - "step": 17865 - }, - { - "epoch": 1.6830503285367748, - "grad_norm": 0.585040807723999, - "learning_rate": 1.2626633440956305e-06, - "loss": 0.1693, - "step": 17866 - }, - { - "epoch": 1.6831445326299428, - "grad_norm": 0.7369555234909058, - "learning_rate": 1.261928963889194e-06, - "loss": 0.2079, - "step": 17867 - }, - { - "epoch": 1.6832387367231108, - "grad_norm": 0.6434837579727173, - "learning_rate": 1.2611947829242887e-06, - "loss": 0.1947, - "step": 17868 - }, - { - "epoch": 1.6833329408162785, - "grad_norm": 0.679571270942688, - "learning_rate": 1.2604608012176579e-06, - "loss": 0.2242, - "step": 17869 - }, - { - "epoch": 1.6834271449094462, - "grad_norm": 0.6934368014335632, - "learning_rate": 1.2597270187860378e-06, - "loss": 0.1976, - "step": 17870 - }, - { - "epoch": 1.6835213490026142, - "grad_norm": 0.5825608968734741, - "learning_rate": 1.2589934356461575e-06, - "loss": 0.1844, - "step": 17871 - }, - { - "epoch": 1.6836155530957821, - "grad_norm": 0.6324750781059265, - "learning_rate": 1.2582600518147448e-06, - "loss": 0.1659, - "step": 17872 - }, - { - "epoch": 1.6837097571889499, - "grad_norm": 0.6621334552764893, - "learning_rate": 1.2575268673085239e-06, - "loss": 0.1759, - "step": 17873 - }, - { - "epoch": 1.6838039612821176, - "grad_norm": 0.7086099982261658, - "learning_rate": 1.2567938821442093e-06, - "loss": 0.2152, - "step": 17874 - }, - { - "epoch": 1.6838981653752856, - "grad_norm": 0.6397452354431152, - "learning_rate": 1.2560610963385189e-06, - "loss": 0.1877, - "step": 17875 - }, - { - "epoch": 1.6839923694684535, - "grad_norm": 0.640308141708374, - "learning_rate": 1.2553285099081547e-06, - "loss": 0.2108, - "step": 17876 - }, - { - "epoch": 1.6840865735616213, - "grad_norm": 0.6639888286590576, - "learning_rate": 1.2545961228698255e-06, - "loss": 0.2011, - "step": 17877 - }, - { - "epoch": 1.684180777654789, - "grad_norm": 0.6414461731910706, - "learning_rate": 1.2538639352402305e-06, - "loss": 0.1865, - "step": 17878 - }, - { - "epoch": 1.684274981747957, - "grad_norm": 0.6911826133728027, - "learning_rate": 1.2531319470360625e-06, - "loss": 0.1992, - "step": 17879 - }, - { - "epoch": 1.684369185841125, - "grad_norm": 0.7354703545570374, - "learning_rate": 1.2524001582740141e-06, - "loss": 0.2222, - "step": 17880 - }, - { - "epoch": 1.6844633899342927, - "grad_norm": 0.6461732387542725, - "learning_rate": 1.2516685689707719e-06, - "loss": 0.1672, - "step": 17881 - }, - { - "epoch": 1.6845575940274604, - "grad_norm": 0.7001529932022095, - "learning_rate": 1.2509371791430135e-06, - "loss": 0.1848, - "step": 17882 - }, - { - "epoch": 1.6846517981206284, - "grad_norm": 0.7101517915725708, - "learning_rate": 1.2502059888074191e-06, - "loss": 0.1973, - "step": 17883 - }, - { - "epoch": 1.6847460022137963, - "grad_norm": 0.6809945702552795, - "learning_rate": 1.2494749979806608e-06, - "loss": 0.2033, - "step": 17884 - }, - { - "epoch": 1.684840206306964, - "grad_norm": 
0.6272152066230774, - "learning_rate": 1.2487442066794065e-06, - "loss": 0.1805, - "step": 17885 - }, - { - "epoch": 1.6849344104001318, - "grad_norm": 0.6663166284561157, - "learning_rate": 1.2480136149203138e-06, - "loss": 0.2151, - "step": 17886 - }, - { - "epoch": 1.6850286144932998, - "grad_norm": 0.6448500752449036, - "learning_rate": 1.2472832227200505e-06, - "loss": 0.1785, - "step": 17887 - }, - { - "epoch": 1.6851228185864677, - "grad_norm": 0.6031123995780945, - "learning_rate": 1.2465530300952655e-06, - "loss": 0.2118, - "step": 17888 - }, - { - "epoch": 1.6852170226796355, - "grad_norm": 0.6824389696121216, - "learning_rate": 1.2458230370626068e-06, - "loss": 0.2117, - "step": 17889 - }, - { - "epoch": 1.6853112267728032, - "grad_norm": 0.633580207824707, - "learning_rate": 1.2450932436387208e-06, - "loss": 0.194, - "step": 17890 - }, - { - "epoch": 1.6854054308659712, - "grad_norm": 0.702239990234375, - "learning_rate": 1.2443636498402523e-06, - "loss": 0.1787, - "step": 17891 - }, - { - "epoch": 1.6854996349591391, - "grad_norm": 0.6519957184791565, - "learning_rate": 1.24363425568383e-06, - "loss": 0.1931, - "step": 17892 - }, - { - "epoch": 1.6855938390523069, - "grad_norm": 0.6838902235031128, - "learning_rate": 1.2429050611860894e-06, - "loss": 0.2116, - "step": 17893 - }, - { - "epoch": 1.6856880431454746, - "grad_norm": 0.6391249299049377, - "learning_rate": 1.2421760663636583e-06, - "loss": 0.2014, - "step": 17894 - }, - { - "epoch": 1.6857822472386426, - "grad_norm": 0.6512613892555237, - "learning_rate": 1.2414472712331549e-06, - "loss": 0.2059, - "step": 17895 - }, - { - "epoch": 1.6858764513318105, - "grad_norm": 0.6511378288269043, - "learning_rate": 1.240718675811199e-06, - "loss": 0.2034, - "step": 17896 - }, - { - "epoch": 1.6859706554249783, - "grad_norm": 0.6198116540908813, - "learning_rate": 1.239990280114407e-06, - "loss": 0.2015, - "step": 17897 - }, - { - "epoch": 1.686064859518146, - "grad_norm": 0.6795098185539246, - "learning_rate": 1.2392620841593805e-06, - "loss": 0.2042, - "step": 17898 - }, - { - "epoch": 1.686159063611314, - "grad_norm": 0.6171090006828308, - "learning_rate": 1.2385340879627283e-06, - "loss": 0.1858, - "step": 17899 - }, - { - "epoch": 1.686253267704482, - "grad_norm": 0.6320787668228149, - "learning_rate": 1.2378062915410504e-06, - "loss": 0.1771, - "step": 17900 - }, - { - "epoch": 1.6863474717976497, - "grad_norm": 0.7158340811729431, - "learning_rate": 1.2370786949109382e-06, - "loss": 0.2007, - "step": 17901 - }, - { - "epoch": 1.6864416758908174, - "grad_norm": 0.6730626225471497, - "learning_rate": 1.2363512980889836e-06, - "loss": 0.2055, - "step": 17902 - }, - { - "epoch": 1.6865358799839854, - "grad_norm": 0.6145924925804138, - "learning_rate": 1.2356241010917747e-06, - "loss": 0.188, - "step": 17903 - }, - { - "epoch": 1.6866300840771533, - "grad_norm": 0.6048918962478638, - "learning_rate": 1.2348971039358914e-06, - "loss": 0.1969, - "step": 17904 - }, - { - "epoch": 1.686724288170321, - "grad_norm": 0.6175239086151123, - "learning_rate": 1.2341703066379073e-06, - "loss": 0.1836, - "step": 17905 - }, - { - "epoch": 1.6868184922634888, - "grad_norm": 0.6972577571868896, - "learning_rate": 1.2334437092143959e-06, - "loss": 0.1855, - "step": 17906 - }, - { - "epoch": 1.6869126963566567, - "grad_norm": 0.7666021585464478, - "learning_rate": 1.2327173116819292e-06, - "loss": 0.2043, - "step": 17907 - }, - { - "epoch": 1.6870069004498247, - "grad_norm": 0.6751794219017029, - "learning_rate": 1.2319911140570629e-06, - 
"loss": 0.1868, - "step": 17908 - }, - { - "epoch": 1.6871011045429924, - "grad_norm": 0.6828281283378601, - "learning_rate": 1.2312651163563606e-06, - "loss": 0.2155, - "step": 17909 - }, - { - "epoch": 1.6871953086361602, - "grad_norm": 0.6635129451751709, - "learning_rate": 1.2305393185963754e-06, - "loss": 0.1727, - "step": 17910 - }, - { - "epoch": 1.6872895127293281, - "grad_norm": 0.5928038358688354, - "learning_rate": 1.2298137207936555e-06, - "loss": 0.165, - "step": 17911 - }, - { - "epoch": 1.6873837168224959, - "grad_norm": 0.6852033734321594, - "learning_rate": 1.2290883229647455e-06, - "loss": 0.1853, - "step": 17912 - }, - { - "epoch": 1.6874779209156636, - "grad_norm": 0.6938362717628479, - "learning_rate": 1.2283631251261873e-06, - "loss": 0.2052, - "step": 17913 - }, - { - "epoch": 1.6875721250088316, - "grad_norm": 0.6766852140426636, - "learning_rate": 1.2276381272945149e-06, - "loss": 0.2295, - "step": 17914 - }, - { - "epoch": 1.6876663291019995, - "grad_norm": 0.6537317633628845, - "learning_rate": 1.226913329486261e-06, - "loss": 0.1989, - "step": 17915 - }, - { - "epoch": 1.6877605331951673, - "grad_norm": 0.6622162461280823, - "learning_rate": 1.2261887317179499e-06, - "loss": 0.1808, - "step": 17916 - }, - { - "epoch": 1.687854737288335, - "grad_norm": 0.558259129524231, - "learning_rate": 1.2254643340061035e-06, - "loss": 0.1624, - "step": 17917 - }, - { - "epoch": 1.687948941381503, - "grad_norm": 0.6519166827201843, - "learning_rate": 1.2247401363672428e-06, - "loss": 0.1964, - "step": 17918 - }, - { - "epoch": 1.688043145474671, - "grad_norm": 0.6809887886047363, - "learning_rate": 1.224016138817876e-06, - "loss": 0.2071, - "step": 17919 - }, - { - "epoch": 1.6881373495678387, - "grad_norm": 0.624958336353302, - "learning_rate": 1.2232923413745146e-06, - "loss": 0.1705, - "step": 17920 - }, - { - "epoch": 1.6882315536610064, - "grad_norm": 0.6159089207649231, - "learning_rate": 1.2225687440536626e-06, - "loss": 0.1744, - "step": 17921 - }, - { - "epoch": 1.6883257577541744, - "grad_norm": 0.6885070204734802, - "learning_rate": 1.2218453468718161e-06, - "loss": 0.2124, - "step": 17922 - }, - { - "epoch": 1.6884199618473423, - "grad_norm": 1.4782142639160156, - "learning_rate": 1.2211221498454706e-06, - "loss": 0.1916, - "step": 17923 - }, - { - "epoch": 1.68851416594051, - "grad_norm": 0.6769928932189941, - "learning_rate": 1.2203991529911197e-06, - "loss": 0.1957, - "step": 17924 - }, - { - "epoch": 1.6886083700336778, - "grad_norm": 0.7010208368301392, - "learning_rate": 1.2196763563252456e-06, - "loss": 0.174, - "step": 17925 - }, - { - "epoch": 1.6887025741268458, - "grad_norm": 0.6963124871253967, - "learning_rate": 1.2189537598643253e-06, - "loss": 0.1942, - "step": 17926 - }, - { - "epoch": 1.6887967782200137, - "grad_norm": 0.6661259531974792, - "learning_rate": 1.2182313636248433e-06, - "loss": 0.1786, - "step": 17927 - }, - { - "epoch": 1.6888909823131815, - "grad_norm": 0.6243340373039246, - "learning_rate": 1.2175091676232685e-06, - "loss": 0.1821, - "step": 17928 - }, - { - "epoch": 1.6889851864063492, - "grad_norm": 0.6905919313430786, - "learning_rate": 1.2167871718760638e-06, - "loss": 0.1847, - "step": 17929 - }, - { - "epoch": 1.6890793904995172, - "grad_norm": 0.6800353527069092, - "learning_rate": 1.2160653763996965e-06, - "loss": 0.2098, - "step": 17930 - }, - { - "epoch": 1.6891735945926851, - "grad_norm": 0.6478415131568909, - "learning_rate": 1.215343781210624e-06, - "loss": 0.2011, - "step": 17931 - }, - { - "epoch": 
1.6892677986858529, - "grad_norm": 0.6425968408584595, - "learning_rate": 1.2146223863252982e-06, - "loss": 0.1726, - "step": 17932 - }, - { - "epoch": 1.6893620027790206, - "grad_norm": 0.6161893606185913, - "learning_rate": 1.2139011917601673e-06, - "loss": 0.19, - "step": 17933 - }, - { - "epoch": 1.6894562068721886, - "grad_norm": 0.6595200896263123, - "learning_rate": 1.2131801975316804e-06, - "loss": 0.2129, - "step": 17934 - }, - { - "epoch": 1.6895504109653565, - "grad_norm": 0.7028770446777344, - "learning_rate": 1.212459403656272e-06, - "loss": 0.1903, - "step": 17935 - }, - { - "epoch": 1.6896446150585243, - "grad_norm": 0.6268740296363831, - "learning_rate": 1.211738810150379e-06, - "loss": 0.1834, - "step": 17936 - }, - { - "epoch": 1.689738819151692, - "grad_norm": 0.6336575746536255, - "learning_rate": 1.211018417030434e-06, - "loss": 0.1636, - "step": 17937 - }, - { - "epoch": 1.68983302324486, - "grad_norm": 0.6407804489135742, - "learning_rate": 1.2102982243128603e-06, - "loss": 0.204, - "step": 17938 - }, - { - "epoch": 1.689927227338028, - "grad_norm": 0.6109455823898315, - "learning_rate": 1.2095782320140803e-06, - "loss": 0.1753, - "step": 17939 - }, - { - "epoch": 1.6900214314311957, - "grad_norm": 0.6444677114486694, - "learning_rate": 1.2088584401505133e-06, - "loss": 0.1659, - "step": 17940 - }, - { - "epoch": 1.6901156355243634, - "grad_norm": 0.7086964845657349, - "learning_rate": 1.2081388487385704e-06, - "loss": 0.1933, - "step": 17941 - }, - { - "epoch": 1.6902098396175314, - "grad_norm": 0.6854089498519897, - "learning_rate": 1.2074194577946529e-06, - "loss": 0.2159, - "step": 17942 - }, - { - "epoch": 1.6903040437106993, - "grad_norm": 0.6481835842132568, - "learning_rate": 1.2067002673351758e-06, - "loss": 0.191, - "step": 17943 - }, - { - "epoch": 1.690398247803867, - "grad_norm": 0.6236292123794556, - "learning_rate": 1.20598127737653e-06, - "loss": 0.1907, - "step": 17944 - }, - { - "epoch": 1.6904924518970348, - "grad_norm": 0.6148814558982849, - "learning_rate": 1.2052624879351105e-06, - "loss": 0.2072, - "step": 17945 - }, - { - "epoch": 1.6905866559902027, - "grad_norm": 0.6329357624053955, - "learning_rate": 1.2045438990273062e-06, - "loss": 0.1835, - "step": 17946 - }, - { - "epoch": 1.6906808600833707, - "grad_norm": 0.6403841376304626, - "learning_rate": 1.2038255106695074e-06, - "loss": 0.1743, - "step": 17947 - }, - { - "epoch": 1.6907750641765384, - "grad_norm": 0.6254767179489136, - "learning_rate": 1.2031073228780865e-06, - "loss": 0.1779, - "step": 17948 - }, - { - "epoch": 1.6908692682697062, - "grad_norm": 0.5949995517730713, - "learning_rate": 1.202389335669425e-06, - "loss": 0.1763, - "step": 17949 - }, - { - "epoch": 1.6909634723628741, - "grad_norm": 0.709050714969635, - "learning_rate": 1.201671549059893e-06, - "loss": 0.1913, - "step": 17950 - }, - { - "epoch": 1.691057676456042, - "grad_norm": 0.6388276815414429, - "learning_rate": 1.2009539630658551e-06, - "loss": 0.1645, - "step": 17951 - }, - { - "epoch": 1.6911518805492098, - "grad_norm": 0.623392641544342, - "learning_rate": 1.2002365777036751e-06, - "loss": 0.1997, - "step": 17952 - }, - { - "epoch": 1.6912460846423776, - "grad_norm": 0.704318642616272, - "learning_rate": 1.199519392989712e-06, - "loss": 0.1975, - "step": 17953 - }, - { - "epoch": 1.6913402887355455, - "grad_norm": 0.6862435340881348, - "learning_rate": 1.198802408940315e-06, - "loss": 0.2066, - "step": 17954 - }, - { - "epoch": 1.6914344928287135, - "grad_norm": 0.5987230539321899, - "learning_rate": 
1.1980856255718365e-06, - "loss": 0.1924, - "step": 17955 - }, - { - "epoch": 1.6915286969218812, - "grad_norm": 0.6653030514717102, - "learning_rate": 1.1973690429006157e-06, - "loss": 0.1938, - "step": 17956 - }, - { - "epoch": 1.691622901015049, - "grad_norm": 0.7109711766242981, - "learning_rate": 1.196652660942994e-06, - "loss": 0.2102, - "step": 17957 - }, - { - "epoch": 1.691717105108217, - "grad_norm": 0.6443082690238953, - "learning_rate": 1.1959364797153095e-06, - "loss": 0.1971, - "step": 17958 - }, - { - "epoch": 1.691811309201385, - "grad_norm": 0.6614705920219421, - "learning_rate": 1.1952204992338856e-06, - "loss": 0.2021, - "step": 17959 - }, - { - "epoch": 1.6919055132945526, - "grad_norm": 0.6767356991767883, - "learning_rate": 1.1945047195150528e-06, - "loss": 0.1916, - "step": 17960 - }, - { - "epoch": 1.6919997173877204, - "grad_norm": 0.6969180107116699, - "learning_rate": 1.1937891405751312e-06, - "loss": 0.1918, - "step": 17961 - }, - { - "epoch": 1.6920939214808883, - "grad_norm": 1.0817089080810547, - "learning_rate": 1.1930737624304346e-06, - "loss": 0.2153, - "step": 17962 - }, - { - "epoch": 1.6921881255740563, - "grad_norm": 0.6809571385383606, - "learning_rate": 1.1923585850972762e-06, - "loss": 0.1885, - "step": 17963 - }, - { - "epoch": 1.692282329667224, - "grad_norm": 1.001025915145874, - "learning_rate": 1.1916436085919648e-06, - "loss": 0.2007, - "step": 17964 - }, - { - "epoch": 1.6923765337603918, - "grad_norm": 0.8061049580574036, - "learning_rate": 1.1909288329308023e-06, - "loss": 0.205, - "step": 17965 - }, - { - "epoch": 1.6924707378535597, - "grad_norm": 0.6809053421020508, - "learning_rate": 1.1902142581300836e-06, - "loss": 0.1944, - "step": 17966 - }, - { - "epoch": 1.6925649419467277, - "grad_norm": 0.7150509357452393, - "learning_rate": 1.1894998842061045e-06, - "loss": 0.1893, - "step": 17967 - }, - { - "epoch": 1.6926591460398954, - "grad_norm": 0.70624178647995, - "learning_rate": 1.1887857111751566e-06, - "loss": 0.2108, - "step": 17968 - }, - { - "epoch": 1.6927533501330632, - "grad_norm": 0.6196686029434204, - "learning_rate": 1.1880717390535178e-06, - "loss": 0.214, - "step": 17969 - }, - { - "epoch": 1.6928475542262311, - "grad_norm": 0.7185632586479187, - "learning_rate": 1.187357967857472e-06, - "loss": 0.1987, - "step": 17970 - }, - { - "epoch": 1.692941758319399, - "grad_norm": 0.5882018804550171, - "learning_rate": 1.186644397603296e-06, - "loss": 0.1782, - "step": 17971 - }, - { - "epoch": 1.6930359624125668, - "grad_norm": 0.720920741558075, - "learning_rate": 1.1859310283072545e-06, - "loss": 0.2091, - "step": 17972 - }, - { - "epoch": 1.6931301665057346, - "grad_norm": 0.6742173433303833, - "learning_rate": 1.185217859985618e-06, - "loss": 0.1777, - "step": 17973 - }, - { - "epoch": 1.6932243705989025, - "grad_norm": 0.7380731105804443, - "learning_rate": 1.1845048926546487e-06, - "loss": 0.2349, - "step": 17974 - }, - { - "epoch": 1.6933185746920705, - "grad_norm": 0.7536550760269165, - "learning_rate": 1.1837921263305985e-06, - "loss": 0.1984, - "step": 17975 - }, - { - "epoch": 1.6934127787852382, - "grad_norm": 0.6372663378715515, - "learning_rate": 1.183079561029723e-06, - "loss": 0.1832, - "step": 17976 - }, - { - "epoch": 1.693506982878406, - "grad_norm": 0.6015812158584595, - "learning_rate": 1.1823671967682704e-06, - "loss": 0.144, - "step": 17977 - }, - { - "epoch": 1.693601186971574, - "grad_norm": 0.6695241332054138, - "learning_rate": 1.181655033562481e-06, - "loss": 0.192, - "step": 17978 - }, - { - "epoch": 
1.6936953910647419, - "grad_norm": 0.6968777775764465, - "learning_rate": 1.180943071428594e-06, - "loss": 0.1887, - "step": 17979 - }, - { - "epoch": 1.6937895951579096, - "grad_norm": 0.6087028980255127, - "learning_rate": 1.1802313103828466e-06, - "loss": 0.177, - "step": 17980 - }, - { - "epoch": 1.6938837992510773, - "grad_norm": 0.6628785133361816, - "learning_rate": 1.1795197504414657e-06, - "loss": 0.2069, - "step": 17981 - }, - { - "epoch": 1.6939780033442453, - "grad_norm": 0.6341779828071594, - "learning_rate": 1.1788083916206705e-06, - "loss": 0.1866, - "step": 17982 - }, - { - "epoch": 1.6940722074374133, - "grad_norm": 0.7151958346366882, - "learning_rate": 1.1780972339366914e-06, - "loss": 0.2168, - "step": 17983 - }, - { - "epoch": 1.694166411530581, - "grad_norm": 0.7450733780860901, - "learning_rate": 1.1773862774057377e-06, - "loss": 0.2009, - "step": 17984 - }, - { - "epoch": 1.6942606156237487, - "grad_norm": 0.6015254259109497, - "learning_rate": 1.1766755220440195e-06, - "loss": 0.1629, - "step": 17985 - }, - { - "epoch": 1.6943548197169167, - "grad_norm": 0.7582864761352539, - "learning_rate": 1.1759649678677454e-06, - "loss": 0.2035, - "step": 17986 - }, - { - "epoch": 1.6944490238100847, - "grad_norm": 0.7008610963821411, - "learning_rate": 1.1752546148931188e-06, - "loss": 0.1909, - "step": 17987 - }, - { - "epoch": 1.6945432279032524, - "grad_norm": 0.6471156477928162, - "learning_rate": 1.1745444631363312e-06, - "loss": 0.1963, - "step": 17988 - }, - { - "epoch": 1.6946374319964201, - "grad_norm": 0.6539919972419739, - "learning_rate": 1.1738345126135798e-06, - "loss": 0.2085, - "step": 17989 - }, - { - "epoch": 1.694731636089588, - "grad_norm": 0.6169882416725159, - "learning_rate": 1.1731247633410536e-06, - "loss": 0.1976, - "step": 17990 - }, - { - "epoch": 1.694825840182756, - "grad_norm": 0.808809220790863, - "learning_rate": 1.172415215334932e-06, - "loss": 0.1951, - "step": 17991 - }, - { - "epoch": 1.6949200442759238, - "grad_norm": 0.6773445010185242, - "learning_rate": 1.1717058686113948e-06, - "loss": 0.194, - "step": 17992 - }, - { - "epoch": 1.6950142483690915, - "grad_norm": 0.7250120043754578, - "learning_rate": 1.1709967231866204e-06, - "loss": 0.1944, - "step": 17993 - }, - { - "epoch": 1.6951084524622595, - "grad_norm": 0.6955695152282715, - "learning_rate": 1.170287779076772e-06, - "loss": 0.2072, - "step": 17994 - }, - { - "epoch": 1.6952026565554275, - "grad_norm": 0.7111067771911621, - "learning_rate": 1.1695790362980198e-06, - "loss": 0.1859, - "step": 17995 - }, - { - "epoch": 1.6952968606485952, - "grad_norm": 0.7299478650093079, - "learning_rate": 1.1688704948665196e-06, - "loss": 0.198, - "step": 17996 - }, - { - "epoch": 1.695391064741763, - "grad_norm": 0.578731119632721, - "learning_rate": 1.16816215479843e-06, - "loss": 0.1586, - "step": 17997 - }, - { - "epoch": 1.695485268834931, - "grad_norm": 0.6320825219154358, - "learning_rate": 1.1674540161099045e-06, - "loss": 0.1719, - "step": 17998 - }, - { - "epoch": 1.6955794729280989, - "grad_norm": 0.6582252383232117, - "learning_rate": 1.1667460788170849e-06, - "loss": 0.2082, - "step": 17999 - }, - { - "epoch": 1.6956736770212666, - "grad_norm": 0.6311085820198059, - "learning_rate": 1.1660383429361155e-06, - "loss": 0.1753, - "step": 18000 - }, - { - "epoch": 1.6957678811144343, - "grad_norm": 0.7495169043540955, - "learning_rate": 1.165330808483136e-06, - "loss": 0.2064, - "step": 18001 - }, - { - "epoch": 1.6958620852076023, - "grad_norm": 0.6125212907791138, - "learning_rate": 
1.1646234754742747e-06, - "loss": 0.1708, - "step": 18002 - }, - { - "epoch": 1.6959562893007702, - "grad_norm": 0.651889979839325, - "learning_rate": 1.1639163439256629e-06, - "loss": 0.1872, - "step": 18003 - }, - { - "epoch": 1.696050493393938, - "grad_norm": 0.7018201947212219, - "learning_rate": 1.1632094138534256e-06, - "loss": 0.1775, - "step": 18004 - }, - { - "epoch": 1.6961446974871057, - "grad_norm": 0.6348908543586731, - "learning_rate": 1.1625026852736797e-06, - "loss": 0.1991, - "step": 18005 - }, - { - "epoch": 1.6962389015802737, - "grad_norm": 0.5990427732467651, - "learning_rate": 1.1617961582025384e-06, - "loss": 0.1693, - "step": 18006 - }, - { - "epoch": 1.6963331056734416, - "grad_norm": 0.6148661375045776, - "learning_rate": 1.1610898326561127e-06, - "loss": 0.1561, - "step": 18007 - }, - { - "epoch": 1.6964273097666094, - "grad_norm": 0.7107434868812561, - "learning_rate": 1.160383708650511e-06, - "loss": 0.2055, - "step": 18008 - }, - { - "epoch": 1.6965215138597771, - "grad_norm": 0.8598737120628357, - "learning_rate": 1.1596777862018293e-06, - "loss": 0.2075, - "step": 18009 - }, - { - "epoch": 1.696615717952945, - "grad_norm": 0.6815014481544495, - "learning_rate": 1.158972065326166e-06, - "loss": 0.1966, - "step": 18010 - }, - { - "epoch": 1.696709922046113, - "grad_norm": 0.6880999207496643, - "learning_rate": 1.1582665460396146e-06, - "loss": 0.198, - "step": 18011 - }, - { - "epoch": 1.6968041261392808, - "grad_norm": 0.721699595451355, - "learning_rate": 1.1575612283582571e-06, - "loss": 0.1838, - "step": 18012 - }, - { - "epoch": 1.6968983302324485, - "grad_norm": 0.6525470018386841, - "learning_rate": 1.1568561122981792e-06, - "loss": 0.207, - "step": 18013 - }, - { - "epoch": 1.6969925343256165, - "grad_norm": 0.6364315152168274, - "learning_rate": 1.1561511978754604e-06, - "loss": 0.1903, - "step": 18014 - }, - { - "epoch": 1.6970867384187844, - "grad_norm": 0.631808876991272, - "learning_rate": 1.1554464851061698e-06, - "loss": 0.1961, - "step": 18015 - }, - { - "epoch": 1.6971809425119522, - "grad_norm": 0.646543025970459, - "learning_rate": 1.154741974006377e-06, - "loss": 0.1896, - "step": 18016 - }, - { - "epoch": 1.69727514660512, - "grad_norm": 0.7164186835289001, - "learning_rate": 1.1540376645921492e-06, - "loss": 0.2018, - "step": 18017 - }, - { - "epoch": 1.6973693506982879, - "grad_norm": 0.6542921662330627, - "learning_rate": 1.1533335568795412e-06, - "loss": 0.199, - "step": 18018 - }, - { - "epoch": 1.6974635547914558, - "grad_norm": 0.6426371335983276, - "learning_rate": 1.1526296508846092e-06, - "loss": 0.2056, - "step": 18019 - }, - { - "epoch": 1.6975577588846236, - "grad_norm": 0.6970218420028687, - "learning_rate": 1.1519259466234068e-06, - "loss": 0.2002, - "step": 18020 - }, - { - "epoch": 1.6976519629777913, - "grad_norm": 0.6598572134971619, - "learning_rate": 1.1512224441119756e-06, - "loss": 0.2262, - "step": 18021 - }, - { - "epoch": 1.6977461670709593, - "grad_norm": 0.7594718933105469, - "learning_rate": 1.1505191433663544e-06, - "loss": 0.2121, - "step": 18022 - }, - { - "epoch": 1.6978403711641272, - "grad_norm": 0.6339885592460632, - "learning_rate": 1.1498160444025862e-06, - "loss": 0.2117, - "step": 18023 - }, - { - "epoch": 1.697934575257295, - "grad_norm": 0.8442044854164124, - "learning_rate": 1.149113147236699e-06, - "loss": 0.1847, - "step": 18024 - }, - { - "epoch": 1.6980287793504627, - "grad_norm": 0.7565668225288391, - "learning_rate": 1.1484104518847193e-06, - "loss": 0.2016, - "step": 18025 - }, - { - 
"epoch": 1.6981229834436307, - "grad_norm": 0.7381719350814819, - "learning_rate": 1.1477079583626693e-06, - "loss": 0.2391, - "step": 18026 - }, - { - "epoch": 1.6982171875367986, - "grad_norm": 0.638436496257782, - "learning_rate": 1.1470056666865714e-06, - "loss": 0.183, - "step": 18027 - }, - { - "epoch": 1.6983113916299664, - "grad_norm": 0.6921818852424622, - "learning_rate": 1.146303576872433e-06, - "loss": 0.1773, - "step": 18028 - }, - { - "epoch": 1.698405595723134, - "grad_norm": 0.6457545757293701, - "learning_rate": 1.1456016889362654e-06, - "loss": 0.2013, - "step": 18029 - }, - { - "epoch": 1.698499799816302, - "grad_norm": 0.6686806082725525, - "learning_rate": 1.1449000028940749e-06, - "loss": 0.2066, - "step": 18030 - }, - { - "epoch": 1.69859400390947, - "grad_norm": 0.721925675868988, - "learning_rate": 1.1441985187618576e-06, - "loss": 0.1792, - "step": 18031 - }, - { - "epoch": 1.6986882080026378, - "grad_norm": 0.6582679748535156, - "learning_rate": 1.1434972365556086e-06, - "loss": 0.2222, - "step": 18032 - }, - { - "epoch": 1.6987824120958055, - "grad_norm": 0.6933612823486328, - "learning_rate": 1.1427961562913225e-06, - "loss": 0.2038, - "step": 18033 - }, - { - "epoch": 1.6988766161889735, - "grad_norm": 0.6419912576675415, - "learning_rate": 1.1420952779849803e-06, - "loss": 0.1892, - "step": 18034 - }, - { - "epoch": 1.6989708202821414, - "grad_norm": 0.6878123879432678, - "learning_rate": 1.1413946016525656e-06, - "loss": 0.1916, - "step": 18035 - }, - { - "epoch": 1.6990650243753092, - "grad_norm": 0.6557255387306213, - "learning_rate": 1.1406941273100525e-06, - "loss": 0.1866, - "step": 18036 - }, - { - "epoch": 1.699159228468477, - "grad_norm": 0.7365891337394714, - "learning_rate": 1.1399938549734145e-06, - "loss": 0.193, - "step": 18037 - }, - { - "epoch": 1.6992534325616448, - "grad_norm": 0.6552425622940063, - "learning_rate": 1.1392937846586216e-06, - "loss": 0.1859, - "step": 18038 - }, - { - "epoch": 1.6993476366548128, - "grad_norm": 0.8205013275146484, - "learning_rate": 1.1385939163816306e-06, - "loss": 0.2262, - "step": 18039 - }, - { - "epoch": 1.6994418407479805, - "grad_norm": 0.6691708564758301, - "learning_rate": 1.1378942501584035e-06, - "loss": 0.2103, - "step": 18040 - }, - { - "epoch": 1.6995360448411483, - "grad_norm": 0.6229633092880249, - "learning_rate": 1.1371947860048948e-06, - "loss": 0.1636, - "step": 18041 - }, - { - "epoch": 1.6996302489343162, - "grad_norm": 0.6062520742416382, - "learning_rate": 1.1364955239370501e-06, - "loss": 0.197, - "step": 18042 - }, - { - "epoch": 1.6997244530274842, - "grad_norm": 0.8412054777145386, - "learning_rate": 1.135796463970814e-06, - "loss": 0.1724, - "step": 18043 - }, - { - "epoch": 1.699818657120652, - "grad_norm": 0.7708158493041992, - "learning_rate": 1.1350976061221309e-06, - "loss": 0.2009, - "step": 18044 - }, - { - "epoch": 1.6999128612138197, - "grad_norm": 0.7285130620002747, - "learning_rate": 1.1343989504069308e-06, - "loss": 0.215, - "step": 18045 - }, - { - "epoch": 1.7000070653069876, - "grad_norm": 0.6278975605964661, - "learning_rate": 1.1337004968411437e-06, - "loss": 0.1816, - "step": 18046 - }, - { - "epoch": 1.7001012694001556, - "grad_norm": 0.619029700756073, - "learning_rate": 1.1330022454406975e-06, - "loss": 0.1763, - "step": 18047 - }, - { - "epoch": 1.7001954734933233, - "grad_norm": 0.8753061890602112, - "learning_rate": 1.1323041962215153e-06, - "loss": 0.2082, - "step": 18048 - }, - { - "epoch": 1.700289677586491, - "grad_norm": 0.6890900731086731, - 
"learning_rate": 1.1316063491995099e-06, - "loss": 0.207, - "step": 18049 - }, - { - "epoch": 1.700383881679659, - "grad_norm": 0.6700932383537292, - "learning_rate": 1.1309087043905943e-06, - "loss": 0.1982, - "step": 18050 - }, - { - "epoch": 1.7004780857728268, - "grad_norm": 0.6805413961410522, - "learning_rate": 1.1302112618106786e-06, - "loss": 0.2113, - "step": 18051 - }, - { - "epoch": 1.7005722898659945, - "grad_norm": 0.5929723978042603, - "learning_rate": 1.1295140214756616e-06, - "loss": 0.1503, - "step": 18052 - }, - { - "epoch": 1.7006664939591625, - "grad_norm": 0.6638513803482056, - "learning_rate": 1.128816983401444e-06, - "loss": 0.1946, - "step": 18053 - }, - { - "epoch": 1.7007606980523304, - "grad_norm": 0.6715105772018433, - "learning_rate": 1.1281201476039205e-06, - "loss": 0.1761, - "step": 18054 - }, - { - "epoch": 1.7008549021454982, - "grad_norm": 0.6856271028518677, - "learning_rate": 1.1274235140989753e-06, - "loss": 0.2068, - "step": 18055 - }, - { - "epoch": 1.700949106238666, - "grad_norm": 0.6468895673751831, - "learning_rate": 1.1267270829024968e-06, - "loss": 0.1663, - "step": 18056 - }, - { - "epoch": 1.7010433103318339, - "grad_norm": 0.7396970987319946, - "learning_rate": 1.1260308540303655e-06, - "loss": 0.1849, - "step": 18057 - }, - { - "epoch": 1.7011375144250018, - "grad_norm": 0.6991780996322632, - "learning_rate": 1.1253348274984544e-06, - "loss": 0.1959, - "step": 18058 - }, - { - "epoch": 1.7012317185181696, - "grad_norm": 0.730093240737915, - "learning_rate": 1.1246390033226295e-06, - "loss": 0.2105, - "step": 18059 - }, - { - "epoch": 1.7013259226113373, - "grad_norm": 0.63701331615448, - "learning_rate": 1.123943381518766e-06, - "loss": 0.1744, - "step": 18060 - }, - { - "epoch": 1.7014201267045053, - "grad_norm": 0.5293937921524048, - "learning_rate": 1.1232479621027203e-06, - "loss": 0.1643, - "step": 18061 - }, - { - "epoch": 1.7015143307976732, - "grad_norm": 0.6485114693641663, - "learning_rate": 1.122552745090345e-06, - "loss": 0.1888, - "step": 18062 - }, - { - "epoch": 1.701608534890841, - "grad_norm": 0.7461566925048828, - "learning_rate": 1.121857730497501e-06, - "loss": 0.1954, - "step": 18063 - }, - { - "epoch": 1.7017027389840087, - "grad_norm": 0.7191745638847351, - "learning_rate": 1.1211629183400307e-06, - "loss": 0.1896, - "step": 18064 - }, - { - "epoch": 1.7017969430771767, - "grad_norm": 0.7366457581520081, - "learning_rate": 1.1204683086337754e-06, - "loss": 0.2272, - "step": 18065 - }, - { - "epoch": 1.7018911471703446, - "grad_norm": 0.5956347584724426, - "learning_rate": 1.1197739013945752e-06, - "loss": 0.1957, - "step": 18066 - }, - { - "epoch": 1.7019853512635124, - "grad_norm": 0.7121246457099915, - "learning_rate": 1.1190796966382644e-06, - "loss": 0.2042, - "step": 18067 - }, - { - "epoch": 1.70207955535668, - "grad_norm": 0.7454216480255127, - "learning_rate": 1.1183856943806703e-06, - "loss": 0.1915, - "step": 18068 - }, - { - "epoch": 1.702173759449848, - "grad_norm": 0.6396015882492065, - "learning_rate": 1.1176918946376182e-06, - "loss": 0.176, - "step": 18069 - }, - { - "epoch": 1.702267963543016, - "grad_norm": 0.6907809376716614, - "learning_rate": 1.1169982974249295e-06, - "loss": 0.1794, - "step": 18070 - }, - { - "epoch": 1.7023621676361838, - "grad_norm": 0.7384300231933594, - "learning_rate": 1.1163049027584149e-06, - "loss": 0.1911, - "step": 18071 - }, - { - "epoch": 1.7024563717293515, - "grad_norm": 0.6600772738456726, - "learning_rate": 1.1156117106538877e-06, - "loss": 0.173, - "step": 18072 
- }, - { - "epoch": 1.7025505758225195, - "grad_norm": 0.6174034476280212, - "learning_rate": 1.114918721127155e-06, - "loss": 0.1808, - "step": 18073 - }, - { - "epoch": 1.7026447799156874, - "grad_norm": 0.6251533031463623, - "learning_rate": 1.1142259341940153e-06, - "loss": 0.185, - "step": 18074 - }, - { - "epoch": 1.7027389840088551, - "grad_norm": 0.5912071466445923, - "learning_rate": 1.1135333498702683e-06, - "loss": 0.155, - "step": 18075 - }, - { - "epoch": 1.7028331881020229, - "grad_norm": 0.6848728060722351, - "learning_rate": 1.1128409681717024e-06, - "loss": 0.1908, - "step": 18076 - }, - { - "epoch": 1.7029273921951908, - "grad_norm": 0.654954195022583, - "learning_rate": 1.1121487891141059e-06, - "loss": 0.1887, - "step": 18077 - }, - { - "epoch": 1.7030215962883588, - "grad_norm": 0.7168022394180298, - "learning_rate": 1.1114568127132641e-06, - "loss": 0.2023, - "step": 18078 - }, - { - "epoch": 1.7031158003815265, - "grad_norm": 0.6656650900840759, - "learning_rate": 1.1107650389849522e-06, - "loss": 0.2134, - "step": 18079 - }, - { - "epoch": 1.7032100044746943, - "grad_norm": 0.6357574462890625, - "learning_rate": 1.1100734679449442e-06, - "loss": 0.2161, - "step": 18080 - }, - { - "epoch": 1.7033042085678622, - "grad_norm": 1.0112943649291992, - "learning_rate": 1.1093820996090121e-06, - "loss": 0.1967, - "step": 18081 - }, - { - "epoch": 1.7033984126610302, - "grad_norm": 0.6312639117240906, - "learning_rate": 1.1086909339929165e-06, - "loss": 0.2089, - "step": 18082 - }, - { - "epoch": 1.703492616754198, - "grad_norm": 0.6553789377212524, - "learning_rate": 1.1079999711124189e-06, - "loss": 0.1786, - "step": 18083 - }, - { - "epoch": 1.7035868208473657, - "grad_norm": 0.6632435321807861, - "learning_rate": 1.1073092109832718e-06, - "loss": 0.1785, - "step": 18084 - }, - { - "epoch": 1.7036810249405336, - "grad_norm": 0.7690532207489014, - "learning_rate": 1.1066186536212308e-06, - "loss": 0.1872, - "step": 18085 - }, - { - "epoch": 1.7037752290337016, - "grad_norm": 0.7200743556022644, - "learning_rate": 1.105928299042035e-06, - "loss": 0.2289, - "step": 18086 - }, - { - "epoch": 1.7038694331268693, - "grad_norm": 0.6458424925804138, - "learning_rate": 1.1052381472614293e-06, - "loss": 0.1758, - "step": 18087 - }, - { - "epoch": 1.703963637220037, - "grad_norm": 0.7137967348098755, - "learning_rate": 1.1045481982951523e-06, - "loss": 0.2087, - "step": 18088 - }, - { - "epoch": 1.704057841313205, - "grad_norm": 0.7425330877304077, - "learning_rate": 1.1038584521589313e-06, - "loss": 0.1972, - "step": 18089 - }, - { - "epoch": 1.704152045406373, - "grad_norm": 0.5867472290992737, - "learning_rate": 1.103168908868495e-06, - "loss": 0.1698, - "step": 18090 - }, - { - "epoch": 1.7042462494995407, - "grad_norm": 0.6742032766342163, - "learning_rate": 1.1024795684395695e-06, - "loss": 0.1824, - "step": 18091 - }, - { - "epoch": 1.7043404535927085, - "grad_norm": 0.6082066893577576, - "learning_rate": 1.101790430887868e-06, - "loss": 0.1746, - "step": 18092 - }, - { - "epoch": 1.7044346576858764, - "grad_norm": 0.6216294169425964, - "learning_rate": 1.1011014962291066e-06, - "loss": 0.1856, - "step": 18093 - }, - { - "epoch": 1.7045288617790444, - "grad_norm": 0.702573299407959, - "learning_rate": 1.1004127644789952e-06, - "loss": 0.2118, - "step": 18094 - }, - { - "epoch": 1.7046230658722121, - "grad_norm": 0.6047021746635437, - "learning_rate": 1.0997242356532335e-06, - "loss": 0.1749, - "step": 18095 - }, - { - "epoch": 1.7047172699653799, - "grad_norm": 
0.68147212266922, - "learning_rate": 1.099035909767524e-06, - "loss": 0.2011, - "step": 18096 - }, - { - "epoch": 1.7048114740585478, - "grad_norm": 0.5945382118225098, - "learning_rate": 1.0983477868375636e-06, - "loss": 0.1743, - "step": 18097 - }, - { - "epoch": 1.7049056781517158, - "grad_norm": 0.6400014758110046, - "learning_rate": 1.0976598668790406e-06, - "loss": 0.2174, - "step": 18098 - }, - { - "epoch": 1.7049998822448835, - "grad_norm": 0.7022170424461365, - "learning_rate": 1.0969721499076357e-06, - "loss": 0.2149, - "step": 18099 - }, - { - "epoch": 1.7050940863380513, - "grad_norm": 0.6862964630126953, - "learning_rate": 1.09628463593904e-06, - "loss": 0.1827, - "step": 18100 - }, - { - "epoch": 1.7051882904312192, - "grad_norm": 1.012186884880066, - "learning_rate": 1.095597324988923e-06, - "loss": 0.1928, - "step": 18101 - }, - { - "epoch": 1.7052824945243872, - "grad_norm": 0.6898453235626221, - "learning_rate": 1.094910217072954e-06, - "loss": 0.1603, - "step": 18102 - }, - { - "epoch": 1.705376698617555, - "grad_norm": 0.6907361745834351, - "learning_rate": 1.0942233122068091e-06, - "loss": 0.1886, - "step": 18103 - }, - { - "epoch": 1.7054709027107227, - "grad_norm": 0.5983191728591919, - "learning_rate": 1.0935366104061451e-06, - "loss": 0.1772, - "step": 18104 - }, - { - "epoch": 1.7055651068038906, - "grad_norm": 0.7511815428733826, - "learning_rate": 1.0928501116866175e-06, - "loss": 0.2346, - "step": 18105 - }, - { - "epoch": 1.7056593108970586, - "grad_norm": 0.7115627527236938, - "learning_rate": 1.0921638160638836e-06, - "loss": 0.2029, - "step": 18106 - }, - { - "epoch": 1.7057535149902263, - "grad_norm": 0.7544174194335938, - "learning_rate": 1.0914777235535934e-06, - "loss": 0.2098, - "step": 18107 - }, - { - "epoch": 1.705847719083394, - "grad_norm": 0.616888701915741, - "learning_rate": 1.0907918341713853e-06, - "loss": 0.2089, - "step": 18108 - }, - { - "epoch": 1.705941923176562, - "grad_norm": 0.6795505881309509, - "learning_rate": 1.0901061479329023e-06, - "loss": 0.2163, - "step": 18109 - }, - { - "epoch": 1.70603612726973, - "grad_norm": 0.698206901550293, - "learning_rate": 1.089420664853781e-06, - "loss": 0.1752, - "step": 18110 - }, - { - "epoch": 1.7061303313628977, - "grad_norm": 0.6527726054191589, - "learning_rate": 1.0887353849496462e-06, - "loss": 0.1839, - "step": 18111 - }, - { - "epoch": 1.7062245354560654, - "grad_norm": 0.6876722574234009, - "learning_rate": 1.088050308236126e-06, - "loss": 0.2122, - "step": 18112 - }, - { - "epoch": 1.7063187395492334, - "grad_norm": 0.6527280211448669, - "learning_rate": 1.0873654347288443e-06, - "loss": 0.1718, - "step": 18113 - }, - { - "epoch": 1.7064129436424014, - "grad_norm": 0.6006307005882263, - "learning_rate": 1.0866807644434118e-06, - "loss": 0.1745, - "step": 18114 - }, - { - "epoch": 1.706507147735569, - "grad_norm": 0.7178559303283691, - "learning_rate": 1.0859962973954442e-06, - "loss": 0.2063, - "step": 18115 - }, - { - "epoch": 1.7066013518287368, - "grad_norm": 0.6288301348686218, - "learning_rate": 1.0853120336005452e-06, - "loss": 0.2015, - "step": 18116 - }, - { - "epoch": 1.7066955559219048, - "grad_norm": 0.6841304302215576, - "learning_rate": 1.0846279730743192e-06, - "loss": 0.1845, - "step": 18117 - }, - { - "epoch": 1.7067897600150728, - "grad_norm": 0.6126241087913513, - "learning_rate": 1.0839441158323648e-06, - "loss": 0.1839, - "step": 18118 - }, - { - "epoch": 1.7068839641082405, - "grad_norm": 0.687613844871521, - "learning_rate": 1.083260461890271e-06, - "loss": 
0.2175, - "step": 18119 - }, - { - "epoch": 1.7069781682014082, - "grad_norm": 0.6671021580696106, - "learning_rate": 1.0825770112636292e-06, - "loss": 0.1971, - "step": 18120 - }, - { - "epoch": 1.7070723722945762, - "grad_norm": 0.6684998273849487, - "learning_rate": 1.081893763968025e-06, - "loss": 0.1991, - "step": 18121 - }, - { - "epoch": 1.7071665763877442, - "grad_norm": 0.6642913222312927, - "learning_rate": 1.0812107200190325e-06, - "loss": 0.1927, - "step": 18122 - }, - { - "epoch": 1.707260780480912, - "grad_norm": 0.6435067057609558, - "learning_rate": 1.0805278794322304e-06, - "loss": 0.1944, - "step": 18123 - }, - { - "epoch": 1.7073549845740796, - "grad_norm": 0.6502464413642883, - "learning_rate": 1.0798452422231864e-06, - "loss": 0.1953, - "step": 18124 - }, - { - "epoch": 1.7074491886672476, - "grad_norm": 0.6137212514877319, - "learning_rate": 1.0791628084074668e-06, - "loss": 0.1827, - "step": 18125 - }, - { - "epoch": 1.7075433927604156, - "grad_norm": 0.6438696980476379, - "learning_rate": 1.0784805780006302e-06, - "loss": 0.1859, - "step": 18126 - }, - { - "epoch": 1.7076375968535833, - "grad_norm": 0.6703856587409973, - "learning_rate": 1.0777985510182332e-06, - "loss": 0.1902, - "step": 18127 - }, - { - "epoch": 1.707731800946751, - "grad_norm": 0.6582531332969666, - "learning_rate": 1.07711672747583e-06, - "loss": 0.1943, - "step": 18128 - }, - { - "epoch": 1.707826005039919, - "grad_norm": 0.851245641708374, - "learning_rate": 1.0764351073889624e-06, - "loss": 0.1973, - "step": 18129 - }, - { - "epoch": 1.707920209133087, - "grad_norm": 0.8826058506965637, - "learning_rate": 1.0757536907731758e-06, - "loss": 0.2247, - "step": 18130 - }, - { - "epoch": 1.7080144132262547, - "grad_norm": 0.6787847876548767, - "learning_rate": 1.075072477644008e-06, - "loss": 0.231, - "step": 18131 - }, - { - "epoch": 1.7081086173194224, - "grad_norm": 0.6661331057548523, - "learning_rate": 1.0743914680169887e-06, - "loss": 0.2095, - "step": 18132 - }, - { - "epoch": 1.7082028214125904, - "grad_norm": 0.6378578543663025, - "learning_rate": 1.0737106619076477e-06, - "loss": 0.1976, - "step": 18133 - }, - { - "epoch": 1.7082970255057583, - "grad_norm": 0.71204674243927, - "learning_rate": 1.07303005933151e-06, - "loss": 0.2129, - "step": 18134 - }, - { - "epoch": 1.708391229598926, - "grad_norm": 0.5995174050331116, - "learning_rate": 1.072349660304093e-06, - "loss": 0.1967, - "step": 18135 - }, - { - "epoch": 1.7084854336920938, - "grad_norm": 0.6255856156349182, - "learning_rate": 1.0716694648409066e-06, - "loss": 0.1882, - "step": 18136 - }, - { - "epoch": 1.7085796377852618, - "grad_norm": 0.6656293869018555, - "learning_rate": 1.0709894729574688e-06, - "loss": 0.2014, - "step": 18137 - }, - { - "epoch": 1.7086738418784297, - "grad_norm": 0.6369428038597107, - "learning_rate": 1.0703096846692796e-06, - "loss": 0.1806, - "step": 18138 - }, - { - "epoch": 1.7087680459715975, - "grad_norm": 0.6439520120620728, - "learning_rate": 1.0696300999918353e-06, - "loss": 0.1806, - "step": 18139 - }, - { - "epoch": 1.7088622500647652, - "grad_norm": 0.6167713403701782, - "learning_rate": 1.06895071894064e-06, - "loss": 0.1976, - "step": 18140 - }, - { - "epoch": 1.7089564541579332, - "grad_norm": 1.0032925605773926, - "learning_rate": 1.0682715415311807e-06, - "loss": 0.1637, - "step": 18141 - }, - { - "epoch": 1.7090506582511011, - "grad_norm": 0.6650711894035339, - "learning_rate": 1.067592567778939e-06, - "loss": 0.1915, - "step": 18142 - }, - { - "epoch": 1.7091448623442689, - 
"grad_norm": 0.7527432441711426, - "learning_rate": 1.066913797699406e-06, - "loss": 0.1813, - "step": 18143 - }, - { - "epoch": 1.7092390664374366, - "grad_norm": 0.8132700324058533, - "learning_rate": 1.0662352313080526e-06, - "loss": 0.1962, - "step": 18144 - }, - { - "epoch": 1.7093332705306046, - "grad_norm": 0.6715097427368164, - "learning_rate": 1.065556868620351e-06, - "loss": 0.2228, - "step": 18145 - }, - { - "epoch": 1.7094274746237725, - "grad_norm": 0.6167192459106445, - "learning_rate": 1.06487870965177e-06, - "loss": 0.1796, - "step": 18146 - }, - { - "epoch": 1.7095216787169403, - "grad_norm": 0.6042233109474182, - "learning_rate": 1.0642007544177745e-06, - "loss": 0.1978, - "step": 18147 - }, - { - "epoch": 1.709615882810108, - "grad_norm": 0.6343446969985962, - "learning_rate": 1.0635230029338196e-06, - "loss": 0.1799, - "step": 18148 - }, - { - "epoch": 1.709710086903276, - "grad_norm": 0.7236276865005493, - "learning_rate": 1.06284545521536e-06, - "loss": 0.2177, - "step": 18149 - }, - { - "epoch": 1.709804290996444, - "grad_norm": 0.5993223190307617, - "learning_rate": 1.0621681112778482e-06, - "loss": 0.1743, - "step": 18150 - }, - { - "epoch": 1.7098984950896117, - "grad_norm": 0.6236757040023804, - "learning_rate": 1.061490971136724e-06, - "loss": 0.1815, - "step": 18151 - }, - { - "epoch": 1.7099926991827794, - "grad_norm": 0.6627745628356934, - "learning_rate": 1.0608140348074292e-06, - "loss": 0.2088, - "step": 18152 - }, - { - "epoch": 1.7100869032759474, - "grad_norm": 0.652539074420929, - "learning_rate": 1.0601373023054018e-06, - "loss": 0.167, - "step": 18153 - }, - { - "epoch": 1.7101811073691153, - "grad_norm": 0.628743588924408, - "learning_rate": 1.059460773646067e-06, - "loss": 0.1709, - "step": 18154 - }, - { - "epoch": 1.710275311462283, - "grad_norm": 0.6140427589416504, - "learning_rate": 1.058784448844855e-06, - "loss": 0.179, - "step": 18155 - }, - { - "epoch": 1.7103695155554508, - "grad_norm": 2.1912851333618164, - "learning_rate": 1.0581083279171843e-06, - "loss": 0.1854, - "step": 18156 - }, - { - "epoch": 1.7104637196486188, - "grad_norm": 0.6636489629745483, - "learning_rate": 1.0574324108784728e-06, - "loss": 0.1941, - "step": 18157 - }, - { - "epoch": 1.7105579237417867, - "grad_norm": 0.7394426465034485, - "learning_rate": 1.0567566977441334e-06, - "loss": 0.2125, - "step": 18158 - }, - { - "epoch": 1.7106521278349545, - "grad_norm": 0.6625232696533203, - "learning_rate": 1.0560811885295708e-06, - "loss": 0.201, - "step": 18159 - }, - { - "epoch": 1.7107463319281222, - "grad_norm": 0.6909732222557068, - "learning_rate": 1.055405883250189e-06, - "loss": 0.1804, - "step": 18160 - }, - { - "epoch": 1.7108405360212902, - "grad_norm": 0.6699437499046326, - "learning_rate": 1.054730781921388e-06, - "loss": 0.2196, - "step": 18161 - }, - { - "epoch": 1.7109347401144581, - "grad_norm": 0.6777843832969666, - "learning_rate": 1.054055884558558e-06, - "loss": 0.194, - "step": 18162 - }, - { - "epoch": 1.7110289442076259, - "grad_norm": 0.6522235870361328, - "learning_rate": 1.0533811911770896e-06, - "loss": 0.1768, - "step": 18163 - }, - { - "epoch": 1.7111231483007936, - "grad_norm": 0.659884512424469, - "learning_rate": 1.0527067017923654e-06, - "loss": 0.188, - "step": 18164 - }, - { - "epoch": 1.7112173523939616, - "grad_norm": 0.6575126647949219, - "learning_rate": 1.0520324164197683e-06, - "loss": 0.1951, - "step": 18165 - }, - { - "epoch": 1.7113115564871295, - "grad_norm": 0.6973704099655151, - "learning_rate": 1.051358335074667e-06, - 
"loss": 0.1987, - "step": 18166 - }, - { - "epoch": 1.7114057605802973, - "grad_norm": 0.6405927538871765, - "learning_rate": 1.0506844577724352e-06, - "loss": 0.1963, - "step": 18167 - }, - { - "epoch": 1.711499964673465, - "grad_norm": 0.6451680064201355, - "learning_rate": 1.0500107845284402e-06, - "loss": 0.1817, - "step": 18168 - }, - { - "epoch": 1.711594168766633, - "grad_norm": 0.5515987277030945, - "learning_rate": 1.049337315358039e-06, - "loss": 0.1632, - "step": 18169 - }, - { - "epoch": 1.711688372859801, - "grad_norm": 0.6623426079750061, - "learning_rate": 1.04866405027659e-06, - "loss": 0.1984, - "step": 18170 - }, - { - "epoch": 1.7117825769529686, - "grad_norm": 0.6332948207855225, - "learning_rate": 1.0479909892994456e-06, - "loss": 0.1891, - "step": 18171 - }, - { - "epoch": 1.7118767810461364, - "grad_norm": 0.6678027510643005, - "learning_rate": 1.047318132441949e-06, - "loss": 0.1899, - "step": 18172 - }, - { - "epoch": 1.7119709851393043, - "grad_norm": 0.7194743752479553, - "learning_rate": 1.0466454797194448e-06, - "loss": 0.2106, - "step": 18173 - }, - { - "epoch": 1.7120651892324723, - "grad_norm": 0.6241485476493835, - "learning_rate": 1.0459730311472716e-06, - "loss": 0.1709, - "step": 18174 - }, - { - "epoch": 1.71215939332564, - "grad_norm": 0.6804275512695312, - "learning_rate": 1.0453007867407617e-06, - "loss": 0.1752, - "step": 18175 - }, - { - "epoch": 1.7122535974188078, - "grad_norm": 0.7246103882789612, - "learning_rate": 1.0446287465152383e-06, - "loss": 0.1967, - "step": 18176 - }, - { - "epoch": 1.7123478015119757, - "grad_norm": 0.7021299600601196, - "learning_rate": 1.0439569104860337e-06, - "loss": 0.2153, - "step": 18177 - }, - { - "epoch": 1.7124420056051437, - "grad_norm": 0.6220550537109375, - "learning_rate": 1.043285278668462e-06, - "loss": 0.1616, - "step": 18178 - }, - { - "epoch": 1.7125362096983114, - "grad_norm": 0.6525019407272339, - "learning_rate": 1.0426138510778338e-06, - "loss": 0.1964, - "step": 18179 - }, - { - "epoch": 1.7126304137914792, - "grad_norm": 0.6876130700111389, - "learning_rate": 1.0419426277294663e-06, - "loss": 0.1733, - "step": 18180 - }, - { - "epoch": 1.7127246178846471, - "grad_norm": 0.6705190539360046, - "learning_rate": 1.041271608638662e-06, - "loss": 0.2052, - "step": 18181 - }, - { - "epoch": 1.712818821977815, - "grad_norm": 0.6083057522773743, - "learning_rate": 1.0406007938207153e-06, - "loss": 0.1654, - "step": 18182 - }, - { - "epoch": 1.7129130260709828, - "grad_norm": 0.6081562638282776, - "learning_rate": 1.0399301832909303e-06, - "loss": 0.1769, - "step": 18183 - }, - { - "epoch": 1.7130072301641506, - "grad_norm": 0.650029718875885, - "learning_rate": 1.039259777064594e-06, - "loss": 0.1716, - "step": 18184 - }, - { - "epoch": 1.7131014342573185, - "grad_norm": 0.6393464207649231, - "learning_rate": 1.0385895751569909e-06, - "loss": 0.1655, - "step": 18185 - }, - { - "epoch": 1.7131956383504865, - "grad_norm": 0.6619433164596558, - "learning_rate": 1.037919577583404e-06, - "loss": 0.2049, - "step": 18186 - }, - { - "epoch": 1.7132898424436542, - "grad_norm": 0.7716788649559021, - "learning_rate": 1.0372497843591133e-06, - "loss": 0.2108, - "step": 18187 - }, - { - "epoch": 1.713384046536822, - "grad_norm": 0.6516677737236023, - "learning_rate": 1.0365801954993871e-06, - "loss": 0.1917, - "step": 18188 - }, - { - "epoch": 1.71347825062999, - "grad_norm": 0.6674495935440063, - "learning_rate": 1.035910811019495e-06, - "loss": 0.1888, - "step": 18189 - }, - { - "epoch": 1.7135724547231577, - 
"grad_norm": 0.6783060431480408, - "learning_rate": 1.0352416309347003e-06, - "loss": 0.2232, - "step": 18190 - }, - { - "epoch": 1.7136666588163254, - "grad_norm": 0.7408477067947388, - "learning_rate": 1.0345726552602598e-06, - "loss": 0.2063, - "step": 18191 - }, - { - "epoch": 1.7137608629094934, - "grad_norm": 0.6930561661720276, - "learning_rate": 1.033903884011428e-06, - "loss": 0.1881, - "step": 18192 - }, - { - "epoch": 1.7138550670026613, - "grad_norm": 0.6963168382644653, - "learning_rate": 1.0332353172034548e-06, - "loss": 0.2045, - "step": 18193 - }, - { - "epoch": 1.713949271095829, - "grad_norm": 0.6521729826927185, - "learning_rate": 1.0325669548515826e-06, - "loss": 0.1841, - "step": 18194 - }, - { - "epoch": 1.7140434751889968, - "grad_norm": 0.652824342250824, - "learning_rate": 1.0318987969710548e-06, - "loss": 0.2027, - "step": 18195 - }, - { - "epoch": 1.7141376792821648, - "grad_norm": 0.5659494996070862, - "learning_rate": 1.0312308435771013e-06, - "loss": 0.167, - "step": 18196 - }, - { - "epoch": 1.7142318833753327, - "grad_norm": 0.6974686980247498, - "learning_rate": 1.0305630946849554e-06, - "loss": 0.2141, - "step": 18197 - }, - { - "epoch": 1.7143260874685005, - "grad_norm": 0.6462910771369934, - "learning_rate": 1.029895550309844e-06, - "loss": 0.1891, - "step": 18198 - }, - { - "epoch": 1.7144202915616682, - "grad_norm": 0.7359683513641357, - "learning_rate": 1.0292282104669837e-06, - "loss": 0.1938, - "step": 18199 - }, - { - "epoch": 1.7145144956548362, - "grad_norm": 0.6937127113342285, - "learning_rate": 1.028561075171597e-06, - "loss": 0.2113, - "step": 18200 - }, - { - "epoch": 1.7146086997480041, - "grad_norm": 0.6812256574630737, - "learning_rate": 1.0278941444388902e-06, - "loss": 0.1776, - "step": 18201 - }, - { - "epoch": 1.7147029038411719, - "grad_norm": 0.6547383666038513, - "learning_rate": 1.0272274182840724e-06, - "loss": 0.1916, - "step": 18202 - }, - { - "epoch": 1.7147971079343396, - "grad_norm": 0.7019928693771362, - "learning_rate": 1.0265608967223483e-06, - "loss": 0.2001, - "step": 18203 - }, - { - "epoch": 1.7148913120275076, - "grad_norm": 0.6956671476364136, - "learning_rate": 1.0258945797689112e-06, - "loss": 0.1973, - "step": 18204 - }, - { - "epoch": 1.7149855161206755, - "grad_norm": 0.6771193742752075, - "learning_rate": 1.025228467438959e-06, - "loss": 0.1947, - "step": 18205 - }, - { - "epoch": 1.7150797202138433, - "grad_norm": 0.597726583480835, - "learning_rate": 1.0245625597476748e-06, - "loss": 0.1738, - "step": 18206 - }, - { - "epoch": 1.715173924307011, - "grad_norm": 0.6639793515205383, - "learning_rate": 1.0238968567102448e-06, - "loss": 0.2011, - "step": 18207 - }, - { - "epoch": 1.715268128400179, - "grad_norm": 0.625450074672699, - "learning_rate": 1.0232313583418507e-06, - "loss": 0.2001, - "step": 18208 - }, - { - "epoch": 1.715362332493347, - "grad_norm": 0.6180112361907959, - "learning_rate": 1.022566064657663e-06, - "loss": 0.1849, - "step": 18209 - }, - { - "epoch": 1.7154565365865146, - "grad_norm": 0.6868562698364258, - "learning_rate": 1.0219009756728516e-06, - "loss": 0.2031, - "step": 18210 - }, - { - "epoch": 1.7155507406796824, - "grad_norm": 0.6511270403862, - "learning_rate": 1.0212360914025865e-06, - "loss": 0.1744, - "step": 18211 - }, - { - "epoch": 1.7156449447728503, - "grad_norm": 0.6476146578788757, - "learning_rate": 1.0205714118620214e-06, - "loss": 0.1861, - "step": 18212 - }, - { - "epoch": 1.7157391488660183, - "grad_norm": 0.758589506149292, - "learning_rate": 1.019906937066315e-06, 
- "loss": 0.1983, - "step": 18213 - }, - { - "epoch": 1.715833352959186, - "grad_norm": 0.6474173665046692, - "learning_rate": 1.019242667030621e-06, - "loss": 0.2005, - "step": 18214 - }, - { - "epoch": 1.7159275570523538, - "grad_norm": 0.6607803106307983, - "learning_rate": 1.0185786017700828e-06, - "loss": 0.1853, - "step": 18215 - }, - { - "epoch": 1.7160217611455217, - "grad_norm": 0.6892468333244324, - "learning_rate": 1.017914741299838e-06, - "loss": 0.1937, - "step": 18216 - }, - { - "epoch": 1.7161159652386897, - "grad_norm": 0.6857177019119263, - "learning_rate": 1.0172510856350326e-06, - "loss": 0.1789, - "step": 18217 - }, - { - "epoch": 1.7162101693318574, - "grad_norm": 0.607350766658783, - "learning_rate": 1.0165876347907944e-06, - "loss": 0.2003, - "step": 18218 - }, - { - "epoch": 1.7163043734250252, - "grad_norm": 0.638554036617279, - "learning_rate": 1.0159243887822479e-06, - "loss": 0.2004, - "step": 18219 - }, - { - "epoch": 1.7163985775181931, - "grad_norm": 0.6638485789299011, - "learning_rate": 1.0152613476245232e-06, - "loss": 0.1718, - "step": 18220 - }, - { - "epoch": 1.716492781611361, - "grad_norm": 0.6935262680053711, - "learning_rate": 1.014598511332735e-06, - "loss": 0.1828, - "step": 18221 - }, - { - "epoch": 1.7165869857045288, - "grad_norm": 0.6868494749069214, - "learning_rate": 1.0139358799219924e-06, - "loss": 0.2059, - "step": 18222 - }, - { - "epoch": 1.7166811897976966, - "grad_norm": 0.7230465412139893, - "learning_rate": 1.013273453407413e-06, - "loss": 0.1789, - "step": 18223 - }, - { - "epoch": 1.7167753938908645, - "grad_norm": 0.7610085606575012, - "learning_rate": 1.0126112318040981e-06, - "loss": 0.1926, - "step": 18224 - }, - { - "epoch": 1.7168695979840325, - "grad_norm": 0.6834355592727661, - "learning_rate": 1.0119492151271438e-06, - "loss": 0.2293, - "step": 18225 - }, - { - "epoch": 1.7169638020772002, - "grad_norm": 0.6361607313156128, - "learning_rate": 1.0112874033916465e-06, - "loss": 0.1826, - "step": 18226 - }, - { - "epoch": 1.717058006170368, - "grad_norm": 0.7722209095954895, - "learning_rate": 1.010625796612701e-06, - "loss": 0.2007, - "step": 18227 - }, - { - "epoch": 1.717152210263536, - "grad_norm": 0.6115467548370361, - "learning_rate": 1.0099643948053872e-06, - "loss": 0.2103, - "step": 18228 - }, - { - "epoch": 1.7172464143567039, - "grad_norm": 0.7447245717048645, - "learning_rate": 1.009303197984789e-06, - "loss": 0.1919, - "step": 18229 - }, - { - "epoch": 1.7173406184498716, - "grad_norm": 0.6729543805122375, - "learning_rate": 1.008642206165984e-06, - "loss": 0.1762, - "step": 18230 - }, - { - "epoch": 1.7174348225430394, - "grad_norm": 0.6252509355545044, - "learning_rate": 1.0079814193640403e-06, - "loss": 0.1947, - "step": 18231 - }, - { - "epoch": 1.7175290266362073, - "grad_norm": 0.6713355183601379, - "learning_rate": 1.007320837594027e-06, - "loss": 0.2338, - "step": 18232 - }, - { - "epoch": 1.7176232307293753, - "grad_norm": 0.6256462931632996, - "learning_rate": 1.0066604608710073e-06, - "loss": 0.2017, - "step": 18233 - }, - { - "epoch": 1.717717434822543, - "grad_norm": 0.6962444186210632, - "learning_rate": 1.0060002892100363e-06, - "loss": 0.1896, - "step": 18234 - }, - { - "epoch": 1.7178116389157108, - "grad_norm": 0.6423612833023071, - "learning_rate": 1.0053403226261694e-06, - "loss": 0.1663, - "step": 18235 - }, - { - "epoch": 1.7179058430088787, - "grad_norm": 0.6608044505119324, - "learning_rate": 1.0046805611344512e-06, - "loss": 0.1918, - "step": 18236 - }, - { - "epoch": 1.7180000471020467, 
- "grad_norm": 0.7020596861839294, - "learning_rate": 1.0040210047499289e-06, - "loss": 0.1788, - "step": 18237 - }, - { - "epoch": 1.7180942511952144, - "grad_norm": 0.6175522208213806, - "learning_rate": 1.0033616534876423e-06, - "loss": 0.1907, - "step": 18238 - }, - { - "epoch": 1.7181884552883822, - "grad_norm": 0.941880464553833, - "learning_rate": 1.0027025073626206e-06, - "loss": 0.1775, - "step": 18239 - }, - { - "epoch": 1.7182826593815501, - "grad_norm": 0.5877217054367065, - "learning_rate": 1.0020435663898985e-06, - "loss": 0.1795, - "step": 18240 - }, - { - "epoch": 1.718376863474718, - "grad_norm": 0.7008559703826904, - "learning_rate": 1.0013848305844975e-06, - "loss": 0.214, - "step": 18241 - }, - { - "epoch": 1.7184710675678858, - "grad_norm": 0.6821069717407227, - "learning_rate": 1.0007262999614387e-06, - "loss": 0.1908, - "step": 18242 - }, - { - "epoch": 1.7185652716610536, - "grad_norm": 0.6792659163475037, - "learning_rate": 1.00006797453574e-06, - "loss": 0.1775, - "step": 18243 - }, - { - "epoch": 1.7186594757542215, - "grad_norm": 0.6423118114471436, - "learning_rate": 9.994098543224073e-07, - "loss": 0.2076, - "step": 18244 - }, - { - "epoch": 1.7187536798473895, - "grad_norm": 0.6862238645553589, - "learning_rate": 9.98751939336452e-07, - "loss": 0.1977, - "step": 18245 - }, - { - "epoch": 1.7188478839405572, - "grad_norm": 0.7554076313972473, - "learning_rate": 9.98094229592872e-07, - "loss": 0.2046, - "step": 18246 - }, - { - "epoch": 1.718942088033725, - "grad_norm": 0.6626954078674316, - "learning_rate": 9.974367251066642e-07, - "loss": 0.2038, - "step": 18247 - }, - { - "epoch": 1.719036292126893, - "grad_norm": 0.7395123839378357, - "learning_rate": 9.967794258928243e-07, - "loss": 0.1841, - "step": 18248 - }, - { - "epoch": 1.7191304962200609, - "grad_norm": 0.6694244742393494, - "learning_rate": 9.961223319663349e-07, - "loss": 0.1823, - "step": 18249 - }, - { - "epoch": 1.7192247003132286, - "grad_norm": 0.6248368620872498, - "learning_rate": 9.954654433421818e-07, - "loss": 0.216, - "step": 18250 - }, - { - "epoch": 1.7193189044063963, - "grad_norm": 0.6329765319824219, - "learning_rate": 9.94808760035344e-07, - "loss": 0.1958, - "step": 18251 - }, - { - "epoch": 1.7194131084995643, - "grad_norm": 0.6825846433639526, - "learning_rate": 9.941522820607908e-07, - "loss": 0.1955, - "step": 18252 - }, - { - "epoch": 1.7195073125927323, - "grad_norm": 0.6598239541053772, - "learning_rate": 9.934960094334934e-07, - "loss": 0.178, - "step": 18253 - }, - { - "epoch": 1.7196015166859, - "grad_norm": 0.6834250688552856, - "learning_rate": 9.928399421684186e-07, - "loss": 0.2026, - "step": 18254 - }, - { - "epoch": 1.7196957207790677, - "grad_norm": 0.6410261392593384, - "learning_rate": 9.921840802805228e-07, - "loss": 0.1953, - "step": 18255 - }, - { - "epoch": 1.7197899248722357, - "grad_norm": 0.6028664112091064, - "learning_rate": 9.915284237847566e-07, - "loss": 0.1866, - "step": 18256 - }, - { - "epoch": 1.7198841289654037, - "grad_norm": 0.6074325442314148, - "learning_rate": 9.908729726960776e-07, - "loss": 0.1796, - "step": 18257 - }, - { - "epoch": 1.7199783330585714, - "grad_norm": 0.700425922870636, - "learning_rate": 9.902177270294288e-07, - "loss": 0.1744, - "step": 18258 - }, - { - "epoch": 1.7200725371517391, - "grad_norm": 0.6485363841056824, - "learning_rate": 9.895626867997454e-07, - "loss": 0.1787, - "step": 18259 - }, - { - "epoch": 1.720166741244907, - "grad_norm": 0.6814277172088623, - "learning_rate": 9.889078520219708e-07, - "loss": 
0.187, - "step": 18260 - }, - { - "epoch": 1.720260945338075, - "grad_norm": 0.6854941248893738, - "learning_rate": 9.882532227110343e-07, - "loss": 0.1849, - "step": 18261 - }, - { - "epoch": 1.7203551494312428, - "grad_norm": 0.5841566324234009, - "learning_rate": 9.87598798881857e-07, - "loss": 0.1937, - "step": 18262 - }, - { - "epoch": 1.7204493535244105, - "grad_norm": 0.6688234806060791, - "learning_rate": 9.869445805493682e-07, - "loss": 0.2034, - "step": 18263 - }, - { - "epoch": 1.7205435576175785, - "grad_norm": 0.6713355183601379, - "learning_rate": 9.862905677284828e-07, - "loss": 0.1775, - "step": 18264 - }, - { - "epoch": 1.7206377617107464, - "grad_norm": 0.633851945400238, - "learning_rate": 9.8563676043411e-07, - "loss": 0.1961, - "step": 18265 - }, - { - "epoch": 1.7207319658039142, - "grad_norm": 0.6884269714355469, - "learning_rate": 9.849831586811597e-07, - "loss": 0.1894, - "step": 18266 - }, - { - "epoch": 1.720826169897082, - "grad_norm": 0.6791149973869324, - "learning_rate": 9.843297624845382e-07, - "loss": 0.2027, - "step": 18267 - }, - { - "epoch": 1.7209203739902499, - "grad_norm": 0.6391034126281738, - "learning_rate": 9.83676571859138e-07, - "loss": 0.1858, - "step": 18268 - }, - { - "epoch": 1.7210145780834178, - "grad_norm": 0.7244287729263306, - "learning_rate": 9.830235868198567e-07, - "loss": 0.204, - "step": 18269 - }, - { - "epoch": 1.7211087821765856, - "grad_norm": 0.6550241708755493, - "learning_rate": 9.823708073815852e-07, - "loss": 0.1966, - "step": 18270 - }, - { - "epoch": 1.7212029862697533, - "grad_norm": 0.7238351702690125, - "learning_rate": 9.817182335592023e-07, - "loss": 0.2001, - "step": 18271 - }, - { - "epoch": 1.7212971903629213, - "grad_norm": 0.6201068162918091, - "learning_rate": 9.81065865367592e-07, - "loss": 0.1891, - "step": 18272 - }, - { - "epoch": 1.7213913944560892, - "grad_norm": 0.690910816192627, - "learning_rate": 9.804137028216286e-07, - "loss": 0.1918, - "step": 18273 - }, - { - "epoch": 1.721485598549257, - "grad_norm": 0.5405566096305847, - "learning_rate": 9.797617459361808e-07, - "loss": 0.1959, - "step": 18274 - }, - { - "epoch": 1.7215798026424247, - "grad_norm": 0.6876025199890137, - "learning_rate": 9.791099947261162e-07, - "loss": 0.2096, - "step": 18275 - }, - { - "epoch": 1.7216740067355927, - "grad_norm": 0.6582169532775879, - "learning_rate": 9.784584492062942e-07, - "loss": 0.2002, - "step": 18276 - }, - { - "epoch": 1.7217682108287606, - "grad_norm": 0.6868323683738708, - "learning_rate": 9.778071093915709e-07, - "loss": 0.225, - "step": 18277 - }, - { - "epoch": 1.7218624149219284, - "grad_norm": 0.7047544717788696, - "learning_rate": 9.771559752968008e-07, - "loss": 0.2121, - "step": 18278 - }, - { - "epoch": 1.7219566190150961, - "grad_norm": 0.646683931350708, - "learning_rate": 9.765050469368254e-07, - "loss": 0.174, - "step": 18279 - }, - { - "epoch": 1.722050823108264, - "grad_norm": 0.6365481615066528, - "learning_rate": 9.758543243264939e-07, - "loss": 0.1659, - "step": 18280 - }, - { - "epoch": 1.722145027201432, - "grad_norm": 0.6406670212745667, - "learning_rate": 9.75203807480637e-07, - "loss": 0.1995, - "step": 18281 - }, - { - "epoch": 1.7222392312945998, - "grad_norm": 0.6405190229415894, - "learning_rate": 9.7455349641409e-07, - "loss": 0.2014, - "step": 18282 - }, - { - "epoch": 1.7223334353877675, - "grad_norm": 0.7335394620895386, - "learning_rate": 9.73903391141684e-07, - "loss": 0.1992, - "step": 18283 - }, - { - "epoch": 1.7224276394809355, - "grad_norm": 0.6978424191474915, - 
"learning_rate": 9.732534916782377e-07, - "loss": 0.1933, - "step": 18284 - }, - { - "epoch": 1.7225218435741034, - "grad_norm": 0.6657301783561707, - "learning_rate": 9.72603798038574e-07, - "loss": 0.1853, - "step": 18285 - }, - { - "epoch": 1.7226160476672712, - "grad_norm": 0.6643604040145874, - "learning_rate": 9.719543102375028e-07, - "loss": 0.205, - "step": 18286 - }, - { - "epoch": 1.722710251760439, - "grad_norm": 0.6385934352874756, - "learning_rate": 9.713050282898351e-07, - "loss": 0.1801, - "step": 18287 - }, - { - "epoch": 1.7228044558536069, - "grad_norm": 0.7379485368728638, - "learning_rate": 9.706559522103775e-07, - "loss": 0.2111, - "step": 18288 - }, - { - "epoch": 1.7228986599467748, - "grad_norm": 0.6756138801574707, - "learning_rate": 9.700070820139274e-07, - "loss": 0.1954, - "step": 18289 - }, - { - "epoch": 1.7229928640399426, - "grad_norm": 0.7081589102745056, - "learning_rate": 9.693584177152804e-07, - "loss": 0.1977, - "step": 18290 - }, - { - "epoch": 1.7230870681331103, - "grad_norm": 0.7241060137748718, - "learning_rate": 9.687099593292304e-07, - "loss": 0.2215, - "step": 18291 - }, - { - "epoch": 1.7231812722262783, - "grad_norm": 0.7350836992263794, - "learning_rate": 9.680617068705577e-07, - "loss": 0.212, - "step": 18292 - }, - { - "epoch": 1.7232754763194462, - "grad_norm": 0.6153767108917236, - "learning_rate": 9.674136603540463e-07, - "loss": 0.1926, - "step": 18293 - }, - { - "epoch": 1.723369680412614, - "grad_norm": 0.7060182690620422, - "learning_rate": 9.667658197944752e-07, - "loss": 0.2125, - "step": 18294 - }, - { - "epoch": 1.7234638845057817, - "grad_norm": 0.6536703109741211, - "learning_rate": 9.661181852066127e-07, - "loss": 0.1823, - "step": 18295 - }, - { - "epoch": 1.7235580885989497, - "grad_norm": 0.6886441111564636, - "learning_rate": 9.654707566052236e-07, - "loss": 0.2147, - "step": 18296 - }, - { - "epoch": 1.7236522926921176, - "grad_norm": 0.6352055668830872, - "learning_rate": 9.648235340050772e-07, - "loss": 0.2042, - "step": 18297 - }, - { - "epoch": 1.7237464967852854, - "grad_norm": 0.76617032289505, - "learning_rate": 9.64176517420926e-07, - "loss": 0.1771, - "step": 18298 - }, - { - "epoch": 1.723840700878453, - "grad_norm": 0.6597750782966614, - "learning_rate": 9.63529706867522e-07, - "loss": 0.1948, - "step": 18299 - }, - { - "epoch": 1.723934904971621, - "grad_norm": 0.7410958409309387, - "learning_rate": 9.628831023596197e-07, - "loss": 0.2078, - "step": 18300 - }, - { - "epoch": 1.724029109064789, - "grad_norm": 0.6253803968429565, - "learning_rate": 9.622367039119584e-07, - "loss": 0.1765, - "step": 18301 - }, - { - "epoch": 1.7241233131579567, - "grad_norm": 0.8138961791992188, - "learning_rate": 9.615905115392733e-07, - "loss": 0.1848, - "step": 18302 - }, - { - "epoch": 1.7242175172511245, - "grad_norm": 0.6228686571121216, - "learning_rate": 9.609445252563078e-07, - "loss": 0.1879, - "step": 18303 - }, - { - "epoch": 1.7243117213442924, - "grad_norm": 0.6464802026748657, - "learning_rate": 9.602987450777845e-07, - "loss": 0.2094, - "step": 18304 - }, - { - "epoch": 1.7244059254374604, - "grad_norm": 0.5831804871559143, - "learning_rate": 9.5965317101843e-07, - "loss": 0.1748, - "step": 18305 - }, - { - "epoch": 1.7245001295306281, - "grad_norm": 0.6228283643722534, - "learning_rate": 9.590078030929628e-07, - "loss": 0.1865, - "step": 18306 - }, - { - "epoch": 1.7245943336237959, - "grad_norm": 0.8141707181930542, - "learning_rate": 9.583626413161018e-07, - "loss": 0.2085, - "step": 18307 - }, - { - "epoch": 
1.7246885377169638, - "grad_norm": 0.7365865707397461, - "learning_rate": 9.57717685702555e-07, - "loss": 0.2034, - "step": 18308 - }, - { - "epoch": 1.7247827418101318, - "grad_norm": 0.7254769802093506, - "learning_rate": 9.570729362670284e-07, - "loss": 0.2196, - "step": 18309 - }, - { - "epoch": 1.7248769459032995, - "grad_norm": 0.6105891466140747, - "learning_rate": 9.564283930242258e-07, - "loss": 0.1968, - "step": 18310 - }, - { - "epoch": 1.7249711499964673, - "grad_norm": 0.7820233106613159, - "learning_rate": 9.5578405598884e-07, - "loss": 0.1708, - "step": 18311 - }, - { - "epoch": 1.7250653540896352, - "grad_norm": 0.6825641393661499, - "learning_rate": 9.551399251755654e-07, - "loss": 0.2058, - "step": 18312 - }, - { - "epoch": 1.7251595581828032, - "grad_norm": 0.6899803876876831, - "learning_rate": 9.5449600059909e-07, - "loss": 0.1909, - "step": 18313 - }, - { - "epoch": 1.725253762275971, - "grad_norm": 0.6918448805809021, - "learning_rate": 9.538522822740937e-07, - "loss": 0.2049, - "step": 18314 - }, - { - "epoch": 1.7253479663691387, - "grad_norm": 0.6330221891403198, - "learning_rate": 9.53208770215257e-07, - "loss": 0.172, - "step": 18315 - }, - { - "epoch": 1.7254421704623066, - "grad_norm": 0.6738264560699463, - "learning_rate": 9.525654644372495e-07, - "loss": 0.2003, - "step": 18316 - }, - { - "epoch": 1.7255363745554746, - "grad_norm": 0.6461614370346069, - "learning_rate": 9.519223649547437e-07, - "loss": 0.2075, - "step": 18317 - }, - { - "epoch": 1.7256305786486423, - "grad_norm": 0.7237948775291443, - "learning_rate": 9.512794717823992e-07, - "loss": 0.1925, - "step": 18318 - }, - { - "epoch": 1.72572478274181, - "grad_norm": 0.7411685585975647, - "learning_rate": 9.506367849348763e-07, - "loss": 0.1935, - "step": 18319 - }, - { - "epoch": 1.725818986834978, - "grad_norm": 0.6511257886886597, - "learning_rate": 9.499943044268323e-07, - "loss": 0.1934, - "step": 18320 - }, - { - "epoch": 1.725913190928146, - "grad_norm": 0.6301068067550659, - "learning_rate": 9.49352030272912e-07, - "loss": 0.1771, - "step": 18321 - }, - { - "epoch": 1.7260073950213137, - "grad_norm": 0.6631643772125244, - "learning_rate": 9.487099624877627e-07, - "loss": 0.2078, - "step": 18322 - }, - { - "epoch": 1.7261015991144815, - "grad_norm": 0.6139720678329468, - "learning_rate": 9.48068101086026e-07, - "loss": 0.1854, - "step": 18323 - }, - { - "epoch": 1.7261958032076494, - "grad_norm": 0.6714940667152405, - "learning_rate": 9.474264460823346e-07, - "loss": 0.1943, - "step": 18324 - }, - { - "epoch": 1.7262900073008174, - "grad_norm": 0.6282404065132141, - "learning_rate": 9.467849974913212e-07, - "loss": 0.2068, - "step": 18325 - }, - { - "epoch": 1.7263842113939851, - "grad_norm": 0.6319966912269592, - "learning_rate": 9.461437553276098e-07, - "loss": 0.1847, - "step": 18326 - }, - { - "epoch": 1.7264784154871529, - "grad_norm": 0.6349130868911743, - "learning_rate": 9.45502719605822e-07, - "loss": 0.1986, - "step": 18327 - }, - { - "epoch": 1.7265726195803208, - "grad_norm": 0.646722137928009, - "learning_rate": 9.448618903405782e-07, - "loss": 0.1895, - "step": 18328 - }, - { - "epoch": 1.7266668236734886, - "grad_norm": 0.6951996088027954, - "learning_rate": 9.442212675464845e-07, - "loss": 0.2064, - "step": 18329 - }, - { - "epoch": 1.7267610277666563, - "grad_norm": 0.6779601573944092, - "learning_rate": 9.435808512381506e-07, - "loss": 0.2039, - "step": 18330 - }, - { - "epoch": 1.7268552318598243, - "grad_norm": 0.756580114364624, - "learning_rate": 9.429406414301822e-07, - 
"loss": 0.178, - "step": 18331 - }, - { - "epoch": 1.7269494359529922, - "grad_norm": 0.6476389765739441, - "learning_rate": 9.423006381371724e-07, - "loss": 0.1985, - "step": 18332 - }, - { - "epoch": 1.72704364004616, - "grad_norm": 0.653612494468689, - "learning_rate": 9.416608413737149e-07, - "loss": 0.18, - "step": 18333 - }, - { - "epoch": 1.7271378441393277, - "grad_norm": 0.6533164978027344, - "learning_rate": 9.410212511544025e-07, - "loss": 0.192, - "step": 18334 - }, - { - "epoch": 1.7272320482324957, - "grad_norm": 0.6046565771102905, - "learning_rate": 9.403818674938148e-07, - "loss": 0.1901, - "step": 18335 - }, - { - "epoch": 1.7273262523256636, - "grad_norm": 0.7406439185142517, - "learning_rate": 9.397426904065277e-07, - "loss": 0.1853, - "step": 18336 - }, - { - "epoch": 1.7274204564188314, - "grad_norm": 0.6120457649230957, - "learning_rate": 9.391037199071229e-07, - "loss": 0.1881, - "step": 18337 - }, - { - "epoch": 1.727514660511999, - "grad_norm": 0.6178550124168396, - "learning_rate": 9.384649560101667e-07, - "loss": 0.1781, - "step": 18338 - }, - { - "epoch": 1.727608864605167, - "grad_norm": 0.7009128332138062, - "learning_rate": 9.378263987302194e-07, - "loss": 0.2391, - "step": 18339 - }, - { - "epoch": 1.727703068698335, - "grad_norm": 0.6016954183578491, - "learning_rate": 9.371880480818485e-07, - "loss": 0.1631, - "step": 18340 - }, - { - "epoch": 1.7277972727915027, - "grad_norm": 0.6001085042953491, - "learning_rate": 9.365499040796066e-07, - "loss": 0.1927, - "step": 18341 - }, - { - "epoch": 1.7278914768846705, - "grad_norm": 0.6183971762657166, - "learning_rate": 9.359119667380412e-07, - "loss": 0.1741, - "step": 18342 - }, - { - "epoch": 1.7279856809778384, - "grad_norm": 0.7459295392036438, - "learning_rate": 9.352742360717015e-07, - "loss": 0.2034, - "step": 18343 - }, - { - "epoch": 1.7280798850710064, - "grad_norm": 0.6946557760238647, - "learning_rate": 9.346367120951305e-07, - "loss": 0.1828, - "step": 18344 - }, - { - "epoch": 1.7281740891641741, - "grad_norm": 0.6636278033256531, - "learning_rate": 9.339993948228587e-07, - "loss": 0.1718, - "step": 18345 - }, - { - "epoch": 1.7282682932573419, - "grad_norm": 0.6898250579833984, - "learning_rate": 9.333622842694234e-07, - "loss": 0.1958, - "step": 18346 - }, - { - "epoch": 1.7283624973505098, - "grad_norm": 0.6806836128234863, - "learning_rate": 9.327253804493508e-07, - "loss": 0.2064, - "step": 18347 - }, - { - "epoch": 1.7284567014436778, - "grad_norm": 0.7299802899360657, - "learning_rate": 9.320886833771603e-07, - "loss": 0.2029, - "step": 18348 - }, - { - "epoch": 1.7285509055368455, - "grad_norm": 0.6397003531455994, - "learning_rate": 9.314521930673714e-07, - "loss": 0.1931, - "step": 18349 - }, - { - "epoch": 1.7286451096300133, - "grad_norm": 0.6581284999847412, - "learning_rate": 9.308159095345004e-07, - "loss": 0.1888, - "step": 18350 - }, - { - "epoch": 1.7287393137231812, - "grad_norm": 0.6814366579055786, - "learning_rate": 9.301798327930489e-07, - "loss": 0.1968, - "step": 18351 - }, - { - "epoch": 1.7288335178163492, - "grad_norm": 0.7398734092712402, - "learning_rate": 9.295439628575253e-07, - "loss": 0.2177, - "step": 18352 - }, - { - "epoch": 1.728927721909517, - "grad_norm": 0.640472948551178, - "learning_rate": 9.289082997424281e-07, - "loss": 0.1842, - "step": 18353 - }, - { - "epoch": 1.7290219260026847, - "grad_norm": 0.6734275817871094, - "learning_rate": 9.28272843462249e-07, - "loss": 0.23, - "step": 18354 - }, - { - "epoch": 1.7291161300958526, - "grad_norm": 
0.6754468083381653, - "learning_rate": 9.276375940314807e-07, - "loss": 0.1917, - "step": 18355 - }, - { - "epoch": 1.7292103341890206, - "grad_norm": 0.6099188923835754, - "learning_rate": 9.270025514646042e-07, - "loss": 0.168, - "step": 18356 - }, - { - "epoch": 1.7293045382821883, - "grad_norm": 0.651532769203186, - "learning_rate": 9.26367715776102e-07, - "loss": 0.1998, - "step": 18357 - }, - { - "epoch": 1.729398742375356, - "grad_norm": 0.6025399565696716, - "learning_rate": 9.257330869804482e-07, - "loss": 0.1894, - "step": 18358 - }, - { - "epoch": 1.729492946468524, - "grad_norm": 0.6744096875190735, - "learning_rate": 9.250986650921124e-07, - "loss": 0.2052, - "step": 18359 - }, - { - "epoch": 1.729587150561692, - "grad_norm": 0.5932868719100952, - "learning_rate": 9.24464450125564e-07, - "loss": 0.1838, - "step": 18360 - }, - { - "epoch": 1.7296813546548597, - "grad_norm": 0.627146303653717, - "learning_rate": 9.238304420952593e-07, - "loss": 0.1953, - "step": 18361 - }, - { - "epoch": 1.7297755587480275, - "grad_norm": 0.6654533743858337, - "learning_rate": 9.231966410156578e-07, - "loss": 0.1779, - "step": 18362 - }, - { - "epoch": 1.7298697628411954, - "grad_norm": 0.6812465786933899, - "learning_rate": 9.225630469012125e-07, - "loss": 0.2031, - "step": 18363 - }, - { - "epoch": 1.7299639669343634, - "grad_norm": 0.5945623517036438, - "learning_rate": 9.219296597663663e-07, - "loss": 0.1651, - "step": 18364 - }, - { - "epoch": 1.7300581710275311, - "grad_norm": 0.6883321404457092, - "learning_rate": 9.212964796255641e-07, - "loss": 0.1889, - "step": 18365 - }, - { - "epoch": 1.7301523751206989, - "grad_norm": 0.7249394059181213, - "learning_rate": 9.206635064932423e-07, - "loss": 0.2093, - "step": 18366 - }, - { - "epoch": 1.7302465792138668, - "grad_norm": 0.6443730592727661, - "learning_rate": 9.200307403838327e-07, - "loss": 0.1948, - "step": 18367 - }, - { - "epoch": 1.7303407833070348, - "grad_norm": 0.6195492744445801, - "learning_rate": 9.193981813117669e-07, - "loss": 0.2017, - "step": 18368 - }, - { - "epoch": 1.7304349874002025, - "grad_norm": 0.6574223041534424, - "learning_rate": 9.187658292914647e-07, - "loss": 0.1864, - "step": 18369 - }, - { - "epoch": 1.7305291914933703, - "grad_norm": 0.6537573337554932, - "learning_rate": 9.181336843373456e-07, - "loss": 0.1752, - "step": 18370 - }, - { - "epoch": 1.7306233955865382, - "grad_norm": 0.6777458786964417, - "learning_rate": 9.175017464638258e-07, - "loss": 0.1912, - "step": 18371 - }, - { - "epoch": 1.7307175996797062, - "grad_norm": 0.6786985397338867, - "learning_rate": 9.168700156853106e-07, - "loss": 0.192, - "step": 18372 - }, - { - "epoch": 1.730811803772874, - "grad_norm": 0.7072835564613342, - "learning_rate": 9.16238492016206e-07, - "loss": 0.2029, - "step": 18373 - }, - { - "epoch": 1.7309060078660417, - "grad_norm": 0.6088541746139526, - "learning_rate": 9.15607175470915e-07, - "loss": 0.1665, - "step": 18374 - }, - { - "epoch": 1.7310002119592096, - "grad_norm": 0.6614930033683777, - "learning_rate": 9.149760660638285e-07, - "loss": 0.185, - "step": 18375 - }, - { - "epoch": 1.7310944160523776, - "grad_norm": 0.6818583011627197, - "learning_rate": 9.143451638093348e-07, - "loss": 0.1911, - "step": 18376 - }, - { - "epoch": 1.7311886201455453, - "grad_norm": 0.5903543829917908, - "learning_rate": 9.137144687218269e-07, - "loss": 0.1596, - "step": 18377 - }, - { - "epoch": 1.731282824238713, - "grad_norm": 0.7084120512008667, - "learning_rate": 9.130839808156799e-07, - "loss": 0.1981, - "step": 18378 
- }, - { - "epoch": 1.731377028331881, - "grad_norm": 0.6994187831878662, - "learning_rate": 9.124537001052692e-07, - "loss": 0.1805, - "step": 18379 - }, - { - "epoch": 1.731471232425049, - "grad_norm": 0.6367105841636658, - "learning_rate": 9.118236266049707e-07, - "loss": 0.1806, - "step": 18380 - }, - { - "epoch": 1.7315654365182167, - "grad_norm": 0.7205740809440613, - "learning_rate": 9.111937603291499e-07, - "loss": 0.2346, - "step": 18381 - }, - { - "epoch": 1.7316596406113844, - "grad_norm": 0.6373416781425476, - "learning_rate": 9.10564101292164e-07, - "loss": 0.1923, - "step": 18382 - }, - { - "epoch": 1.7317538447045524, - "grad_norm": 0.6876963973045349, - "learning_rate": 9.09934649508375e-07, - "loss": 0.2063, - "step": 18383 - }, - { - "epoch": 1.7318480487977204, - "grad_norm": 0.6859636902809143, - "learning_rate": 9.093054049921357e-07, - "loss": 0.1931, - "step": 18384 - }, - { - "epoch": 1.731942252890888, - "grad_norm": 0.649297833442688, - "learning_rate": 9.086763677577903e-07, - "loss": 0.2143, - "step": 18385 - }, - { - "epoch": 1.7320364569840558, - "grad_norm": 0.680216372013092, - "learning_rate": 9.080475378196829e-07, - "loss": 0.1921, - "step": 18386 - }, - { - "epoch": 1.7321306610772238, - "grad_norm": 0.6525050401687622, - "learning_rate": 9.074189151921553e-07, - "loss": 0.1993, - "step": 18387 - }, - { - "epoch": 1.7322248651703918, - "grad_norm": 0.6437455415725708, - "learning_rate": 9.06790499889536e-07, - "loss": 0.1816, - "step": 18388 - }, - { - "epoch": 1.7323190692635595, - "grad_norm": 0.7058247327804565, - "learning_rate": 9.061622919261571e-07, - "loss": 0.1948, - "step": 18389 - }, - { - "epoch": 1.7324132733567272, - "grad_norm": 0.740924596786499, - "learning_rate": 9.055342913163434e-07, - "loss": 0.2043, - "step": 18390 - }, - { - "epoch": 1.7325074774498952, - "grad_norm": 0.6546656489372253, - "learning_rate": 9.049064980744104e-07, - "loss": 0.1964, - "step": 18391 - }, - { - "epoch": 1.7326016815430632, - "grad_norm": 0.7722698450088501, - "learning_rate": 9.042789122146755e-07, - "loss": 0.2137, - "step": 18392 - }, - { - "epoch": 1.732695885636231, - "grad_norm": 1.0166223049163818, - "learning_rate": 9.036515337514496e-07, - "loss": 0.2079, - "step": 18393 - }, - { - "epoch": 1.7327900897293986, - "grad_norm": 0.6798054575920105, - "learning_rate": 9.030243626990343e-07, - "loss": 0.2092, - "step": 18394 - }, - { - "epoch": 1.7328842938225666, - "grad_norm": 0.6436615586280823, - "learning_rate": 9.023973990717349e-07, - "loss": 0.1765, - "step": 18395 - }, - { - "epoch": 1.7329784979157346, - "grad_norm": 0.6995458006858826, - "learning_rate": 9.017706428838425e-07, - "loss": 0.1997, - "step": 18396 - }, - { - "epoch": 1.7330727020089023, - "grad_norm": 0.6393980383872986, - "learning_rate": 9.011440941496519e-07, - "loss": 0.1835, - "step": 18397 - }, - { - "epoch": 1.73316690610207, - "grad_norm": 0.7130224108695984, - "learning_rate": 9.005177528834464e-07, - "loss": 0.1693, - "step": 18398 - }, - { - "epoch": 1.733261110195238, - "grad_norm": 0.7812324166297913, - "learning_rate": 8.998916190995078e-07, - "loss": 0.2229, - "step": 18399 - }, - { - "epoch": 1.733355314288406, - "grad_norm": 0.5957273840904236, - "learning_rate": 8.992656928121158e-07, - "loss": 0.169, - "step": 18400 - }, - { - "epoch": 1.7334495183815737, - "grad_norm": 0.5725364089012146, - "learning_rate": 8.98639974035539e-07, - "loss": 0.1582, - "step": 18401 - }, - { - "epoch": 1.7335437224747414, - "grad_norm": 0.5999626517295837, - "learning_rate": 
8.98014462784047e-07, - "loss": 0.1781, - "step": 18402 - }, - { - "epoch": 1.7336379265679094, - "grad_norm": 0.6020220518112183, - "learning_rate": 8.973891590719031e-07, - "loss": 0.1918, - "step": 18403 - }, - { - "epoch": 1.7337321306610773, - "grad_norm": 0.6308332085609436, - "learning_rate": 8.967640629133611e-07, - "loss": 0.1724, - "step": 18404 - }, - { - "epoch": 1.733826334754245, - "grad_norm": 0.6978043913841248, - "learning_rate": 8.9613917432268e-07, - "loss": 0.1987, - "step": 18405 - }, - { - "epoch": 1.7339205388474128, - "grad_norm": 0.6582713723182678, - "learning_rate": 8.955144933141025e-07, - "loss": 0.1973, - "step": 18406 - }, - { - "epoch": 1.7340147429405808, - "grad_norm": 0.704949676990509, - "learning_rate": 8.94890019901875e-07, - "loss": 0.1914, - "step": 18407 - }, - { - "epoch": 1.7341089470337487, - "grad_norm": 0.6758086681365967, - "learning_rate": 8.942657541002386e-07, - "loss": 0.2316, - "step": 18408 - }, - { - "epoch": 1.7342031511269165, - "grad_norm": 0.5849722027778625, - "learning_rate": 8.936416959234229e-07, - "loss": 0.186, - "step": 18409 - }, - { - "epoch": 1.7342973552200842, - "grad_norm": 0.6267807483673096, - "learning_rate": 8.930178453856597e-07, - "loss": 0.177, - "step": 18410 - }, - { - "epoch": 1.7343915593132522, - "grad_norm": 0.6762515902519226, - "learning_rate": 8.923942025011768e-07, - "loss": 0.1808, - "step": 18411 - }, - { - "epoch": 1.7344857634064201, - "grad_norm": 0.6666058897972107, - "learning_rate": 8.917707672841879e-07, - "loss": 0.2017, - "step": 18412 - }, - { - "epoch": 1.7345799674995879, - "grad_norm": 0.7240074872970581, - "learning_rate": 8.911475397489122e-07, - "loss": 0.2095, - "step": 18413 - }, - { - "epoch": 1.7346741715927556, - "grad_norm": 0.955720841884613, - "learning_rate": 8.905245199095625e-07, - "loss": 0.1926, - "step": 18414 - }, - { - "epoch": 1.7347683756859236, - "grad_norm": 0.7881523966789246, - "learning_rate": 8.899017077803406e-07, - "loss": 0.1949, - "step": 18415 - }, - { - "epoch": 1.7348625797790915, - "grad_norm": 0.6623057723045349, - "learning_rate": 8.892791033754456e-07, - "loss": 0.2009, - "step": 18416 - }, - { - "epoch": 1.7349567838722593, - "grad_norm": 0.6543082594871521, - "learning_rate": 8.886567067090812e-07, - "loss": 0.1869, - "step": 18417 - }, - { - "epoch": 1.735050987965427, - "grad_norm": 0.608604371547699, - "learning_rate": 8.880345177954341e-07, - "loss": 0.1577, - "step": 18418 - }, - { - "epoch": 1.735145192058595, - "grad_norm": 0.6755335330963135, - "learning_rate": 8.874125366486886e-07, - "loss": 0.1895, - "step": 18419 - }, - { - "epoch": 1.735239396151763, - "grad_norm": 0.6955905556678772, - "learning_rate": 8.867907632830341e-07, - "loss": 0.1924, - "step": 18420 - }, - { - "epoch": 1.7353336002449307, - "grad_norm": 0.6710397005081177, - "learning_rate": 8.86169197712643e-07, - "loss": 0.1898, - "step": 18421 - }, - { - "epoch": 1.7354278043380984, - "grad_norm": 0.7040006518363953, - "learning_rate": 8.855478399516881e-07, - "loss": 0.2304, - "step": 18422 - }, - { - "epoch": 1.7355220084312664, - "grad_norm": 0.6744738221168518, - "learning_rate": 8.849266900143383e-07, - "loss": 0.1962, - "step": 18423 - }, - { - "epoch": 1.7356162125244343, - "grad_norm": 0.6585153937339783, - "learning_rate": 8.843057479147576e-07, - "loss": 0.1639, - "step": 18424 - }, - { - "epoch": 1.735710416617602, - "grad_norm": 0.6540251970291138, - "learning_rate": 8.836850136671027e-07, - "loss": 0.1934, - "step": 18425 - }, - { - "epoch": 1.7358046207107698, 
- "grad_norm": 0.705668568611145, - "learning_rate": 8.830644872855276e-07, - "loss": 0.2126, - "step": 18426 - }, - { - "epoch": 1.7358988248039378, - "grad_norm": 0.6492533683776855, - "learning_rate": 8.824441687841834e-07, - "loss": 0.1717, - "step": 18427 - }, - { - "epoch": 1.7359930288971057, - "grad_norm": 0.631546139717102, - "learning_rate": 8.81824058177212e-07, - "loss": 0.2, - "step": 18428 - }, - { - "epoch": 1.7360872329902735, - "grad_norm": 0.6409475207328796, - "learning_rate": 8.812041554787521e-07, - "loss": 0.2104, - "step": 18429 - }, - { - "epoch": 1.7361814370834412, - "grad_norm": 0.6669663786888123, - "learning_rate": 8.805844607029435e-07, - "loss": 0.1864, - "step": 18430 - }, - { - "epoch": 1.7362756411766092, - "grad_norm": 0.6519623398780823, - "learning_rate": 8.799649738639094e-07, - "loss": 0.1992, - "step": 18431 - }, - { - "epoch": 1.7363698452697771, - "grad_norm": 0.6021997928619385, - "learning_rate": 8.793456949757784e-07, - "loss": 0.1898, - "step": 18432 - }, - { - "epoch": 1.7364640493629449, - "grad_norm": 0.7147196531295776, - "learning_rate": 8.787266240526738e-07, - "loss": 0.2135, - "step": 18433 - }, - { - "epoch": 1.7365582534561126, - "grad_norm": 0.7053173780441284, - "learning_rate": 8.781077611087075e-07, - "loss": 0.2086, - "step": 18434 - }, - { - "epoch": 1.7366524575492805, - "grad_norm": 0.681104838848114, - "learning_rate": 8.774891061579904e-07, - "loss": 0.2365, - "step": 18435 - }, - { - "epoch": 1.7367466616424485, - "grad_norm": 0.640718400478363, - "learning_rate": 8.768706592146293e-07, - "loss": 0.1864, - "step": 18436 - }, - { - "epoch": 1.7368408657356162, - "grad_norm": 0.609404444694519, - "learning_rate": 8.762524202927281e-07, - "loss": 0.1594, - "step": 18437 - }, - { - "epoch": 1.736935069828784, - "grad_norm": 0.684023380279541, - "learning_rate": 8.756343894063801e-07, - "loss": 0.2187, - "step": 18438 - }, - { - "epoch": 1.737029273921952, - "grad_norm": 0.6988298892974854, - "learning_rate": 8.750165665696797e-07, - "loss": 0.1995, - "step": 18439 - }, - { - "epoch": 1.73712347801512, - "grad_norm": 0.7350949048995972, - "learning_rate": 8.743989517967155e-07, - "loss": 0.1903, - "step": 18440 - }, - { - "epoch": 1.7372176821082876, - "grad_norm": 0.5716772079467773, - "learning_rate": 8.737815451015663e-07, - "loss": 0.1826, - "step": 18441 - }, - { - "epoch": 1.7373118862014554, - "grad_norm": 0.9348627328872681, - "learning_rate": 8.731643464983109e-07, - "loss": 0.1691, - "step": 18442 - }, - { - "epoch": 1.7374060902946233, - "grad_norm": 0.7201164364814758, - "learning_rate": 8.725473560010256e-07, - "loss": 0.2413, - "step": 18443 - }, - { - "epoch": 1.7375002943877913, - "grad_norm": 0.7446562647819519, - "learning_rate": 8.719305736237749e-07, - "loss": 0.1959, - "step": 18444 - }, - { - "epoch": 1.737594498480959, - "grad_norm": 0.6912760734558105, - "learning_rate": 8.713139993806263e-07, - "loss": 0.1982, - "step": 18445 - }, - { - "epoch": 1.7376887025741268, - "grad_norm": 0.6774753928184509, - "learning_rate": 8.706976332856331e-07, - "loss": 0.1638, - "step": 18446 - }, - { - "epoch": 1.7377829066672947, - "grad_norm": 0.6914879083633423, - "learning_rate": 8.700814753528541e-07, - "loss": 0.1588, - "step": 18447 - }, - { - "epoch": 1.7378771107604627, - "grad_norm": 0.63276606798172, - "learning_rate": 8.69465525596338e-07, - "loss": 0.1755, - "step": 18448 - }, - { - "epoch": 1.7379713148536304, - "grad_norm": 0.6125465631484985, - "learning_rate": 8.688497840301269e-07, - "loss": 0.1786, - 
"step": 18449 - }, - { - "epoch": 1.7380655189467982, - "grad_norm": 1.0072364807128906, - "learning_rate": 8.682342506682629e-07, - "loss": 0.1974, - "step": 18450 - }, - { - "epoch": 1.7381597230399661, - "grad_norm": 0.6694815158843994, - "learning_rate": 8.676189255247814e-07, - "loss": 0.1786, - "step": 18451 - }, - { - "epoch": 1.738253927133134, - "grad_norm": 0.705101728439331, - "learning_rate": 8.670038086137111e-07, - "loss": 0.2359, - "step": 18452 - }, - { - "epoch": 1.7383481312263018, - "grad_norm": 0.6535754203796387, - "learning_rate": 8.663888999490777e-07, - "loss": 0.2002, - "step": 18453 - }, - { - "epoch": 1.7384423353194696, - "grad_norm": 0.7289561629295349, - "learning_rate": 8.657741995449043e-07, - "loss": 0.1815, - "step": 18454 - }, - { - "epoch": 1.7385365394126375, - "grad_norm": 0.6739023327827454, - "learning_rate": 8.651597074152063e-07, - "loss": 0.182, - "step": 18455 - }, - { - "epoch": 1.7386307435058055, - "grad_norm": 0.6043060421943665, - "learning_rate": 8.645454235739903e-07, - "loss": 0.1956, - "step": 18456 - }, - { - "epoch": 1.7387249475989732, - "grad_norm": 0.659800112247467, - "learning_rate": 8.639313480352707e-07, - "loss": 0.1681, - "step": 18457 - }, - { - "epoch": 1.738819151692141, - "grad_norm": 0.6852256059646606, - "learning_rate": 8.633174808130452e-07, - "loss": 0.2143, - "step": 18458 - }, - { - "epoch": 1.738913355785309, - "grad_norm": 0.6718716025352478, - "learning_rate": 8.627038219213102e-07, - "loss": 0.2027, - "step": 18459 - }, - { - "epoch": 1.7390075598784769, - "grad_norm": 0.662808895111084, - "learning_rate": 8.620903713740581e-07, - "loss": 0.1907, - "step": 18460 - }, - { - "epoch": 1.7391017639716446, - "grad_norm": 0.6268913745880127, - "learning_rate": 8.614771291852797e-07, - "loss": 0.1957, - "step": 18461 - }, - { - "epoch": 1.7391959680648124, - "grad_norm": 0.9465031027793884, - "learning_rate": 8.60864095368954e-07, - "loss": 0.2138, - "step": 18462 - }, - { - "epoch": 1.7392901721579803, - "grad_norm": 0.6139705777168274, - "learning_rate": 8.602512699390619e-07, - "loss": 0.1908, - "step": 18463 - }, - { - "epoch": 1.7393843762511483, - "grad_norm": 0.7043342590332031, - "learning_rate": 8.596386529095768e-07, - "loss": 0.1746, - "step": 18464 - }, - { - "epoch": 1.7394785803443158, - "grad_norm": 0.6834427118301392, - "learning_rate": 8.590262442944641e-07, - "loss": 0.1912, - "step": 18465 - }, - { - "epoch": 1.7395727844374838, - "grad_norm": 0.6388500332832336, - "learning_rate": 8.584140441076894e-07, - "loss": 0.1793, - "step": 18466 - }, - { - "epoch": 1.7396669885306517, - "grad_norm": 0.5999411940574646, - "learning_rate": 8.578020523632147e-07, - "loss": 0.158, - "step": 18467 - }, - { - "epoch": 1.7397611926238195, - "grad_norm": 0.6816447377204895, - "learning_rate": 8.5719026907499e-07, - "loss": 0.2044, - "step": 18468 - }, - { - "epoch": 1.7398553967169872, - "grad_norm": 0.6604149341583252, - "learning_rate": 8.565786942569677e-07, - "loss": 0.1973, - "step": 18469 - }, - { - "epoch": 1.7399496008101552, - "grad_norm": 0.7390600442886353, - "learning_rate": 8.559673279230929e-07, - "loss": 0.2278, - "step": 18470 - }, - { - "epoch": 1.740043804903323, - "grad_norm": 0.6293715238571167, - "learning_rate": 8.553561700873026e-07, - "loss": 0.2093, - "step": 18471 - }, - { - "epoch": 1.7401380089964908, - "grad_norm": 0.7075802087783813, - "learning_rate": 8.547452207635332e-07, - "loss": 0.1985, - "step": 18472 - }, - { - "epoch": 1.7402322130896586, - "grad_norm": 0.7181007266044617, - 
"learning_rate": 8.541344799657192e-07, - "loss": 0.2036, - "step": 18473 - }, - { - "epoch": 1.7403264171828265, - "grad_norm": 0.7272784113883972, - "learning_rate": 8.535239477077827e-07, - "loss": 0.2118, - "step": 18474 - }, - { - "epoch": 1.7404206212759945, - "grad_norm": 0.6046308279037476, - "learning_rate": 8.529136240036439e-07, - "loss": 0.1628, - "step": 18475 - }, - { - "epoch": 1.7405148253691622, - "grad_norm": 0.6928007006645203, - "learning_rate": 8.523035088672215e-07, - "loss": 0.174, - "step": 18476 - }, - { - "epoch": 1.74060902946233, - "grad_norm": 0.66817706823349, - "learning_rate": 8.516936023124267e-07, - "loss": 0.2126, - "step": 18477 - }, - { - "epoch": 1.740703233555498, - "grad_norm": 0.6573317646980286, - "learning_rate": 8.510839043531649e-07, - "loss": 0.2082, - "step": 18478 - }, - { - "epoch": 1.740797437648666, - "grad_norm": 0.674961268901825, - "learning_rate": 8.504744150033395e-07, - "loss": 0.184, - "step": 18479 - }, - { - "epoch": 1.7408916417418336, - "grad_norm": 0.6107397675514221, - "learning_rate": 8.498651342768482e-07, - "loss": 0.1752, - "step": 18480 - }, - { - "epoch": 1.7409858458350014, - "grad_norm": 0.7066890597343445, - "learning_rate": 8.492560621875823e-07, - "loss": 0.1866, - "step": 18481 - }, - { - "epoch": 1.7410800499281693, - "grad_norm": 0.6572678685188293, - "learning_rate": 8.486471987494294e-07, - "loss": 0.1799, - "step": 18482 - }, - { - "epoch": 1.7411742540213373, - "grad_norm": 0.6278140544891357, - "learning_rate": 8.480385439762751e-07, - "loss": 0.1752, - "step": 18483 - }, - { - "epoch": 1.741268458114505, - "grad_norm": 0.7136510014533997, - "learning_rate": 8.474300978819939e-07, - "loss": 0.195, - "step": 18484 - }, - { - "epoch": 1.7413626622076728, - "grad_norm": 0.6753626465797424, - "learning_rate": 8.468218604804624e-07, - "loss": 0.1834, - "step": 18485 - }, - { - "epoch": 1.7414568663008407, - "grad_norm": 0.687878429889679, - "learning_rate": 8.462138317855473e-07, - "loss": 0.1936, - "step": 18486 - }, - { - "epoch": 1.7415510703940087, - "grad_norm": 0.6868911981582642, - "learning_rate": 8.456060118111131e-07, - "loss": 0.1889, - "step": 18487 - }, - { - "epoch": 1.7416452744871764, - "grad_norm": 0.6118377447128296, - "learning_rate": 8.44998400571021e-07, - "loss": 0.1729, - "step": 18488 - }, - { - "epoch": 1.7417394785803442, - "grad_norm": 0.7410522699356079, - "learning_rate": 8.443909980791221e-07, - "loss": 0.1922, - "step": 18489 - }, - { - "epoch": 1.7418336826735121, - "grad_norm": 0.6565917730331421, - "learning_rate": 8.437838043492675e-07, - "loss": 0.1734, - "step": 18490 - }, - { - "epoch": 1.74192788676668, - "grad_norm": 0.6755452752113342, - "learning_rate": 8.431768193953049e-07, - "loss": 0.2046, - "step": 18491 - }, - { - "epoch": 1.7420220908598478, - "grad_norm": 0.6946268081665039, - "learning_rate": 8.425700432310701e-07, - "loss": 0.1965, - "step": 18492 - }, - { - "epoch": 1.7421162949530156, - "grad_norm": 0.6437829732894897, - "learning_rate": 8.419634758704009e-07, - "loss": 0.1781, - "step": 18493 - }, - { - "epoch": 1.7422104990461835, - "grad_norm": 0.7048546671867371, - "learning_rate": 8.413571173271295e-07, - "loss": 0.2046, - "step": 18494 - }, - { - "epoch": 1.7423047031393515, - "grad_norm": 0.6869180202484131, - "learning_rate": 8.407509676150794e-07, - "loss": 0.2087, - "step": 18495 - }, - { - "epoch": 1.7423989072325192, - "grad_norm": 0.6828222274780273, - "learning_rate": 8.401450267480682e-07, - "loss": 0.1926, - "step": 18496 - }, - { - "epoch": 
1.742493111325687, - "grad_norm": 0.6646389365196228, - "learning_rate": 8.395392947399205e-07, - "loss": 0.2123, - "step": 18497 - }, - { - "epoch": 1.742587315418855, - "grad_norm": 0.7597160339355469, - "learning_rate": 8.389337716044443e-07, - "loss": 0.1776, - "step": 18498 - }, - { - "epoch": 1.7426815195120229, - "grad_norm": 0.835570216178894, - "learning_rate": 8.38328457355444e-07, - "loss": 0.2219, - "step": 18499 - }, - { - "epoch": 1.7427757236051906, - "grad_norm": 0.661425769329071, - "learning_rate": 8.37723352006724e-07, - "loss": 0.1839, - "step": 18500 - }, - { - "epoch": 1.7428699276983584, - "grad_norm": 0.5716953277587891, - "learning_rate": 8.371184555720824e-07, - "loss": 0.1825, - "step": 18501 - }, - { - "epoch": 1.7429641317915263, - "grad_norm": 0.6040512919425964, - "learning_rate": 8.36513768065309e-07, - "loss": 0.1688, - "step": 18502 - }, - { - "epoch": 1.7430583358846943, - "grad_norm": 0.6377480030059814, - "learning_rate": 8.35909289500193e-07, - "loss": 0.1837, - "step": 18503 - }, - { - "epoch": 1.743152539977862, - "grad_norm": 0.6187005639076233, - "learning_rate": 8.353050198905199e-07, - "loss": 0.1992, - "step": 18504 - }, - { - "epoch": 1.7432467440710298, - "grad_norm": 0.6267953515052795, - "learning_rate": 8.347009592500644e-07, - "loss": 0.153, - "step": 18505 - }, - { - "epoch": 1.7433409481641977, - "grad_norm": 0.6637890934944153, - "learning_rate": 8.340971075926007e-07, - "loss": 0.2132, - "step": 18506 - }, - { - "epoch": 1.7434351522573657, - "grad_norm": 0.6383355259895325, - "learning_rate": 8.334934649319004e-07, - "loss": 0.1964, - "step": 18507 - }, - { - "epoch": 1.7435293563505334, - "grad_norm": 0.6890792846679688, - "learning_rate": 8.328900312817234e-07, - "loss": 0.1932, - "step": 18508 - }, - { - "epoch": 1.7436235604437011, - "grad_norm": 0.7252713441848755, - "learning_rate": 8.32286806655832e-07, - "loss": 0.197, - "step": 18509 - }, - { - "epoch": 1.743717764536869, - "grad_norm": 0.6572889685630798, - "learning_rate": 8.316837910679798e-07, - "loss": 0.1913, - "step": 18510 - }, - { - "epoch": 1.743811968630037, - "grad_norm": 0.6808494925498962, - "learning_rate": 8.310809845319156e-07, - "loss": 0.1929, - "step": 18511 - }, - { - "epoch": 1.7439061727232048, - "grad_norm": 0.6998562812805176, - "learning_rate": 8.304783870613841e-07, - "loss": 0.2131, - "step": 18512 - }, - { - "epoch": 1.7440003768163725, - "grad_norm": 0.6793148517608643, - "learning_rate": 8.298759986701288e-07, - "loss": 0.1951, - "step": 18513 - }, - { - "epoch": 1.7440945809095405, - "grad_norm": 0.693396806716919, - "learning_rate": 8.292738193718819e-07, - "loss": 0.1897, - "step": 18514 - }, - { - "epoch": 1.7441887850027085, - "grad_norm": 0.6428536772727966, - "learning_rate": 8.286718491803736e-07, - "loss": 0.178, - "step": 18515 - }, - { - "epoch": 1.7442829890958762, - "grad_norm": 0.6498225927352905, - "learning_rate": 8.280700881093306e-07, - "loss": 0.2167, - "step": 18516 - }, - { - "epoch": 1.744377193189044, - "grad_norm": 0.7418729662895203, - "learning_rate": 8.274685361724755e-07, - "loss": 0.2288, - "step": 18517 - }, - { - "epoch": 1.744471397282212, - "grad_norm": 0.6538975834846497, - "learning_rate": 8.268671933835226e-07, - "loss": 0.1811, - "step": 18518 - }, - { - "epoch": 1.7445656013753799, - "grad_norm": 0.5824927687644958, - "learning_rate": 8.262660597561833e-07, - "loss": 0.2025, - "step": 18519 - }, - { - "epoch": 1.7446598054685476, - "grad_norm": 0.7858545184135437, - "learning_rate": 8.256651353041678e-07, - 
"loss": 0.2035, - "step": 18520 - }, - { - "epoch": 1.7447540095617153, - "grad_norm": 0.6475818753242493, - "learning_rate": 8.250644200411739e-07, - "loss": 0.1631, - "step": 18521 - }, - { - "epoch": 1.7448482136548833, - "grad_norm": 0.6249920129776001, - "learning_rate": 8.244639139808997e-07, - "loss": 0.1771, - "step": 18522 - }, - { - "epoch": 1.7449424177480513, - "grad_norm": 0.738472580909729, - "learning_rate": 8.238636171370406e-07, - "loss": 0.1844, - "step": 18523 - }, - { - "epoch": 1.745036621841219, - "grad_norm": 0.7863634824752808, - "learning_rate": 8.232635295232805e-07, - "loss": 0.1848, - "step": 18524 - }, - { - "epoch": 1.7451308259343867, - "grad_norm": 0.606253981590271, - "learning_rate": 8.226636511533059e-07, - "loss": 0.1793, - "step": 18525 - }, - { - "epoch": 1.7452250300275547, - "grad_norm": 0.7103930115699768, - "learning_rate": 8.220639820407917e-07, - "loss": 0.2121, - "step": 18526 - }, - { - "epoch": 1.7453192341207227, - "grad_norm": 0.6445847749710083, - "learning_rate": 8.214645221994122e-07, - "loss": 0.1909, - "step": 18527 - }, - { - "epoch": 1.7454134382138904, - "grad_norm": 0.6096863746643066, - "learning_rate": 8.208652716428378e-07, - "loss": 0.1886, - "step": 18528 - }, - { - "epoch": 1.7455076423070581, - "grad_norm": 0.5972059965133667, - "learning_rate": 8.202662303847298e-07, - "loss": 0.1718, - "step": 18529 - }, - { - "epoch": 1.745601846400226, - "grad_norm": 1.0268930196762085, - "learning_rate": 8.196673984387482e-07, - "loss": 0.2109, - "step": 18530 - }, - { - "epoch": 1.745696050493394, - "grad_norm": 0.6883505582809448, - "learning_rate": 8.1906877581855e-07, - "loss": 0.2144, - "step": 18531 - }, - { - "epoch": 1.7457902545865618, - "grad_norm": 0.6555224657058716, - "learning_rate": 8.184703625377799e-07, - "loss": 0.2136, - "step": 18532 - }, - { - "epoch": 1.7458844586797295, - "grad_norm": 0.6272273659706116, - "learning_rate": 8.178721586100846e-07, - "loss": 0.1943, - "step": 18533 - }, - { - "epoch": 1.7459786627728975, - "grad_norm": 0.6227652430534363, - "learning_rate": 8.172741640491066e-07, - "loss": 0.1994, - "step": 18534 - }, - { - "epoch": 1.7460728668660654, - "grad_norm": 0.6621069312095642, - "learning_rate": 8.166763788684795e-07, - "loss": 0.2011, - "step": 18535 - }, - { - "epoch": 1.7461670709592332, - "grad_norm": 0.6356806755065918, - "learning_rate": 8.1607880308183e-07, - "loss": 0.1858, - "step": 18536 - }, - { - "epoch": 1.746261275052401, - "grad_norm": 0.6337907910346985, - "learning_rate": 8.154814367027897e-07, - "loss": 0.1949, - "step": 18537 - }, - { - "epoch": 1.7463554791455689, - "grad_norm": 0.7005070447921753, - "learning_rate": 8.148842797449774e-07, - "loss": 0.2107, - "step": 18538 - }, - { - "epoch": 1.7464496832387368, - "grad_norm": 0.6095236539840698, - "learning_rate": 8.142873322220057e-07, - "loss": 0.1855, - "step": 18539 - }, - { - "epoch": 1.7465438873319046, - "grad_norm": 0.5911375880241394, - "learning_rate": 8.136905941474904e-07, - "loss": 0.1703, - "step": 18540 - }, - { - "epoch": 1.7466380914250723, - "grad_norm": 0.8228804469108582, - "learning_rate": 8.130940655350372e-07, - "loss": 0.2276, - "step": 18541 - }, - { - "epoch": 1.7467322955182403, - "grad_norm": 0.636143684387207, - "learning_rate": 8.124977463982453e-07, - "loss": 0.1966, - "step": 18542 - }, - { - "epoch": 1.7468264996114082, - "grad_norm": 0.6936721205711365, - "learning_rate": 8.119016367507138e-07, - "loss": 0.1829, - "step": 18543 - }, - { - "epoch": 1.746920703704576, - "grad_norm": 
0.707241952419281, - "learning_rate": 8.113057366060362e-07, - "loss": 0.191, - "step": 18544 - }, - { - "epoch": 1.7470149077977437, - "grad_norm": 0.7110002636909485, - "learning_rate": 8.107100459777972e-07, - "loss": 0.1824, - "step": 18545 - }, - { - "epoch": 1.7471091118909117, - "grad_norm": 0.6631872057914734, - "learning_rate": 8.101145648795805e-07, - "loss": 0.1857, - "step": 18546 - }, - { - "epoch": 1.7472033159840796, - "grad_norm": 0.6365458369255066, - "learning_rate": 8.095192933249652e-07, - "loss": 0.2156, - "step": 18547 - }, - { - "epoch": 1.7472975200772474, - "grad_norm": 0.7477833032608032, - "learning_rate": 8.089242313275226e-07, - "loss": 0.1915, - "step": 18548 - }, - { - "epoch": 1.747391724170415, - "grad_norm": 0.728469967842102, - "learning_rate": 8.083293789008218e-07, - "loss": 0.168, - "step": 18549 - }, - { - "epoch": 1.747485928263583, - "grad_norm": 0.6091916561126709, - "learning_rate": 8.077347360584275e-07, - "loss": 0.1767, - "step": 18550 - }, - { - "epoch": 1.747580132356751, - "grad_norm": 0.6055939197540283, - "learning_rate": 8.071403028138969e-07, - "loss": 0.1793, - "step": 18551 - }, - { - "epoch": 1.7476743364499188, - "grad_norm": 0.6508654952049255, - "learning_rate": 8.065460791807822e-07, - "loss": 0.1848, - "step": 18552 - }, - { - "epoch": 1.7477685405430865, - "grad_norm": 0.604834794998169, - "learning_rate": 8.059520651726371e-07, - "loss": 0.1698, - "step": 18553 - }, - { - "epoch": 1.7478627446362545, - "grad_norm": 0.6898211240768433, - "learning_rate": 8.053582608030041e-07, - "loss": 0.1983, - "step": 18554 - }, - { - "epoch": 1.7479569487294224, - "grad_norm": 0.5613889694213867, - "learning_rate": 8.047646660854213e-07, - "loss": 0.185, - "step": 18555 - }, - { - "epoch": 1.7480511528225902, - "grad_norm": 0.6882143020629883, - "learning_rate": 8.041712810334245e-07, - "loss": 0.1821, - "step": 18556 - }, - { - "epoch": 1.748145356915758, - "grad_norm": 0.6715866327285767, - "learning_rate": 8.035781056605463e-07, - "loss": 0.1931, - "step": 18557 - }, - { - "epoch": 1.7482395610089259, - "grad_norm": 0.6297674775123596, - "learning_rate": 8.029851399803068e-07, - "loss": 0.1757, - "step": 18558 - }, - { - "epoch": 1.7483337651020938, - "grad_norm": 0.6968896389007568, - "learning_rate": 8.023923840062309e-07, - "loss": 0.1816, - "step": 18559 - }, - { - "epoch": 1.7484279691952616, - "grad_norm": 0.6847019195556641, - "learning_rate": 8.017998377518343e-07, - "loss": 0.1755, - "step": 18560 - }, - { - "epoch": 1.7485221732884293, - "grad_norm": 0.6652508974075317, - "learning_rate": 8.012075012306253e-07, - "loss": 0.1884, - "step": 18561 - }, - { - "epoch": 1.7486163773815973, - "grad_norm": 0.7000791430473328, - "learning_rate": 8.006153744561107e-07, - "loss": 0.1869, - "step": 18562 - }, - { - "epoch": 1.7487105814747652, - "grad_norm": 0.6731979846954346, - "learning_rate": 8.000234574417954e-07, - "loss": 0.1896, - "step": 18563 - }, - { - "epoch": 1.748804785567933, - "grad_norm": 0.6780688166618347, - "learning_rate": 7.994317502011705e-07, - "loss": 0.1945, - "step": 18564 - }, - { - "epoch": 1.7488989896611007, - "grad_norm": 0.6443861722946167, - "learning_rate": 7.988402527477335e-07, - "loss": 0.1815, - "step": 18565 - }, - { - "epoch": 1.7489931937542686, - "grad_norm": 0.6712549924850464, - "learning_rate": 7.982489650949654e-07, - "loss": 0.2053, - "step": 18566 - }, - { - "epoch": 1.7490873978474366, - "grad_norm": 0.6732940077781677, - "learning_rate": 7.976578872563534e-07, - "loss": 0.2093, - "step": 
18567 - }, - { - "epoch": 1.7491816019406043, - "grad_norm": 0.6036709547042847, - "learning_rate": 7.970670192453733e-07, - "loss": 0.1926, - "step": 18568 - }, - { - "epoch": 1.749275806033772, - "grad_norm": 0.8076704740524292, - "learning_rate": 7.964763610754978e-07, - "loss": 0.1927, - "step": 18569 - }, - { - "epoch": 1.74937001012694, - "grad_norm": 0.831139326095581, - "learning_rate": 7.958859127601937e-07, - "loss": 0.1764, - "step": 18570 - }, - { - "epoch": 1.749464214220108, - "grad_norm": 0.9794051051139832, - "learning_rate": 7.95295674312927e-07, - "loss": 0.1869, - "step": 18571 - }, - { - "epoch": 1.7495584183132757, - "grad_norm": 0.6633066534996033, - "learning_rate": 7.947056457471524e-07, - "loss": 0.1751, - "step": 18572 - }, - { - "epoch": 1.7496526224064435, - "grad_norm": 0.6702530980110168, - "learning_rate": 7.941158270763261e-07, - "loss": 0.2074, - "step": 18573 - }, - { - "epoch": 1.7497468264996114, - "grad_norm": 0.6720075011253357, - "learning_rate": 7.93526218313897e-07, - "loss": 0.1741, - "step": 18574 - }, - { - "epoch": 1.7498410305927794, - "grad_norm": 0.6878265738487244, - "learning_rate": 7.929368194733089e-07, - "loss": 0.2038, - "step": 18575 - }, - { - "epoch": 1.7499352346859471, - "grad_norm": 0.6586441397666931, - "learning_rate": 7.923476305679977e-07, - "loss": 0.1867, - "step": 18576 - }, - { - "epoch": 1.7500294387791149, - "grad_norm": 0.8241437077522278, - "learning_rate": 7.917586516114007e-07, - "loss": 0.1837, - "step": 18577 - }, - { - "epoch": 1.7501236428722828, - "grad_norm": 0.6873905062675476, - "learning_rate": 7.911698826169501e-07, - "loss": 0.195, - "step": 18578 - }, - { - "epoch": 1.7502178469654508, - "grad_norm": 0.5969401597976685, - "learning_rate": 7.905813235980653e-07, - "loss": 0.1788, - "step": 18579 - }, - { - "epoch": 1.7503120510586185, - "grad_norm": 0.8423927426338196, - "learning_rate": 7.899929745681689e-07, - "loss": 0.2348, - "step": 18580 - }, - { - "epoch": 1.7504062551517863, - "grad_norm": 0.6558899283409119, - "learning_rate": 7.89404835540678e-07, - "loss": 0.2, - "step": 18581 - }, - { - "epoch": 1.7505004592449542, - "grad_norm": 0.6926812529563904, - "learning_rate": 7.888169065289997e-07, - "loss": 0.2119, - "step": 18582 - }, - { - "epoch": 1.7505946633381222, - "grad_norm": 0.6233304738998413, - "learning_rate": 7.882291875465408e-07, - "loss": 0.1665, - "step": 18583 - }, - { - "epoch": 1.75068886743129, - "grad_norm": 0.6532228589057922, - "learning_rate": 7.876416786067053e-07, - "loss": 0.2214, - "step": 18584 - }, - { - "epoch": 1.7507830715244577, - "grad_norm": 0.7344880700111389, - "learning_rate": 7.870543797228847e-07, - "loss": 0.2136, - "step": 18585 - }, - { - "epoch": 1.7508772756176256, - "grad_norm": 0.6469510197639465, - "learning_rate": 7.864672909084714e-07, - "loss": 0.1973, - "step": 18586 - }, - { - "epoch": 1.7509714797107936, - "grad_norm": 0.5995925068855286, - "learning_rate": 7.85880412176856e-07, - "loss": 0.1886, - "step": 18587 - }, - { - "epoch": 1.7510656838039613, - "grad_norm": 0.6568569540977478, - "learning_rate": 7.852937435414143e-07, - "loss": 0.1921, - "step": 18588 - }, - { - "epoch": 1.751159887897129, - "grad_norm": 0.6559253931045532, - "learning_rate": 7.847072850155268e-07, - "loss": 0.1909, - "step": 18589 - }, - { - "epoch": 1.751254091990297, - "grad_norm": 0.6460072994232178, - "learning_rate": 7.841210366125662e-07, - "loss": 0.1973, - "step": 18590 - }, - { - "epoch": 1.751348296083465, - "grad_norm": 0.7013749480247498, - "learning_rate": 
7.835349983458996e-07, - "loss": 0.2206, - "step": 18591 - }, - { - "epoch": 1.7514425001766327, - "grad_norm": 0.6947945356369019, - "learning_rate": 7.829491702288839e-07, - "loss": 0.2032, - "step": 18592 - }, - { - "epoch": 1.7515367042698005, - "grad_norm": 0.6729248762130737, - "learning_rate": 7.823635522748851e-07, - "loss": 0.222, - "step": 18593 - }, - { - "epoch": 1.7516309083629684, - "grad_norm": 0.7111519575119019, - "learning_rate": 7.817781444972528e-07, - "loss": 0.2069, - "step": 18594 - }, - { - "epoch": 1.7517251124561364, - "grad_norm": 0.6821576952934265, - "learning_rate": 7.811929469093338e-07, - "loss": 0.21, - "step": 18595 - }, - { - "epoch": 1.7518193165493041, - "grad_norm": 0.6710083484649658, - "learning_rate": 7.806079595244731e-07, - "loss": 0.1811, - "step": 18596 - }, - { - "epoch": 1.7519135206424719, - "grad_norm": 0.6114735007286072, - "learning_rate": 7.8002318235601e-07, - "loss": 0.1854, - "step": 18597 - }, - { - "epoch": 1.7520077247356398, - "grad_norm": 0.759047269821167, - "learning_rate": 7.79438615417275e-07, - "loss": 0.2022, - "step": 18598 - }, - { - "epoch": 1.7521019288288078, - "grad_norm": 0.6617277264595032, - "learning_rate": 7.788542587216008e-07, - "loss": 0.1865, - "step": 18599 - }, - { - "epoch": 1.7521961329219755, - "grad_norm": 0.6306414008140564, - "learning_rate": 7.782701122823111e-07, - "loss": 0.2017, - "step": 18600 - }, - { - "epoch": 1.7522903370151433, - "grad_norm": 0.7386486530303955, - "learning_rate": 7.776861761127231e-07, - "loss": 0.1761, - "step": 18601 - }, - { - "epoch": 1.7523845411083112, - "grad_norm": 0.6602556109428406, - "learning_rate": 7.771024502261526e-07, - "loss": 0.2193, - "step": 18602 - }, - { - "epoch": 1.752478745201479, - "grad_norm": 0.7331061363220215, - "learning_rate": 7.765189346359114e-07, - "loss": 0.2026, - "step": 18603 - }, - { - "epoch": 1.7525729492946467, - "grad_norm": 0.7011824250221252, - "learning_rate": 7.759356293553011e-07, - "loss": 0.1867, - "step": 18604 - }, - { - "epoch": 1.7526671533878146, - "grad_norm": 0.8126789927482605, - "learning_rate": 7.753525343976265e-07, - "loss": 0.1952, - "step": 18605 - }, - { - "epoch": 1.7527613574809826, - "grad_norm": 0.6619715094566345, - "learning_rate": 7.747696497761781e-07, - "loss": 0.1993, - "step": 18606 - }, - { - "epoch": 1.7528555615741503, - "grad_norm": 0.6641839146614075, - "learning_rate": 7.741869755042486e-07, - "loss": 0.2053, - "step": 18607 - }, - { - "epoch": 1.752949765667318, - "grad_norm": 0.7459710836410522, - "learning_rate": 7.736045115951252e-07, - "loss": 0.2031, - "step": 18608 - }, - { - "epoch": 1.753043969760486, - "grad_norm": 0.6663244366645813, - "learning_rate": 7.73022258062086e-07, - "loss": 0.1704, - "step": 18609 - }, - { - "epoch": 1.753138173853654, - "grad_norm": 0.6225571036338806, - "learning_rate": 7.724402149184107e-07, - "loss": 0.1868, - "step": 18610 - }, - { - "epoch": 1.7532323779468217, - "grad_norm": 0.572202205657959, - "learning_rate": 7.718583821773695e-07, - "loss": 0.1762, - "step": 18611 - }, - { - "epoch": 1.7533265820399895, - "grad_norm": 0.6788207292556763, - "learning_rate": 7.712767598522275e-07, - "loss": 0.184, - "step": 18612 - }, - { - "epoch": 1.7534207861331574, - "grad_norm": 0.6373607516288757, - "learning_rate": 7.706953479562473e-07, - "loss": 0.1987, - "step": 18613 - }, - { - "epoch": 1.7535149902263254, - "grad_norm": 0.6726533770561218, - "learning_rate": 7.701141465026896e-07, - "loss": 0.1733, - "step": 18614 - }, - { - "epoch": 
1.7536091943194931, - "grad_norm": 0.762245237827301, - "learning_rate": 7.695331555048024e-07, - "loss": 0.2021, - "step": 18615 - }, - { - "epoch": 1.7537033984126609, - "grad_norm": 0.6375834941864014, - "learning_rate": 7.689523749758332e-07, - "loss": 0.1786, - "step": 18616 - }, - { - "epoch": 1.7537976025058288, - "grad_norm": 0.6688347458839417, - "learning_rate": 7.683718049290267e-07, - "loss": 0.2117, - "step": 18617 - }, - { - "epoch": 1.7538918065989968, - "grad_norm": 0.6413112878799438, - "learning_rate": 7.677914453776203e-07, - "loss": 0.1768, - "step": 18618 - }, - { - "epoch": 1.7539860106921645, - "grad_norm": 0.5684576034545898, - "learning_rate": 7.672112963348466e-07, - "loss": 0.1615, - "step": 18619 - }, - { - "epoch": 1.7540802147853323, - "grad_norm": 0.6422943472862244, - "learning_rate": 7.666313578139328e-07, - "loss": 0.1704, - "step": 18620 - }, - { - "epoch": 1.7541744188785002, - "grad_norm": 0.6456118226051331, - "learning_rate": 7.660516298281062e-07, - "loss": 0.1861, - "step": 18621 - }, - { - "epoch": 1.7542686229716682, - "grad_norm": 0.6674378514289856, - "learning_rate": 7.654721123905806e-07, - "loss": 0.203, - "step": 18622 - }, - { - "epoch": 1.754362827064836, - "grad_norm": 0.6868914365768433, - "learning_rate": 7.648928055145733e-07, - "loss": 0.2268, - "step": 18623 - }, - { - "epoch": 1.7544570311580037, - "grad_norm": 0.7327835559844971, - "learning_rate": 7.643137092132935e-07, - "loss": 0.203, - "step": 18624 - }, - { - "epoch": 1.7545512352511716, - "grad_norm": 0.7301385998725891, - "learning_rate": 7.637348234999431e-07, - "loss": 0.2093, - "step": 18625 - }, - { - "epoch": 1.7546454393443396, - "grad_norm": 0.6672301888465881, - "learning_rate": 7.631561483877226e-07, - "loss": 0.1895, - "step": 18626 - }, - { - "epoch": 1.7547396434375073, - "grad_norm": 0.7064858675003052, - "learning_rate": 7.62577683889828e-07, - "loss": 0.1912, - "step": 18627 - }, - { - "epoch": 1.754833847530675, - "grad_norm": 0.6245591044425964, - "learning_rate": 7.619994300194478e-07, - "loss": 0.1825, - "step": 18628 - }, - { - "epoch": 1.754928051623843, - "grad_norm": 0.6343905925750732, - "learning_rate": 7.614213867897668e-07, - "loss": 0.2124, - "step": 18629 - }, - { - "epoch": 1.755022255717011, - "grad_norm": 0.646212100982666, - "learning_rate": 7.60843554213967e-07, - "loss": 0.1878, - "step": 18630 - }, - { - "epoch": 1.7551164598101787, - "grad_norm": 0.6522904634475708, - "learning_rate": 7.602659323052231e-07, - "loss": 0.1724, - "step": 18631 - }, - { - "epoch": 1.7552106639033465, - "grad_norm": 0.6088020205497742, - "learning_rate": 7.596885210767024e-07, - "loss": 0.1728, - "step": 18632 - }, - { - "epoch": 1.7553048679965144, - "grad_norm": 0.6309208869934082, - "learning_rate": 7.591113205415779e-07, - "loss": 0.1772, - "step": 18633 - }, - { - "epoch": 1.7553990720896824, - "grad_norm": 0.6297697424888611, - "learning_rate": 7.585343307130055e-07, - "loss": 0.176, - "step": 18634 - }, - { - "epoch": 1.7554932761828501, - "grad_norm": 0.5904176235198975, - "learning_rate": 7.579575516041415e-07, - "loss": 0.1702, - "step": 18635 - }, - { - "epoch": 1.7555874802760179, - "grad_norm": 0.610162079334259, - "learning_rate": 7.573809832281376e-07, - "loss": 0.1641, - "step": 18636 - }, - { - "epoch": 1.7556816843691858, - "grad_norm": 0.662539541721344, - "learning_rate": 7.568046255981432e-07, - "loss": 0.1811, - "step": 18637 - }, - { - "epoch": 1.7557758884623538, - "grad_norm": 0.6001244187355042, - "learning_rate": 
7.562284787272978e-07, - "loss": 0.1746, - "step": 18638 - }, - { - "epoch": 1.7558700925555215, - "grad_norm": 0.6731216907501221, - "learning_rate": 7.556525426287376e-07, - "loss": 0.2095, - "step": 18639 - }, - { - "epoch": 1.7559642966486892, - "grad_norm": 0.6648710370063782, - "learning_rate": 7.550768173155975e-07, - "loss": 0.1788, - "step": 18640 - }, - { - "epoch": 1.7560585007418572, - "grad_norm": 0.6438722610473633, - "learning_rate": 7.545013028010028e-07, - "loss": 0.1609, - "step": 18641 - }, - { - "epoch": 1.7561527048350252, - "grad_norm": 0.7448234558105469, - "learning_rate": 7.539259990980763e-07, - "loss": 0.2226, - "step": 18642 - }, - { - "epoch": 1.756246908928193, - "grad_norm": 0.6625121831893921, - "learning_rate": 7.533509062199384e-07, - "loss": 0.1822, - "step": 18643 - }, - { - "epoch": 1.7563411130213606, - "grad_norm": 0.8268623352050781, - "learning_rate": 7.527760241796978e-07, - "loss": 0.2109, - "step": 18644 - }, - { - "epoch": 1.7564353171145286, - "grad_norm": 0.677430272102356, - "learning_rate": 7.522013529904671e-07, - "loss": 0.2259, - "step": 18645 - }, - { - "epoch": 1.7565295212076966, - "grad_norm": 0.6224123239517212, - "learning_rate": 7.516268926653458e-07, - "loss": 0.1739, - "step": 18646 - }, - { - "epoch": 1.7566237253008643, - "grad_norm": 0.6237516403198242, - "learning_rate": 7.510526432174336e-07, - "loss": 0.1794, - "step": 18647 - }, - { - "epoch": 1.756717929394032, - "grad_norm": 0.611696183681488, - "learning_rate": 7.504786046598278e-07, - "loss": 0.1806, - "step": 18648 - }, - { - "epoch": 1.7568121334872, - "grad_norm": 0.7928467988967896, - "learning_rate": 7.499047770056123e-07, - "loss": 0.2075, - "step": 18649 - }, - { - "epoch": 1.756906337580368, - "grad_norm": 0.6605842709541321, - "learning_rate": 7.493311602678732e-07, - "loss": 0.2053, - "step": 18650 - }, - { - "epoch": 1.7570005416735357, - "grad_norm": 0.6715100407600403, - "learning_rate": 7.487577544596913e-07, - "loss": 0.19, - "step": 18651 - }, - { - "epoch": 1.7570947457667034, - "grad_norm": 0.741716742515564, - "learning_rate": 7.481845595941383e-07, - "loss": 0.2028, - "step": 18652 - }, - { - "epoch": 1.7571889498598714, - "grad_norm": 0.6929696798324585, - "learning_rate": 7.476115756842861e-07, - "loss": 0.2194, - "step": 18653 - }, - { - "epoch": 1.7572831539530394, - "grad_norm": 0.6631684303283691, - "learning_rate": 7.470388027432007e-07, - "loss": 0.2077, - "step": 18654 - }, - { - "epoch": 1.757377358046207, - "grad_norm": 0.6561174392700195, - "learning_rate": 7.464662407839408e-07, - "loss": 0.1861, - "step": 18655 - }, - { - "epoch": 1.7574715621393748, - "grad_norm": 0.6470728516578674, - "learning_rate": 7.458938898195601e-07, - "loss": 0.1884, - "step": 18656 - }, - { - "epoch": 1.7575657662325428, - "grad_norm": 0.6355688571929932, - "learning_rate": 7.453217498631093e-07, - "loss": 0.2058, - "step": 18657 - }, - { - "epoch": 1.7576599703257108, - "grad_norm": 0.7344101667404175, - "learning_rate": 7.447498209276382e-07, - "loss": 0.1986, - "step": 18658 - }, - { - "epoch": 1.7577541744188785, - "grad_norm": 0.6645457148551941, - "learning_rate": 7.441781030261819e-07, - "loss": 0.1789, - "step": 18659 - }, - { - "epoch": 1.7578483785120462, - "grad_norm": 0.6591945886611938, - "learning_rate": 7.436065961717797e-07, - "loss": 0.2197, - "step": 18660 - }, - { - "epoch": 1.7579425826052142, - "grad_norm": 0.6482409238815308, - "learning_rate": 7.430353003774638e-07, - "loss": 0.1952, - "step": 18661 - }, - { - "epoch": 
1.7580367866983821, - "grad_norm": 0.7033007144927979, - "learning_rate": 7.424642156562578e-07, - "loss": 0.1828, - "step": 18662 - }, - { - "epoch": 1.7581309907915499, - "grad_norm": 0.6775185465812683, - "learning_rate": 7.418933420211849e-07, - "loss": 0.2068, - "step": 18663 - }, - { - "epoch": 1.7582251948847176, - "grad_norm": 0.6388871669769287, - "learning_rate": 7.413226794852635e-07, - "loss": 0.1761, - "step": 18664 - }, - { - "epoch": 1.7583193989778856, - "grad_norm": 0.626383900642395, - "learning_rate": 7.40752228061502e-07, - "loss": 0.187, - "step": 18665 - }, - { - "epoch": 1.7584136030710535, - "grad_norm": 0.6561257243156433, - "learning_rate": 7.40181987762909e-07, - "loss": 0.1919, - "step": 18666 - }, - { - "epoch": 1.7585078071642213, - "grad_norm": 0.6641181111335754, - "learning_rate": 7.396119586024897e-07, - "loss": 0.2131, - "step": 18667 - }, - { - "epoch": 1.758602011257389, - "grad_norm": 0.5611432790756226, - "learning_rate": 7.39042140593238e-07, - "loss": 0.1754, - "step": 18668 - }, - { - "epoch": 1.758696215350557, - "grad_norm": 0.6419809460639954, - "learning_rate": 7.384725337481458e-07, - "loss": 0.2138, - "step": 18669 - }, - { - "epoch": 1.758790419443725, - "grad_norm": 0.6619876623153687, - "learning_rate": 7.37903138080206e-07, - "loss": 0.2042, - "step": 18670 - }, - { - "epoch": 1.7588846235368927, - "grad_norm": 0.6841108202934265, - "learning_rate": 7.373339536023982e-07, - "loss": 0.1991, - "step": 18671 - }, - { - "epoch": 1.7589788276300604, - "grad_norm": 0.6848726272583008, - "learning_rate": 7.367649803276988e-07, - "loss": 0.1862, - "step": 18672 - }, - { - "epoch": 1.7590730317232284, - "grad_norm": 0.6747719645500183, - "learning_rate": 7.361962182690863e-07, - "loss": 0.1852, - "step": 18673 - }, - { - "epoch": 1.7591672358163963, - "grad_norm": 0.6555435657501221, - "learning_rate": 7.356276674395269e-07, - "loss": 0.1659, - "step": 18674 - }, - { - "epoch": 1.759261439909564, - "grad_norm": 0.7199277281761169, - "learning_rate": 7.350593278519824e-07, - "loss": 0.2054, - "step": 18675 - }, - { - "epoch": 1.7593556440027318, - "grad_norm": 0.6618119478225708, - "learning_rate": 7.344911995194149e-07, - "loss": 0.1674, - "step": 18676 - }, - { - "epoch": 1.7594498480958998, - "grad_norm": 0.7032886147499084, - "learning_rate": 7.339232824547782e-07, - "loss": 0.2016, - "step": 18677 - }, - { - "epoch": 1.7595440521890677, - "grad_norm": 0.692939281463623, - "learning_rate": 7.333555766710188e-07, - "loss": 0.2168, - "step": 18678 - }, - { - "epoch": 1.7596382562822355, - "grad_norm": 0.6232210397720337, - "learning_rate": 7.32788082181084e-07, - "loss": 0.1994, - "step": 18679 - }, - { - "epoch": 1.7597324603754032, - "grad_norm": 0.6903019547462463, - "learning_rate": 7.322207989979146e-07, - "loss": 0.2051, - "step": 18680 - }, - { - "epoch": 1.7598266644685712, - "grad_norm": 0.8852872252464294, - "learning_rate": 7.316537271344426e-07, - "loss": 0.1994, - "step": 18681 - }, - { - "epoch": 1.7599208685617391, - "grad_norm": 0.638813316822052, - "learning_rate": 7.310868666035986e-07, - "loss": 0.1615, - "step": 18682 - }, - { - "epoch": 1.7600150726549069, - "grad_norm": 0.6534923911094666, - "learning_rate": 7.305202174183112e-07, - "loss": 0.1899, - "step": 18683 - }, - { - "epoch": 1.7601092767480746, - "grad_norm": 0.7521393299102783, - "learning_rate": 7.299537795914958e-07, - "loss": 0.2198, - "step": 18684 - }, - { - "epoch": 1.7602034808412426, - "grad_norm": 0.6901976466178894, - "learning_rate": 
7.293875531360728e-07, - "loss": 0.1765, - "step": 18685 - }, - { - "epoch": 1.7602976849344105, - "grad_norm": 0.6153367161750793, - "learning_rate": 7.2882153806495e-07, - "loss": 0.1736, - "step": 18686 - }, - { - "epoch": 1.7603918890275783, - "grad_norm": 0.7112525701522827, - "learning_rate": 7.282557343910335e-07, - "loss": 0.2419, - "step": 18687 - }, - { - "epoch": 1.760486093120746, - "grad_norm": 0.6666744947433472, - "learning_rate": 7.276901421272264e-07, - "loss": 0.1875, - "step": 18688 - }, - { - "epoch": 1.760580297213914, - "grad_norm": 0.5716570019721985, - "learning_rate": 7.27124761286423e-07, - "loss": 0.1697, - "step": 18689 - }, - { - "epoch": 1.760674501307082, - "grad_norm": 0.6441033482551575, - "learning_rate": 7.265595918815149e-07, - "loss": 0.1911, - "step": 18690 - }, - { - "epoch": 1.7607687054002497, - "grad_norm": 0.7376765608787537, - "learning_rate": 7.25994633925392e-07, - "loss": 0.1789, - "step": 18691 - }, - { - "epoch": 1.7608629094934174, - "grad_norm": 0.5842069387435913, - "learning_rate": 7.254298874309328e-07, - "loss": 0.1747, - "step": 18692 - }, - { - "epoch": 1.7609571135865854, - "grad_norm": 0.7579582333564758, - "learning_rate": 7.248653524110172e-07, - "loss": 0.2121, - "step": 18693 - }, - { - "epoch": 1.7610513176797533, - "grad_norm": 0.6068270802497864, - "learning_rate": 7.243010288785135e-07, - "loss": 0.1719, - "step": 18694 - }, - { - "epoch": 1.761145521772921, - "grad_norm": 0.6542767882347107, - "learning_rate": 7.237369168462937e-07, - "loss": 0.1911, - "step": 18695 - }, - { - "epoch": 1.7612397258660888, - "grad_norm": 0.6807528734207153, - "learning_rate": 7.231730163272166e-07, - "loss": 0.1853, - "step": 18696 - }, - { - "epoch": 1.7613339299592567, - "grad_norm": 0.6296195387840271, - "learning_rate": 7.226093273341406e-07, - "loss": 0.1906, - "step": 18697 - }, - { - "epoch": 1.7614281340524247, - "grad_norm": 0.6691365242004395, - "learning_rate": 7.220458498799221e-07, - "loss": 0.1941, - "step": 18698 - }, - { - "epoch": 1.7615223381455924, - "grad_norm": 0.6610446572303772, - "learning_rate": 7.214825839774053e-07, - "loss": 0.1996, - "step": 18699 - }, - { - "epoch": 1.7616165422387602, - "grad_norm": 0.8709343671798706, - "learning_rate": 7.209195296394356e-07, - "loss": 0.2024, - "step": 18700 - }, - { - "epoch": 1.7617107463319281, - "grad_norm": 0.7031115293502808, - "learning_rate": 7.203566868788514e-07, - "loss": 0.1927, - "step": 18701 - }, - { - "epoch": 1.761804950425096, - "grad_norm": 0.662255585193634, - "learning_rate": 7.197940557084848e-07, - "loss": 0.2048, - "step": 18702 - }, - { - "epoch": 1.7618991545182638, - "grad_norm": 0.7295622825622559, - "learning_rate": 7.192316361411666e-07, - "loss": 0.2029, - "step": 18703 - }, - { - "epoch": 1.7619933586114316, - "grad_norm": 0.5789169073104858, - "learning_rate": 7.18669428189721e-07, - "loss": 0.159, - "step": 18704 - }, - { - "epoch": 1.7620875627045995, - "grad_norm": 0.627537190914154, - "learning_rate": 7.181074318669645e-07, - "loss": 0.1804, - "step": 18705 - }, - { - "epoch": 1.7621817667977675, - "grad_norm": 0.6640955209732056, - "learning_rate": 7.175456471857134e-07, - "loss": 0.1793, - "step": 18706 - }, - { - "epoch": 1.7622759708909352, - "grad_norm": 0.6473426818847656, - "learning_rate": 7.169840741587797e-07, - "loss": 0.2012, - "step": 18707 - }, - { - "epoch": 1.762370174984103, - "grad_norm": 0.6217076182365417, - "learning_rate": 7.164227127989643e-07, - "loss": 0.1752, - "step": 18708 - }, - { - "epoch": 1.762464379077271, 
- "grad_norm": 0.6201004385948181, - "learning_rate": 7.158615631190657e-07, - "loss": 0.1701, - "step": 18709 - }, - { - "epoch": 1.762558583170439, - "grad_norm": 0.6264468431472778, - "learning_rate": 7.15300625131885e-07, - "loss": 0.1692, - "step": 18710 - }, - { - "epoch": 1.7626527872636066, - "grad_norm": 0.6493064165115356, - "learning_rate": 7.147398988502086e-07, - "loss": 0.1864, - "step": 18711 - }, - { - "epoch": 1.7627469913567744, - "grad_norm": 0.8641001582145691, - "learning_rate": 7.141793842868194e-07, - "loss": 0.1627, - "step": 18712 - }, - { - "epoch": 1.7628411954499423, - "grad_norm": 0.6677326560020447, - "learning_rate": 7.136190814545052e-07, - "loss": 0.2123, - "step": 18713 - }, - { - "epoch": 1.7629353995431103, - "grad_norm": 0.5769965648651123, - "learning_rate": 7.130589903660368e-07, - "loss": 0.1812, - "step": 18714 - }, - { - "epoch": 1.763029603636278, - "grad_norm": 0.663173258304596, - "learning_rate": 7.124991110341839e-07, - "loss": 0.1851, - "step": 18715 - }, - { - "epoch": 1.7631238077294458, - "grad_norm": 0.6828328371047974, - "learning_rate": 7.119394434717152e-07, - "loss": 0.2102, - "step": 18716 - }, - { - "epoch": 1.7632180118226137, - "grad_norm": 0.7221760749816895, - "learning_rate": 7.11379987691393e-07, - "loss": 0.1936, - "step": 18717 - }, - { - "epoch": 1.7633122159157817, - "grad_norm": 0.6994985342025757, - "learning_rate": 7.10820743705971e-07, - "loss": 0.2159, - "step": 18718 - }, - { - "epoch": 1.7634064200089494, - "grad_norm": 0.663030743598938, - "learning_rate": 7.102617115282018e-07, - "loss": 0.1965, - "step": 18719 - }, - { - "epoch": 1.7635006241021172, - "grad_norm": 0.6177852749824524, - "learning_rate": 7.097028911708337e-07, - "loss": 0.1795, - "step": 18720 - }, - { - "epoch": 1.7635948281952851, - "grad_norm": 0.6149967312812805, - "learning_rate": 7.091442826466055e-07, - "loss": 0.2113, - "step": 18721 - }, - { - "epoch": 1.763689032288453, - "grad_norm": 0.6708802580833435, - "learning_rate": 7.08585885968257e-07, - "loss": 0.2216, - "step": 18722 - }, - { - "epoch": 1.7637832363816208, - "grad_norm": 0.7028321623802185, - "learning_rate": 7.080277011485204e-07, - "loss": 0.1943, - "step": 18723 - }, - { - "epoch": 1.7638774404747886, - "grad_norm": 0.6543398499488831, - "learning_rate": 7.074697282001219e-07, - "loss": 0.2065, - "step": 18724 - }, - { - "epoch": 1.7639716445679565, - "grad_norm": 0.6498114466667175, - "learning_rate": 7.06911967135786e-07, - "loss": 0.2022, - "step": 18725 - }, - { - "epoch": 1.7640658486611245, - "grad_norm": 0.6824131011962891, - "learning_rate": 7.06354417968228e-07, - "loss": 0.1957, - "step": 18726 - }, - { - "epoch": 1.7641600527542922, - "grad_norm": 0.5951253771781921, - "learning_rate": 7.057970807101621e-07, - "loss": 0.1615, - "step": 18727 - }, - { - "epoch": 1.76425425684746, - "grad_norm": 0.6944735050201416, - "learning_rate": 7.052399553742972e-07, - "loss": 0.1955, - "step": 18728 - }, - { - "epoch": 1.764348460940628, - "grad_norm": 0.684751570224762, - "learning_rate": 7.046830419733353e-07, - "loss": 0.1782, - "step": 18729 - }, - { - "epoch": 1.7644426650337959, - "grad_norm": 0.7559695839881897, - "learning_rate": 7.04126340519975e-07, - "loss": 0.2014, - "step": 18730 - }, - { - "epoch": 1.7645368691269636, - "grad_norm": 0.5982239246368408, - "learning_rate": 7.035698510269129e-07, - "loss": 0.1676, - "step": 18731 - }, - { - "epoch": 1.7646310732201314, - "grad_norm": 0.6184801459312439, - "learning_rate": 7.030135735068333e-07, - "loss": 0.1653, - 
"step": 18732 - }, - { - "epoch": 1.7647252773132993, - "grad_norm": 0.6522563099861145, - "learning_rate": 7.024575079724239e-07, - "loss": 0.188, - "step": 18733 - }, - { - "epoch": 1.7648194814064673, - "grad_norm": 0.6570615768432617, - "learning_rate": 7.019016544363599e-07, - "loss": 0.1918, - "step": 18734 - }, - { - "epoch": 1.764913685499635, - "grad_norm": 0.661007821559906, - "learning_rate": 7.013460129113203e-07, - "loss": 0.1744, - "step": 18735 - }, - { - "epoch": 1.7650078895928027, - "grad_norm": 0.679739773273468, - "learning_rate": 7.007905834099715e-07, - "loss": 0.1797, - "step": 18736 - }, - { - "epoch": 1.7651020936859707, - "grad_norm": 0.7122992873191833, - "learning_rate": 7.002353659449779e-07, - "loss": 0.1935, - "step": 18737 - }, - { - "epoch": 1.7651962977791387, - "grad_norm": 0.6649571657180786, - "learning_rate": 6.996803605290015e-07, - "loss": 0.2127, - "step": 18738 - }, - { - "epoch": 1.7652905018723064, - "grad_norm": 0.6347363591194153, - "learning_rate": 6.991255671746955e-07, - "loss": 0.1867, - "step": 18739 - }, - { - "epoch": 1.7653847059654741, - "grad_norm": 0.6770117878913879, - "learning_rate": 6.985709858947099e-07, - "loss": 0.1727, - "step": 18740 - }, - { - "epoch": 1.765478910058642, - "grad_norm": 0.722261369228363, - "learning_rate": 6.980166167016922e-07, - "loss": 0.2064, - "step": 18741 - }, - { - "epoch": 1.7655731141518098, - "grad_norm": 0.6550199389457703, - "learning_rate": 6.974624596082802e-07, - "loss": 0.1881, - "step": 18742 - }, - { - "epoch": 1.7656673182449776, - "grad_norm": 0.6317087411880493, - "learning_rate": 6.969085146271116e-07, - "loss": 0.1966, - "step": 18743 - }, - { - "epoch": 1.7657615223381455, - "grad_norm": 0.6629270315170288, - "learning_rate": 6.963547817708171e-07, - "loss": 0.2014, - "step": 18744 - }, - { - "epoch": 1.7658557264313135, - "grad_norm": 0.6230559349060059, - "learning_rate": 6.958012610520215e-07, - "loss": 0.1863, - "step": 18745 - }, - { - "epoch": 1.7659499305244812, - "grad_norm": 0.618025004863739, - "learning_rate": 6.952479524833444e-07, - "loss": 0.1758, - "step": 18746 - }, - { - "epoch": 1.766044134617649, - "grad_norm": 0.6751699447631836, - "learning_rate": 6.946948560774059e-07, - "loss": 0.2253, - "step": 18747 - }, - { - "epoch": 1.766138338710817, - "grad_norm": 0.6590555310249329, - "learning_rate": 6.941419718468168e-07, - "loss": 0.2017, - "step": 18748 - }, - { - "epoch": 1.766232542803985, - "grad_norm": 0.6804190874099731, - "learning_rate": 6.935892998041782e-07, - "loss": 0.1949, - "step": 18749 - }, - { - "epoch": 1.7663267468971526, - "grad_norm": 0.6995640397071838, - "learning_rate": 6.930368399621001e-07, - "loss": 0.2027, - "step": 18750 - }, - { - "epoch": 1.7664209509903204, - "grad_norm": 0.6769449710845947, - "learning_rate": 6.924845923331758e-07, - "loss": 0.2004, - "step": 18751 - }, - { - "epoch": 1.7665151550834883, - "grad_norm": 0.6506038904190063, - "learning_rate": 6.919325569299939e-07, - "loss": 0.176, - "step": 18752 - }, - { - "epoch": 1.7666093591766563, - "grad_norm": 0.6735564470291138, - "learning_rate": 6.913807337651479e-07, - "loss": 0.2084, - "step": 18753 - }, - { - "epoch": 1.766703563269824, - "grad_norm": 2.2290399074554443, - "learning_rate": 6.908291228512165e-07, - "loss": 0.1817, - "step": 18754 - }, - { - "epoch": 1.7667977673629918, - "grad_norm": 0.7361657023429871, - "learning_rate": 6.902777242007775e-07, - "loss": 0.2233, - "step": 18755 - }, - { - "epoch": 1.7668919714561597, - "grad_norm": 0.7112666368484497, - 
"learning_rate": 6.897265378264039e-07, - "loss": 0.2168, - "step": 18756 - }, - { - "epoch": 1.7669861755493277, - "grad_norm": 1.0875725746154785, - "learning_rate": 6.89175563740665e-07, - "loss": 0.2086, - "step": 18757 - }, - { - "epoch": 1.7670803796424954, - "grad_norm": 0.736074686050415, - "learning_rate": 6.886248019561215e-07, - "loss": 0.1983, - "step": 18758 - }, - { - "epoch": 1.7671745837356632, - "grad_norm": 0.6421671509742737, - "learning_rate": 6.880742524853323e-07, - "loss": 0.2058, - "step": 18759 - }, - { - "epoch": 1.7672687878288311, - "grad_norm": 0.8324503898620605, - "learning_rate": 6.875239153408541e-07, - "loss": 0.1933, - "step": 18760 - }, - { - "epoch": 1.767362991921999, - "grad_norm": 0.6461583375930786, - "learning_rate": 6.869737905352303e-07, - "loss": 0.2001, - "step": 18761 - }, - { - "epoch": 1.7674571960151668, - "grad_norm": 0.6334694623947144, - "learning_rate": 6.864238780810062e-07, - "loss": 0.2183, - "step": 18762 - }, - { - "epoch": 1.7675514001083346, - "grad_norm": 0.6211568713188171, - "learning_rate": 6.85874177990724e-07, - "loss": 0.1882, - "step": 18763 - }, - { - "epoch": 1.7676456042015025, - "grad_norm": 0.7000827193260193, - "learning_rate": 6.853246902769129e-07, - "loss": 0.1908, - "step": 18764 - }, - { - "epoch": 1.7677398082946705, - "grad_norm": 0.6585289835929871, - "learning_rate": 6.847754149521069e-07, - "loss": 0.202, - "step": 18765 - }, - { - "epoch": 1.7678340123878382, - "grad_norm": 0.6150330901145935, - "learning_rate": 6.84226352028825e-07, - "loss": 0.1888, - "step": 18766 - }, - { - "epoch": 1.767928216481006, - "grad_norm": 0.8438021540641785, - "learning_rate": 6.836775015195895e-07, - "loss": 0.1794, - "step": 18767 - }, - { - "epoch": 1.768022420574174, - "grad_norm": 0.6473048329353333, - "learning_rate": 6.831288634369171e-07, - "loss": 0.2077, - "step": 18768 - }, - { - "epoch": 1.7681166246673419, - "grad_norm": 0.6201759576797485, - "learning_rate": 6.825804377933142e-07, - "loss": 0.1901, - "step": 18769 - }, - { - "epoch": 1.7682108287605096, - "grad_norm": 0.6857398152351379, - "learning_rate": 6.820322246012856e-07, - "loss": 0.2226, - "step": 18770 - }, - { - "epoch": 1.7683050328536773, - "grad_norm": 0.661655068397522, - "learning_rate": 6.814842238733354e-07, - "loss": 0.2176, - "step": 18771 - }, - { - "epoch": 1.7683992369468453, - "grad_norm": 0.67503422498703, - "learning_rate": 6.80936435621955e-07, - "loss": 0.1948, - "step": 18772 - }, - { - "epoch": 1.7684934410400133, - "grad_norm": 0.6564956903457642, - "learning_rate": 6.803888598596364e-07, - "loss": 0.1822, - "step": 18773 - }, - { - "epoch": 1.768587645133181, - "grad_norm": 0.693681001663208, - "learning_rate": 6.798414965988643e-07, - "loss": 0.1914, - "step": 18774 - }, - { - "epoch": 1.7686818492263487, - "grad_norm": 0.6001759171485901, - "learning_rate": 6.792943458521206e-07, - "loss": 0.192, - "step": 18775 - }, - { - "epoch": 1.7687760533195167, - "grad_norm": 0.6142124533653259, - "learning_rate": 6.787474076318801e-07, - "loss": 0.1932, - "step": 18776 - }, - { - "epoch": 1.7688702574126847, - "grad_norm": 0.6608695983886719, - "learning_rate": 6.782006819506137e-07, - "loss": 0.169, - "step": 18777 - }, - { - "epoch": 1.7689644615058524, - "grad_norm": 1.1633796691894531, - "learning_rate": 6.776541688207905e-07, - "loss": 0.1945, - "step": 18778 - }, - { - "epoch": 1.7690586655990201, - "grad_norm": 0.6649760007858276, - "learning_rate": 6.77107868254867e-07, - "loss": 0.1766, - "step": 18779 - }, - { - "epoch": 
1.769152869692188, - "grad_norm": 0.6783766150474548, - "learning_rate": 6.765617802653024e-07, - "loss": 0.1836, - "step": 18780 - }, - { - "epoch": 1.769247073785356, - "grad_norm": 0.701332151889801, - "learning_rate": 6.760159048645499e-07, - "loss": 0.1972, - "step": 18781 - }, - { - "epoch": 1.7693412778785238, - "grad_norm": 0.6640893816947937, - "learning_rate": 6.754702420650527e-07, - "loss": 0.1672, - "step": 18782 - }, - { - "epoch": 1.7694354819716915, - "grad_norm": 0.7054677605628967, - "learning_rate": 6.749247918792556e-07, - "loss": 0.1914, - "step": 18783 - }, - { - "epoch": 1.7695296860648595, - "grad_norm": 0.6651020050048828, - "learning_rate": 6.743795543195953e-07, - "loss": 0.1852, - "step": 18784 - }, - { - "epoch": 1.7696238901580275, - "grad_norm": 0.7026653289794922, - "learning_rate": 6.738345293985038e-07, - "loss": 0.1886, - "step": 18785 - }, - { - "epoch": 1.7697180942511952, - "grad_norm": 0.6611322164535522, - "learning_rate": 6.732897171284059e-07, - "loss": 0.1805, - "step": 18786 - }, - { - "epoch": 1.769812298344363, - "grad_norm": 0.6668574810028076, - "learning_rate": 6.727451175217282e-07, - "loss": 0.1885, - "step": 18787 - }, - { - "epoch": 1.769906502437531, - "grad_norm": 0.6399896144866943, - "learning_rate": 6.722007305908873e-07, - "loss": 0.1729, - "step": 18788 - }, - { - "epoch": 1.7700007065306989, - "grad_norm": 0.6232643127441406, - "learning_rate": 6.716565563482925e-07, - "loss": 0.186, - "step": 18789 - }, - { - "epoch": 1.7700949106238666, - "grad_norm": 0.6482470035552979, - "learning_rate": 6.71112594806358e-07, - "loss": 0.1938, - "step": 18790 - }, - { - "epoch": 1.7701891147170343, - "grad_norm": 0.6848767399787903, - "learning_rate": 6.705688459774839e-07, - "loss": 0.1835, - "step": 18791 - }, - { - "epoch": 1.7702833188102023, - "grad_norm": 0.8514116406440735, - "learning_rate": 6.70025309874065e-07, - "loss": 0.1972, - "step": 18792 - }, - { - "epoch": 1.7703775229033702, - "grad_norm": 0.6496803760528564, - "learning_rate": 6.694819865085012e-07, - "loss": 0.1751, - "step": 18793 - }, - { - "epoch": 1.770471726996538, - "grad_norm": 0.699759304523468, - "learning_rate": 6.689388758931781e-07, - "loss": 0.1821, - "step": 18794 - }, - { - "epoch": 1.7705659310897057, - "grad_norm": 0.6508598327636719, - "learning_rate": 6.68395978040477e-07, - "loss": 0.1952, - "step": 18795 - }, - { - "epoch": 1.7706601351828737, - "grad_norm": 0.6039749979972839, - "learning_rate": 6.678532929627801e-07, - "loss": 0.1792, - "step": 18796 - }, - { - "epoch": 1.7707543392760416, - "grad_norm": 0.6619493365287781, - "learning_rate": 6.673108206724621e-07, - "loss": 0.1999, - "step": 18797 - }, - { - "epoch": 1.7708485433692094, - "grad_norm": 0.5428429841995239, - "learning_rate": 6.667685611818886e-07, - "loss": 0.1648, - "step": 18798 - }, - { - "epoch": 1.7709427474623771, - "grad_norm": 0.6618406772613525, - "learning_rate": 6.662265145034263e-07, - "loss": 0.1957, - "step": 18799 - }, - { - "epoch": 1.771036951555545, - "grad_norm": 0.6611621975898743, - "learning_rate": 6.656846806494366e-07, - "loss": 0.2116, - "step": 18800 - }, - { - "epoch": 1.771131155648713, - "grad_norm": 0.6667648553848267, - "learning_rate": 6.651430596322695e-07, - "loss": 0.2, - "step": 18801 - }, - { - "epoch": 1.7712253597418808, - "grad_norm": 0.6235458254814148, - "learning_rate": 6.646016514642784e-07, - "loss": 0.1848, - "step": 18802 - }, - { - "epoch": 1.7713195638350485, - "grad_norm": 0.6254004836082458, - "learning_rate": 6.640604561578079e-07, 
- "loss": 0.1878, - "step": 18803 - }, - { - "epoch": 1.7714137679282165, - "grad_norm": 0.663865864276886, - "learning_rate": 6.635194737251959e-07, - "loss": 0.2085, - "step": 18804 - }, - { - "epoch": 1.7715079720213844, - "grad_norm": 0.6067942380905151, - "learning_rate": 6.629787041787805e-07, - "loss": 0.177, - "step": 18805 - }, - { - "epoch": 1.7716021761145522, - "grad_norm": 0.6628295183181763, - "learning_rate": 6.624381475308883e-07, - "loss": 0.2121, - "step": 18806 - }, - { - "epoch": 1.77169638020772, - "grad_norm": 0.6948346495628357, - "learning_rate": 6.618978037938484e-07, - "loss": 0.211, - "step": 18807 - }, - { - "epoch": 1.7717905843008879, - "grad_norm": 0.6519178152084351, - "learning_rate": 6.613576729799809e-07, - "loss": 0.2031, - "step": 18808 - }, - { - "epoch": 1.7718847883940558, - "grad_norm": 0.6935162544250488, - "learning_rate": 6.608177551016004e-07, - "loss": 0.1988, - "step": 18809 - }, - { - "epoch": 1.7719789924872236, - "grad_norm": 0.6876966953277588, - "learning_rate": 6.602780501710193e-07, - "loss": 0.1954, - "step": 18810 - }, - { - "epoch": 1.7720731965803913, - "grad_norm": 0.6378141641616821, - "learning_rate": 6.597385582005411e-07, - "loss": 0.1895, - "step": 18811 - }, - { - "epoch": 1.7721674006735593, - "grad_norm": 0.6201403737068176, - "learning_rate": 6.591992792024693e-07, - "loss": 0.1738, - "step": 18812 - }, - { - "epoch": 1.7722616047667272, - "grad_norm": 0.6032953858375549, - "learning_rate": 6.586602131891007e-07, - "loss": 0.2068, - "step": 18813 - }, - { - "epoch": 1.772355808859895, - "grad_norm": 0.6335887908935547, - "learning_rate": 6.581213601727243e-07, - "loss": 0.1885, - "step": 18814 - }, - { - "epoch": 1.7724500129530627, - "grad_norm": 0.6177152991294861, - "learning_rate": 6.575827201656304e-07, - "loss": 0.1835, - "step": 18815 - }, - { - "epoch": 1.7725442170462307, - "grad_norm": 0.6353725790977478, - "learning_rate": 6.570442931800969e-07, - "loss": 0.2003, - "step": 18816 - }, - { - "epoch": 1.7726384211393986, - "grad_norm": 0.6191625595092773, - "learning_rate": 6.565060792284028e-07, - "loss": 0.1756, - "step": 18817 - }, - { - "epoch": 1.7727326252325664, - "grad_norm": 0.6994245052337646, - "learning_rate": 6.559680783228217e-07, - "loss": 0.2076, - "step": 18818 - }, - { - "epoch": 1.772826829325734, - "grad_norm": 0.6597037315368652, - "learning_rate": 6.554302904756182e-07, - "loss": 0.1897, - "step": 18819 - }, - { - "epoch": 1.772921033418902, - "grad_norm": 0.6516751050949097, - "learning_rate": 6.548927156990548e-07, - "loss": 0.2202, - "step": 18820 - }, - { - "epoch": 1.77301523751207, - "grad_norm": 0.6519423127174377, - "learning_rate": 6.543553540053926e-07, - "loss": 0.21, - "step": 18821 - }, - { - "epoch": 1.7731094416052378, - "grad_norm": 0.6038287281990051, - "learning_rate": 6.538182054068787e-07, - "loss": 0.1818, - "step": 18822 - }, - { - "epoch": 1.7732036456984055, - "grad_norm": 0.6104490756988525, - "learning_rate": 6.532812699157653e-07, - "loss": 0.1644, - "step": 18823 - }, - { - "epoch": 1.7732978497915735, - "grad_norm": 0.6544206738471985, - "learning_rate": 6.52744547544295e-07, - "loss": 0.1995, - "step": 18824 - }, - { - "epoch": 1.7733920538847414, - "grad_norm": 0.6738982200622559, - "learning_rate": 6.522080383047047e-07, - "loss": 0.2134, - "step": 18825 - }, - { - "epoch": 1.7734862579779092, - "grad_norm": 0.6365258097648621, - "learning_rate": 6.516717422092245e-07, - "loss": 0.226, - "step": 18826 - }, - { - "epoch": 1.773580462071077, - "grad_norm": 
0.6545007824897766, - "learning_rate": 6.51135659270089e-07, - "loss": 0.1955, - "step": 18827 - }, - { - "epoch": 1.7736746661642449, - "grad_norm": 0.8270087838172913, - "learning_rate": 6.505997894995187e-07, - "loss": 0.2051, - "step": 18828 - }, - { - "epoch": 1.7737688702574128, - "grad_norm": 0.674146294593811, - "learning_rate": 6.500641329097302e-07, - "loss": 0.202, - "step": 18829 - }, - { - "epoch": 1.7738630743505805, - "grad_norm": 0.7583029866218567, - "learning_rate": 6.495286895129415e-07, - "loss": 0.1984, - "step": 18830 - }, - { - "epoch": 1.7739572784437483, - "grad_norm": 0.6309941411018372, - "learning_rate": 6.489934593213598e-07, - "loss": 0.1569, - "step": 18831 - }, - { - "epoch": 1.7740514825369162, - "grad_norm": 0.6320933699607849, - "learning_rate": 6.484584423471852e-07, - "loss": 0.1927, - "step": 18832 - }, - { - "epoch": 1.7741456866300842, - "grad_norm": 0.6087074279785156, - "learning_rate": 6.479236386026244e-07, - "loss": 0.1782, - "step": 18833 - }, - { - "epoch": 1.774239890723252, - "grad_norm": 0.691861093044281, - "learning_rate": 6.47389048099868e-07, - "loss": 0.2143, - "step": 18834 - }, - { - "epoch": 1.7743340948164197, - "grad_norm": 0.7514015436172485, - "learning_rate": 6.468546708511037e-07, - "loss": 0.2197, - "step": 18835 - }, - { - "epoch": 1.7744282989095876, - "grad_norm": 0.6492466926574707, - "learning_rate": 6.463205068685174e-07, - "loss": 0.1603, - "step": 18836 - }, - { - "epoch": 1.7745225030027556, - "grad_norm": 0.6837599277496338, - "learning_rate": 6.457865561642907e-07, - "loss": 0.2147, - "step": 18837 - }, - { - "epoch": 1.7746167070959233, - "grad_norm": 0.6451874375343323, - "learning_rate": 6.452528187505969e-07, - "loss": 0.1651, - "step": 18838 - }, - { - "epoch": 1.774710911189091, - "grad_norm": 0.7161141037940979, - "learning_rate": 6.447192946396052e-07, - "loss": 0.2112, - "step": 18839 - }, - { - "epoch": 1.774805115282259, - "grad_norm": 0.6483317613601685, - "learning_rate": 6.441859838434827e-07, - "loss": 0.1829, - "step": 18840 - }, - { - "epoch": 1.774899319375427, - "grad_norm": 0.7365931272506714, - "learning_rate": 6.436528863743885e-07, - "loss": 0.1826, - "step": 18841 - }, - { - "epoch": 1.7749935234685947, - "grad_norm": 0.6644814610481262, - "learning_rate": 6.431200022444773e-07, - "loss": 0.1862, - "step": 18842 - }, - { - "epoch": 1.7750877275617625, - "grad_norm": 0.6445244550704956, - "learning_rate": 6.425873314659037e-07, - "loss": 0.1986, - "step": 18843 - }, - { - "epoch": 1.7751819316549304, - "grad_norm": 0.6263026595115662, - "learning_rate": 6.42054874050807e-07, - "loss": 0.2123, - "step": 18844 - }, - { - "epoch": 1.7752761357480984, - "grad_norm": 0.6531116366386414, - "learning_rate": 6.415226300113342e-07, - "loss": 0.181, - "step": 18845 - }, - { - "epoch": 1.7753703398412661, - "grad_norm": 0.6579196453094482, - "learning_rate": 6.409905993596178e-07, - "loss": 0.2073, - "step": 18846 - }, - { - "epoch": 1.7754645439344339, - "grad_norm": 0.5932646989822388, - "learning_rate": 6.404587821077879e-07, - "loss": 0.1665, - "step": 18847 - }, - { - "epoch": 1.7755587480276018, - "grad_norm": 0.6194208264350891, - "learning_rate": 6.399271782679762e-07, - "loss": 0.1814, - "step": 18848 - }, - { - "epoch": 1.7756529521207698, - "grad_norm": 0.5777454376220703, - "learning_rate": 6.393957878522972e-07, - "loss": 0.1754, - "step": 18849 - }, - { - "epoch": 1.7757471562139375, - "grad_norm": 0.7162359356880188, - "learning_rate": 6.388646108728724e-07, - "loss": 0.1936, - "step": 
18850 - }, - { - "epoch": 1.7758413603071053, - "grad_norm": 0.6331944465637207, - "learning_rate": 6.383336473418111e-07, - "loss": 0.1895, - "step": 18851 - }, - { - "epoch": 1.7759355644002732, - "grad_norm": 0.652292013168335, - "learning_rate": 6.378028972712202e-07, - "loss": 0.2272, - "step": 18852 - }, - { - "epoch": 1.7760297684934412, - "grad_norm": 0.7276932001113892, - "learning_rate": 6.372723606732046e-07, - "loss": 0.1998, - "step": 18853 - }, - { - "epoch": 1.776123972586609, - "grad_norm": 0.5779160261154175, - "learning_rate": 6.367420375598554e-07, - "loss": 0.1627, - "step": 18854 - }, - { - "epoch": 1.7762181766797767, - "grad_norm": 0.6181293725967407, - "learning_rate": 6.36211927943271e-07, - "loss": 0.1708, - "step": 18855 - }, - { - "epoch": 1.7763123807729446, - "grad_norm": 0.6790673136711121, - "learning_rate": 6.35682031835535e-07, - "loss": 0.2167, - "step": 18856 - }, - { - "epoch": 1.7764065848661126, - "grad_norm": 0.6449689865112305, - "learning_rate": 6.3515234924873e-07, - "loss": 0.2031, - "step": 18857 - }, - { - "epoch": 1.7765007889592803, - "grad_norm": 0.6832816004753113, - "learning_rate": 6.346228801949372e-07, - "loss": 0.2348, - "step": 18858 - }, - { - "epoch": 1.776594993052448, - "grad_norm": 0.685299277305603, - "learning_rate": 6.340936246862239e-07, - "loss": 0.1738, - "step": 18859 - }, - { - "epoch": 1.776689197145616, - "grad_norm": 0.6643345952033997, - "learning_rate": 6.335645827346604e-07, - "loss": 0.1897, - "step": 18860 - }, - { - "epoch": 1.776783401238784, - "grad_norm": 0.6404387950897217, - "learning_rate": 6.330357543523125e-07, - "loss": 0.1921, - "step": 18861 - }, - { - "epoch": 1.7768776053319517, - "grad_norm": 0.66966313123703, - "learning_rate": 6.325071395512328e-07, - "loss": 0.1945, - "step": 18862 - }, - { - "epoch": 1.7769718094251195, - "grad_norm": 0.6696110963821411, - "learning_rate": 6.319787383434783e-07, - "loss": 0.2189, - "step": 18863 - }, - { - "epoch": 1.7770660135182874, - "grad_norm": 0.695486307144165, - "learning_rate": 6.314505507410984e-07, - "loss": 0.2251, - "step": 18864 - }, - { - "epoch": 1.7771602176114554, - "grad_norm": 0.6277366280555725, - "learning_rate": 6.309225767561345e-07, - "loss": 0.1937, - "step": 18865 - }, - { - "epoch": 1.777254421704623, - "grad_norm": 0.631859302520752, - "learning_rate": 6.303948164006212e-07, - "loss": 0.1868, - "step": 18866 - }, - { - "epoch": 1.7773486257977908, - "grad_norm": 0.7215867042541504, - "learning_rate": 6.298672696866004e-07, - "loss": 0.1877, - "step": 18867 - }, - { - "epoch": 1.7774428298909588, - "grad_norm": 0.6455090641975403, - "learning_rate": 6.293399366260977e-07, - "loss": 0.1855, - "step": 18868 - }, - { - "epoch": 1.7775370339841268, - "grad_norm": 0.6297513842582703, - "learning_rate": 6.288128172311326e-07, - "loss": 0.1885, - "step": 18869 - }, - { - "epoch": 1.7776312380772945, - "grad_norm": 0.7350378632545471, - "learning_rate": 6.282859115137308e-07, - "loss": 0.2221, - "step": 18870 - }, - { - "epoch": 1.7777254421704622, - "grad_norm": 0.62552410364151, - "learning_rate": 6.277592194859039e-07, - "loss": 0.1835, - "step": 18871 - }, - { - "epoch": 1.7778196462636302, - "grad_norm": 0.6194043159484863, - "learning_rate": 6.27232741159659e-07, - "loss": 0.1974, - "step": 18872 - }, - { - "epoch": 1.7779138503567982, - "grad_norm": 0.6858185529708862, - "learning_rate": 6.267064765470055e-07, - "loss": 0.1927, - "step": 18873 - }, - { - "epoch": 1.778008054449966, - "grad_norm": 0.627632200717926, - "learning_rate": 
6.261804256599402e-07, - "loss": 0.2018, - "step": 18874 - }, - { - "epoch": 1.7781022585431336, - "grad_norm": 0.6912941336631775, - "learning_rate": 6.25654588510457e-07, - "loss": 0.1982, - "step": 18875 - }, - { - "epoch": 1.7781964626363016, - "grad_norm": 0.6505584120750427, - "learning_rate": 6.251289651105464e-07, - "loss": 0.1927, - "step": 18876 - }, - { - "epoch": 1.7782906667294696, - "grad_norm": 0.7614810466766357, - "learning_rate": 6.246035554721963e-07, - "loss": 0.2277, - "step": 18877 - }, - { - "epoch": 1.7783848708226373, - "grad_norm": 0.6901644468307495, - "learning_rate": 6.240783596073819e-07, - "loss": 0.1836, - "step": 18878 - }, - { - "epoch": 1.778479074915805, - "grad_norm": 0.6384099125862122, - "learning_rate": 6.235533775280811e-07, - "loss": 0.1885, - "step": 18879 - }, - { - "epoch": 1.778573279008973, - "grad_norm": 0.6384451389312744, - "learning_rate": 6.230286092462667e-07, - "loss": 0.1858, - "step": 18880 - }, - { - "epoch": 1.7786674831021407, - "grad_norm": 0.6098473072052002, - "learning_rate": 6.225040547738992e-07, - "loss": 0.1979, - "step": 18881 - }, - { - "epoch": 1.7787616871953085, - "grad_norm": 0.635390043258667, - "learning_rate": 6.219797141229422e-07, - "loss": 0.2048, - "step": 18882 - }, - { - "epoch": 1.7788558912884764, - "grad_norm": 0.6741024851799011, - "learning_rate": 6.21455587305353e-07, - "loss": 0.1782, - "step": 18883 - }, - { - "epoch": 1.7789500953816444, - "grad_norm": 0.7610291242599487, - "learning_rate": 6.209316743330796e-07, - "loss": 0.1835, - "step": 18884 - }, - { - "epoch": 1.7790442994748121, - "grad_norm": 0.7101349234580994, - "learning_rate": 6.204079752180692e-07, - "loss": 0.1999, - "step": 18885 - }, - { - "epoch": 1.7791385035679799, - "grad_norm": 0.641293466091156, - "learning_rate": 6.198844899722623e-07, - "loss": 0.1997, - "step": 18886 - }, - { - "epoch": 1.7792327076611478, - "grad_norm": 0.6606078147888184, - "learning_rate": 6.193612186075948e-07, - "loss": 0.2157, - "step": 18887 - }, - { - "epoch": 1.7793269117543158, - "grad_norm": 0.7069306969642639, - "learning_rate": 6.188381611360006e-07, - "loss": 0.1989, - "step": 18888 - }, - { - "epoch": 1.7794211158474835, - "grad_norm": 0.6216179132461548, - "learning_rate": 6.183153175694034e-07, - "loss": 0.2156, - "step": 18889 - }, - { - "epoch": 1.7795153199406513, - "grad_norm": 0.7265723347663879, - "learning_rate": 6.177926879197271e-07, - "loss": 0.1902, - "step": 18890 - }, - { - "epoch": 1.7796095240338192, - "grad_norm": 0.7844327092170715, - "learning_rate": 6.172702721988866e-07, - "loss": 0.2204, - "step": 18891 - }, - { - "epoch": 1.7797037281269872, - "grad_norm": 0.7905182838439941, - "learning_rate": 6.167480704187944e-07, - "loss": 0.1886, - "step": 18892 - }, - { - "epoch": 1.779797932220155, - "grad_norm": 0.692022442817688, - "learning_rate": 6.162260825913591e-07, - "loss": 0.1888, - "step": 18893 - }, - { - "epoch": 1.7798921363133227, - "grad_norm": 0.6562528014183044, - "learning_rate": 6.157043087284797e-07, - "loss": 0.2017, - "step": 18894 - }, - { - "epoch": 1.7799863404064906, - "grad_norm": 0.7171071171760559, - "learning_rate": 6.15182748842057e-07, - "loss": 0.2045, - "step": 18895 - }, - { - "epoch": 1.7800805444996586, - "grad_norm": 0.6339259743690491, - "learning_rate": 6.14661402943979e-07, - "loss": 0.1747, - "step": 18896 - }, - { - "epoch": 1.7801747485928263, - "grad_norm": 0.6818625926971436, - "learning_rate": 6.141402710461363e-07, - "loss": 0.2319, - "step": 18897 - }, - { - "epoch": 
1.780268952685994, - "grad_norm": 0.6656239628791809, - "learning_rate": 6.136193531604128e-07, - "loss": 0.1867, - "step": 18898 - }, - { - "epoch": 1.780363156779162, - "grad_norm": 0.6257752180099487, - "learning_rate": 6.130986492986835e-07, - "loss": 0.1755, - "step": 18899 - }, - { - "epoch": 1.78045736087233, - "grad_norm": 0.647856593132019, - "learning_rate": 6.125781594728208e-07, - "loss": 0.1818, - "step": 18900 - }, - { - "epoch": 1.7805515649654977, - "grad_norm": 0.673330545425415, - "learning_rate": 6.120578836946956e-07, - "loss": 0.2133, - "step": 18901 - }, - { - "epoch": 1.7806457690586655, - "grad_norm": 0.6279948949813843, - "learning_rate": 6.115378219761681e-07, - "loss": 0.1814, - "step": 18902 - }, - { - "epoch": 1.7807399731518334, - "grad_norm": 0.6680616736412048, - "learning_rate": 6.110179743290979e-07, - "loss": 0.1902, - "step": 18903 - }, - { - "epoch": 1.7808341772450014, - "grad_norm": 0.6360419988632202, - "learning_rate": 6.104983407653397e-07, - "loss": 0.1885, - "step": 18904 - }, - { - "epoch": 1.780928381338169, - "grad_norm": 0.6680687069892883, - "learning_rate": 6.099789212967411e-07, - "loss": 0.204, - "step": 18905 - }, - { - "epoch": 1.7810225854313368, - "grad_norm": 0.6434610486030579, - "learning_rate": 6.094597159351412e-07, - "loss": 0.2015, - "step": 18906 - }, - { - "epoch": 1.7811167895245048, - "grad_norm": 0.670667290687561, - "learning_rate": 6.089407246923861e-07, - "loss": 0.1918, - "step": 18907 - }, - { - "epoch": 1.7812109936176728, - "grad_norm": 0.5744823217391968, - "learning_rate": 6.084219475803055e-07, - "loss": 0.1582, - "step": 18908 - }, - { - "epoch": 1.7813051977108405, - "grad_norm": 0.6056269407272339, - "learning_rate": 6.079033846107262e-07, - "loss": 0.1756, - "step": 18909 - }, - { - "epoch": 1.7813994018040082, - "grad_norm": 0.7023749351501465, - "learning_rate": 6.07385035795478e-07, - "loss": 0.1877, - "step": 18910 - }, - { - "epoch": 1.7814936058971762, - "grad_norm": 0.6520842909812927, - "learning_rate": 6.068669011463768e-07, - "loss": 0.1901, - "step": 18911 - }, - { - "epoch": 1.7815878099903442, - "grad_norm": 0.7571308016777039, - "learning_rate": 6.063489806752332e-07, - "loss": 0.2238, - "step": 18912 - }, - { - "epoch": 1.781682014083512, - "grad_norm": 0.6574953198432922, - "learning_rate": 6.058312743938643e-07, - "loss": 0.2013, - "step": 18913 - }, - { - "epoch": 1.7817762181766796, - "grad_norm": 0.670661211013794, - "learning_rate": 6.053137823140709e-07, - "loss": 0.171, - "step": 18914 - }, - { - "epoch": 1.7818704222698476, - "grad_norm": 0.668809711933136, - "learning_rate": 6.047965044476501e-07, - "loss": 0.1783, - "step": 18915 - }, - { - "epoch": 1.7819646263630156, - "grad_norm": 0.630757749080658, - "learning_rate": 6.042794408064001e-07, - "loss": 0.1935, - "step": 18916 - }, - { - "epoch": 1.7820588304561833, - "grad_norm": 0.6640926599502563, - "learning_rate": 6.037625914021106e-07, - "loss": 0.1649, - "step": 18917 - }, - { - "epoch": 1.782153034549351, - "grad_norm": 0.666729211807251, - "learning_rate": 6.032459562465631e-07, - "loss": 0.2147, - "step": 18918 - }, - { - "epoch": 1.782247238642519, - "grad_norm": 0.6913374066352844, - "learning_rate": 6.027295353515417e-07, - "loss": 0.1813, - "step": 18919 - }, - { - "epoch": 1.782341442735687, - "grad_norm": 0.7098759412765503, - "learning_rate": 6.022133287288201e-07, - "loss": 0.2107, - "step": 18920 - }, - { - "epoch": 1.7824356468288547, - "grad_norm": 1.4247428178787231, - "learning_rate": 6.016973363901679e-07, - 
"loss": 0.2083, - "step": 18921 - }, - { - "epoch": 1.7825298509220224, - "grad_norm": 0.6035720109939575, - "learning_rate": 6.011815583473512e-07, - "loss": 0.1628, - "step": 18922 - }, - { - "epoch": 1.7826240550151904, - "grad_norm": 0.6486483812332153, - "learning_rate": 6.006659946121318e-07, - "loss": 0.2028, - "step": 18923 - }, - { - "epoch": 1.7827182591083583, - "grad_norm": 0.6193046569824219, - "learning_rate": 6.001506451962624e-07, - "loss": 0.1745, - "step": 18924 - }, - { - "epoch": 1.782812463201526, - "grad_norm": 0.6953390836715698, - "learning_rate": 5.996355101114981e-07, - "loss": 0.2003, - "step": 18925 - }, - { - "epoch": 1.7829066672946938, - "grad_norm": 0.6082496047019958, - "learning_rate": 5.991205893695795e-07, - "loss": 0.1648, - "step": 18926 - }, - { - "epoch": 1.7830008713878618, - "grad_norm": 0.6720678210258484, - "learning_rate": 5.986058829822516e-07, - "loss": 0.1852, - "step": 18927 - }, - { - "epoch": 1.7830950754810297, - "grad_norm": 0.6750378608703613, - "learning_rate": 5.980913909612485e-07, - "loss": 0.2034, - "step": 18928 - }, - { - "epoch": 1.7831892795741975, - "grad_norm": 0.7238875031471252, - "learning_rate": 5.975771133183006e-07, - "loss": 0.2009, - "step": 18929 - }, - { - "epoch": 1.7832834836673652, - "grad_norm": 0.6554574966430664, - "learning_rate": 5.970630500651386e-07, - "loss": 0.2014, - "step": 18930 - }, - { - "epoch": 1.7833776877605332, - "grad_norm": 0.6924046874046326, - "learning_rate": 5.965492012134788e-07, - "loss": 0.2403, - "step": 18931 - }, - { - "epoch": 1.7834718918537011, - "grad_norm": 0.6402032375335693, - "learning_rate": 5.960355667750395e-07, - "loss": 0.1738, - "step": 18932 - }, - { - "epoch": 1.7835660959468689, - "grad_norm": 0.7946323752403259, - "learning_rate": 5.955221467615346e-07, - "loss": 0.1985, - "step": 18933 - }, - { - "epoch": 1.7836603000400366, - "grad_norm": 0.7671371102333069, - "learning_rate": 5.950089411846683e-07, - "loss": 0.1942, - "step": 18934 - }, - { - "epoch": 1.7837545041332046, - "grad_norm": 0.7086248993873596, - "learning_rate": 5.944959500561442e-07, - "loss": 0.2018, - "step": 18935 - }, - { - "epoch": 1.7838487082263725, - "grad_norm": 0.6174538135528564, - "learning_rate": 5.939831733876567e-07, - "loss": 0.1913, - "step": 18936 - }, - { - "epoch": 1.7839429123195403, - "grad_norm": 0.6665925979614258, - "learning_rate": 5.934706111908994e-07, - "loss": 0.1845, - "step": 18937 - }, - { - "epoch": 1.784037116412708, - "grad_norm": 0.6362730264663696, - "learning_rate": 5.929582634775611e-07, - "loss": 0.1804, - "step": 18938 - }, - { - "epoch": 1.784131320505876, - "grad_norm": 0.6679801344871521, - "learning_rate": 5.924461302593221e-07, - "loss": 0.1816, - "step": 18939 - }, - { - "epoch": 1.784225524599044, - "grad_norm": 0.8629591464996338, - "learning_rate": 5.919342115478599e-07, - "loss": 0.208, - "step": 18940 - }, - { - "epoch": 1.7843197286922117, - "grad_norm": 0.6771908402442932, - "learning_rate": 5.914225073548485e-07, - "loss": 0.1816, - "step": 18941 - }, - { - "epoch": 1.7844139327853794, - "grad_norm": 0.6483758687973022, - "learning_rate": 5.90911017691953e-07, - "loss": 0.1789, - "step": 18942 - }, - { - "epoch": 1.7845081368785474, - "grad_norm": 0.620595395565033, - "learning_rate": 5.903997425708385e-07, - "loss": 0.1938, - "step": 18943 - }, - { - "epoch": 1.7846023409717153, - "grad_norm": 0.6206614971160889, - "learning_rate": 5.898886820031635e-07, - "loss": 0.1972, - "step": 18944 - }, - { - "epoch": 1.784696545064883, - "grad_norm": 
0.6920983791351318, - "learning_rate": 5.893778360005786e-07, - "loss": 0.2124, - "step": 18945 - }, - { - "epoch": 1.7847907491580508, - "grad_norm": 0.5786925554275513, - "learning_rate": 5.888672045747313e-07, - "loss": 0.1786, - "step": 18946 - }, - { - "epoch": 1.7848849532512188, - "grad_norm": 0.621561586856842, - "learning_rate": 5.883567877372687e-07, - "loss": 0.2001, - "step": 18947 - }, - { - "epoch": 1.7849791573443867, - "grad_norm": 0.6603097319602966, - "learning_rate": 5.878465854998261e-07, - "loss": 0.2111, - "step": 18948 - }, - { - "epoch": 1.7850733614375545, - "grad_norm": 0.6399976015090942, - "learning_rate": 5.873365978740353e-07, - "loss": 0.1638, - "step": 18949 - }, - { - "epoch": 1.7851675655307222, - "grad_norm": 0.6775232553482056, - "learning_rate": 5.868268248715292e-07, - "loss": 0.2016, - "step": 18950 - }, - { - "epoch": 1.7852617696238902, - "grad_norm": 0.6444012522697449, - "learning_rate": 5.863172665039308e-07, - "loss": 0.1836, - "step": 18951 - }, - { - "epoch": 1.7853559737170581, - "grad_norm": 0.5827812552452087, - "learning_rate": 5.858079227828539e-07, - "loss": 0.1704, - "step": 18952 - }, - { - "epoch": 1.7854501778102259, - "grad_norm": 0.8426401019096375, - "learning_rate": 5.852987937199172e-07, - "loss": 0.1883, - "step": 18953 - }, - { - "epoch": 1.7855443819033936, - "grad_norm": 0.6850066184997559, - "learning_rate": 5.847898793267292e-07, - "loss": 0.2335, - "step": 18954 - }, - { - "epoch": 1.7856385859965616, - "grad_norm": 0.6605983972549438, - "learning_rate": 5.842811796148906e-07, - "loss": 0.208, - "step": 18955 - }, - { - "epoch": 1.7857327900897295, - "grad_norm": 0.7833412885665894, - "learning_rate": 5.837726945960032e-07, - "loss": 0.1941, - "step": 18956 - }, - { - "epoch": 1.7858269941828973, - "grad_norm": 0.651619017124176, - "learning_rate": 5.832644242816632e-07, - "loss": 0.1953, - "step": 18957 - }, - { - "epoch": 1.785921198276065, - "grad_norm": 0.652336835861206, - "learning_rate": 5.827563686834548e-07, - "loss": 0.1763, - "step": 18958 - }, - { - "epoch": 1.786015402369233, - "grad_norm": 0.6696328520774841, - "learning_rate": 5.822485278129664e-07, - "loss": 0.1847, - "step": 18959 - }, - { - "epoch": 1.786109606462401, - "grad_norm": 0.6166433691978455, - "learning_rate": 5.817409016817765e-07, - "loss": 0.1649, - "step": 18960 - }, - { - "epoch": 1.7862038105555686, - "grad_norm": 0.6885592937469482, - "learning_rate": 5.812334903014593e-07, - "loss": 0.1897, - "step": 18961 - }, - { - "epoch": 1.7862980146487364, - "grad_norm": 0.6394980549812317, - "learning_rate": 5.807262936835845e-07, - "loss": 0.1814, - "step": 18962 - }, - { - "epoch": 1.7863922187419043, - "grad_norm": 0.5924050807952881, - "learning_rate": 5.802193118397193e-07, - "loss": 0.1835, - "step": 18963 - }, - { - "epoch": 1.7864864228350723, - "grad_norm": 0.7013352513313293, - "learning_rate": 5.7971254478142e-07, - "loss": 0.1959, - "step": 18964 - }, - { - "epoch": 1.78658062692824, - "grad_norm": 0.6453135013580322, - "learning_rate": 5.792059925202443e-07, - "loss": 0.1701, - "step": 18965 - }, - { - "epoch": 1.7866748310214078, - "grad_norm": 0.665113091468811, - "learning_rate": 5.786996550677415e-07, - "loss": 0.2228, - "step": 18966 - }, - { - "epoch": 1.7867690351145757, - "grad_norm": 0.6630818247795105, - "learning_rate": 5.781935324354571e-07, - "loss": 0.1952, - "step": 18967 - }, - { - "epoch": 1.7868632392077437, - "grad_norm": 0.6618506908416748, - "learning_rate": 5.776876246349294e-07, - "loss": 0.1958, - "step": 
18968 - }, - { - "epoch": 1.7869574433009114, - "grad_norm": 0.7780861854553223, - "learning_rate": 5.771819316776972e-07, - "loss": 0.197, - "step": 18969 - }, - { - "epoch": 1.7870516473940792, - "grad_norm": 0.7015801072120667, - "learning_rate": 5.7667645357529e-07, - "loss": 0.1792, - "step": 18970 - }, - { - "epoch": 1.7871458514872471, - "grad_norm": 0.6162405014038086, - "learning_rate": 5.76171190339232e-07, - "loss": 0.2133, - "step": 18971 - }, - { - "epoch": 1.787240055580415, - "grad_norm": 0.6168376207351685, - "learning_rate": 5.756661419810449e-07, - "loss": 0.1964, - "step": 18972 - }, - { - "epoch": 1.7873342596735828, - "grad_norm": 0.6623192429542542, - "learning_rate": 5.751613085122465e-07, - "loss": 0.1815, - "step": 18973 - }, - { - "epoch": 1.7874284637667506, - "grad_norm": 0.6553171873092651, - "learning_rate": 5.746566899443451e-07, - "loss": 0.1782, - "step": 18974 - }, - { - "epoch": 1.7875226678599185, - "grad_norm": 0.6070336103439331, - "learning_rate": 5.741522862888483e-07, - "loss": 0.1739, - "step": 18975 - }, - { - "epoch": 1.7876168719530865, - "grad_norm": 0.6405748724937439, - "learning_rate": 5.736480975572555e-07, - "loss": 0.1778, - "step": 18976 - }, - { - "epoch": 1.7877110760462542, - "grad_norm": 0.6848527193069458, - "learning_rate": 5.731441237610646e-07, - "loss": 0.2019, - "step": 18977 - }, - { - "epoch": 1.787805280139422, - "grad_norm": 0.6570281982421875, - "learning_rate": 5.726403649117684e-07, - "loss": 0.2136, - "step": 18978 - }, - { - "epoch": 1.78789948423259, - "grad_norm": 0.7297435998916626, - "learning_rate": 5.721368210208489e-07, - "loss": 0.222, - "step": 18979 - }, - { - "epoch": 1.787993688325758, - "grad_norm": 0.6587645411491394, - "learning_rate": 5.716334920997913e-07, - "loss": 0.1749, - "step": 18980 - }, - { - "epoch": 1.7880878924189256, - "grad_norm": 0.6483520269393921, - "learning_rate": 5.711303781600719e-07, - "loss": 0.1995, - "step": 18981 - }, - { - "epoch": 1.7881820965120934, - "grad_norm": 0.6250553131103516, - "learning_rate": 5.706274792131616e-07, - "loss": 0.1928, - "step": 18982 - }, - { - "epoch": 1.7882763006052613, - "grad_norm": 0.7242453694343567, - "learning_rate": 5.701247952705269e-07, - "loss": 0.2132, - "step": 18983 - }, - { - "epoch": 1.7883705046984293, - "grad_norm": 0.67424076795578, - "learning_rate": 5.696223263436317e-07, - "loss": 0.1875, - "step": 18984 - }, - { - "epoch": 1.788464708791597, - "grad_norm": 0.6228921413421631, - "learning_rate": 5.691200724439328e-07, - "loss": 0.2008, - "step": 18985 - }, - { - "epoch": 1.7885589128847648, - "grad_norm": 0.7612162828445435, - "learning_rate": 5.686180335828773e-07, - "loss": 0.1724, - "step": 18986 - }, - { - "epoch": 1.7886531169779327, - "grad_norm": 0.845587432384491, - "learning_rate": 5.681162097719195e-07, - "loss": 0.1946, - "step": 18987 - }, - { - "epoch": 1.7887473210711007, - "grad_norm": 0.6682807207107544, - "learning_rate": 5.676146010224992e-07, - "loss": 0.2019, - "step": 18988 - }, - { - "epoch": 1.7888415251642684, - "grad_norm": 0.6140900254249573, - "learning_rate": 5.671132073460505e-07, - "loss": 0.189, - "step": 18989 - }, - { - "epoch": 1.7889357292574362, - "grad_norm": 0.683236300945282, - "learning_rate": 5.666120287540122e-07, - "loss": 0.19, - "step": 18990 - }, - { - "epoch": 1.7890299333506041, - "grad_norm": 0.6152132749557495, - "learning_rate": 5.661110652578083e-07, - "loss": 0.1844, - "step": 18991 - }, - { - "epoch": 1.789124137443772, - "grad_norm": 0.609509289264679, - "learning_rate": 
5.65610316868861e-07, - "loss": 0.1522, - "step": 18992 - }, - { - "epoch": 1.7892183415369398, - "grad_norm": 0.6877182126045227, - "learning_rate": 5.651097835985886e-07, - "loss": 0.1975, - "step": 18993 - }, - { - "epoch": 1.7893125456301076, - "grad_norm": 0.6550241112709045, - "learning_rate": 5.646094654584055e-07, - "loss": 0.1891, - "step": 18994 - }, - { - "epoch": 1.7894067497232755, - "grad_norm": 0.7051395177841187, - "learning_rate": 5.641093624597172e-07, - "loss": 0.2281, - "step": 18995 - }, - { - "epoch": 1.7895009538164435, - "grad_norm": 0.6980713605880737, - "learning_rate": 5.636094746139287e-07, - "loss": 0.2056, - "step": 18996 - }, - { - "epoch": 1.7895951579096112, - "grad_norm": 0.8167104721069336, - "learning_rate": 5.6310980193244e-07, - "loss": 0.202, - "step": 18997 - }, - { - "epoch": 1.789689362002779, - "grad_norm": 0.7286479473114014, - "learning_rate": 5.626103444266395e-07, - "loss": 0.2073, - "step": 18998 - }, - { - "epoch": 1.789783566095947, - "grad_norm": 0.7497352957725525, - "learning_rate": 5.621111021079195e-07, - "loss": 0.2161, - "step": 18999 - }, - { - "epoch": 1.7898777701891149, - "grad_norm": 0.6730028986930847, - "learning_rate": 5.61612074987663e-07, - "loss": 0.1954, - "step": 19000 - }, - { - "epoch": 1.7899719742822826, - "grad_norm": 0.5976526737213135, - "learning_rate": 5.611132630772465e-07, - "loss": 0.1686, - "step": 19001 - }, - { - "epoch": 1.7900661783754503, - "grad_norm": 0.6673054695129395, - "learning_rate": 5.606146663880463e-07, - "loss": 0.1965, - "step": 19002 - }, - { - "epoch": 1.7901603824686183, - "grad_norm": 0.6583447456359863, - "learning_rate": 5.601162849314301e-07, - "loss": 0.1917, - "step": 19003 - }, - { - "epoch": 1.7902545865617863, - "grad_norm": 0.6494748592376709, - "learning_rate": 5.59618118718761e-07, - "loss": 0.1933, - "step": 19004 - }, - { - "epoch": 1.790348790654954, - "grad_norm": 0.6123377680778503, - "learning_rate": 5.591201677613988e-07, - "loss": 0.1653, - "step": 19005 - }, - { - "epoch": 1.7904429947481217, - "grad_norm": 0.6768871545791626, - "learning_rate": 5.586224320706979e-07, - "loss": 0.2026, - "step": 19006 - }, - { - "epoch": 1.7905371988412897, - "grad_norm": 0.6274359226226807, - "learning_rate": 5.581249116580068e-07, - "loss": 0.2034, - "step": 19007 - }, - { - "epoch": 1.7906314029344577, - "grad_norm": 0.6595060229301453, - "learning_rate": 5.576276065346686e-07, - "loss": 0.1899, - "step": 19008 - }, - { - "epoch": 1.7907256070276254, - "grad_norm": 0.6125357151031494, - "learning_rate": 5.571305167120245e-07, - "loss": 0.1715, - "step": 19009 - }, - { - "epoch": 1.7908198111207931, - "grad_norm": 0.6774811148643494, - "learning_rate": 5.566336422014096e-07, - "loss": 0.191, - "step": 19010 - }, - { - "epoch": 1.790914015213961, - "grad_norm": 0.637002170085907, - "learning_rate": 5.561369830141505e-07, - "loss": 0.1784, - "step": 19011 - }, - { - "epoch": 1.791008219307129, - "grad_norm": 0.7424461841583252, - "learning_rate": 5.556405391615738e-07, - "loss": 0.1928, - "step": 19012 - }, - { - "epoch": 1.7911024234002968, - "grad_norm": 0.6591859459877014, - "learning_rate": 5.551443106549991e-07, - "loss": 0.1948, - "step": 19013 - }, - { - "epoch": 1.7911966274934645, - "grad_norm": 0.6124240756034851, - "learning_rate": 5.546482975057399e-07, - "loss": 0.1833, - "step": 19014 - }, - { - "epoch": 1.7912908315866325, - "grad_norm": 0.6331250667572021, - "learning_rate": 5.541524997251091e-07, - "loss": 0.1866, - "step": 19015 - }, - { - "epoch": 
1.7913850356798005, - "grad_norm": 0.6631602048873901, - "learning_rate": 5.536569173244078e-07, - "loss": 0.1848, - "step": 19016 - }, - { - "epoch": 1.7914792397729682, - "grad_norm": 0.6510537266731262, - "learning_rate": 5.531615503149379e-07, - "loss": 0.1703, - "step": 19017 - }, - { - "epoch": 1.791573443866136, - "grad_norm": 0.6576735377311707, - "learning_rate": 5.526663987079961e-07, - "loss": 0.1933, - "step": 19018 - }, - { - "epoch": 1.7916676479593039, - "grad_norm": 0.6598816514015198, - "learning_rate": 5.521714625148689e-07, - "loss": 0.1822, - "step": 19019 - }, - { - "epoch": 1.7917618520524716, - "grad_norm": 0.6303108930587769, - "learning_rate": 5.516767417468438e-07, - "loss": 0.1842, - "step": 19020 - }, - { - "epoch": 1.7918560561456394, - "grad_norm": 0.6936563849449158, - "learning_rate": 5.511822364152031e-07, - "loss": 0.207, - "step": 19021 - }, - { - "epoch": 1.7919502602388073, - "grad_norm": 1.0449975728988647, - "learning_rate": 5.506879465312187e-07, - "loss": 0.1773, - "step": 19022 - }, - { - "epoch": 1.7920444643319753, - "grad_norm": 0.6483970880508423, - "learning_rate": 5.501938721061628e-07, - "loss": 0.1649, - "step": 19023 - }, - { - "epoch": 1.792138668425143, - "grad_norm": 0.6974971890449524, - "learning_rate": 5.49700013151302e-07, - "loss": 0.1798, - "step": 19024 - }, - { - "epoch": 1.7922328725183108, - "grad_norm": 0.6729770302772522, - "learning_rate": 5.492063696778971e-07, - "loss": 0.1857, - "step": 19025 - }, - { - "epoch": 1.7923270766114787, - "grad_norm": 0.6208738684654236, - "learning_rate": 5.487129416971992e-07, - "loss": 0.166, - "step": 19026 - }, - { - "epoch": 1.7924212807046467, - "grad_norm": 0.6766891479492188, - "learning_rate": 5.48219729220465e-07, - "loss": 0.1979, - "step": 19027 - }, - { - "epoch": 1.7925154847978144, - "grad_norm": 0.6793447732925415, - "learning_rate": 5.477267322589397e-07, - "loss": 0.2425, - "step": 19028 - }, - { - "epoch": 1.7926096888909822, - "grad_norm": 0.6638621687889099, - "learning_rate": 5.4723395082386e-07, - "loss": 0.1736, - "step": 19029 - }, - { - "epoch": 1.7927038929841501, - "grad_norm": 0.6527950763702393, - "learning_rate": 5.46741384926468e-07, - "loss": 0.1992, - "step": 19030 - }, - { - "epoch": 1.792798097077318, - "grad_norm": 0.6407003998756409, - "learning_rate": 5.462490345779925e-07, - "loss": 0.1792, - "step": 19031 - }, - { - "epoch": 1.7928923011704858, - "grad_norm": 0.7016992568969727, - "learning_rate": 5.457568997896567e-07, - "loss": 0.2017, - "step": 19032 - }, - { - "epoch": 1.7929865052636536, - "grad_norm": 0.6246228218078613, - "learning_rate": 5.452649805726862e-07, - "loss": 0.1738, - "step": 19033 - }, - { - "epoch": 1.7930807093568215, - "grad_norm": 0.6411997079849243, - "learning_rate": 5.447732769382974e-07, - "loss": 0.1899, - "step": 19034 - }, - { - "epoch": 1.7931749134499895, - "grad_norm": 0.65321946144104, - "learning_rate": 5.442817888976992e-07, - "loss": 0.1956, - "step": 19035 - }, - { - "epoch": 1.7932691175431572, - "grad_norm": 0.6064602732658386, - "learning_rate": 5.437905164620993e-07, - "loss": 0.1716, - "step": 19036 - }, - { - "epoch": 1.793363321636325, - "grad_norm": 0.7640340924263, - "learning_rate": 5.432994596427021e-07, - "loss": 0.1923, - "step": 19037 - }, - { - "epoch": 1.793457525729493, - "grad_norm": 0.6219149231910706, - "learning_rate": 5.428086184507009e-07, - "loss": 0.1731, - "step": 19038 - }, - { - "epoch": 1.7935517298226609, - "grad_norm": 0.61652672290802, - "learning_rate": 5.423179928972878e-07, - 
"loss": 0.1758, - "step": 19039 - }, - { - "epoch": 1.7936459339158286, - "grad_norm": 0.7278186082839966, - "learning_rate": 5.418275829936537e-07, - "loss": 0.1938, - "step": 19040 - }, - { - "epoch": 1.7937401380089963, - "grad_norm": 0.6120370626449585, - "learning_rate": 5.413373887509766e-07, - "loss": 0.1806, - "step": 19041 - }, - { - "epoch": 1.7938343421021643, - "grad_norm": 0.7241223454475403, - "learning_rate": 5.40847410180434e-07, - "loss": 0.1943, - "step": 19042 - }, - { - "epoch": 1.7939285461953323, - "grad_norm": 0.6974479556083679, - "learning_rate": 5.403576472932015e-07, - "loss": 0.1824, - "step": 19043 - }, - { - "epoch": 1.7940227502885, - "grad_norm": 0.6181360483169556, - "learning_rate": 5.398681001004447e-07, - "loss": 0.1747, - "step": 19044 - }, - { - "epoch": 1.7941169543816677, - "grad_norm": 0.6469744443893433, - "learning_rate": 5.393787686133234e-07, - "loss": 0.1992, - "step": 19045 - }, - { - "epoch": 1.7942111584748357, - "grad_norm": 0.6220036745071411, - "learning_rate": 5.388896528429977e-07, - "loss": 0.1961, - "step": 19046 - }, - { - "epoch": 1.7943053625680037, - "grad_norm": 0.6217597723007202, - "learning_rate": 5.384007528006219e-07, - "loss": 0.1751, - "step": 19047 - }, - { - "epoch": 1.7943995666611714, - "grad_norm": 0.6373041272163391, - "learning_rate": 5.379120684973393e-07, - "loss": 0.1729, - "step": 19048 - }, - { - "epoch": 1.7944937707543391, - "grad_norm": 0.6712929010391235, - "learning_rate": 5.374235999442944e-07, - "loss": 0.1873, - "step": 19049 - }, - { - "epoch": 1.794587974847507, - "grad_norm": 0.6496888399124146, - "learning_rate": 5.369353471526285e-07, - "loss": 0.1811, - "step": 19050 - }, - { - "epoch": 1.794682178940675, - "grad_norm": 0.6715037226676941, - "learning_rate": 5.364473101334688e-07, - "loss": 0.1974, - "step": 19051 - }, - { - "epoch": 1.7947763830338428, - "grad_norm": 0.6432616710662842, - "learning_rate": 5.359594888979469e-07, - "loss": 0.1631, - "step": 19052 - }, - { - "epoch": 1.7948705871270105, - "grad_norm": 0.698383629322052, - "learning_rate": 5.354718834571859e-07, - "loss": 0.2134, - "step": 19053 - }, - { - "epoch": 1.7949647912201785, - "grad_norm": 0.6947939991950989, - "learning_rate": 5.349844938223026e-07, - "loss": 0.1871, - "step": 19054 - }, - { - "epoch": 1.7950589953133465, - "grad_norm": 0.654518187046051, - "learning_rate": 5.344973200044112e-07, - "loss": 0.1871, - "step": 19055 - }, - { - "epoch": 1.7951531994065142, - "grad_norm": 0.6151201128959656, - "learning_rate": 5.340103620146186e-07, - "loss": 0.1855, - "step": 19056 - }, - { - "epoch": 1.795247403499682, - "grad_norm": 0.632982075214386, - "learning_rate": 5.335236198640293e-07, - "loss": 0.2119, - "step": 19057 - }, - { - "epoch": 1.7953416075928499, - "grad_norm": 0.7302631139755249, - "learning_rate": 5.33037093563743e-07, - "loss": 0.2168, - "step": 19058 - }, - { - "epoch": 1.7954358116860178, - "grad_norm": 0.6297193765640259, - "learning_rate": 5.3255078312485e-07, - "loss": 0.1909, - "step": 19059 - }, - { - "epoch": 1.7955300157791856, - "grad_norm": 0.6735662221908569, - "learning_rate": 5.320646885584413e-07, - "loss": 0.2023, - "step": 19060 - }, - { - "epoch": 1.7956242198723533, - "grad_norm": 0.6780900359153748, - "learning_rate": 5.315788098756014e-07, - "loss": 0.1866, - "step": 19061 - }, - { - "epoch": 1.7957184239655213, - "grad_norm": 0.6257510781288147, - "learning_rate": 5.31093147087407e-07, - "loss": 0.1768, - "step": 19062 - }, - { - "epoch": 1.7958126280586892, - "grad_norm": 
0.6984760761260986, - "learning_rate": 5.306077002049326e-07, - "loss": 0.1998, - "step": 19063 - }, - { - "epoch": 1.795906832151857, - "grad_norm": 0.6464702486991882, - "learning_rate": 5.301224692392482e-07, - "loss": 0.1832, - "step": 19064 - }, - { - "epoch": 1.7960010362450247, - "grad_norm": 0.6231998205184937, - "learning_rate": 5.296374542014171e-07, - "loss": 0.191, - "step": 19065 - }, - { - "epoch": 1.7960952403381927, - "grad_norm": 0.6024487018585205, - "learning_rate": 5.291526551024951e-07, - "loss": 0.18, - "step": 19066 - }, - { - "epoch": 1.7961894444313606, - "grad_norm": 0.6694967150688171, - "learning_rate": 5.286680719535431e-07, - "loss": 0.1978, - "step": 19067 - }, - { - "epoch": 1.7962836485245284, - "grad_norm": 0.6204403042793274, - "learning_rate": 5.281837047656069e-07, - "loss": 0.1856, - "step": 19068 - }, - { - "epoch": 1.7963778526176961, - "grad_norm": 0.6918314695358276, - "learning_rate": 5.276995535497286e-07, - "loss": 0.1882, - "step": 19069 - }, - { - "epoch": 1.796472056710864, - "grad_norm": 0.6221495866775513, - "learning_rate": 5.272156183169497e-07, - "loss": 0.1921, - "step": 19070 - }, - { - "epoch": 1.796566260804032, - "grad_norm": 0.7115456461906433, - "learning_rate": 5.267318990783066e-07, - "loss": 0.1937, - "step": 19071 - }, - { - "epoch": 1.7966604648971998, - "grad_norm": 0.6703652739524841, - "learning_rate": 5.26248395844825e-07, - "loss": 0.1953, - "step": 19072 - }, - { - "epoch": 1.7967546689903675, - "grad_norm": 0.6182885766029358, - "learning_rate": 5.257651086275317e-07, - "loss": 0.1893, - "step": 19073 - }, - { - "epoch": 1.7968488730835355, - "grad_norm": 0.7072000503540039, - "learning_rate": 5.252820374374468e-07, - "loss": 0.1899, - "step": 19074 - }, - { - "epoch": 1.7969430771767034, - "grad_norm": 0.6250677704811096, - "learning_rate": 5.247991822855835e-07, - "loss": 0.1744, - "step": 19075 - }, - { - "epoch": 1.7970372812698712, - "grad_norm": 0.710290789604187, - "learning_rate": 5.243165431829522e-07, - "loss": 0.1847, - "step": 19076 - }, - { - "epoch": 1.797131485363039, - "grad_norm": 0.683290958404541, - "learning_rate": 5.238341201405605e-07, - "loss": 0.1817, - "step": 19077 - }, - { - "epoch": 1.7972256894562069, - "grad_norm": 0.6023078560829163, - "learning_rate": 5.233519131694042e-07, - "loss": 0.1756, - "step": 19078 - }, - { - "epoch": 1.7973198935493748, - "grad_norm": 0.7020680904388428, - "learning_rate": 5.228699222804801e-07, - "loss": 0.2055, - "step": 19079 - }, - { - "epoch": 1.7974140976425426, - "grad_norm": 0.6486931443214417, - "learning_rate": 5.223881474847791e-07, - "loss": 0.174, - "step": 19080 - }, - { - "epoch": 1.7975083017357103, - "grad_norm": 0.6468766331672668, - "learning_rate": 5.21906588793285e-07, - "loss": 0.1969, - "step": 19081 - }, - { - "epoch": 1.7976025058288783, - "grad_norm": 0.6431040167808533, - "learning_rate": 5.214252462169789e-07, - "loss": 0.176, - "step": 19082 - }, - { - "epoch": 1.7976967099220462, - "grad_norm": 0.5801704525947571, - "learning_rate": 5.209441197668375e-07, - "loss": 0.1646, - "step": 19083 - }, - { - "epoch": 1.797790914015214, - "grad_norm": 0.5869934558868408, - "learning_rate": 5.204632094538298e-07, - "loss": 0.1798, - "step": 19084 - }, - { - "epoch": 1.7978851181083817, - "grad_norm": 0.6227966547012329, - "learning_rate": 5.199825152889193e-07, - "loss": 0.1832, - "step": 19085 - }, - { - "epoch": 1.7979793222015497, - "grad_norm": 0.5921785831451416, - "learning_rate": 5.195020372830695e-07, - "loss": 0.1767, - "step": 19086 
- }, - { - "epoch": 1.7980735262947176, - "grad_norm": 5.5205302238464355, - "learning_rate": 5.19021775447236e-07, - "loss": 0.1777, - "step": 19087 - }, - { - "epoch": 1.7981677303878854, - "grad_norm": 0.6705521941184998, - "learning_rate": 5.185417297923678e-07, - "loss": 0.188, - "step": 19088 - }, - { - "epoch": 1.798261934481053, - "grad_norm": 0.6479392647743225, - "learning_rate": 5.180619003294107e-07, - "loss": 0.1869, - "step": 19089 - }, - { - "epoch": 1.798356138574221, - "grad_norm": 0.5793355703353882, - "learning_rate": 5.175822870693081e-07, - "loss": 0.1734, - "step": 19090 - }, - { - "epoch": 1.798450342667389, - "grad_norm": 0.5716192126274109, - "learning_rate": 5.171028900229924e-07, - "loss": 0.1715, - "step": 19091 - }, - { - "epoch": 1.7985445467605568, - "grad_norm": 0.7655090689659119, - "learning_rate": 5.16623709201397e-07, - "loss": 0.1876, - "step": 19092 - }, - { - "epoch": 1.7986387508537245, - "grad_norm": 0.6193764209747314, - "learning_rate": 5.161447446154489e-07, - "loss": 0.1659, - "step": 19093 - }, - { - "epoch": 1.7987329549468924, - "grad_norm": 0.7286919355392456, - "learning_rate": 5.156659962760657e-07, - "loss": 0.2241, - "step": 19094 - }, - { - "epoch": 1.7988271590400604, - "grad_norm": 0.6866196990013123, - "learning_rate": 5.151874641941679e-07, - "loss": 0.1928, - "step": 19095 - }, - { - "epoch": 1.7989213631332281, - "grad_norm": 0.6107556223869324, - "learning_rate": 5.147091483806621e-07, - "loss": 0.182, - "step": 19096 - }, - { - "epoch": 1.7990155672263959, - "grad_norm": 0.7106184363365173, - "learning_rate": 5.142310488464575e-07, - "loss": 0.2252, - "step": 19097 - }, - { - "epoch": 1.7991097713195638, - "grad_norm": 0.6771349906921387, - "learning_rate": 5.137531656024563e-07, - "loss": 0.1757, - "step": 19098 - }, - { - "epoch": 1.7992039754127318, - "grad_norm": 0.6582804322242737, - "learning_rate": 5.132754986595522e-07, - "loss": 0.217, - "step": 19099 - }, - { - "epoch": 1.7992981795058995, - "grad_norm": 0.647718071937561, - "learning_rate": 5.127980480286377e-07, - "loss": 0.1662, - "step": 19100 - }, - { - "epoch": 1.7993923835990673, - "grad_norm": 0.6686140894889832, - "learning_rate": 5.123208137206026e-07, - "loss": 0.2037, - "step": 19101 - }, - { - "epoch": 1.7994865876922352, - "grad_norm": 0.7159162163734436, - "learning_rate": 5.118437957463229e-07, - "loss": 0.1787, - "step": 19102 - }, - { - "epoch": 1.7995807917854032, - "grad_norm": 0.6867672204971313, - "learning_rate": 5.113669941166799e-07, - "loss": 0.1727, - "step": 19103 - }, - { - "epoch": 1.799674995878571, - "grad_norm": 0.653090238571167, - "learning_rate": 5.108904088425448e-07, - "loss": 0.2106, - "step": 19104 - }, - { - "epoch": 1.7997691999717387, - "grad_norm": 0.7055180072784424, - "learning_rate": 5.104140399347835e-07, - "loss": 0.2227, - "step": 19105 - }, - { - "epoch": 1.7998634040649066, - "grad_norm": 0.9326152801513672, - "learning_rate": 5.099378874042548e-07, - "loss": 0.2171, - "step": 19106 - }, - { - "epoch": 1.7999576081580746, - "grad_norm": 0.6862870454788208, - "learning_rate": 5.094619512618226e-07, - "loss": 0.2241, - "step": 19107 - }, - { - "epoch": 1.8000518122512423, - "grad_norm": 0.6412129998207092, - "learning_rate": 5.089862315183347e-07, - "loss": 0.1893, - "step": 19108 - }, - { - "epoch": 1.80014601634441, - "grad_norm": 0.6480741500854492, - "learning_rate": 5.085107281846369e-07, - "loss": 0.1951, - "step": 19109 - }, - { - "epoch": 1.800240220437578, - "grad_norm": 0.7141343355178833, - "learning_rate": 
5.080354412715749e-07, - "loss": 0.1978, - "step": 19110 - }, - { - "epoch": 1.800334424530746, - "grad_norm": 0.6640894412994385, - "learning_rate": 5.075603707899846e-07, - "loss": 0.2012, - "step": 19111 - }, - { - "epoch": 1.8004286286239137, - "grad_norm": 0.6541938781738281, - "learning_rate": 5.070855167506972e-07, - "loss": 0.1696, - "step": 19112 - }, - { - "epoch": 1.8005228327170815, - "grad_norm": 1.3459975719451904, - "learning_rate": 5.066108791645407e-07, - "loss": 0.197, - "step": 19113 - }, - { - "epoch": 1.8006170368102494, - "grad_norm": 0.6184680461883545, - "learning_rate": 5.0613645804234e-07, - "loss": 0.1677, - "step": 19114 - }, - { - "epoch": 1.8007112409034174, - "grad_norm": 0.6969945430755615, - "learning_rate": 5.056622533949085e-07, - "loss": 0.1861, - "step": 19115 - }, - { - "epoch": 1.8008054449965851, - "grad_norm": 0.6932345628738403, - "learning_rate": 5.05188265233062e-07, - "loss": 0.1817, - "step": 19116 - }, - { - "epoch": 1.8008996490897529, - "grad_norm": 0.6407642364501953, - "learning_rate": 5.047144935676073e-07, - "loss": 0.1787, - "step": 19117 - }, - { - "epoch": 1.8009938531829208, - "grad_norm": 0.6974245309829712, - "learning_rate": 5.042409384093461e-07, - "loss": 0.2094, - "step": 19118 - }, - { - "epoch": 1.8010880572760888, - "grad_norm": 0.7181513905525208, - "learning_rate": 5.037675997690772e-07, - "loss": 0.1934, - "step": 19119 - }, - { - "epoch": 1.8011822613692565, - "grad_norm": 0.6898902654647827, - "learning_rate": 5.032944776575943e-07, - "loss": 0.175, - "step": 19120 - }, - { - "epoch": 1.8012764654624243, - "grad_norm": 0.653157651424408, - "learning_rate": 5.028215720856821e-07, - "loss": 0.2066, - "step": 19121 - }, - { - "epoch": 1.8013706695555922, - "grad_norm": 0.7411891222000122, - "learning_rate": 5.023488830641266e-07, - "loss": 0.1762, - "step": 19122 - }, - { - "epoch": 1.8014648736487602, - "grad_norm": 0.6776013374328613, - "learning_rate": 5.018764106037066e-07, - "loss": 0.2004, - "step": 19123 - }, - { - "epoch": 1.801559077741928, - "grad_norm": 0.750198245048523, - "learning_rate": 5.014041547151927e-07, - "loss": 0.1793, - "step": 19124 - }, - { - "epoch": 1.8016532818350957, - "grad_norm": 0.7108379006385803, - "learning_rate": 5.009321154093538e-07, - "loss": 0.1869, - "step": 19125 - }, - { - "epoch": 1.8017474859282636, - "grad_norm": 0.6518669724464417, - "learning_rate": 5.004602926969515e-07, - "loss": 0.1789, - "step": 19126 - }, - { - "epoch": 1.8018416900214316, - "grad_norm": 0.6811784505844116, - "learning_rate": 4.999886865887483e-07, - "loss": 0.2039, - "step": 19127 - }, - { - "epoch": 1.8019358941145993, - "grad_norm": 0.7192262411117554, - "learning_rate": 4.995172970954943e-07, - "loss": 0.2265, - "step": 19128 - }, - { - "epoch": 1.802030098207767, - "grad_norm": 0.6727216243743896, - "learning_rate": 4.990461242279377e-07, - "loss": 0.2054, - "step": 19129 - }, - { - "epoch": 1.802124302300935, - "grad_norm": 0.6997501850128174, - "learning_rate": 4.985751679968243e-07, - "loss": 0.2029, - "step": 19130 - }, - { - "epoch": 1.802218506394103, - "grad_norm": 0.5826748609542847, - "learning_rate": 4.981044284128911e-07, - "loss": 0.179, - "step": 19131 - }, - { - "epoch": 1.8023127104872707, - "grad_norm": 0.663162112236023, - "learning_rate": 4.976339054868706e-07, - "loss": 0.1897, - "step": 19132 - }, - { - "epoch": 1.8024069145804384, - "grad_norm": 0.6346247792243958, - "learning_rate": 4.971635992294943e-07, - "loss": 0.1799, - "step": 19133 - }, - { - "epoch": 
1.8025011186736064, - "grad_norm": 0.6651972532272339, - "learning_rate": 4.966935096514835e-07, - "loss": 0.2241, - "step": 19134 - }, - { - "epoch": 1.8025953227667744, - "grad_norm": 0.6187891364097595, - "learning_rate": 4.962236367635597e-07, - "loss": 0.1736, - "step": 19135 - }, - { - "epoch": 1.802689526859942, - "grad_norm": 0.6713681817054749, - "learning_rate": 4.957539805764322e-07, - "loss": 0.2125, - "step": 19136 - }, - { - "epoch": 1.8027837309531098, - "grad_norm": 0.6716582775115967, - "learning_rate": 4.952845411008133e-07, - "loss": 0.1806, - "step": 19137 - }, - { - "epoch": 1.8028779350462778, - "grad_norm": 0.6291553974151611, - "learning_rate": 4.94815318347408e-07, - "loss": 0.1781, - "step": 19138 - }, - { - "epoch": 1.8029721391394458, - "grad_norm": 0.6568560004234314, - "learning_rate": 4.94346312326911e-07, - "loss": 0.1638, - "step": 19139 - }, - { - "epoch": 1.8030663432326135, - "grad_norm": 0.6333551406860352, - "learning_rate": 4.938775230500192e-07, - "loss": 0.1971, - "step": 19140 - }, - { - "epoch": 1.8031605473257812, - "grad_norm": 0.6884417533874512, - "learning_rate": 4.934089505274231e-07, - "loss": 0.2009, - "step": 19141 - }, - { - "epoch": 1.8032547514189492, - "grad_norm": 0.7167954444885254, - "learning_rate": 4.92940594769804e-07, - "loss": 0.1796, - "step": 19142 - }, - { - "epoch": 1.8033489555121172, - "grad_norm": 0.783322811126709, - "learning_rate": 4.924724557878413e-07, - "loss": 0.2155, - "step": 19143 - }, - { - "epoch": 1.803443159605285, - "grad_norm": 0.7364895939826965, - "learning_rate": 4.920045335922119e-07, - "loss": 0.2048, - "step": 19144 - }, - { - "epoch": 1.8035373636984526, - "grad_norm": 0.6621474027633667, - "learning_rate": 4.915368281935839e-07, - "loss": 0.1866, - "step": 19145 - }, - { - "epoch": 1.8036315677916206, - "grad_norm": 0.6693629622459412, - "learning_rate": 4.910693396026189e-07, - "loss": 0.1939, - "step": 19146 - }, - { - "epoch": 1.8037257718847886, - "grad_norm": 0.7286280989646912, - "learning_rate": 4.906020678299816e-07, - "loss": 0.1841, - "step": 19147 - }, - { - "epoch": 1.8038199759779563, - "grad_norm": 0.5911803245544434, - "learning_rate": 4.901350128863236e-07, - "loss": 0.1701, - "step": 19148 - }, - { - "epoch": 1.803914180071124, - "grad_norm": 0.6541381478309631, - "learning_rate": 4.896681747822929e-07, - "loss": 0.1726, - "step": 19149 - }, - { - "epoch": 1.804008384164292, - "grad_norm": 0.6095507144927979, - "learning_rate": 4.892015535285355e-07, - "loss": 0.1846, - "step": 19150 - }, - { - "epoch": 1.80410258825746, - "grad_norm": 0.6767376661300659, - "learning_rate": 4.887351491356929e-07, - "loss": 0.2344, - "step": 19151 - }, - { - "epoch": 1.8041967923506277, - "grad_norm": 0.6861122250556946, - "learning_rate": 4.882689616143976e-07, - "loss": 0.2194, - "step": 19152 - }, - { - "epoch": 1.8042909964437954, - "grad_norm": 0.7323029637336731, - "learning_rate": 4.878029909752801e-07, - "loss": 0.1884, - "step": 19153 - }, - { - "epoch": 1.8043852005369634, - "grad_norm": 0.6265630125999451, - "learning_rate": 4.873372372289664e-07, - "loss": 0.1771, - "step": 19154 - }, - { - "epoch": 1.8044794046301313, - "grad_norm": 0.5967429876327515, - "learning_rate": 4.868717003860735e-07, - "loss": 0.1783, - "step": 19155 - }, - { - "epoch": 1.8045736087232989, - "grad_norm": 0.6530036330223083, - "learning_rate": 4.864063804572183e-07, - "loss": 0.1992, - "step": 19156 - }, - { - "epoch": 1.8046678128164668, - "grad_norm": 0.7074195742607117, - "learning_rate": 
4.859412774530126e-07, - "loss": 0.1943, - "step": 19157 - }, - { - "epoch": 1.8047620169096348, - "grad_norm": 0.6856885552406311, - "learning_rate": 4.854763913840576e-07, - "loss": 0.1971, - "step": 19158 - }, - { - "epoch": 1.8048562210028025, - "grad_norm": 0.6670020818710327, - "learning_rate": 4.850117222609563e-07, - "loss": 0.1848, - "step": 19159 - }, - { - "epoch": 1.8049504250959703, - "grad_norm": 0.6792109608650208, - "learning_rate": 4.845472700943033e-07, - "loss": 0.1978, - "step": 19160 - }, - { - "epoch": 1.8050446291891382, - "grad_norm": 0.5933213233947754, - "learning_rate": 4.84083034894689e-07, - "loss": 0.1635, - "step": 19161 - }, - { - "epoch": 1.8051388332823062, - "grad_norm": 0.6266000270843506, - "learning_rate": 4.836190166726951e-07, - "loss": 0.1722, - "step": 19162 - }, - { - "epoch": 1.805233037375474, - "grad_norm": 0.6405494213104248, - "learning_rate": 4.831552154389074e-07, - "loss": 0.2012, - "step": 19163 - }, - { - "epoch": 1.8053272414686417, - "grad_norm": 0.604314923286438, - "learning_rate": 4.826916312038999e-07, - "loss": 0.1682, - "step": 19164 - }, - { - "epoch": 1.8054214455618096, - "grad_norm": 0.5882378220558167, - "learning_rate": 4.822282639782405e-07, - "loss": 0.1597, - "step": 19165 - }, - { - "epoch": 1.8055156496549776, - "grad_norm": 0.653429388999939, - "learning_rate": 4.817651137724955e-07, - "loss": 0.1651, - "step": 19166 - }, - { - "epoch": 1.8056098537481453, - "grad_norm": 0.7554649710655212, - "learning_rate": 4.813021805972273e-07, - "loss": 0.1771, - "step": 19167 - }, - { - "epoch": 1.805704057841313, - "grad_norm": 0.7071376442909241, - "learning_rate": 4.808394644629899e-07, - "loss": 0.1935, - "step": 19168 - }, - { - "epoch": 1.805798261934481, - "grad_norm": 0.6626911759376526, - "learning_rate": 4.803769653803336e-07, - "loss": 0.1821, - "step": 19169 - }, - { - "epoch": 1.805892466027649, - "grad_norm": 0.6097730994224548, - "learning_rate": 4.799146833598067e-07, - "loss": 0.1864, - "step": 19170 - }, - { - "epoch": 1.8059866701208167, - "grad_norm": 0.7417007684707642, - "learning_rate": 4.794526184119463e-07, - "loss": 0.1902, - "step": 19171 - }, - { - "epoch": 1.8060808742139844, - "grad_norm": 0.6449300050735474, - "learning_rate": 4.789907705472907e-07, - "loss": 0.1854, - "step": 19172 - }, - { - "epoch": 1.8061750783071524, - "grad_norm": 0.6992318034172058, - "learning_rate": 4.785291397763714e-07, - "loss": 0.1829, - "step": 19173 - }, - { - "epoch": 1.8062692824003204, - "grad_norm": 0.682033896446228, - "learning_rate": 4.78067726109711e-07, - "loss": 0.2036, - "step": 19174 - }, - { - "epoch": 1.806363486493488, - "grad_norm": 0.6673874855041504, - "learning_rate": 4.776065295578336e-07, - "loss": 0.1852, - "step": 19175 - }, - { - "epoch": 1.8064576905866558, - "grad_norm": 0.6430282592773438, - "learning_rate": 4.771455501312538e-07, - "loss": 0.1915, - "step": 19176 - }, - { - "epoch": 1.8065518946798238, - "grad_norm": 0.6463133096694946, - "learning_rate": 4.766847878404823e-07, - "loss": 0.1993, - "step": 19177 - }, - { - "epoch": 1.8066460987729918, - "grad_norm": 0.6992751955986023, - "learning_rate": 4.762242426960262e-07, - "loss": 0.1657, - "step": 19178 - }, - { - "epoch": 1.8067403028661595, - "grad_norm": 0.6533843278884888, - "learning_rate": 4.757639147083859e-07, - "loss": 0.168, - "step": 19179 - }, - { - "epoch": 1.8068345069593272, - "grad_norm": 0.6807149648666382, - "learning_rate": 4.753038038880575e-07, - "loss": 0.1938, - "step": 19180 - }, - { - "epoch": 
1.8069287110524952, - "grad_norm": 0.6650716066360474, - "learning_rate": 4.7484391024553376e-07, - "loss": 0.1906, - "step": 19181 - }, - { - "epoch": 1.8070229151456632, - "grad_norm": 0.7790847420692444, - "learning_rate": 4.743842337912985e-07, - "loss": 0.2003, - "step": 19182 - }, - { - "epoch": 1.807117119238831, - "grad_norm": 0.7163403034210205, - "learning_rate": 4.739247745358344e-07, - "loss": 0.1867, - "step": 19183 - }, - { - "epoch": 1.8072113233319986, - "grad_norm": 0.6882708072662354, - "learning_rate": 4.7346553248961867e-07, - "loss": 0.1994, - "step": 19184 - }, - { - "epoch": 1.8073055274251666, - "grad_norm": 0.6356422901153564, - "learning_rate": 4.7300650766312294e-07, - "loss": 0.187, - "step": 19185 - }, - { - "epoch": 1.8073997315183346, - "grad_norm": 0.7441783547401428, - "learning_rate": 4.7254770006681105e-07, - "loss": 0.2253, - "step": 19186 - }, - { - "epoch": 1.8074939356115023, - "grad_norm": 0.5874780416488647, - "learning_rate": 4.7208910971114577e-07, - "loss": 0.1716, - "step": 19187 - }, - { - "epoch": 1.80758813970467, - "grad_norm": 0.6484742760658264, - "learning_rate": 4.716307366065853e-07, - "loss": 0.192, - "step": 19188 - }, - { - "epoch": 1.807682343797838, - "grad_norm": 0.6442360877990723, - "learning_rate": 4.7117258076357806e-07, - "loss": 0.1775, - "step": 19189 - }, - { - "epoch": 1.807776547891006, - "grad_norm": 0.7025601267814636, - "learning_rate": 4.7071464219257343e-07, - "loss": 0.2379, - "step": 19190 - }, - { - "epoch": 1.8078707519841737, - "grad_norm": 0.6472790241241455, - "learning_rate": 4.702569209040131e-07, - "loss": 0.1845, - "step": 19191 - }, - { - "epoch": 1.8079649560773414, - "grad_norm": 0.6979325413703918, - "learning_rate": 4.6979941690833196e-07, - "loss": 0.2053, - "step": 19192 - }, - { - "epoch": 1.8080591601705094, - "grad_norm": 0.6204474568367004, - "learning_rate": 4.6934213021596285e-07, - "loss": 0.1803, - "step": 19193 - }, - { - "epoch": 1.8081533642636773, - "grad_norm": 0.7227051258087158, - "learning_rate": 4.688850608373341e-07, - "loss": 0.2019, - "step": 19194 - }, - { - "epoch": 1.808247568356845, - "grad_norm": 0.7145089507102966, - "learning_rate": 4.68428208782864e-07, - "loss": 0.1617, - "step": 19195 - }, - { - "epoch": 1.8083417724500128, - "grad_norm": 0.6264746785163879, - "learning_rate": 4.67971574062972e-07, - "loss": 0.1627, - "step": 19196 - }, - { - "epoch": 1.8084359765431808, - "grad_norm": 0.6440044045448303, - "learning_rate": 4.67515156688072e-07, - "loss": 0.2196, - "step": 19197 - }, - { - "epoch": 1.8085301806363487, - "grad_norm": 0.6561369895935059, - "learning_rate": 4.670589566685657e-07, - "loss": 0.2, - "step": 19198 - }, - { - "epoch": 1.8086243847295165, - "grad_norm": 0.6602401733398438, - "learning_rate": 4.6660297401485923e-07, - "loss": 0.1724, - "step": 19199 - }, - { - "epoch": 1.8087185888226842, - "grad_norm": 0.7573186159133911, - "learning_rate": 4.6614720873734976e-07, - "loss": 0.1782, - "step": 19200 - }, - { - "epoch": 1.8088127929158522, - "grad_norm": 0.6608302593231201, - "learning_rate": 4.6569166084642904e-07, - "loss": 0.1873, - "step": 19201 - }, - { - "epoch": 1.8089069970090201, - "grad_norm": 0.8319967985153198, - "learning_rate": 4.652363303524798e-07, - "loss": 0.2098, - "step": 19202 - }, - { - "epoch": 1.8090012011021879, - "grad_norm": 0.6997202038764954, - "learning_rate": 4.6478121726589056e-07, - "loss": 0.1604, - "step": 19203 - }, - { - "epoch": 1.8090954051953556, - "grad_norm": 0.651648223400116, - "learning_rate": 
4.643263215970373e-07, - "loss": 0.1718, - "step": 19204 - }, - { - "epoch": 1.8091896092885236, - "grad_norm": 0.627785861492157, - "learning_rate": 4.6387164335628844e-07, - "loss": 0.1798, - "step": 19205 - }, - { - "epoch": 1.8092838133816915, - "grad_norm": 0.628860354423523, - "learning_rate": 4.634171825540146e-07, - "loss": 0.2039, - "step": 19206 - }, - { - "epoch": 1.8093780174748593, - "grad_norm": 0.6039461493492126, - "learning_rate": 4.6296293920057854e-07, - "loss": 0.166, - "step": 19207 - }, - { - "epoch": 1.809472221568027, - "grad_norm": 0.6618579030036926, - "learning_rate": 4.625089133063365e-07, - "loss": 0.1844, - "step": 19208 - }, - { - "epoch": 1.809566425661195, - "grad_norm": 0.6979915499687195, - "learning_rate": 4.620551048816402e-07, - "loss": 0.1894, - "step": 19209 - }, - { - "epoch": 1.809660629754363, - "grad_norm": 0.7428672909736633, - "learning_rate": 4.616015139368402e-07, - "loss": 0.2169, - "step": 19210 - }, - { - "epoch": 1.8097548338475307, - "grad_norm": 0.622818112373352, - "learning_rate": 4.6114814048227483e-07, - "loss": 0.1859, - "step": 19211 - }, - { - "epoch": 1.8098490379406984, - "grad_norm": 0.6509173512458801, - "learning_rate": 4.6069498452828487e-07, - "loss": 0.199, - "step": 19212 - }, - { - "epoch": 1.8099432420338664, - "grad_norm": 0.6740192174911499, - "learning_rate": 4.6024204608520305e-07, - "loss": 0.2001, - "step": 19213 - }, - { - "epoch": 1.8100374461270343, - "grad_norm": 0.6373996734619141, - "learning_rate": 4.597893251633556e-07, - "loss": 0.2098, - "step": 19214 - }, - { - "epoch": 1.810131650220202, - "grad_norm": 0.7393183708190918, - "learning_rate": 4.593368217730665e-07, - "loss": 0.2019, - "step": 19215 - }, - { - "epoch": 1.8102258543133698, - "grad_norm": 0.7272049188613892, - "learning_rate": 4.588845359246508e-07, - "loss": 0.2151, - "step": 19216 - }, - { - "epoch": 1.8103200584065378, - "grad_norm": 0.6717706322669983, - "learning_rate": 4.5843246762842466e-07, - "loss": 0.1979, - "step": 19217 - }, - { - "epoch": 1.8104142624997057, - "grad_norm": 0.7467896342277527, - "learning_rate": 4.579806168946943e-07, - "loss": 0.2139, - "step": 19218 - }, - { - "epoch": 1.8105084665928735, - "grad_norm": 0.6617717742919922, - "learning_rate": 4.5752898373376263e-07, - "loss": 0.2048, - "step": 19219 - }, - { - "epoch": 1.8106026706860412, - "grad_norm": 0.6223923563957214, - "learning_rate": 4.5707756815592805e-07, - "loss": 0.1697, - "step": 19220 - }, - { - "epoch": 1.8106968747792092, - "grad_norm": 0.6542524099349976, - "learning_rate": 4.5662637017148346e-07, - "loss": 0.218, - "step": 19221 - }, - { - "epoch": 1.8107910788723771, - "grad_norm": 0.6476825475692749, - "learning_rate": 4.5617538979071616e-07, - "loss": 0.1909, - "step": 19222 - }, - { - "epoch": 1.8108852829655449, - "grad_norm": 0.6108734011650085, - "learning_rate": 4.5572462702391016e-07, - "loss": 0.1751, - "step": 19223 - }, - { - "epoch": 1.8109794870587126, - "grad_norm": 0.6277502179145813, - "learning_rate": 4.5527408188134393e-07, - "loss": 0.171, - "step": 19224 - }, - { - "epoch": 1.8110736911518805, - "grad_norm": 0.631671130657196, - "learning_rate": 4.548237543732903e-07, - "loss": 0.184, - "step": 19225 - }, - { - "epoch": 1.8111678952450485, - "grad_norm": 0.6968255639076233, - "learning_rate": 4.543736445100144e-07, - "loss": 0.2269, - "step": 19226 - }, - { - "epoch": 1.8112620993382162, - "grad_norm": 0.6485012173652649, - "learning_rate": 4.5392375230178363e-07, - "loss": 0.1698, - "step": 19227 - }, - { - "epoch": 
1.811356303431384, - "grad_norm": 0.6907787322998047, - "learning_rate": 4.534740777588553e-07, - "loss": 0.2169, - "step": 19228 - }, - { - "epoch": 1.811450507524552, - "grad_norm": 0.6859610676765442, - "learning_rate": 4.5302462089148015e-07, - "loss": 0.2113, - "step": 19229 - }, - { - "epoch": 1.81154471161772, - "grad_norm": 0.6284610033035278, - "learning_rate": 4.5257538170990876e-07, - "loss": 0.1978, - "step": 19230 - }, - { - "epoch": 1.8116389157108876, - "grad_norm": 0.6435138583183289, - "learning_rate": 4.5212636022438527e-07, - "loss": 0.2233, - "step": 19231 - }, - { - "epoch": 1.8117331198040554, - "grad_norm": 0.5762256979942322, - "learning_rate": 4.516775564451459e-07, - "loss": 0.1754, - "step": 19232 - }, - { - "epoch": 1.8118273238972233, - "grad_norm": 0.6733455657958984, - "learning_rate": 4.5122897038242465e-07, - "loss": 0.2014, - "step": 19233 - }, - { - "epoch": 1.8119215279903913, - "grad_norm": 0.6698833107948303, - "learning_rate": 4.507806020464522e-07, - "loss": 0.1868, - "step": 19234 - }, - { - "epoch": 1.812015732083559, - "grad_norm": 0.6298755407333374, - "learning_rate": 4.503324514474483e-07, - "loss": 0.1914, - "step": 19235 - }, - { - "epoch": 1.8121099361767268, - "grad_norm": 0.5595676302909851, - "learning_rate": 4.4988451859563355e-07, - "loss": 0.1586, - "step": 19236 - }, - { - "epoch": 1.8122041402698947, - "grad_norm": 0.9319345951080322, - "learning_rate": 4.49436803501222e-07, - "loss": 0.2107, - "step": 19237 - }, - { - "epoch": 1.8122983443630627, - "grad_norm": 0.7476606965065002, - "learning_rate": 4.4898930617442105e-07, - "loss": 0.218, - "step": 19238 - }, - { - "epoch": 1.8123925484562304, - "grad_norm": 0.6579570770263672, - "learning_rate": 4.485420266254348e-07, - "loss": 0.2043, - "step": 19239 - }, - { - "epoch": 1.8124867525493982, - "grad_norm": 0.6271825432777405, - "learning_rate": 4.480949648644628e-07, - "loss": 0.1884, - "step": 19240 - }, - { - "epoch": 1.8125809566425661, - "grad_norm": 0.7584695816040039, - "learning_rate": 4.4764812090169804e-07, - "loss": 0.2018, - "step": 19241 - }, - { - "epoch": 1.812675160735734, - "grad_norm": 0.6255564093589783, - "learning_rate": 4.472014947473269e-07, - "loss": 0.1973, - "step": 19242 - }, - { - "epoch": 1.8127693648289018, - "grad_norm": 0.5978403687477112, - "learning_rate": 4.4675508641153776e-07, - "loss": 0.1982, - "step": 19243 - }, - { - "epoch": 1.8128635689220696, - "grad_norm": 0.695257842540741, - "learning_rate": 4.46308895904507e-07, - "loss": 0.1881, - "step": 19244 - }, - { - "epoch": 1.8129577730152375, - "grad_norm": 0.7909465432167053, - "learning_rate": 4.4586292323640643e-07, - "loss": 0.2134, - "step": 19245 - }, - { - "epoch": 1.8130519771084055, - "grad_norm": 0.6328903436660767, - "learning_rate": 4.45417168417408e-07, - "loss": 0.1916, - "step": 19246 - }, - { - "epoch": 1.8131461812015732, - "grad_norm": 0.6638057827949524, - "learning_rate": 4.4497163145767576e-07, - "loss": 0.1938, - "step": 19247 - }, - { - "epoch": 1.813240385294741, - "grad_norm": 0.7206124663352966, - "learning_rate": 4.44526312367366e-07, - "loss": 0.2141, - "step": 19248 - }, - { - "epoch": 1.813334589387909, - "grad_norm": 0.6205139756202698, - "learning_rate": 4.4408121115663396e-07, - "loss": 0.1834, - "step": 19249 - }, - { - "epoch": 1.8134287934810769, - "grad_norm": 0.667712390422821, - "learning_rate": 4.436363278356304e-07, - "loss": 0.1915, - "step": 19250 - }, - { - "epoch": 1.8135229975742446, - "grad_norm": 1.0919421911239624, - "learning_rate": 
4.431916624144961e-07, - "loss": 0.1779, - "step": 19251 - }, - { - "epoch": 1.8136172016674124, - "grad_norm": 0.685817301273346, - "learning_rate": 4.427472149033718e-07, - "loss": 0.1803, - "step": 19252 - }, - { - "epoch": 1.8137114057605803, - "grad_norm": 0.6311653256416321, - "learning_rate": 4.423029853123928e-07, - "loss": 0.2102, - "step": 19253 - }, - { - "epoch": 1.8138056098537483, - "grad_norm": 0.6238813996315002, - "learning_rate": 4.418589736516865e-07, - "loss": 0.2117, - "step": 19254 - }, - { - "epoch": 1.813899813946916, - "grad_norm": 0.9527063369750977, - "learning_rate": 4.414151799313782e-07, - "loss": 0.2, - "step": 19255 - }, - { - "epoch": 1.8139940180400838, - "grad_norm": 0.7247177362442017, - "learning_rate": 4.4097160416158525e-07, - "loss": 0.209, - "step": 19256 - }, - { - "epoch": 1.8140882221332517, - "grad_norm": 0.6345008611679077, - "learning_rate": 4.4052824635242296e-07, - "loss": 0.1965, - "step": 19257 - }, - { - "epoch": 1.8141824262264197, - "grad_norm": 0.6915823221206665, - "learning_rate": 4.4008510651400215e-07, - "loss": 0.2105, - "step": 19258 - }, - { - "epoch": 1.8142766303195874, - "grad_norm": 0.7735414505004883, - "learning_rate": 4.396421846564236e-07, - "loss": 0.2186, - "step": 19259 - }, - { - "epoch": 1.8143708344127552, - "grad_norm": 0.6169028282165527, - "learning_rate": 4.391994807897892e-07, - "loss": 0.1835, - "step": 19260 - }, - { - "epoch": 1.8144650385059231, - "grad_norm": 0.807994544506073, - "learning_rate": 4.387569949241943e-07, - "loss": 0.2006, - "step": 19261 - }, - { - "epoch": 1.814559242599091, - "grad_norm": 0.6707485914230347, - "learning_rate": 4.383147270697252e-07, - "loss": 0.1995, - "step": 19262 - }, - { - "epoch": 1.8146534466922588, - "grad_norm": 0.6820725202560425, - "learning_rate": 4.378726772364672e-07, - "loss": 0.1934, - "step": 19263 - }, - { - "epoch": 1.8147476507854265, - "grad_norm": 0.6363739371299744, - "learning_rate": 4.374308454345022e-07, - "loss": 0.1836, - "step": 19264 - }, - { - "epoch": 1.8148418548785945, - "grad_norm": 0.7199511528015137, - "learning_rate": 4.369892316739022e-07, - "loss": 0.1984, - "step": 19265 - }, - { - "epoch": 1.8149360589717625, - "grad_norm": 0.6855634450912476, - "learning_rate": 4.365478359647368e-07, - "loss": 0.2045, - "step": 19266 - }, - { - "epoch": 1.8150302630649302, - "grad_norm": 0.6977314949035645, - "learning_rate": 4.361066583170703e-07, - "loss": 0.195, - "step": 19267 - }, - { - "epoch": 1.815124467158098, - "grad_norm": 0.7039359211921692, - "learning_rate": 4.3566569874096356e-07, - "loss": 0.2127, - "step": 19268 - }, - { - "epoch": 1.815218671251266, - "grad_norm": 0.6829505562782288, - "learning_rate": 4.352249572464695e-07, - "loss": 0.2056, - "step": 19269 - }, - { - "epoch": 1.8153128753444339, - "grad_norm": 0.7237241268157959, - "learning_rate": 4.347844338436391e-07, - "loss": 0.2045, - "step": 19270 - }, - { - "epoch": 1.8154070794376016, - "grad_norm": 0.7656180262565613, - "learning_rate": 4.343441285425176e-07, - "loss": 0.2182, - "step": 19271 - }, - { - "epoch": 1.8155012835307693, - "grad_norm": 0.6508693695068359, - "learning_rate": 4.339040413531426e-07, - "loss": 0.2191, - "step": 19272 - }, - { - "epoch": 1.8155954876239373, - "grad_norm": 0.5876890420913696, - "learning_rate": 4.3346417228554927e-07, - "loss": 0.1686, - "step": 19273 - }, - { - "epoch": 1.8156896917171053, - "grad_norm": 0.8887391686439514, - "learning_rate": 4.330245213497686e-07, - "loss": 0.1815, - "step": 19274 - }, - { - "epoch": 
1.815783895810273, - "grad_norm": 0.674713671207428, - "learning_rate": 4.325850885558236e-07, - "loss": 0.1841, - "step": 19275 - }, - { - "epoch": 1.8158780999034407, - "grad_norm": 0.6085453629493713, - "learning_rate": 4.321458739137352e-07, - "loss": 0.192, - "step": 19276 - }, - { - "epoch": 1.8159723039966087, - "grad_norm": 0.7171904444694519, - "learning_rate": 4.317068774335187e-07, - "loss": 0.2017, - "step": 19277 - }, - { - "epoch": 1.8160665080897767, - "grad_norm": 0.6836258769035339, - "learning_rate": 4.3126809912518274e-07, - "loss": 0.2024, - "step": 19278 - }, - { - "epoch": 1.8161607121829444, - "grad_norm": 0.7146151065826416, - "learning_rate": 4.308295389987305e-07, - "loss": 0.2202, - "step": 19279 - }, - { - "epoch": 1.8162549162761121, - "grad_norm": 0.6684953570365906, - "learning_rate": 4.303911970641661e-07, - "loss": 0.2119, - "step": 19280 - }, - { - "epoch": 1.81634912036928, - "grad_norm": 0.6618233323097229, - "learning_rate": 4.2995307333148273e-07, - "loss": 0.2026, - "step": 19281 - }, - { - "epoch": 1.816443324462448, - "grad_norm": 0.680989146232605, - "learning_rate": 4.2951516781066574e-07, - "loss": 0.1818, - "step": 19282 - }, - { - "epoch": 1.8165375285556158, - "grad_norm": 0.7819607257843018, - "learning_rate": 4.2907748051170816e-07, - "loss": 0.1894, - "step": 19283 - }, - { - "epoch": 1.8166317326487835, - "grad_norm": 0.6956021189689636, - "learning_rate": 4.2864001144458435e-07, - "loss": 0.1937, - "step": 19284 - }, - { - "epoch": 1.8167259367419515, - "grad_norm": 0.6716555953025818, - "learning_rate": 4.282027606192696e-07, - "loss": 0.1866, - "step": 19285 - }, - { - "epoch": 1.8168201408351194, - "grad_norm": 0.6552572846412659, - "learning_rate": 4.277657280457359e-07, - "loss": 0.1902, - "step": 19286 - }, - { - "epoch": 1.8169143449282872, - "grad_norm": 0.702485978603363, - "learning_rate": 4.2732891373394757e-07, - "loss": 0.2184, - "step": 19287 - }, - { - "epoch": 1.817008549021455, - "grad_norm": 0.6234025359153748, - "learning_rate": 4.268923176938633e-07, - "loss": 0.1775, - "step": 19288 - }, - { - "epoch": 1.8171027531146229, - "grad_norm": 0.6936768293380737, - "learning_rate": 4.2645593993543953e-07, - "loss": 0.1994, - "step": 19289 - }, - { - "epoch": 1.8171969572077908, - "grad_norm": 0.6392292380332947, - "learning_rate": 4.2601978046862833e-07, - "loss": 0.1835, - "step": 19290 - }, - { - "epoch": 1.8172911613009586, - "grad_norm": 0.6391674280166626, - "learning_rate": 4.2558383930336954e-07, - "loss": 0.1804, - "step": 19291 - }, - { - "epoch": 1.8173853653941263, - "grad_norm": 0.6619796752929688, - "learning_rate": 4.2514811644960743e-07, - "loss": 0.1971, - "step": 19292 - }, - { - "epoch": 1.8174795694872943, - "grad_norm": 0.6730448603630066, - "learning_rate": 4.2471261191727733e-07, - "loss": 0.188, - "step": 19293 - }, - { - "epoch": 1.8175737735804622, - "grad_norm": 0.7545876502990723, - "learning_rate": 4.242773257163069e-07, - "loss": 0.1949, - "step": 19294 - }, - { - "epoch": 1.8176679776736298, - "grad_norm": 0.6351592540740967, - "learning_rate": 4.238422578566226e-07, - "loss": 0.1849, - "step": 19295 - }, - { - "epoch": 1.8177621817667977, - "grad_norm": 0.7674629092216492, - "learning_rate": 4.2340740834814433e-07, - "loss": 0.1879, - "step": 19296 - }, - { - "epoch": 1.8178563858599657, - "grad_norm": 0.664547860622406, - "learning_rate": 4.229727772007886e-07, - "loss": 0.1951, - "step": 19297 - }, - { - "epoch": 1.8179505899531334, - "grad_norm": 0.6626757979393005, - "learning_rate": 
4.2253836442446406e-07, - "loss": 0.1921, - "step": 19298 - }, - { - "epoch": 1.8180447940463011, - "grad_norm": 0.6940608024597168, - "learning_rate": 4.2210417002907736e-07, - "loss": 0.1735, - "step": 19299 - }, - { - "epoch": 1.818138998139469, - "grad_norm": 0.6468650698661804, - "learning_rate": 4.21670194024526e-07, - "loss": 0.1775, - "step": 19300 - }, - { - "epoch": 1.818233202232637, - "grad_norm": 0.650922417640686, - "learning_rate": 4.2123643642071e-07, - "loss": 0.1883, - "step": 19301 - }, - { - "epoch": 1.8183274063258048, - "grad_norm": 0.6870720982551575, - "learning_rate": 4.208028972275158e-07, - "loss": 0.2107, - "step": 19302 - }, - { - "epoch": 1.8184216104189725, - "grad_norm": 0.6382945775985718, - "learning_rate": 4.20369576454831e-07, - "loss": 0.194, - "step": 19303 - }, - { - "epoch": 1.8185158145121405, - "grad_norm": 0.6483957171440125, - "learning_rate": 4.1993647411253336e-07, - "loss": 0.1921, - "step": 19304 - }, - { - "epoch": 1.8186100186053085, - "grad_norm": 0.6496073007583618, - "learning_rate": 4.195035902105027e-07, - "loss": 0.1764, - "step": 19305 - }, - { - "epoch": 1.8187042226984762, - "grad_norm": 0.7171269059181213, - "learning_rate": 4.190709247586044e-07, - "loss": 0.1975, - "step": 19306 - }, - { - "epoch": 1.818798426791644, - "grad_norm": 0.5712437033653259, - "learning_rate": 4.186384777667063e-07, - "loss": 0.196, - "step": 19307 - }, - { - "epoch": 1.818892630884812, - "grad_norm": 0.6560826897621155, - "learning_rate": 4.1820624924466926e-07, - "loss": 0.2195, - "step": 19308 - }, - { - "epoch": 1.8189868349779799, - "grad_norm": 0.6667826175689697, - "learning_rate": 4.1777423920234763e-07, - "loss": 0.1854, - "step": 19309 - }, - { - "epoch": 1.8190810390711476, - "grad_norm": 0.6026217341423035, - "learning_rate": 4.173424476495924e-07, - "loss": 0.2189, - "step": 19310 - }, - { - "epoch": 1.8191752431643153, - "grad_norm": 0.6463965177536011, - "learning_rate": 4.1691087459625136e-07, - "loss": 0.1802, - "step": 19311 - }, - { - "epoch": 1.8192694472574833, - "grad_norm": 0.6469002366065979, - "learning_rate": 4.1647952005216096e-07, - "loss": 0.188, - "step": 19312 - }, - { - "epoch": 1.8193636513506513, - "grad_norm": 0.6489649415016174, - "learning_rate": 4.16048384027159e-07, - "loss": 0.1836, - "step": 19313 - }, - { - "epoch": 1.819457855443819, - "grad_norm": 0.5855406522750854, - "learning_rate": 4.1561746653107635e-07, - "loss": 0.1832, - "step": 19314 - }, - { - "epoch": 1.8195520595369867, - "grad_norm": 0.7545716166496277, - "learning_rate": 4.1518676757373753e-07, - "loss": 0.2094, - "step": 19315 - }, - { - "epoch": 1.8196462636301547, - "grad_norm": 0.6075674295425415, - "learning_rate": 4.147562871649624e-07, - "loss": 0.1663, - "step": 19316 - }, - { - "epoch": 1.8197404677233227, - "grad_norm": 0.5916205644607544, - "learning_rate": 4.143260253145709e-07, - "loss": 0.2141, - "step": 19317 - }, - { - "epoch": 1.8198346718164904, - "grad_norm": 0.7186866402626038, - "learning_rate": 4.138959820323696e-07, - "loss": 0.1988, - "step": 19318 - }, - { - "epoch": 1.8199288759096581, - "grad_norm": 0.6810881495475769, - "learning_rate": 4.1346615732816173e-07, - "loss": 0.1868, - "step": 19319 - }, - { - "epoch": 1.820023080002826, - "grad_norm": 0.6811802387237549, - "learning_rate": 4.1303655121175515e-07, - "loss": 0.1989, - "step": 19320 - }, - { - "epoch": 1.820117284095994, - "grad_norm": 0.6818514466285706, - "learning_rate": 4.1260716369294073e-07, - "loss": 0.187, - "step": 19321 - }, - { - "epoch": 
1.8202114881891618, - "grad_norm": 0.6336425542831421, - "learning_rate": 4.1217799478150855e-07, - "loss": 0.1848, - "step": 19322 - }, - { - "epoch": 1.8203056922823295, - "grad_norm": 0.6554447412490845, - "learning_rate": 4.117490444872474e-07, - "loss": 0.1853, - "step": 19323 - }, - { - "epoch": 1.8203998963754975, - "grad_norm": 0.643510639667511, - "learning_rate": 4.1132031281993724e-07, - "loss": 0.1783, - "step": 19324 - }, - { - "epoch": 1.8204941004686654, - "grad_norm": 0.7794122099876404, - "learning_rate": 4.1089179978935245e-07, - "loss": 0.2049, - "step": 19325 - }, - { - "epoch": 1.8205883045618332, - "grad_norm": 0.6050351858139038, - "learning_rate": 4.10463505405263e-07, - "loss": 0.1839, - "step": 19326 - }, - { - "epoch": 1.820682508655001, - "grad_norm": 0.6417489051818848, - "learning_rate": 4.100354296774389e-07, - "loss": 0.2121, - "step": 19327 - }, - { - "epoch": 1.8207767127481689, - "grad_norm": 0.5908655524253845, - "learning_rate": 4.096075726156357e-07, - "loss": 0.1775, - "step": 19328 - }, - { - "epoch": 1.8208709168413368, - "grad_norm": 0.6148104667663574, - "learning_rate": 4.091799342296121e-07, - "loss": 0.1973, - "step": 19329 - }, - { - "epoch": 1.8209651209345046, - "grad_norm": 0.6370846629142761, - "learning_rate": 4.087525145291205e-07, - "loss": 0.2013, - "step": 19330 - }, - { - "epoch": 1.8210593250276723, - "grad_norm": 0.6966099739074707, - "learning_rate": 4.083253135239029e-07, - "loss": 0.2336, - "step": 19331 - }, - { - "epoch": 1.8211535291208403, - "grad_norm": 0.672433078289032, - "learning_rate": 4.078983312237017e-07, - "loss": 0.2297, - "step": 19332 - }, - { - "epoch": 1.8212477332140082, - "grad_norm": 0.6689161062240601, - "learning_rate": 4.074715676382546e-07, - "loss": 0.196, - "step": 19333 - }, - { - "epoch": 1.821341937307176, - "grad_norm": 0.680631160736084, - "learning_rate": 4.070450227772893e-07, - "loss": 0.1977, - "step": 19334 - }, - { - "epoch": 1.8214361414003437, - "grad_norm": 0.6848549842834473, - "learning_rate": 4.0661869665053476e-07, - "loss": 0.1986, - "step": 19335 - }, - { - "epoch": 1.8215303454935117, - "grad_norm": 0.6180917620658875, - "learning_rate": 4.0619258926770877e-07, - "loss": 0.1795, - "step": 19336 - }, - { - "epoch": 1.8216245495866796, - "grad_norm": 0.611783504486084, - "learning_rate": 4.0576670063852907e-07, - "loss": 0.172, - "step": 19337 - }, - { - "epoch": 1.8217187536798474, - "grad_norm": 0.6417288780212402, - "learning_rate": 4.053410307727079e-07, - "loss": 0.2153, - "step": 19338 - }, - { - "epoch": 1.821812957773015, - "grad_norm": 0.6193172931671143, - "learning_rate": 4.0491557967994866e-07, - "loss": 0.1822, - "step": 19339 - }, - { - "epoch": 1.821907161866183, - "grad_norm": 0.7174019813537598, - "learning_rate": 4.044903473699524e-07, - "loss": 0.2116, - "step": 19340 - }, - { - "epoch": 1.822001365959351, - "grad_norm": 0.6933748722076416, - "learning_rate": 4.0406533385241807e-07, - "loss": 0.2008, - "step": 19341 - }, - { - "epoch": 1.8220955700525188, - "grad_norm": 0.6742868423461914, - "learning_rate": 4.036405391370324e-07, - "loss": 0.2109, - "step": 19342 - }, - { - "epoch": 1.8221897741456865, - "grad_norm": 0.6713540554046631, - "learning_rate": 4.032159632334853e-07, - "loss": 0.1746, - "step": 19343 - }, - { - "epoch": 1.8222839782388545, - "grad_norm": 0.6660988926887512, - "learning_rate": 4.0279160615145476e-07, - "loss": 0.2031, - "step": 19344 - }, - { - "epoch": 1.8223781823320224, - "grad_norm": 0.6715826988220215, - "learning_rate": 
4.023674679006184e-07, - "loss": 0.1947, - "step": 19345 - }, - { - "epoch": 1.8224723864251902, - "grad_norm": 0.5954000353813171, - "learning_rate": 4.0194354849064645e-07, - "loss": 0.1786, - "step": 19346 - }, - { - "epoch": 1.822566590518358, - "grad_norm": 0.675621509552002, - "learning_rate": 4.0151984793120546e-07, - "loss": 0.2103, - "step": 19347 - }, - { - "epoch": 1.8226607946115259, - "grad_norm": 0.6137824654579163, - "learning_rate": 4.010963662319567e-07, - "loss": 0.1714, - "step": 19348 - }, - { - "epoch": 1.8227549987046938, - "grad_norm": 0.6363803744316101, - "learning_rate": 4.006731034025546e-07, - "loss": 0.1876, - "step": 19349 - }, - { - "epoch": 1.8228492027978616, - "grad_norm": 0.6920458078384399, - "learning_rate": 4.002500594526526e-07, - "loss": 0.201, - "step": 19350 - }, - { - "epoch": 1.8229434068910293, - "grad_norm": 0.7287300229072571, - "learning_rate": 3.9982723439189517e-07, - "loss": 0.2102, - "step": 19351 - }, - { - "epoch": 1.8230376109841973, - "grad_norm": 0.6533631682395935, - "learning_rate": 3.9940462822992354e-07, - "loss": 0.1668, - "step": 19352 - }, - { - "epoch": 1.8231318150773652, - "grad_norm": 0.6046972870826721, - "learning_rate": 3.989822409763733e-07, - "loss": 0.1712, - "step": 19353 - }, - { - "epoch": 1.823226019170533, - "grad_norm": 0.7671160101890564, - "learning_rate": 3.985600726408778e-07, - "loss": 0.2099, - "step": 19354 - }, - { - "epoch": 1.8233202232637007, - "grad_norm": 0.7179263234138489, - "learning_rate": 3.9813812323306166e-07, - "loss": 0.2026, - "step": 19355 - }, - { - "epoch": 1.8234144273568687, - "grad_norm": 0.6337263584136963, - "learning_rate": 3.977163927625438e-07, - "loss": 0.1701, - "step": 19356 - }, - { - "epoch": 1.8235086314500366, - "grad_norm": 0.9400070905685425, - "learning_rate": 3.9729488123894435e-07, - "loss": 0.1918, - "step": 19357 - }, - { - "epoch": 1.8236028355432043, - "grad_norm": 0.716310977935791, - "learning_rate": 3.968735886718722e-07, - "loss": 0.1817, - "step": 19358 - }, - { - "epoch": 1.823697039636372, - "grad_norm": 0.6727654337882996, - "learning_rate": 3.9645251507093197e-07, - "loss": 0.1958, - "step": 19359 - }, - { - "epoch": 1.82379124372954, - "grad_norm": 0.7326470613479614, - "learning_rate": 3.960316604457282e-07, - "loss": 0.2024, - "step": 19360 - }, - { - "epoch": 1.823885447822708, - "grad_norm": 0.5652415752410889, - "learning_rate": 3.9561102480585644e-07, - "loss": 0.1667, - "step": 19361 - }, - { - "epoch": 1.8239796519158757, - "grad_norm": 0.6441262364387512, - "learning_rate": 3.951906081609036e-07, - "loss": 0.1873, - "step": 19362 - }, - { - "epoch": 1.8240738560090435, - "grad_norm": 0.6255739331245422, - "learning_rate": 3.947704105204619e-07, - "loss": 0.172, - "step": 19363 - }, - { - "epoch": 1.8241680601022114, - "grad_norm": 0.6597809195518494, - "learning_rate": 3.9435043189410935e-07, - "loss": 0.1958, - "step": 19364 - }, - { - "epoch": 1.8242622641953794, - "grad_norm": 0.674697995185852, - "learning_rate": 3.9393067229142045e-07, - "loss": 0.1795, - "step": 19365 - }, - { - "epoch": 1.8243564682885471, - "grad_norm": 0.6789885759353638, - "learning_rate": 3.9351113172196976e-07, - "loss": 0.1844, - "step": 19366 - }, - { - "epoch": 1.8244506723817149, - "grad_norm": 0.6318550705909729, - "learning_rate": 3.930918101953218e-07, - "loss": 0.1812, - "step": 19367 - }, - { - "epoch": 1.8245448764748828, - "grad_norm": 0.6167610883712769, - "learning_rate": 3.926727077210379e-07, - "loss": 0.2021, - "step": 19368 - }, - { - "epoch": 
1.8246390805680508, - "grad_norm": 0.6036345958709717, - "learning_rate": 3.9225382430867377e-07, - "loss": 0.1781, - "step": 19369 - }, - { - "epoch": 1.8247332846612185, - "grad_norm": 0.6404289603233337, - "learning_rate": 3.9183515996778163e-07, - "loss": 0.168, - "step": 19370 - }, - { - "epoch": 1.8248274887543863, - "grad_norm": 0.6928600668907166, - "learning_rate": 3.914167147079073e-07, - "loss": 0.1899, - "step": 19371 - }, - { - "epoch": 1.8249216928475542, - "grad_norm": 0.6132739186286926, - "learning_rate": 3.909984885385909e-07, - "loss": 0.1929, - "step": 19372 - }, - { - "epoch": 1.8250158969407222, - "grad_norm": 0.6557868123054504, - "learning_rate": 3.9058048146937144e-07, - "loss": 0.1857, - "step": 19373 - }, - { - "epoch": 1.82511010103389, - "grad_norm": 0.6878867149353027, - "learning_rate": 3.9016269350977574e-07, - "loss": 0.2068, - "step": 19374 - }, - { - "epoch": 1.8252043051270577, - "grad_norm": 0.6477445363998413, - "learning_rate": 3.897451246693351e-07, - "loss": 0.229, - "step": 19375 - }, - { - "epoch": 1.8252985092202256, - "grad_norm": 0.6238030195236206, - "learning_rate": 3.8932777495756633e-07, - "loss": 0.1667, - "step": 19376 - }, - { - "epoch": 1.8253927133133936, - "grad_norm": 0.6949501037597656, - "learning_rate": 3.889106443839874e-07, - "loss": 0.1909, - "step": 19377 - }, - { - "epoch": 1.8254869174065613, - "grad_norm": 0.627713143825531, - "learning_rate": 3.884937329581118e-07, - "loss": 0.183, - "step": 19378 - }, - { - "epoch": 1.825581121499729, - "grad_norm": 0.7722302675247192, - "learning_rate": 3.8807704068944075e-07, - "loss": 0.2147, - "step": 19379 - }, - { - "epoch": 1.825675325592897, - "grad_norm": 0.6550974249839783, - "learning_rate": 3.8766056758748004e-07, - "loss": 0.198, - "step": 19380 - }, - { - "epoch": 1.825769529686065, - "grad_norm": 0.7231143116950989, - "learning_rate": 3.872443136617243e-07, - "loss": 0.1825, - "step": 19381 - }, - { - "epoch": 1.8258637337792327, - "grad_norm": 0.6727703213691711, - "learning_rate": 3.8682827892166373e-07, - "loss": 0.1667, - "step": 19382 - }, - { - "epoch": 1.8259579378724005, - "grad_norm": 0.6196810007095337, - "learning_rate": 3.8641246337678627e-07, - "loss": 0.19, - "step": 19383 - }, - { - "epoch": 1.8260521419655684, - "grad_norm": 0.6855287551879883, - "learning_rate": 3.85996867036571e-07, - "loss": 0.1965, - "step": 19384 - }, - { - "epoch": 1.8261463460587364, - "grad_norm": 0.6707298159599304, - "learning_rate": 3.8558148991049704e-07, - "loss": 0.2028, - "step": 19385 - }, - { - "epoch": 1.8262405501519041, - "grad_norm": 0.7413190007209778, - "learning_rate": 3.8516633200803346e-07, - "loss": 0.2143, - "step": 19386 - }, - { - "epoch": 1.8263347542450719, - "grad_norm": 0.6734809875488281, - "learning_rate": 3.8475139333864597e-07, - "loss": 0.1849, - "step": 19387 - }, - { - "epoch": 1.8264289583382398, - "grad_norm": 0.7256351709365845, - "learning_rate": 3.8433667391179927e-07, - "loss": 0.1967, - "step": 19388 - }, - { - "epoch": 1.8265231624314078, - "grad_norm": 0.6640051603317261, - "learning_rate": 3.8392217373694583e-07, - "loss": 0.2081, - "step": 19389 - }, - { - "epoch": 1.8266173665245755, - "grad_norm": 0.7027711272239685, - "learning_rate": 3.8350789282353805e-07, - "loss": 0.2062, - "step": 19390 - }, - { - "epoch": 1.8267115706177433, - "grad_norm": 0.6376315951347351, - "learning_rate": 3.8309383118102396e-07, - "loss": 0.1711, - "step": 19391 - }, - { - "epoch": 1.8268057747109112, - "grad_norm": 0.6850985288619995, - "learning_rate": 
3.826799888188426e-07, - "loss": 0.1751, - "step": 19392 - }, - { - "epoch": 1.8268999788040792, - "grad_norm": 0.7391743659973145, - "learning_rate": 3.8226636574642983e-07, - "loss": 0.2025, - "step": 19393 - }, - { - "epoch": 1.826994182897247, - "grad_norm": 0.7282021045684814, - "learning_rate": 3.818529619732203e-07, - "loss": 0.1604, - "step": 19394 - }, - { - "epoch": 1.8270883869904146, - "grad_norm": 0.6102774739265442, - "learning_rate": 3.814397775086376e-07, - "loss": 0.1807, - "step": 19395 - }, - { - "epoch": 1.8271825910835826, - "grad_norm": 0.6688569784164429, - "learning_rate": 3.810268123621008e-07, - "loss": 0.1938, - "step": 19396 - }, - { - "epoch": 1.8272767951767506, - "grad_norm": 0.6612884998321533, - "learning_rate": 3.8061406654303134e-07, - "loss": 0.1826, - "step": 19397 - }, - { - "epoch": 1.8273709992699183, - "grad_norm": 0.667577862739563, - "learning_rate": 3.802015400608372e-07, - "loss": 0.2058, - "step": 19398 - }, - { - "epoch": 1.827465203363086, - "grad_norm": 0.5990484356880188, - "learning_rate": 3.797892329249231e-07, - "loss": 0.167, - "step": 19399 - }, - { - "epoch": 1.827559407456254, - "grad_norm": 0.7349708080291748, - "learning_rate": 3.793771451446948e-07, - "loss": 0.2224, - "step": 19400 - }, - { - "epoch": 1.827653611549422, - "grad_norm": 0.6905130743980408, - "learning_rate": 3.7896527672954707e-07, - "loss": 0.1711, - "step": 19401 - }, - { - "epoch": 1.8277478156425897, - "grad_norm": 0.7009580731391907, - "learning_rate": 3.7855362768886684e-07, - "loss": 0.2093, - "step": 19402 - }, - { - "epoch": 1.8278420197357574, - "grad_norm": 0.6246546506881714, - "learning_rate": 3.7814219803204654e-07, - "loss": 0.1887, - "step": 19403 - }, - { - "epoch": 1.8279362238289254, - "grad_norm": 0.6888798475265503, - "learning_rate": 3.777309877684654e-07, - "loss": 0.1849, - "step": 19404 - }, - { - "epoch": 1.8280304279220934, - "grad_norm": 0.6739806532859802, - "learning_rate": 3.773199969074959e-07, - "loss": 0.1991, - "step": 19405 - }, - { - "epoch": 1.828124632015261, - "grad_norm": 0.6582845449447632, - "learning_rate": 3.769092254585138e-07, - "loss": 0.1862, - "step": 19406 - }, - { - "epoch": 1.8282188361084288, - "grad_norm": 0.6401177048683167, - "learning_rate": 3.76498673430884e-07, - "loss": 0.1936, - "step": 19407 - }, - { - "epoch": 1.8283130402015968, - "grad_norm": 0.7239953279495239, - "learning_rate": 3.760883408339666e-07, - "loss": 0.1838, - "step": 19408 - }, - { - "epoch": 1.8284072442947648, - "grad_norm": 1.277886986732483, - "learning_rate": 3.756782276771187e-07, - "loss": 0.188, - "step": 19409 - }, - { - "epoch": 1.8285014483879325, - "grad_norm": 0.6182141304016113, - "learning_rate": 3.752683339696928e-07, - "loss": 0.1804, - "step": 19410 - }, - { - "epoch": 1.8285956524811002, - "grad_norm": 0.6419548392295837, - "learning_rate": 3.748586597210324e-07, - "loss": 0.2021, - "step": 19411 - }, - { - "epoch": 1.8286898565742682, - "grad_norm": 0.7066452503204346, - "learning_rate": 3.744492049404802e-07, - "loss": 0.2029, - "step": 19412 - }, - { - "epoch": 1.8287840606674362, - "grad_norm": 0.650208055973053, - "learning_rate": 3.740399696373742e-07, - "loss": 0.1919, - "step": 19413 - }, - { - "epoch": 1.8288782647606039, - "grad_norm": 0.6237804889678955, - "learning_rate": 3.7363095382104143e-07, - "loss": 0.1668, - "step": 19414 - }, - { - "epoch": 1.8289724688537716, - "grad_norm": 0.6652894020080566, - "learning_rate": 3.7322215750081214e-07, - "loss": 0.2056, - "step": 19415 - }, - { - "epoch": 
1.8290666729469396, - "grad_norm": 0.6470969915390015, - "learning_rate": 3.728135806860045e-07, - "loss": 0.2095, - "step": 19416 - }, - { - "epoch": 1.8291608770401075, - "grad_norm": 0.6608524322509766, - "learning_rate": 3.7240522338593655e-07, - "loss": 0.1808, - "step": 19417 - }, - { - "epoch": 1.8292550811332753, - "grad_norm": 0.6656803488731384, - "learning_rate": 3.7199708560991974e-07, - "loss": 0.199, - "step": 19418 - }, - { - "epoch": 1.829349285226443, - "grad_norm": 0.6752721667289734, - "learning_rate": 3.715891673672578e-07, - "loss": 0.1808, - "step": 19419 - }, - { - "epoch": 1.829443489319611, - "grad_norm": 0.6856604218482971, - "learning_rate": 3.7118146866725433e-07, - "loss": 0.1986, - "step": 19420 - }, - { - "epoch": 1.829537693412779, - "grad_norm": 0.622015655040741, - "learning_rate": 3.707739895192042e-07, - "loss": 0.2018, - "step": 19421 - }, - { - "epoch": 1.8296318975059467, - "grad_norm": 0.7418761849403381, - "learning_rate": 3.703667299323988e-07, - "loss": 0.1864, - "step": 19422 - }, - { - "epoch": 1.8297261015991144, - "grad_norm": 0.8238862156867981, - "learning_rate": 3.6995968991612505e-07, - "loss": 0.1876, - "step": 19423 - }, - { - "epoch": 1.8298203056922824, - "grad_norm": 0.5917040109634399, - "learning_rate": 3.695528694796624e-07, - "loss": 0.1714, - "step": 19424 - }, - { - "epoch": 1.8299145097854503, - "grad_norm": 0.6978359818458557, - "learning_rate": 3.6914626863229e-07, - "loss": 0.1976, - "step": 19425 - }, - { - "epoch": 1.830008713878618, - "grad_norm": 0.6442034840583801, - "learning_rate": 3.687398873832759e-07, - "loss": 0.1762, - "step": 19426 - }, - { - "epoch": 1.8301029179717858, - "grad_norm": 0.6447684168815613, - "learning_rate": 3.683337257418873e-07, - "loss": 0.1844, - "step": 19427 - }, - { - "epoch": 1.8301971220649538, - "grad_norm": 0.7106531262397766, - "learning_rate": 3.679277837173856e-07, - "loss": 0.1863, - "step": 19428 - }, - { - "epoch": 1.8302913261581217, - "grad_norm": 0.6524806618690491, - "learning_rate": 3.6752206131902666e-07, - "loss": 0.2002, - "step": 19429 - }, - { - "epoch": 1.8303855302512895, - "grad_norm": 0.575257420539856, - "learning_rate": 3.67116558556061e-07, - "loss": 0.1731, - "step": 19430 - }, - { - "epoch": 1.8304797343444572, - "grad_norm": 0.7416486144065857, - "learning_rate": 3.6671127543773667e-07, - "loss": 0.1919, - "step": 19431 - }, - { - "epoch": 1.8305739384376252, - "grad_norm": 0.5736696124076843, - "learning_rate": 3.6630621197329297e-07, - "loss": 0.1704, - "step": 19432 - }, - { - "epoch": 1.830668142530793, - "grad_norm": 0.7141389846801758, - "learning_rate": 3.6590136817196585e-07, - "loss": 0.188, - "step": 19433 - }, - { - "epoch": 1.8307623466239606, - "grad_norm": 0.6803520321846008, - "learning_rate": 3.6549674404298796e-07, - "loss": 0.2003, - "step": 19434 - }, - { - "epoch": 1.8308565507171286, - "grad_norm": 0.6170375347137451, - "learning_rate": 3.650923395955841e-07, - "loss": 0.2045, - "step": 19435 - }, - { - "epoch": 1.8309507548102966, - "grad_norm": 0.7466601729393005, - "learning_rate": 3.646881548389736e-07, - "loss": 0.1842, - "step": 19436 - }, - { - "epoch": 1.8310449589034643, - "grad_norm": 0.6154623627662659, - "learning_rate": 3.642841897823768e-07, - "loss": 0.1744, - "step": 19437 - }, - { - "epoch": 1.831139162996632, - "grad_norm": 1.2468785047531128, - "learning_rate": 3.6388044443500306e-07, - "loss": 0.1646, - "step": 19438 - }, - { - "epoch": 1.8312333670898, - "grad_norm": 0.741445779800415, - "learning_rate": 
3.6347691880605494e-07, - "loss": 0.2029, - "step": 19439 - }, - { - "epoch": 1.831327571182968, - "grad_norm": 0.6602708101272583, - "learning_rate": 3.630736129047385e-07, - "loss": 0.1977, - "step": 19440 - }, - { - "epoch": 1.8314217752761357, - "grad_norm": 0.6252934336662292, - "learning_rate": 3.626705267402475e-07, - "loss": 0.1898, - "step": 19441 - }, - { - "epoch": 1.8315159793693034, - "grad_norm": 0.720191478729248, - "learning_rate": 3.622676603217701e-07, - "loss": 0.194, - "step": 19442 - }, - { - "epoch": 1.8316101834624714, - "grad_norm": 0.6151571273803711, - "learning_rate": 3.618650136584978e-07, - "loss": 0.1842, - "step": 19443 - }, - { - "epoch": 1.8317043875556394, - "grad_norm": 0.6189284324645996, - "learning_rate": 3.614625867596089e-07, - "loss": 0.1729, - "step": 19444 - }, - { - "epoch": 1.831798591648807, - "grad_norm": 0.6306717991828918, - "learning_rate": 3.610603796342782e-07, - "loss": 0.1664, - "step": 19445 - }, - { - "epoch": 1.8318927957419748, - "grad_norm": 0.6504979133605957, - "learning_rate": 3.606583922916773e-07, - "loss": 0.178, - "step": 19446 - }, - { - "epoch": 1.8319869998351428, - "grad_norm": 0.6690945625305176, - "learning_rate": 3.602566247409744e-07, - "loss": 0.1962, - "step": 19447 - }, - { - "epoch": 1.8320812039283108, - "grad_norm": 0.6376203298568726, - "learning_rate": 3.5985507699132673e-07, - "loss": 0.161, - "step": 19448 - }, - { - "epoch": 1.8321754080214785, - "grad_norm": 0.6145581603050232, - "learning_rate": 3.5945374905189236e-07, - "loss": 0.1756, - "step": 19449 - }, - { - "epoch": 1.8322696121146462, - "grad_norm": 0.7321510314941406, - "learning_rate": 3.5905264093182293e-07, - "loss": 0.1899, - "step": 19450 - }, - { - "epoch": 1.8323638162078142, - "grad_norm": 0.7382429242134094, - "learning_rate": 3.586517526402622e-07, - "loss": 0.2289, - "step": 19451 - }, - { - "epoch": 1.8324580203009821, - "grad_norm": 0.6109058260917664, - "learning_rate": 3.582510841863529e-07, - "loss": 0.1925, - "step": 19452 - }, - { - "epoch": 1.8325522243941499, - "grad_norm": 0.6512452363967896, - "learning_rate": 3.5785063557923106e-07, - "loss": 0.2179, - "step": 19453 - }, - { - "epoch": 1.8326464284873176, - "grad_norm": 0.6336072683334351, - "learning_rate": 3.574504068280249e-07, - "loss": 0.1848, - "step": 19454 - }, - { - "epoch": 1.8327406325804856, - "grad_norm": 0.6218070983886719, - "learning_rate": 3.570503979418627e-07, - "loss": 0.1755, - "step": 19455 - }, - { - "epoch": 1.8328348366736535, - "grad_norm": 0.688758134841919, - "learning_rate": 3.566506089298638e-07, - "loss": 0.1956, - "step": 19456 - }, - { - "epoch": 1.8329290407668213, - "grad_norm": 0.643220067024231, - "learning_rate": 3.562510398011454e-07, - "loss": 0.1834, - "step": 19457 - }, - { - "epoch": 1.833023244859989, - "grad_norm": 0.6620644927024841, - "learning_rate": 3.558516905648179e-07, - "loss": 0.185, - "step": 19458 - }, - { - "epoch": 1.833117448953157, - "grad_norm": 0.7510180473327637, - "learning_rate": 3.554525612299864e-07, - "loss": 0.2435, - "step": 19459 - }, - { - "epoch": 1.833211653046325, - "grad_norm": 0.6576058268547058, - "learning_rate": 3.550536518057535e-07, - "loss": 0.1892, - "step": 19460 - }, - { - "epoch": 1.8333058571394927, - "grad_norm": 0.6471046209335327, - "learning_rate": 3.5465496230121076e-07, - "loss": 0.1922, - "step": 19461 - }, - { - "epoch": 1.8334000612326604, - "grad_norm": 0.6720855832099915, - "learning_rate": 3.5425649272545326e-07, - "loss": 0.1875, - "step": 19462 - }, - { - "epoch": 
1.8334942653258284, - "grad_norm": 0.624860942363739, - "learning_rate": 3.538582430875659e-07, - "loss": 0.185, - "step": 19463 - }, - { - "epoch": 1.8335884694189963, - "grad_norm": 0.6520931124687195, - "learning_rate": 3.5346021339662696e-07, - "loss": 0.1967, - "step": 19464 - }, - { - "epoch": 1.833682673512164, - "grad_norm": 0.6448037028312683, - "learning_rate": 3.5306240366171584e-07, - "loss": 0.1984, - "step": 19465 - }, - { - "epoch": 1.8337768776053318, - "grad_norm": 0.6768196225166321, - "learning_rate": 3.526648138918998e-07, - "loss": 0.2234, - "step": 19466 - }, - { - "epoch": 1.8338710816984998, - "grad_norm": 0.6211151480674744, - "learning_rate": 3.522674440962448e-07, - "loss": 0.1543, - "step": 19467 - }, - { - "epoch": 1.8339652857916677, - "grad_norm": 0.66634202003479, - "learning_rate": 3.518702942838148e-07, - "loss": 0.179, - "step": 19468 - }, - { - "epoch": 1.8340594898848355, - "grad_norm": 0.7418509721755981, - "learning_rate": 3.5147336446366254e-07, - "loss": 0.19, - "step": 19469 - }, - { - "epoch": 1.8341536939780032, - "grad_norm": 0.6177558302879333, - "learning_rate": 3.5107665464483854e-07, - "loss": 0.1752, - "step": 19470 - }, - { - "epoch": 1.8342478980711712, - "grad_norm": 0.6559076905250549, - "learning_rate": 3.5068016483639e-07, - "loss": 0.1959, - "step": 19471 - }, - { - "epoch": 1.8343421021643391, - "grad_norm": 0.6523414850234985, - "learning_rate": 3.5028389504735637e-07, - "loss": 0.2168, - "step": 19472 - }, - { - "epoch": 1.8344363062575069, - "grad_norm": 0.6761232614517212, - "learning_rate": 3.498878452867727e-07, - "loss": 0.2221, - "step": 19473 - }, - { - "epoch": 1.8345305103506746, - "grad_norm": 0.6956623792648315, - "learning_rate": 3.494920155636716e-07, - "loss": 0.2094, - "step": 19474 - }, - { - "epoch": 1.8346247144438426, - "grad_norm": 0.6148270964622498, - "learning_rate": 3.490964058870772e-07, - "loss": 0.1809, - "step": 19475 - }, - { - "epoch": 1.8347189185370105, - "grad_norm": 0.7161653637886047, - "learning_rate": 3.487010162660087e-07, - "loss": 0.2107, - "step": 19476 - }, - { - "epoch": 1.8348131226301783, - "grad_norm": 0.6752950549125671, - "learning_rate": 3.4830584670948464e-07, - "loss": 0.197, - "step": 19477 - }, - { - "epoch": 1.834907326723346, - "grad_norm": 0.6317424178123474, - "learning_rate": 3.4791089722651437e-07, - "loss": 0.1743, - "step": 19478 - }, - { - "epoch": 1.835001530816514, - "grad_norm": 0.7135169506072998, - "learning_rate": 3.4751616782610075e-07, - "loss": 0.2119, - "step": 19479 - }, - { - "epoch": 1.835095734909682, - "grad_norm": 0.6575676798820496, - "learning_rate": 3.4712165851724764e-07, - "loss": 0.2017, - "step": 19480 - }, - { - "epoch": 1.8351899390028497, - "grad_norm": 0.6301974058151245, - "learning_rate": 3.467273693089501e-07, - "loss": 0.1984, - "step": 19481 - }, - { - "epoch": 1.8352841430960174, - "grad_norm": 0.6600757837295532, - "learning_rate": 3.463333002101954e-07, - "loss": 0.1607, - "step": 19482 - }, - { - "epoch": 1.8353783471891854, - "grad_norm": 0.6832287311553955, - "learning_rate": 3.4593945122997295e-07, - "loss": 0.1819, - "step": 19483 - }, - { - "epoch": 1.8354725512823533, - "grad_norm": 0.6303820013999939, - "learning_rate": 3.455458223772612e-07, - "loss": 0.1971, - "step": 19484 - }, - { - "epoch": 1.835566755375521, - "grad_norm": 0.6942116618156433, - "learning_rate": 3.4515241366103405e-07, - "loss": 0.2125, - "step": 19485 - }, - { - "epoch": 1.8356609594686888, - "grad_norm": 0.6522420644760132, - "learning_rate": 
3.447592250902643e-07, - "loss": 0.195, - "step": 19486 - }, - { - "epoch": 1.8357551635618568, - "grad_norm": 0.6443928480148315, - "learning_rate": 3.4436625667391697e-07, - "loss": 0.1925, - "step": 19487 - }, - { - "epoch": 1.8358493676550247, - "grad_norm": 0.6874093413352966, - "learning_rate": 3.4397350842095054e-07, - "loss": 0.1964, - "step": 19488 - }, - { - "epoch": 1.8359435717481924, - "grad_norm": 0.7090978026390076, - "learning_rate": 3.435809803403223e-07, - "loss": 0.1954, - "step": 19489 - }, - { - "epoch": 1.8360377758413602, - "grad_norm": 0.6059409976005554, - "learning_rate": 3.4318867244098165e-07, - "loss": 0.1786, - "step": 19490 - }, - { - "epoch": 1.8361319799345281, - "grad_norm": 0.6071126461029053, - "learning_rate": 3.427965847318737e-07, - "loss": 0.1904, - "step": 19491 - }, - { - "epoch": 1.836226184027696, - "grad_norm": 0.7138732671737671, - "learning_rate": 3.4240471722193804e-07, - "loss": 0.2023, - "step": 19492 - }, - { - "epoch": 1.8363203881208638, - "grad_norm": 0.7073351144790649, - "learning_rate": 3.4201306992011187e-07, - "loss": 0.1697, - "step": 19493 - }, - { - "epoch": 1.8364145922140316, - "grad_norm": 0.6532337069511414, - "learning_rate": 3.416216428353236e-07, - "loss": 0.1824, - "step": 19494 - }, - { - "epoch": 1.8365087963071995, - "grad_norm": 0.7384734749794006, - "learning_rate": 3.4123043597649953e-07, - "loss": 0.1986, - "step": 19495 - }, - { - "epoch": 1.8366030004003675, - "grad_norm": 0.7011557817459106, - "learning_rate": 3.4083944935255686e-07, - "loss": 0.204, - "step": 19496 - }, - { - "epoch": 1.8366972044935352, - "grad_norm": 0.6641374230384827, - "learning_rate": 3.404486829724141e-07, - "loss": 0.2082, - "step": 19497 - }, - { - "epoch": 1.836791408586703, - "grad_norm": 0.6848839521408081, - "learning_rate": 3.400581368449818e-07, - "loss": 0.1907, - "step": 19498 - }, - { - "epoch": 1.836885612679871, - "grad_norm": 0.6220717430114746, - "learning_rate": 3.396678109791607e-07, - "loss": 0.1855, - "step": 19499 - }, - { - "epoch": 1.836979816773039, - "grad_norm": 0.6117632985115051, - "learning_rate": 3.3927770538385584e-07, - "loss": 0.1911, - "step": 19500 - }, - { - "epoch": 1.8370740208662066, - "grad_norm": 0.6125076413154602, - "learning_rate": 3.3888782006795795e-07, - "loss": 0.17, - "step": 19501 - }, - { - "epoch": 1.8371682249593744, - "grad_norm": 0.7635436654090881, - "learning_rate": 3.384981550403599e-07, - "loss": 0.191, - "step": 19502 - }, - { - "epoch": 1.8372624290525423, - "grad_norm": 0.7081218957901001, - "learning_rate": 3.3810871030994564e-07, - "loss": 0.1809, - "step": 19503 - }, - { - "epoch": 1.8373566331457103, - "grad_norm": 0.6071407794952393, - "learning_rate": 3.377194858855948e-07, - "loss": 0.1706, - "step": 19504 - }, - { - "epoch": 1.837450837238878, - "grad_norm": 0.6723511219024658, - "learning_rate": 3.373304817761835e-07, - "loss": 0.2303, - "step": 19505 - }, - { - "epoch": 1.8375450413320458, - "grad_norm": 0.6099964380264282, - "learning_rate": 3.3694169799058043e-07, - "loss": 0.1877, - "step": 19506 - }, - { - "epoch": 1.8376392454252137, - "grad_norm": 0.629387617111206, - "learning_rate": 3.365531345376505e-07, - "loss": 0.1945, - "step": 19507 - }, - { - "epoch": 1.8377334495183817, - "grad_norm": 0.6301486492156982, - "learning_rate": 3.361647914262545e-07, - "loss": 0.2245, - "step": 19508 - }, - { - "epoch": 1.8378276536115494, - "grad_norm": 0.7205371856689453, - "learning_rate": 3.357766686652464e-07, - "loss": 0.2173, - "step": 19509 - }, - { - "epoch": 
1.8379218577047172, - "grad_norm": 0.6755000352859497, - "learning_rate": 3.353887662634758e-07, - "loss": 0.2357, - "step": 19510 - }, - { - "epoch": 1.8380160617978851, - "grad_norm": 0.71434485912323, - "learning_rate": 3.3500108422978906e-07, - "loss": 0.2079, - "step": 19511 - }, - { - "epoch": 1.838110265891053, - "grad_norm": 0.7100639939308167, - "learning_rate": 3.346136225730234e-07, - "loss": 0.2047, - "step": 19512 - }, - { - "epoch": 1.8382044699842208, - "grad_norm": 0.6243544816970825, - "learning_rate": 3.3422638130201526e-07, - "loss": 0.1963, - "step": 19513 - }, - { - "epoch": 1.8382986740773886, - "grad_norm": 0.6586558222770691, - "learning_rate": 3.338393604255952e-07, - "loss": 0.1856, - "step": 19514 - }, - { - "epoch": 1.8383928781705565, - "grad_norm": 0.6588220596313477, - "learning_rate": 3.334525599525862e-07, - "loss": 0.2013, - "step": 19515 - }, - { - "epoch": 1.8384870822637245, - "grad_norm": 0.6576763987541199, - "learning_rate": 3.3306597989180677e-07, - "loss": 0.2188, - "step": 19516 - }, - { - "epoch": 1.8385812863568922, - "grad_norm": 0.682857871055603, - "learning_rate": 3.3267962025207545e-07, - "loss": 0.2009, - "step": 19517 - }, - { - "epoch": 1.83867549045006, - "grad_norm": 0.6771490573883057, - "learning_rate": 3.322934810421996e-07, - "loss": 0.1909, - "step": 19518 - }, - { - "epoch": 1.838769694543228, - "grad_norm": 0.6297563314437866, - "learning_rate": 3.319075622709811e-07, - "loss": 0.1833, - "step": 19519 - }, - { - "epoch": 1.8388638986363959, - "grad_norm": 0.6497674584388733, - "learning_rate": 3.3152186394722506e-07, - "loss": 0.1742, - "step": 19520 - }, - { - "epoch": 1.8389581027295636, - "grad_norm": 0.6712440848350525, - "learning_rate": 3.311363860797223e-07, - "loss": 0.2257, - "step": 19521 - }, - { - "epoch": 1.8390523068227314, - "grad_norm": 0.7431421875953674, - "learning_rate": 3.307511286772613e-07, - "loss": 0.2156, - "step": 19522 - }, - { - "epoch": 1.8391465109158993, - "grad_norm": 0.6532416939735413, - "learning_rate": 3.3036609174863066e-07, - "loss": 0.2286, - "step": 19523 - }, - { - "epoch": 1.8392407150090673, - "grad_norm": 0.6792806386947632, - "learning_rate": 3.299812753026077e-07, - "loss": 0.2126, - "step": 19524 - }, - { - "epoch": 1.839334919102235, - "grad_norm": 0.6522883772850037, - "learning_rate": 3.295966793479655e-07, - "loss": 0.1737, - "step": 19525 - }, - { - "epoch": 1.8394291231954027, - "grad_norm": 0.673279881477356, - "learning_rate": 3.292123038934747e-07, - "loss": 0.1991, - "step": 19526 - }, - { - "epoch": 1.8395233272885707, - "grad_norm": 0.6407334804534912, - "learning_rate": 3.288281489478995e-07, - "loss": 0.1805, - "step": 19527 - }, - { - "epoch": 1.8396175313817387, - "grad_norm": 0.6358546018600464, - "learning_rate": 3.2844421451999954e-07, - "loss": 0.195, - "step": 19528 - }, - { - "epoch": 1.8397117354749064, - "grad_norm": 0.6791768670082092, - "learning_rate": 3.280605006185278e-07, - "loss": 0.1705, - "step": 19529 - }, - { - "epoch": 1.8398059395680741, - "grad_norm": 0.6021818518638611, - "learning_rate": 3.2767700725223617e-07, - "loss": 0.1819, - "step": 19530 - }, - { - "epoch": 1.839900143661242, - "grad_norm": 0.7272765636444092, - "learning_rate": 3.272937344298666e-07, - "loss": 0.1851, - "step": 19531 - }, - { - "epoch": 1.83999434775441, - "grad_norm": 0.6813640594482422, - "learning_rate": 3.269106821601586e-07, - "loss": 0.1815, - "step": 19532 - }, - { - "epoch": 1.8400885518475778, - "grad_norm": 0.685269296169281, - "learning_rate": 
3.2652785045184764e-07, - "loss": 0.237, - "step": 19533 - }, - { - "epoch": 1.8401827559407455, - "grad_norm": 0.6834416389465332, - "learning_rate": 3.2614523931366105e-07, - "loss": 0.1951, - "step": 19534 - }, - { - "epoch": 1.8402769600339135, - "grad_norm": 0.6788691282272339, - "learning_rate": 3.2576284875432517e-07, - "loss": 0.2031, - "step": 19535 - }, - { - "epoch": 1.8403711641270815, - "grad_norm": 0.7511125206947327, - "learning_rate": 3.253806787825564e-07, - "loss": 0.2095, - "step": 19536 - }, - { - "epoch": 1.8404653682202492, - "grad_norm": 0.723578929901123, - "learning_rate": 3.249987294070711e-07, - "loss": 0.1858, - "step": 19537 - }, - { - "epoch": 1.840559572313417, - "grad_norm": 0.6681480407714844, - "learning_rate": 3.2461700063657785e-07, - "loss": 0.1839, - "step": 19538 - }, - { - "epoch": 1.840653776406585, - "grad_norm": 0.6754745841026306, - "learning_rate": 3.242354924797786e-07, - "loss": 0.2039, - "step": 19539 - }, - { - "epoch": 1.8407479804997529, - "grad_norm": 0.6273301839828491, - "learning_rate": 3.238542049453763e-07, - "loss": 0.1506, - "step": 19540 - }, - { - "epoch": 1.8408421845929206, - "grad_norm": 0.7100375294685364, - "learning_rate": 3.234731380420608e-07, - "loss": 0.2205, - "step": 19541 - }, - { - "epoch": 1.8409363886860883, - "grad_norm": 0.7500102519989014, - "learning_rate": 3.2309229177852287e-07, - "loss": 0.1939, - "step": 19542 - }, - { - "epoch": 1.8410305927792563, - "grad_norm": 0.6986610889434814, - "learning_rate": 3.227116661634466e-07, - "loss": 0.202, - "step": 19543 - }, - { - "epoch": 1.8411247968724243, - "grad_norm": 0.5962001085281372, - "learning_rate": 3.223312612055107e-07, - "loss": 0.1802, - "step": 19544 - }, - { - "epoch": 1.841219000965592, - "grad_norm": 0.6139877438545227, - "learning_rate": 3.2195107691338933e-07, - "loss": 0.1629, - "step": 19545 - }, - { - "epoch": 1.8413132050587597, - "grad_norm": 0.7145199775695801, - "learning_rate": 3.2157111329574997e-07, - "loss": 0.1788, - "step": 19546 - }, - { - "epoch": 1.8414074091519277, - "grad_norm": 0.6981918811798096, - "learning_rate": 3.211913703612568e-07, - "loss": 0.1853, - "step": 19547 - }, - { - "epoch": 1.8415016132450956, - "grad_norm": 0.6296716332435608, - "learning_rate": 3.208118481185707e-07, - "loss": 0.1902, - "step": 19548 - }, - { - "epoch": 1.8415958173382634, - "grad_norm": 0.7081273198127747, - "learning_rate": 3.204325465763425e-07, - "loss": 0.182, - "step": 19549 - }, - { - "epoch": 1.8416900214314311, - "grad_norm": 0.6704627275466919, - "learning_rate": 3.2005346574322195e-07, - "loss": 0.1726, - "step": 19550 - }, - { - "epoch": 1.841784225524599, - "grad_norm": 0.6365349888801575, - "learning_rate": 3.1967460562785325e-07, - "loss": 0.1836, - "step": 19551 - }, - { - "epoch": 1.841878429617767, - "grad_norm": 0.6927284002304077, - "learning_rate": 3.192959662388739e-07, - "loss": 0.1965, - "step": 19552 - }, - { - "epoch": 1.8419726337109348, - "grad_norm": 0.6339163780212402, - "learning_rate": 3.189175475849171e-07, - "loss": 0.1913, - "step": 19553 - }, - { - "epoch": 1.8420668378041025, - "grad_norm": 0.6798602938652039, - "learning_rate": 3.1853934967461363e-07, - "loss": 0.1805, - "step": 19554 - }, - { - "epoch": 1.8421610418972705, - "grad_norm": 0.5639039874076843, - "learning_rate": 3.1816137251658664e-07, - "loss": 0.1609, - "step": 19555 - }, - { - "epoch": 1.8422552459904384, - "grad_norm": 0.6263942718505859, - "learning_rate": 3.177836161194503e-07, - "loss": 0.1723, - "step": 19556 - }, - { - "epoch": 
1.8423494500836062, - "grad_norm": 0.6475573778152466, - "learning_rate": 3.1740608049182444e-07, - "loss": 0.1978, - "step": 19557 - }, - { - "epoch": 1.842443654176774, - "grad_norm": 0.6725324988365173, - "learning_rate": 3.1702876564231434e-07, - "loss": 0.1766, - "step": 19558 - }, - { - "epoch": 1.8425378582699419, - "grad_norm": 0.5613603591918945, - "learning_rate": 3.1665167157952093e-07, - "loss": 0.1604, - "step": 19559 - }, - { - "epoch": 1.8426320623631098, - "grad_norm": 0.6868391036987305, - "learning_rate": 3.162747983120473e-07, - "loss": 0.2246, - "step": 19560 - }, - { - "epoch": 1.8427262664562776, - "grad_norm": 0.6153104305267334, - "learning_rate": 3.1589814584848334e-07, - "loss": 0.1964, - "step": 19561 - }, - { - "epoch": 1.8428204705494453, - "grad_norm": 0.6290621161460876, - "learning_rate": 3.1552171419741874e-07, - "loss": 0.1935, - "step": 19562 - }, - { - "epoch": 1.8429146746426133, - "grad_norm": 0.6391206383705139, - "learning_rate": 3.1514550336743554e-07, - "loss": 0.1899, - "step": 19563 - }, - { - "epoch": 1.8430088787357812, - "grad_norm": 0.7169826030731201, - "learning_rate": 3.147695133671147e-07, - "loss": 0.1895, - "step": 19564 - }, - { - "epoch": 1.843103082828949, - "grad_norm": 0.6721808314323425, - "learning_rate": 3.14393744205026e-07, - "loss": 0.1867, - "step": 19565 - }, - { - "epoch": 1.8431972869221167, - "grad_norm": 0.6748124957084656, - "learning_rate": 3.1401819588973814e-07, - "loss": 0.1858, - "step": 19566 - }, - { - "epoch": 1.8432914910152847, - "grad_norm": 0.7615307569503784, - "learning_rate": 3.1364286842981763e-07, - "loss": 0.2052, - "step": 19567 - }, - { - "epoch": 1.8433856951084526, - "grad_norm": 0.6342546343803406, - "learning_rate": 3.1326776183381757e-07, - "loss": 0.1788, - "step": 19568 - }, - { - "epoch": 1.8434798992016204, - "grad_norm": 0.6604020595550537, - "learning_rate": 3.128928761102945e-07, - "loss": 0.2, - "step": 19569 - }, - { - "epoch": 1.843574103294788, - "grad_norm": 0.6869939565658569, - "learning_rate": 3.1251821126779494e-07, - "loss": 0.1853, - "step": 19570 - }, - { - "epoch": 1.843668307387956, - "grad_norm": 0.6485978364944458, - "learning_rate": 3.1214376731486194e-07, - "loss": 0.2084, - "step": 19571 - }, - { - "epoch": 1.8437625114811238, - "grad_norm": 0.662097692489624, - "learning_rate": 3.1176954426003327e-07, - "loss": 0.1827, - "step": 19572 - }, - { - "epoch": 1.8438567155742915, - "grad_norm": 0.7116630673408508, - "learning_rate": 3.113955421118442e-07, - "loss": 0.1871, - "step": 19573 - }, - { - "epoch": 1.8439509196674595, - "grad_norm": 0.7092744708061218, - "learning_rate": 3.11021760878818e-07, - "loss": 0.1922, - "step": 19574 - }, - { - "epoch": 1.8440451237606275, - "grad_norm": 0.6666938066482544, - "learning_rate": 3.106482005694822e-07, - "loss": 0.1793, - "step": 19575 - }, - { - "epoch": 1.8441393278537952, - "grad_norm": 0.6568140387535095, - "learning_rate": 3.1027486119235117e-07, - "loss": 0.1878, - "step": 19576 - }, - { - "epoch": 1.844233531946963, - "grad_norm": 0.6853905916213989, - "learning_rate": 3.099017427559392e-07, - "loss": 0.208, - "step": 19577 - }, - { - "epoch": 1.844327736040131, - "grad_norm": 0.5811006426811218, - "learning_rate": 3.0952884526875285e-07, - "loss": 0.1856, - "step": 19578 - }, - { - "epoch": 1.8444219401332989, - "grad_norm": 0.6356227993965149, - "learning_rate": 3.091561687392952e-07, - "loss": 0.2054, - "step": 19579 - }, - { - "epoch": 1.8445161442264666, - "grad_norm": 0.6708242893218994, - "learning_rate": 
3.0878371317606513e-07, - "loss": 0.1842, - "step": 19580 - }, - { - "epoch": 1.8446103483196343, - "grad_norm": 0.6495194435119629, - "learning_rate": 3.084114785875525e-07, - "loss": 0.1941, - "step": 19581 - }, - { - "epoch": 1.8447045524128023, - "grad_norm": 0.6219097375869751, - "learning_rate": 3.080394649822471e-07, - "loss": 0.1932, - "step": 19582 - }, - { - "epoch": 1.8447987565059702, - "grad_norm": 0.6671397089958191, - "learning_rate": 3.0766767236863226e-07, - "loss": 0.1922, - "step": 19583 - }, - { - "epoch": 1.844892960599138, - "grad_norm": 0.620679497718811, - "learning_rate": 3.072961007551822e-07, - "loss": 0.171, - "step": 19584 - }, - { - "epoch": 1.8449871646923057, - "grad_norm": 0.6596792936325073, - "learning_rate": 3.069247501503725e-07, - "loss": 0.1866, - "step": 19585 - }, - { - "epoch": 1.8450813687854737, - "grad_norm": 5.839211463928223, - "learning_rate": 3.065536205626685e-07, - "loss": 0.1831, - "step": 19586 - }, - { - "epoch": 1.8451755728786416, - "grad_norm": 0.6346095204353333, - "learning_rate": 3.061827120005323e-07, - "loss": 0.1834, - "step": 19587 - }, - { - "epoch": 1.8452697769718094, - "grad_norm": 0.6453737616539001, - "learning_rate": 3.058120244724239e-07, - "loss": 0.1848, - "step": 19588 - }, - { - "epoch": 1.8453639810649771, - "grad_norm": 0.6811696290969849, - "learning_rate": 3.05441557986792e-07, - "loss": 0.2404, - "step": 19589 - }, - { - "epoch": 1.845458185158145, - "grad_norm": 0.7306646108627319, - "learning_rate": 3.0507131255208545e-07, - "loss": 0.2129, - "step": 19590 - }, - { - "epoch": 1.845552389251313, - "grad_norm": 0.6541388630867004, - "learning_rate": 3.047012881767475e-07, - "loss": 0.1988, - "step": 19591 - }, - { - "epoch": 1.8456465933444808, - "grad_norm": 0.614191472530365, - "learning_rate": 3.043314848692136e-07, - "loss": 0.1756, - "step": 19592 - }, - { - "epoch": 1.8457407974376485, - "grad_norm": 0.5694600343704224, - "learning_rate": 3.03961902637917e-07, - "loss": 0.1822, - "step": 19593 - }, - { - "epoch": 1.8458350015308165, - "grad_norm": 0.7498295307159424, - "learning_rate": 3.035925414912844e-07, - "loss": 0.1825, - "step": 19594 - }, - { - "epoch": 1.8459292056239844, - "grad_norm": 0.7187083959579468, - "learning_rate": 3.0322340143773777e-07, - "loss": 0.1976, - "step": 19595 - }, - { - "epoch": 1.8460234097171522, - "grad_norm": 0.6782183647155762, - "learning_rate": 3.028544824856916e-07, - "loss": 0.195, - "step": 19596 - }, - { - "epoch": 1.84611761381032, - "grad_norm": 0.7186533212661743, - "learning_rate": 3.0248578464356246e-07, - "loss": 0.1832, - "step": 19597 - }, - { - "epoch": 1.8462118179034879, - "grad_norm": 0.664090633392334, - "learning_rate": 3.021173079197559e-07, - "loss": 0.2159, - "step": 19598 - }, - { - "epoch": 1.8463060219966558, - "grad_norm": 0.6171144247055054, - "learning_rate": 3.017490523226696e-07, - "loss": 0.1685, - "step": 19599 - }, - { - "epoch": 1.8464002260898236, - "grad_norm": 1.0775052309036255, - "learning_rate": 3.0138101786070575e-07, - "loss": 0.1899, - "step": 19600 - }, - { - "epoch": 1.8464944301829913, - "grad_norm": 0.6579177379608154, - "learning_rate": 3.0101320454225424e-07, - "loss": 0.2263, - "step": 19601 - }, - { - "epoch": 1.8465886342761593, - "grad_norm": 0.5745784640312195, - "learning_rate": 3.0064561237570067e-07, - "loss": 0.1777, - "step": 19602 - }, - { - "epoch": 1.8466828383693272, - "grad_norm": 0.5918304324150085, - "learning_rate": 3.0027824136942607e-07, - "loss": 0.1726, - "step": 19603 - }, - { - "epoch": 
1.846777042462495, - "grad_norm": 0.6967973113059998, - "learning_rate": 2.999110915318115e-07, - "loss": 0.2281, - "step": 19604 - }, - { - "epoch": 1.8468712465556627, - "grad_norm": 0.6738758087158203, - "learning_rate": 2.9954416287122257e-07, - "loss": 0.2047, - "step": 19605 - }, - { - "epoch": 1.8469654506488307, - "grad_norm": 0.640472948551178, - "learning_rate": 2.9917745539603024e-07, - "loss": 0.1881, - "step": 19606 - }, - { - "epoch": 1.8470596547419986, - "grad_norm": 0.6335986852645874, - "learning_rate": 2.988109691145946e-07, - "loss": 0.1787, - "step": 19607 - }, - { - "epoch": 1.8471538588351664, - "grad_norm": 0.6743058562278748, - "learning_rate": 2.984447040352712e-07, - "loss": 0.1991, - "step": 19608 - }, - { - "epoch": 1.847248062928334, - "grad_norm": 0.623181939125061, - "learning_rate": 2.9807866016641316e-07, - "loss": 0.1831, - "step": 19609 - }, - { - "epoch": 1.847342267021502, - "grad_norm": 0.6506023406982422, - "learning_rate": 2.9771283751636627e-07, - "loss": 0.1946, - "step": 19610 - }, - { - "epoch": 1.84743647111467, - "grad_norm": 0.7284735441207886, - "learning_rate": 2.973472360934704e-07, - "loss": 0.1806, - "step": 19611 - }, - { - "epoch": 1.8475306752078378, - "grad_norm": 0.7416127324104309, - "learning_rate": 2.969818559060633e-07, - "loss": 0.2006, - "step": 19612 - }, - { - "epoch": 1.8476248793010055, - "grad_norm": 0.6531298756599426, - "learning_rate": 2.966166969624762e-07, - "loss": 0.2042, - "step": 19613 - }, - { - "epoch": 1.8477190833941735, - "grad_norm": 0.6501283049583435, - "learning_rate": 2.9625175927103455e-07, - "loss": 0.2047, - "step": 19614 - }, - { - "epoch": 1.8478132874873414, - "grad_norm": 0.605383574962616, - "learning_rate": 2.9588704284006176e-07, - "loss": 0.1642, - "step": 19615 - }, - { - "epoch": 1.8479074915805092, - "grad_norm": 0.7704370021820068, - "learning_rate": 2.9552254767787005e-07, - "loss": 0.2283, - "step": 19616 - }, - { - "epoch": 1.848001695673677, - "grad_norm": 0.691565752029419, - "learning_rate": 2.9515827379277386e-07, - "loss": 0.2185, - "step": 19617 - }, - { - "epoch": 1.8480958997668449, - "grad_norm": 0.7182550430297852, - "learning_rate": 2.947942211930765e-07, - "loss": 0.2153, - "step": 19618 - }, - { - "epoch": 1.8481901038600128, - "grad_norm": 0.6316009759902954, - "learning_rate": 2.944303898870804e-07, - "loss": 0.188, - "step": 19619 - }, - { - "epoch": 1.8482843079531805, - "grad_norm": 0.6875702738761902, - "learning_rate": 2.940667798830821e-07, - "loss": 0.1976, - "step": 19620 - }, - { - "epoch": 1.8483785120463483, - "grad_norm": 0.830883264541626, - "learning_rate": 2.9370339118937164e-07, - "loss": 0.1762, - "step": 19621 - }, - { - "epoch": 1.8484727161395162, - "grad_norm": 0.7475153207778931, - "learning_rate": 2.933402238142336e-07, - "loss": 0.2076, - "step": 19622 - }, - { - "epoch": 1.8485669202326842, - "grad_norm": 0.6479750871658325, - "learning_rate": 2.929772777659523e-07, - "loss": 0.1797, - "step": 19623 - }, - { - "epoch": 1.848661124325852, - "grad_norm": 0.6192037463188171, - "learning_rate": 2.926145530528002e-07, - "loss": 0.1769, - "step": 19624 - }, - { - "epoch": 1.8487553284190197, - "grad_norm": 0.7229530215263367, - "learning_rate": 2.9225204968304944e-07, - "loss": 0.1947, - "step": 19625 - }, - { - "epoch": 1.8488495325121876, - "grad_norm": 0.8542710542678833, - "learning_rate": 2.918897676649646e-07, - "loss": 0.1902, - "step": 19626 - }, - { - "epoch": 1.8489437366053556, - "grad_norm": 0.5919458866119385, - "learning_rate": 
2.9152770700680677e-07, - "loss": 0.1791, - "step": 19627 - }, - { - "epoch": 1.8490379406985233, - "grad_norm": 0.6756226420402527, - "learning_rate": 2.9116586771683273e-07, - "loss": 0.1849, - "step": 19628 - }, - { - "epoch": 1.849132144791691, - "grad_norm": 0.6769475936889648, - "learning_rate": 2.9080424980329147e-07, - "loss": 0.1676, - "step": 19629 - }, - { - "epoch": 1.849226348884859, - "grad_norm": 0.6706231236457825, - "learning_rate": 2.904428532744274e-07, - "loss": 0.1998, - "step": 19630 - }, - { - "epoch": 1.849320552978027, - "grad_norm": 0.6368475556373596, - "learning_rate": 2.900816781384852e-07, - "loss": 0.1771, - "step": 19631 - }, - { - "epoch": 1.8494147570711947, - "grad_norm": 0.662482500076294, - "learning_rate": 2.897207244036948e-07, - "loss": 0.1941, - "step": 19632 - }, - { - "epoch": 1.8495089611643625, - "grad_norm": 0.6530662178993225, - "learning_rate": 2.893599920782908e-07, - "loss": 0.1699, - "step": 19633 - }, - { - "epoch": 1.8496031652575304, - "grad_norm": 0.5605717897415161, - "learning_rate": 2.889994811704966e-07, - "loss": 0.1567, - "step": 19634 - }, - { - "epoch": 1.8496973693506984, - "grad_norm": 0.6888538599014282, - "learning_rate": 2.886391916885323e-07, - "loss": 0.1917, - "step": 19635 - }, - { - "epoch": 1.8497915734438661, - "grad_norm": 0.7064043283462524, - "learning_rate": 2.882791236406124e-07, - "loss": 0.2006, - "step": 19636 - }, - { - "epoch": 1.8498857775370339, - "grad_norm": 0.630302369594574, - "learning_rate": 2.8791927703494924e-07, - "loss": 0.171, - "step": 19637 - }, - { - "epoch": 1.8499799816302018, - "grad_norm": 0.792571485042572, - "learning_rate": 2.8755965187974633e-07, - "loss": 0.1968, - "step": 19638 - }, - { - "epoch": 1.8500741857233698, - "grad_norm": 0.6825298070907593, - "learning_rate": 2.8720024818320256e-07, - "loss": 0.1816, - "step": 19639 - }, - { - "epoch": 1.8501683898165375, - "grad_norm": 0.6284648180007935, - "learning_rate": 2.868410659535159e-07, - "loss": 0.1926, - "step": 19640 - }, - { - "epoch": 1.8502625939097053, - "grad_norm": 0.6546636819839478, - "learning_rate": 2.864821051988753e-07, - "loss": 0.1859, - "step": 19641 - }, - { - "epoch": 1.8503567980028732, - "grad_norm": 0.6364118456840515, - "learning_rate": 2.861233659274632e-07, - "loss": 0.1617, - "step": 19642 - }, - { - "epoch": 1.8504510020960412, - "grad_norm": 0.6821374893188477, - "learning_rate": 2.8576484814746176e-07, - "loss": 0.1985, - "step": 19643 - }, - { - "epoch": 1.850545206189209, - "grad_norm": 0.6279967427253723, - "learning_rate": 2.854065518670457e-07, - "loss": 0.1971, - "step": 19644 - }, - { - "epoch": 1.8506394102823767, - "grad_norm": 0.6143205165863037, - "learning_rate": 2.850484770943829e-07, - "loss": 0.1705, - "step": 19645 - }, - { - "epoch": 1.8507336143755446, - "grad_norm": 0.6796934604644775, - "learning_rate": 2.846906238376401e-07, - "loss": 0.2045, - "step": 19646 - }, - { - "epoch": 1.8508278184687126, - "grad_norm": 0.692689061164856, - "learning_rate": 2.8433299210497645e-07, - "loss": 0.2174, - "step": 19647 - }, - { - "epoch": 1.8509220225618803, - "grad_norm": 0.6097289323806763, - "learning_rate": 2.839755819045453e-07, - "loss": 0.1979, - "step": 19648 - }, - { - "epoch": 1.851016226655048, - "grad_norm": 0.7667192816734314, - "learning_rate": 2.836183932444969e-07, - "loss": 0.187, - "step": 19649 - }, - { - "epoch": 1.851110430748216, - "grad_norm": 0.6377841234207153, - "learning_rate": 2.832614261329769e-07, - "loss": 0.1942, - "step": 19650 - }, - { - "epoch": 
1.851204634841384, - "grad_norm": 0.5748652815818787, - "learning_rate": 2.829046805781221e-07, - "loss": 0.1794, - "step": 19651 - }, - { - "epoch": 1.8512988389345517, - "grad_norm": 0.6595532298088074, - "learning_rate": 2.8254815658806944e-07, - "loss": 0.19, - "step": 19652 - }, - { - "epoch": 1.8513930430277195, - "grad_norm": 0.6605759263038635, - "learning_rate": 2.8219185417094784e-07, - "loss": 0.1878, - "step": 19653 - }, - { - "epoch": 1.8514872471208874, - "grad_norm": 0.6056495904922485, - "learning_rate": 2.818357733348798e-07, - "loss": 0.1816, - "step": 19654 - }, - { - "epoch": 1.8515814512140554, - "grad_norm": 0.7514147758483887, - "learning_rate": 2.8147991408798534e-07, - "loss": 0.1719, - "step": 19655 - }, - { - "epoch": 1.8516756553072231, - "grad_norm": 0.5801824331283569, - "learning_rate": 2.8112427643837927e-07, - "loss": 0.1728, - "step": 19656 - }, - { - "epoch": 1.8517698594003908, - "grad_norm": 0.600534200668335, - "learning_rate": 2.8076886039417053e-07, - "loss": 0.1742, - "step": 19657 - }, - { - "epoch": 1.8518640634935588, - "grad_norm": 0.6482451558113098, - "learning_rate": 2.8041366596346267e-07, - "loss": 0.1869, - "step": 19658 - }, - { - "epoch": 1.8519582675867268, - "grad_norm": 0.6890844106674194, - "learning_rate": 2.8005869315435365e-07, - "loss": 0.2116, - "step": 19659 - }, - { - "epoch": 1.8520524716798945, - "grad_norm": 0.605459451675415, - "learning_rate": 2.797039419749403e-07, - "loss": 0.1708, - "step": 19660 - }, - { - "epoch": 1.8521466757730622, - "grad_norm": 0.6100725531578064, - "learning_rate": 2.793494124333085e-07, - "loss": 0.1537, - "step": 19661 - }, - { - "epoch": 1.8522408798662302, - "grad_norm": 0.598700225353241, - "learning_rate": 2.7899510453754386e-07, - "loss": 0.1716, - "step": 19662 - }, - { - "epoch": 1.8523350839593982, - "grad_norm": 0.6506624817848206, - "learning_rate": 2.7864101829572557e-07, - "loss": 0.1815, - "step": 19663 - }, - { - "epoch": 1.852429288052566, - "grad_norm": 0.6575130820274353, - "learning_rate": 2.7828715371592483e-07, - "loss": 0.1943, - "step": 19664 - }, - { - "epoch": 1.8525234921457336, - "grad_norm": 0.7161694765090942, - "learning_rate": 2.7793351080621425e-07, - "loss": 0.1939, - "step": 19665 - }, - { - "epoch": 1.8526176962389016, - "grad_norm": 0.6616843342781067, - "learning_rate": 2.7758008957465275e-07, - "loss": 0.1841, - "step": 19666 - }, - { - "epoch": 1.8527119003320696, - "grad_norm": 0.6931621432304382, - "learning_rate": 2.7722689002930183e-07, - "loss": 0.2313, - "step": 19667 - }, - { - "epoch": 1.8528061044252373, - "grad_norm": 0.6817244291305542, - "learning_rate": 2.768739121782149e-07, - "loss": 0.1765, - "step": 19668 - }, - { - "epoch": 1.852900308518405, - "grad_norm": 0.6396641135215759, - "learning_rate": 2.7652115602943895e-07, - "loss": 0.1903, - "step": 19669 - }, - { - "epoch": 1.852994512611573, - "grad_norm": 0.6513628959655762, - "learning_rate": 2.7616862159101866e-07, - "loss": 0.1911, - "step": 19670 - }, - { - "epoch": 1.853088716704741, - "grad_norm": 0.6474629640579224, - "learning_rate": 2.7581630887099307e-07, - "loss": 0.1885, - "step": 19671 - }, - { - "epoch": 1.8531829207979087, - "grad_norm": 0.7338670492172241, - "learning_rate": 2.7546421787739363e-07, - "loss": 0.1923, - "step": 19672 - }, - { - "epoch": 1.8532771248910764, - "grad_norm": 0.6354264616966248, - "learning_rate": 2.751123486182483e-07, - "loss": 0.1827, - "step": 19673 - }, - { - "epoch": 1.8533713289842444, - "grad_norm": 0.6260570287704468, - "learning_rate": 
2.747607011015829e-07, - "loss": 0.1812, - "step": 19674 - }, - { - "epoch": 1.8534655330774124, - "grad_norm": 0.6957045197486877, - "learning_rate": 2.744092753354144e-07, - "loss": 0.2319, - "step": 19675 - }, - { - "epoch": 1.85355973717058, - "grad_norm": 0.7678351998329163, - "learning_rate": 2.740580713277519e-07, - "loss": 0.1946, - "step": 19676 - }, - { - "epoch": 1.8536539412637478, - "grad_norm": 0.5689692497253418, - "learning_rate": 2.7370708908661003e-07, - "loss": 0.1989, - "step": 19677 - }, - { - "epoch": 1.8537481453569158, - "grad_norm": 0.6453081965446472, - "learning_rate": 2.7335632861998807e-07, - "loss": 0.1909, - "step": 19678 - }, - { - "epoch": 1.8538423494500837, - "grad_norm": 0.6650949120521545, - "learning_rate": 2.730057899358829e-07, - "loss": 0.1831, - "step": 19679 - }, - { - "epoch": 1.8539365535432515, - "grad_norm": 0.714195191860199, - "learning_rate": 2.7265547304229034e-07, - "loss": 0.2036, - "step": 19680 - }, - { - "epoch": 1.8540307576364192, - "grad_norm": 0.6691907644271851, - "learning_rate": 2.7230537794719623e-07, - "loss": 0.1933, - "step": 19681 - }, - { - "epoch": 1.8541249617295872, - "grad_norm": 0.5945155024528503, - "learning_rate": 2.719555046585831e-07, - "loss": 0.1849, - "step": 19682 - }, - { - "epoch": 1.8542191658227551, - "grad_norm": 0.7244347929954529, - "learning_rate": 2.71605853184429e-07, - "loss": 0.1836, - "step": 19683 - }, - { - "epoch": 1.8543133699159229, - "grad_norm": 0.6852580904960632, - "learning_rate": 2.7125642353270755e-07, - "loss": 0.2216, - "step": 19684 - }, - { - "epoch": 1.8544075740090906, - "grad_norm": 0.6954776048660278, - "learning_rate": 2.709072157113846e-07, - "loss": 0.2154, - "step": 19685 - }, - { - "epoch": 1.8545017781022586, - "grad_norm": 0.7100127935409546, - "learning_rate": 2.705582297284237e-07, - "loss": 0.2137, - "step": 19686 - }, - { - "epoch": 1.8545959821954265, - "grad_norm": 0.7123499512672424, - "learning_rate": 2.7020946559178306e-07, - "loss": 0.1878, - "step": 19687 - }, - { - "epoch": 1.8546901862885943, - "grad_norm": 0.6751053333282471, - "learning_rate": 2.6986092330941295e-07, - "loss": 0.2135, - "step": 19688 - }, - { - "epoch": 1.854784390381762, - "grad_norm": 0.6273108124732971, - "learning_rate": 2.695126028892614e-07, - "loss": 0.2124, - "step": 19689 - }, - { - "epoch": 1.85487859447493, - "grad_norm": 0.7018774747848511, - "learning_rate": 2.691645043392721e-07, - "loss": 0.1948, - "step": 19690 - }, - { - "epoch": 1.854972798568098, - "grad_norm": 0.6028143763542175, - "learning_rate": 2.688166276673809e-07, - "loss": 0.1724, - "step": 19691 - }, - { - "epoch": 1.8550670026612657, - "grad_norm": 0.7303981781005859, - "learning_rate": 2.684689728815193e-07, - "loss": 0.2126, - "step": 19692 - }, - { - "epoch": 1.8551612067544334, - "grad_norm": 0.6413070559501648, - "learning_rate": 2.681215399896164e-07, - "loss": 0.1758, - "step": 19693 - }, - { - "epoch": 1.8552554108476014, - "grad_norm": 0.5861358642578125, - "learning_rate": 2.677743289995927e-07, - "loss": 0.1699, - "step": 19694 - }, - { - "epoch": 1.8553496149407693, - "grad_norm": 0.7060631513595581, - "learning_rate": 2.67427339919365e-07, - "loss": 0.1697, - "step": 19695 - }, - { - "epoch": 1.855443819033937, - "grad_norm": 0.7234995365142822, - "learning_rate": 2.6708057275684595e-07, - "loss": 0.2168, - "step": 19696 - }, - { - "epoch": 1.8555380231271048, - "grad_norm": 0.6536963582038879, - "learning_rate": 2.667340275199426e-07, - "loss": 0.1915, - "step": 19697 - }, - { - "epoch": 
1.8556322272202728, - "grad_norm": 0.7373390197753906, - "learning_rate": 2.663877042165552e-07, - "loss": 0.1955, - "step": 19698 - }, - { - "epoch": 1.8557264313134407, - "grad_norm": 0.8008182644844055, - "learning_rate": 2.6604160285458203e-07, - "loss": 0.178, - "step": 19699 - }, - { - "epoch": 1.8558206354066085, - "grad_norm": 0.8788657188415527, - "learning_rate": 2.656957234419144e-07, - "loss": 0.2104, - "step": 19700 - }, - { - "epoch": 1.8559148394997762, - "grad_norm": 0.6970170140266418, - "learning_rate": 2.653500659864372e-07, - "loss": 0.1984, - "step": 19701 - }, - { - "epoch": 1.8560090435929442, - "grad_norm": 0.6879594922065735, - "learning_rate": 2.650046304960352e-07, - "loss": 0.1762, - "step": 19702 - }, - { - "epoch": 1.8561032476861121, - "grad_norm": 0.643665075302124, - "learning_rate": 2.6465941697858213e-07, - "loss": 0.1909, - "step": 19703 - }, - { - "epoch": 1.8561974517792799, - "grad_norm": 0.7155811786651611, - "learning_rate": 2.6431442544195053e-07, - "loss": 0.2002, - "step": 19704 - }, - { - "epoch": 1.8562916558724476, - "grad_norm": 0.6288726925849915, - "learning_rate": 2.6396965589400746e-07, - "loss": 0.1683, - "step": 19705 - }, - { - "epoch": 1.8563858599656156, - "grad_norm": 0.6954754590988159, - "learning_rate": 2.6362510834261226e-07, - "loss": 0.1861, - "step": 19706 - }, - { - "epoch": 1.8564800640587835, - "grad_norm": 0.6139570474624634, - "learning_rate": 2.6328078279562185e-07, - "loss": 0.1833, - "step": 19707 - }, - { - "epoch": 1.8565742681519513, - "grad_norm": 0.6650378108024597, - "learning_rate": 2.62936679260889e-07, - "loss": 0.2117, - "step": 19708 - }, - { - "epoch": 1.856668472245119, - "grad_norm": 0.625217080116272, - "learning_rate": 2.625927977462572e-07, - "loss": 0.1759, - "step": 19709 - }, - { - "epoch": 1.856762676338287, - "grad_norm": 1.1064122915267944, - "learning_rate": 2.6224913825956933e-07, - "loss": 0.1915, - "step": 19710 - }, - { - "epoch": 1.8568568804314547, - "grad_norm": 0.6624402403831482, - "learning_rate": 2.619057008086612e-07, - "loss": 0.1808, - "step": 19711 - }, - { - "epoch": 1.8569510845246224, - "grad_norm": 0.6218162178993225, - "learning_rate": 2.615624854013632e-07, - "loss": 0.1853, - "step": 19712 - }, - { - "epoch": 1.8570452886177904, - "grad_norm": 0.6406760215759277, - "learning_rate": 2.6121949204550024e-07, - "loss": 0.1946, - "step": 19713 - }, - { - "epoch": 1.8571394927109584, - "grad_norm": 0.6376236081123352, - "learning_rate": 2.6087672074889603e-07, - "loss": 0.1923, - "step": 19714 - }, - { - "epoch": 1.857233696804126, - "grad_norm": 0.6326330304145813, - "learning_rate": 2.6053417151936435e-07, - "loss": 0.1913, - "step": 19715 - }, - { - "epoch": 1.8573279008972938, - "grad_norm": 0.6673585176467896, - "learning_rate": 2.6019184436471335e-07, - "loss": 0.1839, - "step": 19716 - }, - { - "epoch": 1.8574221049904618, - "grad_norm": 0.6535778045654297, - "learning_rate": 2.598497392927535e-07, - "loss": 0.2008, - "step": 19717 - }, - { - "epoch": 1.8575163090836297, - "grad_norm": 0.7765671014785767, - "learning_rate": 2.5950785631128294e-07, - "loss": 0.1733, - "step": 19718 - }, - { - "epoch": 1.8576105131767975, - "grad_norm": 0.6393800377845764, - "learning_rate": 2.591661954280966e-07, - "loss": 0.1653, - "step": 19719 - }, - { - "epoch": 1.8577047172699652, - "grad_norm": 0.6803317070007324, - "learning_rate": 2.5882475665098493e-07, - "loss": 0.2272, - "step": 19720 - }, - { - "epoch": 1.8577989213631332, - "grad_norm": 0.6579533219337463, - "learning_rate": 
2.58483539987735e-07, - "loss": 0.2057, - "step": 19721 - }, - { - "epoch": 1.8578931254563011, - "grad_norm": 0.642487108707428, - "learning_rate": 2.5814254544612503e-07, - "loss": 0.1982, - "step": 19722 - }, - { - "epoch": 1.8579873295494689, - "grad_norm": 0.6265017986297607, - "learning_rate": 2.5780177303393105e-07, - "loss": 0.1968, - "step": 19723 - }, - { - "epoch": 1.8580815336426366, - "grad_norm": 0.6324055194854736, - "learning_rate": 2.574612227589246e-07, - "loss": 0.1923, - "step": 19724 - }, - { - "epoch": 1.8581757377358046, - "grad_norm": 0.6792360544204712, - "learning_rate": 2.5712089462886836e-07, - "loss": 0.2093, - "step": 19725 - }, - { - "epoch": 1.8582699418289725, - "grad_norm": 0.6746578812599182, - "learning_rate": 2.5678078865152276e-07, - "loss": 0.2031, - "step": 19726 - }, - { - "epoch": 1.8583641459221403, - "grad_norm": 0.9754471182823181, - "learning_rate": 2.56440904834645e-07, - "loss": 0.1892, - "step": 19727 - }, - { - "epoch": 1.858458350015308, - "grad_norm": 0.6703466176986694, - "learning_rate": 2.5610124318598216e-07, - "loss": 0.2043, - "step": 19728 - }, - { - "epoch": 1.858552554108476, - "grad_norm": 0.663557231426239, - "learning_rate": 2.557618037132803e-07, - "loss": 0.1758, - "step": 19729 - }, - { - "epoch": 1.858646758201644, - "grad_norm": 0.5749669075012207, - "learning_rate": 2.5542258642427987e-07, - "loss": 0.1784, - "step": 19730 - }, - { - "epoch": 1.8587409622948117, - "grad_norm": 0.6767820715904236, - "learning_rate": 2.5508359132671356e-07, - "loss": 0.1933, - "step": 19731 - }, - { - "epoch": 1.8588351663879794, - "grad_norm": 0.7008445262908936, - "learning_rate": 2.5474481842831187e-07, - "loss": 0.2114, - "step": 19732 - }, - { - "epoch": 1.8589293704811474, - "grad_norm": 0.6820916533470154, - "learning_rate": 2.54406267736802e-07, - "loss": 0.1771, - "step": 19733 - }, - { - "epoch": 1.8590235745743153, - "grad_norm": 0.681978702545166, - "learning_rate": 2.540679392598999e-07, - "loss": 0.1864, - "step": 19734 - }, - { - "epoch": 1.859117778667483, - "grad_norm": 0.6696916222572327, - "learning_rate": 2.537298330053206e-07, - "loss": 0.1991, - "step": 19735 - }, - { - "epoch": 1.8592119827606508, - "grad_norm": 0.7833788394927979, - "learning_rate": 2.5339194898077346e-07, - "loss": 0.2168, - "step": 19736 - }, - { - "epoch": 1.8593061868538188, - "grad_norm": 0.6476829648017883, - "learning_rate": 2.530542871939645e-07, - "loss": 0.1839, - "step": 19737 - }, - { - "epoch": 1.8594003909469867, - "grad_norm": 0.7010082006454468, - "learning_rate": 2.52716847652591e-07, - "loss": 0.2018, - "step": 19738 - }, - { - "epoch": 1.8594945950401545, - "grad_norm": 0.6451516151428223, - "learning_rate": 2.5237963036434775e-07, - "loss": 0.176, - "step": 19739 - }, - { - "epoch": 1.8595887991333222, - "grad_norm": 0.7889953255653381, - "learning_rate": 2.520426353369254e-07, - "loss": 0.2127, - "step": 19740 - }, - { - "epoch": 1.8596830032264902, - "grad_norm": 0.6260870099067688, - "learning_rate": 2.517058625780044e-07, - "loss": 0.1932, - "step": 19741 - }, - { - "epoch": 1.8597772073196581, - "grad_norm": 0.594916582107544, - "learning_rate": 2.513693120952665e-07, - "loss": 0.1776, - "step": 19742 - }, - { - "epoch": 1.8598714114128259, - "grad_norm": 0.6291800141334534, - "learning_rate": 2.5103298389638544e-07, - "loss": 0.1819, - "step": 19743 - }, - { - "epoch": 1.8599656155059936, - "grad_norm": 0.7713466882705688, - "learning_rate": 2.5069687798902954e-07, - "loss": 0.2119, - "step": 19744 - }, - { - "epoch": 
1.8600598195991616, - "grad_norm": 0.623530924320221, - "learning_rate": 2.5036099438086157e-07, - "loss": 0.2096, - "step": 19745 - }, - { - "epoch": 1.8601540236923295, - "grad_norm": 0.6710311770439148, - "learning_rate": 2.5002533307954103e-07, - "loss": 0.19, - "step": 19746 - }, - { - "epoch": 1.8602482277854973, - "grad_norm": 0.7561832666397095, - "learning_rate": 2.496898940927217e-07, - "loss": 0.2043, - "step": 19747 - }, - { - "epoch": 1.860342431878665, - "grad_norm": 0.6054635643959045, - "learning_rate": 2.493546774280531e-07, - "loss": 0.1888, - "step": 19748 - }, - { - "epoch": 1.860436635971833, - "grad_norm": 0.6866369247436523, - "learning_rate": 2.490196830931757e-07, - "loss": 0.1946, - "step": 19749 - }, - { - "epoch": 1.860530840065001, - "grad_norm": 0.6473537683486938, - "learning_rate": 2.4868491109573013e-07, - "loss": 0.1817, - "step": 19750 - }, - { - "epoch": 1.8606250441581687, - "grad_norm": 0.692039430141449, - "learning_rate": 2.4835036144335024e-07, - "loss": 0.1943, - "step": 19751 - }, - { - "epoch": 1.8607192482513364, - "grad_norm": 0.7021738886833191, - "learning_rate": 2.480160341436633e-07, - "loss": 0.1855, - "step": 19752 - }, - { - "epoch": 1.8608134523445043, - "grad_norm": 0.6827689409255981, - "learning_rate": 2.476819292042909e-07, - "loss": 0.2112, - "step": 19753 - }, - { - "epoch": 1.8609076564376723, - "grad_norm": 0.7296918630599976, - "learning_rate": 2.4734804663285485e-07, - "loss": 0.1787, - "step": 19754 - }, - { - "epoch": 1.86100186053084, - "grad_norm": 0.6645269989967346, - "learning_rate": 2.470143864369656e-07, - "loss": 0.1859, - "step": 19755 - }, - { - "epoch": 1.8610960646240078, - "grad_norm": 0.8175538182258606, - "learning_rate": 2.4668094862422943e-07, - "loss": 0.2661, - "step": 19756 - }, - { - "epoch": 1.8611902687171757, - "grad_norm": 0.6240702867507935, - "learning_rate": 2.463477332022535e-07, - "loss": 0.2444, - "step": 19757 - }, - { - "epoch": 1.8612844728103437, - "grad_norm": 0.6328794360160828, - "learning_rate": 2.460147401786339e-07, - "loss": 0.1992, - "step": 19758 - }, - { - "epoch": 1.8613786769035114, - "grad_norm": 0.6536951065063477, - "learning_rate": 2.4568196956096245e-07, - "loss": 0.2007, - "step": 19759 - }, - { - "epoch": 1.8614728809966792, - "grad_norm": 0.5887195467948914, - "learning_rate": 2.4534942135682637e-07, - "loss": 0.1582, - "step": 19760 - }, - { - "epoch": 1.8615670850898471, - "grad_norm": 0.6196441650390625, - "learning_rate": 2.4501709557381183e-07, - "loss": 0.1794, - "step": 19761 - }, - { - "epoch": 1.861661289183015, - "grad_norm": 0.6211194396018982, - "learning_rate": 2.446849922194916e-07, - "loss": 0.1877, - "step": 19762 - }, - { - "epoch": 1.8617554932761828, - "grad_norm": 0.6575772762298584, - "learning_rate": 2.443531113014408e-07, - "loss": 0.1821, - "step": 19763 - }, - { - "epoch": 1.8618496973693506, - "grad_norm": 0.7031137943267822, - "learning_rate": 2.440214528272278e-07, - "loss": 0.2013, - "step": 19764 - }, - { - "epoch": 1.8619439014625185, - "grad_norm": 0.6978479027748108, - "learning_rate": 2.436900168044121e-07, - "loss": 0.1889, - "step": 19765 - }, - { - "epoch": 1.8620381055556865, - "grad_norm": 0.697313666343689, - "learning_rate": 2.4335880324055204e-07, - "loss": 0.2136, - "step": 19766 - }, - { - "epoch": 1.8621323096488542, - "grad_norm": 0.7061405181884766, - "learning_rate": 2.430278121432017e-07, - "loss": 0.1921, - "step": 19767 - }, - { - "epoch": 1.862226513742022, - "grad_norm": 0.6298143267631531, - "learning_rate": 
2.4269704351990606e-07, - "loss": 0.2184, - "step": 19768 - }, - { - "epoch": 1.86232071783519, - "grad_norm": 0.6164076328277588, - "learning_rate": 2.4236649737820695e-07, - "loss": 0.1586, - "step": 19769 - }, - { - "epoch": 1.862414921928358, - "grad_norm": 0.6876676678657532, - "learning_rate": 2.420361737256438e-07, - "loss": 0.1788, - "step": 19770 - }, - { - "epoch": 1.8625091260215256, - "grad_norm": 0.5701471567153931, - "learning_rate": 2.417060725697473e-07, - "loss": 0.1744, - "step": 19771 - }, - { - "epoch": 1.8626033301146934, - "grad_norm": 0.6103629469871521, - "learning_rate": 2.413761939180415e-07, - "loss": 0.1647, - "step": 19772 - }, - { - "epoch": 1.8626975342078613, - "grad_norm": 0.648344099521637, - "learning_rate": 2.410465377780513e-07, - "loss": 0.1946, - "step": 19773 - }, - { - "epoch": 1.8627917383010293, - "grad_norm": 0.6380503177642822, - "learning_rate": 2.407171041572942e-07, - "loss": 0.1682, - "step": 19774 - }, - { - "epoch": 1.862885942394197, - "grad_norm": 0.7133384943008423, - "learning_rate": 2.403878930632786e-07, - "loss": 0.2384, - "step": 19775 - }, - { - "epoch": 1.8629801464873648, - "grad_norm": 0.605835497379303, - "learning_rate": 2.400589045035118e-07, - "loss": 0.1668, - "step": 19776 - }, - { - "epoch": 1.8630743505805327, - "grad_norm": 0.6401779651641846, - "learning_rate": 2.3973013848549775e-07, - "loss": 0.2044, - "step": 19777 - }, - { - "epoch": 1.8631685546737007, - "grad_norm": 0.6554655432701111, - "learning_rate": 2.3940159501673054e-07, - "loss": 0.1887, - "step": 19778 - }, - { - "epoch": 1.8632627587668684, - "grad_norm": 0.7029209136962891, - "learning_rate": 2.3907327410470084e-07, - "loss": 0.1947, - "step": 19779 - }, - { - "epoch": 1.8633569628600362, - "grad_norm": 0.7291501760482788, - "learning_rate": 2.3874517575689817e-07, - "loss": 0.1917, - "step": 19780 - }, - { - "epoch": 1.8634511669532041, - "grad_norm": 0.7364242076873779, - "learning_rate": 2.3841729998079987e-07, - "loss": 0.1828, - "step": 19781 - }, - { - "epoch": 1.863545371046372, - "grad_norm": 0.6417863368988037, - "learning_rate": 2.3808964678388447e-07, - "loss": 0.1812, - "step": 19782 - }, - { - "epoch": 1.8636395751395398, - "grad_norm": 0.6952944993972778, - "learning_rate": 2.3776221617362261e-07, - "loss": 0.2288, - "step": 19783 - }, - { - "epoch": 1.8637337792327076, - "grad_norm": 0.6842653751373291, - "learning_rate": 2.3743500815747834e-07, - "loss": 0.2004, - "step": 19784 - }, - { - "epoch": 1.8638279833258755, - "grad_norm": 0.7092806100845337, - "learning_rate": 2.3710802274291567e-07, - "loss": 0.2084, - "step": 19785 - }, - { - "epoch": 1.8639221874190435, - "grad_norm": 0.7035544514656067, - "learning_rate": 2.3678125993738753e-07, - "loss": 0.1994, - "step": 19786 - }, - { - "epoch": 1.8640163915122112, - "grad_norm": 0.7044257521629333, - "learning_rate": 2.3645471974834577e-07, - "loss": 0.1983, - "step": 19787 - }, - { - "epoch": 1.864110595605379, - "grad_norm": 0.6268710494041443, - "learning_rate": 2.3612840218323664e-07, - "loss": 0.1734, - "step": 19788 - }, - { - "epoch": 1.864204799698547, - "grad_norm": 0.6927345395088196, - "learning_rate": 2.358023072494997e-07, - "loss": 0.1911, - "step": 19789 - }, - { - "epoch": 1.8642990037917149, - "grad_norm": 0.6738446950912476, - "learning_rate": 2.3547643495457018e-07, - "loss": 0.2192, - "step": 19790 - }, - { - "epoch": 1.8643932078848826, - "grad_norm": 0.6546797156333923, - "learning_rate": 2.35150785305881e-07, - "loss": 0.1623, - "step": 19791 - }, - { - "epoch": 
1.8644874119780503, - "grad_norm": 0.6611368060112, - "learning_rate": 2.3482535831085396e-07, - "loss": 0.1766, - "step": 19792 - }, - { - "epoch": 1.8645816160712183, - "grad_norm": 0.683911919593811, - "learning_rate": 2.3450015397690985e-07, - "loss": 0.1853, - "step": 19793 - }, - { - "epoch": 1.8646758201643863, - "grad_norm": 0.6592962145805359, - "learning_rate": 2.3417517231146712e-07, - "loss": 0.1974, - "step": 19794 - }, - { - "epoch": 1.864770024257554, - "grad_norm": 0.6624939441680908, - "learning_rate": 2.3385041332193325e-07, - "loss": 0.1924, - "step": 19795 - }, - { - "epoch": 1.8648642283507217, - "grad_norm": 0.7302245497703552, - "learning_rate": 2.335258770157134e-07, - "loss": 0.2298, - "step": 19796 - }, - { - "epoch": 1.8649584324438897, - "grad_norm": 0.6368466019630432, - "learning_rate": 2.3320156340020605e-07, - "loss": 0.1981, - "step": 19797 - }, - { - "epoch": 1.8650526365370577, - "grad_norm": 0.5639004707336426, - "learning_rate": 2.3287747248280978e-07, - "loss": 0.1561, - "step": 19798 - }, - { - "epoch": 1.8651468406302254, - "grad_norm": 0.6973299384117126, - "learning_rate": 2.32553604270912e-07, - "loss": 0.209, - "step": 19799 - }, - { - "epoch": 1.8652410447233931, - "grad_norm": 0.6718546748161316, - "learning_rate": 2.322299587718968e-07, - "loss": 0.1788, - "step": 19800 - }, - { - "epoch": 1.865335248816561, - "grad_norm": 0.6919240355491638, - "learning_rate": 2.3190653599314493e-07, - "loss": 0.1624, - "step": 19801 - }, - { - "epoch": 1.865429452909729, - "grad_norm": 0.7096720933914185, - "learning_rate": 2.3158333594203054e-07, - "loss": 0.1912, - "step": 19802 - }, - { - "epoch": 1.8655236570028968, - "grad_norm": 0.6396719217300415, - "learning_rate": 2.3126035862592322e-07, - "loss": 0.1877, - "step": 19803 - }, - { - "epoch": 1.8656178610960645, - "grad_norm": 0.6330217123031616, - "learning_rate": 2.3093760405218823e-07, - "loss": 0.1923, - "step": 19804 - }, - { - "epoch": 1.8657120651892325, - "grad_norm": 0.633255124092102, - "learning_rate": 2.3061507222818303e-07, - "loss": 0.1969, - "step": 19805 - }, - { - "epoch": 1.8658062692824005, - "grad_norm": 0.7594079971313477, - "learning_rate": 2.3029276316126281e-07, - "loss": 0.1952, - "step": 19806 - }, - { - "epoch": 1.8659004733755682, - "grad_norm": 0.6460544466972351, - "learning_rate": 2.299706768587784e-07, - "loss": 0.2028, - "step": 19807 - }, - { - "epoch": 1.865994677468736, - "grad_norm": 0.7043282985687256, - "learning_rate": 2.2964881332807053e-07, - "loss": 0.1979, - "step": 19808 - }, - { - "epoch": 1.866088881561904, - "grad_norm": 0.6759858131408691, - "learning_rate": 2.2932717257648008e-07, - "loss": 0.1922, - "step": 19809 - }, - { - "epoch": 1.8661830856550718, - "grad_norm": 0.9607840776443481, - "learning_rate": 2.2900575461134112e-07, - "loss": 0.1948, - "step": 19810 - }, - { - "epoch": 1.8662772897482396, - "grad_norm": 0.6503164768218994, - "learning_rate": 2.2868455943998224e-07, - "loss": 0.2198, - "step": 19811 - }, - { - "epoch": 1.8663714938414073, - "grad_norm": 0.7966095805168152, - "learning_rate": 2.283635870697265e-07, - "loss": 0.2345, - "step": 19812 - }, - { - "epoch": 1.8664656979345753, - "grad_norm": 0.6505135893821716, - "learning_rate": 2.2804283750789357e-07, - "loss": 0.201, - "step": 19813 - }, - { - "epoch": 1.8665599020277432, - "grad_norm": 0.706760048866272, - "learning_rate": 2.2772231076179652e-07, - "loss": 0.1887, - "step": 19814 - }, - { - "epoch": 1.866654106120911, - "grad_norm": 0.705281674861908, - "learning_rate": 
2.2740200683874392e-07, - "loss": 0.199, - "step": 19815 - }, - { - "epoch": 1.8667483102140787, - "grad_norm": 0.725080668926239, - "learning_rate": 2.270819257460377e-07, - "loss": 0.2273, - "step": 19816 - }, - { - "epoch": 1.8668425143072467, - "grad_norm": 0.6675900220870972, - "learning_rate": 2.2676206749097985e-07, - "loss": 0.1812, - "step": 19817 - }, - { - "epoch": 1.8669367184004146, - "grad_norm": 0.7213043570518494, - "learning_rate": 2.2644243208086003e-07, - "loss": 0.1949, - "step": 19818 - }, - { - "epoch": 1.8670309224935824, - "grad_norm": 0.6684525609016418, - "learning_rate": 2.26123019522968e-07, - "loss": 0.1977, - "step": 19819 - }, - { - "epoch": 1.8671251265867501, - "grad_norm": 0.6752498745918274, - "learning_rate": 2.2580382982458794e-07, - "loss": 0.2049, - "step": 19820 - }, - { - "epoch": 1.867219330679918, - "grad_norm": 0.6144601106643677, - "learning_rate": 2.254848629929951e-07, - "loss": 0.1635, - "step": 19821 - }, - { - "epoch": 1.867313534773086, - "grad_norm": 0.6752665042877197, - "learning_rate": 2.2516611903546482e-07, - "loss": 0.1816, - "step": 19822 - }, - { - "epoch": 1.8674077388662538, - "grad_norm": 0.8126716613769531, - "learning_rate": 2.248475979592646e-07, - "loss": 0.1905, - "step": 19823 - }, - { - "epoch": 1.8675019429594215, - "grad_norm": 0.6823545098304749, - "learning_rate": 2.2452929977165528e-07, - "loss": 0.2016, - "step": 19824 - }, - { - "epoch": 1.8675961470525895, - "grad_norm": 0.673636794090271, - "learning_rate": 2.2421122447989773e-07, - "loss": 0.1951, - "step": 19825 - }, - { - "epoch": 1.8676903511457574, - "grad_norm": 0.6112269759178162, - "learning_rate": 2.2389337209124173e-07, - "loss": 0.1794, - "step": 19826 - }, - { - "epoch": 1.8677845552389252, - "grad_norm": 0.7177988886833191, - "learning_rate": 2.2357574261293592e-07, - "loss": 0.2162, - "step": 19827 - }, - { - "epoch": 1.867878759332093, - "grad_norm": 0.640326201915741, - "learning_rate": 2.2325833605222337e-07, - "loss": 0.1776, - "step": 19828 - }, - { - "epoch": 1.8679729634252609, - "grad_norm": 0.624055802822113, - "learning_rate": 2.229411524163394e-07, - "loss": 0.1667, - "step": 19829 - }, - { - "epoch": 1.8680671675184288, - "grad_norm": 0.6927505135536194, - "learning_rate": 2.2262419171251827e-07, - "loss": 0.2026, - "step": 19830 - }, - { - "epoch": 1.8681613716115966, - "grad_norm": 0.7343767881393433, - "learning_rate": 2.2230745394798748e-07, - "loss": 0.1816, - "step": 19831 - }, - { - "epoch": 1.8682555757047643, - "grad_norm": 0.6705753803253174, - "learning_rate": 2.2199093912996684e-07, - "loss": 0.2015, - "step": 19832 - }, - { - "epoch": 1.8683497797979323, - "grad_norm": 0.6273950338363647, - "learning_rate": 2.21674647265675e-07, - "loss": 0.1789, - "step": 19833 - }, - { - "epoch": 1.8684439838911002, - "grad_norm": 0.6339665055274963, - "learning_rate": 2.2135857836232511e-07, - "loss": 0.1847, - "step": 19834 - }, - { - "epoch": 1.868538187984268, - "grad_norm": 0.769565224647522, - "learning_rate": 2.2104273242712138e-07, - "loss": 0.2013, - "step": 19835 - }, - { - "epoch": 1.8686323920774357, - "grad_norm": 0.7009322047233582, - "learning_rate": 2.2072710946726693e-07, - "loss": 0.1743, - "step": 19836 - }, - { - "epoch": 1.8687265961706037, - "grad_norm": 0.6942918300628662, - "learning_rate": 2.2041170948995827e-07, - "loss": 0.1651, - "step": 19837 - }, - { - "epoch": 1.8688208002637716, - "grad_norm": 0.6284037828445435, - "learning_rate": 2.2009653250238738e-07, - "loss": 0.1899, - "step": 19838 - }, - { - 
"epoch": 1.8689150043569394, - "grad_norm": 0.6660770773887634, - "learning_rate": 2.1978157851174076e-07, - "loss": 0.1821, - "step": 19839 - }, - { - "epoch": 1.869009208450107, - "grad_norm": 0.626297116279602, - "learning_rate": 2.194668475251982e-07, - "loss": 0.1937, - "step": 19840 - }, - { - "epoch": 1.869103412543275, - "grad_norm": 0.6980593800544739, - "learning_rate": 2.1915233954993843e-07, - "loss": 0.1972, - "step": 19841 - }, - { - "epoch": 1.869197616636443, - "grad_norm": 0.6938313841819763, - "learning_rate": 2.1883805459313122e-07, - "loss": 0.2205, - "step": 19842 - }, - { - "epoch": 1.8692918207296108, - "grad_norm": 0.6362899541854858, - "learning_rate": 2.1852399266194312e-07, - "loss": 0.1878, - "step": 19843 - }, - { - "epoch": 1.8693860248227785, - "grad_norm": 0.850473165512085, - "learning_rate": 2.1821015376353728e-07, - "loss": 0.1885, - "step": 19844 - }, - { - "epoch": 1.8694802289159465, - "grad_norm": 0.5963436365127563, - "learning_rate": 2.178965379050657e-07, - "loss": 0.1531, - "step": 19845 - }, - { - "epoch": 1.8695744330091144, - "grad_norm": 0.7101747989654541, - "learning_rate": 2.1758314509368163e-07, - "loss": 0.1954, - "step": 19846 - }, - { - "epoch": 1.8696686371022821, - "grad_norm": 0.5779017210006714, - "learning_rate": 2.1726997533653149e-07, - "loss": 0.1963, - "step": 19847 - }, - { - "epoch": 1.8697628411954499, - "grad_norm": 0.683344304561615, - "learning_rate": 2.1695702864075408e-07, - "loss": 0.2018, - "step": 19848 - }, - { - "epoch": 1.8698570452886178, - "grad_norm": 0.6608116030693054, - "learning_rate": 2.166443050134859e-07, - "loss": 0.2196, - "step": 19849 - }, - { - "epoch": 1.8699512493817856, - "grad_norm": 0.6488050818443298, - "learning_rate": 2.1633180446185897e-07, - "loss": 0.1804, - "step": 19850 - }, - { - "epoch": 1.8700454534749533, - "grad_norm": 0.6669037938117981, - "learning_rate": 2.1601952699299877e-07, - "loss": 0.2198, - "step": 19851 - }, - { - "epoch": 1.8701396575681213, - "grad_norm": 0.6127009391784668, - "learning_rate": 2.157074726140218e-07, - "loss": 0.1847, - "step": 19852 - }, - { - "epoch": 1.8702338616612892, - "grad_norm": 0.6842362880706787, - "learning_rate": 2.1539564133204793e-07, - "loss": 0.2067, - "step": 19853 - }, - { - "epoch": 1.870328065754457, - "grad_norm": 0.6373916864395142, - "learning_rate": 2.1508403315418592e-07, - "loss": 0.1647, - "step": 19854 - }, - { - "epoch": 1.8704222698476247, - "grad_norm": 0.6885888576507568, - "learning_rate": 2.1477264808753896e-07, - "loss": 0.2138, - "step": 19855 - }, - { - "epoch": 1.8705164739407927, - "grad_norm": 0.6469976305961609, - "learning_rate": 2.1446148613920914e-07, - "loss": 0.1906, - "step": 19856 - }, - { - "epoch": 1.8706106780339606, - "grad_norm": 0.6703843474388123, - "learning_rate": 2.1415054731629192e-07, - "loss": 0.1879, - "step": 19857 - }, - { - "epoch": 1.8707048821271284, - "grad_norm": 0.6307921409606934, - "learning_rate": 2.1383983162587607e-07, - "loss": 0.197, - "step": 19858 - }, - { - "epoch": 1.8707990862202961, - "grad_norm": 0.6355965733528137, - "learning_rate": 2.1352933907504592e-07, - "loss": 0.2153, - "step": 19859 - }, - { - "epoch": 1.870893290313464, - "grad_norm": 0.6926652193069458, - "learning_rate": 2.1321906967088356e-07, - "loss": 0.2309, - "step": 19860 - }, - { - "epoch": 1.870987494406632, - "grad_norm": 0.7311079502105713, - "learning_rate": 2.1290902342046006e-07, - "loss": 0.2102, - "step": 19861 - }, - { - "epoch": 1.8710816984997998, - "grad_norm": 0.6396556496620178, - 
"learning_rate": 2.1259920033084746e-07, - "loss": 0.1748, - "step": 19862 - }, - { - "epoch": 1.8711759025929675, - "grad_norm": 0.6666446924209595, - "learning_rate": 2.1228960040911128e-07, - "loss": 0.1977, - "step": 19863 - }, - { - "epoch": 1.8712701066861355, - "grad_norm": 0.6239649057388306, - "learning_rate": 2.1198022366230698e-07, - "loss": 0.1886, - "step": 19864 - }, - { - "epoch": 1.8713643107793034, - "grad_norm": 0.5872995257377625, - "learning_rate": 2.1167107009749223e-07, - "loss": 0.1899, - "step": 19865 - }, - { - "epoch": 1.8714585148724712, - "grad_norm": 0.67463219165802, - "learning_rate": 2.113621397217147e-07, - "loss": 0.2007, - "step": 19866 - }, - { - "epoch": 1.871552718965639, - "grad_norm": 0.6818503737449646, - "learning_rate": 2.1105343254201772e-07, - "loss": 0.1918, - "step": 19867 - }, - { - "epoch": 1.8716469230588069, - "grad_norm": 0.5923857092857361, - "learning_rate": 2.1074494856544335e-07, - "loss": 0.1603, - "step": 19868 - }, - { - "epoch": 1.8717411271519748, - "grad_norm": 0.6048617362976074, - "learning_rate": 2.1043668779902272e-07, - "loss": 0.1812, - "step": 19869 - }, - { - "epoch": 1.8718353312451426, - "grad_norm": 0.6945805549621582, - "learning_rate": 2.1012865024978458e-07, - "loss": 0.1966, - "step": 19870 - }, - { - "epoch": 1.8719295353383103, - "grad_norm": 0.6653010249137878, - "learning_rate": 2.0982083592475443e-07, - "loss": 0.1866, - "step": 19871 - }, - { - "epoch": 1.8720237394314783, - "grad_norm": 0.6669188141822815, - "learning_rate": 2.0951324483095003e-07, - "loss": 0.2064, - "step": 19872 - }, - { - "epoch": 1.8721179435246462, - "grad_norm": 0.8476389646530151, - "learning_rate": 2.0920587697538464e-07, - "loss": 0.2292, - "step": 19873 - }, - { - "epoch": 1.872212147617814, - "grad_norm": 0.624190628528595, - "learning_rate": 2.0889873236506707e-07, - "loss": 0.2114, - "step": 19874 - }, - { - "epoch": 1.8723063517109817, - "grad_norm": 0.6130045652389526, - "learning_rate": 2.0859181100700175e-07, - "loss": 0.1713, - "step": 19875 - }, - { - "epoch": 1.8724005558041497, - "grad_norm": 0.7087114453315735, - "learning_rate": 2.082851129081842e-07, - "loss": 0.2217, - "step": 19876 - }, - { - "epoch": 1.8724947598973176, - "grad_norm": 1.2582696676254272, - "learning_rate": 2.0797863807560992e-07, - "loss": 0.2109, - "step": 19877 - }, - { - "epoch": 1.8725889639904854, - "grad_norm": 0.6868299841880798, - "learning_rate": 2.0767238651626664e-07, - "loss": 0.1885, - "step": 19878 - }, - { - "epoch": 1.872683168083653, - "grad_norm": 0.7186660170555115, - "learning_rate": 2.0736635823713657e-07, - "loss": 0.1858, - "step": 19879 - }, - { - "epoch": 1.872777372176821, - "grad_norm": 0.70173180103302, - "learning_rate": 2.0706055324519748e-07, - "loss": 0.193, - "step": 19880 - }, - { - "epoch": 1.872871576269989, - "grad_norm": 0.5818378329277039, - "learning_rate": 2.0675497154742374e-07, - "loss": 0.1686, - "step": 19881 - }, - { - "epoch": 1.8729657803631568, - "grad_norm": 0.6666647791862488, - "learning_rate": 2.0644961315078205e-07, - "loss": 0.1864, - "step": 19882 - }, - { - "epoch": 1.8730599844563245, - "grad_norm": 0.7315630316734314, - "learning_rate": 2.061444780622346e-07, - "loss": 0.1925, - "step": 19883 - }, - { - "epoch": 1.8731541885494924, - "grad_norm": 0.6360408663749695, - "learning_rate": 2.0583956628874137e-07, - "loss": 0.1837, - "step": 19884 - }, - { - "epoch": 1.8732483926426604, - "grad_norm": 0.6370952129364014, - "learning_rate": 2.0553487783725122e-07, - "loss": 0.1782, - "step": 
19885 - }, - { - "epoch": 1.8733425967358281, - "grad_norm": 0.6465074419975281, - "learning_rate": 2.0523041271471312e-07, - "loss": 0.184, - "step": 19886 - }, - { - "epoch": 1.8734368008289959, - "grad_norm": 0.6361848711967468, - "learning_rate": 2.0492617092807142e-07, - "loss": 0.1932, - "step": 19887 - }, - { - "epoch": 1.8735310049221638, - "grad_norm": 0.6392167806625366, - "learning_rate": 2.0462215248426066e-07, - "loss": 0.161, - "step": 19888 - }, - { - "epoch": 1.8736252090153318, - "grad_norm": 0.6628153324127197, - "learning_rate": 2.043183573902119e-07, - "loss": 0.1837, - "step": 19889 - }, - { - "epoch": 1.8737194131084995, - "grad_norm": 0.6620554327964783, - "learning_rate": 2.0401478565285626e-07, - "loss": 0.1989, - "step": 19890 - }, - { - "epoch": 1.8738136172016673, - "grad_norm": 0.6533010005950928, - "learning_rate": 2.037114372791127e-07, - "loss": 0.1882, - "step": 19891 - }, - { - "epoch": 1.8739078212948352, - "grad_norm": 0.6901665329933167, - "learning_rate": 2.0340831227589675e-07, - "loss": 0.193, - "step": 19892 - }, - { - "epoch": 1.8740020253880032, - "grad_norm": 0.6867063045501709, - "learning_rate": 2.0310541065012401e-07, - "loss": 0.19, - "step": 19893 - }, - { - "epoch": 1.874096229481171, - "grad_norm": 0.6477521657943726, - "learning_rate": 2.0280273240869896e-07, - "loss": 0.18, - "step": 19894 - }, - { - "epoch": 1.8741904335743387, - "grad_norm": 0.6686272621154785, - "learning_rate": 2.0250027755852274e-07, - "loss": 0.1796, - "step": 19895 - }, - { - "epoch": 1.8742846376675066, - "grad_norm": 0.6758459806442261, - "learning_rate": 2.0219804610649208e-07, - "loss": 0.1974, - "step": 19896 - }, - { - "epoch": 1.8743788417606746, - "grad_norm": 0.6487590670585632, - "learning_rate": 2.018960380594992e-07, - "loss": 0.2006, - "step": 19897 - }, - { - "epoch": 1.8744730458538423, - "grad_norm": 0.6570483446121216, - "learning_rate": 2.0159425342442972e-07, - "loss": 0.1605, - "step": 19898 - }, - { - "epoch": 1.87456724994701, - "grad_norm": 0.6574012041091919, - "learning_rate": 2.0129269220816372e-07, - "loss": 0.2001, - "step": 19899 - }, - { - "epoch": 1.874661454040178, - "grad_norm": 0.6349261999130249, - "learning_rate": 2.00991354417579e-07, - "loss": 0.1656, - "step": 19900 - }, - { - "epoch": 1.874755658133346, - "grad_norm": 0.6339261531829834, - "learning_rate": 2.0069024005954562e-07, - "loss": 0.2064, - "step": 19901 - }, - { - "epoch": 1.8748498622265137, - "grad_norm": 0.6916906833648682, - "learning_rate": 2.0038934914093033e-07, - "loss": 0.2335, - "step": 19902 - }, - { - "epoch": 1.8749440663196815, - "grad_norm": 0.7444424033164978, - "learning_rate": 2.000886816685932e-07, - "loss": 0.2186, - "step": 19903 - }, - { - "epoch": 1.8750382704128494, - "grad_norm": 0.7623520493507385, - "learning_rate": 1.9978823764938983e-07, - "loss": 0.1852, - "step": 19904 - }, - { - "epoch": 1.8751324745060174, - "grad_norm": 0.7236540913581848, - "learning_rate": 1.994880170901714e-07, - "loss": 0.1911, - "step": 19905 - }, - { - "epoch": 1.8752266785991851, - "grad_norm": 0.6486384272575378, - "learning_rate": 1.9918801999778137e-07, - "loss": 0.2081, - "step": 19906 - }, - { - "epoch": 1.8753208826923529, - "grad_norm": 0.6325013041496277, - "learning_rate": 1.9888824637906312e-07, - "loss": 0.1813, - "step": 19907 - }, - { - "epoch": 1.8754150867855208, - "grad_norm": 0.6888840794563293, - "learning_rate": 1.9858869624085007e-07, - "loss": 0.1898, - "step": 19908 - }, - { - "epoch": 1.8755092908786888, - "grad_norm": 
0.6658682823181152, - "learning_rate": 1.9828936958997348e-07, - "loss": 0.1689, - "step": 19909 - }, - { - "epoch": 1.8756034949718565, - "grad_norm": 0.684428870677948, - "learning_rate": 1.979902664332567e-07, - "loss": 0.2025, - "step": 19910 - }, - { - "epoch": 1.8756976990650243, - "grad_norm": 0.7188761234283447, - "learning_rate": 1.9769138677752208e-07, - "loss": 0.1983, - "step": 19911 - }, - { - "epoch": 1.8757919031581922, - "grad_norm": 0.6171512007713318, - "learning_rate": 1.9739273062958307e-07, - "loss": 0.1802, - "step": 19912 - }, - { - "epoch": 1.8758861072513602, - "grad_norm": 0.6133853197097778, - "learning_rate": 1.970942979962509e-07, - "loss": 0.1807, - "step": 19913 - }, - { - "epoch": 1.875980311344528, - "grad_norm": 0.6813076138496399, - "learning_rate": 1.9679608888432787e-07, - "loss": 0.1673, - "step": 19914 - }, - { - "epoch": 1.8760745154376957, - "grad_norm": 0.6362394094467163, - "learning_rate": 1.9649810330061524e-07, - "loss": 0.2137, - "step": 19915 - }, - { - "epoch": 1.8761687195308636, - "grad_norm": 0.6668578386306763, - "learning_rate": 1.9620034125190645e-07, - "loss": 0.1992, - "step": 19916 - }, - { - "epoch": 1.8762629236240316, - "grad_norm": 0.6715618968009949, - "learning_rate": 1.9590280274499274e-07, - "loss": 0.2102, - "step": 19917 - }, - { - "epoch": 1.8763571277171993, - "grad_norm": 0.6445623636245728, - "learning_rate": 1.9560548778665755e-07, - "loss": 0.1746, - "step": 19918 - }, - { - "epoch": 1.876451331810367, - "grad_norm": 0.6466159224510193, - "learning_rate": 1.9530839638367993e-07, - "loss": 0.2126, - "step": 19919 - }, - { - "epoch": 1.876545535903535, - "grad_norm": 0.673216700553894, - "learning_rate": 1.9501152854283333e-07, - "loss": 0.2381, - "step": 19920 - }, - { - "epoch": 1.876639739996703, - "grad_norm": 0.6047160029411316, - "learning_rate": 1.9471488427088903e-07, - "loss": 0.1863, - "step": 19921 - }, - { - "epoch": 1.8767339440898707, - "grad_norm": 0.6081609129905701, - "learning_rate": 1.9441846357460825e-07, - "loss": 0.1868, - "step": 19922 - }, - { - "epoch": 1.8768281481830384, - "grad_norm": 0.6987688541412354, - "learning_rate": 1.9412226646075115e-07, - "loss": 0.1822, - "step": 19923 - }, - { - "epoch": 1.8769223522762064, - "grad_norm": 0.7152296304702759, - "learning_rate": 1.9382629293607237e-07, - "loss": 0.19, - "step": 19924 - }, - { - "epoch": 1.8770165563693744, - "grad_norm": 0.6419005393981934, - "learning_rate": 1.9353054300731865e-07, - "loss": 0.1771, - "step": 19925 - }, - { - "epoch": 1.877110760462542, - "grad_norm": 0.7111563682556152, - "learning_rate": 1.9323501668123467e-07, - "loss": 0.2131, - "step": 19926 - }, - { - "epoch": 1.8772049645557098, - "grad_norm": 0.6358315348625183, - "learning_rate": 1.9293971396455945e-07, - "loss": 0.1886, - "step": 19927 - }, - { - "epoch": 1.8772991686488778, - "grad_norm": 0.7015502452850342, - "learning_rate": 1.926446348640265e-07, - "loss": 0.1877, - "step": 19928 - }, - { - "epoch": 1.8773933727420458, - "grad_norm": 0.745125412940979, - "learning_rate": 1.9234977938636046e-07, - "loss": 0.1983, - "step": 19929 - }, - { - "epoch": 1.8774875768352135, - "grad_norm": 0.7070435285568237, - "learning_rate": 1.9205514753828924e-07, - "loss": 0.2048, - "step": 19930 - }, - { - "epoch": 1.8775817809283812, - "grad_norm": 0.6494175791740417, - "learning_rate": 1.9176073932652972e-07, - "loss": 0.2071, - "step": 19931 - }, - { - "epoch": 1.8776759850215492, - "grad_norm": 0.6327586770057678, - "learning_rate": 1.9146655475779207e-07, - 
"loss": 0.2104, - "step": 19932 - }, - { - "epoch": 1.8777701891147172, - "grad_norm": 0.6279453635215759, - "learning_rate": 1.911725938387865e-07, - "loss": 0.191, - "step": 19933 - }, - { - "epoch": 1.877864393207885, - "grad_norm": 0.6193822026252747, - "learning_rate": 1.908788565762165e-07, - "loss": 0.1915, - "step": 19934 - }, - { - "epoch": 1.8779585973010526, - "grad_norm": 0.6685409545898438, - "learning_rate": 1.9058534297677789e-07, - "loss": 0.1846, - "step": 19935 - }, - { - "epoch": 1.8780528013942206, - "grad_norm": 0.6512874364852905, - "learning_rate": 1.9029205304716415e-07, - "loss": 0.2003, - "step": 19936 - }, - { - "epoch": 1.8781470054873886, - "grad_norm": 0.6568180918693542, - "learning_rate": 1.8999898679406325e-07, - "loss": 0.1967, - "step": 19937 - }, - { - "epoch": 1.8782412095805563, - "grad_norm": 0.6729118824005127, - "learning_rate": 1.8970614422415656e-07, - "loss": 0.1732, - "step": 19938 - }, - { - "epoch": 1.878335413673724, - "grad_norm": 0.6299525499343872, - "learning_rate": 1.8941352534412095e-07, - "loss": 0.1785, - "step": 19939 - }, - { - "epoch": 1.878429617766892, - "grad_norm": 0.6564587950706482, - "learning_rate": 1.8912113016062994e-07, - "loss": 0.2139, - "step": 19940 - }, - { - "epoch": 1.87852382186006, - "grad_norm": 0.5952286124229431, - "learning_rate": 1.8882895868035045e-07, - "loss": 0.1631, - "step": 19941 - }, - { - "epoch": 1.8786180259532277, - "grad_norm": 0.6052758693695068, - "learning_rate": 1.885370109099427e-07, - "loss": 0.185, - "step": 19942 - }, - { - "epoch": 1.8787122300463954, - "grad_norm": 0.6648350358009338, - "learning_rate": 1.882452868560658e-07, - "loss": 0.2097, - "step": 19943 - }, - { - "epoch": 1.8788064341395634, - "grad_norm": 0.6911534070968628, - "learning_rate": 1.8795378652537e-07, - "loss": 0.2143, - "step": 19944 - }, - { - "epoch": 1.8789006382327313, - "grad_norm": 0.9408258199691772, - "learning_rate": 1.876625099245033e-07, - "loss": 0.1842, - "step": 19945 - }, - { - "epoch": 1.878994842325899, - "grad_norm": 0.6716334819793701, - "learning_rate": 1.8737145706010594e-07, - "loss": 0.1649, - "step": 19946 - }, - { - "epoch": 1.8790890464190668, - "grad_norm": 0.7083847522735596, - "learning_rate": 1.8708062793881487e-07, - "loss": 0.2114, - "step": 19947 - }, - { - "epoch": 1.8791832505122348, - "grad_norm": 0.682701587677002, - "learning_rate": 1.8679002256726143e-07, - "loss": 0.1999, - "step": 19948 - }, - { - "epoch": 1.8792774546054027, - "grad_norm": 0.6334688663482666, - "learning_rate": 1.864996409520714e-07, - "loss": 0.1749, - "step": 19949 - }, - { - "epoch": 1.8793716586985705, - "grad_norm": 0.8308340907096863, - "learning_rate": 1.862094830998673e-07, - "loss": 0.2284, - "step": 19950 - }, - { - "epoch": 1.8794658627917382, - "grad_norm": 0.6952033042907715, - "learning_rate": 1.8591954901726385e-07, - "loss": 0.2064, - "step": 19951 - }, - { - "epoch": 1.8795600668849062, - "grad_norm": 0.6250718832015991, - "learning_rate": 1.8562983871087237e-07, - "loss": 0.1716, - "step": 19952 - }, - { - "epoch": 1.8796542709780741, - "grad_norm": 0.6349963545799255, - "learning_rate": 1.8534035218729984e-07, - "loss": 0.191, - "step": 19953 - }, - { - "epoch": 1.8797484750712419, - "grad_norm": 0.6657272577285767, - "learning_rate": 1.8505108945314432e-07, - "loss": 0.1768, - "step": 19954 - }, - { - "epoch": 1.8798426791644096, - "grad_norm": 0.6782479882240295, - "learning_rate": 1.8476205051500495e-07, - "loss": 0.2027, - "step": 19955 - }, - { - "epoch": 1.8799368832575776, - 
"grad_norm": 0.6128109097480774, - "learning_rate": 1.8447323537946871e-07, - "loss": 0.1593, - "step": 19956 - }, - { - "epoch": 1.8800310873507455, - "grad_norm": 0.8520538210868835, - "learning_rate": 1.8418464405312253e-07, - "loss": 0.2047, - "step": 19957 - }, - { - "epoch": 1.8801252914439133, - "grad_norm": 0.624204695224762, - "learning_rate": 1.8389627654254894e-07, - "loss": 0.1936, - "step": 19958 - }, - { - "epoch": 1.880219495537081, - "grad_norm": 0.6802290678024292, - "learning_rate": 1.8360813285431932e-07, - "loss": 0.195, - "step": 19959 - }, - { - "epoch": 1.880313699630249, - "grad_norm": 0.686010479927063, - "learning_rate": 1.8332021299500513e-07, - "loss": 0.1886, - "step": 19960 - }, - { - "epoch": 1.880407903723417, - "grad_norm": 0.6636178493499756, - "learning_rate": 1.830325169711744e-07, - "loss": 0.2212, - "step": 19961 - }, - { - "epoch": 1.8805021078165847, - "grad_norm": 0.6198559403419495, - "learning_rate": 1.8274504478938194e-07, - "loss": 0.1818, - "step": 19962 - }, - { - "epoch": 1.8805963119097524, - "grad_norm": 0.6393874287605286, - "learning_rate": 1.8245779645618577e-07, - "loss": 0.2007, - "step": 19963 - }, - { - "epoch": 1.8806905160029204, - "grad_norm": 0.6653652787208557, - "learning_rate": 1.8217077197813627e-07, - "loss": 0.2078, - "step": 19964 - }, - { - "epoch": 1.8807847200960883, - "grad_norm": 0.6419366002082825, - "learning_rate": 1.8188397136177594e-07, - "loss": 0.1905, - "step": 19965 - }, - { - "epoch": 1.880878924189256, - "grad_norm": 0.6758688688278198, - "learning_rate": 1.81597394613644e-07, - "loss": 0.1839, - "step": 19966 - }, - { - "epoch": 1.8809731282824238, - "grad_norm": 0.7126138210296631, - "learning_rate": 1.8131104174027746e-07, - "loss": 0.2255, - "step": 19967 - }, - { - "epoch": 1.8810673323755918, - "grad_norm": 0.7680550217628479, - "learning_rate": 1.8102491274820444e-07, - "loss": 0.2362, - "step": 19968 - }, - { - "epoch": 1.8811615364687597, - "grad_norm": 0.6370869874954224, - "learning_rate": 1.8073900764394636e-07, - "loss": 0.1744, - "step": 19969 - }, - { - "epoch": 1.8812557405619275, - "grad_norm": 0.7094757556915283, - "learning_rate": 1.804533264340269e-07, - "loss": 0.1835, - "step": 19970 - }, - { - "epoch": 1.8813499446550952, - "grad_norm": 0.6785231828689575, - "learning_rate": 1.8016786912495753e-07, - "loss": 0.2028, - "step": 19971 - }, - { - "epoch": 1.8814441487482632, - "grad_norm": 0.7784723043441772, - "learning_rate": 1.7988263572324527e-07, - "loss": 0.2338, - "step": 19972 - }, - { - "epoch": 1.8815383528414311, - "grad_norm": 0.651883602142334, - "learning_rate": 1.7959762623539822e-07, - "loss": 0.2161, - "step": 19973 - }, - { - "epoch": 1.8816325569345989, - "grad_norm": 0.5864567756652832, - "learning_rate": 1.7931284066791345e-07, - "loss": 0.1738, - "step": 19974 - }, - { - "epoch": 1.8817267610277666, - "grad_norm": 0.6762986779212952, - "learning_rate": 1.7902827902728236e-07, - "loss": 0.1753, - "step": 19975 - }, - { - "epoch": 1.8818209651209346, - "grad_norm": 0.6702238321304321, - "learning_rate": 1.7874394131999428e-07, - "loss": 0.1954, - "step": 19976 - }, - { - "epoch": 1.8819151692141025, - "grad_norm": 0.663995623588562, - "learning_rate": 1.784598275525351e-07, - "loss": 0.1939, - "step": 19977 - }, - { - "epoch": 1.8820093733072703, - "grad_norm": 0.702885627746582, - "learning_rate": 1.7817593773137965e-07, - "loss": 0.2107, - "step": 19978 - }, - { - "epoch": 1.882103577400438, - "grad_norm": 0.6725521683692932, - "learning_rate": 
1.7789227186300274e-07, - "loss": 0.2048, - "step": 19979 - }, - { - "epoch": 1.882197781493606, - "grad_norm": 0.7834375500679016, - "learning_rate": 1.7760882995387363e-07, - "loss": 0.2052, - "step": 19980 - }, - { - "epoch": 1.882291985586774, - "grad_norm": 0.6639535427093506, - "learning_rate": 1.7732561201045274e-07, - "loss": 0.1986, - "step": 19981 - }, - { - "epoch": 1.8823861896799416, - "grad_norm": 0.6540392637252808, - "learning_rate": 1.770426180391982e-07, - "loss": 0.1797, - "step": 19982 - }, - { - "epoch": 1.8824803937731094, - "grad_norm": 0.5807163119316101, - "learning_rate": 1.7675984804656488e-07, - "loss": 0.167, - "step": 19983 - }, - { - "epoch": 1.8825745978662773, - "grad_norm": 0.7048665881156921, - "learning_rate": 1.764773020389987e-07, - "loss": 0.2171, - "step": 19984 - }, - { - "epoch": 1.8826688019594453, - "grad_norm": 0.7323140501976013, - "learning_rate": 1.7619498002294234e-07, - "loss": 0.2208, - "step": 19985 - }, - { - "epoch": 1.8827630060526128, - "grad_norm": 0.6514236330986023, - "learning_rate": 1.7591288200483282e-07, - "loss": 0.2085, - "step": 19986 - }, - { - "epoch": 1.8828572101457808, - "grad_norm": 0.6266919374465942, - "learning_rate": 1.756310079911028e-07, - "loss": 0.1982, - "step": 19987 - }, - { - "epoch": 1.8829514142389487, - "grad_norm": 0.6137820482254028, - "learning_rate": 1.7534935798818042e-07, - "loss": 0.1874, - "step": 19988 - }, - { - "epoch": 1.8830456183321165, - "grad_norm": 0.6909066438674927, - "learning_rate": 1.7506793200248507e-07, - "loss": 0.1914, - "step": 19989 - }, - { - "epoch": 1.8831398224252842, - "grad_norm": 0.5796274542808533, - "learning_rate": 1.74786730040436e-07, - "loss": 0.2058, - "step": 19990 - }, - { - "epoch": 1.8832340265184522, - "grad_norm": 0.6112196445465088, - "learning_rate": 1.7450575210844588e-07, - "loss": 0.1845, - "step": 19991 - }, - { - "epoch": 1.8833282306116201, - "grad_norm": 0.6688932180404663, - "learning_rate": 1.742249982129185e-07, - "loss": 0.202, - "step": 19992 - }, - { - "epoch": 1.8834224347047879, - "grad_norm": 0.9059047698974609, - "learning_rate": 1.7394446836025758e-07, - "loss": 0.1984, - "step": 19993 - }, - { - "epoch": 1.8835166387979556, - "grad_norm": 0.7394698858261108, - "learning_rate": 1.7366416255685914e-07, - "loss": 0.1916, - "step": 19994 - }, - { - "epoch": 1.8836108428911236, - "grad_norm": 0.6489742994308472, - "learning_rate": 1.7338408080911473e-07, - "loss": 0.1968, - "step": 19995 - }, - { - "epoch": 1.8837050469842915, - "grad_norm": 0.6854963302612305, - "learning_rate": 1.7310422312341035e-07, - "loss": 0.2127, - "step": 19996 - }, - { - "epoch": 1.8837992510774593, - "grad_norm": 0.679210901260376, - "learning_rate": 1.7282458950612646e-07, - "loss": 0.1826, - "step": 19997 - }, - { - "epoch": 1.883893455170627, - "grad_norm": 0.7623416185379028, - "learning_rate": 1.7254517996364127e-07, - "loss": 0.2065, - "step": 19998 - }, - { - "epoch": 1.883987659263795, - "grad_norm": 0.7248129844665527, - "learning_rate": 1.7226599450232306e-07, - "loss": 0.1895, - "step": 19999 - }, - { - "epoch": 1.884081863356963, - "grad_norm": 0.6562262177467346, - "learning_rate": 1.7198703312853892e-07, - "loss": 0.1733, - "step": 20000 - }, - { - "epoch": 1.8841760674501307, - "grad_norm": 0.6889053583145142, - "learning_rate": 1.7170829584865155e-07, - "loss": 0.1781, - "step": 20001 - }, - { - "epoch": 1.8842702715432984, - "grad_norm": 0.6888020038604736, - "learning_rate": 1.7142978266901367e-07, - "loss": 0.204, - "step": 20002 - }, - { - 
"epoch": 1.8843644756364664, - "grad_norm": 0.608525812625885, - "learning_rate": 1.711514935959768e-07, - "loss": 0.1892, - "step": 20003 - }, - { - "epoch": 1.8844586797296343, - "grad_norm": 0.6745103597640991, - "learning_rate": 1.7087342863588708e-07, - "loss": 0.1947, - "step": 20004 - }, - { - "epoch": 1.884552883822802, - "grad_norm": 0.6390058398246765, - "learning_rate": 1.705955877950838e-07, - "loss": 0.1909, - "step": 20005 - }, - { - "epoch": 1.8846470879159698, - "grad_norm": 0.6507443785667419, - "learning_rate": 1.703179710799019e-07, - "loss": 0.1795, - "step": 20006 - }, - { - "epoch": 1.8847412920091378, - "grad_norm": 0.7287372946739197, - "learning_rate": 1.70040578496673e-07, - "loss": 0.1581, - "step": 20007 - }, - { - "epoch": 1.8848354961023057, - "grad_norm": 0.6483942866325378, - "learning_rate": 1.6976341005172204e-07, - "loss": 0.1877, - "step": 20008 - }, - { - "epoch": 1.8849297001954735, - "grad_norm": 0.6140819787979126, - "learning_rate": 1.694864657513662e-07, - "loss": 0.1849, - "step": 20009 - }, - { - "epoch": 1.8850239042886412, - "grad_norm": 0.6788772940635681, - "learning_rate": 1.692097456019226e-07, - "loss": 0.2083, - "step": 20010 - }, - { - "epoch": 1.8851181083818092, - "grad_norm": 0.6177812218666077, - "learning_rate": 1.689332496097018e-07, - "loss": 0.1967, - "step": 20011 - }, - { - "epoch": 1.8852123124749771, - "grad_norm": 0.6948332190513611, - "learning_rate": 1.6865697778100543e-07, - "loss": 0.2067, - "step": 20012 - }, - { - "epoch": 1.8853065165681449, - "grad_norm": 0.6861203908920288, - "learning_rate": 1.6838093012213507e-07, - "loss": 0.1807, - "step": 20013 - }, - { - "epoch": 1.8854007206613126, - "grad_norm": 0.6214278936386108, - "learning_rate": 1.6810510663938462e-07, - "loss": 0.1668, - "step": 20014 - }, - { - "epoch": 1.8854949247544806, - "grad_norm": 0.6081794500350952, - "learning_rate": 1.6782950733904235e-07, - "loss": 0.1663, - "step": 20015 - }, - { - "epoch": 1.8855891288476485, - "grad_norm": 0.6765573620796204, - "learning_rate": 1.6755413222739324e-07, - "loss": 0.2058, - "step": 20016 - }, - { - "epoch": 1.8856833329408162, - "grad_norm": 0.6488627195358276, - "learning_rate": 1.6727898131071673e-07, - "loss": 0.1857, - "step": 20017 - }, - { - "epoch": 1.885777537033984, - "grad_norm": 0.6169323325157166, - "learning_rate": 1.6700405459528556e-07, - "loss": 0.1729, - "step": 20018 - }, - { - "epoch": 1.885871741127152, - "grad_norm": 0.6104135513305664, - "learning_rate": 1.667293520873692e-07, - "loss": 0.1904, - "step": 20019 - }, - { - "epoch": 1.88596594522032, - "grad_norm": 0.7144765853881836, - "learning_rate": 1.6645487379323143e-07, - "loss": 0.2003, - "step": 20020 - }, - { - "epoch": 1.8860601493134876, - "grad_norm": 0.6818016171455383, - "learning_rate": 1.6618061971912848e-07, - "loss": 0.226, - "step": 20021 - }, - { - "epoch": 1.8861543534066554, - "grad_norm": 0.6748316884040833, - "learning_rate": 1.6590658987131746e-07, - "loss": 0.2002, - "step": 20022 - }, - { - "epoch": 1.8862485574998233, - "grad_norm": 0.7445883750915527, - "learning_rate": 1.6563278425604456e-07, - "loss": 0.2012, - "step": 20023 - }, - { - "epoch": 1.8863427615929913, - "grad_norm": 0.6547936797142029, - "learning_rate": 1.6535920287955365e-07, - "loss": 0.1807, - "step": 20024 - }, - { - "epoch": 1.886436965686159, - "grad_norm": 0.7145929932594299, - "learning_rate": 1.6508584574808194e-07, - "loss": 0.2002, - "step": 20025 - }, - { - "epoch": 1.8865311697793268, - "grad_norm": 0.6741235256195068, - 
"learning_rate": 1.648127128678634e-07, - "loss": 0.1981, - "step": 20026 - }, - { - "epoch": 1.8866253738724947, - "grad_norm": 0.6544743776321411, - "learning_rate": 1.6453980424512405e-07, - "loss": 0.2128, - "step": 20027 - }, - { - "epoch": 1.8867195779656627, - "grad_norm": 0.6516964435577393, - "learning_rate": 1.6426711988609012e-07, - "loss": 0.2026, - "step": 20028 - }, - { - "epoch": 1.8868137820588304, - "grad_norm": 0.6945900917053223, - "learning_rate": 1.6399465979697548e-07, - "loss": 0.195, - "step": 20029 - }, - { - "epoch": 1.8869079861519982, - "grad_norm": 0.6723647117614746, - "learning_rate": 1.6372242398399517e-07, - "loss": 0.1887, - "step": 20030 - }, - { - "epoch": 1.8870021902451661, - "grad_norm": 0.6073116660118103, - "learning_rate": 1.6345041245335536e-07, - "loss": 0.1613, - "step": 20031 - }, - { - "epoch": 1.887096394338334, - "grad_norm": 0.8018856048583984, - "learning_rate": 1.6317862521125883e-07, - "loss": 0.2245, - "step": 20032 - }, - { - "epoch": 1.8871905984315018, - "grad_norm": 0.6451196670532227, - "learning_rate": 1.6290706226390286e-07, - "loss": 0.1764, - "step": 20033 - }, - { - "epoch": 1.8872848025246696, - "grad_norm": 0.6342352032661438, - "learning_rate": 1.6263572361747805e-07, - "loss": 0.1632, - "step": 20034 - }, - { - "epoch": 1.8873790066178375, - "grad_norm": 0.7167083024978638, - "learning_rate": 1.6236460927817387e-07, - "loss": 0.205, - "step": 20035 - }, - { - "epoch": 1.8874732107110055, - "grad_norm": 0.651252269744873, - "learning_rate": 1.6209371925216987e-07, - "loss": 0.1874, - "step": 20036 - }, - { - "epoch": 1.8875674148041732, - "grad_norm": 0.7464094758033752, - "learning_rate": 1.618230535456433e-07, - "loss": 0.1811, - "step": 20037 - }, - { - "epoch": 1.887661618897341, - "grad_norm": 0.6483302116394043, - "learning_rate": 1.6155261216476814e-07, - "loss": 0.1919, - "step": 20038 - }, - { - "epoch": 1.887755822990509, - "grad_norm": 0.6636731028556824, - "learning_rate": 1.612823951157072e-07, - "loss": 0.1963, - "step": 20039 - }, - { - "epoch": 1.8878500270836769, - "grad_norm": 0.6971153616905212, - "learning_rate": 1.6101240240462334e-07, - "loss": 0.1915, - "step": 20040 - }, - { - "epoch": 1.8879442311768446, - "grad_norm": 0.6511744856834412, - "learning_rate": 1.6074263403767388e-07, - "loss": 0.2154, - "step": 20041 - }, - { - "epoch": 1.8880384352700124, - "grad_norm": 0.7304086685180664, - "learning_rate": 1.6047309002100832e-07, - "loss": 0.2097, - "step": 20042 - }, - { - "epoch": 1.8881326393631803, - "grad_norm": 0.6479068398475647, - "learning_rate": 1.6020377036077395e-07, - "loss": 0.1919, - "step": 20043 - }, - { - "epoch": 1.8882268434563483, - "grad_norm": 0.6447890996932983, - "learning_rate": 1.5993467506311144e-07, - "loss": 0.1861, - "step": 20044 - }, - { - "epoch": 1.888321047549516, - "grad_norm": 0.6425917148590088, - "learning_rate": 1.5966580413415588e-07, - "loss": 0.1676, - "step": 20045 - }, - { - "epoch": 1.8884152516426838, - "grad_norm": 0.7210531830787659, - "learning_rate": 1.5939715758003794e-07, - "loss": 0.203, - "step": 20046 - }, - { - "epoch": 1.8885094557358517, - "grad_norm": 0.6781623363494873, - "learning_rate": 1.591287354068849e-07, - "loss": 0.2009, - "step": 20047 - }, - { - "epoch": 1.8886036598290197, - "grad_norm": 0.6199914216995239, - "learning_rate": 1.5886053762081522e-07, - "loss": 0.1801, - "step": 20048 - }, - { - "epoch": 1.8886978639221874, - "grad_norm": 0.6513656973838806, - "learning_rate": 1.585925642279429e-07, - "loss": 0.1806, - "step": 
20049 - }, - { - "epoch": 1.8887920680153552, - "grad_norm": 1.187853455543518, - "learning_rate": 1.5832481523438304e-07, - "loss": 0.174, - "step": 20050 - }, - { - "epoch": 1.8888862721085231, - "grad_norm": 0.6098828315734863, - "learning_rate": 1.5805729064623742e-07, - "loss": 0.1808, - "step": 20051 - }, - { - "epoch": 1.888980476201691, - "grad_norm": 0.6046240329742432, - "learning_rate": 1.5778999046960452e-07, - "loss": 0.1749, - "step": 20052 - }, - { - "epoch": 1.8890746802948588, - "grad_norm": 0.6492502093315125, - "learning_rate": 1.5752291471058277e-07, - "loss": 0.1886, - "step": 20053 - }, - { - "epoch": 1.8891688843880265, - "grad_norm": 0.6995798945426941, - "learning_rate": 1.5725606337526067e-07, - "loss": 0.182, - "step": 20054 - }, - { - "epoch": 1.8892630884811945, - "grad_norm": 0.6726314425468445, - "learning_rate": 1.5698943646972109e-07, - "loss": 0.2054, - "step": 20055 - }, - { - "epoch": 1.8893572925743625, - "grad_norm": 0.6404174566268921, - "learning_rate": 1.5672303400004474e-07, - "loss": 0.1767, - "step": 20056 - }, - { - "epoch": 1.8894514966675302, - "grad_norm": 0.6526312232017517, - "learning_rate": 1.5645685597230675e-07, - "loss": 0.2123, - "step": 20057 - }, - { - "epoch": 1.889545700760698, - "grad_norm": 0.671974241733551, - "learning_rate": 1.5619090239257562e-07, - "loss": 0.2004, - "step": 20058 - }, - { - "epoch": 1.889639904853866, - "grad_norm": 1.1866815090179443, - "learning_rate": 1.5592517326691536e-07, - "loss": 0.2233, - "step": 20059 - }, - { - "epoch": 1.8897341089470339, - "grad_norm": 0.7386412024497986, - "learning_rate": 1.5565966860138671e-07, - "loss": 0.2209, - "step": 20060 - }, - { - "epoch": 1.8898283130402016, - "grad_norm": 0.7039284706115723, - "learning_rate": 1.5539438840204146e-07, - "loss": 0.2161, - "step": 20061 - }, - { - "epoch": 1.8899225171333693, - "grad_norm": 0.6055103540420532, - "learning_rate": 1.5512933267492813e-07, - "loss": 0.1669, - "step": 20062 - }, - { - "epoch": 1.8900167212265373, - "grad_norm": 0.650798499584198, - "learning_rate": 1.5486450142609298e-07, - "loss": 0.1972, - "step": 20063 - }, - { - "epoch": 1.8901109253197053, - "grad_norm": 0.6920046806335449, - "learning_rate": 1.5459989466157123e-07, - "loss": 0.1981, - "step": 20064 - }, - { - "epoch": 1.890205129412873, - "grad_norm": 0.6305274963378906, - "learning_rate": 1.543355123874002e-07, - "loss": 0.2177, - "step": 20065 - }, - { - "epoch": 1.8902993335060407, - "grad_norm": 0.6411617994308472, - "learning_rate": 1.54071354609604e-07, - "loss": 0.184, - "step": 20066 - }, - { - "epoch": 1.8903935375992087, - "grad_norm": 0.6737891435623169, - "learning_rate": 1.538074213342089e-07, - "loss": 0.1961, - "step": 20067 - }, - { - "epoch": 1.8904877416923767, - "grad_norm": 0.7235912084579468, - "learning_rate": 1.5354371256723232e-07, - "loss": 0.2219, - "step": 20068 - }, - { - "epoch": 1.8905819457855444, - "grad_norm": 0.6493980288505554, - "learning_rate": 1.5328022831468615e-07, - "loss": 0.174, - "step": 20069 - }, - { - "epoch": 1.8906761498787121, - "grad_norm": 0.6539517045021057, - "learning_rate": 1.5301696858257996e-07, - "loss": 0.1801, - "step": 20070 - }, - { - "epoch": 1.89077035397188, - "grad_norm": 0.6524957418441772, - "learning_rate": 1.527539333769146e-07, - "loss": 0.1695, - "step": 20071 - }, - { - "epoch": 1.890864558065048, - "grad_norm": 0.5983673334121704, - "learning_rate": 1.5249112270368848e-07, - "loss": 0.1808, - "step": 20072 - }, - { - "epoch": 1.8909587621582158, - "grad_norm": 
0.6361520290374756, - "learning_rate": 1.522285365688958e-07, - "loss": 0.1751, - "step": 20073 - }, - { - "epoch": 1.8910529662513835, - "grad_norm": 0.5791687369346619, - "learning_rate": 1.5196617497852062e-07, - "loss": 0.1603, - "step": 20074 - }, - { - "epoch": 1.8911471703445515, - "grad_norm": 0.7468518614768982, - "learning_rate": 1.5170403793854816e-07, - "loss": 0.2091, - "step": 20075 - }, - { - "epoch": 1.8912413744377194, - "grad_norm": 0.6266886591911316, - "learning_rate": 1.5144212545495362e-07, - "loss": 0.2033, - "step": 20076 - }, - { - "epoch": 1.8913355785308872, - "grad_norm": 0.6483442783355713, - "learning_rate": 1.5118043753371004e-07, - "loss": 0.1796, - "step": 20077 - }, - { - "epoch": 1.891429782624055, - "grad_norm": 0.6359021067619324, - "learning_rate": 1.5091897418078482e-07, - "loss": 0.2068, - "step": 20078 - }, - { - "epoch": 1.8915239867172229, - "grad_norm": 0.6496128439903259, - "learning_rate": 1.5065773540213658e-07, - "loss": 0.1739, - "step": 20079 - }, - { - "epoch": 1.8916181908103908, - "grad_norm": 0.5661174654960632, - "learning_rate": 1.5039672120372605e-07, - "loss": 0.1427, - "step": 20080 - }, - { - "epoch": 1.8917123949035586, - "grad_norm": 0.6317970752716064, - "learning_rate": 1.5013593159150296e-07, - "loss": 0.1852, - "step": 20081 - }, - { - "epoch": 1.8918065989967263, - "grad_norm": 0.671819806098938, - "learning_rate": 1.4987536657141254e-07, - "loss": 0.2126, - "step": 20082 - }, - { - "epoch": 1.8919008030898943, - "grad_norm": 0.5922439098358154, - "learning_rate": 1.4961502614939781e-07, - "loss": 0.1864, - "step": 20083 - }, - { - "epoch": 1.8919950071830622, - "grad_norm": 0.6616160273551941, - "learning_rate": 1.4935491033139515e-07, - "loss": 0.1833, - "step": 20084 - }, - { - "epoch": 1.89208921127623, - "grad_norm": 0.6308454871177673, - "learning_rate": 1.4909501912333534e-07, - "loss": 0.1751, - "step": 20085 - }, - { - "epoch": 1.8921834153693977, - "grad_norm": 0.6988815665245056, - "learning_rate": 1.4883535253114146e-07, - "loss": 0.1915, - "step": 20086 - }, - { - "epoch": 1.8922776194625657, - "grad_norm": 0.7224921584129333, - "learning_rate": 1.4857591056073873e-07, - "loss": 0.2116, - "step": 20087 - }, - { - "epoch": 1.8923718235557336, - "grad_norm": 0.6897280216217041, - "learning_rate": 1.4831669321804022e-07, - "loss": 0.184, - "step": 20088 - }, - { - "epoch": 1.8924660276489014, - "grad_norm": 0.6268225312232971, - "learning_rate": 1.4805770050895561e-07, - "loss": 0.1891, - "step": 20089 - }, - { - "epoch": 1.892560231742069, - "grad_norm": 0.7011879682540894, - "learning_rate": 1.4779893243939358e-07, - "loss": 0.1736, - "step": 20090 - }, - { - "epoch": 1.892654435835237, - "grad_norm": 0.6202412247657776, - "learning_rate": 1.475403890152527e-07, - "loss": 0.2048, - "step": 20091 - }, - { - "epoch": 1.892748639928405, - "grad_norm": 0.6074902415275574, - "learning_rate": 1.4728207024242601e-07, - "loss": 0.1935, - "step": 20092 - }, - { - "epoch": 1.8928428440215728, - "grad_norm": 0.6898573637008667, - "learning_rate": 1.4702397612680774e-07, - "loss": 0.2253, - "step": 20093 - }, - { - "epoch": 1.8929370481147405, - "grad_norm": 0.5918618440628052, - "learning_rate": 1.4676610667428094e-07, - "loss": 0.1476, - "step": 20094 - }, - { - "epoch": 1.8930312522079085, - "grad_norm": 0.730482280254364, - "learning_rate": 1.465084618907231e-07, - "loss": 0.2003, - "step": 20095 - }, - { - "epoch": 1.8931254563010764, - "grad_norm": 0.7516427040100098, - "learning_rate": 1.462510417820129e-07, - 
"loss": 0.2024, - "step": 20096 - }, - { - "epoch": 1.8932196603942442, - "grad_norm": 0.6613539457321167, - "learning_rate": 1.4599384635401782e-07, - "loss": 0.182, - "step": 20097 - }, - { - "epoch": 1.893313864487412, - "grad_norm": 0.5953218340873718, - "learning_rate": 1.4573687561260096e-07, - "loss": 0.1722, - "step": 20098 - }, - { - "epoch": 1.8934080685805799, - "grad_norm": 0.6937636137008667, - "learning_rate": 1.4548012956362435e-07, - "loss": 0.2115, - "step": 20099 - }, - { - "epoch": 1.8935022726737478, - "grad_norm": 0.649179995059967, - "learning_rate": 1.4522360821294214e-07, - "loss": 0.1802, - "step": 20100 - }, - { - "epoch": 1.8935964767669156, - "grad_norm": 0.661853015422821, - "learning_rate": 1.4496731156640076e-07, - "loss": 0.1799, - "step": 20101 - }, - { - "epoch": 1.8936906808600833, - "grad_norm": 3.292248249053955, - "learning_rate": 1.4471123962984668e-07, - "loss": 0.209, - "step": 20102 - }, - { - "epoch": 1.8937848849532513, - "grad_norm": 0.6019024848937988, - "learning_rate": 1.4445539240911744e-07, - "loss": 0.1684, - "step": 20103 - }, - { - "epoch": 1.8938790890464192, - "grad_norm": 0.626997709274292, - "learning_rate": 1.4419976991004724e-07, - "loss": 0.1496, - "step": 20104 - }, - { - "epoch": 1.893973293139587, - "grad_norm": 0.708346962928772, - "learning_rate": 1.439443721384648e-07, - "loss": 0.1885, - "step": 20105 - }, - { - "epoch": 1.8940674972327547, - "grad_norm": 0.6549419164657593, - "learning_rate": 1.436891991001932e-07, - "loss": 0.1761, - "step": 20106 - }, - { - "epoch": 1.8941617013259227, - "grad_norm": 0.6317760348320007, - "learning_rate": 1.4343425080105112e-07, - "loss": 0.1765, - "step": 20107 - }, - { - "epoch": 1.8942559054190906, - "grad_norm": 0.739311933517456, - "learning_rate": 1.4317952724685168e-07, - "loss": 0.1923, - "step": 20108 - }, - { - "epoch": 1.8943501095122584, - "grad_norm": 0.6841950416564941, - "learning_rate": 1.429250284434025e-07, - "loss": 0.2034, - "step": 20109 - }, - { - "epoch": 1.894444313605426, - "grad_norm": 0.67605060338974, - "learning_rate": 1.4267075439650669e-07, - "loss": 0.2024, - "step": 20110 - }, - { - "epoch": 1.894538517698594, - "grad_norm": 0.6526362895965576, - "learning_rate": 1.4241670511196292e-07, - "loss": 0.218, - "step": 20111 - }, - { - "epoch": 1.894632721791762, - "grad_norm": 0.5401507616043091, - "learning_rate": 1.4216288059556217e-07, - "loss": 0.1535, - "step": 20112 - }, - { - "epoch": 1.8947269258849297, - "grad_norm": 0.6044568419456482, - "learning_rate": 1.419092808530953e-07, - "loss": 0.1881, - "step": 20113 - }, - { - "epoch": 1.8948211299780975, - "grad_norm": 0.9056061506271362, - "learning_rate": 1.4165590589034106e-07, - "loss": 0.1577, - "step": 20114 - }, - { - "epoch": 1.8949153340712654, - "grad_norm": 0.7809275388717651, - "learning_rate": 1.4140275571307926e-07, - "loss": 0.2021, - "step": 20115 - }, - { - "epoch": 1.8950095381644334, - "grad_norm": 0.8135993480682373, - "learning_rate": 1.4114983032707974e-07, - "loss": 0.213, - "step": 20116 - }, - { - "epoch": 1.8951037422576011, - "grad_norm": 0.7396150827407837, - "learning_rate": 1.408971297381123e-07, - "loss": 0.2063, - "step": 20117 - }, - { - "epoch": 1.8951979463507689, - "grad_norm": 0.6179821491241455, - "learning_rate": 1.4064465395193794e-07, - "loss": 0.1617, - "step": 20118 - }, - { - "epoch": 1.8952921504439368, - "grad_norm": 0.6806744337081909, - "learning_rate": 1.4039240297431202e-07, - "loss": 0.2023, - "step": 20119 - }, - { - "epoch": 1.8953863545371048, - 
"grad_norm": 0.6412350535392761, - "learning_rate": 1.401403768109888e-07, - "loss": 0.1918, - "step": 20120 - }, - { - "epoch": 1.8954805586302725, - "grad_norm": 0.6875594854354858, - "learning_rate": 1.3988857546771262e-07, - "loss": 0.2108, - "step": 20121 - }, - { - "epoch": 1.8955747627234403, - "grad_norm": 0.6698450446128845, - "learning_rate": 1.3963699895022664e-07, - "loss": 0.196, - "step": 20122 - }, - { - "epoch": 1.8956689668166082, - "grad_norm": 0.9093018174171448, - "learning_rate": 1.3938564726426517e-07, - "loss": 0.1867, - "step": 20123 - }, - { - "epoch": 1.8957631709097762, - "grad_norm": 0.6904942393302917, - "learning_rate": 1.3913452041556253e-07, - "loss": 0.2227, - "step": 20124 - }, - { - "epoch": 1.8958573750029437, - "grad_norm": 0.6031886339187622, - "learning_rate": 1.3888361840984187e-07, - "loss": 0.1764, - "step": 20125 - }, - { - "epoch": 1.8959515790961117, - "grad_norm": 0.6098013520240784, - "learning_rate": 1.3863294125282532e-07, - "loss": 0.192, - "step": 20126 - }, - { - "epoch": 1.8960457831892796, - "grad_norm": 0.6808583736419678, - "learning_rate": 1.3838248895022832e-07, - "loss": 0.1942, - "step": 20127 - }, - { - "epoch": 1.8961399872824474, - "grad_norm": 0.6212560534477234, - "learning_rate": 1.3813226150776293e-07, - "loss": 0.1779, - "step": 20128 - }, - { - "epoch": 1.896234191375615, - "grad_norm": 0.6856077909469604, - "learning_rate": 1.3788225893113239e-07, - "loss": 0.235, - "step": 20129 - }, - { - "epoch": 1.896328395468783, - "grad_norm": 0.6871376037597656, - "learning_rate": 1.3763248122603878e-07, - "loss": 0.2022, - "step": 20130 - }, - { - "epoch": 1.896422599561951, - "grad_norm": 0.5714020133018494, - "learning_rate": 1.3738292839817757e-07, - "loss": 0.1616, - "step": 20131 - }, - { - "epoch": 1.8965168036551188, - "grad_norm": 0.6463074088096619, - "learning_rate": 1.3713360045323753e-07, - "loss": 0.2026, - "step": 20132 - }, - { - "epoch": 1.8966110077482865, - "grad_norm": 0.6515389084815979, - "learning_rate": 1.3688449739690524e-07, - "loss": 0.1741, - "step": 20133 - }, - { - "epoch": 1.8967052118414545, - "grad_norm": 0.6189078688621521, - "learning_rate": 1.3663561923486059e-07, - "loss": 0.1829, - "step": 20134 - }, - { - "epoch": 1.8967994159346224, - "grad_norm": 0.7111350297927856, - "learning_rate": 1.3638696597277678e-07, - "loss": 0.2132, - "step": 20135 - }, - { - "epoch": 1.8968936200277902, - "grad_norm": 0.6568692922592163, - "learning_rate": 1.361385376163238e-07, - "loss": 0.1729, - "step": 20136 - }, - { - "epoch": 1.896987824120958, - "grad_norm": 0.6552120447158813, - "learning_rate": 1.3589033417116927e-07, - "loss": 0.2014, - "step": 20137 - }, - { - "epoch": 1.8970820282141259, - "grad_norm": 0.6305198073387146, - "learning_rate": 1.356423556429687e-07, - "loss": 0.2047, - "step": 20138 - }, - { - "epoch": 1.8971762323072938, - "grad_norm": 0.6298912763595581, - "learning_rate": 1.3539460203737754e-07, - "loss": 0.1524, - "step": 20139 - }, - { - "epoch": 1.8972704364004616, - "grad_norm": 0.682796835899353, - "learning_rate": 1.3514707336004685e-07, - "loss": 0.192, - "step": 20140 - }, - { - "epoch": 1.8973646404936293, - "grad_norm": 0.5889750719070435, - "learning_rate": 1.3489976961661765e-07, - "loss": 0.1597, - "step": 20141 - }, - { - "epoch": 1.8974588445867973, - "grad_norm": 0.6618540287017822, - "learning_rate": 1.3465269081273102e-07, - "loss": 0.1992, - "step": 20142 - }, - { - "epoch": 1.8975530486799652, - "grad_norm": 0.687576413154602, - "learning_rate": 
1.344058369540202e-07, - "loss": 0.1907, - "step": 20143 - }, - { - "epoch": 1.897647252773133, - "grad_norm": 0.6274227499961853, - "learning_rate": 1.3415920804611294e-07, - "loss": 0.1833, - "step": 20144 - }, - { - "epoch": 1.8977414568663007, - "grad_norm": 0.7021694183349609, - "learning_rate": 1.339128040946347e-07, - "loss": 0.1872, - "step": 20145 - }, - { - "epoch": 1.8978356609594687, - "grad_norm": 0.6623535752296448, - "learning_rate": 1.3366662510520212e-07, - "loss": 0.21, - "step": 20146 - }, - { - "epoch": 1.8979298650526366, - "grad_norm": 0.6356085538864136, - "learning_rate": 1.334206710834296e-07, - "loss": 0.1825, - "step": 20147 - }, - { - "epoch": 1.8980240691458043, - "grad_norm": 0.7023216485977173, - "learning_rate": 1.3317494203492486e-07, - "loss": 0.1884, - "step": 20148 - }, - { - "epoch": 1.898118273238972, - "grad_norm": 0.5692326426506042, - "learning_rate": 1.329294379652901e-07, - "loss": 0.1502, - "step": 20149 - }, - { - "epoch": 1.89821247733214, - "grad_norm": 0.7072599530220032, - "learning_rate": 1.326841588801242e-07, - "loss": 0.1983, - "step": 20150 - }, - { - "epoch": 1.898306681425308, - "grad_norm": 0.6492814421653748, - "learning_rate": 1.324391047850193e-07, - "loss": 0.203, - "step": 20151 - }, - { - "epoch": 1.8984008855184757, - "grad_norm": 0.6531577706336975, - "learning_rate": 1.321942756855632e-07, - "loss": 0.1945, - "step": 20152 - }, - { - "epoch": 1.8984950896116435, - "grad_norm": 0.6064938902854919, - "learning_rate": 1.3194967158733917e-07, - "loss": 0.2079, - "step": 20153 - }, - { - "epoch": 1.8985892937048114, - "grad_norm": 0.6969150900840759, - "learning_rate": 1.3170529249592278e-07, - "loss": 0.1949, - "step": 20154 - }, - { - "epoch": 1.8986834977979794, - "grad_norm": 0.6392337083816528, - "learning_rate": 1.3146113841688845e-07, - "loss": 0.172, - "step": 20155 - }, - { - "epoch": 1.8987777018911471, - "grad_norm": 0.7279536128044128, - "learning_rate": 1.3121720935580174e-07, - "loss": 0.1923, - "step": 20156 - }, - { - "epoch": 1.8988719059843149, - "grad_norm": 1.4701002836227417, - "learning_rate": 1.3097350531822485e-07, - "loss": 0.1832, - "step": 20157 - }, - { - "epoch": 1.8989661100774828, - "grad_norm": 0.6353821754455566, - "learning_rate": 1.3073002630971444e-07, - "loss": 0.1703, - "step": 20158 - }, - { - "epoch": 1.8990603141706508, - "grad_norm": 0.5935443043708801, - "learning_rate": 1.3048677233582274e-07, - "loss": 0.1731, - "step": 20159 - }, - { - "epoch": 1.8991545182638185, - "grad_norm": 0.6935319304466248, - "learning_rate": 1.3024374340209645e-07, - "loss": 0.2273, - "step": 20160 - }, - { - "epoch": 1.8992487223569863, - "grad_norm": 0.6721147298812866, - "learning_rate": 1.3000093951407667e-07, - "loss": 0.1765, - "step": 20161 - }, - { - "epoch": 1.8993429264501542, - "grad_norm": 0.684522271156311, - "learning_rate": 1.297583606773001e-07, - "loss": 0.2394, - "step": 20162 - }, - { - "epoch": 1.8994371305433222, - "grad_norm": 0.6819477081298828, - "learning_rate": 1.295160068972967e-07, - "loss": 0.2015, - "step": 20163 - }, - { - "epoch": 1.89953133463649, - "grad_norm": 0.8085724711418152, - "learning_rate": 1.2927387817959325e-07, - "loss": 0.2037, - "step": 20164 - }, - { - "epoch": 1.8996255387296577, - "grad_norm": 0.6287370324134827, - "learning_rate": 1.2903197452971196e-07, - "loss": 0.1824, - "step": 20165 - }, - { - "epoch": 1.8997197428228256, - "grad_norm": 0.6373084187507629, - "learning_rate": 1.2879029595316616e-07, - "loss": 0.1777, - "step": 20166 - }, - { - "epoch": 
1.8998139469159936, - "grad_norm": 0.6652271151542664, - "learning_rate": 1.2854884245546818e-07, - "loss": 0.167, - "step": 20167 - }, - { - "epoch": 1.8999081510091613, - "grad_norm": 0.6203648447990417, - "learning_rate": 1.2830761404212355e-07, - "loss": 0.1893, - "step": 20168 - }, - { - "epoch": 1.900002355102329, - "grad_norm": 0.6763032674789429, - "learning_rate": 1.2806661071863124e-07, - "loss": 0.2128, - "step": 20169 - }, - { - "epoch": 1.900096559195497, - "grad_norm": 0.7861047983169556, - "learning_rate": 1.2782583249048798e-07, - "loss": 0.2064, - "step": 20170 - }, - { - "epoch": 1.900190763288665, - "grad_norm": 0.8120763897895813, - "learning_rate": 1.2758527936318377e-07, - "loss": 0.182, - "step": 20171 - }, - { - "epoch": 1.9002849673818327, - "grad_norm": 0.8147682547569275, - "learning_rate": 1.2734495134220204e-07, - "loss": 0.2264, - "step": 20172 - }, - { - "epoch": 1.9003791714750005, - "grad_norm": 0.6320006251335144, - "learning_rate": 1.2710484843302506e-07, - "loss": 0.1756, - "step": 20173 - }, - { - "epoch": 1.9004733755681684, - "grad_norm": 0.6364458203315735, - "learning_rate": 1.2686497064112513e-07, - "loss": 0.1835, - "step": 20174 - }, - { - "epoch": 1.9005675796613364, - "grad_norm": 0.6355153918266296, - "learning_rate": 1.2662531797197343e-07, - "loss": 0.1915, - "step": 20175 - }, - { - "epoch": 1.9006617837545041, - "grad_norm": 0.6249171495437622, - "learning_rate": 1.2638589043103445e-07, - "loss": 0.1619, - "step": 20176 - }, - { - "epoch": 1.9007559878476719, - "grad_norm": 0.7112020254135132, - "learning_rate": 1.2614668802376717e-07, - "loss": 0.1869, - "step": 20177 - }, - { - "epoch": 1.9008501919408398, - "grad_norm": 0.7528996467590332, - "learning_rate": 1.259077107556239e-07, - "loss": 0.1981, - "step": 20178 - }, - { - "epoch": 1.9009443960340078, - "grad_norm": 0.6597367525100708, - "learning_rate": 1.2566895863205698e-07, - "loss": 0.192, - "step": 20179 - }, - { - "epoch": 1.9010386001271755, - "grad_norm": 0.6589730978012085, - "learning_rate": 1.2543043165850865e-07, - "loss": 0.1977, - "step": 20180 - }, - { - "epoch": 1.9011328042203433, - "grad_norm": 0.6787749528884888, - "learning_rate": 1.2519212984041684e-07, - "loss": 0.1911, - "step": 20181 - }, - { - "epoch": 1.9012270083135112, - "grad_norm": 0.6698355078697205, - "learning_rate": 1.2495405318321606e-07, - "loss": 0.186, - "step": 20182 - }, - { - "epoch": 1.9013212124066792, - "grad_norm": 0.6706238985061646, - "learning_rate": 1.2471620169233644e-07, - "loss": 0.2028, - "step": 20183 - }, - { - "epoch": 1.901415416499847, - "grad_norm": 0.6361337304115295, - "learning_rate": 1.2447857537319918e-07, - "loss": 0.1627, - "step": 20184 - }, - { - "epoch": 1.9015096205930146, - "grad_norm": 0.6631746888160706, - "learning_rate": 1.242411742312233e-07, - "loss": 0.2048, - "step": 20185 - }, - { - "epoch": 1.9016038246861826, - "grad_norm": 0.7597262859344482, - "learning_rate": 1.2400399827182107e-07, - "loss": 0.182, - "step": 20186 - }, - { - "epoch": 1.9016980287793506, - "grad_norm": 0.7315596342086792, - "learning_rate": 1.2376704750040159e-07, - "loss": 0.1897, - "step": 20187 - }, - { - "epoch": 1.9017922328725183, - "grad_norm": 0.61861652135849, - "learning_rate": 1.2353032192236713e-07, - "loss": 0.1684, - "step": 20188 - }, - { - "epoch": 1.901886436965686, - "grad_norm": 0.7388166785240173, - "learning_rate": 1.2329382154311565e-07, - "loss": 0.216, - "step": 20189 - }, - { - "epoch": 1.901980641058854, - "grad_norm": 0.6822174191474915, - "learning_rate": 
1.2305754636804056e-07, - "loss": 0.1772, - "step": 20190 - }, - { - "epoch": 1.902074845152022, - "grad_norm": 0.7317981719970703, - "learning_rate": 1.2282149640252762e-07, - "loss": 0.2467, - "step": 20191 - }, - { - "epoch": 1.9021690492451897, - "grad_norm": 0.6668426990509033, - "learning_rate": 1.2258567165195912e-07, - "loss": 0.206, - "step": 20192 - }, - { - "epoch": 1.9022632533383574, - "grad_norm": 0.6788719296455383, - "learning_rate": 1.2235007212171414e-07, - "loss": 0.1744, - "step": 20193 - }, - { - "epoch": 1.9023574574315254, - "grad_norm": 0.6401882767677307, - "learning_rate": 1.2211469781716388e-07, - "loss": 0.2099, - "step": 20194 - }, - { - "epoch": 1.9024516615246934, - "grad_norm": 0.6083666086196899, - "learning_rate": 1.2187954874367413e-07, - "loss": 0.1973, - "step": 20195 - }, - { - "epoch": 1.902545865617861, - "grad_norm": 0.6650665998458862, - "learning_rate": 1.2164462490660722e-07, - "loss": 0.1755, - "step": 20196 - }, - { - "epoch": 1.9026400697110288, - "grad_norm": 0.7086012363433838, - "learning_rate": 1.2140992631132108e-07, - "loss": 0.2069, - "step": 20197 - }, - { - "epoch": 1.9027342738041968, - "grad_norm": 0.6304386258125305, - "learning_rate": 1.211754529631659e-07, - "loss": 0.2152, - "step": 20198 - }, - { - "epoch": 1.9028284778973648, - "grad_norm": 0.6851261854171753, - "learning_rate": 1.2094120486748739e-07, - "loss": 0.2461, - "step": 20199 - }, - { - "epoch": 1.9029226819905325, - "grad_norm": 0.7031466960906982, - "learning_rate": 1.2070718202962794e-07, - "loss": 0.1928, - "step": 20200 - }, - { - "epoch": 1.9030168860837002, - "grad_norm": 0.676091730594635, - "learning_rate": 1.2047338445492442e-07, - "loss": 0.1927, - "step": 20201 - }, - { - "epoch": 1.9031110901768682, - "grad_norm": 0.6730955243110657, - "learning_rate": 1.2023981214870585e-07, - "loss": 0.1544, - "step": 20202 - }, - { - "epoch": 1.9032052942700362, - "grad_norm": 0.7274423837661743, - "learning_rate": 1.200064651162991e-07, - "loss": 0.1963, - "step": 20203 - }, - { - "epoch": 1.903299498363204, - "grad_norm": 0.6768518686294556, - "learning_rate": 1.1977334336302437e-07, - "loss": 0.2041, - "step": 20204 - }, - { - "epoch": 1.9033937024563716, - "grad_norm": 0.6196874976158142, - "learning_rate": 1.1954044689419852e-07, - "loss": 0.1871, - "step": 20205 - }, - { - "epoch": 1.9034879065495396, - "grad_norm": 0.6369749307632446, - "learning_rate": 1.1930777571512843e-07, - "loss": 0.1709, - "step": 20206 - }, - { - "epoch": 1.9035821106427075, - "grad_norm": 0.7110297083854675, - "learning_rate": 1.1907532983112424e-07, - "loss": 0.1748, - "step": 20207 - }, - { - "epoch": 1.9036763147358753, - "grad_norm": 0.6341861486434937, - "learning_rate": 1.18843109247484e-07, - "loss": 0.2121, - "step": 20208 - }, - { - "epoch": 1.903770518829043, - "grad_norm": 0.6789807081222534, - "learning_rate": 1.186111139695001e-07, - "loss": 0.2006, - "step": 20209 - }, - { - "epoch": 1.903864722922211, - "grad_norm": 0.6980389356613159, - "learning_rate": 1.1837934400246609e-07, - "loss": 0.2074, - "step": 20210 - }, - { - "epoch": 1.903958927015379, - "grad_norm": 0.7165026068687439, - "learning_rate": 1.1814779935166553e-07, - "loss": 0.1957, - "step": 20211 - }, - { - "epoch": 1.9040531311085467, - "grad_norm": 0.6362962126731873, - "learning_rate": 1.1791648002237643e-07, - "loss": 0.1841, - "step": 20212 - }, - { - "epoch": 1.9041473352017144, - "grad_norm": 0.6579410433769226, - "learning_rate": 1.1768538601987566e-07, - "loss": 0.1918, - "step": 20213 - }, - { - 
"epoch": 1.9042415392948824, - "grad_norm": 0.7375689744949341, - "learning_rate": 1.1745451734943125e-07, - "loss": 0.2069, - "step": 20214 - }, - { - "epoch": 1.9043357433880503, - "grad_norm": 0.8764556050300598, - "learning_rate": 1.1722387401630675e-07, - "loss": 0.1832, - "step": 20215 - }, - { - "epoch": 1.904429947481218, - "grad_norm": 0.6192811727523804, - "learning_rate": 1.169934560257624e-07, - "loss": 0.179, - "step": 20216 - }, - { - "epoch": 1.9045241515743858, - "grad_norm": 0.7142428755760193, - "learning_rate": 1.1676326338305178e-07, - "loss": 0.186, - "step": 20217 - }, - { - "epoch": 1.9046183556675538, - "grad_norm": 0.753334641456604, - "learning_rate": 1.16533296093424e-07, - "loss": 0.2124, - "step": 20218 - }, - { - "epoch": 1.9047125597607217, - "grad_norm": 0.6500793695449829, - "learning_rate": 1.1630355416212047e-07, - "loss": 0.179, - "step": 20219 - }, - { - "epoch": 1.9048067638538895, - "grad_norm": 0.708154022693634, - "learning_rate": 1.1607403759438363e-07, - "loss": 0.1871, - "step": 20220 - }, - { - "epoch": 1.9049009679470572, - "grad_norm": 0.6770479083061218, - "learning_rate": 1.1584474639544263e-07, - "loss": 0.2003, - "step": 20221 - }, - { - "epoch": 1.9049951720402252, - "grad_norm": 0.6528639197349548, - "learning_rate": 1.1561568057052775e-07, - "loss": 0.1887, - "step": 20222 - }, - { - "epoch": 1.9050893761333931, - "grad_norm": 0.650513768196106, - "learning_rate": 1.1538684012486257e-07, - "loss": 0.1968, - "step": 20223 - }, - { - "epoch": 1.9051835802265609, - "grad_norm": 0.6311100125312805, - "learning_rate": 1.1515822506366403e-07, - "loss": 0.1724, - "step": 20224 - }, - { - "epoch": 1.9052777843197286, - "grad_norm": 0.6588751077651978, - "learning_rate": 1.1492983539214576e-07, - "loss": 0.1835, - "step": 20225 - }, - { - "epoch": 1.9053719884128966, - "grad_norm": 0.6212252378463745, - "learning_rate": 1.1470167111551356e-07, - "loss": 0.2025, - "step": 20226 - }, - { - "epoch": 1.9054661925060645, - "grad_norm": 0.6927551031112671, - "learning_rate": 1.1447373223897218e-07, - "loss": 0.2378, - "step": 20227 - }, - { - "epoch": 1.9055603965992323, - "grad_norm": 0.6051120758056641, - "learning_rate": 1.1424601876771746e-07, - "loss": 0.1782, - "step": 20228 - }, - { - "epoch": 1.9056546006924, - "grad_norm": 0.6797753572463989, - "learning_rate": 1.1401853070694192e-07, - "loss": 0.2105, - "step": 20229 - }, - { - "epoch": 1.905748804785568, - "grad_norm": 0.6577433347702026, - "learning_rate": 1.137912680618336e-07, - "loss": 0.2051, - "step": 20230 - }, - { - "epoch": 1.905843008878736, - "grad_norm": 0.7104130983352661, - "learning_rate": 1.1356423083757284e-07, - "loss": 0.1819, - "step": 20231 - }, - { - "epoch": 1.9059372129719037, - "grad_norm": 0.5975115299224854, - "learning_rate": 1.1333741903933771e-07, - "loss": 0.1865, - "step": 20232 - }, - { - "epoch": 1.9060314170650714, - "grad_norm": 0.583385169506073, - "learning_rate": 1.131108326722996e-07, - "loss": 0.1636, - "step": 20233 - }, - { - "epoch": 1.9061256211582394, - "grad_norm": 0.9173849821090698, - "learning_rate": 1.1288447174162443e-07, - "loss": 0.1802, - "step": 20234 - }, - { - "epoch": 1.9062198252514073, - "grad_norm": 0.665015697479248, - "learning_rate": 1.1265833625247469e-07, - "loss": 0.1786, - "step": 20235 - }, - { - "epoch": 1.906314029344575, - "grad_norm": 0.6025359034538269, - "learning_rate": 1.1243242621000516e-07, - "loss": 0.1622, - "step": 20236 - }, - { - "epoch": 1.9064082334377428, - "grad_norm": 0.649183988571167, - 
"learning_rate": 1.1220674161936839e-07, - "loss": 0.1929, - "step": 20237 - }, - { - "epoch": 1.9065024375309108, - "grad_norm": 0.6834205389022827, - "learning_rate": 1.1198128248570916e-07, - "loss": 0.1898, - "step": 20238 - }, - { - "epoch": 1.9065966416240787, - "grad_norm": 0.6396623253822327, - "learning_rate": 1.117560488141689e-07, - "loss": 0.1844, - "step": 20239 - }, - { - "epoch": 1.9066908457172465, - "grad_norm": 0.6639078259468079, - "learning_rate": 1.1153104060988352e-07, - "loss": 0.224, - "step": 20240 - }, - { - "epoch": 1.9067850498104142, - "grad_norm": 0.5923094749450684, - "learning_rate": 1.1130625787798333e-07, - "loss": 0.1751, - "step": 20241 - }, - { - "epoch": 1.9068792539035821, - "grad_norm": 0.6624804735183716, - "learning_rate": 1.1108170062359313e-07, - "loss": 0.2065, - "step": 20242 - }, - { - "epoch": 1.90697345799675, - "grad_norm": 0.6475779414176941, - "learning_rate": 1.1085736885183329e-07, - "loss": 0.1869, - "step": 20243 - }, - { - "epoch": 1.9070676620899178, - "grad_norm": 0.7120112180709839, - "learning_rate": 1.1063326256782081e-07, - "loss": 0.1913, - "step": 20244 - }, - { - "epoch": 1.9071618661830856, - "grad_norm": 0.671790361404419, - "learning_rate": 1.1040938177666383e-07, - "loss": 0.1788, - "step": 20245 - }, - { - "epoch": 1.9072560702762535, - "grad_norm": 0.6533803939819336, - "learning_rate": 1.1018572648346493e-07, - "loss": 0.1903, - "step": 20246 - }, - { - "epoch": 1.9073502743694215, - "grad_norm": 0.5851737260818481, - "learning_rate": 1.0996229669332892e-07, - "loss": 0.167, - "step": 20247 - }, - { - "epoch": 1.9074444784625892, - "grad_norm": 0.6730858087539673, - "learning_rate": 1.0973909241134839e-07, - "loss": 0.2212, - "step": 20248 - }, - { - "epoch": 1.907538682555757, - "grad_norm": 0.5762879848480225, - "learning_rate": 1.0951611364260927e-07, - "loss": 0.1573, - "step": 20249 - }, - { - "epoch": 1.907632886648925, - "grad_norm": 0.6985439658164978, - "learning_rate": 1.0929336039220084e-07, - "loss": 0.2021, - "step": 20250 - }, - { - "epoch": 1.907727090742093, - "grad_norm": 0.7053725719451904, - "learning_rate": 1.0907083266520013e-07, - "loss": 0.2159, - "step": 20251 - }, - { - "epoch": 1.9078212948352606, - "grad_norm": 0.6184783577919006, - "learning_rate": 1.0884853046667976e-07, - "loss": 0.1779, - "step": 20252 - }, - { - "epoch": 1.9079154989284284, - "grad_norm": 0.6174992322921753, - "learning_rate": 1.086264538017101e-07, - "loss": 0.1519, - "step": 20253 - }, - { - "epoch": 1.9080097030215963, - "grad_norm": 0.7269670963287354, - "learning_rate": 1.0840460267535601e-07, - "loss": 0.2499, - "step": 20254 - }, - { - "epoch": 1.9081039071147643, - "grad_norm": 0.6113536357879639, - "learning_rate": 1.0818297709267344e-07, - "loss": 0.1747, - "step": 20255 - }, - { - "epoch": 1.908198111207932, - "grad_norm": 0.7576474547386169, - "learning_rate": 1.0796157705871724e-07, - "loss": 0.2002, - "step": 20256 - }, - { - "epoch": 1.9082923153010998, - "grad_norm": 0.6527425646781921, - "learning_rate": 1.0774040257853557e-07, - "loss": 0.1962, - "step": 20257 - }, - { - "epoch": 1.9083865193942677, - "grad_norm": 5.325571537017822, - "learning_rate": 1.075194536571711e-07, - "loss": 0.2052, - "step": 20258 - }, - { - "epoch": 1.9084807234874357, - "grad_norm": 0.6798804998397827, - "learning_rate": 1.0729873029966198e-07, - "loss": 0.204, - "step": 20259 - }, - { - "epoch": 1.9085749275806034, - "grad_norm": 0.6803638935089111, - "learning_rate": 1.0707823251104199e-07, - "loss": 0.1818, - "step": 
20260 - }, - { - "epoch": 1.9086691316737712, - "grad_norm": 0.572781503200531, - "learning_rate": 1.068579602963371e-07, - "loss": 0.1903, - "step": 20261 - }, - { - "epoch": 1.9087633357669391, - "grad_norm": 0.7031172513961792, - "learning_rate": 1.0663791366056997e-07, - "loss": 0.2152, - "step": 20262 - }, - { - "epoch": 1.9088575398601069, - "grad_norm": 0.5797815322875977, - "learning_rate": 1.0641809260876101e-07, - "loss": 0.1712, - "step": 20263 - }, - { - "epoch": 1.9089517439532746, - "grad_norm": 0.7296099066734314, - "learning_rate": 1.0619849714591957e-07, - "loss": 0.2167, - "step": 20264 - }, - { - "epoch": 1.9090459480464426, - "grad_norm": 0.871605396270752, - "learning_rate": 1.0597912727705273e-07, - "loss": 0.1918, - "step": 20265 - }, - { - "epoch": 1.9091401521396105, - "grad_norm": 0.6410840153694153, - "learning_rate": 1.0575998300716317e-07, - "loss": 0.178, - "step": 20266 - }, - { - "epoch": 1.9092343562327783, - "grad_norm": 0.6435098648071289, - "learning_rate": 1.05541064341248e-07, - "loss": 0.1723, - "step": 20267 - }, - { - "epoch": 1.909328560325946, - "grad_norm": 0.6844145059585571, - "learning_rate": 1.0532237128429879e-07, - "loss": 0.1897, - "step": 20268 - }, - { - "epoch": 1.909422764419114, - "grad_norm": 0.6417859196662903, - "learning_rate": 1.0510390384130153e-07, - "loss": 0.1831, - "step": 20269 - }, - { - "epoch": 1.909516968512282, - "grad_norm": 0.5997002720832825, - "learning_rate": 1.0488566201723782e-07, - "loss": 0.1719, - "step": 20270 - }, - { - "epoch": 1.9096111726054497, - "grad_norm": 0.6502866744995117, - "learning_rate": 1.0466764581708478e-07, - "loss": 0.1949, - "step": 20271 - }, - { - "epoch": 1.9097053766986174, - "grad_norm": 0.6558030247688293, - "learning_rate": 1.0444985524581285e-07, - "loss": 0.2132, - "step": 20272 - }, - { - "epoch": 1.9097995807917854, - "grad_norm": 0.6933730244636536, - "learning_rate": 1.0423229030838811e-07, - "loss": 0.1989, - "step": 20273 - }, - { - "epoch": 1.9098937848849533, - "grad_norm": 0.7598581314086914, - "learning_rate": 1.0401495100976988e-07, - "loss": 0.2297, - "step": 20274 - }, - { - "epoch": 1.909987988978121, - "grad_norm": 0.6667605042457581, - "learning_rate": 1.0379783735491756e-07, - "loss": 0.1992, - "step": 20275 - }, - { - "epoch": 1.9100821930712888, - "grad_norm": 0.7057095170021057, - "learning_rate": 1.0358094934877716e-07, - "loss": 0.1963, - "step": 20276 - }, - { - "epoch": 1.9101763971644568, - "grad_norm": 0.6734713912010193, - "learning_rate": 1.0336428699629696e-07, - "loss": 0.2209, - "step": 20277 - }, - { - "epoch": 1.9102706012576247, - "grad_norm": 0.691481351852417, - "learning_rate": 1.0314785030241636e-07, - "loss": 0.1905, - "step": 20278 - }, - { - "epoch": 1.9103648053507924, - "grad_norm": 0.7067854404449463, - "learning_rate": 1.0293163927207139e-07, - "loss": 0.1922, - "step": 20279 - }, - { - "epoch": 1.9104590094439602, - "grad_norm": 0.6748788952827454, - "learning_rate": 1.0271565391018922e-07, - "loss": 0.1621, - "step": 20280 - }, - { - "epoch": 1.9105532135371281, - "grad_norm": 0.6618409156799316, - "learning_rate": 1.0249989422169926e-07, - "loss": 0.1876, - "step": 20281 - }, - { - "epoch": 1.910647417630296, - "grad_norm": 0.7199574112892151, - "learning_rate": 1.0228436021151645e-07, - "loss": 0.196, - "step": 20282 - }, - { - "epoch": 1.9107416217234638, - "grad_norm": 0.7408638000488281, - "learning_rate": 1.0206905188455796e-07, - "loss": 0.1892, - "step": 20283 - }, - { - "epoch": 1.9108358258166316, - "grad_norm": 
0.6736592054367065, - "learning_rate": 1.0185396924573209e-07, - "loss": 0.2049, - "step": 20284 - }, - { - "epoch": 1.9109300299097995, - "grad_norm": 0.6533849239349365, - "learning_rate": 1.0163911229994494e-07, - "loss": 0.1868, - "step": 20285 - }, - { - "epoch": 1.9110242340029675, - "grad_norm": 0.6521468162536621, - "learning_rate": 1.0142448105209257e-07, - "loss": 0.1949, - "step": 20286 - }, - { - "epoch": 1.9111184380961352, - "grad_norm": 0.6551706790924072, - "learning_rate": 1.0121007550707107e-07, - "loss": 0.1896, - "step": 20287 - }, - { - "epoch": 1.911212642189303, - "grad_norm": 0.7433573007583618, - "learning_rate": 1.0099589566976875e-07, - "loss": 0.1899, - "step": 20288 - }, - { - "epoch": 1.911306846282471, - "grad_norm": 0.6351568102836609, - "learning_rate": 1.0078194154506948e-07, - "loss": 0.1748, - "step": 20289 - }, - { - "epoch": 1.911401050375639, - "grad_norm": 0.6436189413070679, - "learning_rate": 1.0056821313785048e-07, - "loss": 0.2004, - "step": 20290 - }, - { - "epoch": 1.9114952544688066, - "grad_norm": 0.6571237444877625, - "learning_rate": 1.0035471045298673e-07, - "loss": 0.1974, - "step": 20291 - }, - { - "epoch": 1.9115894585619744, - "grad_norm": 0.6632843017578125, - "learning_rate": 1.0014143349534433e-07, - "loss": 0.1633, - "step": 20292 - }, - { - "epoch": 1.9116836626551423, - "grad_norm": 0.7167983055114746, - "learning_rate": 9.99283822697894e-08, - "loss": 0.1928, - "step": 20293 - }, - { - "epoch": 1.9117778667483103, - "grad_norm": 0.673212468624115, - "learning_rate": 9.971555678117695e-08, - "loss": 0.195, - "step": 20294 - }, - { - "epoch": 1.911872070841478, - "grad_norm": 0.6878961324691772, - "learning_rate": 9.950295703436086e-08, - "loss": 0.1676, - "step": 20295 - }, - { - "epoch": 1.9119662749346458, - "grad_norm": 0.6188074946403503, - "learning_rate": 9.929058303418947e-08, - "loss": 0.1617, - "step": 20296 - }, - { - "epoch": 1.9120604790278137, - "grad_norm": 0.6131939888000488, - "learning_rate": 9.90784347855045e-08, - "loss": 0.1658, - "step": 20297 - }, - { - "epoch": 1.9121546831209817, - "grad_norm": 0.752884030342102, - "learning_rate": 9.886651229314315e-08, - "loss": 0.1878, - "step": 20298 - }, - { - "epoch": 1.9122488872141494, - "grad_norm": 0.7726864218711853, - "learning_rate": 9.865481556193713e-08, - "loss": 0.2343, - "step": 20299 - }, - { - "epoch": 1.9123430913073172, - "grad_norm": 0.6393436789512634, - "learning_rate": 9.844334459671479e-08, - "loss": 0.1942, - "step": 20300 - }, - { - "epoch": 1.9124372954004851, - "grad_norm": 0.7119874954223633, - "learning_rate": 9.823209940229673e-08, - "loss": 0.1838, - "step": 20301 - }, - { - "epoch": 1.912531499493653, - "grad_norm": 0.6908301711082458, - "learning_rate": 9.80210799835002e-08, - "loss": 0.2055, - "step": 20302 - }, - { - "epoch": 1.9126257035868208, - "grad_norm": 0.8175268173217773, - "learning_rate": 9.7810286345138e-08, - "loss": 0.1851, - "step": 20303 - }, - { - "epoch": 1.9127199076799886, - "grad_norm": 0.6205576658248901, - "learning_rate": 9.75997184920141e-08, - "loss": 0.1817, - "step": 20304 - }, - { - "epoch": 1.9128141117731565, - "grad_norm": 0.656181275844574, - "learning_rate": 9.738937642893132e-08, - "loss": 0.191, - "step": 20305 - }, - { - "epoch": 1.9129083158663245, - "grad_norm": 0.6175089478492737, - "learning_rate": 9.717926016068468e-08, - "loss": 0.1718, - "step": 20306 - }, - { - "epoch": 1.9130025199594922, - "grad_norm": 0.6579227447509766, - "learning_rate": 9.696936969206705e-08, - "loss": 0.182, - "step": 
20307 - }, - { - "epoch": 1.91309672405266, - "grad_norm": 0.7359580993652344, - "learning_rate": 9.675970502786236e-08, - "loss": 0.1923, - "step": 20308 - }, - { - "epoch": 1.913190928145828, - "grad_norm": 0.6713550090789795, - "learning_rate": 9.655026617285235e-08, - "loss": 0.1953, - "step": 20309 - }, - { - "epoch": 1.9132851322389959, - "grad_norm": 0.6320589780807495, - "learning_rate": 9.634105313181207e-08, - "loss": 0.1974, - "step": 20310 - }, - { - "epoch": 1.9133793363321636, - "grad_norm": 0.6372619867324829, - "learning_rate": 9.613206590951219e-08, - "loss": 0.1738, - "step": 20311 - }, - { - "epoch": 1.9134735404253314, - "grad_norm": 0.687659740447998, - "learning_rate": 9.592330451071774e-08, - "loss": 0.1917, - "step": 20312 - }, - { - "epoch": 1.9135677445184993, - "grad_norm": 0.7863016724586487, - "learning_rate": 9.571476894018938e-08, - "loss": 0.2445, - "step": 20313 - }, - { - "epoch": 1.9136619486116673, - "grad_norm": 0.7134937644004822, - "learning_rate": 9.550645920268109e-08, - "loss": 0.2446, - "step": 20314 - }, - { - "epoch": 1.913756152704835, - "grad_norm": 0.6415748596191406, - "learning_rate": 9.52983753029435e-08, - "loss": 0.2007, - "step": 20315 - }, - { - "epoch": 1.9138503567980027, - "grad_norm": 0.7357137799263, - "learning_rate": 9.50905172457206e-08, - "loss": 0.212, - "step": 20316 - }, - { - "epoch": 1.9139445608911707, - "grad_norm": 0.6473010778427124, - "learning_rate": 9.488288503575193e-08, - "loss": 0.1914, - "step": 20317 - }, - { - "epoch": 1.9140387649843387, - "grad_norm": 0.6214069128036499, - "learning_rate": 9.467547867777261e-08, - "loss": 0.1621, - "step": 20318 - }, - { - "epoch": 1.9141329690775064, - "grad_norm": 0.7933803796768188, - "learning_rate": 9.446829817650993e-08, - "loss": 0.2149, - "step": 20319 - }, - { - "epoch": 1.9142271731706741, - "grad_norm": 0.5886857509613037, - "learning_rate": 9.426134353669015e-08, - "loss": 0.1725, - "step": 20320 - }, - { - "epoch": 1.914321377263842, - "grad_norm": 0.6440650224685669, - "learning_rate": 9.405461476303168e-08, - "loss": 0.1838, - "step": 20321 - }, - { - "epoch": 1.91441558135701, - "grad_norm": 0.695639431476593, - "learning_rate": 9.384811186024744e-08, - "loss": 0.2212, - "step": 20322 - }, - { - "epoch": 1.9145097854501778, - "grad_norm": 0.6366848349571228, - "learning_rate": 9.36418348330459e-08, - "loss": 0.1734, - "step": 20323 - }, - { - "epoch": 1.9146039895433455, - "grad_norm": 0.6480180025100708, - "learning_rate": 9.343578368613215e-08, - "loss": 0.1764, - "step": 20324 - }, - { - "epoch": 1.9146981936365135, - "grad_norm": 0.6710912585258484, - "learning_rate": 9.322995842420247e-08, - "loss": 0.1866, - "step": 20325 - }, - { - "epoch": 1.9147923977296815, - "grad_norm": 0.6814228892326355, - "learning_rate": 9.302435905195084e-08, - "loss": 0.2081, - "step": 20326 - }, - { - "epoch": 1.9148866018228492, - "grad_norm": 0.6365862488746643, - "learning_rate": 9.281898557406577e-08, - "loss": 0.1719, - "step": 20327 - }, - { - "epoch": 1.914980805916017, - "grad_norm": 0.6735439896583557, - "learning_rate": 9.261383799523016e-08, - "loss": 0.1744, - "step": 20328 - }, - { - "epoch": 1.915075010009185, - "grad_norm": 0.6732083559036255, - "learning_rate": 9.240891632011917e-08, - "loss": 0.2123, - "step": 20329 - }, - { - "epoch": 1.9151692141023529, - "grad_norm": 0.6826969981193542, - "learning_rate": 9.220422055340794e-08, - "loss": 0.2134, - "step": 20330 - }, - { - "epoch": 1.9152634181955206, - "grad_norm": 0.6632513999938965, - "learning_rate": 
9.199975069976496e-08, - "loss": 0.2069, - "step": 20331 - }, - { - "epoch": 1.9153576222886883, - "grad_norm": 0.6673405170440674, - "learning_rate": 9.179550676384874e-08, - "loss": 0.1835, - "step": 20332 - }, - { - "epoch": 1.9154518263818563, - "grad_norm": 0.6578190326690674, - "learning_rate": 9.159148875031887e-08, - "loss": 0.1841, - "step": 20333 - }, - { - "epoch": 1.9155460304750243, - "grad_norm": 1.0255050659179688, - "learning_rate": 9.138769666382719e-08, - "loss": 0.1841, - "step": 20334 - }, - { - "epoch": 1.915640234568192, - "grad_norm": 0.6334037184715271, - "learning_rate": 9.118413050901886e-08, - "loss": 0.2038, - "step": 20335 - }, - { - "epoch": 1.9157344386613597, - "grad_norm": 0.70944744348526, - "learning_rate": 9.098079029053686e-08, - "loss": 0.198, - "step": 20336 - }, - { - "epoch": 1.9158286427545277, - "grad_norm": 0.6494820713996887, - "learning_rate": 9.077767601301856e-08, - "loss": 0.18, - "step": 20337 - }, - { - "epoch": 1.9159228468476956, - "grad_norm": 0.6620484590530396, - "learning_rate": 9.057478768109362e-08, - "loss": 0.1865, - "step": 20338 - }, - { - "epoch": 1.9160170509408634, - "grad_norm": 0.7518098950386047, - "learning_rate": 9.037212529938832e-08, - "loss": 0.1946, - "step": 20339 - }, - { - "epoch": 1.9161112550340311, - "grad_norm": 0.6428109407424927, - "learning_rate": 9.016968887252454e-08, - "loss": 0.1787, - "step": 20340 - }, - { - "epoch": 1.916205459127199, - "grad_norm": 0.6537749171257019, - "learning_rate": 8.996747840511744e-08, - "loss": 0.1849, - "step": 20341 - }, - { - "epoch": 1.916299663220367, - "grad_norm": 0.7021124362945557, - "learning_rate": 8.97654939017778e-08, - "loss": 0.183, - "step": 20342 - }, - { - "epoch": 1.9163938673135348, - "grad_norm": 0.634307861328125, - "learning_rate": 8.956373536711194e-08, - "loss": 0.162, - "step": 20343 - }, - { - "epoch": 1.9164880714067025, - "grad_norm": 0.6356215476989746, - "learning_rate": 8.93622028057195e-08, - "loss": 0.1747, - "step": 20344 - }, - { - "epoch": 1.9165822754998705, - "grad_norm": 0.7076974511146545, - "learning_rate": 8.91608962221957e-08, - "loss": 0.2075, - "step": 20345 - }, - { - "epoch": 1.9166764795930384, - "grad_norm": 0.7133541107177734, - "learning_rate": 8.895981562113021e-08, - "loss": 0.2031, - "step": 20346 - }, - { - "epoch": 1.9167706836862062, - "grad_norm": 0.6232772469520569, - "learning_rate": 8.875896100711046e-08, - "loss": 0.1717, - "step": 20347 - }, - { - "epoch": 1.916864887779374, - "grad_norm": 0.7322257161140442, - "learning_rate": 8.855833238471279e-08, - "loss": 0.2104, - "step": 20348 - }, - { - "epoch": 1.9169590918725419, - "grad_norm": 0.6773502230644226, - "learning_rate": 8.835792975851354e-08, - "loss": 0.201, - "step": 20349 - }, - { - "epoch": 1.9170532959657098, - "grad_norm": 0.6514344215393066, - "learning_rate": 8.81577531330835e-08, - "loss": 0.1896, - "step": 20350 - }, - { - "epoch": 1.9171475000588776, - "grad_norm": 0.6321004033088684, - "learning_rate": 8.795780251298458e-08, - "loss": 0.1911, - "step": 20351 - }, - { - "epoch": 1.9172417041520453, - "grad_norm": 0.7514435052871704, - "learning_rate": 8.775807790277757e-08, - "loss": 0.1937, - "step": 20352 - }, - { - "epoch": 1.9173359082452133, - "grad_norm": 0.605535626411438, - "learning_rate": 8.75585793070155e-08, - "loss": 0.1696, - "step": 20353 - }, - { - "epoch": 1.9174301123383812, - "grad_norm": 0.6275140047073364, - "learning_rate": 8.735930673024806e-08, - "loss": 0.1816, - "step": 20354 - }, - { - "epoch": 1.917524316431549, - 
"grad_norm": 0.6267510056495667, - "learning_rate": 8.716026017701829e-08, - "loss": 0.1861, - "step": 20355 - }, - { - "epoch": 1.9176185205247167, - "grad_norm": 0.6523311138153076, - "learning_rate": 8.696143965186587e-08, - "loss": 0.2062, - "step": 20356 - }, - { - "epoch": 1.9177127246178847, - "grad_norm": 0.6325419545173645, - "learning_rate": 8.676284515932166e-08, - "loss": 0.177, - "step": 20357 - }, - { - "epoch": 1.9178069287110526, - "grad_norm": 0.6407454609870911, - "learning_rate": 8.656447670391755e-08, - "loss": 0.2011, - "step": 20358 - }, - { - "epoch": 1.9179011328042204, - "grad_norm": 0.6525038480758667, - "learning_rate": 8.636633429017438e-08, - "loss": 0.2089, - "step": 20359 - }, - { - "epoch": 1.917995336897388, - "grad_norm": 0.6729236245155334, - "learning_rate": 8.616841792260966e-08, - "loss": 0.1705, - "step": 20360 - }, - { - "epoch": 1.918089540990556, - "grad_norm": 0.6049158573150635, - "learning_rate": 8.597072760573754e-08, - "loss": 0.185, - "step": 20361 - }, - { - "epoch": 1.918183745083724, - "grad_norm": 0.6646718382835388, - "learning_rate": 8.577326334406555e-08, - "loss": 0.1724, - "step": 20362 - }, - { - "epoch": 1.9182779491768918, - "grad_norm": 0.6387299299240112, - "learning_rate": 8.557602514209562e-08, - "loss": 0.1698, - "step": 20363 - }, - { - "epoch": 1.9183721532700595, - "grad_norm": 0.6570069789886475, - "learning_rate": 8.537901300432528e-08, - "loss": 0.2044, - "step": 20364 - }, - { - "epoch": 1.9184663573632275, - "grad_norm": 0.651914119720459, - "learning_rate": 8.518222693524647e-08, - "loss": 0.1818, - "step": 20365 - }, - { - "epoch": 1.9185605614563954, - "grad_norm": 0.7072866559028625, - "learning_rate": 8.498566693934563e-08, - "loss": 0.1767, - "step": 20366 - }, - { - "epoch": 1.9186547655495632, - "grad_norm": 0.5826490521430969, - "learning_rate": 8.478933302110692e-08, - "loss": 0.1895, - "step": 20367 - }, - { - "epoch": 1.918748969642731, - "grad_norm": 0.6720185875892639, - "learning_rate": 8.459322518500568e-08, - "loss": 0.1924, - "step": 20368 - }, - { - "epoch": 1.9188431737358989, - "grad_norm": 0.6119192242622375, - "learning_rate": 8.439734343551276e-08, - "loss": 0.178, - "step": 20369 - }, - { - "epoch": 1.9189373778290668, - "grad_norm": 0.6786589026451111, - "learning_rate": 8.420168777709459e-08, - "loss": 0.1727, - "step": 20370 - }, - { - "epoch": 1.9190315819222346, - "grad_norm": 0.6455722451210022, - "learning_rate": 8.400625821421426e-08, - "loss": 0.1887, - "step": 20371 - }, - { - "epoch": 1.9191257860154023, - "grad_norm": 0.721203625202179, - "learning_rate": 8.38110547513249e-08, - "loss": 0.2199, - "step": 20372 - }, - { - "epoch": 1.9192199901085703, - "grad_norm": 0.6263687014579773, - "learning_rate": 8.36160773928807e-08, - "loss": 0.1862, - "step": 20373 - }, - { - "epoch": 1.9193141942017382, - "grad_norm": 0.6503720283508301, - "learning_rate": 8.342132614332587e-08, - "loss": 0.1976, - "step": 20374 - }, - { - "epoch": 1.919408398294906, - "grad_norm": 0.6442580223083496, - "learning_rate": 8.322680100710023e-08, - "loss": 0.1629, - "step": 20375 - }, - { - "epoch": 1.9195026023880737, - "grad_norm": 0.6746324896812439, - "learning_rate": 8.303250198864021e-08, - "loss": 0.2157, - "step": 20376 - }, - { - "epoch": 1.9195968064812416, - "grad_norm": 0.7027804255485535, - "learning_rate": 8.283842909237671e-08, - "loss": 0.1923, - "step": 20377 - }, - { - "epoch": 1.9196910105744096, - "grad_norm": 0.7159023284912109, - "learning_rate": 8.264458232273398e-08, - "loss": 0.2157, 
- "step": 20378 - }, - { - "epoch": 1.9197852146675773, - "grad_norm": 0.6947713494300842, - "learning_rate": 8.24509616841318e-08, - "loss": 0.2016, - "step": 20379 - }, - { - "epoch": 1.919879418760745, - "grad_norm": 0.694394052028656, - "learning_rate": 8.225756718098554e-08, - "loss": 0.2118, - "step": 20380 - }, - { - "epoch": 1.919973622853913, - "grad_norm": 0.7258616089820862, - "learning_rate": 8.206439881770611e-08, - "loss": 0.204, - "step": 20381 - }, - { - "epoch": 1.920067826947081, - "grad_norm": 0.6162565350532532, - "learning_rate": 8.187145659869445e-08, - "loss": 0.1816, - "step": 20382 - }, - { - "epoch": 1.9201620310402487, - "grad_norm": 0.7104194164276123, - "learning_rate": 8.167874052835367e-08, - "loss": 0.1873, - "step": 20383 - }, - { - "epoch": 1.9202562351334165, - "grad_norm": 0.699333667755127, - "learning_rate": 8.148625061107695e-08, - "loss": 0.2319, - "step": 20384 - }, - { - "epoch": 1.9203504392265844, - "grad_norm": 0.629817008972168, - "learning_rate": 8.1293986851253e-08, - "loss": 0.1998, - "step": 20385 - }, - { - "epoch": 1.9204446433197524, - "grad_norm": 0.7063592672348022, - "learning_rate": 8.110194925326498e-08, - "loss": 0.1816, - "step": 20386 - }, - { - "epoch": 1.9205388474129201, - "grad_norm": 0.6157312989234924, - "learning_rate": 8.09101378214927e-08, - "loss": 0.1853, - "step": 20387 - }, - { - "epoch": 1.9206330515060879, - "grad_norm": 0.6738056540489197, - "learning_rate": 8.071855256030936e-08, - "loss": 0.2251, - "step": 20388 - }, - { - "epoch": 1.9207272555992558, - "grad_norm": 0.636600136756897, - "learning_rate": 8.052719347408367e-08, - "loss": 0.1744, - "step": 20389 - }, - { - "epoch": 1.9208214596924238, - "grad_norm": 0.6762627363204956, - "learning_rate": 8.03360605671788e-08, - "loss": 0.191, - "step": 20390 - }, - { - "epoch": 1.9209156637855915, - "grad_norm": 0.6872976422309875, - "learning_rate": 8.014515384395239e-08, - "loss": 0.1777, - "step": 20391 - }, - { - "epoch": 1.9210098678787593, - "grad_norm": 0.6908997893333435, - "learning_rate": 7.99544733087576e-08, - "loss": 0.1986, - "step": 20392 - }, - { - "epoch": 1.9211040719719272, - "grad_norm": 0.6895566582679749, - "learning_rate": 7.976401896594322e-08, - "loss": 0.1888, - "step": 20393 - }, - { - "epoch": 1.9211982760650952, - "grad_norm": 0.5949784517288208, - "learning_rate": 7.957379081985017e-08, - "loss": 0.1556, - "step": 20394 - }, - { - "epoch": 1.921292480158263, - "grad_norm": 0.6485186219215393, - "learning_rate": 7.938378887481834e-08, - "loss": 0.1813, - "step": 20395 - }, - { - "epoch": 1.9213866842514307, - "grad_norm": 0.6753706336021423, - "learning_rate": 7.91940131351776e-08, - "loss": 0.1891, - "step": 20396 - }, - { - "epoch": 1.9214808883445986, - "grad_norm": 0.73382967710495, - "learning_rate": 7.90044636052556e-08, - "loss": 0.219, - "step": 20397 - }, - { - "epoch": 1.9215750924377666, - "grad_norm": 0.6741048097610474, - "learning_rate": 7.881514028937664e-08, - "loss": 0.158, - "step": 20398 - }, - { - "epoch": 1.9216692965309343, - "grad_norm": 0.7196323871612549, - "learning_rate": 7.862604319185507e-08, - "loss": 0.2049, - "step": 20399 - }, - { - "epoch": 1.921763500624102, - "grad_norm": 0.6244286298751831, - "learning_rate": 7.843717231700299e-08, - "loss": 0.194, - "step": 20400 - }, - { - "epoch": 1.92185770471727, - "grad_norm": 0.7138739228248596, - "learning_rate": 7.824852766912806e-08, - "loss": 0.1892, - "step": 20401 - }, - { - "epoch": 1.9219519088104378, - "grad_norm": 0.6458199620246887, - 
"learning_rate": 7.806010925253016e-08, - "loss": 0.2042, - "step": 20402 - }, - { - "epoch": 1.9220461129036055, - "grad_norm": 0.6492854952812195, - "learning_rate": 7.787191707150699e-08, - "loss": 0.2186, - "step": 20403 - }, - { - "epoch": 1.9221403169967735, - "grad_norm": 0.6758908033370972, - "learning_rate": 7.768395113034955e-08, - "loss": 0.1919, - "step": 20404 - }, - { - "epoch": 1.9222345210899414, - "grad_norm": 0.7675185799598694, - "learning_rate": 7.749621143334218e-08, - "loss": 0.2034, - "step": 20405 - }, - { - "epoch": 1.9223287251831092, - "grad_norm": 0.6330274939537048, - "learning_rate": 7.730869798476704e-08, - "loss": 0.18, - "step": 20406 - }, - { - "epoch": 1.922422929276277, - "grad_norm": 0.7554860711097717, - "learning_rate": 7.712141078889957e-08, - "loss": 0.1995, - "step": 20407 - }, - { - "epoch": 1.9225171333694449, - "grad_norm": 0.7081100940704346, - "learning_rate": 7.693434985000969e-08, - "loss": 0.1805, - "step": 20408 - }, - { - "epoch": 1.9226113374626128, - "grad_norm": 0.6767376661300659, - "learning_rate": 7.67475151723629e-08, - "loss": 0.2089, - "step": 20409 - }, - { - "epoch": 1.9227055415557806, - "grad_norm": 0.7426806092262268, - "learning_rate": 7.65609067602191e-08, - "loss": 0.1946, - "step": 20410 - }, - { - "epoch": 1.9227997456489483, - "grad_norm": 0.6279316544532776, - "learning_rate": 7.637452461783379e-08, - "loss": 0.1672, - "step": 20411 - }, - { - "epoch": 1.9228939497421162, - "grad_norm": 0.6278228163719177, - "learning_rate": 7.618836874945579e-08, - "loss": 0.1907, - "step": 20412 - }, - { - "epoch": 1.9229881538352842, - "grad_norm": 0.7160715460777283, - "learning_rate": 7.60024391593317e-08, - "loss": 0.2292, - "step": 20413 - }, - { - "epoch": 1.923082357928452, - "grad_norm": 0.6022506356239319, - "learning_rate": 7.581673585169924e-08, - "loss": 0.1743, - "step": 20414 - }, - { - "epoch": 1.9231765620216197, - "grad_norm": 0.630576491355896, - "learning_rate": 7.56312588307928e-08, - "loss": 0.1912, - "step": 20415 - }, - { - "epoch": 1.9232707661147876, - "grad_norm": 0.6564637422561646, - "learning_rate": 7.544600810084234e-08, - "loss": 0.1843, - "step": 20416 - }, - { - "epoch": 1.9233649702079556, - "grad_norm": 0.6240977644920349, - "learning_rate": 7.526098366607227e-08, - "loss": 0.1947, - "step": 20417 - }, - { - "epoch": 1.9234591743011233, - "grad_norm": 0.6668100953102112, - "learning_rate": 7.507618553069918e-08, - "loss": 0.1758, - "step": 20418 - }, - { - "epoch": 1.923553378394291, - "grad_norm": 0.6493514180183411, - "learning_rate": 7.489161369893971e-08, - "loss": 0.1886, - "step": 20419 - }, - { - "epoch": 1.923647582487459, - "grad_norm": 0.6922727823257446, - "learning_rate": 7.470726817500052e-08, - "loss": 0.2163, - "step": 20420 - }, - { - "epoch": 1.923741786580627, - "grad_norm": 0.6386302709579468, - "learning_rate": 7.452314896308488e-08, - "loss": 0.1744, - "step": 20421 - }, - { - "epoch": 1.9238359906737947, - "grad_norm": 0.6456319689750671, - "learning_rate": 7.433925606739168e-08, - "loss": 0.1942, - "step": 20422 - }, - { - "epoch": 1.9239301947669625, - "grad_norm": 0.655189573764801, - "learning_rate": 7.41555894921131e-08, - "loss": 0.2088, - "step": 20423 - }, - { - "epoch": 1.9240243988601304, - "grad_norm": 0.7557071447372437, - "learning_rate": 7.397214924143914e-08, - "loss": 0.2351, - "step": 20424 - }, - { - "epoch": 1.9241186029532984, - "grad_norm": 0.6467447876930237, - "learning_rate": 7.378893531954979e-08, - "loss": 0.1937, - "step": 20425 - }, - { - "epoch": 
1.9242128070464661, - "grad_norm": 0.6445953845977783, - "learning_rate": 7.360594773062502e-08, - "loss": 0.1799, - "step": 20426 - }, - { - "epoch": 1.9243070111396339, - "grad_norm": 0.7056514620780945, - "learning_rate": 7.342318647883595e-08, - "loss": 0.1942, - "step": 20427 - }, - { - "epoch": 1.9244012152328018, - "grad_norm": 0.6664270162582397, - "learning_rate": 7.324065156834925e-08, - "loss": 0.1935, - "step": 20428 - }, - { - "epoch": 1.9244954193259698, - "grad_norm": 0.6955601572990417, - "learning_rate": 7.305834300332715e-08, - "loss": 0.1971, - "step": 20429 - }, - { - "epoch": 1.9245896234191375, - "grad_norm": 0.6699842214584351, - "learning_rate": 7.287626078792854e-08, - "loss": 0.1635, - "step": 20430 - }, - { - "epoch": 1.9246838275123053, - "grad_norm": 0.6588364839553833, - "learning_rate": 7.269440492630342e-08, - "loss": 0.1974, - "step": 20431 - }, - { - "epoch": 1.9247780316054732, - "grad_norm": 0.629204273223877, - "learning_rate": 7.251277542259849e-08, - "loss": 0.1776, - "step": 20432 - }, - { - "epoch": 1.9248722356986412, - "grad_norm": 0.6657838225364685, - "learning_rate": 7.233137228095599e-08, - "loss": 0.1848, - "step": 20433 - }, - { - "epoch": 1.924966439791809, - "grad_norm": 0.6298301219940186, - "learning_rate": 7.215019550551039e-08, - "loss": 0.1993, - "step": 20434 - }, - { - "epoch": 1.9250606438849767, - "grad_norm": 0.982568085193634, - "learning_rate": 7.196924510039505e-08, - "loss": 0.1842, - "step": 20435 - }, - { - "epoch": 1.9251548479781446, - "grad_norm": 0.7430713772773743, - "learning_rate": 7.178852106973444e-08, - "loss": 0.2153, - "step": 20436 - }, - { - "epoch": 1.9252490520713126, - "grad_norm": 0.6544826030731201, - "learning_rate": 7.160802341765083e-08, - "loss": 0.1877, - "step": 20437 - }, - { - "epoch": 1.9253432561644803, - "grad_norm": 0.8141215443611145, - "learning_rate": 7.142775214825759e-08, - "loss": 0.1938, - "step": 20438 - }, - { - "epoch": 1.925437460257648, - "grad_norm": 0.6867548823356628, - "learning_rate": 7.124770726566699e-08, - "loss": 0.2055, - "step": 20439 - }, - { - "epoch": 1.925531664350816, - "grad_norm": 0.7175918817520142, - "learning_rate": 7.106788877398352e-08, - "loss": 0.1829, - "step": 20440 - }, - { - "epoch": 1.925625868443984, - "grad_norm": 0.6427897810935974, - "learning_rate": 7.088829667730834e-08, - "loss": 0.2073, - "step": 20441 - }, - { - "epoch": 1.9257200725371517, - "grad_norm": 0.6684300303459167, - "learning_rate": 7.070893097973486e-08, - "loss": 0.2149, - "step": 20442 - }, - { - "epoch": 1.9258142766303195, - "grad_norm": 0.6904497742652893, - "learning_rate": 7.052979168535312e-08, - "loss": 0.19, - "step": 20443 - }, - { - "epoch": 1.9259084807234874, - "grad_norm": 0.657542884349823, - "learning_rate": 7.035087879824987e-08, - "loss": 0.211, - "step": 20444 - }, - { - "epoch": 1.9260026848166554, - "grad_norm": 0.7140212059020996, - "learning_rate": 7.017219232250295e-08, - "loss": 0.2022, - "step": 20445 - }, - { - "epoch": 1.9260968889098231, - "grad_norm": 0.6623939871788025, - "learning_rate": 6.999373226218575e-08, - "loss": 0.1808, - "step": 20446 - }, - { - "epoch": 1.9261910930029909, - "grad_norm": 0.6854307055473328, - "learning_rate": 6.981549862136839e-08, - "loss": 0.2012, - "step": 20447 - }, - { - "epoch": 1.9262852970961588, - "grad_norm": 0.668220043182373, - "learning_rate": 6.963749140411647e-08, - "loss": 0.1721, - "step": 20448 - }, - { - "epoch": 1.9263795011893268, - "grad_norm": 0.6678497195243835, - "learning_rate": 
6.945971061448676e-08, - "loss": 0.1829, - "step": 20449 - }, - { - "epoch": 1.9264737052824945, - "grad_norm": 0.6869219541549683, - "learning_rate": 6.928215625653267e-08, - "loss": 0.206, - "step": 20450 - }, - { - "epoch": 1.9265679093756622, - "grad_norm": 0.7499255537986755, - "learning_rate": 6.910482833430432e-08, - "loss": 0.2119, - "step": 20451 - }, - { - "epoch": 1.9266621134688302, - "grad_norm": 0.667402982711792, - "learning_rate": 6.892772685184401e-08, - "loss": 0.1866, - "step": 20452 - }, - { - "epoch": 1.9267563175619982, - "grad_norm": 0.6190139651298523, - "learning_rate": 6.875085181318963e-08, - "loss": 0.1795, - "step": 20453 - }, - { - "epoch": 1.926850521655166, - "grad_norm": 0.7362930178642273, - "learning_rate": 6.857420322237574e-08, - "loss": 0.1715, - "step": 20454 - }, - { - "epoch": 1.9269447257483336, - "grad_norm": 0.6166073083877563, - "learning_rate": 6.83977810834291e-08, - "loss": 0.1835, - "step": 20455 - }, - { - "epoch": 1.9270389298415016, - "grad_norm": 0.7105567455291748, - "learning_rate": 6.822158540037204e-08, - "loss": 0.1931, - "step": 20456 - }, - { - "epoch": 1.9271331339346696, - "grad_norm": 0.6946137547492981, - "learning_rate": 6.80456161772236e-08, - "loss": 0.1973, - "step": 20457 - }, - { - "epoch": 1.9272273380278373, - "grad_norm": 0.6640714406967163, - "learning_rate": 6.7869873417995e-08, - "loss": 0.1764, - "step": 20458 - }, - { - "epoch": 1.927321542121005, - "grad_norm": 0.6363579630851746, - "learning_rate": 6.769435712669303e-08, - "loss": 0.2156, - "step": 20459 - }, - { - "epoch": 1.927415746214173, - "grad_norm": 0.6204269528388977, - "learning_rate": 6.751906730732116e-08, - "loss": 0.1818, - "step": 20460 - }, - { - "epoch": 1.927509950307341, - "grad_norm": 0.6275210380554199, - "learning_rate": 6.734400396387508e-08, - "loss": 0.1657, - "step": 20461 - }, - { - "epoch": 1.9276041544005087, - "grad_norm": 0.7040166258811951, - "learning_rate": 6.716916710034604e-08, - "loss": 0.2073, - "step": 20462 - }, - { - "epoch": 1.9276983584936764, - "grad_norm": 0.6579363346099854, - "learning_rate": 6.699455672072197e-08, - "loss": 0.1735, - "step": 20463 - }, - { - "epoch": 1.9277925625868444, - "grad_norm": 0.6550668478012085, - "learning_rate": 6.682017282898412e-08, - "loss": 0.183, - "step": 20464 - }, - { - "epoch": 1.9278867666800124, - "grad_norm": 0.6513266563415527, - "learning_rate": 6.664601542910709e-08, - "loss": 0.2005, - "step": 20465 - }, - { - "epoch": 1.92798097077318, - "grad_norm": 0.6602237224578857, - "learning_rate": 6.647208452506437e-08, - "loss": 0.206, - "step": 20466 - }, - { - "epoch": 1.9280751748663478, - "grad_norm": 0.6728419065475464, - "learning_rate": 6.629838012082057e-08, - "loss": 0.2028, - "step": 20467 - }, - { - "epoch": 1.9281693789595158, - "grad_norm": 0.6689519882202148, - "learning_rate": 6.612490222033585e-08, - "loss": 0.1919, - "step": 20468 - }, - { - "epoch": 1.9282635830526837, - "grad_norm": 0.675183892250061, - "learning_rate": 6.595165082756594e-08, - "loss": 0.1951, - "step": 20469 - }, - { - "epoch": 1.9283577871458515, - "grad_norm": 0.6705566644668579, - "learning_rate": 6.577862594646323e-08, - "loss": 0.2098, - "step": 20470 - }, - { - "epoch": 1.9284519912390192, - "grad_norm": 0.6774200201034546, - "learning_rate": 6.560582758097011e-08, - "loss": 0.2222, - "step": 20471 - }, - { - "epoch": 1.9285461953321872, - "grad_norm": 0.7202482223510742, - "learning_rate": 6.54332557350279e-08, - "loss": 0.2014, - "step": 20472 - }, - { - "epoch": 1.9286403994253551, 
- "grad_norm": 0.6623703241348267, - "learning_rate": 6.526091041257231e-08, - "loss": 0.2471, - "step": 20473 - }, - { - "epoch": 1.9287346035185229, - "grad_norm": 0.7550646662712097, - "learning_rate": 6.508879161753246e-08, - "loss": 0.2039, - "step": 20474 - }, - { - "epoch": 1.9288288076116906, - "grad_norm": 0.6356313824653625, - "learning_rate": 6.491689935383294e-08, - "loss": 0.1916, - "step": 20475 - }, - { - "epoch": 1.9289230117048586, - "grad_norm": 0.5859925746917725, - "learning_rate": 6.474523362539175e-08, - "loss": 0.1876, - "step": 20476 - }, - { - "epoch": 1.9290172157980265, - "grad_norm": 0.604534924030304, - "learning_rate": 6.457379443612467e-08, - "loss": 0.1855, - "step": 20477 - }, - { - "epoch": 1.9291114198911943, - "grad_norm": 0.6435129046440125, - "learning_rate": 6.440258178994185e-08, - "loss": 0.1972, - "step": 20478 - }, - { - "epoch": 1.929205623984362, - "grad_norm": 0.6097612380981445, - "learning_rate": 6.423159569074466e-08, - "loss": 0.1838, - "step": 20479 - }, - { - "epoch": 1.92929982807753, - "grad_norm": 0.8696154952049255, - "learning_rate": 6.40608361424333e-08, - "loss": 0.2129, - "step": 20480 - }, - { - "epoch": 1.929394032170698, - "grad_norm": 0.669417679309845, - "learning_rate": 6.389030314890132e-08, - "loss": 0.1839, - "step": 20481 - }, - { - "epoch": 1.9294882362638657, - "grad_norm": 0.6988387107849121, - "learning_rate": 6.371999671403672e-08, - "loss": 0.2303, - "step": 20482 - }, - { - "epoch": 1.9295824403570334, - "grad_norm": 0.6801834106445312, - "learning_rate": 6.354991684172307e-08, - "loss": 0.2104, - "step": 20483 - }, - { - "epoch": 1.9296766444502014, - "grad_norm": 0.6566222906112671, - "learning_rate": 6.338006353583837e-08, - "loss": 0.1936, - "step": 20484 - }, - { - "epoch": 1.9297708485433693, - "grad_norm": 0.6295554041862488, - "learning_rate": 6.321043680025506e-08, - "loss": 0.1843, - "step": 20485 - }, - { - "epoch": 1.929865052636537, - "grad_norm": 0.6945067048072815, - "learning_rate": 6.304103663884231e-08, - "loss": 0.1984, - "step": 20486 - }, - { - "epoch": 1.9299592567297048, - "grad_norm": 0.6206005811691284, - "learning_rate": 6.287186305546033e-08, - "loss": 0.1999, - "step": 20487 - }, - { - "epoch": 1.9300534608228728, - "grad_norm": 0.6710313558578491, - "learning_rate": 6.270291605396938e-08, - "loss": 0.1945, - "step": 20488 - }, - { - "epoch": 1.9301476649160407, - "grad_norm": 0.666602611541748, - "learning_rate": 6.253419563821971e-08, - "loss": 0.1935, - "step": 20489 - }, - { - "epoch": 1.9302418690092085, - "grad_norm": 0.6301133632659912, - "learning_rate": 6.236570181205826e-08, - "loss": 0.1839, - "step": 20490 - }, - { - "epoch": 1.9303360731023762, - "grad_norm": 0.5872913002967834, - "learning_rate": 6.219743457932859e-08, - "loss": 0.1781, - "step": 20491 - }, - { - "epoch": 1.9304302771955442, - "grad_norm": 0.7004070281982422, - "learning_rate": 6.202939394386542e-08, - "loss": 0.2036, - "step": 20492 - }, - { - "epoch": 1.9305244812887121, - "grad_norm": 0.657778799533844, - "learning_rate": 6.186157990950236e-08, - "loss": 0.1629, - "step": 20493 - }, - { - "epoch": 1.9306186853818799, - "grad_norm": 0.7054680585861206, - "learning_rate": 6.169399248006525e-08, - "loss": 0.2054, - "step": 20494 - }, - { - "epoch": 1.9307128894750476, - "grad_norm": 0.7021805644035339, - "learning_rate": 6.152663165937434e-08, - "loss": 0.1837, - "step": 20495 - }, - { - "epoch": 1.9308070935682156, - "grad_norm": 0.6459454894065857, - "learning_rate": 6.135949745124659e-08, - "loss": 
0.1786, - "step": 20496 - }, - { - "epoch": 1.9309012976613835, - "grad_norm": 0.6974847912788391, - "learning_rate": 6.119258985949227e-08, - "loss": 0.1964, - "step": 20497 - }, - { - "epoch": 1.9309955017545513, - "grad_norm": 0.6011584997177124, - "learning_rate": 6.102590888791837e-08, - "loss": 0.1849, - "step": 20498 - }, - { - "epoch": 1.931089705847719, - "grad_norm": 0.6021872758865356, - "learning_rate": 6.085945454032405e-08, - "loss": 0.1708, - "step": 20499 - }, - { - "epoch": 1.931183909940887, - "grad_norm": 0.649183452129364, - "learning_rate": 6.069322682050516e-08, - "loss": 0.1923, - "step": 20500 - }, - { - "epoch": 1.931278114034055, - "grad_norm": 0.6922840476036072, - "learning_rate": 6.052722573225312e-08, - "loss": 0.18, - "step": 20501 - }, - { - "epoch": 1.9313723181272227, - "grad_norm": 0.6575815677642822, - "learning_rate": 6.036145127935045e-08, - "loss": 0.2021, - "step": 20502 - }, - { - "epoch": 1.9314665222203904, - "grad_norm": 0.6628938317298889, - "learning_rate": 6.019590346557969e-08, - "loss": 0.1705, - "step": 20503 - }, - { - "epoch": 1.9315607263135584, - "grad_norm": 0.6496869921684265, - "learning_rate": 6.003058229471448e-08, - "loss": 0.1946, - "step": 20504 - }, - { - "epoch": 1.9316549304067263, - "grad_norm": 0.6441403031349182, - "learning_rate": 5.986548777052514e-08, - "loss": 0.183, - "step": 20505 - }, - { - "epoch": 1.931749134499894, - "grad_norm": 0.6747954487800598, - "learning_rate": 5.970061989677422e-08, - "loss": 0.1668, - "step": 20506 - }, - { - "epoch": 1.9318433385930618, - "grad_norm": 0.6658079028129578, - "learning_rate": 5.953597867722316e-08, - "loss": 0.2094, - "step": 20507 - }, - { - "epoch": 1.9319375426862297, - "grad_norm": 0.6353333592414856, - "learning_rate": 5.937156411562339e-08, - "loss": 0.1758, - "step": 20508 - }, - { - "epoch": 1.9320317467793977, - "grad_norm": 0.6521235108375549, - "learning_rate": 5.9207376215726364e-08, - "loss": 0.1817, - "step": 20509 - }, - { - "epoch": 1.9321259508725654, - "grad_norm": 0.6600897908210754, - "learning_rate": 5.9043414981275746e-08, - "loss": 0.2001, - "step": 20510 - }, - { - "epoch": 1.9322201549657332, - "grad_norm": 0.7067880630493164, - "learning_rate": 5.8879680416007446e-08, - "loss": 0.2525, - "step": 20511 - }, - { - "epoch": 1.9323143590589011, - "grad_norm": 0.6741988658905029, - "learning_rate": 5.871617252365847e-08, - "loss": 0.2186, - "step": 20512 - }, - { - "epoch": 1.932408563152069, - "grad_norm": 0.6814460158348083, - "learning_rate": 5.855289130795472e-08, - "loss": 0.1737, - "step": 20513 - }, - { - "epoch": 1.9325027672452368, - "grad_norm": 0.6088449954986572, - "learning_rate": 5.838983677261878e-08, - "loss": 0.1628, - "step": 20514 - }, - { - "epoch": 1.9325969713384046, - "grad_norm": 0.6774396896362305, - "learning_rate": 5.8227008921371006e-08, - "loss": 0.1886, - "step": 20515 - }, - { - "epoch": 1.9326911754315725, - "grad_norm": 0.6780672073364258, - "learning_rate": 5.806440775792177e-08, - "loss": 0.1772, - "step": 20516 - }, - { - "epoch": 1.9327853795247405, - "grad_norm": 0.6692124605178833, - "learning_rate": 5.7902033285979206e-08, - "loss": 0.2049, - "step": 20517 - }, - { - "epoch": 1.9328795836179082, - "grad_norm": 0.6149314045906067, - "learning_rate": 5.773988550924703e-08, - "loss": 0.1849, - "step": 20518 - }, - { - "epoch": 1.932973787711076, - "grad_norm": 0.695295512676239, - "learning_rate": 5.757796443142116e-08, - "loss": 0.2055, - "step": 20519 - }, - { - "epoch": 1.933067991804244, - "grad_norm": 
0.7034395933151245, - "learning_rate": 5.74162700561931e-08, - "loss": 0.1683, - "step": 20520 - }, - { - "epoch": 1.933162195897412, - "grad_norm": 0.667819619178772, - "learning_rate": 5.7254802387252116e-08, - "loss": 0.1928, - "step": 20521 - }, - { - "epoch": 1.9332563999905796, - "grad_norm": 0.6549688577651978, - "learning_rate": 5.709356142827749e-08, - "loss": 0.1894, - "step": 20522 - }, - { - "epoch": 1.9333506040837474, - "grad_norm": 0.6827957630157471, - "learning_rate": 5.6932547182946274e-08, - "loss": 0.2203, - "step": 20523 - }, - { - "epoch": 1.9334448081769153, - "grad_norm": 0.6484357118606567, - "learning_rate": 5.677175965493109e-08, - "loss": 0.1796, - "step": 20524 - }, - { - "epoch": 1.9335390122700833, - "grad_norm": 0.6589913368225098, - "learning_rate": 5.661119884789679e-08, - "loss": 0.2064, - "step": 20525 - }, - { - "epoch": 1.933633216363251, - "grad_norm": 0.7631707191467285, - "learning_rate": 5.645086476550488e-08, - "loss": 0.2103, - "step": 20526 - }, - { - "epoch": 1.9337274204564188, - "grad_norm": 0.6485490202903748, - "learning_rate": 5.6290757411411324e-08, - "loss": 0.1945, - "step": 20527 - }, - { - "epoch": 1.9338216245495867, - "grad_norm": 0.6263374090194702, - "learning_rate": 5.613087678926765e-08, - "loss": 0.176, - "step": 20528 - }, - { - "epoch": 1.9339158286427547, - "grad_norm": 0.6242257952690125, - "learning_rate": 5.5971222902716506e-08, - "loss": 0.1901, - "step": 20529 - }, - { - "epoch": 1.9340100327359224, - "grad_norm": 0.652399480342865, - "learning_rate": 5.581179575540163e-08, - "loss": 0.1974, - "step": 20530 - }, - { - "epoch": 1.9341042368290902, - "grad_norm": 0.6437662243843079, - "learning_rate": 5.5652595350956795e-08, - "loss": 0.1727, - "step": 20531 - }, - { - "epoch": 1.9341984409222581, - "grad_norm": 0.587958812713623, - "learning_rate": 5.549362169301131e-08, - "loss": 0.1661, - "step": 20532 - }, - { - "epoch": 1.934292645015426, - "grad_norm": 0.6266226172447205, - "learning_rate": 5.533487478519117e-08, - "loss": 0.1831, - "step": 20533 - }, - { - "epoch": 1.9343868491085938, - "grad_norm": 0.7345966100692749, - "learning_rate": 5.5176354631115703e-08, - "loss": 0.1631, - "step": 20534 - }, - { - "epoch": 1.9344810532017616, - "grad_norm": 0.6884997487068176, - "learning_rate": 5.501806123439868e-08, - "loss": 0.182, - "step": 20535 - }, - { - "epoch": 1.9345752572949295, - "grad_norm": 0.6600018739700317, - "learning_rate": 5.485999459865055e-08, - "loss": 0.2295, - "step": 20536 - }, - { - "epoch": 1.9346694613880975, - "grad_norm": 0.6569324135780334, - "learning_rate": 5.47021547274762e-08, - "loss": 0.1773, - "step": 20537 - }, - { - "epoch": 1.9347636654812652, - "grad_norm": 0.7247839570045471, - "learning_rate": 5.4544541624471645e-08, - "loss": 0.2064, - "step": 20538 - }, - { - "epoch": 1.934857869574433, - "grad_norm": 0.6159790754318237, - "learning_rate": 5.438715529323291e-08, - "loss": 0.1731, - "step": 20539 - }, - { - "epoch": 1.934952073667601, - "grad_norm": 0.6411365270614624, - "learning_rate": 5.422999573734822e-08, - "loss": 0.183, - "step": 20540 - }, - { - "epoch": 1.9350462777607687, - "grad_norm": 0.7017724514007568, - "learning_rate": 5.4073062960401376e-08, - "loss": 0.1574, - "step": 20541 - }, - { - "epoch": 1.9351404818539364, - "grad_norm": 0.7132136821746826, - "learning_rate": 5.391635696596953e-08, - "loss": 0.2154, - "step": 20542 - }, - { - "epoch": 1.9352346859471043, - "grad_norm": 0.6471320390701294, - "learning_rate": 5.3759877757628696e-08, - "loss": 0.1964, - 
"step": 20543 - }, - { - "epoch": 1.9353288900402723, - "grad_norm": 0.6615820527076721, - "learning_rate": 5.36036253389427e-08, - "loss": 0.1993, - "step": 20544 - }, - { - "epoch": 1.93542309413344, - "grad_norm": 0.6011489033699036, - "learning_rate": 5.3447599713477574e-08, - "loss": 0.1711, - "step": 20545 - }, - { - "epoch": 1.9355172982266078, - "grad_norm": 0.6503063440322876, - "learning_rate": 5.3291800884789356e-08, - "loss": 0.2259, - "step": 20546 - }, - { - "epoch": 1.9356115023197757, - "grad_norm": 0.7811630964279175, - "learning_rate": 5.313622885643077e-08, - "loss": 0.2019, - "step": 20547 - }, - { - "epoch": 1.9357057064129437, - "grad_norm": 0.6372681856155396, - "learning_rate": 5.298088363195009e-08, - "loss": 0.1767, - "step": 20548 - }, - { - "epoch": 1.9357999105061114, - "grad_norm": 0.6439894437789917, - "learning_rate": 5.2825765214887806e-08, - "loss": 0.1816, - "step": 20549 - }, - { - "epoch": 1.9358941145992792, - "grad_norm": 0.6761665344238281, - "learning_rate": 5.2670873608782205e-08, - "loss": 0.1906, - "step": 20550 - }, - { - "epoch": 1.9359883186924471, - "grad_norm": 0.6620761156082153, - "learning_rate": 5.251620881716379e-08, - "loss": 0.1579, - "step": 20551 - }, - { - "epoch": 1.936082522785615, - "grad_norm": 0.7432847619056702, - "learning_rate": 5.236177084356087e-08, - "loss": 0.2337, - "step": 20552 - }, - { - "epoch": 1.9361767268787828, - "grad_norm": 0.6455764770507812, - "learning_rate": 5.2207559691492825e-08, - "loss": 0.1861, - "step": 20553 - }, - { - "epoch": 1.9362709309719506, - "grad_norm": 0.6577297449111938, - "learning_rate": 5.205357536447797e-08, - "loss": 0.188, - "step": 20554 - }, - { - "epoch": 1.9363651350651185, - "grad_norm": 0.6162768006324768, - "learning_rate": 5.1899817866025715e-08, - "loss": 0.193, - "step": 20555 - }, - { - "epoch": 1.9364593391582865, - "grad_norm": 0.7234073877334595, - "learning_rate": 5.174628719964325e-08, - "loss": 0.2097, - "step": 20556 - }, - { - "epoch": 1.9365535432514542, - "grad_norm": 0.59145188331604, - "learning_rate": 5.159298336883001e-08, - "loss": 0.1879, - "step": 20557 - }, - { - "epoch": 1.936647747344622, - "grad_norm": 0.6481671929359436, - "learning_rate": 5.143990637708318e-08, - "loss": 0.1887, - "step": 20558 - }, - { - "epoch": 1.93674195143779, - "grad_norm": 0.7127054929733276, - "learning_rate": 5.12870562278911e-08, - "loss": 0.2034, - "step": 20559 - }, - { - "epoch": 1.936836155530958, - "grad_norm": 0.7050976753234863, - "learning_rate": 5.113443292474096e-08, - "loss": 0.231, - "step": 20560 - }, - { - "epoch": 1.9369303596241256, - "grad_norm": 0.6505662798881531, - "learning_rate": 5.098203647111111e-08, - "loss": 0.185, - "step": 20561 - }, - { - "epoch": 1.9370245637172934, - "grad_norm": 0.6301153898239136, - "learning_rate": 5.082986687047764e-08, - "loss": 0.194, - "step": 20562 - }, - { - "epoch": 1.9371187678104613, - "grad_norm": 0.6156437397003174, - "learning_rate": 5.0677924126311116e-08, - "loss": 0.1698, - "step": 20563 - }, - { - "epoch": 1.9372129719036293, - "grad_norm": 0.6495509743690491, - "learning_rate": 5.0526208242073214e-08, - "loss": 0.1731, - "step": 20564 - }, - { - "epoch": 1.937307175996797, - "grad_norm": 0.6313520073890686, - "learning_rate": 5.037471922122561e-08, - "loss": 0.1806, - "step": 20565 - }, - { - "epoch": 1.9374013800899648, - "grad_norm": 0.6380580067634583, - "learning_rate": 5.0223457067222205e-08, - "loss": 0.1929, - "step": 20566 - }, - { - "epoch": 1.9374955841831327, - "grad_norm": 0.6428207159042358, 
- "learning_rate": 5.007242178351024e-08, - "loss": 0.1943, - "step": 20567 - }, - { - "epoch": 1.9375897882763007, - "grad_norm": 0.657741129398346, - "learning_rate": 4.992161337353696e-08, - "loss": 0.1745, - "step": 20568 - }, - { - "epoch": 1.9376839923694684, - "grad_norm": 0.6035696268081665, - "learning_rate": 4.977103184073739e-08, - "loss": 0.1974, - "step": 20569 - }, - { - "epoch": 1.9377781964626362, - "grad_norm": 0.6318914890289307, - "learning_rate": 4.9620677188546574e-08, - "loss": 0.2102, - "step": 20570 - }, - { - "epoch": 1.9378724005558041, - "grad_norm": 0.6037071943283081, - "learning_rate": 4.947054942039398e-08, - "loss": 0.1714, - "step": 20571 - }, - { - "epoch": 1.937966604648972, - "grad_norm": 0.575206458568573, - "learning_rate": 4.93206485397002e-08, - "loss": 0.1661, - "step": 20572 - }, - { - "epoch": 1.9380608087421398, - "grad_norm": 0.6761251091957092, - "learning_rate": 4.9170974549885844e-08, - "loss": 0.1891, - "step": 20573 - }, - { - "epoch": 1.9381550128353076, - "grad_norm": 0.6738649010658264, - "learning_rate": 4.902152745436261e-08, - "loss": 0.1772, - "step": 20574 - }, - { - "epoch": 1.9382492169284755, - "grad_norm": 0.7871718406677246, - "learning_rate": 4.8872307256537796e-08, - "loss": 0.1654, - "step": 20575 - }, - { - "epoch": 1.9383434210216435, - "grad_norm": 0.6799382567405701, - "learning_rate": 4.872331395981311e-08, - "loss": 0.1969, - "step": 20576 - }, - { - "epoch": 1.9384376251148112, - "grad_norm": 0.7592743039131165, - "learning_rate": 4.857454756758806e-08, - "loss": 0.1738, - "step": 20577 - }, - { - "epoch": 1.938531829207979, - "grad_norm": 0.6246234178543091, - "learning_rate": 4.842600808325326e-08, - "loss": 0.1758, - "step": 20578 - }, - { - "epoch": 1.938626033301147, - "grad_norm": 0.6319633722305298, - "learning_rate": 4.82776955101949e-08, - "loss": 0.1651, - "step": 20579 - }, - { - "epoch": 1.9387202373943149, - "grad_norm": 0.632641077041626, - "learning_rate": 4.812960985179693e-08, - "loss": 0.1751, - "step": 20580 - }, - { - "epoch": 1.9388144414874826, - "grad_norm": 0.6395126581192017, - "learning_rate": 4.798175111143444e-08, - "loss": 0.1976, - "step": 20581 - }, - { - "epoch": 1.9389086455806503, - "grad_norm": 0.7053261995315552, - "learning_rate": 4.783411929247805e-08, - "loss": 0.2402, - "step": 20582 - }, - { - "epoch": 1.9390028496738183, - "grad_norm": 0.6206017136573792, - "learning_rate": 4.76867143982962e-08, - "loss": 0.1891, - "step": 20583 - }, - { - "epoch": 1.9390970537669863, - "grad_norm": 0.5933031439781189, - "learning_rate": 4.7539536432249514e-08, - "loss": 0.1845, - "step": 20584 - }, - { - "epoch": 1.939191257860154, - "grad_norm": 0.6479809284210205, - "learning_rate": 4.7392585397691984e-08, - "loss": 0.1945, - "step": 20585 - }, - { - "epoch": 1.9392854619533217, - "grad_norm": 0.7193289995193481, - "learning_rate": 4.724586129797537e-08, - "loss": 0.2043, - "step": 20586 - }, - { - "epoch": 1.9393796660464897, - "grad_norm": 0.6792411804199219, - "learning_rate": 4.709936413644589e-08, - "loss": 0.1795, - "step": 20587 - }, - { - "epoch": 1.9394738701396577, - "grad_norm": 0.6745784878730774, - "learning_rate": 4.695309391644309e-08, - "loss": 0.1985, - "step": 20588 - }, - { - "epoch": 1.9395680742328254, - "grad_norm": 0.6432570219039917, - "learning_rate": 4.680705064130209e-08, - "loss": 0.1558, - "step": 20589 - }, - { - "epoch": 1.9396622783259931, - "grad_norm": 0.5894613265991211, - "learning_rate": 4.6661234314353546e-08, - "loss": 0.1871, - "step": 20590 - }, - { 
- "epoch": 1.939756482419161, - "grad_norm": 0.6259870529174805, - "learning_rate": 4.651564493892258e-08, - "loss": 0.1642, - "step": 20591 - }, - { - "epoch": 1.939850686512329, - "grad_norm": 0.70440274477005, - "learning_rate": 4.6370282518327646e-08, - "loss": 0.1899, - "step": 20592 - }, - { - "epoch": 1.9399448906054968, - "grad_norm": 0.6017360687255859, - "learning_rate": 4.622514705588388e-08, - "loss": 0.1562, - "step": 20593 - }, - { - "epoch": 1.9400390946986645, - "grad_norm": 0.6458675265312195, - "learning_rate": 4.608023855490085e-08, - "loss": 0.1943, - "step": 20594 - }, - { - "epoch": 1.9401332987918325, - "grad_norm": 0.6895346641540527, - "learning_rate": 4.593555701868257e-08, - "loss": 0.2013, - "step": 20595 - }, - { - "epoch": 1.9402275028850005, - "grad_norm": 0.7026508450508118, - "learning_rate": 4.5791102450527536e-08, - "loss": 0.2151, - "step": 20596 - }, - { - "epoch": 1.9403217069781682, - "grad_norm": 0.6759473085403442, - "learning_rate": 4.564687485372976e-08, - "loss": 0.1829, - "step": 20597 - }, - { - "epoch": 1.940415911071336, - "grad_norm": 0.6939036846160889, - "learning_rate": 4.550287423157773e-08, - "loss": 0.1713, - "step": 20598 - }, - { - "epoch": 1.940510115164504, - "grad_norm": 0.6667701601982117, - "learning_rate": 4.5359100587355484e-08, - "loss": 0.1779, - "step": 20599 - }, - { - "epoch": 1.9406043192576719, - "grad_norm": 0.7237780690193176, - "learning_rate": 4.5215553924340406e-08, - "loss": 0.2004, - "step": 20600 - }, - { - "epoch": 1.9406985233508396, - "grad_norm": 0.635470986366272, - "learning_rate": 4.507223424580765e-08, - "loss": 0.1792, - "step": 20601 - }, - { - "epoch": 1.9407927274440073, - "grad_norm": 0.5838043689727783, - "learning_rate": 4.4929141555021264e-08, - "loss": 0.17, - "step": 20602 - }, - { - "epoch": 1.9408869315371753, - "grad_norm": 0.7381105422973633, - "learning_rate": 4.478627585524753e-08, - "loss": 0.1825, - "step": 20603 - }, - { - "epoch": 1.9409811356303432, - "grad_norm": 0.6283450126647949, - "learning_rate": 4.464363714974274e-08, - "loss": 0.1676, - "step": 20604 - }, - { - "epoch": 1.941075339723511, - "grad_norm": 0.6756641268730164, - "learning_rate": 4.4501225441759834e-08, - "loss": 0.2005, - "step": 20605 - }, - { - "epoch": 1.9411695438166787, - "grad_norm": 0.6628778576850891, - "learning_rate": 4.4359040734544e-08, - "loss": 0.1685, - "step": 20606 - }, - { - "epoch": 1.9412637479098467, - "grad_norm": 0.6249292492866516, - "learning_rate": 4.4217083031339314e-08, - "loss": 0.1731, - "step": 20607 - }, - { - "epoch": 1.9413579520030146, - "grad_norm": 0.5770609378814697, - "learning_rate": 4.407535233538318e-08, - "loss": 0.1637, - "step": 20608 - }, - { - "epoch": 1.9414521560961824, - "grad_norm": 0.6798595190048218, - "learning_rate": 4.3933848649904133e-08, - "loss": 0.1909, - "step": 20609 - }, - { - "epoch": 1.9415463601893501, - "grad_norm": 0.6528865098953247, - "learning_rate": 4.37925719781318e-08, - "loss": 0.1786, - "step": 20610 - }, - { - "epoch": 1.941640564282518, - "grad_norm": 0.7164531946182251, - "learning_rate": 4.365152232328695e-08, - "loss": 0.1838, - "step": 20611 - }, - { - "epoch": 1.941734768375686, - "grad_norm": 0.5923558473587036, - "learning_rate": 4.351069968858479e-08, - "loss": 0.189, - "step": 20612 - }, - { - "epoch": 1.9418289724688538, - "grad_norm": 0.645929217338562, - "learning_rate": 4.3370104077236075e-08, - "loss": 0.2127, - "step": 20613 - }, - { - "epoch": 1.9419231765620215, - "grad_norm": 0.6221653819084167, - "learning_rate": 
4.322973549244713e-08, - "loss": 0.1982, - "step": 20614 - }, - { - "epoch": 1.9420173806551895, - "grad_norm": 0.7193864583969116, - "learning_rate": 4.308959393741985e-08, - "loss": 0.1985, - "step": 20615 - }, - { - "epoch": 1.9421115847483574, - "grad_norm": 0.6187189221382141, - "learning_rate": 4.294967941534722e-08, - "loss": 0.2, - "step": 20616 - }, - { - "epoch": 1.9422057888415252, - "grad_norm": 0.7417852878570557, - "learning_rate": 4.280999192942115e-08, - "loss": 0.1993, - "step": 20617 - }, - { - "epoch": 1.942299992934693, - "grad_norm": 0.6823968291282654, - "learning_rate": 4.267053148282685e-08, - "loss": 0.2025, - "step": 20618 - }, - { - "epoch": 1.9423941970278609, - "grad_norm": 0.6772221922874451, - "learning_rate": 4.25312980787429e-08, - "loss": 0.1873, - "step": 20619 - }, - { - "epoch": 1.9424884011210288, - "grad_norm": 0.6777254939079285, - "learning_rate": 4.239229172034565e-08, - "loss": 0.2373, - "step": 20620 - }, - { - "epoch": 1.9425826052141966, - "grad_norm": 0.6470945477485657, - "learning_rate": 4.2253512410803666e-08, - "loss": 0.187, - "step": 20621 - }, - { - "epoch": 1.9426768093073643, - "grad_norm": 0.6564001441001892, - "learning_rate": 4.211496015328109e-08, - "loss": 0.1966, - "step": 20622 - }, - { - "epoch": 1.9427710134005323, - "grad_norm": 0.6375804543495178, - "learning_rate": 4.197663495093873e-08, - "loss": 0.2003, - "step": 20623 - }, - { - "epoch": 1.9428652174937002, - "grad_norm": 0.644978404045105, - "learning_rate": 4.183853680692851e-08, - "loss": 0.1998, - "step": 20624 - }, - { - "epoch": 1.942959421586868, - "grad_norm": 0.6153957843780518, - "learning_rate": 4.170066572440124e-08, - "loss": 0.1617, - "step": 20625 - }, - { - "epoch": 1.9430536256800357, - "grad_norm": 0.6638757586479187, - "learning_rate": 4.156302170649773e-08, - "loss": 0.1924, - "step": 20626 - }, - { - "epoch": 1.9431478297732037, - "grad_norm": 0.6751947402954102, - "learning_rate": 4.1425604756359926e-08, - "loss": 0.1853, - "step": 20627 - }, - { - "epoch": 1.9432420338663716, - "grad_norm": 0.6615750789642334, - "learning_rate": 4.1288414877118656e-08, - "loss": 0.2287, - "step": 20628 - }, - { - "epoch": 1.9433362379595394, - "grad_norm": 0.7551138401031494, - "learning_rate": 4.115145207190363e-08, - "loss": 0.1796, - "step": 20629 - }, - { - "epoch": 1.943430442052707, - "grad_norm": 0.7317015528678894, - "learning_rate": 4.10147163438368e-08, - "loss": 0.1991, - "step": 20630 - }, - { - "epoch": 1.943524646145875, - "grad_norm": 0.656338632106781, - "learning_rate": 4.0878207696036785e-08, - "loss": 0.2065, - "step": 20631 - }, - { - "epoch": 1.943618850239043, - "grad_norm": 0.6661907434463501, - "learning_rate": 4.074192613161554e-08, - "loss": 0.1673, - "step": 20632 - }, - { - "epoch": 1.9437130543322108, - "grad_norm": 0.6182055473327637, - "learning_rate": 4.060587165368057e-08, - "loss": 0.1812, - "step": 20633 - }, - { - "epoch": 1.9438072584253785, - "grad_norm": 0.5980770587921143, - "learning_rate": 4.047004426533385e-08, - "loss": 0.1686, - "step": 20634 - }, - { - "epoch": 1.9439014625185465, - "grad_norm": 0.6584701538085938, - "learning_rate": 4.03344439696729e-08, - "loss": 0.197, - "step": 20635 - }, - { - "epoch": 1.9439956666117144, - "grad_norm": 0.5787577033042908, - "learning_rate": 4.019907076978968e-08, - "loss": 0.1736, - "step": 20636 - }, - { - "epoch": 1.9440898707048822, - "grad_norm": 0.6624363660812378, - "learning_rate": 4.0063924668770625e-08, - "loss": 0.1777, - "step": 20637 - }, - { - "epoch": 
1.9441840747980499, - "grad_norm": 0.7424578666687012, - "learning_rate": 3.9929005669697704e-08, - "loss": 0.2047, - "step": 20638 - }, - { - "epoch": 1.9442782788912178, - "grad_norm": 0.7298041582107544, - "learning_rate": 3.9794313775647356e-08, - "loss": 0.2012, - "step": 20639 - }, - { - "epoch": 1.9443724829843858, - "grad_norm": 0.6549528241157532, - "learning_rate": 3.965984898969044e-08, - "loss": 0.1871, - "step": 20640 - }, - { - "epoch": 1.9444666870775535, - "grad_norm": 0.7583874464035034, - "learning_rate": 3.95256113148923e-08, - "loss": 0.1971, - "step": 20641 - }, - { - "epoch": 1.9445608911707213, - "grad_norm": 0.6117969155311584, - "learning_rate": 3.939160075431381e-08, - "loss": 0.1929, - "step": 20642 - }, - { - "epoch": 1.9446550952638892, - "grad_norm": 0.6076126098632812, - "learning_rate": 3.925781731101252e-08, - "loss": 0.1872, - "step": 20643 - }, - { - "epoch": 1.9447492993570572, - "grad_norm": 0.6362821459770203, - "learning_rate": 3.912426098803712e-08, - "loss": 0.1621, - "step": 20644 - }, - { - "epoch": 1.944843503450225, - "grad_norm": 0.8712957501411438, - "learning_rate": 3.899093178843294e-08, - "loss": 0.1825, - "step": 20645 - }, - { - "epoch": 1.9449377075433927, - "grad_norm": 0.5873278379440308, - "learning_rate": 3.885782971524088e-08, - "loss": 0.1802, - "step": 20646 - }, - { - "epoch": 1.9450319116365606, - "grad_norm": 0.6395159959793091, - "learning_rate": 3.872495477149518e-08, - "loss": 0.236, - "step": 20647 - }, - { - "epoch": 1.9451261157297286, - "grad_norm": 0.6261362433433533, - "learning_rate": 3.8592306960226755e-08, - "loss": 0.1899, - "step": 20648 - }, - { - "epoch": 1.9452203198228963, - "grad_norm": 0.6366989016532898, - "learning_rate": 3.8459886284458736e-08, - "loss": 0.1882, - "step": 20649 - }, - { - "epoch": 1.945314523916064, - "grad_norm": 0.6599946022033691, - "learning_rate": 3.8327692747210934e-08, - "loss": 0.1902, - "step": 20650 - }, - { - "epoch": 1.945408728009232, - "grad_norm": 0.6210522651672363, - "learning_rate": 3.819572635149871e-08, - "loss": 0.166, - "step": 20651 - }, - { - "epoch": 1.9455029321024, - "grad_norm": 0.6272507905960083, - "learning_rate": 3.8063987100328546e-08, - "loss": 0.1754, - "step": 20652 - }, - { - "epoch": 1.9455971361955677, - "grad_norm": 0.6985970735549927, - "learning_rate": 3.7932474996706935e-08, - "loss": 0.213, - "step": 20653 - }, - { - "epoch": 1.9456913402887355, - "grad_norm": 0.6704853177070618, - "learning_rate": 3.780119004363148e-08, - "loss": 0.2075, - "step": 20654 - }, - { - "epoch": 1.9457855443819034, - "grad_norm": 0.668319046497345, - "learning_rate": 3.7670132244096434e-08, - "loss": 0.1871, - "step": 20655 - }, - { - "epoch": 1.9458797484750714, - "grad_norm": 0.675618052482605, - "learning_rate": 3.753930160108832e-08, - "loss": 0.1683, - "step": 20656 - }, - { - "epoch": 1.9459739525682391, - "grad_norm": 0.6533845663070679, - "learning_rate": 3.7408698117591404e-08, - "loss": 0.1827, - "step": 20657 - }, - { - "epoch": 1.9460681566614069, - "grad_norm": 0.6358233094215393, - "learning_rate": 3.727832179658442e-08, - "loss": 0.2002, - "step": 20658 - }, - { - "epoch": 1.9461623607545748, - "grad_norm": 0.6991185545921326, - "learning_rate": 3.714817264103832e-08, - "loss": 0.1645, - "step": 20659 - }, - { - "epoch": 1.9462565648477428, - "grad_norm": 0.6090744733810425, - "learning_rate": 3.701825065392184e-08, - "loss": 0.1733, - "step": 20660 - }, - { - "epoch": 1.9463507689409105, - "grad_norm": 0.6858947277069092, - "learning_rate": 
3.688855583819817e-08, - "loss": 0.2196, - "step": 20661 - }, - { - "epoch": 1.9464449730340783, - "grad_norm": 0.6599785089492798, - "learning_rate": 3.675908819682272e-08, - "loss": 0.1772, - "step": 20662 - }, - { - "epoch": 1.9465391771272462, - "grad_norm": 0.6226392388343811, - "learning_rate": 3.66298477327498e-08, - "loss": 0.1916, - "step": 20663 - }, - { - "epoch": 1.9466333812204142, - "grad_norm": 0.6897684931755066, - "learning_rate": 3.6500834448923714e-08, - "loss": 0.1855, - "step": 20664 - }, - { - "epoch": 1.946727585313582, - "grad_norm": 0.6366028189659119, - "learning_rate": 3.637204834828767e-08, - "loss": 0.1716, - "step": 20665 - }, - { - "epoch": 1.9468217894067497, - "grad_norm": 0.6105790734291077, - "learning_rate": 3.62434894337782e-08, - "loss": 0.1825, - "step": 20666 - }, - { - "epoch": 1.9469159934999176, - "grad_norm": 0.7614659070968628, - "learning_rate": 3.611515770832741e-08, - "loss": 0.1832, - "step": 20667 - }, - { - "epoch": 1.9470101975930856, - "grad_norm": 0.699749767780304, - "learning_rate": 3.598705317485851e-08, - "loss": 0.2074, - "step": 20668 - }, - { - "epoch": 1.9471044016862533, - "grad_norm": 0.5799646973609924, - "learning_rate": 3.5859175836295835e-08, - "loss": 0.1649, - "step": 20669 - }, - { - "epoch": 1.947198605779421, - "grad_norm": 0.6293885707855225, - "learning_rate": 3.5731525695553714e-08, - "loss": 0.1942, - "step": 20670 - }, - { - "epoch": 1.947292809872589, - "grad_norm": 0.5930148363113403, - "learning_rate": 3.5604102755542045e-08, - "loss": 0.1958, - "step": 20671 - }, - { - "epoch": 1.947387013965757, - "grad_norm": 0.6413487792015076, - "learning_rate": 3.54769070191674e-08, - "loss": 0.2138, - "step": 20672 - }, - { - "epoch": 1.9474812180589247, - "grad_norm": 0.6529505252838135, - "learning_rate": 3.534993848932966e-08, - "loss": 0.144, - "step": 20673 - }, - { - "epoch": 1.9475754221520925, - "grad_norm": 0.6703647971153259, - "learning_rate": 3.522319716892431e-08, - "loss": 0.1825, - "step": 20674 - }, - { - "epoch": 1.9476696262452604, - "grad_norm": 1.0214293003082275, - "learning_rate": 3.509668306084124e-08, - "loss": 0.1857, - "step": 20675 - }, - { - "epoch": 1.9477638303384284, - "grad_norm": 0.6616676449775696, - "learning_rate": 3.497039616796372e-08, - "loss": 0.1599, - "step": 20676 - }, - { - "epoch": 1.947858034431596, - "grad_norm": 0.6677464246749878, - "learning_rate": 3.4844336493172756e-08, - "loss": 0.1868, - "step": 20677 - }, - { - "epoch": 1.9479522385247638, - "grad_norm": 0.6382472515106201, - "learning_rate": 3.471850403934274e-08, - "loss": 0.19, - "step": 20678 - }, - { - "epoch": 1.9480464426179318, - "grad_norm": 0.6883176565170288, - "learning_rate": 3.459289880934247e-08, - "loss": 0.182, - "step": 20679 - }, - { - "epoch": 1.9481406467110995, - "grad_norm": 0.6284949779510498, - "learning_rate": 3.4467520806035216e-08, - "loss": 0.1463, - "step": 20680 - }, - { - "epoch": 1.9482348508042673, - "grad_norm": 0.6195784211158752, - "learning_rate": 3.434237003228091e-08, - "loss": 0.1756, - "step": 20681 - }, - { - "epoch": 1.9483290548974352, - "grad_norm": 1.060126781463623, - "learning_rate": 3.4217446490932836e-08, - "loss": 0.1801, - "step": 20682 - }, - { - "epoch": 1.9484232589906032, - "grad_norm": 0.6482633948326111, - "learning_rate": 3.409275018483982e-08, - "loss": 0.1984, - "step": 20683 - }, - { - "epoch": 1.948517463083771, - "grad_norm": 0.6856955885887146, - "learning_rate": 3.396828111684402e-08, - "loss": 0.1791, - "step": 20684 - }, - { - "epoch": 
1.9486116671769387, - "grad_norm": 0.6002383828163147, - "learning_rate": 3.38440392897843e-08, - "loss": 0.1688, - "step": 20685 - }, - { - "epoch": 1.9487058712701066, - "grad_norm": 0.614564836025238, - "learning_rate": 3.3720024706492825e-08, - "loss": 0.1931, - "step": 20686 - }, - { - "epoch": 1.9488000753632746, - "grad_norm": 0.68077152967453, - "learning_rate": 3.359623736979844e-08, - "loss": 0.2129, - "step": 20687 - }, - { - "epoch": 1.9488942794564423, - "grad_norm": 0.6167360544204712, - "learning_rate": 3.3472677282523344e-08, - "loss": 0.1796, - "step": 20688 - }, - { - "epoch": 1.94898848354961, - "grad_norm": 0.5975761413574219, - "learning_rate": 3.3349344447485276e-08, - "loss": 0.1715, - "step": 20689 - }, - { - "epoch": 1.949082687642778, - "grad_norm": 0.7060175538063049, - "learning_rate": 3.322623886749532e-08, - "loss": 0.1988, - "step": 20690 - }, - { - "epoch": 1.949176891735946, - "grad_norm": 0.6285139322280884, - "learning_rate": 3.310336054536123e-08, - "loss": 0.1556, - "step": 20691 - }, - { - "epoch": 1.9492710958291137, - "grad_norm": 0.6609798669815063, - "learning_rate": 3.29807094838841e-08, - "loss": 0.2013, - "step": 20692 - }, - { - "epoch": 1.9493652999222815, - "grad_norm": 0.6288373470306396, - "learning_rate": 3.285828568586058e-08, - "loss": 0.1625, - "step": 20693 - }, - { - "epoch": 1.9494595040154494, - "grad_norm": 0.6585246324539185, - "learning_rate": 3.2736089154083996e-08, - "loss": 0.1839, - "step": 20694 - }, - { - "epoch": 1.9495537081086174, - "grad_norm": 0.6595421433448792, - "learning_rate": 3.261411989133878e-08, - "loss": 0.1927, - "step": 20695 - }, - { - "epoch": 1.9496479122017851, - "grad_norm": 0.8907338976860046, - "learning_rate": 3.249237790040605e-08, - "loss": 0.2283, - "step": 20696 - }, - { - "epoch": 1.9497421162949529, - "grad_norm": 0.5782504081726074, - "learning_rate": 3.237086318406246e-08, - "loss": 0.1581, - "step": 20697 - }, - { - "epoch": 1.9498363203881208, - "grad_norm": 0.7235888838768005, - "learning_rate": 3.224957574507914e-08, - "loss": 0.197, - "step": 20698 - }, - { - "epoch": 1.9499305244812888, - "grad_norm": 0.6389434337615967, - "learning_rate": 3.2128515586219436e-08, - "loss": 0.1921, - "step": 20699 - }, - { - "epoch": 1.9500247285744565, - "grad_norm": 0.7259790897369385, - "learning_rate": 3.2007682710245566e-08, - "loss": 0.1944, - "step": 20700 - }, - { - "epoch": 1.9501189326676243, - "grad_norm": 0.7283763885498047, - "learning_rate": 3.1887077119913125e-08, - "loss": 0.1937, - "step": 20701 - }, - { - "epoch": 1.9502131367607922, - "grad_norm": 0.6539479494094849, - "learning_rate": 3.176669881797101e-08, - "loss": 0.2125, - "step": 20702 - }, - { - "epoch": 1.9503073408539602, - "grad_norm": 0.693548321723938, - "learning_rate": 3.16465478071637e-08, - "loss": 0.2007, - "step": 20703 - }, - { - "epoch": 1.950401544947128, - "grad_norm": 0.6632748246192932, - "learning_rate": 3.152662409023233e-08, - "loss": 0.1843, - "step": 20704 - }, - { - "epoch": 1.9504957490402957, - "grad_norm": 0.6446796655654907, - "learning_rate": 3.140692766991027e-08, - "loss": 0.1897, - "step": 20705 - }, - { - "epoch": 1.9505899531334636, - "grad_norm": 0.7004801630973816, - "learning_rate": 3.128745854892645e-08, - "loss": 0.1994, - "step": 20706 - }, - { - "epoch": 1.9506841572266316, - "grad_norm": 0.6569026708602905, - "learning_rate": 3.116821673000647e-08, - "loss": 0.1799, - "step": 20707 - }, - { - "epoch": 1.9507783613197993, - "grad_norm": 0.6596437096595764, - "learning_rate": 
3.104920221586705e-08, - "loss": 0.201, - "step": 20708 - }, - { - "epoch": 1.950872565412967, - "grad_norm": 0.6498751640319824, - "learning_rate": 3.093041500922378e-08, - "loss": 0.1701, - "step": 20709 - }, - { - "epoch": 1.950966769506135, - "grad_norm": 1.0099226236343384, - "learning_rate": 3.08118551127845e-08, - "loss": 0.1956, - "step": 20710 - }, - { - "epoch": 1.951060973599303, - "grad_norm": 0.5959270596504211, - "learning_rate": 3.069352252925262e-08, - "loss": 0.1547, - "step": 20711 - }, - { - "epoch": 1.9511551776924707, - "grad_norm": 0.6641378998756409, - "learning_rate": 3.0575417261325954e-08, - "loss": 0.2029, - "step": 20712 - }, - { - "epoch": 1.9512493817856384, - "grad_norm": 0.5687611699104309, - "learning_rate": 3.045753931169792e-08, - "loss": 0.1604, - "step": 20713 - }, - { - "epoch": 1.9513435858788064, - "grad_norm": 0.6910865306854248, - "learning_rate": 3.033988868305637e-08, - "loss": 0.2156, - "step": 20714 - }, - { - "epoch": 1.9514377899719744, - "grad_norm": 0.7515230774879456, - "learning_rate": 3.02224653780836e-08, - "loss": 0.1985, - "step": 20715 - }, - { - "epoch": 1.951531994065142, - "grad_norm": 0.695780336856842, - "learning_rate": 3.010526939945746e-08, - "loss": 0.2037, - "step": 20716 - }, - { - "epoch": 1.9516261981583098, - "grad_norm": 0.9847862720489502, - "learning_rate": 2.998830074984915e-08, - "loss": 0.2012, - "step": 20717 - }, - { - "epoch": 1.9517204022514778, - "grad_norm": 0.6138686537742615, - "learning_rate": 2.9871559431927656e-08, - "loss": 0.1473, - "step": 20718 - }, - { - "epoch": 1.9518146063446458, - "grad_norm": 0.7259178757667542, - "learning_rate": 2.9755045448351948e-08, - "loss": 0.2157, - "step": 20719 - }, - { - "epoch": 1.9519088104378135, - "grad_norm": 0.6341254711151123, - "learning_rate": 2.963875880178213e-08, - "loss": 0.2033, - "step": 20720 - }, - { - "epoch": 1.9520030145309812, - "grad_norm": 0.6881905794143677, - "learning_rate": 2.952269949486719e-08, - "loss": 0.1853, - "step": 20721 - }, - { - "epoch": 1.9520972186241492, - "grad_norm": 0.6890899538993835, - "learning_rate": 2.9406867530255013e-08, - "loss": 0.1895, - "step": 20722 - }, - { - "epoch": 1.9521914227173172, - "grad_norm": 0.6870761513710022, - "learning_rate": 2.9291262910585705e-08, - "loss": 0.1887, - "step": 20723 - }, - { - "epoch": 1.952285626810485, - "grad_norm": 0.6434525847434998, - "learning_rate": 2.9175885638494937e-08, - "loss": 0.2036, - "step": 20724 - }, - { - "epoch": 1.9523798309036526, - "grad_norm": 0.6391791105270386, - "learning_rate": 2.9060735716615052e-08, - "loss": 0.1865, - "step": 20725 - }, - { - "epoch": 1.9524740349968206, - "grad_norm": 0.6555296778678894, - "learning_rate": 2.8945813147570613e-08, - "loss": 0.1823, - "step": 20726 - }, - { - "epoch": 1.9525682390899886, - "grad_norm": 0.6324348449707031, - "learning_rate": 2.8831117933981746e-08, - "loss": 0.2056, - "step": 20727 - }, - { - "epoch": 1.9526624431831563, - "grad_norm": 0.6991801857948303, - "learning_rate": 2.8716650078465247e-08, - "loss": 0.1755, - "step": 20728 - }, - { - "epoch": 1.952756647276324, - "grad_norm": 0.6337845921516418, - "learning_rate": 2.8602409583629032e-08, - "loss": 0.1886, - "step": 20729 - }, - { - "epoch": 1.952850851369492, - "grad_norm": 0.6326943039894104, - "learning_rate": 2.8488396452078792e-08, - "loss": 0.1732, - "step": 20730 - }, - { - "epoch": 1.95294505546266, - "grad_norm": 2.937892436981201, - "learning_rate": 2.837461068641467e-08, - "loss": 0.1972, - "step": 20731 - }, - { - "epoch": 
1.9530392595558277, - "grad_norm": 0.6691240668296814, - "learning_rate": 2.8261052289231262e-08, - "loss": 0.2107, - "step": 20732 - }, - { - "epoch": 1.9531334636489954, - "grad_norm": 0.6853199005126953, - "learning_rate": 2.814772126311649e-08, - "loss": 0.174, - "step": 20733 - }, - { - "epoch": 1.9532276677421634, - "grad_norm": 0.769709587097168, - "learning_rate": 2.8034617610656068e-08, - "loss": 0.2015, - "step": 20734 - }, - { - "epoch": 1.9533218718353313, - "grad_norm": 0.7242408990859985, - "learning_rate": 2.7921741334429043e-08, - "loss": 0.1769, - "step": 20735 - }, - { - "epoch": 1.953416075928499, - "grad_norm": 0.5964789390563965, - "learning_rate": 2.780909243700669e-08, - "loss": 0.2053, - "step": 20736 - }, - { - "epoch": 1.9535102800216668, - "grad_norm": 0.597507894039154, - "learning_rate": 2.769667092096029e-08, - "loss": 0.1854, - "step": 20737 - }, - { - "epoch": 1.9536044841148348, - "grad_norm": 0.6764325499534607, - "learning_rate": 2.7584476788852233e-08, - "loss": 0.1898, - "step": 20738 - }, - { - "epoch": 1.9536986882080027, - "grad_norm": 0.6390020251274109, - "learning_rate": 2.7472510043240476e-08, - "loss": 0.1711, - "step": 20739 - }, - { - "epoch": 1.9537928923011705, - "grad_norm": 0.6687731146812439, - "learning_rate": 2.7360770686678528e-08, - "loss": 0.1789, - "step": 20740 - }, - { - "epoch": 1.9538870963943382, - "grad_norm": 0.6528924107551575, - "learning_rate": 2.7249258721714354e-08, - "loss": 0.1725, - "step": 20741 - }, - { - "epoch": 1.9539813004875062, - "grad_norm": 0.6461551785469055, - "learning_rate": 2.7137974150889258e-08, - "loss": 0.1745, - "step": 20742 - }, - { - "epoch": 1.9540755045806741, - "grad_norm": 0.6037712693214417, - "learning_rate": 2.7026916976742313e-08, - "loss": 0.1654, - "step": 20743 - }, - { - "epoch": 1.9541697086738419, - "grad_norm": 0.6182787418365479, - "learning_rate": 2.691608720180594e-08, - "loss": 0.1831, - "step": 20744 - }, - { - "epoch": 1.9542639127670096, - "grad_norm": 0.668842077255249, - "learning_rate": 2.680548482860479e-08, - "loss": 0.2006, - "step": 20745 - }, - { - "epoch": 1.9543581168601776, - "grad_norm": 0.7709841728210449, - "learning_rate": 2.6695109859663502e-08, - "loss": 0.1965, - "step": 20746 - }, - { - "epoch": 1.9544523209533455, - "grad_norm": 0.6828837394714355, - "learning_rate": 2.658496229749785e-08, - "loss": 0.1648, - "step": 20747 - }, - { - "epoch": 1.9545465250465133, - "grad_norm": 0.6607949733734131, - "learning_rate": 2.6475042144619157e-08, - "loss": 0.1985, - "step": 20748 - }, - { - "epoch": 1.954640729139681, - "grad_norm": 0.6904767155647278, - "learning_rate": 2.636534940353319e-08, - "loss": 0.2011, - "step": 20749 - }, - { - "epoch": 1.954734933232849, - "grad_norm": 0.6577705144882202, - "learning_rate": 2.62558840767424e-08, - "loss": 0.1972, - "step": 20750 - }, - { - "epoch": 1.954829137326017, - "grad_norm": 0.6928842067718506, - "learning_rate": 2.6146646166741452e-08, - "loss": 0.1792, - "step": 20751 - }, - { - "epoch": 1.9549233414191847, - "grad_norm": 0.7457311749458313, - "learning_rate": 2.60376356760228e-08, - "loss": 0.182, - "step": 20752 - }, - { - "epoch": 1.9550175455123524, - "grad_norm": 0.6536678671836853, - "learning_rate": 2.5928852607070008e-08, - "loss": 0.2133, - "step": 20753 - }, - { - "epoch": 1.9551117496055204, - "grad_norm": 0.6196486949920654, - "learning_rate": 2.5820296962365543e-08, - "loss": 0.1561, - "step": 20754 - }, - { - "epoch": 1.9552059536986883, - "grad_norm": 0.6116530299186707, - "learning_rate": 
2.5711968744382975e-08, - "loss": 0.1589, - "step": 20755 - }, - { - "epoch": 1.955300157791856, - "grad_norm": 0.5360685586929321, - "learning_rate": 2.5603867955593666e-08, - "loss": 0.1532, - "step": 20756 - }, - { - "epoch": 1.9553943618850238, - "grad_norm": 0.6683377027511597, - "learning_rate": 2.5495994598461193e-08, - "loss": 0.1899, - "step": 20757 - }, - { - "epoch": 1.9554885659781918, - "grad_norm": 0.6415463089942932, - "learning_rate": 2.538834867544693e-08, - "loss": 0.1595, - "step": 20758 - }, - { - "epoch": 1.9555827700713597, - "grad_norm": 0.6860426664352417, - "learning_rate": 2.528093018900335e-08, - "loss": 0.1714, - "step": 20759 - }, - { - "epoch": 1.9556769741645275, - "grad_norm": 0.684950590133667, - "learning_rate": 2.517373914158072e-08, - "loss": 0.1818, - "step": 20760 - }, - { - "epoch": 1.9557711782576952, - "grad_norm": 0.7248852252960205, - "learning_rate": 2.5066775535623756e-08, - "loss": 0.1866, - "step": 20761 - }, - { - "epoch": 1.9558653823508632, - "grad_norm": 0.6961067318916321, - "learning_rate": 2.49600393735705e-08, - "loss": 0.2025, - "step": 20762 - }, - { - "epoch": 1.9559595864440311, - "grad_norm": 0.624409556388855, - "learning_rate": 2.4853530657855674e-08, - "loss": 0.1796, - "step": 20763 - }, - { - "epoch": 1.9560537905371989, - "grad_norm": 0.6325263977050781, - "learning_rate": 2.4747249390906224e-08, - "loss": 0.1957, - "step": 20764 - }, - { - "epoch": 1.9561479946303666, - "grad_norm": 0.6427236795425415, - "learning_rate": 2.4641195575147992e-08, - "loss": 0.1819, - "step": 20765 - }, - { - "epoch": 1.9562421987235346, - "grad_norm": 0.6469488143920898, - "learning_rate": 2.453536921299571e-08, - "loss": 0.1907, - "step": 20766 - }, - { - "epoch": 1.9563364028167025, - "grad_norm": 0.6345981955528259, - "learning_rate": 2.442977030686522e-08, - "loss": 0.1878, - "step": 20767 - }, - { - "epoch": 1.9564306069098703, - "grad_norm": 0.6015830039978027, - "learning_rate": 2.43243988591646e-08, - "loss": 0.1661, - "step": 20768 - }, - { - "epoch": 1.956524811003038, - "grad_norm": 0.7506802678108215, - "learning_rate": 2.421925487229415e-08, - "loss": 0.2052, - "step": 20769 - }, - { - "epoch": 1.956619015096206, - "grad_norm": 0.6623401641845703, - "learning_rate": 2.4114338348653067e-08, - "loss": 0.19, - "step": 20770 - }, - { - "epoch": 1.956713219189374, - "grad_norm": 0.5909781455993652, - "learning_rate": 2.4009649290632762e-08, - "loss": 0.1666, - "step": 20771 - }, - { - "epoch": 1.9568074232825416, - "grad_norm": 0.7276968359947205, - "learning_rate": 2.3905187700620223e-08, - "loss": 0.1946, - "step": 20772 - }, - { - "epoch": 1.9569016273757094, - "grad_norm": 0.7656787037849426, - "learning_rate": 2.3800953580997988e-08, - "loss": 0.2245, - "step": 20773 - }, - { - "epoch": 1.9569958314688773, - "grad_norm": 0.6558153629302979, - "learning_rate": 2.369694693414304e-08, - "loss": 0.2119, - "step": 20774 - }, - { - "epoch": 1.9570900355620453, - "grad_norm": 0.7375321984291077, - "learning_rate": 2.3593167762425707e-08, - "loss": 0.201, - "step": 20775 - }, - { - "epoch": 1.957184239655213, - "grad_norm": 0.6460342407226562, - "learning_rate": 2.3489616068212983e-08, - "loss": 0.182, - "step": 20776 - }, - { - "epoch": 1.9572784437483808, - "grad_norm": 0.6745608448982239, - "learning_rate": 2.3386291853866318e-08, - "loss": 0.2549, - "step": 20777 - }, - { - "epoch": 1.9573726478415487, - "grad_norm": 0.6970850229263306, - "learning_rate": 2.3283195121741597e-08, - "loss": 0.2054, - "step": 20778 - }, - { - "epoch": 
1.9574668519347167, - "grad_norm": 0.6697490811347961, - "learning_rate": 2.318032587418917e-08, - "loss": 0.1944, - "step": 20779 - }, - { - "epoch": 1.9575610560278844, - "grad_norm": 0.7169244289398193, - "learning_rate": 2.3077684113554933e-08, - "loss": 0.2082, - "step": 20780 - }, - { - "epoch": 1.9576552601210522, - "grad_norm": 0.6706309914588928, - "learning_rate": 2.2975269842178127e-08, - "loss": 0.2004, - "step": 20781 - }, - { - "epoch": 1.9577494642142201, - "grad_norm": 0.6824950575828552, - "learning_rate": 2.2873083062395775e-08, - "loss": 0.1927, - "step": 20782 - }, - { - "epoch": 1.957843668307388, - "grad_norm": 0.625842273235321, - "learning_rate": 2.2771123776537117e-08, - "loss": 0.1686, - "step": 20783 - }, - { - "epoch": 1.9579378724005558, - "grad_norm": 0.6121344566345215, - "learning_rate": 2.266939198692586e-08, - "loss": 0.1839, - "step": 20784 - }, - { - "epoch": 1.9580320764937236, - "grad_norm": 0.6271312832832336, - "learning_rate": 2.2567887695883472e-08, - "loss": 0.1969, - "step": 20785 - }, - { - "epoch": 1.9581262805868915, - "grad_norm": 0.6655808687210083, - "learning_rate": 2.2466610905723662e-08, - "loss": 0.1727, - "step": 20786 - }, - { - "epoch": 1.9582204846800595, - "grad_norm": 0.7358273267745972, - "learning_rate": 2.2365561618755694e-08, - "loss": 0.1868, - "step": 20787 - }, - { - "epoch": 1.9583146887732272, - "grad_norm": 0.6705983281135559, - "learning_rate": 2.2264739837283278e-08, - "loss": 0.1884, - "step": 20788 - }, - { - "epoch": 1.958408892866395, - "grad_norm": 1.703456163406372, - "learning_rate": 2.21641455636068e-08, - "loss": 0.2058, - "step": 20789 - }, - { - "epoch": 1.958503096959563, - "grad_norm": 0.7661097049713135, - "learning_rate": 2.2063778800017755e-08, - "loss": 0.1642, - "step": 20790 - }, - { - "epoch": 1.9585973010527309, - "grad_norm": 0.6239246726036072, - "learning_rate": 2.1963639548805426e-08, - "loss": 0.1883, - "step": 20791 - }, - { - "epoch": 1.9586915051458986, - "grad_norm": 0.7588548064231873, - "learning_rate": 2.1863727812254653e-08, - "loss": 0.2189, - "step": 20792 - }, - { - "epoch": 1.9587857092390664, - "grad_norm": 0.6334348917007446, - "learning_rate": 2.1764043592641394e-08, - "loss": 0.1825, - "step": 20793 - }, - { - "epoch": 1.9588799133322343, - "grad_norm": 0.6758701205253601, - "learning_rate": 2.166458689223938e-08, - "loss": 0.1957, - "step": 20794 - }, - { - "epoch": 1.9589741174254023, - "grad_norm": 0.65650874376297, - "learning_rate": 2.1565357713317914e-08, - "loss": 0.1863, - "step": 20795 - }, - { - "epoch": 1.95906832151857, - "grad_norm": 0.6460846662521362, - "learning_rate": 2.1466356058137406e-08, - "loss": 0.1894, - "step": 20796 - }, - { - "epoch": 1.9591625256117378, - "grad_norm": 0.7715196013450623, - "learning_rate": 2.136758192895605e-08, - "loss": 0.2193, - "step": 20797 - }, - { - "epoch": 1.9592567297049057, - "grad_norm": 0.72203129529953, - "learning_rate": 2.126903532802538e-08, - "loss": 0.2003, - "step": 20798 - }, - { - "epoch": 1.9593509337980737, - "grad_norm": 0.5925700664520264, - "learning_rate": 2.1170716257594704e-08, - "loss": 0.163, - "step": 20799 - }, - { - "epoch": 1.9594451378912414, - "grad_norm": 0.6946857571601868, - "learning_rate": 2.1072624719903346e-08, - "loss": 0.211, - "step": 20800 - }, - { - "epoch": 1.9595393419844092, - "grad_norm": 0.6126140356063843, - "learning_rate": 2.0974760717188402e-08, - "loss": 0.1883, - "step": 20801 - }, - { - "epoch": 1.9596335460775771, - "grad_norm": 0.6870433688163757, - "learning_rate": 
2.0877124251682535e-08, - "loss": 0.2168, - "step": 20802 - }, - { - "epoch": 1.959727750170745, - "grad_norm": 0.6843072175979614, - "learning_rate": 2.077971532561063e-08, - "loss": 0.1801, - "step": 20803 - }, - { - "epoch": 1.9598219542639128, - "grad_norm": 0.6433764696121216, - "learning_rate": 2.0682533941195347e-08, - "loss": 0.1753, - "step": 20804 - }, - { - "epoch": 1.9599161583570806, - "grad_norm": 0.7131536602973938, - "learning_rate": 2.058558010065048e-08, - "loss": 0.1776, - "step": 20805 - }, - { - "epoch": 1.9600103624502485, - "grad_norm": 0.60561603307724, - "learning_rate": 2.0488853806188703e-08, - "loss": 0.188, - "step": 20806 - }, - { - "epoch": 1.9601045665434165, - "grad_norm": 0.6701720952987671, - "learning_rate": 2.0392355060013802e-08, - "loss": 0.197, - "step": 20807 - }, - { - "epoch": 1.9601987706365842, - "grad_norm": 0.7826750874519348, - "learning_rate": 2.0296083864327354e-08, - "loss": 0.2015, - "step": 20808 - }, - { - "epoch": 1.960292974729752, - "grad_norm": 0.6032291054725647, - "learning_rate": 2.0200040221324268e-08, - "loss": 0.1881, - "step": 20809 - }, - { - "epoch": 1.96038717882292, - "grad_norm": 0.6667314767837524, - "learning_rate": 2.0104224133193907e-08, - "loss": 0.1953, - "step": 20810 - }, - { - "epoch": 1.9604813829160879, - "grad_norm": 0.6437525153160095, - "learning_rate": 2.0008635602122294e-08, - "loss": 0.1878, - "step": 20811 - }, - { - "epoch": 1.9605755870092556, - "grad_norm": 0.6329096555709839, - "learning_rate": 1.9913274630287694e-08, - "loss": 0.1737, - "step": 20812 - }, - { - "epoch": 1.9606697911024233, - "grad_norm": 0.7089539766311646, - "learning_rate": 1.9818141219865028e-08, - "loss": 0.1867, - "step": 20813 - }, - { - "epoch": 1.9607639951955913, - "grad_norm": 0.6241235136985779, - "learning_rate": 1.9723235373023674e-08, - "loss": 0.1948, - "step": 20814 - }, - { - "epoch": 1.9608581992887593, - "grad_norm": 0.6769440770149231, - "learning_rate": 1.962855709192635e-08, - "loss": 0.2273, - "step": 20815 - }, - { - "epoch": 1.9609524033819268, - "grad_norm": 0.7112860083580017, - "learning_rate": 1.9534106378733543e-08, - "loss": 0.185, - "step": 20816 - }, - { - "epoch": 1.9610466074750947, - "grad_norm": 0.6273424625396729, - "learning_rate": 1.943988323559798e-08, - "loss": 0.219, - "step": 20817 - }, - { - "epoch": 1.9611408115682627, - "grad_norm": 0.6284955739974976, - "learning_rate": 1.934588766466905e-08, - "loss": 0.1928, - "step": 20818 - }, - { - "epoch": 1.9612350156614304, - "grad_norm": 0.6763734817504883, - "learning_rate": 1.9252119668088377e-08, - "loss": 0.2021, - "step": 20819 - }, - { - "epoch": 1.9613292197545982, - "grad_norm": 0.6680343151092529, - "learning_rate": 1.9158579247995356e-08, - "loss": 0.1989, - "step": 20820 - }, - { - "epoch": 1.9614234238477661, - "grad_norm": 0.7202074527740479, - "learning_rate": 1.9065266406521622e-08, - "loss": 0.1871, - "step": 20821 - }, - { - "epoch": 1.961517627940934, - "grad_norm": 0.7383487224578857, - "learning_rate": 1.8972181145796576e-08, - "loss": 0.2086, - "step": 20822 - }, - { - "epoch": 1.9616118320341018, - "grad_norm": 0.6500657796859741, - "learning_rate": 1.8879323467940748e-08, - "loss": 0.1911, - "step": 20823 - }, - { - "epoch": 1.9617060361272696, - "grad_norm": 0.6608936190605164, - "learning_rate": 1.878669337507355e-08, - "loss": 0.2075, - "step": 20824 - }, - { - "epoch": 1.9618002402204375, - "grad_norm": 0.6508350372314453, - "learning_rate": 1.869429086930552e-08, - "loss": 0.1741, - "step": 20825 - }, - { - 
"epoch": 1.9618944443136055, - "grad_norm": 0.8364340662956238, - "learning_rate": 1.860211595274386e-08, - "loss": 0.1714, - "step": 20826 - }, - { - "epoch": 1.9619886484067732, - "grad_norm": 0.6984458565711975, - "learning_rate": 1.8510168627490222e-08, - "loss": 0.1909, - "step": 20827 - }, - { - "epoch": 1.962082852499941, - "grad_norm": 0.7252766489982605, - "learning_rate": 1.8418448895641816e-08, - "loss": 0.2306, - "step": 20828 - }, - { - "epoch": 1.962177056593109, - "grad_norm": 0.7223662734031677, - "learning_rate": 1.8326956759290305e-08, - "loss": 0.1873, - "step": 20829 - }, - { - "epoch": 1.9622712606862769, - "grad_norm": 0.6001616716384888, - "learning_rate": 1.8235692220520683e-08, - "loss": 0.1931, - "step": 20830 - }, - { - "epoch": 1.9623654647794446, - "grad_norm": 0.704746425151825, - "learning_rate": 1.8144655281413515e-08, - "loss": 0.2057, - "step": 20831 - }, - { - "epoch": 1.9624596688726124, - "grad_norm": 0.6368359923362732, - "learning_rate": 1.8053845944046024e-08, - "loss": 0.1926, - "step": 20832 - }, - { - "epoch": 1.9625538729657803, - "grad_norm": 0.8043307065963745, - "learning_rate": 1.7963264210488774e-08, - "loss": 0.1829, - "step": 20833 - }, - { - "epoch": 1.9626480770589483, - "grad_norm": 0.6016947031021118, - "learning_rate": 1.787291008280567e-08, - "loss": 0.1647, - "step": 20834 - }, - { - "epoch": 1.962742281152116, - "grad_norm": 0.677052915096283, - "learning_rate": 1.7782783563058403e-08, - "loss": 0.1874, - "step": 20835 - }, - { - "epoch": 1.9628364852452838, - "grad_norm": 0.7198638319969177, - "learning_rate": 1.7692884653301988e-08, - "loss": 0.2044, - "step": 20836 - }, - { - "epoch": 1.9629306893384517, - "grad_norm": 0.7062585353851318, - "learning_rate": 1.7603213355584793e-08, - "loss": 0.1919, - "step": 20837 - }, - { - "epoch": 1.9630248934316197, - "grad_norm": 0.5616207122802734, - "learning_rate": 1.751376967195295e-08, - "loss": 0.1785, - "step": 20838 - }, - { - "epoch": 1.9631190975247874, - "grad_norm": 0.6764761209487915, - "learning_rate": 1.7424553604444837e-08, - "loss": 0.1881, - "step": 20839 - }, - { - "epoch": 1.9632133016179552, - "grad_norm": 0.6514866352081299, - "learning_rate": 1.7335565155095492e-08, - "loss": 0.2154, - "step": 20840 - }, - { - "epoch": 1.9633075057111231, - "grad_norm": 0.6551622748374939, - "learning_rate": 1.7246804325934396e-08, - "loss": 0.1762, - "step": 20841 - }, - { - "epoch": 1.963401709804291, - "grad_norm": 0.7031304240226746, - "learning_rate": 1.715827111898327e-08, - "loss": 0.1962, - "step": 20842 - }, - { - "epoch": 1.9634959138974588, - "grad_norm": 0.6236386299133301, - "learning_rate": 1.7069965536263834e-08, - "loss": 0.1811, - "step": 20843 - }, - { - "epoch": 1.9635901179906265, - "grad_norm": 0.5879573822021484, - "learning_rate": 1.6981887579787802e-08, - "loss": 0.1586, - "step": 20844 - }, - { - "epoch": 1.9636843220837945, - "grad_norm": 0.6167858839035034, - "learning_rate": 1.6894037251563578e-08, - "loss": 0.1576, - "step": 20845 - }, - { - "epoch": 1.9637785261769625, - "grad_norm": 0.692031979560852, - "learning_rate": 1.6806414553593998e-08, - "loss": 0.1806, - "step": 20846 - }, - { - "epoch": 1.9638727302701302, - "grad_norm": 0.6623846292495728, - "learning_rate": 1.6719019487877463e-08, - "loss": 0.1965, - "step": 20847 - }, - { - "epoch": 1.963966934363298, - "grad_norm": 0.8027023673057556, - "learning_rate": 1.6631852056407936e-08, - "loss": 0.1857, - "step": 20848 - }, - { - "epoch": 1.964061138456466, - "grad_norm": 0.6101537942886353, - 
"learning_rate": 1.6544912261170498e-08, - "loss": 0.2053, - "step": 20849 - }, - { - "epoch": 1.9641553425496339, - "grad_norm": 0.6344097852706909, - "learning_rate": 1.6458200104149115e-08, - "loss": 0.2072, - "step": 20850 - }, - { - "epoch": 1.9642495466428016, - "grad_norm": 0.6655710935592651, - "learning_rate": 1.637171558732109e-08, - "loss": 0.1984, - "step": 20851 - }, - { - "epoch": 1.9643437507359693, - "grad_norm": 0.6916816234588623, - "learning_rate": 1.6285458712657077e-08, - "loss": 0.1997, - "step": 20852 - }, - { - "epoch": 1.9644379548291373, - "grad_norm": 0.7174298167228699, - "learning_rate": 1.6199429482125495e-08, - "loss": 0.1941, - "step": 20853 - }, - { - "epoch": 1.9645321589223053, - "grad_norm": 0.5899234414100647, - "learning_rate": 1.6113627897687e-08, - "loss": 0.2045, - "step": 20854 - }, - { - "epoch": 1.964626363015473, - "grad_norm": 0.6875238418579102, - "learning_rate": 1.6028053961297808e-08, - "loss": 0.1827, - "step": 20855 - }, - { - "epoch": 1.9647205671086407, - "grad_norm": 0.7000439167022705, - "learning_rate": 1.5942707674909687e-08, - "loss": 0.2122, - "step": 20856 - }, - { - "epoch": 1.9648147712018087, - "grad_norm": 0.6685363054275513, - "learning_rate": 1.585758904046886e-08, - "loss": 0.2103, - "step": 20857 - }, - { - "epoch": 1.9649089752949767, - "grad_norm": 0.7183793783187866, - "learning_rate": 1.577269805991488e-08, - "loss": 0.2031, - "step": 20858 - }, - { - "epoch": 1.9650031793881444, - "grad_norm": 0.6927871108055115, - "learning_rate": 1.5688034735185098e-08, - "loss": 0.1904, - "step": 20859 - }, - { - "epoch": 1.9650973834813121, - "grad_norm": 0.733188807964325, - "learning_rate": 1.5603599068209075e-08, - "loss": 0.2567, - "step": 20860 - }, - { - "epoch": 1.96519158757448, - "grad_norm": 0.6338531374931335, - "learning_rate": 1.5519391060911936e-08, - "loss": 0.1739, - "step": 20861 - }, - { - "epoch": 1.965285791667648, - "grad_norm": 0.6898400187492371, - "learning_rate": 1.543541071521326e-08, - "loss": 0.2261, - "step": 20862 - }, - { - "epoch": 1.9653799957608158, - "grad_norm": 0.7352140545845032, - "learning_rate": 1.5351658033029293e-08, - "loss": 0.184, - "step": 20863 - }, - { - "epoch": 1.9654741998539835, - "grad_norm": 0.703982949256897, - "learning_rate": 1.5268133016269614e-08, - "loss": 0.2074, - "step": 20864 - }, - { - "epoch": 1.9655684039471515, - "grad_norm": 0.6581400036811829, - "learning_rate": 1.518483566683826e-08, - "loss": 0.2005, - "step": 20865 - }, - { - "epoch": 1.9656626080403194, - "grad_norm": 0.7103128433227539, - "learning_rate": 1.510176598663371e-08, - "loss": 0.2084, - "step": 20866 - }, - { - "epoch": 1.9657568121334872, - "grad_norm": 0.6426986455917358, - "learning_rate": 1.5018923977551115e-08, - "loss": 0.1778, - "step": 20867 - }, - { - "epoch": 1.965851016226655, - "grad_norm": 0.6864650845527649, - "learning_rate": 1.4936309641478964e-08, - "loss": 0.1784, - "step": 20868 - }, - { - "epoch": 1.9659452203198229, - "grad_norm": 0.5860040187835693, - "learning_rate": 1.4853922980301305e-08, - "loss": 0.1818, - "step": 20869 - }, - { - "epoch": 1.9660394244129908, - "grad_norm": 0.6675563454627991, - "learning_rate": 1.4771763995896637e-08, - "loss": 0.1914, - "step": 20870 - }, - { - "epoch": 1.9661336285061586, - "grad_norm": 0.6752128601074219, - "learning_rate": 1.4689832690137907e-08, - "loss": 0.1751, - "step": 20871 - }, - { - "epoch": 1.9662278325993263, - "grad_norm": 0.680773138999939, - "learning_rate": 1.4608129064893617e-08, - "loss": 0.1913, - "step": 20872 - 
}, - { - "epoch": 1.9663220366924943, - "grad_norm": 0.7888768315315247, - "learning_rate": 1.4526653122026723e-08, - "loss": 0.1837, - "step": 20873 - }, - { - "epoch": 1.9664162407856622, - "grad_norm": 0.6779602766036987, - "learning_rate": 1.444540486339574e-08, - "loss": 0.1974, - "step": 20874 - }, - { - "epoch": 1.96651044487883, - "grad_norm": 0.6856969594955444, - "learning_rate": 1.4364384290851408e-08, - "loss": 0.2144, - "step": 20875 - }, - { - "epoch": 1.9666046489719977, - "grad_norm": 0.8639304041862488, - "learning_rate": 1.4283591406242247e-08, - "loss": 0.2132, - "step": 20876 - }, - { - "epoch": 1.9666988530651657, - "grad_norm": 0.6300287842750549, - "learning_rate": 1.4203026211411231e-08, - "loss": 0.1926, - "step": 20877 - }, - { - "epoch": 1.9667930571583336, - "grad_norm": 0.65023273229599, - "learning_rate": 1.4122688708193555e-08, - "loss": 0.1996, - "step": 20878 - }, - { - "epoch": 1.9668872612515014, - "grad_norm": 0.6050977110862732, - "learning_rate": 1.40425788984222e-08, - "loss": 0.1718, - "step": 20879 - }, - { - "epoch": 1.9669814653446691, - "grad_norm": 0.7740190625190735, - "learning_rate": 1.396269678392459e-08, - "loss": 0.2018, - "step": 20880 - }, - { - "epoch": 1.967075669437837, - "grad_norm": 0.6520513892173767, - "learning_rate": 1.3883042366520382e-08, - "loss": 0.1751, - "step": 20881 - }, - { - "epoch": 1.967169873531005, - "grad_norm": 0.6170967221260071, - "learning_rate": 1.3803615648027014e-08, - "loss": 0.1862, - "step": 20882 - }, - { - "epoch": 1.9672640776241728, - "grad_norm": 0.6485597491264343, - "learning_rate": 1.3724416630255256e-08, - "loss": 0.1849, - "step": 20883 - }, - { - "epoch": 1.9673582817173405, - "grad_norm": 0.6519567370414734, - "learning_rate": 1.3645445315010331e-08, - "loss": 0.2085, - "step": 20884 - }, - { - "epoch": 1.9674524858105085, - "grad_norm": 0.5942420363426208, - "learning_rate": 1.3566701704094131e-08, - "loss": 0.1782, - "step": 20885 - }, - { - "epoch": 1.9675466899036764, - "grad_norm": 0.7131619453430176, - "learning_rate": 1.3488185799300779e-08, - "loss": 0.2051, - "step": 20886 - }, - { - "epoch": 1.9676408939968442, - "grad_norm": 0.6562275886535645, - "learning_rate": 1.3409897602421063e-08, - "loss": 0.2094, - "step": 20887 - }, - { - "epoch": 1.967735098090012, - "grad_norm": 0.6430373787879944, - "learning_rate": 1.3331837115241331e-08, - "loss": 0.1886, - "step": 20888 - }, - { - "epoch": 1.9678293021831799, - "grad_norm": 0.7399052977561951, - "learning_rate": 1.325400433953905e-08, - "loss": 0.2365, - "step": 20889 - }, - { - "epoch": 1.9679235062763478, - "grad_norm": 0.7590723633766174, - "learning_rate": 1.317639927709058e-08, - "loss": 0.189, - "step": 20890 - }, - { - "epoch": 1.9680177103695156, - "grad_norm": 0.689531683921814, - "learning_rate": 1.3099021929665612e-08, - "loss": 0.1898, - "step": 20891 - }, - { - "epoch": 1.9681119144626833, - "grad_norm": 0.664011538028717, - "learning_rate": 1.3021872299028292e-08, - "loss": 0.196, - "step": 20892 - }, - { - "epoch": 1.9682061185558513, - "grad_norm": 0.6723976731300354, - "learning_rate": 1.2944950386937217e-08, - "loss": 0.1864, - "step": 20893 - }, - { - "epoch": 1.9683003226490192, - "grad_norm": 0.7510846853256226, - "learning_rate": 1.2868256195145423e-08, - "loss": 0.2106, - "step": 20894 - }, - { - "epoch": 1.968394526742187, - "grad_norm": 0.7858664989471436, - "learning_rate": 1.2791789725404846e-08, - "loss": 0.208, - "step": 20895 - }, - { - "epoch": 1.9684887308353547, - "grad_norm": 0.6809086799621582, - 
"learning_rate": 1.2715550979455205e-08, - "loss": 0.1966, - "step": 20896 - }, - { - "epoch": 1.9685829349285227, - "grad_norm": 0.6665279865264893, - "learning_rate": 1.2639539959037328e-08, - "loss": 0.2033, - "step": 20897 - }, - { - "epoch": 1.9686771390216906, - "grad_norm": 0.6726946830749512, - "learning_rate": 1.2563756665883165e-08, - "loss": 0.1964, - "step": 20898 - }, - { - "epoch": 1.9687713431148584, - "grad_norm": 0.8123178482055664, - "learning_rate": 1.2488201101722442e-08, - "loss": 0.2193, - "step": 20899 - }, - { - "epoch": 1.968865547208026, - "grad_norm": 0.610478937625885, - "learning_rate": 1.2412873268274895e-08, - "loss": 0.1741, - "step": 20900 - }, - { - "epoch": 1.968959751301194, - "grad_norm": 0.6494256258010864, - "learning_rate": 1.2337773167261369e-08, - "loss": 0.1874, - "step": 20901 - }, - { - "epoch": 1.969053955394362, - "grad_norm": 0.7286932468414307, - "learning_rate": 1.2262900800392719e-08, - "loss": 0.196, - "step": 20902 - }, - { - "epoch": 1.9691481594875297, - "grad_norm": 0.6259111166000366, - "learning_rate": 1.2188256169375357e-08, - "loss": 0.204, - "step": 20903 - }, - { - "epoch": 1.9692423635806975, - "grad_norm": 0.6284198760986328, - "learning_rate": 1.2113839275913475e-08, - "loss": 0.1742, - "step": 20904 - }, - { - "epoch": 1.9693365676738654, - "grad_norm": 0.6095848679542542, - "learning_rate": 1.2039650121702384e-08, - "loss": 0.1963, - "step": 20905 - }, - { - "epoch": 1.9694307717670334, - "grad_norm": 0.6099793314933777, - "learning_rate": 1.1965688708432954e-08, - "loss": 0.1802, - "step": 20906 - }, - { - "epoch": 1.9695249758602011, - "grad_norm": 0.6415167450904846, - "learning_rate": 1.1891955037793835e-08, - "loss": 0.1839, - "step": 20907 - }, - { - "epoch": 1.9696191799533689, - "grad_norm": 0.6364742517471313, - "learning_rate": 1.181844911146368e-08, - "loss": 0.1616, - "step": 20908 - }, - { - "epoch": 1.9697133840465368, - "grad_norm": 0.6914396286010742, - "learning_rate": 1.1745170931121152e-08, - "loss": 0.2044, - "step": 20909 - }, - { - "epoch": 1.9698075881397048, - "grad_norm": 0.5957686305046082, - "learning_rate": 1.1672120498434914e-08, - "loss": 0.1871, - "step": 20910 - }, - { - "epoch": 1.9699017922328725, - "grad_norm": 0.6073902249336243, - "learning_rate": 1.1599297815072519e-08, - "loss": 0.1727, - "step": 20911 - }, - { - "epoch": 1.9699959963260403, - "grad_norm": 0.7050167918205261, - "learning_rate": 1.1526702882692641e-08, - "loss": 0.1733, - "step": 20912 - }, - { - "epoch": 1.9700902004192082, - "grad_norm": 0.6207622289657593, - "learning_rate": 1.1454335702951735e-08, - "loss": 0.1791, - "step": 20913 - }, - { - "epoch": 1.9701844045123762, - "grad_norm": 0.6318159103393555, - "learning_rate": 1.138219627749959e-08, - "loss": 0.1814, - "step": 20914 - }, - { - "epoch": 1.970278608605544, - "grad_norm": 0.6200817823410034, - "learning_rate": 1.1310284607980448e-08, - "loss": 0.1966, - "step": 20915 - }, - { - "epoch": 1.9703728126987117, - "grad_norm": 0.7505404353141785, - "learning_rate": 1.1238600696035218e-08, - "loss": 0.2249, - "step": 20916 - }, - { - "epoch": 1.9704670167918796, - "grad_norm": 0.6427599191665649, - "learning_rate": 1.1167144543298147e-08, - "loss": 0.1887, - "step": 20917 - }, - { - "epoch": 1.9705612208850476, - "grad_norm": 0.6350158452987671, - "learning_rate": 1.1095916151397935e-08, - "loss": 0.2103, - "step": 20918 - }, - { - "epoch": 1.9706554249782153, - "grad_norm": 0.6772736310958862, - "learning_rate": 1.1024915521958835e-08, - "loss": 0.2059, - 
"step": 20919 - }, - { - "epoch": 1.970749629071383, - "grad_norm": 0.6636703014373779, - "learning_rate": 1.0954142656600663e-08, - "loss": 0.2083, - "step": 20920 - }, - { - "epoch": 1.970843833164551, - "grad_norm": 0.6341129541397095, - "learning_rate": 1.0883597556935466e-08, - "loss": 0.207, - "step": 20921 - }, - { - "epoch": 1.970938037257719, - "grad_norm": 0.6010454297065735, - "learning_rate": 1.0813280224573063e-08, - "loss": 0.1776, - "step": 20922 - }, - { - "epoch": 1.9710322413508867, - "grad_norm": 0.6927230954170227, - "learning_rate": 1.074319066111551e-08, - "loss": 0.2501, - "step": 20923 - }, - { - "epoch": 1.9711264454440545, - "grad_norm": 0.6573917865753174, - "learning_rate": 1.0673328868162635e-08, - "loss": 0.1835, - "step": 20924 - }, - { - "epoch": 1.9712206495372224, - "grad_norm": 0.8325679898262024, - "learning_rate": 1.0603694847307611e-08, - "loss": 0.1972, - "step": 20925 - }, - { - "epoch": 1.9713148536303904, - "grad_norm": 0.6681457161903381, - "learning_rate": 1.0534288600135834e-08, - "loss": 0.2064, - "step": 20926 - }, - { - "epoch": 1.9714090577235581, - "grad_norm": 0.6150078773498535, - "learning_rate": 1.0465110128231593e-08, - "loss": 0.1748, - "step": 20927 - }, - { - "epoch": 1.9715032618167259, - "grad_norm": 0.6080055832862854, - "learning_rate": 1.0396159433172514e-08, - "loss": 0.1718, - "step": 20928 - }, - { - "epoch": 1.9715974659098938, - "grad_norm": 0.6420427560806274, - "learning_rate": 1.0327436516529566e-08, - "loss": 0.1793, - "step": 20929 - }, - { - "epoch": 1.9716916700030618, - "grad_norm": 0.6000234484672546, - "learning_rate": 1.025894137987149e-08, - "loss": 0.192, - "step": 20930 - }, - { - "epoch": 1.9717858740962295, - "grad_norm": 0.6501505970954895, - "learning_rate": 1.0190674024758151e-08, - "loss": 0.1859, - "step": 20931 - }, - { - "epoch": 1.9718800781893973, - "grad_norm": 0.6825153231620789, - "learning_rate": 1.0122634452747193e-08, - "loss": 0.2155, - "step": 20932 - }, - { - "epoch": 1.9719742822825652, - "grad_norm": 0.740336000919342, - "learning_rate": 1.0054822665388486e-08, - "loss": 0.1839, - "step": 20933 - }, - { - "epoch": 1.9720684863757332, - "grad_norm": 0.6732248663902283, - "learning_rate": 9.987238664231902e-09, - "loss": 0.1905, - "step": 20934 - }, - { - "epoch": 1.972162690468901, - "grad_norm": 0.6618796586990356, - "learning_rate": 9.919882450815099e-09, - "loss": 0.2039, - "step": 20935 - }, - { - "epoch": 1.9722568945620687, - "grad_norm": 0.6390891671180725, - "learning_rate": 9.852754026673517e-09, - "loss": 0.1539, - "step": 20936 - }, - { - "epoch": 1.9723510986552366, - "grad_norm": 0.681266188621521, - "learning_rate": 9.785853393341483e-09, - "loss": 0.2135, - "step": 20937 - }, - { - "epoch": 1.9724453027484046, - "grad_norm": 0.6857741475105286, - "learning_rate": 9.719180552341113e-09, - "loss": 0.1801, - "step": 20938 - }, - { - "epoch": 1.9725395068415723, - "grad_norm": 0.7095636129379272, - "learning_rate": 9.652735505193411e-09, - "loss": 0.1985, - "step": 20939 - }, - { - "epoch": 1.97263371093474, - "grad_norm": 0.7109867334365845, - "learning_rate": 9.586518253413835e-09, - "loss": 0.2128, - "step": 20940 - }, - { - "epoch": 1.972727915027908, - "grad_norm": 0.6484083533287048, - "learning_rate": 9.520528798512286e-09, - "loss": 0.1798, - "step": 20941 - }, - { - "epoch": 1.972822119121076, - "grad_norm": 0.7068780660629272, - "learning_rate": 9.454767141993115e-09, - "loss": 0.2016, - "step": 20942 - }, - { - "epoch": 1.9729163232142437, - "grad_norm": 
0.7174715399742126, - "learning_rate": 9.389233285356237e-09, - "loss": 0.2383, - "step": 20943 - }, - { - "epoch": 1.9730105273074114, - "grad_norm": 0.6550261378288269, - "learning_rate": 9.323927230096008e-09, - "loss": 0.1767, - "step": 20944 - }, - { - "epoch": 1.9731047314005794, - "grad_norm": 0.727986752986908, - "learning_rate": 9.25884897770013e-09, - "loss": 0.1937, - "step": 20945 - }, - { - "epoch": 1.9731989354937474, - "grad_norm": 0.6893591284751892, - "learning_rate": 9.19399852965408e-09, - "loss": 0.1766, - "step": 20946 - }, - { - "epoch": 1.973293139586915, - "grad_norm": 0.6319138407707214, - "learning_rate": 9.129375887435566e-09, - "loss": 0.1713, - "step": 20947 - }, - { - "epoch": 1.9733873436800828, - "grad_norm": 0.7045475244522095, - "learning_rate": 9.064981052518962e-09, - "loss": 0.2057, - "step": 20948 - }, - { - "epoch": 1.9734815477732508, - "grad_norm": 0.6276645064353943, - "learning_rate": 9.000814026371985e-09, - "loss": 0.1659, - "step": 20949 - }, - { - "epoch": 1.9735757518664188, - "grad_norm": 0.6116898655891418, - "learning_rate": 8.936874810456798e-09, - "loss": 0.1723, - "step": 20950 - }, - { - "epoch": 1.9736699559595865, - "grad_norm": 0.6246646046638489, - "learning_rate": 8.873163406233342e-09, - "loss": 0.1769, - "step": 20951 - }, - { - "epoch": 1.9737641600527542, - "grad_norm": 0.6728521585464478, - "learning_rate": 8.809679815153793e-09, - "loss": 0.1988, - "step": 20952 - }, - { - "epoch": 1.9738583641459222, - "grad_norm": 0.6070953011512756, - "learning_rate": 8.746424038663658e-09, - "loss": 0.1755, - "step": 20953 - }, - { - "epoch": 1.97395256823909, - "grad_norm": 0.6520624756813049, - "learning_rate": 8.68339607820845e-09, - "loss": 0.1921, - "step": 20954 - }, - { - "epoch": 1.9740467723322577, - "grad_norm": 0.5930506587028503, - "learning_rate": 8.620595935222575e-09, - "loss": 0.2102, - "step": 20955 - }, - { - "epoch": 1.9741409764254256, - "grad_norm": 0.632493257522583, - "learning_rate": 8.558023611139332e-09, - "loss": 0.1945, - "step": 20956 - }, - { - "epoch": 1.9742351805185936, - "grad_norm": 0.6277579665184021, - "learning_rate": 8.495679107385357e-09, - "loss": 0.195, - "step": 20957 - }, - { - "epoch": 1.9743293846117613, - "grad_norm": 0.614940345287323, - "learning_rate": 8.433562425382847e-09, - "loss": 0.1631, - "step": 20958 - }, - { - "epoch": 1.974423588704929, - "grad_norm": 0.6482104659080505, - "learning_rate": 8.371673566546223e-09, - "loss": 0.2037, - "step": 20959 - }, - { - "epoch": 1.974517792798097, - "grad_norm": 0.6362460255622864, - "learning_rate": 8.310012532287692e-09, - "loss": 0.1875, - "step": 20960 - }, - { - "epoch": 1.974611996891265, - "grad_norm": 0.6358654499053955, - "learning_rate": 8.248579324013905e-09, - "loss": 0.1679, - "step": 20961 - }, - { - "epoch": 1.9747062009844327, - "grad_norm": 0.6750145554542542, - "learning_rate": 8.187373943124854e-09, - "loss": 0.1883, - "step": 20962 - }, - { - "epoch": 1.9748004050776005, - "grad_norm": 0.7022234201431274, - "learning_rate": 8.126396391017199e-09, - "loss": 0.199, - "step": 20963 - }, - { - "epoch": 1.9748946091707684, - "grad_norm": 0.6213939189910889, - "learning_rate": 8.065646669078719e-09, - "loss": 0.1867, - "step": 20964 - }, - { - "epoch": 1.9749888132639364, - "grad_norm": 0.6233702898025513, - "learning_rate": 8.005124778698304e-09, - "loss": 0.187, - "step": 20965 - }, - { - "epoch": 1.9750830173571041, - "grad_norm": 0.7024995684623718, - "learning_rate": 7.944830721252627e-09, - "loss": 0.192, - "step": 20966 - 
}, - { - "epoch": 1.9751772214502719, - "grad_norm": 0.569704532623291, - "learning_rate": 7.88476449811726e-09, - "loss": 0.1759, - "step": 20967 - }, - { - "epoch": 1.9752714255434398, - "grad_norm": 0.8393198847770691, - "learning_rate": 7.824926110663323e-09, - "loss": 0.1922, - "step": 20968 - }, - { - "epoch": 1.9753656296366078, - "grad_norm": 0.8141598105430603, - "learning_rate": 7.765315560254172e-09, - "loss": 0.1702, - "step": 20969 - }, - { - "epoch": 1.9754598337297755, - "grad_norm": 0.6242630481719971, - "learning_rate": 7.70593284824872e-09, - "loss": 0.1508, - "step": 20970 - }, - { - "epoch": 1.9755540378229433, - "grad_norm": 0.6809794902801514, - "learning_rate": 7.64677797600255e-09, - "loss": 0.2107, - "step": 20971 - }, - { - "epoch": 1.9756482419161112, - "grad_norm": 0.675658106803894, - "learning_rate": 7.587850944862363e-09, - "loss": 0.2127, - "step": 20972 - }, - { - "epoch": 1.9757424460092792, - "grad_norm": 0.6515462398529053, - "learning_rate": 7.529151756172637e-09, - "loss": 0.1931, - "step": 20973 - }, - { - "epoch": 1.975836650102447, - "grad_norm": 0.6538036465644836, - "learning_rate": 7.470680411272301e-09, - "loss": 0.1893, - "step": 20974 - }, - { - "epoch": 1.9759308541956146, - "grad_norm": 0.6760832071304321, - "learning_rate": 7.412436911493626e-09, - "loss": 0.204, - "step": 20975 - }, - { - "epoch": 1.9760250582887826, - "grad_norm": 0.6080470085144043, - "learning_rate": 7.354421258165545e-09, - "loss": 0.1816, - "step": 20976 - }, - { - "epoch": 1.9761192623819506, - "grad_norm": 0.6867570281028748, - "learning_rate": 7.296633452611446e-09, - "loss": 0.19, - "step": 20977 - }, - { - "epoch": 1.9762134664751183, - "grad_norm": 0.6577616333961487, - "learning_rate": 7.239073496146942e-09, - "loss": 0.1888, - "step": 20978 - }, - { - "epoch": 1.976307670568286, - "grad_norm": 0.7067936658859253, - "learning_rate": 7.181741390086539e-09, - "loss": 0.2001, - "step": 20979 - }, - { - "epoch": 1.976401874661454, - "grad_norm": 0.7678654789924622, - "learning_rate": 7.124637135735857e-09, - "loss": 0.1906, - "step": 20980 - }, - { - "epoch": 1.976496078754622, - "grad_norm": 0.7363203167915344, - "learning_rate": 7.067760734398299e-09, - "loss": 0.1853, - "step": 20981 - }, - { - "epoch": 1.9765902828477897, - "grad_norm": 0.6460639238357544, - "learning_rate": 7.0111121873694956e-09, - "loss": 0.1728, - "step": 20982 - }, - { - "epoch": 1.9766844869409574, - "grad_norm": 0.6732394695281982, - "learning_rate": 6.954691495941745e-09, - "loss": 0.19, - "step": 20983 - }, - { - "epoch": 1.9767786910341254, - "grad_norm": 0.7039426565170288, - "learning_rate": 6.898498661401798e-09, - "loss": 0.2023, - "step": 20984 - }, - { - "epoch": 1.9768728951272934, - "grad_norm": 0.7103269100189209, - "learning_rate": 6.84253368503085e-09, - "loss": 0.1967, - "step": 20985 - }, - { - "epoch": 1.976967099220461, - "grad_norm": 0.6651984453201294, - "learning_rate": 6.786796568105658e-09, - "loss": 0.1835, - "step": 20986 - }, - { - "epoch": 1.9770613033136288, - "grad_norm": 0.6574163436889648, - "learning_rate": 6.731287311895207e-09, - "loss": 0.2106, - "step": 20987 - }, - { - "epoch": 1.9771555074067968, - "grad_norm": 0.6663548946380615, - "learning_rate": 6.676005917666262e-09, - "loss": 0.1886, - "step": 20988 - }, - { - "epoch": 1.9772497114999648, - "grad_norm": 0.5763439536094666, - "learning_rate": 6.620952386680035e-09, - "loss": 0.1604, - "step": 20989 - }, - { - "epoch": 1.9773439155931325, - "grad_norm": 0.7055424451828003, - "learning_rate": 
6.56612672019108e-09, - "loss": 0.2046, - "step": 20990 - }, - { - "epoch": 1.9774381196863002, - "grad_norm": 0.6616795659065247, - "learning_rate": 6.511528919449505e-09, - "loss": 0.1805, - "step": 20991 - }, - { - "epoch": 1.9775323237794682, - "grad_norm": 0.7356581091880798, - "learning_rate": 6.457158985699874e-09, - "loss": 0.1928, - "step": 20992 - }, - { - "epoch": 1.9776265278726362, - "grad_norm": 0.7133409976959229, - "learning_rate": 6.403016920182304e-09, - "loss": 0.2264, - "step": 20993 - }, - { - "epoch": 1.977720731965804, - "grad_norm": 0.7125285267829895, - "learning_rate": 6.3491027241313615e-09, - "loss": 0.174, - "step": 20994 - }, - { - "epoch": 1.9778149360589716, - "grad_norm": 0.6706700325012207, - "learning_rate": 6.295416398777176e-09, - "loss": 0.2053, - "step": 20995 - }, - { - "epoch": 1.9779091401521396, - "grad_norm": 0.6878630518913269, - "learning_rate": 6.241957945342103e-09, - "loss": 0.2206, - "step": 20996 - }, - { - "epoch": 1.9780033442453075, - "grad_norm": 0.671504557132721, - "learning_rate": 6.188727365046276e-09, - "loss": 0.227, - "step": 20997 - }, - { - "epoch": 1.9780975483384753, - "grad_norm": 0.6460820436477661, - "learning_rate": 6.135724659103171e-09, - "loss": 0.1885, - "step": 20998 - }, - { - "epoch": 1.978191752431643, - "grad_norm": 0.6574946045875549, - "learning_rate": 6.082949828721818e-09, - "loss": 0.1643, - "step": 20999 - }, - { - "epoch": 1.978285956524811, - "grad_norm": 0.6905536651611328, - "learning_rate": 6.030402875104591e-09, - "loss": 0.2162, - "step": 21000 - }, - { - "epoch": 1.978380160617979, - "grad_norm": 0.7331179976463318, - "learning_rate": 5.978083799450529e-09, - "loss": 0.1888, - "step": 21001 - }, - { - "epoch": 1.9784743647111467, - "grad_norm": 0.6354816555976868, - "learning_rate": 5.925992602952013e-09, - "loss": 0.187, - "step": 21002 - }, - { - "epoch": 1.9785685688043144, - "grad_norm": 0.6240286231040955, - "learning_rate": 5.8741292867969815e-09, - "loss": 0.1625, - "step": 21003 - }, - { - "epoch": 1.9786627728974824, - "grad_norm": 0.7908602952957153, - "learning_rate": 5.8224938521678206e-09, - "loss": 0.198, - "step": 21004 - }, - { - "epoch": 1.9787569769906503, - "grad_norm": 0.598463773727417, - "learning_rate": 5.771086300242479e-09, - "loss": 0.1771, - "step": 21005 - }, - { - "epoch": 1.978851181083818, - "grad_norm": 0.7266849875450134, - "learning_rate": 5.719906632193351e-09, - "loss": 0.203, - "step": 21006 - }, - { - "epoch": 1.9789453851769858, - "grad_norm": 0.6000388264656067, - "learning_rate": 5.66895484918617e-09, - "loss": 0.1789, - "step": 21007 - }, - { - "epoch": 1.9790395892701538, - "grad_norm": 0.6529139280319214, - "learning_rate": 5.618230952382231e-09, - "loss": 0.2196, - "step": 21008 - }, - { - "epoch": 1.9791337933633217, - "grad_norm": 0.7088423371315002, - "learning_rate": 5.567734942940605e-09, - "loss": 0.2085, - "step": 21009 - }, - { - "epoch": 1.9792279974564895, - "grad_norm": 0.6654103994369507, - "learning_rate": 5.517466822011486e-09, - "loss": 0.1979, - "step": 21010 - }, - { - "epoch": 1.9793222015496572, - "grad_norm": 0.6654288172721863, - "learning_rate": 5.467426590739511e-09, - "loss": 0.2016, - "step": 21011 - }, - { - "epoch": 1.9794164056428252, - "grad_norm": 0.6455620527267456, - "learning_rate": 5.41761425026821e-09, - "loss": 0.1829, - "step": 21012 - }, - { - "epoch": 1.9795106097359931, - "grad_norm": 0.7251503467559814, - "learning_rate": 5.368029801732233e-09, - "loss": 0.1934, - "step": 21013 - }, - { - "epoch": 
1.9796048138291609, - "grad_norm": 0.6838080883026123, - "learning_rate": 5.318673246261785e-09, - "loss": 0.2239, - "step": 21014 - }, - { - "epoch": 1.9796990179223286, - "grad_norm": 0.644503116607666, - "learning_rate": 5.269544584982633e-09, - "loss": 0.1811, - "step": 21015 - }, - { - "epoch": 1.9797932220154966, - "grad_norm": 0.7098379135131836, - "learning_rate": 5.220643819014992e-09, - "loss": 0.2072, - "step": 21016 - }, - { - "epoch": 1.9798874261086645, - "grad_norm": 0.6474086046218872, - "learning_rate": 5.171970949473526e-09, - "loss": 0.1932, - "step": 21017 - }, - { - "epoch": 1.9799816302018323, - "grad_norm": 0.7026640772819519, - "learning_rate": 5.1235259774695675e-09, - "loss": 0.1896, - "step": 21018 - }, - { - "epoch": 1.980075834295, - "grad_norm": 0.6126406788825989, - "learning_rate": 5.0753089041055695e-09, - "loss": 0.2013, - "step": 21019 - }, - { - "epoch": 1.980170038388168, - "grad_norm": 0.7585750818252563, - "learning_rate": 5.027319730481761e-09, - "loss": 0.1909, - "step": 21020 - }, - { - "epoch": 1.980264242481336, - "grad_norm": 0.6204404830932617, - "learning_rate": 4.979558457693934e-09, - "loss": 0.1926, - "step": 21021 - }, - { - "epoch": 1.9803584465745037, - "grad_norm": 0.6853671073913574, - "learning_rate": 4.9320250868289955e-09, - "loss": 0.1982, - "step": 21022 - }, - { - "epoch": 1.9804526506676714, - "grad_norm": 0.5925233364105225, - "learning_rate": 4.884719618971634e-09, - "loss": 0.1758, - "step": 21023 - }, - { - "epoch": 1.9805468547608394, - "grad_norm": 0.6932353973388672, - "learning_rate": 4.837642055199876e-09, - "loss": 0.2079, - "step": 21024 - }, - { - "epoch": 1.9806410588540073, - "grad_norm": 0.7288113236427307, - "learning_rate": 4.790792396588417e-09, - "loss": 0.211, - "step": 21025 - }, - { - "epoch": 1.980735262947175, - "grad_norm": 0.6650190353393555, - "learning_rate": 4.744170644204182e-09, - "loss": 0.2163, - "step": 21026 - }, - { - "epoch": 1.9808294670403428, - "grad_norm": 0.6971545815467834, - "learning_rate": 4.697776799110764e-09, - "loss": 0.195, - "step": 21027 - }, - { - "epoch": 1.9809236711335108, - "grad_norm": 0.6654476523399353, - "learning_rate": 4.651610862366207e-09, - "loss": 0.1858, - "step": 21028 - }, - { - "epoch": 1.9810178752266787, - "grad_norm": 0.6698181629180908, - "learning_rate": 4.605672835024111e-09, - "loss": 0.1772, - "step": 21029 - }, - { - "epoch": 1.9811120793198465, - "grad_norm": 0.6747551560401917, - "learning_rate": 4.559962718129196e-09, - "loss": 0.1806, - "step": 21030 - }, - { - "epoch": 1.9812062834130142, - "grad_norm": 0.6727416515350342, - "learning_rate": 4.514480512726183e-09, - "loss": 0.2066, - "step": 21031 - }, - { - "epoch": 1.9813004875061822, - "grad_norm": 0.6404261589050293, - "learning_rate": 4.469226219852019e-09, - "loss": 0.1907, - "step": 21032 - }, - { - "epoch": 1.98139469159935, - "grad_norm": 0.6485568284988403, - "learning_rate": 4.424199840536991e-09, - "loss": 0.1919, - "step": 21033 - }, - { - "epoch": 1.9814888956925178, - "grad_norm": 0.7125912308692932, - "learning_rate": 4.379401375809167e-09, - "loss": 0.2046, - "step": 21034 - }, - { - "epoch": 1.9815830997856856, - "grad_norm": 0.6653649806976318, - "learning_rate": 4.3348308266888405e-09, - "loss": 0.1825, - "step": 21035 - }, - { - "epoch": 1.9816773038788535, - "grad_norm": 0.6265192627906799, - "learning_rate": 4.290488194192976e-09, - "loss": 0.1716, - "step": 21036 - }, - { - "epoch": 1.9817715079720215, - "grad_norm": 0.735074520111084, - "learning_rate": 
4.246373479332988e-09, - "loss": 0.221, - "step": 21037 - }, - { - "epoch": 1.9818657120651892, - "grad_norm": 0.6242669224739075, - "learning_rate": 4.202486683114737e-09, - "loss": 0.1945, - "step": 21038 - }, - { - "epoch": 1.981959916158357, - "grad_norm": 0.6551904678344727, - "learning_rate": 4.158827806538534e-09, - "loss": 0.2021, - "step": 21039 - }, - { - "epoch": 1.982054120251525, - "grad_norm": 0.7333979606628418, - "learning_rate": 4.11539685059914e-09, - "loss": 0.2229, - "step": 21040 - }, - { - "epoch": 1.982148324344693, - "grad_norm": 0.6325864195823669, - "learning_rate": 4.072193816287983e-09, - "loss": 0.1741, - "step": 21041 - }, - { - "epoch": 1.9822425284378606, - "grad_norm": 0.6482536196708679, - "learning_rate": 4.029218704589832e-09, - "loss": 0.1838, - "step": 21042 - }, - { - "epoch": 1.9823367325310284, - "grad_norm": 0.7280809879302979, - "learning_rate": 3.986471516485013e-09, - "loss": 0.1788, - "step": 21043 - }, - { - "epoch": 1.9824309366241963, - "grad_norm": 0.6607115268707275, - "learning_rate": 3.943952252947192e-09, - "loss": 0.2105, - "step": 21044 - }, - { - "epoch": 1.9825251407173643, - "grad_norm": 0.6494821310043335, - "learning_rate": 3.901660914946703e-09, - "loss": 0.1802, - "step": 21045 - }, - { - "epoch": 1.982619344810532, - "grad_norm": 0.7037579417228699, - "learning_rate": 3.859597503448331e-09, - "loss": 0.1895, - "step": 21046 - }, - { - "epoch": 1.9827135489036998, - "grad_norm": 0.639551043510437, - "learning_rate": 3.817762019409088e-09, - "loss": 0.1785, - "step": 21047 - }, - { - "epoch": 1.9828077529968677, - "grad_norm": 0.7694389820098877, - "learning_rate": 3.776154463784875e-09, - "loss": 0.2015, - "step": 21048 - }, - { - "epoch": 1.9829019570900357, - "grad_norm": 0.6747332811355591, - "learning_rate": 3.7347748375238245e-09, - "loss": 0.1784, - "step": 21049 - }, - { - "epoch": 1.9829961611832034, - "grad_norm": 0.623313844203949, - "learning_rate": 3.6936231415696243e-09, - "loss": 0.1946, - "step": 21050 - }, - { - "epoch": 1.9830903652763712, - "grad_norm": 0.5907295346260071, - "learning_rate": 3.6526993768604134e-09, - "loss": 0.1746, - "step": 21051 - }, - { - "epoch": 1.9831845693695391, - "grad_norm": 0.6866522431373596, - "learning_rate": 3.6120035443287792e-09, - "loss": 0.1809, - "step": 21052 - }, - { - "epoch": 1.983278773462707, - "grad_norm": 0.6459100842475891, - "learning_rate": 3.5715356449039786e-09, - "loss": 0.1776, - "step": 21053 - }, - { - "epoch": 1.9833729775558748, - "grad_norm": 0.6088927388191223, - "learning_rate": 3.531295679507496e-09, - "loss": 0.185, - "step": 21054 - }, - { - "epoch": 1.9834671816490426, - "grad_norm": 0.6171404123306274, - "learning_rate": 3.4912836490574863e-09, - "loss": 0.1779, - "step": 21055 - }, - { - "epoch": 1.9835613857422105, - "grad_norm": 0.5789969563484192, - "learning_rate": 3.451499554465443e-09, - "loss": 0.1727, - "step": 21056 - }, - { - "epoch": 1.9836555898353785, - "grad_norm": 0.6608253717422485, - "learning_rate": 3.411943396639528e-09, - "loss": 0.2095, - "step": 21057 - }, - { - "epoch": 1.9837497939285462, - "grad_norm": 0.701216995716095, - "learning_rate": 3.3726151764812422e-09, - "loss": 0.1964, - "step": 21058 - }, - { - "epoch": 1.983843998021714, - "grad_norm": 0.6264675259590149, - "learning_rate": 3.333514894887646e-09, - "loss": 0.1879, - "step": 21059 - }, - { - "epoch": 1.983938202114882, - "grad_norm": 0.6653956770896912, - "learning_rate": 3.294642552750249e-09, - "loss": 0.2088, - "step": 21060 - }, - { - "epoch": 
1.9840324062080499, - "grad_norm": 0.6880512237548828, - "learning_rate": 3.2559981509550087e-09, - "loss": 0.2115, - "step": 21061 - }, - { - "epoch": 1.9841266103012176, - "grad_norm": 0.6396224498748779, - "learning_rate": 3.2175816903834423e-09, - "loss": 0.1788, - "step": 21062 - }, - { - "epoch": 1.9842208143943854, - "grad_norm": 0.6308459043502808, - "learning_rate": 3.1793931719104054e-09, - "loss": 0.1938, - "step": 21063 - }, - { - "epoch": 1.9843150184875533, - "grad_norm": 0.6442133784294128, - "learning_rate": 3.1414325964085335e-09, - "loss": 0.1815, - "step": 21064 - }, - { - "epoch": 1.9844092225807213, - "grad_norm": 0.6585400700569153, - "learning_rate": 3.1036999647426903e-09, - "loss": 0.1702, - "step": 21065 - }, - { - "epoch": 1.984503426673889, - "grad_norm": 0.6935811042785645, - "learning_rate": 3.066195277772188e-09, - "loss": 0.1974, - "step": 21066 - }, - { - "epoch": 1.9845976307670568, - "grad_norm": 0.6294695734977722, - "learning_rate": 3.0289185363530095e-09, - "loss": 0.1808, - "step": 21067 - }, - { - "epoch": 1.9846918348602247, - "grad_norm": 0.7599447965621948, - "learning_rate": 2.9918697413355844e-09, - "loss": 0.2071, - "step": 21068 - }, - { - "epoch": 1.9847860389533927, - "grad_norm": 0.6241219639778137, - "learning_rate": 2.9550488935636834e-09, - "loss": 0.1812, - "step": 21069 - }, - { - "epoch": 1.9848802430465604, - "grad_norm": 0.6321325302124023, - "learning_rate": 2.9184559938777445e-09, - "loss": 0.1707, - "step": 21070 - }, - { - "epoch": 1.9849744471397281, - "grad_norm": 0.6703804731369019, - "learning_rate": 2.882091043112656e-09, - "loss": 0.1764, - "step": 21071 - }, - { - "epoch": 1.985068651232896, - "grad_norm": 0.6230263710021973, - "learning_rate": 2.8459540420955333e-09, - "loss": 0.1956, - "step": 21072 - }, - { - "epoch": 1.985162855326064, - "grad_norm": 0.6744650602340698, - "learning_rate": 2.810044991651273e-09, - "loss": 0.1939, - "step": 21073 - }, - { - "epoch": 1.9852570594192318, - "grad_norm": 0.62555992603302, - "learning_rate": 2.774363892600329e-09, - "loss": 0.1987, - "step": 21074 - }, - { - "epoch": 1.9853512635123995, - "grad_norm": 0.6785553693771362, - "learning_rate": 2.7389107457542753e-09, - "loss": 0.1902, - "step": 21075 - }, - { - "epoch": 1.9854454676055675, - "grad_norm": 0.6026608347892761, - "learning_rate": 2.7036855519213532e-09, - "loss": 0.1864, - "step": 21076 - }, - { - "epoch": 1.9855396716987355, - "grad_norm": 0.7131816744804382, - "learning_rate": 2.6686883119064755e-09, - "loss": 0.2175, - "step": 21077 - }, - { - "epoch": 1.9856338757919032, - "grad_norm": 0.6569606065750122, - "learning_rate": 2.6339190265056714e-09, - "loss": 0.1861, - "step": 21078 - }, - { - "epoch": 1.985728079885071, - "grad_norm": 0.6766698956489563, - "learning_rate": 2.599377696513861e-09, - "loss": 0.1809, - "step": 21079 - }, - { - "epoch": 1.985822283978239, - "grad_norm": 0.6847541928291321, - "learning_rate": 2.565064322717081e-09, - "loss": 0.1806, - "step": 21080 - }, - { - "epoch": 1.9859164880714069, - "grad_norm": 0.6673032641410828, - "learning_rate": 2.5309789058980405e-09, - "loss": 0.1815, - "step": 21081 - }, - { - "epoch": 1.9860106921645746, - "grad_norm": 0.6046951413154602, - "learning_rate": 2.4971214468338945e-09, - "loss": 0.177, - "step": 21082 - }, - { - "epoch": 1.9861048962577423, - "grad_norm": 0.6393413543701172, - "learning_rate": 2.4634919462973584e-09, - "loss": 0.1649, - "step": 21083 - }, - { - "epoch": 1.9861991003509103, - "grad_norm": 0.6841180324554443, - 
"learning_rate": 2.430090405054486e-09, - "loss": 0.1977, - "step": 21084 - }, - { - "epoch": 1.9862933044440783, - "grad_norm": 0.6634268164634705, - "learning_rate": 2.3969168238668906e-09, - "loss": 0.201, - "step": 21085 - }, - { - "epoch": 1.986387508537246, - "grad_norm": 0.7379873394966125, - "learning_rate": 2.3639712034906336e-09, - "loss": 0.231, - "step": 21086 - }, - { - "epoch": 1.9864817126304137, - "grad_norm": 0.6023994088172913, - "learning_rate": 2.331253544678447e-09, - "loss": 0.1829, - "step": 21087 - }, - { - "epoch": 1.9865759167235817, - "grad_norm": 0.6844269633293152, - "learning_rate": 2.2987638481752892e-09, - "loss": 0.1836, - "step": 21088 - }, - { - "epoch": 1.9866701208167497, - "grad_norm": 0.6246742010116577, - "learning_rate": 2.26650211472168e-09, - "loss": 0.1885, - "step": 21089 - }, - { - "epoch": 1.9867643249099174, - "grad_norm": 0.7646008729934692, - "learning_rate": 2.2344683450536977e-09, - "loss": 0.2014, - "step": 21090 - }, - { - "epoch": 1.9868585290030851, - "grad_norm": 0.585985004901886, - "learning_rate": 2.2026625399018675e-09, - "loss": 0.1718, - "step": 21091 - }, - { - "epoch": 1.986952733096253, - "grad_norm": 0.6901452541351318, - "learning_rate": 2.1710846999911663e-09, - "loss": 0.202, - "step": 21092 - }, - { - "epoch": 1.9870469371894208, - "grad_norm": 0.6707181334495544, - "learning_rate": 2.139734826041018e-09, - "loss": 0.2082, - "step": 21093 - }, - { - "epoch": 1.9871411412825886, - "grad_norm": 0.636265218257904, - "learning_rate": 2.108612918767516e-09, - "loss": 0.1917, - "step": 21094 - }, - { - "epoch": 1.9872353453757565, - "grad_norm": 0.6749941110610962, - "learning_rate": 2.0777189788800944e-09, - "loss": 0.207, - "step": 21095 - }, - { - "epoch": 1.9873295494689245, - "grad_norm": 0.652577817440033, - "learning_rate": 2.047053007081523e-09, - "loss": 0.1886, - "step": 21096 - }, - { - "epoch": 1.9874237535620922, - "grad_norm": 0.6920233964920044, - "learning_rate": 2.0166150040734634e-09, - "loss": 0.1792, - "step": 21097 - }, - { - "epoch": 1.98751795765526, - "grad_norm": 0.5810310244560242, - "learning_rate": 1.9864049705486942e-09, - "loss": 0.1831, - "step": 21098 - }, - { - "epoch": 1.987612161748428, - "grad_norm": 0.6675482988357544, - "learning_rate": 1.9564229071955542e-09, - "loss": 0.1858, - "step": 21099 - }, - { - "epoch": 1.9877063658415959, - "grad_norm": 0.7007507085800171, - "learning_rate": 1.9266688146979406e-09, - "loss": 0.1904, - "step": 21100 - }, - { - "epoch": 1.9878005699347636, - "grad_norm": 0.7041182518005371, - "learning_rate": 1.89714269373531e-09, - "loss": 0.2054, - "step": 21101 - }, - { - "epoch": 1.9878947740279314, - "grad_norm": 0.6133896708488464, - "learning_rate": 1.8678445449804574e-09, - "loss": 0.1738, - "step": 21102 - }, - { - "epoch": 1.9879889781210993, - "grad_norm": 0.598605751991272, - "learning_rate": 1.8387743691006267e-09, - "loss": 0.1745, - "step": 21103 - }, - { - "epoch": 1.9880831822142673, - "grad_norm": 0.6418994665145874, - "learning_rate": 1.8099321667586211e-09, - "loss": 0.1743, - "step": 21104 - }, - { - "epoch": 1.988177386307435, - "grad_norm": 0.6561501026153564, - "learning_rate": 1.7813179386139134e-09, - "loss": 0.1717, - "step": 21105 - }, - { - "epoch": 1.9882715904006028, - "grad_norm": 0.667719841003418, - "learning_rate": 1.7529316853170941e-09, - "loss": 0.1967, - "step": 21106 - }, - { - "epoch": 1.9883657944937707, - "grad_norm": 0.799945592880249, - "learning_rate": 1.7247734075154232e-09, - "loss": 0.1945, - "step": 21107 - }, - { 
- "epoch": 1.9884599985869387, - "grad_norm": 0.6253148913383484, - "learning_rate": 1.69684310585283e-09, - "loss": 0.2079, - "step": 21108 - }, - { - "epoch": 1.9885542026801064, - "grad_norm": 0.6322131156921387, - "learning_rate": 1.6691407809643624e-09, - "loss": 0.2096, - "step": 21109 - }, - { - "epoch": 1.9886484067732741, - "grad_norm": 0.6566372513771057, - "learning_rate": 1.6416664334817368e-09, - "loss": 0.1995, - "step": 21110 - }, - { - "epoch": 1.988742610866442, - "grad_norm": 0.6330196261405945, - "learning_rate": 1.6144200640322294e-09, - "loss": 0.1688, - "step": 21111 - }, - { - "epoch": 1.98883681495961, - "grad_norm": 0.6636334657669067, - "learning_rate": 1.587401673236455e-09, - "loss": 0.1712, - "step": 21112 - }, - { - "epoch": 1.9889310190527778, - "grad_norm": 0.6503570079803467, - "learning_rate": 1.5606112617105873e-09, - "loss": 0.184, - "step": 21113 - }, - { - "epoch": 1.9890252231459455, - "grad_norm": 0.6788511872291565, - "learning_rate": 1.5340488300663592e-09, - "loss": 0.2023, - "step": 21114 - }, - { - "epoch": 1.9891194272391135, - "grad_norm": 0.6875736713409424, - "learning_rate": 1.5077143789088423e-09, - "loss": 0.204, - "step": 21115 - }, - { - "epoch": 1.9892136313322815, - "grad_norm": 0.6112891435623169, - "learning_rate": 1.4816079088375567e-09, - "loss": 0.1792, - "step": 21116 - }, - { - "epoch": 1.9893078354254492, - "grad_norm": 0.623278796672821, - "learning_rate": 1.4557294204498028e-09, - "loss": 0.2012, - "step": 21117 - }, - { - "epoch": 1.989402039518617, - "grad_norm": 0.622858464717865, - "learning_rate": 1.4300789143328887e-09, - "loss": 0.1846, - "step": 21118 - }, - { - "epoch": 1.989496243611785, - "grad_norm": 0.6334249377250671, - "learning_rate": 1.4046563910741218e-09, - "loss": 0.1719, - "step": 21119 - }, - { - "epoch": 1.9895904477049529, - "grad_norm": 0.6933590173721313, - "learning_rate": 1.379461851253039e-09, - "loss": 0.2181, - "step": 21120 - }, - { - "epoch": 1.9896846517981206, - "grad_norm": 0.6153637170791626, - "learning_rate": 1.354495295442515e-09, - "loss": 0.1806, - "step": 21121 - }, - { - "epoch": 1.9897788558912883, - "grad_norm": 1.0160863399505615, - "learning_rate": 1.3297567242120945e-09, - "loss": 0.1945, - "step": 21122 - }, - { - "epoch": 1.9898730599844563, - "grad_norm": 0.7127150893211365, - "learning_rate": 1.3052461381279912e-09, - "loss": 0.1917, - "step": 21123 - }, - { - "epoch": 1.9899672640776243, - "grad_norm": 0.6873974204063416, - "learning_rate": 1.2809635377464268e-09, - "loss": 0.1915, - "step": 21124 - }, - { - "epoch": 1.990061468170792, - "grad_norm": 0.6947431564331055, - "learning_rate": 1.256908923622513e-09, - "loss": 0.2103, - "step": 21125 - }, - { - "epoch": 1.9901556722639597, - "grad_norm": 0.6591554284095764, - "learning_rate": 1.2330822963046996e-09, - "loss": 0.1967, - "step": 21126 - }, - { - "epoch": 1.9902498763571277, - "grad_norm": 0.6387174129486084, - "learning_rate": 1.2094836563358858e-09, - "loss": 0.1994, - "step": 21127 - }, - { - "epoch": 1.9903440804502956, - "grad_norm": 0.6355052590370178, - "learning_rate": 1.18611300425453e-09, - "loss": 0.2133, - "step": 21128 - }, - { - "epoch": 1.9904382845434634, - "grad_norm": 0.6713987588882446, - "learning_rate": 1.1629703405924286e-09, - "loss": 0.2125, - "step": 21129 - }, - { - "epoch": 1.9905324886366311, - "grad_norm": 0.7112892866134644, - "learning_rate": 1.1400556658780482e-09, - "loss": 0.211, - "step": 21130 - }, - { - "epoch": 1.990626692729799, - "grad_norm": 0.6970196962356567, - 
"learning_rate": 1.1173689806354137e-09, - "loss": 0.2274, - "step": 21131 - }, - { - "epoch": 1.990720896822967, - "grad_norm": 0.7993209362030029, - "learning_rate": 1.0949102853785586e-09, - "loss": 0.1852, - "step": 21132 - }, - { - "epoch": 1.9908151009161348, - "grad_norm": 0.6768918633460999, - "learning_rate": 1.0726795806226263e-09, - "loss": 0.1814, - "step": 21133 - }, - { - "epoch": 1.9909093050093025, - "grad_norm": 0.6346969604492188, - "learning_rate": 1.0506768668727685e-09, - "loss": 0.1727, - "step": 21134 - }, - { - "epoch": 1.9910035091024705, - "grad_norm": 0.6714830994606018, - "learning_rate": 1.0289021446308057e-09, - "loss": 0.2042, - "step": 21135 - }, - { - "epoch": 1.9910977131956384, - "grad_norm": 0.6177438497543335, - "learning_rate": 1.0073554143941178e-09, - "loss": 0.1928, - "step": 21136 - }, - { - "epoch": 1.9911919172888062, - "grad_norm": 0.8300781846046448, - "learning_rate": 9.860366766534234e-10, - "loss": 0.2055, - "step": 21137 - }, - { - "epoch": 1.991286121381974, - "grad_norm": 0.6905630826950073, - "learning_rate": 9.649459318950006e-10, - "loss": 0.1829, - "step": 21138 - }, - { - "epoch": 1.9913803254751419, - "grad_norm": 0.5799639225006104, - "learning_rate": 9.440831805984652e-10, - "loss": 0.1669, - "step": 21139 - }, - { - "epoch": 1.9914745295683098, - "grad_norm": 0.7242265939712524, - "learning_rate": 9.234484232423236e-10, - "loss": 0.1898, - "step": 21140 - }, - { - "epoch": 1.9915687336614776, - "grad_norm": 0.6649330854415894, - "learning_rate": 9.030416602939795e-10, - "loss": 0.1789, - "step": 21141 - }, - { - "epoch": 1.9916629377546453, - "grad_norm": 0.6277757287025452, - "learning_rate": 8.828628922208371e-10, - "loss": 0.1725, - "step": 21142 - }, - { - "epoch": 1.9917571418478133, - "grad_norm": 0.6722846031188965, - "learning_rate": 8.629121194825285e-10, - "loss": 0.1899, - "step": 21143 - }, - { - "epoch": 1.9918513459409812, - "grad_norm": 0.8008500933647156, - "learning_rate": 8.431893425342453e-10, - "loss": 0.1756, - "step": 21144 - }, - { - "epoch": 1.991945550034149, - "grad_norm": 0.5988976359367371, - "learning_rate": 8.236945618245174e-10, - "loss": 0.1752, - "step": 21145 - }, - { - "epoch": 1.9920397541273167, - "grad_norm": 0.6233307719230652, - "learning_rate": 8.044277777985443e-10, - "loss": 0.1696, - "step": 21146 - }, - { - "epoch": 1.9921339582204847, - "grad_norm": 0.7080352306365967, - "learning_rate": 7.853889908959744e-10, - "loss": 0.2124, - "step": 21147 - }, - { - "epoch": 1.9922281623136526, - "grad_norm": 0.6927669048309326, - "learning_rate": 7.665782015497946e-10, - "loss": 0.2214, - "step": 21148 - }, - { - "epoch": 1.9923223664068204, - "grad_norm": 0.6810639500617981, - "learning_rate": 7.479954101907716e-10, - "loss": 0.2008, - "step": 21149 - }, - { - "epoch": 1.992416570499988, - "grad_norm": 0.6719371676445007, - "learning_rate": 7.296406172407899e-10, - "loss": 0.1887, - "step": 21150 - }, - { - "epoch": 1.992510774593156, - "grad_norm": 0.6678481698036194, - "learning_rate": 7.11513823119514e-10, - "loss": 0.1964, - "step": 21151 - }, - { - "epoch": 1.992604978686324, - "grad_norm": 0.7267085909843445, - "learning_rate": 6.936150282399467e-10, - "loss": 0.1981, - "step": 21152 - }, - { - "epoch": 1.9926991827794918, - "grad_norm": 0.6565226316452026, - "learning_rate": 6.759442330106503e-10, - "loss": 0.2088, - "step": 21153 - }, - { - "epoch": 1.9927933868726595, - "grad_norm": 0.6537086963653564, - "learning_rate": 6.585014378335253e-10, - "loss": 0.1796, - "step": 21154 - }, - { 
- "epoch": 1.9928875909658275, - "grad_norm": 0.7065827250480652, - "learning_rate": 6.412866431071418e-10, - "loss": 0.1854, - "step": 21155 - }, - { - "epoch": 1.9929817950589954, - "grad_norm": 0.6767070293426514, - "learning_rate": 6.242998492234087e-10, - "loss": 0.2037, - "step": 21156 - }, - { - "epoch": 1.9930759991521632, - "grad_norm": 0.6987535357475281, - "learning_rate": 6.075410565697937e-10, - "loss": 0.1934, - "step": 21157 - }, - { - "epoch": 1.993170203245331, - "grad_norm": 0.6832001805305481, - "learning_rate": 5.910102655293237e-10, - "loss": 0.1842, - "step": 21158 - }, - { - "epoch": 1.9932644073384989, - "grad_norm": 0.6096380949020386, - "learning_rate": 5.747074764783645e-10, - "loss": 0.1772, - "step": 21159 - }, - { - "epoch": 1.9933586114316668, - "grad_norm": 0.6690545678138733, - "learning_rate": 5.586326897877303e-10, - "loss": 0.2058, - "step": 21160 - }, - { - "epoch": 1.9934528155248346, - "grad_norm": 0.6548620462417603, - "learning_rate": 5.427859058249052e-10, - "loss": 0.1867, - "step": 21161 - }, - { - "epoch": 1.9935470196180023, - "grad_norm": 0.6081674695014954, - "learning_rate": 5.271671249507116e-10, - "loss": 0.1811, - "step": 21162 - }, - { - "epoch": 1.9936412237111703, - "grad_norm": 0.6763384938240051, - "learning_rate": 5.11776347521531e-10, - "loss": 0.1969, - "step": 21163 - }, - { - "epoch": 1.9937354278043382, - "grad_norm": 0.621370792388916, - "learning_rate": 4.966135738893041e-10, - "loss": 0.189, - "step": 21164 - }, - { - "epoch": 1.993829631897506, - "grad_norm": 0.6853028535842896, - "learning_rate": 4.816788043982001e-10, - "loss": 0.2095, - "step": 21165 - }, - { - "epoch": 1.9939238359906737, - "grad_norm": 0.674619734287262, - "learning_rate": 4.669720393890575e-10, - "loss": 0.189, - "step": 21166 - }, - { - "epoch": 1.9940180400838416, - "grad_norm": 0.7634488344192505, - "learning_rate": 4.524932791982739e-10, - "loss": 0.1928, - "step": 21167 - }, - { - "epoch": 1.9941122441770096, - "grad_norm": 0.6809353232383728, - "learning_rate": 4.382425241544752e-10, - "loss": 0.1871, - "step": 21168 - }, - { - "epoch": 1.9942064482701773, - "grad_norm": 0.7955659031867981, - "learning_rate": 4.2421977458406706e-10, - "loss": 0.1707, - "step": 21169 - }, - { - "epoch": 1.994300652363345, - "grad_norm": 0.6505053043365479, - "learning_rate": 4.104250308056834e-10, - "loss": 0.2144, - "step": 21170 - }, - { - "epoch": 1.994394856456513, - "grad_norm": 0.6431297659873962, - "learning_rate": 3.9685829313462765e-10, - "loss": 0.1708, - "step": 21171 - }, - { - "epoch": 1.994489060549681, - "grad_norm": 0.641369640827179, - "learning_rate": 3.8351956187954175e-10, - "loss": 0.201, - "step": 21172 - }, - { - "epoch": 1.9945832646428487, - "grad_norm": 0.6753309369087219, - "learning_rate": 3.7040883734462685e-10, - "loss": 0.1774, - "step": 21173 - }, - { - "epoch": 1.9946774687360165, - "grad_norm": 0.6169741153717041, - "learning_rate": 3.575261198296431e-10, - "loss": 0.1898, - "step": 21174 - }, - { - "epoch": 1.9947716728291844, - "grad_norm": 0.7171098589897156, - "learning_rate": 3.448714096276895e-10, - "loss": 0.1973, - "step": 21175 - }, - { - "epoch": 1.9948658769223524, - "grad_norm": 0.6104955077171326, - "learning_rate": 3.3244470702742393e-10, - "loss": 0.1667, - "step": 21176 - }, - { - "epoch": 1.9949600810155201, - "grad_norm": 0.6183255314826965, - "learning_rate": 3.202460123119533e-10, - "loss": 0.1809, - "step": 21177 - }, - { - "epoch": 1.9950542851086879, - "grad_norm": 0.6417047381401062, - "learning_rate": 
3.0827532575994355e-10, - "loss": 0.1965, - "step": 21178 - }, - { - "epoch": 1.9951484892018558, - "grad_norm": 0.6793981790542603, - "learning_rate": 2.965326476445096e-10, - "loss": 0.1922, - "step": 21179 - }, - { - "epoch": 1.9952426932950238, - "grad_norm": 0.6903060674667358, - "learning_rate": 2.85017978232105e-10, - "loss": 0.1954, - "step": 21180 - }, - { - "epoch": 1.9953368973881915, - "grad_norm": 0.618532657623291, - "learning_rate": 2.7373131778696273e-10, - "loss": 0.1777, - "step": 21181 - }, - { - "epoch": 1.9954311014813593, - "grad_norm": 0.6335455775260925, - "learning_rate": 2.6267266656443415e-10, - "loss": 0.226, - "step": 21182 - }, - { - "epoch": 1.9955253055745272, - "grad_norm": 0.6843485832214355, - "learning_rate": 2.518420248187603e-10, - "loss": 0.2062, - "step": 21183 - }, - { - "epoch": 1.9956195096676952, - "grad_norm": 0.6743312478065491, - "learning_rate": 2.4123939279641073e-10, - "loss": 0.2091, - "step": 21184 - }, - { - "epoch": 1.995713713760863, - "grad_norm": 0.6582971811294556, - "learning_rate": 2.308647707371936e-10, - "loss": 0.1927, - "step": 21185 - }, - { - "epoch": 1.9958079178540307, - "grad_norm": 0.6203919053077698, - "learning_rate": 2.2071815888091708e-10, - "loss": 0.1995, - "step": 21186 - }, - { - "epoch": 1.9959021219471986, - "grad_norm": 0.6721940040588379, - "learning_rate": 2.107995574562871e-10, - "loss": 0.2133, - "step": 21187 - }, - { - "epoch": 1.9959963260403666, - "grad_norm": 0.6331708431243896, - "learning_rate": 2.011089666908994e-10, - "loss": 0.2051, - "step": 21188 - }, - { - "epoch": 1.9960905301335343, - "grad_norm": 0.6765003204345703, - "learning_rate": 1.9164638680457813e-10, - "loss": 0.1733, - "step": 21189 - }, - { - "epoch": 1.996184734226702, - "grad_norm": 0.6852294206619263, - "learning_rate": 1.824118180138168e-10, - "loss": 0.219, - "step": 21190 - }, - { - "epoch": 1.99627893831987, - "grad_norm": 0.6871711015701294, - "learning_rate": 1.7340526052955775e-10, - "loss": 0.2076, - "step": 21191 - }, - { - "epoch": 1.996373142413038, - "grad_norm": 0.6813973784446716, - "learning_rate": 1.6462671455608202e-10, - "loss": 0.1989, - "step": 21192 - }, - { - "epoch": 1.9964673465062057, - "grad_norm": 0.5848715901374817, - "learning_rate": 1.5607618029434003e-10, - "loss": 0.176, - "step": 21193 - }, - { - "epoch": 1.9965615505993735, - "grad_norm": 0.6575416922569275, - "learning_rate": 1.4775365793973096e-10, - "loss": 0.194, - "step": 21194 - }, - { - "epoch": 1.9966557546925414, - "grad_norm": 0.7851409912109375, - "learning_rate": 1.3965914768099277e-10, - "loss": 0.1856, - "step": 21195 - }, - { - "epoch": 1.9967499587857094, - "grad_norm": 0.6446042656898499, - "learning_rate": 1.3179264970242246e-10, - "loss": 0.172, - "step": 21196 - }, - { - "epoch": 1.9968441628788771, - "grad_norm": 0.6077773571014404, - "learning_rate": 1.2415416418498639e-10, - "loss": 0.1735, - "step": 21197 - }, - { - "epoch": 1.9969383669720449, - "grad_norm": 0.6568995118141174, - "learning_rate": 1.1674369130187935e-10, - "loss": 0.1956, - "step": 21198 - }, - { - "epoch": 1.9970325710652128, - "grad_norm": 0.6406089663505554, - "learning_rate": 1.0956123122185524e-10, - "loss": 0.2155, - "step": 21199 - }, - { - "epoch": 1.9971267751583808, - "grad_norm": 0.6521788239479065, - "learning_rate": 1.0260678410922709e-10, - "loss": 0.2187, - "step": 21200 - }, - { - "epoch": 1.9972209792515485, - "grad_norm": 1.0811222791671753, - "learning_rate": 9.588035012164654e-11, - "loss": 0.1717, - "step": 21201 - }, - { - "epoch": 
1.9973151833447162, - "grad_norm": 0.7579187750816345, - "learning_rate": 8.93819294134346e-11, - "loss": 0.1841, - "step": 21202 - }, - { - "epoch": 1.9974093874378842, - "grad_norm": 0.6129136681556702, - "learning_rate": 8.311152213336116e-11, - "loss": 0.1583, - "step": 21203 - }, - { - "epoch": 1.9975035915310522, - "grad_norm": 0.6253575086593628, - "learning_rate": 7.706912842242453e-11, - "loss": 0.1878, - "step": 21204 - }, - { - "epoch": 1.99759779562422, - "grad_norm": 0.6372220516204834, - "learning_rate": 7.125474842051283e-11, - "loss": 0.2136, - "step": 21205 - }, - { - "epoch": 1.9976919997173876, - "grad_norm": 0.6634848713874817, - "learning_rate": 6.566838225752215e-11, - "loss": 0.1663, - "step": 21206 - }, - { - "epoch": 1.9977862038105556, - "grad_norm": 0.6424389481544495, - "learning_rate": 6.031003006445879e-11, - "loss": 0.2077, - "step": 21207 - }, - { - "epoch": 1.9978804079037236, - "grad_norm": 0.710203230381012, - "learning_rate": 5.5179691960116634e-11, - "loss": 0.2175, - "step": 21208 - }, - { - "epoch": 1.9979746119968913, - "grad_norm": 0.8055575489997864, - "learning_rate": 5.027736806328953e-11, - "loss": 0.194, - "step": 21209 - }, - { - "epoch": 1.998068816090059, - "grad_norm": 0.5930612683296204, - "learning_rate": 4.56030584849998e-11, - "loss": 0.1555, - "step": 21210 - }, - { - "epoch": 1.998163020183227, - "grad_norm": 0.6169185042381287, - "learning_rate": 4.1156763332939055e-11, - "loss": 0.1783, - "step": 21211 - }, - { - "epoch": 1.998257224276395, - "grad_norm": 0.6444560885429382, - "learning_rate": 3.693848270702738e-11, - "loss": 0.1945, - "step": 21212 - }, - { - "epoch": 1.9983514283695627, - "grad_norm": 0.6154327392578125, - "learning_rate": 3.29482167049644e-11, - "loss": 0.1705, - "step": 21213 - }, - { - "epoch": 1.9984456324627304, - "grad_norm": 0.6865419745445251, - "learning_rate": 2.9185965416678173e-11, - "loss": 0.1827, - "step": 21214 - }, - { - "epoch": 1.9985398365558984, - "grad_norm": 0.7709320187568665, - "learning_rate": 2.5651728927655882e-11, - "loss": 0.1861, - "step": 21215 - }, - { - "epoch": 1.9986340406490664, - "grad_norm": 0.6527905464172363, - "learning_rate": 2.2345507320054028e-11, - "loss": 0.2061, - "step": 21216 - }, - { - "epoch": 1.998728244742234, - "grad_norm": 0.696857213973999, - "learning_rate": 1.9267300667147327e-11, - "loss": 0.1955, - "step": 21217 - }, - { - "epoch": 1.9988224488354018, - "grad_norm": 0.6352835297584534, - "learning_rate": 1.6417109041100277e-11, - "loss": 0.1845, - "step": 21218 - }, - { - "epoch": 1.9989166529285698, - "grad_norm": 0.6665422916412354, - "learning_rate": 1.379493250519559e-11, - "loss": 0.173, - "step": 21219 - }, - { - "epoch": 1.9990108570217378, - "grad_norm": 0.6305170655250549, - "learning_rate": 1.1400771120495535e-11, - "loss": 0.1696, - "step": 21220 - }, - { - "epoch": 1.9991050611149055, - "grad_norm": 0.656869649887085, - "learning_rate": 9.234624940290815e-12, - "loss": 0.1995, - "step": 21221 - }, - { - "epoch": 1.9991992652080732, - "grad_norm": 0.6680158972740173, - "learning_rate": 7.296494015651689e-12, - "loss": 0.1975, - "step": 21222 - }, - { - "epoch": 1.9992934693012412, - "grad_norm": 0.6655610799789429, - "learning_rate": 5.586378389876856e-12, - "loss": 0.2161, - "step": 21223 - }, - { - "epoch": 1.9993876733944091, - "grad_norm": 0.6311565637588501, - "learning_rate": 4.104278102934345e-12, - "loss": 0.1804, - "step": 21224 - }, - { - "epoch": 1.9994818774875769, - "grad_norm": 0.6253184676170349, - "learning_rate": 
2.8501931859103994e-12, - "loss": 0.1869, - "step": 21225 - }, - { - "epoch": 1.9995760815807446, - "grad_norm": 0.6699874401092529, - "learning_rate": 1.8241236698912645e-12, - "loss": 0.1773, - "step": 21226 - }, - { - "epoch": 1.9996702856739126, - "grad_norm": 0.697054922580719, - "learning_rate": 1.0260695781916241e-12, - "loss": 0.2091, - "step": 21227 - }, - { - "epoch": 1.9997644897670805, - "grad_norm": 0.6569732427597046, - "learning_rate": 4.560309274648234e-13, - "loss": 0.1943, - "step": 21228 - }, - { - "epoch": 1.9998586938602483, - "grad_norm": 0.660923182964325, - "learning_rate": 1.140077321437616e-13, - "loss": 0.1805, - "step": 21229 - }, - { - "epoch": 1.999952897953416, - "grad_norm": 0.5622615218162537, - "learning_rate": 0.0, - "loss": 0.1563, - "step": 21230 - }, - { - "epoch": 1.999952897953416, - "step": 21230, - "total_flos": 8.859100111364424e+17, - "train_loss": 0.27731341837475, - "train_runtime": 55817.536, - "train_samples_per_second": 36.514, - "train_steps_per_second": 0.38 - } - ], - "logging_steps": 1.0, - "max_steps": 21230, - "num_input_tokens_seen": 0, - "num_train_epochs": 2, - "save_steps": 2000000, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 8.859100111364424e+17, - "train_batch_size": 1, - "trial_name": null, - "trial_params": null -}