{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9771827820393805,
  "eval_steps": 500,
  "global_step": 100000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009771827820393806,
      "grad_norm": 0.5417118072509766,
      "learning_rate": 4.995602247740044e-05,
      "loss": 1.378,
      "step": 100
    },
    {
      "epoch": 0.001954365564078761,
      "grad_norm": 0.6493918895721436,
      "learning_rate": 4.990715856340093e-05,
      "loss": 1.3304,
      "step": 200
    },
    {
      "epoch": 0.0029315483461181415,
      "grad_norm": 0.9062462449073792,
      "learning_rate": 4.9858294649401425e-05,
      "loss": 1.3284,
      "step": 300
    },
    {
      "epoch": 0.003908731128157522,
      "grad_norm": 0.750052273273468,
      "learning_rate": 4.9809430735401906e-05,
      "loss": 1.3166,
      "step": 400
    },
    {
      "epoch": 0.004885913910196903,
      "grad_norm": 0.6602022051811218,
      "learning_rate": 4.97605668214024e-05,
      "loss": 1.3166,
      "step": 500
    },
    {
      "epoch": 0.005863096692236283,
      "grad_norm": 0.4193927049636841,
      "learning_rate": 4.971170290740288e-05,
      "loss": 1.3098,
      "step": 600
    },
    {
      "epoch": 0.006840279474275663,
      "grad_norm": 0.6095415949821472,
      "learning_rate": 4.966283899340338e-05,
      "loss": 1.3103,
      "step": 700
    },
    {
      "epoch": 0.007817462256315045,
      "grad_norm": 0.9943467378616333,
      "learning_rate": 4.9613975079403865e-05,
      "loss": 1.3096,
      "step": 800
    },
    {
      "epoch": 0.008794645038354424,
      "grad_norm": 1.2263585329055786,
      "learning_rate": 4.9565111165404346e-05,
      "loss": 1.3067,
      "step": 900
    },
    {
      "epoch": 0.009771827820393805,
      "grad_norm": 0.7198677659034729,
      "learning_rate": 4.951624725140484e-05,
      "loss": 1.3041,
      "step": 1000
    },
    {
      "epoch": 0.010749010602433185,
      "grad_norm": 0.7370775938034058,
      "learning_rate": 4.946738333740533e-05,
      "loss": 1.302,
      "step": 1100
    },
    {
      "epoch": 0.011726193384472566,
      "grad_norm": 0.5109437704086304,
      "learning_rate": 4.941851942340582e-05,
      "loss": 1.3089,
      "step": 1200
    },
    {
      "epoch": 0.012703376166511945,
      "grad_norm": 0.1879555583000183,
      "learning_rate": 4.9369655509406305e-05,
      "loss": 1.3043,
      "step": 1300
    },
    {
      "epoch": 0.013680558948551327,
      "grad_norm": 0.951046884059906,
      "learning_rate": 4.932079159540679e-05,
      "loss": 1.3098,
      "step": 1400
    },
    {
      "epoch": 0.014657741730590706,
      "grad_norm": 0.2478829026222229,
      "learning_rate": 4.927192768140728e-05,
      "loss": 1.3026,
      "step": 1500
    },
    {
      "epoch": 0.01563492451263009,
      "grad_norm": 0.5585843324661255,
      "learning_rate": 4.9223063767407776e-05,
      "loss": 1.3014,
      "step": 1600
    },
    {
      "epoch": 0.016612107294669467,
      "grad_norm": 0.48532453179359436,
      "learning_rate": 4.917419985340826e-05,
      "loss": 1.2981,
      "step": 1700
    },
    {
      "epoch": 0.017589290076708848,
      "grad_norm": 0.4233573079109192,
      "learning_rate": 4.912533593940875e-05,
      "loss": 1.2992,
      "step": 1800
    },
    {
      "epoch": 0.01856647285874823,
      "grad_norm": 0.3272475600242615,
      "learning_rate": 4.9076472025409234e-05,
      "loss": 1.292,
      "step": 1900
    },
    {
      "epoch": 0.01954365564078761,
      "grad_norm": 0.5299385786056519,
      "learning_rate": 4.902760811140973e-05,
      "loss": 1.2963,
      "step": 2000
    },
    {
      "epoch": 0.02052083842282699,
      "grad_norm": 0.1614024043083191,
      "learning_rate": 4.8978744197410216e-05,
      "loss": 1.2945,
      "step": 2100
    },
    {
      "epoch": 0.02149802120486637,
      "grad_norm": 0.6039963960647583,
      "learning_rate": 4.8929880283410705e-05,
      "loss": 1.2913,
      "step": 2200
    },
    {
      "epoch": 0.02247520398690575,
      "grad_norm": 0.5772804021835327,
      "learning_rate": 4.888101636941119e-05,
      "loss": 1.2895,
      "step": 2300
    },
    {
      "epoch": 0.023452386768945132,
      "grad_norm": 0.7489622235298157,
      "learning_rate": 4.883215245541168e-05,
      "loss": 1.2847,
      "step": 2400
    },
    {
      "epoch": 0.024429569550984513,
      "grad_norm": 0.30208253860473633,
      "learning_rate": 4.878328854141217e-05,
      "loss": 1.2924,
      "step": 2500
    },
    {
      "epoch": 0.02540675233302389,
      "grad_norm": 0.36944472789764404,
      "learning_rate": 4.873442462741266e-05,
      "loss": 1.2916,
      "step": 2600
    },
    {
      "epoch": 0.026383935115063272,
      "grad_norm": 0.3268676698207855,
      "learning_rate": 4.8685560713413145e-05,
      "loss": 1.2893,
      "step": 2700
    },
    {
      "epoch": 0.027361117897102653,
      "grad_norm": 0.2795974910259247,
      "learning_rate": 4.863669679941363e-05,
      "loss": 1.282,
      "step": 2800
    },
    {
      "epoch": 0.028338300679142035,
      "grad_norm": 0.36298853158950806,
      "learning_rate": 4.858783288541413e-05,
      "loss": 1.2832,
      "step": 2900
    },
    {
      "epoch": 0.029315483461181412,
      "grad_norm": 0.5242423415184021,
      "learning_rate": 4.853896897141461e-05,
      "loss": 1.2819,
      "step": 3000
    },
    {
      "epoch": 0.030292666243220794,
      "grad_norm": 0.25340864062309265,
      "learning_rate": 4.8490105057415104e-05,
      "loss": 1.2809,
      "step": 3100
    },
    {
      "epoch": 0.03126984902526018,
      "grad_norm": 0.7241976261138916,
      "learning_rate": 4.844124114341559e-05,
      "loss": 1.2802,
      "step": 3200
    },
    {
      "epoch": 0.032247031807299556,
      "grad_norm": 0.5154001712799072,
      "learning_rate": 4.839237722941608e-05,
      "loss": 1.2748,
      "step": 3300
    },
    {
      "epoch": 0.033224214589338934,
      "grad_norm": 0.5323473811149597,
      "learning_rate": 4.834351331541657e-05,
      "loss": 1.284,
      "step": 3400
    },
    {
      "epoch": 0.03420139737137832,
      "grad_norm": 0.3947168290615082,
      "learning_rate": 4.8294649401417056e-05,
      "loss": 1.276,
      "step": 3500
    },
    {
      "epoch": 0.035178580153417696,
      "grad_norm": 0.4776057302951813,
      "learning_rate": 4.8245785487417544e-05,
      "loss": 1.2783,
      "step": 3600
    },
    {
      "epoch": 0.036155762935457074,
      "grad_norm": 0.4884164035320282,
      "learning_rate": 4.819692157341804e-05,
      "loss": 1.2745,
      "step": 3700
    },
    {
      "epoch": 0.03713294571749646,
      "grad_norm": 0.5210428833961487,
      "learning_rate": 4.814805765941852e-05,
      "loss": 1.2707,
      "step": 3800
    },
    {
      "epoch": 0.038110128499535836,
      "grad_norm": 0.46214359998703003,
      "learning_rate": 4.809919374541901e-05,
      "loss": 1.2727,
      "step": 3900
    },
    {
      "epoch": 0.03908731128157522,
      "grad_norm": 0.2656782865524292,
      "learning_rate": 4.8050329831419496e-05,
      "loss": 1.2694,
      "step": 4000
    },
    {
      "epoch": 0.0400644940636146,
      "grad_norm": 0.4923059940338135,
      "learning_rate": 4.8001465917419985e-05,
      "loss": 1.2665,
      "step": 4100
    },
    {
      "epoch": 0.04104167684565398,
      "grad_norm": 0.92928147315979,
      "learning_rate": 4.795260200342048e-05,
      "loss": 1.2627,
      "step": 4200
    },
    {
      "epoch": 0.04201885962769336,
      "grad_norm": 1.0651229619979858,
      "learning_rate": 4.790373808942096e-05,
      "loss": 1.2623,
      "step": 4300
    },
    {
      "epoch": 0.04299604240973274,
      "grad_norm": 0.9612557888031006,
      "learning_rate": 4.7854874175421456e-05,
      "loss": 1.2482,
      "step": 4400
    },
    {
      "epoch": 0.043973225191772124,
      "grad_norm": 1.0120874643325806,
      "learning_rate": 4.7806010261421944e-05,
      "loss": 1.2589,
      "step": 4500
    },
    {
      "epoch": 0.0449504079738115,
      "grad_norm": 0.6250020861625671,
      "learning_rate": 4.775714634742243e-05,
      "loss": 1.2499,
      "step": 4600
    },
    {
      "epoch": 0.04592759075585088,
      "grad_norm": 0.2850038707256317,
      "learning_rate": 4.770828243342292e-05,
      "loss": 1.2446,
      "step": 4700
    },
    {
      "epoch": 0.046904773537890264,
      "grad_norm": 1.2032625675201416,
      "learning_rate": 4.765941851942341e-05,
      "loss": 1.2238,
      "step": 4800
    },
    {
      "epoch": 0.04788195631992964,
      "grad_norm": 0.42024949193000793,
      "learning_rate": 4.7610554605423896e-05,
      "loss": 1.2255,
      "step": 4900
    },
    {
      "epoch": 0.048859139101969026,
      "grad_norm": 0.7451406121253967,
      "learning_rate": 4.756169069142439e-05,
      "loss": 1.2071,
      "step": 5000
    },
    {
      "epoch": 0.049836321884008404,
      "grad_norm": 0.8735096454620361,
      "learning_rate": 4.751282677742487e-05,
      "loss": 1.2126,
      "step": 5100
    },
    {
      "epoch": 0.05081350466604778,
      "grad_norm": 0.73675137758255,
      "learning_rate": 4.746396286342537e-05,
      "loss": 1.2036,
      "step": 5200
    },
    {
      "epoch": 0.051790687448087167,
      "grad_norm": 0.6540606617927551,
      "learning_rate": 4.741509894942585e-05,
      "loss": 1.1825,
      "step": 5300
    },
    {
      "epoch": 0.052767870230126544,
      "grad_norm": 0.825066864490509,
      "learning_rate": 4.7366235035426336e-05,
      "loss": 1.1655,
      "step": 5400
    },
    {
      "epoch": 0.05374505301216593,
      "grad_norm": 1.6421219110488892,
      "learning_rate": 4.731737112142683e-05,
      "loss": 1.1716,
      "step": 5500
    },
    {
      "epoch": 0.05472223579420531,
      "grad_norm": 1.0644057989120483,
      "learning_rate": 4.726850720742731e-05,
      "loss": 1.1384,
      "step": 5600
    },
    {
      "epoch": 0.055699418576244684,
      "grad_norm": 1.1611616611480713,
      "learning_rate": 4.721964329342781e-05,
      "loss": 1.1499,
      "step": 5700
    },
    {
      "epoch": 0.05667660135828407,
      "grad_norm": 2.0900723934173584,
      "learning_rate": 4.7170779379428295e-05,
      "loss": 1.1323,
      "step": 5800
    },
    {
      "epoch": 0.05765378414032345,
      "grad_norm": 1.0580404996871948,
      "learning_rate": 4.712191546542878e-05,
      "loss": 1.112,
      "step": 5900
    },
    {
      "epoch": 0.058630966922362825,
      "grad_norm": 0.6299407482147217,
      "learning_rate": 4.707305155142927e-05,
      "loss": 1.104,
      "step": 6000
    },
    {
      "epoch": 0.05960814970440221,
      "grad_norm": 0.6816271543502808,
      "learning_rate": 4.702418763742976e-05,
      "loss": 1.1128,
      "step": 6100
    },
    {
      "epoch": 0.06058533248644159,
      "grad_norm": 0.654796302318573,
      "learning_rate": 4.697532372343025e-05,
      "loss": 1.0942,
      "step": 6200
    },
    {
      "epoch": 0.06156251526848097,
      "grad_norm": 1.0433884859085083,
      "learning_rate": 4.692645980943074e-05,
      "loss": 1.0862,
      "step": 6300
    },
    {
      "epoch": 0.06253969805052036,
      "grad_norm": 0.6256537437438965,
      "learning_rate": 4.6877595895431224e-05,
      "loss": 1.081,
      "step": 6400
    },
    {
      "epoch": 0.06351688083255973,
      "grad_norm": 0.8173975348472595,
      "learning_rate": 4.682873198143172e-05,
      "loss": 1.0767,
      "step": 6500
    },
    {
      "epoch": 0.06449406361459911,
      "grad_norm": 0.7856473922729492,
      "learning_rate": 4.6779868067432206e-05,
      "loss": 1.0767,
      "step": 6600
    },
    {
      "epoch": 0.0654712463966385,
      "grad_norm": 0.6337741017341614,
      "learning_rate": 4.6731004153432695e-05,
      "loss": 1.0829,
      "step": 6700
    },
    {
      "epoch": 0.06644842917867787,
      "grad_norm": 0.5813809037208557,
      "learning_rate": 4.668214023943318e-05,
      "loss": 1.0571,
      "step": 6800
    },
    {
      "epoch": 0.06742561196071725,
      "grad_norm": 0.4155445992946625,
      "learning_rate": 4.6633276325433664e-05,
      "loss": 1.0707,
      "step": 6900
    },
    {
      "epoch": 0.06840279474275664,
      "grad_norm": 0.6730567812919617,
      "learning_rate": 4.658441241143416e-05,
      "loss": 1.0477,
      "step": 7000
    },
    {
      "epoch": 0.06937997752479601,
      "grad_norm": 0.8348300457000732,
      "learning_rate": 4.653554849743465e-05,
      "loss": 1.0644,
      "step": 7100
    },
    {
      "epoch": 0.07035716030683539,
      "grad_norm": 2.2414326667785645,
      "learning_rate": 4.6486684583435135e-05,
      "loss": 1.0577,
      "step": 7200
    },
    {
      "epoch": 0.07133434308887478,
      "grad_norm": 1.6573911905288696,
      "learning_rate": 4.643782066943562e-05,
      "loss": 1.0836,
      "step": 7300
    },
    {
      "epoch": 0.07231152587091415,
      "grad_norm": 0.5690039396286011,
      "learning_rate": 4.638895675543611e-05,
      "loss": 1.0541,
      "step": 7400
    },
    {
      "epoch": 0.07328870865295353,
      "grad_norm": 0.527215301990509,
      "learning_rate": 4.63400928414366e-05,
      "loss": 1.0164,
      "step": 7500
    },
    {
      "epoch": 0.07426589143499292,
      "grad_norm": 0.7997362613677979,
      "learning_rate": 4.6291228927437094e-05,
      "loss": 1.0447,
      "step": 7600
    },
    {
      "epoch": 0.0752430742170323,
      "grad_norm": 2.257143259048462,
      "learning_rate": 4.6242365013437575e-05,
      "loss": 1.0365,
      "step": 7700
    },
    {
      "epoch": 0.07622025699907167,
      "grad_norm": 0.9132490158081055,
      "learning_rate": 4.619350109943807e-05,
      "loss": 1.0498,
      "step": 7800
    },
    {
      "epoch": 0.07719743978111106,
      "grad_norm": 0.5229859948158264,
      "learning_rate": 4.614463718543856e-05,
      "loss": 1.0342,
      "step": 7900
    },
    {
      "epoch": 0.07817462256315044,
      "grad_norm": 0.6948792338371277,
      "learning_rate": 4.6095773271439046e-05,
      "loss": 1.0325,
      "step": 8000
    },
    {
      "epoch": 0.07915180534518981,
      "grad_norm": 0.8526360988616943,
      "learning_rate": 4.6046909357439534e-05,
      "loss": 1.0183,
      "step": 8100
    },
    {
      "epoch": 0.0801289881272292,
      "grad_norm": 1.1457374095916748,
      "learning_rate": 4.599804544344002e-05,
      "loss": 1.0243,
      "step": 8200
    },
    {
      "epoch": 0.08110617090926858,
      "grad_norm": 0.9335997700691223,
      "learning_rate": 4.594918152944051e-05,
      "loss": 1.046,
      "step": 8300
    },
    {
      "epoch": 0.08208335369130795,
      "grad_norm": 0.8367229700088501,
      "learning_rate": 4.5900317615441e-05,
      "loss": 1.0176,
      "step": 8400
    },
    {
      "epoch": 0.08306053647334734,
      "grad_norm": 3.7648801803588867,
      "learning_rate": 4.5851453701441486e-05,
      "loss": 1.0047,
      "step": 8500
    },
    {
      "epoch": 0.08403771925538672,
      "grad_norm": 0.5877612829208374,
      "learning_rate": 4.5802589787441975e-05,
      "loss": 1.0346,
      "step": 8600
    },
    {
      "epoch": 0.08501490203742611,
      "grad_norm": 0.5145990252494812,
      "learning_rate": 4.575372587344246e-05,
      "loss": 1.0268,
      "step": 8700
    },
    {
      "epoch": 0.08599208481946548,
      "grad_norm": 0.9310688376426697,
      "learning_rate": 4.570486195944295e-05,
      "loss": 1.0109,
      "step": 8800
    },
    {
      "epoch": 0.08696926760150486,
      "grad_norm": 0.5182886719703674,
      "learning_rate": 4.5655998045443445e-05,
      "loss": 1.0117,
      "step": 8900
    },
    {
      "epoch": 0.08794645038354425,
      "grad_norm": 0.4319695234298706,
      "learning_rate": 4.560713413144393e-05,
      "loss": 1.0053,
      "step": 9000
    },
    {
      "epoch": 0.08892363316558362,
      "grad_norm": 4.307732582092285,
      "learning_rate": 4.555827021744442e-05,
      "loss": 1.0151,
      "step": 9100
    },
    {
      "epoch": 0.089900815947623,
      "grad_norm": 0.46516236662864685,
      "learning_rate": 4.550940630344491e-05,
      "loss": 0.9945,
      "step": 9200
    },
    {
      "epoch": 0.09087799872966239,
      "grad_norm": 1.2372952699661255,
      "learning_rate": 4.54605423894454e-05,
      "loss": 0.9865,
      "step": 9300
    },
    {
      "epoch": 0.09185518151170176,
      "grad_norm": 0.7494595646858215,
      "learning_rate": 4.5411678475445886e-05,
      "loss": 0.9824,
      "step": 9400
    },
    {
      "epoch": 0.09283236429374114,
      "grad_norm": 0.5540333390235901,
      "learning_rate": 4.5362814561446374e-05,
      "loss": 1.0132,
      "step": 9500
    },
    {
      "epoch": 0.09380954707578053,
      "grad_norm": 0.48533427715301514,
      "learning_rate": 4.531395064744686e-05,
      "loss": 1.0173,
      "step": 9600
    },
    {
      "epoch": 0.0947867298578199,
      "grad_norm": 0.4972572922706604,
      "learning_rate": 4.526508673344736e-05,
      "loss": 1.0078,
      "step": 9700
    },
    {
      "epoch": 0.09576391263985928,
      "grad_norm": 0.6748878955841064,
      "learning_rate": 4.521622281944784e-05,
      "loss": 1.0172,
      "step": 9800
    },
    {
      "epoch": 0.09674109542189867,
      "grad_norm": 0.5261876583099365,
      "learning_rate": 4.5167358905448326e-05,
      "loss": 1.0189,
      "step": 9900
    },
    {
      "epoch": 0.09771827820393805,
      "grad_norm": 0.4164600670337677,
      "learning_rate": 4.5118494991448814e-05,
      "loss": 0.9978,
      "step": 10000
    },
    {
      "epoch": 0.09869546098597742,
      "grad_norm": 0.40417763590812683,
      "learning_rate": 4.50696310774493e-05,
      "loss": 1.0103,
      "step": 10100
    },
    {
      "epoch": 0.09967264376801681,
      "grad_norm": 0.8591890931129456,
      "learning_rate": 4.50207671634498e-05,
      "loss": 1.0065,
      "step": 10200
    },
    {
      "epoch": 0.10064982655005619,
      "grad_norm": 0.5676371455192566,
      "learning_rate": 4.497190324945028e-05,
      "loss": 1.0089,
      "step": 10300
    },
    {
      "epoch": 0.10162700933209556,
      "grad_norm": 0.616646945476532,
      "learning_rate": 4.492303933545077e-05,
      "loss": 0.9897,
      "step": 10400
    },
    {
      "epoch": 0.10260419211413495,
      "grad_norm": 0.37536484003067017,
      "learning_rate": 4.487417542145126e-05,
      "loss": 0.9989,
      "step": 10500
    },
    {
      "epoch": 0.10358137489617433,
      "grad_norm": 0.6801789402961731,
      "learning_rate": 4.482531150745175e-05,
      "loss": 0.9923,
      "step": 10600
    },
    {
      "epoch": 0.1045585576782137,
      "grad_norm": 0.5848776698112488,
      "learning_rate": 4.477644759345224e-05,
      "loss": 0.9919,
      "step": 10700
    },
    {
      "epoch": 0.10553574046025309,
      "grad_norm": 0.7715157866477966,
      "learning_rate": 4.4727583679452725e-05,
      "loss": 0.9814,
      "step": 10800
    },
    {
      "epoch": 0.10651292324229247,
      "grad_norm": 0.8080986142158508,
      "learning_rate": 4.4678719765453214e-05,
      "loss": 0.9935,
      "step": 10900
    },
    {
      "epoch": 0.10749010602433186,
      "grad_norm": 0.4375016391277313,
      "learning_rate": 4.462985585145371e-05,
      "loss": 0.988,
      "step": 11000
    },
    {
      "epoch": 0.10846728880637123,
      "grad_norm": 0.8055805563926697,
      "learning_rate": 4.458099193745419e-05,
      "loss": 0.9861,
      "step": 11100
    },
    {
      "epoch": 0.10944447158841061,
      "grad_norm": 1.1914618015289307,
      "learning_rate": 4.4532128023454685e-05,
      "loss": 0.9622,
      "step": 11200
    },
    {
      "epoch": 0.11042165437045,
      "grad_norm": 0.4247540533542633,
      "learning_rate": 4.448326410945517e-05,
      "loss": 0.9602,
      "step": 11300
    },
    {
      "epoch": 0.11139883715248937,
      "grad_norm": 0.5454650521278381,
      "learning_rate": 4.4434400195455654e-05,
      "loss": 0.9696,
      "step": 11400
    },
    {
      "epoch": 0.11237601993452875,
      "grad_norm": 0.5259748697280884,
      "learning_rate": 4.438553628145615e-05,
      "loss": 1.0021,
      "step": 11500
    },
    {
      "epoch": 0.11335320271656814,
      "grad_norm": 0.5165246725082397,
      "learning_rate": 4.433667236745663e-05,
      "loss": 0.982,
      "step": 11600
    },
    {
      "epoch": 0.11433038549860751,
      "grad_norm": 0.6768147945404053,
      "learning_rate": 4.4287808453457125e-05,
      "loss": 0.9398,
      "step": 11700
    },
    {
      "epoch": 0.1153075682806469,
      "grad_norm": 1.0245041847229004,
      "learning_rate": 4.423894453945761e-05,
      "loss": 0.9934,
      "step": 11800
    },
    {
      "epoch": 0.11628475106268628,
      "grad_norm": 0.6241583228111267,
      "learning_rate": 4.41900806254581e-05,
      "loss": 0.9697,
      "step": 11900
    },
    {
      "epoch": 0.11726193384472565,
      "grad_norm": 0.4234873652458191,
      "learning_rate": 4.414121671145859e-05,
      "loss": 0.9723,
      "step": 12000
    },
    {
      "epoch": 0.11823911662676503,
      "grad_norm": 0.3932545781135559,
      "learning_rate": 4.409235279745908e-05,
      "loss": 0.9826,
      "step": 12100
    },
    {
      "epoch": 0.11921629940880442,
      "grad_norm": 1.5067880153656006,
      "learning_rate": 4.4043488883459565e-05,
      "loss": 0.9581,
      "step": 12200
    },
    {
      "epoch": 0.1201934821908438,
      "grad_norm": 0.41707366704940796,
      "learning_rate": 4.399462496946006e-05,
      "loss": 0.9666,
      "step": 12300
    },
    {
      "epoch": 0.12117066497288317,
      "grad_norm": 1.1278653144836426,
      "learning_rate": 4.394576105546054e-05,
      "loss": 0.9553,
      "step": 12400
    },
    {
      "epoch": 0.12214784775492256,
      "grad_norm": 0.350543737411499,
      "learning_rate": 4.3896897141461036e-05,
      "loss": 0.9422,
      "step": 12500
    },
    {
      "epoch": 0.12312503053696194,
      "grad_norm": 0.3775838315486908,
      "learning_rate": 4.3848033227461524e-05,
      "loss": 0.9626,
      "step": 12600
    },
    {
      "epoch": 0.12410221331900131,
      "grad_norm": 0.8341017365455627,
      "learning_rate": 4.379916931346201e-05,
      "loss": 0.9289,
      "step": 12700
    },
    {
      "epoch": 0.1250793961010407,
      "grad_norm": 0.805614173412323,
      "learning_rate": 4.37503053994625e-05,
      "loss": 0.9474,
      "step": 12800
    },
    {
      "epoch": 0.12605657888308008,
      "grad_norm": 0.8439397215843201,
      "learning_rate": 4.370144148546299e-05,
      "loss": 0.9661,
      "step": 12900
    },
    {
      "epoch": 0.12703376166511945,
      "grad_norm": 1.1272892951965332,
      "learning_rate": 4.3652577571463476e-05,
      "loss": 0.9514,
      "step": 13000
    },
    {
      "epoch": 0.12801094444715885,
      "grad_norm": 0.6426375508308411,
      "learning_rate": 4.3603713657463965e-05,
      "loss": 0.9448,
      "step": 13100
    },
    {
      "epoch": 0.12898812722919822,
      "grad_norm": 1.3205431699752808,
      "learning_rate": 4.355484974346445e-05,
      "loss": 0.9511,
      "step": 13200
    },
    {
      "epoch": 0.1299653100112376,
      "grad_norm": 0.3671954870223999,
      "learning_rate": 4.350598582946494e-05,
      "loss": 0.9506,
      "step": 13300
    },
    {
      "epoch": 0.130942492793277,
      "grad_norm": 0.7566332817077637,
      "learning_rate": 4.345712191546543e-05,
      "loss": 0.9363,
      "step": 13400
    },
    {
      "epoch": 0.13191967557531636,
      "grad_norm": 0.8800159692764282,
      "learning_rate": 4.340825800146592e-05,
      "loss": 0.9388,
      "step": 13500
    },
    {
      "epoch": 0.13289685835735573,
      "grad_norm": 0.7134628891944885,
      "learning_rate": 4.335939408746641e-05,
      "loss": 0.9162,
      "step": 13600
    },
    {
      "epoch": 0.13387404113939513,
      "grad_norm": 0.5555543899536133,
      "learning_rate": 4.331053017346689e-05,
      "loss": 0.9366,
      "step": 13700
    },
    {
      "epoch": 0.1348512239214345,
      "grad_norm": 0.4485512375831604,
      "learning_rate": 4.326166625946739e-05,
      "loss": 0.9286,
      "step": 13800
    },
    {
      "epoch": 0.13582840670347388,
      "grad_norm": 0.8888948559761047,
      "learning_rate": 4.3212802345467876e-05,
      "loss": 0.943,
      "step": 13900
    },
    {
      "epoch": 0.13680558948551327,
      "grad_norm": 0.6719749569892883,
      "learning_rate": 4.3163938431468364e-05,
      "loss": 0.9217,
      "step": 14000
    },
    {
      "epoch": 0.13778277226755264,
      "grad_norm": 0.695377767086029,
      "learning_rate": 4.311507451746885e-05,
      "loss": 0.9093,
      "step": 14100
    },
    {
      "epoch": 0.13875995504959202,
      "grad_norm": 0.5966312885284424,
      "learning_rate": 4.306621060346934e-05,
      "loss": 0.9195,
      "step": 14200
    },
    {
      "epoch": 0.13973713783163141,
      "grad_norm": 0.8073310256004333,
      "learning_rate": 4.301734668946983e-05,
      "loss": 0.9309,
      "step": 14300
    },
    {
      "epoch": 0.14071432061367078,
      "grad_norm": 0.6303800940513611,
      "learning_rate": 4.2968482775470316e-05,
      "loss": 0.9458,
      "step": 14400
    },
    {
      "epoch": 0.14169150339571016,
      "grad_norm": 0.7043970823287964,
      "learning_rate": 4.2919618861470804e-05,
      "loss": 0.9132,
      "step": 14500
    },
    {
      "epoch": 0.14266868617774955,
      "grad_norm": 0.9100736379623413,
      "learning_rate": 4.287075494747129e-05,
      "loss": 0.9296,
      "step": 14600
    },
    {
      "epoch": 0.14364586895978892,
      "grad_norm": 0.787862241268158,
      "learning_rate": 4.282189103347179e-05,
      "loss": 0.9643,
      "step": 14700
    },
    {
      "epoch": 0.1446230517418283,
      "grad_norm": 0.8169028162956238,
      "learning_rate": 4.277302711947227e-05,
      "loss": 0.9244,
      "step": 14800
    },
    {
      "epoch": 0.1456002345238677,
      "grad_norm": 0.9544184803962708,
      "learning_rate": 4.272416320547276e-05,
      "loss": 0.918,
      "step": 14900
    },
    {
      "epoch": 0.14657741730590707,
      "grad_norm": 0.5325574278831482,
      "learning_rate": 4.2675299291473245e-05,
      "loss": 0.9273,
      "step": 15000
    },
    {
      "epoch": 0.14755460008794646,
      "grad_norm": 1.1403323411941528,
      "learning_rate": 4.262643537747374e-05,
      "loss": 0.9095,
      "step": 15100
    },
    {
      "epoch": 0.14853178286998583,
      "grad_norm": 1.0411937236785889,
      "learning_rate": 4.257757146347423e-05,
      "loss": 0.8967,
      "step": 15200
    },
    {
      "epoch": 0.1495089656520252,
      "grad_norm": 0.630393922328949,
      "learning_rate": 4.2528707549474715e-05,
      "loss": 0.8883,
      "step": 15300
    },
    {
      "epoch": 0.1504861484340646,
      "grad_norm": 0.9445775747299194,
      "learning_rate": 4.2479843635475204e-05,
      "loss": 0.9253,
      "step": 15400
    },
    {
      "epoch": 0.15146333121610397,
      "grad_norm": 0.5689444541931152,
      "learning_rate": 4.243097972147569e-05,
      "loss": 0.8983,
      "step": 15500
    },
    {
      "epoch": 0.15244051399814335,
      "grad_norm": 0.7726677656173706,
      "learning_rate": 4.238211580747618e-05,
      "loss": 0.9228,
      "step": 15600
    },
    {
      "epoch": 0.15341769678018274,
      "grad_norm": 0.8260165452957153,
      "learning_rate": 4.2333251893476675e-05,
      "loss": 0.9202,
      "step": 15700
    },
    {
      "epoch": 0.15439487956222211,
      "grad_norm": 0.4869302809238434,
      "learning_rate": 4.2284387979477156e-05,
      "loss": 0.9283,
      "step": 15800
    },
    {
      "epoch": 0.15537206234426149,
      "grad_norm": 0.5768991708755493,
      "learning_rate": 4.2235524065477644e-05,
      "loss": 0.9233,
      "step": 15900
    },
    {
      "epoch": 0.15634924512630088,
      "grad_norm": 0.8856435418128967,
      "learning_rate": 4.218666015147814e-05,
      "loss": 0.8825,
      "step": 16000
    },
    {
      "epoch": 0.15732642790834026,
      "grad_norm": 0.5258185267448425,
      "learning_rate": 4.213779623747862e-05,
      "loss": 0.8834,
      "step": 16100
    },
    {
      "epoch": 0.15830361069037963,
      "grad_norm": 0.8340526223182678,
      "learning_rate": 4.2088932323479115e-05,
      "loss": 0.8856,
      "step": 16200
    },
    {
      "epoch": 0.15928079347241902,
      "grad_norm": 0.4123723804950714,
      "learning_rate": 4.2040068409479596e-05,
      "loss": 0.8957,
      "step": 16300
    },
    {
      "epoch": 0.1602579762544584,
      "grad_norm": 0.8336274027824402,
      "learning_rate": 4.199120449548009e-05,
      "loss": 0.9053,
      "step": 16400
    },
    {
      "epoch": 0.16123515903649777,
      "grad_norm": 0.7977516055107117,
      "learning_rate": 4.194234058148058e-05,
      "loss": 0.8698,
      "step": 16500
    },
    {
      "epoch": 0.16221234181853716,
      "grad_norm": 0.5064985156059265,
      "learning_rate": 4.189347666748107e-05,
      "loss": 0.8945,
      "step": 16600
    },
    {
      "epoch": 0.16318952460057654,
      "grad_norm": 0.8241267204284668,
      "learning_rate": 4.1844612753481555e-05,
      "loss": 0.8875,
      "step": 16700
    },
    {
      "epoch": 0.1641667073826159,
      "grad_norm": 0.7517113089561462,
      "learning_rate": 4.179574883948204e-05,
      "loss": 0.8845,
      "step": 16800
    },
    {
      "epoch": 0.1651438901646553,
      "grad_norm": 0.6297169923782349,
      "learning_rate": 4.174688492548253e-05,
      "loss": 0.9303,
      "step": 16900
    },
    {
      "epoch": 0.16612107294669468,
      "grad_norm": 0.5828490257263184,
      "learning_rate": 4.1698021011483026e-05,
      "loss": 0.8654,
      "step": 17000
    },
    {
      "epoch": 0.16709825572873405,
      "grad_norm": 0.3038561940193176,
      "learning_rate": 4.164915709748351e-05,
      "loss": 0.8933,
      "step": 17100
    },
    {
      "epoch": 0.16807543851077344,
      "grad_norm": 0.8928827047348022,
      "learning_rate": 4.1600293183484e-05,
      "loss": 0.8509,
      "step": 17200
    },
    {
      "epoch": 0.16905262129281282,
      "grad_norm": 0.7055086493492126,
      "learning_rate": 4.155142926948449e-05,
      "loss": 0.8814,
      "step": 17300
    },
    {
      "epoch": 0.17002980407485221,
      "grad_norm": 0.5377823710441589,
      "learning_rate": 4.150256535548497e-05,
      "loss": 0.888,
      "step": 17400
    },
    {
      "epoch": 0.17100698685689159,
      "grad_norm": 0.6319778561592102,
      "learning_rate": 4.1453701441485466e-05,
      "loss": 0.8575,
      "step": 17500
    },
    {
      "epoch": 0.17198416963893096,
      "grad_norm": 0.8756042122840881,
      "learning_rate": 4.1404837527485954e-05,
      "loss": 0.8805,
      "step": 17600
    },
    {
      "epoch": 0.17296135242097035,
      "grad_norm": 0.5293178558349609,
      "learning_rate": 4.135597361348644e-05,
      "loss": 0.8471,
      "step": 17700
    },
    {
      "epoch": 0.17393853520300973,
      "grad_norm": 0.9118284583091736,
      "learning_rate": 4.130710969948693e-05,
      "loss": 0.8426,
      "step": 17800
    },
    {
      "epoch": 0.1749157179850491,
      "grad_norm": 1.0211195945739746,
      "learning_rate": 4.125824578548742e-05,
      "loss": 0.8877,
      "step": 17900
    },
    {
      "epoch": 0.1758929007670885,
      "grad_norm": 1.4174985885620117,
      "learning_rate": 4.120938187148791e-05,
      "loss": 0.8731,
      "step": 18000
    },
    {
      "epoch": 0.17687008354912787,
      "grad_norm": 0.8243415951728821,
      "learning_rate": 4.1160517957488395e-05,
      "loss": 0.8852,
      "step": 18100
    },
    {
      "epoch": 0.17784726633116724,
      "grad_norm": 0.8385602235794067,
      "learning_rate": 4.111165404348888e-05,
      "loss": 0.8361,
      "step": 18200
    },
    {
      "epoch": 0.17882444911320663,
      "grad_norm": 1.003968358039856,
      "learning_rate": 4.106279012948938e-05,
      "loss": 0.8738,
      "step": 18300
    },
    {
      "epoch": 0.179801631895246,
      "grad_norm": 0.7428449988365173,
      "learning_rate": 4.101392621548986e-05,
      "loss": 0.8563,
      "step": 18400
    },
    {
      "epoch": 0.18077881467728538,
      "grad_norm": 1.8963735103607178,
      "learning_rate": 4.0965062301490354e-05,
      "loss": 0.8428,
      "step": 18500
    },
    {
      "epoch": 0.18175599745932478,
      "grad_norm": 0.6868895888328552,
      "learning_rate": 4.091619838749084e-05,
      "loss": 0.8727,
      "step": 18600
    },
    {
      "epoch": 0.18273318024136415,
      "grad_norm": 1.8936256170272827,
      "learning_rate": 4.086733447349133e-05,
      "loss": 0.9211,
      "step": 18700
    },
    {
      "epoch": 0.18371036302340352,
      "grad_norm": 1.004941463470459,
      "learning_rate": 4.081847055949182e-05,
      "loss": 0.8404,
      "step": 18800
| }, | |
| { | |
| "epoch": 0.18468754580544292, | |
| "grad_norm": 1.4084818363189697, | |
| "learning_rate": 4.0769606645492306e-05, | |
| "loss": 0.868, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.1856647285874823, | |
| "grad_norm": 0.6459541320800781, | |
| "learning_rate": 4.0720742731492794e-05, | |
| "loss": 0.8583, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.18664191136952166, | |
| "grad_norm": 0.7335548996925354, | |
| "learning_rate": 4.067187881749328e-05, | |
| "loss": 0.8622, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.18761909415156106, | |
| "grad_norm": 0.6783348321914673, | |
| "learning_rate": 4.062301490349377e-05, | |
| "loss": 0.8572, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.18859627693360043, | |
| "grad_norm": 0.6323419809341431, | |
| "learning_rate": 4.057415098949426e-05, | |
| "loss": 0.8763, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.1895734597156398, | |
| "grad_norm": 0.963927686214447, | |
| "learning_rate": 4.052528707549475e-05, | |
| "loss": 0.8543, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.1905506424976792, | |
| "grad_norm": 0.4785550832748413, | |
| "learning_rate": 4.0476423161495234e-05, | |
| "loss": 0.863, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.19152782527971857, | |
| "grad_norm": 0.6358627080917358, | |
| "learning_rate": 4.042755924749573e-05, | |
| "loss": 0.8842, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.19250500806175797, | |
| "grad_norm": 0.7857956886291504, | |
| "learning_rate": 4.037869533349621e-05, | |
| "loss": 0.8698, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.19348219084379734, | |
| "grad_norm": 0.5225537419319153, | |
| "learning_rate": 4.0329831419496705e-05, | |
| "loss": 0.8842, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.1944593736258367, | |
| "grad_norm": 0.582313597202301, | |
| "learning_rate": 4.0280967505497194e-05, | |
| "loss": 0.8506, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.1954365564078761, | |
| "grad_norm": 0.7206740379333496, | |
| "learning_rate": 4.023210359149768e-05, | |
| "loss": 0.8529, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.19641373918991548, | |
| "grad_norm": 0.45054760575294495, | |
| "learning_rate": 4.018323967749817e-05, | |
| "loss": 0.8564, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.19739092197195485, | |
| "grad_norm": 0.9214595556259155, | |
| "learning_rate": 4.013437576349866e-05, | |
| "loss": 0.8443, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.19836810475399425, | |
| "grad_norm": 0.9843263626098633, | |
| "learning_rate": 4.0085511849499146e-05, | |
| "loss": 0.856, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.19934528753603362, | |
| "grad_norm": 0.6508098840713501, | |
| "learning_rate": 4.0036647935499634e-05, | |
| "loss": 0.8532, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.200322470318073, | |
| "grad_norm": 0.8091655969619751, | |
| "learning_rate": 3.998778402150012e-05, | |
| "loss": 0.8691, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.20129965310011239, | |
| "grad_norm": 0.8139657378196716, | |
| "learning_rate": 3.993892010750061e-05, | |
| "loss": 0.8608, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.20227683588215176, | |
| "grad_norm": 0.628423273563385, | |
| "learning_rate": 3.9890056193501105e-05, | |
| "loss": 0.8369, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.20325401866419113, | |
| "grad_norm": 1.737331748008728, | |
| "learning_rate": 3.9841192279501586e-05, | |
| "loss": 0.8363, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.20423120144623053, | |
| "grad_norm": 1.036280870437622, | |
| "learning_rate": 3.979232836550208e-05, | |
| "loss": 0.8387, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.2052083842282699, | |
| "grad_norm": 0.35834863781929016, | |
| "learning_rate": 3.974346445150256e-05, | |
| "loss": 0.8565, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.20618556701030927, | |
| "grad_norm": 0.7657331824302673, | |
| "learning_rate": 3.969460053750306e-05, | |
| "loss": 0.8654, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.20716274979234867, | |
| "grad_norm": 1.077300786972046, | |
| "learning_rate": 3.9645736623503545e-05, | |
| "loss": 0.8218, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.20813993257438804, | |
| "grad_norm": 0.5806353688240051, | |
| "learning_rate": 3.959687270950403e-05, | |
| "loss": 0.8375, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.2091171153564274, | |
| "grad_norm": 0.3875705599784851, | |
| "learning_rate": 3.954800879550452e-05, | |
| "loss": 0.8342, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.2100942981384668, | |
| "grad_norm": 0.7829961180686951, | |
| "learning_rate": 3.949914488150501e-05, | |
| "loss": 0.832, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.21107148092050618, | |
| "grad_norm": 1.9466382265090942, | |
| "learning_rate": 3.94502809675055e-05, | |
| "loss": 0.8118, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.21204866370254555, | |
| "grad_norm": 0.6271357536315918, | |
| "learning_rate": 3.940141705350599e-05, | |
| "loss": 0.8436, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.21302584648458495, | |
| "grad_norm": 1.320719838142395, | |
| "learning_rate": 3.9352553139506474e-05, | |
| "loss": 0.8586, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.21400302926662432, | |
| "grad_norm": 0.6017069220542908, | |
| "learning_rate": 3.930368922550697e-05, | |
| "loss": 0.8242, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.21498021204866372, | |
| "grad_norm": 0.8584203124046326, | |
| "learning_rate": 3.9254825311507456e-05, | |
| "loss": 0.815, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.2159573948307031, | |
| "grad_norm": 0.623652458190918, | |
| "learning_rate": 3.920596139750794e-05, | |
| "loss": 0.812, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.21693457761274246, | |
| "grad_norm": 0.6867117881774902, | |
| "learning_rate": 3.915709748350843e-05, | |
| "loss": 0.8141, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.21791176039478186, | |
| "grad_norm": 0.6963294744491577, | |
| "learning_rate": 3.910823356950892e-05, | |
| "loss": 0.8227, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.21888894317682123, | |
| "grad_norm": 0.6727440357208252, | |
| "learning_rate": 3.905936965550941e-05, | |
| "loss": 0.8285, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.2198661259588606, | |
| "grad_norm": 1.261771559715271, | |
| "learning_rate": 3.90105057415099e-05, | |
| "loss": 0.8396, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.2208433087409, | |
| "grad_norm": 0.9146804809570312, | |
| "learning_rate": 3.8961641827510385e-05, | |
| "loss": 0.8194, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.22182049152293937, | |
| "grad_norm": 0.9350225329399109, | |
| "learning_rate": 3.891277791351087e-05, | |
| "loss": 0.8376, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.22279767430497874, | |
| "grad_norm": 0.6317518353462219, | |
| "learning_rate": 3.886391399951137e-05, | |
| "loss": 0.8313, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.22377485708701814, | |
| "grad_norm": 0.6716780662536621, | |
| "learning_rate": 3.881505008551185e-05, | |
| "loss": 0.8033, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.2247520398690575, | |
| "grad_norm": 0.4494755268096924, | |
| "learning_rate": 3.8766186171512344e-05, | |
| "loss": 0.8047, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.22572922265109688, | |
| "grad_norm": 0.5505642890930176, | |
| "learning_rate": 3.8717322257512825e-05, | |
| "loss": 0.8456, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.22670640543313628, | |
| "grad_norm": 0.8866478800773621, | |
| "learning_rate": 3.866845834351332e-05, | |
| "loss": 0.8105, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.22768358821517565, | |
| "grad_norm": 0.7525384426116943, | |
| "learning_rate": 3.861959442951381e-05, | |
| "loss": 0.8292, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.22866077099721502, | |
| "grad_norm": 0.8182941675186157, | |
| "learning_rate": 3.8570730515514296e-05, | |
| "loss": 0.8392, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.22963795377925442, | |
| "grad_norm": 0.6246720552444458, | |
| "learning_rate": 3.8521866601514784e-05, | |
| "loss": 0.8292, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.2306151365612938, | |
| "grad_norm": 0.7931325435638428, | |
| "learning_rate": 3.847300268751527e-05, | |
| "loss": 0.83, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.23159231934333316, | |
| "grad_norm": 0.4839908480644226, | |
| "learning_rate": 3.842413877351576e-05, | |
| "loss": 0.8544, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.23256950212537256, | |
| "grad_norm": 0.694095253944397, | |
| "learning_rate": 3.837527485951625e-05, | |
| "loss": 0.8168, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.23354668490741193, | |
| "grad_norm": 0.6341009140014648, | |
| "learning_rate": 3.8326410945516736e-05, | |
| "loss": 0.8007, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.2345238676894513, | |
| "grad_norm": 0.6198739409446716, | |
| "learning_rate": 3.8277547031517224e-05, | |
| "loss": 0.8222, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.2355010504714907, | |
| "grad_norm": 0.7246755361557007, | |
| "learning_rate": 3.822868311751772e-05, | |
| "loss": 0.8239, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.23647823325353007, | |
| "grad_norm": 1.1782780885696411, | |
| "learning_rate": 3.81798192035182e-05, | |
| "loss": 0.8069, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.23745541603556947, | |
| "grad_norm": 0.7902185320854187, | |
| "learning_rate": 3.8130955289518695e-05, | |
| "loss": 0.8283, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.23843259881760884, | |
| "grad_norm": 1.605393648147583, | |
| "learning_rate": 3.808209137551918e-05, | |
| "loss": 0.7758, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.2394097815996482, | |
| "grad_norm": 0.5076558589935303, | |
| "learning_rate": 3.803322746151967e-05, | |
| "loss": 0.8178, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.2403869643816876, | |
| "grad_norm": 0.777646005153656, | |
| "learning_rate": 3.798436354752016e-05, | |
| "loss": 0.8074, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.24136414716372698, | |
| "grad_norm": 1.3850637674331665, | |
| "learning_rate": 3.793549963352065e-05, | |
| "loss": 0.8058, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.24234132994576635, | |
| "grad_norm": 0.6476046442985535, | |
| "learning_rate": 3.7886635719521136e-05, | |
| "loss": 0.7967, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.24331851272780575, | |
| "grad_norm": 0.5768633484840393, | |
| "learning_rate": 3.7837771805521624e-05, | |
| "loss": 0.8269, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.24429569550984512, | |
| "grad_norm": 0.7800481915473938, | |
| "learning_rate": 3.778890789152211e-05, | |
| "loss": 0.8237, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.2452728782918845, | |
| "grad_norm": 0.591273844242096, | |
| "learning_rate": 3.77400439775226e-05, | |
| "loss": 0.8045, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.2462500610739239, | |
| "grad_norm": 0.5170730352401733, | |
| "learning_rate": 3.769118006352309e-05, | |
| "loss": 0.818, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.24722724385596326, | |
| "grad_norm": 0.7280113101005554, | |
| "learning_rate": 3.7642316149523576e-05, | |
| "loss": 0.806, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.24820442663800263, | |
| "grad_norm": 0.48092082142829895, | |
| "learning_rate": 3.759345223552407e-05, | |
| "loss": 0.804, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.24918160942004203, | |
| "grad_norm": 0.8031238913536072, | |
| "learning_rate": 3.754458832152455e-05, | |
| "loss": 0.8031, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.2501587922020814, | |
| "grad_norm": 0.5290892720222473, | |
| "learning_rate": 3.749572440752505e-05, | |
| "loss": 0.816, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.25113597498412077, | |
| "grad_norm": 1.850685477256775, | |
| "learning_rate": 3.7446860493525535e-05, | |
| "loss": 0.8241, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.25211315776616017, | |
| "grad_norm": 0.9196923971176147, | |
| "learning_rate": 3.739799657952602e-05, | |
| "loss": 0.8115, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.25309034054819957, | |
| "grad_norm": 0.8779144883155823, | |
| "learning_rate": 3.734913266552651e-05, | |
| "loss": 0.8065, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.2540675233302389, | |
| "grad_norm": 0.6696827411651611, | |
| "learning_rate": 3.7300268751527e-05, | |
| "loss": 0.7827, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.2550447061122783, | |
| "grad_norm": 0.5037100315093994, | |
| "learning_rate": 3.725140483752749e-05, | |
| "loss": 0.7955, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.2560218888943177, | |
| "grad_norm": 1.4716683626174927, | |
| "learning_rate": 3.7202540923527975e-05, | |
| "loss": 0.8076, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.25699907167635705, | |
| "grad_norm": 0.7515909671783447, | |
| "learning_rate": 3.7153677009528463e-05, | |
| "loss": 0.7645, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.25797625445839645, | |
| "grad_norm": 0.8641912341117859, | |
| "learning_rate": 3.710481309552896e-05, | |
| "loss": 0.7794, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.25895343724043585, | |
| "grad_norm": 0.7385029792785645, | |
| "learning_rate": 3.705594918152944e-05, | |
| "loss": 0.8047, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.2599306200224752, | |
| "grad_norm": 1.194313645362854, | |
| "learning_rate": 3.700708526752993e-05, | |
| "loss": 0.7973, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.2609078028045146, | |
| "grad_norm": 0.8573377728462219, | |
| "learning_rate": 3.695822135353042e-05, | |
| "loss": 0.8054, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.261884985586554, | |
| "grad_norm": 0.7428358793258667, | |
| "learning_rate": 3.6909357439530904e-05, | |
| "loss": 0.8194, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.26286216836859333, | |
| "grad_norm": 1.1976490020751953, | |
| "learning_rate": 3.68604935255314e-05, | |
| "loss": 0.7745, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.26383935115063273, | |
| "grad_norm": 0.8391226530075073, | |
| "learning_rate": 3.681162961153189e-05, | |
| "loss": 0.7981, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.2648165339326721, | |
| "grad_norm": 1.0753370523452759, | |
| "learning_rate": 3.6762765697532375e-05, | |
| "loss": 0.8018, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.26579371671471147, | |
| "grad_norm": 0.8495202660560608, | |
| "learning_rate": 3.671390178353286e-05, | |
| "loss": 0.7894, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.26677089949675087, | |
| "grad_norm": 2.3333170413970947, | |
| "learning_rate": 3.666503786953335e-05, | |
| "loss": 0.7892, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.26774808227879027, | |
| "grad_norm": 0.7213625311851501, | |
| "learning_rate": 3.661617395553384e-05, | |
| "loss": 0.7902, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.2687252650608296, | |
| "grad_norm": 1.045614242553711, | |
| "learning_rate": 3.6567310041534334e-05, | |
| "loss": 0.7719, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.269702447842869, | |
| "grad_norm": 0.42100274562835693, | |
| "learning_rate": 3.6518446127534815e-05, | |
| "loss": 0.7705, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.2706796306249084, | |
| "grad_norm": 0.5944122076034546, | |
| "learning_rate": 3.646958221353531e-05, | |
| "loss": 0.7717, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.27165681340694775, | |
| "grad_norm": 0.7398585677146912, | |
| "learning_rate": 3.642071829953579e-05, | |
| "loss": 0.7896, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.27263399618898715, | |
| "grad_norm": 0.8064782023429871, | |
| "learning_rate": 3.6371854385536286e-05, | |
| "loss": 0.7917, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.27361117897102655, | |
| "grad_norm": 0.6715266108512878, | |
| "learning_rate": 3.6322990471536774e-05, | |
| "loss": 0.7771, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.2745883617530659, | |
| "grad_norm": 1.1130329370498657, | |
| "learning_rate": 3.6274126557537255e-05, | |
| "loss": 0.7476, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.2755655445351053, | |
| "grad_norm": 0.7601907253265381, | |
| "learning_rate": 3.622526264353775e-05, | |
| "loss": 0.7745, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.2765427273171447, | |
| "grad_norm": 0.8511783480644226, | |
| "learning_rate": 3.617639872953824e-05, | |
| "loss": 0.7737, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.27751991009918403, | |
| "grad_norm": 0.8136917948722839, | |
| "learning_rate": 3.6127534815538726e-05, | |
| "loss": 0.7905, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.27849709288122343, | |
| "grad_norm": 0.5580685138702393, | |
| "learning_rate": 3.6078670901539214e-05, | |
| "loss": 0.7957, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.27947427566326283, | |
| "grad_norm": 0.750845730304718, | |
| "learning_rate": 3.60298069875397e-05, | |
| "loss": 0.7396, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.28045145844530217, | |
| "grad_norm": 0.9611383080482483, | |
| "learning_rate": 3.598094307354019e-05, | |
| "loss": 0.774, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.28142864122734157, | |
| "grad_norm": 0.6622794270515442, | |
| "learning_rate": 3.5932079159540685e-05, | |
| "loss": 0.7993, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.28240582400938097, | |
| "grad_norm": 0.4816977381706238, | |
| "learning_rate": 3.588321524554117e-05, | |
| "loss": 0.7868, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.2833830067914203, | |
| "grad_norm": 0.6779691576957703, | |
| "learning_rate": 3.583435133154166e-05, | |
| "loss": 0.7838, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.2843601895734597, | |
| "grad_norm": 0.9714117646217346, | |
| "learning_rate": 3.578548741754214e-05, | |
| "loss": 0.7686, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.2853373723554991, | |
| "grad_norm": 0.7163410186767578, | |
| "learning_rate": 3.573662350354264e-05, | |
| "loss": 0.7747, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.28631455513753845, | |
| "grad_norm": 0.7338354587554932, | |
| "learning_rate": 3.5687759589543126e-05, | |
| "loss": 0.7703, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.28729173791957785, | |
| "grad_norm": 0.765074610710144, | |
| "learning_rate": 3.5638895675543614e-05, | |
| "loss": 0.7811, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.28826892070161725, | |
| "grad_norm": 0.6714346408843994, | |
| "learning_rate": 3.55900317615441e-05, | |
| "loss": 0.7971, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.2892461034836566, | |
| "grad_norm": 0.6784923672676086, | |
| "learning_rate": 3.554116784754459e-05, | |
| "loss": 0.7704, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.290223286265696, | |
| "grad_norm": 0.6446245312690735, | |
| "learning_rate": 3.549230393354508e-05, | |
| "loss": 0.7843, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.2912004690477354, | |
| "grad_norm": 0.9739934206008911, | |
| "learning_rate": 3.5443440019545566e-05, | |
| "loss": 0.7423, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.2921776518297748, | |
| "grad_norm": 0.2898177206516266, | |
| "learning_rate": 3.5394576105546054e-05, | |
| "loss": 0.7322, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.29315483461181413, | |
| "grad_norm": 0.720974862575531, | |
| "learning_rate": 3.534571219154654e-05, | |
| "loss": 0.7593, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.29413201739385353, | |
| "grad_norm": 0.4672446548938751, | |
| "learning_rate": 3.529684827754704e-05, | |
| "loss": 0.7422, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.2951092001758929, | |
| "grad_norm": 0.7546716332435608, | |
| "learning_rate": 3.524798436354752e-05, | |
| "loss": 0.7788, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.29608638295793227, | |
| "grad_norm": 0.6265705823898315, | |
| "learning_rate": 3.519912044954801e-05, | |
| "loss": 0.745, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.29706356573997167, | |
| "grad_norm": 1.092965841293335, | |
| "learning_rate": 3.51502565355485e-05, | |
| "loss": 0.789, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.29804074852201107, | |
| "grad_norm": 0.7648272514343262, | |
| "learning_rate": 3.510139262154899e-05, | |
| "loss": 0.758, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.2990179313040504, | |
| "grad_norm": 0.785746157169342, | |
| "learning_rate": 3.505252870754948e-05, | |
| "loss": 0.7744, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.2999951140860898, | |
| "grad_norm": 0.8007264733314514, | |
| "learning_rate": 3.5003664793549965e-05, | |
| "loss": 0.7696, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.3009722968681292, | |
| "grad_norm": 1.1369248628616333, | |
| "learning_rate": 3.4954800879550453e-05, | |
| "loss": 0.7667, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.30194947965016855, | |
| "grad_norm": 0.6251523494720459, | |
| "learning_rate": 3.490593696555095e-05, | |
| "loss": 0.7686, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.30292666243220795, | |
| "grad_norm": 1.1552335023880005, | |
| "learning_rate": 3.485707305155143e-05, | |
| "loss": 0.7693, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.30390384521424735, | |
| "grad_norm": 0.9136368036270142, | |
| "learning_rate": 3.480820913755192e-05, | |
| "loss": 0.7898, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.3048810279962867, | |
| "grad_norm": 0.4203650951385498, | |
| "learning_rate": 3.4759345223552406e-05, | |
| "loss": 0.7541, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.3058582107783261, | |
| "grad_norm": 0.671546995639801, | |
| "learning_rate": 3.4710481309552894e-05, | |
| "loss": 0.735, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.3068353935603655, | |
| "grad_norm": 0.6711509227752686, | |
| "learning_rate": 3.466161739555339e-05, | |
| "loss": 0.7481, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.30781257634240483, | |
| "grad_norm": 0.7787076234817505, | |
| "learning_rate": 3.461275348155387e-05, | |
| "loss": 0.7701, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.30878975912444423, | |
| "grad_norm": 0.5270808935165405, | |
| "learning_rate": 3.4563889567554365e-05, | |
| "loss": 0.7166, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.30976694190648363, | |
| "grad_norm": 0.7732633352279663, | |
| "learning_rate": 3.451502565355485e-05, | |
| "loss": 0.7857, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.31074412468852297, | |
| "grad_norm": 0.6347182989120483, | |
| "learning_rate": 3.446616173955534e-05, | |
| "loss": 0.7384, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.31172130747056237, | |
| "grad_norm": 0.9557164311408997, | |
| "learning_rate": 3.441729782555583e-05, | |
| "loss": 0.755, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.31269849025260177, | |
| "grad_norm": 0.8120887279510498, | |
| "learning_rate": 3.436843391155632e-05, | |
| "loss": 0.7356, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.3136756730346411, | |
| "grad_norm": 0.6804450750350952, | |
| "learning_rate": 3.4319569997556805e-05, | |
| "loss": 0.785, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.3146528558166805, | |
| "grad_norm": 0.7511081695556641, | |
| "learning_rate": 3.42707060835573e-05, | |
| "loss": 0.7427, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.3156300385987199, | |
| "grad_norm": 0.8396822214126587, | |
| "learning_rate": 3.422184216955778e-05, | |
| "loss": 0.7801, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.31660722138075925, | |
| "grad_norm": 1.0063520669937134, | |
| "learning_rate": 3.4172978255558276e-05, | |
| "loss": 0.7638, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.31758440416279865, | |
| "grad_norm": 1.349414587020874, | |
| "learning_rate": 3.412411434155876e-05, | |
| "loss": 0.7522, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.31856158694483805, | |
| "grad_norm": 0.8259103298187256, | |
| "learning_rate": 3.4075250427559245e-05, | |
| "loss": 0.7351, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.3195387697268774, | |
| "grad_norm": 0.4894813597202301, | |
| "learning_rate": 3.402638651355974e-05, | |
| "loss": 0.7593, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.3205159525089168, | |
| "grad_norm": 0.6558930277824402, | |
| "learning_rate": 3.397752259956022e-05, | |
| "loss": 0.7496, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.3214931352909562, | |
| "grad_norm": 1.2009482383728027, | |
| "learning_rate": 3.3928658685560716e-05, | |
| "loss": 0.7379, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.32247031807299553, | |
| "grad_norm": 0.8621765375137329, | |
| "learning_rate": 3.3879794771561204e-05, | |
| "loss": 0.7381, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.32344750085503493, | |
| "grad_norm": 0.5097255706787109, | |
| "learning_rate": 3.383093085756169e-05, | |
| "loss": 0.7567, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.32442468363707433, | |
| "grad_norm": 0.48458051681518555, | |
| "learning_rate": 3.378206694356218e-05, | |
| "loss": 0.7649, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.3254018664191137, | |
| "grad_norm": 0.7467001676559448, | |
| "learning_rate": 3.373320302956267e-05, | |
| "loss": 0.7612, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.32637904920115307, | |
| "grad_norm": 1.1591566801071167, | |
| "learning_rate": 3.368433911556316e-05, | |
| "loss": 0.7394, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.32735623198319247, | |
| "grad_norm": 0.9665714502334595, | |
| "learning_rate": 3.363547520156365e-05, | |
| "loss": 0.7472, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.3283334147652318, | |
| "grad_norm": 0.5714060664176941, | |
| "learning_rate": 3.358661128756413e-05, | |
| "loss": 0.7385, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.3293105975472712, | |
| "grad_norm": 0.8278976082801819, | |
| "learning_rate": 3.353774737356463e-05, | |
| "loss": 0.724, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.3302877803293106, | |
| "grad_norm": 0.9210988283157349, | |
| "learning_rate": 3.3488883459565116e-05, | |
| "loss": 0.7542, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.33126496311134995, | |
| "grad_norm": 1.0610690116882324, | |
| "learning_rate": 3.3440019545565604e-05, | |
| "loss": 0.7284, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.33224214589338935, | |
| "grad_norm": 0.6521257162094116, | |
| "learning_rate": 3.339115563156609e-05, | |
| "loss": 0.755, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.33321932867542875, | |
| "grad_norm": 1.0515367984771729, | |
| "learning_rate": 3.334229171756657e-05, | |
| "loss": 0.7423, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.3341965114574681, | |
| "grad_norm": 0.8415219783782959, | |
| "learning_rate": 3.329342780356707e-05, | |
| "loss": 0.716, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.3351736942395075, | |
| "grad_norm": 0.5018264651298523, | |
| "learning_rate": 3.3244563889567556e-05, | |
| "loss": 0.7556, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.3361508770215469, | |
| "grad_norm": 0.6532925963401794, | |
| "learning_rate": 3.3195699975568044e-05, | |
| "loss": 0.7335, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.3371280598035863, | |
| "grad_norm": 0.6794486045837402, | |
| "learning_rate": 3.314683606156853e-05, | |
| "loss": 0.7466, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.33810524258562563, | |
| "grad_norm": 0.7372865080833435, | |
| "learning_rate": 3.309797214756902e-05, | |
| "loss": 0.727, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.33908242536766503, | |
| "grad_norm": 0.6354756355285645, | |
| "learning_rate": 3.304910823356951e-05, | |
| "loss": 0.725, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.34005960814970443, | |
| "grad_norm": 0.7180996537208557, | |
| "learning_rate": 3.300024431957e-05, | |
| "loss": 0.7049, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.34103679093174377, | |
| "grad_norm": 1.3991978168487549, | |
| "learning_rate": 3.2951380405570484e-05, | |
| "loss": 0.7251, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.34201397371378317, | |
| "grad_norm": 0.5680633783340454, | |
| "learning_rate": 3.290251649157098e-05, | |
| "loss": 0.744, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.34299115649582257, | |
| "grad_norm": 0.5309197306632996, | |
| "learning_rate": 3.285365257757147e-05, | |
| "loss": 0.7277, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.3439683392778619, | |
| "grad_norm": 1.449625849723816, | |
| "learning_rate": 3.2804788663571955e-05, | |
| "loss": 0.7127, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.3449455220599013, | |
| "grad_norm": 0.6244996190071106, | |
| "learning_rate": 3.2755924749572443e-05, | |
| "loss": 0.6992, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.3459227048419407, | |
| "grad_norm": 1.037988305091858, | |
| "learning_rate": 3.270706083557293e-05, | |
| "loss": 0.7095, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.34689988762398005, | |
| "grad_norm": 1.2503726482391357, | |
| "learning_rate": 3.265819692157342e-05, | |
| "loss": 0.7264, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.34787707040601945, | |
| "grad_norm": 1.2136774063110352, | |
| "learning_rate": 3.260933300757391e-05, | |
| "loss": 0.7418, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.34885425318805885, | |
| "grad_norm": 0.9328750371932983, | |
| "learning_rate": 3.2560469093574396e-05, | |
| "loss": 0.7509, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.3498314359700982, | |
| "grad_norm": 0.5122935771942139, | |
| "learning_rate": 3.2511605179574884e-05, | |
| "loss": 0.7114, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.3508086187521376, | |
| "grad_norm": 1.153583288192749, | |
| "learning_rate": 3.246274126557537e-05, | |
| "loss": 0.7316, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.351785801534177, | |
| "grad_norm": 0.7405250668525696, | |
| "learning_rate": 3.241387735157586e-05, | |
| "loss": 0.7404, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.35276298431621633, | |
| "grad_norm": 0.607565701007843, | |
| "learning_rate": 3.2365013437576355e-05, | |
| "loss": 0.7196, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.35374016709825573, | |
| "grad_norm": 1.4975577592849731, | |
| "learning_rate": 3.2316149523576836e-05, | |
| "loss": 0.703, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.35471734988029513, | |
| "grad_norm": 0.9088447093963623, | |
| "learning_rate": 3.226728560957733e-05, | |
| "loss": 0.7203, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.3556945326623345, | |
| "grad_norm": 0.9132680892944336, | |
| "learning_rate": 3.221842169557782e-05, | |
| "loss": 0.7248, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.35667171544437387, | |
| "grad_norm": 0.7861882448196411, | |
| "learning_rate": 3.216955778157831e-05, | |
| "loss": 0.7118, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.35764889822641327, | |
| "grad_norm": 1.2251768112182617, | |
| "learning_rate": 3.2120693867578795e-05, | |
| "loss": 0.7304, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.3586260810084526, | |
| "grad_norm": 1.1924370527267456, | |
| "learning_rate": 3.207182995357928e-05, | |
| "loss": 0.7394, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.359603263790492, | |
| "grad_norm": 0.7275030016899109, | |
| "learning_rate": 3.202296603957977e-05, | |
| "loss": 0.7399, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.3605804465725314, | |
| "grad_norm": 0.7406324148178101, | |
| "learning_rate": 3.1974102125580266e-05, | |
| "loss": 0.7432, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.36155762935457075, | |
| "grad_norm": 1.0701793432235718, | |
| "learning_rate": 3.192523821158075e-05, | |
| "loss": 0.7099, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.36253481213661015, | |
| "grad_norm": 0.7077426314353943, | |
| "learning_rate": 3.1876374297581235e-05, | |
| "loss": 0.7127, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.36351199491864955, | |
| "grad_norm": 0.5806621313095093, | |
| "learning_rate": 3.1827510383581723e-05, | |
| "loss": 0.7002, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.3644891777006889, | |
| "grad_norm": 1.1311944723129272, | |
| "learning_rate": 3.177864646958221e-05, | |
| "loss": 0.6876, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.3654663604827283, | |
| "grad_norm": 0.9112023711204529, | |
| "learning_rate": 3.1729782555582706e-05, | |
| "loss": 0.7169, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.3664435432647677, | |
| "grad_norm": 0.5986848473548889, | |
| "learning_rate": 3.168091864158319e-05, | |
| "loss": 0.7294, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.36742072604680703, | |
| "grad_norm": 1.297155737876892, | |
| "learning_rate": 3.163205472758368e-05, | |
| "loss": 0.7061, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.36839790882884643, | |
| "grad_norm": 0.6597927808761597, | |
| "learning_rate": 3.158319081358417e-05, | |
| "loss": 0.7166, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.36937509161088583, | |
| "grad_norm": 0.36105087399482727, | |
| "learning_rate": 3.153432689958466e-05, | |
| "loss": 0.7017, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.3703522743929252, | |
| "grad_norm": 0.5487505197525024, | |
| "learning_rate": 3.148546298558515e-05, | |
| "loss": 0.7081, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.3713294571749646, | |
| "grad_norm": 1.5384310483932495, | |
| "learning_rate": 3.1436599071585635e-05, | |
| "loss": 0.7064, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.37230663995700397, | |
| "grad_norm": 1.0113205909729004, | |
| "learning_rate": 3.138773515758612e-05, | |
| "loss": 0.7197, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.3732838227390433, | |
| "grad_norm": 1.4755492210388184, | |
| "learning_rate": 3.133887124358662e-05, | |
| "loss": 0.755, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.3742610055210827, | |
| "grad_norm": 0.7554188370704651, | |
| "learning_rate": 3.12900073295871e-05, | |
| "loss": 0.7083, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.3752381883031221, | |
| "grad_norm": 0.7589747905731201, | |
| "learning_rate": 3.1241143415587594e-05, | |
| "loss": 0.6917, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.37621537108516145, | |
| "grad_norm": 0.485612690448761, | |
| "learning_rate": 3.119227950158808e-05, | |
| "loss": 0.7429, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.37719255386720085, | |
| "grad_norm": 0.5043421983718872, | |
| "learning_rate": 3.114341558758856e-05, | |
| "loss": 0.7217, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.37816973664924025, | |
| "grad_norm": 1.6078003644943237, | |
| "learning_rate": 3.109455167358906e-05, | |
| "loss": 0.7019, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.3791469194312796, | |
| "grad_norm": 0.3607342839241028, | |
| "learning_rate": 3.104568775958954e-05, | |
| "loss": 0.772, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.380124102213319, | |
| "grad_norm": 1.002525806427002, | |
| "learning_rate": 3.0996823845590034e-05, | |
| "loss": 0.7213, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.3811012849953584, | |
| "grad_norm": 0.7605811357498169, | |
| "learning_rate": 3.094795993159052e-05, | |
| "loss": 0.7, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.3820784677773978, | |
| "grad_norm": 2.388939619064331, | |
| "learning_rate": 3.089909601759101e-05, | |
| "loss": 0.7307, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.38305565055943713, | |
| "grad_norm": 0.824883222579956, | |
| "learning_rate": 3.08502321035915e-05, | |
| "loss": 0.7255, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.38403283334147653, | |
| "grad_norm": 0.6755787134170532, | |
| "learning_rate": 3.0801368189591986e-05, | |
| "loss": 0.7013, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.38501001612351593, | |
| "grad_norm": 0.580859899520874, | |
| "learning_rate": 3.0752504275592474e-05, | |
| "loss": 0.7357, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.3859871989055553, | |
| "grad_norm": 0.6988548636436462, | |
| "learning_rate": 3.070364036159297e-05, | |
| "loss": 0.6902, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.38696438168759467, | |
| "grad_norm": 0.5997043251991272, | |
| "learning_rate": 3.065477644759345e-05, | |
| "loss": 0.7093, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.38794156446963407, | |
| "grad_norm": 0.7906262874603271, | |
| "learning_rate": 3.0605912533593945e-05, | |
| "loss": 0.7376, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.3889187472516734, | |
| "grad_norm": 0.7436035871505737, | |
| "learning_rate": 3.0557048619594433e-05, | |
| "loss": 0.7159, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.3898959300337128, | |
| "grad_norm": 0.6913009285926819, | |
| "learning_rate": 3.050818470559492e-05, | |
| "loss": 0.7267, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.3908731128157522, | |
| "grad_norm": 1.0030348300933838, | |
| "learning_rate": 3.045932079159541e-05, | |
| "loss": 0.7186, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.39185029559779155, | |
| "grad_norm": 0.7223851084709167, | |
| "learning_rate": 3.0410456877595894e-05, | |
| "loss": 0.7113, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.39282747837983095, | |
| "grad_norm": 1.0449798107147217, | |
| "learning_rate": 3.0361592963596386e-05, | |
| "loss": 0.6985, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.39380466116187035, | |
| "grad_norm": 0.7078452110290527, | |
| "learning_rate": 3.031272904959687e-05, | |
| "loss": 0.714, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.3947818439439097, | |
| "grad_norm": 0.5977550148963928, | |
| "learning_rate": 3.0263865135597362e-05, | |
| "loss": 0.7126, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.3957590267259491, | |
| "grad_norm": 0.6963929533958435, | |
| "learning_rate": 3.021500122159785e-05, | |
| "loss": 0.6922, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.3967362095079885, | |
| "grad_norm": 0.49735382199287415, | |
| "learning_rate": 3.016613730759834e-05, | |
| "loss": 0.6914, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.39771339229002783, | |
| "grad_norm": 0.8894415497779846, | |
| "learning_rate": 3.0117273393598826e-05, | |
| "loss": 0.6988, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.39869057507206723, | |
| "grad_norm": 0.5845156311988831, | |
| "learning_rate": 3.0068409479599317e-05, | |
| "loss": 0.705, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.39966775785410663, | |
| "grad_norm": 0.7496864199638367, | |
| "learning_rate": 3.0019545565599806e-05, | |
| "loss": 0.669, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.400644940636146, | |
| "grad_norm": 1.2446004152297974, | |
| "learning_rate": 2.9970681651600297e-05, | |
| "loss": 0.7063, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.4016221234181854, | |
| "grad_norm": 0.37521255016326904, | |
| "learning_rate": 2.992181773760078e-05, | |
| "loss": 0.6966, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.40259930620022477, | |
| "grad_norm": 0.7953245639801025, | |
| "learning_rate": 2.9872953823601273e-05, | |
| "loss": 0.6934, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.4035764889822641, | |
| "grad_norm": 0.844543993473053, | |
| "learning_rate": 2.982408990960176e-05, | |
| "loss": 0.6926, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.4045536717643035, | |
| "grad_norm": 0.5298857688903809, | |
| "learning_rate": 2.9775225995602253e-05, | |
| "loss": 0.6926, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.4055308545463429, | |
| "grad_norm": 0.6932188272476196, | |
| "learning_rate": 2.9726362081602737e-05, | |
| "loss": 0.6868, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.40650803732838225, | |
| "grad_norm": 0.7204051613807678, | |
| "learning_rate": 2.9677498167603225e-05, | |
| "loss": 0.7064, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.40748522011042165, | |
| "grad_norm": 1.0420963764190674, | |
| "learning_rate": 2.9628634253603717e-05, | |
| "loss": 0.7072, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.40846240289246105, | |
| "grad_norm": 0.4677026867866516, | |
| "learning_rate": 2.95797703396042e-05, | |
| "loss": 0.691, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.4094395856745004, | |
| "grad_norm": 0.6934903860092163, | |
| "learning_rate": 2.9530906425604693e-05, | |
| "loss": 0.6962, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.4104167684565398, | |
| "grad_norm": 0.7500805854797363, | |
| "learning_rate": 2.9482042511605178e-05, | |
| "loss": 0.708, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.4113939512385792, | |
| "grad_norm": 0.8887515664100647, | |
| "learning_rate": 2.943317859760567e-05, | |
| "loss": 0.702, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.41237113402061853, | |
| "grad_norm": 0.39899566769599915, | |
| "learning_rate": 2.9384314683606157e-05, | |
| "loss": 0.709, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.41334831680265793, | |
| "grad_norm": 0.8467943668365479, | |
| "learning_rate": 2.933545076960665e-05, | |
| "loss": 0.6928, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.41432549958469733, | |
| "grad_norm": 0.6024282574653625, | |
| "learning_rate": 2.9286586855607133e-05, | |
| "loss": 0.6928, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.4153026823667367, | |
| "grad_norm": 0.7921658158302307, | |
| "learning_rate": 2.9237722941607625e-05, | |
| "loss": 0.6865, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.4162798651487761, | |
| "grad_norm": 0.9025784730911255, | |
| "learning_rate": 2.9188859027608113e-05, | |
| "loss": 0.6863, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.4172570479308155, | |
| "grad_norm": 0.9453756809234619, | |
| "learning_rate": 2.9139995113608604e-05, | |
| "loss": 0.6924, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.4182342307128548, | |
| "grad_norm": 0.8638947010040283, | |
| "learning_rate": 2.909113119960909e-05, | |
| "loss": 0.7011, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.4192114134948942, | |
| "grad_norm": 0.6639747619628906, | |
| "learning_rate": 2.904226728560958e-05, | |
| "loss": 0.6766, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.4201885962769336, | |
| "grad_norm": 0.7019941210746765, | |
| "learning_rate": 2.899340337161007e-05, | |
| "loss": 0.7025, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.42116577905897296, | |
| "grad_norm": 0.6988587379455566, | |
| "learning_rate": 2.8944539457610553e-05, | |
| "loss": 0.6768, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.42214296184101235, | |
| "grad_norm": 0.5817476511001587, | |
| "learning_rate": 2.8895675543611045e-05, | |
| "loss": 0.702, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.42312014462305175, | |
| "grad_norm": 0.4533466398715973, | |
| "learning_rate": 2.8846811629611533e-05, | |
| "loss": 0.6949, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.4240973274050911, | |
| "grad_norm": 0.6197069883346558, | |
| "learning_rate": 2.8797947715612024e-05, | |
| "loss": 0.684, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.4250745101871305, | |
| "grad_norm": 1.693144679069519, | |
| "learning_rate": 2.874908380161251e-05, | |
| "loss": 0.7201, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.4260516929691699, | |
| "grad_norm": 1.1772024631500244, | |
| "learning_rate": 2.8700219887613e-05, | |
| "loss": 0.6936, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.4270288757512093, | |
| "grad_norm": 0.5265709161758423, | |
| "learning_rate": 2.8651355973613485e-05, | |
| "loss": 0.6994, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.42800605853324863, | |
| "grad_norm": 0.8301248550415039, | |
| "learning_rate": 2.8602492059613976e-05, | |
| "loss": 0.6968, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.42898324131528803, | |
| "grad_norm": 1.2123380899429321, | |
| "learning_rate": 2.8553628145614464e-05, | |
| "loss": 0.7013, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.42996042409732743, | |
| "grad_norm": 1.3780418634414673, | |
| "learning_rate": 2.8504764231614956e-05, | |
| "loss": 0.6826, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.4309376068793668, | |
| "grad_norm": 0.6333886981010437, | |
| "learning_rate": 2.845590031761544e-05, | |
| "loss": 0.6842, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.4319147896614062, | |
| "grad_norm": 0.5353469252586365, | |
| "learning_rate": 2.8407036403615932e-05, | |
| "loss": 0.6751, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.43289197244344557, | |
| "grad_norm": 0.9482343792915344, | |
| "learning_rate": 2.835817248961642e-05, | |
| "loss": 0.6961, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.4338691552254849, | |
| "grad_norm": 0.7306164503097534, | |
| "learning_rate": 2.830930857561691e-05, | |
| "loss": 0.6829, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.4348463380075243, | |
| "grad_norm": 0.9290406107902527, | |
| "learning_rate": 2.8260444661617396e-05, | |
| "loss": 0.7109, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.4358235207895637, | |
| "grad_norm": 0.5903436541557312, | |
| "learning_rate": 2.8211580747617884e-05, | |
| "loss": 0.7144, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.43680070357160306, | |
| "grad_norm": 0.7370823621749878, | |
| "learning_rate": 2.8162716833618376e-05, | |
| "loss": 0.6858, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.43777788635364245, | |
| "grad_norm": 0.5477197766304016, | |
| "learning_rate": 2.811385291961886e-05, | |
| "loss": 0.6951, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.43875506913568185, | |
| "grad_norm": 0.8994666934013367, | |
| "learning_rate": 2.8064989005619352e-05, | |
| "loss": 0.705, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.4397322519177212, | |
| "grad_norm": 1.171186089515686, | |
| "learning_rate": 2.8016125091619836e-05, | |
| "loss": 0.6812, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.4407094346997606, | |
| "grad_norm": 0.6986414194107056, | |
| "learning_rate": 2.796726117762033e-05, | |
| "loss": 0.6729, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.4416866174818, | |
| "grad_norm": 0.8245409727096558, | |
| "learning_rate": 2.7918397263620816e-05, | |
| "loss": 0.6679, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.44266380026383934, | |
| "grad_norm": 0.8805913925170898, | |
| "learning_rate": 2.7869533349621307e-05, | |
| "loss": 0.7042, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.44364098304587873, | |
| "grad_norm": 0.7037094831466675, | |
| "learning_rate": 2.7820669435621792e-05, | |
| "loss": 0.6988, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.44461816582791813, | |
| "grad_norm": 1.118363380432129, | |
| "learning_rate": 2.7771805521622284e-05, | |
| "loss": 0.6866, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.4455953486099575, | |
| "grad_norm": 1.0665768384933472, | |
| "learning_rate": 2.772294160762277e-05, | |
| "loss": 0.6732, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.4465725313919969, | |
| "grad_norm": 0.7593882083892822, | |
| "learning_rate": 2.7674077693623263e-05, | |
| "loss": 0.6951, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.4475497141740363, | |
| "grad_norm": 2.3182179927825928, | |
| "learning_rate": 2.7625213779623748e-05, | |
| "loss": 0.6695, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.4485268969560756, | |
| "grad_norm": 1.2548315525054932, | |
| "learning_rate": 2.757634986562424e-05, | |
| "loss": 0.7128, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.449504079738115, | |
| "grad_norm": 0.8613176941871643, | |
| "learning_rate": 2.7527485951624727e-05, | |
| "loss": 0.6956, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.4504812625201544, | |
| "grad_norm": 0.946165919303894, | |
| "learning_rate": 2.7478622037625212e-05, | |
| "loss": 0.7177, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.45145844530219376, | |
| "grad_norm": 0.9122072458267212, | |
| "learning_rate": 2.7429758123625703e-05, | |
| "loss": 0.7094, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.45243562808423315, | |
| "grad_norm": 0.8797391057014465, | |
| "learning_rate": 2.738089420962619e-05, | |
| "loss": 0.7118, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.45341281086627255, | |
| "grad_norm": 0.5321417450904846, | |
| "learning_rate": 2.7332030295626683e-05, | |
| "loss": 0.6923, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.4543899936483119, | |
| "grad_norm": 1.0878016948699951, | |
| "learning_rate": 2.7283166381627168e-05, | |
| "loss": 0.72, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.4553671764303513, | |
| "grad_norm": 0.8534865975379944, | |
| "learning_rate": 2.723430246762766e-05, | |
| "loss": 0.6945, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.4563443592123907, | |
| "grad_norm": 0.8475703597068787, | |
| "learning_rate": 2.7185438553628144e-05, | |
| "loss": 0.6891, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.45732154199443004, | |
| "grad_norm": 0.7100959420204163, | |
| "learning_rate": 2.713657463962864e-05, | |
| "loss": 0.6605, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.45829872477646943, | |
| "grad_norm": 0.6616931557655334, | |
| "learning_rate": 2.7087710725629123e-05, | |
| "loss": 0.6678, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.45927590755850883, | |
| "grad_norm": 1.2114359140396118, | |
| "learning_rate": 2.7038846811629615e-05, | |
| "loss": 0.6525, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.4602530903405482, | |
| "grad_norm": 0.4216634929180145, | |
| "learning_rate": 2.69899828976301e-05, | |
| "loss": 0.6881, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.4612302731225876, | |
| "grad_norm": 0.7598534822463989, | |
| "learning_rate": 2.694111898363059e-05, | |
| "loss": 0.6555, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.462207455904627, | |
| "grad_norm": 0.9792212843894958, | |
| "learning_rate": 2.689225506963108e-05, | |
| "loss": 0.6866, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.4631846386866663, | |
| "grad_norm": 0.5867584943771362, | |
| "learning_rate": 2.684339115563157e-05, | |
| "loss": 0.6541, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.4641618214687057, | |
| "grad_norm": 0.8288137912750244, | |
| "learning_rate": 2.6794527241632055e-05, | |
| "loss": 0.7057, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.4651390042507451, | |
| "grad_norm": 1.5305638313293457, | |
| "learning_rate": 2.6745663327632543e-05, | |
| "loss": 0.6752, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.46611618703278446, | |
| "grad_norm": 1.0784820318222046, | |
| "learning_rate": 2.6696799413633035e-05, | |
| "loss": 0.7041, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.46709336981482386, | |
| "grad_norm": 0.7708161473274231, | |
| "learning_rate": 2.664793549963352e-05, | |
| "loss": 0.6766, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.46807055259686325, | |
| "grad_norm": 0.7639223337173462, | |
| "learning_rate": 2.659907158563401e-05, | |
| "loss": 0.6553, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.4690477353789026, | |
| "grad_norm": 0.4256194233894348, | |
| "learning_rate": 2.65502076716345e-05, | |
| "loss": 0.6921, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.470024918160942, | |
| "grad_norm": 1.2620900869369507, | |
| "learning_rate": 2.650134375763499e-05, | |
| "loss": 0.6945, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.4710021009429814, | |
| "grad_norm": 0.7683165073394775, | |
| "learning_rate": 2.6452479843635475e-05, | |
| "loss": 0.6594, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.4719792837250208, | |
| "grad_norm": 0.784582257270813, | |
| "learning_rate": 2.6403615929635966e-05, | |
| "loss": 0.6877, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.47295646650706014, | |
| "grad_norm": 0.7894740104675293, | |
| "learning_rate": 2.635475201563645e-05, | |
| "loss": 0.6944, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.47393364928909953, | |
| "grad_norm": 0.6949831247329712, | |
| "learning_rate": 2.6305888101636942e-05, | |
| "loss": 0.6625, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.47491083207113893, | |
| "grad_norm": 0.5648496747016907, | |
| "learning_rate": 2.625702418763743e-05, | |
| "loss": 0.6489, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.4758880148531783, | |
| "grad_norm": 0.8879817128181458, | |
| "learning_rate": 2.6208160273637922e-05, | |
| "loss": 0.6465, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.4768651976352177, | |
| "grad_norm": 0.5845817923545837, | |
| "learning_rate": 2.6159296359638407e-05, | |
| "loss": 0.7044, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.4778423804172571, | |
| "grad_norm": 0.8040775060653687, | |
| "learning_rate": 2.6110432445638898e-05, | |
| "loss": 0.6745, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.4788195631992964, | |
| "grad_norm": 0.5439351201057434, | |
| "learning_rate": 2.6061568531639386e-05, | |
| "loss": 0.6924, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.4797967459813358, | |
| "grad_norm": 1.1411272287368774, | |
| "learning_rate": 2.601270461763987e-05, | |
| "loss": 0.6834, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.4807739287633752, | |
| "grad_norm": 0.7273046374320984, | |
| "learning_rate": 2.5963840703640362e-05, | |
| "loss": 0.6547, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.48175111154541456, | |
| "grad_norm": 0.9065064787864685, | |
| "learning_rate": 2.591497678964085e-05, | |
| "loss": 0.6792, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.48272829432745396, | |
| "grad_norm": 0.6722708344459534, | |
| "learning_rate": 2.5866112875641342e-05, | |
| "loss": 0.6913, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.48370547710949335, | |
| "grad_norm": 0.6576828360557556, | |
| "learning_rate": 2.5817248961641826e-05, | |
| "loss": 0.6741, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.4846826598915327, | |
| "grad_norm": 0.46869999170303345, | |
| "learning_rate": 2.5768385047642318e-05, | |
| "loss": 0.6729, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.4856598426735721, | |
| "grad_norm": 0.735565185546875, | |
| "learning_rate": 2.5719521133642806e-05, | |
| "loss": 0.6781, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.4866370254556115, | |
| "grad_norm": 0.6392993927001953, | |
| "learning_rate": 2.5670657219643297e-05, | |
| "loss": 0.6824, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.48761420823765084, | |
| "grad_norm": 3.2004761695861816, | |
| "learning_rate": 2.5621793305643782e-05, | |
| "loss": 0.6862, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.48859139101969024, | |
| "grad_norm": 0.6201328635215759, | |
| "learning_rate": 2.5572929391644274e-05, | |
| "loss": 0.664, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.48956857380172963, | |
| "grad_norm": 1.179991364479065, | |
| "learning_rate": 2.5524065477644758e-05, | |
| "loss": 0.6841, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.490545756583769, | |
| "grad_norm": 0.942451000213623, | |
| "learning_rate": 2.547520156364525e-05, | |
| "loss": 0.6555, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.4915229393658084, | |
| "grad_norm": 1.1190769672393799, | |
| "learning_rate": 2.5426337649645738e-05, | |
| "loss": 0.673, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.4925001221478478, | |
| "grad_norm": 0.712053656578064, | |
| "learning_rate": 2.537747373564623e-05, | |
| "loss": 0.6849, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.4934773049298871, | |
| "grad_norm": 1.3936710357666016, | |
| "learning_rate": 2.5328609821646714e-05, | |
| "loss": 0.6751, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.4944544877119265, | |
| "grad_norm": 0.5909391045570374, | |
| "learning_rate": 2.5279745907647205e-05, | |
| "loss": 0.683, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.4954316704939659, | |
| "grad_norm": 0.8883010149002075, | |
| "learning_rate": 2.5230881993647693e-05, | |
| "loss": 0.6806, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.49640885327600526, | |
| "grad_norm": 0.7069185376167297, | |
| "learning_rate": 2.5182018079648178e-05, | |
| "loss": 0.6779, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.49738603605804466, | |
| "grad_norm": 0.7906535267829895, | |
| "learning_rate": 2.513315416564867e-05, | |
| "loss": 0.663, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.49836321884008405, | |
| "grad_norm": 1.8775051832199097, | |
| "learning_rate": 2.5084290251649158e-05, | |
| "loss": 0.6924, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.4993404016221234, | |
| "grad_norm": 0.4028649628162384, | |
| "learning_rate": 2.503542633764965e-05, | |
| "loss": 0.6611, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.5003175844041629, | |
| "grad_norm": 0.8514829277992249, | |
| "learning_rate": 2.4986562423650137e-05, | |
| "loss": 0.6423, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.5012947671862021, | |
| "grad_norm": 0.5659759044647217, | |
| "learning_rate": 2.4937698509650625e-05, | |
| "loss": 0.6978, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.5022719499682415, | |
| "grad_norm": 0.8396779298782349, | |
| "learning_rate": 2.488883459565111e-05, | |
| "loss": 0.6593, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.5032491327502809, | |
| "grad_norm": 0.6824951767921448, | |
| "learning_rate": 2.48399706816516e-05, | |
| "loss": 0.6839, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.5042263155323203, | |
| "grad_norm": 0.6299941539764404, | |
| "learning_rate": 2.479110676765209e-05, | |
| "loss": 0.6743, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.5052034983143597, | |
| "grad_norm": 1.2409921884536743, | |
| "learning_rate": 2.4742242853652577e-05, | |
| "loss": 0.6477, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.5061806810963991, | |
| "grad_norm": 0.668393075466156, | |
| "learning_rate": 2.4693378939653065e-05, | |
| "loss": 0.6568, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.5071578638784384, | |
| "grad_norm": 0.5376803278923035, | |
| "learning_rate": 2.4644515025653557e-05, | |
| "loss": 0.6476, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.5081350466604778, | |
| "grad_norm": 1.710288166999817, | |
| "learning_rate": 2.4595651111654045e-05, | |
| "loss": 0.6404, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.5091122294425172, | |
| "grad_norm": 0.6142415404319763, | |
| "learning_rate": 2.4546787197654533e-05, | |
| "loss": 0.7026, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.5100894122245566, | |
| "grad_norm": 0.4976397454738617, | |
| "learning_rate": 2.449792328365502e-05, | |
| "loss": 0.6659, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.511066595006596, | |
| "grad_norm": 0.8558853268623352, | |
| "learning_rate": 2.4449059369655513e-05, | |
| "loss": 0.67, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.5120437777886354, | |
| "grad_norm": 0.620583713054657, | |
| "learning_rate": 2.4400195455656e-05, | |
| "loss": 0.6596, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.5130209605706747, | |
| "grad_norm": 0.8520305752754211, | |
| "learning_rate": 2.435133154165649e-05, | |
| "loss": 0.653, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.5139981433527141, | |
| "grad_norm": 0.43671169877052307, | |
| "learning_rate": 2.4302467627656977e-05, | |
| "loss": 0.6554, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.5149753261347535, | |
| "grad_norm": 0.5502797961235046, | |
| "learning_rate": 2.4253603713657465e-05, | |
| "loss": 0.6432, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.5159525089167929, | |
| "grad_norm": 0.918704628944397, | |
| "learning_rate": 2.4204739799657956e-05, | |
| "loss": 0.6604, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.5169296916988323, | |
| "grad_norm": 0.44583848118782043, | |
| "learning_rate": 2.415587588565844e-05, | |
| "loss": 0.6736, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.5179068744808717, | |
| "grad_norm": 0.8312250971794128, | |
| "learning_rate": 2.410701197165893e-05, | |
| "loss": 0.6645, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.518884057262911, | |
| "grad_norm": 0.39499637484550476, | |
| "learning_rate": 2.4058148057659417e-05, | |
| "loss": 0.6876, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.5198612400449504, | |
| "grad_norm": 0.5650041699409485, | |
| "learning_rate": 2.400928414365991e-05, | |
| "loss": 0.691, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.5208384228269898, | |
| "grad_norm": 0.7247036099433899, | |
| "learning_rate": 2.3960420229660397e-05, | |
| "loss": 0.6367, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.5218156056090292, | |
| "grad_norm": 0.8500406742095947, | |
| "learning_rate": 2.3911556315660885e-05, | |
| "loss": 0.6678, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.5227927883910686, | |
| "grad_norm": 1.2467963695526123, | |
| "learning_rate": 2.3862692401661373e-05, | |
| "loss": 0.6439, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.523769971173108, | |
| "grad_norm": 1.0069133043289185, | |
| "learning_rate": 2.3813828487661864e-05, | |
| "loss": 0.6555, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.5247471539551473, | |
| "grad_norm": 0.9213836193084717, | |
| "learning_rate": 2.3764964573662352e-05, | |
| "loss": 0.6374, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.5257243367371867, | |
| "grad_norm": 0.7063928246498108, | |
| "learning_rate": 2.371610065966284e-05, | |
| "loss": 0.6473, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.5267015195192261, | |
| "grad_norm": 0.7876357436180115, | |
| "learning_rate": 2.366723674566333e-05, | |
| "loss": 0.6599, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.5276787023012655, | |
| "grad_norm": 0.5371726751327515, | |
| "learning_rate": 2.3618372831663816e-05, | |
| "loss": 0.6569, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.5286558850833049, | |
| "grad_norm": 0.6501371264457703, | |
| "learning_rate": 2.3569508917664308e-05, | |
| "loss": 0.6502, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.5296330678653443, | |
| "grad_norm": 1.9818251132965088, | |
| "learning_rate": 2.3520645003664796e-05, | |
| "loss": 0.6628, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.5306102506473835, | |
| "grad_norm": 0.6198662519454956, | |
| "learning_rate": 2.3471781089665284e-05, | |
| "loss": 0.6771, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.5315874334294229, | |
| "grad_norm": 0.70624840259552, | |
| "learning_rate": 2.3422917175665772e-05, | |
| "loss": 0.6685, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.5325646162114623, | |
| "grad_norm": 0.5182805061340332, | |
| "learning_rate": 2.337405326166626e-05, | |
| "loss": 0.6651, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.5335417989935017, | |
| "grad_norm": 1.0862709283828735, | |
| "learning_rate": 2.3325189347666748e-05, | |
| "loss": 0.668, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.5345189817755411, | |
| "grad_norm": 0.5830691456794739, | |
| "learning_rate": 2.3276325433667236e-05, | |
| "loss": 0.67, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.5354961645575805, | |
| "grad_norm": 0.5614120960235596, | |
| "learning_rate": 2.3227461519667724e-05, | |
| "loss": 0.6466, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.5364733473396198, | |
| "grad_norm": 0.6346180438995361, | |
| "learning_rate": 2.3178597605668216e-05, | |
| "loss": 0.6784, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.5374505301216592, | |
| "grad_norm": 0.5453216433525085, | |
| "learning_rate": 2.3129733691668704e-05, | |
| "loss": 0.6507, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.5384277129036986, | |
| "grad_norm": 0.8145617246627808, | |
| "learning_rate": 2.3080869777669192e-05, | |
| "loss": 0.6874, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.539404895685738, | |
| "grad_norm": 0.8334397673606873, | |
| "learning_rate": 2.303200586366968e-05, | |
| "loss": 0.6772, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.5403820784677774, | |
| "grad_norm": 0.5468283295631409, | |
| "learning_rate": 2.298314194967017e-05, | |
| "loss": 0.6448, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.5413592612498168, | |
| "grad_norm": 0.8369360566139221, | |
| "learning_rate": 2.293427803567066e-05, | |
| "loss": 0.6593, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.5423364440318562, | |
| "grad_norm": 0.498793363571167, | |
| "learning_rate": 2.2885414121671148e-05, | |
| "loss": 0.6236, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.5433136268138955, | |
| "grad_norm": 0.6096756458282471, | |
| "learning_rate": 2.2836550207671636e-05, | |
| "loss": 0.6766, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.5442908095959349, | |
| "grad_norm": 0.8249727487564087, | |
| "learning_rate": 2.2787686293672124e-05, | |
| "loss": 0.654, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.5452679923779743, | |
| "grad_norm": 0.9821385145187378, | |
| "learning_rate": 2.2738822379672615e-05, | |
| "loss": 0.6633, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.5462451751600137, | |
| "grad_norm": 1.025420069694519, | |
| "learning_rate": 2.26899584656731e-05, | |
| "loss": 0.6691, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.5472223579420531, | |
| "grad_norm": 1.1872769594192505, | |
| "learning_rate": 2.2641094551673588e-05, | |
| "loss": 0.6811, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.5481995407240925, | |
| "grad_norm": 0.6862273812294006, | |
| "learning_rate": 2.259223063767408e-05, | |
| "loss": 0.6503, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.5491767235061318, | |
| "grad_norm": 1.9515796899795532, | |
| "learning_rate": 2.2543366723674567e-05, | |
| "loss": 0.6672, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.5501539062881712, | |
| "grad_norm": 1.5116077661514282, | |
| "learning_rate": 2.2494502809675055e-05, | |
| "loss": 0.6714, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.5511310890702106, | |
| "grad_norm": 0.710858166217804, | |
| "learning_rate": 2.2445638895675544e-05, | |
| "loss": 0.6577, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.55210827185225, | |
| "grad_norm": 0.6870605945587158, | |
| "learning_rate": 2.239677498167603e-05, | |
| "loss": 0.6655, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.5530854546342894, | |
| "grad_norm": 0.802883505821228, | |
| "learning_rate": 2.2347911067676523e-05, | |
| "loss": 0.6812, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.5540626374163288, | |
| "grad_norm": 1.244555115699768, | |
| "learning_rate": 2.229904715367701e-05, | |
| "loss": 0.655, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.5550398201983681, | |
| "grad_norm": 0.7662067413330078, | |
| "learning_rate": 2.22501832396775e-05, | |
| "loss": 0.6867, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.5560170029804075, | |
| "grad_norm": 0.9172037839889526, | |
| "learning_rate": 2.2201319325677987e-05, | |
| "loss": 0.6427, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.5569941857624469, | |
| "grad_norm": 0.8700697422027588, | |
| "learning_rate": 2.215245541167848e-05, | |
| "loss": 0.6959, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.5579713685444863, | |
| "grad_norm": 1.1184202432632446, | |
| "learning_rate": 2.2103591497678967e-05, | |
| "loss": 0.6601, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.5589485513265257, | |
| "grad_norm": 1.1001787185668945, | |
| "learning_rate": 2.2054727583679455e-05, | |
| "loss": 0.6753, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.559925734108565, | |
| "grad_norm": 0.29295894503593445, | |
| "learning_rate": 2.2005863669679943e-05, | |
| "loss": 0.625, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.5609029168906043, | |
| "grad_norm": 0.5778409242630005, | |
| "learning_rate": 2.195699975568043e-05, | |
| "loss": 0.6554, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.5618800996726437, | |
| "grad_norm": 0.8341584801673889, | |
| "learning_rate": 2.190813584168092e-05, | |
| "loss": 0.6324, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.5628572824546831, | |
| "grad_norm": 1.329548716545105, | |
| "learning_rate": 2.1859271927681407e-05, | |
| "loss": 0.6657, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.5638344652367225, | |
| "grad_norm": 0.6559785604476929, | |
| "learning_rate": 2.1810408013681895e-05, | |
| "loss": 0.6411, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.5648116480187619, | |
| "grad_norm": 1.1021350622177124, | |
| "learning_rate": 2.1761544099682387e-05, | |
| "loss": 0.6363, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.5657888308008013, | |
| "grad_norm": 1.0015547275543213, | |
| "learning_rate": 2.1712680185682875e-05, | |
| "loss": 0.632, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.5667660135828406, | |
| "grad_norm": 0.7394452691078186, | |
| "learning_rate": 2.1663816271683363e-05, | |
| "loss": 0.6882, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.56774319636488, | |
| "grad_norm": 1.0177232027053833, | |
| "learning_rate": 2.161495235768385e-05, | |
| "loss": 0.659, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.5687203791469194, | |
| "grad_norm": 1.182385802268982, | |
| "learning_rate": 2.156608844368434e-05, | |
| "loss": 0.6304, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.5696975619289588, | |
| "grad_norm": 0.6992839574813843, | |
| "learning_rate": 2.151722452968483e-05, | |
| "loss": 0.6419, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.5706747447109982, | |
| "grad_norm": 1.127772331237793, | |
| "learning_rate": 2.146836061568532e-05, | |
| "loss": 0.6762, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.5716519274930376, | |
| "grad_norm": 1.0480372905731201, | |
| "learning_rate": 2.1419496701685806e-05, | |
| "loss": 0.649, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.5726291102750769, | |
| "grad_norm": 0.62301105260849, | |
| "learning_rate": 2.1370632787686295e-05, | |
| "loss": 0.6423, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.5736062930571163, | |
| "grad_norm": 0.7996447086334229, | |
| "learning_rate": 2.1321768873686786e-05, | |
| "loss": 0.6675, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.5745834758391557, | |
| "grad_norm": 0.8735845685005188, | |
| "learning_rate": 2.1272904959687274e-05, | |
| "loss": 0.6251, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.5755606586211951, | |
| "grad_norm": 1.0168455839157104, | |
| "learning_rate": 2.1224041045687762e-05, | |
| "loss": 0.6623, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.5765378414032345, | |
| "grad_norm": 0.7308356165885925, | |
| "learning_rate": 2.1175177131688247e-05, | |
| "loss": 0.6613, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.5775150241852739, | |
| "grad_norm": 1.2486464977264404, | |
| "learning_rate": 2.1126313217688738e-05, | |
| "loss": 0.6424, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.5784922069673132, | |
| "grad_norm": 0.8921827077865601, | |
| "learning_rate": 2.1077449303689226e-05, | |
| "loss": 0.6403, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.5794693897493526, | |
| "grad_norm": 0.5246706604957581, | |
| "learning_rate": 2.1028585389689714e-05, | |
| "loss": 0.6494, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.580446572531392, | |
| "grad_norm": 0.8651568293571472, | |
| "learning_rate": 2.0979721475690202e-05, | |
| "loss": 0.6352, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.5814237553134314, | |
| "grad_norm": 0.9502151608467102, | |
| "learning_rate": 2.093085756169069e-05, | |
| "loss": 0.6661, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.5824009380954708, | |
| "grad_norm": 0.6827490925788879, | |
| "learning_rate": 2.0881993647691182e-05, | |
| "loss": 0.625, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.5833781208775102, | |
| "grad_norm": 0.8105266690254211, | |
| "learning_rate": 2.083312973369167e-05, | |
| "loss": 0.6699, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.5843553036595496, | |
| "grad_norm": 1.005845308303833, | |
| "learning_rate": 2.0784265819692158e-05, | |
| "loss": 0.6528, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.5853324864415889, | |
| "grad_norm": 0.8736119270324707, | |
| "learning_rate": 2.0735401905692646e-05, | |
| "loss": 0.6691, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.5863096692236283, | |
| "grad_norm": 0.8782946467399597, | |
| "learning_rate": 2.0686537991693138e-05, | |
| "loss": 0.6677, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.5872868520056677, | |
| "grad_norm": 0.7457369565963745, | |
| "learning_rate": 2.0637674077693626e-05, | |
| "loss": 0.6323, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.5882640347877071, | |
| "grad_norm": 1.0230743885040283, | |
| "learning_rate": 2.0588810163694114e-05, | |
| "loss": 0.6521, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.5892412175697465, | |
| "grad_norm": 0.8328123688697815, | |
| "learning_rate": 2.0539946249694602e-05, | |
| "loss": 0.6356, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.5902184003517859, | |
| "grad_norm": 0.7374850511550903, | |
| "learning_rate": 2.049108233569509e-05, | |
| "loss": 0.6669, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.5911955831338251, | |
| "grad_norm": 0.505228579044342, | |
| "learning_rate": 2.0442218421695578e-05, | |
| "loss": 0.6734, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.5921727659158645, | |
| "grad_norm": 0.8307722210884094, | |
| "learning_rate": 2.0393354507696066e-05, | |
| "loss": 0.657, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.5931499486979039, | |
| "grad_norm": 0.8867704272270203, | |
| "learning_rate": 2.0344490593696554e-05, | |
| "loss": 0.6407, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.5941271314799433, | |
| "grad_norm": 0.716373085975647, | |
| "learning_rate": 2.0295626679697045e-05, | |
| "loss": 0.6428, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.5951043142619827, | |
| "grad_norm": 0.5812042355537415, | |
| "learning_rate": 2.0246762765697534e-05, | |
| "loss": 0.63, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.5960814970440221, | |
| "grad_norm": 1.0057129859924316, | |
| "learning_rate": 2.019789885169802e-05, | |
| "loss": 0.6161, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.5970586798260614, | |
| "grad_norm": 0.6143211126327515, | |
| "learning_rate": 2.014903493769851e-05, | |
| "loss": 0.6454, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.5980358626081008, | |
| "grad_norm": 1.038710594177246, | |
| "learning_rate": 2.0100171023698998e-05, | |
| "loss": 0.6701, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.5990130453901402, | |
| "grad_norm": 0.6891298294067383, | |
| "learning_rate": 2.005130710969949e-05, | |
| "loss": 0.6666, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.5999902281721796, | |
| "grad_norm": 0.7872188091278076, | |
| "learning_rate": 2.0002443195699977e-05, | |
| "loss": 0.6357, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.600967410954219, | |
| "grad_norm": 1.2167768478393555, | |
| "learning_rate": 1.9953579281700465e-05, | |
| "loss": 0.6686, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.6019445937362584, | |
| "grad_norm": 1.0418341159820557, | |
| "learning_rate": 1.9904715367700953e-05, | |
| "loss": 0.6356, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.6029217765182977, | |
| "grad_norm": 0.6209270358085632, | |
| "learning_rate": 1.9855851453701445e-05, | |
| "loss": 0.657, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.6038989593003371, | |
| "grad_norm": 0.8585149645805359, | |
| "learning_rate": 1.9806987539701933e-05, | |
| "loss": 0.6157, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.6048761420823765, | |
| "grad_norm": 0.5286767482757568, | |
| "learning_rate": 1.975812362570242e-05, | |
| "loss": 0.6734, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 0.6058533248644159, | |
| "grad_norm": 0.6499518156051636, | |
| "learning_rate": 1.9709259711702906e-05, | |
| "loss": 0.6545, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.6068305076464553, | |
| "grad_norm": 1.4340311288833618, | |
| "learning_rate": 1.9660395797703397e-05, | |
| "loss": 0.6402, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 0.6078076904284947, | |
| "grad_norm": 0.4783228039741516, | |
| "learning_rate": 1.9611531883703885e-05, | |
| "loss": 0.6495, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.608784873210534, | |
| "grad_norm": 0.6510328054428101, | |
| "learning_rate": 1.9562667969704373e-05, | |
| "loss": 0.6398, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 0.6097620559925734, | |
| "grad_norm": 0.7298358082771301, | |
| "learning_rate": 1.951380405570486e-05, | |
| "loss": 0.6406, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.6107392387746128, | |
| "grad_norm": 0.7467713952064514, | |
| "learning_rate": 1.9464940141705353e-05, | |
| "loss": 0.6618, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.6117164215566522, | |
| "grad_norm": 1.1706078052520752, | |
| "learning_rate": 1.941607622770584e-05, | |
| "loss": 0.6603, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 0.6126936043386916, | |
| "grad_norm": 1.9863495826721191, | |
| "learning_rate": 1.936721231370633e-05, | |
| "loss": 0.628, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 0.613670787120731, | |
| "grad_norm": 1.1297212839126587, | |
| "learning_rate": 1.9318348399706817e-05, | |
| "loss": 0.6198, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.6146479699027703, | |
| "grad_norm": 0.6895560026168823, | |
| "learning_rate": 1.9269484485707305e-05, | |
| "loss": 0.654, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 0.6156251526848097, | |
| "grad_norm": 0.5572859644889832, | |
| "learning_rate": 1.9220620571707796e-05, | |
| "loss": 0.6237, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.6166023354668491, | |
| "grad_norm": 1.7625269889831543, | |
| "learning_rate": 1.9171756657708284e-05, | |
| "loss": 0.6615, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 0.6175795182488885, | |
| "grad_norm": 0.9473828673362732, | |
| "learning_rate": 1.9122892743708773e-05, | |
| "loss": 0.624, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.6185567010309279, | |
| "grad_norm": 1.6622077226638794, | |
| "learning_rate": 1.907402882970926e-05, | |
| "loss": 0.648, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 0.6195338838129673, | |
| "grad_norm": 0.889667809009552, | |
| "learning_rate": 1.9025164915709752e-05, | |
| "loss": 0.6321, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 0.6205110665950065, | |
| "grad_norm": 0.7613341212272644, | |
| "learning_rate": 1.8976301001710237e-05, | |
| "loss": 0.637, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.6214882493770459, | |
| "grad_norm": 0.9912586212158203, | |
| "learning_rate": 1.8927437087710725e-05, | |
| "loss": 0.6422, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.6224654321590853, | |
| "grad_norm": 0.7905563712120056, | |
| "learning_rate": 1.8878573173711213e-05, | |
| "loss": 0.6362, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 0.6234426149411247, | |
| "grad_norm": 0.4368293881416321, | |
| "learning_rate": 1.8829709259711704e-05, | |
| "loss": 0.6472, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 0.6244197977231641, | |
| "grad_norm": 0.8466482758522034, | |
| "learning_rate": 1.8780845345712192e-05, | |
| "loss": 0.673, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 0.6253969805052035, | |
| "grad_norm": 1.4137593507766724, | |
| "learning_rate": 1.873198143171268e-05, | |
| "loss": 0.6382, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.6263741632872428, | |
| "grad_norm": 1.7590171098709106, | |
| "learning_rate": 1.868311751771317e-05, | |
| "loss": 0.6421, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 0.6273513460692822, | |
| "grad_norm": 0.7667103409767151, | |
| "learning_rate": 1.863425360371366e-05, | |
| "loss": 0.6448, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 0.6283285288513216, | |
| "grad_norm": 1.0524508953094482, | |
| "learning_rate": 1.8585389689714148e-05, | |
| "loss": 0.6491, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 0.629305711633361, | |
| "grad_norm": 0.6090672612190247, | |
| "learning_rate": 1.8536525775714636e-05, | |
| "loss": 0.6416, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.6302828944154004, | |
| "grad_norm": 0.5970349311828613, | |
| "learning_rate": 1.8487661861715124e-05, | |
| "loss": 0.6393, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.6312600771974398, | |
| "grad_norm": 0.9564999341964722, | |
| "learning_rate": 1.8438797947715612e-05, | |
| "loss": 0.6656, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 0.6322372599794792, | |
| "grad_norm": 1.319643259048462, | |
| "learning_rate": 1.8389934033716104e-05, | |
| "loss": 0.6372, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 0.6332144427615185, | |
| "grad_norm": 1.0311692953109741, | |
| "learning_rate": 1.8341070119716592e-05, | |
| "loss": 0.6377, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.6341916255435579, | |
| "grad_norm": 0.5185050964355469, | |
| "learning_rate": 1.829220620571708e-05, | |
| "loss": 0.6489, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 0.6351688083255973, | |
| "grad_norm": 1.0611315965652466, | |
| "learning_rate": 1.8243342291717564e-05, | |
| "loss": 0.6262, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.6361459911076367, | |
| "grad_norm": 0.5177842974662781, | |
| "learning_rate": 1.8194478377718056e-05, | |
| "loss": 0.6424, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 0.6371231738896761, | |
| "grad_norm": 0.6148577928543091, | |
| "learning_rate": 1.8145614463718544e-05, | |
| "loss": 0.6402, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.6381003566717155, | |
| "grad_norm": 0.686576247215271, | |
| "learning_rate": 1.8096750549719032e-05, | |
| "loss": 0.6361, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 0.6390775394537548, | |
| "grad_norm": 1.5292381048202515, | |
| "learning_rate": 1.804788663571952e-05, | |
| "loss": 0.6263, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 0.6400547222357942, | |
| "grad_norm": 0.7201911807060242, | |
| "learning_rate": 1.799902272172001e-05, | |
| "loss": 0.6402, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.6410319050178336, | |
| "grad_norm": 0.7407404184341431, | |
| "learning_rate": 1.79501588077205e-05, | |
| "loss": 0.6149, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.642009087799873, | |
| "grad_norm": 0.7911986708641052, | |
| "learning_rate": 1.7901294893720988e-05, | |
| "loss": 0.6273, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 0.6429862705819124, | |
| "grad_norm": 0.467869371175766, | |
| "learning_rate": 1.7852430979721476e-05, | |
| "loss": 0.6344, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 0.6439634533639518, | |
| "grad_norm": 1.0182818174362183, | |
| "learning_rate": 1.7803567065721967e-05, | |
| "loss": 0.612, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 0.6449406361459911, | |
| "grad_norm": 0.5325811505317688, | |
| "learning_rate": 1.7754703151722455e-05, | |
| "loss": 0.6427, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.6459178189280305, | |
| "grad_norm": 1.1324542760849, | |
| "learning_rate": 1.7705839237722943e-05, | |
| "loss": 0.6161, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 0.6468950017100699, | |
| "grad_norm": 0.7836804389953613, | |
| "learning_rate": 1.765697532372343e-05, | |
| "loss": 0.632, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 0.6478721844921093, | |
| "grad_norm": 0.6157903075218201, | |
| "learning_rate": 1.760811140972392e-05, | |
| "loss": 0.6497, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 0.6488493672741487, | |
| "grad_norm": 0.776150643825531, | |
| "learning_rate": 1.755924749572441e-05, | |
| "loss": 0.5929, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.6498265500561881, | |
| "grad_norm": 0.6307646036148071, | |
| "learning_rate": 1.7510383581724896e-05, | |
| "loss": 0.66, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.6508037328382273, | |
| "grad_norm": 0.5305992364883423, | |
| "learning_rate": 1.7461519667725384e-05, | |
| "loss": 0.5985, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.6517809156202667, | |
| "grad_norm": 0.6581500172615051, | |
| "learning_rate": 1.7412655753725872e-05, | |
| "loss": 0.6393, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 0.6527580984023061, | |
| "grad_norm": 1.0988273620605469, | |
| "learning_rate": 1.7363791839726363e-05, | |
| "loss": 0.6453, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.6537352811843455, | |
| "grad_norm": 0.6662785410881042, | |
| "learning_rate": 1.731492792572685e-05, | |
| "loss": 0.6831, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 0.6547124639663849, | |
| "grad_norm": 0.5156288743019104, | |
| "learning_rate": 1.726606401172734e-05, | |
| "loss": 0.647, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.6556896467484243, | |
| "grad_norm": 0.8832482695579529, | |
| "learning_rate": 1.7217200097727827e-05, | |
| "loss": 0.6263, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 0.6566668295304636, | |
| "grad_norm": 0.8194277882575989, | |
| "learning_rate": 1.716833618372832e-05, | |
| "loss": 0.6293, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.657644012312503, | |
| "grad_norm": 0.5544142127037048, | |
| "learning_rate": 1.7119472269728807e-05, | |
| "loss": 0.6207, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 0.6586211950945424, | |
| "grad_norm": 1.0161030292510986, | |
| "learning_rate": 1.7070608355729295e-05, | |
| "loss": 0.6166, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.6595983778765818, | |
| "grad_norm": 1.1273646354675293, | |
| "learning_rate": 1.7021744441729783e-05, | |
| "loss": 0.6326, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.6605755606586212, | |
| "grad_norm": 0.5743687748908997, | |
| "learning_rate": 1.697288052773027e-05, | |
| "loss": 0.5943, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.6615527434406606, | |
| "grad_norm": 0.5743625164031982, | |
| "learning_rate": 1.6924016613730763e-05, | |
| "loss": 0.6337, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 0.6625299262226999, | |
| "grad_norm": 0.47358232736587524, | |
| "learning_rate": 1.687515269973125e-05, | |
| "loss": 0.6272, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.6635071090047393, | |
| "grad_norm": 0.7825568318367004, | |
| "learning_rate": 1.682628878573174e-05, | |
| "loss": 0.6407, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 0.6644842917867787, | |
| "grad_norm": 1.0739299058914185, | |
| "learning_rate": 1.6777424871732227e-05, | |
| "loss": 0.6213, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.6654614745688181, | |
| "grad_norm": 0.6242460608482361, | |
| "learning_rate": 1.6728560957732715e-05, | |
| "loss": 0.6247, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 0.6664386573508575, | |
| "grad_norm": 0.674392580986023, | |
| "learning_rate": 1.6679697043733203e-05, | |
| "loss": 0.6405, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.6674158401328969, | |
| "grad_norm": 0.4114531874656677, | |
| "learning_rate": 1.663083312973369e-05, | |
| "loss": 0.6235, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 0.6683930229149362, | |
| "grad_norm": 0.5812088847160339, | |
| "learning_rate": 1.658196921573418e-05, | |
| "loss": 0.6175, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.6693702056969756, | |
| "grad_norm": 0.48696669936180115, | |
| "learning_rate": 1.653310530173467e-05, | |
| "loss": 0.6264, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.670347388479015, | |
| "grad_norm": 0.5733768939971924, | |
| "learning_rate": 1.648424138773516e-05, | |
| "loss": 0.6371, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.6713245712610544, | |
| "grad_norm": 0.9609115123748779, | |
| "learning_rate": 1.6435377473735647e-05, | |
| "loss": 0.618, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 0.6723017540430938, | |
| "grad_norm": 1.226388692855835, | |
| "learning_rate": 1.6386513559736135e-05, | |
| "loss": 0.6499, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.6732789368251332, | |
| "grad_norm": 0.6776556372642517, | |
| "learning_rate": 1.6337649645736626e-05, | |
| "loss": 0.6356, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 0.6742561196071726, | |
| "grad_norm": 0.6129021644592285, | |
| "learning_rate": 1.6288785731737114e-05, | |
| "loss": 0.6133, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.6752333023892119, | |
| "grad_norm": 1.4161570072174072, | |
| "learning_rate": 1.6239921817737602e-05, | |
| "loss": 0.6419, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 0.6762104851712513, | |
| "grad_norm": 0.5857706665992737, | |
| "learning_rate": 1.619105790373809e-05, | |
| "loss": 0.6227, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.6771876679532907, | |
| "grad_norm": 0.933807909488678, | |
| "learning_rate": 1.614219398973858e-05, | |
| "loss": 0.6392, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 0.6781648507353301, | |
| "grad_norm": 0.9411168098449707, | |
| "learning_rate": 1.609333007573907e-05, | |
| "loss": 0.649, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.6791420335173695, | |
| "grad_norm": 0.5923060178756714, | |
| "learning_rate": 1.6044466161739554e-05, | |
| "loss": 0.6286, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.6801192162994089, | |
| "grad_norm": 0.744339108467102, | |
| "learning_rate": 1.5995602247740043e-05, | |
| "loss": 0.6178, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.6810963990814481, | |
| "grad_norm": 1.0202040672302246, | |
| "learning_rate": 1.5946738333740534e-05, | |
| "loss": 0.6254, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 0.6820735818634875, | |
| "grad_norm": 0.8653994798660278, | |
| "learning_rate": 1.5897874419741022e-05, | |
| "loss": 0.6214, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.6830507646455269, | |
| "grad_norm": 0.4566790461540222, | |
| "learning_rate": 1.584901050574151e-05, | |
| "loss": 0.6517, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 0.6840279474275663, | |
| "grad_norm": 0.9629371166229248, | |
| "learning_rate": 1.5800146591741998e-05, | |
| "loss": 0.6359, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.6850051302096057, | |
| "grad_norm": 0.7253994941711426, | |
| "learning_rate": 1.5751282677742486e-05, | |
| "loss": 0.6405, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 0.6859823129916451, | |
| "grad_norm": 0.8287329077720642, | |
| "learning_rate": 1.5702418763742978e-05, | |
| "loss": 0.6085, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 0.6869594957736844, | |
| "grad_norm": 0.5002869367599487, | |
| "learning_rate": 1.5653554849743466e-05, | |
| "loss": 0.6255, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 0.6879366785557238, | |
| "grad_norm": 0.4376012682914734, | |
| "learning_rate": 1.5604690935743954e-05, | |
| "loss": 0.5933, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.6889138613377632, | |
| "grad_norm": 0.756737232208252, | |
| "learning_rate": 1.5555827021744442e-05, | |
| "loss": 0.609, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.6898910441198026, | |
| "grad_norm": 1.1462029218673706, | |
| "learning_rate": 1.5506963107744933e-05, | |
| "loss": 0.6349, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 0.690868226901842, | |
| "grad_norm": 0.5806009769439697, | |
| "learning_rate": 1.545809919374542e-05, | |
| "loss": 0.6242, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 0.6918454096838814, | |
| "grad_norm": 0.41798803210258484, | |
| "learning_rate": 1.540923527974591e-05, | |
| "loss": 0.6688, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.6928225924659207, | |
| "grad_norm": 0.5598849058151245, | |
| "learning_rate": 1.5360371365746398e-05, | |
| "loss": 0.6371, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 0.6937997752479601, | |
| "grad_norm": 1.0417990684509277, | |
| "learning_rate": 1.5311507451746886e-05, | |
| "loss": 0.5966, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.6947769580299995, | |
| "grad_norm": 0.5547340512275696, | |
| "learning_rate": 1.5262643537747374e-05, | |
| "loss": 0.6221, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 0.6957541408120389, | |
| "grad_norm": 0.4499816298484802, | |
| "learning_rate": 1.5213779623747862e-05, | |
| "loss": 0.6194, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.6967313235940783, | |
| "grad_norm": 2.521627902984619, | |
| "learning_rate": 1.5164915709748351e-05, | |
| "loss": 0.6279, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 0.6977085063761177, | |
| "grad_norm": 1.0940284729003906, | |
| "learning_rate": 1.511605179574884e-05, | |
| "loss": 0.6376, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 0.698685689158157, | |
| "grad_norm": 0.515785276889801, | |
| "learning_rate": 1.5067187881749328e-05, | |
| "loss": 0.6046, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.6996628719401964, | |
| "grad_norm": 0.5034206509590149, | |
| "learning_rate": 1.5018323967749817e-05, | |
| "loss": 0.6036, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.7006400547222358, | |
| "grad_norm": 0.6637565493583679, | |
| "learning_rate": 1.4969460053750305e-05, | |
| "loss": 0.6288, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 0.7016172375042752, | |
| "grad_norm": 0.7677326202392578, | |
| "learning_rate": 1.4920596139750795e-05, | |
| "loss": 0.655, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 0.7025944202863146, | |
| "grad_norm": 0.6796774864196777, | |
| "learning_rate": 1.4871732225751283e-05, | |
| "loss": 0.5955, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 0.703571603068354, | |
| "grad_norm": 0.9217430353164673, | |
| "learning_rate": 1.4822868311751773e-05, | |
| "loss": 0.6268, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.7045487858503933, | |
| "grad_norm": 0.846118688583374, | |
| "learning_rate": 1.4774004397752261e-05, | |
| "loss": 0.6345, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 0.7055259686324327, | |
| "grad_norm": 0.7406280040740967, | |
| "learning_rate": 1.472514048375275e-05, | |
| "loss": 0.631, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 0.7065031514144721, | |
| "grad_norm": 0.8265899419784546, | |
| "learning_rate": 1.4676276569753239e-05, | |
| "loss": 0.6135, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 0.7074803341965115, | |
| "grad_norm": 0.7813581228256226, | |
| "learning_rate": 1.4627412655753727e-05, | |
| "loss": 0.6448, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.7084575169785509, | |
| "grad_norm": 0.4718623757362366, | |
| "learning_rate": 1.4578548741754217e-05, | |
| "loss": 0.5952, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.7094346997605903, | |
| "grad_norm": 2.193324565887451, | |
| "learning_rate": 1.4529684827754703e-05, | |
| "loss": 0.6199, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 0.7104118825426295, | |
| "grad_norm": 1.0357561111450195, | |
| "learning_rate": 1.4480820913755191e-05, | |
| "loss": 0.6342, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 0.711389065324669, | |
| "grad_norm": 1.0319572687149048, | |
| "learning_rate": 1.4431956999755681e-05, | |
| "loss": 0.5836, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.7123662481067083, | |
| "grad_norm": 1.0852116346359253, | |
| "learning_rate": 1.4383093085756169e-05, | |
| "loss": 0.6246, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 0.7133434308887477, | |
| "grad_norm": 0.5591370463371277, | |
| "learning_rate": 1.4334229171756659e-05, | |
| "loss": 0.6022, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.7143206136707871, | |
| "grad_norm": 1.129408836364746, | |
| "learning_rate": 1.4285365257757147e-05, | |
| "loss": 0.6414, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 0.7152977964528265, | |
| "grad_norm": 0.9241653680801392, | |
| "learning_rate": 1.4236501343757635e-05, | |
| "loss": 0.5954, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.7162749792348658, | |
| "grad_norm": 0.5140904188156128, | |
| "learning_rate": 1.4187637429758125e-05, | |
| "loss": 0.6499, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 0.7172521620169052, | |
| "grad_norm": 0.8134740591049194, | |
| "learning_rate": 1.4138773515758613e-05, | |
| "loss": 0.6199, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 0.7182293447989446, | |
| "grad_norm": 0.8259909749031067, | |
| "learning_rate": 1.4089909601759102e-05, | |
| "loss": 0.6181, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.719206527580984, | |
| "grad_norm": 0.7081485390663147, | |
| "learning_rate": 1.404104568775959e-05, | |
| "loss": 0.6056, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.7201837103630234, | |
| "grad_norm": 0.7906745076179504, | |
| "learning_rate": 1.399218177376008e-05, | |
| "loss": 0.6341, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 0.7211608931450628, | |
| "grad_norm": 0.5661380290985107, | |
| "learning_rate": 1.3943317859760568e-05, | |
| "loss": 0.621, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 0.7221380759271022, | |
| "grad_norm": 1.0971596240997314, | |
| "learning_rate": 1.3894453945761058e-05, | |
| "loss": 0.6261, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 0.7231152587091415, | |
| "grad_norm": 1.6842643022537231, | |
| "learning_rate": 1.3845590031761546e-05, | |
| "loss": 0.6065, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.7240924414911809, | |
| "grad_norm": 1.0033600330352783, | |
| "learning_rate": 1.3796726117762033e-05, | |
| "loss": 0.6364, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 0.7250696242732203, | |
| "grad_norm": 0.8704243898391724, | |
| "learning_rate": 1.374786220376252e-05, | |
| "loss": 0.6259, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 0.7260468070552597, | |
| "grad_norm": 0.855398416519165, | |
| "learning_rate": 1.369899828976301e-05, | |
| "loss": 0.653, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 0.7270239898372991, | |
| "grad_norm": 1.733904480934143, | |
| "learning_rate": 1.3650134375763498e-05, | |
| "loss": 0.6284, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.7280011726193385, | |
| "grad_norm": 0.49585819244384766, | |
| "learning_rate": 1.3601270461763988e-05, | |
| "loss": 0.6165, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.7289783554013778, | |
| "grad_norm": 0.5818326473236084, | |
| "learning_rate": 1.3552406547764476e-05, | |
| "loss": 0.6403, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 0.7299555381834172, | |
| "grad_norm": 0.8778244853019714, | |
| "learning_rate": 1.3503542633764964e-05, | |
| "loss": 0.5963, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 0.7309327209654566, | |
| "grad_norm": 0.6378918290138245, | |
| "learning_rate": 1.3454678719765454e-05, | |
| "loss": 0.6242, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.731909903747496, | |
| "grad_norm": 0.792775571346283, | |
| "learning_rate": 1.3405814805765942e-05, | |
| "loss": 0.6348, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 0.7328870865295354, | |
| "grad_norm": 0.8906835317611694, | |
| "learning_rate": 1.3356950891766432e-05, | |
| "loss": 0.6074, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.7338642693115748, | |
| "grad_norm": 0.7266893982887268, | |
| "learning_rate": 1.330808697776692e-05, | |
| "loss": 0.6253, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 0.7348414520936141, | |
| "grad_norm": 0.6896129250526428, | |
| "learning_rate": 1.325922306376741e-05, | |
| "loss": 0.6273, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.7358186348756535, | |
| "grad_norm": 1.0812867879867554, | |
| "learning_rate": 1.3210359149767898e-05, | |
| "loss": 0.6474, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 0.7367958176576929, | |
| "grad_norm": 0.6664975881576538, | |
| "learning_rate": 1.3161495235768388e-05, | |
| "loss": 0.6114, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 0.7377730004397323, | |
| "grad_norm": 0.6565041542053223, | |
| "learning_rate": 1.3112631321768876e-05, | |
| "loss": 0.6059, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.7387501832217717, | |
| "grad_norm": 0.5191747546195984, | |
| "learning_rate": 1.3063767407769362e-05, | |
| "loss": 0.6, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.7397273660038111, | |
| "grad_norm": 0.9525347948074341, | |
| "learning_rate": 1.301490349376985e-05, | |
| "loss": 0.6032, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 0.7407045487858503, | |
| "grad_norm": 1.1167237758636475, | |
| "learning_rate": 1.296603957977034e-05, | |
| "loss": 0.6095, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 0.7416817315678897, | |
| "grad_norm": 0.8300033807754517, | |
| "learning_rate": 1.2917175665770828e-05, | |
| "loss": 0.6246, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 0.7426589143499291, | |
| "grad_norm": 0.7098196148872375, | |
| "learning_rate": 1.2868311751771318e-05, | |
| "loss": 0.6188, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.7436360971319685, | |
| "grad_norm": 0.42002958059310913, | |
| "learning_rate": 1.2819447837771806e-05, | |
| "loss": 0.5943, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 0.7446132799140079, | |
| "grad_norm": 0.7477664947509766, | |
| "learning_rate": 1.2770583923772295e-05, | |
| "loss": 0.6368, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 0.7455904626960473, | |
| "grad_norm": 1.2381956577301025, | |
| "learning_rate": 1.2721720009772783e-05, | |
| "loss": 0.6528, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 0.7465676454780866, | |
| "grad_norm": 0.46650367975234985, | |
| "learning_rate": 1.2672856095773272e-05, | |
| "loss": 0.6062, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.747544828260126, | |
| "grad_norm": 0.9223760366439819, | |
| "learning_rate": 1.2623992181773761e-05, | |
| "loss": 0.6386, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.7485220110421654, | |
| "grad_norm": 0.6782642602920532, | |
| "learning_rate": 1.257512826777425e-05, | |
| "loss": 0.5926, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 0.7494991938242048, | |
| "grad_norm": 0.8533148765563965, | |
| "learning_rate": 1.2526264353774739e-05, | |
| "loss": 0.6076, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 0.7504763766062442, | |
| "grad_norm": 0.6998764276504517, | |
| "learning_rate": 1.2477400439775225e-05, | |
| "loss": 0.6136, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.7514535593882836, | |
| "grad_norm": 0.4632514715194702, | |
| "learning_rate": 1.2428536525775715e-05, | |
| "loss": 0.6174, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 0.7524307421703229, | |
| "grad_norm": 0.6624991297721863, | |
| "learning_rate": 1.2379672611776203e-05, | |
| "loss": 0.6053, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.7534079249523623, | |
| "grad_norm": 0.8521330952644348, | |
| "learning_rate": 1.2330808697776693e-05, | |
| "loss": 0.6261, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 0.7543851077344017, | |
| "grad_norm": 0.6917625665664673, | |
| "learning_rate": 1.2281944783777181e-05, | |
| "loss": 0.6049, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.7553622905164411, | |
| "grad_norm": 0.4985372722148895, | |
| "learning_rate": 1.2233080869777671e-05, | |
| "loss": 0.6057, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 0.7563394732984805, | |
| "grad_norm": 0.6484245657920837, | |
| "learning_rate": 1.2184216955778159e-05, | |
| "loss": 0.602, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 0.7573166560805199, | |
| "grad_norm": 0.7993507981300354, | |
| "learning_rate": 1.2135353041778647e-05, | |
| "loss": 0.5809, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.7582938388625592, | |
| "grad_norm": 0.6944275498390198, | |
| "learning_rate": 1.2086489127779135e-05, | |
| "loss": 0.5959, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.7592710216445986, | |
| "grad_norm": 0.6688080430030823, | |
| "learning_rate": 1.2037625213779625e-05, | |
| "loss": 0.6038, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 0.760248204426638, | |
| "grad_norm": 0.8234009742736816, | |
| "learning_rate": 1.1988761299780113e-05, | |
| "loss": 0.6287, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 0.7612253872086774, | |
| "grad_norm": 1.0987696647644043, | |
| "learning_rate": 1.1939897385780601e-05, | |
| "loss": 0.631, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 0.7622025699907168, | |
| "grad_norm": 0.7760794758796692, | |
| "learning_rate": 1.189103347178109e-05, | |
| "loss": 0.6356, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.7631797527727562, | |
| "grad_norm": 1.422297716140747, | |
| "learning_rate": 1.1842169557781579e-05, | |
| "loss": 0.5983, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 0.7641569355547956, | |
| "grad_norm": 0.7743082046508789, | |
| "learning_rate": 1.1793305643782067e-05, | |
| "loss": 0.6132, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 0.7651341183368349, | |
| "grad_norm": 1.0263071060180664, | |
| "learning_rate": 1.1744441729782555e-05, | |
| "loss": 0.6364, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 0.7661113011188743, | |
| "grad_norm": 0.49797773361206055, | |
| "learning_rate": 1.1695577815783045e-05, | |
| "loss": 0.6384, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.7670884839009137, | |
| "grad_norm": 0.58949214220047, | |
| "learning_rate": 1.1646713901783533e-05, | |
| "loss": 0.6176, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.7680656666829531, | |
| "grad_norm": 0.8523328304290771, | |
| "learning_rate": 1.1597849987784022e-05, | |
| "loss": 0.6238, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 0.7690428494649925, | |
| "grad_norm": 2.231853723526001, | |
| "learning_rate": 1.154898607378451e-05, | |
| "loss": 0.6553, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 0.7700200322470319, | |
| "grad_norm": 0.7179421782493591, | |
| "learning_rate": 1.1500122159785e-05, | |
| "loss": 0.6222, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.7709972150290711, | |
| "grad_norm": 0.7334624528884888, | |
| "learning_rate": 1.1451258245785488e-05, | |
| "loss": 0.6513, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 0.7719743978111105, | |
| "grad_norm": 0.8650888204574585, | |
| "learning_rate": 1.1402394331785976e-05, | |
| "loss": 0.6382, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.77295158059315, | |
| "grad_norm": 1.277421474456787, | |
| "learning_rate": 1.1353530417786465e-05, | |
| "loss": 0.6032, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 0.7739287633751893, | |
| "grad_norm": 0.4764556288719177, | |
| "learning_rate": 1.1304666503786954e-05, | |
| "loss": 0.5852, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.7749059461572287, | |
| "grad_norm": 0.7180933952331543, | |
| "learning_rate": 1.1255802589787442e-05, | |
| "loss": 0.6271, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 0.7758831289392681, | |
| "grad_norm": 0.6978940367698669, | |
| "learning_rate": 1.1206938675787932e-05, | |
| "loss": 0.6252, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 0.7768603117213074, | |
| "grad_norm": 0.9205247759819031, | |
| "learning_rate": 1.115807476178842e-05, | |
| "loss": 0.6227, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.7778374945033468, | |
| "grad_norm": 0.6126120686531067, | |
| "learning_rate": 1.1109210847788908e-05, | |
| "loss": 0.6164, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.7788146772853862, | |
| "grad_norm": 0.660234808921814, | |
| "learning_rate": 1.1060346933789396e-05, | |
| "loss": 0.6336, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 0.7797918600674256, | |
| "grad_norm": 0.5239884257316589, | |
| "learning_rate": 1.1011483019789886e-05, | |
| "loss": 0.6324, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 0.780769042849465, | |
| "grad_norm": 0.6763221621513367, | |
| "learning_rate": 1.0962619105790374e-05, | |
| "loss": 0.6063, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 0.7817462256315044, | |
| "grad_norm": 0.6201728582382202, | |
| "learning_rate": 1.0913755191790862e-05, | |
| "loss": 0.6168, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.7827234084135437, | |
| "grad_norm": 0.8859091997146606, | |
| "learning_rate": 1.0864891277791352e-05, | |
| "loss": 0.593, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 0.7837005911955831, | |
| "grad_norm": 0.7334877848625183, | |
| "learning_rate": 1.081602736379184e-05, | |
| "loss": 0.6225, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 0.7846777739776225, | |
| "grad_norm": 0.49573615193367004, | |
| "learning_rate": 1.076716344979233e-05, | |
| "loss": 0.6007, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 0.7856549567596619, | |
| "grad_norm": 1.1509833335876465, | |
| "learning_rate": 1.0718299535792818e-05, | |
| "loss": 0.587, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.7866321395417013, | |
| "grad_norm": 0.6591099500656128, | |
| "learning_rate": 1.0669435621793306e-05, | |
| "loss": 0.6462, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.7876093223237407, | |
| "grad_norm": 0.7265052199363708, | |
| "learning_rate": 1.0620571707793794e-05, | |
| "loss": 0.6183, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 0.78858650510578, | |
| "grad_norm": 1.2156593799591064, | |
| "learning_rate": 1.0571707793794284e-05, | |
| "loss": 0.5811, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 0.7895636878878194, | |
| "grad_norm": 0.960753858089447, | |
| "learning_rate": 1.0522843879794772e-05, | |
| "loss": 0.6054, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.7905408706698588, | |
| "grad_norm": 1.5062034130096436, | |
| "learning_rate": 1.0473979965795262e-05, | |
| "loss": 0.599, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 0.7915180534518982, | |
| "grad_norm": 0.7047529816627502, | |
| "learning_rate": 1.042511605179575e-05, | |
| "loss": 0.6149, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.7924952362339376, | |
| "grad_norm": 0.4432947337627411, | |
| "learning_rate": 1.037625213779624e-05, | |
| "loss": 0.6182, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 0.793472419015977, | |
| "grad_norm": 0.6442515850067139, | |
| "learning_rate": 1.0327388223796726e-05, | |
| "loss": 0.5864, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.7944496017980163, | |
| "grad_norm": 1.2354743480682373, | |
| "learning_rate": 1.0278524309797215e-05, | |
| "loss": 0.6068, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 0.7954267845800557, | |
| "grad_norm": 0.7862667441368103, | |
| "learning_rate": 1.0229660395797704e-05, | |
| "loss": 0.6072, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 0.7964039673620951, | |
| "grad_norm": 0.5142656564712524, | |
| "learning_rate": 1.0180796481798192e-05, | |
| "loss": 0.6009, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.7973811501441345, | |
| "grad_norm": 0.8478522300720215, | |
| "learning_rate": 1.0131932567798681e-05, | |
| "loss": 0.5979, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.7983583329261739, | |
| "grad_norm": 0.5929884910583496, | |
| "learning_rate": 1.008306865379917e-05, | |
| "loss": 0.6076, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 0.7993355157082133, | |
| "grad_norm": 0.8067489862442017, | |
| "learning_rate": 1.003420473979966e-05, | |
| "loss": 0.6123, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 0.8003126984902525, | |
| "grad_norm": 1.3287664651870728, | |
| "learning_rate": 9.985340825800147e-06, | |
| "loss": 0.6151, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 0.801289881272292, | |
| "grad_norm": 0.7158493995666504, | |
| "learning_rate": 9.936476911800635e-06, | |
| "loss": 0.5906, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.8022670640543313, | |
| "grad_norm": 0.7307409644126892, | |
| "learning_rate": 9.887612997801123e-06, | |
| "loss": 0.6165, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 0.8032442468363707, | |
| "grad_norm": 0.6903741359710693, | |
| "learning_rate": 9.838749083801613e-06, | |
| "loss": 0.6175, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 0.8042214296184101, | |
| "grad_norm": 0.7754660248756409, | |
| "learning_rate": 9.789885169802101e-06, | |
| "loss": 0.6349, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 0.8051986124004495, | |
| "grad_norm": 0.7808040976524353, | |
| "learning_rate": 9.741021255802591e-06, | |
| "loss": 0.5909, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.8061757951824888, | |
| "grad_norm": 0.8575007915496826, | |
| "learning_rate": 9.692157341803079e-06, | |
| "loss": 0.5861, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.8071529779645282, | |
| "grad_norm": 1.18577241897583, | |
| "learning_rate": 9.643293427803569e-06, | |
| "loss": 0.6137, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 0.8081301607465676, | |
| "grad_norm": 0.7913909554481506, | |
| "learning_rate": 9.594429513804057e-06, | |
| "loss": 0.6077, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 0.809107343528607, | |
| "grad_norm": 0.8221011161804199, | |
| "learning_rate": 9.545565599804545e-06, | |
| "loss": 0.5946, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.8100845263106464, | |
| "grad_norm": 0.7047521471977234, | |
| "learning_rate": 9.496701685805033e-06, | |
| "loss": 0.5973, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 0.8110617090926858, | |
| "grad_norm": 0.5717597007751465, | |
| "learning_rate": 9.447837771805523e-06, | |
| "loss": 0.6236, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.8120388918747252, | |
| "grad_norm": 0.93315190076828, | |
| "learning_rate": 9.39897385780601e-06, | |
| "loss": 0.6335, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 0.8130160746567645, | |
| "grad_norm": 0.7691722512245178, | |
| "learning_rate": 9.350109943806499e-06, | |
| "loss": 0.5986, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.8139932574388039, | |
| "grad_norm": 0.8947746157646179, | |
| "learning_rate": 9.301246029806989e-06, | |
| "loss": 0.5995, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 0.8149704402208433, | |
| "grad_norm": 0.8654600381851196, | |
| "learning_rate": 9.252382115807477e-06, | |
| "loss": 0.5844, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 0.8159476230028827, | |
| "grad_norm": 0.6563751697540283, | |
| "learning_rate": 9.203518201807965e-06, | |
| "loss": 0.588, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.8169248057849221, | |
| "grad_norm": 0.756237804889679, | |
| "learning_rate": 9.154654287808453e-06, | |
| "loss": 0.5814, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.8179019885669615, | |
| "grad_norm": 1.106650948524475, | |
| "learning_rate": 9.105790373808943e-06, | |
| "loss": 0.5924, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 0.8188791713490008, | |
| "grad_norm": 0.39193272590637207, | |
| "learning_rate": 9.05692645980943e-06, | |
| "loss": 0.6048, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 0.8198563541310402, | |
| "grad_norm": 0.7022530436515808, | |
| "learning_rate": 9.00806254580992e-06, | |
| "loss": 0.624, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 0.8208335369130796, | |
| "grad_norm": 0.7286639213562012, | |
| "learning_rate": 8.959198631810408e-06, | |
| "loss": 0.5825, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.821810719695119, | |
| "grad_norm": 0.9062661528587341, | |
| "learning_rate": 8.910334717810898e-06, | |
| "loss": 0.6024, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 0.8227879024771584, | |
| "grad_norm": 1.0051745176315308, | |
| "learning_rate": 8.861470803811386e-06, | |
| "loss": 0.5881, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 0.8237650852591978, | |
| "grad_norm": 0.5622514486312866, | |
| "learning_rate": 8.812606889811874e-06, | |
| "loss": 0.625, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 0.8247422680412371, | |
| "grad_norm": 0.80225670337677, | |
| "learning_rate": 8.763742975812362e-06, | |
| "loss": 0.6142, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.8257194508232765, | |
| "grad_norm": 0.7154406905174255, | |
| "learning_rate": 8.714879061812852e-06, | |
| "loss": 0.6009, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.8266966336053159, | |
| "grad_norm": 0.8191014528274536, | |
| "learning_rate": 8.66601514781334e-06, | |
| "loss": 0.6054, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 0.8276738163873553, | |
| "grad_norm": 1.4982640743255615, | |
| "learning_rate": 8.617151233813828e-06, | |
| "loss": 0.5917, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 0.8286509991693947, | |
| "grad_norm": 0.6662930250167847, | |
| "learning_rate": 8.568287319814318e-06, | |
| "loss": 0.6047, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.8296281819514341, | |
| "grad_norm": 0.8533642888069153, | |
| "learning_rate": 8.519423405814806e-06, | |
| "loss": 0.6275, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 0.8306053647334734, | |
| "grad_norm": 1.0405080318450928, | |
| "learning_rate": 8.470559491815294e-06, | |
| "loss": 0.6325, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.8315825475155127, | |
| "grad_norm": 0.3838236629962921, | |
| "learning_rate": 8.421695577815782e-06, | |
| "loss": 0.617, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 0.8325597302975521, | |
| "grad_norm": 0.7229349613189697, | |
| "learning_rate": 8.372831663816272e-06, | |
| "loss": 0.6095, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.8335369130795915, | |
| "grad_norm": 0.538932204246521, | |
| "learning_rate": 8.32396774981676e-06, | |
| "loss": 0.597, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 0.834514095861631, | |
| "grad_norm": 0.9081258177757263, | |
| "learning_rate": 8.27510383581725e-06, | |
| "loss": 0.576, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 0.8354912786436703, | |
| "grad_norm": 1.1647875308990479, | |
| "learning_rate": 8.226239921817738e-06, | |
| "loss": 0.6177, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.8364684614257096, | |
| "grad_norm": 0.5544024705886841, | |
| "learning_rate": 8.177376007818228e-06, | |
| "loss": 0.5944, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.837445644207749, | |
| "grad_norm": 0.49571287631988525, | |
| "learning_rate": 8.128512093818716e-06, | |
| "loss": 0.6417, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 0.8384228269897884, | |
| "grad_norm": 0.8068299293518066, | |
| "learning_rate": 8.079648179819204e-06, | |
| "loss": 0.6224, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 0.8394000097718278, | |
| "grad_norm": 0.9682297706604004, | |
| "learning_rate": 8.030784265819692e-06, | |
| "loss": 0.6111, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 0.8403771925538672, | |
| "grad_norm": 1.051151990890503, | |
| "learning_rate": 7.981920351820182e-06, | |
| "loss": 0.6, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.8413543753359066, | |
| "grad_norm": 0.568880558013916, | |
| "learning_rate": 7.93305643782067e-06, | |
| "loss": 0.6129, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 0.8423315581179459, | |
| "grad_norm": 0.7681874632835388, | |
| "learning_rate": 7.88419252382116e-06, | |
| "loss": 0.6291, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 0.8433087408999853, | |
| "grad_norm": 0.7521129250526428, | |
| "learning_rate": 7.835328609821647e-06, | |
| "loss": 0.5983, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 0.8442859236820247, | |
| "grad_norm": 0.6910899877548218, | |
| "learning_rate": 7.786464695822136e-06, | |
| "loss": 0.6065, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.8452631064640641, | |
| "grad_norm": 1.0774552822113037, | |
| "learning_rate": 7.737600781822624e-06, | |
| "loss": 0.6481, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.8462402892461035, | |
| "grad_norm": 0.5744395852088928, | |
| "learning_rate": 7.688736867823113e-06, | |
| "loss": 0.5881, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 0.8472174720281429, | |
| "grad_norm": 0.9754884839057922, | |
| "learning_rate": 7.639872953823601e-06, | |
| "loss": 0.6028, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 0.8481946548101822, | |
| "grad_norm": 0.5664985775947571, | |
| "learning_rate": 7.59100903982409e-06, | |
| "loss": 0.5759, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.8491718375922216, | |
| "grad_norm": 0.7173051238059998, | |
| "learning_rate": 7.542145125824579e-06, | |
| "loss": 0.6038, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 0.850149020374261, | |
| "grad_norm": 0.5157271027565002, | |
| "learning_rate": 7.493281211825068e-06, | |
| "loss": 0.5872, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.8511262031563004, | |
| "grad_norm": 2.847447156906128, | |
| "learning_rate": 7.444417297825557e-06, | |
| "loss": 0.6008, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 0.8521033859383398, | |
| "grad_norm": 1.259730577468872, | |
| "learning_rate": 7.395553383826045e-06, | |
| "loss": 0.6047, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.8530805687203792, | |
| "grad_norm": 0.5175238847732544, | |
| "learning_rate": 7.346689469826533e-06, | |
| "loss": 0.6294, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 0.8540577515024186, | |
| "grad_norm": 0.5168502926826477, | |
| "learning_rate": 7.297825555827022e-06, | |
| "loss": 0.5987, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 0.8550349342844579, | |
| "grad_norm": 0.7485826015472412, | |
| "learning_rate": 7.24896164182751e-06, | |
| "loss": 0.604, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.8560121170664973, | |
| "grad_norm": 1.2643144130706787, | |
| "learning_rate": 7.200097727827999e-06, | |
| "loss": 0.6271, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.8569892998485367, | |
| "grad_norm": 0.598031222820282, | |
| "learning_rate": 7.151233813828488e-06, | |
| "loss": 0.6201, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 0.8579664826305761, | |
| "grad_norm": 0.7994399666786194, | |
| "learning_rate": 7.102369899828977e-06, | |
| "loss": 0.6028, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 0.8589436654126155, | |
| "grad_norm": 0.47928521037101746, | |
| "learning_rate": 7.053505985829466e-06, | |
| "loss": 0.6042, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 0.8599208481946549, | |
| "grad_norm": 0.6901227831840515, | |
| "learning_rate": 7.004642071829953e-06, | |
| "loss": 0.6289, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.8608980309766942, | |
| "grad_norm": 0.9630447030067444, | |
| "learning_rate": 6.955778157830442e-06, | |
| "loss": 0.6097, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 0.8618752137587335, | |
| "grad_norm": 0.42696672677993774, | |
| "learning_rate": 6.906914243830931e-06, | |
| "loss": 0.6314, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 0.862852396540773, | |
| "grad_norm": 0.5964066982269287, | |
| "learning_rate": 6.85805032983142e-06, | |
| "loss": 0.5934, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 0.8638295793228123, | |
| "grad_norm": 0.5652678608894348, | |
| "learning_rate": 6.809186415831909e-06, | |
| "loss": 0.6032, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.8648067621048517, | |
| "grad_norm": 0.6129952669143677, | |
| "learning_rate": 6.7603225018323976e-06, | |
| "loss": 0.6116, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.8657839448868911, | |
| "grad_norm": 0.5786252021789551, | |
| "learning_rate": 6.7114585878328865e-06, | |
| "loss": 0.6042, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 0.8667611276689304, | |
| "grad_norm": 0.9830735325813293, | |
| "learning_rate": 6.662594673833375e-06, | |
| "loss": 0.5763, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 0.8677383104509698, | |
| "grad_norm": 0.7167491316795349, | |
| "learning_rate": 6.613730759833863e-06, | |
| "loss": 0.5774, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.8687154932330092, | |
| "grad_norm": 0.5763813257217407, | |
| "learning_rate": 6.5648668458343515e-06, | |
| "loss": 0.6219, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 0.8696926760150486, | |
| "grad_norm": 0.552343487739563, | |
| "learning_rate": 6.5160029318348404e-06, | |
| "loss": 0.5983, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.870669858797088, | |
| "grad_norm": 0.6471940279006958, | |
| "learning_rate": 6.4671390178353285e-06, | |
| "loss": 0.616, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 0.8716470415791274, | |
| "grad_norm": 0.2821710407733917, | |
| "learning_rate": 6.418275103835817e-06, | |
| "loss": 0.6093, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.8726242243611667, | |
| "grad_norm": 0.8784298896789551, | |
| "learning_rate": 6.369411189836306e-06, | |
| "loss": 0.6004, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 0.8736014071432061, | |
| "grad_norm": 0.5774518847465515, | |
| "learning_rate": 6.320547275836795e-06, | |
| "loss": 0.6177, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 0.8745785899252455, | |
| "grad_norm": 2.489976406097412, | |
| "learning_rate": 6.2716833618372825e-06, | |
| "loss": 0.6294, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.8755557727072849, | |
| "grad_norm": 0.8063492774963379, | |
| "learning_rate": 6.222819447837772e-06, | |
| "loss": 0.5815, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.8765329554893243, | |
| "grad_norm": 0.9328792095184326, | |
| "learning_rate": 6.17395553383826e-06, | |
| "loss": 0.5709, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 0.8775101382713637, | |
| "grad_norm": 1.1980705261230469, | |
| "learning_rate": 6.125091619838749e-06, | |
| "loss": 0.5916, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 0.878487321053403, | |
| "grad_norm": 0.9140294194221497, | |
| "learning_rate": 6.076227705839238e-06, | |
| "loss": 0.5975, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 0.8794645038354424, | |
| "grad_norm": 0.42323464155197144, | |
| "learning_rate": 6.027363791839727e-06, | |
| "loss": 0.5908, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.8804416866174818, | |
| "grad_norm": 0.8265115022659302, | |
| "learning_rate": 5.978499877840215e-06, | |
| "loss": 0.6236, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 0.8814188693995212, | |
| "grad_norm": 0.6848395466804504, | |
| "learning_rate": 5.929635963840704e-06, | |
| "loss": 0.6081, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 0.8823960521815606, | |
| "grad_norm": 0.8593265414237976, | |
| "learning_rate": 5.880772049841193e-06, | |
| "loss": 0.5926, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 0.8833732349636, | |
| "grad_norm": 0.9084621667861938, | |
| "learning_rate": 5.831908135841682e-06, | |
| "loss": 0.5795, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 0.8843504177456393, | |
| "grad_norm": 0.5158432126045227, | |
| "learning_rate": 5.78304422184217e-06, | |
| "loss": 0.5887, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.8853276005276787, | |
| "grad_norm": 0.9710085988044739, | |
| "learning_rate": 5.734180307842659e-06, | |
| "loss": 0.5888, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 0.8863047833097181, | |
| "grad_norm": 0.4963410794734955, | |
| "learning_rate": 5.685316393843147e-06, | |
| "loss": 0.5981, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 0.8872819660917575, | |
| "grad_norm": 0.39078134298324585, | |
| "learning_rate": 5.636452479843636e-06, | |
| "loss": 0.5991, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 0.8882591488737969, | |
| "grad_norm": 0.5350062847137451, | |
| "learning_rate": 5.587588565844124e-06, | |
| "loss": 0.5887, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 0.8892363316558363, | |
| "grad_norm": 0.6059613823890686, | |
| "learning_rate": 5.538724651844613e-06, | |
| "loss": 0.6072, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.8902135144378756, | |
| "grad_norm": 0.4223475158214569, | |
| "learning_rate": 5.489860737845102e-06, | |
| "loss": 0.5866, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 0.891190697219915, | |
| "grad_norm": 0.8053774237632751, | |
| "learning_rate": 5.44099682384559e-06, | |
| "loss": 0.6031, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 0.8921678800019543, | |
| "grad_norm": 0.8851518034934998, | |
| "learning_rate": 5.392132909846079e-06, | |
| "loss": 0.5766, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 0.8931450627839937, | |
| "grad_norm": 0.6842949986457825, | |
| "learning_rate": 5.3432689958465675e-06, | |
| "loss": 0.5593, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 0.8941222455660331, | |
| "grad_norm": 0.8229865431785583, | |
| "learning_rate": 5.2944050818470564e-06, | |
| "loss": 0.5802, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.8950994283480725, | |
| "grad_norm": 0.7434598207473755, | |
| "learning_rate": 5.2455411678475445e-06, | |
| "loss": 0.6004, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 0.8960766111301118, | |
| "grad_norm": 0.47747936844825745, | |
| "learning_rate": 5.196677253848033e-06, | |
| "loss": 0.5937, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 0.8970537939121512, | |
| "grad_norm": 0.7917630076408386, | |
| "learning_rate": 5.147813339848522e-06, | |
| "loss": 0.6119, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 0.8980309766941906, | |
| "grad_norm": 0.8409056663513184, | |
| "learning_rate": 5.098949425849011e-06, | |
| "loss": 0.6004, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 0.89900815947623, | |
| "grad_norm": 0.5597165822982788, | |
| "learning_rate": 5.050085511849499e-06, | |
| "loss": 0.6076, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.8999853422582694, | |
| "grad_norm": 0.5740428566932678, | |
| "learning_rate": 5.001221597849988e-06, | |
| "loss": 0.5925, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 0.9009625250403088, | |
| "grad_norm": 0.739456832408905, | |
| "learning_rate": 4.952357683850477e-06, | |
| "loss": 0.5945, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 0.9019397078223482, | |
| "grad_norm": 0.5648947954177856, | |
| "learning_rate": 4.903493769850965e-06, | |
| "loss": 0.5712, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 0.9029168906043875, | |
| "grad_norm": 0.5736894607543945, | |
| "learning_rate": 4.854629855851454e-06, | |
| "loss": 0.6111, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 0.9038940733864269, | |
| "grad_norm": 0.7701774835586548, | |
| "learning_rate": 4.805765941851942e-06, | |
| "loss": 0.599, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.9048712561684663, | |
| "grad_norm": 0.7485201358795166, | |
| "learning_rate": 4.756902027852431e-06, | |
| "loss": 0.5842, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 0.9058484389505057, | |
| "grad_norm": 0.6121499538421631, | |
| "learning_rate": 4.70803811385292e-06, | |
| "loss": 0.6198, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 0.9068256217325451, | |
| "grad_norm": 0.7362948656082153, | |
| "learning_rate": 4.659174199853408e-06, | |
| "loss": 0.6123, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 0.9078028045145845, | |
| "grad_norm": 0.606191098690033, | |
| "learning_rate": 4.610310285853897e-06, | |
| "loss": 0.6028, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 0.9087799872966238, | |
| "grad_norm": 0.6618565917015076, | |
| "learning_rate": 4.561446371854386e-06, | |
| "loss": 0.5963, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.9097571700786632, | |
| "grad_norm": 1.5052400827407837, | |
| "learning_rate": 4.512582457854874e-06, | |
| "loss": 0.603, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 0.9107343528607026, | |
| "grad_norm": 0.8985777497291565, | |
| "learning_rate": 4.463718543855363e-06, | |
| "loss": 0.6156, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 0.911711535642742, | |
| "grad_norm": 0.8037851452827454, | |
| "learning_rate": 4.414854629855852e-06, | |
| "loss": 0.6406, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 0.9126887184247814, | |
| "grad_norm": 0.49996376037597656, | |
| "learning_rate": 4.365990715856341e-06, | |
| "loss": 0.6139, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 0.9136659012068208, | |
| "grad_norm": 0.8254772424697876, | |
| "learning_rate": 4.317126801856829e-06, | |
| "loss": 0.6149, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.9146430839888601, | |
| "grad_norm": 0.7700937390327454, | |
| "learning_rate": 4.268262887857318e-06, | |
| "loss": 0.5993, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 0.9156202667708995, | |
| "grad_norm": 0.38511478900909424, | |
| "learning_rate": 4.2193989738578065e-06, | |
| "loss": 0.6232, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 0.9165974495529389, | |
| "grad_norm": 0.6567879319190979, | |
| "learning_rate": 4.1705350598582955e-06, | |
| "loss": 0.5813, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 0.9175746323349783, | |
| "grad_norm": 0.8876736760139465, | |
| "learning_rate": 4.1216711458587835e-06, | |
| "loss": 0.5938, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 0.9185518151170177, | |
| "grad_norm": 0.41622501611709595, | |
| "learning_rate": 4.0728072318592724e-06, | |
| "loss": 0.579, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.9195289978990571, | |
| "grad_norm": 0.7455472946166992, | |
| "learning_rate": 4.0239433178597605e-06, | |
| "loss": 0.6011, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 0.9205061806810964, | |
| "grad_norm": 0.5976389646530151, | |
| "learning_rate": 3.975079403860249e-06, | |
| "loss": 0.6143, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 0.9214833634631358, | |
| "grad_norm": 0.7773202657699585, | |
| "learning_rate": 3.9262154898607375e-06, | |
| "loss": 0.5796, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 0.9224605462451752, | |
| "grad_norm": 0.5033147931098938, | |
| "learning_rate": 3.877351575861226e-06, | |
| "loss": 0.5994, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 0.9234377290272145, | |
| "grad_norm": 0.7234833240509033, | |
| "learning_rate": 3.828487661861715e-06, | |
| "loss": 0.6102, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.924414911809254, | |
| "grad_norm": 0.4259088635444641, | |
| "learning_rate": 3.7796237478622038e-06, | |
| "loss": 0.5787, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 0.9253920945912933, | |
| "grad_norm": 0.43989598751068115, | |
| "learning_rate": 3.7307598338626923e-06, | |
| "loss": 0.5841, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 0.9263692773733326, | |
| "grad_norm": 0.4430140256881714, | |
| "learning_rate": 3.681895919863181e-06, | |
| "loss": 0.5933, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 0.927346460155372, | |
| "grad_norm": 0.7848074436187744, | |
| "learning_rate": 3.63303200586367e-06, | |
| "loss": 0.6138, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 0.9283236429374114, | |
| "grad_norm": 0.8117037415504456, | |
| "learning_rate": 3.584168091864158e-06, | |
| "loss": 0.5917, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.9293008257194508, | |
| "grad_norm": 0.6667145490646362, | |
| "learning_rate": 3.535304177864647e-06, | |
| "loss": 0.5542, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 0.9302780085014902, | |
| "grad_norm": 0.7902615070343018, | |
| "learning_rate": 3.486440263865136e-06, | |
| "loss": 0.5741, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 0.9312551912835296, | |
| "grad_norm": 0.7067260146141052, | |
| "learning_rate": 3.4375763498656245e-06, | |
| "loss": 0.5961, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 0.9322323740655689, | |
| "grad_norm": 2.328338861465454, | |
| "learning_rate": 3.388712435866113e-06, | |
| "loss": 0.5716, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 0.9332095568476083, | |
| "grad_norm": 1.1518771648406982, | |
| "learning_rate": 3.3398485218666014e-06, | |
| "loss": 0.6306, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.9341867396296477, | |
| "grad_norm": 0.5183611512184143, | |
| "learning_rate": 3.2909846078670904e-06, | |
| "loss": 0.5998, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 0.9351639224116871, | |
| "grad_norm": 0.6827223300933838, | |
| "learning_rate": 3.2421206938675793e-06, | |
| "loss": 0.5948, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 0.9361411051937265, | |
| "grad_norm": 0.6556549668312073, | |
| "learning_rate": 3.1932567798680673e-06, | |
| "loss": 0.6014, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 0.9371182879757659, | |
| "grad_norm": 0.5259923934936523, | |
| "learning_rate": 3.1443928658685562e-06, | |
| "loss": 0.6192, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 0.9380954707578052, | |
| "grad_norm": 0.6890705823898315, | |
| "learning_rate": 3.095528951869045e-06, | |
| "loss": 0.5922, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.9390726535398446, | |
| "grad_norm": 0.5739189386367798, | |
| "learning_rate": 3.0466650378695336e-06, | |
| "loss": 0.572, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 0.940049836321884, | |
| "grad_norm": 0.4784778356552124, | |
| "learning_rate": 2.997801123870022e-06, | |
| "loss": 0.5924, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 0.9410270191039234, | |
| "grad_norm": 0.4622921049594879, | |
| "learning_rate": 2.9489372098705106e-06, | |
| "loss": 0.6223, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 0.9420042018859628, | |
| "grad_norm": 0.7146719098091125, | |
| "learning_rate": 2.900073295870999e-06, | |
| "loss": 0.589, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 0.9429813846680022, | |
| "grad_norm": 0.5467257499694824, | |
| "learning_rate": 2.851209381871488e-06, | |
| "loss": 0.6197, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 0.9439585674500416, | |
| "grad_norm": 0.6875296831130981, | |
| "learning_rate": 2.8023454678719765e-06, | |
| "loss": 0.588, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 0.9449357502320809, | |
| "grad_norm": 0.8921650052070618, | |
| "learning_rate": 2.7534815538724654e-06, | |
| "loss": 0.6008, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 0.9459129330141203, | |
| "grad_norm": 0.6401572823524475, | |
| "learning_rate": 2.704617639872954e-06, | |
| "loss": 0.5858, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 0.9468901157961597, | |
| "grad_norm": 0.7191618084907532, | |
| "learning_rate": 2.655753725873443e-06, | |
| "loss": 0.5763, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 0.9478672985781991, | |
| "grad_norm": 0.6186959147453308, | |
| "learning_rate": 2.6068898118739313e-06, | |
| "loss": 0.5695, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.9488444813602385, | |
| "grad_norm": 0.36472517251968384, | |
| "learning_rate": 2.5580258978744198e-06, | |
| "loss": 0.5819, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 0.9498216641422779, | |
| "grad_norm": 1.0958882570266724, | |
| "learning_rate": 2.5091619838749083e-06, | |
| "loss": 0.6167, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 0.9507988469243172, | |
| "grad_norm": 0.7372691631317139, | |
| "learning_rate": 2.460298069875397e-06, | |
| "loss": 0.5936, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 0.9517760297063566, | |
| "grad_norm": 0.4143502116203308, | |
| "learning_rate": 2.4114341558758857e-06, | |
| "loss": 0.5873, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 0.952753212488396, | |
| "grad_norm": 1.134059190750122, | |
| "learning_rate": 2.3625702418763746e-06, | |
| "loss": 0.6143, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.9537303952704353, | |
| "grad_norm": 0.40213558077812195, | |
| "learning_rate": 2.313706327876863e-06, | |
| "loss": 0.5725, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 0.9547075780524747, | |
| "grad_norm": 0.5387831926345825, | |
| "learning_rate": 2.264842413877352e-06, | |
| "loss": 0.5959, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 0.9556847608345141, | |
| "grad_norm": 0.8288729786872864, | |
| "learning_rate": 2.2159784998778405e-06, | |
| "loss": 0.5881, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 0.9566619436165534, | |
| "grad_norm": 0.7433648109436035, | |
| "learning_rate": 2.167114585878329e-06, | |
| "loss": 0.5881, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 0.9576391263985928, | |
| "grad_norm": 0.7633154392242432, | |
| "learning_rate": 2.1182506718788174e-06, | |
| "loss": 0.6218, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.9586163091806322, | |
| "grad_norm": 0.5039961338043213, | |
| "learning_rate": 2.069386757879306e-06, | |
| "loss": 0.5973, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 0.9595934919626716, | |
| "grad_norm": 0.9047883152961731, | |
| "learning_rate": 2.020522843879795e-06, | |
| "loss": 0.5741, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 0.960570674744711, | |
| "grad_norm": 0.6591965556144714, | |
| "learning_rate": 1.9716589298802833e-06, | |
| "loss": 0.5914, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 0.9615478575267504, | |
| "grad_norm": 0.6809371113777161, | |
| "learning_rate": 1.9227950158807722e-06, | |
| "loss": 0.5876, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 0.9625250403087897, | |
| "grad_norm": 0.5399168133735657, | |
| "learning_rate": 1.8739311018812607e-06, | |
| "loss": 0.5921, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 0.9635022230908291, | |
| "grad_norm": 0.6308420896530151, | |
| "learning_rate": 1.8250671878817494e-06, | |
| "loss": 0.5805, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 0.9644794058728685, | |
| "grad_norm": 0.8909119963645935, | |
| "learning_rate": 1.776203273882238e-06, | |
| "loss": 0.6062, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 0.9654565886549079, | |
| "grad_norm": 0.5217241048812866, | |
| "learning_rate": 1.7273393598827268e-06, | |
| "loss": 0.5866, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 0.9664337714369473, | |
| "grad_norm": 0.5441256165504456, | |
| "learning_rate": 1.6784754458832153e-06, | |
| "loss": 0.5889, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 0.9674109542189867, | |
| "grad_norm": 0.6473023891448975, | |
| "learning_rate": 1.629611531883704e-06, | |
| "loss": 0.6066, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.968388137001026, | |
| "grad_norm": 0.7462273836135864, | |
| "learning_rate": 1.5807476178841925e-06, | |
| "loss": 0.5926, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 0.9693653197830654, | |
| "grad_norm": 0.4794386029243469, | |
| "learning_rate": 1.5318837038846812e-06, | |
| "loss": 0.5856, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 0.9703425025651048, | |
| "grad_norm": 0.5676984190940857, | |
| "learning_rate": 1.48301978988517e-06, | |
| "loss": 0.5797, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 0.9713196853471442, | |
| "grad_norm": 0.7232435941696167, | |
| "learning_rate": 1.4341558758856586e-06, | |
| "loss": 0.6122, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 0.9722968681291836, | |
| "grad_norm": 0.6773326396942139, | |
| "learning_rate": 1.385291961886147e-06, | |
| "loss": 0.5877, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 0.973274050911223, | |
| "grad_norm": 0.522219717502594, | |
| "learning_rate": 1.3364280478866358e-06, | |
| "loss": 0.5819, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 0.9742512336932623, | |
| "grad_norm": 0.7057138681411743, | |
| "learning_rate": 1.2875641338871245e-06, | |
| "loss": 0.6047, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 0.9752284164753017, | |
| "grad_norm": 0.8740668296813965, | |
| "learning_rate": 1.2387002198876132e-06, | |
| "loss": 0.5909, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 0.9762055992573411, | |
| "grad_norm": 0.6199445128440857, | |
| "learning_rate": 1.1898363058881017e-06, | |
| "loss": 0.5972, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 0.9771827820393805, | |
| "grad_norm": 0.8061028122901917, | |
| "learning_rate": 1.1409723918885904e-06, | |
| "loss": 0.5958, | |
| "step": 100000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 102335, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.254292317011968e+18, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |